-- access.asn
--$Revision: 6.0 $
--*********************************************************************
--
--  access.asn
--
--     messages for data access
--
--*********************************************************************

NCBI-Access DEFINITIONS ::=
BEGIN

EXPORTS Link-set;

    -- links between same class = neighbors
    -- links between other classes = links

-- Link-set: a link count plus optional lists of link uids and weights.
Link-set ::= SEQUENCE {
    num     INTEGER,                       -- number of links to this doc type
    uids    SEQUENCE OF INTEGER OPTIONAL,  -- the links
    weights SEQUENCE OF INTEGER OPTIONAL   -- the weights
}


END

-- biblio.asn
--$Revision: 6.2 $
--****************************************************************
--
--  NCBI Bibliographic data elements
--  by James Ostell, 1990
--
--  Taken from the American National Standard for
--      Bibliographic References
--      ANSI Z39.29-1977
--  Version 3.0 - June 1994
--  PubMedId added in 1996
--  ArticleIds and eprint elements added in 1999
--
--****************************************************************

NCBI-Biblio DEFINITIONS ::=
BEGIN

EXPORTS Cit-art, Cit-jour, Cit-book, Cit-pat, Cit-let, Id-pat, Cit-gen,
        Cit-proc, Cit-sub, Title, Author, PubMedId;

IMPORTS Person-id, Date, Dbtag FROM NCBI-General;

    -- Article Ids

-- One identifier for an article; there can be many ids for an article.
-- The alternative types are declared right after this definition.
ArticleId ::= CHOICE {
    pubmed  PubMedId,
    medline MedlineUID,
    doi     DOI,
    pii     PII,
    pmcid   PmcID,
    pmcpid  PmcPid,
    pmpid   PmPid,
    other   Dbtag       -- generic catch all
}
    
-- Scalar identifier types referenced by ArticleId.
PubMedId   ::= INTEGER          -- Id from the PubMed database at NCBI
MedlineUID ::= INTEGER          -- Id from MEDLINE
DOI        ::= VisibleString    -- Document Object Identifier
PII        ::= VisibleString    -- Controlled Publisher Identifier
PmcID      ::= INTEGER          -- PubMed Central Id
PmcPid     ::= VisibleString    -- Publisher Id supplied to PubMed Central
PmPid      ::= VisibleString    -- Publisher Id supplied to PubMed

-- Collection of article ids (used by Cit-art.ids).
ArticleIdSet ::= SET OF ArticleId

    -- Status Dates

-- Points of publication: the milestones a PubStatusDate can record.
PubStatus ::= INTEGER {
    received     (1),    -- date manuscript received for review
    accepted     (2),    -- accepted for publication
    epublish     (3),    -- published electronically by publisher
    ppublish     (4),    -- published in print by publisher
    revised      (5),    -- article revised by publisher/author
    pmc          (6),    -- article first appeared in PubMed Central
    pmcr         (7),    -- article revision in PubMed Central
    pubmed       (8),    -- article citation first appeared in PubMed
    pubmedr      (9),    -- article citation revision in PubMed
    aheadofprint (10),   -- epublish, but will be followed by print
    premedline   (11),   -- date into PreMedline status
    medline      (12),   -- date made a MEDLINE record
    other        (255)
}

-- A publication status paired with its date.  Declared as a structure
-- so that fields can be added later.
PubStatusDate ::= SEQUENCE {
    pubstatus PubStatus,
    date      Date          -- time may be added later
}

PubStatusDateSet ::= SET OF PubStatusDate
    
    -- Citation Types

-- Citation of an article published in a journal, book or proceedings.
Cit-art ::= SEQUENCE {
    title   Title OPTIONAL,         -- title of paper (ANSI requires)
    authors Auth-list OPTIONAL,     -- authors (ANSI requires)
    from CHOICE {                   -- where the article appeared
        journal Cit-jour,
        book    Cit-book,
        proc    Cit-proc
    },
    ids     ArticleIdSet OPTIONAL   -- lots of ids
}

-- Journal citation.
Cit-jour ::= SEQUENCE {
    title Title,        -- title of journal
    imp   Imprint
}

-- Book citation.
Cit-book ::= SEQUENCE {
    title   Title,              -- Title of book
    coll    Title OPTIONAL,     -- part of a collection
    authors Auth-list,          -- authors
    imp     Imprint
}

-- Meeting proceedings.
Cit-proc ::= SEQUENCE {
    book Cit-book,      -- citation to meeting
    meet Meeting        -- time and location of meeting
}

    -- Patent number and date-issue were made optional in 1997 to
    --   support patent applications being issued from the USPTO
    --   Semantically a Cit-pat must have either a patent number or
    --   an application number (or both) to be valid

-- Patent citation.  The patent number and the application number are
-- both optional fields here.
Cit-pat ::= SEQUENCE {
    title      VisibleString,
    authors    Auth-list,                               -- author/inventor
    country    VisibleString,                           -- Patent Document Country
    doc-type   VisibleString,                           -- Patent Document Type
    number     VisibleString OPTIONAL,                  -- Patent Document Number
    date-issue Date OPTIONAL,                           -- Patent Issue/Pub Date
    class      SEQUENCE OF VisibleString OPTIONAL,      -- Patent Doc Class Code
    app-number VisibleString OPTIONAL,                  -- Patent Doc Appl Number
    app-date   Date OPTIONAL,                           -- Patent Appl File Date
    applicants Auth-list OPTIONAL,                      -- Applicants
    assignees  Auth-list OPTIONAL,                      -- Assignees
    priority   SEQUENCE OF Patent-priority OPTIONAL,    -- Priorities
    abstract   VisibleString OPTIONAL                   -- abstract of patent
}

-- One priority entry of a patent (see Cit-pat.priority).
Patent-priority ::= SEQUENCE {
    country VisibleString,      -- Patent country code
    number  VisibleString,      -- number assigned in that country
    date    Date                -- date of application
}

-- Just enough information to identify a patent.
Id-pat ::= SEQUENCE {
    country VisibleString,              -- Patent Document Country
    id CHOICE {
        number     VisibleString,       -- Patent Document Number
        app-number VisibleString        -- Patent Doc Appl Number
    },
    doc-type VisibleString OPTIONAL     -- Patent Doc Type
}

-- Citation of a letter, thesis, or manuscript.
Cit-let ::= SEQUENCE {
    cit    Cit-book,                    -- same fields as a book
    man-id VisibleString OPTIONAL,      -- Manuscript identifier
    type ENUMERATED {
        manuscript (1),
        letter     (2),
        thesis     (3)
    } OPTIONAL
}
                                -- NOTE: this is just to cite a
                                -- direct data submission, see NCBI-Submit
                                -- for the form of a sequence submission
-- Citation for a direct submission.
Cit-sub ::= SEQUENCE {
    authors Auth-list,              -- not necessarily authors of the paper
    imp     Imprint OPTIONAL,       -- only used to get the date; will go away
    medium ENUMERATED {             -- medium of submission
        paper  (1),
        tape   (2),
        floppy (3),
        email  (4),
        other  (255)
    } OPTIONAL,
    date    Date OPTIONAL,          -- replaces imp, will become required
    descr   VisibleString OPTIONAL  -- description of changes for public view
}
    
-- Catch-all citation; this one is NOT from ANSI.  Every field is optional.
Cit-gen ::= SEQUENCE {
    cit           VisibleString OPTIONAL,   -- anything, not parsable
    authors       Auth-list OPTIONAL,
    muid          INTEGER OPTIONAL,         -- medline uid
    journal       Title OPTIONAL,
    volume        VisibleString OPTIONAL,
    issue         VisibleString OPTIONAL,
    pages         VisibleString OPTIONAL,
    date          Date OPTIONAL,
    serial-number INTEGER OPTIONAL,         -- for GenBank style references
    title         VisibleString OPTIONAL,   -- eg. cit="unpublished",title="title"
    pmid          PubMedId OPTIONAL         -- PubMed Id
}
    
    
    -- Authorship Group
-- Authorship group: the names in one of three forms, plus an optional
-- affiliation.
Auth-list ::= SEQUENCE {
    names CHOICE {
        std SEQUENCE OF Author,             -- full citations
        ml  SEQUENCE OF VisibleString,      -- MEDLINE, semi-structured
        str SEQUENCE OF VisibleString       -- free for all
    },
    affil Affil OPTIONAL                    -- author affiliation
}

-- A single author entry; everything except the name is optional.
Author ::= SEQUENCE {
    name Person-id,                 -- Author, Primary or Secondary
    level ENUMERATED {
        primary   (1),
        secondary (2)
    } OPTIONAL,
    role ENUMERATED {               -- Author Role Indicator
        compiler        (1),
        editor          (2),
        patent-assignee (3),
        translator      (4)
    } OPTIONAL,
    affil   Affil OPTIONAL,
    is-corr BOOLEAN OPTIONAL        -- TRUE if corresponding author
}

-- Affiliation: either one unparsed string or a structured record whose
-- fields are all optional.
Affil ::= CHOICE {
    str VisibleString,                          -- unparsed string
    std SEQUENCE {                              -- std representation
        affil       VisibleString OPTIONAL,     -- Author Affiliation, Name
        div         VisibleString OPTIONAL,     -- Author Affiliation, Division
        city        VisibleString OPTIONAL,     -- Author Affiliation, City
        sub         VisibleString OPTIONAL,     -- Author Affiliation, County Sub
        country     VisibleString OPTIONAL,     -- Author Affiliation, Country
        street      VisibleString OPTIONAL,     -- street address, not ANSI
        email       VisibleString OPTIONAL,
        fax         VisibleString OPTIONAL,
        phone       VisibleString OPTIONAL,
        postal-code VisibleString OPTIONAL
    }
}

    -- Title Group
    -- Valid for = A = Analytic (Cit-art)
    --             J = Journals (Cit-jour)
    --             B = Book (Cit-book)
                                                 -- Valid for:
Title ::= SET OF CHOICE {
    name    VisibleString,  -- Title, Anal,Coll,Mono    AJB
    tsub    VisibleString,  -- Title, Subordinate       A B
    trans   VisibleString,  -- Title, Translated        AJB
    jta     VisibleString,  -- Title, Abbreviated        J
    iso-jta VisibleString,  -- specifically ISO jta      J
    ml-jta  VisibleString,  -- specifically MEDLINE jta  J
    coden   VisibleString,  -- a coden                   J
    issn    VisibleString,  -- ISSN                      J
    abr     VisibleString,  -- Title, Abbreviated         B
    isbn    VisibleString   -- ISBN                       B
}

-- Imprint group: the publication date plus optional details of where
-- and how the work appeared.
Imprint ::= SEQUENCE {
    date      Date,                         -- date of publication
    volume    VisibleString OPTIONAL,
    issue     VisibleString OPTIONAL,
    pages     VisibleString OPTIONAL,
    section   VisibleString OPTIONAL,
    pub       Affil OPTIONAL,               -- publisher, required for book
    cprt      Date OPTIONAL,                -- copyright date, required for book
    part-sup  VisibleString OPTIONAL,       -- part/sup of volume
    language  VisibleString DEFAULT "ENG",  -- put here for simplicity
    prepub ENUMERATED {                     -- for prepublication citations
        submitted (1),                      -- submitted, not accepted
        in-press  (2),                      -- accepted, not published
        other     (255)
    } OPTIONAL,
    part-supi VisibleString OPTIONAL,       -- part/sup on issue
    retract   CitRetract OPTIONAL,          -- retraction info
    pubstatus PubStatus OPTIONAL,           -- current status of this publication
    history   PubStatusDateSet OPTIONAL     -- dates for this record
}

-- Retraction or erratum information (referenced by Imprint.retract).
CitRetract ::= SEQUENCE {
    type ENUMERATED {               -- retraction of an entry
        retracted (1),              -- this citation retracted
        notice    (2),              -- this citation is a retraction notice
        in-error  (3),              -- an erratum was published about this
        erratum   (4)               -- this is a published erratum
    },
    exp VisibleString OPTIONAL      -- citation and/or explanation
}

-- A meeting, as referenced by Cit-proc.meet.
Meeting ::= SEQUENCE {
    number VisibleString,
    date   Date,
    place  Affil OPTIONAL
}

            
END


-- biotree.asn
--$Revision: 1.4 $
--*********************************************************************
--
--  biotree.asn
--
--     BioTree ASN
--     Anatoliy Kuznetsov
--
--*********************************************************************

NCBI-BioTree DEFINITIONS ::=
BEGIN

EXPORTS BioTreeContainer, DistanceMatrix;

-- A tree: a feature dictionary plus the nodes that encode the topology.
BioTreeContainer ::= SEQUENCE {
    treetype VisibleString OPTIONAL,    -- hint on what kind of tree this is
    fdict    FeatureDictSet,            -- features dictionary
    nodes    NodeSet                    -- set of nodes with encoded topology
}

NodeSet ::= SET OF Node

-- One tree node; the parent id and the feature set are optional.
Node ::= SEQUENCE {
    id       INTEGER,                   -- node uid
    parent   INTEGER OPTIONAL,          -- parent node id
    features NodeFeatureSet OPTIONAL
}

NodeFeatureSet ::= SET OF NodeFeature

-- A feature id / value pair attached to a node.
NodeFeature ::= SEQUENCE {
    featureid INTEGER,
    value     VisibleString
}

FeatureDictSet ::= SET OF FeatureDescr

-- Dictionary entry pairing a feature id with its name.
FeatureDescr ::= SEQUENCE {
    id   INTEGER,                       -- feature id
    name VisibleString                  -- feature name
}

-- Labels plus their pairwise distances.
-- NOTE(review): the index ranges quoted below run up to n; with n labels
-- numbered from 0 the last index would be n-1.  Confirm the convention.
DistanceMatrix ::= SEQUENCE {
    labels    SEQUENCE OF VisibleString,    -- n labels
    distances SEQUENCE OF REAL              -- n(n-1)/2 pairwise distances
                                            -- (0, 1)...(0, n), (1, 2)...(1, n)...
}

END
-- blast.asn
--  ----------------------------------------------------------------------------
--
--                            PUBLIC DOMAIN NOTICE
--                National Center for Biotechnology Information
--
--  This software/database is a "United States Government Work" under the terms
--  of the United States Copyright Act.  It was written as part of the author's
--  official duties as a United States Government employee and thus cannot be
--  copyrighted.  This software/database is freely available to the public for
--  use.  The National Library of Medicine and the U.S. Government have not
--  placed any restriction on its use or reproduction.
--
--  Although all reasonable efforts have been taken to ensure the accuracy and
--  reliability of the software and data, the NLM and the U.S. Government do not
--  and cannot warrant the performance or results that may be obtained by using
--  this software or data.  The NLM and the U.S. Government disclaim all
--  warranties, express or implied, including warranties of performance,
--  merchantability or fitness for any particular purpose.
--
--  Please cite the authors in any work or product based on this material.
--
--  ----------------------------------------------------------------------------
--
--  Authors: Tom Madden, Tim Boemker
--
--  ASN.1 interface to BLAST.
--
--  ----------------------------------------------------------------------------

NCBI-Blast4 DEFINITIONS ::=
BEGIN

EXPORTS
    Blast4-ka-block,
    Blast4-value,
    Blast4-parameter,
    Blast4-parameters;

-- Each source module is named exactly once: the module references in an
-- IMPORTS list are required to be distinct, so both NCBI-Sequence symbols
-- share a single FROM clause.
IMPORTS
    Bioseq,
    Seq-data                FROM NCBI-Sequence
    Bioseq-set              FROM NCBI-Seqset
    PssmWithParameters      FROM NCBI-ScoreMat
    Seq-id,
    Seq-loc                 FROM NCBI-Seqloc
    Seq-align,
    Seq-align-set           FROM NCBI-Seqalign;

--  --------------------------------------------------------------------
--
--  Requests
--
--  --------------------------------------------------------------------

-- Request envelope: an optional identifier string followed by the body.
Blast4-request ::= SEQUENCE {
    ident VisibleString OPTIONAL,
    body  Blast4-request-body
}

-- The available request kinds.  Alternatives typed NULL carry no arguments.
Blast4-request-body ::= CHOICE {
    finish-params      Blast4-finish-params-request,
    get-databases      NULL,
    get-matrices       NULL,
    get-parameters     NULL,
    get-paramsets      NULL,
    get-programs       NULL,
    get-search-results Blast4-get-search-results-request,
    get-sequences      Blast4-get-sequences-request,
    queue-search       Blast4-queue-search-request,
    get-request-info   Blast4-get-request-info-request,
    get-sequence-parts Blast4-get-seq-parts-request
}

-- Arguments of the finish-params request.
Blast4-finish-params-request ::= SEQUENCE {
    program  VisibleString,
    service  VisibleString,
    paramset VisibleString OPTIONAL,
    params   Blast4-parameters OPTIONAL
}

-- Arguments of the get-search-results request.
Blast4-get-search-results-request ::= SEQUENCE {
    request-id VisibleString
}

-- Arguments of the get-sequences request: a database and a list of ids.
Blast4-get-sequences-request ::= SEQUENCE {
    database Blast4-database,
    seq-ids  SEQUENCE OF Seq-id
}

-- If a PSSM is used (ie. for PSI-Blast), it must contain a "query"
-- for formatting purposes.  Bioseq-set may contain any number of
-- queries, specified as data.  Seq-loc-list may contain only the
-- "whole" or "interval" types.  In the case of "whole", any number of
-- queries may be used; in the case of "interval", there should be
-- exactly one query.  (This is limited by the BlastObject.)

Blast4-queries ::= CHOICE {
    pssm         PssmWithParameters,
    seq-loc-list SEQUENCE OF Seq-loc,
    bioseq-set   Bioseq-set
}

-- Options have been broken down into two groups as part of the BLAST
-- API work.  The algorithm options essentially correspond to those
-- options available via the CBlastOptions class.
--   algorithm-options: Options for BLAST (ie. seq comparison) algorithm.
--   program-options:   Other options, such as which seqs. to compare.

-- Arguments of the queue-search request.  The two option groups are
-- separate optional parameter lists.
Blast4-queue-search-request ::= SEQUENCE {
    program           VisibleString,
    service           VisibleString,
    queries           Blast4-queries,
    subject           Blast4-subject,
    paramset          VisibleString OPTIONAL,
    algorithm-options Blast4-parameters OPTIONAL,
    program-options   Blast4-parameters OPTIONAL
}

-- Fetch information about the search request.

-- Identifies the search request whose information is wanted.
Blast4-get-request-info-request ::= SEQUENCE {
    request-id VisibleString
}

-- Information returned about a search request; every field is mandatory.
Blast4-get-request-info-reply ::= SEQUENCE {
    database          Blast4-database,
    program           VisibleString,
    service           VisibleString,
    created-by        VisibleString,
    queries           Blast4-queries,
    algorithm-options Blast4-parameters,
    program-options   Blast4-parameters
}

-- Fetch parts of a sequence a la carte.

-- Arguments of the get-sequence-parts request.
Blast4-get-seq-parts-request ::= SEQUENCE {
    database       Blast4-database,
    id             Seq-id,
    need-meta-data BOOLEAN,

    -- If end is 0, no data will be fetched.  If end is past the length
    -- of the sequence, it will be adjusted to the end of the sequence
    -- (this allows fetching of the first chunk in cases where the
    -- length is not yet known).

    start          INTEGER OPTIONAL,
    end            INTEGER OPTIONAL
}


--  --------------------------------------------------------------------
--
--  Replies
--
--  --------------------------------------------------------------------

-- Reply envelope: an optional list of errors followed by the body.
Blast4-reply ::= SEQUENCE {
    errors SEQUENCE OF Blast4-error OPTIONAL,
    body   Blast4-reply-body
}

-- The available reply kinds, one reply type per alternative.
Blast4-reply-body ::= CHOICE {
    finish-params      Blast4-finish-params-reply,
    get-databases      Blast4-get-databases-reply,
    get-matrices       Blast4-get-matrices-reply,
    get-parameters     Blast4-get-parameters-reply,
    get-paramsets      Blast4-get-paramsets-reply,
    get-programs       Blast4-get-programs-reply,
    get-search-results Blast4-get-search-results-reply,
    get-sequences      Blast4-get-sequences-reply,
    queue-search       Blast4-queue-search-reply,
    get-queries        Blast4-get-queries-reply,
    get-request-info   Blast4-get-request-info-reply,
    get-sequence-parts Blast4-get-seq-parts-reply
}

-- Replies that are a plain parameter list or a list of info records.
Blast4-finish-params-reply  ::= Blast4-parameters

Blast4-get-databases-reply  ::= SEQUENCE OF Blast4-database-info

Blast4-get-matrices-reply   ::= SEQUENCE OF Blast4-matrix-id

Blast4-get-parameters-reply ::= SEQUENCE OF Blast4-parameter-info

Blast4-get-paramsets-reply  ::= SEQUENCE OF Blast4-paramset-info

Blast4-get-programs-reply   ::= SEQUENCE OF Blast4-program-info

-- Search results; every component is optional.
Blast4-get-search-results-reply ::= SEQUENCE {
    alignments     Seq-align-set OPTIONAL,
    phi-alignments Blast4-phi-alignments OPTIONAL,

    -- Masking locations for the query sequence(s).  Each element of this
    -- set corresponds to a single query's translation frame as appropriate.
    masks          SEQUENCE OF Blast4-mask OPTIONAL,

    ka-blocks      SEQUENCE OF Blast4-ka-block OPTIONAL,
    search-stats   SEQUENCE OF VisibleString OPTIONAL,
    pssm           PssmWithParameters OPTIONAL
}

-- Reply to get-sequences: the sequences themselves.
Blast4-get-sequences-reply ::= SEQUENCE OF Bioseq

-- Reply to queue-search: an optional request id.
Blast4-queue-search-reply ::= SEQUENCE {
    request-id VisibleString OPTIONAL
}

-- Reply carrying a set of queries.
Blast4-get-queries-reply ::= SEQUENCE {
    queries Blast4-queries
}

-- Reply to get-sequence-parts; every component is optional.
Blast4-get-seq-parts-reply ::= SEQUENCE {
    bioseq Bioseq OPTIONAL,
    ids    SEQUENCE OF Seq-id OPTIONAL,
    length INTEGER OPTIONAL,
    data   Seq-data OPTIONAL
}

--  --------------------------------------------------------------------
--
--  Errors
--
--  --------------------------------------------------------------------

-- One error entry: a numeric code and an optional message.
Blast4-error ::= SEQUENCE {
    code    INTEGER,
    message VisibleString OPTIONAL
}

-- Severity markers; the values match the bases used in Blast4-error-code.
Blast4-error-flags ::= ENUMERATED {
    warning (1024),
    error   (2048)
}

-- Named error codes: the warning is numbered 1024, errors run from 2048.
Blast4-error-code ::= INTEGER {
    -- warnings
    conversion-warning (1024),

    -- errors
    internal-error     (2048),
    not-implemented    (2049),
    not-allowed        (2050),
    bad-request        (2051),
    bad-request-id     (2052),
    search-pending     (2053)
}

--  --------------------------------------------------------------------
--
--  Other types in alphabetical order
--
--  --------------------------------------------------------------------

-- A cutoff given either as an e-value or as a raw score.
Blast4-cutoff ::= CHOICE {
    e-value   REAL,
    raw-score INTEGER
}

-- A database reference: its name and residue type.
Blast4-database ::= SEQUENCE {
    name VisibleString,
    type Blast4-residue-type
}

-- Borrowed from seq.asn

Blast4-seqtech ::= INTEGER {
    unknown          (0),
    standard         (1),   -- standard sequencing
    est              (2),   -- Expressed Sequence Tag
    sts              (3),   -- Sequence Tagged Site
    survey           (4),   -- one-pass genomic sequence
    genemap          (5),   -- from genetic mapping techniques
    physmap          (6),   -- from physical mapping techniques
    derived          (7),   -- derived from other data, not a primary entity
    concept-trans    (8),   -- conceptual translation
    seq-pept         (9),   -- peptide was sequenced
    both             (10),  -- concept transl. w/ partial pept. seq.
    seq-pept-overlap (11),  -- sequenced peptide, ordered by overlap
    seq-pept-homol   (12),  -- sequenced peptide, ordered by homology
    concept-trans-a  (13),  -- conceptual transl. supplied by author
    htgs-1           (14),  -- unordered High Throughput sequence contig
    htgs-2           (15),  -- ordered High Throughput sequence contig
    htgs-3           (16),  -- finished High Throughput sequence
    fli-cdna         (17),  -- full length insert cDNA
    htgs-0           (18),  -- single genomic reads for coordination
    htc              (19),  -- high throughput cDNA
    wgs              (20),  -- whole genome shotgun sequencing
    other            (255)  -- use Source.techexp
}

-- Descriptive record for one database.
-- NOTE(review): BigInt is neither defined nor imported in this module;
-- presumably it is a type supplied by the NCBI tooling.  Confirm.
Blast4-database-info ::= SEQUENCE {
    database      Blast4-database,
    description   VisibleString,
    last-updated  VisibleString,
    total-length  BigInt,
    num-sequences BigInt,
    seqtech       Blast4-seqtech,
    taxid         INTEGER
}

-- Translation frame: unset, or one of three plus and three minus frames.
Blast4-frame-type ::= ENUMERATED {
    notset (0),
    plus1  (1),
    plus2  (2),
    plus3  (3),
    minus1 (4),
    minus2 (5),
    minus3 (6)
}

-- Three real-valued parameters (lambda, k, h) and a gapped flag.
Blast4-ka-block ::= SEQUENCE {
    lambda REAL,
    k      REAL,
    h      REAL,
    gapped BOOLEAN
}

-- Masking locations for a query's frame. The locations field is a single
-- Seq-loc of type Packed-int, which contains all the masking locations for the
-- translation frame specified by the frame field.
-- Notes:
-- On input (i.e.: when the client specifies masking locations as a
-- Blast4-parameter), in the case of protein queries, the frame field must 
-- always be notset, in the case of nucleotide queries (regardless of whether 
-- the search will translate these or not), the frame must be plus1. Masking 
-- locations in the translated encoding are not permitted.
-- On output (i.e.: when blast 4 server encodes these as part of the 
-- Blast4-get-search-results-reply), the same conventions as above apply for
-- non-translated protein and nucleotide queries, but in the case of translated
-- nucleotide queries, the frame field can be specified in any of the
-- translation frames as appropriate.
-- NOTE(review): the description of this type speaks of a single Seq-loc,
-- while the locations field is declared as SEQUENCE OF Seq-loc.  Confirm
-- which of the two is intended.
Blast4-mask ::= SEQUENCE {
    locations SEQUENCE OF Seq-loc,
    frame     Blast4-frame-type
}

-- Identifies a matrix by residue type and name.
Blast4-matrix-id ::= SEQUENCE {
    residue-type Blast4-residue-type,
    name         VisibleString
}

-- A named parameter value.
Blast4-parameter ::= SEQUENCE {
    name  VisibleString,
    value Blast4-value
}

-- Describes a parameter by name and type, both given as strings.
Blast4-parameter-info ::= SEQUENCE {
    name VisibleString,
    type VisibleString
}

-- Describes a parameter set by program and name.
Blast4-paramset-info ::= SEQUENCE {
    program VisibleString,
    name    VisibleString
}

-- Describes a program together with its list of services.
Blast4-program-info ::= SEQUENCE {
    program  VisibleString,
    services SEQUENCE OF VisibleString
}

-- Residue type of a database or matrix.
Blast4-residue-type ::= ENUMERATED {
    unknown    (0),
    protein    (1),
    nucleotide (2)
}

-- Strand selection.
Blast4-strand-type ::= ENUMERATED {
    forward-strand (1),
    reverse-strand (2),
    both-strands   (3)
}

-- Search subject: either a database given as a string or explicit sequences.
Blast4-subject ::= CHOICE {
    database  VisibleString,
    sequences SEQUENCE OF Bioseq
}

-- An ordered list of parameters.
Blast4-parameters ::= SEQUENCE OF Blast4-parameter

-- An alignment count together with a list of Seq-locs.
Blast4-phi-alignments ::= SEQUENCE {
    num-alignments INTEGER,
    seq-locs       SEQUENCE OF Seq-loc
}

-- The value of a Blast4-parameter: a scalar, a list of scalars, an
-- imported collection, or a query mask.
Blast4-value ::= CHOICE {

    -- scalar types
    big-integer      BigInt,
    bioseq           Bioseq,
    boolean          BOOLEAN,
    cutoff           Blast4-cutoff,
    integer          INTEGER,
    matrix           PssmWithParameters,
    real             REAL,
    seq-align        Seq-align,
    seq-id           Seq-id,
    seq-loc          Seq-loc,
    strand-type      Blast4-strand-type,
    string           VisibleString,

    -- lists of scalar types
    big-integer-list SEQUENCE OF BigInt,
    bioseq-list      SEQUENCE OF Bioseq,
    boolean-list     SEQUENCE OF BOOLEAN,
    cutoff-list      SEQUENCE OF Blast4-cutoff,
    integer-list     SEQUENCE OF INTEGER,
    matrix-list      SEQUENCE OF PssmWithParameters,
    real-list        SEQUENCE OF REAL,
    seq-align-list   SEQUENCE OF Seq-align,
    seq-id-list      SEQUENCE OF Seq-id,
    seq-loc-list     SEQUENCE OF Seq-loc,
    strand-type-list SEQUENCE OF Blast4-strand-type,
    string-list      SEQUENCE OF VisibleString,

    -- imported collection types
    bioseq-set       Bioseq-set,
    seq-align-set    Seq-align-set,

    -- User-provided masking locations for a single query sequence (the
    -- name field in Blast4-parameter should be "LCaseMask").  Multiple
    -- Blast4-parameters of this type are needed to specify masking
    -- locations for multiple queries.
    query-mask       Blast4-mask
}

END

-- blastdb.asn
--$Id: blastdb.asn 145295 2008-11-10 15:59:38Z camacho $
--
-- Notes:
--
-- taxonomy: an integer is proposed, which would require some sort of 
-- table (or network connection) to do the conversions from integer 
-- to various names.  This could save quite a bit of space for databases 
-- that are predominantly of one organism (e.g., human in htgs).
-- I've proposed here that table contain scientific-, common-, and 
-- blast-names at the advice of Scott Federhen.  Scott also was in 
-- favor of having the complete lineage in the file, but it seems like 
-- this would be seldom used and we could have a view with a link back 
-- to the taxonomy page for anyone needing it. Since one file would 
-- suffice for all blast databases, it seems like this should be a new file.
--
-- memberships: a sequence of integers is proposed.  Each bit of an integer 
-- would indicate membership in some (virtual) blast database (e.g., pdb, 
-- swissprot) or some classification (e.g., mRNA, genomic).
--
-- links: a sequence of integers is proposed.  Each bit of an integer would 
-- indicate a link that could be established based upon the gi of the 
-- database sequence.
--

NCBI-BlastDL DEFINITIONS ::=
BEGIN

EXPORTS Blast-def-line-set, Blast-def-line;

IMPORTS Seq-id, Seq-loc FROM NCBI-Seqloc;

-- All deflines for an entry.
Blast-def-line-set ::= SEQUENCE OF Blast-def-line

-- One defline: the sequence ids plus optional title, taxonomy id and
-- bit-array fields.
Blast-def-line ::= SEQUENCE {
    title       VisibleString OPTIONAL,         -- simple title
    seqid       SEQUENCE OF Seq-id,             -- Regular NCBI Seq-Id
    taxid       INTEGER OPTIONAL,               -- taxonomy id
    memberships SEQUENCE OF INTEGER OPTIONAL,   -- bit arrays
    links       SEQUENCE OF INTEGER OPTIONAL,   -- bit arrays
    other-info  SEQUENCE OF INTEGER OPTIONAL    -- for future use (probably genomic sequences)
}

-- The possible sequence filtering algorithms to be used in a BLAST
-- database.
Blast-filter-program ::= INTEGER {
    not-set      (0),
    dust         (10),
    seg          (20),
    windowmasker (30),
    repeat       (40),
    other        (100),
    max          (255)
}

-- A list of mask locations followed by a boolean "more" flag.
Blast-mask-list ::= SEQUENCE {
    masks SEQUENCE OF Seq-loc,
    more  BOOLEAN
}

-- Masking information: algorithm id, program and options, plus the masks.
Blast-db-mask-info ::= SEQUENCE {
    algo-id      INTEGER,
    algo-program Blast-filter-program,
    algo-options VisibleString,
    masks        Blast-mask-list
}

END


-- blastxml.asn
--$Id: blastxml.asn 120927 2008-02-28 18:57:30Z ucko $

NCBI-BlastOutput DEFINITIONS ::=
BEGIN

-- Top-level BLAST report: program and query description, the search
-- parameters, and the list of iterations.
BlastOutput ::= SEQUENCE {
    program    VisibleString,           -- BLAST program: blastp, tblastx etc.
    version    VisibleString,           -- Program version
    reference  VisibleString,           -- Steven, David, Tom and others
                                        -- (presumably the citation text; confirm)
    db         VisibleString,           -- BLAST Database name
    query-ID   VisibleString,           -- SeqId of query
    query-def  VisibleString,           -- Definition line of query
    query-len  INTEGER,                 -- length of query sequence
    query-seq  VisibleString OPTIONAL,  -- query sequence itself
    param      Parameters,              -- search parameters
    iterations SEQUENCE OF Iteration,
    mbstat     Statistics OPTIONAL      -- Mega BLAST search statistics
}
-- One iteration of the report; only the iteration number is mandatory.
Iteration ::= SEQUENCE {
    iter-num  INTEGER,                  -- iteration number
    query-ID  VisibleString OPTIONAL,   -- SeqId of query
    query-def VisibleString OPTIONAL,   -- Definition line of query
    query-len INTEGER OPTIONAL,         -- length of query sequence
    hits      SEQUENCE OF Hit OPTIONAL, -- Hits one for every db sequence
    stat      Statistics OPTIONAL,      -- search statistics
    message   VisibleString OPTIONAL    -- Some (error?) information
}
-- Search parameters; the flag in parentheses is the one quoted for each.
Parameters ::= SEQUENCE {
    matrix       VisibleString OPTIONAL,    -- Matrix used (-M)
    expect       REAL,                      -- Expectation threshold (-e)
    include      REAL OPTIONAL,             -- Inclusion threshold (-h)
    sc-match     INTEGER OPTIONAL,          -- match score for NT (-r)
    sc-mismatch  INTEGER OPTIONAL,          -- mismatch score for NT (-q)
    gap-open     INTEGER,                   -- Gap opening cost (-G)
    gap-extend   INTEGER,                   -- Gap extension cost (-E)
    filter       VisibleString OPTIONAL,    -- Filtering options (-F)
    pattern      VisibleString OPTIONAL,    -- PHI-BLAST pattern
    entrez-query VisibleString OPTIONAL     -- Limit of request to Entrez query
}

-- Search statistics, including the Karlin-Altschul parameters.
Statistics ::= SEQUENCE {
    -- number of sequences in BLAST db
    db-num     INTEGER,
    -- length of BLAST db
    db-len     BigInt,
    -- effective HSP length
    hsp-len    INTEGER,
    -- effective search space
    eff-space  REAL,
    -- Karlin-Altschul parameter K
    kappa      REAL,
    -- Karlin-Altschul parameter Lambda
    lambda     REAL,
    -- Karlin-Altschul parameter H
    entropy    REAL
}

-- One subject sequence reported as a hit.
Hit ::= SEQUENCE {
    -- hit number
    num        INTEGER,
    -- SeqId of subject
    id         VisibleString,
    -- definition line of subject
    def        VisibleString,
    -- accession
    accession  VisibleString,
    -- length of subject
    len        INTEGER,
    -- all HSP regions for the given subject
    hsps       SEQUENCE OF Hsp OPTIONAL
}

-- A single HSP region reported for a hit.
Hsp ::= SEQUENCE {
    -- HSP number
    num           INTEGER,
    -- score (in bits) of HSP
    bit-score     REAL,
    -- score of HSP
    score         REAL,
    -- e-value of HSP
    evalue        REAL,
    -- start of HSP in query
    query-from    INTEGER,
    -- end of HSP
    query-to      INTEGER,
    -- start of HSP in subject
    hit-from      INTEGER,
    -- end of HSP in subject
    hit-to        INTEGER,
    -- start of PHI-BLAST pattern
    pattern-from  INTEGER OPTIONAL,
    -- end of PHI-BLAST pattern
    pattern-to    INTEGER OPTIONAL,
    -- translation frame of query
    query-frame   INTEGER OPTIONAL,
    -- translation frame of subject
    hit-frame     INTEGER OPTIONAL,
    -- number of identities in HSP
    identity      INTEGER OPTIONAL,
    -- number of positives in HSP
    positive      INTEGER OPTIONAL,
    -- number of gaps in HSP
    gaps          INTEGER OPTIONAL,
    -- length of the alignment used
    align-len     INTEGER OPTIONAL,
    -- score density
    density       INTEGER OPTIONAL,
    -- alignment string for the query (with gaps)
    qseq          VisibleString,
    -- alignment string for subject (with gaps)
    hseq          VisibleString,
    -- formatting middle line
    midline       VisibleString OPTIONAL
}

END

-- cdd.asn
--$Revision: 150675 $
--**********************************************************************
--
--  Definitions for CDD's 
--
--  NCBI Structure Group
--
--  National Center for Biotechnology Information
--  National Institutes of Health
--  Bethesda, MD 20894 USA
--
--  October 1999
--
--  asntool -m cdd.asn -w 100 -o cdd.h
--  asntool -B objcdd -m cdd.asn -G -w 100 -I objseq.h objsset.h -K cdd.h \
--          -M asn.all
--**********************************************************************

NCBI-Cdd DEFINITIONS ::=
-- NCBI Conserved Domain Definition


BEGIN

EXPORTS  Cdd-id, Cdd-id-set, Cdd, Cdd-set, Cdd-tree, Cdd-tree-set, Cdd-pref-nodes, Cdd-Project;

IMPORTS  Date                    FROM NCBI-General
         Pub                     FROM NCBI-Pub
         Biostruc-annot-set      FROM MMDB
         Bioseq                  FROM NCBI-Sequence
         Seq-annot               FROM NCBI-Sequence
         Seq-entry               FROM NCBI-Seqset
         Org-ref                 FROM NCBI-Organism
         Seq-id                  FROM NCBI-Seqloc
         Seq-interval            FROM NCBI-Seqloc
         Seq-loc                 FROM NCBI-Seqloc
         Seq-feat                FROM NCBI-Seqfeat
         Score-set               FROM NCBI-Seqalign
         Cn3d-style-dictionary,
         Cn3d-user-annotations   FROM NCBI-Cn3d
         PssmWithParameters      FROM NCBI-ScoreMat;
         
-- dealing with lists of preferred tax-nodes 

-- An organism reference as used in lists of preferred tax-nodes.
Cdd-org-ref ::= SEQUENCE {
    -- the referenced organism
    reference Org-ref,
    -- takes the value TRUE when not given explicitly
    active BOOLEAN DEFAULT TRUE,
    -- NOTE(review): presumably the taxonomy id of the parent node; confirm
    parent-tax-id INTEGER OPTIONAL,
    -- NOTE(review): presumably the taxonomic rank as text; confirm
    rank VisibleString OPTIONAL
}
-- an unordered collection of Cdd-org-ref entries
Cdd-org-ref-set ::= SET OF Cdd-org-ref

-- One descriptive item for a preferred-node list: either a creation date
-- or a text description.
Cdd-pref-node-descr ::= CHOICE {
    create-date Date,
    description VisibleString
}

-- an unordered collection of Cdd-pref-node-descr items
Cdd-pref-node-descr-set ::= SET OF Cdd-pref-node-descr

-- Lists of preferred tax-nodes; only preferred-nodes is mandatory.
Cdd-pref-nodes ::= SEQUENCE {
    -- the preferred nodes
    preferred-nodes Cdd-org-ref-set,
    -- model organisms, if any are listed
    model-organisms Cdd-org-ref-set OPTIONAL,
    -- optional nodes, if any are listed
    optional-nodes Cdd-org-ref-set OPTIONAL,
    -- descriptive items for this list, if any
    description Cdd-pref-node-descr-set OPTIONAL
}

-- Cdd's should not exist without a unique accession, but alternative id's may
-- be present as well. It is conceivable that a CD which is created as a merged
-- product of two highly redundant CDs will retain the source ids in addition 
-- to its new unique id

-- Accession-based identifier; only the accession is mandatory.
Global-id ::= SEQUENCE {
    -- SMART, Pfam, LOAD or CD accession
    accession VisibleString,
    -- to hold CD-Database release number if desired, currently not used
    release VisibleString OPTIONAL,
    -- version 0 is the seed, version numbers increase with update/curate
    -- cycles
    version INTEGER OPTIONAL,
    -- this is NOT the source!, rather the database the object resides in;
    -- currently not in use
    database VisibleString OPTIONAL
}

-- Identifier of a CD: either an integer uid or a Global-id.
Cdd-id ::= CHOICE {
    -- for synchronization with Entrez, holds PSSM-Ids
    uid INTEGER,
    -- holds accession/version pairs
    gid Global-id
}

-- an ordered list of Cdd-id values
Cdd-id-set ::= SEQUENCE OF Cdd-id

-- Records whether the CD contains repeated sequence/structure motifs.
Cdd-repeat ::= SEQUENCE {
    -- number of tandem repeats in the CD
    count INTEGER,
    -- location on the representative
    location Seq-loc OPTIONAL,
    -- average repeat length
    avglen INTEGER OPTIONAL
}


-- Records a link to Entrez Books.
Cdd-book-ref ::= SEQUENCE {
    -- abbreviated book title
    bookname VisibleString,
    -- type of element
    textelement ENUMERATED {
        unassigned (0),
        section (1),      -- a section or paragraph
        figgrp (2),       -- a figure or set of figures
        table (3),        -- a table
        chapter (4),      -- a whole chapter
        biblist (5),      -- a list of references
        box (6),          -- an inserted box
        glossary (7),     -- glossary
        appendix (8),     -- appendix
        other (255)
    },
    -- numerical address of the text-element
    elementid INTEGER OPTIONAL,
    -- exact address, used with section
    subelementid INTEGER OPTIONAL,
    -- address of the text element, if character string
    celementid VisibleString OPTIONAL,
    -- exact address, if character string
    csubelementid VisibleString OPTIONAL
}

-- The description of CDD's refers to the specific set of aligned sequences,
-- the region that is being aligned and the information contained in the
-- alignment. It may contain a lengthy comment
-- describing the function of the domain as well as its origin and all
-- other anecdotal information that can't be pressed into a rigid scheme.
-- Crosslinks to reference papers available in PubMed are possible as well.
-- There can be as many of these as you want in the CDD.

-- One descriptive item of a CDD; each value carries exactly one of the
-- alternatives listed below.
Cdd-descr ::= CHOICE {
    -- alternative names for the CDD, if domain has several common names
    othername VisibleString,
    -- intracellular, extracellular, etc.; to record spatial and/or
    -- temporal expression in free-text format
    category VisibleString,
    -- this is where descriptions go
    comment VisibleString,
    -- a citation describing the domain
    reference Pub,
    -- date of first creation/dump
    create-date Date,
    -- holds the highest common tax node
    tax-source Org-ref,
    -- the database the seeds were created from, e.g. SMART, PFAM, etc.
    source VisibleString,
    status INTEGER {
        unassigned (0),
        finished-ok (1),       -- a public curated CD
        pending-release (2),   -- needs work done, not yet released
        other-asis (3),        -- imported as-is, immediate release
        matrix-only (4),       -- CD holds a Psi-Blast PSSM only, does not
                               -- contain alignment data
        update-running (5),    -- has been flagged for update (in queue)
        auto-updated (6),      -- update finished, no work necessary
        claimed (7),           -- is earmarked for curation
        curated-complete (8),  -- public curated member of a completed
                               -- family
        other (255)
    },                         -- for CD production?
    -- date of last version change
    update-date Date,
    -- for storing curation notes; those won't make it into public
    -- distributions
    scrapbook SEQUENCE OF VisibleString,
    -- for linking back to source db
    source-id Cdd-id-set,
    -- to record repeat counts
    repeats Cdd-repeat,
    -- to record short-term history
    old-root Cdd-id-set,
    -- to record curation status when CD is checked out from the tracking
    -- database, for use within curation software
    curation-status INTEGER {
        unassigned (0),
        prein (1),
        ofc (2),
        iac (3),
        ofv1 (4),
        iav1 (5),
        ofv2 (6),
        iav2 (7),
        postin (8),
        other (255)
    },
    -- to record read-only status when CD is checked out from the tracking
    -- database, for use within curation software
    readonly-status INTEGER {
        unassigned (0),
        readonly (1),
        readwrite (2),
        other (255)
    },
    -- links to Entrez/books
    book-ref Cdd-book-ref,
    -- add citations and/or author names
    attribution Pub,
    -- hold short descriptive text
    title VisibleString
}

-- an unordered collection of Cdd-descr items
Cdd-descr-set ::= SET OF Cdd-descr

-- The Cdd-tree stores the hierarchy of CDDs. These objects are stored
-- separately from the CDs to allow for fast retrieval and use as an 'index'
-- into CDs. All the components in a Cdd-tree match components in the
-- full-sized CD and should be kept synchronized.

-- One entry of the CD hierarchy; name, id and description are copies of
-- the corresponding fields of the CD.
Cdd-tree ::= SEQUENCE {
    -- short name, copied from CD
    name VisibleString,
    -- IDs, copied from CD
    id Cdd-id-set,
    -- description, copied from CD
    description Cdd-descr-set OPTIONAL,
    -- CD is the result of a split/merge
    parent Cdd-id OPTIONAL,
    -- this CD has been split
    children Cdd-id-set OPTIONAL,
    -- related CDs (have common hits)
    siblings Cdd-id-set OPTIONAL,
    -- co-occurring CDs (non-overlapping hits to same sequences)
    neighbors Cdd-id-set OPTIONAL
}

-- an ordered list of Cdd-tree entries
Cdd-tree-set ::= SEQUENCE OF Cdd-tree

-- Matrix definitions, these are supposed to store PSSMs and corresponding 
-- matrices of relative residue frequencies.
-- the number of columns and rows is listed explicitly, values in columns
-- are stored column by column, i.e. in groups of nrows values for each column

-- Integer matrix with explicitly listed dimensions.
Matrix ::= SEQUENCE {
    -- number of columns
    ncolumns INTEGER,
    -- number of rows
    nrows INTEGER,
    -- labels for the rows, when given
    row-labels SEQUENCE OF VisibleString OPTIONAL,
    -- NOTE(review): presumably the factor relating the stored integers
    -- to the real values; confirm
    scale-factor INTEGER,
    -- the values, stored column by column
    columns SEQUENCE OF INTEGER
}

-- Definition for a matrix of pairwise "distances", stored as the upper
-- triangle of a square n x n matrix (excluding the diagonal). This is
-- supposed to store pairwise percentages of identical residues, pairwise
-- alignment scores or E-values from pairwise BLAST sequence comparisons.

-- Container for pairwise "distance" data.
Triangle ::= SEQUENCE {
    -- NOTE(review): presumably the dimension n of the n x n matrix; confirm
    nelements INTEGER,
    -- the pairwise values, when present
    scores Score-set OPTIONAL,
    -- NOTE(review): meaning of div-ranks is not visible here; confirm
    div-ranks SEQUENCE OF INTEGER OPTIONAL
}

-- Update-align is supposed to contain alignments that still need some work
-- done to fit into the CD-proper alignment. These originate from the
-- CD update process (generated by Blast, for example) or may be created in
-- an editing session to save its state

-- One annotation attached to an Update-align or Reject-id.
Update-comment ::= CHOICE {
    -- free text to describe nature of Update-align
    comment VisibleString,
    -- suggestion for inclusion in the CD without corresponding alignment
    addthis Seq-loc,
    -- if one or several alignment rows are to be replaced by the
    -- Update-align
    replaces Seq-loc,
    -- if used with Reject-id, specify a location on a sequence which
    -- should not be used
    reject-loc Seq-loc,
    -- if update alignment imported from citation and for whenever it
    -- seems necessary to cite
    reference Pub
}

-- The description and seqannot fields are both optional, as the Update-align
-- may be a Seq-annot without description, or a suggestion to add a sequence
-- without the corresponding alignment

-- A pending alignment; description and seqannot may be absent, type is
-- always present.
Update-align ::= SEQUENCE {
    -- comments on this Update-align, if any
    description SEQUENCE OF Update-comment OPTIONAL,
    -- contains the SeqAlign
    seqannot Seq-annot OPTIONAL,
    type INTEGER {
        unassigned (0),
        update (1),
        update-3d (2),
        demoted (51),
        demoted-3d (52),
        other (255)
    }
}

-- A set of sequence ids, optionally accompanied by comments.
Reject-id ::= SEQUENCE {
    -- comments on this entry, if any
    description SEQUENCE OF Update-comment OPTIONAL,
    -- the sequence ids
    ids SET OF Seq-id
}

-- One piece of evidence supporting an alignment annotation.
Feature-evidence ::= CHOICE {
    -- so we can spell out what doesn't fit in any other category
    comment VisibleString,
    -- evidence via a literature reference
    reference Pub,
    -- evidence via Biostruc-features, such as structure superpositions
    bsannot Biostruc-annot-set,
    -- evidence is a Sequence feature found elsewhere
    seqfeat Seq-feat,
    -- evidence is a book chapter or figure
    book-ref Cdd-book-ref
}

-- An annotated location on an aligned sequence.
Align-annot ::= SEQUENCE {
    -- points to a location in one of the aligned sequences, usually the
    -- master/representative
    location Seq-loc,
    -- to hold descriptions/names like "Heme binding site" or "catalytic
    -- triad" etc., something that should be used for labels in
    -- visualization
    description VisibleString OPTIONAL,
    -- evidence we can compute with
    evidence SEQUENCE OF Feature-evidence OPTIONAL,
    -- for typing annotated features
    type INTEGER OPTIONAL
}

-- an ordered list of Align-annot entries
Align-annot-set ::= SEQUENCE OF Align-annot

-- the Domain-parent records an evolutionary relationship which may not be
-- as simple as a classical parent-child relationship in a typical hierarchy,
-- i.e. where a CD is merely a specific subgroup ("child") of a more general
-- diverse alignment model ("parent"). A CD alignment model may be the result
-- of an ancient fusion event, combining two or more domains into a bigger unit
-- which has subsequently undergone a divergent evolutionary process similar to
-- what may have happened to a single "domain". A CD alignment model may 
-- also reflect the result of a deletion event, where a specific subgroup
-- lacks part of a (set of) domain(s), but where the part present is found to
-- be highly similar to a putative "parent", with some added evidence for
-- an actual deletion, like from the distribution of truncated copies in phylogenetic
-- lineages. Deletion events which affect different parts of a set of
-- duplicated domain architectures may be indistinguishable from actual
-- fission events, which means that we may want to represent the latter as
-- deletions after duplication and do not need a special case for fissions.

-- Link from a CD to one of its parents, with the kind of relationship.
Domain-parent ::= SEQUENCE {
    -- the classification of parent child relations
    parent-type INTEGER {
        classical (0),
        fusion (1),
        deletion (2),
        permutation (3),
        other (255)
    },
    -- identify the section parent by accession
    parentid Cdd-id,
    -- contains the sequence alignment linking CD alignment models, should
    -- align the masters/representatives of each CD
    seqannot Seq-annot OPTIONAL
}


-- record sequence trees generated by a suitable algorithm.

-- A sequence tree: the algorithm settings plus the root node.
Sequence-tree ::= SEQUENCE {
    -- NOTE(review): presumably the accession of the CD the tree belongs
    -- to; confirm
    cdAccession VisibleString OPTIONAL,
    -- settings of the algorithm
    algorithm Algorithm-type,
    -- takes the value FALSE when not given explicitly
    isAnnotated BOOLEAN DEFAULT FALSE,
    -- root node of the tree
    root SeqTree-node
}

-- A node of a sequence tree; the type is recursive through "children".
SeqTree-node ::= SEQUENCE {
    -- takes the value FALSE when not given explicitly
    isAnnotated BOOLEAN DEFAULT FALSE,
    name VisibleString OPTIONAL,
    distance REAL OPTIONAL,
    -- either a list of child nodes or a footprint
    children CHOICE {
        children SEQUENCE OF SeqTree-node,
        footprint SEQUENCE {
            seqRange Seq-interval,
            rowId INTEGER OPTIONAL
        }
    },
    annotation Node-annotation OPTIONAL
}

-- Settings of the algorithm used to build a sequence tree; the scoring
-- scheme and clustering method are mandatory, everything else is optional.
Algorithm-type ::= SEQUENCE {
    scoring-Scheme INTEGER {
        unassigned (0),
        percent-id (1),
        kimura-corrected (2),
        aligned-score (3),
        aligned-score-ext (4),
        aligned-score-filled (5),
        blast-footprint (6),
        blast-full (7),
        hybrid-aligned-score (8),
        other (255)
    },
    clustering-Method INTEGER {
        unassigned (0),
        single-linkage (1),
        neighbor-joining (2),
        fast-minimum-evolution (3),
        other (255)
    },
    score-Matrix INTEGER {
        unassigned (0),
        blosum45 (1),
        blosum62 (2),
        blosum80 (3),
        pam30 (4),
        pam70 (5),
        pam250 (6),
        other (255)
    } OPTIONAL,
    gapOpen INTEGER OPTIONAL,
    gapExtend INTEGER OPTIONAL,
    gapScaleFactor INTEGER OPTIONAL,
    nTerminalExt INTEGER OPTIONAL,
    cTerminalExt INTEGER OPTIONAL,
    tree-scope INTEGER {
        allDescendants (0),
        immediateChildrenOnly (1),
        selfOnly (2),
        other (255)
    } OPTIONAL,
    coloring-scope INTEGER {
        allDescendants (0),
        immediateChildrenOnly (1),
        other (255)
    } OPTIONAL
}

-- Annotation of a tree node; both text fields may be absent.
Node-annotation ::= SEQUENCE {
    presentInChildCD  VisibleString OPTIONAL,
    note              VisibleString OPTIONAL
}

-- the Cdd is the basic ASN.1 object storing an annotated and curated set of
-- alignments (formulated as a set of pairwise master-slave alignments). 
-- The alignment data are contained in Seq-annots, and a special type of
-- object, the Update-align, contains additional alignment data from unfinished
-- editing sessions and update processes. The Biostruc-annot-set holds 
-- structure superposition information for multiple structure-derived rows in
-- the alignment.
-- Version numbers in Global-ids are meant to be updated every time the Cdd is
-- changed in a way that does not require Global-ids to be changed (sequences
-- added in update cycle, annotation changed, alignment errors fixed)

-- The CD record itself; only name and id are mandatory.
Cdd ::= SEQUENCE {
    -- a short name (can be the accession..)
    name VisibleString,
    -- this CD's Ids
    id Cdd-id-set,
    -- status, references, etc.
    description Cdd-descr-set OPTIONAL,
    -- contains the CD alignment
    seqannot SEQUENCE OF Seq-annot OPTIONAL,
    -- contains structure alignment data or "core" definitions
    features Biostruc-annot-set OPTIONAL,
    -- store as bioseq-set inside seq-entry
    sequences Seq-entry OPTIONAL,
    -- profile for this region only; also stores the Seq-id of the master
    profile-range Seq-interval OPTIONAL,
    -- holds the truncated master, which may be something like a
    -- consensus, uses the same sequence coordinate frame as the
    -- profile-range
    trunc-master Bioseq OPTIONAL,
    -- relative residue frequencies
    posfreq Matrix OPTIONAL,
    -- position dependent score matrix
    scoremat Matrix OPTIONAL,
    -- pairwise distances for all seqs.
    distance Triangle OPTIONAL,
    -- this CD is the result of a split
    parent Cdd-id OPTIONAL,
    -- this CD has been split, not used
    children Cdd-id-set OPTIONAL,
    -- related CDs (common hits), clusters
    siblings Cdd-id-set OPTIONAL,
    -- co-occurring CDs, not used
    neighbors Cdd-id-set OPTIONAL,
    -- contains alignments from update or "lower panel"
    pending SEQUENCE OF Update-align OPTIONAL,
    -- SeqIds of rejected CD-members, ignore in update
    rejects SEQUENCE OF Reject-id OPTIONAL,
    -- record if CD has a 3D representative
    master3d SET OF Seq-id OPTIONAL,
    -- alignment annotation
    alignannot Align-annot-set OPTIONAL,
    -- record rendering styles
    style-dictionary Cn3d-style-dictionary OPTIONAL,
    -- user annotations in Cn3D
    user-annotations Cn3d-user-annotations OPTIONAL,
    -- list of parents
    ancestors SEQUENCE OF Domain-parent OPTIONAL,
    scoreparams PssmWithParameters OPTIONAL,
    seqtree Sequence-tree OPTIONAL
}

-- an unordered collection of Cdd records
Cdd-set ::= SET OF Cdd


-- Cdd projects store a set of CDs, typically related to each other.
-- Relationships would be specified using the ancestors fields in the
-- individual CD objects. For use with CD-Tree, a program to visualize
-- curated CD hierarchies and evidence for hierarchical family structures.

-- Rectangle of a viewer, given by its top-left corner and its size.
Cdd-Viewer-Rect ::= SEQUENCE {
    -- top coordinate
    top INTEGER,
    -- left coordinate
    left INTEGER,
    -- width
    width INTEGER,
    -- height
    height INTEGER
}

-- A viewer: its type, an optional rectangle and a list of accessions.
Cdd-Viewer ::= SEQUENCE {
    -- viewer type
    ctrl INTEGER {
        unassigned (0),
        cd-info (1),
        align-annot (2),
        seq-list (3),
        seq-tree (4),
        merge-preview (5),
        cross-hits (6),
        notes (7),
        tax-tree (8),
        dart (9),
        dart-selected-rows (10),
        other (255)
    },
    -- viewer rectangle
    rect Cdd-Viewer-Rect OPTIONAL,
    -- list of accessions associated with a viewer
    accessions SEQUENCE OF VisibleString
}

-- A command script; only the commands themselves are mandatory.
Cdd-Script ::= SEQUENCE {
    -- origin of the script, when recorded
    type INTEGER {
        unassigned (0),
        user-recorded (1),
        server-generated (2),
        other (255)
    } OPTIONAL,
    -- user assigned name/description
    name VisibleString OPTIONAL,
    -- actual script commands
    commands VisibleString
}


-- CD colors are encoded as: 0000FF for red, 00FF00 for green, FF0000 for blue

-- A project: a list of CDs with their colors, viewers, a log and
-- optional command scripts.
Cdd-Project ::= SEQUENCE {
    -- cds
    cds SEQUENCE OF Cdd,
    -- colors
    cdcolor SEQUENCE OF INTEGER,
    -- Sequence viewers
    viewers SEQUENCE OF Cdd-Viewer,
    -- log
    log VisibleString,
    -- command scripts
    scripts SEQUENCE OF Cdd-Script OPTIONAL
}

END

-- cn3d.asn
--$Revision: 1.15 $
--**********************************************************************
--
--  Definitions for Cn3D-specific data (rendering settings,
--    user annotations, etc.)
--
--  by Paul Thiessen
--
--  National Center for Biotechnology Information
--  National Institutes of Health
--  Bethesda, MD 20894 USA
--
-- asntool -m cn3d.asn -w 100 -o cn3d.h
-- asntool -B objcn3d -m cn3d.asn -G -w 100 -K cn3d.h -I mapcn3d.h \
--   -M ../mmdb1.asn,../mmdb2.asn,../mmdb3.asn
--**********************************************************************

NCBI-Cn3d DEFINITIONS ::=
-- Cn3D-specific information

BEGIN

EXPORTS  Cn3d-style-dictionary, Cn3d-user-annotations;

IMPORTS  Biostruc-id FROM MMDB
         Molecule-id, Residue-id FROM MMDB-Chemical-graph;


-- values of enumerations must match those in cn3d/style_manager.hpp!

-- for different types of backbones
Cn3d-backbone-type ::= ENUMERATED {
    off       (1),
    trace     (2),
    partial   (3),
    complete  (4)
}

-- atom/bond/object rendering styles
Cn3d-drawing-style ::= ENUMERATED {
    -- values 1 to 6: for atoms and bonds
    wire            (1),
    tubes           (2),
    ball-and-stick  (3),
    space-fill      (4),
    wire-worm       (5),
    tube-worm       (6),
    -- values 7 and 8: for 3d-objects
    with-arrows     (7),
    without-arrows  (8)
}

-- available color schemes (not all necessarily applicable to all objects);
-- note that the numeric values are not listed in ascending order
Cn3d-color-scheme ::= ENUMERATED {
    element              (1),
    object               (2),
    molecule             (3),
    domain               (4),
    residue              (20),
    secondary-structure  (5),
    user-select          (6),
    -- different alignment conservation coloring (currently only for
    -- proteins)
    aligned              (7),
    identity             (8),
    variety              (9),
    weighted-variety     (10),
    information-content  (11),
    fit                  (12),
    block-fit            (17),
    block-z-fit          (18),
    block-row-fit        (19),
    -- other schemes
    temperature          (13),
    hydrophobicity       (14),
    charge               (15),
    rainbow              (16)
}

-- RGB triplet, interpreted (after division by the scale-factor) as floating
-- point values which should range from [0..1]. The default scale-factor is
-- 255, so that one can conveniently set integer byte values [0..255] for
-- colors with the scale-factor already set appropriately to map to [0..1].
--    An alpha value is allowed, but is currently ignored by Cn3D.
Cn3d-color ::= SEQUENCE {
    -- divisor for the channel values; 255 when not given
    scale-factor  INTEGER DEFAULT 255,
    red           INTEGER,
    green         INTEGER,
    blue          INTEGER,
    -- 255 when not given
    alpha         INTEGER DEFAULT 255
}

-- style blob for backbones only
Cn3d-backbone-style ::= SEQUENCE {
    type          Cn3d-backbone-type,
    style         Cn3d-drawing-style,
    color-scheme  Cn3d-color-scheme,
    user-color    Cn3d-color
}

-- style blob for other objects
Cn3d-general-style ::= SEQUENCE {
    is-on         BOOLEAN,
    style         Cn3d-drawing-style,
    color-scheme  Cn3d-color-scheme,
    user-color    Cn3d-color
}

-- style blob for backbone labels
Cn3d-backbone-label-style ::= SEQUENCE {
    -- zero means none
    spacing  INTEGER,
    type     ENUMERATED {
        one-letter    (1),
        three-letter  (2)
    },
    number   ENUMERATED {
        none        (0),
        -- from 1, by residues present, to match sequence
        sequential  (1),
        -- use number assigned by PDB
        pdb         (2)
    },
    termini  BOOLEAN,
    -- all white, or (if false) color of alpha carbon
    white    BOOLEAN
}

-- rendering settings for Cn3D (mirrors StyleSettings class)
Cn3d-style-settings ::= SEQUENCE {
    -- a name (for favorites)
    name                     VisibleString OPTIONAL,
    -- backbone styles
    protein-backbone         Cn3d-backbone-style,
    nucleotide-backbone      Cn3d-backbone-style,
    -- styles for other stuff
    protein-sidechains       Cn3d-general-style,
    nucleotide-sidechains    Cn3d-general-style,
    heterogens               Cn3d-general-style,
    solvents                 Cn3d-general-style,
    connections              Cn3d-general-style,
    helix-objects            Cn3d-general-style,
    strand-objects           Cn3d-general-style,
    -- virtual disulfides
    virtual-disulfides-on    BOOLEAN,
    virtual-disulfide-color  Cn3d-color,
    -- hydrogens
    hydrogens-on             BOOLEAN,
    -- background
    background-color         Cn3d-color,
    -- floating point parameters: scale-factor applies to all the
    -- INTEGER fields from space-fill-proportion to strand-thickness
    scale-factor             INTEGER,
    space-fill-proportion    INTEGER,
    ball-radius              INTEGER,
    stick-radius             INTEGER,
    tube-radius              INTEGER,
    tube-worm-radius         INTEGER,
    helix-radius             INTEGER,
    strand-width             INTEGER,
    strand-thickness         INTEGER,
    -- backbone labels (no labels if not present)
    protein-labels           Cn3d-backbone-label-style OPTIONAL,
    nucleotide-labels        Cn3d-backbone-label-style OPTIONAL,
    -- ion labels
    ion-labels               BOOLEAN OPTIONAL
}

-- an unordered collection of Cn3d-style-settings
Cn3d-style-settings-set ::= SET OF Cn3d-style-settings

-- integer id of a style; used as the id of a Cn3d-style-table-item and as
-- the style-id of a Cn3d-user-annotation
Cn3d-style-table-id ::= INTEGER

-- one entry of the style table: an id paired with its style settings
Cn3d-style-table-item ::= SEQUENCE {
    id     Cn3d-style-table-id,
    style  Cn3d-style-settings
}

-- the global settings, and a lookup table of styles for user annotations.
Cn3d-style-dictionary ::= SEQUENCE {
    -- the global settings
    global-style  Cn3d-style-settings,
    -- lookup table of styles; may be absent
    style-table   SEQUENCE OF Cn3d-style-table-item OPTIONAL
}

-- a range of residues in a chain, identified by MMDB residue-id
-- (e.g., numbered from 1)
Cn3d-residue-range ::= SEQUENCE {
    -- first residue of the range
    from  Residue-id,
    -- last residue of the range
    -- NOTE(review): inclusiveness of the end point is not stated here
    to    Residue-id
}

-- set of locations on a particular chain
Cn3d-molecule-location ::= SEQUENCE {
    -- MMDB molecule id
    molecule-id  Molecule-id,
    -- which residues; whole molecule implied if absent
    residues     SEQUENCE OF Cn3d-residue-range OPTIONAL
}

-- set of locations on a particular structure object (e.g., a PDB/MMDB
-- structure), which may include multiple ranges of residues each on
-- multiple chains.
Cn3d-object-location ::= SEQUENCE {
    -- the structure object the locations refer to
    structure-id  Biostruc-id,
    -- the per-molecule locations on that structure
    residues      SEQUENCE OF Cn3d-molecule-location
}

-- information for an individual user annotation
Cn3d-user-annotation ::= SEQUENCE {
    -- a (short) name for this annotation
    name         VisibleString,
    -- an optional longer description
    description  VisibleString OPTIONAL,
    -- how to draw this annotation
    style-id     Cn3d-style-table-id,
    -- which residues to cover
    residues     SEQUENCE OF Cn3d-object-location,
    -- whether this annotation is to be turned on in Cn3D
    is-on        BOOLEAN
}

-- a GL-ordered transformation matrix
-- sixteen REAL components, m0 through m15
Cn3d-GL-matrix ::= SEQUENCE {
    m0   REAL,
    m1   REAL,
    m2   REAL,
    m3   REAL,
    m4   REAL,
    m5   REAL,
    m6   REAL,
    m7   REAL,
    m8   REAL,
    m9   REAL,
    m10  REAL,
    m11  REAL,
    m12  REAL,
    m13  REAL,
    m14  REAL,
    m15  REAL
}

-- a floating point 3d vector
-- the three REAL components of the vector
Cn3d-vector ::= SEQUENCE {
    x  REAL,
    y  REAL,
    z  REAL
}

-- parameters used to set up the camera in Cn3D
Cn3d-view-settings ::= SEQUENCE {
    -- camera on +Z axis this distance from origin
    camera-distance   REAL,
    -- camera angle
    camera-angle-rad  REAL,
    -- X,Y of point in Z=0 plane camera points at
    camera-look-at-X  REAL,
    camera-look-at-Y  REAL,
    -- distance of clipping planes from camera
    camera-clip-near  REAL,
    camera-clip-far   REAL,
    -- transformation of objects in the scene
    matrix            Cn3d-GL-matrix,
    -- center of rotation of whole scene
    rotation-center   Cn3d-vector
}

-- The list of annotations for a given CDD/mime. If residue regions overlap
-- between annotations that are turned on, the last annotation in this list
-- that contains these residues will be used as the display style for these
-- residues.
--   Also contains the current viewpoint, so that the user's camera angle
-- can be stored and reproduced, for illustrations, on-line figures, etc.
Cn3d-user-annotations ::= SEQUENCE {
    annotations SEQUENCE OF Cn3d-user-annotation OPTIONAL, -- order matters: later entries win on overlap
    view Cn3d-view-settings OPTIONAL                       -- stored camera/viewpoint, if any
}

END


-- docsum.asn
-- ============================================
-- ::DATATOOL:: Generated from "docsum_3.0.xsd"
-- ::DATATOOL:: by application DATATOOL version 1.8.6
-- ::DATATOOL:: on 05/02/2008 10:59:28
-- ============================================

-- edited with XMLSPY v5 rel. 4 U (http://www.xmlspy.com) by Michael Kholodov (National Library of Medicine) 
-- edited with XMLSpy v2005 rel. 3 U (http://www.altova.com) by Michael Feolo (NCBI/NLM/NIH) 
Docsum-3-0 DEFINITIONS ::=
BEGIN

-- Assay record: submitter/batch attributes plus method, taxonomy, strain,
-- comment and citation data.
-- NOTE(review): this module is generated from an XSD; 'attlist' presumably
-- carries the XML attributes of the originating element, and the remaining
-- members its child elements - confirm against the DATATOOL documentation.
Assay ::= SEQUENCE {
  attlist SET {
    handle VisibleString OPTIONAL,
    batch VisibleString OPTIONAL,
    batchId INTEGER OPTIONAL,
    batchType ENUMERATED {
      snpassay (1),
      validation (2),
      doublehit (3)
    } OPTIONAL,
    molType ENUMERATED {
      genomic (1),
      cDNA (2),
      mito (3),
      chloro (4)
    } OPTIONAL,
    sampleSize INTEGER OPTIONAL,
    population VisibleString OPTIONAL,
    linkoutUrl VisibleString OPTIONAL
  },
  method SEQUENCE {
    eMethod SEQUENCE {
      attlist SET {
        name VisibleString OPTIONAL, --Submitter's method identifier
        id VisibleString OPTIONAL --dbSNP method identifier
      },
      exception VisibleString --description of deviation from/addition to given method
    } OPTIONAL
  },
  taxonomy SEQUENCE {
    attlist SET {
      id INTEGER, --NCBI taxonomy ID for variation
      organism VisibleString OPTIONAL
    },
    taxonomy NULL --carries no data of its own; all values are in attlist
  },
  strains SEQUENCE OF VisibleString OPTIONAL,
  comment VisibleString OPTIONAL,
  citation SEQUENCE OF VisibleString OPTIONAL
}

--A collection of genome sequence records (curated gene regions (NG's), contigs (NW/NT's) and chromosomes (NC/AC's)) produced by a genome sequence project. Structure is populated from ContigInfo tables.
Assembly ::= SEQUENCE {
  attlist SET {
    dbSnpBuild INTEGER, --dbSNP build number defining the rsid set aligned to this assembly
    genomeBuild VisibleString, --assembly build number with possible 'subbuild' version numbers to reflect updates in gene annotation (human e.g. 34_3, 35_1, 36_1)
    groupLabel VisibleString OPTIONAL, --High-level classification of the assembly to distinguish reference projects from alternate solutions. GroupLabel field from organism/build-specific ContigInfo tables. "reference" is occasionally used as the preferred assembly; standards will converge as additional organism genome projects are finished. Note that some organism assembly names include extended characters like '~' and '/' that may be incompatible with OS filename conventions.
    assemblySource VisibleString OPTIONAL, --Name of the group(s) or organization(s) that generated the assembly
    current BOOLEAN OPTIONAL, --Marks the current genomic assembly
    reference BOOLEAN OPTIONAL --NOTE(review): undocumented in the schema; presumably marks the reference assembly - confirm
  },
  component SEQUENCE OF Component OPTIONAL, --sequence components belonging to this assembly
  snpStat SEQUENCE {
    attlist SET {
      mapWeight ENUMERATED {
        unmapped (1),
        unique-in-contig (2),
        two-hits-in-contig (3),
        less-10-hits (4),
        multiple-hits (5)
      }, --summary measure of placement precision in the assembly
      chromCount INTEGER OPTIONAL, --number of distinct chromosomes in the mapset
      placedContigCount INTEGER OPTIONAL, --number of distinct contigs [ gi | accession[.version] ] in the mapset
      unplacedContigCount INTEGER OPTIONAL, --number of sequence positions to a contig with unknown chromosomal assignment
      seqlocCount INTEGER OPTIONAL, --total number of sequence positions in the mapset
      hapCount INTEGER OPTIONAL --Number of hits to alternative genomic haplotypes (e.g. HLA DR region, KIR, or pseudo-autosomal regions like PAR) within the assembly mapset. Note that positions on haplotypes defined in other assemblies (a different assembly_group_label value) will not be counted in this value.
    },
    snpStat NULL --carries no data of its own; all values are in attlist
  }
}

--URL value from dbSNP_main.BaseURL links table. Attributes provide context information and the URL id that is referenced within individual refSNP objects.
BaseURL ::= SEQUENCE {
  attlist SET {
    urlId INTEGER OPTIONAL, --Resource identifier from dbSNP_main.baseURL.
    resourceName VisibleString OPTIONAL, --Name of linked resource
    resourceId VisibleString OPTIONAL --identifier expected by resource for URL
  },

  --the URL value itself
  baseURL VisibleString
}

--A sequence component of an Assembly, together with the variation map
--locations (MapLoc) placed on it.
Component ::= SEQUENCE {
  attlist SET {
    componentType ENUMERATED {
      contig (1),
      mrna (2)
    } OPTIONAL, --type of component: chromosome, contig, gene_region, etc. NOTE(review): only 'contig' and 'mrna' are actually enumerated here
    ctgId INTEGER OPTIONAL, --dbSNP contig_id used to join on contig hit / mapset data to these assembly properties
    accession VisibleString OPTIONAL, --Accession[.version] for the sequence component
    name VisibleString OPTIONAL, --contig name defined as either a submitter local id, element of a whole genome assembly set, or internal NCBI local id
    chromosome VisibleString OPTIONAL, --Organism appropriate chromosome tag, 'Un' reserved for default case of unplaced components
    start INTEGER OPTIONAL, --component starting position on the chromosome (base 0 inclusive)
    end INTEGER OPTIONAL, --component ending position on the chromosome (base 0 inclusive)
    orientation ENUMERATED {
      fwd (1),
      rev (2),
      unknown (3)
    } OPTIONAL, --orientation of this component to chromosome. In ContigInfo.orient: forward (fwd) = 0, reverse (rev) = 1, unknown = NULL; those are database codes, not the enumeration numbers used here (1, 2, 3).
    gi VisibleString OPTIONAL, --NCBI gi for component sequence (equivalent to accession.version) for nucleotide sequence.
    groupTerm VisibleString OPTIONAL, --Identifier label for the genome assembly that defines the contigs in this mapset and their placement within the organism genome.
    contigLabel VisibleString OPTIONAL --Display label for component
  },
  mapLoc SEQUENCE OF MapLoc --variation hits on this component
}

--Set of dbSNP refSNP docsums, with information about the source database,
--the query that produced the set, summary counts and the base URLs that
--individual records refer to.
ExchangeSet ::= SEQUENCE {
  attlist SET {
    setType VisibleString OPTIONAL, --set-type: full dump; from query; single refSNP
    setDepth VisibleString OPTIONAL, --content depth: brief XML (only refSNP properties and summary subSNP element content); full XML (full refSNP, full subSNP content; all flanking sequences)
    specVersion VisibleString OPTIONAL, --version number of docsum.asn/docsum.dtd specification
    dbSnpBuild INTEGER OPTIONAL, --build number of database for this export
    generated VisibleString OPTIONAL --Generated date
  },
  sourceDatabase SEQUENCE {
    attlist SET {
      taxId INTEGER, --NCBI taxonomy ID for variation
      organism VisibleString, --common name for species used as part of database name.
      dbSnpOrgAbbr VisibleString OPTIONAL, --organism abbreviation used in dbSNP.
      gpipeOrgAbbr VisibleString OPTIONAL --organism abbreviation used within NCBI genome pipeline data dumps.
    },
    sourceDatabase NULL --carries no data of its own; all values are in attlist
  },
  rs SEQUENCE OF Rs OPTIONAL, --the refSNP docsums in this set
  assay Assay OPTIONAL,
  query SEQUENCE {
    attlist SET {
      date VisibleString OPTIONAL, --yyyy-mm-dd
      string VisibleString OPTIONAL --Query terms or search constraints
    },
    query NULL --carries no data of its own; all values are in attlist
  } OPTIONAL,
  summary SEQUENCE {
    attlist SET {
      numRsIds INTEGER OPTIONAL, --Total number of refsnp-ids in this exchange set
      totalSeqLength INTEGER OPTIONAL, --Total length of exemplar flanking sequences
      numContigHits INTEGER OPTIONAL, --Total number of contig locations from SNPContigLoc
      numGeneHits INTEGER OPTIONAL, --Total number of locus ids from SNPContigLocusId
      numGiHits INTEGER OPTIONAL, --Total number of gi hits from MapLink
      num3dStructs INTEGER OPTIONAL, --Total number of 3D structures from SNP3D
      numAlleleFreqs INTEGER OPTIONAL, --Total number of allele frequencies from SubPopAllele
      numStsHits INTEGER OPTIONAL, --Total number of STS hits from SnpInSts
      numUnigeneCids INTEGER OPTIONAL --Total number of unigene cluster ids from UnigeneSnp
    },
    summary NULL --carries no data of its own; all values are in attlist
  },
  baseURL SEQUENCE OF BaseURL
}

--functional relationship of SNP (and possibly alleles) to genes at contig location as defined in organism-specific bxxx_SNPContigLocusId_xxx tables.
FxnSet ::= SEQUENCE {
  attlist SET {
    geneId INTEGER OPTIONAL, --gene-id of gene as aligned to contig
    symbol VisibleString OPTIONAL, --symbol (official if present in Entrez Gene) of gene
    mrnaAcc VisibleString OPTIONAL, --mRNA accession if variation in transcript
    mrnaVer INTEGER OPTIONAL, --mRNA sequence version if variation is in transcript
    protAcc VisibleString OPTIONAL, --protein accession if variation in protein
    protVer INTEGER OPTIONAL, --protein version if variation is in protein

    --Descriptions of fxnClass values, carried over from the per-value
    --annotations of the source schema.
    --NOTE(review): the generator emitted them as a flat list (17 lines for
    --20 values), so the pairing with the enumeration below is not explicit
    --here; confirm against the XSD before relying on a specific pairing.
    --  variation in region of gene, but not in transcript - deprecated
    --  synonymous change
    --  nonsynonymous change - deprecated
    --  untranslated region - deprecated
    --  splice-site - deprecated
    --  contig reference
    --  deprecated
    --  coding: synonymy unknown
    --  In gene segment with null mrna and protein. ex. IGLV4-69. geneId=28784
    --  within 3' 0.5kb to a gene.
    --  changes to STOP codon.
    --  alters codon to make an altered amino acid in protein product.
    --  indel snp causing frameshift.
    --  3 prime untranslated region
    --  5 prime untranslated region
    --  3 prime acceptor dinucleotide
    --  5 prime donor dinucleotide
    fxnClass ENUMERATED {
      locus-region (1),
      coding-unknown (2),
      coding-synonymous (3),
      coding-nonsynonymous (4),
      mrna-utr (5),
      intron (6),
      splice-site (7),
      reference (8),
      coding-exception (9),
      synonymy-unknown (10),
      gene-segment (11),
      near-gene-3 (12),
      near-gene-5 (13),
      nonsense (14),
      missense (15),
      frameshift (16),
      utr-3 (17),
      utr-5 (18),
      splice-3 (19),
      splice-5 (20)
    } OPTIONAL,
    readingFrame INTEGER OPTIONAL,
    allele VisibleString OPTIONAL, --variation allele: * suffix indicates allele of contig at this location
    residue VisibleString OPTIONAL, --translated amino acid residue for allele
    aaPosition INTEGER OPTIONAL --position of the variant residue in peptide sequence
  },

  --carries no data of its own; all values are in attlist
  fxnSet NULL
}

--Position of a single hit of a variation on a contig
MapLoc ::= SEQUENCE {
  attlist SET {
    asnFrom INTEGER, --beginning of variation as feature on contig
    asnTo INTEGER, --end position of variation as feature on contig

    --locType defines the seq-loc symbol if asn_from != asn_to.
    --Remaining per-value notes carried over from the source schema:
    --  insertion on contig
    --  asn-from = asn-to write as 'asn-from'
    --  deletion on contig
    --NOTE(review): the generator did not keep these notes attached to
    --individual enumeration values; confirm the pairing against the XSD.
    locType ENUMERATED {
      insertion (1),
      exact (2),
      deletion (3),
      range-ins (4),
      range-exact (5),
      range-del (6)
    },
    alnQuality REAL OPTIONAL, --alignment quality
    orient ENUMERATED {
      forward (1),
      reverse (2)
    } OPTIONAL, --orientation of refSNP sequence to contig sequence
    physMapInt INTEGER OPTIONAL, --chromosome position as integer for sorting
    leftFlankNeighborPos INTEGER OPTIONAL, --nearest aligned position in 5' flanking sequence of snp
    rightFlankNeighborPos INTEGER OPTIONAL, --nearest aligned position in 3' flanking sequence of snp
    leftContigNeighborPos INTEGER OPTIONAL, --nearest aligned position in 5' contig alignment of snp
    rightContigNeighborPos INTEGER OPTIONAL, --nearest aligned position in 3' contig alignment of snp
    numberOfMismatches INTEGER OPTIONAL, --number of mismatched positions in this alignment
    numberOfDeletions INTEGER OPTIONAL, --number of deletions in this alignment
    numberOfInsertions INTEGER OPTIONAL --number of insertions in this alignment
  },
  fxnSet SEQUENCE OF FxnSet OPTIONAL --functional relationships to genes at this location
}

--A sequence identified by gi (and optionally accession) together with the
--variation map locations (MapLoc) on it.
--NOTE(review): the source schema carries no documentation for this type or
--its attributes; the remarks below restate only what the definition shows.
PrimarySequence ::= SEQUENCE {
  attlist SET {
    dbSnpBuild INTEGER,
    gi INTEGER,
    source ENUMERATED {
      submitter (1),
      blastmb (2),
      xm (3)
    } OPTIONAL,
    accession VisibleString OPTIONAL
  },
  mapLoc SEQUENCE OF MapLoc
}

--defines the docsum structure for refSNP clusters, where a refSNP cluster (rs) is a grouping of individual dbSNP submissions that all refer to the same variation. The refsnp provides a single unified record for annotation of NCBI resources such as reference genome sequence.
Rs ::= SEQUENCE {
  attlist SET {
    rsId INTEGER, --refSNP (rs) number
    snpClass ENUMERATED {
      snp (1),
      in-del (2),
      heterozygous (3),
      microsatellite (4),
      named-locus (5),
      no-variation (6),
      mixed (7),
      multinucleotide-polymorphism (8)
    },
    snpType ENUMERATED {
      notwithdrawn (1),
      artifact (2),
      gene-duplication (3),
      duplicate-submission (4),
      notspecified (5),
      ambiguous-location (6),
      low-map-quality (7)
    },
    molType ENUMERATED {
      genomic (1),
      cDNA (2),
      mito (3),
      chloro (4),
      unknown (5)
    },
    validProbMin INTEGER OPTIONAL, --minimum reported success rate of all submissions in cluster
    validProbMax INTEGER OPTIONAL, --maximum reported success rate of all submissions in cluster
    genotype BOOLEAN OPTIONAL, --at least one genotype reported for this refSNP
    bitField VisibleString OPTIONAL --NOTE(review): undocumented in the source schema
  },
  het SEQUENCE {
    attlist SET {
      type ENUMERATED {
        est (1),
        obs (2)
      }, --Est=Estimated average het from allele frequencies, Obs=Observed from genotype data
      value REAL, --Heterozygosity
      stdError REAL OPTIONAL --Standard error of Het estimate
    },
    het NULL --carries no data of its own; all values are in attlist
  } OPTIONAL,
  validation SEQUENCE {
    --NOTE(review): the byCluster/byFrequency descriptions below disagree
    --with Ss.validated, where "has frequency data submitted" is listed for
    --by-frequency and "2+ submissions ..." for by-cluster; the text on
    --by2Hit2Allele repeats the byFrequency text. They are kept as generated;
    --confirm against the XSD before relying on them.
    attlist SET {
      byCluster BOOLEAN OPTIONAL, --at least one subsnp in cluster has frequency data submitted
      byFrequency BOOLEAN OPTIONAL, --cluster has 2+ submissions, with 1+ submissions assayed with a non-computational method
      byOtherPop BOOLEAN OPTIONAL,
      by2Hit2Allele BOOLEAN OPTIONAL, --cluster has 2+ submissions, with 1+ submissions assayed with a non-computational method
      byHapMap BOOLEAN OPTIONAL --TBD
    },
    otherPopBatchId SEQUENCE OF INTEGER OPTIONAL, --dbSNP batch-id's for other pop snp validation data.
    twoHit2AlleleBatchId SEQUENCE OF INTEGER OPTIONAL --dbSNP batch-id's for double-hit snp validation data. Use batch-id to get methods, etc.
  },

  --date the refsnp cluster was instantiated
  create SEQUENCE {
    attlist SET {
      build INTEGER OPTIONAL, --build number when the cluster was created
      date VisibleString OPTIONAL --yyyy-mm-dd
    },
    create NULL --carries no data of its own; all values are in attlist
  },

  --most recent date the cluster was updated (member added or deleted)
  update SEQUENCE {
    attlist SET {
      build INTEGER OPTIONAL, --build number when the cluster was updated
      date VisibleString OPTIONAL --yyyy-mm-dd
    },
    update NULL --carries no data of its own; all values are in attlist
  } OPTIONAL,
  sequence SEQUENCE {
    attlist SET {
      exemplarSs INTEGER --dbSNP ss# selected as source of refSNP flanking sequence, ss# part of ss-list below
    },

    --5' sequence that flanks the variation
    seq5 VisibleString OPTIONAL,

    --list of all nucleotide alleles observed in ss-list members, correcting for reverse complementation of members reported in reverse orientation
    observed VisibleString,

    --3' sequence that flanks the variation
    seq3 VisibleString OPTIONAL
  },
  ss SEQUENCE OF Ss, --the individual submissions grouped into this cluster
  assembly SEQUENCE OF Assembly OPTIONAL,
  primarySequence SEQUENCE OF PrimarySequence OPTIONAL,
  rsStruct SEQUENCE OF RsStruct OPTIONAL,
  rsLinkout SEQUENCE OF RsLinkout OPTIONAL,
  mergeHistory SEQUENCE OF SEQUENCE {
    attlist SET {
      rsId INTEGER, --previously issued rs id whose member assays have now been merged
      buildId INTEGER OPTIONAL, --build id when rs id was merged into parent rs
      orientFlip BOOLEAN OPTIONAL --TRUE if strand of rs id is reverse to parent object's current strand
    },
    mergeHistory NULL --carries no data of its own; all values are in attlist
  } OPTIONAL,
  hgvs SEQUENCE OF VisibleString OPTIONAL --HGVS name list
}

--link data for another resource
RsLinkout ::= SEQUENCE {
  attlist SET {
    resourceId VisibleString, --BaseURLList.url_id
    linkValue VisibleString --value to append to ResourceURL.base-url for complete link
  },

  --carries no data of its own; all values are in attlist
  rsLinkout NULL
}

--structure information for SNP
RsStruct ::= SEQUENCE {
  attlist SET {
    protAcc VisibleString OPTIONAL, --accession of the protein with variation
    protGi INTEGER OPTIONAL, --GI of the protein with variation
    protLoc INTEGER OPTIONAL, --position of the residue for the protein GI
    protResidue VisibleString OPTIONAL, --residue specified for protein at prot-loc location
    rsResidue VisibleString OPTIONAL, --alternative residue specified by variation sequence
    structGi INTEGER OPTIONAL, --GI of the structure neighbor
    structLoc INTEGER OPTIONAL, --position of the residue for the structure GI
    structResidue VisibleString OPTIONAL --residue specified for protein at struct-loc location
  },

  --carries no data of its own; all values are in attlist
  rsStruct NULL
}

--data for an individual submission to dbSNP
Ss ::= SEQUENCE {
  attlist SET {
    ssId INTEGER, --dbSNP accession number for submission
    handle VisibleString, --Tag for the submitting laboratory
    batchId INTEGER, --dbSNP number for batch submission

    --Notes carried over from the source schema: "submission (ss#)",
    --"submitter ID".
    --NOTE(review): presumably locSnpId is the submitter's own id for this
    --submission - confirm against the XSD.
    locSnpId VisibleString OPTIONAL,
    subSnpClass ENUMERATED {
      snp (1),
      in-del (2),
      heterozygous (3),
      microsatellite (4),
      named-locus (5),
      no-variation (6),
      mixed (7),
      multinucleotide-polymorphism (8)
    } OPTIONAL, --SubSNP classification by type of variation

    --orientation of refsnp cluster members to refsnp cluster sequence
    --  ss flanking sequence is in same orientation as seq-ss-exemplar
    --  flanking sequence and alleles are reverse complement of refSNP as defined by ss exemplar
    orient ENUMERATED {
      forward (1),
      reverse (2)
    } OPTIONAL,
    strand ENUMERATED {
      top (1),
      bottom (2)
    } OPTIONAL, --strand is defined as TOP/BOTTOM by nature of flanking nucleotide sequence
    molType ENUMERATED {
      genomic (1),
      cDNA (2),
      mito (3),
      chloro (4),
      unknown (5)
    } OPTIONAL, --moltype from Batch table
    buildId INTEGER OPTIONAL, --dbSNP build number when ss# was added to a refSNP (rs#) cluster

    --class of method used to assay for the variation
    --Per-value notes carried over from the source schema (only four notes
    --for eight values; the pairing is not explicit here):
    --  Denaturing High Pressure Liquid Chromatography used to detect SNP
    --  a hybridization method (e.g. chip) was used to assay for variation
    --  variation was mined from sequence alignment with software
    --  samples were sequenced and resulting alignment used to define variation
    methodClass ENUMERATED {
      dHPLC (1),
      hybridize (2),
      computed (3),
      sSCP (4),
      other (5),
      unknown (6),
      rFLP (7),
      sequence (8)
    } OPTIONAL,

    --Per-value notes carried over from the source schema, in the order given:
    --  subsnp has been experimentally validated by submitter
    --  subsnp has frequency data submitted
    --  has 2+ submissions, with 1+ submission assayed with a non-computational method
    validated ENUMERATED {
      by-submitter (1),
      by-frequency (2),
      by-cluster (3)
    } OPTIONAL,
    linkoutUrl VisibleString OPTIONAL --append loc-snp-id to this base URL to construct a pointer to submitter data.
  },
  sequence SEQUENCE {

    --5' sequence that flanks the variation
    seq5 VisibleString OPTIONAL,

    --list of all nucleotide alleles observed in ss-list members, correcting for reverse complementation of members reported in reverse orientation
    observed VisibleString,

    --3' sequence that flanks the variation
    seq3 VisibleString OPTIONAL
  }
}

END


-- entrez2.asn
--$Revision: 1.12 $********************************************
--
--  entrez2.asn
--   Version 1
--
--   API to Entrez Engine (1999)
--   Retrieval of sequence done through ID1 module
--     Also, SeqId queries
--   Retrieval of PubMed records through PubMed module
--   Retrieval of Structures through PubStruct module
--   Retrieval of Genomes through Genomes module
--
--***************************************************************

NCBI-Entrez2 DEFINITIONS ::=
BEGIN

   --**************************************
   --  Entrez2 common elements
   --**************************************

-- Basic aliases used by the Entrez2 definitions in this chunk.
-- NOTE(review): the integer encoding of Entrez2-dt (epoch/units) is not
-- defined in this file.
Entrez2-dt ::= INTEGER                   -- a date/time stamp
Entrez2-db-id ::= VisibleString          -- database name
Entrez2-field-id ::= VisibleString       -- field name
Entrez2-link-id ::= VisibleString        -- link name

-- A list of record UIDs from one database.
-- NOTE(review): how the UIDs are packed into the OCTET STRING is not
-- specified in this file.
Entrez2-id-list ::= SEQUENCE  {          -- list of record UIDs
  db Entrez2-db-id ,                     -- the database
  num INTEGER ,                          -- number of uids
  uids OCTET STRING OPTIONAL  }          -- coded uids

       --****************************************
       -- The structured form of the boolean is the same in a request or
       --    return so that it is easy to modify a query. This means some
       --    fields are only considered in a return value, like counts
       --    by term. They are ignored in a request.
       -- The structured boolean supports specific boolean components,
       --    an unparsed string in query syntax, and UID lists as
       --    elements of a boolean. This makes it possible to submit
       --    a single string, a fully structured query, or a mixture.
       --    The UID list feature means one can also perform refinements
       --    on UID lists from links, neighbors, or other operations.
       --    UID list query now returns a history key for subsequent use.
       --*****************************************

-- A Boolean query against one database, expressed as an ordered list of
-- elements (strings, operators, terms, UID lists, history keys).
Entrez2-boolean-exp ::= SEQUENCE {
  db Entrez2-db-id ,                         -- database for this query
  exp SEQUENCE OF Entrez2-boolean-element ,  -- the Boolean
  limits Entrez2-limits OPTIONAL }           -- date filter and UID count/offset limits

-- One element of a Boolean expression; exactly one alternative is present.
Entrez2-boolean-element ::= CHOICE {
  str VisibleString ,                       -- unparsed query string
  op Entrez2-operator ,                     -- logical operator
  term Entrez2-boolean-term ,               -- fielded term
  ids Entrez2-id-list ,                     -- list of UIDs - returns history key in reply
  key VisibleString }                       -- history key for uploaded UID list or other query

       --*****************************************
       -- the term is both sent and received as parts of
       --   queries and replies. The attributes can be filled in
       --   by either, but may be ignored by one or the other. Flags are
       --   shown if a real value is only of use in the query (Q), only
       --   in the reply (R), or used in both (B)
       -- do-not-explode and do-not-translate are only active when set
       --   by the query. However, they retain those settings in the
       --   return value so they can be resent with a new query
       --******************************************

-- A fielded query term. Usage flags on each member: (Q) meaningful in a
-- query only, (R) meaningful in a reply only, (B) meaningful in both.
Entrez2-boolean-term ::= SEQUENCE {
  field            Entrez2-field-id ,       -- (B) field the term belongs to
  term             VisibleString ,          -- (B) the term text
  term-count       INTEGER OPTIONAL ,       -- (R) count of records with term
  do-not-explode   BOOLEAN DEFAULT FALSE ,  -- (Q) do not explode term
  do-not-translate BOOLEAN DEFAULT FALSE    -- (Q) do not use synonyms
}
  
-- Operators and grouping tokens that may appear between the other elements
-- of an Entrez2-boolean-exp.
Entrez2-operator ::= INTEGER {
  and (1) ,
  or (2) ,
  butnot (3) ,
  range (4) ,
  left-paren (5) ,
  right-paren (6) }

  --***************************************
  --  Entrez2 Request types
  --***************************************
       --****************************************
       -- The basic request wrapper leaves space for a version which
       --   allows the server to support older clients
       -- The tool parameter allows us to log the client types for
       --   debugging and tuning
       -- The cookie is a session ID returned by the first Entrez2-reply
       --****************************************
       
-- Wrapper around every request: the request itself plus spec version,
-- client tool name and history/session information.
Entrez2-request ::= SEQUENCE {           -- a standard request
  request E2Request ,                    -- the actual request
  version INTEGER ,                      -- ASN1 spec version
  tool VisibleString OPTIONAL ,          -- tool making request
  cookie VisibleString OPTIONAL ,        -- history session cookie
  use-history BOOLEAN DEFAULT FALSE }    -- request should use history

-- The request alternatives. The alternative names match those of E2Reply.
E2Request ::= CHOICE {                   -- request types
  get-info NULL ,                        -- ask for info block
  eval-boolean Entrez2-eval-boolean ,    -- Boolean lookup
  get-docsum Entrez2-id-list ,           -- get the DocSums
  get-term-pos Entrez2-term-query,       -- get position in term list
  get-term-list Entrez2-term-pos ,       -- get Term list by position
  get-term-hierarchy Entrez2-hier-query, -- get a hierarchy from a term
  get-links Entrez2-get-links ,          -- get specific links from a UID list
  get-linked Entrez2-get-links ,         -- get subset of UID list which has links
  get-link-counts Entrez2-id }           -- get all links from one UID

       --****************************************
       -- When evaluating a boolean query the counts of hits is always
       --    returned.
       -- In addition, you can request the UIDs of the hits or the
       --    the parsed query in structured form (with counts by term),
       --    or both.
       --****************************************
  
-- Request to evaluate a Boolean query; both return flags default to FALSE.
Entrez2-eval-boolean ::= SEQUENCE {       -- evaluate Boolean query
  return-UIDs BOOLEAN DEFAULT FALSE,      -- return UID list?
  return-parse BOOLEAN DEFAULT FALSE,     -- return parsed query?
  query Entrez2-boolean-exp }             -- the actual query

-- A date filter: a begin/end date/time stamp pair plus the id of the field
-- that the filter applies to.
-- NOTE(review): whether the bounds are inclusive is not stated here.
Entrez2-dt-filter ::= SEQUENCE { 
  begin-date Entrez2-dt,
  end-date Entrez2-dt,
  type-date Entrez2-field-id }

-- Limits applied to a query: an optional date filter plus optional bounds
-- on the returned UID list.
Entrez2-limits ::= SEQUENCE {
  filter-date Entrez2-dt-filter OPTIONAL ,  -- date limits
  max-UIDs    INTEGER OPTIONAL ,            -- max UIDs to return in list
  offset-UIDs INTEGER OPTIONAL              -- start partway into UID list
}
 
  
-- Identifies one record: a database name plus a UID within it.
Entrez2-id ::= SEQUENCE {                -- a single UID
  db Entrez2-db-id ,
  uid INTEGER }

-- Names one term in one field of one database; used by the get-term-pos
-- request.
Entrez2-term-query ::= SEQUENCE {
  db Entrez2-db-id ,
  field Entrez2-field-id ,
  term VisibleString }

-- Hierarchy lookup for a field of a database, keyed either by term or by
-- Taxonomy ID. Both keys are OPTIONAL in the syntax.
Entrez2-hier-query ::= SEQUENCE {
  db Entrez2-db-id ,
  field Entrez2-field-id ,
  term VisibleString OPTIONAL ,          -- query with either term
  txid INTEGER OPTIONAL }                -- or Taxonomy ID

-- Selects a portion of a field's term list by starting position and count.
Entrez2-term-pos ::= SEQUENCE {          -- request portions of term list
  db Entrez2-db-id ,
  field Entrez2-field-id ,
  first-term-pos INTEGER ,
  number-of-terms INTEGER OPTIONAL }     -- optional for hierarchy only

-- Requests links of a single type for a list of UIDs.
-- Note: 'max-UIDS' is capitalized differently from 'max-UIDs' in
-- Entrez2-limits; the spelling is part of the interface and is kept as is.
Entrez2-get-links ::= SEQUENCE {         -- request links of one type
  uids Entrez2-id-list ,                 -- docs to link from
  linktype Entrez2-link-id ,             -- type of link
  max-UIDS INTEGER OPTIONAL ,            -- maximum number of links to return
  count-only BOOLEAN OPTIONAL ,          -- return only the counts
  parents-persist BOOLEAN OPTIONAL }     -- allow original uids in list

  --**********************************************************
  -- Replies from the Entrez server
  --  All replies contain the date/time stamp when they were executed.
  --  To do regular date-bounded searches, use this value+1 to search
  --  again later instead of recording the date/time on the client machine.
  --  the cookie allows a simple key string to represent UID lists in the history
  --**********************************************************
  
-- Wrapper around every reply: the reply itself plus server time stamp,
-- server version and optional message and history information.
Entrez2-reply ::= SEQUENCE {
  reply E2Reply ,                       -- the actual reply
  dt Entrez2-dt ,                       -- date/time stamp from server
  server VisibleString ,                -- server version info
  msg VisibleString OPTIONAL ,          -- possibly a message to the user
  key VisibleString OPTIONAL ,          -- history key for query
  cookie VisibleString OPTIONAL }       -- history session cookie

-- The reply alternatives. Apart from 'error', each alternative carries the
-- same name as the E2Request alternative it answers.
E2Reply ::= CHOICE {
  error VisibleString ,                 -- if nothing can be returned
  get-info Entrez2-info ,               -- the database info
  eval-boolean Entrez2-boolean-reply,   -- result of boolean query
  get-docsum Entrez2-docsum-list,
  get-term-pos INTEGER,                 -- position of the term
  get-term-list Entrez2-term-list,
  get-term-hierarchy Entrez2-hier-node,
  get-links Entrez2-link-set,
  get-linked Entrez2-id-list,
  get-link-counts Entrez2-link-count-list }


-- Reply to get-info: a count, a build date and one Entrez2-db-info per
-- database.
Entrez2-info ::= SEQUENCE {             -- describes all the databases
  db-count INTEGER ,                    -- number of databases
  build-date Entrez2-dt ,               -- build date of databases
  db-info SEQUENCE OF Entrez2-db-info } -- info by database

-- Description of one database: names, record count, and its fields, links
-- and docsum fields. Each list is accompanied by a separate count member.
Entrez2-db-info ::= SEQUENCE {          -- info for one database
  db-name Entrez2-db-id ,               -- internal name
  db-menu VisibleString ,               -- short name for menu
  db-descr VisibleString ,              -- longer explanatory name
  doc-count INTEGER ,                   -- total number of records
  field-count INTEGER ,                 -- number of field types
  fields SEQUENCE OF Entrez2-field-info,
  link-count INTEGER ,                  -- number of link types
  links SEQUENCE OF Entrez2-link-info,
  docsum-field-count INTEGER,
  docsum-fields SEQUENCE OF Entrez2-docsum-field-info }

-- Description of one searchable field: names, term count and a set of
-- optional capability flags.
-- NOTE(review): the meaning of an absent flag is not stated here.
Entrez2-field-info ::= SEQUENCE {       -- info about one field
  field-name Entrez2-field-id ,         -- the internal name
  field-menu VisibleString ,            -- short string suitable for menu
  field-descr VisibleString ,           -- longer, explanatory name
  term-count INTEGER ,                  -- number of terms in field
  is-date BOOLEAN OPTIONAL ,
  is-numerical BOOLEAN OPTIONAL ,
  single-token BOOLEAN OPTIONAL ,
  hierarchy-avail BOOLEAN OPTIONAL ,
  is-rangable BOOLEAN OPTIONAL ,
  is-truncatable BOOLEAN OPTIONAL }

-- Description of one link type: names, target database and optional data
-- element size.
Entrez2-link-info ::= SEQUENCE {        -- info about one link
  link-name Entrez2-link-id ,
  link-menu VisibleString ,
  link-descr VisibleString ,
  db-to Entrez2-db-id ,                 -- database it links to
  data-size INTEGER OPTIONAL }          -- size of link data element

-- Value type of a docsum field, referenced by Entrez2-docsum-field-info.
Entrez2-docsum-field-type ::= INTEGER {
  string (1) ,
  int    (2) ,
  float  (3) ,
  date-pubmed (4) }

-- Description of one docsum field: its name, a description and its value
-- type.
Entrez2-docsum-field-info ::= SEQUENCE {
  field-name VisibleString,
  field-description VisibleString,
  field-type Entrez2-docsum-field-type }

-- Reply to eval-boolean: the hit count, plus the UID list and/or parsed
-- query when they were requested.
Entrez2-boolean-reply ::= SEQUENCE {
  count INTEGER ,                       -- records hit
  uids Entrez2-id-list OPTIONAL,        -- if uids requested
  query Entrez2-boolean-exp OPTIONAL }  -- if parsed query requested

-- A list of document summaries together with an explicit count.
Entrez2-docsum-list ::= SEQUENCE {
  count INTEGER ,                       -- number of docsums
  list SEQUENCE OF Entrez2-docsum }     -- the docsums themselves

-- Document summary for one record: its uid plus a list of
-- name/value pairs (Entrez2-docsum-data).
Entrez2-docsum ::= SEQUENCE {
  uid INTEGER ,                         -- primary uid (gi, pubmedid)
  docsum-data SEQUENCE OF Entrez2-docsum-data }  -- the summary fields

-- One docsum field as a name/value pair. The value is always carried as a
-- string, whatever type Entrez2-docsum-field-type declares for the field.
Entrez2-docsum-data::= SEQUENCE {
  field-name VisibleString,             -- name of the docsum field
  field-value VisibleString }           -- value, as text

-- A window onto a list of terms: where the window starts, how many terms
-- it holds, and the terms.
Entrez2-term-list ::= SEQUENCE {
  pos INTEGER,                          -- position of first term in list
  num INTEGER,                          -- number of terms in list
  list SEQUENCE OF Entrez2-term }       -- the terms

-- One index term with the number of records that carry it.
Entrez2-term ::= SEQUENCE {
  term VisibleString ,                  -- the term text
  txid  INTEGER OPTIONAL,               -- NOTE(review): undocumented; presumably a taxonomy id, confirm
  count INTEGER ,                       -- count of records with this term
  is-leaf-node BOOLEAN OPTIONAL }       -- used for hierarchy only

-- One node of a hierarchical index: its official name, the terms up its
-- lineage and its child terms, each list with an explicit count.
-- The element name cannonical-form is misspelled, but it is part of the
-- published interface and must not be renamed.
Entrez2-hier-node ::= SEQUENCE {        -- for hierarchical index
  cannonical-form VisibleString ,       -- the official name
  lineage-count INTEGER ,               -- number of strings in lineage
  lineage SEQUENCE OF Entrez2-term OPTIONAL , -- strings up the lineage
  child-count INTEGER ,                 -- number of children of this node
  children SEQUENCE OF Entrez2-term ,   -- the children
  is-ambiguous BOOLEAN OPTIONAL }       -- used for hierarchy only


      --*******************************************
      -- Links are returned in sets also using OCTET STRINGS
      --*******************************************

-- A set of links: the linked ids plus, optionally, per-link score data
-- packed into an OCTET STRING with the given element size.
Entrez2-link-set ::= SEQUENCE {          -- set of links
  ids Entrez2-id-list ,                  -- the linked ids
  data-size INTEGER OPTIONAL ,           -- size of data elements
  data OCTET STRING OPTIONAL }           -- coded scores

-- Link counts for a single uid, one Entrez2-link-count per link type.
Entrez2-link-count-list ::= SEQUENCE {   -- all links from 1 uid
  link-type-count INTEGER ,              -- number of types of links
  links SEQUENCE OF Entrez2-link-count } -- one entry per link type

-- Number of links of one link type.
Entrez2-link-count ::= SEQUENCE {        -- link count of one type
  link-type Entrez2-link-id ,            -- which link type
  link-count INTEGER }                   -- how many links of that type

END


-- entrezgene.asn
--$Revision: 142744 $ 
--********************************************************************** 
-- 
--  NCBI Entrezgene 
--  by James Ostell, 2001 
--   
--  Generic "Gene" object for Entrez Genes 
--    This object is designed to incorporate a subset of information from 
--    LocusLink and from records in Entrez Genomes to provide indexing, 
--    linkage, and a useful summary report in Entrez for "Genes" 
-- 
--********************************************************************** 
 
NCBI-Entrezgene DEFINITIONS ::= 
BEGIN 
 
EXPORTS Entrezgene, Entrezgene-Set, Gene-track; 
 
IMPORTS Gene-ref FROM NCBI-Gene 
    Prot-ref FROM NCBI-Protein 
    BioSource FROM NCBI-BioSource 
    RNA-ref FROM NCBI-RNA 
    Dbtag, Date FROM NCBI-General 
    Seq-loc FROM NCBI-Seqloc 
    Pub FROM NCBI-Pub; 
 
--******************************************** 
-- Entrezgene is the "document" indexed in Entrez 
--  and presented in the full display 
-- It also contains the Entrez ID and date information 
--******************************************* 
-- The Entrezgene record. Only type, source and gene are mandatory; every
-- other element is OPTIONAL.
-- "see note N" refers to the numbered notes in the comment block that
-- follows this module's END.
Entrezgene ::= SEQUENCE { 
    track-info Gene-track OPTIONAL , -- not in submission, but in retrieval 
    type INTEGER {                   -- type of Gene
                                     -- (INTEGER with named numbers: other values
                                     --  are still valid encodings)
        unknown (0) ,
        tRNA    (1) ,
        rRNA    (2) ,
        snRNA   (3) ,
        scRNA   (4) ,
        snoRNA  (5) ,
        protein-coding (6) ,
        pseudo  (7) ,
        transposon  (8) ,
        miscRNA  (9) ,
        ncRNA (10) ,
        other (255) } ,
    source BioSource ,                  -- imported from NCBI-BioSource
    gene Gene-ref ,                     -- for locus-tag see note 3
    prot Prot-ref OPTIONAL ,            -- imported from NCBI-Protein
    rna RNA-ref OPTIONAL ,              -- imported from NCBI-RNA
    summary VisibleString OPTIONAL ,    -- short summary 
    location SEQUENCE OF Maps OPTIONAL, -- map positions, see Maps
    gene-source Gene-source OPTIONAL ,             -- NCBI source to Entrez 
    locus SEQUENCE OF Gene-commentary OPTIONAL ,   -- location of gene on chromosome (if known)
                                                   -- and all information about products
                                                   -- (mRNA, proteins and so on)
    properties SEQUENCE OF Gene-commentary OPTIONAL , 
    refgene SEQUENCE OF Gene-commentary OPTIONAL , -- NG for this? 
    homology SEQUENCE OF Gene-commentary OPTIONAL , 
    comments SEQUENCE OF Gene-commentary OPTIONAL ,
    unique-keys SEQUENCE OF Dbtag OPTIONAL ,              -- see note 3
    xtra-index-terms SEQUENCE OF VisibleString OPTIONAL , -- see note 2
    xtra-properties SEQUENCE OF Xtra-Terms OPTIONAL ,     -- see note 2
    xtra-iq SEQUENCE OF Xtra-Terms OPTIONAL,              -- see note 2
    non-unique-keys SEQUENCE OF Dbtag OPTIONAL }          -- NOTE(review): undocumented; presumably keys
                                                          -- without the uniqueness guarantee of note 3

-- An unordered collection (SET OF) of Entrezgene records.
Entrezgene-Set ::= SET OF Entrezgene

-- Tracking information for one Entrezgene record: its id, its status and
-- its creation, update and discontinuation dates.
Gene-track ::= SEQUENCE { 
    geneid INTEGER ,     -- required unique document id 
    status INTEGER {     -- record status; an omitted value means live (DEFAULT)
        live (0) ,
        secondary (1) ,   -- synonym with merged
        discontinued (2), -- 'deleted', still index and display to public
        newentry (3)      --  for GeneRif submission
    } DEFAULT live ,
    current-id SEQUENCE OF Dbtag OPTIONAL , -- see note 1 below
    create-date Date ,   -- date created in Entrez 
    update-date Date ,   -- last date updated in Entrez 
    discontinue-date Date OPTIONAL } -- presumably the date the record was discontinued (confirm)
 
-- Identifies the NCBI source that supplied the record to Entrez and says
-- which kinds of URL exist for it. The three BOOLEAN flags default to
-- FALSE when omitted.
Gene-source ::= SEQUENCE { 
    src VisibleString ,                -- key to the source within NCBI locuslink, Ecoli, etc 
    src-int INTEGER OPTIONAL ,         -- eg. locuslink id 
    src-str1 VisibleString OPTIONAL ,  -- eg. chromosome1 
    src-str2 VisibleString OPTIONAL ,  -- see note 3
    gene-display BOOLEAN DEFAULT FALSE ,  -- do we have a URL for gene display? 
    locus-display BOOLEAN DEFAULT FALSE , -- do we have a URL for map/locus display? 
    extra-terms BOOLEAN DEFAULT FALSE }   -- do we have a URL for extra indexing terms? 
 
-- General-purpose annotation node used throughout Entrezgene (locus,
-- properties, refgene, homology, comments). Only type is mandatory.
-- The type is recursive: products, properties and comment each hold nested
-- Gene-commentary values.
-- Note that "comment" is both a named number of type (254) and the name of
-- an element further down; the two are unrelated.
Gene-commentary ::= SEQUENCE { 
    type INTEGER {            -- type of Gene Commentary
                              -- (INTEGER with named numbers, not ENUMERATED)
        genomic (1) ,
        pre-RNA (2) ,
        mRNA (3) ,
        rRNA (4) ,
        tRNA (5) ,
        snRNA (6) ,
        scRNA (7) ,
        peptide (8) ,
        other-genetic (9) ,
        genomic-mRNA (10) ,
        cRNA (11) ,
        mature-peptide (12) ,
        pre-protein (13) ,
        miscRNA  (14) ,
        snoRNA  (15) ,
        property  (16) , -- used to display tag/value pair
                         -- for this type label is used as property tag, text is used as property value, 
                         -- other fields are not used.
        reference (17), -- currently not used             
        generif (18), -- to include generif in the main blob             
        phenotype(19), -- to display phenotype information
        complex (20), -- used (but not limited) to identify resulting 
                      -- interaction complexes
        compound (21), -- pubchem entities

        ncRNA (22), 
        gene-group (23), -- for relationship sets (such as pseudogene / parent gene)
        comment (254) ,
        other (255) } ,
    heading VisibleString OPTIONAL ,      -- appears above text 
    label VisibleString OPTIONAL ,        -- occurs to left of text
                                          -- for protein and RNA types it is a name
                                          -- for property type it is a property tag  
    text VisibleString OPTIONAL ,         -- block of text 
                                          -- for property type it is a property value  
    accession VisibleString OPTIONAL ,    -- accession for the gi in the seqloc, see note 3
    version INTEGER OPTIONAL ,    -- version for the accession above
    xtra-properties SEQUENCE OF Xtra-Terms OPTIONAL , -- see note 2
    refs SEQUENCE OF Pub OPTIONAL ,       -- refs for this 
    source SEQUENCE OF Other-source OPTIONAL ,    -- links and refs 
    genomic-coords SEQUENCE OF Seq-loc OPTIONAL , -- referenced sequences in genomic coords
    seqs SEQUENCE OF Seq-loc OPTIONAL ,           -- referenced sequences in non-genomic coords
    products SEQUENCE OF Gene-commentary OPTIONAL ,    -- nested commentaries (recursive)
    properties SEQUENCE OF Gene-commentary OPTIONAL ,  -- nested commentaries (recursive)
    comment SEQUENCE OF Gene-commentary OPTIONAL ,     -- nested commentaries (recursive)
    create-date Date OPTIONAL ,   
    update-date Date OPTIONAL } 
 
-- A link or reference to an outside source, described as a piece of
-- display text (pre-text, anchor, post-text) plus either a Dbtag or an
-- explicit URL. Every element is OPTIONAL.
Other-source ::= SEQUENCE { 
    src Dbtag OPTIONAL ,                -- key to non-ncbi source 
    pre-text VisibleString OPTIONAL ,   -- text before anchor 
    anchor VisibleString OPTIONAL ,     -- text to show as highlight 
    url VisibleString OPTIONAL ,        -- if present, use this URL not Dbtag and database 
    post-text VisibleString OPTIONAL }  -- text after anchor 


-- A map position: a display string plus the method used to view it, which
-- is either a proxy URL or the unit in which display-str is expressed.
-- NOTE(review): the unit names are not explained in this file; presumably
--   cyto = cytogenetic, bp = base pairs, cM = centimorgans,
--   cR = centirays, min = minutes. Confirm.
Maps::= SEQUENCE {
        display-str VisibleString ,
        method CHOICE {
            proxy VisibleString ,  --url to non mapviewer mapviewing resource
            map-type ENUMERATED {  -- units used in display-str to query mapviewer 
                    cyto (0) ,
                    bp (1) ,
                    cM (2) ,
                    cR (3) ,
                    min (4)}}}
                        
-- A tag/value pair; used by the xtra-properties and xtra-iq elements.
Xtra-Terms ::= SEQUENCE {  -- see note 2
    tag VisibleString ,    -- the tag
    value VisibleString }  -- the value for that tag

END 

--********************************************************************** 
-- 
--  Comments, notes, etc.
--   
--  1)  Ignored unless status = secondary.  This is where gene_ids (db = "GeneID")
--      are placed toward which the interface will direct users.  It is also
--      available for placing other source-db specific tags (i.e., db = "LocusID").
--
--  2)  These 'xtra' objects are for submitting data for Entrez indexing
--      that might not fit anywhere in the Entrezgene specification but
--      are considered by the data source submitter to be important.
--          xtra-index-terms is any string.
--          xtra-properties are tag/value pairs of properties/fields as
--              defined in the Entrez database (i.e.: UNIGENE/Hs.74561)
--          xtra-iq are tag/value pairs of Entrez database/UID as defined
--              in the Entrezgene indexing code (i.e.: NUCLEOTIDE/20270626)
--
--  3)  Locus-tag and src-str2 are expected to be unique per organism (tax_id).
--      Protein accessions and the tag-value pairs in unique-keys
--      are expected to be unique over all organisms.
--********************************************************************** 


-- featdef.asn
--$Revision: 6.0 $
--**********************************************************************
--
--  NCBI Sequence Feature Definition Module
--  by James Ostell, 1994
--
--**********************************************************************

NCBI-FeatDef DEFINITIONS ::=
BEGIN

EXPORTS FeatDef, FeatDefSet, FeatDispGroup, FeatDispGroupSet;


-- Definition of one feature type: its labels, its keys, and the groups it
-- belongs to for data entry and for display.
FeatDef ::= SEQUENCE {
    typelabel VisibleString ,      -- short label for type eg "CDS"
    menulabel VisibleString ,      -- label for a menu eg "Coding Region"
    featdef-key INTEGER ,          -- unique for this feature definition
    seqfeat-key INTEGER ,          -- SeqFeat.data.choice from objfeat.h
    entrygroup INTEGER ,           -- Group for data entry
    displaygroup INTEGER ,         -- Group for data display
                                   -- (presumably a FeatDispGroup.groupkey; confirm)
    molgroup FeatMolType           -- Type of Molecule used for
                                   -- (the original comment is cut off here;
                                   --  the values are aa, na or both, see FeatMolType)
}

-- Kind of molecule, used by FeatDef.molgroup.
FeatMolType ::= ENUMERATED {
	aa (1),  -- proteins
    na (2),  -- nucleic acids
    both (3) }  -- both

FeatDefSet ::= SEQUENCE OF FeatDef   -- collections of definitions

-- A display group: an integer key and its name.
FeatDispGroup ::= SEQUENCE {
	groupkey INTEGER ,          -- key of the group
    groupname VisibleString }   -- name of the group

FeatDispGroupSet ::= SEQUENCE OF FeatDispGroup   -- collection of display groups

-- Bundles the display groups with the feature definitions.
-- This type is not listed in the module's EXPORTS clause.
FeatDefGroupSet ::= SEQUENCE {
	groups FeatDispGroupSet ,   -- the display groups
	defs FeatDefSet }           -- the feature definitions

END

    
-- gbseq.asn
--$Revision: 149842 $
--*********************************************************
--
-- ASN.1 and XML for the components of a GenBank format sequence
-- J.Ostell 2002
-- Updated 15 January 2009
--
--*********************************************************

NCBI-GBSeq DEFINITIONS ::=
BEGIN

--********
--  GBSeq represents the elements in a GenBank style report
--    of a sequence with some small additions to structure and support
--    for protein (GenPept) versions of GenBank format as seen in
--    Entrez. While this represents the simplification, reduction of
--    detail, and flattening to a single sequence perspective of GenBank
--    format (compared with the full ASN.1 or XML from which GenBank and
--    this format is derived at NCBI), it is presented in ASN.1 or XML for
--    automated parsing and processing. It is hoped that this compromise
--    will be useful for those bulk processing at the GenBank format level
--    of detail today. Since it is a compromise, a number of pragmatic
--    decisions have been made.
--
--  In pursuit of simplicity and familiarity a number of
--    fields do not have full substructure defined here where there is
--    already a standard GenBank format string. For example:
--
--    Date  DD-Mon-YYYY
--    Authors   LastName, Initials (with periods)
--   Journal   JournalName Volume (issue), page-range (year)
--   FeatureLocations as per GenBank feature table, but FeatureIntervals
--    may also be provided as a convenience
--   FeatureQualifiers  as per GenBank feature table
--   Primary has a string that represents a table to construct
--    a third party (TPA) sequence.
--   other-seqids can have strings with the "vertical bar format" sequence
--    identifiers used in BLAST for example, when they are non-genbank types.
--    Currently in GenBank format you only see GI, but there are others, like
--    patents, submitter clone names, etc which will appear here, as they
--    always have in the ASN.1 format, and full XML format.
--   source-db is a formatted text block for peptides in GenPept format that
--    carries information from the source protein database.
--
--  There are also a number of elements that could have been
--   more exactly specified, but in the interest of simplicity
--   have been simply left as options. For example..
--
--  accession and accession.version will always appear in a GenBank record
--   they are optional because this format can also be used for non-GenBank
--   sequences, and in that case will have only "other-seqids".
--
--  sequences will normally all have "sequence" filled in. But contig records
--    will have a "join" statement in the "contig" slot, and no "sequence".
--    We also may consider a retrieval option with no sequence of any kind
--     and no feature table to quickly check minimal values.
--
--  a reference may have an author list, or be from a consortium, or both.
--
--  some fields, such as taxonomy, do appear as separate elements in GenBank
--    format but without a specific linetype (in GenBank format this comes
--    under ORGANISM). Another example is the separation of primary accession
--    from the list of secondary accessions. In GenBank format primary
--    accession is just the first one on the list that includes all secondaries
--    after it.
--
--  create-date deserves special comment. The date you see on the right hand
--    side of the LOCUS line in GenBank format is actually the last date the
--    record was modified (or the update-date). The date the record was
--    first submitted to GenBank appears in the first submission citation in
--    the reference section. Internally in the databases and ASN.1 NCBI keeps
--    the first date the record was released into the sequence database at
--    NCBI as create-date. For records from EMBL, which supports create-date,
--    it is the date provided by EMBL. For DDBJ records, which do not supply
--    a create-date (same as GenBank format) the create-date is the first date
--    NCBI saw the record from DDBJ. For older GenBank records, before NCBI
--    took responsibility for GenBank, it is just the first date NCBI saw the
--    record. Create-date can be very useful, so we expose it here, but users
--    must understand it is only an approximation and comes from many sources,
--    and with many exceptions and caveats. It does NOT tell you the first
--    date the public might have seen this record and thus is NOT an accurate
--    measure for legal issues of precedence.
--
--********

-- An ordered list of GBSeq records.
GBSet ::= SEQUENCE OF GBSeq
        
-- One sequence record at the level of detail of a GenBank (or GenPept)
-- flat-file report. Most values are carried as the formatted GenBank
-- strings, not as substructure; the comment block at the top of this
-- module gives the conventions. Mandatory elements are locus, length,
-- moltype, division, update-date and definition.
GBSeq ::= SEQUENCE {
    locus VisibleString ,
    length INTEGER ,
    strandedness VisibleString OPTIONAL ,
    moltype VisibleString ,
    topology VisibleString OPTIONAL ,
    division VisibleString ,
    update-date VisibleString ,             -- date the record was last modified
    create-date VisibleString OPTIONAL ,    -- approximate; see the caveats in the module header
    update-release VisibleString OPTIONAL ,
    create-release VisibleString OPTIONAL ,
    definition VisibleString ,
    primary-accession VisibleString OPTIONAL ,  -- optional: absent for non-GenBank sequences
    entry-version VisibleString OPTIONAL ,
    accession-version VisibleString OPTIONAL ,  -- optional: absent for non-GenBank sequences
    other-seqids SEQUENCE OF GBSeqid OPTIONAL , -- "vertical bar format" identifiers
    secondary-accessions SEQUENCE OF GBSecondary-accn OPTIONAL,
    project VisibleString OPTIONAL ,
    keywords SEQUENCE OF GBKeyword OPTIONAL ,
    segment VisibleString OPTIONAL ,
    source VisibleString OPTIONAL ,
    organism VisibleString OPTIONAL ,
    taxonomy VisibleString OPTIONAL ,       -- appears under ORGANISM in GenBank format
    references SEQUENCE OF GBReference OPTIONAL ,
    comment VisibleString OPTIONAL ,
    tagset GBTagset OPTIONAL ,
    primary VisibleString OPTIONAL ,        -- table used to construct a third party (TPA) sequence
    source-db VisibleString OPTIONAL ,      -- GenPept only: text block from the source protein database
    database-reference VisibleString OPTIONAL ,
    feature-table SEQUENCE OF GBFeature OPTIONAL ,
    sequence VisibleString OPTIONAL ,  -- Optional for other dump forms
    contig VisibleString OPTIONAL      -- "join" statement of a contig record (which has no sequence)
}

-- One secondary accession (element type of GBSeq.secondary-accessions).
GBSecondary-accn ::= VisibleString

-- One sequence identifier string (element type of GBSeq.other-seqids).
GBSeqid ::= VisibleString

-- One keyword (element type of GBSeq.keywords).
GBKeyword ::= VisibleString

-- One author name (element type of GBReference.authors).
GBAuthor ::= VisibleString

-- One literature reference of a GBSeq record. Only reference and journal
-- are mandatory. A reference may have an author list, a consortium, or
-- both.
GBReference ::= SEQUENCE {
    reference VisibleString ,
    position VisibleString OPTIONAL ,
    authors SEQUENCE OF GBAuthor OPTIONAL ,   -- each "LastName, Initials (with periods)"
    consortium VisibleString OPTIONAL ,
    title VisibleString OPTIONAL ,
    journal VisibleString ,                   -- "JournalName Volume (issue), page-range (year)"
    xref SET OF GBXref OPTIONAL ,             -- cross-references, see GBXref
    pubmed INTEGER OPTIONAL ,                 -- presumably the PubMed id (confirm)
    remark VisibleString OPTIONAL
}

-- A cross-reference: a database name and an id within it, both as text.
GBXref ::= SEQUENCE {
    dbname VisibleString ,   -- name of the database
    id VisibleString         -- identifier in that database
}

-- A set of tags (see GBTags) with optional information about the authority
-- that defines it. Every element is OPTIONAL.
GBTagset ::= SEQUENCE {
    authority VisibleString OPTIONAL ,
    version VisibleString OPTIONAL ,
    url VisibleString OPTIONAL ,
    tags GBTags OPTIONAL              -- the tags themselves
}

-- An ordered list of GBTag entries (used by GBTagset.tags).
GBTags ::= SEQUENCE OF GBTag

-- One tag: name, value and unit, all as text and all OPTIONAL (so an
-- entirely empty GBTag is a valid value).
GBTag ::= SEQUENCE {
    name VisibleString OPTIONAL ,
    value VisibleString OPTIONAL ,
    unit VisibleString OPTIONAL
}

-- One entry of the feature table. key and location are mandatory and
-- follow the GenBank feature table; intervals repeats the location in
-- structured form as a convenience.
GBFeature ::= SEQUENCE {
    key VisibleString ,
    location VisibleString ,                      -- as per GenBank feature table
    intervals SEQUENCE OF GBInterval OPTIONAL ,   -- convenience form of location
    operator VisibleString OPTIONAL ,
    partial5 BOOLEAN OPTIONAL ,                   -- presumably: partial at the 5' end (confirm)
    partial3 BOOLEAN OPTIONAL ,                   -- presumably: partial at the 3' end (confirm)
    quals SEQUENCE OF GBQualifier OPTIONAL        -- qualifiers, as per GenBank feature table
}

-- One interval of a feature location. accession is the only mandatory
-- element.
-- NOTE(review): presumably either from and to, or point, is supplied;
--   the schema does not enforce this. Confirm.
GBInterval ::= SEQUENCE {
    from INTEGER OPTIONAL ,
    to INTEGER OPTIONAL ,
    point INTEGER OPTIONAL ,
    iscomp BOOLEAN OPTIONAL ,     -- presumably: interval is on the complementary strand (confirm)
    interbp BOOLEAN OPTIONAL ,    -- presumably: position lies between two bases (confirm)
    accession VisibleString
}

-- One feature qualifier: a name and an optional value, both as text.
GBQualifier ::= SEQUENCE {
    name VisibleString ,
    value VisibleString OPTIONAL
}

-- Rules for a tagset: three optional lists of tag names plus an optional
-- extensibility flag. authority and version mirror the elements of the
-- same name in GBTagset.
-- NOTE(review): the meaning of each list is taken from its name only.
GBTagsetRules ::= SEQUENCE {
    authority VisibleString OPTIONAL ,
    version VisibleString OPTIONAL ,
    mandatorytags GBTagNames OPTIONAL ,   -- presumably tags that must be present
    optionaltags GBTagNames OPTIONAL ,    -- presumably tags that may be present
    uniquetags GBTagNames OPTIONAL ,      -- presumably tags that may occur only once
    extensible BOOLEAN OPTIONAL           -- presumably: tags outside the lists are allowed
}

-- A list of tag names (used by the three tag lists of GBTagsetRules).
GBTagNames ::= SEQUENCE OF VisibleString

-- A list of GBTagsetRules values.
GBTagsetRuleSet ::= SEQUENCE OF GBTagsetRules

END


-- general.asn
--$Revision: 99135 $
--**********************************************************************
--
--  NCBI General Data elements
--  by James Ostell, 1990
--  Version 3.0 - June 1994
--
--**********************************************************************

NCBI-General DEFINITIONS ::=
BEGIN

EXPORTS Date, Person-id, Object-id, Dbtag, Int-fuzz, User-object, User-field;

-- StringStore is really a VisibleString.  It is used to define very
--   long strings which may need to be stored by the receiving program
--   in special structures, such as a ByteStore, but it's just a hint.
--   AsnTool stores StringStores in ByteStore structures.
-- OCTET STRINGs are also stored in ByteStores by AsnTool
-- 
-- typedef struct bsunit {             /* for building multiline strings */
   -- Nlm_Handle str;            /* the string piece */
   -- Nlm_Int2 len_avail,
       -- len;
   -- struct bsunit PNTR next; }       /* the next one */
-- Nlm_BSUnit, PNTR Nlm_BSUnitPtr;
-- 
-- typedef struct bytestore {
   -- Nlm_Int4 seekptr,       /* current position */
      -- totlen,             /* total stored data length in bytes */
      -- chain_offset;       /* offset in ByteStore of first byte in curchain */
   -- Nlm_BSUnitPtr chain,       /* chain of elements */
      -- curchain;           /* the BSUnit containing seekptr */
-- } Nlm_ByteStore, PNTR Nlm_ByteStorePtr;
--
-- AsnTool incorporates this as a primitive type, so the definition
--   is here just for completeness
-- 
--  StringStore ::= [APPLICATION 1] IMPLICIT OCTET STRING
--

-- BigInt is really an INTEGER. It is used to warn the receiving code to expect
--   a value bigger than Int4 (actually Int8). It will be stored in DataVal.bigintvalue
--
--   Like StringStore, AsnTool incorporates it as a primitive. The definition would be:
--   BigInt ::= [APPLICATION 2] IMPLICIT INTEGER
--

-- Date is used to replace the (overly complex) UTCTime, GeneralizedTime
--  of ASN.1
--  It stores only a date
--  It is a CHOICE: a given value holds either the free-text form or the
--  structured form, never both.
--

Date ::= CHOICE {
    str VisibleString ,        -- for those unparsed dates
    std Date-std }             -- use this if you can

-- Structured date. Only year is mandatory; every finer-grained element is
-- OPTIONAL. The ranges given in the comments are documentation only and
-- are not expressed as ASN.1 constraints.
Date-std ::= SEQUENCE {        -- NOTE: this is NOT a unix tm struct
    year INTEGER ,             -- full year (including 1900)
    month INTEGER OPTIONAL ,   -- month (1-12)
    day INTEGER OPTIONAL ,     -- day of month (1-31)
    season VisibleString OPTIONAL ,  -- for "spring", "may-june", etc
    hour INTEGER OPTIONAL ,    -- hour of day (0-23)
    minute INTEGER OPTIONAL ,  -- minute of hour (0-59)
    second INTEGER OPTIONAL }  -- second of minute (0-59)

-- Dbtag is generalized for tagging
-- eg. { "Social Security", str "023-79-8841" }
-- or  { "member", id 8882224 }

-- A database name paired with a tag inside that database. The tag is an
-- Object-id, so it is either an integer or a string.
Dbtag ::= SEQUENCE {
    db VisibleString ,          -- name of database or system
    tag Object-id }         -- appropriate tag

-- Object-id can tag or name anything
--

-- Either a numeric id or a string name; exactly one is present.
Object-id ::= CHOICE {
    id INTEGER ,            -- numeric form
    str VisibleString }     -- string form

-- Person-id is to define a std element for people
--

-- Identifies a person (or consortium) in exactly one of five forms, from a
-- database tag or a structured name down to an unstructured string.
Person-id ::= CHOICE {
    dbtag Dbtag ,               -- any defined database tag
    name Name-std ,             -- structured name
    ml VisibleString ,          -- MEDLINE name (semi-structured)
                                --    eg. "Jones RM"
    str VisibleString,          -- unstructured name
    consortium VisibleString }  -- consortium name

-- Structured personal name. Only last is mandatory.
Name-std ::= SEQUENCE { -- Structured names
    last VisibleString ,              -- last name
    first VisibleString OPTIONAL ,    -- first name
    middle VisibleString OPTIONAL ,   -- middle name
    full VisibleString OPTIONAL ,    -- full name eg. "J. John Smith, Esq"
    initials VisibleString OPTIONAL,  -- first + middle initials
    suffix VisibleString OPTIONAL ,   -- Jr, Sr, III
    title VisibleString OPTIONAL }    -- Dr., Sister, etc

--**** Int-fuzz **********************************************
--*
--*   uncertainties in integer values

-- Describes the uncertainty attached to an integer value, in exactly one
-- of five forms: fixed plus/minus, explicit range, percentage, a limit
-- kind, or a set of alternative values.
Int-fuzz ::= CHOICE {
    p-m INTEGER ,                    -- plus or minus fixed amount
    range SEQUENCE {                 -- max to min
        max INTEGER ,
        min INTEGER } ,
    pct INTEGER ,                    -- % plus or minus (x10) 0-1000
    lim ENUMERATED {                 -- some limit value
        unk (0) ,                    -- unknown
        gt (1) ,                     -- greater than
        lt (2) ,                     -- less than
        tr (3) ,                     -- space to right of position
        tl (4) ,                     -- space to left of position
        circle (5) ,                 -- artificial break at origin of circle
        other (255) } ,              -- something else
    alt SET OF INTEGER }             -- set of alternatives for the integer


--**** User-object **********************************************
--*
--*   a general object for a user defined structured data item
--*    used by Seq-feat and Seq-descr

-- A user-defined structured object: an optional class, a type within that
-- class, and a list of User-field values. User-object and User-field are
-- mutually recursive (a field may itself contain objects).
User-object ::= SEQUENCE {
    class VisibleString OPTIONAL ,   -- endeavor which designed this object
    type Object-id ,                 -- type of object within class
    data SEQUENCE OF User-field }    -- the object itself

-- One labelled field of a User-object. The content is a CHOICE between a
-- single scalar, a nested User-object, or a list of scalars, fields or
-- objects.
User-field ::= SEQUENCE {
    label Object-id ,                -- field label
    num INTEGER OPTIONAL ,           -- required for strs, ints, reals, oss
                                     -- (presumably the number of elements in
                                     --  that list; confirm)
    data CHOICE {                    -- field contents
        str VisibleString ,
        int INTEGER ,
        real REAL ,
        bool BOOLEAN ,
        os OCTET STRING ,
        object User-object ,         -- for using other definitions
        strs SEQUENCE OF VisibleString ,
        ints SEQUENCE OF INTEGER ,
        reals SEQUENCE OF REAL ,
        oss SEQUENCE OF OCTET STRING ,
        fields SEQUENCE OF User-field ,     -- nested fields (recursive)
        objects SEQUENCE OF User-object } } -- nested objects (recursive)


END


-- homologene.asn
HomoloGene DEFINITIONS ::=
BEGIN

IMPORTS Date FROM NCBI-General
        Seq-loc FROM NCBI-Seqloc
	Seq-align FROM NCBI-Seqalign;


-- HomoloGeneEntry taxid is the tax id of the group node, which can
-- be the same as the Gene tax id in case of singletons

-- Wrapper holding an unordered collection of HG-Entry values.
HG-EntrySet ::= SEQUENCE {
	entries		SET OF HG-Entry -- homologene entry
}


-- One HomoloGene entry. Only hg-id is mandatory.
HG-Entry ::= SEQUENCE {
	hg-id		INTEGER,                        -- id of this entry
	version		INTEGER OPTIONAL,
	title		VisibleString OPTIONAL,
	caption		VisibleString OPTIONAL,
	taxid		INTEGER OPTIONAL,               -- tax id of the group node (see comment above HG-EntrySet)
	genes		SET OF HG-Gene OPTIONAL,        -- the genes of this entry
	cr-date		Date OPTIONAL,                  -- presumably creation date (confirm)
	up-date		Date OPTIONAL,                  -- presumably update date (confirm)
	distances	SET OF HG-Stats OPTIONAL,       -- pairwise statistics, see HG-Stats
	commentaries	SET OF HG-CommentarySet OPTIONAL,
	warnings	SET OF VisibleString OPTIONAL,
	node		HG-Node OPTIONAL                -- tree node, see HG-Node
}


-- One gene of a HomoloGene entry. geneid, title and taxid are mandatory;
-- the protein, nucleotide, link, domain and location elements are OPTIONAL.
HG-Gene ::= SEQUENCE {
	geneid			INTEGER,
	otherid			INTEGER OPTIONAL,  							-- internal use only!!!!!
	symbol			VisibleString OPTIONAL,
	aliases			SET OF VisibleString OPTIONAL,
	title				VisibleString,
	taxid				INTEGER,	 											--taxid of gene node
	prot-gi			INTEGER OPTIONAL,           -- presumably gi of the protein (confirm)
	prot-acc		VisibleString OPTIONAL,     -- presumably accession of the protein (confirm)
	prot-len		INTEGER OPTIONAL,           -- presumably length of the protein (confirm)
	nuc-gi			INTEGER OPTIONAL,           -- presumably gi of the nucleotide (confirm)
	nuc-acc			VisibleString OPTIONAL,     -- presumably accession of the nucleotide (confirm)
	gene-links	SET OF HG-Link OPTIONAL,
	prot-links	SET OF HG-Link OPTIONAL,
	domains			SET OF HG-Domain OPTIONAL,  -- see HG-Domain
	chr     		VisibleString OPTIONAL,     -- presumably chromosome (confirm)
	location		Seq-loc OPTIONAL,        				-- location on the genome
	locus-tag		VisibleString OPTIONAL
}


-- Pairwise statistics between two sequences identified by gi1 and gi2
-- (element type of HG-Entry.distances). All REAL values are mandatory.
-- NOTE(review): none of the measures is documented in this file; the
--   readings below are guesses from the names and must be confirmed.
HG-Stats ::= SEQUENCE {
	gi1		INTEGER,
	gi2		INTEGER,
	nuc-change	REAL,           -- presumably nucleotide change
	nuc-change-jc	REAL,           -- presumably nucleotide change with a correction applied
	prot-change	REAL,           -- presumably protein change
	ka		REAL,
	ks		REAL,
	knr		REAL,
	knc		REAL,
	recip-best	BOOLEAN OPTIONAL    -- presumably: the pair is a reciprocal best match
}


-- One commentary item. Only link is mandatory. The type is recursive
-- through other-commentaries.
HG-Commentary ::= SEQUENCE {
	link			HG-Link,
	description		VisibleString OPTIONAL, -- main description
	caption			VisibleString OPTIONAL, -- extra text
	provider		VisibleString OPTIONAL,
	other-links		SET OF HG-Link OPTIONAL,
	other-commentaries	SET OF HG-Commentary OPTIONAL, -- nested commentaries (recursive)
	taxid			INTEGER OPTIONAL,
	geneid			INTEGER OPTIONAL
}

-- A titled group of commentaries, optionally tied to an entry by hg-id.
HG-CommentarySet ::= SEQUENCE {
	hg-id		INTEGER OPTIONAL,
	title		VisibleString,
	commentaries	SET OF HG-Commentary
}

-- An unordered collection of HG-CommentarySet values.
HG-CommentaryContainer ::= SET OF HG-CommentarySet

-- A hyperlink: the text to show and, optionally, the URL it points to.
HG-Link ::= SEQUENCE {
	hypertext	VisibleString,
	url		VisibleString OPTIONAL
}

-- One domain of a gene product, given as a begin/end pair plus optional
-- identifiers (element type of HG-Gene.domains).
-- NOTE(review): the coordinate system of begin and end (0- or 1-based,
--   inclusive or not) is not stated in this file.
HG-Domain ::= SEQUENCE {
	begin		INTEGER,
	end		INTEGER,
	pssm-id		INTEGER OPTIONAL,	-- entrez uid
	cdd-id		VisibleString OPTIONAL,
	cdd-name	VisibleString OPTIONAL
}

-- One node of a tree of homology groups. The type is recursive through
-- children. current-node is FALSE when omitted.
HG-Node ::= SEQUENCE {
	type		ENUMERATED {    -- kind of node
				family(0),
				ortholog(1),
				paralog(2),
				leaf(3)
			},
	id		HG-Node-id,
	caption		VisibleString OPTIONAL,
	current-node	BOOLEAN DEFAULT FALSE,
	children	SET OF HG-Node OPTIONAL,    -- child nodes (recursive)
	branch-len	INTEGER OPTIONAL
}

-- Identifier of an HG-Node: an optional integer id plus a mandatory code
-- saying what kind of id it is.
HG-Node-id ::= SEQUENCE {
	id		INTEGER OPTIONAL,
	id-type		ENUMERATED {    -- kind of id carried in the id element
				none(0),
				geneid(1),
				hid(2)
			}
}

-- A Seq-align (imported from NCBI-Seqalign) attached to an entry by hg-id.
HG-Alignment ::= SEQUENCE {
	hg-id		INTEGER,
	alignment	Seq-align
}

-- An unordered collection of HG-Alignment values.
HG-AlignmentSet ::= SET OF HG-Alignment

END

-- id1.asn
--$Revision: 1.12 $
--********************************************************************
--
--  Network Id server network access
--  Yaschenko 1996
--
--
--*********************************************************************
--
--  ID1.asn
--
--     messages for id server network access
--
--*********************************************************************

NCBI-ID1Access DEFINITIONS ::=
BEGIN

IMPORTS Seq-id FROM NCBI-Seqloc
		Seq-entry FROM NCBI-Seqset
		Seq-hist  FROM NCBI-Sequence;

        --**********************************
        -- requests
        --

-- A request to the ID1 server; exactly one alternative is sent.
-- The INTEGER alternatives take a gi, as their comments state.
ID1server-request ::= CHOICE {
		init NULL ,             -- DlInit
		getgi Seq-id ,          -- get a gi given a Seq-id
		getsefromgi ID1server-maxcomplex ,   -- given a gi, get the Seq-entry
		fini NULL,               -- DlFini
		getseqidsfromgi	INTEGER, --get all Seq-ids of given gi
		getgihist	INTEGER, --get an historical list of gis 
		getgirev	INTEGER, --get a revision history of gi
		getgistate	INTEGER,  --get a state of gi
		getsewithinfo	ID1server-maxcomplex,   -- presumably answered by gotsewithinfo (confirm)
		getblobinfo	ID1server-maxcomplex    -- presumably answered by gotblobinfo (confirm)
}

--  Complexity stuff will be for ID1

-- Parameters of a retrieval request: the gi wanted, the maximum complexity
-- of the entry to return, and optional ent and satellite selectors.
ID1server-maxcomplex ::= SEQUENCE {
		maxplex Entry-complexities ,    -- see Entry-complexities
		gi INTEGER ,
		ent INTEGER OPTIONAL, -- needed when you want to retrieve a given ent
		sat VisibleString OPTIONAL  -- satellite 0-id,1-dbEST
}

-- Complexity levels for ID1server-maxcomplex.maxplex. This is an INTEGER
-- with named numbers, so other values are still valid encodings.
Entry-complexities ::= INTEGER {
		entry (0) ,              -- the "natural" entry for this (nuc-prot) 
		bioseq (1) ,             -- only the bioseq identified
		bioseq-set (2) ,         -- any seg-set it may be part of
		nuc-prot (3) ,           -- any nuc-prot it may be part of
		pub-set (4)              -- NOTE(review): undocumented; presumably any pub-set it may be part of
}

-- Wrapper around a single Seq-hist (imported from NCBI-Sequence); used by
-- the gihist and girevhist alternatives of ID1server-back.
ID1Seq-hist ::= SEQUENCE {
	hist Seq-hist
}


-- A reply from the ID1 server; exactly one alternative is returned.
-- NOTE(review): the pairing of each reply with a request alternative is
--   suggested by the names only; confirm against the server.
ID1server-back ::= CHOICE {
		init NULL ,                 -- DlInit
		error INTEGER ,             -- error code (values not defined in this file)
		gotgi INTEGER ,                 
		gotseqentry  Seq-entry,  -- live
		gotdeadseqentry  Seq-entry,  -- dead
                fini NULL,                  -- DlFini
		gistate	INTEGER,
		ids	SET OF Seq-id,
		gihist	SET OF ID1Seq-hist,  -- because hand crafted Seq-hist does not follow 
		                             -- same conventions 
		girevhist SET OF ID1Seq-hist,
		gotsewithinfo   ID1SeqEntry-info,
		gotblobinfo	ID1blob-info	
}

-- An unordered collection of ID1server-back replies.
ID1server-debug ::= SET OF ID1server-back


-- Information about one blob: where it is stored (sat, sat-key, satname),
-- its status flags, and its state/version.
-- NOTE(review): suppress, withdrawn and confidential are INTEGERs, not
--   BOOLEANs; their value ranges are not documented in this file.
ID1blob-info ::= SEQUENCE {
		gi INTEGER ,
		sat INTEGER,
                sat-key INTEGER,
                satname VisibleString,
		suppress	INTEGER,
		withdrawn	INTEGER,
		confidential	INTEGER,
                -- blob-state now contains blob version info.
                -- it's actually minutes from 01/01/1970
                -- and it's negative if blob is dead.
		blob-state	INTEGER,
                comment         VisibleString OPTIONAL, -- public comment for withdrawn record 
		extfeatmask	INTEGER	      OPTIONAL  -- mask for external features (SNP,...)
}

-- Blob information together with, optionally, the Seq-entry itself.
ID1SeqEntry-info ::= SEQUENCE {
		blob-info	ID1blob-info,
		blob		Seq-entry OPTIONAL  -- the entry; may be omitted
}
END

-- id2.asn
--$Revision: 112545 $
--********************************************************************
--
--  Network Id server network access
--  Vasilchenko 2003
--
--
--*********************************************************************
--
--  ID2.asn
--
--     messages for id server network access
--
--*********************************************************************

NCBI-ID2Access DEFINITIONS ::=
BEGIN

IMPORTS Seq-id, Seq-loc                                 FROM NCBI-Seqloc
        ID2S-Chunk-Id, ID2S-Seq-annot-Info              FROM NCBI-Seq-split;


--*********************************************************************
-- request types
--*********************************************************************

-- Requests are sent in packets to allow sending several requests at once
-- to avoid network latency, without possibility of deadlock with the server.
-- The server will not start sending replies until it has read the whole packet.
ID2-Request-Packet ::= SEQUENCE OF ID2-Request


-- One request to the ID2 server: an optional serial number, optional
-- parameters, and the request proper as a CHOICE of request kinds.
ID2-Request ::= SEQUENCE {
        -- request's serial number, can be used in asynchronous clients
        -- server should copy it to corresponding field in reply
        serial-number  INTEGER OPTIONAL,

        -- request parameters (ID2-Params is defined elsewhere in this module)
        params          ID2-Params OPTIONAL,

        -- the request itself; exactly one alternative is sent
        request CHOICE {
                init            NULL,
                get-packages    ID2-Request-Get-Packages,
                get-seq-id      ID2-Request-Get-Seq-id,
                get-blob-id     ID2-Request-Get-Blob-Id,
                get-blob-info   ID2-Request-Get-Blob-Info,
                reget-blob      ID2-Request-ReGet-Blob,
                get-chunks      ID2S-Request-Get-Chunks
        }
}


-- Request for the set of parameter packages known by the server.
-- Packages can be used to abbreviate the parameters of a request.
ID2-Request-Get-Packages ::= SEQUENCE {
        -- return known packages from this list
        -- if unset - return all known packages
        names           SEQUENCE OF VisibleString OPTIONAL,

        -- if present, return only the names of the packages
        no-contents     NULL OPTIONAL
}

-- Requested sequence ID, can be any string or Seq-id.
-- The server answers this request with one or more ID2-Reply-Get-Seq-id.
ID2-Request-Get-Seq-id ::= SEQUENCE {
        seq-id          ID2-Seq-id,
        -- which kind of Seq-id the client wants back;
        -- 'any' is assumed when the field is absent
        seq-id-type     INTEGER {
                any     (0),    -- return any qualified Seq-id
                gi      (1),    -- gi is preferred
                text    (2),    -- text Seq-id (accession etc) is preferred
                general (4),    -- general Seq-id is preferred
                all     (127),  -- return all qualified Seq-ids of the sequence
                label   (128)   -- return a sequence string label as general id
        } DEFAULT any
}


-- A sequence identifier supplied either as a plain string or as a
-- structured Seq-id (imported from NCBI-Seqloc).
ID2-Seq-id ::= CHOICE {
        string          VisibleString,
        seq-id          Seq-id
}


-- Return blob-id with specified seq-id.
-- This request will be replied with one or more ID2-Reply-Get-Blob-Id.
ID2-Request-Get-Blob-Id ::= SEQUENCE {
        -- the Seq-id is supplied through a nested Get-Seq-id request
        seq-id          ID2-Request-Get-Seq-id,

        -- return id of blob with sequence
        -- NOTE(review): the meaning of the strings in 'sources' is not
        -- defined in this module - confirm against the server documentation
        sources         SEQUENCE OF VisibleString OPTIONAL,

        -- if present, return Blob-Ids with external features on this Seq-id
        external        NULL OPTIONAL
}


-- Return some information related to the blob.
-- This request will be replied with one or more of:
--   ID2-Reply-Get-Blob-Seq-ids - if requested by get-seq-ids field
--   ID2-Reply-Get-Blob         - if requested by get-data field
--   ID2S-Reply-Get-Split-Info
--   ID2S-Reply-Get-Chunk
-- The last two can be sent in addition to ID2-Reply-Get-Blob
-- if the blob is split on the server.
-- The replies are kept separate so that the server can build them more
-- easily from precalculated data. Each of these replies has an
-- ID2-Reply-Data field.
ID2-Request-Get-Blob-Info ::= SEQUENCE {
        -- the blob is named either directly or through a nested request
        blob-id         CHOICE {
                -- explicit id of the blob
                blob-id         ID2-Blob-Id,

                -- generate blob-ids from request
                resolve         SEQUENCE {
                        request         ID2-Request-Get-Blob-Id,
                        
                        -- server will not send blobs listed here
                        exclude-blobs   SEQUENCE OF ID2-Blob-Id OPTIONAL
                }
        },

        -- if present, also return the list of Seq-ids resolving to this blob
        get-seq-ids     NULL OPTIONAL,

        -- level of details requested immediately
        -- server will send relevant chunks if blob is split
        get-data        ID2-Get-Blob-Details OPTIONAL
}


-- This is similar to the FTP reget command.
-- It may be unsupported by the server.
-- It is defined only for plain blobs (returned in ID2-Reply-Get-Blob),
-- because all split data comes in small chunks, so reget makes no sense there.
ID2-Request-ReGet-Blob ::= SEQUENCE {
        -- blob whose data is requested again
        blob-id         ID2-Blob-Id,

        -- blob split version to resend
        split-version   INTEGER,

        -- start offset of data to get
        offset          INTEGER
}

-- Request for specific chunks.
-- Server will reply with one or more ID2S-Reply-Get-Chunk.
ID2S-Request-Get-Chunks ::= SEQUENCE {
        -- blob the chunks belong to
        blob-id         ID2-Blob-Id,

        -- ids of the requested chunks of the split blob
        chunks          SEQUENCE OF ID2S-Chunk-Id,

        -- blob split version
        split-version   INTEGER OPTIONAL
}


-- The following structure describes which parts of a blob are required
-- immediately after ID2-Request-Get-Blob-Info in case the blob is split.
-- Seq-entry levels will probably have the same values as Entry-complexities.
-- Every field is OPTIONAL or has a DEFAULT, so an empty value is valid.
ID2-Get-Blob-Details ::= SEQUENCE {
        -- reference location for details - can be only part of sequence
        location        Seq-loc OPTIONAL,

        -- Seq-entry level for all data except descriptors (sequence, annots)
        seq-class-level INTEGER DEFAULT 1,

        -- Seq-entry level for descriptors
        descr-level     INTEGER DEFAULT 1,

        -- mask of descriptor types - see Seqdesc for variants' values
        descr-type-mask INTEGER DEFAULT 0,
        
        -- mask of annotation types - see Seq-annot.data for values
        annot-type-mask INTEGER DEFAULT 0,

        -- mask of feature types - see SeqFeatData for values
        feat-type-mask  INTEGER DEFAULT 0,

        -- level of sequence data to load
        sequence-level  ENUMERATED {
                none     (0), -- not required
                seq-map  (1), -- at least seq-map
                seq-data (2)  -- include seq-data
        } DEFAULT none
}


--*********************************************************************
-- reply types
--*********************************************************************


-- A single reply from the ID2 server. One request may be answered by
-- several replies; the last one is marked with 'end-of-reply'.
ID2-Reply ::= SEQUENCE {
        -- request's serial number, copy from request
        serial-number   INTEGER OPTIONAL,

        params          ID2-Params OPTIONAL,

        -- errors reported for this reply, if any
        error           SEQUENCE OF ID2-Error OPTIONAL,

        -- present if this reply is the last one for the request
        -- absent if more replies will follow
        end-of-reply    NULL OPTIONAL,

        -- reply data moved at the end to make it easier to construct
        -- the reply data manually from precalculated data
        reply CHOICE {
                init            NULL,
                empty           NULL,
                get-package     ID2-Reply-Get-Package,
                get-seq-id      ID2-Reply-Get-Seq-id,
                get-blob-id     ID2-Reply-Get-Blob-Id,
                get-blob-seq-ids    ID2-Reply-Get-Blob-Seq-ids,
                get-blob        ID2-Reply-Get-Blob,
                reget-blob      ID2-Reply-ReGet-Blob,
                get-split-info  ID2S-Reply-Get-Split-Info,
                get-chunk       ID2S-Reply-Get-Chunk
        },

        -- additional error flag if the reply is broken in the middle
        -- of transfer.
        -- 'last-octet-string' and 'nothing' mean that the
        -- client may use a ReGet request to get the remaining data.
        discard         ENUMERATED {
                reply             (0),  -- whole reply should be discarded
                last-octet-string (1),  -- all data in embedded ID2-Reply-Data
                                        -- except last OCTET STRING is correct
                nothing           (2)   -- all data in embedded ID2-Reply-Data
                                        -- is correct, but is incomplete
        } OPTIONAL
}


-- One error or warning attached to an ID2-Reply: a mandatory severity
-- plus an optional retry delay and an optional text message.
ID2-Error ::= SEQUENCE {
        severity        ENUMERATED {
                -- nothing harmful happened
                warning             (1) ,

                -- command cannot be completed this time
                failed-command      (2) ,

                -- connection cannot be reused, reconnect is required
                failed-connection   (3) ,

                -- server cannot be used for a while
                failed-server       (4) ,

                -- resolve request gives no data
                -- probably temporarily (see retry-delay field)
                no-data             (5) ,

                -- data exists but client doesn't have permission to get it
                restricted-data     (6) ,

                -- this request type is not supported by server
                unsupported-command (7) ,

                -- error in request packet, cannot retry
                invalid-arguments   (8)
        },

        -- client may retry the request after the specified time in seconds
        retry-delay INTEGER OPTIONAL,

        -- optional text describing the error
        message VisibleString OPTIONAL
}


-- Reply to ID2-Request-Get-Packages.
-- Describes one package: its name and, optionally, its parameters.
ID2-Reply-Get-Package ::= SEQUENCE {
        name            VisibleString,

        -- NOTE(review): presumably omitted when the request set
        -- 'no-contents' - confirm against the server behaviour
        params          ID2-Params OPTIONAL
}


-- Reply to ID2-Request-Get-Seq-id.
ID2-Reply-Get-Seq-id ::= SEQUENCE {
        -- copy of request
        request         ID2-Request-Get-Seq-id,

        -- resolved Seq-ids
        -- not set if error occurred
        seq-id          SEQUENCE OF Seq-id OPTIONAL,

        -- present if this Seq-id is the last one in the request
        end-of-reply    NULL OPTIONAL
}


-- State of a blob, reported in the 'blob-state' field of
-- ID2-Reply-Get-Blob-Id.
-- NOTE(review): the precise meaning of each state is not defined in this
-- module - confirm against the server documentation.
ID2-Blob-State ::= ENUMERATED {
        live            (0),
        suppressed-temp (1),
        suppressed      (2),
        dead            (3),
        protected       (4),
        withdrawn       (5)
}


-- Reply to ID2-Request-Get-Blob-Id.
ID2-Reply-Get-Blob-Id ::= SEQUENCE {
        -- requested Seq-id
        seq-id          Seq-id,

        -- result
        blob-id         ID2-Blob-Id OPTIONAL,

        -- version of split data
        -- (0 for non split)
        split-version   INTEGER DEFAULT 0,

        -- annotation types in this blob
        -- annotations are unknown if this field is omitted
        annot-info      SEQUENCE OF ID2S-Seq-annot-Info OPTIONAL,

        -- present if this Blob-id is the last one in the request
        end-of-reply    NULL OPTIONAL,

        -- state of the blob
        blob-state      ID2-Blob-State OPTIONAL
}


-- Reply to ID2-Request-Get-Blob-Info
-- (sent if requested by its get-seq-ids field).
ID2-Reply-Get-Blob-Seq-ids ::= SEQUENCE {
        blob-id         ID2-Blob-Id,

        -- list of Seq-id resolving to this Blob-Id
        -- in compressed format
        -- (the packed content is described by the ID2-Blob-Seq-ids type)
        ids             ID2-Reply-Data OPTIONAL
}


-- Reply to ID2-Request-Get-Blob-Info
-- (sent if requested by its get-data field).
ID2-Reply-Get-Blob ::= SEQUENCE {
        blob-id         ID2-Blob-Id,

        -- version of split data
        -- (0 for non split)
        split-version   INTEGER DEFAULT 0,

        -- whole blob or blob skeleton
        -- not set if error occurred
        data            ID2-Reply-Data OPTIONAL
}


-- Reply to ID2-Request-Get-Blob-Info.
-- Can be sent in addition to ID2-Reply-Get-Blob when the blob is split
-- on the server.
ID2S-Reply-Get-Split-Info ::= SEQUENCE {
        blob-id         ID2-Blob-Id,

        -- version of split data
        split-version   INTEGER,

        -- blob split info
        -- not set if error occurred
        data            ID2-Reply-Data OPTIONAL
}


-- Reply to ID2-Request-ReGet-Blob.
ID2-Reply-ReGet-Blob ::= SEQUENCE {
        blob-id         ID2-Blob-Id,

        -- version of data split
        split-version   INTEGER,

        -- offset of data
        offset          INTEGER,

        -- blob split info
        -- not set if error occurred
        -- NOTE(review): "blob split info" is the same wording as in
        -- ID2S-Reply-Get-Split-Info; presumably the payload here is the
        -- blob data starting at 'offset' - confirm
        data            ID2-Reply-Data OPTIONAL
}


-- Reply to ID2S-Request-Get-Chunks.
-- Each reply carries a single chunk of the named blob.
ID2S-Reply-Get-Chunk ::= SEQUENCE {
        blob-id         ID2-Blob-Id,

        -- id of the chunk being sent
        chunk-id        ID2S-Chunk-Id,

        -- chunk data
        -- not set if error occurred
        data            ID2-Reply-Data OPTIONAL
}


-- Data packing.
-- Describes how the payload was produced (object type, serialization
-- format, compression) and carries the payload as a list of octet strings.
ID2-Reply-Data ::= SEQUENCE {
        -- index of negotiated types
        -- recommended types
        --   Seq-entry,
        --   Seq-annot,
        --   ID2S-Split-Info,
        --   ID2S-Chunk
        -- 'seq-entry' is assumed when the field is absent
        data-type       INTEGER {
                seq-entry       (0),
                seq-annot       (1),
                id2s-split-info (2),
                id2s-chunk      (3)
        } DEFAULT seq-entry,

        -- serialization format (ASN.1 binary, ASN.1 text, XML)
        -- index of negotiated formats
        -- 'asn-binary' is assumed when the field is absent
        data-format     INTEGER {
                asn-binary      (0),
                asn-text        (1),
                xml             (2)
        } DEFAULT asn-binary,

        -- post serialization compression (plain, gzip, etc.)
        -- index of negotiated compressions
        -- 'none' is assumed when the field is absent
        data-compression INTEGER {
                none            (0),
                gzip            (1),
                nlmzip          (2),
                bzip2           (3)
        } DEFAULT none,

        -- data blob, possibly delivered as several OCTET STRING pieces
        data            SEQUENCE OF OCTET STRING
}


-- Data packed within the 'ids' field of an ID2-Reply-Get-Blob-Seq-ids reply:
-- a list of ID2-Blob-Seq-id entries.
ID2-Blob-Seq-ids ::= SEQUENCE OF ID2-Blob-Seq-id


-- One element of ID2-Blob-Seq-ids: a Seq-id plus an optional marker.
ID2-Blob-Seq-id ::= SEQUENCE {
        seq-id          Seq-id,
        
        -- if present, this Seq-id is replaced by sequence in another blob
        replaced        NULL OPTIONAL
}


--*********************************************************************
-- utility types
--*********************************************************************


-- Identifies a blob by its sat, sub-sat and sat-key values, optionally
-- qualified by a version.
ID2-Blob-Id ::= SEQUENCE {
        sat             INTEGER,
        -- named sub-sat values; 'main' is assumed when the field is absent
        -- NOTE(review): most values are powers of two, which suggests they
        -- may be combined as a bit mask - confirm before relying on it
        sub-sat         INTEGER {
                main        (0),
                snp         (1),
                snp-graph   (4),
                cdd         (8),
                mgc         (16),
                hprd        (32),
                sts         (64),
                trna        (128),
                exon        (512)
        } DEFAULT main,
        sat-key         INTEGER,
        -- version of blob, optional in some requests
        version         INTEGER OPTIONAL
}


-- Ordered list of parameters; used by the 'params' fields of ID2-Request,
-- ID2-Reply and ID2-Reply-Get-Package.
ID2-Params ::= SEQUENCE OF ID2-Param


-- A named parameter with an optional list of string values.
-- 'type' tells the receiver how to treat it; 'set-value' is assumed when
-- the field is absent.
ID2-Param ::= SEQUENCE {
        name    VisibleString,
        value   SEQUENCE OF VisibleString OPTIONAL,
        type    ENUMERATED {
                -- no response expected
                set-value   (1),

                -- this option is for client only
                -- server replies with its value of param if known
                -- server omits this param in reply if unknown to server
                get-value   (2),

                -- no direct response expected,
                -- but if the param or its value is not supported
                -- an error is reported and the request is not completed
                force-value (3),

                -- use named package
                -- value should be unset
                use-package (4)
        } DEFAULT set-value
}

END

-- insdseq.asn
--$Revision: 149845 $
--************************************************************************
--
-- ASN.1 and XML for the components of a GenBank/EMBL/DDBJ sequence record
-- The International Nucleotide Sequence Database (INSD) collaboration
-- Version 1.5, 15 January 2009
--
--************************************************************************

INSD-INSDSeq DEFINITIONS ::=
BEGIN

--  INSDSeq provides the elements of a sequence as presented in the
--    GenBank/EMBL/DDBJ-style flatfile formats, with a small amount of
--    additional structure.
--    Although this single perspective of the three flatfile formats
--    provides a useful simplification, it hides to some extent the
--    details of the actual data underlying those formats. Nevertheless,
--    the XML version of INSD-Seq is being provided with
--    the hopes that it will prove useful to those who bulk-process
--    sequence data at the flatfile-format level of detail. Further 
--    documentation regarding the content and conventions of those formats 
--    can be found at:
--
--    URLs for the DDBJ, EMBL, and GenBank Feature Table Document:
--    http://www.ddbj.nig.ac.jp/FT/full_index.html
--    http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html
--    http://www.ncbi.nlm.nih.gov/projects/collab/FT/index.html
--
--    URLs for DDBJ, EMBL, and GenBank Release Notes :
--    ftp://ftp.ddbj.nig.ac.jp/database/ddbj/ddbjrel.txt
--    http://www.ebi.ac.uk/embl/Documentation/Release_notes/current/relnotes.html
--    ftp://ftp.ncbi.nih.gov/genbank/gbrel.txt
--
--    Because INSDSeq is a compromise, a number of pragmatic decisions have
--    been made:
--
--  In pursuit of simplicity and familiarity a number of fields do not
--    have full substructure defined here where there is already a
--    standard flatfile format string. For example:
--
--   Dates:      DD-MON-YYYY (eg 10-JUN-2003)
--
--   Author:     LastName, Initials  (eg Smith, J.N.)
--            or Lastname Initials   (eg Smith J.N.)
--
--   Journal:    JournalName Volume (issue), page-range (year)
--            or JournalName Volume(issue):page-range(year)
--            eg Appl. Environ. Microbiol. 61 (4), 1646-1648 (1995)
--               Appl. Environ. Microbiol. 61(4):1646-1648(1995).
--
--  FeatureLocations are represented as in the flatfile feature table,
--    but FeatureIntervals may also be provided as a convenience
--
--  FeatureQualifiers are represented as in the flatfile feature table.
--
--  Primary has a string that represents a table to construct
--    a third party (TPA) sequence.
--
--  other-seqids can have strings with the "vertical bar format" sequence
--    identifiers used in BLAST for example, when they are non-INSD types.
--
--  Currently in flatfile format you only see Accession numbers, but there 
--    are others, like patents, submitter clone names, etc which will 
--    appear here
--
--  There are also a number of elements that could have been more exactly
--    specified, but in the interest of simplicity have been simply left as
--    optional. For example:
--
--  All publicly accessible sequence records in INSDSeq format will
--    include accession and accession.version. However, these elements are
--    optional in INSDSeq so that this format can also be used
--    for non-public sequence data, prior to the assignment of accessions and 
--    version numbers. In such cases, records will have only "other-seqids".
--
--  sequences will normally all have "sequence" filled in. But contig records
--    will have a "join" statement in the "contig" slot, and no "sequence".
--    We also may consider a retrieval option with no sequence of any kind
--    and no feature table to quickly check minimal values.
--
--  Four (optional) elements are specific to records represented via the EMBL
--    sequence database: INSDSeq_update-release, INSDSeq_create-release,
--    INSDSeq_entry-version, and INSDSeq_database-reference.
--
--  One (optional) element is specific to records originating at the GenBank
--    and DDBJ sequence databases: INSDSeq_segment.
--
--********

-- An ordered collection of INSDSeq records.
INSDSet ::= SEQUENCE OF INSDSeq

-- One sequence record in flatfile-style form.
-- Mandatory fields: locus, length, moltype, division, update-date and
-- definition; everything else is OPTIONAL.
-- Dates are flatfile strings of the form DD-MON-YYYY (eg 10-JUN-2003).
INSDSeq ::= SEQUENCE {
    locus VisibleString ,
    length INTEGER ,
    strandedness VisibleString OPTIONAL ,
    moltype VisibleString ,
    topology VisibleString OPTIONAL ,
    division VisibleString ,
    update-date VisibleString ,
    create-date VisibleString OPTIONAL ,
    -- update-release and create-release are specific to EMBL records
    update-release VisibleString OPTIONAL ,
    create-release VisibleString OPTIONAL ,
    definition VisibleString ,
    -- accessions are optional so that non-public data, which has no
    -- accession yet, can be represented with only other-seqids
    primary-accession VisibleString OPTIONAL ,
    -- entry-version is specific to EMBL records
    entry-version VisibleString OPTIONAL ,
    accession-version VisibleString OPTIONAL ,
    other-seqids SEQUENCE OF INSDSeqid OPTIONAL ,
    secondary-accessions SEQUENCE OF INSDSecondary-accn OPTIONAL,
    project VisibleString OPTIONAL ,
    keywords SEQUENCE OF INSDKeyword OPTIONAL ,
    -- segment is specific to GenBank and DDBJ records
    segment VisibleString OPTIONAL ,
    source VisibleString OPTIONAL ,
    organism VisibleString OPTIONAL ,
    taxonomy VisibleString OPTIONAL ,
    references SEQUENCE OF INSDReference OPTIONAL ,
    comment VisibleString OPTIONAL ,
    tagset INSDTagset OPTIONAL ,
    -- string that represents a table to construct a third party (TPA) sequence
    primary VisibleString OPTIONAL ,
    source-db VisibleString OPTIONAL ,
    -- database-reference is specific to EMBL records
    database-reference VisibleString OPTIONAL ,
    feature-table SEQUENCE OF INSDFeature OPTIONAL ,
    sequence VisibleString OPTIONAL ,  -- Optional for other dump forms
    -- contig records carry a "join" statement here and no "sequence"
    contig VisibleString OPTIONAL
}

-- Element type of INSDSeq.other-seqids: a sequence identifier as a string
-- (eg in the "vertical bar format" used in BLAST).
INSDSeqid ::= VisibleString

-- Element type of INSDSeq.secondary-accessions.
INSDSecondary-accn ::= VisibleString

-- Element type of INSDSeq.keywords.
INSDKeyword ::= VisibleString

-- INSDReference_position contains a string value indicating the
-- basepair span(s) to which a reference applies. The allowable
-- formats are:
-- 
--   X..Y  : Where X and Y are integers separated by two periods,
--           X >= 1 , Y <= sequence length, and X <= Y 
--
--           Multiple basepair spans can exist, separated by a
--           semi-colon and a space. For example : 10..20; 100..500
--             
--   sites : The string literal 'sites', indicating that a reference
--           provides sequence annotation information, but the specific
--           basepair spans are either not captured, or were too numerous
--           to record.
-- 
--           The 'sites' literal string is singly occurring, and
--            cannot be used in conjunction with any X..Y basepair spans.
-- 
--   References that lack an INSDReference_position element apply
--   to the entire sequence.

-- Element type of INSDReference.authors: one author name in flatfile form,
-- "LastName, Initials" or "Lastname Initials" (eg Smith, J.N.).
INSDAuthor ::= VisibleString

-- A literature reference attached to a sequence record.
-- Only 'reference' and 'journal' are mandatory.
INSDReference ::= SEQUENCE {
    reference VisibleString ,
    -- basepair span(s) the reference applies to ("X..Y" spans or 'sites');
    -- when absent the reference applies to the entire sequence
    position VisibleString OPTIONAL ,
    authors SEQUENCE OF INSDAuthor OPTIONAL ,
    consortium VisibleString OPTIONAL ,
    title VisibleString OPTIONAL ,
    -- flatfile journal string, eg
    -- Appl. Environ. Microbiol. 61 (4), 1646-1648 (1995)
    journal VisibleString ,
    xref SET OF INSDXref OPTIONAL ,
    pubmed INTEGER OPTIONAL ,
    remark VisibleString OPTIONAL
}

-- INSDXref provides a method for referring to records in
-- other databases. INSDXref_dbname is a string value that
-- provides the name of the database, and INSDXref_id
-- is a string value that provides the record's identifier
-- in that database.

INSDXref ::= SEQUENCE {
    dbname VisibleString ,
    id VisibleString
}

-- INSDTagset is used for community-specific data elements
-- in a tag/value format. All four fields are OPTIONAL.

INSDTagset ::= SEQUENCE {
    authority VisibleString OPTIONAL ,
    version VisibleString OPTIONAL ,
    url VisibleString OPTIONAL ,
    tags INSDTags OPTIONAL
}

-- The list of tags carried by an INSDTagset.
INSDTags ::= SEQUENCE OF INSDTag

-- A single tag of an INSDTagset: name, value and unit, each OPTIONAL.
INSDTag ::= SEQUENCE {
    name VisibleString OPTIONAL ,
    value VisibleString OPTIONAL ,
    unit VisibleString OPTIONAL
}

-- INSDFeature_operator contains a string value describing
-- the relationship among a set of INSDInterval within
-- INSDFeature_intervals. The allowable formats are:
-- 
--   join :  The string literal 'join' indicates that the
--           INSDInterval intervals are biologically joined
--           together into a contiguous molecule.
-- 
--   order : The string literal 'order' indicates that the
--           INSDInterval intervals are in the presented
--           order, but they are not necessarily contiguous.
-- 
--   Either 'join' or 'order' is required if INSDFeature_intervals
--   consists of more than one INSDInterval .
--
-- 'key' and 'location' are mandatory; 'location' is the flatfile
-- feature table location string, and 'intervals' may repeat it in
-- structured form as a convenience.

INSDFeature ::= SEQUENCE {
    key VisibleString ,
    location VisibleString ,
    intervals SEQUENCE OF INSDInterval OPTIONAL ,
    operator VisibleString OPTIONAL ,
    partial5 BOOLEAN OPTIONAL ,
    partial3 BOOLEAN OPTIONAL ,
    quals SEQUENCE OF INSDQualifier OPTIONAL
}

-- INSDInterval_iscomp is a boolean indicating whether
-- an INSDInterval_from / INSDInterval_to location
-- represents a location on the complement strand.
-- When INSDInterval_iscomp is TRUE, it essentially
-- confirms that a 'from' value which is greater than
-- a 'to' value is intentional, because the location
-- is on the opposite strand of the presented sequence.

-- INSDInterval_interbp is a boolean indicating whether
-- a feature (such as a restriction site) is located
-- between two adjacent basepairs. When INSDInterval_interbp
-- is TRUE, the 'from' and 'to' values must differ by
-- exactly one base.

-- 'accession' is the only mandatory field; 'from', 'to' and 'point'
-- are all OPTIONAL.

INSDInterval ::= SEQUENCE {
    from INTEGER OPTIONAL ,
    to INTEGER OPTIONAL ,
    point INTEGER OPTIONAL ,
    iscomp BOOLEAN OPTIONAL ,
    interbp BOOLEAN OPTIONAL ,
    accession VisibleString
}

-- A feature qualifier, represented as in the flatfile feature table:
-- a mandatory name with an optional value.
INSDQualifier ::= SEQUENCE {
    name VisibleString ,
    value VisibleString OPTIONAL
}

-- INSDTagsetRules defines mandatory, optional, and unique tags
-- for a given community's INSDTagset. If the tagset is extensible,
-- then additional tags which are not included in the list of
-- mandatory or optional tags may be present. The uniquetags
-- element provides a list of the tags that may occur only once
-- in a given tagset.
-- Every field of the structure is itself OPTIONAL.

INSDTagsetRules ::= SEQUENCE {
    authority VisibleString OPTIONAL ,
    version VisibleString OPTIONAL ,
    mandatorytags INSDTagNames OPTIONAL ,
    optionaltags INSDTagNames OPTIONAL ,
    uniquetags INSDTagNames OPTIONAL ,
    extensible BOOLEAN OPTIONAL
}

-- A list of tag names, used by the mandatorytags, optionaltags and
-- uniquetags fields of INSDTagsetRules.
INSDTagNames ::= SEQUENCE OF VisibleString

-- A collection of INSDTagsetRules values.
INSDTagsetRuleSet ::= SEQUENCE OF INSDTagsetRules

END


-- medlars.asn
--$Revision: 6.0 $
--**********************************************************************
--
--  MEDLARS data definitions
--  Grigoriy Starchenko, 1997
--
--**********************************************************************

NCBI-Medlars DEFINITIONS ::=
BEGIN

EXPORTS Medlars-entry, Medlars-record;

IMPORTS PubMedId FROM NCBI-Biblio;

-- A MEDLARS entry: a mandatory PubMed id, an optional Medline id and the
-- set of unit records that make up the entry.
Medlars-entry ::= SEQUENCE {     -- a MEDLARS entry
    pmid PubMedId,               -- All entries in PubMed must have it
    muid INTEGER OPTIONAL,       -- Medline(OCCS) id
    recs SET OF Medlars-record   -- List of Medlars records
}

-- One unit record of a MEDLARS entry. The field type is always given as an
-- integer code and may additionally be given as an abbreviation.
Medlars-record ::= SEQUENCE {
    code INTEGER,                -- Unit record field type integer form
    abbr VisibleString OPTIONAL, -- Unit record field type abbreviation form
    data VisibleString           -- Unit record data
}

END

-- medline.asn
--$Revision: 6.0 $
--**********************************************************************
--
--  MEDLINE data definitions
--  James Ostell, 1990
--
--  enhanced in 1996 to support PubMed records as well by simply adding
--    the PubMedId and making MedlineId optional
--
--**********************************************************************

NCBI-Medline DEFINITIONS ::=
BEGIN

EXPORTS Medline-entry, Medline-si;

IMPORTS Cit-art, PubMedId FROM NCBI-Biblio
        Date FROM NCBI-General;

                                -- a MEDLINE or PubMed entry
-- Only the entry month (em) and the article citation (cit) are mandatory.
-- 'status' is assumed to be 'medline' when absent.
Medline-entry ::= SEQUENCE {
    uid INTEGER OPTIONAL ,      -- MEDLINE UID, sometimes not yet available if from PubMed
    em Date ,                   -- Entry Month
    cit Cit-art ,               -- article citation
    abstract VisibleString OPTIONAL ,
    mesh SET OF Medline-mesh OPTIONAL ,
    substance SET OF Medline-rn OPTIONAL ,
    xref SET OF Medline-si OPTIONAL ,
    idnum SET OF VisibleString OPTIONAL ,  -- ID Number (grants, contracts)
    gene SET OF VisibleString OPTIONAL ,
    pmid PubMedId OPTIONAL ,               -- MEDLINE records may include the PubMedId
    pub-type SET OF VisibleString OPTIONAL, -- may show publication types (review, etc)
    mlfield SET OF Medline-field OPTIONAL ,  -- additional Medline field types
    status INTEGER {
	publisher (1) ,      -- record as supplied by publisher
        premedline (2) ,     -- premedline record
        medline (3) } DEFAULT medline }  -- regular medline record

-- A MeSH term with its optional qualifiers.
-- 'mp' is assumed to be FALSE when absent.
Medline-mesh ::= SEQUENCE {
    mp BOOLEAN DEFAULT FALSE ,       -- TRUE if main point (*)
    term VisibleString ,                   -- the MeSH term
    qual SET OF Medline-qual OPTIONAL }    -- qualifiers

-- A qualifier (subheading) of a MeSH term; element type of Medline-mesh.qual.
-- 'mp' is assumed to be FALSE when absent.
Medline-qual ::= SEQUENCE {
    mp BOOLEAN DEFAULT FALSE ,       -- TRUE if main point
    subh VisibleString }             -- the subheading

-- A substance record: the kind of registry number, the number itself
-- (optional) and the substance name (mandatory).
Medline-rn ::= SEQUENCE {       -- medline substance records
    type ENUMERATED {           -- type of record
        nameonly (0) ,
        cas (1) ,               -- CAS number
        ec (2) } ,              -- EC number
    cit VisibleString OPTIONAL ,  -- CAS or EC number if present
    name VisibleString }          -- name (always present)

-- A cross reference from a MEDLINE record to an entry in another database:
-- 'type' names the database and 'cit' optionally gives the
-- citation/accession number within it.
Medline-si ::= SEQUENCE {       -- medline cross reference records
    type ENUMERATED {           -- type of xref
        ddbj (1) ,              -- DNA Data Bank of Japan
        carbbank (2) ,          -- Carbohydrate Structure Database
        embl (3) ,              -- EMBL Data Library
        hdb (4) ,               -- Hybridoma Data Bank
        genbank (5) ,           -- GenBank
        hgml (6) ,              -- Human Gene Map Library
        mim (7) ,               -- Mendelian Inheritance in Man
        msd (8) ,               -- Microbial Strains Database
        pdb (9) ,               -- Protein Data Bank (Brookhaven)
        pir (10) ,              -- Protein Identification Resource
        prfseqdb (11) ,         -- Protein Research Foundation (Japan)
        psd (12) ,              -- Protein Sequence Database (Japan)
        swissprot (13) ,        -- SwissProt
        gdb (14) } ,            -- Genome Data Base
    cit VisibleString OPTIONAL }    -- the citation/accession number

-- An additional Medline field (element type of Medline-entry.mlfield):
-- a keyed type, the field text and optional document references.
Medline-field ::= SEQUENCE {
    type INTEGER {              -- Keyed type
	other (0) ,             -- look in line code
	comment (1) ,           -- comment line
        erratum (2) } ,         -- retracted, corrected, etc
    str VisibleString ,         -- the text
    ids SEQUENCE OF DocRef OPTIONAL }  -- pointers relevant to this text

-- Reference to a document, given as a typed integer id.
-- Presumably 'type' names the id space that 'uid' belongs to
-- (medline, pubmed or ncbigi) - confirm.
DocRef ::= SEQUENCE {           -- reference to a document
    type INTEGER {
        medline (1) ,
        pubmed (2) ,
        ncbigi (3) } ,
    uid INTEGER }

END


-- mim.asn
--********************************************************************
--
--  MIM data definitions
--  Brandon Brylawski, 1996.
--  version 2.1
--
--********************************************************************

NCBI-Mim DEFINITIONS ::=
BEGIN

-- A bare list of MIM entries (see Mim-set for a list with a release date).
Mim-entries ::= SEQUENCE OF Mim-entry

-- A list of MIM entries together with a release date.
Mim-set ::= SEQUENCE {
	releaseDate Mim-date ,
	mimEntries SEQUENCE OF Mim-entry }

-- One MIM entry.
-- Mandatory fields: mimNumber, mimType, title and numGeneMaps;
-- every other field is OPTIONAL.
-- NOTE(review): the mimType names (star, caret, pound, plus, perc)
-- presumably correspond to the symbol prefixed to the MIM number - confirm.
Mim-entry ::= SEQUENCE {
	mimNumber VisibleString ,
	mimType INTEGER {
		none (0) ,
		star (1) ,
		caret (2) ,
		pound (3) ,
		plus (4) ,
		perc (5) } ,
	title VisibleString ,
	copyright VisibleString OPTIONAL ,
	symbol VisibleString OPTIONAL ,
	locus VisibleString OPTIONAL ,
	synonyms SEQUENCE OF VisibleString OPTIONAL ,
	aliases SEQUENCE OF VisibleString OPTIONAL ,
	included SEQUENCE OF VisibleString OPTIONAL ,
	seeAlso SEQUENCE OF Mim-cit OPTIONAL ,
	text SEQUENCE OF Mim-text OPTIONAL ,
	textfields SEQUENCE OF Mim-text OPTIONAL ,
	-- summary and its attribution / edit history / creation date
	hasSummary BOOLEAN OPTIONAL ,
	summary SEQUENCE OF Mim-text OPTIONAL ,
	summaryAttribution SEQUENCE OF Mim-edit-item OPTIONAL ,
	summaryEditHistory SEQUENCE OF Mim-edit-item OPTIONAL ,
	summaryCreationDate Mim-edit-item OPTIONAL ,
	allelicVariants SEQUENCE OF Mim-allelic-variant OPTIONAL ,
	-- clinical synopsis and its attribution / edit history / creation date
	hasSynopsis BOOLEAN OPTIONAL ,
	clinicalSynopsis SEQUENCE OF Mim-index-term OPTIONAL ,
	synopsisAttribution SEQUENCE OF Mim-edit-item OPTIONAL ,
	synopsisEditHistory SEQUENCE OF Mim-edit-item OPTIONAL ,
	synopsisCreationDate Mim-edit-item OPTIONAL ,
	-- edit history, creation date and attribution of the entry as a whole
	editHistory SEQUENCE OF Mim-edit-item OPTIONAL ,
	creationDate Mim-edit-item OPTIONAL ,
	references SEQUENCE OF Mim-reference OPTIONAL ,
	attribution SEQUENCE OF Mim-edit-item OPTIONAL ,
	numGeneMaps INTEGER ,
	-- links from this entry, one Mim-link per link kind
	medlineLinks Mim-link OPTIONAL ,
	proteinLinks Mim-link OPTIONAL ,
	nucleotideLinks Mim-link OPTIONAL ,
	structureLinks Mim-link OPTIONAL ,
	genomeLinks Mim-link OPTIONAL }

-- A labelled piece of text with an optional neighbors link.
Mim-text ::= SEQUENCE {
	label VisibleString ,
	text VisibleString ,
	neighbors Mim-link OPTIONAL}

-- An allelic variant listed in a MIM entry.
-- 'number' and 'name' are mandatory; the remaining fields are OPTIONAL.
Mim-allelic-variant ::= SEQUENCE {
	number VisibleString ,
	name VisibleString  ,
	aliases SEQUENCE OF VisibleString  OPTIONAL ,
	mutation SEQUENCE OF Mim-text  OPTIONAL ,
	description SEQUENCE OF Mim-text OPTIONAL ,
	snpLinks Mim-link OPTIONAL }

-- A set of links from a MIM entry, text or allelic variant.
-- NOTE(review): 'uids' is a single string, not a list of integers; how
-- several uids are packed into it is not defined here - confirm.
Mim-link ::= SEQUENCE {
	num INTEGER ,
	uids VisibleString ,
	numRelevant INTEGER OPTIONAL }

-- An author or editor of a Mim-reference: a name plus an integer index.
Mim-author ::= SEQUENCE {
	name VisibleString ,
	index INTEGER }

-- A short citation, used by the seeAlso list of Mim-entry.
-- All four fields are mandatory.
Mim-cit ::= SEQUENCE {
	number INTEGER ,
	author VisibleString ,
	others VisibleString ,
	year INTEGER }

-- A full bibliographic reference of a MIM entry.
-- Mandatory fields: number, authors, primaryAuthor, otherAuthors,
-- citationTitle, pubDate and ambiguous; everything else is OPTIONAL.
Mim-reference ::= SEQUENCE {
	number INTEGER ,
	origNumber INTEGER OPTIONAL ,
	-- kind of reference
	type ENUMERATED {
		not-set (0) ,
		citation (1) ,
		book (2) ,
		personal-communication (3) ,
		book-citation (4) } OPTIONAL ,
	authors SEQUENCE OF Mim-author ,
	primaryAuthor VisibleString ,
	otherAuthors VisibleString ,
	citationTitle VisibleString ,
	citationType INTEGER OPTIONAL ,
	bookTitle VisibleString OPTIONAL ,
	editors SEQUENCE OF Mim-author OPTIONAL ,
	volume VisibleString OPTIONAL ,
	edition VisibleString OPTIONAL ,
	journal VisibleString OPTIONAL ,
	series VisibleString OPTIONAL ,
	publisher VisibleString OPTIONAL ,
	place VisibleString OPTIONAL ,
	commNote VisibleString OPTIONAL ,
	pubDate Mim-date ,
	pages SEQUENCE OF Mim-page OPTIONAL ,
	miscInfo VisibleString OPTIONAL ,
	pubmedUID INTEGER OPTIONAL ,
	ambiguous BOOLEAN ,
	noLink BOOLEAN OPTIONAL }

-- A key with its list of terms; element type of Mim-entry.clinicalSynopsis.
Mim-index-term ::= SEQUENCE {
	key VisibleString ,
	terms SEQUENCE OF VisibleString }

-- Mim-edit-item: one edit record, pairing an author with a date.  It is the
-- type of the creation date, edit history and attribution fields in this
-- module.
Mim-edit-item ::= SEQUENCE {
	author VisibleString ,
	modDate Mim-date }      -- date of the edit (see Mim-date)

-- Mim-date: a calendar date given as integers.  Only the year is mandatory,
-- so a date may be known to the year or month only.
Mim-date ::= SEQUENCE {
	year INTEGER ,
	month INTEGER OPTIONAL ,
	day INTEGER OPTIONAL }

-- Mim-page: a page range with a mandatory start and an optional end.  Both
-- are strings, not integers.
Mim-page ::= SEQUENCE {
	from VisibleString ,
	to VisibleString OPTIONAL }

END


-- mla.asn
--$Revision: 6.1 $
--********************************************************************
--
--  Network MEDLINE Archive message formats
--  Ostell 1993
--
--
--*********************************************************************
--
--  mla.asn
--
--     messages for medline archive data access
--
--*********************************************************************

NCBI-MedArchive DEFINITIONS ::=
BEGIN

IMPORTS Medline-entry FROM NCBI-Medline
        Medlars-entry FROM NCBI-Medlars
        Pubmed-entry FROM NCBI-PubMed
        Medline-si FROM NCBI-Medline
        Pub FROM NCBI-Pub
        Title, PubMedId FROM NCBI-Biblio;

        --**********************************
        -- requests
        --

-- Mla-request: one request message for the MEDLINE archive service.  Each
-- alternative carries its own context-specific tag number; these numbers are
-- part of the encoding.  Replies are carried by Mla-back.
Mla-request ::= CHOICE {
    init [0] NULL,              -- DlInit
    getmle [1] INTEGER,         -- get MedlineEntry
    getpub [2] INTEGER,         -- get citation by muid
    gettitle [3] Title-msg,     -- match titles
    citmatch [4] Pub,           -- citation match (see citmatchpmid for the PMID variant)
    fini [5] NULL,              -- DlFini
    getmriuids [6] INTEGER,     -- get MUIDs for an MRI
    getaccuids [7] Medline-si,  -- get MUIDs for an accession
    uidtopmid [8] INTEGER,      -- get PMID for MUID
    pmidtouid [9] PubMedId,     -- get MUID for PMID
    getmlepmid [10] PubMedId,   -- get MedlineEntry by PubMed id
    getpubpmid [11] PubMedId,   -- get citation by PubMed id
    citmatchpmid [12] Pub,      -- citation match, PMID on out
    getmripmids [13] INTEGER,   -- get PMIDs for an MRI
    getaccpmids [14] Medline-si,-- get PMIDs for an accession
    citlstpmids [15] Pub,       -- generate list of PMIDs for Pub
    getmleuid [16] INTEGER,     -- get MedlineEntry by Medline id
    getmlrpmid [17] PubMedId,   -- get MedlarsEntry by PubMed id
    getmlruid [18] INTEGER      -- get MedlarsEntry by Medline id
    }

--**********************************************************************
--
--  if request = all
--	if one row returned
--	   reply=all, return every column
--	else 
--	   reply=ml-jta for each row
--
--  if request = not-set, reply=ml-jta
--
--  otherwise,
--	if request != ml-jta
--	   if column exist, reply=column, else reply=ml-jta
--
--**********************************************************************

-- Title-type: the kind of title requested or returned in a Title-msg.  The
-- request/reply rules for not-set, ml-jta and all are given in the comment
-- block preceding this definition.
-- NOTE(review): the value names presumably mirror the alternatives of Title
-- from NCBI-Biblio; confirm against that module.
Title-type ::= ENUMERATED {
    not-set (0),                -- request=ml-jta (default), reply=not-found
    name (1),
    tsub (2),
    trans (3),
    jta (4),
    iso-jta (5),
    ml-jta (6),
    coden (7),
    issn (8),
    abr (9),
    isbn (10),
    all (255)                   -- request every column (see rules above)
    }

-- Title-msg: a title paired with its Title-type.  The same structure is used
-- in both directions: as the gettitle request and as each element of the
-- Title-msg-list reply.
Title-msg ::= SEQUENCE {         -- Title match request/response
    type Title-type,             -- type to get, or type returned
    title Title                  -- title(s) to look up, or title(s) found
    }

-- Title-msg-list: the reply to a gettitle request (see Mla-back.gettitle).
Title-msg-list ::= SEQUENCE {
    num INTEGER,                 -- number of titles
    titles SEQUENCE OF Title-msg -- the matched titles
    }

-- Error-val: error codes returned in the error alternative of Mla-back.
Error-val ::= ENUMERATED {
    not-found (0),               -- Entry was not found
    operational-error (1),       -- A run-time operational error occurred
    cannot-connect-jrsrv (2),    -- Cannot connect to Journal server
    cannot-connect-pmdb (3),     -- Cannot connect to PubMed
    journal-not-found (4),       -- Journal title not found
    citation-not-found (5),      -- Volume, Page and Author do not match any
                                 -- article
    citation-ambiguous (6),      -- More than one article found
    citation-too-many (7)        -- Too many articles were found
    }

-- Mla-back: one reply message from the MEDLINE archive service, answering an
-- Mla-request.  Note that the tag numbers here do not line up with those of
-- Mla-request (for example getmle is [1] in the request and [2] in the
-- reply), and several requests share one reply alternative.
Mla-back ::= CHOICE {
    init [0] NULL,                   -- DlInit
    error [1] Error-val,             -- not found for getmle/getpub/citmatch
    getmle [2] Medline-entry,        -- got Medline Entry
    getpub [3] Pub,                  -- got citation
    gettitle [4] Title-msg-list,     -- match titles
    citmatch [5] INTEGER,            -- citation lookup muid or 0
    fini [6] NULL,                   -- DlFini
    getuids [7] SEQUENCE OF INTEGER, -- got a set of MUIDs
    getpmids [8] SEQUENCE OF INTEGER,-- got a set of PMIDs
    outuid [9] INTEGER,              -- result muid or 0 if not found
    outpmid [10] PubMedId,           -- result pmid or 0 if not found
    getpme [11] Pubmed-entry,        -- got Pubmed Entry
    getmlr [12] Medlars-entry        -- got Medlars Entry
    }

END

-- mmdb1.asn
--$Revision: 6.1 $
--**********************************************************************
--
--  Biological Macromolecule 3-D Structure Data Types for MMDB,
--                A Molecular Modeling Database
--
--  Definitions for a biomolecular assembly and the MMDB database
--
--  By Hitomi Ohkawa, Jim Ostell, Chris Hogue, and Steve Bryant 
--
--  National Center for Biotechnology Information
--  National Institutes of Health
--  Bethesda, MD 20894 USA
--
--  July 1995
--
--**********************************************************************

-- Contents of the MMDB database are currently based on files distributed by
-- the Protein Data Bank, PDB.  These data are changed in form, as described
-- in this specification. To some extent they are also changed in content, in 
-- that many data items implicit in PDB are made explicit, and others are
-- corrected or omitted as a consequence of validation checks.  The semantics
-- of MMDB data items are indicated by comments within the specification below.
-- These comments explain in detail the manner in which data items from  PDB 
-- have been mapped into MMDB. 

MMDB DEFINITIONS ::=

BEGIN

EXPORTS Biostruc, Biostruc-id, Biostruc-set, Biostruc-annot-set,
	Biostruc-residue-graph-set;

IMPORTS Biostruc-graph, Biomol-descr, Residue-graph FROM MMDB-Chemical-graph 
	Biostruc-model FROM MMDB-Structural-model
	Biostruc-feature-set FROM MMDB-Features
	Pub FROM NCBI-Pub
	Date, Object-id, Dbtag FROM NCBI-General;

-- A structure report or "biostruc" describes the components of a biomolecular 
-- assembly in terms of their names and descriptions, and a chemical graph 
-- giving atomic formula, connectivity and chirality. It also gives one or more
-- three-dimensional model structures, literally a mapping of the atoms, 
-- residues and/or molecules of each component into a measured three-
-- dimensional space. Structure may also be described by named features, which 
-- associate nodes in the chemical graph, or regions in space, with text or 
-- numeric descriptors.

-- Note that a biostruc may also contain cross references to other databases,
-- including citations to relevant scientific literature. These cross 
-- references use object types from other NCBI data specifications, which are 
-- "imported" into MMDB, and not repeated in this specification. 

-- Biostruc: one structure report.  Only id and chemical-graph are mandatory;
-- descriptions, features and models are OPTIONAL.  The models come last in
-- the sequence.
Biostruc ::= SEQUENCE {
	id			SEQUENCE OF Biostruc-id,	-- identifiers of this assembly
	descr			SEQUENCE OF Biostruc-descr OPTIONAL,	-- names, comments, history, citations
	chemical-graph		Biostruc-graph,	-- complete chemical graph of the assembly
	features		SEQUENCE OF Biostruc-feature-set OPTIONAL,	-- named features
	model			SEQUENCE OF Biostruc-model OPTIONAL }	-- three-dimensional models

-- A Biostruc-id is one of a collection of identifiers for the molecular assembly.
-- Mmdb-id's are NCBI-assigned, and are intended to be unique and stable 
-- identifiers.  Other-id's are synonyms.

-- Biostruc-id: a single identifier for a biostruc, in one of three forms.
Biostruc-id ::= CHOICE {
	mmdb-id			Mmdb-id,	-- NCBI-assigned MMDB identifier
	other-database		Dbtag,	-- identifier in another database (a synonym)
	local-id		Object-id }	-- local identifier (a synonym)

-- Mmdb-id: the NCBI-assigned integer identifier of a biostruc, intended to
-- be unique and stable.
Mmdb-id ::= INTEGER


-- The description of a biostruc refers to both the reported chemical and 
-- spatial structure of a biomolecular assembly.  PDB-derived descriptors
-- which refer specifically to the chemical components or spatial structure
-- are not provided here, but instead as descriptors of the biostruc-graph or 
-- biostruc-model. For PDB-derived structures the biostruc name is the PDB 
-- id-code.  PDB-derived citations appear as publications within the biostruc 
-- description, and include a data-submission citation derived from PDB AUTHOR 
-- records.  Citations are described using the NCBI Pub specification.

-- Biostruc-descr: one descriptor of a biostruc as a whole.  A biostruc
-- carries a list of these, so several alternatives may be present at once.
Biostruc-descr ::= CHOICE {
	name			VisibleString,	-- for PDB-derived structures, the PDB id-code
	pdb-comment		VisibleString,
	other-comment		VisibleString,
	history			Biostruc-history, 
	attribution		Pub }	-- citation, described using the NCBI Pub specification


-- The history of a biostruc indicates its origin and its update history
-- within MMDB, the NCBI-maintained molecular structure database.  

-- Biostruc-history: origin and update history of a biostruc.  All three
-- fields are OPTIONAL.
Biostruc-history ::= SEQUENCE {
	replaces		Biostruc-replace OPTIONAL,	-- entry that this one replaces
	replaced-by		Biostruc-replace OPTIONAL,	-- entry that replaces this one
	data-source		Biostruc-source OPTIONAL }	-- database the entry came from

-- Biostruc-replace: a pointer to another biostruc together with a date, used
-- for both directions of the replacement history in Biostruc-history.
Biostruc-replace ::= SEQUENCE {
	id			Biostruc-id,
	date			Date }

-- The origin of a biostruc is a reference to another database.  PDB release 
-- date and PDB-assigned id codes are recorded here, as are the PDB-assigned 
-- entry date and replacement history.

-- Biostruc-source: reference to the database from which a biostruc was
-- derived.  The database version may be given either as a release date or
-- as a release code.
Biostruc-source ::= SEQUENCE {
	name-of-database	VisibleString,
	version-of-database	CHOICE {
		release-date		Date,
		release-code		VisibleString } OPTIONAL,
	database-entry-id	Biostruc-id,	-- id of the entry in the source database
	database-entry-date	Date,	-- entry date assigned by the source database
	database-entry-history	SEQUENCE OF VisibleString OPTIONAL}	-- replacement history, as text


-- A biostruc set is a means to collect ASN.1 data for many biostrucs in 
-- one file, as convenient for application programs.  The object type is not
-- intended to imply similarity of the biostrucs grouped together.

-- Biostruc-set: a container holding many biostrucs, with optional
-- identifiers and descriptors for the set itself.
Biostruc-set ::= SEQUENCE {
	id		SEQUENCE OF Biostruc-id OPTIONAL,
	descr		SEQUENCE OF Biostruc-descr OPTIONAL,
	biostrucs	SEQUENCE OF Biostruc }


-- A biostruc annotation set is a means to collect ASN.1 data for biostruc
-- features into one file. The object type is intended as a means to store 
-- feature annotation of similar type, such as "core" definitions for a 
-- threading program, or structure-structure alignments for a structure-
-- similarity browser.

-- Biostruc-annot-set: a container holding biostruc feature sets, with
-- optional identifiers and descriptors for the set itself.
Biostruc-annot-set ::= SEQUENCE {
	id		SEQUENCE OF Biostruc-id OPTIONAL,
	descr		SEQUENCE OF Biostruc-descr OPTIONAL,
	features	SEQUENCE OF Biostruc-feature-set }


-- A biostruc residue graph set is a collection of residue graphs.  The object
-- type is intended as a means to record dictionaries containing the chemical
-- subgraphs of "standard" residue types, which are used as a means to 
-- simplify description of the covalent structure of a biomolecular assembly.
-- The standard residue graph dictionary supplied with the MMDB database 
-- contains 20 standard L amino acids and 8 standard ribonucleotide groups. 
-- These graphs are complete, including explicit hydrogen atoms and separate 
-- instances for the terminal polypeptide and polynucleotide residues. 

-- Biostruc-residue-graph-set: a dictionary of residue graphs.  Unlike the
-- other set types in this module, its descriptors are Biomol-descr rather
-- than Biostruc-descr.
Biostruc-residue-graph-set ::= SEQUENCE {
	id			SEQUENCE OF Biostruc-id OPTIONAL,
	descr			SEQUENCE OF Biomol-descr OPTIONAL,
	residue-graphs		SEQUENCE OF Residue-graph }

END


--**********************************************************************
--
--  Biological Macromolecule 3-D Structure Data Types for MMDB,
--                A Molecular Modeling Database
--
--  Definitions for a chemical graph
--
--  By Hitomi Ohkawa, Jim Ostell, Chris Hogue and Steve Bryant 
--
--  National Center for Biotechnology Information
--  National Institutes of Health
--  Bethesda, MD 20894 USA
--
--  July, 1995
--
--**********************************************************************

MMDB-Chemical-graph DEFINITIONS ::=

BEGIN

EXPORTS Biostruc-graph, Biomol-descr, Residue-graph,
	Molecule-id, PCSubstance-id, Residue-id, Atom-id;

IMPORTS Pub FROM NCBI-Pub
	BioSource FROM NCBI-BioSource
	Seq-id FROM NCBI-Seqloc
	Biostruc-id FROM MMDB;

-- A biostruc graph contains the complete chemical graph of the biomolecular 
-- assembly.  The assembly graph is defined hierarchically, in terms of 
-- subgraphs of component molecules.  For PDB-derived biostrucs,
-- the molecules forming the assembly are the individual biopolymer chains and 
-- any non-polymer or "heterogen" groups which are present. 

-- The PDB-derived  "compound name" field appears as the name within the
-- biostruc-graph description.  PDB "class" and "source" fields appear as 
-- explicit attributes.  PDB-derived structures are assigned an assembly type 
-- of "other" unless they have been further classified as the "physiological
-- form" or "crystallographic cell" contents.  If they have, the source of the 
-- type classification appears as a citation within the  assembly description. 

-- Note that the biostruc-graph also includes as literals the subgraphs of 
-- any nonstandard residues present within it. For PDB-derived biostrucs these 
-- subgraphs are constructed automatically, with validation as described below.

-- Biostruc-graph: the chemical graph of a whole assembly.  Only
-- molecule-graphs is mandatory.
Biostruc-graph ::= SEQUENCE {
	descr			SEQUENCE OF Biomol-descr OPTIONAL,	-- assembly-level descriptors
	molecule-graphs		SEQUENCE OF Molecule-graph,	-- one graph per component molecule
	inter-molecule-bonds	SEQUENCE OF Inter-residue-bond OPTIONAL,	-- bonds between molecules
	residue-graphs		SEQUENCE OF Residue-graph OPTIONAL }	-- subgraphs of nonstandard residues

-- A biomolecule description refers to the chemical structure of a molecule or 
-- component substructures.  This descriptor type is used at the level of
-- assemblies, molecules and residues, and also for residue-graph dictionaries.
-- The BioSource object type is drawn from NCBI taxonomy data specifications,
-- and is not repeated here.

-- Biomol-descr: one descriptor of a chemical entity.  The same type is used
-- at the assembly, molecule and residue-graph levels.  assembly-type and
-- molecule-type are INTEGERs with named values, not ENUMERATED types.
Biomol-descr ::= CHOICE {
	name			VisibleString,
	pdb-class		VisibleString,	-- PDB "class" field
	pdb-source		VisibleString,	-- PDB "source" field
	pdb-comment		VisibleString,
	other-comment		VisibleString,
	organism		BioSource,
	attribution		Pub,
	assembly-type		INTEGER {	physiological-form(1),
						crystallographic-cell(2),
						other(255) },
	molecule-type		INTEGER {	dna(1),
						rna(2),
						protein(3),
						other-biopolymer(4),
						solvent(5),
						other-nonpolymer(6),
						other(255) } }

-- A molecule chemical graph is defined by a sequence of residues.  Nonpolymers
-- are described in the same way, but may contain only a single residue.  

-- Biopolymer molecules are identified within PDB entries according to their
-- appearance on SEQRES records, which formally define a biopolymer as such. 
-- Biopolymers are defined by the distinction between ATOM and HETATM 
-- coordinate records only in cases where the chemical sequence from SEQRES
-- is in conflict with coordinate data. The PDB-assigned chain code appears as 
-- the name within the molecule descriptions of the biopolymers.

-- Nonpolymer molecules from PDB correspond to individual HETEROGEN groups, 
-- excluding any HETEROGEN groups which represent modified biopolymer residues.
-- These molecules are named according to the chain, residue type and residue 
-- number fields as assigned by PDB. Any description appearing in the PDB HET 
-- record appears as a pdb-comment within the molecule description. 

-- Molecule types for PDB-derived molecule graphs are assigned by matching 
-- residue and atom names against the PDB-documented standard types for protein,
-- DNA and RNA, and against residue codes commonly used to indicate solvent.
-- Classification is by "majority rule". If more than half of the residues in
-- a biopolymer are standard groups of one type, then the molecule is of that 
-- type, and otherwise classified as "other". Note that this classification does
-- not preclude the presence of modified residues, but insists they constitute 
-- less than half the biopolymer. Non-polymers are classified only as "solvent"
-- or "other".  

-- Note that a molecule graph may also contain a set of cross references 
-- to biopolymer sequence databases.  All biopolymer molecules in MMDB contain 
-- appropriate identifiers for the corresponding entry in the NCBI-Sequences 
-- database, in particular the NCBI "gi" number, which may be used for sequence
-- retrieval. The Seq-id object type is defined in the NCBI molecular sequence 
-- specification, and not repeated here.

-- Molecule-graph: the chemical graph of one molecule, given as a sequence of
-- residues.  Only id and residue-sequence are mandatory.
Molecule-graph ::= SEQUENCE {
	id			Molecule-id,
	descr			SEQUENCE OF Biomol-descr OPTIONAL,
	seq-id			Seq-id OPTIONAL,	-- cross reference to a sequence database entry
	residue-sequence	SEQUENCE OF Residue,
	inter-residue-bonds	SEQUENCE OF Inter-residue-bond OPTIONAL, 
	sid                     PCSubstance-id OPTIONAL }	-- PubChem substance id
   
-- Molecule-id: integer identifier of a Molecule-graph; it is the first level
-- of the hierarchical Atom-pntr.
Molecule-id ::= INTEGER

-- Pubchem substance id

-- PCSubstance-id: integer substance id, the type of Molecule-graph.sid.
PCSubstance-id ::= INTEGER

-- Residues may be assigned a text-string name as well as an id number. PDB 
-- assigned residue numbers appear as the residue name.

-- Residue: one residue of a molecule.  It does not contain its own chemical
-- graph; it points to a Residue-graph defined elsewhere.
Residue ::= SEQUENCE {
	id			Residue-id,
	name			VisibleString OPTIONAL,	-- PDB-assigned residue number, as text
	residue-graph		Residue-graph-pntr }	-- where the residue graph is defined

-- Residue-id: integer identifier of a Residue; it is the second level of the
-- hierarchical Atom-pntr.
Residue-id ::= INTEGER


-- Residue graphs from different sources may be referenced within a molecule
-- graph.  The allowed choices are the nonstandard residue graphs included in 
-- the present biostruc, residue graphs within other biostrucs, or residue 
-- graphs within tables of standard residue definitions.

-- Residue-graph-pntr: reference to a residue graph from one of three
-- sources.
Residue-graph-pntr ::= CHOICE {
	local			Residue-graph-id,	-- nonstandard graph in the present biostruc
	biostruc		Biostruc-graph-pntr,	-- graph within another biostruc
	standard		Biostruc-residue-graph-set-pntr }	-- graph in a table of standard residues
	
-- Biostruc-graph-pntr: identifies a residue graph inside another biostruc by
-- the biostruc id plus the residue graph id.
Biostruc-graph-pntr ::= SEQUENCE {
	biostruc-id		Biostruc-id,
	residue-graph-id	Residue-graph-id }

-- Biostruc-residue-graph-set-pntr: identifies a residue graph inside a
-- residue graph set by the id of the set plus the residue graph id.
Biostruc-residue-graph-set-pntr ::= SEQUENCE {
	biostruc-residue-graph-set-id	Biostruc-id,
	residue-graph-id		Residue-graph-id } 


-- Residue graphs define atomic formulae, connectivity, chirality, and names.
-- For standard residue graphs from the MMDB dictionary the PDB-assigned 
-- residue-type code appears as the name within the residue graph description,
-- and the full trivial name of the residue as a comment within that 
-- description.  For any nonstandard residue graphs provided with an MMDB 
-- biostruc the PDB-assigned residue-type code similarly appears as the name 
-- within the description, and any information provided on PDB HET records as 
-- a pdb-comment within that description.  

-- Note that nonstandard residue graphs for a PDB-derived biostruc may be 
-- incomplete. Current PDB format cannot represent connectivity for groups 
-- which are disordered, and for which no coordinates are given.  In these 
-- cases the residue graph defined in MMDB represents only the subgraph that 
-- could be identified from available ATOM, HETATM and CONECT records.

-- Residue-graph: the chemical subgraph of one residue type.  The id, atoms
-- and bonds fields are mandatory; the rest are OPTIONAL.
Residue-graph ::= SEQUENCE {
	id			Residue-graph-id,
	descr			SEQUENCE OF Biomol-descr OPTIONAL,	-- residue-type code as name, plus comments
	residue-type		INTEGER {	deoxyribonucleotide(1),
						ribonucleotide(2),
						amino-acid(3),
						other(255) } OPTIONAL,
	iupac-code		SEQUENCE OF VisibleString OPTIONAL,
	atoms			SEQUENCE OF Atom,
	bonds			SEQUENCE OF Intra-residue-bond,	-- bonds between atoms of this residue
	chiral-centers		SEQUENCE OF Chiral-center OPTIONAL }
	
-- Residue-graph-id: integer identifier of a Residue-graph, used by the
-- pointer types above to select a graph.
Residue-graph-id ::= INTEGER

-- Atoms in residue graphs are defined by elemental symbols and names.  PDB-
-- assigned atom names appear here in the name field, except in cases of known 
-- PDB synonyms.  In these cases atom names are mapped to the names used in the
-- MMDB standard dictionary. This occurs primarily for hydrogen atoms, where 
-- PDB practice allows synonyms for several atom types.  For PDB atoms the 
-- elemental symbol is obtained by parsing the PDB atom name field, allowing 
-- for known special-semantics cases where the atom name does not follow the
-- documented encoding rule.  Ionizable protons are identified within standard 
-- residue graphs in the MMDB dictionary, but not within automatically-defined
-- nonstandard graphs.

-- Atom: one atom of a residue graph.  Only id and element are mandatory.
-- The element values 1 through 103 equal the atomic number of the named
-- element (h = 1 ... lr = 103); 254 and 255 are reserved for other and
-- unknown.
Atom ::= SEQUENCE {
	id			Atom-id,
	name			VisibleString OPTIONAL,	-- atom name (PDB-assigned, or its MMDB synonym)
	iupac-code		SEQUENCE OF VisibleString OPTIONAL,
	element			ENUMERATED {
				h(1),   he(2),  li(3),  be(4),  b(5), 
				c(6),   n(7),   o(8),   f(9),   ne(10), 
				na(11), mg(12), al(13), si(14), p(15), 
				s(16),  cl(17), ar(18), k(19),  ca(20), 
				sc(21), ti(22), v(23),  cr(24), mn(25), 
				fe(26), co(27), ni(28), cu(29), zn(30), 
				ga(31), ge(32), as(33), se(34), br(35), 
				kr(36), rb(37), sr(38), y(39),  zr(40),
				nb(41), mo(42), tc(43), ru(44), rh(45),
				pd(46), ag(47), cd(48), in(49), sn(50),
				sb(51), te(52), i(53),  xe(54), cs(55),
				ba(56), la(57), ce(58), pr(59), nd(60),
				pm(61), sm(62), eu(63), gd(64), tb(65),
				dy(66), ho(67), er(68), tm(69), yb(70),
				lu(71), hf(72), ta(73), w(74),  re(75),
				os(76), ir(77), pt(78), au(79), hg(80),
				tl(81), pb(82), bi(83), po(84), at(85),
				rn(86), fr(87), ra(88), ac(89), th(90),
				pa(91), u(92),  np(93), pu(94), am(95),
				cm(96), bk(97), cf(98), es(99), 
				fm(100), md(101), no(102), lr(103),
				other(254), unknown(255) },
	-- Three-valued flag (not a BOOLEAN) so that "unknown" can be recorded.
	ionizable-proton	ENUMERATED {
					true(1),
					false(2),
					unknown(255) } OPTIONAL }
	
-- Atom-id: integer identifier of an Atom, used by Intra-residue-bond,
-- Chiral-center and as the last level of Atom-pntr.
Atom-id ::= INTEGER

-- Intra-residue-bond specifies connectivity between atoms in Residue-graph.
-- Unlike Inter-residue-bond defined later, its participating atoms are part of
-- a residue subgraph dictionary, not part of a specific biostruc-graph.

-- For residue graphs in the standard MMDB dictionary bonds are defined from
-- the known chemical structures of amino acids and nucleotides.  For 
-- nonstandard residue graphs bonds are defined from PDB CONECT records, with
-- validation for consistency with coordinate data, and from stereochemical
-- calculation to identify unreported bonds.  Validation and bond identification
-- are based on comparison of inter-atomic distances to the sum of covalent
-- radii for the corresponding elements. 

-- Intra-residue-bond: a bond between two atoms of the same residue graph.
-- The atoms are given by plain Atom-id values.  The named bond-order values
-- are the same as in Inter-residue-bond.
Intra-residue-bond ::= SEQUENCE {
	atom-id-1		Atom-id,
	atom-id-2		Atom-id,
	bond-order		INTEGER {
					single(1), 
					partial-double(2),
					aromatic(3), 
					double(4),
					triple(5),
					other(6),
					unknown(255)} OPTIONAL }

-- Chiral centers are atoms with tetrahedral geometry.  Chirality is defined
-- by a chiral volume involving the chiral center and 3 other atoms bonded to 
-- it.  For any coordinates assigned to atoms c, n1, n2, and n3, the vector 
-- triple product (n1-c) dot ( (n2-c) cross (n3-c) ) must have the indicated
-- sign.  The calculation assumes an orthogonal right-handed coordinate system
-- as is used for MMDB model structures.  

-- Chirality is defined for standard residues in the MMDB dictionary, but is 
-- not assigned automatically for PDB-derived nonstandard residues. If assigned
-- for nonstandard residues, the source of chirality information is described 
-- by a citation within the residue description.

-- Chiral-center: chirality of one atom, expressed as the required sign of
-- the triple product (n1-c) dot ((n2-c) cross (n3-c)).
Chiral-center ::= SEQUENCE {
	c			Atom-id,	-- the chiral center
	n1			Atom-id,	-- first atom bonded to c
	n2			Atom-id,	-- second atom bonded to c
	n3			Atom-id,	-- third atom bonded to c
	sign			ENUMERATED { positive(1),
					     negative(2) } }

-- Inter-residue bonds are defined by a reference to two atoms. For PDB-derived 
-- structures bonds are identified from biopolymer connectivity according to
-- SEQRES and from other connectivity information on SSBOND and CONECT 
-- records. These data are validated and unreported bonds identified by
-- stereochemical calculation, using the same criteria as for intra-residue 
-- bonds.

-- Inter-residue-bond: a bond between atoms of different residues or
-- molecules.  Each atom is given by a full Atom-pntr, not a bare Atom-id,
-- even though the fields are named atom-id-1 and atom-id-2.  The named
-- bond-order values are the same as in Intra-residue-bond.
Inter-residue-bond ::= SEQUENCE {
	atom-id-1		Atom-pntr,
	atom-id-2		Atom-pntr,
	bond-order		INTEGER {
					single(1), 
					partial-double(2),
					aromatic(3), 
					double(4),
					triple(5),
					other(6),
					unknown(255)} OPTIONAL }

-- Atoms, residues and molecules within the current biostruc are referenced 
-- by hierarchical pointers.

-- Atom-pntr: hierarchical pointer to one atom in the current biostruc, given
-- as molecule, then residue, then atom.
Atom-pntr ::= SEQUENCE {
	molecule-id		Molecule-id,
	residue-id		Residue-id,
	atom-id			Atom-id }

-- Atom-pntr-set: a list of atom pointers.
-- NOTE(review): not exported and not referenced by any other definition in
-- this module; confirm whether it is still needed.
Atom-pntr-set ::= SEQUENCE OF Atom-pntr

END

-- mmdb2.asn
--$Revision: 6.0 $
--**********************************************************************
--
--  Biological Macromolecule 3-D Structure Data Types for MMDB,
--                A Molecular Modeling Database
--
--  Definitions for structural models
--
--  By Hitomi Ohkawa, Jim Ostell, Chris Hogue and Steve Bryant 
--
--  National Center for Biotechnology Information
--  National Institutes of Health
--  Bethesda, MD 20894 USA
--
--  July, 1996
--
--**********************************************************************

MMDB-Structural-model DEFINITIONS ::=

BEGIN

EXPORTS Biostruc-model, Model-id, Model-coordinate-set-id;

IMPORTS Chem-graph-pntrs, Atom-pntrs, Chem-graph-alignment,
	Sphere, Cone, Cylinder, Brick, Transform FROM MMDB-Features
	Biostruc-id FROM MMDB
	Pub FROM NCBI-Pub;

-- A structural model maps chemical components into a measured three-
-- dimensional space. PDB-derived biostrucs generally contain 4 models, 
-- corresponding to "views" of the structure of a biomolecular assembly with 
-- increasing levels of complexity.  Model types indicate the complexity of the
-- view.  

-- The model named "NCBI all atom" represents a view suitable for most 
-- computational biology applications.  It provides complete atomic coordinate 
-- data for a "single best" model, omitting statistical disorder information 
-- and/or ensemble structure descriptions provided in the source PDB file.  
-- Construction of the single best model is based on the assumption that the 
-- contents of the "alternate conformation" field from pdb imply no correlation
-- among the occupancies of multiple sites assigned to sets of atoms: the best 
-- site is chosen only on the basis of highest occupancy. Note, however, that 
-- alternate conformation sets where correlation is implied are generally 
-- constrained in crystallographic refinement to have uniform occupancy, and 
-- will thus be selected as a set. For ensemble models the model which assigns 
-- coordinates to the most atoms is chosen.  If numbers of coordinates are the 
-- same, the model occurring first in the PDB file is selected.  The single 
-- best model includes complete coordinates for all nonpolymer components, but 
-- omits those classified as "solvent".  Model type is 3 for this model. 

-- The model named "NCBI backbone" represents a simple view intended for 
-- graphic displays and rapid transmission over a network.  It includes only 
-- alpha carbon or backbone phosphate coordinates for biopolymers. It is based 
-- on selection of alpha-carbon and backbone phosphate atoms from the "NCBI
-- all atom" model. The model type is set to 2.  An even simpler model gives 
-- only a cartoon representation, using cylinders corresponding to secondary 
-- structure elements.  This is named "NCBI vector", and has model type 1.

-- The models named "PDB Model 1", "PDB Model 2", etc. represent the complete
-- information provided by PDB, including full descriptions of statistical
-- disorder.  The name of the model is based on the contents of the PDB MODEL
-- record, with a default name of "PDB Model 1" for PDB files which contain 
-- only a single model.  Construction of these models is based on the 
-- assumption that contents of the PDB "alternate conformation" field are 
-- intended to imply correlation among the occupancies of atom sets flagged by
-- the same identifier.  The special flag " " (blank) is assumed to indicate 
-- sites occupied in all alternate conformations, and sites flagged otherwise,
-- together with " ", to indicate a distinct member of an ensemble of 
-- alternate conformations.  Note that construction of ensemble members 
-- according to these assumptions requires two validation checks on PDB 
-- "alternate conformation" flags: they must be unique among sites assigned to 
-- the same atom, and the special " " flag must occur only for unique
-- sites.  Sites which violate the first check are flagged as "u", for 
-- "unknown"; they are omitted from all ensemble definitions but are 
-- nonetheless retained in the coordinate list.  Sites which violate the second
-- check are flagged "b" for "blank", and are included in an appropriately
-- named ensemble.  The model type for pdb all models is 4.

-- Note that in the MMDB database models are stored in the ASN.1 stream in
-- order of increasing model type value.  Since models occur as the last item
-- in a biostruc, parsers may avoid reading the entire stream if the desired
-- model is one of the simplified types, which occur first in the stream. This
-- can save considerable I/O time, particularly for large ensemble models from 
-- NMR determinations.

-- Biostruc-model: one three-dimensional model of a biostruc.  Only id and
-- type are mandatory.
Biostruc-model ::= SEQUENCE {
	id			Model-id,
	type			Model-type,	-- complexity level of this view
	descr			SEQUENCE OF Model-descr OPTIONAL,
	model-space		Model-space OPTIONAL,	-- units and reference frame
	model-coordinates	SEQUENCE OF Model-coordinate-set OPTIONAL }

-- Model-id: integer identifier of a Biostruc-model; exported from this
-- module.
Model-id ::= INTEGER

-- Model-type: complexity level of a model.  Models are stored in the stream
-- in order of increasing value, so the simplified views come first.
Model-type ::= INTEGER {
	ncbi-vector(1),	-- cartoon of secondary structure elements
	ncbi-backbone(2),	-- alpha carbon / backbone phosphate coordinates only
	ncbi-all-atom(3),	-- complete coordinates for the "single best" model
	pdb-model(4),	-- complete PDB information, including disorder
	other(255)}

-- Model-descr: one descriptor of a model or of a coordinate set.
Model-descr ::= CHOICE {
	name			VisibleString,	-- model name, e.g. "NCBI all atom" or "PDB Model 1"
	pdb-reso                VisibleString,	-- NOTE(review): presumably the PDB resolution text; confirm
	pdb-method              VisibleString,	-- NOTE(review): presumably the PDB experimental method; confirm
	pdb-comment		VisibleString,
	other-comment		VisibleString,
	attribution		Pub }

-- The model space defines measurement units and any external reference frame.
-- Coordinates refer to a right-handed orthogonal system defined on axes 
-- tagged x, y and z in the coordinate and feature definitions of a biostruc.
-- Coordinates from PDB-derived structures are reported without change, in
-- angstrom units.  The units of temperature and occupancy factors are not
-- defined explicitly in PDB, but are inferred from their value range.

-- Model-space: measurement units of a model and its optional external
-- reference frame.  Only coordinate-units is mandatory.  Each units field
-- uses 3 for other and 255 for unknown.
Model-space ::= SEQUENCE {
	coordinate-units	ENUMERATED {
					angstroms(1),
					nanometers(2),
					other(3),
					unknown(255)},
	thermal-factor-units	ENUMERATED {
					b(1),
					u(2),
					other(3),
					unknown(255)} OPTIONAL,
	occupancy-factor-units	ENUMERATED {
					fractional(1),
					electrons(2),
					other(3),
					unknown(255)} OPTIONAL,
	density-units		ENUMERATED {
					electrons-per-unit-volume(1),
					arbitrary-scale(2),
					other(3),
					unknown(255)} OPTIONAL,
	reference-frame		Reference-frame OPTIONAL }

-- An external reference frame is a pointer to another biostruc, with an 
-- optional operator to rotate and translate coordinates into its model space.
-- This item is intended for representation of homology-derived model 
-- structures, and is not present for structures from PDB.

-- Reference-frame: pointer to another biostruc whose model space serves as
-- the external frame, with an optional transform into that space.
Reference-frame ::= SEQUENCE {
	biostruc-id		Biostruc-id,
	rotation-translation	Transform OPTIONAL }	-- rotates and translates coordinates

-- Atomic coordinates may be assigned literally or by reference to another
-- biostruc.  The reference coordinate type is used to represent homology-
-- derived model structures.  PDB-derived structures have literal coordinates.

-- Referenced coordinates identify another biostruc, any transformation to be 
-- applied to coordinates from that biostruc, and a mapping of the chemical
-- graph of the present biostruc onto that of the referenced biostruc.  They
-- give an "alignment" of atoms in the current biostruc with those in another,
-- from which the coordinates of matched atoms may be retrieved.  For non-
-- atomic models "alignment" may also be represented by molecule and residue
-- equivalence lists.  Referenced coordinates are a data item intended for 
-- representation of homology models, with an explicit pointer to their source
-- information. They do not occur in PDB-derived models.

-- Model-coordinate-set: one set of coordinates within a model, given either
-- literally or by reference to another biostruc.
Model-coordinate-set ::= SEQUENCE {
	id			Model-coordinate-set-id OPTIONAL,
	descr			SEQUENCE OF Model-descr OPTIONAL,
	coordinates		CHOICE {
		literal			Coordinates,	-- coordinate values stored here
		reference		Chem-graph-alignment } }	-- alignment onto another biostruc
	
-- Model-coordinate-set-id: integer identifier of a Model-coordinate-set;
-- exported from this module.
Model-coordinate-set-id ::= INTEGER


-- Literal coordinates map chemical components into the model space.  Three 
-- mapping types are allowed, atomic coordinate models, density-grid models,
-- and surface models. A model consists of a sequence of such coordinate sets, 
-- and may thus combine coordinate subsets which have a different source.  
-- PDB-derived models contain a single atomic coordinate set, as they by
-- definition represent information from a single source.

-- Coordinates: literal coordinates in one of the three allowed mapping
-- types.
Coordinates ::= CHOICE {		
	atomic			Atomic-coordinates,	-- atomic coordinate model
	surface			Surface-coordinates,	-- surface model
	density			Density-coordinates }	-- density-grid model

-- Literal atomic coordinate values give location, occupancy and order
-- parameters, and a pointer to a specific atom defined in the biostruc graph.
-- Temperature and occupancy factors have their conventional crystallographic
-- definitions, with units defined in the model space declaration.  Atoms,
-- sites, temperature-factors, occupancies and alternate-conformation-ids
-- are parallel arrays, i.e. they have the same number of values as given by
-- number-of-points. Conformation ensembles represent distinct correlated-
-- disorder subsets of the coordinates.  They will be present only for certain 
-- "views" of PDB structures, as described above. Their derivation from PDB-
-- supplied "alternate-conformation" ids is described below. 

-- Atomic-coordinates: literal atomic coordinates stored as parallel arrays.
-- The atoms, sites, temperature-factors, occupancies and alternate-conf-ids
-- fields each hold number-of-points values.
Atomic-coordinates ::= SEQUENCE {
	number-of-points	INTEGER,	-- length of each parallel array
	atoms			Atom-pntrs,	-- which atom each point refers to
	sites			Model-space-points,	-- scaled x, y, z positions
	temperature-factors	Atomic-temperature-factors OPTIONAL,
	occupancies		Atomic-occupancies OPTIONAL, 
	alternate-conf-ids	Alternate-conformation-ids OPTIONAL,
	conf-ensembles		SEQUENCE OF Conformation-ensemble OPTIONAL }	-- correlated-disorder subsets

-- The atoms whose location is described by each coordinate are identified
-- via a hierarchical pointer to the chemical graph of the biomolecular
-- assembly.  Coordinates may be matched with atoms in the chemical structure
-- by the values of the molecule, residue and atom id's given here,  which 
-- match exactly the items of the same type defined in Biostruc-graph.

-- Coordinates are given as integer values, with a scale factor to convert 
-- to real values for each x, y or z, in the units indicated in model-space.
-- Integer values must be divided by the scale factor.  This use of integer
-- values reduces the ASN.1 stream size. The scale factors for temperature 
-- factors and occupancies are given separately, but must be used in the same 
-- fashion to produce properly scaled real values.

-- Point locations stored as scaled integers; divide each x, y or z by
-- scale-factor to obtain the real value in model-space units.
Model-space-points ::= SEQUENCE {
    scale-factor INTEGER,
    x            SEQUENCE OF INTEGER,
    y            SEQUENCE OF INTEGER,
    z            SEQUENCE OF INTEGER
}

-- Temperature factors, given in either isotropic or anisotropic form.
Atomic-temperature-factors ::= CHOICE {
    isotropic   Isotropic-temperature-factors,
    anisotropic Anisotropic-temperature-factors
}

-- Isotropic temperature factors as scaled integers, one list of b values
-- with its own scale-factor.
Isotropic-temperature-factors ::= SEQUENCE {
    scale-factor INTEGER,
    b            SEQUENCE OF INTEGER
}

-- Anisotropic temperature factors as scaled integers: six lists
-- (b-11, b-12, b-13, b-22, b-23, b-33) sharing one scale-factor.
Anisotropic-temperature-factors ::= SEQUENCE {
    scale-factor INTEGER,
    b-11         SEQUENCE OF INTEGER,
    b-12         SEQUENCE OF INTEGER,
    b-13         SEQUENCE OF INTEGER,
    b-22         SEQUENCE OF INTEGER,
    b-23         SEQUENCE OF INTEGER,
    b-33         SEQUENCE OF INTEGER
}

-- Occupancies as scaled integers with their own scale-factor.
Atomic-occupancies ::= SEQUENCE {
    scale-factor INTEGER,
    o            SEQUENCE OF INTEGER
}

-- An alternate conformation id is optionally associated with each coordinate. 
-- Aside from corrections due to the validation checks described above, the 
-- contents of MMDB Alternate-conformation-ids are identical to the PDB 
-- "alternate conformation" field.

-- List of alternate conformation ids; within Atomic-coordinates it is one of
-- the parallel arrays, so it carries one id per coordinate.
Alternate-conformation-ids ::= SEQUENCE OF Alternate-conformation-id 

-- A single alternate conformation flag, for example " " or "A".
Alternate-conformation-id ::= VisibleString 

-- Correlated disorder ensemble is defined by a set of alternate conformation 
-- id's which identify coordinates relevant to that ensemble. These are 
-- defined from the validated and corrected contents of the PDB "alternate
-- conformation" field as described above.  A given ensemble, for example, may
-- consist of atom sites flagged by " " and "A" Alternate-conformation-ids. 
-- Names for ensembles are constructed from these flags. This example would be
-- named, in its description, "PDB Ensemble blank plus A".

-- Note that this interpretation is consistent with common PDB usage of the 
-- "alternate conformation" field, but that PDB specifications do not formally
-- distinguish between correlated and uncorrelated disorder in crystallographic
-- models. Ensembles identified in MMDB thus may not correspond to the meaning
-- intended by PDB or the depositor.  No information is lost, however, and
-- if the intended meaning is known alternative ensemble descriptions may be
-- reconstructed directly from the Alternate-conformation-ids.

-- Note that correlated disorder as defined here is allowed within an atomic 
-- coordinate set but not between the multiple sets which may define a model. 
-- Multiple sets within the same model are intended as a means to represent 
-- assemblies modeled from different experimentally determined structures,
-- where correlated disorder between coordinate sets is not relevant.

-- A correlated-disorder ensemble: a name plus the alternate conformation ids
-- that flag the coordinates belonging to it.
Conformation-ensemble ::= SEQUENCE {
    name         VisibleString,
    alt-conf-ids SEQUENCE OF Alternate-conformation-id
}


-- Literal surface coordinates define the chemical components whose structure
-- is described by a surface, and the surface itself.  The surface may be
-- either a regular geometric solid or a triangle-mesh of arbitrary shape.

-- Surface model: the chemical components being described and the surface
-- itself, which is a regular geometric solid or a triangle mesh.
Surface-coordinates ::= SEQUENCE {
    contents Chem-graph-pntrs,
    surface  CHOICE {
        sphere      Sphere,
        cone        Cone,
        cylinder    Cylinder,
        brick       Brick,
        tmesh       T-mesh,
        triangles   Triangles
    }
}

-- Triangle-mesh surface given as scaled integer points.
-- NOTE(review): the meaning of the swap flags is not documented in this
-- file; confirm against the consumer before relying on it.
T-mesh ::= SEQUENCE {
    number-of-points INTEGER,
    scale-factor     INTEGER,
    swap             SEQUENCE OF BOOLEAN,
    x                SEQUENCE OF INTEGER,
    y                SEQUENCE OF INTEGER,
    z                SEQUENCE OF INTEGER
}

-- Surface given as scaled integer points plus three lists v1, v2, v3.
-- NOTE(review): v1, v2, v3 presumably index into x, y, z to name the three
-- vertices of each triangle; confirm.
Triangles ::= SEQUENCE {
    number-of-points    INTEGER,
    scale-factor        INTEGER,
    x                   SEQUENCE OF INTEGER,
    y                   SEQUENCE OF INTEGER,
    z                   SEQUENCE OF INTEGER,
    number-of-triangles INTEGER,
    v1                  SEQUENCE OF INTEGER,
    v2                  SEQUENCE OF INTEGER,
    v3                  SEQUENCE OF INTEGER
}


-- Literal density coordinates define the chemical components whose structure
-- is described by a density grid, parameters of this grid, and density values.

-- Density-grid model: the chemical components described, the grid
-- parameters, and the density values as scaled integers.
-- NOTE(review): fastest-varying / slowest-varying presumably give the axis
-- order in which the density list is stored; confirm.
Density-coordinates ::= SEQUENCE {
    contents        Chem-graph-pntrs,
    grid-corners    Brick,
    grid-steps-x    INTEGER,
    grid-steps-y    INTEGER,
    grid-steps-z    INTEGER,
    fastest-varying ENUMERATED {
        x(1),
        y(2),
        z(3)
    },
    slowest-varying ENUMERATED {
        x(1),
        y(2),
        z(3)
    },
    scale-factor    INTEGER,
    density         SEQUENCE OF INTEGER
}


END

-- mmdb3.asn
--$Revision: 6.3 $
--**********************************************************************
--
--  Biological Macromolecule 3-D Structure Data Types for MMDB,
--                A Molecular Modeling Database
--
--  Definitions for structural features and biostruc addressing
--
--  By Hitomi Ohkawa, Jim Ostell, Chris Hogue and Steve Bryant 
--
--  National Center for Biotechnology Information
--  National Institutes of Health
--  Bethesda, MD 20894 USA
--
--  July, 1996
--
--**********************************************************************

MMDB-Features DEFINITIONS ::=

BEGIN

EXPORTS Biostruc-feature-set, Chem-graph-pntrs, Atom-pntrs,
	Chem-graph-alignment, Sphere, Cone, Cylinder, Brick, Transform,
	Biostruc-feature-set-id, Biostruc-feature-id;

IMPORTS Biostruc-id FROM MMDB
	Molecule-id, Residue-id, Atom-id FROM MMDB-Chemical-graph
	Model-id, Model-coordinate-set-id FROM MMDB-Structural-model
	User-object FROM NCBI-General
	Pub FROM NCBI-Pub;

-- Named model features refer to sets of residues or atoms, or a region in 
-- the model space.  A few specific feature types are allowed for compatibility
-- with PDB usage, but the purpose of a named model feature is simply to
-- associate various types of information with a set of atoms or 
-- residues, or a spatially-defined region of the model structure.  They also
-- support association of various properties with each residue or atom of a
-- set.

-- PDB-derived secondary structure defines a single feature, represented as a
-- sequence of residue motifs, as are the contents of PDB SITE and
-- FTNOTE records.  NCBI-assigned core and secondary structure descriptions
-- are also represented as a sequence of residue motifs.

-- A set of features with an id and optional descriptions.
Biostruc-feature-set ::= SEQUENCE {
    id       Biostruc-feature-set-id,
    descr    SEQUENCE OF Biostruc-feature-set-descr OPTIONAL,
    features SEQUENCE OF Biostruc-feature
}

-- Integer identifier of a feature set.
Biostruc-feature-set-id ::= INTEGER

-- One descriptive item attached to a feature set: a name, a comment
-- (PDB or other), or a publication giving attribution.
Biostruc-feature-set-descr ::= CHOICE {
    name          VisibleString,
    pdb-comment   VisibleString,
    other-comment VisibleString,
    attribution   Pub
}

-- An explicitly specified type in Biostruc-feature allows for
-- efficient extraction and indexing of feature sets of a specific type. 
-- Special types are provided for coloring and rendering, as
-- needed by molecular graphics programs.
 
-- A single feature: an optional id and name, an explicit type code, an
-- optional property (coloring, rendering, transform, camera, script or a
-- user object) and an optional location saying what the feature applies to.
Biostruc-feature ::= SEQUENCE {
    id       Biostruc-feature-id OPTIONAL,
    name     VisibleString OPTIONAL,
    type     INTEGER {
        helix(1),
        strand(2),
        sheet(3),
        turn(4),
        site(5),
        footnote(6),
        comment(7),         -- new
        subgraph(100),      -- NCBI domain reserved
        region(101),
        core(102),          -- user core definition
        supercore(103),     -- NCBI reserved
        color(150),         -- new
        render(151),        -- new
        label(152),         -- new
        transform(153),     -- new
        camera(154),        -- new
        script(155),        -- for scripts
        alignment(200),     -- VAST reserved
        similarity(201),
        multalign(202),     -- multiple alignment
        indirect(203),      -- new
        cn3dstate(254),     -- Cn3D reserved
        other(255)
    } OPTIONAL,
    property CHOICE {
        color       Color-prop,
        render      Render-prop,
        transform   Transform,
        camera      Camera,
        script      Biostruc-script,
        user        User-object
    } OPTIONAL,
    location CHOICE {
        subgraph    Chem-graph-pntrs,
        region      Region-pntrs,
        alignment   Chem-graph-alignment,
        similarity  Region-similarity,
        indirect    Other-feature       -- new
    } OPTIONAL
}

-- Other-feature allows for specifying location via reference to another
-- Biostruc-feature and its location.

-- Reference to another feature, addressed by biostruc, feature set and
-- feature id.
Other-feature ::= SEQUENCE {
    biostruc-id Biostruc-id,
    set         Biostruc-feature-set-id,
    feature     Biostruc-feature-id
}

-- Integer identifier of a feature.
Biostruc-feature-id ::= INTEGER

-- Atom, residue or molecule motifs describe a substructure defined by a set
-- of nodes from the chemical graph. PDB secondary structure features are
-- described as a residue motif, since they are not associated with any one of
-- the multiple models that may be provided in a PDB file.  NCBI-assigned
-- secondary structure is represented in the same way, even though it is
-- model specific, since this allows for simple mapping of the structural 
-- feature onto a sequence-only representation. This addressing mode may also 
-- be used to describe features to be associated with particular atoms, 
-- as, for example, the chemical shift observed in an NMR experiment.

-- Pointers to a set of chemical graph nodes, at atom, residue or molecule
-- level.
Chem-graph-pntrs ::= CHOICE {
    atoms     Atom-pntrs,
    residues  Residue-pntrs,
    molecules Molecule-pntrs
}

-- Atom pointers: lists of molecule, residue and atom ids.
-- NOTE(review): the three lists are presumably parallel, each holding
-- number-of-ptrs entries; confirm.
Atom-pntrs ::= SEQUENCE {
    number-of-ptrs INTEGER,
    molecule-ids   SEQUENCE OF Molecule-id,
    residue-ids    SEQUENCE OF Residue-id,
    atom-ids       SEQUENCE OF Atom-id
}

-- Residue pointers, either listed explicitly or given as intervals.
Residue-pntrs ::= CHOICE {
    explicit Residue-explicit-pntrs,
    interval SEQUENCE OF Residue-interval-pntr
}

-- Explicit residue pointers: lists of molecule ids and residue ids.
-- NOTE(review): the two lists are presumably parallel with number-of-ptrs
-- entries each; confirm.
Residue-explicit-pntrs ::= SEQUENCE {
    number-of-ptrs INTEGER,
    molecule-ids   SEQUENCE OF Molecule-id,
    residue-ids    SEQUENCE OF Residue-id
}

-- A residue interval on one molecule, bounded by the from and to ids.
-- NOTE(review): whether the bounds are inclusive is not stated here.
Residue-interval-pntr ::= SEQUENCE {
    molecule-id Molecule-id,
    from        Residue-id,
    to          Residue-id
}

-- Molecule pointers: a count and a list of molecule ids.
Molecule-pntrs ::= SEQUENCE {
    number-of-ptrs INTEGER,
    molecule-ids   SEQUENCE OF Molecule-id
}

-- Region motifs describe features defined by spatial location, such as the
-- site specified by a coordinate value, or a region within a bounding volume.

-- A spatially defined region of one model, given either as coordinate
-- sites or as bounding volumes.
Region-pntrs ::= SEQUENCE {
    model-id Model-id,
    region   CHOICE {
        site        SEQUENCE OF Region-coordinates,
        boundary    SEQUENCE OF Region-boundary
    }
}

-- Coordinate sites describe a region in space by reference to individual 
-- coordinates, in a particular model.  These coordinates may be either the
-- x, y and z values of atomic coordinates, the triangles of a surface mesh, 
-- or the grid points of a density model. All are addressed in the same manner,
-- as coordinate indices which give offsets from the beginning of the 
-- coordinate data arrays.  A coordinate-index of 5, for example, refers to 
-- the 5th x, y and z values of an atomic coordinate set, the 5th v1, v2, and v3
-- values of a triangle mesh, or the 5th value in a density grid.

-- PDB SITE and FTNOTE records refer to particular atomic coordinates, and they
-- are represented as a region motif with addresses of type Region-coordinates.
-- Any names or descriptions provided by PDB are thus associated with the
-- indicated sites, in the indicated model. 

-- Coordinates addressed by index within one coordinate set of a model.
-- Indices are offsets from the beginning of the coordinate data arrays.
Region-coordinates ::= SEQUENCE {
    model-coord-set-id Model-coordinate-set-id,
    number-of-coords   INTEGER OPTIONAL,
    coordinate-indices SEQUENCE OF INTEGER OPTIONAL
}

-- Region boundaries are defined by regular solids located in the model space.  

-- A bounding volume: one of the regular solids located in model space.
Region-boundary ::= CHOICE {
    sphere   Sphere,
    cone     Cone,
    cylinder Cylinder,
    brick    Brick
}

-- A biostruc alignment establishes an equivalence of nodes in the chemical
-- graphs of two or more biostrucs. This may be mapped to a sequence
-- alignment in the case of biopolymers.
-- The 'dimension' component indicates the number of participants
-- in the alignment.  For pairwise alignments, such as VAST 
-- structure-structure alignments, the dimension will be always 2, with
-- biostruc-ids, alignment, and domain each containing two entries for an  
-- aligned pair.  The 'alignment' component contains a pair of Chem-graph-pntrs
-- specifying a like number of corresponding residues in each structure.
-- The 'domain' component specifies a region of each structure considered 
-- in the alignment.  Only one transform (for the second structure) and
-- one aligndata (for the pair) are provided for each VAST alignment.
--
-- For multiple alignments, a set of components are treated as
-- parallel arrays of length 'dimension'.
-- The 'transform' component moves each structure to align it with
-- the structure specified as the first element in the "parallel" array,
-- so necessarily the first transform is a NULL transform.
-- Align-stats are placeholders for scores.

-- Equivalence of chemical graph nodes between two or more biostrucs.
-- dimension is the number of participants; for multiple alignments the
-- list components are treated as parallel arrays of that length.
Chem-graph-alignment ::= SEQUENCE {
    dimension    INTEGER DEFAULT 2,
    biostruc-ids SEQUENCE OF Biostruc-id,
    alignment    SEQUENCE OF Chem-graph-pntrs,          -- corresponding residues
    domain       SEQUENCE OF Chem-graph-pntrs OPTIONAL, -- region considered
    transform    SEQUENCE OF Transform OPTIONAL,        -- moves onto the first structure
    aligndata    SEQUENCE OF Align-stats OPTIONAL       -- placeholders for scores
}

-- Placeholders for alignment scores; every component is optional.
-- NOTE(review): scale-factor presumably applies to the integer scores that
-- follow it; confirm.
Align-stats ::= SEQUENCE {
    descr        VisibleString OPTIONAL,
    scale-factor INTEGER OPTIONAL,
    vast-score   INTEGER OPTIONAL,
    vast-mlogp   INTEGER OPTIONAL,
    align-res    INTEGER OPTIONAL,
    rmsd         INTEGER OPTIONAL,
    blast-score  INTEGER OPTIONAL,
    blast-mlogp  INTEGER OPTIONAL,
    other-score  INTEGER OPTIONAL
}

-- A biostruc similarity describes spatial features which are similar between
-- two or more biostrucs.  Similarities are model dependent, and the model and
-- coordinate set ids of the biostrucs must be specified.  They do not 
-- necessarily map to a sequence alignment, as the regions referenced may
-- be pieces of a surface or grid, and thus not uniquely mappable to particular
-- chemical components.

-- Spatial regions that are similar between two or more biostrucs, with the
-- transforms relating them.
Region-similarity ::= SEQUENCE {
    dimension    INTEGER DEFAULT 2,
    biostruc-ids SEQUENCE OF Biostruc-id,
    similarity   SEQUENCE OF Region-pntrs,
    transform    SEQUENCE OF Transform
}

-- Geometrical primitives are used in the definition of region motifs, and 
-- also non-atomic coordinates.  Spheres, cones, cylinders and bricks are 
-- defined by a few points in the model space.

-- Sphere: a center point and a radius.
Sphere ::= SEQUENCE {
    center Model-space-point,
    radius RealValue
}

-- Cone: the two end points of its axis and the radius at the bottom.
Cone ::= SEQUENCE {
    axis-top      Model-space-point,
    axis-bottom   Model-space-point,
    radius-bottom RealValue
}

-- Cylinder: the two end points of its axis and its radius.
Cylinder ::= SEQUENCE {
    axis-top    Model-space-point,
    axis-bottom Model-space-point,
    radius      RealValue
}

-- A brick is defined by the coordinates of eight corners.  These are assumed
-- to appear in the order 000, 001, 010, 011, 100, 101, 110, 111, where the 
-- digits 0 and 1 refer respectively to the x, y and z axes of a unit cube.
-- Opposite edges are assumed to be parallel. 

-- Brick: eight corner points, listed in the order 000 through 111.
Brick ::= SEQUENCE {
    corner-000 Model-space-point,
    corner-001 Model-space-point,
    corner-010 Model-space-point,
    corner-011 Model-space-point,
    corner-100 Model-space-point,
    corner-101 Model-space-point,
    corner-110 Model-space-point,
    corner-111 Model-space-point
}

-- A single point in model space as scaled integers.
Model-space-point ::= SEQUENCE {
    scale-factor INTEGER,
    x            INTEGER,
    y            INTEGER,
    z            INTEGER
}

-- A real number carried as an integer together with its scale factor.
-- NOTE(review): presumably value = scaled-integer-value / scale-factor, as
-- for coordinates; confirm.
RealValue ::= SEQUENCE {
    scale-factor         INTEGER,
    scaled-integer-value INTEGER
}


-- A transform: an integer id and a list of moves.
Transform ::= SEQUENCE {
    id    INTEGER,
    moves SEQUENCE OF Move
}

-- One move: either a rotation or a translation.
Move ::= CHOICE {
    rotate    Rot-matrix,
    translate Trans-matrix
}
          
-- A rotation matrix is defined by 9 numbers, given by row, i.e.,
-- with column indices varying fastest.
-- Coordinates, as a matrix with columns x, y, and z, are rotated
-- via multiplication with the rotation matrix.
-- A translation matrix is defined by 3 numbers, which are added to
-- the rotated coordinates to translate them by the specified amount.

-- Rotation matrix: nine scaled integers given by row (rot-RC, with the
-- column index varying fastest) and one shared scale-factor.
Rot-matrix ::= SEQUENCE {
    scale-factor INTEGER,
    rot-11       INTEGER,
    rot-12       INTEGER,
    rot-13       INTEGER,
    rot-21       INTEGER,
    rot-22       INTEGER,
    rot-23       INTEGER,
    rot-31       INTEGER,
    rot-32       INTEGER,
    rot-33       INTEGER
}

-- Translation: three scaled integers and their scale-factor.
Trans-matrix ::= SEQUENCE {
    scale-factor INTEGER,
    tran-1       INTEGER,
    tran-2       INTEGER,
    tran-3       INTEGER
}

-- The camera is a position relative to the world coordinates
-- of the structure referred to by a location.  
-- This is used to set the initial position of the
-- camera using OpenGL.  scale is the value used to scale the
-- other values from floating point to integer

-- Camera position relative to the world coordinates of the structure.
-- scale is the factor used to convert the other values from floating point
-- to integer.
Camera ::= SEQUENCE {
    x         INTEGER,
    y         INTEGER,
    distance  INTEGER,
    angle     INTEGER,
    scale     INTEGER,
    modelview GL-matrix
}
    
    
-- Sixteen integer matrix elements m11 through m44 and a scale value.
-- NOTE(review): element naming is presumably m<row><column>; confirm.
GL-matrix ::= SEQUENCE {
    scale INTEGER,
    m11   INTEGER,
    m12   INTEGER,
    m13   INTEGER,
    m14   INTEGER,
    m21   INTEGER,
    m22   INTEGER,
    m23   INTEGER,
    m24   INTEGER,
    m31   INTEGER,
    m32   INTEGER,
    m33   INTEGER,
    m34   INTEGER,
    m41   INTEGER,
    m42   INTEGER,
    m43   INTEGER,
    m44   INTEGER
}


-- Color property: optional r, g, b integer components and an optional name.
-- NOTE(review): the value range of r, g, b is not stated in this file.
Color-prop ::= SEQUENCE {
    r    INTEGER OPTIONAL,
    g    INTEGER OPTIONAL,
    b    INTEGER OPTIONAL,
    name VisibleString OPTIONAL
}

-- Note that Render-prop is compatible with the Annmm specification,
-- i.e., its numbering scheme does not clash with the one used there.

-- Rendering directive, expressed as a named integer code.
Render-prop ::= INTEGER {
    default          (0),    -- default view
    wire             (1),    -- use wireframe
    space            (2),    -- use spacefill
    stick            (3),    -- use stick model (thin cylinders)
    ballNStick       (4),    -- use ball & stick model
    thickWire        (5),    -- thicker wireframe
    hide             (9),    -- don't show this
    name             (10),   -- display its name next to it
    number           (11),   -- display its number next to it
    pdbNumber        (12),   -- display its PDB number next to it
    objWireFrame     (150),  -- display MMDB surface object as wireframe
    objPolygons      (151),  -- display MMDB surface object as polygons
    colorsetCPK      (225),  -- color atoms like CPK models
    colorsetbyChain  (226),  -- color each chain different
    colorsetbyTemp   (227),  -- color using isotropic Temp factors
    colorsetbyRes    (228),  -- color using residue properties
    colorsetbyLen    (229),  -- color changes along chain length
    colorsetbySStru  (230),  -- color by secondary structure
    colorsetbyHydro  (231),  -- color by hydrophobicity
    colorsetbyObject (246),  -- color each object differently
    colorsetbyDomain (247),  -- color each domain differently
    other            (255)
}

--  When a Biostruc-feature with a Biostruc-script is initiated,
--  it should play the specified steps one at a time, setting the feature-do
--  list as the active display.
--  The camera can be set using a feature-do, 
--  but it may be moved independently with
--  camera-move, which specifies how to move
--  the camera dynamically during the step along the path defined (e.g.,
--  a zoom, a rotate).
--  Any value of pause (in tenths of a second) will force a pause
--  after an image is shown.
--  If waitevent is TRUE, it will await a mouse or keypress and ignore 
--  the pause value.

-- A script is an ordered list of steps, played one at a time.
Biostruc-script ::= SEQUENCE OF Biostruc-script-step

-- One step of a script.
-- NOTE(review): the purpose of extra and jump is not documented in this
-- file; jump is presumably the id of the step to continue with.
Biostruc-script-step ::= SEQUENCE {
    step-id     Step-id,
    step-name   VisibleString OPTIONAL,
    feature-do  SEQUENCE OF Other-feature OPTIONAL, -- features made the active display
    camera-move Transform OPTIONAL,                 -- camera motion during the step
    pause       INTEGER DEFAULT 10,                 -- in tenths of a second
    waitevent   BOOLEAN,                            -- TRUE: await mouse or keypress, ignore pause
    extra       INTEGER,
    jump        Step-id OPTIONAL
}

-- Integer identifier of a script step.
Step-id ::= INTEGER

END

-- ncbimime.asn
--$Revision: 6.12 $
--****************************************************************
--
--  NCBI MIME type (chemical/ncbi-asn1-ascii and chemical/ncbi-asn1-binary)
--  by Jonathan Epstein, February 1996
--
--****************************************************************

NCBI-Mime DEFINITIONS ::=
BEGIN

EXPORTS Ncbi-mime-asn1;
IMPORTS Biostruc, Biostruc-annot-set FROM MMDB
    Cdd FROM NCBI-Cdd
	Seq-entry FROM NCBI-Seqset
	Seq-annot FROM NCBI-Sequence
    Medline-entry FROM NCBI-Medline
    Cn3d-style-dictionary, Cn3d-user-annotations FROM NCBI-Cn3d;

-- Top-level MIME payload: exactly one of the bundles below.
Ncbi-mime-asn1 ::= CHOICE {
    entrez      Entrez-general,             -- just a structure
    alignstruc  Biostruc-align,             -- structures & sequences & alignments
    alignseq    Biostruc-align-seq,         -- sequence alignment
    strucseq    Biostruc-seq,               -- structure & sequences
    strucseqs   Biostruc-seqs,              -- structure & sequences & alignments
    general     Biostruc-seqs-aligns-cdd    -- all-purpose "grab bag"
    -- others may be added here in the future
}

-- generic bundle of sequence and alignment info
Bundle-seqs-aligns ::= SEQUENCE {
    -- every component is optional
    sequences        SET OF Seq-entry OPTIONAL,         -- sequences
    seqaligns        SET OF Seq-annot OPTIONAL,         -- sequence alignments
    strucaligns      Biostruc-annot-set OPTIONAL,       -- structure alignments
    imports          SET OF Seq-annot OPTIONAL,         -- imports (updates in Cn3D)
    style-dictionary Cn3d-style-dictionary OPTIONAL,    -- Cn3D stuff
    user-annotations Cn3d-user-annotations OPTIONAL
}

-- All-purpose bundle: sequence and alignment data (directly or inside a
-- CDD), optional structures, and the type of structure to load when none
-- are present.
Biostruc-seqs-aligns-cdd ::= SEQUENCE {
    seq-align-data CHOICE {
        bundle  Bundle-seqs-aligns,     -- either seqs + alignments
        cdd     Cdd                     -- or CDD (which contains these)
    },
    structures     SET OF Biostruc OPTIONAL,
    -- type of structures to load if not present; meanings and values are
    -- the same as MMDB's Model-type
    structure-type ENUMERATED {
        ncbi-backbone(2),
        ncbi-all-atom(3),
        pdb-model(4)
    } OPTIONAL
}

-- A master structure, the structures aligned to it, and the accompanying
-- structure alignments, sequences and sequence alignments.
Biostruc-align ::= SEQUENCE {
    master           Biostruc,
    slaves           SET OF Biostruc,
    alignments       Biostruc-annot-set,    -- structure alignments
    sequences        SET OF Seq-entry,      -- sequences
    seqalign         SET OF Seq-annot,
    style-dictionary Cn3d-style-dictionary OPTIONAL,
    user-annotations Cn3d-user-annotations OPTIONAL
}

-- Display seq structure align only: sequences and their alignments,
-- without structures.
Biostruc-align-seq ::= SEQUENCE {
    sequences        SET OF Seq-entry,      -- sequences
    seqalign         SET OF Seq-annot,
    style-dictionary Cn3d-style-dictionary OPTIONAL,
    user-annotations Cn3d-user-annotations OPTIONAL
}

-- Display structure seq (added by yanli): one structure with its sequences.
Biostruc-seq ::= SEQUENCE {
    structure        Biostruc,
    sequences        SET OF Seq-entry,
    style-dictionary Cn3d-style-dictionary OPTIONAL,
    user-annotations Cn3d-user-annotations OPTIONAL
}

-- Display blast alignment along with neighbor's structure (added by yanli).
Biostruc-seqs ::= SEQUENCE {
    structure        Biostruc,
    sequences        SET OF Seq-entry,      -- sequences
    seqalign         SET OF Seq-annot,
    style-dictionary Cn3d-style-dictionary OPTIONAL,
    user-annotations Cn3d-user-annotations OPTIONAL
}

-- Style code carried in Entrez-general.
Entrez-style ::= ENUMERATED {
    docsum     (1),
    genbank    (2),
    genpept    (3),
    fasta      (4),
    asn1       (5),
    graphic    (6),
    alignment  (7),
    globalview (8),
    report     (9),
    medlars    (10),
    embl       (11),
    pdb        (12),
    kinemage   (13)
}

-- A single Entrez record: optional title, the data item itself, its style,
-- and an optional location string.
Entrez-general ::= SEQUENCE {
    title    VisibleString OPTIONAL,
    data     CHOICE {
        ml          Medline-entry,
        prot        Seq-entry,
        nuc         Seq-entry,
        genome      Seq-entry,
        structure   Biostruc,
        strucAnnot  Biostruc-annot-set
    },
    style    Entrez-style,
    location VisibleString OPTIONAL
}
END

-- objprt.asn
--$Revision: 6.0 $
--********************************************************************
--
--  Print Templates
--  James Ostell, 1993
--
--
--********************************************************************

NCBI-ObjPrt DEFINITIONS ::=
BEGIN

EXPORTS PrintTemplate, PrintTemplateSet;

-- A named print template and the format it applies.
PrintTemplate ::= SEQUENCE {
    name      TemplateName,             -- name for this template
    labelfrom VisibleString OPTIONAL,   -- ASN.1 path to get label from
    format    PrintFormat
}

-- Template names are plain strings.
TemplateName ::= VisibleString

-- An ordered collection of templates.
PrintTemplateSet ::= SEQUENCE OF PrintTemplate

-- How to print one ASN.1 component: its path, optional label, prefix and
-- suffix text, and the form to use.
PrintFormat ::= SEQUENCE {
    asn1   VisibleString,           -- ASN.1 partial path for this
    label  VisibleString OPTIONAL,  -- printable label
    prefix VisibleString OPTIONAL,
    suffix VisibleString OPTIONAL,
    form   PrintForm
}

-- Forms for various ASN.1 components.
PrintForm ::= CHOICE {
    block        PrintFormBlock,
    boolean      PrintFormBoolean,
    enum         PrintFormEnum,
    text         PrintFormText,
    use-template TemplateName,
    user         UserFormat,
    null         NULL               -- rarely used
}

-- User-supplied formatting: a print function name and an optional default
-- function name.
-- NOTE(review): how the two names are resolved is not shown in this file.
UserFormat ::= SEQUENCE {
    printfunc   VisibleString,
    defaultfunc VisibleString OPTIONAL
}

-- Form for SEQUENCE and SET: an optional separator and the formats of the
-- components.
PrintFormBlock ::= SEQUENCE {
    separator  VisibleString OPTIONAL,
    components SEQUENCE OF PrintFormat
}

-- Form for BOOLEAN: optional strings for the true and false cases.
PrintFormBoolean ::= SEQUENCE {
    true  VisibleString OPTIONAL,
    false VisibleString OPTIONAL
}

-- Form for enumerated values: an optional list of strings.
PrintFormEnum ::= SEQUENCE {
    values SEQUENCE OF VisibleString OPTIONAL
}

-- Form for text: an optional text function name.
PrintFormText ::= SEQUENCE {
    textfunc VisibleString OPTIONAL
}
    
END


-- omssa.asn
-- $Id: omssa.asn 142986 2008-10-14 13:50:26Z lewisg $
--**********************************************************************
--
--  OMSSA (Open Mass Spectrometry Search Algorithm) data definitions
--  Lewis Geer, 2003
--
--  make using something like
--  "datatool -m omssa.asn -oc ObjOmssa -oA -od omssa.def"
--
--  note that this file requires omssa.def
--
--**********************************************************************

OMSSA DEFINITIONS ::=
BEGIN

IMPORTS Bioseq FROM NCBI-Sequence;

-- Generic holder for experimental info

-- A name/value pair of strings.
NameValue ::= SEQUENCE {
    name  VisibleString,
    value VisibleString
}

-- Holds a single spectrum

-- m/z and abundance values are stored as scaled integers.
MSSpectrum ::= SEQUENCE {
    number      INTEGER,                            -- unique number of spectrum
    charge      SEQUENCE OF INTEGER,                -- may be more than one if unknown
    precursormz INTEGER,                            -- scaled precursor m/z, scale is in MSSearchSettings
    mz          SEQUENCE OF INTEGER,                -- scaled product m/z
    abundance   SEQUENCE OF INTEGER,                -- scaled product abundance
    iscale      REAL,                               -- abundance scale, float to integer
    ids         SEQUENCE OF VisibleString OPTIONAL, -- ids/filenames
    namevalue   SEQUENCE OF NameValue OPTIONAL      -- extra info: retention times, etc.
}


-- Holds a set of spectra

MSSpectrumset ::= SEQUENCE OF MSSpectrum


-- enumerate enzymes

-- Named integer codes for the supported enzymes.
-- NOTE(review): max (21) presumably marks the count of defined enzymes
-- (codes 0 to 20); confirm.
MSEnzymes ::= INTEGER {
    trypsin        (0),
    argc           (1),
    cnbr           (2),
    chymotrypsin   (3),
    formicacid     (4),
    lysc           (5),
    lysc-p         (6),
    pepsin-a       (7),
    tryp-cnbr      (8),
    tryp-chymo     (9),
    trypsin-p      (10),
    whole-protein  (11),
    aspn           (12),
    gluc           (13),
    aspngluc       (14),
    top-down       (15),
    semi-tryptic   (16),
    no-enzyme      (17),
    chymotrypsin-p (18),
    aspn-de        (19),
    gluc-de        (20),
    max            (21),
    none           (255)
}


-- enumerate modifications

-- Named integer codes for modifications.  Codes 119-128 and 142-161 are
-- user-defined slots (usermod1 to usermod30); codes 186-230 are unnamed
-- placeholders (mod186 to mod230); max (231) is the number of mods.
-- Several identifiers appear to be misspelled (e.g. carbamidometylh,
-- suphonem, methythiold); they are part of the value notation, so they
-- must not be renamed.
MSMod ::= INTEGER {
    methylk (0),          -- methylation of K
    oxym (1),             -- oxidation of methionine
    carboxymethylc (2),   -- carboxymethyl cysteine
    carbamidomethylc(3),  -- carbamidomethyl cysteine
    deamidationkq (4),    -- deamidation of K and Q (NOTE(review): N and Q may be intended; confirm)
    propionamidec (5),    -- propionamide cysteine
    phosphorylations (6), -- phosphorylation of S
    phosphorylationt (7), -- phosphorylation of T
    phosphorylationy (8), -- phosphorylation of Y
    ntermmcleave (9),     -- N terminal methionine cleavage
    ntermacetyl (10),     -- N terminal protein acetyl
    ntermmethyl (11),     -- N terminal protein methyl
    ntermtrimethyl (12),  -- N terminal protein trimethyl
    methythiold (13),     -- beta methythiolation of D
    methylq (14),         -- methylation of Q
    trimethylk (15),      -- trimethylation of K
    methyld (16),         -- methylation of D
    methyle (17),         -- methylation of E
    ctermpepmethyl (18),     -- C terminal methylation
    trideuteromethyld (19), -- trideuteromethylation of D
    trideuteromethyle (20), -- trideuteromethylation of E
    ctermpeptrideuteromethyl (21),  -- C terminal trideuteromethylation
    nformylmet (22),
    twoamino3oxobutanoicacid (23),
    acetylk (24),
    ctermamide (25),
    bmethylthiold (26),
    carbamidomethylk (27),
    carbamidometylh (28),
    carbamidomethyld (29),
    carbamidomethyle (30),
    carbamylk (31),
    ntermcarbamyl (32),
    citrullinationr (33),
    cysteicacidc (34),
    diiodinationy (35),
    dimethylk (36),
    dimethylr (37),
    ntermpepdimethyl (38),
    dihydroxyf (39),
    thioacetylk (40),
    ntermpeptioacetyl (41),
    farnesylationc (42),
    formylk (43),
    ntermpepformyl (44),
    formylkynureninw (45),
    phef (46),
    gammacarboxyld (47),
    gammacarboxyle (48),
    geranylgeranylc (49),
    ntermpepglucuronylg (50),
    glutathionec (51),
    glyglyk (52),
    guanidinationk (53),
    his2asnh (54),
    his2asph (55),
    ctermpephsem (56),
    ctermpephselactm (57),
    hydroxykynureninw (58),
    hydroxylationd (59),
    hydroxylationk (60),
    hydroxylationn (61),
    hydroxylationp (62),
    hydroxylationf (63),
    hydroxylationy (64),
    iodinationy (65),
    kynureninw (66),
    lipoylk (67),
    ctermpepmeester (68),
    meesterd (69),
    meestere (70),
    meesters (71),
    meestery (72),
    methylc (73),
    methylh (74),
    methyln (75),
    ntermpepmethyl (76),
    methylr (77),
    ntermpepmyristoyeylationg (78),
    ntermpepmyristoyl4hg (79),
    ntermpepmyristoylationg (80),
    myristoylationk (81),
    ntermformyl (82),
    nemc (83),
    nipcam (84),
    nitrow (85),
    nitroy (86),
    ctermpepo18 (87),
    ctermpepdio18 (88),
    oxyh (89),
    oxyw (90),
    ppantetheines (91),
    palmitoylationc (92),
    palmitoylationk (93),
    palmitoylations (94),
    palmitoylationt (95),
    phospholosss (96),
    phospholosst (97),
    phospholossy (98),
    phosphoneutrallossc (99),
    phosphoneutrallossd (100),
    phosphoneutrallossh (101),
    propionylk (102),
    ntermpeppropionyl (103),
    propionylheavyk (104),
    ntermpeppropionylheavy (105),
    pyridylk (106),
    ntermpeppyridyl (107),
    ntermpeppyrocmc (108),
    ntermpeppyroe (109),
    ntermpeppyroq (110),
    pyroglutamicp (111),
    spyridylethylc (112),
    semetm (113),
    sulfationy (114),
    suphonem (115),
    triiodinationy (116),
    trimethylationr (117),
    ntermpeptripalmitatec (118),
    usermod1 (119),  -- start of user defined mods
    usermod2 (120),
    usermod3 (121),
    usermod4 (122),
    usermod5 (123),
    usermod6 (124),
    usermod7 (125),
    usermod8 (126),
    usermod9 (127),
    usermod10 (128), -- end of user defined mods
    icatlight (129),
    icatheavy (130),
    camthiopropanoylk (131),
    phosphoneutrallosss (132),
    phosphoneutrallosst (133),
    phosphoetdlosss (134),
    phosphoetdlosst (135),
    arg-13c6 (136),
    arg-13c6-15n4 (137),
    lys-13c6 (138),
    oxy18 (139),
    beta-elim-s (140),
    beta-elim-t (141),
    usermod11 (142),  -- second range of user defined mods (usermod11 to usermod30)
    usermod12 (143),
    usermod13 (144),
    usermod14 (145),
    usermod15 (146),
    usermod16 (147),
    usermod17 (148),
    usermod18 (149),
    usermod19 (150),
    usermod20 (151),
    usermod21 (152),
    usermod22 (153),
    usermod23 (154),
    usermod24 (155),
    usermod25 (156),
    usermod26 (157),
    usermod27 (158),
    usermod28 (159),
    usermod29 (160),
    usermod30 (161),
    sulfinicacid (162),
    arg2orn (163),
    dehydro (164),
    carboxykynurenin (165),
    sumoylation (166),
    iTRAQ114nterm (167),
    iTRAQ114K (168),
    iTRAQ114Y (169),
    iTRAQ115nterm (170),
    iTRAQ115K (171),
    iTRAQ115Y (172),
    iTRAQ116nterm (173),
    iTRAQ116K (174),
    iTRAQ116Y (175),
    iTRAQ117nterm (176),
    iTRAQ117K (177),
    iTRAQ117Y (178),
    mmts (179),
    lys-2H4 (180),
    lys-13C615N2 (181),
    hexNAcN (182),
    dHexHexNAcN (183),
    hexNAcS (184),
    hexNAcT (185),
    mod186 (186),  -- unnamed placeholder codes from here through mod230
    mod187 (187),
    mod188 (188),
    mod189 (189),
    mod190 (190),
    mod191 (191),
    mod192 (192),
    mod193 (193),
    mod194 (194),
    mod195 (195),
    mod196 (196),
    mod197 (197),
    mod198 (198),
    mod199 (199),
    mod200 (200),
    mod201 (201),
    mod202 (202),
    mod203 (203),
    mod204 (204),
    mod205 (205),
    mod206 (206),
    mod207 (207),
    mod208 (208),
    mod209 (209),
    mod210 (210),
    mod211 (211),
    mod212 (212),
    mod213 (213),
    mod214 (214),
    mod215 (215),
    mod216 (216),
    mod217 (217),
    mod218 (218),
    mod219 (219),
    mod220 (220),
    mod221 (221),
    mod222 (222),
    mod223 (223),
    mod224 (224),
    mod225 (225),
    mod226 (226),
    mod227 (227),
    mod228 (228),
    mod229 (229),
    mod230 (230),
    max (231), -- maximum number of mods
    unknown(9999),  -- modification of unknown type
    none(10000)
    }

-- enumerate modification types
--
-- MSModType classifies where a modification is applied: at particular
-- amino acids, or at the N or C terminus of a protein or of a peptide,
-- optionally restricted to particular amino acids at that terminus.

MSModType ::= INTEGER {
    modaa (0),   -- at particular amino acids
    modn (1),    -- at the N terminus of a protein
    modnaa (2),  -- at the N terminus of a protein at particular amino acids
    modc (3),    -- at the C terminus of a protein
    modcaa (4),  -- at the C terminus of a protein at particular amino acids
    modnp (5),   -- at the N terminus of a peptide
    modnpaa (6), -- at the N terminus of a peptide at particular amino acids
    modcp (7),   -- at the C terminus of a peptide
    modcpaa (8), -- at the C terminus of a peptide at particular amino acids
    modmax (9)   -- the max number of modification types (count of the values above)
    }


-- mass container
--
-- Holds one mass in three variants.  The field names mirror the
-- monomass / averagemass / n15mass fields of MSModSpec; all three are
-- mandatory.

MSMassSet ::= SEQUENCE {
    monomass REAL,
    averagemass REAL,
    n15mass REAL
    }

-- Modification Definition
--
-- Describes a single modification: its MSMod identifier, where it applies
-- (MSModType), a display name, its masses, and optionally the residues it
-- applies to, a neutral loss and cross references to Unimod and PSI-MS.
-- NOTE(review): mass units are not stated in this definition (presumably Da,
-- as used elsewhere in this module; confirm).

MSModSpec ::= SEQUENCE {
    mod MSMod,  -- what is the mod
    type MSModType,  -- modification type
    name VisibleString,  -- friendly name of mod
    monomass REAL,       -- monoisotopic mass
    averagemass REAL,    -- average mass
    n15mass REAL,        -- monoisotopic n15 mass
    residues SEQUENCE OF VisibleString OPTIONAL,  -- residues to apply mod to
    neutralloss MSMassSet OPTIONAL,  -- loss after precursor mass determination
    unimod INTEGER OPTIONAL,         -- the equivalent Unimod Accession number
    psi-ms VisibleString OPTIONAL    -- the PSI-MS equivalent name  
    }
    
-- Holds a set of modifications
--
-- An ordered list of MSModSpec definitions; used for the usermods field of
-- MSSearchSettings and the modset field of MSRequest.

MSModSpecSet ::= SEQUENCE OF MSModSpec

-- How is charge to be handled?  Some input files are not clear
-- on this.  For example, a dta file only specifies one charge, 
-- even though the charge is not really known.
--
-- MSCalcPlusOne selects whether charge one (+1) is guessed at all; it is
-- the type of the calcplusone field of MSChargeHandle.

MSCalcPlusOne ::= INTEGER {
        dontcalc (0),  -- don't guess charge one
        calc (1)       -- guess charge one
        }

-- user instructions on whether to believe charges in input file
--
-- Type of the calccharge field of MSChargeHandle.  The userange choice
-- refers to the mincharge / maxcharge fields of that same structure.

MSCalcCharge ::= INTEGER {
        calculate (0),  -- guess the charge(s) from the data
        usefile (1),    -- use what the input file says
        userange (2)    -- use the charge range specified
        }

-- How to handle precursor charge
--
-- Bundles the charge-related options.  The DEFAULT values of the first two
-- fields are given numerically; per the enumerations, 1 is "calc" in
-- MSCalcPlusOne and 2 is "userange" in MSCalcCharge.  plusone is the only
-- field that must always be supplied.

MSChargeHandle ::= SEQUENCE {
        calcplusone MSCalcPlusOne DEFAULT 1,  -- do we guess charge one? (1 = calc)
        calccharge MSCalcCharge DEFAULT 2,    -- how do we handle charges? (2 = userange)
        mincharge INTEGER DEFAULT 2,          -- if userange, what is the min?
        maxcharge INTEGER DEFAULT 3,          -- if userange, what is the max?
        considermult INTEGER DEFAULT 3,       -- at which precursor charge to consider +2 ions?
        plusone REAL,                         -- what % of peaks below precursor needed to call as +1
        maxproductcharge INTEGER OPTIONAL,    -- maximum product ion charge
        prodlesspre BOOLEAN OPTIONAL          -- product charge always less than or equal to precursor?
        }
        

-- what type of atomic mass to use
--
-- Used by MSSearchSettings for both precursorsearchtype and
-- productsearchtype.  max (4) follows the last real value and equals the
-- number of search types.

MSSearchType ::= INTEGER {
        monoisotopic(0),
        average(1),
        monon15(2),
        exact(3),
        max(4)
        }
        
-- what is the charge dependence of the mass tolerance?
--
-- Type of the zdep field of MSSearchSettings.  max (2) follows the last
-- real value and equals the number of dependence modes.

MSZdependence ::= INTEGER {
        independent(0),  -- mass tol. invariant with charge
        linearwithz(1),  -- mass tol. scales with charge
        max(2)
        }
        
-- Iterative search settings
--
-- Three e-value thresholds controlling an iterative (repeated) search.  For
-- each threshold the value 0 has the special meaning given on its line.
-- Referenced by the optional iterativesettings field of MSSearchSettings.

MSIterativeSettings ::=  SEQUENCE {
        researchthresh REAL, -- e-val threshold for re-searching spectra, 0 = always re-search
        subsetthresh REAL,   -- e-val threshold for picking sequence subset, 0 = all sequences
        replacethresh REAL   -- e-val threshold for replacing hitset, 0 = only if better
        }
        
-- Library search settings
--
-- Names the libraries to search and switches the individual score
-- components on or off.  All fields are mandatory.  Referenced by the
-- optional libsearchsettings field of MSSearchSettings.

MSLibrarySettings ::= SEQUENCE {
        libnames SEQUENCE OF VisibleString, -- names of search libraries
        presearch BOOLEAN,                  -- should there be a restriction on precursor mass?
        useomssascore BOOLEAN,              -- use the omssa score?
        usereplicatescore BOOLEAN,          -- use the number of replicates score?
        qtofscore BOOLEAN                  -- use the qtof score?
        }

-- Generic search settings
--
-- One complete parameter set for a search: mass types and tolerances, peak
-- filtering, fixed and variable modifications, enzyme, sequence database,
-- hit list limits, and a series of OPTIONAL or DEFAULT extensions.
-- Note that several INTEGER switches use inverted sense (1=no, 0=yes); see
-- the comments on the individual fields.

MSSearchSettings ::= SEQUENCE {
        precursorsearchtype MSSearchType,  -- average or monoisotopic?
        productsearchtype MSSearchType,  -- average or monoisotopic?
        ionstosearch SEQUENCE OF MSIonType,  -- which ions to search?
        peptol REAL,  -- peptide mass tolerance
        msmstol REAL, -- msms mass tolerance
        zdep MSZdependence,  -- what is the charge dependence of the mass tolerance?
        cutoff REAL,  -- evalue cutoff
          -- next 3 fields define intensity fraction below
          -- which peaks will be discarded
        cutlo REAL, -- the start of the cutoff, fraction of most intense peak
        cuthi REAL, -- the end of the cutoff
        cutinc REAL, -- the increment of the cutoff
        singlewin INTEGER,  -- the size of the single charge filtering window
        doublewin INTEGER,  -- the size of the double charge filtering window
        singlenum INTEGER,  -- the number of peaks allowed in the single window
        doublenum INTEGER,  -- the number of peaks allowed in the double window
        fixed SEQUENCE OF MSMod,     -- fixed PTM's
        variable SEQUENCE OF MSMod,  -- variable PTM's
        enzyme MSEnzymes,      -- digestion enzyme
        missedcleave INTEGER,  -- number of missed cleaves allowed
        hitlistlen INTEGER DEFAULT 25,  -- the number of hits kept in memory
                                        -- for a spectrum
        db VisibleString,  -- sequence set to search, e.g. "nr"
        tophitnum INTEGER, -- number of m/z to consider in first pass
        minhit INTEGER DEFAULT 2, -- minimum number of m/z values for a valid hit
        minspectra INTEGER DEFAULT 4, -- minimum number of m/z for a valid spectrum
        scale INTEGER DEFAULT 100,  -- scale for m/z float to integer
        maxmods INTEGER DEFAULT 64,  -- maximum number of mass ladders per
                                     -- database peptide
        taxids SEQUENCE OF INTEGER OPTIONAL,  -- taxa to limit search
        chargehandling MSChargeHandle OPTIONAL,  -- how to deal with charges
        usermods MSModSpecSet OPTIONAL,  -- user defined modifications
        pseudocount INTEGER DEFAULT 1, -- min number of counts per precursor bin
        searchb1 INTEGER DEFAULT 0,    -- should b1 product be in search (1=no, 0=yes)
        searchctermproduct INTEGER DEFAULT 0, -- should c terminus ion be searched (1=no, 0=yes)
        maxproductions INTEGER DEFAULT 0,     -- max number of ions in each series (0=all)
        minnoenzyme INTEGER DEFAULT 4,        -- min number of AA in peptide for noenzyme search
        maxnoenzyme INTEGER DEFAULT 0,        -- max number of AA in peptide for noenzyme search (0=none)
        exactmass REAL OPTIONAL,              -- the threshold in Da for adding neutron
        settingid INTEGER OPTIONAL,           -- id of the search settings
        iterativesettings MSIterativeSettings OPTIONAL,   -- iterative search settings
        precursorcull INTEGER OPTIONAL,       -- turn on aggressive precursor culling for ETD (0=none)
        infiles SEQUENCE OF MSInFile OPTIONAL,         -- input files
        outfiles SEQUENCE OF MSOutFile OPTIONAL,       -- output files
        nocorrelationscore INTEGER OPTIONAL,           -- correlation score switch (1=nocorr); NOTE(review): original comment said "turn on", confirm sense
        probfollowingion REAL OPTIONAL,                -- probability of a consecutive ion (used in correlation)
        nmethionine BOOLEAN OPTIONAL,                  -- should nmethionine be cleaved?
        automassadjust REAL OPTIONAL,                  -- fraction allowable adjustment of product mass tolerance
        lomasscutoff REAL OPTIONAL,                    -- low mass filter in Daltons, unscaled
        libsearchsettings MSLibrarySettings OPTIONAL,  -- library search settings
        noprolineions SEQUENCE OF MSIonType OPTIONAL,  -- which ions to use no proline rule
        reversesearch BOOLEAN OPTIONAL,                -- do reverse search
        othersettings SEQUENCE OF NameValue OPTIONAL   -- extra search settings
        }

-- Serialization formats for search output; this is the type of the
-- outfiletype field of MSOutFile.  Names and numeric values are unchanged;
-- only the indentation of the last member (previously a tab) and the stray
-- space before the first comma were normalised.

MSSerialDataFormat ::= INTEGER {
        none (0),
        asntext (1),      -- open ASN.1 text format
        asnbinary (2),    -- open ASN.1 binary format
        xml (3),          -- open XML format
        csv (4),          -- csv (excel)
        pepxml (5),       -- pepXML format
        xmlbz2 (6)        -- bzip2 XML format
        }

-- Describes one output file of a search: its name, its serialization
-- format, and whether the request is written along with the results.
-- Referenced by the outfiles field of MSSearchSettings.

MSOutFile ::= SEQUENCE {
        outfile VisibleString,                -- output file name
        outfiletype MSSerialDataFormat,       -- output file type
        includerequest BOOLEAN                -- should the output include the request?
        }

-- Input spectrum file formats; this is the type of the infiletype field of
-- MSInFile.  Names and numeric values are unchanged; the last member was
-- tab-indented and the spacing before the value was inconsistent, both
-- normalised here.  Note that unknown (8) sits in the middle of the list,
-- so later formats continue from 9.

MSSpectrumFileType ::= INTEGER {
        dta (0),
        dtablank (1),
        dtaxml (2),
        asc (3),
        pkl (4),
        pks (5),
        sciex (6),
        mgf (7),
        unknown (8),
        oms (9),     -- asn.1 binary for iterative search
        omx (10),    -- xml for iterative search
        xml (11),    -- xml MSRequest
        omxbz2 (12)  -- bzip2 omx file
        }

-- Describes one input spectrum file by name and format.  Referenced by the
-- infiles field of MSSearchSettings.

MSInFile ::= SEQUENCE {
        infile VisibleString,                     -- input file name
        infiletype MSSpectrumFileType        -- input file type
        }

-- An ordered list of search settings; used by the moresettings field of
-- MSRequest for additional search runs.
MSSearchSettingsSet ::= SEQUENCE OF MSSearchSettings

-- The search request that is given to the OMSSA algorithm
--
-- Carries the spectra and the primary settings (both mandatory), plus an
-- optional request id, optional settings for further runs and an optional
-- list of modification definitions.

MSRequest ::= SEQUENCE {
        spectra MSSpectrumset,      -- the set of spectra
        settings MSSearchSettings,  -- the search settings
        rid VisibleString OPTIONAL,  -- request id
        moresettings MSSearchSettingsSet OPTIONAL, -- additional search runs
        modset MSModSpecSet OPTIONAL  -- list of mods that can be used in search
        }


-- enumeration of ion types
--
-- Used for the ionstosearch and noprolineions fields of MSSearchSettings
-- and the ion field of MSMZHit.  max (10) follows the last real value and
-- equals the number of ion types.

MSIonType ::= INTEGER {
        a (0),
        b (1),
        c (2),
        x (3),
        y (4),
        z (5),
        parent(6),
        internal(7),
        immonium(8),
        unknown(9),
        max (10)
        }
        
-- types of neutral loss
--
-- Type of the neutralloss field of MSIon.

MSIonNeutralLoss ::= INTEGER {
        water (0),    -- minus 18 Da
        ammonia (1)   -- minus 17 Da
        }

-- isotopic type of ion
--
-- Counts the c13 atoms in the ion, from none (monoisotopic) up to four.
-- Type of the isotope field of MSIon.

MSIonIsotopicType ::= INTEGER {
        monoisotopic (0), -- no c13s in molecule
        c13 (1),          -- one c13 in molecule
        c13two (2),       -- two c13s in molecule, and so on...
        c13three (3),
        c13four (4)
        }

-- type of immonium ion
--
-- Identifies an immonium ion by its parent amino acid and, optionally, a
-- product ion code.  Type of the immonium field of MSIon.

MSImmonium ::= SEQUENCE {
        parent VisibleString,           -- parent amino acid
        product VisibleString OPTIONAL  -- product ion code
        }

-- ion type at a finer level than ion series
--
-- Every field is OPTIONAL; each one refines the MSIonType recorded next to
-- it in MSMZHit (see the moreion field there).

MSIon ::= SEQUENCE {
        neutralloss MSIonNeutralLoss OPTIONAL,  -- is this peak a neutral loss?
        isotope MSIonIsotopicType OPTIONAL,         -- isotopic composition of peak
        internal VisibleString OPTIONAL,  -- if iontype is internal, this is the internal sequence
        immonium MSImmonium OPTIONAL      -- if iontype is immonium, show characteristics
        }

-- annotated comments about the ion
--
-- Every field is OPTIONAL.  Type of the annotation field of MSMZHit.

MSIonAnnot ::= SEQUENCE {
        suspect BOOLEAN OPTIONAL,         -- is this peak suspect?
        massdiff REAL OPTIONAL,           -- what is the difference in mass from library spectrum?
        missingisotope BOOLEAN OPTIONAL   -- are the lower mass peaks missing?
        }

-- defines a particular ion
--
-- One matched ion of a hit: its series, charge, position in the series and
-- m/z.  mz is stored as an INTEGER; the scale fields of MSSearchSettings
-- and MSResponse describe the m/z float to integer scaling.
-- Referenced by the mzhits field of MSHits.

MSMZHit ::= SEQUENCE {
        ion MSIonType,  -- ion type, e.g. b
        charge INTEGER, -- ion charge
        number INTEGER, -- the sequential number of the ion
        mz INTEGER,              -- scaled m/z value in Da
        index INTEGER OPTIONAL,  -- the index of the peak in the original spectrum
        moreion MSIon OPTIONAL,  -- more information about the ion type
        annotation MSIonAnnot OPTIONAL   -- annotations on the ion
}


-- contains information about sequences with identical peptide
-- sequences
--
-- One entry per sequence in which the peptide occurs.  start and stop are
-- both inclusive positions; everything else is OPTIONAL.  The pepstart,
-- pepstop, protlength and oid fields here supersede the fields of the same
-- name in MSHits, which are marked deprecated there.

MSPepHit ::= SEQUENCE {
        start INTEGER,       -- start position (inclusive) in sequence
        stop INTEGER,        -- stop position (inclusive) in sequence
        gi INTEGER OPTIONAL, -- genbank identifier
        accession VisibleString OPTIONAL,  -- sequence accession
        defline VisibleString OPTIONAL,    -- sequence description
        protlength INTEGER OPTIONAL,       -- length of protein
        oid INTEGER OPTIONAL,              -- blast library oid 
        reversed BOOLEAN OPTIONAL,         -- reversed sequence
        pepstart VisibleString OPTIONAL,   -- AA before the peptide
        pepstop VisibleString OPTIONAL     -- AA after the peptide
}        

-- modifications to a hit peptide
--
-- Pairs a position in the peptide with the modification found there.
-- Referenced by the mods field of MSHits.
-- NOTE(review): whether site is 0-based or 1-based is not stated here.

MSModHit ::= SEQUENCE {
        site INTEGER,  -- the position in the peptide
        modtype MSMod  -- the type of modification
        }


-- sets of scores
--
-- A single named score value.  Referenced by the scores field of MSHits,
-- which holds a list of these.

MSScoreSet ::= SEQUENCE {
       name VisibleString,
       value REAL
       }

-- hits to a given spectrum
--
-- One peptide-level hit: its statistics, the charge state searched, the
-- sequences containing the peptide (pephits) and optional detail such as
-- matched ions, masses and modifications.  Several fields are deprecated;
-- MSPepHit carries fields of the same names.  Member names, order and types
-- are unchanged; the last two members were tab-indented and are now aligned
-- with the rest.

MSHits ::= SEQUENCE {
        evalue REAL,     -- E-value (expect value)
        pvalue REAL,     -- P-value (probability value)
        charge INTEGER,  -- the charge state used in search.  -1 == not +1
        pephits SEQUENCE OF MSPepHit, -- peptides that match this hit
        mzhits SEQUENCE OF MSMZHit OPTIONAL,  -- ions hit
        pepstring VisibleString OPTIONAL,  -- the peptide sequence
        mass INTEGER OPTIONAL,  -- scaled experimental mass of peptide in Da
        mods SEQUENCE OF MSModHit OPTIONAL,  -- modifications to sequence
        pepstart VisibleString OPTIONAL,  -- AA before the peptide (deprecated)
        pepstop VisibleString OPTIONAL,   -- AA after the peptide (deprecated)
        protlength INTEGER OPTIONAL,      -- length of protein hit (deprecated)
        theomass INTEGER OPTIONAL,        -- scaled theoretical mass of peptide hit
        oid INTEGER OPTIONAL,             -- blast library oid (deprecated)
        scores SEQUENCE OF MSScoreSet OPTIONAL, -- optional scores (for library search)
        libaccession VisibleString OPTIONAL     -- library search accession
        }


-- error return for a particular spectrum's hitset
--
-- Type of the error field of MSHitSet.

MSHitError ::= INTEGER {
        none (0),
        generalerr (1),
        unable2read (2),  -- can't read the spectrum
        notenuffpeaks (3) -- not enough peaks to search
        }

-- MSHitSet annotation by end user
--
-- Type of the userannotation field of MSHitSet.

MSUserAnnot ::= INTEGER {
        none (0),
        delete (1),
        flag (2)
        }

-- contains a set of hits to a single spectrum
--
-- Only the spectrum number is mandatory; a hit set may therefore carry an
-- error and no hits.  Referenced by the hitsets field of MSResponse.

MSHitSet ::= SEQUENCE {
        number INTEGER, -- unique number of spectrum
        error MSHitError OPTIONAL,               -- error, if any
        hits SEQUENCE OF MSHits OPTIONAL,        -- set of hit to spectrum
        ids SEQUENCE OF VisibleString OPTIONAL,  -- filenames or other ids of spectra searched
        namevalue SEQUENCE OF NameValue OPTIONAL,-- extra info: retention times, etc.
        settingid INTEGER OPTIONAL,              -- id of the search setting used
        userannotation MSUserAnnot OPTIONAL      -- end-user annotation of this hit set (none, delete or flag)
        }


-- error return for the entire response
--
-- Type of the error field of MSResponse; per-spectrum errors use
-- MSHitError instead.

MSResponseError ::= INTEGER {
        none (0),
        generalerr (1),
        noblastdb (2),   -- unable to open blast library
        noinput (3)      -- input missing
        }


-- bioseq container
--
-- Pairs a blast library oid with the corresponding Bioseq.  Elements of
-- MSBioseqSet.

MSBioseq ::= SEQUENCE {
        oid INTEGER, -- blast library oid
        seq Bioseq
        }

-- An ordered list of MSBioseq; used by the bioseqs field of MSResponse.
MSBioseqSet ::= SEQUENCE OF MSBioseq

-- search results
--
-- The result of one search: hit sets grouped by spectrum (mandatory), the
-- m/z scale used for integer masses, and optional bookkeeping such as the
-- request id, an overall error, versions and the sequences found.

MSResponse ::= SEQUENCE {
        hitsets SEQUENCE OF MSHitSet,  -- hits grouped by spectrum
        scale INTEGER DEFAULT 100,  -- scale to change m/z float to integer
        rid VisibleString OPTIONAL,  -- request id
        error MSResponseError OPTIONAL,  -- error response
        version VisibleString OPTIONAL,  -- version of OMSSA
        email VisibleString OPTIONAL,  -- email address for notification
        dbversion INTEGER OPTIONAL,    -- version of db searched (usually size)
        bioseqs MSBioseqSet OPTIONAL  -- sequences found in search     
        }        
        
-- holds both search requests and responses
--
-- Both lists are OPTIONAL, so an MSSearch may hold only requests, only
-- responses, or both.
-- NOTE(review): nothing here states that the two lists are index-aligned.

MSSearch ::= SEQUENCE {
        request SEQUENCE OF MSRequest OPTIONAL,
        response SEQUENCE OF MSResponse OPTIONAL
        }

END

-- pcassay.asn
-- $Id: pcassay.asn,v 1.12 2006/01/10 12:54:50 bolton Exp $
-- ===========================================================================
--
--                            PUBLIC DOMAIN NOTICE
--               National Center for Biotechnology Information
--
--  This software/database is a "United States Government Work" under the
--  terms of the United States Copyright Act.  It was written as part of
--  the author's official duties as a United States Government employee and
--  thus cannot be copyrighted.  This software/database is freely available
--  to the public for use. The National Library of Medicine and the U.S.
--  Government have not placed any restriction on its use or reproduction.
--
--  Although all reasonable efforts have been taken to ensure the accuracy
--  and reliability of the software and data, the NLM and the U.S.
--  Government do not and cannot warrant the performance or results that
--  may be obtained by using this software or data. The NLM and the U.S.
--  Government disclaim all warranties, express or implied, including
--  warranties of performance, merchantability or fitness for any particular
--  purpose.
--
--  Please cite the author in any work or product based on this material.
--
-- ===========================================================================
--
-- Authors:  NCBI Structure Group
--
-- File Description:
--      ASN.1 definitions for PubChem biological assay data database
--
-- ===========================================================================

NCBI-PCAssay DEFINITIONS ::= BEGIN

IMPORTS Pub                              FROM NCBI-Pub
        Date, Object-id                  FROM NCBI-General
        PC-ID, PC-Source, PC-XRefData    FROM NCBI-PCSubstance;

-- EXPORTS ;


-- Container for multiple Assay Data Submissions
--   An ordered list of PC-AssaySubmit records.
PC-AssayContainer ::= SEQUENCE OF PC-AssaySubmit


-- Container for Data Depositions and Assay Definitions
--   The "assay" choice identifies the assay in exactly one of four ways (by identifier, by external
--   identifier, by full description, or by identifier/version).  "data" and "revoke" are both optional.
PC-AssaySubmit ::= SEQUENCE {
    assay             CHOICE {                               -- Assay Description or pre-existing Identifier
                          aid           INTEGER,             --   Assay Identifier
                          aid-source    PC-Source,           --   External Assay Identifier
                          descr         PC-AssayDescription, --   Assay Description (new or updated)
                          aidver        PC-ID                --   Assay Identifier/Version (for internal use)
                      },
    data              SEQUENCE OF PC-AssayResults  OPTIONAL, -- Assay Data Deposition (vector)
    revoke            SEQUENCE OF INTEGER          OPTIONAL  -- List of SID's whose data is to be suppressed
}


-- Container for multiple Assay Result Sets
--   An ordered list of PC-AssayResults records.
PC-AssayResultsSet ::= SEQUENCE OF PC-AssayResults


-- Assay Results provided for a given Substance tested, with respect to the results types defined in the 
--   referenced Assay Description
--
-- Only "sid" is mandatory; "outcome" falls back to "unspecified" when absent and every other field is optional.
PC-AssayResults ::= SEQUENCE {
    -- Internal/External Tracking Information
    sid               INTEGER,                               -- Tested Substance ID/Version  [Either valid ID or, 
                                                             --   if "sid-source" is used, this is a "0" value]
                                                             --   Note: A valid ID is greater than "0"
    sid-source        PC-Source                    OPTIONAL, -- External Identifier for this Substance
                                                             --   Note: May be used in lieu of "sid"
                                                             --   Note: This is non-optional if "sid" is "0"
    version           INTEGER                      OPTIONAL, -- Version identifier for this AID-SID Result
                                                             --   Note: Incoming data should set this to be "0"

    -- Data Annotation/Qualifier (the URL to further Depositor Information is the "url" field further below)
    comment           VisibleString                OPTIONAL, -- Annotation or qualifier for this Result

    -- Assay Result Data for this Sample
    --   Note: Users need populate only those "tid"s, for which there is data, in any order.
    outcome           INTEGER {                              -- Assay Outcome
                          inactive        (1),               --   Substance is considered Inactive
                          active          (2),               --   Substance is considered Active
                          inconclusive    (3),               --   Substance is Inconclusive
                          unspecified     (4)                --   Substance Outcome is Unspecified
                      }               DEFAULT unspecified,
    rank              INTEGER                      OPTIONAL, -- Rank of Assay Outcome (for result ordering)
                                                             --   Note: Larger numbers are more active
    data              SEQUENCE OF PC-AssayData     OPTIONAL, -- Assay Data Reported for this SID (vector)
    url               VisibleString                OPTIONAL, -- Depositor provided URL for this Result
    xref              SEQUENCE OF PC-AnnotatedXRef OPTIONAL  -- Annotated Cross-Reference Information
}


-- Substance Tested Assay Results for a given Assay Result Type defined in the referenced Assay Description
--   One value for one result field: "tid" names the field and "value" carries the datum as exactly one of
--   integer, real, boolean or string.
PC-AssayData ::= SEQUENCE {
    tid               INTEGER,                            -- Assay Result Field Type ID (TID)
                                                          --   Note: Result Field ID's must be greater than "0"
    value             CHOICE {                            -- Assay Result, must be the same as defined for TID
                          ival    INTEGER,
                          fval    REAL,
                          bval    BOOLEAN,
                          sval    VisibleString
                      }
}


-- Assay Description provided by an Organization that describes the assay/protocol performed and defines the 
--   measured end-points and parameters to be stored.  An Assay Description is not a database table.  You can 
--   define as many Result Definitions as needed and they need not be used by all Substances tested.
--
-- Note:  After initial submission, Users cannot add new Result Definitions or modify existing Result Definitions
--        beyond the description text; however, users can change the Assay Description Information, as desired.
--
-- Mandatory fields are "aid", "name" and "results"; everything else is optional.
PC-AssayDescription ::= SEQUENCE {
    -- Internal/External Tracking Information
    aid               PC-ID,                                 -- Assay Description ID/Version  [Either valid ID 
                                                             --   or, if "aid-source" is used, a "0" dummy value]
                                                             --   Note: Version is for internal use (only?)
                                                             --   Note: A valid ID is greater than "0"
    aid-source        PC-Source                    OPTIONAL, -- External Identifier for this Assay Description
                                                             --   Note: May be used in lieu of "aid"
                                                             --   Note: This is non-optional if "aid" ID is "0"

    -- Assay Description Information
    name              VisibleString,                         -- Short Assay Name (for display purposes)
    description       SEQUENCE OF VisibleString    OPTIONAL, -- Description of Assay
    protocol          SEQUENCE OF VisibleString    OPTIONAL, -- Procedure used to generate results
    comment           SEQUENCE OF VisibleString    OPTIONAL, -- Comments or additional information
    xref              SEQUENCE OF PC-AnnotatedXRef OPTIONAL, -- Annotated Cross-Reference Information

    -- Allowed Assay Result Types

    results           SEQUENCE OF PC-ResultType,             -- Result Definitions (vector)

    -- Additional Information
    pub               SEQUENCE OF Pub              OPTIONAL, -- Depositor provided publications for this assay
    revision          INTEGER                      OPTIONAL  -- Revision identifier for textual description
}


-- Annotated Cross-Reference (XRef) Information to allow the XRef to be qualified, as to its meaning or context
--   Used by the "xref" fields of both PC-AssayResults and PC-AssayDescription.
PC-AnnotatedXRef ::= SEQUENCE {
    xref              PC-XRefData,                    -- Cross-Reference Information
    comment           VisibleString         OPTIONAL  -- Annotation qualifier describing Cross-Reference meaning
}


-- Definition of Allowed Result Types for a given Assay
--   Declares one result field (TID): its name, data type, optional validation constraints, and optional unit
--   and value-transform information.  Mandatory fields are "tid", "name" and "type".
PC-ResultType ::= SEQUENCE {
    -- Tracking or Description Information
    tid               INTEGER,                             -- Assay Result Field Type ID (TID)
    name              VisibleString,                       -- Result Field Name (short name for display)
    description       SEQUENCE OF VisibleString  OPTIONAL, -- Result Field Description

    -- Result Data Type and Validation Information
    type             INTEGER {                             -- Result Data Type
                         float            (1),
                         int              (2),
                         bool             (3),
                         string           (4)
                     },
    constraints      CHOICE {                              -- Allowed Values, used for validating incoming data
                         -- If type is "float"
                         fset      SEQUENCE OF REAL,       --   Allowed values must be equal to one of these
                         fmin      REAL,                   --   Allowed values (x) must be [ fmin <= x ]
                         fmax      REAL,                   --   Allowed values (x) must be [ x <= fmax ]
                         frange    PC-RealMinMax,          --   Minimum/Maximum Range [ min <= x <= max ]

                         -- If type is "int"
                         iset      SEQUENCE OF INTEGER,    --   Allowed values must be equal to one of these
                         imin      INTEGER,                --   Allowed values (x) must be [ imin <= x ]
                         imax      INTEGER,                --   Allowed values (x) must be [ x <= imax ]
                         irange    PC-IntegerMinMax,       --   Minimum/Maximum Range [ min <= x <= max ]

                         -- If type is "string"  (no constraint alternative is defined for type "bool")
                         sset   SEQUENCE OF VisibleString  --   Allowed values must be equal to one of these
                     }                           OPTIONAL,

    -- Unit information provides the units for the values reported for this TID.  For example, if the values 
    --   reported for this TID are a concentration, e.g., micro-molar, setting the unit "um" allows PubChem to 
    --   know that the value, e.g., "1.3", is actually "1.3 uM".  This also allows PubChem to properly report the 
    --   units when displaying the reported values for this TID.  If the enumerated units provided below are 
    --   insufficient, you may represent the units as a string in the optional "sunit" field (see below).
    unit             INTEGER {                             -- Units for Value
                         ppt              (1),             -- Parts per Thousand
                         ppm              (2),             -- Parts per Million
                         ppb              (3),             -- Parts per Billion
                         mm               (4),             -- milliM
                         um               (5),             -- microM
                         nm               (6),             -- nanoM
                         pm               (7),             -- picoM
                         fm               (8),             -- femtoM
                         mgml             (9),             -- milligrams per mL
                         ugml            (10),             -- micrograms per mL
                         ngml            (11),             -- nanograms per mL
                         pgml            (12),             -- picograms per mL
                         fgml            (13),             -- femtograms per mL
                         m               (14),             -- Molar
                         percent         (15),             -- Percent
                         ratio           (16),             -- Ratio
                         sec             (17),             -- Seconds
                         rsec            (18),             -- Reciprocal Seconds
                         min             (19),             -- Minutes
                         rmin            (20),             -- Reciprocal Minutes
                         day             (21),             -- Days
                         rday            (22),             -- Reciprocal Days
                         none           (254),
                         unspecified    (255)
                     }                           OPTIONAL,
    sunit            VisibleString               OPTIONAL, -- Unit Type (as a String)

    -- Value Transform information qualifies the values reported for this TID.  For example, if the values
    --   reported for this TID are "-Log10 GI50", you may want to consider setting
    --   the "nlog" value below.  In doing so, PubChem would know that the value, e.g., "5.0" 
    --   is actually "1.0e-5".  If the transformation applied is not listed, you may represent
    --   this transformation as a string in the "stransform" (see below) for eventual inclusion 
    --   in the enumerated transform list below.
    transform        INTEGER {                             -- Value Type Details
                         linear           (1),             -- Linear Scale (x)
                         ln               (2),             -- Natural Log Scale (ln x)
                         log              (3),             -- Log Base 10 Scale (log10 x)
                         reciprocal       (4),             -- Reciprocal Scale (1/x)
                         negative         (5),             -- Negative Linear Scale (-x)
                         nlog             (6),             -- Negative Log Base 10 Scale (-log10 x)
                         nln              (7)              -- Negative Natural Log Scale (-ln x)
                     }                           OPTIONAL,
    stransform       VisibleString               OPTIONAL  -- Value Transform Type (as a String)
}


-- Minimum and Maximum Constraints on an Integer Value (used for validating incoming data)
--   Type of the "irange" alternative of PC-ResultType constraints, documented there as [ min <= x <= max ].
PC-IntegerMinMax ::= SEQUENCE {
    min               INTEGER,                             -- Minimum Value Allowed
    max               INTEGER                              -- Maximum Value Allowed
}


-- Minimum and Maximum Constraints on a Real Value (used for validating incoming data)
--   Type of the "frange" alternative of PC-ResultType constraints, documented there as [ min <= x <= max ].
PC-RealMinMax ::= SEQUENCE {
    min               REAL,                                -- Minimum Value Allowed
    max               REAL                                 -- Maximum Value Allowed
}


END

-- ===========================================================================
-- $Log: pcassay.asn,v $
-- Revision 1.12  2006/01/10 12:54:50  bolton
-- Major change to meaning of PC-ID version, now an assay restatement
-- identifier.  New revision identifier added to AssayDescription to
-- track textual modifications (previous meaning of PC-ID version).
-- Replaced "Sequence of Pub" in AssayResults with "Sequence of
-- AnnotatedXRef".  Added "PC-ID" to the AssaySubmit assay choice.
--
-- Revision 1.11  2005/11/30 22:42:18  ywang
-- change rank to be optional
--
-- Revision 1.10  2005/11/08 13:12:49  bolton
-- Added ability to provide publications at the assay description and assay
-- result (per SID) level.  Also, minor changes to comments and object order.
--
-- Revision 1.9  2005/09/29 21:05:52  tkachenk
-- PC-AssayResults.data is made optional to be able to handle empty data rows
--
-- Revision 1.8  2005/08/02 18:48:08  ywang
-- add PC-AssayResultsSet to ease spec mapping
--
-- Revision 1.7  2005/08/01 14:43:30  ucko
-- Fix comma-misplacement typo in previous revision.
--
-- Revision 1.6  2005/08/01 14:17:57  ywang
-- make PC-AssayResults version OPTIONAL and put url at the end
--
-- Revision 1.5  2005/07/28 17:05:20  bolton
-- Major update to the Assay specification streamlining (by elimination) unused
-- features, eliminating the assay deposition block, and to add new "revoke"
-- feature.
--
-- Revision 1.4  2005/03/02 16:18:49  bolton
-- Added optional URL for Substance Result.
--
-- Revision 1.3  2005/02/04 15:04:39  bolton
-- Added assay result ranking.  Reorganized order of object definitions.
--
-- Revision 1.2  2004/07/13 14:31:17  bolton
-- Added PC-XRefData, imported from the PCSubstance specification, to the
-- DataSession and AssayDescription blocks.
--
-- Revision 1.18  2004/06/10 13:24:18  bolton
-- Changes made to in documentation and enumerations to detail that "0" is an
-- invalid ID and an invalid enumeration.
--
-- Revision 1.17  2004/06/04 11:55:21  bolton
-- Added ability to allow external identifiers at all levels.  Added improved
-- documentation to reflect the current use of various sections of the spec.
--
-- Revision 1.16  2004/06/01 14:14:26  bolton
-- Forgot to remove PC-Source record at the PC-AssayDescription level.
--
-- Revision 1.15  2004/05/28 18:32:52  bolton
-- Slightly modified and improved accessioning scheme and associated
-- documentation.
--
-- Revision 1.14  2004/05/18 12:16:05  bolton
-- Assay Data Session and Assay Description tweaks.
--
-- Revision 1.13  2004/05/14 10:51:44  bolton
-- Switched ordering of PC-AssayDescription and PC-AssayDataSession in the
-- PC-AssayContainer Object.
--
-- Revision 1.12  2004/05/12 13:05:02  bolton
-- Major overhaul of the deposition/accession scheme.  Minor tweaks.
--
-- Revision 1.11  2004/05/03 12:48:39  bolton
-- Added a root container object that holds both assay deposition and assay
-- description data.  Fixed a minor inconsistency with negative Log 10 label.
--
-- Revision 1.10  2004/04/08 11:55:14  bolton
-- Removed repeat of "Log" tag.
--
-- Revision 1.9  2004/04/08 11:50:42  bolton
-- Modifications to allow more/better: X-refs for Assay descriptions; activity
-- summarization programmatically; and Result Type units.
--
-- Revision 1.8  2004/04/01 19:29:53  bolton
-- New version to reflect PubChem group consensus.
--
-- Revision 1.7  2004/03/05 16:18:50  bolton
-- Removed orphaned "PC-History" and "PC-Replacement".
--
-- Revision 1.6  2004/03/04 20:30:14  bolton
-- Minor fixes to allow compilation.
--
-- Revision 1.5  2004/03/04 20:13:53  bolton
-- Continued development of "pcassay", following latest pubchem meeting.
--
-- Revision 1.4  2004/03/02 15:20:21  thiessen
-- make pcsubstance and pcassay build on UNIX
--
-- Revision 1.3  2004/02/27 18:39:28  bolton
-- Changes to share "PC-Source" from "pcsubstance" to "pcassay".
--
-- Revision 1.2  2004/02/26 15:17:30  bolton
-- Minor changes to make "pcassay" library compile.
--
-- ===========================================================================


-- pcsubstance.asn
-- $Id: pcsubstance.asn 109801 2007-09-01 13:57:01Z bolton $
-- ===========================================================================
--
--                            PUBLIC DOMAIN NOTICE
--               National Center for Biotechnology Information
--
--  This software/database is a "United States Government Work" under the
--  terms of the United States Copyright Act.  It was written as part of
--  the author's official duties as a United States Government employee and
--  thus cannot be copyrighted.  This software/database is freely available
--  to the public for use. The National Library of Medicine and the U.S.
--  Government have not placed any restriction on its use or reproduction.
--
--  Although all reasonable efforts have been taken to ensure the accuracy
--  and reliability of the software and data, the NLM and the U.S.
--  Government do not and cannot warrant the performance or results that
--  may be obtained by using this software or data. The NLM and the U.S.
--  Government disclaim all warranties, express or implied, including
--  warranties of performance, merchantability or fitness for any particular
--  purpose.
--
--  Please cite the author in any work or product based on this material.
--
-- ===========================================================================
--
-- Authors:  NCBI Structure Group
--
-- File Description:
--      ASN.1 definitions for PubChem small molecule database
--
-- ===========================================================================

NCBI-PCSubstance DEFINITIONS ::= BEGIN

EXPORTS PC-Substance, PC-Compound, PC-Substances, PC-Compounds,
        PC-Source, PC-ID, PC-InfoData, PC-XRefData;

IMPORTS Pub              FROM NCBI-Pub
        Date, Object-id  FROM NCBI-General;


-- Root Record for Chemical Substance Definition
--   Only "sid" and "source" are mandatory.  All descriptive elements and the
--   deposited structure itself ("compound") are OPTIONAL.
PC-Substance ::= SEQUENCE {
    -- Internal Tracking Information
    sid            PC-ID,                               -- Substance ID/Version  [Either valid ID or a "0" dummy
                                                        --   value, if "source" is to be used]
                                                        --   Note: Version is for internal use (only?)
                                                        --   Note: A valid ID is greater than "0"
    source         PC-Source,                           -- Data Source for this Submission

    -- Substance Description Information
    pub            SEQUENCE OF Pub            OPTIONAL, -- Articles Describing this Substance
    synonyms       SEQUENCE OF VisibleString  OPTIONAL, -- Substance Names provided by Depositor
    comment        SEQUENCE OF VisibleString  OPTIONAL, -- Comments and Description provided by Depositor
    xref           SEQUENCE OF PC-XRefData    OPTIONAL, -- X-Ref/LinkOut Data provided by Depositor

    -- Structure Description
    compound       PC-Compounds               OPTIONAL  -- Original Deposited Structure Information
                                                        --   (a list: PC-Compounds is SEQUENCE OF PC-Compound)
}


-- Holder for groups of Substances
--   An ordered list of PC-Substance records.  No SIZE constraint is given,
--   so the list may be of any length.
PC-Substances ::= SEQUENCE OF PC-Substance


-- ID and Version Description Information
--   Both elements are mandatory.  An "id" of "0" is the placeholder for
--   "no valid ID" (see the note on the element below).
PC-ID ::= SEQUENCE {
    id             INTEGER,                             -- Unique "Global" ID
                                                        --   Note: Must be greater than "0" or, if invalid, "0"
    version        INTEGER                              -- Incremented when Depositor updates record
                                                        --   Note: For Internal Use (only?)
}


-- Describes Substance Source, if from another database
--   A CHOICE: exactly one of the alternatives below is present in a value.
PC-Source ::= CHOICE {
    individual     Pub,                                 -- Individual Submission
    db             PC-DBTracking,                       -- External DB Submission
    mmdb           PC-MMDBSource                        -- MMDB Submission (deprecated)
}


-- External DB Tracking Information
--   "name" and "source-id" are mandatory and together identify the record
--   in the external database.  The release and publication details are OPTIONAL.
PC-DBTracking ::= SEQUENCE {
    name           VisibleString,                       -- Unique Name of External Database
    source-id      Object-id,                           -- Primary Unique ID used by External DB
    date           Date                       OPTIONAL, -- External Database Release Date
    description    VisibleString              OPTIONAL, -- External Database Release Code/Description
    pub            Pub                        OPTIONAL  -- Data Submission to same DB by original Author
}


-- MMDB Source Record detailing specific location or part of an MMDB Record
--   The record, molecule ID and molecule name(s) are mandatory.  The residue
--   and atom qualifiers are OPTIONAL refinements.
--   Used by PC-Source (alternative "mmdb") and by PC-AtomSource.
PC-MMDBSource ::= SEQUENCE {
    mmdb-id        INTEGER,                             -- MMDB Record ID
                                                        --   Note: Must be greater than "0" or, if invalid, "0"
    molecule-id    INTEGER,                             -- MMDB Molecule ID
                                                        --   Note: Must be greater than "0" or, if invalid, "0"
    molecule-name  SEQUENCE OF VisibleString,           -- MMDB Molecule Name
                                                        --   (a list, so several names may be given)
    residue-id     INTEGER                    OPTIONAL, -- Residue ID
                                                        --   Note: Must be greater than "0" or, if invalid, "0"
    residue-name   VisibleString              OPTIONAL, -- Residue Name
    atom-id        INTEGER                    OPTIONAL, -- Atom ID
                                                        --   Note: Must be greater than "0" or, if invalid, "0"
    atom-name      VisibleString              OPTIONAL  -- Atom Name
}


-- Depositor Provided X-Ref and LinkOut data for Entrez
--   A CHOICE: each value carries exactly one cross-reference.  Records that
--   need several cross-references hold a list of these (e.g., PC-Substance "xref").
--   String alternatives are free text or URLs; INTEGER alternatives are database IDs.
PC-XRefData ::= CHOICE {
        regid            VisibleString,           -- External Database Registry ID
        rn               VisibleString,           -- Registry Number (e.g., EC Number, CAS Number)
        mesh             VisibleString,           -- MESH Index Term
        pmid             INTEGER,                 -- PubMed ID
                                                  --   Note: Must be greater than "0" or, if invalid, "0"
        gi               INTEGER,                 -- GenBank General ID
                                                  --   Note: Please use protein-gi or nucleotide-gi, if possible
                                                  --   Note: Must be greater than "0" or, if invalid, "0"
        mmdb             INTEGER,                 -- MMDB ID
                                                  --   Note: Must be greater than "0" or, if invalid, "0"
        sid              INTEGER,                 -- PubChem Substance ID
                                                  --   Note: Must be greater than "0" or, if invalid, "0"
        cid              INTEGER,                 -- PubChem Compound ID
                                                  --   Note: Must be greater than "0" or, if invalid, "0"
        dburl            VisibleString,           -- Depositor Source Database Homepage
        sburl            VisibleString,           -- Depositor Homepage for a Substance
        asurl            VisibleString,           -- Depositor Homepage for an Assay
        protein-gi       INTEGER,                 -- GenBank General ID for a Protein
                                                  --   Note: Must be greater than "0" or, if invalid, "0"
        nucleotide-gi    INTEGER,                 -- GenBank General ID for a Nucleotide
                                                  --   Note: Must be greater than "0" or, if invalid, "0"
        taxonomy         INTEGER,                 -- Taxonomy ID for an Organism
                                                  --   Note: Must be greater than "0" or, if invalid, "0"
        aid              INTEGER,                 -- PubChem BioAssay ID
                                                  --   Note: Must be greater than "0" or, if invalid, "0"
        mim              INTEGER,                 -- MIM, Mendelian Inheritance in Man, Number
                                                  --   Note: Must be greater than "0" or, if invalid, "0"
        gene             INTEGER,                 -- Entrez Gene ID
                                                  --   Note: Must be greater than "0" or, if invalid, "0"
        probe            INTEGER                  -- Probe ID
                                                  --   Note: Must be greater than "0" or, if invalid, "0"
}


-- Compound Record
--   Only "id" is mandatory.  Every structural and descriptive element is OPTIONAL.
--   The definition is recursive: "vbalt" holds further PC-Compound records
--   (via PC-Compounds).
PC-Compound ::= SEQUENCE {
    -- Tracking Information
    id             PC-CompoundType,                        -- Compound Qualifier (Type/ID)

    -- Structure and Derived Information
    atoms          PC-Atoms                      OPTIONAL, -- AtomID/Type Information
    bonds          PC-Bonds                      OPTIONAL, -- BondID/Type/Atom Information
    stereo         SEQUENCE OF PC-StereoCenter   OPTIONAL, -- StereoCenter Descriptions
    coords         SEQUENCE OF PC-Coordinates    OPTIONAL, -- 2D/3D Coordinate Sets of Compound
    charge         INTEGER                       OPTIONAL, -- Provided Total Formal Charge  (Signed Integer)
    props          SEQUENCE OF PC-InfoData       OPTIONAL, -- Derived (computed) Properties
    stereogroups   SEQUENCE OF PC-StereoGroup    OPTIONAL, -- Relative stereochemistry groups
    count          PC-Count                      OPTIONAL, -- Counts of various properties
    vbalt          PC-Compounds                  OPTIONAL  -- Alternate Valence-Bond Forms
}


-- Holder for groups of Compounds
--   An ordered list of PC-Compound records.  Referenced by PC-Substance
--   ("compound") and by PC-Compound itself ("vbalt").
PC-Compounds ::= SEQUENCE OF PC-Compound


-- Qualification used to describe the type of Compound deposited, standardized, or derived.
--    Please note that mixtures/cocktails may be specified using previously deposited substances.
--    Both elements are OPTIONAL, so a value may legally be empty.
PC-CompoundType ::= SEQUENCE {
    type        INTEGER {               --  Compound Qualifier or Type
                    -- For Compound Depositions
                    deposited           (0),               -- Original Deposited Compound
                    -- For Standardized Compounds
                    standardized        (1),               -- Standardized Form of a Deposited Compound
                    component           (2),               -- Component of a Standardized Compound
                    neutralized         (3),               -- Neutralized Form of a Standardized Compound
                    -- For Mixture/Cocktail Depositions
                    mixture             (4),               -- Substance that is a component of a mixture
                    -- For Theoretical Compounds
                    tautomer            (5),               -- Predicted Tautomer Form
                    pka-state           (6),               -- Predicted Ionized pKa Form

                    unknown           (255)                -- Unknown Compound Type
                }                                OPTIONAL,
    id          CHOICE {                --  Compound Namespace and ID  (absent for "deposited" type compounds)
                                        --    The alternative chosen selects the ID namespace.
                    cid        INTEGER,                    --  Standardized Compound
                    sid        INTEGER,                    --  PubChem Substance (for "mixture" type compounds)
                    xid        INTEGER                     --  PubChem Theoretical Compound
                }                                OPTIONAL
}


-- Counts of various properties of a Compound
--   All ten counts are mandatory: when a PC-Count is supplied (it is OPTIONAL
--   in PC-Compound) every element must be filled in.
PC-Count ::= SEQUENCE {
    heavy-atom               INTEGER,             -- Total count of non-Hydrogen (Heavy) Atoms

    -- StereoChemistry Counts
    atom-chiral              INTEGER,             -- Total count of (SP3) Chiral Atoms
    atom-chiral-def          INTEGER,             -- Total count of Defined (SP3) Chiral Atoms
    atom-chiral-undef        INTEGER,             -- Total count of Undefined (SP3) Chiral Atoms
    bond-chiral              INTEGER,             -- Total count of (SP2) Chiral Bonds
    bond-chiral-def          INTEGER,             -- Total count of (SP2) Defined Chiral Bonds
    bond-chiral-undef        INTEGER,             -- Total count of (SP2) Undefined Chiral Bonds

    -- Isotopic Counts
    isotope-atom             INTEGER,             -- Total count of Atoms with Isotopic Information

    -- Discrete Structure Counts
    covalent-unit            INTEGER,             -- Total count of covalently-bonded units in the record
    tautomers                INTEGER              -- Number of possible tautomers (Max. 999)
}


-- List of atom identifiers which are in a common stereochemistry group.
-- All atoms in this group possess the characteristic of the type specified.
-- The convention adopted is intended to be compatible with MDL's Enhanced
-- Stereochemical Representation white paper.
-- An atom can only be a member of a single stereo group, and all atoms
-- in a stereo group must have a stereo descriptor.
-- Stereogroups only apply to stereocenters that can have parity.
-- Both elements are mandatory.
PC-StereoGroup ::= SEQUENCE {
    type           INTEGER {                            -- Kind of Stereo Group
                       absolute         (1),            -- Absolute configuration is known
                       or               (2),            -- Relative configuration is known (absolute configuration is unknown)
                       and              (3),            -- Mixture of stereoisomers
                       unknown        (255)             -- Unknown configuration type
                   },
    aid            SEQUENCE OF INTEGER                  -- Atom Identifiers of atoms in this group
                                                        --   Note: Atom ID's must be greater than "0"
}


-- Compound Description/Descriptor Data
--   A single named datum: "urn" says what the value is and where it came from,
--   "value" carries it.  Both elements are mandatory; "value" is a CHOICE, so
--   exactly one representation is present.
--   The optional "datatype" in PC-Urn can name a more specific type than the
--   ASN.1 representation chosen here (see PC-UrnDataType).
PC-InfoData ::= SEQUENCE {
    urn            PC-Urn,                              -- Universal Resource Name  [for Value Qualification]
    value          CHOICE {                             -- Data Value
                          bval     BOOLEAN,                   -- Boolean or Binary
                          bvec     SEQUENCE OF BOOLEAN,       -- Boolean Vector
                          ival     INTEGER,                   -- Integer (signed or unsigned)
                          ivec     SEQUENCE OF INTEGER,       -- Integer Vector
                          fval     REAL,                      -- Float or Double
                          fvec     SEQUENCE OF REAL,          -- Double Vector
                          sval     VisibleString,             -- String
                          slist    SEQUENCE OF VisibleString, -- List of Strings
                          date     Date,                      -- Date
                          binary   OCTET STRING,              -- Binary Data
                          bitlist  BIT STRING                 -- Bit List (specialized version of Boolean vector)
                   }
}


-- Universal Resource Name
--    Provides explicit source information on derived or calculated data
--    Only "label" is mandatory.  The remaining elements progressively qualify
--    how, and by what software, the value was produced.
PC-Urn ::= SEQUENCE {
    label           VisibleString,                       -- Generic Name or Label for Display  [e.g., "Log P"]
    name            VisibleString              OPTIONAL, -- Qualified Name  [e.g., "XlogP"]
    datatype        PC-UrnDataType             OPTIONAL, -- Specific Data Type of Value  [e.g., binary]
    parameters      VisibleString              OPTIONAL, -- Implementation Parameter  [e.g., "metal=0"]
    implementation  VisibleString              OPTIONAL, -- Implementation Name  [e.g., "E_XlogP"]
    version         VisibleString              OPTIONAL, -- Implementation Version  [e.g., "3.317"]
    software        VisibleString              OPTIONAL, -- Implementation Software  [e.g., "Cactvs"]
    source          VisibleString              OPTIONAL, -- Implementation Organization  [e.g., "xemistry.com"]
    release         VisibleString              OPTIONAL  -- NCBI Implementation Release  [e.g., "10.25.2005"]
}


-- URN Data Type
--   Provides the ability to use more specific data types than that directly provided by ASN.1.
--   Provides for more specific validation of specified data.
--   The bracketed "maps to" note on each value names the ASN.1 representation
--   expected to carry data of that type.
PC-UrnDataType ::= INTEGER {
        -- Basic Data Types
        string                         (1),  -- String                             [maps to a VisibleString]
        stringlist                     (2),  -- List of Strings                    [maps to VisibleString list]
        int                            (3),  -- 32-Bit Signed Integer              [maps to an INTEGER]
        intvec                         (4),  -- Vector of 32-Bit Signed Integer    [maps to INTEGER vector]
        uint                           (5),  -- 32-Bit Unsigned Integer            [maps to an INTEGER]
        uintvec                        (6),  -- Vector of 32-Bit Unsigned Integer  [maps to INTEGER vector]
        double                         (7),  -- 64-Bit Float                       [maps to a REAL]
        doublevec                      (8),  -- Vector of Double                   [maps to REAL vector]
        bool                           (9),  -- Boolean or Binary value            [maps to a BOOLEAN]
        boolvec                       (10),  -- Boolean Vector                     [maps to BOOLEAN vector]

        -- Specialized Data Types
        uint64                        (11),  -- 64-Bit Unsigned Integer (Hex form) [maps to a VisibleString]
        binary                        (12),  -- Binary Data Blob                   [maps to an OCTET STRING]
        url                           (13),  -- URL                                [maps to a VisibleString]
        unicode                       (14),  -- UniCode String                     [maps to a VisibleString]
        date                          (15),  -- ISO8601 Date                       [maps to a Date]
        fingerprint                   (16),  -- Binary Fingerprint                 [maps to an OCTET STRING]
                                             --   (Gzip'ped bit list w/ 4-Byte prefix denoting bit list length)

        unknown                      (255)   -- Unknown Data Type               [maps to a set of VisibleString]
}


-- Coordinates for the Compound of a given type
--   "type" and "aid" are mandatory.  "type" is a list, so several
--   PC-CoordinateType values (e.g., a dimensionality plus a unit) can be
--   given for one coordinate set.
PC-Coordinates ::= SEQUENCE {
    type           SEQUENCE OF PC-CoordinateType,          -- Coordinate Type Information (vector)
    aid            SEQUENCE OF INTEGER,                    -- Conformer Atom IDs (vector)
                                                           --   (to be kept synchronized with Conformers)
                                                           --   Note: Atom ID's must be greater than "0"
    conformers     SEQUENCE OF PC-Conformer      OPTIONAL, -- Conformers for this Coordinate Set

    atomlabels     SEQUENCE OF PC-AtomString     OPTIONAL, -- Atom labels for Conformer Set

    data           SEQUENCE OF PC-InfoData       OPTIONAL  -- Data Associated with these Coordinates
}


-- Drawing/Conformer Definition (in Parallel Arrays, synchronized to aid integer list)
--   3D coordinates are specified in a right-handed coordinate system. For 2D plots, Y axis leads upwards.
--   "x" and "y" are mandatory.  "z" is OPTIONAL, which permits 2D-only coordinate sets.
PC-Conformer ::= SEQUENCE {
    --  [Note: Parallel Arrays must be kept Synchronized]
    x              SEQUENCE OF REAL,                       -- X Coordinates (vector)
    y              SEQUENCE OF REAL,                       -- Y Coordinates (vector)
    z              SEQUENCE OF REAL              OPTIONAL, -- Z Coordinates (vector)

    style          PC-DrawAnnotations            OPTIONAL, -- Structure Annotations

    data           SEQUENCE OF PC-InfoData       OPTIONAL  -- Data Associated with this Conformer
}


-- Holder for groups of Conformers
--   NOTE(review): PC-Coordinates spells out "SEQUENCE OF PC-Conformer" directly
--   rather than using this type, and it is not listed in EXPORTS; confirm
--   whether anything still references it.
PC-Conformers ::= SEQUENCE OF PC-Conformer


-- Coordinate Set Type Distinctions
--   One value list covers several independent aspects: dimensionality (1-2),
--   origin/processing (3-9) and units (10-14, 255).  PC-Coordinates carries
--   a list of these values so that the aspects can be combined.
PC-CoordinateType ::= INTEGER {
        twod                (1),  -- 2D Coordinates
        threed              (2),  -- 3D Coordinates (should also indicate units, below)
        submitted           (3),  -- Depositor Provided Coordinates
        experimental        (4),  -- Experimentally Determined Coordinates
        computed            (5),  -- Computed Coordinates
        standardized        (6),  -- Standardized Coordinates
        augmented           (7),  -- Hybrid Original with Computed Coordinates (e.g., explicit H)
        aligned             (8),  -- Template used to align drawing
        compact             (9),  -- Drawing uses shorthand forms (e.g., COOH, OCH3, Et, etc.)
        units-angstroms    (10),  -- (3D) Coordinate units are Angstroms
        units-nanometers   (11),  -- (3D) Coordinate units are nanometers
        units-pixel        (12),  -- (2D) Coordinate units are pixels
        units-points       (13),  -- (2D) Coordinate units are points
        units-stdbonds     (14),  -- (2D) Coordinate units are standard bond lengths (1.0)
        units-unknown     (255)   -- Coordinate units are unknown or unspecified
}


-- Drawing Annotations (in Parallel Arrays)
--    [Note: A pair of atoms can have multiple annotations]
--    Entry i of "annotation" applies to the atom pair (aid1[i], aid2[i]).
--    All three arrays are mandatory.
PC-DrawAnnotations ::= SEQUENCE {
    --  [Note: Parallel Arrays must be kept Synchronized]
    annotation     SEQUENCE OF PC-BondAnnotation, -- Bond Annotations (vector)
    aid1           SEQUENCE OF INTEGER,           -- Atom1 Identifier (vector)
                                                  --   Note: Atom ID's must be greater than "0"
    aid2           SEQUENCE OF INTEGER            -- Atom2 Identifier (vector)
                                                  --   Note: Atom ID's must be greater than "0"
}


-- Atom-Atom Annotation Information
--   Drawing style or interpretation hint for a pair of atoms.  Used as the
--   element type of the "annotation" array in PC-DrawAnnotations.
PC-BondAnnotation ::= INTEGER {
    crossed        (1),                          -- Double Bond that can be both Cis/Trans
    dashed         (2),                          -- Hydrogen-Bond (3D Only?)
    wavy           (3),                          -- Unknown Stereochemistry
    dotted         (4),                          -- Complex/Fractional
    wedge-up       (5),                          -- Above-Plane
    wedge-down     (6),                          -- Below-Plane
    arrow          (7),                          -- Dative
    aromatic       (8),                          -- Aromatic
    resonance      (9),                          -- Resonance
    bold          (10),                          -- Fat Bond (Non-Specific User Interpreted Information)
    fischer       (11),                          -- Interpret Bond Stereo using Fischer Conventions
    closeContact  (12),                          -- Identification of Atom-Atom Close Contacts (3D Only)
    unknown      (255)                           -- Unspecified or Unknown Atom-Atom Annotation
}


-- Atom Information  (in Parallel Arrays)
--   "aid" and "element" are mandatory parallel arrays: entry i of "element"
--   describes the atom whose identifier is entry i of "aid".
--   The OPTIONAL elements are not parallel arrays.  Each entry carries its
--   own atom identifier, so only atoms that have a value need be listed.
PC-Atoms ::= SEQUENCE {
    --  [Note: Parallel Arrays must be kept Synchronized]
    aid            SEQUENCE OF INTEGER,                    -- Atom Identifiers (vector)
                                                           --   Note: Atom ID's must be greater than "0"
    element        SEQUENCE OF PC-Element,                 -- Atomic Numbers (vector)

    -- Independent Arrays of ID-Value Pairs  (Technically allows multiple values per Atom)
    label          SEQUENCE OF PC-AtomString     OPTIONAL, -- Atom labels
    isotope        SEQUENCE OF PC-AtomInt        OPTIONAL, -- Isotopic Information
    charge         SEQUENCE OF PC-AtomInt        OPTIONAL, -- Formal Charges
    radical        SEQUENCE OF PC-AtomRadical    OPTIONAL, -- Radical Information
    source         SEQUENCE OF PC-AtomSource     OPTIONAL, -- E.g. identity of MMDB "R" groups
    comment        SEQUENCE OF PC-AtomString     OPTIONAL  -- Atom Comments
}


-- Specification of an Association between an Atom Identifier and Source
--   Element type of the "source" array in PC-Atoms.  Both elements are mandatory.
PC-AtomSource ::= SEQUENCE {
    aid            INTEGER,                      -- Atom Identifier for the R-Group Source
                                                 --   Note: Atom ID's must be greater than "0"
    source         PC-MMDBSource                 -- Atom Specific MMDB Record
}


-- Specification of an Association between an Atom Identifier and an Integer Value
--   Element type of the "isotope" and "charge" arrays in PC-Atoms.
--   Both elements are mandatory.
PC-AtomInt ::= SEQUENCE {
    aid            INTEGER,                      -- Atom Identifier for the Value
                                                 --   Note: Atom ID's must be greater than "0"
    value          INTEGER                       -- Value Associated to the ID
}


-- Specification of an Association between an Atom Identifier and a String Value
--   Element type of the "label" and "comment" arrays in PC-Atoms and of
--   "atomlabels" in PC-Coordinates.  Both elements are mandatory.
PC-AtomString ::= SEQUENCE {
    aid            INTEGER,                      -- Atom Identifier for the Value
                                                 --   Note: Atom ID's must be greater than "0"
    value          VisibleString                 -- Value Associated to the ID
}


-- Rudimentary Atom Electronic Configuration Designation
--   Element type of the "radical" array in PC-Atoms.  Both elements are mandatory.
--   Values 1-8 name the open-shell multiplicity; 255 marks a closed-shell atom.
PC-AtomRadical ::= SEQUENCE {
    aid            INTEGER,                      -- Atom Identifier for the Value
                                                 --   Note: Atom ID's must be greater than "0"
    type           INTEGER {                     -- Type of Atom Radical
                       singlet    (1),           -- Open-Shell Singlet
                       doublet    (2),           -- Open-Shell Doublet
                       triplet    (3),           -- Open-Shell Triplet
                       quartet    (4),           -- Open-Shell Quartet
                       quintet    (5),           -- Open-Shell Quintet
                       hextet     (6),           -- Open-Shell Hextet
                       heptet     (7),           -- Open-Shell Heptet
                       octet      (8),           -- Open-Shell Octet
                       none     (255)            -- Closed-Shell Singlet
                   }
}


-- Element Information [which may contain "illegal" element values]
--   Named values 1-111 are atomic numbers, each named by its lowercase element
--   symbol.  Values 252-255 are reserved for non-element placeholders.
PC-Element ::= INTEGER {
    -- Illegal Atom Numbers that may be Interpreted to be something else
    a  (255),                                    -- Unspecified Atom (Asterisk)
    d  (254),                                    -- Dummy Atom
    r  (253),                                    -- Rgroup Label
    lp (252),                                    -- Lone Pair

    -- Elements
    h  (1), he (2), li (3), be (4), b  (5),
    c  (6), n  (7), o  (8), f  (9), ne(10),
    na(11), mg(12), al(13), si(14), p (15),
    s (16), cl(17), ar(18), k (19), ca(20),
    sc(21), ti(22), v (23), cr(24), mn(25),
    fe(26), co(27), ni(28), cu(29), zn(30),
    ga(31), ge(32), as(33), se(34), br(35),
    kr(36), rb(37), sr(38), y (39), zr(40),
    nb(41), mo(42), tc(43), ru(44), rh(45),
    pd(46), ag(47), cd(48), in(49), sn(50),
    sb(51), te(52), i (53), xe(54), cs(55),
    ba(56), la(57), ce(58), pr(59), nd(60),
    pm(61), sm(62), eu(63), gd(64), tb(65),
    dy(66), ho(67), er(68), tm(69), yb(70),
    lu(71), hf(72), ta(73), w (74), re(75),
    os(76), ir(77), pt(78), au(79), hg(80),
    tl(81), pb(82), bi(83), po(84), at(85),
    rn(86), fr(87), ra(88), ac(89), th(90),
    pa(91), u (92), np(93), pu(94), am(95),
    cm(96), bk(97), cf(98), es(99), fm(100),
    md(101), no(102), lr(103), rf(104), db(105),
    sg(106), bh(107), hs(108), mt(109), ds(110),
    rg(111)
}


-- Bond Description Information  (in Parallel Arrays)
--   Entry i of "order" is the bond type between atoms aid1[i] and aid2[i].
--   All three arrays are mandatory.
PC-Bonds ::= SEQUENCE {
    --  [Note: Parallel Arrays must be kept Synchronized]
    aid1           SEQUENCE OF INTEGER,          -- Atom1 Identifier (vector)
                                                 --   Note: Atom ID's must be greater than "0"
    aid2           SEQUENCE OF INTEGER,          -- Atom2 Identifier (vector)
                                                 --   Note: Atom ID's must be greater than "0"
    order          SEQUENCE OF PC-BondType       -- Bond Type Information (vector)
}


-- Bond Type Information
--   Element type of the "order" array in PC-Bonds.  Values 1-4 are the
--   covalent bond orders; 5-7 name non-covalent or special bond kinds.
PC-BondType ::= INTEGER {
    single         (1),                          -- Single Bond
    double         (2),                          -- Double Bond
    triple         (3),                          -- Triple Bond
    quadruple      (4),                          -- Quadruple Bond
    dative         (5),                          -- Dative Bond
    complex        (6),                          -- Complex Bond
    ionic          (7),                          -- Ionic Bond
    unknown      (255)                           -- Unknown/Unspecified Connectivity
}


-- Allowed Stereogenic Center Types
--   [Using IUPAC Stereogenic Center recommendations and terminology]
--   A CHOICE: each stereocenter is described by exactly one of the geometry
--   specific record types below.  PC-Compound holds a list of these ("stereo").
PC-StereoCenter ::= CHOICE {
    tetrahedral    PC-StereoTetrahedral,         -- Tetrahedral (SP3) StereoCenter
    planar         PC-StereoPlanar,              -- Planar (SP2) StereoCenter
    squareplanar   PC-StereoSquarePlanar,        -- Square Planar (SP4) StereoCenter
    octahedral     PC-StereoOctahedral,          -- Octahedral (OC-6) / Square Pyramid (SPY-5) StereoCenters
    bipyramid      PC-StereoTrigonalBiPyramid,   -- Trigonal BiPyramid (TBPY-4 and TBPY-5) StereoCenters
    tshape         PC-StereoTShape,              -- T-Shaped (TS-3) StereoCenters
    pentagonal     PC-StereoPentagonalBiPyramid  -- Pentagonal BiPyramid (PBPY-7) StereoCenters
}


-- SP3 Tetrahedral StereoCenter, Trigonal Pyramid Stereogenic Center,
--   Cumulenic StereoCenter (linear systems of an even number of double bonds),
--   or Hindered biaryl stereocenter (all biaryls have hindered rotation, in
--   that the ortho-hydrogens prevent coplanarity to some extent)
--   [Using IUPAC Stereogenic Center recommendations and terminology]
--   [Note: "-1" can be used for the Atom Identifier to represent a lone-pair or implicit hydrogen]
PC-StereoTetrahedral ::= SEQUENCE {
    -- Atom Identifiers.  Each Atom ID must be greater than "0"; as noted
    -- above, "-1" may be used to represent a lone-pair or implicit hydrogen.
    center  INTEGER,    -- Atom Center
    above   INTEGER,    -- Atom Above the Plane
    top     INTEGER,    -- Atom In-Plane and at the Top
    bottom  INTEGER,    -- Atom In-Plane and at the Bottom
    below   INTEGER,    -- Atom Below the Plane

    -- StereoCenter Designation
    parity  INTEGER {
                clockwise        (1),
                counterclockwise (2),
                any              (3),
                unknown          (255)
            } OPTIONAL,

    -- Type of StereoCenter; Tetrahedral, if not specified
    type    INTEGER {
                tetrahedral (1),    -- Tetrahedral StereoCenter
                cumulenic   (2),    -- Cumulenic StereoCenter
                biaryl      (3)     -- Biaryl StereoCenter
            } OPTIONAL
}


-- SP2 Planar Stereogenic Center, Cumulenic StereoCenter (linear systems of an
--   odd number of double bonds present planar stereochemistry)
--   [Using IUPAC Stereogenic Center recommendations and terminology]
--   [Note: "-1" can be used for the Atom Identifier to represent a lone-pair or implicit hydrogen]
PC-StereoPlanar ::= SEQUENCE {
    -- Atom Identifiers.  Each Atom ID must be greater than "0"; as noted
    -- above, "-1" may be used to represent a lone-pair or implicit hydrogen.
    left     INTEGER,    -- Left Double Bond Atom
    ltop     INTEGER,    -- Top Atom attached to the Left Double Bond Atom
    lbottom  INTEGER,    -- Bottom Atom attached to the Left Double Bond Atom
    right    INTEGER,    -- Right Double Bond Atom
    rtop     INTEGER,    -- Top Atom attached to the Right Double Bond Atom
    rbottom  INTEGER,    -- Bottom Atom attached to the Right Double Bond Atom

    -- StereoCenter Designation
    parity   INTEGER {
                 same     (1),
                 opposite (2),
                 any      (3),
                 unknown  (255)
             } OPTIONAL,

    -- Type of StereoCenter; SP2 Planar, if not specified
    type     INTEGER {
                 planar    (1),    -- SP2 Planar StereoCenter
                 cumulenic (2)     -- Cumulenic StereoCenter
             } OPTIONAL
}


-- Square Planar (SP4) StereoCenters
--   [Using IUPAC Stereogenic Center recommendations and terminology]
--   [Note: "-1" can be used for the Atom Identifier to represent a lone-pair or implicit hydrogen]
PC-StereoSquarePlanar ::= SEQUENCE {
    -- Atom Identifiers.  Each Atom ID must be greater than "0"; as noted
    -- above, "-1" may be used to represent a lone-pair or implicit hydrogen.
    center  INTEGER,    -- Atom Center
    lbelow  INTEGER,    -- Left Below Plane Atom
    rbelow  INTEGER,    -- Right Below Plane Atom
    labove  INTEGER,    -- Left Above Plane Atom
    rabove  INTEGER,    -- Right Above Plane Atom

    -- StereoCenter Type
    parity  INTEGER {
                u-shape (1),      -- U shaped isomer (labove-lbelow-rbelow-rabove)
                z-shape (2),      -- Z shaped isomer (labove-rabove-lbelow-rbelow)
                x-shape (3),      -- X shaped isomer (labove-rbelow-rabove-lbelow)
                any     (4),      -- Nonspecific mixture of isomers
                unknown (255)
            } OPTIONAL
}


-- Octahedral (OC-6) and Square Pyramid (SPY-5) StereoCenters
--   [Using IUPAC Stereogenic Center recommendations and terminology]
--   [Note: "-1" can be used for the Atom Identifier to represent a lone-pair or implicit hydrogen]
PC-StereoOctahedral ::= SEQUENCE {
    -- Atom Identifiers.  Each Atom ID must be greater than "0"; as noted
    -- above, "-1" may be used to represent a lone-pair or implicit hydrogen.
    center  INTEGER,    -- Atom Center
    top     INTEGER,    -- Atom In-Plane and at the Top
    bottom  INTEGER,    -- Atom In-Plane and at the Bottom
    labove  INTEGER,    -- Atom Above the Plane on the Left
    lbelow  INTEGER,    -- Atom Below the Plane on the Left
    rabove  INTEGER,    -- Atom Above the Plane on the Right
    rbelow  INTEGER     -- Atom Below the Plane on the Right
}


-- Trigonal BiPyramid (TBPY-4 and TBPY-5) StereoCenters
--   [Using IUPAC Stereogenic Center recommendations and terminology]
--   [Note: "-1" can be used for the Atom Identifier to represent a lone-pair or implicit hydrogen]
PC-StereoTrigonalBiPyramid ::= SEQUENCE {
    -- Atom Identifiers.  Each Atom ID must be greater than "0"; as noted
    -- above, "-1" may be used to represent a lone-pair or implicit hydrogen.
    center  INTEGER,    -- Atom Center
    above   INTEGER,    -- Atom Above the Plane
    below   INTEGER,    -- Atom Below the Plane
    top     INTEGER,    -- Atom In-Plane and at the Top
    bottom  INTEGER,    -- Atom In-Plane and at the Bottom
    right   INTEGER     -- Atom In-Plane and to the Right
}


-- T-Shaped (TS-3) StereoCenters
--   [Using IUPAC Stereogenic Center recommendations and terminology]
--   [Note: "-1" can be used for the Atom Identifier to represent a lone-pair or implicit hydrogen]
PC-StereoTShape ::= SEQUENCE {
    -- Atom Identifiers.  Each Atom ID must be greater than "0"; as noted
    -- above, "-1" may be used to represent a lone-pair or implicit hydrogen.
    center  INTEGER,    -- Atom Center
    top     INTEGER,    -- Atom In-Plane and at the Top
    bottom  INTEGER,    -- Atom In-Plane and at the Bottom
    above   INTEGER     -- Atom Above the Plane
}


-- Pentagonal BiPyramid (PBPY-7) StereoCenters
--   [Using IUPAC Stereogenic Center recommendations and terminology]
--   [Note: "-1" can be used for the Atom Identifier to represent a lone-pair or implicit hydrogen]
PC-StereoPentagonalBiPyramid ::= SEQUENCE {
    -- Atom Identifiers.  Each Atom ID must be greater than "0"; as noted
    -- above, "-1" may be used to represent a lone-pair or implicit hydrogen.
    center  INTEGER,    -- Atom Center
    top     INTEGER,    -- Atom In-Plane and at the Top
    bottom  INTEGER,    -- Atom In-Plane and at the Bottom
    left    INTEGER,    -- Atom In-Plane and at the Left
    labove  INTEGER,    -- Atom Above the Plane on the Left
    lbelow  INTEGER,    -- Atom Below the Plane on the Left
    rabove  INTEGER,    -- Atom Above the Plane on the Right
    rbelow  INTEGER     -- Atom Below the Plane on the Right
}

END

-- proj.asn
--$Revision: 6.3 $
--****************************************************************
--
--  NCBI Project Definition Module
--  by Jim Ostell and Jonathan Kans, 1998
--
--****************************************************************

NCBI-Project DEFINITIONS ::=
BEGIN

EXPORTS Project, Project-item;

IMPORTS Date FROM NCBI-General
        PubMedId FROM NCBI-Biblio
        Seq-id, Seq-loc FROM NCBI-Seqloc
        Seq-annot, Pubdesc FROM NCBI-Sequence
        Seq-entry FROM NCBI-Seqset
        Pubmed-entry FROM NCBI-PubMed;

-- A project: an optional description followed by the project data
Project ::= SEQUENCE {
    descr  Project-descr OPTIONAL ,    -- ids, name and descriptors
    data   Project-item                -- the single chosen kind of content
}

-- The content of a Project: exactly one of the collections below
Project-item ::= CHOICE {
    -- sets of integer uids
    pmuid      SET OF INTEGER ,
    protuid    SET OF INTEGER ,
    nucuid     SET OF INTEGER ,
    sequid     SET OF INTEGER ,
    genomeuid  SET OF INTEGER ,
    structuid  SET OF INTEGER ,

    -- sets of typed identifiers (PubMedId or Seq-id)
    pmid       SET OF PubMedId ,
    protid     SET OF Seq-id ,
    nucid      SET OF Seq-id ,
    seqid      SET OF Seq-id ,
    genomeid   SET OF Seq-id ,
    structid   NULL ,                  -- carries no data

    -- sets of complete entries
    pment      SET OF Pubmed-entry ,
    protent    SET OF Seq-entry ,
    nucent     SET OF Seq-entry ,
    seqent     SET OF Seq-entry ,
    genomeent  SET OF Seq-entry ,
    structent  NULL ,                  -- carries no data

    -- annotations, locations and nested projects
    seqannot   SET OF Seq-annot ,
    loc        SET OF Seq-loc ,
    proj       SET OF Project
}

-- Description of a Project: its ids plus an optional name and descriptors
Project-descr ::= SEQUENCE {
    id     SET OF Project-id ,           -- one or more project identifiers
    name   VisibleString OPTIONAL ,      -- project name
    descr  SET OF Projdesc OPTIONAL      -- descriptors (see Projdesc)
}

-- One descriptor attached to a Project-descr
Projdesc ::= CHOICE {
    pub      Pubdesc ,          -- a publication descriptor
    date     Date ,             -- a date
    comment  VisibleString ,    -- a text comment
    title    VisibleString      -- a text title
}

-- Project identifier, carried as a string (element type of Project-descr.id)
Project-id ::= VisibleString

END


-- pub.asn
--$Revision: 6.0 $
--********************************************************************
--
--  Publication common set
--  James Ostell, 1990
--
--  This is the base class definitions for Publications of all sorts
--
--  support for PubMedId added in 1996
--********************************************************************

NCBI-Pub DEFINITIONS ::=
BEGIN

EXPORTS Pub, Pub-set, Pub-equiv;

IMPORTS Medline-entry FROM NCBI-Medline
        Cit-art, Cit-jour, Cit-book, Cit-proc, Cit-pat, Id-pat, Cit-gen,
        Cit-let, Cit-sub, PubMedId FROM NCBI-Biblio;

-- A single publication, expressed in exactly one of the forms below
Pub ::= CHOICE {
    gen      Cit-gen ,          -- general or generic unparsed
    sub      Cit-sub ,          -- submission
    medline  Medline-entry ,
    muid     INTEGER ,          -- medline uid
    article  Cit-art ,
    journal  Cit-jour ,
    book     Cit-book ,
    proc     Cit-proc ,         -- proceedings of a meeting
    patent   Cit-pat ,
    pat-id   Id-pat ,           -- identify a patent
    man      Cit-let ,          -- manuscript, thesis, or letter
    equiv    Pub-equiv,         -- to cite a variety of ways
    pmid     PubMedId           -- PubMedId
}

-- A set of Pub values that all refer to the same citation
Pub-equiv ::= SET OF Pub   -- equivalent identifiers for same citation

-- A collection of publications, all held in one chosen representation
Pub-set ::= CHOICE {
    pub      SET OF Pub ,
    medline  SET OF Medline-entry ,
    article  SET OF Cit-art ,
    journal  SET OF Cit-jour ,
    book     SET OF Cit-book ,
    proc     SET OF Cit-proc ,      -- proceedings of a meeting
    patent   SET OF Cit-pat
}

END


-- pubmed.asn
--$Revision: 6.0 $
--**********************************************************************
--
--  PUBMED data definitions
--
--**********************************************************************

NCBI-PubMed DEFINITIONS ::=
BEGIN

EXPORTS Pubmed-entry, Pubmed-url;

IMPORTS PubMedId FROM NCBI-Biblio
        Medline-entry FROM NCBI-Medline;

-- A PubMed entry.  Only pmid is mandatory; every other field is OPTIONAL.
Pubmed-entry ::= SEQUENCE {
    pmid       PubMedId,                     -- PUBMED records must include the PubMedId
    medent     Medline-entry OPTIONAL,       -- Medline entry information
    publisher  VisibleString OPTIONAL,       -- Publisher name
    urls       SET OF Pubmed-url OPTIONAL,   -- List of publisher URLs
    pubid      VisibleString OPTIONAL        -- Publisher's article identifier
}

-- One URL of a Pubmed-entry, optionally tagged with a location code
Pubmed-url ::= SEQUENCE {
    -- Location code
    location  VisibleString OPTIONAL,

    -- Selected URL for location
    url       VisibleString
}

END

-- remap.asn
--$Id: remap.asn,v 1.2 2004/07/28 13:43:33 jcherry Exp $********************************************
--
--  remap.asn
--   Version 1
--
--   API for remapping locations on sequences
--
--   Author: Josh Cherry
--
--***************************************************************

NCBI-Remap DEFINITIONS ::=
BEGIN

IMPORTS Seq-loc FROM NCBI-Seqloc;


-- Integer time stamp; used as the type of Remap-reply.dt
Remap-dt ::= INTEGER                   -- a date/time stamp
-- Database name carried as a string
Remap-db-id ::= VisibleString          -- database name


  --***************************************
  --  Remap Request types
  --***************************************
       --****************************************
       -- The basic request wrapper leaves space for a version, which
       --   allows the server to support older clients
       -- The tool parameter allows us to log the client types for
       --   debugging and tuning
       --****************************************
       
-- A standard request: the request proper plus spec version and client tool
Remap-request ::= SEQUENCE {
  -- the actual request
  request  RMRequest ,

  -- ASN1 spec version
  version  INTEGER ,

  -- tool making request
  tool     VisibleString OPTIONAL
}

-- The request types a client may send
RMRequest ::= CHOICE {
  -- do the actual remapping
  remap             Remap-query ,

  -- what builds can this be mapped to?
  maps-to-builds    VisibleString ,

  -- what builds can be mapped to this?
  maps-from-builds  VisibleString ,

  -- all the builds the server knows of
  all-builds        NULL
}

-- A remapping query: source build, target build and the locations to move
Remap-query ::= SEQUENCE {
  -- build to map from
  from-build  VisibleString ,

  -- build to map to
  to-build    VisibleString ,

  -- the locations to remap
  locs        SEQUENCE OF Seq-loc
}

  --**********************************************************
  -- Replies from the server
  --  all replies contain the date/time stamp when they were executed
  --**********************************************************

-- A server reply: the reply proper plus time stamp and server information
Remap-reply ::= SEQUENCE {
  -- the actual reply
  reply   RMReply ,

  -- date/time stamp from server
  dt      Remap-dt ,

  -- server version info
  server  VisibleString ,

  -- possibly a message to the user
  msg     VisibleString OPTIONAL
}

-- The reply types the server may return
RMReply ::= CHOICE {
  -- if nothing can be returned
  error             VisibleString ,

  -- result of actual remapping
  remap             Remap-result ,

  -- all the builds that the server knows how to map this build to
  maps-to-builds    SEQUENCE OF VisibleString ,

  -- all the builds that the server knows how to map to this build
  maps-from-builds  SEQUENCE OF VisibleString ,

  -- all builds that the server knows of
  all-builds        SEQUENCE OF VisibleString
}

-- Payload of the RMReply "remap" alternative
Remap-result ::= SEQUENCE OF Seq-loc  -- remapped locations

END


-- scoremat.asn
--$Id: scoremat.asn 145226 2008-11-07 19:39:10Z camacho $
-- ===========================================================================
--
--                            PUBLIC DOMAIN NOTICE
--               National Center for Biotechnology Information
--
--  This software/database is a "United States Government Work" under the
--  terms of the United States Copyright Act.  It was written as part of
--  the author's official duties as a United States Government employee and
--  thus cannot be copyrighted.  This software/database is freely available
--  to the public for use. The National Library of Medicine and the U.S.
--  Government have not placed any restriction on its use or reproduction.
--
--  Although all reasonable efforts have been taken to ensure the accuracy
--  and reliability of the software and data, the NLM and the U.S.
--  Government do not and cannot warrant the performance or results that
--  may be obtained by using this software or data. The NLM and the U.S.
--  Government disclaim all warranties, express or implied, including
--  warranties of performance, merchantability or fitness for any particular
--  purpose.
--
--  Please cite the author in any work or product based on this material.
--
-- ===========================================================================
--
-- Author:  Christiam Camacho
--
-- File Description:
--      ASN.1 definitions for scoring matrix
--
-- ===========================================================================

NCBI-ScoreMat DEFINITIONS ::= BEGIN

EXPORTS    Pssm, PssmIntermediateData, PssmFinalData, 
           PssmParameters, PssmWithParameters;
    
IMPORTS    Object-id   FROM NCBI-General
           Seq-entry   FROM NCBI-Seqset;

-- a rudimentary block/core-model, to be used with block-based alignment 
-- routines and threading

-- A typed property of a block; used in CoreBlock.property.
-- NOTE(review): intvalue / textvalue presumably carry the value of the
-- property named by "type"; confirm against the consumers of this type.
BlockProperty ::= SEQUENCE {
  type       INTEGER {
               unassigned  (0),
               threshold   (1),      -- score threshold for heuristics
               minscore    (2),      -- observed minimum score in CD
               maxscore    (3),      -- observed maximum score in CD
               meanscore   (4),      -- observed mean score in CD
               variance    (5),      -- observed score variance
               name        (10),     -- just name the block
               is-optional (20),     -- block may not have to be used
               other       (255)
             },
  intvalue   INTEGER OPTIONAL,
  textvalue  VisibleString OPTIONAL
}

-- One block of a core model, located on the query
CoreBlock ::= SEQUENCE {
  -- begin of block on query
  start     INTEGER,

  -- end of block on query
  stop      INTEGER,

  -- optional N-terminal extension
  minstart  INTEGER OPTIONAL,

  -- optional C-terminal extension
  maxstop   INTEGER OPTIONAL,

  -- optional list of properties attached to the block
  property  SEQUENCE OF BlockProperty OPTIONAL
}

-- Length bounds for an unaligned region; both bounds have defaults
LoopConstraint ::= SEQUENCE {
  -- minimum length of unaligned region
  minlength  INTEGER DEFAULT 0,

  -- maximum length of unaligned region
  maxlength  INTEGER DEFAULT 100000
}

-- A core definition: the blocks plus the loop constraints around them
CoreDef ::= SEQUENCE {
  -- number of core elements/blocks
  nblocks  INTEGER,

  -- nblocks locations
  blocks   SEQUENCE OF CoreBlock,

  -- (nblocks+1) constraints
  loops    SEQUENCE OF LoopConstraint
}

-- ===========================================================================
-- PSI-BLAST, formatrpsdb, RPS-BLAST workflow:
-- ===========================================
--
-- Two possible inputs to PSI-BLAST and formatrpsdb:
-- 1) PssmWithParameters where pssm field contains intermediate PSSM data
--    (matrix of frequency ratios)
-- 2) PssmWithParameters where pssm field contains final PSSM data (matrix of
--    scores and statistical parameters) - such as written by cddumper
--
-- In case 1, PSI-BLAST's PSSM engine is invoked to create the PSSM and perform
-- the PSI-BLAST search or build the PSSM to then build the RPS-BLAST database.
-- In case 2, PSI-BLAST's PSSM engine is not invoked and the matrix of scores
-- and statistical parameters are used to perform the search in PSI-BLAST, and
-- the same data and the data in PssmWithParameters::params::rpsdbparams are
-- used to build the PSSM and ultimately the RPS-BLAST database.
-- (PssmWithParameters is abbreviated as PssmWithParams in the diagram below.)
-- 
-- 
--                 reads    ++++++++++++++ writes
-- PssmWithParams  ====>    + PSI-BLAST  + =====> PssmWithParams
--                          ++++++++++++++             |  ^
--         ^                                           |  |
--         |                                           |  |
--         +===========================================+  |
--                                                     |  |
--         +===========================================+  |
--         |                                              |
-- reads   |                                              | 
--         v                                              |
--  +++++++++++++++ writes +++++++++++++++++++++++        |
--  | formatrpsdb | =====> | RPS-BLAST databases |        |
--  +++++++++++++++        +++++++++++++++++++++++        |
--                                   ^                    |
--                                   |                    |
--                                   | reads              |
--                             +++++++++++++              |
--                             | RPS-BLAST |              |
--                             +++++++++++++              |
--                                                        |
--       reads  ++++++++++++               writes         |
--  Cdd ======> | cddumper | =============================+
--              ++++++++++++
--
-- ===========================================================================

-- The PSSM's scores together with their associated statistical parameters.
-- The scores must be stored with the dimensions and in the order given by
-- Pssm::numRows, Pssm::numColumns, and Pssm::byRow
PssmFinalData ::= SEQUENCE {

    -- PSSM's scores
    scores          SEQUENCE OF INTEGER,

    -- Karlin & Altschul parameters produced during the PSSM's calculation
    lambda          REAL,
    kappa           REAL,
    h               REAL,

    -- Scaling factor used to obtain more precision when building the PSSM
    -- (i.e.: scores are scaled by this value).
    -- By default, PSI-BLAST's PSSM engine generates PSSMs which are not
    -- scaled-up.  However, if PSI-BLAST is given a scaled-up PSSM (indicated
    -- by a scalingFactor greater than 1), it will scale the PSSM down to
    -- perform the initial stages of the search with it.
    -- N.B.: When building RPS-BLAST databases, if formatrpsdb is provided
    -- scaled-up PSSMs, it will ensure that all PSSMs used to build the
    -- RPS-BLAST database are scaled by the same factor (otherwise, RPS-BLAST
    -- will silently produce incorrect results).
    scalingFactor   INTEGER DEFAULT 1,

    -- Ungapped Karlin & Altschul parameters produced during the PSSM's
    -- calculation; each is optional
    lambdaUngapped  REAL OPTIONAL,
    kappaUngapped   REAL OPTIONAL,
    hUngapped       REAL OPTIONAL
}

-- The PSSM's intermediate data, from which the PSSM's scores and statistical
-- parameters are created.  Values must be stored with the dimensions and in
-- the order given by Pssm::numRows, Pssm::numColumns, and Pssm::byRow.
--
-- Fields marked [diagnostics] are needed for diagnostics information only
-- (i.e.: the psiblast option -out_ascii_pssm).
PssmIntermediateData ::= SEQUENCE {

    -- Observed residue frequencies (or counts) per position of the PSSM
    -- (prior to application of pseudocounts)
    resFreqsPerPos          SEQUENCE OF INTEGER OPTIONAL,

    -- [diagnostics] Weighted observed residue frequencies per position of
    -- the PSSM (N.B.: each position's weights should add up to 1.0).
    -- This field corresponds to f_i (f sub i) in equation 2 of
    -- Nucleic Acids Res. 2001 Jul 15;29(14):2994-3005.
    weightedResFreqsPerPos  SEQUENCE OF REAL OPTIONAL,

    -- PSSM's frequency ratios (the only mandatory field)
    freqRatios              SEQUENCE OF REAL,

    -- [diagnostics] Information content per position of the PSSM
    informationContent      SEQUENCE OF REAL OPTIONAL,

    -- [diagnostics] Weights for columns of the PSSM without gaps
    gaplessColumnWeights    SEQUENCE OF REAL OPTIONAL,

    -- [diagnostics] Used in sequence weights computation
    sigma                   SEQUENCE OF REAL OPTIONAL,

    -- [diagnostics] Length of the aligned regions per position of the
    -- query sequence
    intervalSizes           SEQUENCE OF INTEGER OPTIONAL,

    -- [diagnostics] Number of matching sequences per position of the PSSM
    -- (including the query)
    numMatchingSeqs         SEQUENCE OF INTEGER OPTIONAL
}

-- Position-specific scoring matrix
--
-- Columns: column indices refer to positions of the query/master sequence,
-- so the number of columns (N) equals the length of the query/master
-- sequence.
-- Rows: row indices refer to individual amino acid types, so the number of
-- rows (M) equals the number of different residues in the alphabet we use.
-- Consequently, row labels are amino acid identifiers.
--
-- Storage: PSSMs are linear arrays of integers.  By default they are stored
-- column-by-column: M values for the first column, followed by M values for
-- the second column, and so on.  To give external applications some
-- flexibility, the boolean field "byRow" specifies the storage order.
Pssm ::= SEQUENCE {

    -- Is this a protein or nucleotide scoring matrix?
    isProtein         BOOLEAN DEFAULT TRUE,

    -- PSSM identifier
    identifier        Object-id OPTIONAL,

    -- The dimensions of the matrix are returned so the client can
    -- verify that all data was received.
    numRows           INTEGER,      -- number of rows
    numColumns        INTEGER,      -- number of columns

    -- rowLabels notes the order of residue types so that it can be
    -- cross-checked between applications.
    -- If this field is not given, the matrix values are presented in the
    -- order of the alphabet: ncbistdaa is used for protein, ncbi4na for
    -- nucleotide.  For proteins the values returned correspond to
    -- (-,-), (-,A), (-,B), (-,C) ... (A,-), (A,A), (A,B), (A,C) ...
    rowLabels         SEQUENCE OF VisibleString OPTIONAL,

    -- are matrices stored row by row?
    byRow             BOOLEAN DEFAULT FALSE,

    -- PSSM representative sequence (master)
    query             Seq-entry OPTIONAL,

    -- Both intermediateData and finalData can be provided, but at least one
    -- of them must be provided.
    -- N.B.: by default PSI-BLAST will return the PSSM in its
    -- PssmIntermediateData representation.

    -- Intermediate data for the PSSM
    intermediateData  PssmIntermediateData OPTIONAL,

    -- Final representation for the PSSM
    finalData         PssmFinalData OPTIONAL
}

-- Parameters set at creation time of the PSSM; this structure is used to
-- create the RPS-BLAST database auxiliary file (*.aux).
-- In addition, formatrpsdb uses the matrixName field to build a PSSM from
-- a Pssm structure which only contains PssmIntermediateData.
FormatRpsDbParameters ::= SEQUENCE {

    -- name of the underlying score matrix whose frequency ratios were
    -- used in PSSM construction (e.g.: BLOSUM62)
    matrixName  VisibleString,

    -- gap penalties corresponding to the matrix above
    gapOpen     INTEGER OPTIONAL,     -- gap opening penalty
    gapExtend   INTEGER OPTIONAL      -- gap extension penalty
}

-- Populated by the PSSM engine of PSI-BLAST; the original source for these
-- values is the set of PSI-BLAST options specified through the BLAST
-- options API
PssmParameters ::= SEQUENCE {

    -- pseudocount constant used for PSSM. This field corresponds to beta in
    -- equation 2 of Nucleic Acids Res. 2001 Jul 15;29(14):2994-3005.
    pseudocount  INTEGER OPTIONAL,

    -- data needed by formatrpsdb to create RPS-BLAST databases. matrixName is
    -- populated by PSI-BLAST
    rpsdbparams  FormatRpsDbParameters OPTIONAL,

    -- alignment constraints needed by sequence-structure threader
    -- and other global or local block-alignment algorithms
    constraints  CoreDef OPTIONAL
}

-- Envelope that bundles a PSSM with the parameters used to create it.
-- Intended for PSI-BLAST, formatrpsdb, and the structure group.
PssmWithParameters ::= SEQUENCE {
    -- Applicable to PSI-BLAST and formatrpsdb.
    -- If this field carries both intermediate and final PSSM data, the
    -- final data (the matrix of scores and its associated statistical
    -- parameters) takes precedence and is what further processing uses.
    -- Rationale: the scores and statistical parameters might have been
    -- calculated by other applications, and it might not be possible to
    -- recreate them with PSI-BLAST's PSSM engine.
    pssm Pssm,

    -- The rpsdbparams member of this field specifies option values for
    -- processing by formatrpsdb; when they are not set, the command line
    -- defaults of formatrpsdb are applied. PSI-BLAST uses this field to
    -- verify that the underlying scoring matrix used to BUILD the PSSM is
    -- the same as the one being specified through the BLAST Options API.
    -- If this field is omitted no verification is performed, so keep track
    -- of which matrix was used to build the PSSM, or else the results
    -- produced by PSI-BLAST will be unreliable.
    params PssmParameters OPTIONAL
}

END

-- seq.asn
--$Revision: 138450 $
--**********************************************************************
--
--  NCBI Sequence elements
--  by James Ostell, 1990
--  Version 3.0 - June 1994
--
--**********************************************************************

NCBI-Sequence DEFINITIONS ::=
BEGIN

-- Types this module makes available to other modules. Each symbol is listed
-- once (Seqdesc previously appeared twice in this list).
EXPORTS Annotdesc, Annot-descr, Bioseq, GIBB-mol, Heterogen, MolInfo,
        Numbering, Pubdesc, Seq-annot, Seq-data, Seqdesc, Seq-descr, Seq-ext,
        Seq-hist, Seq-inst, Seq-literal, Delta-ext;

IMPORTS Date, Int-fuzz, Dbtag, Object-id, User-object FROM NCBI-General
        Seq-align FROM NCBI-Seqalign
        Seq-feat FROM NCBI-Seqfeat
        Seq-graph FROM NCBI-Seqres
        Pub-equiv FROM NCBI-Pub
        Org-ref FROM NCBI-Organism
        BioSource FROM NCBI-BioSource
        Seq-id, Seq-loc FROM NCBI-Seqloc
        GB-block FROM GenBank-General
        PIR-block FROM PIR-General
        EMBL-block FROM EMBL-General
        SP-block FROM SP-General
        PRF-block FROM PRF-General
        PDB-block FROM PDB-General
        Seq-table FROM NCBI-SeqTable;

--*** Sequence ********************************
--*

-- A sequence record: its identifiers, optional descriptors, the sequence
-- instance, and optional annotations.
Bioseq ::= SEQUENCE {
    id    SET OF Seq-id,              -- equivalent identifiers
    descr Seq-descr OPTIONAL,         -- descriptors
    inst  Seq-inst,                   -- the sequence data
    annot SET OF Seq-annot OPTIONAL   -- annotations (Seq-annot)
}

--*** Descriptors *****************************
--*

-- the descriptors of a sequence, as an unordered set of Seqdesc
Seq-descr ::= SET OF Seqdesc

-- A single sequence descriptor; each value is exactly one of the
-- alternatives below.
Seqdesc ::= CHOICE {
    mol-type    GIBB-mol,          -- type of molecule
    modif       SET OF GIBB-mod,   -- modifiers
    method      GIBB-method,       -- sequencing method
    name        VisibleString,     -- a name for this sequence
    title       VisibleString,     -- a title for this sequence
    org         Org-ref,           -- if all from one organism
    comment     VisibleString,     -- a more extensive comment
    num         Numbering,         -- a numbering system
    maploc      Dbtag,             -- map location of this sequence
    pir         PIR-block,         -- PIR specific info
    genbank     GB-block,          -- GenBank specific info
    pub         Pubdesc,           -- a reference to the publication
    region      VisibleString,     -- overall region (globin locus)
    user        User-object,       -- user defined object
    sp          SP-block,          -- SWISSPROT specific info
    dbxref      Dbtag,             -- xref to other databases
    embl        EMBL-block,        -- EMBL specific information
    create-date Date,              -- date entry first created/released
    update-date Date,              -- date of last update
    prf         PRF-block,         -- PRF specific information
    pdb         PDB-block,         -- PDB specific information
    het         Heterogen,         -- cofactor, etc associated but not bound
    source      BioSource,         -- source of materials, includes Org-ref
    molinfo     MolInfo            -- info on the molecule and techniques
}

--******* NOTE:
--*       mol-type, modif, method, and org are consolidated and expanded
--*       in Org-ref, BioSource, and MolInfo in this specification. They
--*       will be removed in later specifications. Do not use them in
--*       the future. Instead expect the new structures.
--*
--***************************

--********************************************************************
--
-- MolInfo gives information on the
-- classification of the type and quality of the sequence
--
-- WARNING: this will replace GIBB-mol, GIBB-mod, GIBB-method
--
--********************************************************************

-- Classifies a sequence by molecule type (biomol), by the technique that
-- produced it (tech, optionally explained in techexp), and by completeness.
MolInfo ::= SEQUENCE {
    biomol INTEGER {
        unknown (0),
        genomic (1),
        pre-RNA (2),           -- precursor RNA of any sort really
        mRNA (3),
        rRNA (4),
        tRNA (5),
        snRNA (6),
        scRNA (7),
        peptide (8),
        other-genetic (9),     -- other genetic material
        genomic-mRNA (10),     -- reported a mix of genomic and cdna sequence
        cRNA (11),             -- viral RNA genome copy intermediate
        snoRNA (12),           -- small nucleolar RNA
        transcribed-RNA (13),  -- transcribed RNA other than existing classes
        ncRNA (14),
        tmRNA (15),
        other (255)
    } DEFAULT unknown,

    tech INTEGER {
        unknown (0),
        standard (1),            -- standard sequencing
        est (2),                 -- Expressed Sequence Tag
        sts (3),                 -- Sequence Tagged Site
        survey (4),              -- one-pass genomic sequence
        genemap (5),             -- from genetic mapping techniques
        physmap (6),             -- from physical mapping techniques
        derived (7),             -- derived from other data, not a primary entity
        concept-trans (8),       -- conceptual translation
        seq-pept (9),            -- peptide was sequenced
        both (10),               -- concept transl. w/ partial pept. seq.
        seq-pept-overlap (11),   -- sequenced peptide, ordered by overlap
        seq-pept-homol (12),     -- sequenced peptide, ordered by homology
        concept-trans-a (13),    -- conceptual transl. supplied by author
        htgs-1 (14),             -- unordered High Throughput sequence contig
        htgs-2 (15),             -- ordered High Throughput sequence contig
        htgs-3 (16),             -- finished High Throughput sequence
        fli-cdna (17),           -- full length insert cDNA
        htgs-0 (18),             -- single genomic reads for coordination
        htc (19),                -- high throughput cDNA
        wgs (20),                -- whole genome shotgun sequencing
        barcode (21),            -- barcode of life project
        composite-wgs-htgs (22), -- composite of WGS and HTGS
        tsa (23),                -- transcriptome shotgun assembly
        other (255)              -- explain in the techexp field below
    } DEFAULT unknown,

    -- explanation if tech not enough
    techexp VisibleString OPTIONAL,

    -- Most records do not indicate completeness. For genomes, assume the
    -- sequences are incomplete unless specifically marked as complete.
    -- For mRNAs, assume the ends are not known exactly unless marked as
    -- having the left or right end.
    completeness INTEGER {
        unknown (0),
        complete (1),    -- complete biological entity
        partial (2),     -- partial but no details given
        no-left (3),     -- missing 5' or NH3 end
        no-right (4),    -- missing 3' or COOH end
        no-ends (5),     -- missing both ends
        has-left (6),    -- 5' or NH3 end present
        has-right (7),   -- 3' or COOH end present
        other (255)
    } DEFAULT unknown,

    -- identifies particular ncRNA
    gbmoltype VisibleString OPTIONAL
}


-- type of molecule represented
GIBB-mol ::= ENUMERATED {
    unknown (0),
    genomic (1),
    pre-mRNA (2),         -- precursor RNA of any sort really
    mRNA (3),
    rRNA (4),
    tRNA (5),
    snRNA (6),
    scRNA (7),
    peptide (8),
    other-genetic (9),    -- other genetic material
    genomic-mRNA (10),    -- reported a mix of genomic and cdna sequence
    other (255)
}
    
-- GenInfo Backbone modifiers
GIBB-mod ::= ENUMERATED {
    dna (0),
    rna (1),
    extrachrom (2),
    plasmid (3),
    mitochondrial (4),
    chloroplast (5),
    kinetoplast (6),
    cyanelle (7),
    synthetic (8),
    recombinant (9),
    partial (10),
    complete (11),
    mutagen (12),         -- subject of mutagenesis ?
    natmut (13),          -- natural mutant ?
    transposon (14),
    insertion-seq (15),
    no-left (16),         -- missing left end (5' for na, NH2 for aa)
    no-right (17),        -- missing right end (3' or COOH)
    macronuclear (18),
    proviral (19),
    est (20),             -- expressed sequence tag
    sts (21),             -- sequence tagged site
    survey (22),          -- one pass survey sequence
    chromoplast (23),
    genemap (24),         -- is a genetic map
    restmap (25),         -- is an ordered restriction map
    physmap (26),         -- is a physical map (not ordered restriction map)
    other (255)
}

-- sequencing methods
GIBB-method ::= ENUMERATED {
    concept-trans (1),      -- conceptual translation
    seq-pept (2),           -- peptide was sequenced
    both (3),               -- concept transl. w/ partial pept. seq.
    seq-pept-overlap (4),   -- sequenced peptide, ordered by overlap
    seq-pept-homol (5),     -- sequenced peptide, ordered by homology
    concept-trans-a (6),    -- conceptual transl. supplied by author
    other (255)
}
    
-- any display numbering system
Numbering ::= CHOICE {
    cont Num-cont,   -- continuous numbering
    enum Num-enum,   -- enumerated names for residues
    ref  Num-ref,    -- by reference to another sequence
    real Num-real    -- supports mapping to a float system
}
    
-- continuous display numbering system
Num-cont ::= SEQUENCE {
    -- number assigned to first residue
    refnum INTEGER DEFAULT 1,

    -- 0 used?
    has-zero BOOLEAN DEFAULT FALSE,

    -- ascending numbers?
    ascending BOOLEAN DEFAULT TRUE
}

-- any tags to residues
Num-enum ::= SEQUENCE {
    -- number of tags to follow
    num INTEGER,

    -- the tags
    names SEQUENCE OF VisibleString
}

-- numbering by reference to other sequences
Num-ref ::= SEQUENCE {
    -- type of reference
    type ENUMERATED {
        not-set (0),
        sources (1),   -- by segmented or const seq sources
        aligns (2)     -- by alignments given below
    },

    aligns Seq-align OPTIONAL
}

-- Mapping from the integer system used by Bioseq to a floating point
-- system: position = (a * int_position) + b
Num-real ::= SEQUENCE {
    a     REAL,
    b     REAL,
    units VisibleString OPTIONAL
}

-- how sequence presented in pub
Pubdesc ::= SEQUENCE {
    pub         Pub-equiv,                -- the citation(s)
    name        VisibleString OPTIONAL,   -- name used in paper
    fig         VisibleString OPTIONAL,   -- figure in paper
    num         Numbering OPTIONAL,       -- numbering from paper
    numexc      BOOLEAN OPTIONAL,         -- numbering problem with paper
    poly-a      BOOLEAN OPTIONAL,         -- poly A tail indicated in figure?
    maploc      VisibleString OPTIONAL,   -- map location reported in paper
    seq-raw     StringStore OPTIONAL,     -- original sequence from paper
    align-group INTEGER OPTIONAL,         -- this seq aligned with others in paper
    comment     VisibleString OPTIONAL,   -- any comment on this pub in context

    -- type of reference in a GenBank record
    reftype INTEGER {
        seq (0),         -- refers to sequence
        sites (1),       -- refers to unspecified features
        feats (2),       -- refers to specified features
        no-target (3)    -- nothing specified (EMBL)
    } DEFAULT seq
}

-- Free-text heterogen; used by Seqdesc.het (cofactor, etc associated but
-- not bound).
Heterogen ::= VisibleString       -- cofactor, prosthetic group, inhibitor, etc

--*** Instances of sequences *******************************
--*

-- the sequence data itself
Seq-inst ::= SEQUENCE {
    -- representation class
    repr ENUMERATED {
        not-set (0),   -- empty
        virtual (1),   -- no seq data
        raw (2),       -- continuous sequence
        seg (3),       -- segmented sequence
        const (4),     -- constructed sequence
        ref (5),       -- reference to another sequence
        consen (6),    -- consensus sequence or pattern
        map (7),       -- ordered map of any kind
        delta (8),     -- sequence made by changes (delta) to others
        other (255)
    },

    -- molecule class in living organism (note: cdna = rna)
    mol ENUMERATED {
        not-set (0),
        dna (1),
        rna (2),
        aa (3),
        na (4),        -- just a nucleic acid
        other (255)
    },

    -- length of sequence in residues
    length INTEGER OPTIONAL,

    -- length uncertainty
    fuzz Int-fuzz OPTIONAL,

    -- topology of molecule
    topology ENUMERATED {
        not-set (0),
        linear (1),
        circular (2),
        tandem (3),    -- some part of tandem repeat
        other (255)
    } DEFAULT linear,

    -- strandedness in living organism
    -- (default ds for DNA, ss for RNA, pept)
    strand ENUMERATED {
        not-set (0),
        ss (1),        -- single strand
        ds (2),        -- double strand
        mixed (3),
        other (255)
    } OPTIONAL,

    -- the sequence
    seq-data Seq-data OPTIONAL,

    -- extensions for special types
    ext Seq-ext OPTIONAL,

    -- sequence history
    hist Seq-hist OPTIONAL
}

--*** Sequence Extensions **********************************
--*  for representing more complex types
--*  const type uses Seq-hist.assembly

-- extension data for the more complex representation classes
Seq-ext ::= CHOICE {
    seg   Seg-ext,    -- segmented sequences
    ref   Ref-ext,    -- hot link to another sequence (a view)
    map   Map-ext,    -- ordered map of markers
    delta Delta-ext
}

-- segmented sequence extension: an ordered list of Seq-loc
Seg-ext ::= SEQUENCE OF Seq-loc

-- reference extension: the hot link to another sequence, given as a Seq-loc
Ref-ext ::= Seq-loc

-- map extension: the ordered markers, given as a list of Seq-feat
Map-ext ::= SEQUENCE OF Seq-feat

-- delta extension: an ordered list of Delta-seq pieces
Delta-ext ::= SEQUENCE OF Delta-seq

-- one piece of a Delta-ext
Delta-seq ::= CHOICE {
    -- point to a sequence
    loc Seq-loc,

    -- a piece of sequence
    literal Seq-literal
}

-- a piece of sequence given in place
Seq-literal ::= SEQUENCE {
    -- must give a length in residues
    length INTEGER,

    -- could be unsure
    fuzz Int-fuzz OPTIONAL,

    -- may have the data
    seq-data Seq-data OPTIONAL
}

--*** Sequence History Record ***********************************
--** assembly = records how seq was assembled from others
--** replaces = records sequences made obsolete by this one
--** replaced-by = this seq is made obsolete by another(s)

-- history of a sequence: assembly, replacement and deletion information
Seq-hist ::= SEQUENCE {
    -- how was this assembled?
    assembly SET OF Seq-align OPTIONAL,

    -- seq makes these seqs obsolete
    replaces Seq-hist-rec OPTIONAL,

    -- these seqs make this one obsolete
    replaced-by Seq-hist-rec OPTIONAL,

    -- deletion marker: either a plain flag or a date
    deleted CHOICE {
        bool BOOLEAN,
        date Date
    } OPTIONAL
}

-- one history entry: an optional date plus the sequence ids involved
Seq-hist-rec ::= SEQUENCE {
    date Date OPTIONAL,
    ids  SET OF Seq-id
}
    
--*** Various internal sequence representations ************
--*      all are controlled, fixed length forms

-- sequence representations
Seq-data ::= CHOICE {
    iupacna   IUPACna,     -- IUPAC 1 letter nuc acid code
    iupacaa   IUPACaa,     -- IUPAC 1 letter amino acid code
    ncbi2na   NCBI2na,     -- 2 bit nucleic acid code
    ncbi4na   NCBI4na,     -- 4 bit nucleic acid code
    ncbi8na   NCBI8na,     -- 8 bit extended nucleic acid code
    ncbipna   NCBIpna,     -- nucleic acid probabilities
    ncbi8aa   NCBI8aa,     -- 8 bit extended amino acid codes
    ncbieaa   NCBIeaa,     -- extended ASCII 1 letter aa codes
    ncbipaa   NCBIpaa,     -- amino acid probabilities
    ncbistdaa NCBIstdaa,   -- consecutive codes for std aas
    gap       Seq-gap      -- gap types
}

-- a gap, described by its type and optionally by its linkage
Seq-gap ::= SEQUENCE {
    type INTEGER {
        unknown (0),
        fragment (1),
        clone (2),
        short-arm (3),
        heterochromatin (4),
        centromere (5),
        telomere (6),
        repeat (7),
        contig (8),
        other (255)
    },

    linkage INTEGER {
        unlinked (0),
        linked (1),
        other (255)
    } OPTIONAL
}

-- Concrete encodings behind the Seq-data alternatives: StringStore for the
-- letter codes, OCTET STRING for the bit-packed, per-byte and probability
-- codes.
IUPACna ::= StringStore       -- IUPAC 1 letter codes, no spaces
IUPACaa ::= StringStore       -- IUPAC 1 letter codes, no spaces
NCBI2na ::= OCTET STRING      -- 00=A, 01=C, 10=G, 11=T
NCBI4na ::= OCTET STRING      -- 1 bit each for agct
                              -- 0001=A, 0010=C, 0100=G, 1000=T/U
                              -- 0101=Purine, 1010=Pyrimidine, etc
NCBI8na ::= OCTET STRING      -- for modified nucleic acids
NCBIpna ::= OCTET STRING      -- 5 octets/base, prob for a,c,g,t,n
                              -- probabilities are coded 0-255 = 0.0-1.0
NCBI8aa ::= OCTET STRING      -- for modified amino acids
NCBIeaa ::= StringStore       -- ASCII extended 1 letter aa codes
                              -- IUPAC codes + U=selenocysteine
NCBIpaa ::= OCTET STRING      -- 25 octets/aa, prob for IUPAC aas in order:
                              -- A-Y,B,Z,X,(ter),anything
                              -- probabilities are coded 0-255 = 0.0-1.0
NCBIstdaa ::= OCTET STRING    -- codes 0-25, 1 per byte

--*** Sequence Annotation *************************************
--*

-- Replica of Textseq-id dedicated to annotations. It exists so that IDs
-- assigned to annotations stay semantically distinct from IDs assigned to
-- sequences.
Textannot-id ::= SEQUENCE {
    name      VisibleString OPTIONAL,
    accession VisibleString OPTIONAL,
    release   VisibleString OPTIONAL,
    version   INTEGER OPTIONAL
}

-- identifier of an annotation, in one of four forms
Annot-id ::= CHOICE {
    local   Object-id,
    ncbi    INTEGER,
    general Dbtag,
    other   Textannot-id
}
    
-- the descriptors of an annotation, as an unordered set of Annotdesc
Annot-descr ::= SET OF Annotdesc

-- A single annotation descriptor; each value is exactly one of the
-- alternatives below.
Annotdesc ::= CHOICE {
    name        VisibleString,   -- a short name for this collection
    title       VisibleString,   -- a title for this collection
    comment     VisibleString,   -- a more extensive comment
    pub         Pubdesc,         -- a reference to the publication
    user        User-object,     -- user defined object
    create-date Date,            -- date entry first created/released
    update-date Date,            -- date of last update
    src         Seq-id,          -- source sequence from which annot came
    align       Align-def,       -- definition of the SeqAligns
    region      Seq-loc          -- all contents cover this region
}

-- definition of the alignments held in a Seq-annot
Align-def ::= SEQUENCE {
    -- class of align Seq-annot
    align-type INTEGER {
        ref (1),       -- set of alignments to the same sequence
        alt (2),       -- set of alternate alignments of the same seqs
        blocks (3),    -- set of aligned blocks in the same seqs
        other (255)
    },

    -- used for the one ref seqid for now
    ids SET OF Seq-id OPTIONAL
}

-- a collection of annotation data together with its ids and descriptors
Seq-annot ::= SEQUENCE {
    id SET OF Annot-id OPTIONAL,

    -- source of annotation
    db INTEGER {
        genbank (1),
        embl (2),
        ddbj (3),
        pir (4),
        sp (5),
        bbone (6),
        pdb (7),
        other (255)
    } OPTIONAL,

    -- source if "other" above
    name VisibleString OPTIONAL,

    -- used only for stand alone Seq-annots
    desc Annot-descr OPTIONAL,

    data CHOICE {
        ftable    SET OF Seq-feat,
        align     SET OF Seq-align,
        graph     SET OF Seq-graph,
        ids       SET OF Seq-id,    -- used for communication between tools
        locs      SET OF Seq-loc,   -- used for communication between tools
        seq-table Seq-table         -- features in table form
    }
}

END


-- seqalign.asn
--$Revision: 142982 $
--**********************************************************************
--
--  NCBI Sequence Alignment elements
--  by James Ostell, 1990
--
--**********************************************************************

NCBI-Seqalign DEFINITIONS ::=
BEGIN

EXPORTS Seq-align, Score, Score-set, Seq-align-set;

IMPORTS Seq-id, Seq-loc , Na-strand FROM NCBI-Seqloc
        User-object, Object-id FROM NCBI-General;

--*** Sequence Alignment ********************************
--*

-- an unordered set of alignments; also used by Seq-align.segs.disc
Seq-align-set ::= SET OF Seq-align

-- One alignment: its type, optional dimensionality and scores, the
-- alignment data in one of several segment forms, and optional extras.
Seq-align ::= SEQUENCE {
    type ENUMERATED {
        not-set (0),
        global (1),
        diags (2),      -- unbroken, but not ordered, diagonals
        partial (3),    -- mapping pieces together
        disc (4),       -- discontinuous alignment
        other (255)
    },

    -- dimensionality
    dim INTEGER OPTIONAL,

    -- for whole alignment
    score SET OF Score OPTIONAL,

    -- alignment data
    segs CHOICE {
        dendiag SEQUENCE OF Dense-diag,
        denseg  Dense-seg,
        std     SEQUENCE OF Std-seg,
        packed  Packed-seg,
        disc    Seq-align-set,
        spliced Spliced-seg,
        sparse  Sparse-seg
    },

    -- regions of sequence over which align was computed
    bounds SET OF Seq-loc OPTIONAL,

    -- alignment id
    id SEQUENCE OF Object-id OPTIONAL,

    -- extra info
    ext SEQUENCE OF User-object OPTIONAL
}

-- for (multiway) diagonals
Dense-diag ::= SEQUENCE {
    -- dimensionality
    dim INTEGER DEFAULT 2,

    -- sequences in order
    ids SEQUENCE OF Seq-id,

    -- start OFFSETS in ids order
    starts SEQUENCE OF INTEGER,

    -- len of aligned segments
    len INTEGER,

    strands SEQUENCE OF Na-strand OPTIONAL,
    scores  SET OF Score OPTIONAL
}

    -- Dense-seg: the densest packing for sequence alignments only.
    --            a start of -1 indicates a gap for that sequence of
    --            length lens.
    --
    -- id=100  AAGGCCTTTTAGAGATGATGATGATGATGA
    -- id=200  AAGGCCTTTTAG.......GATGATGATGA
    -- id=300  ....CCTTTTAGAGATGATGAT....ATGA
    --
    -- dim = 3, numseg = 6, ids = { 100, 200, 300 }
    -- starts = { 0,0,-1, 4,4,0, 12,-1,8, 19,12,15, 22,15,-1, 26,19,18 }
    -- lens = { 4, 8, 7, 3, 4, 4 }
    --

-- for (multiway) global or partial alignments
Dense-seg ::= SEQUENCE {
    -- dimensionality
    dim INTEGER DEFAULT 2,

    -- number of segments here
    numseg INTEGER,

    -- sequences in order
    ids SEQUENCE OF Seq-id,

    -- start OFFSETS in ids order within segs
    starts SEQUENCE OF INTEGER,

    -- lengths in ids order within segs
    lens SEQUENCE OF INTEGER,

    strands SEQUENCE OF Na-strand OPTIONAL,

    -- score for each seg
    scores SEQUENCE OF Score OPTIONAL
}

-- for (multiway) global or partial alignments
Packed-seg ::= SEQUENCE {
    -- dimensionality
    dim INTEGER DEFAULT 2,

    -- number of segments here
    numseg INTEGER,

    -- sequences in order
    ids SEQUENCE OF Seq-id,

    -- start OFFSETS in ids order for whole alignment
    starts SEQUENCE OF INTEGER,

    -- Boolean if each sequence present or absent in each segment
    present OCTET STRING,

    -- length of each segment
    lens SEQUENCE OF INTEGER,

    strands SEQUENCE OF Na-strand OPTIONAL,

    -- score for each segment
    scores SEQUENCE OF Score OPTIONAL
}

-- an alignment segment whose rows are given as Seq-loc values
Std-seg ::= SEQUENCE {
    dim    INTEGER DEFAULT 2,             -- dimensionality
    ids    SEQUENCE OF Seq-id OPTIONAL,
    loc    SEQUENCE OF Seq-loc,
    scores SET OF Score OPTIONAL
}


-- spliced alignment of a product against a genomic sequence
Spliced-seg ::= SEQUENCE {
    -- product is either protein or transcript (cDNA)
    product-id Seq-id OPTIONAL,
    genomic-id Seq-id OPTIONAL,

    -- should be 'plus' or 'minus'
    product-strand Na-strand OPTIONAL,
    genomic-strand Na-strand OPTIONAL,

    product-type ENUMERATED {
        transcript (0),
        protein (1)
    },

    -- Set of segments involved. Each segment corresponds to one exon, and
    -- exons are always in biological order.
    exons SEQUENCE OF Spliced-exon,

    -- optional poly(A) tail
    poly-a INTEGER OPTIONAL,

    -- Length of the product, in bases/residues. A 3' unaligned length can
    -- be extracted from this; it also captures the case in which a protein
    -- aligns leaving a partial codon alignment at the 3' end.
    product-length INTEGER OPTIONAL,

    -- Alignment descriptors / modifiers; this provides us a set for
    -- extension.
    modifiers SET OF Spliced-seg-modifier OPTIONAL
}

-- a single descriptor / modifier of a Spliced-seg
Spliced-seg-modifier ::= CHOICE {
    -- protein aligns from the start and the first codon on both product
    -- and genomic is start codon
    start-codon-found BOOLEAN,

    -- protein aligns to its end and there is stop codon on the genomic
    -- right after the alignment
    stop-codon-found BOOLEAN
}


-- A complete or partial exon. Two consecutive Spliced-exons may belong to
-- one exon.
Spliced-exon ::= SEQUENCE {
    -- product-end >= product-start
    product-start Product-pos,
    product-end   Product-pos,

    -- genomic-end >= genomic-start
    genomic-start INTEGER,
    genomic-end   INTEGER,

    -- product is either protein or transcript (cDNA)
    product-id Seq-id OPTIONAL,
    genomic-id Seq-id OPTIONAL,

    -- should be 'plus' or 'minus'
    product-strand Na-strand OPTIONAL,

    -- genomic-strand represents the strand of translation
    genomic-strand Na-strand OPTIONAL,

    -- basic segments always are in biologic order
    parts SEQUENCE OF Spliced-exon-chunk OPTIONAL,

    -- scores for this exon
    scores Score-set OPTIONAL,

    -- splice sites
    acceptor-before-exon Splice-site OPTIONAL,
    donor-after-exon     Splice-site OPTIONAL,

    -- flag: is this exon complete or partial?
    partial BOOLEAN OPTIONAL,

    -- extra info
    ext SEQUENCE OF User-object OPTIONAL
}


-- position on the product: a plain integer or a Prot-pos
Product-pos ::= CHOICE {
    nucpos  INTEGER,
    protpos Prot-pos
}


-- codon based position on protein (1/3 of aminoacid)
Prot-pos ::= SEQUENCE {
    -- standard protein position
    amin INTEGER,

    -- 0, 1, 2, or 3 as for Cdregion:
    --   0       = not set
    --   1, 2, 3 = actual frame
    frame INTEGER DEFAULT 0
}


-- Spliced-exon-chunk: piece of an exon.
-- Lengths are given in nucleotide bases (1/3 of aminoacid when product is
-- a protein).
Spliced-exon-chunk ::= CHOICE {
    -- both sequences represented, product and genomic sequences match
    match INTEGER,

    -- both sequences represented, product and genomic sequences do not
    -- match
    mismatch INTEGER,

    -- Both sequences are represented and there is sufficient similarity
    -- between product and genomic sequences. Can be used to replace
    -- stretches of matches and mismatches, mostly for protein to genomic
    -- where definition of match or mismatch depends on translation table.
    diag INTEGER,

    -- insertion in product sequence (i.e. gap in the genomic sequence)
    product-ins INTEGER,

    -- insertion in genomic sequence (i.e. gap in the product sequence)
    genomic-ins INTEGER
}


-- site involved in splice
Splice-site ::= SEQUENCE {
    -- Typically two bases in the intronic region; always in IUPAC format.
    bases VisibleString
}


-- ==========================================================================
--
-- Sparse-seg follows the semantics of dense-seg and is more optimal for
-- representing sparse multiple alignments
--
-- ==========================================================================


-- a multiple alignment stored as a set of pairwise alignments
Sparse-seg ::= SEQUENCE {
    master-id Seq-id OPTIONAL,

    -- pairwise alignments constituting this multiple alignment
    rows SET OF Sparse-align,

    -- per-row scores
    row-scores SET OF Score OPTIONAL,

    -- index of extra items
    ext SET OF Sparse-seg-ext OPTIONAL
}

-- one pairwise alignment (a row) of a Sparse-seg
Sparse-align ::= SEQUENCE {
    first-id  Seq-id,
    second-id Seq-id,

    -- number of segments
    numseg INTEGER,

    -- starts on the first sequence [numseg]
    first-starts SEQUENCE OF INTEGER,

    -- starts on the second sequence [numseg]
    second-starts SEQUENCE OF INTEGER,

    -- lengths of segments [numseg]
    lens SEQUENCE OF INTEGER,

    second-strands SEQUENCE OF Na-strand OPTIONAL,

    -- per-segment scores
    seg-scores SET OF Score OPTIONAL
}

-- extension record of a Sparse-seg; only an index is carried
Sparse-seg-ext ::= SEQUENCE {
    -- Commented-out layout kept from the original definition:
    --     seg-ext SET OF {
    --         index INTEGER,
    --         data User-field
    --     }
    index INTEGER
}


-- A score: an optional id plus a real or integer value.
-- Use of Score is discouraged for external ASN.1 specifications.
Score ::= SEQUENCE {
    id Object-id OPTIONAL,
    value CHOICE {
        real REAL,
        int  INTEGER
    }
}

-- An unordered set of Score values.
-- Use of Score-set is encouraged for external ASN.1 specifications.
Score-set ::= SET OF Score

END 


-- seqblock.asn
--$Revision: 6.0 $
--*********************************************************************
--
-- 1990 - J.Ostell
-- Version 3.0 - June 1994
--
--*********************************************************************
--*********************************************************************
--
--  EMBL specific data
--  This block of specifications was developed by Reiner Fuchs of EMBL
--  Updated by J.Ostell, 1994
--
--*********************************************************************

EMBL-General DEFINITIONS ::=
BEGIN

EXPORTS EMBL-dbname, EMBL-xref, EMBL-block;

IMPORTS Date, Object-id FROM NCBI-General;

-- name of a database: either one of the enumerated codes or a free string
EMBL-dbname ::= CHOICE {
    code ENUMERATED {
        embl (0),
        genbank (1),
        ddbj (2),
        geninfo (3),
        medline (4),
        swissprot (5),
        pir (6),
        pdb (7),
        epd (8),
        ecd (9),
        tfd (10),
        flybase (11),
        prosite (12),
        enzyme (13),
        mim (14),
        ecoseq (15),
        hiv (16),
        other (255)
    },
    name VisibleString
}

-- cross reference: a database name plus the ids within that database
EMBL-xref ::= SEQUENCE {
    dbname EMBL-dbname,
    id     SEQUENCE OF Object-id
}

-- EMBL specific data: class, division, dates, extra accessions, keywords
-- and cross references
EMBL-block ::= SEQUENCE {
    class ENUMERATED {
        not-set (0),
        standard (1),
        unannotated (2),
        other (255)
    } DEFAULT standard,

    div ENUMERATED {
        fun (0),
        inv (1),
        mam (2),
        org (3),
        phg (4),
        pln (5),
        pri (6),
        pro (7),
        rod (8),
        syn (9),
        una (10),
        vrl (11),
        vrt (12),
        pat (13),
        est (14),
        sts (15),
        other (255)
    } OPTIONAL,

    creation-date Date,
    update-date   Date,
    extra-acc     SEQUENCE OF VisibleString OPTIONAL,
    keywords      SEQUENCE OF VisibleString OPTIONAL,
    xref          SEQUENCE OF EMBL-xref OPTIONAL
}

END

--*********************************************************************
--
--  SWISSPROT specific data
--  This block of specifications was developed by Mark Cavanaugh of
--      NCBI working with Amos Bairoch of SWISSPROT
--
--*********************************************************************

SP-General DEFINITIONS ::=
BEGIN

EXPORTS SP-block;

IMPORTS Date, Dbtag FROM NCBI-General
        Seq-id FROM NCBI-Seqloc;

-- SWISSPROT specific descriptions
SP-block ::= SEQUENCE {
    class ENUMERATED {
        not-set (0),
        standard (1),    -- conforms to all SWISSPROT checks
        prelim (2),      -- only seq and biblio checked
        other (255)
    },

    -- old SWISSPROT ids
    extra-acc SET OF VisibleString OPTIONAL,

    -- seq known to start with Met
    imeth BOOLEAN DEFAULT FALSE,

    -- plasmid names carrying gene
    plasnm SET OF VisibleString OPTIONAL,

    -- xref to other sequences
    seqref SET OF Seq-id OPTIONAL,

    -- xref to non-sequence dbases
    dbref SET OF Dbtag OPTIONAL,

    -- keywords
    keywords SET OF VisibleString OPTIONAL,

    -- creation date
    created Date OPTIONAL,

    -- sequence update
    sequpd Date OPTIONAL,

    -- annotation update
    annotupd Date OPTIONAL
}

END

--*********************************************************************
--
--  PIR specific data
--  This block of specifications was developed by Jim Ostell of
--      NCBI
--
--*********************************************************************

PIR-General DEFINITIONS ::=
BEGIN

EXPORTS PIR-block;

IMPORTS Seq-id FROM NCBI-Seqloc;

-- PIR-block: every element is OPTIONAL, so an empty PIR-block is a valid
--   value.  Apart from had-punct, keywords and seqref, the elements are
--   plain text strings.
PIR-block ::= SEQUENCE {          -- PIR specific descriptions
    had-punct BOOLEAN OPTIONAL ,      -- had punctuation in sequence ?
    host VisibleString OPTIONAL ,
    source VisibleString OPTIONAL ,     -- source line
    summary VisibleString OPTIONAL ,
    genetic VisibleString OPTIONAL ,
    includes VisibleString OPTIONAL ,
    placement VisibleString OPTIONAL ,
    superfamily VisibleString OPTIONAL ,
    keywords SEQUENCE OF VisibleString OPTIONAL ,
    cross-reference VisibleString OPTIONAL ,
    date VisibleString OPTIONAL ,     -- date kept as text, not as a Date value
    seq-raw VisibleString OPTIONAL ,  -- seq with punctuation
    seqref SET OF Seq-id OPTIONAL }         -- xref to other sequences

END

--*********************************************************************
--
--  GenBank specific data
--  This block of specifications was developed by Jim Ostell of
--      NCBI
--
--*********************************************************************

GenBank-General DEFINITIONS ::=
BEGIN

EXPORTS GB-block;

IMPORTS Date FROM NCBI-General;

-- GB-block: every element is OPTIONAL.  The text element 'date' is marked
--   obsolete; 'entry-date' (a structured Date) is its replacement.
GB-block ::= SEQUENCE {          -- GenBank specific descriptions
    extra-accessions SEQUENCE OF VisibleString OPTIONAL ,
    source VisibleString OPTIONAL ,     -- source line
    keywords SEQUENCE OF VisibleString OPTIONAL ,
    origin VisibleString OPTIONAL,
    date VisibleString OPTIONAL ,       -- OBSOLETE old form Entry Date
    entry-date Date OPTIONAL ,          -- replaces date
    div VisibleString OPTIONAL ,        -- GenBank division
    taxonomy VisibleString OPTIONAL }   -- continuation line of organism

END

--**********************************************************************
-- PRF specific definition
--    PRF is a protein sequence database created and maintained by
--    Protein Research Foundation, Minoo-city, Osaka, Japan.
--
--    Written by A.Ogiwara, Inst.Chem.Res. (Dr.Kanehisa's Lab),
--            Kyoto Univ., Japan
--
--**********************************************************************

PRF-General DEFINITIONS ::=
BEGIN

EXPORTS PRF-block;

-- PRF-block: PRF specific data; both elements are OPTIONAL.
PRF-block ::= SEQUENCE {
      extra-src       PRF-ExtraSrc OPTIONAL,              -- see PRF-ExtraSrc
      keywords        SEQUENCE OF VisibleString OPTIONAL  -- ordered keyword list
}

-- PRF-ExtraSrc: five free-text strings, all OPTIONAL, referenced from
--   PRF-block.extra-src.
--   NOTE(review): presumably extra source information for the entry; confirm.
PRF-ExtraSrc ::= SEQUENCE {
      host    VisibleString OPTIONAL,
      part    VisibleString OPTIONAL,
      state   VisibleString OPTIONAL,
      strain  VisibleString OPTIONAL,
      taxon   VisibleString OPTIONAL
}

END

--*********************************************************************
--
--  PDB specific data
--  This block of specifications was developed by Jim Ostell and
--      Steve Bryant of NCBI
--
--*********************************************************************

PDB-General DEFINITIONS ::=
BEGIN

EXPORTS PDB-block;

IMPORTS Date FROM NCBI-General;

-- PDB-block: deposition, class, compound and source are mandatory;
--   exp-method and replace are OPTIONAL.
PDB-block ::= SEQUENCE {          -- PDB specific descriptions
    deposition Date ,         -- deposition date  month,year
    class VisibleString ,
    compound SEQUENCE OF VisibleString ,
    source SEQUENCE OF VisibleString ,
    exp-method VisibleString OPTIONAL ,  -- present if NOT X-ray diffraction
    replace PDB-replace OPTIONAL } -- replacement history

-- PDB-replace: replacement history record used by PDB-block.replace;
--   both elements are mandatory.
PDB-replace ::= SEQUENCE {
    date Date ,
    ids SEQUENCE OF VisibleString }   -- entry ids replaced by this one

END


-- seqcode.asn
--$Revision: 6.0 $
--  *********************************************************************
--
--  These are code and conversion tables for NCBI sequence codes
--  ASN.1 for the sequences themselves is defined in seq.asn
--
--  Seq-map-table and Seq-code-table REQUIRE that codes start with 0
--    and increase continuously.  So IUPAC codes, which are upper case
--    letters, will always have 65 zero-valued cells (indices 0 to 64,
--    since 'A' is ASCII 65) before the codes begin.  This allows all
--    codes to be used directly as indexes for table lookups.
--
--  Valid names for code tables are:
--    IUPACna
--    IUPACaa
--    IUPACeaa
--    IUPACaa3     3 letter amino acid codes : parallels IUPACeaa
--                   display only, not a data exchange type
--    NCBI2na
--    NCBI4na
--    NCBI8na
--    NCBI8aa
--    NCBIstdaa
--     probability types map to IUPAC types for display as characters

NCBI-SeqCode DEFINITIONS ::=
BEGIN

EXPORTS Seq-code-table, Seq-map-table, Seq-code-set;

-- Seq-code-type: identifies one sequence coding scheme.  Values run
--   from 1 to 11; no value 0 is defined.
Seq-code-type ::= ENUMERATED {              -- sequence representations
    iupacna (1) ,              -- IUPAC 1 letter nuc acid code
    iupacaa (2) ,              -- IUPAC 1 letter amino acid code
    ncbi2na (3) ,              -- 2 bit nucleic acid code
    ncbi4na (4) ,              -- 4 bit nucleic acid code
    ncbi8na (5) ,              -- 8 bit extended nucleic acid code
    ncbipna (6) ,              -- nucleic acid probabilities
    ncbi8aa (7) ,              -- 8 bit extended amino acid codes
    ncbieaa (8) ,              -- extended ASCII 1 letter aa codes
    ncbipaa (9) ,              -- amino acid probabilities
    iupacaa3 (10) ,            -- 3 letter code only for display
    ncbistdaa (11) }           -- consecutive codes for std aas, 0-25

-- Seq-map-table: conversion table from one coding scheme to another.
--   'start-at' is taken as 0 when absent; all other elements are mandatory.
Seq-map-table ::= SEQUENCE { -- for tables of sequence mappings
    from Seq-code-type ,      -- code to map from
    to Seq-code-type ,        -- code to map to
    num INTEGER ,             -- number of rows in table
    start-at INTEGER DEFAULT 0 ,   -- index offset of first element
    table SEQUENCE OF INTEGER }  -- table of values, in from-to order

-- Seq-code-table: symbol / name listing for one coding scheme.  Each row
--   of 'table' is an anonymous SEQUENCE of a symbol and a name.
--   'start-at' is taken as 0 when absent and 'comps' is OPTIONAL.
Seq-code-table ::= SEQUENCE { -- for names of coded values
    code Seq-code-type ,      -- name of code
    num INTEGER ,             -- number of rows in table
    one-letter BOOLEAN ,   -- symbol is ALWAYS 1 letter?
    start-at INTEGER DEFAULT 0 ,   -- index offset of first element
    table SEQUENCE OF
        SEQUENCE {
            symbol VisibleString ,      -- the printed symbol or letter
            name VisibleString } ,      -- an explanatory name or string
    comps SEQUENCE OF INTEGER OPTIONAL } -- pointers to complement nuc acid

-- Seq-code-set: bundle of code tables and map tables; both sets are
--   OPTIONAL, so either or both may be left out.
Seq-code-set ::= SEQUENCE {    -- for distribution
    codes SET OF Seq-code-table OPTIONAL ,   -- symbol/name tables
    maps SET OF Seq-map-table OPTIONAL }     -- conversion tables

END


-- seqfeat.asn
--$Revision: 142746 $
--**********************************************************************
--
--  NCBI Sequence Feature elements
--  by James Ostell, 1990
--  Version 3.0 - June 1994
--
--**********************************************************************

NCBI-Seqfeat DEFINITIONS ::=
BEGIN

EXPORTS Seq-feat, Feat-id, Genetic-code;

IMPORTS Gene-ref FROM NCBI-Gene
        Prot-ref FROM NCBI-Protein
        Org-ref FROM NCBI-Organism
        BioSource FROM NCBI-BioSource
        RNA-ref FROM NCBI-RNA
        Seq-loc, Giimport-id FROM NCBI-Seqloc
        Pubdesc, Numbering, Heterogen FROM NCBI-Sequence
        Rsite-ref FROM NCBI-Rsite
        Txinit FROM NCBI-TxInit
        Pub-set FROM NCBI-Pub
        Object-id, Dbtag, User-object FROM NCBI-General;

--*** Feature identifiers ********************************
--*

-- Feat-id: identifier of a feature; exactly one of the four forms below.
Feat-id ::= CHOICE {
    gibb INTEGER ,            -- geninfo backbone
    giim Giimport-id ,        -- geninfo import
    local Object-id ,         -- for local software use
    general Dbtag }           -- for use by various databases

--*** Seq-feat *******************************************
--*  sequence feature generalization

-- Seq-feat: one feature.  Only 'data' and 'location' are mandatory; every
--   other element is OPTIONAL.  'ids' and 'exts' are the set-valued
--   successors of 'id' and 'ext' (see their comments below).
Seq-feat ::= SEQUENCE {
    id Feat-id OPTIONAL ,
    data SeqFeatData ,           -- the specific data
    partial BOOLEAN OPTIONAL ,    -- incomplete in some way?
    except BOOLEAN OPTIONAL ,     -- something funny about this?
    comment VisibleString OPTIONAL ,
    product Seq-loc OPTIONAL ,    -- product of process
    location Seq-loc ,            -- feature made from
    qual SEQUENCE OF Gb-qual OPTIONAL ,  -- qualifiers
    title VisibleString OPTIONAL ,   -- for user defined label
    ext User-object OPTIONAL ,    -- user defined structure extension
    cit Pub-set OPTIONAL ,        -- citations for this feature
    exp-ev ENUMERATED {           -- evidence for existence of feature
        experimental (1) ,        -- any reasonable experimental check
        not-experimental (2) } OPTIONAL , -- similarity, pattern, etc
    xref SET OF SeqFeatXref OPTIONAL ,   -- cite other relevant features
    dbxref SET OF Dbtag OPTIONAL ,  -- support for xref to other databases
    pseudo BOOLEAN OPTIONAL ,     -- annotated on pseudogene?
    except-text VisibleString OPTIONAL , -- explain if except=TRUE
    ids SET OF Feat-id OPTIONAL ,       -- set of Ids; will replace 'id' field
    exts SET OF User-object OPTIONAL }  -- set of extensions; will replace 'ext' field

-- SeqFeatData: the kind-specific payload of a feature.  Being a CHOICE,
--   a value carries exactly one of the alternatives below.  Some
--   alternatives refer to other types, some are inline enumerations or
--   strings, and 'comment' carries no data at all (NULL).
SeqFeatData ::= CHOICE {
    gene Gene-ref ,            -- reference to a gene
    org Org-ref ,              -- reference to an organism
    cdregion Cdregion ,        -- instructions to translate nucleic acid to peptide
    prot Prot-ref ,            -- reference to a protein name
    rna RNA-ref ,              -- RNA feature
    pub Pubdesc ,              -- publication applies to this seq
    seq Seq-loc ,              -- to annotate origin from another seq
    imp Imp-feat ,             -- feature imported from another database
    region VisibleString,      -- named region (globin locus)
    comment NULL ,             -- just a comment
    bond ENUMERATED {          -- kind of bond
        disulfide (1) ,
        thiolester (2) ,
        xlink (3) ,
        thioether (4) ,
        other (255) } ,
    site ENUMERATED {          -- kind of site
        active (1) ,
        binding (2) ,
        cleavage (3) ,
        inhibit (4) ,
        modified (5),
        glycosylation (6) ,
        myristoylation (7) ,
        mutagenized (8) ,
        metal-binding (9) ,
        phosphorylation (10) ,
        acetylation (11) ,
        amidation (12) ,
        methylation (13) ,
        hydroxylation (14) ,
        sulfatation (15) ,
        oxidative-deamination (16) ,
        pyrrolidone-carboxylic-acid (17) ,
        gamma-carboxyglutamic-acid (18) ,
        blocked (19) ,
        lipid-binding (20) ,
        np-binding (21) ,
        dna-binding (22) ,
        signal-peptide (23) ,
        transit-peptide (24) ,
        transmembrane-region (25) ,
        nitrosylation (26) ,
        other (255) } ,
    rsite Rsite-ref ,       -- restriction site  (for maps really)
    user User-object ,      -- user defined structure
    txinit Txinit ,         -- transcription initiation
    num Numbering ,         -- a numbering system
    psec-str ENUMERATED {   -- protein secondary structure
        helix (1) ,         -- any helix
        sheet (2) ,         -- beta sheet
        turn  (3) } ,       -- beta or gamma turn
    non-std-residue VisibleString ,  -- non-standard residue here in seq
    het Heterogen ,         -- cofactor, prosthetic grp, etc, bound to seq
    biosrc BioSource }      -- source of the biological material

-- SeqFeatXref: reference from one feature to another, by identifier,
--   by a copy of its data, or by both.
SeqFeatXref ::= SEQUENCE {       -- both optional because can have one or both
    id Feat-id OPTIONAL ,        -- the feature copied
    data SeqFeatData OPTIONAL }  -- the specific data
    
--*** CdRegion ***********************************************
--*
--*  Instructions to translate from a nucleic acid to a peptide
--*    conflict means it's supposed to translate but doesn't
--*


-- Cdregion: translation instructions for a coding region.  'frame' is
--   taken as not-set when absent; every other element is OPTIONAL.
Cdregion ::= SEQUENCE {
    orf BOOLEAN OPTIONAL ,             -- just an ORF ?
    frame ENUMERATED {
        not-set (0) ,                  -- not set, code uses one
        one (1) ,
        two (2) ,
        three (3) } DEFAULT not-set ,      -- reading frame
    conflict BOOLEAN OPTIONAL ,        -- conflict
    gaps INTEGER OPTIONAL ,            -- number of gaps on conflict/except
    mismatch INTEGER OPTIONAL ,        -- number of mismatches on above
    code Genetic-code OPTIONAL ,       -- genetic code used
    code-break SEQUENCE OF Code-break OPTIONAL ,   -- individual exceptions
    stops INTEGER OPTIONAL }           -- number of stop codons on above

                    -- each code is 64 cells long, in the order where
                    -- T=0,C=1,A=2,G=3, TTT=0, TTC=1, TCA=6, etc
                    -- NOTE: this order does NOT correspond to a Seq-data
                    -- encoding.  It is "natural" to codon usage instead.
                    -- the value in each cell is the AA coded for
                    -- start= AA coded only if first in peptide
                    --   in start array, if codon is not a legitimate start
                    --   codon, that cell will have the "gap" symbol for
                    --   that alphabet.  Otherwise it will have the AA
                    --   encoded when that codon is used at the start.

-- Genetic-code: an unordered set of elements, each being exactly one of
--   the alternatives below: a name, a database id, a translation array
--   in one of three alphabets, or a start-codon array (s prefix) in one
--   of the same three alphabets.
Genetic-code ::= SET OF CHOICE {
    name VisibleString ,               -- name of a code
    id INTEGER ,                       -- id in dbase
    ncbieaa VisibleString ,            -- indexed to IUPAC extended
    ncbi8aa OCTET STRING ,             -- indexed to NCBI8aa
    ncbistdaa OCTET STRING ,           -- indexed to NCBIstdaa
    sncbieaa VisibleString ,            -- start, indexed to IUPAC extended
    sncbi8aa OCTET STRING ,             -- start, indexed to NCBI8aa
    sncbistdaa OCTET STRING }           -- start, indexed to NCBIstdaa

-- Code-break: overrides the translation at one location.  Both elements
--   are mandatory; the amino acid is given as an integer in exactly one
--   of three alphabets.
Code-break ::= SEQUENCE {              -- specific codon exceptions
    loc Seq-loc ,                      -- location of exception
    aa CHOICE {                        -- the amino acid
        ncbieaa INTEGER ,              -- ASCII value of NCBIeaa code
        ncbi8aa INTEGER ,              -- NCBI8aa code
        ncbistdaa INTEGER } }           -- NCBIstdaa code

-- Genetic-code-table: unordered collection (SET OF) of Genetic-code values.
--   This type is not listed in the EXPORTS clause of this module.
Genetic-code-table ::= SET OF Genetic-code     -- table of genetic codes

--*** Import ***********************************************
--*
--*  Features imported from other databases
--*

-- Imp-feat: a feature imported from another database, kept as text.
--   Only 'key' is mandatory.
Imp-feat ::= SEQUENCE {
    key VisibleString ,
    loc VisibleString OPTIONAL ,         -- original location string
    descr VisibleString OPTIONAL }       -- text description

-- Gb-qual: a pair of mandatory text strings, used as the element type of
--   Seq-feat.qual (qualifiers).
--   NOTE(review): presumably qual is the qualifier name and val its value; confirm.
Gb-qual ::= SEQUENCE {
    qual VisibleString ,
    val VisibleString }

END 

--**********************************************************************
--
--  NCBI Restriction Sites
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-Rsite DEFINITIONS ::=
BEGIN

EXPORTS Rsite-ref;

IMPORTS Dbtag FROM NCBI-General;

-- Rsite-ref: reference to a restriction site, either as free text or as
--   a database tag (exactly one of the two).
Rsite-ref ::= CHOICE {
    str VisibleString ,     -- may be unparsable
    db  Dbtag }             -- pointer to a restriction site database

END

--**********************************************************************
--
--  NCBI RNAs
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-RNA DEFINITIONS ::=
BEGIN

EXPORTS RNA-ref, Trna-ext, RNA-gen, RNA-qual, RNA-qual-set;

IMPORTS Seq-loc FROM NCBI-Seqloc;

--*** rnas ***********************************************
--*
--*  various rnas
--*
                         -- minimal RNA sequence
-- RNA-ref: describes an RNA feature.  'type' is mandatory; 'pseudo' and
--   'ext' are OPTIONAL.  When present, 'ext' carries exactly one of a
--   name, a tRNA extension or a generic RNA-gen record.
RNA-ref ::= SEQUENCE {
    type ENUMERATED {            -- type of RNA feature
        unknown (0) ,
        premsg (1) ,
        mRNA (2) ,
        tRNA (3) ,
        rRNA (4) ,
        snRNA (5) ,              -- will become ncRNA, with RNA-gen.class = snRNA
        scRNA (6) ,              -- will become ncRNA, with RNA-gen.class = scRNA
        snoRNA (7) ,             -- will become ncRNA, with RNA-gen.class = snoRNA
        ncRNA (8) ,              -- non-coding RNA; subsumes snRNA, scRNA, snoRNA
        tmRNA (9) ,
        miscRNA (10) ,
        other (255) } ,
    pseudo BOOLEAN OPTIONAL ,
    ext CHOICE {
        name VisibleString ,        -- for naming "other" type
        tRNA Trna-ext ,             -- for tRNAs
        gen RNA-gen } OPTIONAL      -- generic fields for ncRNA, tmRNA, miscRNA
    }

-- Trna-ext: all three elements are OPTIONAL.  When present, 'aa' is an
--   integer code expressed in exactly one of four alphabets.
Trna-ext ::= SEQUENCE {                 -- tRNA feature extensions
    aa CHOICE {                         -- aa this carries
        iupacaa INTEGER ,
        ncbieaa INTEGER ,
        ncbi8aa INTEGER ,
        ncbistdaa INTEGER } OPTIONAL ,
    codon SET OF INTEGER OPTIONAL ,     -- codon(s) as in Genetic-code
    anticodon Seq-loc OPTIONAL }        -- location of anticodon

-- RNA-gen: generic RNA description used by RNA-ref.ext.gen; all three
--   elements are OPTIONAL.
RNA-gen ::= SEQUENCE {
    class VisibleString OPTIONAL ,      -- for ncRNAs, the class of non-coding RNA:
                                        -- examples: antisense_RNA, guide_RNA, snRNA
    product VisibleString OPTIONAL ,
    quals RNA-qual-set OPTIONAL         -- e.g., tag_peptide qualifier for tmRNAs
}

-- RNA-qual: one tag / value pair; both strings are mandatory.
RNA-qual ::= SEQUENCE {                 -- Additional data values for RNA-gen,
    qual VisibleString ,                -- in a tag (qual), value (val) format
    val VisibleString }

-- RNA-qual-set: ordered list of RNA-qual pairs, used by RNA-gen.quals.
RNA-qual-set ::= SEQUENCE OF RNA-qual

END

--**********************************************************************
--
--  NCBI Genes
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-Gene DEFINITIONS ::=
BEGIN

EXPORTS Gene-ref, Gene-nomenclature;

IMPORTS Dbtag FROM NCBI-General;

--*** Gene ***********************************************
--*
--*  reference to a gene
--*

-- Gene-ref: reference to a gene.  'pseudo' is taken as FALSE when absent;
--   every other element is OPTIONAL.
Gene-ref ::= SEQUENCE {
    locus VisibleString OPTIONAL ,        -- Official gene symbol
    allele VisibleString OPTIONAL ,       -- Official allele designation
    desc VisibleString OPTIONAL ,         -- descriptive name
    maploc VisibleString OPTIONAL ,       -- descriptive map location
    pseudo BOOLEAN DEFAULT FALSE ,        -- pseudogene
    db SET OF Dbtag OPTIONAL ,            -- ids in other dbases
    syn SET OF VisibleString OPTIONAL ,   -- synonyms for locus
    locus-tag VisibleString OPTIONAL ,    -- systematic gene name (e.g., MI0001, ORF0069)
    formal-name Gene-nomenclature OPTIONAL  -- see Gene-nomenclature
}

-- Gene-nomenclature: naming record used by Gene-ref.formal-name.
--   'status' is mandatory; symbol, name and source are OPTIONAL.
Gene-nomenclature ::= SEQUENCE {
    status ENUMERATED {                 -- standing of the nomenclature
        unknown (0) ,
        official (1) ,
        interim (2)
    } ,
    symbol VisibleString OPTIONAL ,
    name VisibleString OPTIONAL ,
    source Dbtag OPTIONAL               -- NOTE(review): presumably the naming authority; confirm
}

END


--**********************************************************************
--
--  NCBI Organism
--  by James Ostell, 1994
--  version 3.0
--
--**********************************************************************

NCBI-Organism DEFINITIONS ::=
BEGIN

EXPORTS Org-ref;

IMPORTS Dbtag FROM NCBI-General;

--*** Org-ref ***********************************************
--*
--*  Reference to an organism
--*     defines only the organism; lower levels of detail for biological
--*     molecules are provided by the Source object
--*

-- Org-ref: reference to an organism.  Every element is OPTIONAL; the
--   structured name, when available, is carried in 'orgname'.
Org-ref ::= SEQUENCE {
    taxname VisibleString OPTIONAL ,   -- preferred formal name
    common VisibleString OPTIONAL ,    -- common name
    mod SET OF VisibleString OPTIONAL , -- unstructured modifiers
    db SET OF Dbtag OPTIONAL ,         -- ids in taxonomic or culture dbases
    syn SET OF VisibleString OPTIONAL ,  -- synonyms for taxname or common
    orgname OrgName OPTIONAL }         -- structured name, see OrgName
    

-- OrgName: structured organism name.  Every element is OPTIONAL.  When
--   present, 'name' is exactly one of five naming forms.
OrgName ::= SEQUENCE {
    name CHOICE {
        binomial BinomialOrgName ,         -- genus/species type name
        virus VisibleString ,              -- virus names are different
        hybrid MultiOrgName ,              -- hybrid between organisms
        namedhybrid BinomialOrgName ,      -- some hybrids have genus x species name
        partial PartialOrgName } OPTIONAL , -- when genus not known
    attrib VisibleString OPTIONAL ,        -- attribution of name
    mod SEQUENCE OF OrgMod OPTIONAL ,      -- structured modifiers, see OrgMod
    lineage VisibleString OPTIONAL ,       -- lineage with semicolon separators
    gcode INTEGER OPTIONAL ,               -- genetic code (see CdRegion)
    mgcode INTEGER OPTIONAL ,              -- mitochondrial genetic code
    div VisibleString OPTIONAL }           -- GenBank division code
    

-- OrgMod: one structured organism modifier: a kind (subtype), its text
--   (subname) and an optional attribution.  'subtype' is an INTEGER with
--   named values (not an ENUMERATED); the names defined here cover 2-37
--   and 253-255, and no name is assigned to 1.
OrgMod ::= SEQUENCE {
    subtype INTEGER {
        strain (2) ,
        substrain (3) ,
        type (4) ,
        subtype (5) ,
        variety (6) ,
        serotype (7) ,
        serogroup (8) ,
        serovar (9) ,
        cultivar (10) ,
        pathovar (11) ,
        chemovar (12) ,
        biovar (13) ,
        biotype (14) ,
        group (15) ,
        subgroup (16) ,
        isolate (17) ,
        common (18) ,
        acronym (19) ,
        dosage (20) ,          -- chromosome dosage of hybrid
        nat-host (21) ,        -- natural host of this specimen
        sub-species (22) ,
        specimen-voucher (23) ,
        authority (24) ,
        forma (25) ,
        forma-specialis (26) ,
        ecotype (27) ,
        synonym (28) ,
        anamorph (29) ,
        teleomorph (30) ,
        breed (31) ,
        gb-acronym (32) ,       -- used by taxonomy database
        gb-anamorph (33) ,      -- used by taxonomy database
        gb-synonym (34) ,       -- used by taxonomy database
        culture-collection (35) ,
        bio-material (36) ,
        metagenome-source (37) ,
        old-lineage (253) ,
        old-name (254) ,
        other (255) } ,         -- ASN5: old-name (254) will be added to next spec
                                -- NOTE(review): old-name (254) is already defined above,
                                -- so the ASN5 remark looks stale
    subname VisibleString ,          -- the modifier text (mandatory)
    attrib VisibleString OPTIONAL }  -- attribution/source of name

-- BinomialOrgName: genus / species / subspecies name.  Only 'genus' is
--   mandatory in the syntax; the rule that species must be present when
--   subspecies is used is stated in the comment only and is not enforced
--   by the type.
BinomialOrgName ::= SEQUENCE {
    genus VisibleString ,               -- required
    species VisibleString OPTIONAL ,    -- species required if subspecies used
    subspecies VisibleString OPTIONAL }

-- MultiOrgName: ordered list of OrgName values (used for hybrids, see
--   OrgName.name.hybrid); the order matters, as noted below.
MultiOrgName ::= SEQUENCE OF OrgName   -- the first will be used to assign division

-- PartialOrgName: ordered list of TaxElement values (see
--   OrgName.name.partial).
PartialOrgName ::= SEQUENCE OF TaxElement  -- when we don't know the genus

-- TaxElement: one named taxonomic level.  'fixed-level' and 'name' are
--   mandatory.  When fixed-level is other (0) the level is expected in
--   the free-text 'level' element, which the syntax still leaves OPTIONAL.
TaxElement ::= SEQUENCE {
    fixed-level INTEGER {
       other (0) ,                     -- level must be set in string
       family (1) ,
       order (2) ,
       class (3) } ,
    level VisibleString OPTIONAL ,     -- level as text (see other (0) above)
    name VisibleString }               -- name at that level

END


--**********************************************************************
--
--  NCBI BioSource
--  by James Ostell, 1994
--  version 3.0
--
--**********************************************************************

NCBI-BioSource DEFINITIONS ::=
BEGIN

EXPORTS BioSource;

IMPORTS Org-ref FROM NCBI-Organism;

--********************************************************************
--
-- BioSource gives the source of the biological material
--   for sequences
--
--********************************************************************

-- BioSource: source of the biological material.  Only 'org' is
--   mandatory.  'genome' and 'origin' are INTEGERs with named values and
--   both are taken as unknown (0) when absent; subtype, is-focus and
--   pcr-primers are OPTIONAL.
BioSource ::= SEQUENCE {
    genome INTEGER {         -- biological context
        unknown (0) ,
        genomic (1) ,
        chloroplast (2) ,
        chromoplast (3) ,
        kinetoplast (4) ,
        mitochondrion (5) ,
        plastid (6) ,
        macronuclear (7) ,
        extrachrom (8) ,
        plasmid (9) ,
        transposon (10) ,
        insertion-seq (11) ,
        cyanelle (12) ,
        proviral (13) ,
        virion (14) ,
        nucleomorph (15) ,
        apicoplast (16) ,
        leucoplast (17) ,
        proplastid (18) ,
        endogenous-virus (19) ,
        hydrogenosome (20) ,
        chromosome (21) ,
        chromatophore (22)
      } DEFAULT unknown ,
    origin INTEGER {
      unknown (0) ,
      natural (1) ,                    -- normal biological entity
      natmut (2) ,                     -- naturally occurring mutant
      mut (3) ,                        -- artificially mutagenized
      artificial (4) ,                 -- artificially engineered
      synthetic (5) ,                  -- purely synthetic
      other (255)
    } DEFAULT unknown ,
    org Org-ref ,                      -- the organism (mandatory)
    subtype SEQUENCE OF SubSource OPTIONAL ,   -- source modifiers, see SubSource
    is-focus NULL OPTIONAL ,           -- to distinguish biological focus
    pcr-primers PCRReactionSet OPTIONAL }      -- see PCRReactionSet

-- PCRReactionSet: unordered collection of PCRReaction values, used by
--   BioSource.pcr-primers.
PCRReactionSet ::= SET OF PCRReaction

-- PCRReaction: a forward and a reverse primer set; both are OPTIONAL.
PCRReaction ::= SEQUENCE {
    forward PCRPrimerSet OPTIONAL ,
    reverse PCRPrimerSet OPTIONAL }

-- PCRPrimerSet: unordered collection of PCRPrimer values.
PCRPrimerSet ::= SET OF PCRPrimer

-- PCRPrimer: one primer, given by its sequence and/or its name; both
--   elements are OPTIONAL.
PCRPrimer ::= SEQUENCE {
    seq PCRPrimerSeq OPTIONAL ,
    name PCRPrimerName OPTIONAL }

-- PCRPrimerSeq: primer sequence carried as a plain text string.
PCRPrimerSeq ::= VisibleString

-- PCRPrimerName: primer name carried as a plain text string.
PCRPrimerName ::= VisibleString

-- SubSource: one source modifier: a kind (subtype), its text (name) and
--   an optional attribution.  'subtype' is an INTEGER with named values
--   (not an ENUMERATED); the names defined here cover 1-40 and 255.
SubSource ::= SEQUENCE {
    subtype INTEGER {
        chromosome (1) ,
        map (2) ,
        clone (3) ,
        subclone (4) ,
        haplotype (5) ,
        genotype (6) ,
        sex (7) ,
        cell-line (8) ,
        cell-type (9) ,
        tissue-type (10) ,
        clone-lib (11) ,
        dev-stage (12) ,
        frequency (13) ,
        germline (14) ,
        rearranged (15) ,
        lab-host (16) ,
        pop-variant (17) ,
        tissue-lib (18) ,
        plasmid-name (19) ,
        transposon-name (20) ,
        insertion-seq-name (21) ,
        plastid-name (22) ,
        country (23) ,
        segment (24) ,
        endogenous-virus-name (25) ,
        transgenic (26) ,
        environmental-sample (27) ,
        isolation-source (28) ,
        lat-lon (29) ,          -- +/- decimal degrees
        collection-date (30) ,  -- DD-MMM-YYYY format
        collected-by (31) ,     -- name of person who collected the sample
        identified-by (32) ,    -- name of person who identified the sample
        fwd-primer-seq (33) ,   -- sequence (possibly more than one; semicolon-separated)
        rev-primer-seq (34) ,   -- sequence (possibly more than one; semicolon-separated)
        fwd-primer-name (35) ,
        rev-primer-name (36) ,
        metagenomic (37) ,
        mating-type (38) ,
        linkage-group (39) ,
        haplogroup (40) ,
        other (255) } ,
    name VisibleString ,               -- the modifier text (mandatory)
    attrib VisibleString OPTIONAL }    -- attribution/source of this name
        
END

--**********************************************************************
--
--  NCBI Protein
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-Protein DEFINITIONS ::=
BEGIN

EXPORTS Prot-ref;

IMPORTS Dbtag FROM NCBI-General;

--*** Prot-ref ***********************************************
--*
--*  Reference to a protein name
--*

-- Prot-ref: reference to a protein name.  'processed' is taken as
--   not-set when absent; every other element is OPTIONAL.
Prot-ref ::= SEQUENCE {
    name SET OF VisibleString OPTIONAL ,      -- protein name
    desc VisibleString OPTIONAL ,      -- description (instead of name)
    ec SET OF VisibleString OPTIONAL , -- E.C. number(s)
    activity SET OF VisibleString OPTIONAL ,  -- activities
    db SET OF Dbtag OPTIONAL ,         -- ids in other dbases
    processed ENUMERATED {             -- processing status
       not-set (0) ,
       preprotein (1) ,
       mature (2) ,
       signal-peptide (3) ,
       transit-peptide (4) } DEFAULT not-set }


END 
--********************************************************************
--
--  Transcription Initiation Site Feature Data Block
--  James Ostell, 1991
--  Philip Bucher, David Ghosh
--  version 1.1
--
--  
--
--********************************************************************

NCBI-TxInit DEFINITIONS ::=
BEGIN

EXPORTS Txinit;

IMPORTS Gene-ref FROM NCBI-Gene
        Prot-ref FROM NCBI-Protein
        Org-ref FROM NCBI-Organism;

-- Txinit: description of a transcription initiation site.  'name' and
--   'txsystem' are mandatory; the two BOOLEAN flags are taken as FALSE
--   when absent; every other element is OPTIONAL.
Txinit ::= SEQUENCE {
    name VisibleString ,    -- descriptive name of initiation site
    syn SEQUENCE OF VisibleString OPTIONAL ,   -- synonyms
    gene SEQUENCE OF Gene-ref OPTIONAL ,  -- gene(s) transcribed
    protein SEQUENCE OF Prot-ref OPTIONAL ,   -- protein(s) produced
    rna SEQUENCE OF VisibleString OPTIONAL ,  -- rna(s) produced
    expression VisibleString OPTIONAL ,  -- tissue/time of expression
    txsystem ENUMERATED {       -- transcription apparatus used at this site
        unknown (0) ,
        pol1 (1) ,      -- eukaryotic Pol I
        pol2 (2) ,      -- eukaryotic Pol II
        pol3 (3) ,      -- eukaryotic Pol III
        bacterial (4) ,
        viral (5) ,
        rna (6) ,       -- RNA replicase
        organelle (7) ,
        other (255) } ,
    txdescr VisibleString OPTIONAL ,   -- modifiers on txsystem
    txorg Org-ref OPTIONAL ,  -- organism supplying transcription apparatus
    mapping-precise BOOLEAN DEFAULT FALSE ,  -- mapping precise or approx
    location-accurate BOOLEAN DEFAULT FALSE , -- does Seq-loc reflect mapping
    inittype ENUMERATED {
        unknown (0) ,
        single (1) ,
        multiple (2) ,
        region (3) } OPTIONAL ,
    evidence SET OF Tx-evidence OPTIONAL }   -- supporting experiments, see Tx-evidence

-- Tx-evidence: one piece of experimental evidence for a Txinit.
--   'exp-code' is mandatory; 'expression-system' is taken as
--   physiological when absent and the two BOOLEAN flags as FALSE.
Tx-evidence ::= SEQUENCE {
    exp-code ENUMERATED {
        unknown (0) ,
        rna-seq (1) ,   -- direct RNA sequencing
        rna-size (2) ,  -- RNA length measurement
        np-map (3) ,    -- nuclease protection mapping with homologous sequence ladder
        np-size (4) ,   -- nuclease protected fragment length measurement
        pe-seq (5) ,    -- dideoxy RNA sequencing
        cDNA-seq (6) ,  -- full-length cDNA sequencing
        pe-map (7) ,    -- primer extension mapping with homologous sequence ladder
        pe-size (8) ,   -- primer extension product length measurement
        pseudo-seq (9) , -- full-length processed pseudogene sequencing
        rev-pe-map (10) ,   -- see NOTE (1) below
        other (255) } ,
    expression-system ENUMERATED {
        unknown (0) ,
        physiological (1) ,
        in-vitro (2) ,
        oocyte (3) ,
        transfection (4) ,
        transgenic (5) ,
        other (255) } DEFAULT physiological ,
    low-prec-data BOOLEAN DEFAULT FALSE ,
    from-homolog BOOLEAN DEFAULT FALSE }     -- experiment actually done on
                                             --  close homolog

    -- NOTE (1) length measurement of a reverse direction primer-extension
    --          product (blocked  by  RNA  5'end)  by  comparison with
    --          homologous sequence ladder (J. Mol. Biol. 199, 587)

    
END


-- seqloc.asn
--$Revision: 140157 $
--**********************************************************************
--
--  NCBI Sequence location and identifier elements
--  by James Ostell, 1990
--
--  Version 3.0 - 1994
--
--**********************************************************************

NCBI-Seqloc DEFINITIONS ::=
BEGIN

EXPORTS Seq-id, Seq-loc, Seq-interval, Packed-seqint, Seq-point, Packed-seqpnt,
        Na-strand, Giimport-id;

IMPORTS Object-id, Int-fuzz, Dbtag, Date FROM NCBI-General
        Id-pat FROM NCBI-Biblio
        Feat-id FROM NCBI-Seqfeat;

--*** Sequence identifiers ********************************
--*

-- Seq-id: identifier of a sequence.  Being a CHOICE, a value carries
--   exactly one of the forms below.  Most database-specific forms share
--   the Textseq-id structure; the alternative name alone tells them apart.
Seq-id ::= CHOICE {
    local Object-id ,            -- local use
    gibbsq INTEGER ,             -- Geninfo backbone seqid
    gibbmt INTEGER ,             -- Geninfo backbone moltype
    giim Giimport-id ,           -- Geninfo import id
    genbank Textseq-id ,
    embl Textseq-id ,
    pir Textseq-id ,
    swissprot Textseq-id ,
    patent Patent-seq-id ,
    other Textseq-id ,           -- for historical reasons, 'other' = 'refseq'
    general Dbtag ,              -- for other databases
    gi INTEGER ,                 -- GenInfo Integrated Database
    ddbj Textseq-id ,            -- DDBJ
    prf Textseq-id ,             -- PRF SEQDB
    pdb PDB-seq-id ,             -- PDB sequence
    tpg Textseq-id ,             -- Third Party Annot/Seq Genbank
    tpe Textseq-id ,             -- Third Party Annot/Seq EMBL
    tpd Textseq-id ,             -- Third Party Annot/Seq DDBJ
    gpipe Textseq-id ,           -- Internal NCBI genome pipeline processing ID
    named-annot-track Textseq-id -- Internal named annotation tracking ID
}


-- Patent-seq-id: identifies a sequence by its number within a cited
--   patent; both elements are mandatory.
Patent-seq-id ::= SEQUENCE {
    seqid INTEGER ,         -- number of sequence in patent
    cit Id-pat }           -- patent citation

-- Textseq-id: text-based sequence identifier shared by many Seq-id
--   alternatives.  All four elements are OPTIONAL, so the syntax alone
--   does not require any particular one to be present.
Textseq-id ::= SEQUENCE {
    name VisibleString OPTIONAL ,
    accession VisibleString OPTIONAL ,
    release VisibleString OPTIONAL ,
    version INTEGER OPTIONAL }

-- Giimport-id: Geninfo import identifier (see Seq-id.giim and
--   Feat-id.giim).  Only 'id' is mandatory.
Giimport-id ::= SEQUENCE {
    id INTEGER ,                     -- the id to use here
    db VisibleString OPTIONAL ,      -- dbase used in
    release VisibleString OPTIONAL } -- the release

-- PDB-seq-id: identifies a PDB sequence by molecule name and chain.
--   'chain' holds the character as its integer ASCII code; the DEFAULT of
--   32 is the code of the space character.  'rel' is OPTIONAL.
PDB-seq-id ::= SEQUENCE {
    mol PDB-mol-id ,           -- the molecule name
    chain INTEGER DEFAULT 32 , -- a single ASCII character, chain id
    rel Date OPTIONAL }        -- release date, month and year

-- PDB-mol-id: molecule name as plain text.  The 4 character length noted
--   below is a convention only; the type carries no SIZE constraint.
PDB-mol-id ::= VisibleString  -- name of mol, 4 chars
    
--*** Sequence locations **********************************
--*

-- A sequence location; exactly one of the forms below is used.
-- Seq-loc-mix and Seq-loc-equiv contain Seq-loc again, so locations
-- can be nested.
Seq-loc ::= CHOICE {
    null        NULL,             -- not placed
    empty       Seq-id,           -- to NULL one Seq-id in a collection
    whole       Seq-id,           -- whole sequence
    int         Seq-interval,     -- from to
    packed-int  Packed-seqint,
    pnt         Seq-point,
    packed-pnt  Packed-seqpnt,
    mix         Seq-loc-mix,
    equiv       Seq-loc-equiv,    -- equivalent sets of locations
    bond        Seq-bond,
    feat        Feat-id           -- indirect, through a Seq-feat
}
    

-- An interval, given by from and to, on the sequence named by id.
Seq-interval ::= SEQUENCE {
    from        INTEGER,
    to          INTEGER,
    strand      Na-strand OPTIONAL,
    id          Seq-id,               -- WARNING: this used to be optional
    fuzz-from   Int-fuzz OPTIONAL,
    fuzz-to     Int-fuzz OPTIONAL
}

-- An ordered list of intervals; the packed-int alternative of Seq-loc.
Packed-seqint ::= SEQUENCE OF Seq-interval

-- A single position on the sequence named by id.
Seq-point ::= SEQUENCE {
    point       INTEGER,
    strand      Na-strand OPTIONAL,
    id          Seq-id,               -- WARNING: this used to be optional
    fuzz        Int-fuzz OPTIONAL
}

-- Several positions that share one strand, one id and one fuzz value.
Packed-seqpnt ::= SEQUENCE {
    strand      Na-strand OPTIONAL,
    id          Seq-id,
    fuzz        Int-fuzz OPTIONAL,
    points      SEQUENCE OF INTEGER
}

-- Strand of nucleic acid.
Na-strand ::= ENUMERATED {
    unknown     (0),
    plus        (1),
    minus       (2),
    both        (3),      -- in forward orientation
    both-rev    (4),      -- in reverse orientation
    other       (255)
}

-- Bond between residues.
Seq-bond ::= SEQUENCE {
    a   Seq-point,            -- connection to at least one residue
    b   Seq-point OPTIONAL    -- other end may not be available
}

-- An ordered list of locations of any kind; the mix alternative of Seq-loc.
Seq-loc-mix ::= SEQUENCE OF Seq-loc   -- this will hold anything

-- An unordered set of locations; the equiv alternative of Seq-loc.
Seq-loc-equiv ::= SET OF Seq-loc      -- for a set of equivalent locations

END
    

-- seqres.asn
--$Revision: 6.0 $
--**********************************************************************
--
--  NCBI Sequence Analysis Results (other than alignments)
--  by James Ostell, 1990
--
--**********************************************************************

NCBI-Seqres DEFINITIONS ::=
BEGIN

EXPORTS Seq-graph;

IMPORTS Seq-loc FROM NCBI-Seqloc;

--*** Sequence Graph ********************************
--*
--*   for values mapped by residue or range to sequence
--*

-- A graph of values mapped onto a sequence location.
Seq-graph ::= SEQUENCE {
    title       VisibleString OPTIONAL,
    comment     VisibleString OPTIONAL,
    loc         Seq-loc,                    -- region this applies to
    title-x     VisibleString OPTIONAL,     -- title for x-axis
    title-y     VisibleString OPTIONAL,
    comp        INTEGER OPTIONAL,           -- compression (residues/value)
    a           REAL OPTIONAL,              -- for scaling values
    b           REAL OPTIONAL,              -- display = (a x value) + b
    numval      INTEGER,                    -- number of values in graph
    -- the values themselves, in one of three representations
    graph       CHOICE {
        real    Real-graph,
        int     Int-graph,
        byte    Byte-graph
    }
}

-- Graph whose values are REAL numbers.
Real-graph ::= SEQUENCE {
    max         REAL,               -- top of graph
    min         REAL,               -- bottom of graph
    axis        REAL,               -- value to draw axis on
    values      SEQUENCE OF REAL
}

-- Graph whose values are INTEGERs; same layout as Real-graph.
Int-graph ::= SEQUENCE {
    max         INTEGER,
    min         INTEGER,
    axis        INTEGER,
    values      SEQUENCE OF INTEGER
}

-- Graph whose values are integers from 0-255, packed into an
-- OCTET STRING.
Byte-graph ::= SEQUENCE {
    max         INTEGER,
    min         INTEGER,
    axis        INTEGER,
    values      OCTET STRING
}

END


-- seqset.asn
--$Revision: 149840 $
--**********************************************************************
--
--  NCBI Sequence Collections
--  by James Ostell, 1990
--
--  Version 3.0 - 1994
--
--**********************************************************************

NCBI-Seqset DEFINITIONS ::=
BEGIN

EXPORTS Bioseq-set, Seq-entry;

IMPORTS Bioseq, Seq-annot, Seq-descr FROM NCBI-Sequence
        Object-id, Dbtag, Date FROM NCBI-General;

--*** Sequence Collections ********************************
--*

-- Just a collection of sequence entries. Seq-entry can itself hold a
-- Bioseq-set, so sets can be nested.
Bioseq-set ::= SEQUENCE {
    id          Object-id OPTIONAL,
    coll        Dbtag OPTIONAL,             -- to identify a collection
    level       INTEGER OPTIONAL,           -- nesting level
    class       ENUMERATED {
        not-set             (0),
        nuc-prot            (1),    -- nuc acid and coded proteins
        segset              (2),    -- segmented sequence + parts
        conset              (3),    -- constructed sequence + parts
        parts               (4),    -- parts for 2 or 3
        gibb                (5),    -- geninfo backbone
        gi                  (6),    -- geninfo
        genbank             (7),    -- converted genbank
        pir                 (8),    -- converted pir
        pub-set             (9),    -- all the seqs from a single publication
        equiv               (10),   -- a set of equivalent maps or seqs
        swissprot           (11),   -- converted SWISSPROT
        pdb-entry           (12),   -- a complete PDB entry
        mut-set             (13),   -- set of mutations
        pop-set             (14),   -- population study
        phy-set             (15),   -- phylogenetic study
        eco-set             (16),   -- ecological sample study
        gen-prod-set        (17),   -- genomic products, chrom+mRNA+protein
        wgs-set             (18),   -- whole genome shotgun project
        named-annot         (19),   -- named annotation set
        named-annot-prod    (20),   -- with instantiated mRNA+protein
        read-set            (21),   -- set from a single read
        paired-end-reads    (22),   -- paired sequences within a read-set
        other               (255)
    } DEFAULT not-set,
    release     VisibleString OPTIONAL,
    date        Date OPTIONAL,
    descr       Seq-descr OPTIONAL,
    seq-set     SEQUENCE OF Seq-entry,
    annot       SET OF Seq-annot OPTIONAL
}

-- An entry is either a single Bioseq or a Bioseq-set.
Seq-entry ::= CHOICE {
    seq     Bioseq,
    set     Bioseq-set
}

END


-- seqsplit.asn
--$Revision: 1.9 $
--********************************************************************
--
--  Network Id server network access
--  Vasilchenko 2003
--
--
--*********************************************************************
--
--  seqsplit.asn
--
--     representation of split sequences
--
--*********************************************************************

NCBI-Seq-split DEFINITIONS ::=
BEGIN

EXPORTS ID2S-Chunk-Id, ID2S-Seq-annot-Info;

IMPORTS Seq-id                                      FROM NCBI-Seqloc
        Seq-entry                                   FROM NCBI-Seqset
        Bioseq, Seq-annot, Seq-descr, Seq-literal   FROM NCBI-Sequence
        Seq-align                                   FROM NCBI-Seqalign
        Feat-id                                     FROM NCBI-Seqfeat;

----------------------------------------------------------------------------
-- Blob split info types
----------------------------------------------------------------------------

----------------------------------------------------------------------------
-- Chunks split description


-- Split description: optional per-Bioseq info, the descriptions of all
-- chunks, and an optional skeleton Seq-entry.
ID2S-Split-Info ::= SEQUENCE {
    bioseqs-info    SET OF ID2S-Bioseqs-Info OPTIONAL,
    chunks          SET OF ID2S-Chunk-Info,
    skeleton        Seq-entry OPTIONAL
}


-- Pairs one ID2S-Bioseq-Info record with the set of Bioseq ids listed
-- alongside it.
ID2S-Bioseqs-Info ::= SEQUENCE {
    info        ID2S-Bioseq-Info,
    bioseqs     ID2S-Bioseq-Ids
}


-- Per-Bioseq summary values; both fields are optional.
ID2S-Bioseq-Info ::= SEQUENCE {
    gap-count           INTEGER OPTIONAL,
    seq-map-has-ref     BOOLEAN OPTIONAL
}


-- Description of one chunk: its id and what it contains.
ID2S-Chunk-Info ::= SEQUENCE {
    id          ID2S-Chunk-Id,
    content     SET OF ID2S-Chunk-Content
}


-- Description of information in this chunk.
-- In the comments below, "place" means the id of a Bioseq or Bioseq-set.
ID2S-Chunk-Content ::= CHOICE {
    seq-descr           ID2S-Seq-descr-Info,            -- place of Seq-descrs
    seq-annot           ID2S-Seq-annot-Info,            -- locations and types of annotations
    seq-assembly        ID2S-Seq-assembly-Info,         -- place of assembly history
    seq-map             ID2S-Seq-map-Info,              -- place of sequence map
    seq-data            ID2S-Seq-data-Info,             -- place of sequence data
    seq-annot-place     ID2S-Seq-annot-place-Info,      -- place of Seq-annots
    bioseq-place        SET OF ID2S-Bioseq-place-Info,  -- place of Bioseqs
    feat-ids            SET OF ID2S-Seq-feat-Ids-Info   -- ids of features
}


-- Place of Seq-descrs in a chunk: which descriptor types, and on which
-- Bioseqs and/or Bioseq-sets.
ID2S-Seq-descr-Info ::= SEQUENCE {
    type-mask       INTEGER,                        -- mask of Seq-descr types
    bioseqs         ID2S-Bioseq-Ids OPTIONAL,
    bioseq-sets     ID2S-Bioseq-set-Ids OPTIONAL
}


-- Locations and types of annotations in a chunk.
ID2S-Seq-annot-Info ::= SEQUENCE {
    -- name is set if this is a named annot;
    -- the name may be empty, which differs from an unnamed annot
    name        VisibleString OPTIONAL,
    align       NULL OPTIONAL,
    graph       NULL OPTIONAL,
    feat        SET OF ID2S-Feat-type-Info OPTIONAL,
    seq-loc     ID2S-Seq-loc OPTIONAL
}


-- Place of Seq-annots in a chunk: an optional name plus the Bioseqs
-- and/or Bioseq-sets involved.
ID2S-Seq-annot-place-Info ::= SEQUENCE {
    name            VisibleString OPTIONAL,
    bioseqs         ID2S-Bioseq-Ids OPTIONAL,
    bioseq-sets     ID2S-Bioseq-set-Ids OPTIONAL
}


-- Ids of features in a chunk; every field is optional.
ID2S-Seq-feat-Ids-Info ::= SEQUENCE {
    feat-types      SET OF ID2S-Feat-type-Info OPTIONAL,
    xref-types      SET OF ID2S-Feat-type-Info OPTIONAL,
    local-ids       SET OF INTEGER OPTIONAL
}


-- A feature type given as an integer, with an optional set of integer
-- subtypes.
ID2S-Feat-type-Info ::= SEQUENCE {
    type        INTEGER,
    subtypes    SET OF INTEGER OPTIONAL
}


-- Place of assembly history: the Bioseqs it belongs to.
ID2S-Seq-assembly-Info ::= SEQUENCE {
    bioseqs     ID2S-Bioseq-Ids
}


-- Place of sequence map, expressed as an ID2S-Seq-loc;
-- the seq-map alternative of ID2S-Chunk-Content.
ID2S-Seq-map-Info ::= ID2S-Seq-loc


-- Place of sequence data, expressed as an ID2S-Seq-loc;
-- the seq-data alternative of ID2S-Chunk-Content.
ID2S-Seq-data-Info ::= ID2S-Seq-loc


-- Place of Bioseqs: an integer Bioseq-set id together with the ids of
-- the Bioseqs.
ID2S-Bioseq-place-Info ::= SEQUENCE {
    bioseq-set      INTEGER,
    seq-ids         ID2S-Bioseq-Ids
}


-- A chunk: a set of data pieces, each carrying its own place of insertion.
ID2S-Chunk ::= SEQUENCE {
    data    SET OF ID2S-Chunk-Data
}


-- One piece of chunk data: the place to insert it (id) followed by the
-- data itself. Every data field is optional.
ID2S-Chunk-Data ::= SEQUENCE {

    -- place of data to insert
    id          CHOICE {
        bioseq-set  INTEGER,    -- Bioseq-set id
        gi          INTEGER,    -- Bioseq id
        seq-id      Seq-id      -- Bioseq id
    },

    -- Seq-descr, for Bioseq and Bioseq-set
    descr       Seq-descr OPTIONAL,

    -- Seq-annot, for Bioseq and Bioseq-set
    annots      SET OF Seq-annot OPTIONAL,

    -- assembly history Seq-align, for Bioseq
    assembly    SET OF Seq-align OPTIONAL,

    -- sequence map, for Bioseq
    seq-map     SEQUENCE OF ID2S-Sequence-Piece OPTIONAL,

    -- sequence data, for Bioseq
    seq-data    SEQUENCE OF ID2S-Sequence-Piece OPTIONAL,

    -- Bioseq, for Bioseq-set
    bioseqs     SET OF Bioseq OPTIONAL
}


-- A run of Seq-literal values together with its start position.
ID2S-Sequence-Piece ::= SEQUENCE {
    start   INTEGER,                    -- start position on sequence
    data    SEQUENCE OF Seq-literal
}


----------------------------------------------------------------------------
-- utility types
----------------------------------------------------------------------------


-- Integer id of a chunk; type of the id field of ID2S-Chunk-Info.
ID2S-Chunk-Id ::= INTEGER


-- Set of integer ids; type of the bioseq-sets fields of
-- ID2S-Seq-descr-Info and ID2S-Seq-annot-place-Info.
ID2S-Bioseq-set-Ids ::= SET OF INTEGER


-- Set of Bioseq ids. Each member is a single gi, a Seq-id, or a range
-- of gis.
ID2S-Bioseq-Ids ::= SET OF CHOICE {
    gi          INTEGER,
    seq-id      Seq-id,
    gi-range    ID2S-Gi-Range
}


-- A run of sequential gis.
ID2S-Gi-Range ::= SEQUENCE {
    start   INTEGER,              -- start gi in this gi range
    count   INTEGER DEFAULT 1     -- number of sequential gis
}


-- ID2S-Seq-loc represents an unordered and unstranded set of intervals
-- on a set of sequences.
-- It is optimized for compact encoding of several common cases:
--    Seq-ids of type gi,
--    intervals covering whole sequences,
--    whole sequences with sequential gis,
--    set of intervals on the same sequence (Seq-id sharing).
ID2S-Seq-loc ::= CHOICE {
    whole-gi            INTEGER,                -- whole sequence by gi
    whole-seq-id        Seq-id,                 -- whole sequence by Seq-id
    whole-gi-range      ID2S-Gi-Range,          -- set of whole sequences by gis
    gi-interval         ID2S-Gi-Interval,       -- interval on sequence by gi
    seq-id-interval     ID2S-Seq-id-Interval,   -- interval on sequence by Seq-id
    gi-ints             ID2S-Gi-Ints,           -- set of intervals on the same gi
    seq-id-ints         ID2S-Seq-id-Ints,       -- set of intervals on the same id
    loc-set             SET OF ID2S-Seq-loc     -- combination of locations
}


-- Interval on the sequence identified by gi, given as start and length.
ID2S-Gi-Interval ::= SEQUENCE {
    gi      INTEGER,
    start   INTEGER,
    length  INTEGER DEFAULT 1
}


-- Interval on the sequence identified by seq-id, given as start and
-- length.
ID2S-Seq-id-Interval ::= SEQUENCE {
    seq-id  Seq-id,
    start   INTEGER,
    length  INTEGER DEFAULT 1
}


-- Interval without a sequence id, given as start and length; the
-- element type of the ints fields of ID2S-Gi-Ints and ID2S-Seq-id-Ints.
ID2S-Interval ::= SEQUENCE {
    start   INTEGER,
    length  INTEGER DEFAULT 1
}


-- Set of intervals that all lie on the same gi.
ID2S-Gi-Ints ::= SEQUENCE {
    gi      INTEGER,
    ints    SET OF ID2S-Interval
}


-- Set of intervals that all lie on the same Seq-id.
ID2S-Seq-id-Ints ::= SEQUENCE {
    seq-id  Seq-id,
    ints    SET OF ID2S-Interval
}


END

-- seqtable.asn
--$Revision: 115572 $
--  ----------------------------------------------------------------------------
--
--                            PUBLIC DOMAIN NOTICE
--                National Center for Biotechnology Information
--
--  This software/database is a "United States Government Work" under the terms
--  of the United States Copyright Act.  It was written as part of the author's
--  official duties as a United States Government employee and thus cannot be
--  copyrighted.  This software/database is freely available to the public for
--  use.  The National Library of Medicine and the U.S. Government have not
--  placed any restriction on its use or reproduction.
--
--  Although all reasonable efforts have been taken to ensure the accuracy and
--  reliability of the software and data, the NLM and the U.S. Government do not
--  and cannot warrant the performance or results that may be obtained by using
--  this software or data.  The NLM and the U.S. Government disclaim all
--  warranties, express or implied, including warranties of performance,
--  merchantability or fitness for any particular purpose.
--
--  Please cite the authors in any work or product based on this material.
--
--  ----------------------------------------------------------------------------
--
--  Authors: Mike DiCuccio, Eugene Vasilchenko
--
--  ASN.1 interface to table readers
--
--  ----------------------------------------------------------------------------

NCBI-SeqTable DEFINITIONS ::=

BEGIN

EXPORTS
    SeqTable-column-info, SeqTable-column, Seq-table;
    
IMPORTS
    Seq-id, Seq-loc, Seq-interval   FROM NCBI-Seqloc;


-- Describes what a table column holds: an optional display title plus
-- an identification of the column data by field-id and/or field-name
-- (both are optional).
SeqTable-column-info ::= SEQUENCE {

    -- user friendly column name, can be skipped
    title       VisibleString OPTIONAL,

    -- identification of the column data in the objects described by the table
    field-id    INTEGER {   -- known column data types

        -- position types
        location                (0),    -- location as Seq-loc
        location-id             (1),    -- location Seq-id
        location-gi             (2),    -- gi
        location-from           (3),    -- interval from
        location-to             (4),    -- interval to
        location-strand         (5),    -- location strand
        location-fuzz-from-lim  (6),
        location-fuzz-to-lim    (7),

        product                 (10),   -- product as Seq-loc
        product-id              (11),   -- product Seq-id
        product-gi              (12),   -- product gi
        product-from            (13),   -- product interval from
        product-to              (14),   -- product interval to
        product-strand          (15),   -- product strand
        product-fuzz-from-lim   (16),
        product-fuzz-to-lim     (17),

        -- main feature fields
        id-local                (20),   -- id.local.id
        xref-id-local           (21),   -- xref.id.local.id
        partial                 (22),
        comment                 (23),
        title                   (24),
        ext                     (25),   -- field-name must be "E.xxx", see below
        qual                    (26),   -- field-name must be "Q.xxx", see below
        dbxref                  (27),   -- field-name must be "D.xxx", see below

        -- various data fields
        data-imp-key            (30),
        data-region             (31),
        data-cdregion-frame     (32),

        -- extra fields, see also special values for str below
        ext-type                (40),
        qual-qual               (41),
        qual-val                (42),
        dbxref-db               (43),
        dbxref-tag              (44)
    } OPTIONAL,

    -- any column can be identified by ASN.1 text locator string
    -- with omitted object type.
    -- examples:
    --   "data.gene.locus" for Seq-feat.data.gene.locus
    --   "data.imp.key" for Seq-feat.data.imp.key
    --   "qual.qual"
    --    - Seq-feat.qual is SEQUENCE so several columns are allowed
    --      see also "Q.xxx" special value for shorter qual representation
    --   "ext.type.str"
    --   "ext.data.label.str"
    --   "ext.data.data.int"
    --      see also "E.xxx" special value for shorter ext representation
    -- special values start with capital letter:
    --   "E.xxx" - ext.data.label.str = xxx, ext.data.data = data
    --    - Seq-feat.ext.data is SEQUENCE so several columns are allowed
    --   "Q.xxx" - qual.qual = xxx, qual.val = data
    --    - Seq-feat.qual is SEQUENCE so several columns are allowed
    --   "D.xxx" - dbxref.id = xxx, dbxref.tag = data
    --    - Seq-feat.dbxref is SET so several columns are allowed
    field-name  VisibleString OPTIONAL
}


-- String values drawn from a small set of possible values: the
-- possible values are listed in strings and referred to through
-- indexes.
CommonString-table ::= SEQUENCE {
    strings     SEQUENCE OF VisibleString,  -- set of possible values
    indexes     SEQUENCE OF INTEGER         -- indexes of values
}


-- Byte-array values drawn from a small set of possible values: the
-- possible values are listed in bytes and referred to through indexes.
CommonBytes-table ::= SEQUENCE {
    bytes       SEQUENCE OF OCTET STRING,   -- set of possible values
    indexes     SEQUENCE OF INTEGER         -- indexes of values
}


-- Column data for many rows at once, in one of several representations.
SeqTable-multi-data ::= CHOICE {
    int             SEQUENCE OF INTEGER,        -- a set of integers, one per row
    real            SEQUENCE OF REAL,           -- a set of reals, one per row
    string          SEQUENCE OF VisibleString,  -- a set of strings, one per row
    bytes           SEQUENCE OF OCTET STRING,   -- a set of byte arrays, one per row

    -- a set of strings with a small set of possible values
    common-string   CommonString-table,

    -- a set of byte arrays with small set of possible values
    common-bytes    CommonBytes-table,

    -- a set of bits, one per row
    -- this uses bm::bvector<> as its storage mechanism
    bit             OCTET STRING,

    -- a set of locations, one per row
    loc             SEQUENCE OF Seq-loc,
    id              SEQUENCE OF Seq-id,
    interval        SEQUENCE OF Seq-interval
}


-- A single value, in one of several representations.
SeqTable-single-data ::= CHOICE {
    int         INTEGER,            -- integer
    real        REAL,               -- real
    string      VisibleString,      -- string
    bytes       OCTET STRING,       -- byte array
    bit         BOOLEAN,            -- bit

    -- location
    loc         Seq-loc,
    id          Seq-id,
    interval    Seq-interval
}


-- Tells which rows of a sparse column have values, either as a list of
-- row indexes or as a bitset.
SeqTable-sparse-index ::= CHOICE {
    indexes     SEQUENCE OF INTEGER,    -- indexes of rows with values
    bit-set     OCTET STRING            -- bitset of rows with values
}


-- One column of a Seq-table: its description plus optional row data,
-- sparse index and fallback values.
SeqTable-column ::= SEQUENCE {

    -- column description or reference to previously defined info
    header          SeqTable-column-info,           -- information about data

    -- row data
    data            SeqTable-multi-data OPTIONAL,

    -- in case not all rows contain data this field will contain sparse info
    sparse          SeqTable-sparse-index OPTIONAL,

    -- default value for sparse table, or if row data is too short
    default         SeqTable-single-data OPTIONAL,

    -- single value for indexes not listed in sparse table
    sparse-other    SeqTable-single-data OPTIONAL
}


-- A table of features stored column by column.
Seq-table ::= SEQUENCE {

    -- type of features in this table, equal to Seq-feat.data variant index
    feat-type       INTEGER,

    -- subtype of features in this table, defined in header SeqFeatData.hpp
    feat-subtype    INTEGER OPTIONAL,

    num-rows        INTEGER,                        -- number of rows
    columns         SEQUENCE OF SeqTable-column     -- data in columns
}


END

-- submit.asn
--$Revision: 6.1 $
--********************************************************************
--
--  Direct Submission of Sequence Data
--  James Ostell, 1991
--
--  This is a trial specification for direct submission of sequence
--    data worked out between NCBI and EMBL
--  Later revised to reflect work with GenBank and Integrated database
--
--  Version 3.0, 1994
--    This is the official NCBI sequence submission format now.
--
--********************************************************************

NCBI-Submit DEFINITIONS ::=
BEGIN

EXPORTS Seq-submit, Contact-info;

IMPORTS Cit-sub, Author FROM NCBI-Biblio
        Date, Object-id FROM NCBI-General
        Seq-annot FROM NCBI-Sequence
        Seq-id FROM NCBI-Seqloc
        Seq-entry FROM NCBI-Seqset;

-- A direct submission: the submission block followed by the submitted
-- data, which is exactly one of entries, annotations or deletions.
Seq-submit ::= SEQUENCE {
    sub     Submit-block,
    data    CHOICE {
        entrys  SET OF Seq-entry,   -- sequence(s)
        annots  SET OF Seq-annot,   -- annotation(s)
        delete  SET OF Seq-id       -- deletions of entries
    }
}

-- Information about the submission itself: who to contact, how to cite
-- it, when to release it and what kind of submission it is.
Submit-block ::= SEQUENCE {
    contact     Contact-info,               -- who to contact
    cit         Cit-sub,                    -- citation for this submission
    hup         BOOLEAN DEFAULT FALSE,      -- hold until publish
    reldate     Date OPTIONAL,              -- release by date
    subtype     INTEGER {                   -- type of submission
        new         (1),                    -- new data
        update      (2),                    -- update by author
        revision    (3),                    -- 3rd party (non-author) update
        other       (255)
    } OPTIONAL,
    tool        VisibleString OPTIONAL,     -- tool used to make submission
    user-tag    VisibleString OPTIONAL,     -- user supplied id for this submission
    comment     VisibleString OPTIONAL      -- user comments/advice to database
}

-- Who to contact to discuss the submission. Every field is optional.
Contact-info ::= SEQUENCE {
    name            VisibleString OPTIONAL,     -- OBSOLETE: will be removed
    address         SEQUENCE OF VisibleString OPTIONAL,
    phone           VisibleString OPTIONAL,
    fax             VisibleString OPTIONAL,
    email           VisibleString OPTIONAL,
    telex           VisibleString OPTIONAL,
    owner-id        Object-id OPTIONAL,         -- for owner accounts
    password        OCTET STRING OPTIONAL,
    last-name       VisibleString OPTIONAL,     -- structured to replace name above
    first-name      VisibleString OPTIONAL,
    middle-initial  VisibleString OPTIONAL,
    contact         Author OPTIONAL             -- WARNING: this will replace the above
}

END


-- tinyseq.asn
--$Revision: 6.1 $
--**********************************************************************
--
--  ASN.1 for a tiny Bioseq in XML
--    basically a structured FASTA file with a few extras
--    in this case we drop all modularity of components
--      All ids are Optional - simpler structure, less checking
--      Components of organism are hard coded - can't easily add or change
--      sequence is just string whether DNA or protein
--  by James Ostell, 2000
--
--**********************************************************************

NCBI-TSeq DEFINITIONS ::=
BEGIN

-- A tiny Bioseq: sequence type, optional ids and organism fields, then
-- the required defline, length and sequence string.
TSeq ::= SEQUENCE {
    seqtype     ENUMERATED {
        nucleotide  (1),
        protein     (2)
    },
    gi          INTEGER OPTIONAL,
    accver      VisibleString OPTIONAL,
    sid         VisibleString OPTIONAL,
    local       VisibleString OPTIONAL,
    taxid       INTEGER OPTIONAL,
    orgname     VisibleString OPTIONAL,
    defline     VisibleString,
    length      INTEGER,
    sequence    VisibleString
}

-- An ordered list of TSeq records.
TSeqSet ::= SEQUENCE OF TSeq    -- a bunch of them

END