--$Revision: 142746 $
--**********************************************************************
--
--  NCBI Sequence Feature elements
--  by James Ostell, 1990
--  Version 3.0 - June 1994
--
--**********************************************************************

NCBI-Seqfeat DEFINITIONS ::=
BEGIN

EXPORTS Seq-feat, Feat-id, Genetic-code;

IMPORTS Gene-ref FROM NCBI-Gene
        Prot-ref FROM NCBI-Protein
        Org-ref FROM NCBI-Organism
        BioSource FROM NCBI-BioSource
        RNA-ref FROM NCBI-RNA
        Seq-loc, Giimport-id FROM NCBI-Seqloc
        Pubdesc, Numbering, Heterogen FROM NCBI-Sequence
        Rsite-ref FROM NCBI-Rsite
        Txinit FROM NCBI-TxInit
        Pub-set FROM NCBI-Pub
        Object-id, Dbtag, User-object FROM NCBI-General;

--*** Feature identifiers ********************************
--*

Feat-id ::= CHOICE {
    gibb INTEGER ,            -- geninfo backbone
    giim Giimport-id ,        -- geninfo import
    local Object-id ,         -- for local software use
    general Dbtag }           -- for use by various databases

--*** Seq-feat *******************************************
--*  sequence feature generalization

Seq-feat ::= SEQUENCE {
    id Feat-id OPTIONAL ,
    data SeqFeatData ,           -- the specific data
    partial BOOLEAN OPTIONAL ,    -- incomplete in some way?
    except BOOLEAN OPTIONAL ,     -- something funny about this?
    comment VisibleString OPTIONAL ,
    product Seq-loc OPTIONAL ,    -- product of process
    location Seq-loc ,            -- feature made from
    qual SEQUENCE OF Gb-qual OPTIONAL ,  -- qualifiers
    title VisibleString OPTIONAL ,   -- for user defined label
    ext User-object OPTIONAL ,    -- user defined structure extension
    cit Pub-set OPTIONAL ,        -- citations for this feature
    exp-ev ENUMERATED {           -- evidence for existence of feature
        experimental (1) ,        -- any reasonable experimental check
        not-experimental (2) } OPTIONAL , -- similarity, pattern, etc
    xref SET OF SeqFeatXref OPTIONAL ,   -- cite other relevant features
    dbxref SET OF Dbtag OPTIONAL ,  -- support for xref to other databases
    pseudo BOOLEAN OPTIONAL ,     -- annotated on pseudogene?
    except-text VisibleString OPTIONAL , -- explain if except=TRUE
    ids SET OF Feat-id OPTIONAL ,       -- set of Ids; will replace 'id' field
    exts SET OF User-object OPTIONAL }  -- set of extensions; will replace 'ext' field

SeqFeatData ::= CHOICE {
    gene Gene-ref ,
    org Org-ref ,
    cdregion Cdregion ,
    prot Prot-ref ,
    rna RNA-ref ,
    pub Pubdesc ,              -- publication applies to this seq 
    seq Seq-loc ,              -- to annotate origin from another seq
    imp Imp-feat ,
    region VisibleString,      -- named region (globin locus)
    comment NULL ,             -- just a comment
    bond ENUMERATED {
        disulfide (1) ,
        thiolester (2) ,
        xlink (3) ,
        thioether (4) ,
        other (255) } ,
    site ENUMERATED {
        active (1) ,
        binding (2) ,
        cleavage (3) ,
        inhibit (4) ,
        modified (5),
        glycosylation (6) ,
        myristoylation (7) ,
        mutagenized (8) ,
        metal-binding (9) ,
        phosphorylation (10) ,
        acetylation (11) ,
        amidation (12) ,
        methylation (13) ,
        hydroxylation (14) ,
        sulfatation (15) ,
        oxidative-deamination (16) ,
        pyrrolidone-carboxylic-acid (17) ,
        gamma-carboxyglutamic-acid (18) ,
        blocked (19) ,
        lipid-binding (20) ,
        np-binding (21) ,
        dna-binding (22) ,
        signal-peptide (23) ,
        transit-peptide (24) ,
        transmembrane-region (25) ,
        nitrosylation (26) ,
        other (255) } ,
    rsite Rsite-ref ,       -- restriction site  (for maps really)
    user User-object ,      -- user defined structure
    txinit Txinit ,         -- transcription initiation
    num Numbering ,         -- a numbering system
    psec-str ENUMERATED {   -- protein secondary structure
        helix (1) ,         -- any helix
        sheet (2) ,         -- beta sheet
        turn  (3) } ,       -- beta or gamma turn
    non-std-residue VisibleString ,  -- non-standard residue here in seq
    het Heterogen ,         -- cofactor, prosthetic grp, etc, bound to seq
    biosrc BioSource }

SeqFeatXref ::= SEQUENCE {       -- both optional because can have one or both
    id Feat-id OPTIONAL ,        -- the feature copied
    data SeqFeatData OPTIONAL }  -- the specific data
    
--*** CdRegion ***********************************************
--*
--*  Instructions to translate from a nucleic acid to a peptide
--*    conflict means it's supposed to translate but doesn't
--*


Cdregion ::= SEQUENCE {
    orf BOOLEAN OPTIONAL ,             -- just an ORF ?
    frame ENUMERATED {
        not-set (0) ,                  -- not set, code uses one
        one (1) ,
        two (2) ,
        three (3) } DEFAULT not-set ,      -- reading frame
    conflict BOOLEAN OPTIONAL ,        -- conflict
    gaps INTEGER OPTIONAL ,            -- number of gaps on conflict/except
    mismatch INTEGER OPTIONAL ,        -- number of mismatches on above
    code Genetic-code OPTIONAL ,       -- genetic code used
    code-break SEQUENCE OF Code-break OPTIONAL ,   -- individual exceptions
    stops INTEGER OPTIONAL }           -- number of stop codons on above

                    -- each code is 64 cells long, in the order where
                    -- T=0,C=1,A=2,G=3, TTT=0, TTC=1, TCA=4, etc
                    -- NOTE: this order does NOT correspond to a Seq-data
                    -- encoding.  It is "natural" to codon usage instead.
                    -- the value in each cell is the AA coded for
                    -- start= AA coded only if first in peptide
                    --   in start array, if codon is not a legitimate start
                    --   codon, that cell will have the "gap" symbol for
                    --   that alphabet.  Otherwise it will have the AA
                    --   encoded when that codon is used at the start.

Genetic-code ::= SET OF CHOICE {
    name VisibleString ,               -- name of a code
    id INTEGER ,                       -- id in dbase
    ncbieaa VisibleString ,            -- indexed to IUPAC extended
    ncbi8aa OCTET STRING ,             -- indexed to NCBI8aa
    ncbistdaa OCTET STRING ,           -- indexed to NCBIstdaa
    sncbieaa VisibleString ,            -- start, indexed to IUPAC extended
    sncbi8aa OCTET STRING ,             -- start, indexed to NCBI8aa
    sncbistdaa OCTET STRING }           -- start, indexed to NCBIstdaa

Code-break ::= SEQUENCE {              -- specific codon exceptions
    loc Seq-loc ,                      -- location of exception
    aa CHOICE {                        -- the amino acid
        ncbieaa INTEGER ,              -- ASCII value of NCBIeaa code
        ncbi8aa INTEGER ,              -- NCBI8aa code
        ncbistdaa INTEGER } }           -- NCBIstdaa code

Genetic-code-table ::= SET OF Genetic-code     -- table of genetic codes

--*** Import ***********************************************
--*
--*  Features imported from other databases
--*

Imp-feat ::= SEQUENCE {
    key VisibleString ,
    loc VisibleString OPTIONAL ,         -- original location string
    descr VisibleString OPTIONAL }       -- text description

Gb-qual ::= SEQUENCE {
    qual VisibleString ,
    val VisibleString }

END 

--**********************************************************************
--
--  NCBI Restriction Sites
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-Rsite DEFINITIONS ::=
BEGIN

EXPORTS Rsite-ref;

IMPORTS Dbtag FROM NCBI-General;

Rsite-ref ::= CHOICE {
    str VisibleString ,     -- may be unparsable
    db  Dbtag }             -- pointer to a restriction site database

END

--**********************************************************************
--
--  NCBI RNAs
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-RNA DEFINITIONS ::=
BEGIN

EXPORTS RNA-ref, Trna-ext, RNA-gen, RNA-qual, RNA-qual-set;

IMPORTS Seq-loc FROM NCBI-Seqloc;

--*** rnas ***********************************************
--*
--*  various rnas
--*
                         -- minimal RNA sequence
RNA-ref ::= SEQUENCE {
    type ENUMERATED {            -- type of RNA feature
        unknown (0) ,
        premsg (1) ,
        mRNA (2) ,
        tRNA (3) ,
        rRNA (4) ,
        snRNA (5) ,              -- will become ncRNA, with RNA-gen.class = snRNA
        scRNA (6) ,              -- will become ncRNA, with RNA-gen.class = scRNA
        snoRNA (7) ,             -- will become ncRNA, with RNA-gen.class = snoRNA
        ncRNA (8) ,              -- non-coding RNA; subsumes snRNA, scRNA, snoRNA
        tmRNA (9) ,
        miscRNA (10) ,
        other (255) } ,
    pseudo BOOLEAN OPTIONAL ,
    ext CHOICE {
        name VisibleString ,        -- for naming "other" type
        tRNA Trna-ext ,             -- for tRNAs
        gen RNA-gen } OPTIONAL      -- generic fields for ncRNA, tmRNA, miscRNA
    }

Trna-ext ::= SEQUENCE {                 -- tRNA feature extensions
    aa CHOICE {                         -- aa this carries
        iupacaa INTEGER ,
        ncbieaa INTEGER ,
        ncbi8aa INTEGER ,
        ncbistdaa INTEGER } OPTIONAL ,
    codon SET OF INTEGER OPTIONAL ,     -- codon(s) as in Genetic-code
    anticodon Seq-loc OPTIONAL }        -- location of anticodon

RNA-gen ::= SEQUENCE {
    class VisibleString OPTIONAL ,      -- for ncRNAs, the class of non-coding RNA:
                                        -- examples: antisense_RNA, guide_RNA, snRNA
    product VisibleString OPTIONAL ,
    quals RNA-qual-set OPTIONAL         -- e.g., tag_peptide qualifier for tmRNAs
}

RNA-qual ::= SEQUENCE {                 -- Additional data values for RNA-gen,
    qual VisibleString ,                -- in a tag (qual), value (val) format
    val VisibleString }

RNA-qual-set ::= SEQUENCE OF RNA-qual

END

--**********************************************************************
--
--  NCBI Genes
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-Gene DEFINITIONS ::=
BEGIN

EXPORTS Gene-ref, Gene-nomenclature;

IMPORTS Dbtag FROM NCBI-General;

--*** Gene ***********************************************
--*
--*  reference to a gene
--*

Gene-ref ::= SEQUENCE {
    locus VisibleString OPTIONAL ,        -- Official gene symbol
    allele VisibleString OPTIONAL ,       -- Official allele designation
    desc VisibleString OPTIONAL ,         -- descriptive name
    maploc VisibleString OPTIONAL ,       -- descriptive map location
    pseudo BOOLEAN DEFAULT FALSE ,        -- pseudogene
    db SET OF Dbtag OPTIONAL ,            -- ids in other dbases
    syn SET OF VisibleString OPTIONAL ,   -- synonyms for locus
    locus-tag VisibleString OPTIONAL ,    -- systematic gene name (e.g., MI0001, ORF0069)
    formal-name Gene-nomenclature OPTIONAL
}

Gene-nomenclature ::= SEQUENCE {
    status ENUMERATED {
        unknown (0) ,
        official (1) ,
        interim (2) 
    } ,
    symbol VisibleString OPTIONAL ,
    name VisibleString OPTIONAL ,
    source Dbtag OPTIONAL
}

END


--**********************************************************************
--
--  NCBI Organism
--  by James Ostell, 1994
--  version 3.0
--
--**********************************************************************

NCBI-Organism DEFINITIONS ::=
BEGIN

EXPORTS Org-ref;

IMPORTS Dbtag FROM NCBI-General;

--*** Org-ref ***********************************************
--*
--*  Reference to an organism
--*     defines only the organism.. lower levels of detail for biological
--*     molecules are provided by the Source object
--*

Org-ref ::= SEQUENCE {
    taxname VisibleString OPTIONAL ,   -- preferred formal name
    common VisibleString OPTIONAL ,    -- common name
    mod SET OF VisibleString OPTIONAL , -- unstructured modifiers
    db SET OF Dbtag OPTIONAL ,         -- ids in taxonomic or culture dbases
    syn SET OF VisibleString OPTIONAL ,  -- synonyms for taxname or common
    orgname OrgName OPTIONAL }
    

OrgName ::= SEQUENCE {
    name CHOICE {
        binomial BinomialOrgName ,         -- genus/species type name
        virus VisibleString ,              -- virus names are different
        hybrid MultiOrgName ,              -- hybrid between organisms
        namedhybrid BinomialOrgName ,      -- some hybrids have genus x species name
        partial PartialOrgName } OPTIONAL , -- when genus not known
    attrib VisibleString OPTIONAL ,        -- attribution of name
    mod SEQUENCE OF OrgMod OPTIONAL ,
    lineage VisibleString OPTIONAL ,       -- lineage with semicolon separators
    gcode INTEGER OPTIONAL ,               -- genetic code (see CdRegion)
    mgcode INTEGER OPTIONAL ,              -- mitochondrial genetic code
    div VisibleString OPTIONAL }           -- GenBank division code
    

OrgMod ::= SEQUENCE {
    subtype INTEGER {
        strain (2) ,
        substrain (3) ,
        type (4) ,
        subtype (5) ,
        variety (6) ,
        serotype (7) ,
        serogroup (8) ,
        serovar (9) ,
        cultivar (10) ,
        pathovar (11) ,
        chemovar (12) ,
        biovar (13) ,
        biotype (14) ,
        group (15) ,
        subgroup (16) ,
        isolate (17) ,
        common (18) ,
        acronym (19) ,
        dosage (20) ,          -- chromosome dosage of hybrid
        nat-host (21) ,        -- natural host of this specimen
        sub-species (22) ,
        specimen-voucher (23) ,
        authority (24) ,
        forma (25) ,
        forma-specialis (26) ,
        ecotype (27) ,
        synonym (28) ,
        anamorph (29) ,
        teleomorph (30) ,
        breed (31) ,
        gb-acronym (32) ,       -- used by taxonomy database
        gb-anamorph (33) ,      -- used by taxonomy database
        gb-synonym (34) ,       -- used by taxonomy database
        culture-collection (35) ,
        bio-material (36) ,
        metagenome-source (37) ,
        old-lineage (253) ,
        old-name (254) ,
        other (255) } ,         -- ASN5: old-name (254) will be added to next spec
    subname VisibleString ,
    attrib VisibleString OPTIONAL }  -- attribution/source of name

BinomialOrgName ::= SEQUENCE {
    genus VisibleString ,               -- required
    species VisibleString OPTIONAL ,    -- species required if subspecies used
    subspecies VisibleString OPTIONAL }

MultiOrgName ::= SEQUENCE OF OrgName   -- the first will be used to assign division

PartialOrgName ::= SEQUENCE OF TaxElement  -- when we don't know the genus

TaxElement ::= SEQUENCE {
    fixed-level INTEGER {
       other (0) ,                     -- level must be set in string
       family (1) ,
       order (2) ,
       class (3) } ,
    level VisibleString OPTIONAL ,
    name VisibleString }

END


--**********************************************************************
--
--  NCBI BioSource
--  by James Ostell, 1994
--  version 3.0
--
--**********************************************************************

NCBI-BioSource DEFINITIONS ::=
BEGIN

EXPORTS BioSource;

IMPORTS Org-ref FROM NCBI-Organism;

--********************************************************************
--
-- BioSource gives the source of the biological material
--   for sequences
--
--********************************************************************

BioSource ::= SEQUENCE {
    genome INTEGER {         -- biological context
        unknown (0) ,
        genomic (1) ,
        chloroplast (2) ,
        chromoplast (3) ,
        kinetoplast (4) ,
        mitochondrion (5) ,
        plastid (6) ,
        macronuclear (7) ,
        extrachrom (8) ,
        plasmid (9) ,
        transposon (10) ,
        insertion-seq (11) ,
        cyanelle (12) ,
        proviral (13) ,
        virion (14) ,
        nucleomorph (15) ,
        apicoplast (16) ,
        leucoplast (17) ,
        proplastid (18) ,
        endogenous-virus (19) ,
        hydrogenosome (20) ,
        chromosome (21) ,
        chromatophore (22)
      } DEFAULT unknown ,
    origin INTEGER {
      unknown (0) ,
      natural (1) ,                    -- normal biological entity
      natmut (2) ,                     -- naturally occurring mutant
      mut (3) ,                        -- artificially mutagenized
      artificial (4) ,                 -- artificially engineered
      synthetic (5) ,                  -- purely synthetic
      other (255)
    } DEFAULT unknown , 
    org Org-ref ,
    subtype SEQUENCE OF SubSource OPTIONAL ,
    is-focus NULL OPTIONAL ,           -- to distinguish biological focus
    pcr-primers PCRReactionSet OPTIONAL }

PCRReactionSet ::= SET OF PCRReaction

PCRReaction ::= SEQUENCE {
    forward PCRPrimerSet OPTIONAL ,
    reverse PCRPrimerSet OPTIONAL }

PCRPrimerSet ::= SET OF PCRPrimer

PCRPrimer ::= SEQUENCE {
    seq PCRPrimerSeq OPTIONAL ,
    name PCRPrimerName OPTIONAL }

PCRPrimerSeq ::= VisibleString

PCRPrimerName ::= VisibleString

SubSource ::= SEQUENCE {
    subtype INTEGER {
        chromosome (1) ,
        map (2) ,
        clone (3) ,
        subclone (4) ,
        haplotype (5) ,
        genotype (6) ,
        sex (7) ,
        cell-line (8) ,
        cell-type (9) ,
        tissue-type (10) ,
        clone-lib (11) ,
        dev-stage (12) ,
        frequency (13) ,
        germline (14) ,
        rearranged (15) ,
        lab-host (16) ,
        pop-variant (17) ,
        tissue-lib (18) ,
        plasmid-name (19) ,
        transposon-name (20) ,
        insertion-seq-name (21) ,
        plastid-name (22) ,
        country (23) ,
        segment (24) ,
        endogenous-virus-name (25) ,
        transgenic (26) ,
        environmental-sample (27) ,
        isolation-source (28) ,
        lat-lon (29) ,          -- +/- decimal degrees
        collection-date (30) ,  -- DD-MMM-YYYY format
        collected-by (31) ,     -- name of person who collected the sample
        identified-by (32) ,    -- name of person who identified the sample
        fwd-primer-seq (33) ,   -- sequence (possibly more than one; semicolon-separated)
        rev-primer-seq (34) ,   -- sequence (possibly more than one; semicolon-separated)
        fwd-primer-name (35) ,
        rev-primer-name (36) ,
        metagenomic (37) ,
        mating-type (38) ,
        linkage-group (39) ,
        haplogroup (40) ,
        other (255) } ,
    name VisibleString ,
    attrib VisibleString OPTIONAL }    -- attribution/source of this name
        
END

--**********************************************************************
--
--  NCBI Protein
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-Protein DEFINITIONS ::=
BEGIN

EXPORTS Prot-ref;

IMPORTS Dbtag FROM NCBI-General;

--*** Prot-ref ***********************************************
--*
--*  Reference to a protein name
--*

Prot-ref ::= SEQUENCE {
    name SET OF VisibleString OPTIONAL ,      -- protein name
    desc VisibleString OPTIONAL ,      -- description (instead of name)
    ec SET OF VisibleString OPTIONAL , -- E.C. number(s)
    activity SET OF VisibleString OPTIONAL ,  -- activities
    db SET OF Dbtag OPTIONAL ,         -- ids in other dbases
    processed ENUMERATED {             -- processing status
       not-set (0) ,
       preprotein (1) ,
       mature (2) ,
       signal-peptide (3) ,
       transit-peptide (4) } DEFAULT not-set }


END 
--********************************************************************
--
--  Transcription Initiation Site Feature Data Block
--  James Ostell, 1991
--  Philip Bucher, David Ghosh
--  version 1.1
--
--  
--
--********************************************************************

NCBI-TxInit DEFINITIONS ::=
BEGIN

EXPORTS Txinit;

IMPORTS Gene-ref FROM NCBI-Gene
        Prot-ref FROM NCBI-Protein
        Org-ref FROM NCBI-Organism;

Txinit ::= SEQUENCE {
    name VisibleString ,    -- descriptive name of initiation site
    syn SEQUENCE OF VisibleString OPTIONAL ,   -- synonyms
    gene SEQUENCE OF Gene-ref OPTIONAL ,  -- gene(s) transcribed
    protein SEQUENCE OF Prot-ref OPTIONAL ,   -- protein(s) produced
    rna SEQUENCE OF VisibleString OPTIONAL ,  -- rna(s) produced
    expression VisibleString OPTIONAL ,  -- tissue/time of expression
    txsystem ENUMERATED {       -- transcription apparatus used at this site
        unknown (0) ,
        pol1 (1) ,      -- eukaryotic Pol I
        pol2 (2) ,      -- eukaryotic Pol II
        pol3 (3) ,      -- eukaryotic Pol III
        bacterial (4) ,
        viral (5) ,
        rna (6) ,       -- RNA replicase
        organelle (7) ,
        other (255) } ,
    txdescr VisibleString OPTIONAL ,   -- modifiers on txsystem
    txorg Org-ref OPTIONAL ,  -- organism supplying transcription apparatus
    mapping-precise BOOLEAN DEFAULT FALSE ,  -- mapping precise or approx
    location-accurate BOOLEAN DEFAULT FALSE , -- does Seq-loc reflect mapping
    inittype ENUMERATED {
        unknown (0) ,
        single (1) ,
        multiple (2) ,
        region (3) } OPTIONAL ,
    evidence SET OF Tx-evidence OPTIONAL }

Tx-evidence ::= SEQUENCE {
    exp-code ENUMERATED {
        unknown (0) ,    
        rna-seq (1) ,   -- direct RNA sequencing
        rna-size (2) ,  -- RNA length measurement
        np-map (3) ,    -- nuclease protection mapping with homologous sequence ladder
        np-size (4) ,   -- nuclease protected fragment length measurement
        pe-seq (5) ,    -- dideoxy RNA sequencing 
        cDNA-seq (6) ,  -- full-length cDNA sequencing
        pe-map (7) ,    -- primer extension mapping with homologous sequence ladder    
        pe-size (8) ,   -- primer extension product length measurement
        pseudo-seq (9) , -- full-length processed pseudogene sequencing
        rev-pe-map (10) ,   -- see NOTE (1) below
        other (255) } ,
    expression-system ENUMERATED {
        unknown (0) ,
        physiological (1) ,
        in-vitro (2) ,
        oocyte (3) ,
        transfection (4) ,
        transgenic (5) ,
        other (255) } DEFAULT physiological ,
    low-prec-data BOOLEAN DEFAULT FALSE ,
    from-homolog BOOLEAN DEFAULT FALSE }     -- experiment actually done on
                                             --  close homolog

    -- NOTE (1) length measurement of a reverse direction primer-extension
    --          product (blocked  by  RNA  5'end)  by  comparison with
    --          homologous sequence ladder (J. Mol. Biol. 199, 587)

    
END