--$Revision: 142746 $
--**********************************************************************
--
--  NCBI Sequence Feature elements
--  by James Ostell, 1990
--  Version 3.0 - June 1994
--
--**********************************************************************

NCBI-Seqfeat DEFINITIONS ::=
BEGIN

EXPORTS Seq-feat, Feat-id, Genetic-code;

IMPORTS Gene-ref FROM NCBI-Gene
        Prot-ref FROM NCBI-Protein
        Org-ref FROM NCBI-Organism
        BioSource FROM NCBI-BioSource
        RNA-ref FROM NCBI-RNA
        Seq-loc, Giimport-id FROM NCBI-Seqloc
        Pubdesc, Numbering, Heterogen FROM NCBI-Sequence
        Rsite-ref FROM NCBI-Rsite
        Txinit FROM NCBI-TxInit
        Pub-set FROM NCBI-Pub
        Object-id, Dbtag, User-object FROM NCBI-General;

--*** Feature identifiers ********************************
--*

Feat-id ::= CHOICE {
    gibb INTEGER ,                      -- geninfo backbone
    giim Giimport-id ,                  -- geninfo import
    local Object-id ,                   -- for local software use
    general Dbtag }                     -- for use by various databases

--*** Seq-feat *******************************************
--*  sequence feature generalization

Seq-feat ::= SEQUENCE {
    id Feat-id OPTIONAL ,
    data SeqFeatData ,                  -- the specific data
    partial BOOLEAN OPTIONAL ,          -- incomplete in some way?
    except BOOLEAN OPTIONAL ,           -- something funny about this?
    comment VisibleString OPTIONAL ,
    product Seq-loc OPTIONAL ,          -- product of process
    location Seq-loc ,                  -- feature made from
    qual SEQUENCE OF Gb-qual OPTIONAL , -- qualifiers
    title VisibleString OPTIONAL ,      -- for user defined label
    ext User-object OPTIONAL ,          -- user defined structure extension
    cit Pub-set OPTIONAL ,              -- citations for this feature
    exp-ev ENUMERATED {                 -- evidence for existence of feature
        experimental (1) ,              -- any reasonable experimental check
        not-experimental (2) } OPTIONAL ,   -- similarity, pattern, etc
    xref SET OF SeqFeatXref OPTIONAL ,  -- cite other relevant features
    dbxref SET OF Dbtag OPTIONAL ,      -- support for xref to other databases
    pseudo BOOLEAN OPTIONAL ,           -- annotated on pseudogene?
    except-text VisibleString OPTIONAL ,    -- explain if except=TRUE
    ids SET OF Feat-id OPTIONAL ,       -- set of Ids; will replace 'id' field
    exts SET OF User-object OPTIONAL }  -- set of extensions; will replace 'ext' field

-- SeqFeatData is the "specific data" slot of Seq-feat (and of SeqFeatXref):
-- exactly one of the alternatives below is present.

SeqFeatData ::= CHOICE {
    gene Gene-ref ,
    org Org-ref ,
    cdregion Cdregion ,
    prot Prot-ref ,
    rna RNA-ref ,
    pub Pubdesc ,                       -- publication applies to this seq
    seq Seq-loc ,                       -- to annotate origin from another seq
    imp Imp-feat ,
    region VisibleString,               -- named region (globin locus)
    comment NULL ,                      -- just a comment
    bond ENUMERATED {
        disulfide (1) ,
        thiolester (2) ,
        xlink (3) ,
        thioether (4) ,
        other (255) } ,
    site ENUMERATED {
        active (1) ,
        binding (2) ,
        cleavage (3) ,
        inhibit (4) ,
        modified (5),
        glycosylation (6) ,
        myristoylation (7) ,
        mutagenized (8) ,
        metal-binding (9) ,
        phosphorylation (10) ,
        acetylation (11) ,
        amidation (12) ,
        methylation (13) ,
        hydroxylation (14) ,
        sulfatation (15) ,
        oxidative-deamination (16) ,
        pyrrolidone-carboxylic-acid (17) ,
        gamma-carboxyglutamic-acid (18) ,
        blocked (19) ,
        lipid-binding (20) ,
        np-binding (21) ,
        dna-binding (22) ,
        signal-peptide (23) ,
        transit-peptide (24) ,
        transmembrane-region (25) ,
        nitrosylation (26) ,
        other (255) } ,
    rsite Rsite-ref ,                   -- restriction site (for maps really)
    user User-object ,                  -- user defined structure
    txinit Txinit ,                     -- transcription initiation
    num Numbering ,                     -- a numbering system
    psec-str ENUMERATED {               -- protein secondary structure
        helix (1) ,                     -- any helix
        sheet (2) ,                     -- beta sheet
        turn (3) } ,                    -- beta or gamma turn
    non-std-residue VisibleString ,     -- non-standard residue here in seq
    het Heterogen ,                     -- cofactor, prosthetic grp, etc, bound to seq
    biosrc BioSource }

SeqFeatXref ::= SEQUENCE {              -- both optional because can have one or both
    id Feat-id OPTIONAL ,               -- the feature copied
    data SeqFeatData OPTIONAL }         -- the specific data

--*** CdRegion ***********************************************
--*
--*  Instructions to translate from a nucleic acid to a peptide
--*    conflict means it's supposed to translate but doesn't
--*

Cdregion ::= SEQUENCE {
    orf BOOLEAN OPTIONAL ,              -- just an ORF ?
    frame ENUMERATED {
        not-set (0) ,                   -- not set, code uses one
        one (1) ,
        two (2) ,
        three (3) } DEFAULT not-set ,   -- reading frame
    conflict BOOLEAN OPTIONAL ,         -- conflict
    gaps INTEGER OPTIONAL ,             -- number of gaps on conflict/except
    mismatch INTEGER OPTIONAL ,         -- number of mismatches on above
    code Genetic-code OPTIONAL ,        -- genetic code used
    code-break SEQUENCE OF Code-break OPTIONAL ,    -- individual exceptions
    stops INTEGER OPTIONAL }            -- number of stop codons on above

    -- each code is 64 cells long, in the order where
    -- T=0,C=1,A=2,G=3, TTT=0, TTC=1, TCT=4, etc
    -- (cell index = 16*first base + 4*second base + third base)
    -- NOTE: this order does NOT correspond to a Seq-data
    -- encoding.  It is "natural" to codon usage instead.
    -- the value in each cell is the AA coded for
    -- start= AA coded only if first in peptide
    --   in start array, if codon is not a legitimate start
    --   codon, that cell will have the "gap" symbol for
    --   that alphabet.  Otherwise it will have the AA
    --   encoded when that codon is used at the start.

Genetic-code ::= SET OF CHOICE {
    name VisibleString ,                -- name of a code
    id INTEGER ,                        -- id in dbase
    ncbieaa VisibleString ,             -- indexed to IUPAC extended
    ncbi8aa OCTET STRING ,              -- indexed to NCBI8aa
    ncbistdaa OCTET STRING ,            -- indexed to NCBIstdaa
    sncbieaa VisibleString ,            -- start, indexed to IUPAC extended
    sncbi8aa OCTET STRING ,             -- start, indexed to NCBI8aa
    sncbistdaa OCTET STRING }           -- start, indexed to NCBIstdaa

Code-break ::= SEQUENCE {               -- specific codon exceptions
    loc Seq-loc ,                       -- location of exception
    aa CHOICE {                         -- the amino acid
        ncbieaa INTEGER ,               -- ASCII value of NCBIeaa code
        ncbi8aa INTEGER ,               -- NCBI8aa code
        ncbistdaa INTEGER } }           -- NCBIstdaa code

Genetic-code-table ::= SET OF Genetic-code      -- table of genetic codes

--*** Import ***********************************************
--*
--*  Features imported from other databases
--*

Imp-feat ::= SEQUENCE {
    key VisibleString ,
    loc VisibleString OPTIONAL ,        -- original location string
    descr VisibleString OPTIONAL }      -- text description

-- Gb-qual is the element type of Seq-feat.qual ("qualifiers")

Gb-qual ::= SEQUENCE {
    qual VisibleString ,
    val VisibleString }

END

--**********************************************************************
--
--  NCBI Restriction Sites
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-Rsite DEFINITIONS ::=
BEGIN

EXPORTS Rsite-ref;

IMPORTS Dbtag FROM NCBI-General;

Rsite-ref ::= CHOICE {
    str VisibleString ,                 -- may be unparsable
    db Dbtag }                          -- pointer to a restriction site database

END

--**********************************************************************
--
--  NCBI RNAs
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-RNA DEFINITIONS ::=
BEGIN

EXPORTS RNA-ref, Trna-ext, RNA-gen, RNA-qual, RNA-qual-set;

IMPORTS Seq-loc FROM NCBI-Seqloc;

--*** rnas ***********************************************
--*
--*  various rnas
--*
                         -- minimal RNA sequence
RNA-ref ::= SEQUENCE {
    type ENUMERATED {                   -- type of RNA feature
        unknown (0) ,
        premsg (1) ,
        mRNA (2) ,
        tRNA (3) ,
        rRNA (4) ,
        snRNA (5) ,                     -- will become ncRNA, with RNA-gen.class = snRNA
        scRNA (6) ,                     -- will become ncRNA, with RNA-gen.class = scRNA
        snoRNA (7) ,                    -- will become ncRNA, with RNA-gen.class = snoRNA
        ncRNA (8) ,                     -- non-coding RNA; subsumes snRNA, scRNA, snoRNA
        tmRNA (9) ,
        miscRNA (10) ,
        other (255) } ,
    pseudo BOOLEAN OPTIONAL ,
    ext CHOICE {
        name VisibleString ,            -- for naming "other" type
        tRNA Trna-ext ,                 -- for tRNAs
        gen RNA-gen } OPTIONAL          -- generic fields for ncRNA, tmRNA, miscRNA
    }

Trna-ext ::= SEQUENCE {                 -- tRNA feature extensions
    aa CHOICE {                         -- aa this carries
        iupacaa INTEGER ,
        ncbieaa INTEGER ,
        ncbi8aa INTEGER ,
        ncbistdaa INTEGER } OPTIONAL ,
    codon SET OF INTEGER OPTIONAL ,     -- codon(s) as in Genetic-code
    anticodon Seq-loc OPTIONAL }        -- location of anticodon

RNA-gen ::= SEQUENCE {
    class VisibleString OPTIONAL ,      -- for ncRNAs, the class of non-coding RNA:
                                        -- examples: antisense_RNA, guide_RNA, snRNA
    product VisibleString OPTIONAL ,
    quals RNA-qual-set OPTIONAL         -- e.g., tag_peptide qualifier for tmRNAs
    }

RNA-qual ::= SEQUENCE {                 -- Additional data values for RNA-gen,
    qual VisibleString ,                -- in a tag (qual), value (val) format
    val VisibleString }

RNA-qual-set ::= SEQUENCE OF RNA-qual

END

--**********************************************************************
--
--  NCBI Genes
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-Gene DEFINITIONS ::=
BEGIN

EXPORTS Gene-ref, Gene-nomenclature;

IMPORTS Dbtag FROM NCBI-General;

--*** Gene ***********************************************
--*
--*  reference to a gene
--*

Gene-ref ::= SEQUENCE {
    locus VisibleString OPTIONAL ,      -- Official gene symbol
    allele VisibleString OPTIONAL ,     -- Official allele designation
    desc VisibleString OPTIONAL ,       -- descriptive name
    maploc VisibleString OPTIONAL ,     -- descriptive map location
    pseudo BOOLEAN DEFAULT FALSE ,      -- pseudogene
    db SET OF Dbtag OPTIONAL ,          -- ids in other dbases
    syn SET OF VisibleString OPTIONAL , -- synonyms for locus
    locus-tag VisibleString OPTIONAL ,  -- systematic gene name (e.g., MI0001, ORF0069)
    formal-name Gene-nomenclature OPTIONAL }

Gene-nomenclature ::= SEQUENCE {
    status ENUMERATED {
        unknown (0) ,
        official (1) ,
        interim (2) } ,
    symbol VisibleString OPTIONAL ,
    name VisibleString OPTIONAL ,
    source Dbtag OPTIONAL }

END

--**********************************************************************
--
--  NCBI Organism
--  by James Ostell, 1994
--  version 3.0
--
--**********************************************************************

NCBI-Organism DEFINITIONS ::=
BEGIN

EXPORTS Org-ref;

IMPORTS Dbtag FROM NCBI-General;

--*** Org-ref ***********************************************
--*
--*  Reference to an organism
--*     defines only the organism.. lower levels of detail for biological
--*     molecules are provided by the Source object
--*

Org-ref ::= SEQUENCE {
    taxname VisibleString OPTIONAL ,    -- preferred formal name
    common VisibleString OPTIONAL ,     -- common name
    mod SET OF VisibleString OPTIONAL , -- unstructured modifiers
    db SET OF Dbtag OPTIONAL ,          -- ids in taxonomic or culture dbases
    syn SET OF VisibleString OPTIONAL , -- synonyms for taxname or common
    orgname OrgName OPTIONAL }

OrgName ::= SEQUENCE {
    name CHOICE {
        binomial BinomialOrgName ,      -- genus/species type name
        virus VisibleString ,           -- virus names are different
        hybrid MultiOrgName ,           -- hybrid between organisms
        namedhybrid BinomialOrgName ,   -- some hybrids have genus x species name
        partial PartialOrgName } OPTIONAL , -- when genus not known
    attrib VisibleString OPTIONAL ,     -- attribution of name
    mod SEQUENCE OF OrgMod OPTIONAL ,
    lineage VisibleString OPTIONAL ,    -- lineage with semicolon separators
    gcode INTEGER OPTIONAL ,            -- genetic code (see CdRegion)
    mgcode INTEGER OPTIONAL ,           -- mitochondrial genetic code
    div VisibleString OPTIONAL }        -- GenBank division code

OrgMod ::= SEQUENCE {
    subtype INTEGER {
        strain (2) ,
        substrain (3) ,
        type (4) ,
        subtype (5) ,
        variety (6) ,
        serotype (7) ,
        serogroup (8) ,
        serovar (9) ,
        cultivar (10) ,
        pathovar (11) ,
        chemovar (12) ,
        biovar (13) ,
        biotype (14) ,
        group (15) ,
        subgroup (16) ,
        isolate (17) ,
        common (18) ,
        acronym (19) ,
        dosage (20) ,                   -- chromosome dosage of hybrid
        nat-host (21) ,                 -- natural host of this specimen
        sub-species (22) ,
        specimen-voucher (23) ,
        authority (24) ,
        forma (25) ,
        forma-specialis (26) ,
        ecotype (27) ,
        synonym (28) ,
        anamorph (29) ,
        teleomorph (30) ,
        breed (31) ,
        gb-acronym (32) ,               -- used by taxonomy database
        gb-anamorph (33) ,              -- used by taxonomy database
        gb-synonym (34) ,               -- used by taxonomy database
        culture-collection (35) ,
        bio-material (36) ,
        metagenome-source (37) ,
        old-lineage (253) ,
        old-name (254) ,
        other (255) } ,                 -- ASN5: old-name (254) will be added to next spec
    subname VisibleString ,
    attrib VisibleString OPTIONAL }     -- attribution/source of name

BinomialOrgName ::= SEQUENCE {
    genus VisibleString ,               -- required
    species VisibleString OPTIONAL ,    -- species required if subspecies used
    subspecies VisibleString OPTIONAL }

MultiOrgName ::= SEQUENCE OF OrgName    -- the first will be used to assign division

PartialOrgName ::= SEQUENCE OF TaxElement   -- when we don't know the genus

TaxElement ::= SEQUENCE {
    fixed-level INTEGER {
        other (0) ,                     -- level must be set in string
        family (1) ,
        order (2) ,
        class (3) } ,
    level VisibleString OPTIONAL ,
    name VisibleString }

END

--**********************************************************************
--
--  NCBI BioSource
--  by James Ostell, 1994
--  version 3.0
--
--**********************************************************************

NCBI-BioSource DEFINITIONS ::=
BEGIN

EXPORTS BioSource;

IMPORTS Org-ref FROM NCBI-Organism;

--********************************************************************
--
-- BioSource gives the source of the biological material
--   for sequences
--
--********************************************************************

BioSource ::= SEQUENCE {
    genome INTEGER {                    -- biological context
        unknown (0) ,
        genomic (1) ,
        chloroplast (2) ,
        chromoplast (3) ,
        kinetoplast (4) ,
        mitochondrion (5) ,
        plastid (6) ,
        macronuclear (7) ,
        extrachrom (8) ,
        plasmid (9) ,
        transposon (10) ,
        insertion-seq (11) ,
        cyanelle (12) ,
        proviral (13) ,
        virion (14) ,
        nucleomorph (15) ,
        apicoplast (16) ,
        leucoplast (17) ,
        proplastid (18) ,
        endogenous-virus (19) ,
        hydrogenosome (20) ,
        chromosome (21) ,
        chromatophore (22) } DEFAULT unknown ,
    origin INTEGER {
        unknown (0) ,
        natural (1) ,                   -- normal biological entity
        natmut (2) ,                    -- naturally occurring mutant
        mut (3) ,                       -- artificially mutagenized
        artificial (4) ,                -- artificially engineered
        synthetic (5) ,                 -- purely synthetic
        other (255) } DEFAULT unknown ,
    org Org-ref ,
    subtype SEQUENCE OF SubSource OPTIONAL ,
    is-focus NULL OPTIONAL ,            -- to distinguish biological focus
    pcr-primers PCRReactionSet OPTIONAL }

PCRReactionSet ::= SET OF PCRReaction

PCRReaction ::= SEQUENCE {
    forward PCRPrimerSet OPTIONAL ,
    reverse PCRPrimerSet OPTIONAL }

PCRPrimerSet ::= SET OF PCRPrimer

PCRPrimer ::= SEQUENCE {
    seq PCRPrimerSeq OPTIONAL ,
    name PCRPrimerName OPTIONAL }

PCRPrimerSeq ::= VisibleString

PCRPrimerName ::= VisibleString

SubSource ::= SEQUENCE {
    subtype INTEGER {
        chromosome (1) ,
        map (2) ,
        clone (3) ,
        subclone (4) ,
        haplotype (5) ,
        genotype (6) ,
        sex (7) ,
        cell-line (8) ,
        cell-type (9) ,
        tissue-type (10) ,
        clone-lib (11) ,
        dev-stage (12) ,
        frequency (13) ,
        germline (14) ,
        rearranged (15) ,
        lab-host (16) ,
        pop-variant (17) ,
        tissue-lib (18) ,
        plasmid-name (19) ,
        transposon-name (20) ,
        insertion-seq-name (21) ,
        plastid-name (22) ,
        country (23) ,
        segment (24) ,
        endogenous-virus-name (25) ,
        transgenic (26) ,
        environmental-sample (27) ,
        isolation-source (28) ,
        lat-lon (29) ,                  -- +/- decimal degrees
        collection-date (30) ,          -- DD-MMM-YYYY format
        collected-by (31) ,             -- name of person who collected the sample
        identified-by (32) ,            -- name of person who identified the sample
        fwd-primer-seq (33) ,           -- sequence (possibly more than one; semicolon-separated)
        rev-primer-seq (34) ,           -- sequence (possibly more than one; semicolon-separated)
        fwd-primer-name (35) ,
        rev-primer-name (36) ,
        metagenomic (37) ,
        mating-type (38) ,
        linkage-group (39) ,
        haplogroup (40) ,
        other (255) } ,
    name VisibleString ,
    attrib VisibleString OPTIONAL }     -- attribution/source of this name

END

--**********************************************************************
--
--  NCBI Protein
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-Protein DEFINITIONS ::=
BEGIN

EXPORTS Prot-ref;

IMPORTS Dbtag FROM NCBI-General;

--*** Prot-ref ***********************************************
--*
--*  Reference to a protein name
--*

Prot-ref ::= SEQUENCE {
    name SET OF VisibleString OPTIONAL ,        -- protein name
    desc VisibleString OPTIONAL ,               -- description (instead of name)
    ec SET OF VisibleString OPTIONAL ,          -- E.C. number(s)
    activity SET OF VisibleString OPTIONAL ,    -- activities
    db SET OF Dbtag OPTIONAL ,                  -- ids in other dbases
    processed ENUMERATED {                      -- processing status
        not-set (0) ,
        preprotein (1) ,
        mature (2) ,
        signal-peptide (3) ,
        transit-peptide (4) } DEFAULT not-set }

END

--********************************************************************
--
--  Transcription Initiation Site Feature Data Block
--  James Ostell, 1991
--  Philip Bucher, David Ghosh
--  version 1.1
--
--
--
--********************************************************************

NCBI-TxInit DEFINITIONS ::=
BEGIN

EXPORTS Txinit;

IMPORTS Gene-ref FROM NCBI-Gene
        Prot-ref FROM NCBI-Protein
        Org-ref FROM NCBI-Organism;

Txinit ::= SEQUENCE {
    name VisibleString ,                        -- descriptive name of initiation site
    syn SEQUENCE OF VisibleString OPTIONAL ,    -- synonyms
    gene SEQUENCE OF Gene-ref OPTIONAL ,        -- gene(s) transcribed
    protein SEQUENCE OF Prot-ref OPTIONAL ,     -- protein(s) produced
    rna SEQUENCE OF VisibleString OPTIONAL ,    -- rna(s) produced
    expression VisibleString OPTIONAL ,         -- tissue/time of expression
    txsystem ENUMERATED {                       -- transcription apparatus used at this site
        unknown (0) ,
        pol1 (1) ,                              -- eukaryotic Pol I
        pol2 (2) ,                              -- eukaryotic Pol II
        pol3 (3) ,                              -- eukaryotic Pol III
        bacterial (4) ,
        viral (5) ,
        rna (6) ,                               -- RNA replicase
        organelle (7) ,
        other (255) } ,
    txdescr VisibleString OPTIONAL ,            -- modifiers on txsystem
    txorg Org-ref OPTIONAL ,                    -- organism supplying transcription apparatus
    mapping-precise BOOLEAN DEFAULT FALSE ,     -- mapping precise or approx
    location-accurate BOOLEAN DEFAULT FALSE ,   -- does Seq-loc reflect mapping
    inittype ENUMERATED {
        unknown (0) ,
        single (1) ,
        multiple (2) ,
        region (3) } OPTIONAL ,
    evidence SET OF Tx-evidence OPTIONAL }

Tx-evidence ::= SEQUENCE {
    exp-code ENUMERATED {
        unknown (0) ,
        rna-seq (1) ,                   -- direct RNA sequencing
        rna-size (2) ,                  -- RNA length measurement
        np-map (3) ,                    -- nuclease protection mapping with homologous sequence ladder
        np-size (4) ,                   -- nuclease protected fragment length measurement
        pe-seq (5) ,                    -- dideoxy RNA sequencing
        cDNA-seq (6) ,                  -- full-length cDNA sequencing
        pe-map (7) ,                    -- primer extension mapping with homologous sequence ladder
        pe-size (8) ,                   -- primer extension product length measurement
        pseudo-seq (9) ,                -- full-length processed pseudogene sequencing
        rev-pe-map (10) ,               -- see NOTE (1) below
        other (255) } ,
    expression-system ENUMERATED {
        unknown (0) ,
        physiological (1) ,
        in-vitro (2) ,
        oocyte (3) ,
        transfection (4) ,
        transgenic (5) ,
        other (255) } DEFAULT physiological ,
    low-prec-data BOOLEAN DEFAULT FALSE ,
    from-homolog BOOLEAN DEFAULT FALSE }        -- experiment actually done on
                                                --  close homolog

    -- NOTE (1) length measurement of a reverse direction primer-extension
    --          product (blocked by RNA 5'end) by comparison with
    --          homologous sequence ladder (J. Mol. Biol. 199, 587)

END