-- access.asn
--$Revision: 6.0 $
--*********************************************************************
--
--  access.asn
--
--     messages for data access
--
--*********************************************************************

NCBI-Access DEFINITIONS ::=
BEGIN

EXPORTS Link-set;

    -- links between same class = neighbors
    -- links between other classes = links

Link-set ::= SEQUENCE {
    num INTEGER ,                            -- number of links to this doc type
    uids SEQUENCE OF INTEGER OPTIONAL ,      -- the links
    weights SEQUENCE OF INTEGER OPTIONAL }   -- the weights

END

-- biblio.asn
--$Revision: 6.2 $
--****************************************************************
--
--  NCBI Bibliographic data elements
--  by James Ostell, 1990
--
--  Taken from the American National Standard for
--      Bibliographic References
--      ANSI Z39.29-1977
--  Version 3.0 - June 1994
--  PubMedId added in 1996
--  ArticleIds and eprint elements added in 1999
--
--****************************************************************

NCBI-Biblio DEFINITIONS ::=
BEGIN

EXPORTS Cit-art, Cit-jour, Cit-book, Cit-pat, Cit-let, Id-pat, Cit-gen,
        Cit-proc, Cit-sub, Title, Author, PubMedId;

IMPORTS Person-id, Date, Dbtag FROM NCBI-General;

    -- Article Ids

ArticleId ::= CHOICE {         -- can be many ids for an article
    pubmed PubMedId ,          -- see types below
    medline MedlineUID ,
    doi DOI ,
    pii PII ,
    pmcid PmcID ,
    pmcpid PmcPid ,
    pmpid PmPid ,
    other Dbtag }              -- generic catch all

PubMedId ::= INTEGER           -- Id from the PubMed database at NCBI
MedlineUID ::= INTEGER         -- Id from MEDLINE
DOI ::= VisibleString          -- Document Object Identifier
PII ::= VisibleString          -- Controlled Publisher Identifier
PmcID ::= INTEGER              -- PubMed Central Id
PmcPid ::= VisibleString       -- Publisher Id supplied to PubMed Central
PmPid ::= VisibleString        -- Publisher Id supplied to PubMed

ArticleIdSet ::= SET OF ArticleId

    -- Status Dates

PubStatus ::= INTEGER {        -- points of publication
    received (1) ,             -- date manuscript received for review
    accepted (2) ,             -- accepted for publication
    epublish (3) ,             -- published electronically by publisher
    ppublish (4) ,             -- published in print by publisher
    revised (5) ,              -- article revised by publisher/author
    pmc (6) ,                  -- article first appeared in PubMed Central
    pmcr (7) ,                 -- article revision in PubMed Central
    pubmed (8) ,               -- article citation first appeared in PubMed
    pubmedr (9) ,              -- article citation revision in PubMed
    aheadofprint (10),         -- epublish, but will be followed by print
    premedline (11),           -- date into PreMedline status
    medline (12),              -- date made a MEDLINE record
    other (255) }

PubStatusDate ::= SEQUENCE {   -- done as a structure so fields can be added
    pubstatus PubStatus ,
    date Date }                -- time may be added later

PubStatusDateSet ::= SET OF PubStatusDate

    -- Citation Types

Cit-art ::= SEQUENCE {                  -- article in journal or book
    title Title OPTIONAL ,              -- title of paper (ANSI requires)
    authors Auth-list OPTIONAL ,        -- authors (ANSI requires)
    from CHOICE {                       -- journal or book
        journal Cit-jour ,
        book Cit-book ,
        proc Cit-proc } ,
    ids ArticleIdSet OPTIONAL }         -- lots of ids

Cit-jour ::= SEQUENCE {                 -- Journal citation
    title Title ,                       -- title of journal
    imp Imprint }

Cit-book ::= SEQUENCE {                 -- Book citation
    title Title ,                       -- Title of book
    coll Title OPTIONAL ,               -- part of a collection
    authors Auth-list,                  -- authors
    imp Imprint }

Cit-proc ::= SEQUENCE {                 -- Meeting proceedings
    book Cit-book ,                     -- citation to meeting
    meet Meeting }                      -- time and location of meeting

    -- Patent number and date-issue were made optional in 1997 to
    --   support patent applications being issued from the USPTO
    --   Semantically a Cit-pat must have either a patent number or
    --   an application number (or both) to be valid

Cit-pat ::= SEQUENCE {                  -- patent citation
    title VisibleString ,
    authors Auth-list,                  -- author/inventor
    country VisibleString ,             -- Patent Document Country
    doc-type VisibleString ,            -- Patent Document Type
    number VisibleString OPTIONAL,      -- Patent Document Number
    date-issue Date OPTIONAL,           -- Patent Issue/Pub Date
    class SEQUENCE OF VisibleString OPTIONAL ,      -- Patent Doc Class Code
    app-number VisibleString OPTIONAL ,             -- Patent Doc Appl Number
    app-date Date OPTIONAL ,                        -- Patent Appl File Date
    applicants Auth-list OPTIONAL ,                 -- Applicants
    assignees Auth-list OPTIONAL ,                  -- Assignees
    priority SEQUENCE OF Patent-priority OPTIONAL , -- Priorities
    abstract VisibleString OPTIONAL }               -- abstract of patent

Patent-priority ::= SEQUENCE {
    country VisibleString ,             -- Patent country code
    number VisibleString ,              -- number assigned in that country
    date Date }                         -- date of application

Id-pat ::= SEQUENCE {                   -- just to identify a patent
    country VisibleString ,             -- Patent Document Country
    id CHOICE {
        number VisibleString ,          -- Patent Document Number
        app-number VisibleString } ,    -- Patent Doc Appl Number
    doc-type VisibleString OPTIONAL }   -- Patent Doc Type

Cit-let ::= SEQUENCE {                  -- letter, thesis, or manuscript
    cit Cit-book ,                      -- same fields as a book
    man-id VisibleString OPTIONAL ,     -- Manuscript identifier
    type ENUMERATED {
        manuscript (1) ,
        letter (2) ,
        thesis (3) } OPTIONAL }

    -- NOTE: this is just to cite a
    -- direct data submission, see NCBI-Submit
    -- for the form of a sequence submission

Cit-sub ::= SEQUENCE {                  -- citation for a direct submission
    authors Auth-list ,                 -- not necessarily authors of the paper
    imp Imprint OPTIONAL ,              -- this only used to get date.. will go
    medium ENUMERATED {                 -- medium of submission
        paper (1) ,
        tape (2) ,
        floppy (3) ,
        email (4) ,
        other (255) } OPTIONAL ,
    date Date OPTIONAL ,                -- replaces imp, will become required
    descr VisibleString OPTIONAL }      -- description of changes for public view

Cit-gen ::= SEQUENCE {                  -- NOT from ANSI, this is a catchall
    cit VisibleString OPTIONAL ,        -- anything, not parsable
    authors Auth-list OPTIONAL ,
    muid INTEGER OPTIONAL ,             -- medline uid
    journal Title OPTIONAL ,
    volume VisibleString OPTIONAL ,
    issue VisibleString OPTIONAL ,
    pages VisibleString OPTIONAL ,
    date Date OPTIONAL ,
    serial-number INTEGER OPTIONAL ,    -- for GenBank style references
    title VisibleString OPTIONAL ,      -- eg. cit="unpublished",title="title"
    pmid PubMedId OPTIONAL }            -- PubMed Id

    -- Authorship Group

Auth-list ::= SEQUENCE {
    names CHOICE {
        std SEQUENCE OF Author ,            -- full citations
        ml SEQUENCE OF VisibleString ,      -- MEDLINE, semi-structured
        str SEQUENCE OF VisibleString } ,   -- free for all
    affil Affil OPTIONAL }                  -- author affiliation

Author ::= SEQUENCE {
    name Person-id ,                    -- Author, Primary or Secondary
    level ENUMERATED {
        primary (1),
        secondary (2) } OPTIONAL ,
    role ENUMERATED {                   -- Author Role Indicator
        compiler (1),
        editor (2),
        patent-assignee (3),
        translator (4) } OPTIONAL ,
    affil Affil OPTIONAL ,
    is-corr BOOLEAN OPTIONAL }          -- TRUE if corresponding author

Affil ::= CHOICE {
    str VisibleString ,                 -- unparsed string
    std SEQUENCE {                      -- std representation
        affil VisibleString OPTIONAL ,      -- Author Affiliation, Name
        div VisibleString OPTIONAL ,        -- Author Affiliation, Division
        city VisibleString OPTIONAL ,       -- Author Affiliation, City
        sub VisibleString OPTIONAL ,        -- Author Affiliation, County Sub
        country VisibleString OPTIONAL ,    -- Author Affiliation, Country
        street VisibleString OPTIONAL ,     -- street address, not ANSI
        email VisibleString OPTIONAL ,
        fax VisibleString OPTIONAL ,
        phone VisibleString OPTIONAL ,
        postal-code VisibleString OPTIONAL }}

    -- Title Group
    --   Valid for = A = Analytic (Cit-art)
    --               J = Journals (Cit-jour)
    --               B = Book (Cit-book)
    --                                              Valid for:
Title ::= SET OF CHOICE {
    name VisibleString ,        -- Title, Anal,Coll,Mono    AJB
    tsub VisibleString ,        -- Title, Subordinate       A B
    trans VisibleString ,       -- Title, Translated        AJB
    jta VisibleString ,         -- Title, Abbreviated        J
    iso-jta VisibleString ,     -- specifically ISO jta      J
    ml-jta VisibleString ,      -- specifically MEDLINE jta  J
    coden VisibleString ,       -- a coden                   J
    issn VisibleString ,        -- ISSN                      J
    abr VisibleString ,         -- Title, Abbreviated         B
    isbn VisibleString }        -- ISBN                       B

Imprint ::= SEQUENCE {                      -- Imprint group
    date Date ,                             -- date of publication
    volume VisibleString OPTIONAL ,
    issue VisibleString OPTIONAL ,
    pages VisibleString OPTIONAL ,
    section VisibleString OPTIONAL ,
    pub Affil OPTIONAL,                     -- publisher, required for book
    cprt Date OPTIONAL,                     -- copyright date, " " "
    part-sup VisibleString OPTIONAL ,       -- part/sup of volume
    language VisibleString DEFAULT "ENG" ,  -- put here for simplicity
    prepub ENUMERATED {                     -- for prepublication citations
        submitted (1) ,                     -- submitted, not accepted
        in-press (2) ,                      -- accepted, not published
        other (255) } OPTIONAL ,
    part-supi VisibleString OPTIONAL ,      -- part/sup on issue
    retract CitRetract OPTIONAL ,           -- retraction info
    pubstatus PubStatus OPTIONAL ,          -- current status of this publication
    history PubStatusDateSet OPTIONAL }     -- dates for this record

CitRetract ::= SEQUENCE {
    type ENUMERATED {                   -- retraction of an entry
        retracted (1) ,                 -- this citation retracted
        notice (2) ,                    -- this citation is a retraction notice
        in-error (3) ,                  -- an erratum was published about this
        erratum (4) } ,                 -- this is a published erratum
    exp VisibleString OPTIONAL }        -- citation and/or explanation

Meeting ::= SEQUENCE {
    number VisibleString ,
    date Date ,
    place Affil OPTIONAL }

END

-- biotree.asn
--$Revision: 1.4 $
--*********************************************************************
--
--  biotree.asn
--
--     BioTree ASN
--     Anatoliy Kuznetsov
--
--*********************************************************************

NCBI-BioTree DEFINITIONS ::=
BEGIN

EXPORTS BioTreeContainer, DistanceMatrix;

BioTreeContainer ::= SEQUENCE {
    treetype VisibleString OPTIONAL,    -- hint on what kind of tree is that
    fdict FeatureDictSet,               -- features dictionary
    nodes NodeSet                       -- set of nodes with encoded topology
}

NodeSet ::= SET OF Node

Node ::= SEQUENCE {
    id INTEGER,                         -- node uid
    parent INTEGER OPTIONAL,            -- parent node id
    features NodeFeatureSet OPTIONAL
}

NodeFeatureSet ::= SET OF NodeFeature

NodeFeature ::= SEQUENCE {
    featureid INTEGER,
    value VisibleString
}

FeatureDictSet ::= SET OF FeatureDescr

FeatureDescr ::= SEQUENCE {
    id INTEGER,                         -- feature id
    name VisibleString                  -- feature name
}

DistanceMatrix ::= SEQUENCE {
    labels SEQUENCE OF VisibleString,   -- n labels
    distances SEQUENCE OF REAL          -- n(n-1)/2 pairwise distances
                                        -- (0, 1)...(0, n), (1, 2)...(1, n)...
}

END

-- blast.asn
-- ----------------------------------------------------------------------------
--
--                            PUBLIC DOMAIN NOTICE
--                National Center for Biotechnology Information
--
--  This software/database is a "United States Government Work" under the terms
--  of the United States Copyright Act. It was written as part of the author's
--  official duties as a United States Government employee and thus cannot be
--  copyrighted. This software/database is freely available to the public for
--  use. The National Library of Medicine and the U.S. Government have not
--  placed any restriction on its use or reproduction.
--
--  Although all reasonable efforts have been taken to ensure the accuracy and
--  reliability of the software and data, the NLM and the U.S. Government do not
--  and cannot warrant the performance or results that may be obtained by using
--  this software or data. The NLM and the U.S. Government disclaim all
--  warranties, express or implied, including warranties of performance,
--  merchantability or fitness for any particular purpose.
--
--  Please cite the authors in any work or product based on this material.
--
-- ----------------------------------------------------------------------------
--
--  Authors: Tom Madden, Tim Boemker
--
--  ASN.1 interface to BLAST.
--
-- ----------------------------------------------------------------------------

NCBI-Blast4 DEFINITIONS ::=
BEGIN

EXPORTS
    Blast4-ka-block,
    Blast4-value,
    Blast4-parameter,
    Blast4-parameters;

IMPORTS
    Bioseq FROM NCBI-Sequence
    Seq-data FROM NCBI-Sequence
    Bioseq-set FROM NCBI-Seqset
    PssmWithParameters FROM NCBI-ScoreMat
    Seq-id, Seq-loc FROM NCBI-Seqloc
    Seq-align, Seq-align-set FROM NCBI-Seqalign;

-- --------------------------------------------------------------------
--
-- Requests
--
-- --------------------------------------------------------------------

Blast4-request ::= SEQUENCE {
    ident VisibleString OPTIONAL,
    body Blast4-request-body
}

Blast4-request-body ::= CHOICE {
    finish-params Blast4-finish-params-request,
    get-databases NULL,
    get-matrices NULL,
    get-parameters NULL,
    get-paramsets NULL,
    get-programs NULL,
    get-search-results Blast4-get-search-results-request,
    get-sequences Blast4-get-sequences-request,
    queue-search Blast4-queue-search-request,
    get-request-info Blast4-get-request-info-request,
    get-sequence-parts Blast4-get-seq-parts-request
}

Blast4-finish-params-request ::= SEQUENCE {
    program VisibleString,
    service VisibleString,
    paramset VisibleString OPTIONAL,
    params Blast4-parameters OPTIONAL
}

Blast4-get-search-results-request ::= SEQUENCE {
    request-id VisibleString
}

Blast4-get-sequences-request ::= SEQUENCE {
    database Blast4-database,
    seq-ids SEQUENCE OF Seq-id
}

-- If a PSSM is used (ie. for PSI-Blast), it must contain a "query"
-- for formatting purposes. Bioseq-set may contain any number of
-- queries, specified as data. Seq-loc-list may contain only the
-- "whole" or "interval" types. In the case of "whole", any number of
-- queries may be used; in the case of "interval", there should be
-- exactly one query. (This is limited by the BlastObject.)

Blast4-queries ::= CHOICE {
    pssm PssmWithParameters,
    seq-loc-list SEQUENCE OF Seq-loc,
    bioseq-set Bioseq-set
}

-- Options have been broken down into two groups as part of the BLAST
-- API work. The algorithm options essentially correspond to those
-- options available via the CBlastOptions class.
--   algorithm-options: Options for BLAST (ie. seq comparison) algorithm.
--   program-options: Other options, such as which seqs. to compare.

Blast4-queue-search-request ::= SEQUENCE {
    program VisibleString,
    service VisibleString,
    queries Blast4-queries,
    subject Blast4-subject,
    paramset VisibleString OPTIONAL,
    algorithm-options Blast4-parameters OPTIONAL,
    program-options Blast4-parameters OPTIONAL
}

-- Fetch information about the search request.

Blast4-get-request-info-request ::= SEQUENCE {
    request-id VisibleString
}

Blast4-get-request-info-reply ::= SEQUENCE {
    database Blast4-database,
    program VisibleString,
    service VisibleString,
    created-by VisibleString,
    queries Blast4-queries,
    algorithm-options Blast4-parameters,
    program-options Blast4-parameters
}

-- Fetch parts of a sequence a la carte.

Blast4-get-seq-parts-request ::= SEQUENCE {
    database Blast4-database,
    id Seq-id,
    need-meta-data BOOLEAN,

    -- If end is 0, no data will be fetched. If end is past the
    -- length of the sequence, it will be adjusted to the end of
    -- the sequence (this allows fetching of the first chunk in
    -- cases where the length is not yet known).
    start INTEGER OPTIONAL,
    end INTEGER OPTIONAL
}

-- --------------------------------------------------------------------
--
-- Replies
--
-- --------------------------------------------------------------------

Blast4-reply ::= SEQUENCE {
    errors SEQUENCE OF Blast4-error OPTIONAL,
    body Blast4-reply-body
}

Blast4-reply-body ::= CHOICE {
    finish-params Blast4-finish-params-reply,
    get-databases Blast4-get-databases-reply,
    get-matrices Blast4-get-matrices-reply,
    get-parameters Blast4-get-parameters-reply,
    get-paramsets Blast4-get-paramsets-reply,
    get-programs Blast4-get-programs-reply,
    get-search-results Blast4-get-search-results-reply,
    get-sequences Blast4-get-sequences-reply,
    queue-search Blast4-queue-search-reply,
    get-queries Blast4-get-queries-reply,
    get-request-info Blast4-get-request-info-reply,
    get-sequence-parts Blast4-get-seq-parts-reply
}

Blast4-finish-params-reply ::= Blast4-parameters

Blast4-get-databases-reply ::= SEQUENCE OF Blast4-database-info

Blast4-get-matrices-reply ::= SEQUENCE OF Blast4-matrix-id

Blast4-get-parameters-reply ::= SEQUENCE OF Blast4-parameter-info

Blast4-get-paramsets-reply ::= SEQUENCE OF Blast4-paramset-info

Blast4-get-programs-reply ::= SEQUENCE OF Blast4-program-info

Blast4-get-search-results-reply ::= SEQUENCE {
    alignments Seq-align-set OPTIONAL,
    phi-alignments Blast4-phi-alignments OPTIONAL,

    -- Masking locations for the query sequence(s). Each element of this set
    -- corresponds to a single query's translation frame as appropriate.
    masks SEQUENCE OF Blast4-mask OPTIONAL,

    ka-blocks SEQUENCE OF Blast4-ka-block OPTIONAL,
    search-stats SEQUENCE OF VisibleString OPTIONAL,
    pssm PssmWithParameters OPTIONAL
}

Blast4-get-sequences-reply ::= SEQUENCE OF Bioseq

Blast4-queue-search-reply ::= SEQUENCE {
    request-id VisibleString OPTIONAL
}

Blast4-get-queries-reply ::= SEQUENCE {
    queries Blast4-queries
}

Blast4-get-seq-parts-reply ::= SEQUENCE {
    bioseq Bioseq OPTIONAL,
    ids SEQUENCE OF Seq-id OPTIONAL,
    length INTEGER OPTIONAL,
    data Seq-data OPTIONAL
}

-- --------------------------------------------------------------------
--
-- Errors
--
-- --------------------------------------------------------------------

Blast4-error ::= SEQUENCE {
    code INTEGER,
    message VisibleString OPTIONAL
}

Blast4-error-flags ::= ENUMERATED {
    warning (1024),
    error (2048)
}

Blast4-error-code ::= INTEGER {
    -- warnings
    conversion-warning (1024),

    -- errors
    internal-error (2048),
    not-implemented (2049),
    not-allowed (2050),
    bad-request (2051),
    bad-request-id (2052),
    search-pending (2053)
}

-- --------------------------------------------------------------------
--
-- Other types in alphabetical order
--
-- --------------------------------------------------------------------

Blast4-cutoff ::= CHOICE {
    e-value REAL,
    raw-score INTEGER
}

Blast4-database ::= SEQUENCE {
    name VisibleString,
    type Blast4-residue-type
}

-- Borrowed from seq.asn
Blast4-seqtech ::= INTEGER {
    unknown (0) ,
    standard (1) ,              -- standard sequencing
    est (2) ,                   -- Expressed Sequence Tag
    sts (3) ,                   -- Sequence Tagged Site
    survey (4) ,                -- one-pass genomic sequence
    genemap (5) ,               -- from genetic mapping techniques
    physmap (6) ,               -- from physical mapping techniques
    derived (7) ,               -- derived from other data, not a primary entity
    concept-trans (8) ,         -- conceptual translation
    seq-pept (9) ,              -- peptide was sequenced
    both (10) ,                 -- concept transl. w/ partial pept. seq.
    seq-pept-overlap (11) ,     -- sequenced peptide, ordered by overlap
    seq-pept-homol (12) ,       -- sequenced peptide, ordered by homology
    concept-trans-a (13) ,      -- conceptual transl. supplied by author
    htgs-1 (14) ,               -- unordered High Throughput sequence contig
    htgs-2 (15) ,               -- ordered High Throughput sequence contig
    htgs-3 (16) ,               -- finished High Throughput sequence
    fli-cdna (17) ,             -- full length insert cDNA
    htgs-0 (18) ,               -- single genomic reads for coordination
    htc (19) ,                  -- high throughput cDNA
    wgs (20) ,                  -- whole genome shotgun sequencing
    other (255)                 -- use Source.techexp
}

Blast4-database-info ::= SEQUENCE {
    database Blast4-database,
    description VisibleString,
    last-updated VisibleString,
    total-length BigInt,
    num-sequences BigInt,
    seqtech Blast4-seqtech,
    taxid INTEGER
}

Blast4-frame-type ::= ENUMERATED {
    notset (0),
    plus1 (1),
    plus2 (2),
    plus3 (3),
    minus1 (4),
    minus2 (5),
    minus3 (6)
}

Blast4-ka-block ::= SEQUENCE {
    lambda REAL,
    k REAL,
    h REAL,
    gapped BOOLEAN
}

-- Masking locations for a query's frame. The locations field is a single
-- Seq-loc of type Packed-int, which contains all the masking locations for the
-- translation frame specified by the frame field.
-- Notes:
-- On input (i.e.: when the client specifies masking locations as a
-- Blast4-parameter), in the case of protein queries, the frame field must
-- always be notset, in the case of nucleotide queries (regardless of whether
-- the search will translate these or not), the frame must be plus1. Masking
-- locations in the translated encoding are not permitted.
-- On output (i.e.: when blast 4 server encodes these as part of the
-- Blast4-get-search-results-reply), the same conventions as above apply for
-- non-translated protein and nucleotide queries, but in the case of translated
-- nucleotide queries, the frame field can be specified in any of the
-- translation frames as appropriate.
Blast4-mask ::= SEQUENCE {
    locations SEQUENCE OF Seq-loc,
    frame Blast4-frame-type
}

Blast4-matrix-id ::= SEQUENCE {
    residue-type Blast4-residue-type,
    name VisibleString
}

Blast4-parameter ::= SEQUENCE {
    name VisibleString,
    value Blast4-value
}

Blast4-parameter-info ::= SEQUENCE {
    name VisibleString,
    type VisibleString
}

Blast4-paramset-info ::= SEQUENCE {
    program VisibleString,
    name VisibleString
}

Blast4-program-info ::= SEQUENCE {
    program VisibleString,
    services SEQUENCE OF VisibleString
}

Blast4-residue-type ::= ENUMERATED {
    unknown (0),
    protein (1),
    nucleotide (2)
}

Blast4-strand-type ::= ENUMERATED {
    forward-strand (1),
    reverse-strand (2),
    both-strands (3)
}

Blast4-subject ::= CHOICE {
    database VisibleString,
    sequences SEQUENCE OF Bioseq
}

Blast4-parameters ::= SEQUENCE OF Blast4-parameter

Blast4-phi-alignments ::= SEQUENCE {
    num-alignments INTEGER,
    seq-locs SEQUENCE OF Seq-loc
}

Blast4-value ::= CHOICE {
    -- scalar types
    big-integer BigInt,
    bioseq Bioseq,
    boolean BOOLEAN,
    cutoff Blast4-cutoff,
    integer INTEGER,
    matrix PssmWithParameters,
    real REAL,
    seq-align Seq-align,
    seq-id Seq-id,
    seq-loc Seq-loc,
    strand-type Blast4-strand-type,
    string VisibleString,

    -- lists of scalar types
    big-integer-list SEQUENCE OF BigInt,
    bioseq-list SEQUENCE OF Bioseq,
    boolean-list SEQUENCE OF BOOLEAN,
    cutoff-list SEQUENCE OF Blast4-cutoff,
    integer-list SEQUENCE OF INTEGER,
    matrix-list SEQUENCE OF PssmWithParameters,
    real-list SEQUENCE OF REAL,
    seq-align-list SEQUENCE OF Seq-align,
    seq-id-list SEQUENCE OF Seq-id,
    seq-loc-list SEQUENCE OF Seq-loc,
    strand-type-list SEQUENCE OF Blast4-strand-type,
    string-list SEQUENCE OF VisibleString,

    -- imported collection types
    bioseq-set Bioseq-set,
    seq-align-set Seq-align-set,

    -- Intended to represent user-provided masking locations for a single query
    -- sequence (name field in Blast4-parameter should be "LCaseMask").
    -- Multiple Blast4-parameters of this type are needed to specify masking
    -- locations for multiple queries.
    query-mask Blast4-mask
}

END

-- blastdb.asn
--$Id: blastdb.asn 145295 2008-11-10 15:59:38Z camacho $
--
-- Notes:
--
-- taxonomy: an integer is proposed, which would require some sort of
--     table (or network connection) to do the conversions from integer
--     to various names. This could save quite a bit of space for databases
--     that are predominantly of one organism (e.g., human in htgs).
--     I've proposed here that table contain scientific-, common-, and
--     blast-names at the advice of Scott Federhen. Scott also was in
--     favor of having the complete lineage in the file, but it seems like
--     this would be seldom used and we could have a view with a link back
--     to the taxonomy page for anyone needing it. Since one file would
--     suffice for all blast databases, it seems like this should be a new file.
--
-- memberships: a sequence of integers is proposed. Each bit of an integer
--     would indicate membership in some (virtual) blast database (e.g., pdb,
--     swissprot) or some classification (e.g., mRNA, genomic).
--
-- links: a sequence of integers is proposed. Each bit of an integer would
--     indicate a link that could be established based upon the gi of the
--     database sequence.
-- NCBI-BlastDL DEFINITIONS ::= BEGIN EXPORTS Blast-def-line-set, Blast-def-line; IMPORTS Seq-id, Seq-loc FROM NCBI-Seqloc; Blast-def-line-set ::= SEQUENCE OF Blast-def-line -- all deflines for an entry Blast-def-line ::= SEQUENCE { title VisibleString OPTIONAL, -- simple title seqid SEQUENCE OF Seq-id, -- Regular NCBI Seq-Id taxid INTEGER OPTIONAL, -- taxonomy id memberships SEQUENCE OF INTEGER OPTIONAL, -- bit arrays links SEQUENCE OF INTEGER OPTIONAL, -- bit arrays other-info SEQUENCE OF INTEGER OPTIONAL -- for future use (probably genomic sequences) } -- This defines the possible sequence filtering algorithms to be used in a -- BLAST database Blast-filter-program ::= INTEGER { not-set (0), dust (10), seg (20), windowmasker (30), repeat (40), other (100), max (255) } Blast-mask-list ::= SEQUENCE { masks SEQUENCE OF Seq-loc, more BOOLEAN } Blast-db-mask-info ::= SEQUENCE { algo-id INTEGER, algo-program Blast-filter-program, algo-options VisibleString, masks Blast-mask-list } END -- blastxml.asn --$Id: blastxml.asn 120927 2008-02-28 18:57:30Z ucko $ NCBI-BlastOutput DEFINITIONS ::= BEGIN BlastOutput ::= SEQUENCE { program VisibleString , -- BLAST program: blastp, tblastx etc. 
version VisibleString , -- Program version reference VisibleString , -- Steven, David, Tom and others db VisibleString , -- BLAST Database name query-ID VisibleString , -- SeqId of query query-def VisibleString , -- Definition line of query query-len INTEGER , -- length of query sequence query-seq VisibleString OPTIONAL , -- query sequence itself param Parameters, -- search parameters iterations SEQUENCE OF Iteration, mbstat Statistics OPTIONAL -- Mega BLAST search statistics } Iteration ::= SEQUENCE { iter-num INTEGER , -- iteration number query-ID VisibleString OPTIONAL, -- SeqId of query query-def VisibleString OPTIONAL,-- Definition line of query query-len INTEGER OPTIONAL , -- length of query sequence hits SEQUENCE OF Hit OPTIONAL, -- Hits one for every db sequence stat Statistics OPTIONAL, -- search statistics message VisibleString OPTIONAL -- Some (error?) information } Parameters ::= SEQUENCE { matrix VisibleString OPTIONAL , -- Matrix used (-M) expect REAL , -- Expectation threshold (-e) include REAL OPTIONAL , -- Inclusion threshold (-h) sc-match INTEGER OPTIONAL , -- match score for NT (-r) sc-mismatch INTEGER OPTIONAL , -- mismatch score for NT (-q) gap-open INTEGER , -- Gap opening cost (-G) gap-extend INTEGER , -- Gap extension cost (-E) filter VisibleString OPTIONAL, -- Filtering options (-F) pattern VisibleString OPTIONAL, -- PHI-BLAST pattern entrez-query VisibleString OPTIONAL -- Limit of request to Entrez query } Statistics ::= SEQUENCE { db-num INTEGER , -- Number of sequences in BLAST db db-len BigInt , -- Length of BLAST db hsp-len INTEGER , -- Effective HSP length eff-space REAL, -- Effective search space kappa REAL, -- Karlin-Altschul parameter K lambda REAL, -- Karlin-Altschul parameter Lambda entropy REAL -- Karlin-Altschul parameter H } Hit ::= SEQUENCE { num INTEGER , -- hit number id VisibleString , -- SeqId of subject def VisibleString , -- definition line of subject accession VisibleString , -- accession len INTEGER , -- length of 
subject hsps SEQUENCE OF Hsp OPTIONAL -- all HSP regions for the given subject } Hsp ::= SEQUENCE { num INTEGER , -- HSP number bit-score REAL , -- score (in bits) of HSP score REAL , -- score of HSP evalue REAL , -- e-value of HSP query-from INTEGER , -- start of HSP in query query-to INTEGER , -- end of HSP hit-from INTEGER, -- start of HSP in subject hit-to INTEGER , -- end of HSP in subject pattern-from INTEGER OPTIONAL , -- start of PHI-BLAST pattern pattern-to INTEGER OPTIONAL , -- end of PHI-BLAST pattern query-frame INTEGER OPTIONAL , -- translation frame of query hit-frame INTEGER OPTIONAL , -- translation frame of subject identity INTEGER OPTIONAL , -- number of identities in HSP positive INTEGER OPTIONAL , -- number of positives in HSP gaps INTEGER OPTIONAL , -- number of gaps in HSP align-len INTEGER OPTIONAL , -- length of the alignment used density INTEGER OPTIONAL , -- score density qseq VisibleString , -- alignment string for the query (with gaps) hseq VisibleString, -- alignment string for subject (with gaps) midline VisibleString OPTIONAL -- formating middle line } END -- cdd.asn --$Revision: 150675 $ --********************************************************************** -- -- Definitions for CDD's -- -- NCBI Structure Group -- -- National Center for Biotechnology Information -- National Institutes of Health -- Bethesda, MD 20894 USA -- -- October 1999 -- -- asntool -m cdd.asn -w 100 -o cdd.h -- asntool -B objcdd -m cdd.asn -G -w 100 -I objseq.h objsset.h -K cdd.h \ -- -M asn.all --********************************************************************** NCBI-Cdd DEFINITIONS ::= -- NCBI Conserved Domain Definition BEGIN EXPORTS Cdd-id, Cdd-id-set, Cdd, Cdd-set, Cdd-tree, Cdd-tree-set, Cdd-pref-nodes, Cdd-Project; IMPORTS Date FROM NCBI-General Pub FROM NCBI-Pub Biostruc-annot-set FROM MMDB Bioseq FROM NCBI-Sequence Seq-annot FROM NCBI-Sequence Seq-entry FROM NCBI-Seqset Org-ref FROM NCBI-Organism Seq-id FROM NCBI-Seqloc Seq-interval FROM 
NCBI-Seqloc Seq-loc FROM NCBI-Seqloc Seq-feat FROM NCBI-Seqfeat Score-set FROM NCBI-Seqalign Cn3d-style-dictionary, Cn3d-user-annotations FROM NCBI-Cn3d PssmWithParameters FROM NCBI-ScoreMat; -- dealing with lists of preferred tax-nodes Cdd-org-ref ::= SEQUENCE { reference Org-ref, active BOOLEAN DEFAULT TRUE, parent-tax-id INTEGER OPTIONAL, rank VisibleString OPTIONAL } Cdd-org-ref-set ::= SET OF Cdd-org-ref Cdd-pref-node-descr ::= CHOICE { create-date Date, description VisibleString } Cdd-pref-node-descr-set ::= SET OF Cdd-pref-node-descr Cdd-pref-nodes ::= SEQUENCE { preferred-nodes Cdd-org-ref-set, model-organisms Cdd-org-ref-set OPTIONAL, optional-nodes Cdd-org-ref-set OPTIONAL, description Cdd-pref-node-descr-set OPTIONAL } -- Cdd's should not exist without a unique accession, but alternative id's may -- be present as well. It is conceivable that a CD which is created as a merged -- product of two highly redundant CDs will retain the source ids in addition -- to its new unique id Global-id ::= SEQUENCE { accession VisibleString, -- SMART, Pfam, LOAD or CD accession release VisibleString OPTIONAL, -- to hold CD-Database release number -- if desired, currently not used version INTEGER OPTIONAL, -- version 0 is the seed, version -- numbers increase with update/curate -- cycles database VisibleString OPTIONAL -- this is NOT the source!, rather the } -- database the object resides in -- currently not in use Cdd-id ::= CHOICE { uid INTEGER, -- for synchronization with Entrez -- holds PSSM-Ids gid Global-id -- holds accession/version pairs } Cdd-id-set ::= SEQUENCE OF Cdd-id Cdd-repeat ::= SEQUENCE { -- record whether the CD contains -- repeated sequence/structure motifs count INTEGER, -- number of tandem repeats in the CD location Seq-loc OPTIONAL, -- location on the representative avglen INTEGER OPTIONAL -- average repeat length } Cdd-book-ref ::= SEQUENCE { -- record a link to Entrez Books bookname VisibleString, -- abbreviated book title textelement ENUMERATED { 
unassigned(0), -- type of element section(1), -- a section or paragraph figgrp(2), -- a figure or set of figures table(3), -- a table chapter(4), -- a whole chapter biblist(5), -- a lisf of references box(6), -- an inserted box glossary(7), -- glossary appendix(8), -- appendix other(255) }, elementid INTEGER OPTIONAL, -- numerical address of the text-element subelementid INTEGER OPTIONAL, -- exact address, used with section celementid VisibleString OPTIONAL, -- address of the text element, if character string csubelementid VisibleString OPTIONAL -- exact address, if character string } -- The description of CDD's refers to the specific set of aligned sequences, -- the region that is being aligned and the information contained in the -- alignment. It may contain a lengthy comment -- describing the function of the domain as well as its origin and all -- other anecdotal information that can't be pressed into a rigid scheme. -- Crosslinks to reference papers available in PubMed are possible as well. -- There can be as many of these as you want in the CDD. Cdd-descr ::= CHOICE { othername VisibleString, -- alternative names for the CDD -- if domain has several common names category VisibleString, -- intracellular, extracellular, etc. -- to record spatial and/or temporal -- expression in free-text format comment VisibleString, -- this is where descriptions go reference Pub, -- a citation describing the domain create-date Date, -- Date of first creation/dump tax-source Org-ref, -- holds the highest common tax node source VisibleString, -- the database the seeds were created -- from, e.g. SMART, PFAM, etc.. 
status INTEGER { unassigned(0), finished-ok(1), -- a public curated CD pending-release(2), -- needs work done, not yet released other-asis(3), -- imported as-is, immediate release matrix-only(4), -- CD holds a Psi-Blast PSSM only, -- does not contain alignment data update-running(5), -- has been flagged for -- update (in queue) auto-updated(6), -- update finished, no -- work necessary claimed(7), -- is earmarked for curation curated-complete(8),-- public curated member of a -- completed family other(255) }, -- for CD production? update-date Date, -- Date of last version change scrapbook SEQUENCE OF VisibleString, -- for storing curation notes -- those won't make it into public -- distributions source-id Cdd-id-set, -- for linking back to source db repeats Cdd-repeat, -- to record repeat counts old-root Cdd-id-set, -- to record short-term history curation-status INTEGER { unassigned(0), -- to record curation status prein (1), -- when CD is checked out from ofc (2), -- the tracking database, for iac (3), -- use within curation software ofv1 (4), iav1 (5), ofv2 (6), iav2 (7), postin (8), other (255) }, readonly-status INTEGER { unassigned(0), -- to record read-only status readonly (1), -- when CD is checked out from readwrite (2), -- the tracking database, for other (255) }, -- use within curation software book-ref Cdd-book-ref, -- links to Entrez/books attribution Pub, -- add citations and/or author names title VisibleString -- hold short descriptive text } Cdd-descr-set ::= SET OF Cdd-descr -- the Cdd-tree stores the hierarchy of CDDs. 
-- These objects are stored separate
-- from the CDs to allow for fast retrieval and use as an 'index' into CDs
-- all the components in a CD-tree match components in the full-sized CD
-- and should be synchronized

Cdd-tree ::= SEQUENCE {
  name VisibleString,                  -- short name copied from CD
  id Cdd-id-set,                       -- IDs copied from CD
  description Cdd-descr-set OPTIONAL,  -- description copied from CD
  parent Cdd-id OPTIONAL,              -- CD is the result of a split/merge
  children Cdd-id-set OPTIONAL,        -- this CD has been split
  siblings Cdd-id-set OPTIONAL,        -- related CDs (have common hits)
  neighbors Cdd-id-set OPTIONAL        -- co-occurring CDs (non-overlapping
                                       -- hits to same sequences)
}

Cdd-tree-set ::= SEQUENCE OF Cdd-tree

-- Matrix definitions, these are supposed to store PSSMs and corresponding
-- matrices of relative residue frequencies.
-- the number of columns and rows is listed explicitly, values in columns
-- are stored column by column, i.e. in groups of nrows values for each column

Matrix ::= SEQUENCE {
  ncolumns INTEGER,
  nrows INTEGER,
  row-labels SEQUENCE OF VisibleString OPTIONAL,
  scale-factor INTEGER,
  columns SEQUENCE OF INTEGER
}

-- definition for matrix of pairwise "distances", stored as the upper
-- triangle of a squared n x n matrix (excluding the diagonal), this is
-- supposed to store pairwise percentages of identical residues, pairwise
-- alignment scores or E-values from pairwise BLAST sequence comparisons

Triangle ::= SEQUENCE {
  nelements INTEGER,
  scores Score-set OPTIONAL,
  div-ranks SEQUENCE OF INTEGER OPTIONAL
}

-- Update-align is supposed to contain alignments that still need some work
-- done to fit into the CD-proper alignment. These originate from the
-- CD update process (generated by Blast, for example) or may be created in
-- an editing session to save its state

Update-comment ::= CHOICE {
  comment VisibleString,   -- free text to describe nature of
                           -- Update-align
  addthis Seq-loc,         -- suggestion for inclusion in the CD
                           -- without corresponding alignment
  replaces Seq-loc,        -- if one or several alignment rows are
                           -- to be replaced by the Update-align
  reject-loc Seq-loc,      -- if used with Reject-id, specify a
                           -- location on a sequence which should
                           -- not be used
  reference Pub            -- if update alignment imported from
                           -- citation and for whenever it seems
                           -- necessary to cite
}

-- Both fields are optional, as the Update-align may be a Seq-annot without
-- description, or a suggestion to add a sequence without the corresponding
-- alignment

Update-align ::= SEQUENCE {
  description SEQUENCE OF Update-comment OPTIONAL,
  seqannot Seq-annot OPTIONAL,   -- contains the SeqAlign
  type INTEGER {
    unassigned(0),
    update(1),
    update-3d(2),
    demoted(51),
    demoted-3d(52),
    other(255) }
}

Reject-id ::= SEQUENCE {
  description SEQUENCE OF Update-comment OPTIONAL,
  ids SET OF Seq-id
}

Feature-evidence ::= CHOICE {
  comment VisibleString,        -- so we can spell out what doesn't
                                -- fit in any other category
  reference Pub,                -- evidence via a literature reference
  bsannot Biostruc-annot-set,   -- evidence via Biostruc-features, such
                                -- as structure superpositions
  seqfeat Seq-feat,             -- evidence is a Sequence feature found
                                -- elsewhere
  book-ref Cdd-book-ref         -- evidence is a book chapter or figure
}

Align-annot ::= SEQUENCE {
  location Seq-loc,                     -- points to a location in one of the
                                        -- aligned sequences, usually the
                                        -- master/representative
  description VisibleString OPTIONAL,   -- to hold descriptions/names like
                                        -- "Heme binding site" or "catalytic
                                        -- triad" etc., something that should
                                        -- be used for labels in visualization
  evidence SEQUENCE OF Feature-evidence OPTIONAL,  -- evidence we can
                                        -- compute with
  type INTEGER OPTIONAL                 -- for typing annotated features
}

Align-annot-set ::= SEQUENCE OF Align-annot

-- the Domain-parent records an evolutionary relationship which may not be
-- as simple as a classical parent-child relationship in a typical hierarchy,
-- i.e. where a CD is merely a specific subgroup ("child") of a more general
-- diverse alignment model ("parent"). A CD alignment model may be the result
-- of an ancient fusion event, combining two or more domains into a bigger unit
-- which has subsequently undergone a divergent evolutionary process similar to
-- what may have happened to a single "domain". A CD alignment model may
-- also reflect the result of a deletion event, where a specific subgroup
-- lacks part of a (set of) domain(s), but where the part present is found to
-- be highly similar to a putative "parent", with some added evidence for
-- an actual deletion, like from the distribution of truncated copies in phylogenetic
-- lineages. Deletion events which affect different parts of a set of
-- duplicated domain architectures may be indistinguishable from actual
-- fission events, which means that we may want to represent the latter as
-- deletions after duplication and do not need a special case for fissions.

Domain-parent ::= SEQUENCE {
  parent-type INTEGER {
    classical (0),             -- the classification of parent child relations
    fusion (1),
    deletion (2),
    permutation (3),
    other (255) },
  parentid Cdd-id,             -- identify the section parent by accession
  seqannot Seq-annot OPTIONAL  -- contains the sequence alignment linking
                               -- CD alignment models, should align the
                               -- masters/representatives of each CD
}

-- record sequence trees generated by a suitable algorithm.
Sequence-tree ::= SEQUENCE {
  cdAccession VisibleString OPTIONAL,
  algorithm Algorithm-type,
  isAnnotated BOOLEAN DEFAULT FALSE,
  root SeqTree-node
}

-- Recursive tree node: the inner CHOICE holds either a list of child
-- nodes or a footprint (a sequence range with an optional row id).
SeqTree-node ::= SEQUENCE {
  isAnnotated BOOLEAN DEFAULT FALSE,
  name VisibleString OPTIONAL,
  distance REAL OPTIONAL,
  children CHOICE {
    children SEQUENCE OF SeqTree-node,
    footprint SEQUENCE {
      seqRange Seq-interval,
      rowId INTEGER OPTIONAL
    }
  },
  annotation Node-annotation OPTIONAL
}

Algorithm-type ::= SEQUENCE {
  scoring-Scheme INTEGER {
    unassigned (0),
    percent-id (1),
    kimura-corrected (2),
    aligned-score (3),
    aligned-score-ext (4),
    aligned-score-filled (5),
    blast-footprint (6),
    blast-full (7),
    hybrid-aligned-score (8),
    other (255) },
  clustering-Method INTEGER {
    unassigned (0),
    single-linkage (1),
    neighbor-joining (2),
    fast-minimum-evolution (3),
    other (255) },
  score-Matrix INTEGER {
    unassigned (0),
    blosum45 (1),
    blosum62 (2),
    blosum80 (3),
    pam30 (4),
    pam70 (5),
    pam250 (6),
    other (255) } OPTIONAL,
  gapOpen INTEGER OPTIONAL,
  gapExtend INTEGER OPTIONAL,
  gapScaleFactor INTEGER OPTIONAL,
  nTerminalExt INTEGER OPTIONAL,
  cTerminalExt INTEGER OPTIONAL,
  tree-scope INTEGER {
    allDescendants (0),
    immediateChildrenOnly(1),
    selfOnly (2),
    other (255) } OPTIONAL,
  coloring-scope INTEGER {
    allDescendants (0),
    immediateChildrenOnly (1),
    other (255) } OPTIONAL
}

Node-annotation ::= SEQUENCE {
  presentInChildCD VisibleString OPTIONAL,
  note VisibleString OPTIONAL
}

-- the Cdd is the basic ASN.1 object storing an annotated and curated set of
-- alignments (formulated as a set of pairwise master-slave alignments).
-- The alignment data are contained in Seq-annots, and a special type of
-- object, the Update-align, contains additional alignment data from unfinished
-- editing sessions and update processes. The Biostruc-annot-set holds
-- structure superposition information for multiple structure-derived rows in
-- the alignment.
-- Version numbers in Global-ids are meant to be updated every time the Cdd is
-- changed in a way that does not require Global-ids to be changed (sequences
-- added in update cycle, annotation changed, alignment errors fixed)

Cdd ::= SEQUENCE {
  name VisibleString,                       -- a short name (can be the accession..)
  id Cdd-id-set,                            -- this CD's Ids
  description Cdd-descr-set OPTIONAL,       -- status, references, etc.
  seqannot SEQUENCE OF Seq-annot OPTIONAL,  -- contains the CD alignment
  features Biostruc-annot-set OPTIONAL,     -- contains structure
                                            -- alignment data
                                            -- or "core" definitions
  sequences Seq-entry OPTIONAL,             -- store as bioseq-set inside seq-entry
  profile-range Seq-interval OPTIONAL,      -- profile for this region only
                                            -- also stores the Seq-id of the master
  trunc-master Bioseq OPTIONAL,             -- holds the truncated master, which
                                            -- may be something like a consensus,
                                            -- uses the same sequence coordinate
                                            -- frame as the profile-range
  posfreq Matrix OPTIONAL,                  -- relative residue frequencies
  scoremat Matrix OPTIONAL,                 -- Position dependent score matrix
  distance Triangle OPTIONAL,               -- pairwise distances for all seqs.
  parent Cdd-id OPTIONAL,                   -- this CD is the result of a split
  children Cdd-id-set OPTIONAL,             -- this CD has been split, not used
  siblings Cdd-id-set OPTIONAL,             -- related CDs (common hits), clusters
  neighbors Cdd-id-set OPTIONAL,            -- co-occurring CDs, not used
  pending SEQUENCE OF Update-align OPTIONAL,  -- contains alignments from
                                            -- update or "lower panel"
  rejects SEQUENCE OF Reject-id OPTIONAL,   -- SeqIds of rejected CD-
                                            -- members, ignore in update
  master3d SET OF Seq-id OPTIONAL,          -- record if CD has a 3D representative
  alignannot Align-annot-set OPTIONAL,      -- alignment annotation
  style-dictionary Cn3d-style-dictionary OPTIONAL,  -- record rendering styles
  user-annotations Cn3d-user-annotations OPTIONAL,  -- user annotations in Cn3D
  ancestors SEQUENCE OF Domain-parent OPTIONAL,     -- list of parents
  scoreparams PssmWithParameters OPTIONAL,
  seqtree Sequence-tree OPTIONAL
}

Cdd-set ::= SET OF Cdd

-- Cdd projects store a set of CDs, typically related to each other
-- relationships would be specified using the ancestors fields in the
-- individual CD objects. For use with CD-Tree, a program to visualize
-- curated CD hierarchies and evidence for hierarchical family structures.
Cdd-Viewer-Rect ::= SEQUENCE {
  top INTEGER,     -- top coordinate
  left INTEGER,    -- left coordinate
  width INTEGER,   -- width
  height INTEGER   -- height
}

Cdd-Viewer ::= SEQUENCE {
  ctrl INTEGER {                       -- viewer type
    unassigned (0),
    cd-info (1),
    align-annot (2),
    seq-list (3),
    seq-tree (4),
    merge-preview (5),
    cross-hits (6),
    notes (7),
    tax-tree (8),
    dart (9),
    dart-selected-rows (10),
    other (255) },
  rect Cdd-Viewer-Rect OPTIONAL,       -- viewer rectangle
  accessions SEQUENCE OF VisibleString -- list of accessions associated with a viewer
}

Cdd-Script ::= SEQUENCE {
  type INTEGER {
    unassigned (0),
    user-recorded (1),
    server-generated (2),
    other (255) } OPTIONAL,
  name VisibleString OPTIONAL,   -- user assigned name/description
  commands VisibleString         -- actual script commands
}

-- cd colors are as: 0000FF for red, 00FF00 for green, FF0000 for blue
Cdd-Project ::= SEQUENCE {
  cds SEQUENCE OF Cdd ,                 -- cds
  cdcolor SEQUENCE OF INTEGER,          -- colors
  viewers SEQUENCE OF Cdd-Viewer,       -- Sequence viewers
  log VisibleString,                    -- log
  scripts SEQUENCE OF Cdd-Script OPTIONAL  -- command scripts
}

END

-- cn3d.asn
--$Revision: 1.15 $
--**********************************************************************
--
-- Definitions for Cn3D-specific data (rendering settings,
-- user annotations, etc.)
--
-- by Paul Thiessen
--
-- National Center for Biotechnology Information
-- National Institutes of Health
-- Bethesda, MD 20894 USA
--
-- asntool -m cn3d.asn -w 100 -o cn3d.h
-- asntool -B objcn3d -m cn3d.asn -G -w 100 -K cn3d.h -I mapcn3d.h \
-- -M ../mmdb1.asn,../mmdb2.asn,../mmdb3.asn
--**********************************************************************

NCBI-Cn3d DEFINITIONS ::=
-- Cn3D-specific information
BEGIN

EXPORTS Cn3d-style-dictionary, Cn3d-user-annotations;

IMPORTS Biostruc-id FROM MMDB
        Molecule-id, Residue-id FROM MMDB-Chemical-graph;

-- values of enumerations must match those in cn3d/style_manager.hpp!
Cn3d-backbone-type ::= ENUMERATED {   -- for different types of backbones
  off (1),
  trace (2),
  partial (3),
  complete (4)
}

Cn3d-drawing-style ::= ENUMERATED {   -- atom/bond/object rendering styles
  -- for atoms and bonds
  wire (1),
  tubes (2),
  ball-and-stick (3),
  space-fill (4),
  wire-worm (5),
  tube-worm (6),
  -- for 3d-objects
  with-arrows (7),
  without-arrows (8)
}

-- Note: the numeric values below are not in ascending order
-- (residue is 20, the block-* schemes are 17-19).
Cn3d-color-scheme ::= ENUMERATED {    -- available color schemes (not all
                                      -- necessarily applicable to all objects)
  element (1),
  object (2),
  molecule (3),
  domain (4),
  residue (20),
  secondary-structure (5),
  user-select (6),
  -- different alignment conservation coloring (currently only for proteins)
  aligned (7),
  identity (8),
  variety (9),
  weighted-variety (10),
  information-content (11),
  fit (12),
  block-fit (17),
  block-z-fit (18),
  block-row-fit (19),
  -- other schemes
  temperature (13),
  hydrophobicity (14),
  charge (15),
  rainbow (16)
}

-- RGB triplet, interpreted (after division by the scale-factor) as floating
-- point values which should range from [0..1]. The default scale-factor is
-- 255, so that one can conveniently set integer byte values [0..255] for
-- colors with the scale-factor already set appropriately to map to [0..1].
-- An alpha value is allowed, but is currently ignored by Cn3D.
Cn3d-color ::= SEQUENCE {
  scale-factor INTEGER DEFAULT 255,
  red INTEGER,
  green INTEGER,
  blue INTEGER,
  alpha INTEGER DEFAULT 255
}

Cn3d-backbone-style ::= SEQUENCE {   -- style blob for backbones only
  type Cn3d-backbone-type,
  style Cn3d-drawing-style,
  color-scheme Cn3d-color-scheme,
  user-color Cn3d-color
}

Cn3d-general-style ::= SEQUENCE {    -- style blob for other objects
  is-on BOOLEAN,
  style Cn3d-drawing-style,
  color-scheme Cn3d-color-scheme,
  user-color Cn3d-color
}

Cn3d-backbone-label-style ::= SEQUENCE {   -- style blob for backbone labels
  spacing INTEGER,                   -- zero means none
  type ENUMERATED {
    one-letter (1),
    three-letter (2)
  },
  number ENUMERATED {
    none (0),
    sequential (1),                  -- from 1, by residues present, to match sequence
    pdb (2)                          -- use number assigned by PDB
  },
  termini BOOLEAN,
  white BOOLEAN                      -- all white, or (if false) color of alpha carbon
}

-- rendering settings for Cn3D (mirrors StyleSettings class)
Cn3d-style-settings ::= SEQUENCE {
  name VisibleString OPTIONAL,                 -- a name (for favorites)
  protein-backbone Cn3d-backbone-style,        -- backbone styles
  nucleotide-backbone Cn3d-backbone-style,
  protein-sidechains Cn3d-general-style,       -- styles for other stuff
  nucleotide-sidechains Cn3d-general-style,
  heterogens Cn3d-general-style,
  solvents Cn3d-general-style,
  connections Cn3d-general-style,
  helix-objects Cn3d-general-style,
  strand-objects Cn3d-general-style,
  virtual-disulfides-on BOOLEAN,               -- virtual disulfides
  virtual-disulfide-color Cn3d-color,
  hydrogens-on BOOLEAN,                        -- hydrogens
  background-color Cn3d-color,                 -- background
  -- floating point parameters - scale-factor applies to all the following:
  scale-factor INTEGER,
  space-fill-proportion INTEGER,
  ball-radius INTEGER,
  stick-radius INTEGER,
  tube-radius INTEGER,
  tube-worm-radius INTEGER,
  helix-radius INTEGER,
  strand-width INTEGER,
  strand-thickness INTEGER,
  -- backbone labels (no labels if not present)
  protein-labels Cn3d-backbone-label-style OPTIONAL,
  nucleotide-labels Cn3d-backbone-label-style OPTIONAL,
  -- ion labels
  ion-labels BOOLEAN OPTIONAL
}

Cn3d-style-settings-set ::= SET OF Cn3d-style-settings

Cn3d-style-table-id ::= INTEGER

Cn3d-style-table-item ::= SEQUENCE {
  id Cn3d-style-table-id,
  style Cn3d-style-settings
}

-- the global settings, and a lookup table of styles for user annotations.
Cn3d-style-dictionary ::= SEQUENCE {
  global-style Cn3d-style-settings,
  style-table SEQUENCE OF Cn3d-style-table-item OPTIONAL
}

-- a range of residues in a chain, identified by MMDB residue-id
-- (e.g., numbered from 1)
Cn3d-residue-range ::= SEQUENCE {
  from Residue-id,
  to Residue-id
}

-- set of locations on a particular chain
Cn3d-molecule-location ::= SEQUENCE {
  molecule-id Molecule-id,   -- MMDB molecule id
  -- which residues; whole molecule implied if absent
  residues SEQUENCE OF Cn3d-residue-range OPTIONAL
}

-- set of locations on a particular structure object (e.g., a PDB/MMDB
-- structure), which may include multiple ranges of residues each on
-- multiple chains.
Cn3d-object-location ::= SEQUENCE {
  structure-id Biostruc-id,
  residues SEQUENCE OF Cn3d-molecule-location
}

-- information for an individual user annotation
Cn3d-user-annotation ::= SEQUENCE {
  name VisibleString,                         -- a (short) name for this annotation
  description VisibleString OPTIONAL,         -- an optional longer description
  style-id Cn3d-style-table-id,               -- how to draw this annotation
  residues SEQUENCE OF Cn3d-object-location,  -- which residues to cover
  is-on BOOLEAN                               -- whether this annotation is to be turned on in Cn3D
}

-- a GL-ordered transformation matrix
Cn3d-GL-matrix ::= SEQUENCE {
  m0 REAL, m1 REAL, m2 REAL, m3 REAL,
  m4 REAL, m5 REAL, m6 REAL, m7 REAL,
  m8 REAL, m9 REAL, m10 REAL, m11 REAL,
  m12 REAL, m13 REAL, m14 REAL, m15 REAL
}

-- a floating point 3d vector
Cn3d-vector ::= SEQUENCE {
  x REAL,
  y REAL,
  z REAL
}

-- parameters used to set up the camera in Cn3D
Cn3d-view-settings ::= SEQUENCE {
  camera-distance REAL,          -- camera on +Z axis this distance from origin
  camera-angle-rad REAL,         -- camera angle
  camera-look-at-X REAL,         -- X,Y of point in Z=0 plane camera points at
  camera-look-at-Y REAL,
  camera-clip-near REAL,         -- distance of clipping planes from camera
  camera-clip-far REAL,
  matrix Cn3d-GL-matrix,         -- transformation of objects in the scene
  rotation-center Cn3d-vector    -- center of rotation of whole scene
}

-- The list of annotations for a given CDD/mime. If residue regions overlap
-- between annotations that are turned on, the last annotation in this list
-- that contains these residues will be used as the display style for these
-- residues.
-- Also contains the current viewpoint, so that user's camera angle
-- can be stored and reproduced, for illustrations, on-line figures, etc.
Cn3d-user-annotations ::= SEQUENCE {
  annotations SEQUENCE OF Cn3d-user-annotation OPTIONAL,
  view Cn3d-view-settings OPTIONAL
}

END

-- docsum.asn
-- ============================================
-- ::DATATOOL:: Generated from "docsum_3.0.xsd"
-- ::DATATOOL:: by application DATATOOL version 1.8.6
-- ::DATATOOL:: on 05/02/2008 10:59:28
-- ============================================
-- edited with XMLSPY v5 rel. 4 U (http://www.xmlspy.com) by Michael Kholodov (National Library of Medicine)
-- edited with XMLSpy v2005 rel.
-- 3 U (http://www.altova.com) by Michael Feolo (NCBI/NLM/NIH)

Docsum-3-0 DEFINITIONS ::=
BEGIN

Assay ::= SEQUENCE {
  attlist SET {
    handle VisibleString OPTIONAL,
    batch VisibleString OPTIONAL,
    batchId INTEGER OPTIONAL,
    batchType ENUMERATED {
      snpassay (1),
      validation (2),
      doublehit (3)
    } OPTIONAL,
    molType ENUMERATED {
      genomic (1),
      cDNA (2),
      mito (3),
      chloro (4)
    } OPTIONAL,
    sampleSize INTEGER OPTIONAL,
    population VisibleString OPTIONAL,
    linkoutUrl VisibleString OPTIONAL
  },
  method SEQUENCE {
    eMethod SEQUENCE {
      attlist SET {
        name VisibleString OPTIONAL,   --Submitters method identifier
        id VisibleString OPTIONAL      --dbSNP method identifier
      },
      exception VisibleString          --description of deviation from/addition to given method
    } OPTIONAL
  },
  taxonomy SEQUENCE {
    attlist SET {
      id INTEGER,                      --NCBI taxonomy ID for variation
      organism VisibleString OPTIONAL
    },
    taxonomy NULL
  },
  strains SEQUENCE OF VisibleString OPTIONAL,
  comment VisibleString OPTIONAL,
  citation SEQUENCE OF VisibleString OPTIONAL
}

--A collection of genome sequence records (curated gene regions (NG's),
--contigs (NWNT's) and chromosomes (NC/AC's) produced by a genome sequence
--project. Structure is populated from ContigInfo tables.
Assembly ::= SEQUENCE {
  attlist SET {
    dbSnpBuild INTEGER,                --dbSNP build number defining the rsid set aligned to this assembly
    genomeBuild VisibleString,         --assembly build number with possible 'subbuild' version numbers
                                       --to reflect updates in gene annotation (human e.g. 34_3, 35_1, 36_1)
    groupLabel VisibleString OPTIONAL, --High-level classification of the assembly to distinguish reference
                                       --projects from alternate solutions. GroupLabel field from
                                       --organism/build-specific ContigInfo tables. "reference" is
                                       --occasionally used as the preferred assembly; standards will
                                       --converge as additional organism genome projects are finished.
                                       --Note that some organism assembly names include extended
                                       --characters like '~' and '/' that may be incompatible with OS
                                       --filename conventions.
    assemblySource VisibleString OPTIONAL,  --Name of the group(s) or organization(s) that generated the assembly
    current BOOLEAN OPTIONAL,          --Marks the current genomic assembly
    reference BOOLEAN OPTIONAL
  },
  component SEQUENCE OF Component OPTIONAL,
  snpStat SEQUENCE {
    attlist SET {
      mapWeight ENUMERATED {
        unmapped (1),
        unique-in-contig (2),
        two-hits-in-contig (3),
        less-10-hits (4),
        multiple-hits (5)
      },                                    --summary measure of placement precision in the assembly
      chromCount INTEGER OPTIONAL,          --number of distinct chromosomes in the mapset
      placedContigCount INTEGER OPTIONAL,   --number of distinct contigs [ gi | accession[.version] ] in the mapset
      unplacedContigCount INTEGER OPTIONAL, --number of sequence positions to a contig with unknown chromosomal assignment
      seqlocCount INTEGER OPTIONAL,         --total number of sequence positions in the mapset
      hapCount INTEGER OPTIONAL             --Number of hits to alternative genomic haplotypes (e.g. HLA DR
                                            --region, KIR, or pseudo-autosomal regions like PAR) within the
                                            --assembly mapset. Note that positions on haplotypes defined in
                                            --other assemblies (a different assembly_group_label value) will
                                            --not be counted in this value.
    },
    snpStat NULL
  }
}

--URL value from dbSNP_main.BaseURL links table. attributes provide context
--information and URL id that is referenced within individual refSNP objects.
BaseURL ::= SEQUENCE {
  attlist SET {
    urlId INTEGER OPTIONAL,               --Resource identifier from dbSNP_main.baseURL.
    resourceName VisibleString OPTIONAL,  --Name of linked resource
    resourceId VisibleString OPTIONAL     --identifier expected by resource for URL
  },
  baseURL VisibleString
}

Component ::= SEQUENCE {
  attlist SET {
    componentType ENUMERATED {
      contig (1),
      mrna (2)
    } OPTIONAL,                           --type of component: chromosome, contig, gene_region, etc.
    ctgId INTEGER OPTIONAL,               --dbSNP contig_id used to join on contig hit / mapset data to these assembly properties
    accession VisibleString OPTIONAL,     --Accession[.version] for the sequence component
    name VisibleString OPTIONAL,          --contig name defined as either a submitter local id, element of a
                                          --whole genome assembly set, or internal NCBI local id
    chromosome VisibleString OPTIONAL,    --Organism appropriate chromosome tag, 'Un' reserved for default
                                          --case of unplaced components
    start INTEGER OPTIONAL,               --component starting position on the chromosome (base 0 inclusive)
    end INTEGER OPTIONAL,                 --component ending position on the chromosome (base 0 inclusive)
    orientation ENUMERATED {
      fwd (1),
      rev (2),
      unknown (3)
    } OPTIONAL,                           --orientation of this component to chromosome, forward (fwd) = 0,
                                          --reverse (rev) = 1, unknown = NULL in ContigInfo.orient.
    gi VisibleString OPTIONAL,            --NCBI gi for component sequence (equivalent to accession.version)
                                          --for nucleotide sequence.
    groupTerm VisibleString OPTIONAL,     --Identifier label for the genome assembly that defines the contigs
                                          --in this mapset and their placement within the organism genome.
    contigLabel VisibleString OPTIONAL    --Display label for component
  },
  mapLoc SEQUENCE OF MapLoc
}

--Set of dbSNP refSNP docsums
ExchangeSet ::= SEQUENCE {
  attlist SET {
    setType VisibleString OPTIONAL,       --set-type: full dump; from query; single refSNP
    setDepth VisibleString OPTIONAL,      --content depth: brief XML (only refSNP properties and summary
                                          --subSNP element content); full XML (full refSNP, full subSNP
                                          --content; all flanking sequences)
    specVersion VisibleString OPTIONAL,   --version number of docsum.asn/docsum.dtd specification
    dbSnpBuild INTEGER OPTIONAL,          --build number of database for this export
    generated VisibleString OPTIONAL      --Generated date
  },
  sourceDatabase SEQUENCE {
    attlist SET {
      taxId INTEGER,                      --NCBI taxonomy ID for variation
      organism VisibleString,             --common name for species used as part of database name.
      dbSnpOrgAbbr VisibleString OPTIONAL,  --organism abbreviation used in dbSNP.
      gpipeOrgAbbr VisibleString OPTIONAL   --organism abbreviation used within NCBI genome pipeline data dumps.
    },
    sourceDatabase NULL
  },
  rs SEQUENCE OF Rs OPTIONAL,
  assay Assay OPTIONAL,
  query SEQUENCE {
    attlist SET {
      date VisibleString OPTIONAL,        --yyyy-mm-dd
      string VisibleString OPTIONAL       --Query terms or search constraints
    },
    query NULL
  } OPTIONAL,
  summary SEQUENCE {
    attlist SET {
      numRsIds INTEGER OPTIONAL,          --Total number of refsnp-ids in this exchange set
      totalSeqLength INTEGER OPTIONAL,    --Total length of exemplar flanking sequences
      numContigHits INTEGER OPTIONAL,     --Total number of contig locations from SNPContigLoc
      numGeneHits INTEGER OPTIONAL,       --Total number of locus ids from SNPContigLocusId
      numGiHits INTEGER OPTIONAL,         --Total number of gi hits from MapLink
      num3dStructs INTEGER OPTIONAL,      --Total number of 3D structures from SNP3D
      numAlleleFreqs INTEGER OPTIONAL,    --Total number of allele frequencies from SubPopAllele
      numStsHits INTEGER OPTIONAL,        --Total number of STS hits from SnpInSts
      numUnigeneCids INTEGER OPTIONAL     --Total number of unigene cluster ids from UnigeneSnp
    },
    summary NULL
  },
  baseURL SEQUENCE OF BaseURL
}

--functional relationship of SNP (and possibly alleles) to genes at contig
--location as defined in organism-specific bxxx_SNPContigLocusId_xxx tables.
FxnSet ::= SEQUENCE {
  attlist SET {
    geneId INTEGER OPTIONAL,           --gene-id of gene as aligned to contig
    symbol VisibleString OPTIONAL,     --symbol (official if present in Entrez Gene) of gene
    mrnaAcc VisibleString OPTIONAL,    --mRNA accession if variation in transcript
    mrnaVer INTEGER OPTIONAL,          --mRNA sequence version if variation is in transcript
    protAcc VisibleString OPTIONAL,    --protein accession if variation in protein
    protVer INTEGER OPTIONAL,          --protein version if variation is in protein
    --variation in region of gene, but not in transcript - deprecated
    -- synonymous change
    -- nonsynonymous change - deprecated
    -- untranslated region - deprecated
    -- splice-site - deprecated
    -- contig reference
    -- deprecated
    -- coding: synonymy unknown
    -- In gene segment with null mrna and protein. ex. IGLV4-69. geneId=28784
    -- within 3' 0.5kb to a gene.
    -- changes to STOP codon.
    -- alters codon to make an altered amino acid in protein product.
    -- indel snp causing frameshift.
    -- 3 prime untranslated region
    -- 5 prime untranslated region
    -- 3 prime acceptor dinucleotide
    -- 5 prime donor dinucleotide
    fxnClass ENUMERATED {
      locus-region (1),
      coding-unknown (2),
      coding-synonymous (3),
      coding-nonsynonymous (4),
      mrna-utr (5),
      intron (6),
      splice-site (7),
      reference (8),
      coding-exception (9),
      synonymy-unknown (10),
      gene-segment (11),
      near-gene-3 (12),
      near-gene-5 (13),
      nonsense (14),
      missense (15),
      frameshift (16),
      utr-3 (17),
      utr-5 (18),
      splice-3 (19),
      splice-5 (20)
    } OPTIONAL,
    readingFrame INTEGER OPTIONAL,
    allele VisibleString OPTIONAL,     --variation allele: * suffix indicates allele of contig at this location
    residue VisibleString OPTIONAL,    --translated amino acid residue for allele
    aaPosition INTEGER OPTIONAL        --position of the variant residue in peptide sequence
  },
  fxnSet NULL
}

--Position of a single hit of a variation on a contig
MapLoc ::= SEQUENCE {
  attlist SET {
    asnFrom INTEGER,                   --beginning of variation as feature on contig
    asnTo INTEGER,                     --end position of variation as feature on contig
    --defines the seq-loc symbol if asn_from != asn_to
    --insertion on contig
    --asn-from = asn-to write as 'asn-from'
    --deletion on contig
    locType ENUMERATED {
      insertion (1),
      exact (2),
      deletion (3),
      range-ins (4),
      range-exact (5),
      range-del (6)
    },
    alnQuality REAL OPTIONAL,          --alignment quality
    orient ENUMERATED {
      forward (1),
      reverse (2)
    } OPTIONAL,                        --orientation of refSNP sequence to contig sequence
    physMapInt INTEGER OPTIONAL,       --chromosome position as integer for sorting
    leftFlankNeighborPos INTEGER OPTIONAL,    --nearest aligned position in 5' flanking sequence of snp
    rightFlankNeighborPos INTEGER OPTIONAL,   --nearest aligned position in 3' flanking sequence of snp
    leftContigNeighborPos INTEGER OPTIONAL,   --nearest aligned position in 5' contig alignment of snp
    rightContigNeighborPos INTEGER OPTIONAL,  --nearest aligned position in 3' contig alignment of snp
    numberOfMismatches INTEGER OPTIONAL,      --number of Mismatched positions in this alignment
    numberOfDeletions INTEGER OPTIONAL,       --number of deletions in this alignment
    numberOfInsertions INTEGER OPTIONAL       --number of insertions in this alignment
  },
  fxnSet SEQUENCE OF FxnSet OPTIONAL
}

PrimarySequence ::= SEQUENCE {
  attlist SET {
    dbSnpBuild INTEGER,
    gi INTEGER,
    source ENUMERATED {
      submitter (1),
      blastmb (2),
      xm (3)
    } OPTIONAL,
    accession VisibleString OPTIONAL
  },
  mapLoc SEQUENCE OF MapLoc
}

--defines the docsum structure for refSNP clusters, where a refSNP cluster (rs)
--is a grouping of individual dbSNP submissions that all refer to the same
--variation. The refsnp provides a single unified record for annotation of
--NCBI resources such as reference genome sequence.
Rs ::= SEQUENCE { attlist SET { rsId INTEGER, --refSNP (rs) number snpClass ENUMERATED { snp (1), in-del (2), heterozygous (3), microsatellite (4), named-locus (5), no-variation (6), mixed (7), multinucleotide-polymorphism (8) }, snpType ENUMERATED { notwithdrawn (1), artifact (2), gene-duplication (3), duplicate-submission (4), notspecified (5), ambiguous-location (6), low-map-quality (7) }, molType ENUMERATED { genomic (1), cDNA (2), mito (3), chloro (4), unknown (5) }, validProbMin INTEGER OPTIONAL, --minimum reported success rate of all submissions in cluster validProbMax INTEGER OPTIONAL, --maximum reported success rate of all submissions in cluster genotype BOOLEAN OPTIONAL, --at least one genotype reported for this refSNP bitField VisibleString OPTIONAL }, het SEQUENCE { attlist SET { type ENUMERATED { est (1), obs (2) }, --Est=Estimated average het from allele frequencies, Obs=Observed from genotype data value REAL, --Heterozygosity stdError REAL OPTIONAL --Standard error of Het estimate }, het NULL } OPTIONAL, validation SEQUENCE { attlist SET { byCluster BOOLEAN OPTIONAL, --at least one subsnp in cluster has frequency data submitted byFrequency BOOLEAN OPTIONAL, --cluster has 2+ submissions, with 1+ submissions assayed with a non-computational method byOtherPop BOOLEAN OPTIONAL, by2Hit2Allele BOOLEAN OPTIONAL, --cluster has 2+ submissions, with 1+ submissions assayed with a non-computational method byHapMap BOOLEAN OPTIONAL --TBD }, otherPopBatchId SEQUENCE OF INTEGER OPTIONAL, --dbSNP batch-id's for other pop snp validation data. twoHit2AlleleBatchId SEQUENCE OF INTEGER OPTIONAL --dbSNP batch-id's for double-hit snp validation data. Use batch-id to get methods, etc. 
}, --date the refsnp cluster was instantiated create SEQUENCE { --date the refsnp cluster was instantiated attlist SET { build INTEGER OPTIONAL, --build number when the cluster was created date VisibleString OPTIONAL --yyyy-mm-dd }, --date the refsnp cluster was instantiated create NULL }, --date the refsnp cluster was instantiated --most recent date the cluster was updated (member added or deleted) update SEQUENCE { --most recent date the cluster was updated (member added or deleted) attlist SET { build INTEGER OPTIONAL, --build number when the cluster was updated date VisibleString OPTIONAL --yyyy-mm-dd }, --most recent date the cluster was updated (member added or deleted) update NULL } OPTIONAL, --most recent date the cluster was updated (member added or deleted) sequence SEQUENCE { attlist SET { exemplarSs INTEGER --dbSNP ss# selected as source of refSNP flanking sequence, ss# part of ss-list below }, --5' sequence that flanks the variation --5' sequence that flanks the variation seq5 VisibleString OPTIONAL, --list of all nucleotide alleles observed in ss-list members, correcting for reverse complementation of memebers reported in reverse orientation --list of all nucleotide alleles observed in ss-list members, correcting for reverse complementation of memebers reported in reverse orientation observed VisibleString, --3' sequence that flanks the variation --3' sequence that flanks the variation seq3 VisibleString OPTIONAL }, ss SEQUENCE OF Ss, assembly SEQUENCE OF Assembly OPTIONAL, primarySequence SEQUENCE OF PrimarySequence OPTIONAL, rsStruct SEQUENCE OF RsStruct OPTIONAL, rsLinkout SEQUENCE OF RsLinkout OPTIONAL, mergeHistory SEQUENCE OF SEQUENCE { attlist SET { rsId INTEGER, --previously issued rs id whose member assays have now been merged buildId INTEGER OPTIONAL, --build id when rs id was merged into parent rs orientFlip BOOLEAN OPTIONAL --TRUE if strand of rs id is reverse to parent object's current strand }, mergeHistory NULL } OPTIONAL, hgvs SEQUENCE 
OF VisibleString OPTIONAL  -- HGVS name list
}

--link data for another resource
RsLinkout ::= SEQUENCE {
  attlist SET {
    resourceId VisibleString,  --BaseURLList.url_id
    linkValue VisibleString    --value to append to ResourceURL.base-url for complete link
  },
  --link data for another resource
  rsLinkout NULL
}

--structure information for SNP
RsStruct ::= SEQUENCE {
  attlist SET {
    protAcc VisibleString OPTIONAL,       --accession of the protein with variation
    protGi INTEGER OPTIONAL,              --GI of the protein with variation
    protLoc INTEGER OPTIONAL,             --position of the residue for the protein GI
    protResidue VisibleString OPTIONAL,   --residue specified for protein at prot-loc location
    rsResidue VisibleString OPTIONAL,     --alternative residue specified by variation sequence
    structGi INTEGER OPTIONAL,            --GI of the structure neighbor
    structLoc INTEGER OPTIONAL,           --position of the residue for the structure GI
    structResidue VisibleString OPTIONAL  --residue specified for protein at struct-loc location
  },
  --structure information for SNP
  rsStruct NULL
}

--data for an individual submission to dbSNP
Ss ::= SEQUENCE {
  attlist SET {
    ssId INTEGER,          --dbSNP accession number for submission
    handle VisibleString,  --Tag for the submitting laboratory
    batchId INTEGER,       --dbSNP number for batch submission
    --submission (ss#)
    --submitter ID
    locSnpId VisibleString OPTIONAL,
    subSnpClass ENUMERATED {
      snp (1),
      in-del (2),
      heterozygous (3),
      microsatellite (4),
      named-locus (5),
      no-variation (6),
      mixed (7),
      multinucleotide-polymorphism (8)
    } OPTIONAL,  --SubSNP classification by type of variation
    --orientation of refsnp cluster members to refsnp cluster sequence
    --ss flanking sequence is in same orientation as seq-ss-exemplar
    --flanking sequence and alleles are reverse complement of refSNP as defined by ss exemplar
    orient ENUMERATED {
      forward (1),
      reverse (2)
    } OPTIONAL,
    strand ENUMERATED {
      top (1),
      bottom (2)
    } OPTIONAL,  --strand is defined as TOP/BOTTOM by nature of flanking nucleotide sequence
    molType ENUMERATED {
      genomic (1),
      cDNA (2),
      mito (3),
      chloro (4),
      unknown (5)
    } OPTIONAL,  --moltype from Batch table
    buildId INTEGER OPTIONAL,  --dbSNP build number when ss# was added to a refSNP (rs#) cluster
    --class of method used to assay for the variation
    --Denaturing High Pressure Liquid Chromatography used to detect SNP
    --a hybridization method (e.g. chip) was used to assay for variation
    --variation was mined from sequence alignment with software
    --samples were sequenced and resulting alignment used to define variation
    methodClass ENUMERATED {
      dHPLC (1),
      hybridize (2),
      computed (3),
      sSCP (4),
      other (5),
      unknown (6),
      rFLP (7),
      sequence (8)
    } OPTIONAL,
    --subsnp has been experimentally validated by submitter
    --subsnp has frequency data submitted
    --has 2+ submissions, with 1+ submission assayed with a non-computational method
    validated ENUMERATED {
      by-submitter (1),
      by-frequency (2),
      by-cluster (3)
    } OPTIONAL,
    linkoutUrl VisibleString OPTIONAL  --append loc-snp-id to this base URL to construct a pointer to submitter data.
  },
  sequence SEQUENCE {
    --5' sequence that flanks the variation
    seq5 VisibleString OPTIONAL,
    --list of all nucleotide alleles observed in ss-list members, correcting for reverse complementation of members reported in reverse orientation
    observed VisibleString,
    --3' sequence that flanks the variation
    seq3 VisibleString OPTIONAL
  }
}

END

-- entrez2.asn
--$Revision: 1.12 $********************************************
--
--  entrez2.asn
--   Version 1
--
--   API to Entrez Engine (1999)
--   Retrieval of sequence done through ID1 module
--     Also, SeqId queries
--   Retrieval of PubMed records through PubMed module
--   Retrieval of Structures through PubStruct module
--   Retrieval of Genomes through Genomes module
--
--***************************************************************

NCBI-Entrez2 DEFINITIONS ::=
BEGIN

--**************************************
--  Entrez2 common elements
--**************************************

Entrez2-dt ::= INTEGER              -- a date/time stamp
Entrez2-db-id ::= VisibleString     -- database name
Entrez2-field-id ::= VisibleString  -- field name
Entrez2-link-id ::= VisibleString   -- link name

-- list of record UIDs
Entrez2-id-list ::= SEQUENCE {
    db Entrez2-db-id,           -- the database
    num INTEGER,                -- number of uids
    uids OCTET STRING OPTIONAL  -- coded uids
}

--****************************************
-- The structured form of the boolean is the same in a request or
--   return so that it is easy to modify a query. This means some
--   fields are only considered in a return value, like counts
--   by term. They are ignored in a request.
-- The structured boolean supports specific boolean components,
--   an unparsed string in query syntax, and UID lists as
--   elements of a boolean.
--   This makes it possible to submit
--   a single string, a fully structured query, or a mixture.
-- The UID list feature means one can also perform refinements
--   on UID lists from links, neighbors, or other operations.
--   UID list query now returns a history key for subsequent use.
--*****************************************

-- A query: the target database, the boolean expression as an ordered list
-- of elements, and optional limits.
Entrez2-boolean-exp ::= SEQUENCE {
    db Entrez2-db-id,                         -- database for this query
    exp SEQUENCE OF Entrez2-boolean-element,  -- the Boolean
    limits Entrez2-limits OPTIONAL            -- date bounds
}

-- One element of a boolean expression.
Entrez2-boolean-element ::= CHOICE {
    str VisibleString,          -- unparsed query string
    op Entrez2-operator,        -- logical operator
    term Entrez2-boolean-term,  -- fielded term
    ids Entrez2-id-list,        -- list of UIDs - returns history key in reply
    key VisibleString           -- history key for uploaded UID list or other query
}

--*****************************************
-- the term is both sent and received as parts of
--   queries and replies. The attributes can be filled in
--   by either, but may be ignored by one or the other. Flags are
--   shown if a real value is only of use in the query (Q), only
--   in the reply (R), or used in both (B)
-- do-not-explode and do-not-translate are only actively set
--   by the query.
--   However, they retain those settings in the
--   return value so they can be resent with a new query
--******************************************

-- A fielded term; the Q/R/B flags mark use in query, reply, or both.
Entrez2-boolean-term ::= SEQUENCE {
    field Entrez2-field-id,                  -- B
    term VisibleString,                      -- B
    term-count INTEGER OPTIONAL,             -- R count of records with term
    do-not-explode BOOLEAN DEFAULT FALSE,    -- Q do not explode term
    do-not-translate BOOLEAN DEFAULT FALSE   -- Q do not use synonyms
}

-- Operators and grouping tokens usable inside a boolean expression.
Entrez2-operator ::= INTEGER {
    and (1),
    or (2),
    butnot (3),
    range (4),
    left-paren (5),
    right-paren (6)
}

--***************************************
--  Entrez2 Request types
--***************************************

--****************************************
-- The basic request wrapper leaves space for a version which
--   allows the server to support older clients
-- The tool parameter allows us to log the client types for
--   debugging and tuning
-- The cookie is a session ID returned by the first Entrez2-reply
--****************************************

-- a standard request
Entrez2-request ::= SEQUENCE {
    request E2Request,                  -- the actual request
    version INTEGER,                    -- ASN1 spec version
    tool VisibleString OPTIONAL,        -- tool making request
    cookie VisibleString OPTIONAL,      -- history session cookie
    use-history BOOLEAN DEFAULT FALSE   -- request should use history
}

-- request types
E2Request ::= CHOICE {
    get-info NULL,                          -- ask for info block
    eval-boolean Entrez2-eval-boolean,      -- Boolean lookup
    get-docsum Entrez2-id-list,             -- get the DocSums
    get-term-pos Entrez2-term-query,        -- get position in term list
    get-term-list Entrez2-term-pos,         -- get Term list by position
    get-term-hierarchy Entrez2-hier-query,  -- get a hierarchy from a term
    get-links Entrez2-get-links,            -- get specific links from a UID list
    get-linked Entrez2-get-links,           -- get subset of UID list which has links
    get-link-counts Entrez2-id              -- get all links from one UID
}

--****************************************
-- When evaluating a boolean query the count of hits is always
--   returned.
--   In addition, you can request the UIDs of the hits or
--   the parsed query in structured form (with counts by term),
--   or both.
--****************************************

-- evaluate Boolean query
Entrez2-eval-boolean ::= SEQUENCE {
    return-UIDs BOOLEAN DEFAULT FALSE,   -- return UID list?
    return-parse BOOLEAN DEFAULT FALSE,  -- return parsed query?
    query Entrez2-boolean-exp            -- the actual query
}

-- A begin/end pair of date stamps plus a field id.
-- NOTE(review): type-date presumably names the date field being bounded; confirm.
Entrez2-dt-filter ::= SEQUENCE {
    begin-date Entrez2-dt,
    end-date Entrez2-dt,
    type-date Entrez2-field-id
}

-- date limits
Entrez2-limits ::= SEQUENCE {
    filter-date Entrez2-dt-filter OPTIONAL,
    max-UIDs INTEGER OPTIONAL,     -- max UIDs to return in list
    offset-UIDs INTEGER OPTIONAL   -- start partway into UID list
}

-- a single UID
Entrez2-id ::= SEQUENCE {
    db Entrez2-db-id,
    uid INTEGER
}

-- A term within one field of one database.
Entrez2-term-query ::= SEQUENCE {
    db Entrez2-db-id,
    field Entrez2-field-id,
    term VisibleString
}

-- Hierarchy lookup, keyed by either a term or a Taxonomy ID.
Entrez2-hier-query ::= SEQUENCE {
    db Entrez2-db-id,
    field Entrez2-field-id,
    term VisibleString OPTIONAL,  -- query with either term
    txid INTEGER OPTIONAL         -- or Taxonomy ID
}

-- request portions of term list
Entrez2-term-pos ::= SEQUENCE {
    db Entrez2-db-id,
    field Entrez2-field-id,
    first-term-pos INTEGER,
    number-of-terms INTEGER OPTIONAL  -- optional for hierarchy only
}

-- request links of one type
Entrez2-get-links ::= SEQUENCE {
    uids Entrez2-id-list,             -- docs to link from
    linktype Entrez2-link-id,         -- type of link
    max-UIDS INTEGER OPTIONAL,        -- maximum number of links to return
    count-only BOOLEAN OPTIONAL,      -- return only the counts
    parents-persist BOOLEAN OPTIONAL  -- allow original uids in list
}

--**********************************************************
-- Replies from the Entrez server
--   all replies contain the date/time stamp when they were executed
--   to do regular date bounded searches use this value+1 to search
--   again later instead of recording the date/time on the client machine
--   the cookie allows a simple key string to represent UID lists in the history
--**********************************************************

-- Wrapper around every server reply.
Entrez2-reply ::= SEQUENCE {
    reply E2Reply,                  -- the actual reply
    dt Entrez2-dt,                  -- date/time stamp from server
    server VisibleString,           -- server version info
    msg VisibleString OPTIONAL,     -- possibly a message to the user
    key VisibleString OPTIONAL,     -- history key for query
    cookie VisibleString OPTIONAL   -- history session cookie
}

-- Reply payload; the alternative names mirror those of E2Request.
E2Reply ::= CHOICE {
    error VisibleString,                 -- if nothing can be returned
    get-info Entrez2-info,               -- the database info
    eval-boolean Entrez2-boolean-reply,  -- result of boolean query
    get-docsum Entrez2-docsum-list,
    get-term-pos INTEGER,                -- position of the term
    get-term-list Entrez2-term-list,
    get-term-hierarchy Entrez2-hier-node,
    get-links Entrez2-link-set,
    get-linked Entrez2-id-list,
    get-link-counts Entrez2-link-count-list
}

-- describes all the databases
Entrez2-info ::= SEQUENCE {
    db-count INTEGER,                    -- number of databases
    build-date Entrez2-dt,               -- build date of databases
    db-info SEQUENCE OF Entrez2-db-info  -- info by database
}

-- info for one database
Entrez2-db-info ::= SEQUENCE {
    db-name Entrez2-db-id,    -- internal name
    db-menu VisibleString,    -- short name for menu
    db-descr VisibleString,   -- longer explanatory name
    doc-count INTEGER,        -- total number of records
    field-count INTEGER,      -- number of field types
    fields SEQUENCE OF Entrez2-field-info,
    link-count INTEGER,       -- number of link types
    links SEQUENCE OF Entrez2-link-info,
    docsum-field-count INTEGER,
    docsum-fields SEQUENCE OF Entrez2-docsum-field-info
}

-- info about one field
Entrez2-field-info ::= SEQUENCE {
    field-name Entrez2-field-id,  -- the internal name
    field-menu VisibleString,     -- short string suitable for menu
    field-descr VisibleString,    -- longer, explanatory name
    term-count INTEGER,           -- number of terms in field
    is-date BOOLEAN OPTIONAL,
    is-numerical BOOLEAN OPTIONAL,
    single-token BOOLEAN OPTIONAL,
    hierarchy-avail BOOLEAN OPTIONAL,
    is-rangable BOOLEAN OPTIONAL,
    is-truncatable BOOLEAN OPTIONAL
}
-- info about one link
Entrez2-link-info ::= SEQUENCE {
    link-name Entrez2-link-id,
    link-menu VisibleString,
    link-descr VisibleString,
    db-to Entrez2-db-id,        -- database it links to
    data-size INTEGER OPTIONAL  -- size of link data element
}

-- Value kinds a docsum field can hold.
Entrez2-docsum-field-type ::= INTEGER {
    string (1),
    int (2),
    float (3),
    date-pubmed (4)
}

-- Name, description and value type of one docsum field.
Entrez2-docsum-field-info ::= SEQUENCE {
    field-name VisibleString,
    field-description VisibleString,
    field-type Entrez2-docsum-field-type
}

-- Result of evaluating a boolean query.
Entrez2-boolean-reply ::= SEQUENCE {
    count INTEGER,                      -- records hit
    uids Entrez2-id-list OPTIONAL,      -- if uids requested
    query Entrez2-boolean-exp OPTIONAL  -- if parsed query requested
}

Entrez2-docsum-list ::= SEQUENCE {
    count INTEGER,                  -- number of docsums
    list SEQUENCE OF Entrez2-docsum
}

Entrez2-docsum ::= SEQUENCE {
    uid INTEGER,                    -- primary uid (gi, pubmedid)
    docsum-data SEQUENCE OF Entrez2-docsum-data
}

-- One field-name / field-value pair of a docsum.
Entrez2-docsum-data ::= SEQUENCE {
    field-name VisibleString,
    field-value VisibleString
}

Entrez2-term-list ::= SEQUENCE {
    pos INTEGER,                  -- position of first term in list
    num INTEGER,                  -- number of terms in list
    list SEQUENCE OF Entrez2-term
}

Entrez2-term ::= SEQUENCE {
    term VisibleString,
    txid INTEGER OPTIONAL,
    count INTEGER,                -- count of records with this term
    is-leaf-node BOOLEAN OPTIONAL -- used for hierarchy only
}

-- for hierarchical index
Entrez2-hier-node ::= SEQUENCE {
    cannonical-form VisibleString,             -- the official name
    lineage-count INTEGER,                     -- number of strings in lineage
    lineage SEQUENCE OF Entrez2-term OPTIONAL, -- strings up the lineage
    child-count INTEGER,                       -- number of children of this node
    children SEQUENCE OF Entrez2-term,         -- the children
    is-ambiguous BOOLEAN OPTIONAL              -- used for hierarchy only
}

--*******************************************
--  Links are returned in sets also using OCTET STRINGS
--*******************************************

-- set of links
Entrez2-link-set ::= SEQUENCE {
    ids Entrez2-id-list,
    data-size INTEGER OPTIONAL,  -- size of data elements
    data OCTET STRING OPTIONAL   -- coded scores
}

-- all links from 1 uid
Entrez2-link-count-list ::= SEQUENCE {
    link-type-count INTEGER,     -- number of types of links
    links SEQUENCE OF Entrez2-link-count
}

-- link count of one type
Entrez2-link-count ::= SEQUENCE {
    link-type Entrez2-link-id,
    link-count INTEGER
}

END

-- entrezgene.asn
--$Revision: 142744 $
--**********************************************************************
--
--  NCBI Entrezgene
--  by James Ostell, 2001
--
--  Generic "Gene" object for Entrez Genes
--    This object is designed to incorporate a subset of information from
--    LocusLink and from records in Entrez Genomes to provide indexing,
--    linkage, and a useful summary report in Entrez for "Genes"
--
--**********************************************************************

NCBI-Entrezgene DEFINITIONS ::=
BEGIN

EXPORTS Entrezgene, Entrezgene-Set, Gene-track;

IMPORTS Gene-ref FROM NCBI-Gene
        Prot-ref FROM NCBI-Protein
        BioSource FROM NCBI-BioSource
        RNA-ref FROM NCBI-RNA
        Dbtag, Date FROM NCBI-General
        Seq-loc FROM NCBI-Seqloc
        Pub FROM NCBI-Pub;

--********************************************
-- Entrezgene is the "document" indexed in Entrez
--  and presented in the full display
-- It also contains the Entrez ID and date information
--*******************************************
Entrezgene ::= SEQUENCE {
    track-info Gene-track OPTIONAL,  -- not in submission, but in retrieval
    type INTEGER {                   -- type of Gene
        unknown (0),
        tRNA (1),
        rRNA (2),
        snRNA (3),
        scRNA (4),
        snoRNA (5),
        protein-coding (6),
        pseudo (7),
        transposon (8),
        miscRNA (9),
        ncRNA (10),
        other (255)
    },
    source BioSource,
    gene Gene-ref,                   -- for locus-tag see note 3
    prot Prot-ref OPTIONAL,
    rna RNA-ref OPTIONAL,
    summary VisibleString OPTIONAL,  -- short summary
    location SEQUENCE OF Maps OPTIONAL,
    gene-source Gene-source OPTIONAL,          -- NCBI source to Entrez
    locus SEQUENCE OF Gene-commentary OPTIONAL,
        -- location of gene on chromosome (if known)
        -- and all information about products
        -- (mRNA, proteins and so on)
    properties SEQUENCE OF Gene-commentary OPTIONAL,
    refgene SEQUENCE OF Gene-commentary OPTIONAL,    -- NG for this?
    homology SEQUENCE OF Gene-commentary OPTIONAL,
    comments SEQUENCE OF Gene-commentary OPTIONAL,
    unique-keys SEQUENCE OF Dbtag OPTIONAL,              -- see note 3
    xtra-index-terms SEQUENCE OF VisibleString OPTIONAL, -- see note 2
    xtra-properties SEQUENCE OF Xtra-Terms OPTIONAL,     -- see note 2
    xtra-iq SEQUENCE OF Xtra-Terms OPTIONAL,             -- see note 2
    non-unique-keys SEQUENCE OF Dbtag OPTIONAL
}

Entrezgene-Set ::= SET OF Entrezgene

-- Document id, status and dates for one Entrezgene record.
Gene-track ::= SEQUENCE {
    geneid INTEGER,          -- required unique document id
    status INTEGER {
        live (0),
        secondary (1),       -- synonym with merged
        discontinued (2),    -- 'deleted', still index and display to public
        newentry (3)         -- for GeneRif submission
    } DEFAULT live,
    current-id SEQUENCE OF Dbtag OPTIONAL,  -- see note 1 below
    create-date Date,        -- date created in Entrez
    update-date Date,        -- last date updated in Entrez
    discontinue-date Date OPTIONAL
}

Gene-source ::= SEQUENCE {
    src VisibleString,                    -- key to the source within NCBI locuslink, Ecoli, etc
    src-int INTEGER OPTIONAL,             -- eg. locuslink id
    src-str1 VisibleString OPTIONAL,      -- eg. chromosome1
    src-str2 VisibleString OPTIONAL,      -- see note 3
    gene-display BOOLEAN DEFAULT FALSE,   -- do we have a URL for gene display?
    locus-display BOOLEAN DEFAULT FALSE,  -- do we have a URL for map/locus display?
    extra-terms BOOLEAN DEFAULT FALSE     -- do we have a URL for extra indexing terms?
}

Gene-commentary ::= SEQUENCE {
    type INTEGER {           -- type of Gene Commentary
        genomic (1),
        pre-RNA (2),
        mRNA (3),
        rRNA (4),
        tRNA (5),
        snRNA (6),
        scRNA (7),
        peptide (8),
        other-genetic (9),
        genomic-mRNA (10),
        cRNA (11),
        mature-peptide (12),
        pre-protein (13),
        miscRNA (14),
        snoRNA (15),
        property (16),       -- used to display tag/value pair
                             -- for this type label is used as property tag, text is used as property value,
                             -- other fields are not used.
        reference (17),      -- currently not used
        generif (18),        -- to include generif in the main blob
        phenotype(19),       -- to display phenotype information
        complex (20),        -- used (but not limited) to identify resulting
                             -- interaction complexes
        compound (21),       -- pubchem entities
        ncRNA (22),
        gene-group (23),     -- for relationship sets (such as pseudogene / parent gene)
        comment (254),
        other (255)
    },
    heading VisibleString OPTIONAL,    -- appears above text
    label VisibleString OPTIONAL,      -- occurs to left of text
                                       -- for protein and RNA types it is a name
                                       -- for property type it is a property tag
    text VisibleString OPTIONAL,       -- block of text
                                       -- for property type it is a property value
    accession VisibleString OPTIONAL,  -- accession for the gi in the seqloc, see note 3
    version INTEGER OPTIONAL,          -- version for the accession above
    xtra-properties SEQUENCE OF Xtra-Terms OPTIONAL,  -- see note 2
    refs SEQUENCE OF Pub OPTIONAL,                    -- refs for this
    source SEQUENCE OF Other-source OPTIONAL,         -- links and refs
    genomic-coords SEQUENCE OF Seq-loc OPTIONAL,      -- referenced sequences in genomic coords
    seqs SEQUENCE OF Seq-loc OPTIONAL,                -- referenced sequences in non-genomic coords
    products SEQUENCE OF Gene-commentary OPTIONAL,
    properties SEQUENCE OF Gene-commentary OPTIONAL,
    comment SEQUENCE OF Gene-commentary OPTIONAL,
    create-date Date OPTIONAL,
    update-date Date OPTIONAL
}

Other-source ::= SEQUENCE {
    src Dbtag OPTIONAL,               -- key to non-ncbi source
    pre-text VisibleString OPTIONAL,  -- text before anchor
    anchor VisibleString OPTIONAL,    -- text to show as highlight
    url VisibleString OPTIONAL,       -- if present, use this URL not Dbtag and database
    post-text VisibleString OPTIONAL  -- text after anchor
}

Maps ::= SEQUENCE {
    display-str VisibleString,
    method CHOICE {
        proxy VisibleString,    -- url to non mapviewer mapviewing resource
        map-type ENUMERATED {   -- units used in display-str to query mapviewer
            cyto (0),
            bp (1),
            cM (2),
            cR (3),
            min (4)
        }
    }
}

-- see note 2
Xtra-Terms ::= SEQUENCE {
    tag VisibleString,
    value VisibleString
}

END

--**********************************************************************
--
--  Comments, notes, etc.
--
--  1) Ignored unless status = secondary.  This is where gene_ids (db = "GeneID")
--     are placed toward which the interface will direct users.  It is also
--     available for placing other source-db specific tags (i.e., db = "LocusID").
--
--  2) These 'xtra' objects are for submitting data for Entrez indexing
--     that might not fit anywhere in the Entrezgene specification but
--     are considered by the data source submitter to be important.
--        xtra-index-terms is any string.
--        xtra-properties are tag/value pairs of properties/fields as
--             defined in the Entrez database (i.e.: UNIGENE/Hs.74561)
--        xtra-iq are tag/value pairs of Entrez database/UID as defined
--             in the Entrezgene indexing code (i.e.: NUCLEOTIDE/20270626)
--
--  3) Locus-tag and src-str2 are expected to be unique per organism (tax_id).
--     Protein accessions and the tag-value pairs in unique-keys
--     are expected to be unique over all organisms.
--**********************************************************************

-- featdef.asn
--$Revision: 6.0 $
--**********************************************************************
--
--  NCBI Sequence Feature Definition Module
--  by James Ostell, 1994
--
--**********************************************************************

NCBI-FeatDef DEFINITIONS ::=
BEGIN

EXPORTS FeatDef, FeatDefSet, FeatDispGroup, FeatDispGroupSet;

-- One feature definition: its labels, keys and grouping.
FeatDef ::= SEQUENCE {
    typelabel VisibleString,  -- short label for type eg "CDS"
    menulabel VisibleString,  -- label for a menu eg "Coding Region"
    featdef-key INTEGER,      -- unique for this feature definition
    seqfeat-key INTEGER,      -- SeqFeat.data.choice from objfeat.h
    entrygroup INTEGER,       -- Group for data entry
    displaygroup INTEGER,     -- Group for data display
    molgroup FeatMolType      -- Type of Molecule used for
}

FeatMolType ::= ENUMERATED {
    aa (1),    -- proteins
    na (2),    -- nucleic acids
    both (3)   -- both
}

FeatDefSet ::= SEQUENCE OF FeatDef  -- collections of definitions

FeatDispGroup ::= SEQUENCE {
    groupkey INTEGER,
    groupname VisibleString
}

FeatDispGroupSet ::= SEQUENCE OF FeatDispGroup

-- Display groups together with the feature definitions.
FeatDefGroupSet ::= SEQUENCE {
    groups FeatDispGroupSet,
    defs FeatDefSet
}

END

-- gbseq.asn
--$Revision: 149842 $
--*********************************************************
--
-- ASN.1 and XML for the components of a GenBank format sequence
-- J.Ostell 2002
-- Updated 15 January 2009
--
--*********************************************************

NCBI-GBSeq DEFINITIONS ::=
BEGIN

--********
--  GBSeq represents the elements in a GenBank style report
--    of a sequence with some small additions to structure and support
--    for protein (GenPept) versions of GenBank format as seen in
--    Entrez.
--    While this represents the simplification, reduction of
--    detail, and flattening to a single sequence perspective of GenBank
--    format (compared with the full ASN.1 or XML from which GenBank and
--    this format is derived at NCBI), it is presented in ASN.1 or XML for
--    automated parsing and processing. It is hoped that this compromise
--    will be useful for those bulk processing at the GenBank format level
--    of detail today. Since it is a compromise, a number of pragmatic
--    decisions have been made.
--
--  In pursuit of simplicity and familiarity a number of
--    fields do not have full substructure defined here where there is
--    already a standard GenBank format string. For example:
--
--   Date  DD-Mon-YYYY
--   Authors   LastName, Initials (with periods)
--   Journal   JournalName Volume (issue), page-range (year)
--   FeatureLocations as per GenBank feature table, but FeatureIntervals
--    may also be provided as a convenience
--   FeatureQualifiers  as per GenBank feature table
--   Primary has a string that represents a table to construct
--    a third party (TPA) sequence.
--   other-seqids can have strings with the "vertical bar format" sequence
--    identifiers used in BLAST for example, when they are non-genbank types.
--    Currently in GenBank format you only see GI, but there are others, like
--    patents, submitter clone names, etc which will appear here, as they
--    always have in the ASN.1 format, and full XML format.
--   source-db is a formatted text block for peptides in GenPept format that
--    carries information from the source protein database.
--
--  There are also a number of elements that could have been
--   more exactly specified, but in the interest of simplicity
--   have been simply left as options. For example:
--
--  accession and accession.version will always appear in a GenBank record
--   they are optional because this format can also be used for non-GenBank
--   sequences, and in that case will have only "other-seqids".
--
--  sequences will normally all have "sequence" filled in. But contig records
--   will have a "join" statement in the "contig" slot, and no "sequence".
--   We also may consider a retrieval option with no sequence of any kind
--   and no feature table to quickly check minimal values.
--
--  a reference may have an author list, or be from a consortium, or both.
--
--  some fields, such as taxonomy, do appear as separate elements in GenBank
--   format but without a specific linetype (in GenBank format this comes
--   under ORGANISM). Another example is the separation of primary accession
--   from the list of secondary accessions. In GenBank format primary
--   accession is just the first one on the list that includes all secondaries
--   after it.
--
--  create-date deserves special comment. The date you see on the right hand
--   side of the LOCUS line in GenBank format is actually the last date
--   the record was modified (or the update-date). The date the record was
--   first submitted to GenBank appears in the first submission citation in
--   the reference section. Internally in the databases and ASN.1 NCBI keeps
--   the first date the record was released into the sequence database at
--   NCBI as create-date. For records from EMBL, which supports create-date,
--   it is the date provided by EMBL. For DDBJ records, which do not supply
--   a create-date (same as GenBank format) the create-date is the first date
--   NCBI saw the record from DDBJ. For older GenBank records, before NCBI
--   took responsibility for GenBank, it is just the first date NCBI saw the
--   record. Create-date can be very useful, so we expose it here, but users
--   must understand it is only an approximation and comes from many sources,
--   and with many exceptions and caveats. It does NOT tell you the first
--   date the public might have seen this record and thus is NOT an accurate
--   measure for legal issues of precedence.
--
--********

GBSet ::= SEQUENCE OF GBSeq

-- One sequence record in GenBank-style form.
GBSeq ::= SEQUENCE {
    locus VisibleString,
    length INTEGER,
    strandedness VisibleString OPTIONAL,
    moltype VisibleString,
    topology VisibleString OPTIONAL,
    division VisibleString,
    update-date VisibleString,
    create-date VisibleString OPTIONAL,
    update-release VisibleString OPTIONAL,
    create-release VisibleString OPTIONAL,
    definition VisibleString,
    primary-accession VisibleString OPTIONAL,
    entry-version VisibleString OPTIONAL,
    accession-version VisibleString OPTIONAL,
    other-seqids SEQUENCE OF GBSeqid OPTIONAL,
    secondary-accessions SEQUENCE OF GBSecondary-accn OPTIONAL,
    project VisibleString OPTIONAL,
    keywords SEQUENCE OF GBKeyword OPTIONAL,
    segment VisibleString OPTIONAL,
    source VisibleString OPTIONAL,
    organism VisibleString OPTIONAL,
    taxonomy VisibleString OPTIONAL,
    references SEQUENCE OF GBReference OPTIONAL,
    comment VisibleString OPTIONAL,
    tagset GBTagset OPTIONAL,
    primary VisibleString OPTIONAL,
    source-db VisibleString OPTIONAL,
    database-reference VisibleString OPTIONAL,
    feature-table SEQUENCE OF GBFeature OPTIONAL,
    sequence VisibleString OPTIONAL,  -- Optional for other dump forms
    contig VisibleString OPTIONAL
}

GBSecondary-accn ::= VisibleString

GBSeqid ::= VisibleString

GBKeyword ::= VisibleString

GBAuthor ::= VisibleString

-- One literature reference attached to a GBSeq.
GBReference ::= SEQUENCE {
    reference VisibleString,
    position VisibleString OPTIONAL,
    authors SEQUENCE OF GBAuthor OPTIONAL,
    consortium VisibleString OPTIONAL,
    title VisibleString OPTIONAL,
    journal VisibleString,
    xref SET OF GBXref OPTIONAL,
    pubmed INTEGER OPTIONAL,
    remark VisibleString OPTIONAL
}

GBXref ::= SEQUENCE {
    dbname VisibleString,
    id VisibleString
}

GBTagset ::= SEQUENCE {
    authority VisibleString OPTIONAL,
    version VisibleString OPTIONAL,
    url VisibleString OPTIONAL,
    tags GBTags OPTIONAL
}

GBTags ::= SEQUENCE OF GBTag

GBTag ::= SEQUENCE {
    name VisibleString OPTIONAL,
    value VisibleString OPTIONAL,
    unit VisibleString OPTIONAL
}

-- One feature-table entry.
GBFeature ::= SEQUENCE {
    key VisibleString,
    location VisibleString,
    intervals SEQUENCE OF GBInterval OPTIONAL,
    operator VisibleString OPTIONAL,
    partial5 BOOLEAN OPTIONAL,
    partial3 BOOLEAN OPTIONAL,
    quals SEQUENCE OF GBQualifier OPTIONAL
}

GBInterval ::= SEQUENCE {
    from INTEGER OPTIONAL,
    to INTEGER OPTIONAL,
    point INTEGER OPTIONAL,
    iscomp BOOLEAN OPTIONAL,
    interbp BOOLEAN OPTIONAL,
    accession VisibleString
}

GBQualifier ::= SEQUENCE {
    name VisibleString,
    value VisibleString OPTIONAL
}

GBTagsetRules ::= SEQUENCE {
    authority VisibleString OPTIONAL,
    version VisibleString OPTIONAL,
    mandatorytags GBTagNames OPTIONAL,
    optionaltags GBTagNames OPTIONAL,
    uniquetags GBTagNames OPTIONAL,
    extensible BOOLEAN OPTIONAL
}

GBTagNames ::= SEQUENCE OF VisibleString

GBTagsetRuleSet ::= SEQUENCE OF GBTagsetRules

END

-- general.asn
--$Revision: 99135 $
--**********************************************************************
--
--  NCBI General Data elements
--  by James Ostell, 1990
--  Version 3.0 - June 1994
--
--**********************************************************************

NCBI-General DEFINITIONS ::=
BEGIN

EXPORTS Date, Person-id, Object-id, Dbtag, Int-fuzz, User-object, User-field;

-- StringStore is really a VisibleString.  It is used to define very
--   long strings which may need to be stored by the receiving program
--   in special structures, such as a ByteStore, but it's just a hint.
--   AsnTool stores StringStores in ByteStore structures.
-- OCTET STRINGs are also stored in ByteStores by AsnTool
--
-- typedef struct bsunit {             /* for building multiline strings */
--    Nlm_Handle str;            /* the string piece */
--    Nlm_Int2 len_avail,
--        len;
--    struct bsunit PNTR next; }       /* the next one */
-- Nlm_BSUnit, PNTR Nlm_BSUnitPtr;
--
-- typedef struct bytestore {
--    Nlm_Int4 seekptr,       /* current position */
--        totlen,             /* total stored data length in bytes */
--        chain_offset;       /* offset in ByteStore of first byte in curchain */
--    Nlm_BSUnitPtr chain,       /* chain of elements */
--        curchain;           /* the BSUnit containing seekptr */
-- } Nlm_ByteStore, PNTR Nlm_ByteStorePtr;
--
-- AsnTool incorporates this as a primitive type, so the definition
--   is here just for completeness
--
--  StringStore ::= [APPLICATION 1] IMPLICIT OCTET STRING
--
-- BigInt is really an INTEGER. It is used to warn the receiving code to expect
--   a value bigger than Int4 (actually Int8). It will be stored in DataVal.bigintvalue
--
--   Like StringStore, AsnTool incorporates it as a primitive. The definition would be:
--   BigInt ::= [APPLICATION 2] IMPLICIT INTEGER
--
-- Date is used to replace the (overly complex) UTCTime, GeneralizedTime
--   of ASN.1
--   It stores only a date
--

Date ::= CHOICE {
    str VisibleString,  -- for those unparsed dates
    std Date-std        -- use this if you can
}

-- NOTE: this is NOT a unix tm struct
Date-std ::= SEQUENCE {
    year INTEGER,                   -- full year (including 1900)
    month INTEGER OPTIONAL,         -- month (1-12)
    day INTEGER OPTIONAL,           -- day of month (1-31)
    season VisibleString OPTIONAL,  -- for "spring", "may-june", etc
    hour INTEGER OPTIONAL,          -- hour of day (0-23)
    minute INTEGER OPTIONAL,        -- minute of hour (0-59)
    second INTEGER OPTIONAL         -- second of minute (0-59)
}

-- Dbtag is generalized for tagging
-- eg.
--     { "Social Security", str "023-79-8841" }
-- or  { "member", id 8882224 }

Dbtag ::= SEQUENCE {
    db VisibleString,  -- name of database or system
    tag Object-id      -- appropriate tag
}

-- Object-id can tag or name anything
--

Object-id ::= CHOICE {
    id INTEGER,
    str VisibleString
}

-- Person-id is to define a std element for people
--

Person-id ::= CHOICE {
    dbtag Dbtag,              -- any defined database tag
    name Name-std,            -- structured name
    ml VisibleString,         -- MEDLINE name (semi-structured)
                              --    eg. "Jones RM"
    str VisibleString,        -- unstructured name
    consortium VisibleString  -- consortium name
}

-- Structured names
Name-std ::= SEQUENCE {
    last VisibleString,
    first VisibleString OPTIONAL,
    middle VisibleString OPTIONAL,
    full VisibleString OPTIONAL,      -- full name eg. "J. John Smith, Esq"
    initials VisibleString OPTIONAL,  -- first + middle initials
    suffix VisibleString OPTIONAL,    -- Jr, Sr, III
    title VisibleString OPTIONAL      -- Dr., Sister, etc
}

--**** Int-fuzz **********************************************
--*
--*   uncertainties in integer values

Int-fuzz ::= CHOICE {
    p-m INTEGER,        -- plus or minus fixed amount
    range SEQUENCE {    -- max to min
        max INTEGER,
        min INTEGER
    },
    pct INTEGER,        -- % plus or minus (x10) 0-1000
    lim ENUMERATED {    -- some limit value
        unk (0),        -- unknown
        gt (1),         -- greater than
        lt (2),         -- less than
        tr (3),         -- space to right of position
        tl (4),         -- space to left of position
        circle (5),     -- artificial break at origin of circle
        other (255)     -- something else
    },
    alt SET OF INTEGER  -- set of alternatives for the integer
}

--**** User-object **********************************************
--*
--*   a general object for a user defined structured data item
--*    used by Seq-feat and Seq-descr

User-object ::= SEQUENCE {
    class VisibleString OPTIONAL,  -- endeavor which designed this object
    type Object-id,                -- type of object within class
    data SEQUENCE OF User-field    -- the object itself
}

User-field ::= SEQUENCE {
    label Object-id,       -- field label
    num INTEGER OPTIONAL,  -- required for strs, ints, reals, oss
    data CHOICE {          -- field contents
        str VisibleString,
        int INTEGER,
        real REAL,
        bool BOOLEAN,
        os OCTET STRING,
        object User-object,  -- for using other definitions
        strs SEQUENCE OF VisibleString,
        ints SEQUENCE OF INTEGER,
        reals SEQUENCE OF REAL,
        oss SEQUENCE OF OCTET STRING,
        fields SEQUENCE OF User-field,
        objects SEQUENCE OF User-object
    }
}

END

-- homologene.asn
HomoloGene DEFINITIONS ::=
BEGIN

IMPORTS Date FROM NCBI-General
        Seq-loc FROM NCBI-Seqloc
        Seq-align FROM NCBI-Seqalign;

-- HomoloGeneEntry taxid is the tax id of the group node, which can
-- be the same as the Gene tax id in case of singletons

HG-EntrySet ::= SEQUENCE {
    entries SET OF HG-Entry  -- homologene entry
}

HG-Entry ::= SEQUENCE {
    hg-id INTEGER,
    version INTEGER OPTIONAL,
    title VisibleString OPTIONAL,
    caption VisibleString OPTIONAL,
    taxid INTEGER OPTIONAL,
    genes SET OF HG-Gene OPTIONAL,
    cr-date Date OPTIONAL,
    up-date Date OPTIONAL,
    distances SET OF HG-Stats OPTIONAL,
    commentaries SET OF HG-CommentarySet OPTIONAL,
    warnings SET OF VisibleString OPTIONAL,
    node HG-Node OPTIONAL
}

HG-Gene ::= SEQUENCE {
    geneid INTEGER,
    otherid INTEGER OPTIONAL,  -- internal use only!!!!!
    symbol VisibleString OPTIONAL,
    aliases SET OF VisibleString OPTIONAL,
    title VisibleString,
    taxid INTEGER,             -- taxid of gene node
    prot-gi INTEGER OPTIONAL,
    prot-acc VisibleString OPTIONAL,
    prot-len INTEGER OPTIONAL,
    nuc-gi INTEGER OPTIONAL,
    nuc-acc VisibleString OPTIONAL,
    gene-links SET OF HG-Link OPTIONAL,
    prot-links SET OF HG-Link OPTIONAL,
    domains SET OF HG-Domain OPTIONAL,
    chr VisibleString OPTIONAL,
    location Seq-loc OPTIONAL,  -- location on the genome
    locus-tag VisibleString OPTIONAL
}

-- Pairwise statistics for two GIs (gi1, gi2).
HG-Stats ::= SEQUENCE {
    gi1 INTEGER,
    gi2 INTEGER,
    nuc-change REAL,
    nuc-change-jc REAL,
    prot-change REAL,
    ka REAL,
    ks REAL,
    knr REAL,
    knc REAL,
    recip-best BOOLEAN OPTIONAL
}

HG-Commentary ::= SEQUENCE {
    link HG-Link,
    description VisibleString OPTIONAL,  -- main description
    caption VisibleString OPTIONAL,      -- extra text
    provider VisibleString OPTIONAL,
    other-links SET OF HG-Link OPTIONAL,
    other-commentaries SET OF HG-Commentary OPTIONAL,
    taxid INTEGER OPTIONAL,
    geneid INTEGER OPTIONAL
}

HG-CommentarySet ::= SEQUENCE {
    hg-id INTEGER OPTIONAL,
    title VisibleString,
    commentaries SET OF HG-Commentary
}

HG-CommentaryContainer ::= SET OF HG-CommentarySet

HG-Link ::= SEQUENCE {
    hypertext VisibleString,
    url VisibleString OPTIONAL
}

HG-Domain ::= SEQUENCE {
    begin INTEGER,
    end INTEGER,
    pssm-id INTEGER OPTIONAL,  -- entrez uid
    cdd-id VisibleString OPTIONAL,
    cdd-name VisibleString OPTIONAL
}

-- A tree node; children are themselves HG-Node values.
HG-Node ::= SEQUENCE {
    type ENUMERATED {
        family(0),
        ortholog(1),
        paralog(2),
        leaf(3)
    },
    id HG-Node-id,
    caption VisibleString OPTIONAL,
    current-node BOOLEAN DEFAULT FALSE,
    children SET OF HG-Node OPTIONAL,
    branch-len INTEGER OPTIONAL
}

HG-Node-id ::= SEQUENCE {
    id INTEGER OPTIONAL,
    id-type ENUMERATED {
        none(0),
        geneid(1),
        hid(2)
    }
}

HG-Alignment ::= SEQUENCE {
    hg-id INTEGER,
    alignment Seq-align
}

HG-AlignmentSet ::= SET OF HG-Alignment

END

-- id1.asn
--$Revision: 1.12 $
--********************************************************************
--
--  Network Id server network access
--  Yaschenko 1996
--
--
--*********************************************************************
--
--  ID1.asn
--
--     messages for id server network access
--
--*********************************************************************

NCBI-ID1Access DEFINITIONS ::=
BEGIN

IMPORTS Seq-id FROM NCBI-Seqloc
        Seq-entry FROM NCBI-Seqset
        Seq-hist FROM NCBI-Sequence;

        --**********************************
        -- requests
        --

ID1server-request ::= CHOICE {
    init NULL ,                           -- DlInit
    getgi Seq-id ,                        -- get a gi given a Seq-id
    getsefromgi ID1server-maxcomplex ,    -- given a gi, get the Seq-entry
    fini NULL,                            -- DlFini
    getseqidsfromgi INTEGER,              --get all Seq-ids of given gi
    getgihist INTEGER,                    --get an historical list of gis
    getgirev INTEGER,                     --get a revision history of gi
    getgistate INTEGER,                   --get a state of gi
    getsewithinfo ID1server-maxcomplex,
    getblobinfo ID1server-maxcomplex
}

--  Complexity stuff will be for ID1

ID1server-maxcomplex ::= SEQUENCE {
    maxplex Entry-complexities ,
    gi INTEGER ,
    ent INTEGER OPTIONAL,         -- needed when you want to retrieve a given ent
    sat VisibleString OPTIONAL    -- satellite 0-id,1-dbEST
}

Entry-complexities ::= INTEGER {
    entry (0) ,         -- the "natural" entry for this (nuc-prot)
    bioseq (1) ,        -- only the bioseq identified
    bioseq-set (2) ,    -- any seg-set it may be part of
    nuc-prot (3) ,      -- any nuc-prot it may be part of
    pub-set (4)
}

ID1Seq-hist ::= SEQUENCE {
    hist Seq-hist
}

-- Replies; each alternative mirrors a request alternative of
-- ID1server-request (init/fini) or carries its result.
ID1server-back ::= CHOICE {
    init NULL ,                     -- DlInit
    error INTEGER ,
    gotgi INTEGER ,
    gotseqentry Seq-entry,          -- live
    gotdeadseqentry Seq-entry,      -- dead
    fini NULL,                      -- DlFini
    gistate INTEGER,
    ids SET OF Seq-id,
    gihist SET OF ID1Seq-hist,      -- because hand crafted Seq-hist does not follow
                                    -- same conventions
    girevhist SET OF ID1Seq-hist,
    gotsewithinfo ID1SeqEntry-info,
    gotblobinfo ID1blob-info
}

ID1server-debug ::= SET OF ID1server-back

ID1blob-info ::= SEQUENCE {
    gi INTEGER ,
    sat INTEGER,
    sat-key INTEGER,
    satname VisibleString,
    suppress INTEGER,
    withdrawn INTEGER,
    confidential INTEGER,
    -- blob-state now contains blob version info.
    -- it's actually minutes from 01/01/1970
    -- and it's negative if blob is dead.
    blob-state INTEGER,
    comment VisibleString OPTIONAL,    -- public comment for withdrawn record
    extfeatmask INTEGER OPTIONAL       -- mask for external features (SNP,...)
}

ID1SeqEntry-info ::= SEQUENCE {
    blob-info ID1blob-info,
    blob Seq-entry OPTIONAL
}

END


-- id2.asn
--$Revision: 112545 $
--********************************************************************
--
--  Network Id server network access
--  Vasilchenko 2003
--
--
--*********************************************************************
--
--  ID2.asn
--
--     messages for id server network access
--
--*********************************************************************

NCBI-ID2Access DEFINITIONS ::=
BEGIN

IMPORTS Seq-id, Seq-loc FROM NCBI-Seqloc
        ID2S-Chunk-Id, ID2S-Seq-annot-Info FROM NCBI-Seq-split;

--*********************************************************************
-- request types
--*********************************************************************

-- Requests are sent in packets to allow sending several requests at once
-- to avoid network latency, without possibility of deadlock with the server.
-- Server will not start sending replies until it has read the whole packet.
ID2-Request-Packet ::= SEQUENCE OF ID2-Request

ID2-Request ::= SEQUENCE {
    -- request's serial number, can be used in asynchronous clients
    -- server should copy it to corresponding field in reply
    serial-number INTEGER OPTIONAL,

    params ID2-Params OPTIONAL,

    request CHOICE {
        init NULL,
        get-packages ID2-Request-Get-Packages,
        get-seq-id ID2-Request-Get-Seq-id,
        get-blob-id ID2-Request-Get-Blob-Id,
        get-blob-info ID2-Request-Get-Blob-Info,
        reget-blob ID2-Request-ReGet-Blob,
        get-chunks ID2S-Request-Get-Chunks
    }
}

-- Request for the set of params packages known by the server.
-- Packages can be used to abbreviate parameters of request.
ID2-Request-Get-Packages ::= SEQUENCE {
    -- return known packages from this list
    -- if unset - return all known packages
    names SEQUENCE OF VisibleString OPTIONAL,

    -- return packages' names only
    no-contents NULL OPTIONAL
}


-- Requested sequence ID, can be any string or Seq-id.
-- This request will be replied with one or more ID2-Reply-Get-Seq-id.
ID2-Request-Get-Seq-id ::= SEQUENCE {
    seq-id ID2-Seq-id,

    -- NOTE(review): the values 1, 2, 4 and all (127) look like bit flags
    -- that might be combined - confirm against the server implementation.
    seq-id-type INTEGER {
        any (0),        -- return any qualified Seq-id
        gi (1),         -- gi is preferred
        text (2),       -- text Seq-id (accession etc) is preferred
        general (4),    -- general Seq-id is preferred
        all (127),      -- return all qualified Seq-ids of the sequence
        label (128)     -- return a sequence string label as general id
    } DEFAULT any
}


-- Sequence identifier given either as a plain string or as a Seq-id.
ID2-Seq-id ::= CHOICE {
    string VisibleString,
    seq-id Seq-id
}


-- Return blob-id with specified seq-id.
-- This request will be replied with one or more ID2-Reply-Get-Blob-Id.
ID2-Request-Get-Blob-Id ::= SEQUENCE {
    -- id can be supplied by inner request
    seq-id ID2-Request-Get-Seq-id,

    -- return id of blob with sequence
    sources SEQUENCE OF VisibleString OPTIONAL,

    -- return Blob-Ids with external features on this Seq-id
    external NULL OPTIONAL
}


-- Return some information related to the blob.
-- This request will be replied with one or more of:
--   ID2-Reply-Get-Blob-Seq-ids - if requested by get-seq-ids field
--   ID2-Reply-Get-Blob         - if requested by get-data field
--   ID2S-Reply-Get-Split-Info
--   ID2S-Reply-Get-Chunk
-- Last two can be sent in addition to ID2-Reply-Get-Blob
-- if the blob is split on the server.
-- The replies are made separate to allow server to create replies easier
-- from precalculated data. Each of these replies has an ID2-Reply-Data field.
ID2-Request-Get-Blob-Info ::= SEQUENCE {
    -- id can be supplied by inner request
    blob-id CHOICE {
        -- id can be supplied by inner request
        blob-id ID2-Blob-Id,

        -- generate blob-ids from request
        resolve SEQUENCE {
            request ID2-Request-Get-Blob-Id,

            -- server will not send blobs listed here
            exclude-blobs SEQUENCE OF ID2-Blob-Id OPTIONAL
        }
    },

    -- return in addition list of Seq-ids also resolving to this blob
    get-seq-ids NULL OPTIONAL,

    -- level of details requested immediately
    -- server will send relevant chunks if blob is split
    get-data ID2-Get-Blob-Details OPTIONAL
}


-- This is similar to FTP reget command.
-- It may be unsupported by server.
-- It's defined only for plain blobs (returned in ID2-Reply-Get-Blob)
-- as all split data comes in small chunks, so reget doesn't make sense.
ID2-Request-ReGet-Blob ::= SEQUENCE {
    blob-id ID2-Blob-Id,

    -- blob split version to resend
    split-version INTEGER,

    -- start offset of data to get
    offset INTEGER
}


-- Request for specific chunks.
-- Server will reply with one or more ID2S-Reply-Get-Chunk.
ID2S-Request-Get-Chunks ::= SEQUENCE {
    blob-id ID2-Blob-Id,

    -- requests for specific chunks of split blob
    chunks SEQUENCE OF ID2S-Chunk-Id,

    -- blob split version
    split-version INTEGER OPTIONAL
}


-- The following structure describes what parts of blob are required
-- immediately after ID2-Request-Get-Blob-Info in case blob is split.
-- Seq-entry level will have probably the same values as Entry-complexities.
ID2-Get-Blob-Details ::= SEQUENCE {
    -- reference location for details - can be only part of sequence
    location Seq-loc OPTIONAL,

    -- Seq-entry level for all data except descriptors (sequence, annots)
    seq-class-level INTEGER DEFAULT 1,

    -- Seq-entry level for descriptors
    descr-level INTEGER DEFAULT 1,

    -- mask of descriptor types - see Seqdesc for variants' values
    descr-type-mask INTEGER DEFAULT 0,

    -- mask of annotation types - see Seq-annot.data for values
    annot-type-mask INTEGER DEFAULT 0,

    -- mask of feature types - see SeqFeatData for values
    feat-type-mask INTEGER DEFAULT 0,

    -- level of sequence data to load
    sequence-level ENUMERATED {
        none (0),       -- not required
        seq-map (1),    -- at least seq-map
        seq-data (2)    -- include seq-data
    } DEFAULT none
}


--*********************************************************************
-- reply types
--*********************************************************************


ID2-Reply ::= SEQUENCE {
    -- request's serial number, copy from request
    serial-number INTEGER OPTIONAL,

    params ID2-Params OPTIONAL,

    error SEQUENCE OF ID2-Error OPTIONAL,

    -- true if this reply is the last one for the request
    -- false if more replies will follow
    -- (the field is NULL OPTIONAL, so "true" means the field is present
    -- and "false" means it is absent)
    end-of-reply NULL OPTIONAL,

    -- reply data moved at the end to make it easier to construct
    -- the reply data manually from precalculated data
    reply CHOICE {
        init NULL,
        empty NULL,
        get-package ID2-Reply-Get-Package,
        get-seq-id ID2-Reply-Get-Seq-id,
        get-blob-id ID2-Reply-Get-Blob-Id,
        get-blob-seq-ids ID2-Reply-Get-Blob-Seq-ids,
        get-blob ID2-Reply-Get-Blob,
        reget-blob ID2-Reply-ReGet-Blob,
        get-split-info ID2S-Reply-Get-Split-Info,
        get-chunk ID2S-Reply-Get-Chunk
    },

    -- additional error flag if the reply is broken in the middle
    -- of transfer.
    -- 'last-octet-string', and 'nothing' mean that
    -- client may use ReGet request to get the remaining data.
    discard ENUMERATED {
        reply (0),                -- whole reply should be discarded
        last-octet-string (1),    -- all data in embedded ID2-Reply-Data
                                  -- except last OCTET STRING is correct
        nothing (2)               -- all data in embedded ID2-Reply-Data
                                  -- is correct, but is incomplete
    } OPTIONAL
}


ID2-Error ::= SEQUENCE {
    severity ENUMERATED {
        -- nothing harmful happened
        warning (1) ,

        -- command cannot be completed this time
        failed-command (2) ,

        -- connection cannot be reused, reconnect is required
        failed-connection (3) ,

        -- server cannot be used for a while
        failed-server (4) ,

        -- resolve request gives no data
        -- probably temporarily (see retry-delay field)
        no-data (5) ,

        -- data exists but client doesn't have permission to get it
        restricted-data (6) ,

        -- this request type is not supported by server
        unsupported-command (7) ,

        -- error in request packet, cannot retry
        invalid-arguments (8)
    },

    -- client may retry the request after specified time in seconds
    retry-delay INTEGER OPTIONAL,

    message VisibleString OPTIONAL
}


-- Reply to ID2-Request-Get-Packages.
ID2-Reply-Get-Package ::= SEQUENCE {
    name VisibleString,

    params ID2-Params OPTIONAL
}


-- Reply to ID2-Request-Get-Seq-id.
ID2-Reply-Get-Seq-id ::= SEQUENCE {
    -- copy of request
    request ID2-Request-Get-Seq-id,

    -- resolved Seq-id
    -- not set if error occurred
    seq-id SEQUENCE OF Seq-id OPTIONAL,

    -- this Seq-id is the last one in the request
    end-of-reply NULL OPTIONAL
}


ID2-Blob-State ::= ENUMERATED {
    live (0),
    suppressed-temp (1),
    suppressed (2),
    dead (3),
    protected (4),
    withdrawn (5)
}


-- Reply to ID2-Request-Get-Blob-Id.
ID2-Reply-Get-Blob-Id ::= SEQUENCE {
    -- requested Seq-id
    seq-id Seq-id,

    -- result
    blob-id ID2-Blob-Id OPTIONAL,

    -- version of split data
    -- (0 for non split)
    split-version INTEGER DEFAULT 0,

    -- annotation types in this blob
    -- annotations are unknown if this field is omitted
    annot-info SEQUENCE OF ID2S-Seq-annot-Info OPTIONAL,

    -- this Blob-id is the last one in the request
    end-of-reply NULL OPTIONAL,

    -- state of the blob
    blob-state ID2-Blob-State OPTIONAL
}


-- Reply to ID2-Request-Get-Blob-Info.
ID2-Reply-Get-Blob-Seq-ids ::= SEQUENCE {
    blob-id ID2-Blob-Id,

    -- list of Seq-id resolving to this Blob-Id
    -- in compressed format
    ids ID2-Reply-Data OPTIONAL
}


-- Reply to ID2-Request-Get-Blob-Info.
ID2-Reply-Get-Blob ::= SEQUENCE {
    blob-id ID2-Blob-Id,

    -- version of split data
    -- (0 for non split)
    split-version INTEGER DEFAULT 0,

    -- whole blob or blob skeleton
    -- not set if error occurred
    data ID2-Reply-Data OPTIONAL
}


-- Reply to ID2-Request-Get-Blob-Info.
ID2S-Reply-Get-Split-Info ::= SEQUENCE {
    blob-id ID2-Blob-Id,

    -- version of split data
    split-version INTEGER,

    -- blob split info
    -- not set if error occurred
    data ID2-Reply-Data OPTIONAL
}


-- Reply to ID2-Request-ReGet-Blob.
ID2-Reply-ReGet-Blob ::= SEQUENCE {
    blob-id ID2-Blob-Id,

    -- version of data split
    split-version INTEGER,

    -- offset of data
    offset INTEGER,

    -- blob split info
    -- not set if error occurred
    -- NOTE(review): "blob split info" looks copied from
    -- ID2S-Reply-Get-Split-Info; presumably this carries the blob data
    -- starting at 'offset' - confirm.
    data ID2-Reply-Data OPTIONAL
}


-- Reply to ID2S-Request-Get-Chunks.
ID2S-Reply-Get-Chunk ::= SEQUENCE {
    blob-id ID2-Blob-Id,

    -- id of chunk to send
    chunk-id ID2S-Chunk-Id,

    -- chunk data
    -- not set if error occurred
    data ID2-Reply-Data OPTIONAL
}


-- Data packing.
ID2-Reply-Data ::= SEQUENCE {
    -- index of negotiated types
    -- recommended types
    --   Seq-entry,
    --   ID2S-Split-Info,
    --   ID2S-Chunk
    -- NOTE(review): seq-annot (1) is defined below but is not in the
    -- recommended list above - confirm whether the list is just outdated.
    data-type INTEGER {
        seq-entry (0),
        seq-annot (1),
        id2s-split-info (2),
        id2s-chunk (3)
    } DEFAULT seq-entry,

    -- serialization format (ASN.1 binary, ASN.1 text)
    -- index of negotiated formats
    data-format INTEGER {
        asn-binary (0),
        asn-text (1),
        xml (2)
    } DEFAULT asn-binary,

    -- post serialization compression (plain, gzip, etc.)
    -- index of negotiated compressions
    data-compression INTEGER {
        none (0),
        gzip (1),
        nlmzip (2),
        bzip2 (3)
    } DEFAULT none,

    -- data blob
    data SEQUENCE OF OCTET STRING
}


-- Data packed within ID2-Reply-Get-Blob-Seq-ids reply.
ID2-Blob-Seq-ids ::= SEQUENCE OF ID2-Blob-Seq-id


ID2-Blob-Seq-id ::= SEQUENCE {
    seq-id Seq-id,

    -- this Seq-id is replaced by sequence in another blob
    replaced NULL OPTIONAL
}


--*********************************************************************
-- utility types
--*********************************************************************


ID2-Blob-Id ::= SEQUENCE {
    sat INTEGER,
    sub-sat INTEGER {
        main (0),
        snp (1),
        snp-graph (4),
        cdd (8),
        mgc (16),
        hprd (32),
        sts (64),
        trna (128),
        exon (512)
    } DEFAULT main,
    sat-key INTEGER,

    -- version of blob, optional in some requests
    version INTEGER OPTIONAL
}


ID2-Params ::= SEQUENCE OF ID2-Param


ID2-Param ::= SEQUENCE {
    name VisibleString,
    value SEQUENCE OF VisibleString OPTIONAL,
    type ENUMERATED {
        -- no response expected
        set-value (1),

        -- this option is for client only
        -- server replies with its value of param if known
        -- server omits this param in reply if unknown to server
        get-value (2),

        -- no direct response expected,
        -- but if the param or its value is not supported
        -- an error is reported and the request is not completed
        force-value (3),

        -- use named package
        -- value should be unset
        use-package (4)
    } DEFAULT set-value
}


END


-- insdseq.asn
--$Revision: 149845 $
--************************************************************************
--
--  ASN.1 and XML for the
components of a GenBank/EMBL/DDBJ sequence record -- The International Nucleotide Sequence Database (INSD) collaboration -- Version 1.5, 15 January 2009 -- --************************************************************************ INSD-INSDSeq DEFINITIONS ::= BEGIN -- INSDSeq provides the elements of a sequence as presented in the -- GenBank/EMBL/DDBJ-style flatfile formats, with a small amount of -- additional structure. -- Although this single perspective of the three flatfile formats -- provides a useful simplification, it hides to some extent the -- details of the actual data underlying those formats. Nevertheless, -- the XML version of INSD-Seq is being provided with -- the hopes that it will prove useful to those who bulk-process -- sequence data at the flatfile-format level of detail. Further -- documentation regarding the content and conventions of those formats -- can be found at: -- -- URLs for the DDBJ, EMBL, and GenBank Feature Table Document: -- http://www.ddbj.nig.ac.jp/FT/full_index.html -- http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html -- http://www.ncbi.nlm.nih.gov/projects/collab/FT/index.html -- -- URLs for DDBJ, EMBL, and GenBank Release Notes : -- ftp://ftp.ddbj.nig.ac.jp/database/ddbj/ddbjrel.txt -- http://www.ebi.ac.uk/embl/Documentation/Release_notes/current/relnotes.html -- ftp://ftp.ncbi.nih.gov/genbank/gbrel.txt -- -- Because INSDSeq is a compromise, a number of pragmatic decisions have -- been made: -- -- In pursuit of simplicity and familiarity a number of fields do not -- have full substructure defined here where there is already a -- standard flatfile format string. For example: -- -- Dates: DD-MON-YYYY (eg 10-JUN-2003) -- -- Author: LastName, Initials (eg Smith, J.N.) -- or Lastname Initials (eg Smith J.N.) -- -- Journal: JournalName Volume (issue), page-range (year) -- or JournalName Volume(issue):page-range(year) -- eg Appl. Environ. Microbiol. 61 (4), 1646-1648 (1995) -- Appl. Environ. Microbiol. 
61(4):1646-1648(1995). -- -- FeatureLocations are represented as in the flatfile feature table, -- but FeatureIntervals may also be provided as a convenience -- -- FeatureQualifiers are represented as in the flatfile feature table. -- -- Primary has a string that represents a table to construct -- a third party (TPA) sequence. -- -- other-seqids can have strings with the "vertical bar format" sequence -- identifiers used in BLAST for example, when they are non-INSD types. -- -- Currently in flatfile format you only see Accession numbers, but there -- are others, like patents, submitter clone names, etc which will -- appear here -- -- There are also a number of elements that could have been more exactly -- specified, but in the interest of simplicity have been simply left as -- optional. For example: -- -- All publicly accessible sequence records in INSDSeq format will -- include accession and accession.version. However, these elements are -- optional in INSDSeq so that this format can also be used -- for non-public sequence data, prior to the assignment of accessions and -- version numbers. In such cases, records will have only "other-seqids". -- -- sequences will normally all have "sequence" filled in. But contig records -- will have a "join" statement in the "contig" slot, and no "sequence". -- We also may consider a retrieval option with no sequence of any kind -- and no feature table to quickly check minimal values. -- -- Four (optional) elements are specific to records represented via the EMBL -- sequence database: INSDSeq_update-release, INSDSeq_create-release, -- INSDSeq_entry-version, and INSDSeq_database-reference. -- -- One (optional) element is specific to records originating at the GenBank -- and DDBJ sequence databases: INSDSeq_segment.
--
--********

INSDSet ::= SEQUENCE OF INSDSeq


-- One sequence record.  Field notes below restate the module header:
--   * dates are flatfile strings of the form DD-MON-YYYY (eg 10-JUN-2003)
--   * update-release, create-release, entry-version and database-reference
--     are specific to records represented via the EMBL sequence database
--   * segment is specific to records originating at GenBank and DDBJ
--   * primary-accession and accession-version are optional so the format
--     can be used before accessions are assigned; such records have only
--     other-seqids
INSDSeq ::= SEQUENCE {
    locus VisibleString ,
    length INTEGER ,
    strandedness VisibleString OPTIONAL ,
    moltype VisibleString ,
    topology VisibleString OPTIONAL ,
    division VisibleString ,
    update-date VisibleString ,
    create-date VisibleString OPTIONAL ,
    update-release VisibleString OPTIONAL ,
    create-release VisibleString OPTIONAL ,
    definition VisibleString ,
    primary-accession VisibleString OPTIONAL ,
    entry-version VisibleString OPTIONAL ,
    accession-version VisibleString OPTIONAL ,
    other-seqids SEQUENCE OF INSDSeqid OPTIONAL ,
    secondary-accessions SEQUENCE OF INSDSecondary-accn OPTIONAL,
    project VisibleString OPTIONAL ,
    keywords SEQUENCE OF INSDKeyword OPTIONAL ,
    segment VisibleString OPTIONAL ,
    source VisibleString OPTIONAL ,
    organism VisibleString OPTIONAL ,
    taxonomy VisibleString OPTIONAL ,
    references SEQUENCE OF INSDReference OPTIONAL ,
    comment VisibleString OPTIONAL ,
    tagset INSDTagset OPTIONAL ,
    primary VisibleString OPTIONAL ,    -- table to construct a third party (TPA) sequence
    source-db VisibleString OPTIONAL ,
    database-reference VisibleString OPTIONAL ,
    feature-table SEQUENCE OF INSDFeature OPTIONAL ,
    sequence VisibleString OPTIONAL ,   -- Optional for other dump forms
    contig VisibleString OPTIONAL       -- contig records: "join" statement, no sequence
}

INSDSeqid ::= VisibleString

INSDSecondary-accn ::= VisibleString

INSDKeyword ::= VisibleString


-- INSDReference_position contains a string value indicating the
-- basepair span(s) to which a reference applies. The allowable
-- formats are:
--
--   X..Y  : Where X and Y are integers separated by two periods,
--           X >= 1 , Y <= sequence length, and X <= Y
--
--           Multiple basepair spans can exist, separated by a
--           semi-colon and a space. For example : 10..20; 100..500
--
--   sites : The string literal 'sites', indicating that a reference
--           provides sequence annotation information, but the specific
--           basepair spans are either not captured, or were too numerous
--           to record.
--
--           The 'sites' literal string is singly occurring, and
--           cannot be used in conjunction with any X..Y basepair spans.
--
--   References that lack an INSDReference_position element apply
--   to the entire sequence.

INSDAuthor ::= VisibleString

INSDReference ::= SEQUENCE {
    reference VisibleString ,
    position VisibleString OPTIONAL ,
    authors SEQUENCE OF INSDAuthor OPTIONAL ,
    consortium VisibleString OPTIONAL ,
    title VisibleString OPTIONAL ,
    journal VisibleString ,
    xref SET OF INSDXref OPTIONAL ,
    pubmed INTEGER OPTIONAL ,
    remark VisibleString OPTIONAL
}

-- INSDXref provides a method for referring to records in
-- other databases. INSDXref_dbname is a string value that
-- provides the name of the database, and INSDXref_id
-- is a string value that provides the record's identifier
-- in that database.

INSDXref ::= SEQUENCE {
    dbname VisibleString ,
    id VisibleString
}

-- INSDTagset is used for community-specific data elements
-- in a tag/value format.

INSDTagset ::= SEQUENCE {
    authority VisibleString OPTIONAL ,
    version VisibleString OPTIONAL ,
    url VisibleString OPTIONAL ,
    tags INSDTags OPTIONAL
}

INSDTags ::= SEQUENCE OF INSDTag

INSDTag ::= SEQUENCE {
    name VisibleString OPTIONAL ,
    value VisibleString OPTIONAL ,
    unit VisibleString OPTIONAL
}

-- INSDFeature_operator contains a string value describing
-- the relationship among a set of INSDInterval within
-- INSDFeature_intervals. The allowable formats are:
--
--   join :  The string literal 'join' indicates that the
--           INSDInterval intervals are biologically joined
--           together into a contiguous molecule.
--
--   order : The string literal 'order' indicates that the
--           INSDInterval intervals are in the presented
--           order, but they are not necessarily contiguous.
--
--   Either 'join' or 'order' is required if INSDFeature_intervals
--   is comprised of more than one INSDInterval .
INSDFeature ::= SEQUENCE {
    key VisibleString ,
    location VisibleString ,                      -- as in the flatfile feature table
    intervals SEQUENCE OF INSDInterval OPTIONAL ,
    operator VisibleString OPTIONAL ,             -- 'join' or 'order'
    partial5 BOOLEAN OPTIONAL ,
    partial3 BOOLEAN OPTIONAL ,
    quals SEQUENCE OF INSDQualifier OPTIONAL
}

-- INSDInterval_iscomp is a boolean indicating whether
-- an INSDInterval_from / INSDInterval_to location
-- represents a location on the complement strand.
-- When INSDInterval_iscomp is TRUE, it essentially
-- confirms that a 'from' value which is greater than
-- a 'to' value is intentional, because the location
-- is on the opposite strand of the presented sequence.

-- INSDInterval_interbp is a boolean indicating whether
-- a feature (such as a restriction site) is located
-- between two adjacent basepairs. When INSDInterval_interbp
-- is TRUE, the 'from' and 'to' values must differ by
-- exactly one base.

INSDInterval ::= SEQUENCE {
    from INTEGER OPTIONAL ,
    to INTEGER OPTIONAL ,
    point INTEGER OPTIONAL ,
    iscomp BOOLEAN OPTIONAL ,
    interbp BOOLEAN OPTIONAL ,
    accession VisibleString
}

INSDQualifier ::= SEQUENCE {
    name VisibleString ,
    value VisibleString OPTIONAL
}

-- INSDTagsetRules defines mandatory, optional, and unique tags
-- for a given community's INSDTagset. If the tagset is extensible,
-- then additional tags which are not included in the list of
-- mandatory or optional tags may be present. The uniquetags
-- element provides a list of the tags that may occur only once
-- in a given tagset.
INSDTagsetRules ::= SEQUENCE { authority VisibleString OPTIONAL , version VisibleString OPTIONAL , mandatorytags INSDTagNames OPTIONAL , optionaltags INSDTagNames OPTIONAL , uniquetags INSDTagNames OPTIONAL , extensible BOOLEAN OPTIONAL } INSDTagNames ::= SEQUENCE OF VisibleString INSDTagsetRuleSet ::= SEQUENCE OF INSDTagsetRules END -- medlars.asn --$Revision: 6.0 $ --********************************************************************** -- -- MEDLARS data definitions -- Grigoriy Starchenko, 1997 -- --********************************************************************** NCBI-Medlars DEFINITIONS ::= BEGIN EXPORTS Medlars-entry, Medlars-record; IMPORTS PubMedId FROM NCBI-Biblio; Medlars-entry ::= SEQUENCE { -- a MEDLARS entry pmid PubMedId, -- All entries in PubMed must have it muid INTEGER OPTIONAL, -- Medline(OCCS) id recs SET OF Medlars-record -- List of Medlars records } Medlars-record ::= SEQUENCE { code INTEGER, -- Unit record field type integer form abbr VisibleString OPTIONAL, -- Unit record field type abbreviation form data VisibleString -- Unit record data } END -- medline.asn --$Revision: 6.0 $ --********************************************************************** -- -- MEDLINE data definitions -- James Ostell, 1990 -- -- enhanced in 1996 to support PubMed records as well by simply adding -- the PubMedId and making MedlineId optional -- --********************************************************************** NCBI-Medline DEFINITIONS ::= BEGIN EXPORTS Medline-entry, Medline-si; IMPORTS Cit-art, PubMedId FROM NCBI-Biblio Date FROM NCBI-General; -- a MEDLINE or PubMed entry Medline-entry ::= SEQUENCE { uid INTEGER OPTIONAL , -- MEDLINE UID, sometimes not yet available if from PubMed em Date , -- Entry Month cit Cit-art , -- article citation abstract VisibleString OPTIONAL , mesh SET OF Medline-mesh OPTIONAL , substance SET OF Medline-rn OPTIONAL , xref SET OF Medline-si OPTIONAL , idnum SET OF VisibleString OPTIONAL , -- ID Number (grants, contracts) gene 
SET OF VisibleString OPTIONAL , pmid PubMedId OPTIONAL , -- MEDLINE records may include the PubMedId pub-type SET OF VisibleString OPTIONAL, -- may show publication types (review, etc) mlfield SET OF Medline-field OPTIONAL , -- additional Medline field types status INTEGER { publisher (1) , -- record as supplied by publisher premedline (2) , -- premedline record medline (3) } DEFAULT medline } -- regular medline record Medline-mesh ::= SEQUENCE { mp BOOLEAN DEFAULT FALSE , -- TRUE if main point (*) term VisibleString , -- the MeSH term qual SET OF Medline-qual OPTIONAL } -- qualifiers Medline-qual ::= SEQUENCE { mp BOOLEAN DEFAULT FALSE , -- TRUE if main point subh VisibleString } -- the subheading Medline-rn ::= SEQUENCE { -- medline substance records type ENUMERATED { -- type of record nameonly (0) , cas (1) , -- CAS number ec (2) } , -- EC number cit VisibleString OPTIONAL , -- CAS or EC number if present name VisibleString } -- name (always present) Medline-si ::= SEQUENCE { -- medline cross reference records type ENUMERATED { -- type of xref ddbj (1) , -- DNA Data Bank of Japan carbbank (2) , -- Carbohydrate Structure Database embl (3) , -- EMBL Data Library hdb (4) , -- Hybridoma Data Bank genbank (5) , -- GenBank hgml (6) , -- Human Gene Map Library mim (7) , -- Mendelian Inheritance in Man msd (8) , -- Microbial Strains Database pdb (9) , -- Protein Data Bank (Brookhaven) pir (10) , -- Protein Identification Resource prfseqdb (11) , -- Protein Research Foundation (Japan) psd (12) , -- Protein Sequence Database (Japan) swissprot (13) , -- SwissProt gdb (14) } , -- Genome Data Base cit VisibleString OPTIONAL } -- the citation/accession number Medline-field ::= SEQUENCE { type INTEGER { -- Keyed type other (0) , -- look in line code comment (1) , -- comment line erratum (2) } , -- retracted, corrected, etc str VisibleString , -- the text ids SEQUENCE OF DocRef OPTIONAL } -- pointers relevant to this text DocRef ::= SEQUENCE { -- reference to a document type 
INTEGER { medline (1) , pubmed (2) , ncbigi (3) } , uid INTEGER } END

-- mim.asn
--********************************************************************
--
--  MIM data definitions
--  Brandon Brylawski, 1996.
--  version 2.1
--
--********************************************************************

NCBI-Mim DEFINITIONS ::=
BEGIN

-- A bare list of entries
Mim-entries ::= SEQUENCE OF Mim-entry

-- A list of entries together with a release date
Mim-set ::= SEQUENCE {
    releaseDate Mim-date ,
    mimEntries SEQUENCE OF Mim-entry }

-- One MIM record.  Only mimNumber, mimType, title and numGeneMaps are
-- mandatory; every other field is OPTIONAL.
Mim-entry ::= SEQUENCE {
    mimNumber VisibleString ,
    mimType INTEGER {
        none (0) ,
        star (1) ,
        caret (2) ,
        pound (3) ,
        plus (4) ,
        perc (5) } ,
    title VisibleString ,
    copyright VisibleString OPTIONAL ,
    symbol VisibleString OPTIONAL ,
    locus VisibleString OPTIONAL ,
    synonyms SEQUENCE OF VisibleString OPTIONAL ,
    aliases SEQUENCE OF VisibleString OPTIONAL ,
    included SEQUENCE OF VisibleString OPTIONAL ,
    seeAlso SEQUENCE OF Mim-cit OPTIONAL ,
    text SEQUENCE OF Mim-text OPTIONAL ,
    textfields SEQUENCE OF Mim-text OPTIONAL ,
    hasSummary BOOLEAN OPTIONAL ,
    summary SEQUENCE OF Mim-text OPTIONAL ,
    summaryAttribution SEQUENCE OF Mim-edit-item OPTIONAL ,
    summaryEditHistory SEQUENCE OF Mim-edit-item OPTIONAL ,
    summaryCreationDate Mim-edit-item OPTIONAL ,
    allelicVariants SEQUENCE OF Mim-allelic-variant OPTIONAL ,
    hasSynopsis BOOLEAN OPTIONAL ,
    clinicalSynopsis SEQUENCE OF Mim-index-term OPTIONAL ,
    synopsisAttribution SEQUENCE OF Mim-edit-item OPTIONAL ,
    synopsisEditHistory SEQUENCE OF Mim-edit-item OPTIONAL ,
    synopsisCreationDate Mim-edit-item OPTIONAL ,
    editHistory SEQUENCE OF Mim-edit-item OPTIONAL ,
    creationDate Mim-edit-item OPTIONAL ,
    references SEQUENCE OF Mim-reference OPTIONAL ,
    attribution SEQUENCE OF Mim-edit-item OPTIONAL ,
    numGeneMaps INTEGER ,
    medlineLinks Mim-link OPTIONAL ,
    proteinLinks Mim-link OPTIONAL ,
    nucleotideLinks Mim-link OPTIONAL ,
    structureLinks Mim-link OPTIONAL ,
    genomeLinks Mim-link OPTIONAL }

-- A labelled piece of text with an optional link set
Mim-text ::= SEQUENCE {
    label VisibleString ,
    text VisibleString ,
    neighbors Mim-link OPTIONAL}

Mim-allelic-variant ::= SEQUENCE {
    number VisibleString ,
    name VisibleString ,
    aliases SEQUENCE OF VisibleString OPTIONAL ,
    mutation SEQUENCE OF Mim-text OPTIONAL ,
    description SEQUENCE OF Mim-text OPTIONAL ,
    snpLinks Mim-link OPTIONAL }

-- NOTE(review): uids is a single VisibleString although num is a count;
-- the way several ids are packed into the string is not shown in this
-- specification and should be confirmed against the producer.
Mim-link ::= SEQUENCE {
    num INTEGER ,
    uids VisibleString ,
    numRelevant INTEGER OPTIONAL }

Mim-author ::= SEQUENCE {
    name VisibleString ,
    index INTEGER }

Mim-cit ::= SEQUENCE {
    number INTEGER ,
    author VisibleString ,
    others VisibleString ,
    year INTEGER }

-- A literature reference.  number, authors, primaryAuthor, otherAuthors,
-- citationTitle, pubDate and ambiguous are mandatory; the rest is OPTIONAL.
Mim-reference ::= SEQUENCE {
    number INTEGER ,
    origNumber INTEGER OPTIONAL ,
    type ENUMERATED {
        not-set (0) ,
        citation (1) ,
        book (2) ,
        personal-communication (3) ,
        book-citation (4) } OPTIONAL ,
    authors SEQUENCE OF Mim-author ,
    primaryAuthor VisibleString ,
    otherAuthors VisibleString ,
    citationTitle VisibleString ,
    citationType INTEGER OPTIONAL ,
    bookTitle VisibleString OPTIONAL ,
    editors SEQUENCE OF Mim-author OPTIONAL ,
    volume VisibleString OPTIONAL ,
    edition VisibleString OPTIONAL ,
    journal VisibleString OPTIONAL ,
    series VisibleString OPTIONAL ,
    publisher VisibleString OPTIONAL ,
    place VisibleString OPTIONAL ,
    commNote VisibleString OPTIONAL ,
    pubDate Mim-date ,
    pages SEQUENCE OF Mim-page OPTIONAL ,
    miscInfo VisibleString OPTIONAL ,
    pubmedUID INTEGER OPTIONAL ,
    ambiguous BOOLEAN ,
    noLink BOOLEAN OPTIONAL }

Mim-index-term ::= SEQUENCE {
    key VisibleString ,
    terms SEQUENCE OF VisibleString }

-- Who made an edit and when
Mim-edit-item ::= SEQUENCE {
    author VisibleString ,
    modDate Mim-date }

-- Only the year is mandatory
Mim-date ::= SEQUENCE {
    year INTEGER ,
    month INTEGER OPTIONAL ,
    day INTEGER OPTIONAL }

-- A page or page range; "to" is omitted for a single page
Mim-page ::= SEQUENCE {
    from VisibleString ,
    to VisibleString OPTIONAL }

END

-- mla.asn
--$Revision: 6.1 $
--********************************************************************
--
--  Network MEDLINE Archive message formats
--  Ostell 1993
--
--
--*********************************************************************
--
--  mla.asn
--
--  messages for medline archive data access
--
--*********************************************************************

NCBI-MedArchive
DEFINITIONS ::=
BEGIN

-- Each source module is named once: Medline-entry and Medline-si both come
-- from NCBI-Medline and are therefore listed in a single FROM clause.
IMPORTS Medline-entry, Medline-si FROM NCBI-Medline
        Medlars-entry FROM NCBI-Medlars
        Pubmed-entry FROM NCBI-PubMed
        Pub FROM NCBI-Pub
        Title, PubMedId FROM NCBI-Biblio;

--**********************************
-- requests
--

Mla-request ::= CHOICE {
    init [0] NULL,                -- DlInit
    getmle [1] INTEGER,           -- get MedlineEntry
    getpub [2] INTEGER,           -- get citation by muid
    gettitle [3] Title-msg,       -- match titles
    citmatch [4] Pub,             -- citation match (reply carries a muid or 0)
    fini [5] NULL,                -- DlFini
    getmriuids [6] INTEGER,       -- get MUIDs for an MRI
    getaccuids [7] Medline-si,    -- get MUIDs for an Accession
    uidtopmid [8] INTEGER,        -- get PMID for MUID
    pmidtouid [9] PubMedId,       -- get MUID for PMID
    getmlepmid [10] PubMedId,     -- get MedlineEntry by PubMed id
    getpubpmid [11] PubMedId,     -- get citation by PubMed id
    citmatchpmid [12] Pub,        -- citation match, PMID on out
    getmripmids [13] INTEGER,     -- get PMIDs for an MRI
    getaccpmids [14] Medline-si,  -- get PMIDs for an Accession
    citlstpmids [15] Pub,         -- generate list of PMID for Pub
    getmleuid [16] INTEGER,       -- get MedlineEntry by Medline id
    getmlrpmid [17] PubMedId,     -- get MedlarsEntry by PubMed id
    getmlruid [18] INTEGER        -- get MedlarsEntry by Medline id
}

--**********************************************************************
--
--  Title lookup rules
--
--  if request = all
--      if one row returned
--          reply=all, return every column
--      else
--          reply=ml-jta for each row
--
--  if request = not-set, reply=ml-jta
--
--  otherwise,
--      if request != ml-jta
--          if column exists, reply=column, else reply=ml-jta
--
--**********************************************************************

Title-type ::= ENUMERATED {
    not-set (0),    -- request=ml-jta (default), reply=not-found
    name (1),
    tsub (2),
    trans (3),
    jta (4),
    iso-jta (5),
    ml-jta (6),
    coden (7),
    issn (8),
    abr (9),
    isbn (10),
    all (255)
}

Title-msg ::= SEQUENCE {    -- Title match request/response
    type Title-type,        -- type to get, or type returned
    title Title             -- title(s) to look up, or title(s) found
}

Title-msg-list ::= SEQUENCE {
    num INTEGER,            -- number of titles
    titles SEQUENCE OF Title-msg
}

Error-val ::= ENUMERATED {
    not-found (0),              -- Entry was not found
    operational-error (1),      -- A run-time operational error occurred
    cannot-connect-jrsrv (2),   -- Cannot connect to Journal server
    cannot-connect-pmdb (3),    -- Cannot connect to PubMed
    journal-not-found (4),      -- Journal title not found
    citation-not-found (5),     -- Volume, Page and Author do not match any
                                -- article
    citation-ambiguous (6),     -- More than one article found
    citation-too-many (7)       -- Too many articles were found
}

--**********************************
-- replies
--

Mla-back ::= CHOICE {
    init [0] NULL,                     -- DlInit
    error [1] Error-val,               -- not found for getmle/getpub/citmatch
    getmle [2] Medline-entry,          -- got Medline Entry
    getpub [3] Pub,                    -- got citation
    gettitle [4] Title-msg-list,       -- match titles
    citmatch [5] INTEGER,              -- citation lookup muid or 0
    fini [6] NULL,                     -- DlFini
    getuids [7] SEQUENCE OF INTEGER,   -- got a set of MUIDs
    getpmids [8] SEQUENCE OF INTEGER,  -- got a set of PMIDs
    outuid [9] INTEGER,                -- result muid or 0 if not found
    outpmid [10] PubMedId,             -- result pmid or 0 if not found
    getpme [11] Pubmed-entry,          -- got Pubmed Entry
    getmlr [12] Medlars-entry          -- got Medlars Entry
}

END

-- mmdb1.asn
--$Revision: 6.1 $
--**********************************************************************
--
--  Biological Macromolecule 3-D Structure Data Types for MMDB,
--                A Molecular Modeling Database
--
--  Definitions for a biomolecular assembly and the MMDB database
--
--  By Hitomi Ohkawa, Jim Ostell, Chris Hogue, and Steve Bryant
--
--  National Center for Biotechnology Information
--  National Institutes of Health
--  Bethesda, MD 20894 USA
--
--  July 1995
--
--**********************************************************************

-- Contents of the MMDB database are currently based on files distributed by
-- the Protein Data Bank, PDB.  These data are changed in form, as described
-- in this specification.  To some extent they are also changed in content, in
-- that many data items implicit in PDB are made explicit, and others are
-- corrected or omitted as a consequence of validation checks.  The semantics
-- of MMDB data items are indicated by comments within the specification below.
-- These comments explain in detail the manner in which data items from PDB
-- have been mapped into MMDB.

MMDB DEFINITIONS ::=
BEGIN

EXPORTS Biostruc, Biostruc-id, Biostruc-set, Biostruc-annot-set,
        Biostruc-residue-graph-set;

IMPORTS Biostruc-graph, Biomol-descr, Residue-graph FROM MMDB-Chemical-graph
        Biostruc-model FROM MMDB-Structural-model
        Biostruc-feature-set FROM MMDB-Features
        Pub FROM NCBI-Pub
        Date, Object-id, Dbtag FROM NCBI-General;

-- A structure report or "biostruc" describes the components of a biomolecular
-- assembly in terms of their names and descriptions, and a chemical graph
-- giving atomic formula, connectivity and chirality. It also gives one or more
-- three-dimensional model structures, literally a mapping of the atoms,
-- residues and/or molecules of each component into a measured three-
-- dimensional space. Structure may also be described by named features, which
-- associate nodes in the chemical graph, or regions in space, with text or
-- numeric descriptors.

-- Note that a biostruc may also contain cross references to other databases,
-- including citations to relevant scientific literature. These cross
-- references use object types from other NCBI data specifications, which are
-- "imported" into MMDB, and not repeated in this specification.

Biostruc ::= SEQUENCE {
    id SEQUENCE OF Biostruc-id,
    descr SEQUENCE OF Biostruc-descr OPTIONAL,
    chemical-graph Biostruc-graph,
    features SEQUENCE OF Biostruc-feature-set OPTIONAL,
    model SEQUENCE OF Biostruc-model OPTIONAL }

-- A Biostruc-id is a collection of identifiers for the molecular assembly.
-- Mmdb-id's are NCBI-assigned, and are intended to be unique and stable
-- identifiers.  Other-id's are synonyms.
Biostruc-id ::= CHOICE {
    mmdb-id Mmdb-id,
    other-database Dbtag,
    local-id Object-id }

Mmdb-id ::= INTEGER

-- The description of a biostruc refers to both the reported chemical and
-- spatial structure of a biomolecular assembly.  PDB-derived descriptors
-- which refer specifically to the chemical components or spatial structure
-- are not provided here, but instead as descriptors of the biostruc-graph or
-- biostruc-model. For PDB-derived structures the biostruc name is the PDB
-- id-code. PDB-derived citations appear as publications within the biostruc
-- description, and include a data-submission citation derived from PDB AUTHOR
-- records. Citations are described using the NCBI Pub specification.

Biostruc-descr ::= CHOICE {
    name VisibleString,
    pdb-comment VisibleString,
    other-comment VisibleString,
    history Biostruc-history,
    attribution Pub }

-- The history of a biostruc indicates its origin and its update history
-- within MMDB, the NCBI-maintained molecular structure database.

Biostruc-history ::= SEQUENCE {
    replaces Biostruc-replace OPTIONAL,
    replaced-by Biostruc-replace OPTIONAL,
    data-source Biostruc-source OPTIONAL }

Biostruc-replace ::= SEQUENCE {
    id Biostruc-id,
    date Date }

-- The origin of a biostruc is a reference to another database. PDB release
-- date and PDB-assigned id codes are recorded here, as are the PDB-assigned
-- entry date and replacement history.

Biostruc-source ::= SEQUENCE {
    name-of-database VisibleString,
    version-of-database CHOICE {
        release-date Date,
        release-code VisibleString } OPTIONAL,
    database-entry-id Biostruc-id,
    database-entry-date Date,
    database-entry-history SEQUENCE OF VisibleString OPTIONAL}

-- A biostruc set is a means to collect ASN.1 data for many biostrucs in
-- one file, as convenient for application programs.  The object type is not
-- intended to imply similarity of the biostrucs grouped together.
Biostruc-set ::= SEQUENCE {
    id SEQUENCE OF Biostruc-id OPTIONAL,
    descr SEQUENCE OF Biostruc-descr OPTIONAL,
    biostrucs SEQUENCE OF Biostruc }

-- A biostruc annotation set is a means to collect ASN.1 data for biostruc
-- features into one file. The object type is intended as a means to store
-- feature annotation of similar type, such as "core" definitions for a
-- threading program, or structure-structure alignments for a structure-
-- similarity browser.

Biostruc-annot-set ::= SEQUENCE {
    id SEQUENCE OF Biostruc-id OPTIONAL,
    descr SEQUENCE OF Biostruc-descr OPTIONAL,
    features SEQUENCE OF Biostruc-feature-set }

-- A biostruc residue graph set is a collection of residue graphs.  The object
-- type is intended as a means to record dictionaries containing the chemical
-- subgraphs of "standard" residue types, which are used as a means to
-- simplify description of the covalent structure of a biomolecular assembly.
-- The standard residue graph dictionary supplied with the MMDB database
-- contains 20 standard L amino acids and 8 standard ribonucleotide groups.
-- These graphs are complete, including explicit hydrogen atoms and separate
-- instances for the terminal polypeptide and polynucleotide residues.
Biostruc-residue-graph-set ::= SEQUENCE {
    id SEQUENCE OF Biostruc-id OPTIONAL,
    descr SEQUENCE OF Biomol-descr OPTIONAL,
    residue-graphs SEQUENCE OF Residue-graph }

END

--**********************************************************************
--
--  Biological Macromolecule 3-D Structure Data Types for MMDB,
--                A Molecular Modeling Database
--
--  Definitions for a chemical graph
--
--  By Hitomi Ohkawa, Jim Ostell, Chris Hogue and Steve Bryant
--
--  National Center for Biotechnology Information
--  National Institutes of Health
--  Bethesda, MD 20894 USA
--
--  July, 1995
--
--**********************************************************************

MMDB-Chemical-graph DEFINITIONS ::=
BEGIN

EXPORTS Biostruc-graph, Biomol-descr, Residue-graph,
        Molecule-id, PCSubstance-id, Residue-id, Atom-id;

IMPORTS Pub FROM NCBI-Pub
        BioSource FROM NCBI-BioSource
        Seq-id FROM NCBI-Seqloc
        Biostruc-id FROM MMDB;

-- A biostruc graph contains the complete chemical graph of the biomolecular
-- assembly.  The assembly graph is defined hierarchically, in terms of the
-- subgraphs of component molecules.  For PDB-derived biostrucs,
-- the molecules forming the assembly are the individual biopolymer chains and
-- any non-polymer or "heterogen" groups which are present.

-- The PDB-derived "compound name" field appears as the name within the
-- biostruc-graph description.  PDB "class" and "source" fields appear as
-- explicit attributes.  PDB-derived structures are assigned an assembly type
-- of "other" unless they have been further classified as the "physiological
-- form" or "crystallographic cell" contents.  If they have, the source of the
-- type classification appears as a citation within the assembly description.

-- Note that the biostruc-graph also includes as literals the subgraphs of
-- any nonstandard residues present within it. For PDB-derived biostrucs these
-- subgraphs are constructed automatically, with validation as described below.
Biostruc-graph ::= SEQUENCE {
    descr SEQUENCE OF Biomol-descr OPTIONAL,
    molecule-graphs SEQUENCE OF Molecule-graph,
    inter-molecule-bonds SEQUENCE OF Inter-residue-bond OPTIONAL,
    residue-graphs SEQUENCE OF Residue-graph OPTIONAL }

-- A biomolecule description refers to the chemical structure of a molecule or
-- component substructures.  This descriptor type is used at the level of
-- assemblies, molecules and residues, and also for residue-graph dictionaries.
-- The BioSource object type is drawn from NCBI taxonomy data specifications,
-- and is not repeated here.

Biomol-descr ::= CHOICE {
    name VisibleString,
    pdb-class VisibleString,
    pdb-source VisibleString,
    pdb-comment VisibleString,
    other-comment VisibleString,
    organism BioSource,
    attribution Pub,
    assembly-type INTEGER {
        physiological-form(1),
        crystallographic-cell(2),
        other(255) },
    molecule-type INTEGER {
        dna(1),
        rna(2),
        protein(3),
        other-biopolymer(4),
        solvent(5),
        other-nonpolymer(6),
        other(255) } }

-- A molecule chemical graph is defined by a sequence of residues.  Nonpolymers
-- are described in the same way, but may contain only a single residue.

-- Biopolymer molecules are identified within PDB entries according to their
-- appearance on SEQRES records, which formally define a biopolymer as such.
-- Biopolymers are defined by the distinction between ATOM and HETATM
-- coordinate records only in cases where the chemical sequence from SEQRES
-- is in conflict with coordinate data. The PDB-assigned chain code appears as
-- the name within the molecule descriptions of the biopolymers.

-- Nonpolymer molecules from PDB correspond to individual HETEROGEN groups,
-- excluding any HETEROGEN groups which represent modified biopolymer residues.
-- These molecules are named according to the chain, residue type and residue
-- number fields as assigned by PDB. Any description appearing in the PDB HET
-- record appears as a pdb-comment within the molecule description.
-- Molecule types for PDB-derived molecule graphs are assigned by matching
-- residue and atom names against the PDB-documented standard types for protein,
-- DNA and RNA, and against residue codes commonly used to indicate solvent.
-- Classification is by "majority rule". If more than half of the residues in
-- a biopolymer are standard groups of one type, then the molecule is of that
-- type, and otherwise classified as "other". Note that this classification does
-- not preclude the presence of modified residues, but insists they constitute
-- less than half the biopolymer. Non-polymers are classified only as "solvent"
-- or "other".

-- Note that a molecule graph may also contain a set of cross references
-- to biopolymer sequence databases.  All biopolymer molecules in MMDB contain
-- appropriate identifiers for the corresponding entry in the NCBI-Sequences
-- database, in particular the NCBI "gi" number, which may be used for sequence
-- retrieval. The Seq-id object type is defined in the NCBI molecular sequence
-- specification, and not repeated here.

Molecule-graph ::= SEQUENCE {
    id Molecule-id,
    descr SEQUENCE OF Biomol-descr OPTIONAL,
    seq-id Seq-id OPTIONAL,
    residue-sequence SEQUENCE OF Residue,
    inter-residue-bonds SEQUENCE OF Inter-residue-bond OPTIONAL,
    sid PCSubstance-id OPTIONAL }

Molecule-id ::= INTEGER

-- Pubchem substance id

PCSubstance-id ::= INTEGER

-- Residues may be assigned a text-string name as well as an id number. PDB
-- assigned residue numbers appear as the residue name.

Residue ::= SEQUENCE {
    id Residue-id,
    name VisibleString OPTIONAL,
    residue-graph Residue-graph-pntr }

Residue-id ::= INTEGER

-- Residue graphs from different sources may be referenced within a molecule
-- graph.  The allowed choices are the nonstandard residue graphs included in
-- the present biostruc, residue graphs within other biostrucs, or residue
-- graphs within tables of standard residue definitions.
Residue-graph-pntr ::= CHOICE {
    local Residue-graph-id,
    biostruc Biostruc-graph-pntr,
    standard Biostruc-residue-graph-set-pntr }

Biostruc-graph-pntr ::= SEQUENCE {
    biostruc-id Biostruc-id,
    residue-graph-id Residue-graph-id }

Biostruc-residue-graph-set-pntr ::= SEQUENCE {
    biostruc-residue-graph-set-id Biostruc-id,
    residue-graph-id Residue-graph-id }

-- Residue graphs define atomic formulae, connectivity, chirality, and names.
-- For standard residue graphs from the MMDB dictionary the PDB-assigned
-- residue-type code appears as the name within the residue graph description,
-- and the full trivial name of the residue as a comment within that
-- description.  For any nonstandard residue graphs provided with an MMDB
-- biostruc the PDB-assigned residue-type code similarly appears as the name
-- within the description, and any information provided on PDB HET records as
-- a pdb-comment within that description.

-- Note that nonstandard residue graphs for a PDB-derived biostruc may be
-- incomplete. Current PDB format cannot represent connectivity for groups
-- which are disordered, and for which no coordinates are given. In these
-- cases the residue graph defined in MMDB represents only the subgraph that
-- could be identified from available ATOM, HETATM and CONECT records.

Residue-graph ::= SEQUENCE {
    id Residue-graph-id,
    descr SEQUENCE OF Biomol-descr OPTIONAL,
    residue-type INTEGER {
        deoxyribonucleotide(1),
        ribonucleotide(2),
        amino-acid(3),
        other(255) } OPTIONAL,
    iupac-code SEQUENCE OF VisibleString OPTIONAL,
    atoms SEQUENCE OF Atom,
    bonds SEQUENCE OF Intra-residue-bond,
    chiral-centers SEQUENCE OF Chiral-center OPTIONAL }

Residue-graph-id ::= INTEGER

-- Atoms in residue graphs are defined by elemental symbols and names.  PDB-
-- assigned atom names appear here in the name field, except in cases of known
-- PDB synonyms.  In these cases atom names are mapped to the names used in the
-- MMDB standard dictionary. This occurs primarily for hydrogen atoms, where
-- PDB practice allows synonyms for several atom types.  For PDB atoms the
-- elemental symbol is obtained by parsing the PDB atom name field, allowing
-- for known special-semantics cases where the atom name does not follow the
-- documented encoding rule. Ionizable protons are identified within standard
-- residue graphs in the MMDB dictionary, but not within automatically-defined
-- nonstandard graphs.

-- In the element enumeration the value of each named element is its atomic
-- number (h = 1 through lr = 103); other and unknown use 254 and 255.

Atom ::= SEQUENCE {
    id Atom-id,
    name VisibleString OPTIONAL,
    iupac-code SEQUENCE OF VisibleString OPTIONAL,
    element ENUMERATED {
        h(1), he(2), li(3), be(4), b(5),
        c(6), n(7), o(8), f(9), ne(10),
        na(11), mg(12), al(13), si(14), p(15),
        s(16), cl(17), ar(18), k(19), ca(20),
        sc(21), ti(22), v(23), cr(24), mn(25),
        fe(26), co(27), ni(28), cu(29), zn(30),
        ga(31), ge(32), as(33), se(34), br(35),
        kr(36), rb(37), sr(38), y(39), zr(40),
        nb(41), mo(42), tc(43), ru(44), rh(45),
        pd(46), ag(47), cd(48), in(49), sn(50),
        sb(51), te(52), i(53), xe(54), cs(55),
        ba(56), la(57), ce(58), pr(59), nd(60),
        pm(61), sm(62), eu(63), gd(64), tb(65),
        dy(66), ho(67), er(68), tm(69), yb(70),
        lu(71), hf(72), ta(73), w(74), re(75),
        os(76), ir(77), pt(78), au(79), hg(80),
        tl(81), pb(82), bi(83), po(84), at(85),
        rn(86), fr(87), ra(88), ac(89), th(90),
        pa(91), u(92), np(93), pu(94), am(95),
        cm(96), bk(97), cf(98), es(99), fm(100),
        md(101), no(102), lr(103),
        other(254), unknown(255) },
    ionizable-proton ENUMERATED {
        true(1),
        false(2),
        unknown(255) } OPTIONAL }

Atom-id ::= INTEGER

-- Intra-residue-bond specifies connectivity between atoms in Residue-graph.
-- Unlike Inter-residue-bond defined later, its participating atoms are part of
-- a residue subgraph dictionary, not part of a specific biostruc-graph.
-- For residue graphs in the standard MMDB dictionary bonds are defined from
-- the known chemical structures of amino acids and nucleotides.  For
-- nonstandard residue graphs bonds are defined from PDB CONECT records, with
-- validation for consistency with coordinate data, and from stereochemical
-- calculation to identify unreported bonds.  Validation and bond identification
-- are based on comparison of inter-atomic distances to the sum of covalent
-- radii for the corresponding elements.

Intra-residue-bond ::= SEQUENCE {
    atom-id-1 Atom-id,
    atom-id-2 Atom-id,
    bond-order INTEGER {
        single(1),
        partial-double(2),
        aromatic(3),
        double(4),
        triple(5),
        other(6),
        unknown(255)} OPTIONAL }

-- Chiral centers are atoms with tetrahedral geometry.  Chirality is defined
-- by a chiral volume involving the chiral center and 3 other atoms bonded to
-- it.  For any coordinates assigned to atoms c, n1, n2, and n3, the vector
-- triple product (n1-c) dot ( (n2-c) cross (n3-c) ) must have the indicated
-- sign.  The calculation assumes an orthogonal right-handed coordinate system
-- as is used for MMDB model structures.

-- Chirality is defined for standard residues in the MMDB dictionary, but is
-- not assigned automatically for PDB-derived nonstandard residues. If assigned
-- for nonstandard residues, the source of chirality information is described
-- by a citation within the residue description.

Chiral-center ::= SEQUENCE {
    c Atom-id,
    n1 Atom-id,
    n2 Atom-id,
    n3 Atom-id,
    sign ENUMERATED {
        positive(1),
        negative(2) } }

-- Inter-residue bonds are defined by a reference to two atoms. For PDB-derived
-- structures bonds are identified from biopolymer connectivity according to
-- SEQRES and from other connectivity information on SSBOND and CONECT
-- records. These data are validated and unreported bonds identified by
-- stereochemical calculation, using the same criteria as for intra-residue
-- bonds.
Inter-residue-bond ::= SEQUENCE {
    atom-id-1 Atom-pntr,
    atom-id-2 Atom-pntr,
    bond-order INTEGER {
        single(1),
        partial-double(2),
        aromatic(3),
        double(4),
        triple(5),
        other(6),
        unknown(255)} OPTIONAL }

-- Atoms, residues and molecules within the current biostruc are referenced
-- by hierarchical pointers.

Atom-pntr ::= SEQUENCE {
    molecule-id Molecule-id,
    residue-id Residue-id,
    atom-id Atom-id }

Atom-pntr-set ::= SEQUENCE OF Atom-pntr

END

-- mmdb2.asn
--$Revision: 6.0 $
--**********************************************************************
--
--  Biological Macromolecule 3-D Structure Data Types for MMDB,
--                A Molecular Modeling Database
--
--  Definitions for structural models
--
--  By Hitomi Ohkawa, Jim Ostell, Chris Hogue and Steve Bryant
--
--  National Center for Biotechnology Information
--  National Institutes of Health
--  Bethesda, MD 20894 USA
--
--  July, 1996
--
--**********************************************************************

MMDB-Structural-model DEFINITIONS ::=
BEGIN

EXPORTS Biostruc-model, Model-id, Model-coordinate-set-id;

IMPORTS Chem-graph-pntrs, Atom-pntrs, Chem-graph-alignment,
        Sphere, Cone, Cylinder, Brick, Transform FROM MMDB-Features
        Biostruc-id FROM MMDB
        Pub FROM NCBI-Pub;

-- A structural model maps chemical components into a measured three-
-- dimensional space. PDB-derived biostrucs generally contain 4 models,
-- corresponding to "views" of the structure of a biomolecular assembly with
-- increasing levels of complexity. Model types indicate the complexity of the
-- view.

-- The model named "NCBI all atom" represents a view suitable for most
-- computational biology applications. It provides complete atomic coordinate
-- data for a "single best" model, omitting statistical disorder information
-- and/or ensemble structure descriptions provided in the source PDB file.
-- Construction of the single best model is based on the assumption that the
-- contents of the "alternate conformation" field from pdb imply no correlation
-- among the occupancies of multiple sites assigned to sets of atoms: the best
-- site is chosen only on the basis of highest occupancy. Note, however, that
-- alternate conformation sets where correlation is implied are generally
-- constrained in crystallographic refinement to have uniform occupancy, and
-- will thus be selected as a set. For ensemble models the model which assigns
-- coordinates to the most atoms is chosen. If numbers of coordinates are the
-- same, the model occurring first in the PDB file is selected. The single
-- best model includes complete coordinates for all nonpolymer components, but
-- omits those classified as "solvent". Model type is 3 for this model.

-- The model named "NCBI backbone" represents a simple view intended for
-- graphic displays and rapid transmission over a network. It includes only
-- alpha carbon or backbone phosphate coordinates for biopolymers. It is based
-- on selection of alpha-carbon and backbone phosphate atoms from the "NCBI
-- all atom" model. The model type is set to 2. An even simpler model gives
-- only a cartoon representation, using cylinders corresponding to secondary
-- structure elements. This is named "NCBI vector", and has model type 1.

-- The models named "PDB Model 1", "PDB Model 2", etc. represent the complete
-- information provided by PDB, including full descriptions of statistical
-- disorder. The name of the model is based on the contents of the PDB MODEL
-- record, with a default name of "PDB Model 1" for PDB files which contain
-- only a single model. Construction of these models is based on the
-- assumption that contents of the PDB "alternate conformation" field are
-- intended to imply correlation among the occupancies of atom sets flagged by
-- the same identifier. The special flag " " (blank) is assumed to indicate
-- sites occupied in all alternate conformations, and sites flagged otherwise,
-- together with " ", to indicate a distinct member of an ensemble of
-- alternate conformations. Note that construction of ensemble members
-- according to these assumptions requires two validation checks on PDB
-- "alternate conformation" flags: they must be unique among sites assigned to
-- the same atom, and the special " " flag must occur only for unique
-- sites. Sites which violate the first check are flagged as "u", for
-- "unknown"; they are omitted from all ensemble definitions but are
-- nonetheless retained in the coordinate list. Sites which violate the second
-- check are flagged "b" for "blank", and are included in an appropriately
-- named ensemble. The model type for pdb all models is 4.

-- Note that in the MMDB database models are stored in the ASN.1 stream in
-- order of increasing model type value. Since models occur as the last item
-- in a biostruc, parsers may avoid reading the entire stream if the desired
-- model is one of the simplified types, which occur first in the stream. This
-- can save considerable I/O time, particularly for large ensemble models from
-- NMR determinations.

Biostruc-model ::= SEQUENCE {
    id Model-id,
    type Model-type,
    descr SEQUENCE OF Model-descr OPTIONAL,
    model-space Model-space OPTIONAL,
    model-coordinates SEQUENCE OF Model-coordinate-set OPTIONAL }

Model-id ::= INTEGER

Model-type ::= INTEGER {
    ncbi-vector(1),
    ncbi-backbone(2),
    ncbi-all-atom(3),
    pdb-model(4),
    other(255)}

Model-descr ::= CHOICE {
    name VisibleString,
    pdb-reso VisibleString,
    pdb-method VisibleString,
    pdb-comment VisibleString,
    other-comment VisibleString,
    attribution Pub }

-- The model space defines measurement units and any external reference frame.
-- Coordinates refer to a right-handed orthogonal system defined on axes
-- tagged x, y and z in the coordinate and feature definitions of a biostruc.
-- Coordinates from PDB-derived structures are reported without change, in
-- angstrom units.  The units of temperature and occupancy factors are not
-- defined explicitly in PDB, but are inferred from their value range.

Model-space ::= SEQUENCE {
    coordinate-units ENUMERATED {
        angstroms(1),
        nanometers(2),
        other(3),
        unknown(255)},
    thermal-factor-units ENUMERATED {
        b(1),
        u(2),
        other(3),
        unknown(255)} OPTIONAL,
    occupancy-factor-units ENUMERATED {
        fractional(1),
        electrons(2),
        other(3),
        unknown(255)} OPTIONAL,
    density-units ENUMERATED {
        electrons-per-unit-volume(1),
        arbitrary-scale(2),
        other(3),
        unknown(255)} OPTIONAL,
    reference-frame Reference-frame OPTIONAL }

-- An external reference frame is a pointer to another biostruc, with an
-- optional operator to rotate and translate coordinates into its model space.
-- This item is intended for representation of homology-derived model
-- structures, and is not present for structures from PDB.

Reference-frame ::= SEQUENCE {
    biostruc-id Biostruc-id,
    rotation-translation Transform OPTIONAL }

-- Atomic coordinates may be assigned literally or by reference to another
-- biostruc.  The reference coordinate type is used to represent homology-
-- derived model structures.  PDB-derived structures have literal coordinates.

-- Referenced coordinates identify another biostruc, any transformation to be
-- applied to coordinates from that biostruc, and a mapping of the chemical
-- graph of the present biostruc onto that of the referenced biostruc.  They
-- give an "alignment" of atoms in the current biostruc with those in another,
-- from which the coordinates of matched atoms may be retrieved.  For non-
-- atomic models "alignment" may also be represented by molecule and residue
-- equivalence lists.  Referenced coordinates are a data item intended for
-- representation of homology models, with an explicit pointer to their source
-- information.  They do not occur in PDB-derived models.
Model-coordinate-set ::= SEQUENCE { id Model-coordinate-set-id OPTIONAL, descr SEQUENCE OF Model-descr OPTIONAL, coordinates CHOICE { literal Coordinates, reference Chem-graph-alignment } } Model-coordinate-set-id ::= INTEGER -- Literal coordinates map chemical components into the model space. Three -- mapping types are allowed, atomic coordinate models, density-grid models, -- and surface models. A model consists of a sequence of such coordinate sets, -- and may thus combine coordinate subsets which have a different source. -- PDB-derived models contain a single atomic coordinate set, as they by -- definition represent information from a single source. Coordinates ::= CHOICE { atomic Atomic-coordinates, surface Surface-coordinates, density Density-coordinates } -- Literal atomic coordinate values give location, occupancy and order -- parameters, and a pointer to a specific atom defined in the biostruc graph. -- Temperature and occupancy factors have their conventional crystallographic -- definitions, with units defined in the model space declaration. Atoms, -- sites, temperature-factors, occupancies and alternate-conformation-ids -- are parallel arrays, i.e. they have the same number of values as given by -- number-of-points. Conformation ensembles represent distinct correlated- -- disorder subsets of the coordinates. They will be present only for certain -- "views" of PDB structures, as described above. Their derivation from PDB- -- supplied "alternate-conformation" ids is described below. Atomic-coordinates ::= SEQUENCE { number-of-points INTEGER, atoms Atom-pntrs, sites Model-space-points, temperature-factors Atomic-temperature-factors OPTIONAL, occupancies Atomic-occupancies OPTIONAL, alternate-conf-ids Alternate-conformation-ids OPTIONAL, conf-ensembles SEQUENCE OF Conformation-ensemble OPTIONAL } -- The atoms whose location is described by each coordinate are identified -- via a hierarchical pointer to the chemical graph of the biomolecular -- assembly.
Coordinates may be matched with atoms in the chemical structure -- by the values of the molecule, residue and atom id's given here, which -- match exactly the items of the same type defined in Biostruc-graph. -- Coordinates are given as integer values, with a scale factor to convert -- to real values for each x, y or z, in the units indicated in model-space. -- Integer values must be divided by the scale factor. This use of integer -- values reduces the ASN.1 stream size. The scale factors for temperature -- factors and occupancies are given separately, but must be used in the same -- fashion to produce properly scaled real values. Model-space-points ::= SEQUENCE { scale-factor INTEGER, x SEQUENCE OF INTEGER, y SEQUENCE OF INTEGER, z SEQUENCE OF INTEGER } Atomic-temperature-factors ::= CHOICE { isotropic Isotropic-temperature-factors, anisotropic Anisotropic-temperature-factors } Isotropic-temperature-factors ::= SEQUENCE { scale-factor INTEGER, b SEQUENCE OF INTEGER } Anisotropic-temperature-factors ::= SEQUENCE { scale-factor INTEGER, b-11 SEQUENCE OF INTEGER, b-12 SEQUENCE OF INTEGER, b-13 SEQUENCE OF INTEGER, b-22 SEQUENCE OF INTEGER, b-23 SEQUENCE OF INTEGER, b-33 SEQUENCE OF INTEGER } Atomic-occupancies ::= SEQUENCE { scale-factor INTEGER, o SEQUENCE OF INTEGER } -- An alternate conformation id is optionally associated with each coordinate. -- Aside from corrections due to the validation checks described above, the -- contents of MMDB Alternate-conformation-ids are identical to the PDB -- "alternate conformation" field. Alternate-conformation-ids ::= SEQUENCE OF Alternate-conformation-id Alternate-conformation-id ::= VisibleString -- Correlated disorder ensemble is defined by a set of alternate conformation -- id's which identify coordinates relevant to that ensemble. These are -- defined from the validated and corrected contents of the PDB "alternate -- conformation" field as described above.
A given ensemble, for example, may -- consist of atom sites flagged by " " and "A" Alternate-conformation-ids. -- Names for ensembles are constructed from these flags. This example would be -- named, in its description, "PDB Ensemble blank plus A". -- Note that this interpretation is consistent with common PDB usage of the -- "alternate conformation" field, but that PDB specifications do not formally -- distinguish between correlated and uncorrelated disorder in crystallographic -- models. Ensembles identified in MMDB thus may not correspond to the meaning -- intended by PDB or the depositor. No information is lost, however, and -- if the intended meaning is known, alternative ensemble descriptions may be -- reconstructed directly from the Alternate-conformation-ids. -- Note that correlated disorder as defined here is allowed within an atomic -- coordinate set but not between the multiple sets which may define a model. -- Multiple sets within the same model are intended as a means to represent -- assemblies modeled from different experimentally determined structures, -- where correlated disorder between coordinate sets is not relevant. Conformation-ensemble ::= SEQUENCE { name VisibleString, alt-conf-ids SEQUENCE OF Alternate-conformation-id } -- Literal surface coordinates define the chemical components whose structure -- is described by a surface, and the surface itself. The surface may be -- either a regular geometric solid or a triangle-mesh of arbitrary shape.
Surface-coordinates ::= SEQUENCE { contents Chem-graph-pntrs, surface CHOICE { sphere Sphere, cone Cone, cylinder Cylinder, brick Brick, tmesh T-mesh, triangles Triangles } } T-mesh ::= SEQUENCE { number-of-points INTEGER, scale-factor INTEGER, swap SEQUENCE OF BOOLEAN, x SEQUENCE OF INTEGER, y SEQUENCE OF INTEGER, z SEQUENCE OF INTEGER } Triangles ::= SEQUENCE { number-of-points INTEGER, scale-factor INTEGER, x SEQUENCE OF INTEGER, y SEQUENCE OF INTEGER, z SEQUENCE OF INTEGER, number-of-triangles INTEGER, v1 SEQUENCE OF INTEGER, v2 SEQUENCE OF INTEGER, v3 SEQUENCE OF INTEGER } -- Literal density coordinates define the chemical components whose structure -- is described by a density grid, parameters of this grid, and density values. Density-coordinates ::= SEQUENCE { contents Chem-graph-pntrs, grid-corners Brick, grid-steps-x INTEGER, grid-steps-y INTEGER, grid-steps-z INTEGER, fastest-varying ENUMERATED { x(1), y(2), z(3)}, slowest-varying ENUMERATED { x(1), y(2), z(3)}, scale-factor INTEGER, density SEQUENCE OF INTEGER } END -- mmdb3.asn --$Revision: 6.3 $ --********************************************************************** -- -- Biological Macromolecule 3-D Structure Data Types for MMDB, -- A Molecular Modeling Database -- -- Definitions for structural features and biostruc addressing -- -- By Hitomi Ohkawa, Jim Ostell, Chris Hogue and Steve Bryant -- -- National Center for Biotechnology Information -- National Institutes of Health -- Bethesda, MD 20894 USA -- -- July, 1996 -- --********************************************************************** MMDB-Features DEFINITIONS ::= BEGIN EXPORTS Biostruc-feature-set, Chem-graph-pntrs, Atom-pntrs, Chem-graph-alignment, Sphere, Cone, Cylinder, Brick, Transform, Biostruc-feature-set-id, Biostruc-feature-id; IMPORTS Biostruc-id FROM MMDB Molecule-id, Residue-id, Atom-id FROM MMDB-Chemical-graph Model-id, Model-coordinate-set-id FROM MMDB-Structural-model User-object FROM NCBI-General Pub FROM NCBI-Pub; -- Named
model features refer to sets of residues or atoms, or a region in -- the model space. A few specific feature types are allowed for compatibility -- with PDB usage, but the purpose of a named model feature is simply to -- associate various types of information with a set of atoms or -- residues, or a spatially-defined region of the model structure. They also -- support association of various properties with each residue or atom of a -- set. -- PDB-derived secondary structure defines a single feature, represented as a -- sequence of residue motifs, as are the contents of PDB SITE and -- FTNOTE records. NCBI-assigned core and secondary structure descriptions -- are also represented as a sequence of residue motifs. Biostruc-feature-set ::= SEQUENCE { id Biostruc-feature-set-id, descr SEQUENCE OF Biostruc-feature-set-descr OPTIONAL, features SEQUENCE OF Biostruc-feature } Biostruc-feature-set-id ::= INTEGER Biostruc-feature-set-descr ::= CHOICE { name VisibleString, pdb-comment VisibleString, other-comment VisibleString, attribution Pub } -- An explicitly specified type in Biostruc-feature allows for -- efficient extraction and indexing of feature sets of a specific type. -- Special types are provided for coloring and rendering, as -- needed by molecular graphics programs.
Biostruc-feature ::= SEQUENCE { id Biostruc-feature-id OPTIONAL, name VisibleString OPTIONAL, type INTEGER { helix(1), strand(2), sheet(3), turn(4), site(5), footnote(6), comment(7), -- new subgraph(100), -- NCBI domain reserved region(101), core(102), -- user core definition supercore(103), -- NCBI reserved color(150), -- new render(151), -- new label(152), -- new transform(153), -- new camera(154), -- new script(155), -- for scripts alignment(200), -- VAST reserved similarity(201), multalign(202), -- multiple alignment indirect(203), -- new cn3dstate(254), -- Cn3D reserved other(255) } OPTIONAL, property CHOICE { color Color-prop, render Render-prop, transform Transform, camera Camera, script Biostruc-script, user User-object } OPTIONAL, location CHOICE { subgraph Chem-graph-pntrs, region Region-pntrs, alignment Chem-graph-alignment, similarity Region-similarity, indirect Other-feature } OPTIONAL } -- new -- Other-feature allows for specifying location via reference to another -- Biostruc-feature and its location. Other-feature ::= SEQUENCE { biostruc-id Biostruc-id, set Biostruc-feature-set-id, feature Biostruc-feature-id } Biostruc-feature-id ::= INTEGER -- Atom, residue or molecule motifs describe a substructure defined by a set -- of nodes from the chemical graph. PDB secondary structure features are -- described as a residue motif, since they are not associated with any one of -- the multiple models that may be provided in a PDB file. NCBI-assigned -- secondary structure is represented in the same way, even though it is -- model specific, since this allows for simple mapping of the structural -- feature onto a sequence-only representation. This addressing mode may also -- be used to describe features to be associated with particular atoms, -- as, for example, the chemical shift observed in an NMR experiment.
Chem-graph-pntrs ::= CHOICE { atoms Atom-pntrs, residues Residue-pntrs, molecules Molecule-pntrs } Atom-pntrs ::= SEQUENCE { number-of-ptrs INTEGER, molecule-ids SEQUENCE OF Molecule-id, residue-ids SEQUENCE OF Residue-id, atom-ids SEQUENCE OF Atom-id } Residue-pntrs ::= CHOICE { explicit Residue-explicit-pntrs, interval SEQUENCE OF Residue-interval-pntr } Residue-explicit-pntrs ::= SEQUENCE { number-of-ptrs INTEGER, molecule-ids SEQUENCE OF Molecule-id, residue-ids SEQUENCE OF Residue-id } Residue-interval-pntr ::= SEQUENCE { molecule-id Molecule-id, from Residue-id, to Residue-id } Molecule-pntrs ::= SEQUENCE { number-of-ptrs INTEGER, molecule-ids SEQUENCE OF Molecule-id } -- Region motifs describe features defined by spatial location, such as the -- site specified by a coordinate value, or a region within a bounding volume. Region-pntrs ::= SEQUENCE { model-id Model-id, region CHOICE { site SEQUENCE OF Region-coordinates, boundary SEQUENCE OF Region-boundary } } -- Coordinate sites describe a region in space by reference to individual -- coordinates, in a particular model. These coordinates may be either the -- x, y and z values of atomic coordinates, the triangles of a surface mesh, -- or the grid points of a density model. All are addressed in the same manner, -- as coordinate indices which give offsets from the beginning of the -- coordinate data arrays. A coordinate-index of 5, for example, refers to -- the 5th x, y and z values of an atomic coordinate set, the 5th v1, v2, and v3 -- values of a triangle mesh, or the 5th value in a density grid. -- PDB SITE and FTNOTE records refer to particular atomic coordinates, and they -- are represented as a region motif with addresses of type Region-coordinates. -- Any names or descriptions provided by PDB are thus associated with the -- indicated sites, in the indicated model.
Region-coordinates ::= SEQUENCE { model-coord-set-id Model-coordinate-set-id, number-of-coords INTEGER OPTIONAL, coordinate-indices SEQUENCE OF INTEGER OPTIONAL } -- Region boundaries are defined by regular solids located in the model space. Region-boundary ::= CHOICE { sphere Sphere, cone Cone, cylinder Cylinder, brick Brick } -- A biostruc alignment establishes an equivalence of nodes in the chemical -- graphs of two or more biostrucs. This may be mapped to a sequence -- alignment in the case of biopolymers. -- The 'dimension' component indicates the number of participants -- in the alignment. For pairwise alignments, such as VAST -- structure-structure alignments, the dimension will be always 2, with -- biostruc-ids, alignment, and domain each containing two entries for an -- aligned pair. The 'alignment' component contains a pair of Chem-graph-pntrs -- specifying a like number of corresponding residues in each structure. -- The 'domain' component specifies a region of each structure considered -- in the alignment. Only one transform (for the second structure) and -- one aligndata (for the pair) are provided for each VAST alignment. -- -- For multiple alignments, a set of components are treated as -- parallel arrays of length 'dimension'. -- The 'transform' component moves each structure to align it with -- the structure specified as the first element in the "parallel" array, -- so necessarily the first transform is a NULL transform. -- Align-stats are placeholders for scores.
Chem-graph-alignment ::= SEQUENCE { dimension INTEGER DEFAULT 2, biostruc-ids SEQUENCE OF Biostruc-id, alignment SEQUENCE OF Chem-graph-pntrs, domain SEQUENCE OF Chem-graph-pntrs OPTIONAL, transform SEQUENCE OF Transform OPTIONAL, aligndata SEQUENCE OF Align-stats OPTIONAL } Align-stats ::= SEQUENCE { descr VisibleString OPTIONAL, scale-factor INTEGER OPTIONAL, vast-score INTEGER OPTIONAL, vast-mlogp INTEGER OPTIONAL, align-res INTEGER OPTIONAL, rmsd INTEGER OPTIONAL, blast-score INTEGER OPTIONAL, blast-mlogp INTEGER OPTIONAL, other-score INTEGER OPTIONAL } -- A biostruc similarity describes spatial features which are similar between -- two or more biostrucs. Similarities are model dependent, and the model and -- coordinate set ids of the biostrucs must be specified. They do not -- necessarily map to a sequence alignment, as the regions referenced may -- be pieces of a surface or grid, and thus not uniquely mappable to particular -- chemical components. Region-similarity ::= SEQUENCE { dimension INTEGER DEFAULT 2, biostruc-ids SEQUENCE OF Biostruc-id, similarity SEQUENCE OF Region-pntrs, transform SEQUENCE OF Transform } -- Geometrical primitives are used in the definition of region motifs, and -- also non-atomic coordinates. Spheres, cones, cylinders and bricks are -- defined by a few points in the model space. Sphere ::= SEQUENCE { center Model-space-point, radius RealValue } Cone ::= SEQUENCE { axis-top Model-space-point, axis-bottom Model-space-point, radius-bottom RealValue } Cylinder ::= SEQUENCE { axis-top Model-space-point, axis-bottom Model-space-point, radius RealValue } -- A brick is defined by the coordinates of eight corners. These are assumed -- to appear in the order 000, 001, 010, 011, 100, 101, 110, 111, where the -- three digits, each 0 or 1, refer respectively to the x, y and z axes of a unit cube. -- Opposite edges are assumed to be parallel.
Brick ::= SEQUENCE { corner-000 Model-space-point, corner-001 Model-space-point, corner-010 Model-space-point, corner-011 Model-space-point, corner-100 Model-space-point, corner-101 Model-space-point, corner-110 Model-space-point, corner-111 Model-space-point } Model-space-point ::= SEQUENCE { scale-factor INTEGER, x INTEGER, y INTEGER, z INTEGER } RealValue ::= SEQUENCE { scale-factor INTEGER, scaled-integer-value INTEGER } Transform ::= SEQUENCE { id INTEGER, moves SEQUENCE OF Move } Move ::= CHOICE { rotate Rot-matrix, translate Trans-matrix } -- A rotation matrix is defined by 9 numbers, given by row, i.e., -- with column indices varying fastest. -- Coordinates, as a matrix with columns x, y, and z, are rotated -- via multiplication with the rotation matrix. -- A translation matrix is defined by 3 numbers, which are added to -- the rotated coordinates for the specified amount of translation. Rot-matrix ::= SEQUENCE { scale-factor INTEGER, rot-11 INTEGER, rot-12 INTEGER, rot-13 INTEGER, rot-21 INTEGER, rot-22 INTEGER, rot-23 INTEGER, rot-31 INTEGER, rot-32 INTEGER, rot-33 INTEGER } Trans-matrix ::= SEQUENCE { scale-factor INTEGER, tran-1 INTEGER, tran-2 INTEGER, tran-3 INTEGER } -- The camera is a position relative to the world coordinates -- of the structure referred to by a location. -- this is used to set the initial position of the -- camera using OpenGL.
scale is the value used to scale the -- other values from floating point to integer Camera ::= SEQUENCE { x INTEGER, y INTEGER, distance INTEGER, angle INTEGER, scale INTEGER, modelview GL-matrix } GL-matrix ::= SEQUENCE { scale INTEGER, m11 INTEGER, m12 INTEGER, m13 INTEGER, m14 INTEGER, m21 INTEGER, m22 INTEGER, m23 INTEGER, m24 INTEGER, m31 INTEGER, m32 INTEGER, m33 INTEGER, m34 INTEGER, m41 INTEGER, m42 INTEGER, m43 INTEGER, m44 INTEGER } Color-prop ::= SEQUENCE { r INTEGER OPTIONAL, g INTEGER OPTIONAL, b INTEGER OPTIONAL, name VisibleString OPTIONAL } -- Note that Render-prop is compatible with the Annmm specification, -- i.e., its numbering schemes do not clash with those in Render-prop. -- NOTE(review): the final "Render-prop" above presumably should name the Annmm scheme; confirm against the Annmm specification. Render-prop ::= INTEGER { default (0), -- Default view wire (1), -- use wireframe space (2), -- use spacefill stick (3), -- use stick model (thin cylinders) ballNStick (4), -- use ball & stick model thickWire (5), -- thicker wireframe hide (9), -- don't show this name (10), -- display its name next to it number (11), -- display its number next to it pdbNumber (12), -- display its PDB number next to it objWireFrame (150), -- display MMDB surface object as wireframe objPolygons (151), -- display MMDB surface object as polygons colorsetCPK (225), -- color atoms like CPK models colorsetbyChain (226), -- color each chain different colorsetbyTemp (227), -- color using isotropic Temp factors colorsetbyRes (228), -- color using residue properties colorsetbyLen (229), -- color changes along chain length colorsetbySStru (230), -- color by secondary structure colorsetbyHydro (231), -- color by hydrophobicity colorsetbyObject(246), -- color each object differently colorsetbyDomain(247), -- color each domain differently other (255) } -- When a Biostruc-Feature with a Biostruc-script is initiated, -- it should play the specified steps one at a time, setting the feature-do -- list as the active display.
-- The camera can be set using a feature-do, -- but it may be moved independently with -- camera-move, which specifies how to move -- the camera dynamically during the step along the path defined (e.g., -- a zoom, a rotate). -- Any value of pause (in 1/10ths of a second) will force a pause -- after an image is shown. -- If waitevent is TRUE, it will await a mouse or keypress and ignore -- the pause value. Biostruc-script ::= SEQUENCE OF Biostruc-script-step Biostruc-script-step ::= SEQUENCE { step-id Step-id, step-name VisibleString OPTIONAL, feature-do SEQUENCE OF Other-feature OPTIONAL, camera-move Transform OPTIONAL, pause INTEGER DEFAULT 10, waitevent BOOLEAN, extra INTEGER, jump Step-id OPTIONAL } Step-id ::= INTEGER END -- ncbimime.asn --$Revision: 6.12 $ --**************************************************************** -- -- NCBI MIME type (chemical/ncbi-asn1-ascii and chemical/ncbi-asn1-binary) -- by Jonathan Epstein, February 1996 -- --**************************************************************** NCBI-Mime DEFINITIONS ::= BEGIN EXPORTS Ncbi-mime-asn1; IMPORTS Biostruc, Biostruc-annot-set FROM MMDB Cdd FROM NCBI-Cdd Seq-entry FROM NCBI-Seqset Seq-annot FROM NCBI-Sequence Medline-entry FROM NCBI-Medline Cn3d-style-dictionary, Cn3d-user-annotations FROM NCBI-Cn3d; Ncbi-mime-asn1 ::= CHOICE { entrez Entrez-general, -- just a structure alignstruc Biostruc-align, -- structures & sequences & alignments alignseq Biostruc-align-seq, -- sequence alignment strucseq Biostruc-seq, -- structure & sequences strucseqs Biostruc-seqs, -- structure & sequences & alignments general Biostruc-seqs-aligns-cdd -- all-purpose "grab bag" -- others may be added here in the future } -- generic bundle of sequence and alignment info Bundle-seqs-aligns ::= SEQUENCE { sequences SET OF Seq-entry OPTIONAL, -- sequences seqaligns SET OF Seq-annot OPTIONAL, -- sequence alignments strucaligns Biostruc-annot-set OPTIONAL, -- structure alignments imports SET OF Seq-annot OPTIONAL, --
imports (updates in Cn3D) style-dictionary Cn3d-style-dictionary OPTIONAL, -- Cn3D stuff user-annotations Cn3d-user-annotations OPTIONAL } Biostruc-seqs-aligns-cdd ::= SEQUENCE { seq-align-data CHOICE { bundle Bundle-seqs-aligns, -- either seqs + alignments cdd Cdd -- or CDD (which contains these) }, structures SET OF Biostruc OPTIONAL, -- structures structure-type ENUMERATED { -- type of structures to load if ncbi-backbone(2), -- not present; meanings and ncbi-all-atom(3), -- values are same as MMDB's pdb-model(4) -- Model-type } OPTIONAL } Biostruc-align ::= SEQUENCE { master Biostruc, slaves SET OF Biostruc, alignments Biostruc-annot-set, -- structure alignments sequences SET OF Seq-entry, -- sequences seqalign SET OF Seq-annot, style-dictionary Cn3d-style-dictionary OPTIONAL, user-annotations Cn3d-user-annotations OPTIONAL } Biostruc-align-seq ::= SEQUENCE { -- display seq structure align only sequences SET OF Seq-entry, -- sequences seqalign SET OF Seq-annot, style-dictionary Cn3d-style-dictionary OPTIONAL, user-annotations Cn3d-user-annotations OPTIONAL } Biostruc-seq ::= SEQUENCE { -- display structure seq added by yanli structure Biostruc, sequences SET OF Seq-entry, style-dictionary Cn3d-style-dictionary OPTIONAL, user-annotations Cn3d-user-annotations OPTIONAL } Biostruc-seqs ::= SEQUENCE { -- display blast alignment along with neighbor's structure added by yanli structure Biostruc, sequences SET OF Seq-entry, -- sequences seqalign SET OF Seq-annot, style-dictionary Cn3d-style-dictionary OPTIONAL, user-annotations Cn3d-user-annotations OPTIONAL } Entrez-style ::= ENUMERATED { docsum (1), genbank (2) , genpept (3) , fasta (4) , asn1 (5) , graphic (6) , alignment (7) , globalview (8) , report (9) , medlars (10) , embl (11) , pdb (12) , kinemage (13) } Entrez-general ::= SEQUENCE { title VisibleString OPTIONAL, data CHOICE { ml Medline-entry , prot Seq-entry , nuc Seq-entry , genome Seq-entry , structure Biostruc , strucAnnot Biostruc-annot-set } , style
Entrez-style , location VisibleString OPTIONAL } END -- objprt.asn --$Revision: 6.0 $ --******************************************************************** -- -- Print Templates -- James Ostell, 1993 -- -- --******************************************************************** NCBI-ObjPrt DEFINITIONS ::= BEGIN EXPORTS PrintTemplate, PrintTemplateSet; PrintTemplate ::= SEQUENCE { name TemplateName , -- name for this template labelfrom VisibleString OPTIONAL, -- ASN.1 path to get label from format PrintFormat } TemplateName ::= VisibleString PrintTemplateSet ::= SEQUENCE OF PrintTemplate PrintFormat ::= SEQUENCE { asn1 VisibleString , -- ASN.1 partial path for this label VisibleString OPTIONAL , -- printable label prefix VisibleString OPTIONAL, suffix VisibleString OPTIONAL, form PrintForm } PrintForm ::= CHOICE { -- Forms for various ASN.1 components block PrintFormBlock, boolean PrintFormBoolean, enum PrintFormEnum, text PrintFormText, use-template TemplateName, user UserFormat , null NULL } -- rarely used UserFormat ::= SEQUENCE { printfunc VisibleString , defaultfunc VisibleString OPTIONAL } PrintFormBlock ::= SEQUENCE { -- for SEQUENCE, SET separator VisibleString OPTIONAL , components SEQUENCE OF PrintFormat } PrintFormBoolean ::= SEQUENCE { true VisibleString OPTIONAL , false VisibleString OPTIONAL } PrintFormEnum ::= SEQUENCE { values SEQUENCE OF VisibleString OPTIONAL } PrintFormText ::= SEQUENCE { textfunc VisibleString OPTIONAL } END -- omssa.asn -- $Id: omssa.asn 142986 2008-10-14 13:50:26Z lewisg $ --********************************************************************** -- -- OMSSA (Open Mass Spectrometry Search Algorithm) data definitions -- Lewis Geer, 2003 -- -- make using something like -- "datatool -m omssa.asn -oc ObjOmssa -oA -od omssa.def" -- -- note that this file requires omssa.def -- --********************************************************************** OMSSA DEFINITIONS ::= BEGIN IMPORTS Bioseq FROM NCBI-Sequence; -- Generic holder for
experimental info NameValue ::= SEQUENCE { name VisibleString, value VisibleString } -- Holds a single spectrum MSSpectrum ::= SEQUENCE { number INTEGER, -- unique number of spectrum charge SEQUENCE OF INTEGER, -- may be more than one if unknown precursormz INTEGER, -- scaled precursor m/z, scale is in MSSearchSettings mz SEQUENCE OF INTEGER, -- scaled product m/z abundance SEQUENCE OF INTEGER, -- scaled product abundance iscale REAL, -- abundance scale, float to integer ids SEQUENCE OF VisibleString OPTIONAL, -- ids/filenames namevalue SEQUENCE OF NameValue OPTIONAL -- extra info: retention times, etc. } -- Holds a set of spectra MSSpectrumset ::= SEQUENCE OF MSSpectrum -- enumerate enzymes MSEnzymes ::= INTEGER { trypsin (0), argc (1), cnbr (2), chymotrypsin (3), formicacid (4), lysc (5), lysc-p (6), pepsin-a (7), tryp-cnbr (8), tryp-chymo (9), trypsin-p (10), whole-protein (11), aspn (12), gluc (13), aspngluc (14), top-down (15), semi-tryptic (16), no-enzyme (17), chymotrypsin-p (18), aspn-de (19), gluc-de (20), max(21), none (255) } -- enumerate modifications MSMod ::= INTEGER { methylk (0), -- methylation of K oxym (1), -- oxidation of methionine carboxymethylc (2), -- carboxymethyl cysteine carbamidomethylc(3), -- carbamidomethyl cysteine deamidationkq (4), -- deamidation of K and Q propionamidec (5), -- propionamide cysteine phosphorylations (6), -- phosphorylation of S phosphorylationt (7), -- phosphorylation of T phosphorylationy (8), -- phosphorylation of Y ntermmcleave (9), -- N terminal methionine cleavage ntermacetyl (10), -- N terminal protein acetyl ntermmethyl (11), -- N terminal protein methyl ntermtrimethyl (12), -- N terminal protein trimethyl methythiold (13), -- beta methythiolation of D methylq (14), -- methylation of Q trimethylk (15), -- trimethylation of K methyld (16), -- methylation of D methyle (17), -- methylation of E ctermpepmethyl (18), -- C terminal methylation trideuteromethyld (19), -- trideuteromethylation of D trideuteromethyle 
(20), -- trideuteromethylation of E ctermpeptrideuteromethyl (21), -- C terminal trideuteromethylation nformylmet (22), twoamino3oxobutanoicacid (23), acetylk (24), ctermamide (25), bmethylthiold (26), carbamidomethylk (27), carbamidometylh (28), carbamidomethyld (29), carbamidomethyle (30), carbamylk (31), ntermcarbamyl (32), citrullinationr (33), cysteicacidc (34), diiodinationy (35), dimethylk (36), dimethylr (37), ntermpepdimethyl (38), dihydroxyf (39), thioacetylk (40), ntermpeptioacetyl (41), farnesylationc (42), formylk (43), ntermpepformyl (44), formylkynureninw (45), phef (46), gammacarboxyld (47), gammacarboxyle (48), geranylgeranylc (49), ntermpepglucuronylg (50), glutathionec (51), glyglyk (52), guanidinationk (53), his2asnh (54), his2asph (55), ctermpephsem (56), ctermpephselactm (57), hydroxykynureninw (58), hydroxylationd (59), hydroxylationk (60), hydroxylationn (61), hydroxylationp (62), hydroxylationf (63), hydroxylationy (64), iodinationy (65), kynureninw (66), lipoylk (67), ctermpepmeester (68), meesterd (69), meestere (70), meesters (71), meestery (72), methylc (73), methylh (74), methyln (75), ntermpepmethyl (76), methylr (77), ntermpepmyristoyeylationg (78), ntermpepmyristoyl4hg (79), ntermpepmyristoylationg (80), myristoylationk (81), ntermformyl (82), nemc (83), nipcam (84), nitrow (85), nitroy (86), ctermpepo18 (87), ctermpepdio18 (88), oxyh (89), oxyw (90), ppantetheines (91), palmitoylationc (92), palmitoylationk (93), palmitoylations (94), palmitoylationt (95), phospholosss (96), phospholosst (97), phospholossy (98), phosphoneutrallossc (99), phosphoneutrallossd (100), phosphoneutrallossh (101), propionylk (102), ntermpeppropionyl (103), propionylheavyk (104), ntermpeppropionylheavy (105), pyridylk (106), ntermpeppyridyl (107), ntermpeppyrocmc (108), ntermpeppyroe (109), ntermpeppyroq (110), pyroglutamicp (111), spyridylethylc (112), semetm (113), sulfationy (114), suphonem (115), triiodinationy (116), trimethylationr (117), 
ntermpeptripalmitatec (118), usermod1 (119), -- start of user defined mods usermod2 (120), usermod3 (121), usermod4 (122), usermod5 (123), usermod6 (124), usermod7 (125), usermod8 (126), usermod9 (127), usermod10 (128), -- end of user defined mods icatlight (129), icatheavy (130), camthiopropanoylk (131), phosphoneutrallosss (132), phosphoneutrallosst (133), phosphoetdlosss (134), phosphoetdlosst (135), arg-13c6 (136), arg-13c6-15n4 (137), lys-13c6 (138), oxy18 (139), beta-elim-s (140), beta-elim-t (141), usermod11 (142), usermod12 (143), usermod13 (144), usermod14 (145), usermod15 (146), usermod16 (147), usermod17 (148), usermod18 (149), usermod19 (150), usermod20 (151), usermod21 (152), usermod22 (153), usermod23 (154), usermod24 (155), usermod25 (156), usermod26 (157), usermod27 (158), usermod28 (159), usermod29 (160), usermod30 (161), sulfinicacid (162), arg2orn (163), dehydro (164), carboxykynurenin (165), sumoylation (166), iTRAQ114nterm (167), iTRAQ114K (168), iTRAQ114Y (169), iTRAQ115nterm (170), iTRAQ115K (171), iTRAQ115Y (172), iTRAQ116nterm (173), iTRAQ116K (174), iTRAQ116Y (175), iTRAQ117nterm (176), iTRAQ117K (177), iTRAQ117Y (178), mmts (179), lys-2H4 (180), lys-13C615N2 (181), hexNAcN (182), dHexHexNAcN (183), hexNAcS (184), hexNAcT (185), mod186 (186), mod187 (187), mod188 (188), mod189 (189), mod190 (190), mod191 (191), mod192 (192), mod193 (193), mod194 (194), mod195 (195), mod196 (196), mod197 (197), mod198 (198), mod199 (199), mod200 (200), mod201 (201), mod202 (202), mod203 (203), mod204 (204), mod205 (205), mod206 (206), mod207 (207), mod208 (208), mod209 (209), mod210 (210), mod211 (211), mod212 (212), mod213 (213), mod214 (214), mod215 (215), mod216 (216), mod217 (217), mod218 (218), mod219 (219), mod220 (220), mod221 (221), mod222 (222), mod223 (223), mod224 (224), mod225 (225), mod226 (226), mod227 (227), mod228 (228), mod229 (229), mod230 (230), max (231), -- maximum number of mods unknown(9999), -- modification of unknown type 
none(10000) } -- enumerate modification types MSModType ::= INTEGER { modaa (0), -- at particular amino acids modn (1), -- at the N terminus of a protein modnaa (2), -- at the N terminus of a protein at particular amino acids modc (3), -- at the C terminus of a protein modcaa (4), -- at the C terminus of a protein at particular amino acids modnp (5), -- at the N terminus of a peptide modnpaa (6), -- at the N terminus of a peptide at particular amino acids modcp (7), -- at the C terminus of a peptide modcpaa (8), -- at the C terminus of a peptide at particular amino acids modmax (9) -- the max number of modification types } -- mass container MSMassSet ::= SEQUENCE { monomass REAL, averagemass REAL, n15mass REAL } -- Modification Definition MSModSpec ::= SEQUENCE { mod MSMod, -- what is the mod type MSModType, -- modification type name VisibleString, -- friendly name of mod monomass REAL, -- monoisotopic mass averagemass REAL, -- average mass n15mass REAL, -- monoisotopic n15 mass residues SEQUENCE OF VisibleString OPTIONAL, -- residues to apply mod to neutralloss MSMassSet OPTIONAL, -- loss after precursor mass determination unimod INTEGER OPTIONAL, -- the equivalent Unimod Accession number psi-ms VisibleString OPTIONAL -- the PSI-MS equivalent name } -- Holds a set of modifications MSModSpecSet ::= SEQUENCE OF MSModSpec -- How is charge to be handled? Some input files are not clear -- on this. For example, a dta file only specifies one charge, -- even though the charge is not really known. MSCalcPlusOne ::= INTEGER { dontcalc (0), -- don't guess charge one calc (1) -- guess charge one } -- user instructions on whether to believe charges in input file MSCalcCharge ::= INTEGER { calculate (0), -- guess the charge(s) from the data usefile (1), -- use what the input file says userange (2) -- use the charge range specified } -- How to handle precursor charge MSChargeHandle ::= SEQUENCE { calcplusone MSCalcPlusOne DEFAULT 1, -- do we guess charge one? 
calccharge MSCalcCharge DEFAULT 2, -- how do we handle charges? mincharge INTEGER DEFAULT 2, -- if userange, what is the min? maxcharge INTEGER DEFAULT 3, -- if userange, what is the max? considermult INTEGER DEFAULT 3, -- at which precursor charge to consider +2 ions? plusone REAL, -- what % of peaks below precursor needed to call as +1 maxproductcharge INTEGER OPTIONAL, -- maximum product ion charge prodlesspre BOOLEAN OPTIONAL -- product charge always less thanor equal to precursor? } -- what type of atomic mass to use MSSearchType ::= INTEGER { monoisotopic(0), average(1), monon15(2), exact(3), max(4) } -- what is the charge dependence of the mass tolerance? MSZdependence ::= INTEGER { independent(0), -- mass tol. invariant with charge linearwithz(1), -- mass tol. scales with charge max(2) } -- Iterative search settings MSIterativeSettings ::= SEQUENCE { researchthresh REAL, -- e-val threshold for re-searching spectra, 0 = always re-search subsetthresh REAL, -- e-val threshold for picking sequence subset, 0 = all sequences replacethresh REAL -- e-val threshold for replacing hitset, 0 = only if better } -- Library search settings MSLibrarySettings ::= SEQUENCE { libnames SEQUENCE OF VisibleString, -- names of search libraries presearch BOOLEAN, -- should there be a restriction on precursor mass? useomssascore BOOLEAN, -- use the omssa score? usereplicatescore BOOLEAN, -- use the number of replicates score? qtofscore BOOLEAN -- use the qtof score? } -- Generic search settings MSSearchSettings ::= SEQUENCE { precursorsearchtype MSSearchType, -- average or monoisotopic? productsearchtype MSSearchType, -- average or monoisotopic? ionstosearch SEQUENCE OF MSIonType, -- which ions to search? peptol REAL, -- peptide mass tolerance msmstol REAL, -- msms mass tolerance zdep MSZdependence, -- what is the charge dependence of the mass tolerance? 
cutoff REAL, -- evalue cutoff -- next 3 fields define intensity fraction below -- which peaks will be discarded cutlo REAL, -- the start of the cutoff, fraction of most intense peak cuthi REAL, -- the end of the cutoff cutinc REAL, -- the increment of the cutoff singlewin INTEGER, -- the size of the single charge filtering window doublewin INTEGER, -- the size of the double charge filtering window singlenum INTEGER, -- the number of peaks allowed in the single window doublenum INTEGER, -- the number of peaks allowed in the double window fixed SEQUENCE OF MSMod, -- fixed PTM's variable SEQUENCE OF MSMod, -- variable PTM's enzyme MSEnzymes, -- digestion enzyme missedcleave INTEGER, -- number of missed cleavages allowed hitlistlen INTEGER DEFAULT 25, -- the number of hits kept in memory -- for a spectrum db VisibleString, -- sequence set to search, e.g. "nr" tophitnum INTEGER, -- number of m/z to consider in first pass minhit INTEGER DEFAULT 2, -- minimum number of m/z values for a valid hit minspectra INTEGER DEFAULT 4, -- minimum number of m/z for a valid spectrum scale INTEGER DEFAULT 100, -- scale for m/z float to integer maxmods INTEGER DEFAULT 64, -- maximum number of mass ladders per -- database peptide taxids SEQUENCE OF INTEGER OPTIONAL, -- taxa to limit search chargehandling MSChargeHandle OPTIONAL, -- how to deal with charges usermods MSModSpecSet OPTIONAL, -- user defined modifications pseudocount INTEGER DEFAULT 1, -- min number of counts per precursor bin searchb1 INTEGER DEFAULT 0, -- should b1 product be in search (1=no, 0=yes) searchctermproduct INTEGER DEFAULT 0, -- should c terminus ion be searched (1=no, 0=yes) maxproductions INTEGER DEFAULT 0, -- max number of ions in each series (0=all) minnoenzyme INTEGER DEFAULT 4, -- min number of AA in peptide for noenzyme search maxnoenzyme INTEGER DEFAULT 0, -- max number of AA in peptide for noenzyme search (0=none) exactmass REAL OPTIONAL, -- the threshold in Da for adding neutron settingid INTEGER OPTIONAL,
-- id of the search settings iterativesettings MSIterativeSettings OPTIONAL, -- iterative search settings precursorcull INTEGER OPTIONAL, -- turn on aggressive precursor culling for ETD (0=none) infiles SEQUENCE OF MSInFile OPTIONAL, -- input files outfiles SEQUENCE OF MSOutFile OPTIONAL, -- output files nocorrelationscore INTEGER OPTIONAL, -- turn on correlation score (1=nocorr) probfollowingion REAL OPTIONAL, -- probability of a consecutive ion (used in correlation) nmethionine BOOLEAN OPTIONAL, -- should nmethionine be cleaved? automassadjust REAL OPTIONAL, -- fraction allowable adjustment of product mass tolerance lomasscutoff REAL OPTIONAL, -- low mass filter in Daltons, unscaled libsearchsettings MSLibrarySettings OPTIONAL, -- library search settings noprolineions SEQUENCE OF MSIonType OPTIONAL, -- which ions to use no proline rule reversesearch BOOLEAN OPTIONAL, -- do reverse search othersettings SEQUENCE OF NameValue OPTIONAL -- extra search settings } MSSerialDataFormat ::= INTEGER { none (0) , asntext (1), -- open ASN.1 text format asnbinary (2), -- open ASN.1 binary format xml (3), -- open XML format csv (4), -- csv (excel) pepxml (5), -- pepXML format xmlbz2 (6) -- bzip2 XML format } MSOutFile ::= SEQUENCE { outfile VisibleString, -- output file name outfiletype MSSerialDataFormat, -- output file type includerequest BOOLEAN -- should the output include the request? 
} MSSpectrumFileType ::= INTEGER { dta(0), dtablank(1), dtaxml(2), asc(3), pkl(4), pks(5), sciex(6), mgf(7), unknown(8), oms(9), -- asn.1 binary for iterative search omx(10), -- xml for iterative search xml(11), -- xml MSRequest omxbz2 (12) -- bzip2 omx file } MSInFile ::= SEQUENCE { infile VisibleString, -- input file name infiletype MSSpectrumFileType -- input file type } MSSearchSettingsSet ::= SEQUENCE OF MSSearchSettings -- The search request that is given to the OMSSA algorithm MSRequest ::= SEQUENCE { spectra MSSpectrumset, -- the set of spectra settings MSSearchSettings, -- the search settings rid VisibleString OPTIONAL, -- request id moresettings MSSearchSettingsSet OPTIONAL, -- additional search runs modset MSModSpecSet OPTIONAL -- list of mods that can be used in search } -- enumeration of ion types MSIonType ::= INTEGER { a (0), b (1), c (2), x (3), y (4), z (5), parent(6), internal(7), immonium(8), unknown(9), max (10) } -- types of neutral loss MSIonNeutralLoss ::= INTEGER { water (0), -- minus 18 Da ammonia (1) -- minus 17 Da } -- isotopic type of ion MSIonIsotopicType ::= INTEGER { monoisotopic (0), -- no c13s in molecule c13 (1), -- one c13 in molecule c13two (2), -- two c13s in molecule, and so on... c13three (3), c13four (4) } -- type of immonium ion MSImmonium ::= SEQUENCE { parent VisibleString, -- parent amino acid product VisibleString OPTIONAL -- product ion code } -- ion type at a finer level than ion series MSIon ::= SEQUENCE { neutralloss MSIonNeutralLoss OPTIONAL, -- is this peak a neutral loss? isotope MSIonIsotopicType OPTIONAL, -- isotopic composition of peak internal VisibleString OPTIONAL, -- if iontype is internal, this is the internal sequence immonium MSImmonium OPTIONAL -- if iontype is immonium, show characteristics } -- annotated comments about the ion MSIonAnnot ::= SEQUENCE { suspect BOOLEAN OPTIONAL, -- is this peak suspect? massdiff REAL OPTIONAL, -- what is the difference in mass from library spectrum?
missingisotope BOOLEAN OPTIONAL -- are the lower mass peaks missing? } -- defines a particular ion MSMZHit ::= SEQUENCE { ion MSIonType, -- ion type, e.g. b charge INTEGER, -- ion charge number INTEGER, -- the sequential number of the ion mz INTEGER, -- scaled m/z value in Da index INTEGER OPTIONAL, -- the index of the peak in the original spectrum moreion MSIon OPTIONAL, -- more information about the ion type annotation MSIonAnnot OPTIONAL -- annotations on the ion } -- contains information about sequences with identical peptide -- sequences MSPepHit ::= SEQUENCE { start INTEGER, -- start position (inclusive) in sequence stop INTEGER, -- stop position (inclusive) in sequence gi INTEGER OPTIONAL, -- genbank identifier accession VisibleString OPTIONAL, -- sequence accession defline VisibleString OPTIONAL, -- sequence description protlength INTEGER OPTIONAL, -- length of protein oid INTEGER OPTIONAL, -- blast library oid reversed BOOLEAN OPTIONAL, -- reversed sequence pepstart VisibleString OPTIONAL, -- AA before the peptide pepstop VisibleString OPTIONAL -- AA after the peptide } -- modifications to a hit peptide MSModHit ::= SEQUENCE { site INTEGER, -- the position in the peptide modtype MSMod -- the type of modification } -- sets of scores MSScoreSet ::= SEQUENCE { name VisibleString, value REAL } -- hits to a given spectrum MSHits ::= SEQUENCE { evalue REAL, -- E-value (expect value) pvalue REAL, -- P-value (probability value) charge INTEGER, -- the charge state used in search. 
-1 == not +1 pephits SEQUENCE OF MSPepHit, -- peptides that match this hit mzhits SEQUENCE OF MSMZHit OPTIONAL, -- ions hit pepstring VisibleString OPTIONAL, -- the peptide sequence mass INTEGER OPTIONAL, -- scaled experimental mass of peptide in Da mods SEQUENCE OF MSModHit OPTIONAL, -- modifications to sequence pepstart VisibleString OPTIONAL, -- AA before the peptide (deprecated) pepstop VisibleString OPTIONAL, -- AA after the peptide (deprecated) protlength INTEGER OPTIONAL, -- length of protein hit (deprecated) theomass INTEGER OPTIONAL, -- scaled theoretical mass of peptide hit oid INTEGER OPTIONAL, -- blast library oid (deprecated) scores SEQUENCE OF MSScoreSet OPTIONAL, -- optional scores (for library search) libaccession VisibleString OPTIONAL -- library search accession } -- error return for a particular spectrum's hitset MSHitError ::= INTEGER { none (0), generalerr (1), unable2read (2), -- can't read the spectrum notenuffpeaks (3) -- not enough peaks to search } -- MSHitSet annotation by end user MSUserAnnot ::= INTEGER { none (0), delete (1), flag (2) } -- contains a set of hits to a single spectrum MSHitSet ::= SEQUENCE { number INTEGER, -- unique number of spectrum error MSHitError OPTIONAL, -- error, if any hits SEQUENCE OF MSHits OPTIONAL, -- set of hits to spectrum ids SEQUENCE OF VisibleString OPTIONAL, -- filenames or other ids of spectra searched namevalue SEQUENCE OF NameValue OPTIONAL,-- extra info: retention times, etc.
settingid INTEGER OPTIONAL, -- id of the search setting used userannotation MSUserAnnot OPTIONAL -- allows users to flag certain } -- error return for the entire response MSResponseError ::= INTEGER { none (0), generalerr (1), noblastdb (2), -- unable to open blast library noinput (3) -- input missing } -- bioseq container MSBioseq ::= SEQUENCE { oid INTEGER, -- blast library oid seq Bioseq } MSBioseqSet ::= SEQUENCE OF MSBioseq -- search results MSResponse ::= SEQUENCE { hitsets SEQUENCE OF MSHitSet, -- hits grouped by spectrum scale INTEGER DEFAULT 100, -- scale to change m/z float to integer rid VisibleString OPTIONAL, -- request id error MSResponseError OPTIONAL, -- error response version VisibleString OPTIONAL, -- version of OMSSA email VisibleString OPTIONAL, -- email address for notification dbversion INTEGER OPTIONAL, -- version of db searched (usually size) bioseqs MSBioseqSet OPTIONAL -- sequences found in search } -- holds both search requests and responses MSSearch ::= SEQUENCE { request SEQUENCE OF MSRequest OPTIONAL, response SEQUENCE OF MSResponse OPTIONAL } END -- pcassay.asn -- $Id: pcassay.asn,v 1.12 2006/01/10 12:54:50 bolton Exp $ -- =========================================================================== -- -- PUBLIC DOMAIN NOTICE -- National Center for Biotechnology Information -- -- This software/database is a "United States Government Work" under the -- terms of the United States Copyright Act. It was written as part of -- the author's official duties as a United States Government employee and -- thus cannot be copyrighted. This software/database is freely available -- to the public for use. The National Library of Medicine and the U.S. -- Government have not placed any restriction on its use or reproduction. -- -- Although all reasonable efforts have been taken to ensure the accuracy -- and reliability of the software and data, the NLM and the U.S. 
-- Government do not and cannot warrant the performance or results that -- may be obtained by using this software or data. The NLM and the U.S. -- Government disclaim all warranties, express or implied, including -- warranties of performance, merchantability or fitness for any particular -- purpose. -- -- Please cite the author in any work or product based on this material. -- -- =========================================================================== -- -- Authors: NCBI Structure Group -- -- File Description: -- ASN.1 definitions for PubChem biological assay data database -- -- =========================================================================== NCBI-PCAssay DEFINITIONS ::= BEGIN IMPORTS Pub FROM NCBI-Pub Date, Object-id FROM NCBI-General PC-ID, PC-Source, PC-XRefData FROM NCBI-PCSubstance; -- EXPORTS ; -- Container for multiple Assay Data Submissions PC-AssayContainer ::= SEQUENCE OF PC-AssaySubmit -- Container for Data Depositions and Assay Definitions PC-AssaySubmit ::= SEQUENCE { assay CHOICE { -- Assay Description or pre-existing Identifier aid INTEGER, -- Assay Identifier aid-source PC-Source, -- External Assay Identifier descr PC-AssayDescription, -- Assay Description (new or updated) aidver PC-ID -- Assay Identifier/Version (for internal use) }, data SEQUENCE OF PC-AssayResults OPTIONAL, -- Assay Data Deposition (vector) revoke SEQUENCE OF INTEGER OPTIONAL -- List of SID's whose data is to be suppressed } -- Container for multiple Assay Result Sets PC-AssayResultsSet ::= SEQUENCE OF PC-AssayResults -- Assay Results provided for a given Substance tested, with respect to the results types defined in the -- referenced Assay Description PC-AssayResults ::= SEQUENCE { -- Internal/External Tracking Information sid INTEGER, -- Tested Substance ID/Version [Either valid ID or, -- if "sid-source" is used, this is a "0" value] -- Note: A valid ID is greater than "0" sid-source PC-Source OPTIONAL, -- External Identifier for this Substance -- Note: May be 
used in-lieu of "sid" -- Note: This is non-optional if "sid" is "0" version INTEGER OPTIONAL, -- Version identifier for this AID-SID Result -- Note: Incoming data should set this to be "0" -- Data Annotation/Qualifier and URL to further Depositor Information comment VisibleString OPTIONAL, -- Annotation or qualifier for this Result -- Assay Result Data for this Sample -- Note: Users need populate only those "tid"s, for which there is data, in any order. outcome INTEGER { -- Assay Outcome inactive (1), -- Substance is considered Inactive active (2), -- Substance is considered Active inconclusive (3), -- Substance is Inconclusive unspecified (4) -- Substance Outcome is Unspecified } DEFAULT unspecified, rank INTEGER OPTIONAL, -- Rank of Assay Outcome (for result ordering) -- Note: Larger numbers are more active data SEQUENCE OF PC-AssayData OPTIONAL, -- Assay Data Reported for this SID (vector) url VisibleString OPTIONAL, -- Depositor provided URL for this Result xref SEQUENCE OF PC-AnnotatedXRef OPTIONAL -- Annotated Cross-Reference Information } -- Substance Tested Assay Results for a given Assay Result Type defined in the referenced Assay Description PC-AssayData ::= SEQUENCE { tid INTEGER, -- Assay Result Field Type ID (TID) -- Note: Result Field ID's must be greater than "0" value CHOICE { -- Assay Result, must be the same as defined for TID ival INTEGER, fval REAL, bval BOOLEAN, sval VisibleString } } -- Assay Description provided by an Organization that describes the assay/protocol performed and defines the -- measured end-points and parameters to be stored. An Assay Description is not a database table. You can -- define as many Result Definitions as needed and they need not be used by all Substances tested. -- -- Note: After initial submission, Users cannot add new Result Definitions or modify existing Result Definitions -- beyond the description text; however, users can change the Assay Description Information, as desired. 
PC-AssayDescription ::= SEQUENCE { -- Internal/External Tracking Information aid PC-ID, -- Assay Description ID/Version [Either valid ID -- or, if "aid-source" is used, a "0" dummy value] -- Note: Version is for internal use (only?) -- Note: A valid ID is greater than "0" aid-source PC-Source OPTIONAL, -- External Identifier for this Assay Description -- Note: May be used in-lieu of "aid" -- Note: This is non-optional if "aid" ID is "0" -- Assay Description Information name VisibleString, -- Short Assay Name (for display purposes) description SEQUENCE OF VisibleString OPTIONAL, -- Description of Assay protocol SEQUENCE OF VisibleString OPTIONAL, -- Procedure used to generate results comment SEQUENCE OF VisibleString OPTIONAL, -- Comments or additional information xref SEQUENCE OF PC-AnnotatedXRef OPTIONAL, -- Annotated Cross-Reference Information -- Allowed Assay Result Types results SEQUENCE OF PC-ResultType, -- Result Definitions (vector) -- Additional Information pub SEQUENCE OF Pub OPTIONAL, -- Depositor provided publications for this assay revision INTEGER OPTIONAL -- Revision identifier for textual description } -- Annotated Cross-Reference (XRef) Information to allow the XRef to be qualified, as to its meaning or context PC-AnnotatedXRef ::= SEQUENCE { xref PC-XRefData, -- Cross-Reference Information comment VisibleString OPTIONAL -- Annotation qualifier describing Cross-Reference meaning } -- Definition of Allowed Result Types for a given Assay PC-ResultType ::= SEQUENCE { -- Tracking or Description Information tid INTEGER, -- Assay Result Field Type ID (TID) name VisibleString, -- Result Field Name (short name for display) description SEQUENCE OF VisibleString OPTIONAL, -- Result Field Description -- Result Data Type and Validation Information type INTEGER { -- Result Data Type float (1), int (2), bool (3), string (4) }, constraints CHOICE { -- Allowed Values, used for validating incoming data -- If type is "float" fset SEQUENCE OF REAL, -- Allowed values 
must be equal to one of these fmin REAL, -- Allowed values (x) must be [ fmin <= x ] fmax REAL, -- Allowed values (x) must be [ x <= fmax ] frange PC-RealMinMax, -- Minimum/Maximum Range [ min <= x <= max ] -- If type is "int" iset SEQUENCE OF INTEGER, -- Allowed values must be equal to one of these imin INTEGER, -- Allowed values (x) must be [ imin <= x ] imax INTEGER, -- Allowed values (x) must be [ x <= imax ] irange PC-IntegerMinMax, -- Minimum/Maximum Range [ min <= x <= max ] -- If type is "string" sset SEQUENCE OF VisibleString -- Allowed values must be equal to one of these } OPTIONAL, -- Unit information provides the units for the values reported for this TID. For example, if the values -- reported for this TID are a concentration, e.g., micro-molar, setting the unit "um" allows PubChem to -- know that the value, e.g., "1.3", is actually "1.3 uM". This also allows PubChem to properly report the -- units when displaying the reported values for this TID. If the enumerated units provided below are -- insufficient, you may represent the units as a string in the optional "sunit" field (see below). unit INTEGER { -- Units for Value ppt (1), -- Parts per Thousand ppm (2), -- Parts per Million ppb (3), -- Parts per Billion mm (4), -- milliM um (5), -- microM nm (6), -- nanoM pm (7), -- picoM fm (8), -- femtoM mgml (9), -- milligrams per mL ugml (10), -- micrograms per mL ngml (11), -- nanograms per mL pgml (12), -- picograms per mL fgml (13), -- femtograms per mL m (14), -- Molar percent (15), -- Percent ratio (16), -- Ratio sec (17), -- Seconds rsec (18), -- Reciprocal Seconds min (19), -- Minutes rmin (20), -- Reciprocal Minutes day (21), -- Days rday (22), -- Reciprocal Days none (254), unspecified (255) } OPTIONAL, sunit VisibleString OPTIONAL, -- Unit Type (as a String) -- Value Transform information qualifies the values reported for this TID. 
For example, if the values -- reported for this TID are "-Log10 GI50", you may want to consider setting -- the "nlog" value below. In doing so, PubChem would know that the value, e.g., "5.0" -- is actually "1.0e-5". If the transformation applied is not listed, you may represent -- this transformation as a string in the "stransform" (see below) for eventual inclusion -- in the enumerated transform list below. transform INTEGER { -- Value Type Details linear (1), -- Linear Scale (x) ln (2), -- Natural Log Scale (ln x) log (3), -- Log Base 10 Scale (log10 x) reciprocal (4), -- Reciprocal Scale (1/x) negative (5), -- Negative Linear Scale (-x) nlog (6), -- Negative Log Base 10 Scale (-log10 x) nln (7) -- Negative Natural Log Scale (-ln x) } OPTIONAL, stransform VisibleString OPTIONAL -- Value Transform Type (as a String) } -- Minimum and Maximum Constraints on an Integer Value (used for validating incoming data) PC-IntegerMinMax ::= SEQUENCE { min INTEGER, -- Minimum Value Allowed max INTEGER -- Maximum Value Allowed } -- Minimum and Maximum Constraints on a Real Value (used for validating incoming data) PC-RealMinMax ::= SEQUENCE { min REAL, -- Minimum Value Allowed max REAL -- Maximum Value Allowed } END -- =========================================================================== -- $Log: pcassay.asn,v $ -- Revision 1.12 2006/01/10 12:54:50 bolton -- Major change to meaning of PC-ID version, now an assay restatement -- identifier. New revision identifier added to AssayDescription to -- track textual modifications (previous meaning of PC-ID version). -- Replaced "Sequence of Pub" in AssayResults with "Sequence of -- AnnotatedXRef". Added "PC-ID" to the AssaySubmit assay choice. -- -- Revision 1.11 2005/11/30 22:42:18 ywang -- change rank to be optional -- -- Revision 1.10 2005/11/08 13:12:49 bolton -- Added ability to provide publications at the assay description and assay -- result (per SID) level. Also, minor changes to comments and object order.
-- -- Revision 1.9 2005/09/29 21:05:52 tkachenk -- PC-AssayResults.data is made optional to be able to handle empty data rows -- -- Revision 1.8 2005/08/02 18:48:08 ywang -- add PC-AssayResultsSet to ease spec mapping -- -- Revision 1.7 2005/08/01 14:43:30 ucko -- Fix comma-misplacement typo in previous revision. -- -- Revision 1.6 2005/08/01 14:17:57 ywang -- make PC-AssayResults version OPTIONAL and put url at the end -- -- Revision 1.5 2005/07/28 17:05:20 bolton -- Major update to the Assay specification streamlining (by elimination) unused -- features, eliminating the assay deposition block, and to add new "revoke" -- feature. -- -- Revision 1.4 2005/03/02 16:18:49 bolton -- Added optional URL for Substance Result. -- -- Revision 1.3 2005/02/04 15:04:39 bolton -- Added assay result ranking. Reorganized order of object definitions. -- -- Revision 1.2 2004/07/13 14:31:17 bolton -- Added PC-XRefData, imported from the PCSubstance specification, to the -- DataSession and AssayDescription blocks. -- -- Revision 1.18 2004/06/10 13:24:18 bolton -- Changes made to in documentation and enumerations to detail that "0" is an -- invalid ID and an invalid enumeration. -- -- Revision 1.17 2004/06/04 11:55:21 bolton -- Added ability to allow external identifiers at all levels. Added improved -- documentation to reflect the current use of various sections of the spec. -- -- Revision 1.16 2004/06/01 14:14:26 bolton -- Forgot to remove PC-Source record at the PC-AssayDescription level. -- -- Revision 1.15 2004/05/28 18:32:52 bolton -- Slightly modified and improved accessioning scheme and associated -- documentation. -- -- Revision 1.14 2004/05/18 12:16:05 bolton -- Assay Data Session and Assay Description tweaks. -- -- Revision 1.13 2004/05/14 10:51:44 bolton -- Switched ordering of PC-AssayDescription and PC-AssayDataSession in the -- PC-AssayContainer Object. -- -- Revision 1.12 2004/05/12 13:05:02 bolton -- Major overhaul of the deposition/accession scheme. Minor tweaks. 
-- -- Revision 1.11 2004/05/03 12:48:39 bolton -- Added a root container object that holds both assay deposition and assay -- description data. Fixed a minor inconsistency with negative Log 10 label. -- -- Revision 1.10 2004/04/08 11:55:14 bolton -- Removed repeat of "Log" tag. -- -- Revision 1.9 2004/04/08 11:50:42 bolton -- Modifications to allow more/better: X-refs for Assay descriptions; activity -- summarization programmatically; and Result Type units. -- -- Revision 1.8 2004/04/01 19:29:53 bolton -- New version to reflect PubChem group consensus. -- -- Revision 1.7 2004/03/05 16:18:50 bolton -- Removed orphaned "PC-History" and "PC-Replacement". -- -- Revision 1.6 2004/03/04 20:30:14 bolton -- Minor fixes to allow compilation. -- -- Revision 1.5 2004/03/04 20:13:53 bolton -- Continued development of "pcassay", following latest pubchem meeting. -- -- Revision 1.4 2004/03/02 15:20:21 thiessen -- make pcsubstance and pcassay build on UNIX -- -- Revision 1.3 2004/02/27 18:39:28 bolton -- Changes to share "PC-Source" from "pcsubstance" to "pcassay". -- -- Revision 1.2 2004/02/26 15:17:30 bolton -- Minor changes to make "pcassay" library compile. -- -- =========================================================================== -- pcsubstance.asn -- $Id: pcsubstance.asn 109801 2007-09-01 13:57:01Z bolton $ -- =========================================================================== -- -- PUBLIC DOMAIN NOTICE -- National Center for Biotechnology Information -- -- This software/database is a "United States Government Work" under the -- terms of the United States Copyright Act. It was written as part of -- the author's official duties as a United States Government employee and -- thus cannot be copyrighted. This software/database is freely available -- to the public for use. The National Library of Medicine and the U.S. -- Government have not placed any restriction on its use or reproduction. 
-- -- Although all reasonable efforts have been taken to ensure the accuracy -- and reliability of the software and data, the NLM and the U.S. -- Government do not and cannot warrant the performance or results that -- may be obtained by using this software or data. The NLM and the U.S. -- Government disclaim all warranties, express or implied, including -- warranties of performance, merchantability or fitness for any particular -- purpose. -- -- Please cite the author in any work or product based on this material. -- -- =========================================================================== -- -- Authors: NCBI Structure Group -- -- File Description: -- ASN.1 definitions for PubChem small molecule database -- -- =========================================================================== NCBI-PCSubstance DEFINITIONS ::= BEGIN EXPORTS PC-Substance, PC-Compound, PC-Substances, PC-Compounds, PC-Source, PC-ID, PC-InfoData, PC-XRefData; IMPORTS Pub FROM NCBI-Pub Date, Object-id FROM NCBI-General; -- Root Record for Chemical Substance Definition PC-Substance ::= SEQUENCE { -- Internal Tracking Information sid PC-ID, -- Substance ID/Version [Either valid ID or a "0" dummy -- value, if "source" is to be used] -- Note: Version is for internal use (only?) 
-- Note: A valid ID is greater than "0" source PC-Source, -- Data Source for this Submission -- Substance Description Information pub SEQUENCE OF Pub OPTIONAL, -- Articles Describing this Substance synonyms SEQUENCE OF VisibleString OPTIONAL, -- Substance Names provided by Depositor comment SEQUENCE OF VisibleString OPTIONAL, -- Comments and Description provided by Depositor xref SEQUENCE OF PC-XRefData OPTIONAL, -- X-Ref/LinkOut Data provided by Depositor -- Structure Description compound PC-Compounds OPTIONAL -- Original Deposited Structure Information } -- Holder for groups of Substances PC-Substances ::= SEQUENCE OF PC-Substance -- ID and Version Description Information PC-ID ::= SEQUENCE { id INTEGER, -- Unique "Global" ID -- Note: Must be greater than "0" or, if invalid, "0" version INTEGER -- Incremented when Depositor updates record -- Note: For Internal Use (only?) } -- Describes Substance Source, if from another database PC-Source ::= CHOICE { individual Pub, -- Individual Submission db PC-DBTracking, -- External DB Submission mmdb PC-MMDBSource -- MMDB Submission (deprecated) } -- External DB Tracking Information PC-DBTracking ::= SEQUENCE { name VisibleString, -- Unique Name of External Database source-id Object-id, -- Primary Unique ID used by External DB date Date OPTIONAL, -- External Database Release Date description VisibleString OPTIONAL, -- External Database Release Code/Description pub Pub OPTIONAL -- Data Submission to same DB by original Author } -- MMDB Source Record detailing specific location or part of an MMDB Record PC-MMDBSource ::= SEQUENCE { mmdb-id INTEGER, -- MMDB Record ID -- Note: Must be greater than "0" or, if invalid, "0" molecule-id INTEGER, -- MMDB Molecule ID -- Note: Must be greater than "0" or, if invalid, "0" molecule-name SEQUENCE OF VisibleString, -- MMDB Molecule Name residue-id INTEGER OPTIONAL, -- Residue ID -- Note: Must be greater than "0" or, if invalid, "0" residue-name VisibleString OPTIONAL, -- Residue Name 
atom-id INTEGER OPTIONAL, -- Atom ID -- Note: Must be greater than "0" or, if invalid, "0" atom-name VisibleString OPTIONAL -- Atom Name } -- Depositor Provided X-Ref and LinkOut data for Entrez PC-XRefData ::= CHOICE { regid VisibleString, -- External Database Registry ID rn VisibleString, -- Registry Number (e.g., EC Number, CAS Number) mesh VisibleString, -- MESH Index Term pmid INTEGER, -- PubMed ID -- Note: Must be greater than "0" or, if invalid, "0" gi INTEGER, -- GenBank General ID -- Note: Please use protein-gi or nucleotide-gi, if possible -- Note: Must be greater than "0" or, if invalid, "0" mmdb INTEGER, -- MMDB ID -- Note: Must be greater than "0" or, if invalid, "0" sid INTEGER, -- PubChem Substance ID -- Note: Must be greater than "0" or, if invalid, "0" cid INTEGER, -- PubChem Compound ID -- Note: Must be greater than "0" or, if invalid, "0" dburl VisibleString, -- Depositor Source Database Homepage sburl VisibleString, -- Depositor Homepage for a Substance asurl VisibleString, -- Depositor Homepage for an Assay protein-gi INTEGER, -- GenBank General ID for a Protein -- Note: Must be greater than "0" or, if invalid, "0" nucleotide-gi INTEGER, -- GenBank General ID for a Nucleotide -- Note: Must be greater than "0" or, if invalid, "0" taxonomy INTEGER, -- Taxonomy ID for an Organism -- Note: Must be greater than "0" or, if invalid, "0" aid INTEGER, -- PubChem BioAssay ID -- Note: Must be greater than "0" or, if invalid, "0" mim INTEGER, -- MIM, Mendelian Inheritance in Man, Number -- Note: Must be greater than "0" or, if invalid, "0" gene INTEGER, -- Entrez Gene ID -- Note: Must be greater than "0" or, if invalid, "0" probe INTEGER -- Probe ID -- Note: Must be greater than "0" or, if invalid, "0" } -- Compound Record PC-Compound ::= SEQUENCE { -- Tracking Information id PC-CompoundType, -- Compound Qualifier (Type/ID) atoms PC-Atoms OPTIONAL, -- AtomID/Type Information bonds PC-Bonds OPTIONAL, -- BondID/Type/Atom Information stereo SEQUENCE OF 
PC-StereoCenter OPTIONAL, -- StereoCenter Descriptions coords SEQUENCE OF PC-Coordinates OPTIONAL, -- 2D/3D Coordinate Sets of Compound charge INTEGER OPTIONAL, -- Provided Total Formal Charge (Signed Integer) props SEQUENCE OF PC-InfoData OPTIONAL, -- Derived (computed) Properties stereogroups SEQUENCE OF PC-StereoGroup OPTIONAL, -- Relative stereochemistry groups count PC-Count OPTIONAL, -- Counts of various properties vbalt PC-Compounds OPTIONAL -- Alternate Valence-Bond Forms } -- Holder for groups of Compounds PC-Compounds ::= SEQUENCE OF PC-Compound -- Qualification used to describe the type of Compound deposited, standardized, or derived. -- Please note that mixtures/cocktails may be specified using previously deposited substances. PC-CompoundType ::= SEQUENCE { type INTEGER { -- Compound Qualifier or Type -- For Compound Depositions deposited (0), -- Original Deposited Compound -- For Standardized Compounds standardized (1), -- Standardized Form of a Deposited Compound component (2), -- Component of a Standardized Compound neutralized (3), -- Neutralized Form of a Standardized Compound -- For Mixture/Cocktail Depositions mixture (4), -- Substance that is a component of a mixture -- For Theoretical Compounds tautomer (5), -- Predicted Tautomer Form pka-state (6), -- Predicted Ionized pKa Form unknown (255) -- Unknown Compound Type } OPTIONAL, id CHOICE { -- Compound Namespace and ID (absent for "deposited" type compounds) cid INTEGER, -- Standardized Compound sid INTEGER, -- PubChem Substance (for "mixture" type compounds) xid INTEGER -- PubChem Theoretical Compound } OPTIONAL } -- Counts of various properties of a Compound PC-Count ::= SEQUENCE { heavy-atom INTEGER, -- Total count of non-Hydrogen (Heavy) Atoms -- StereoChemistry Counts atom-chiral INTEGER, -- Total count of (SP3) Chiral Atoms atom-chiral-def INTEGER, -- Total count of Defined (SP3) Chiral Atoms atom-chiral-undef INTEGER, -- Total count of Undefined (SP3) Chiral Atoms bond-chiral INTEGER, -- 
Total count of (SP2) Chiral Bonds bond-chiral-def INTEGER, -- Total count of (SP2) Defined Chiral Bonds bond-chiral-undef INTEGER, -- Total count of (SP2) Undefined Chiral Bonds -- Isotopic Counts isotope-atom INTEGER, -- Total count of Atoms with Isotopic Information -- Discrete Structure Counts covalent-unit INTEGER, -- Total count of covalently-bonded units in the record tautomers INTEGER -- Number of possible tautomers (Max. 999) } -- List of atom identifiers which are in a common stereochemistry group. -- All atoms in this group possess the characteristic of the type specified. -- The convention adopted is intended to be compatible with MDL's Enhanced -- Stereochemical Representation white paper. -- An atom can only be member of a single stereo group, and all atoms -- in a stereo group must have a stereo descriptor. -- Stereogroups only apply to stereocenters that can have parity. PC-StereoGroup ::= SEQUENCE { type INTEGER { absolute (1), -- Absolute configuration is known or (2), -- Relative configuration is known (absolute configuration is unknown) and (3), -- Mixture of stereoisomers unknown (255) -- Unknown configuration type }, aid SEQUENCE OF INTEGER -- Atom Identifiers of atoms in this group -- Note: Atom ID's must be greater than "0" } -- Compound Description/Descriptor Data PC-InfoData ::= SEQUENCE { urn PC-Urn, -- Universal Resource Name [for Value Qualification] value CHOICE { -- Data Value bval BOOLEAN, -- Boolean or Binary bvec SEQUENCE OF BOOLEAN, -- Boolean Vector ival INTEGER, -- Integer (signed or unsigned) ivec SEQUENCE OF INTEGER, -- Integer Vector fval REAL, -- Float or Double fvec SEQUENCE OF REAL, -- Double Vector sval VisibleString, -- String slist SEQUENCE OF VisibleString, -- List of Strings date Date, -- Date binary OCTET STRING, -- Binary Data bitlist BIT STRING -- Bit List (specialized version of Boolean vector) } } -- Universal Resource Name -- Provides explicit source information on derived or calculated data PC-Urn ::= SEQUENCE { 
label VisibleString,                        -- Generic Name or Label for Display [e.g., "Log P"]
    name VisibleString OPTIONAL,            -- Qualified Name [e.g., "XlogP"]
    datatype PC-UrnDataType OPTIONAL,       -- Specific Data Type of Value [e.g., binary]
    parameters VisibleString OPTIONAL,      -- Implementation Parameter [e.g., "metal=0"]
    implementation VisibleString OPTIONAL,  -- Implementation Name [e.g., "E_XlogP"]
    version VisibleString OPTIONAL,         -- Implementation Version [e.g., "3.317"]
    software VisibleString OPTIONAL,        -- Implementation Software [e.g., "Cactvs"]
    source VisibleString OPTIONAL,          -- Implementation Organization [e.g., "xemistry.com"]
    release VisibleString OPTIONAL          -- NCBI Implementation Release [e.g., "10.25.2005"]
}


-- URN Data Type
-- Provides the ability to use more specific data types than that directly provided by ASN.1.
-- Provides for more specific validation of specified data.
PC-UrnDataType ::= INTEGER {
    -- Basic Data Types
    string (1),        -- String [maps to a VisibleString]
    stringlist (2),    -- List of Strings [maps to VisibleString list]
    int (3),           -- 32-Bit Signed Integer [maps to an INTEGER]
    intvec (4),        -- Vector of 32-Bit Signed Integer [maps to INTEGER vector]
    uint (5),          -- 32-Bit Unsigned Integer [maps to an INTEGER]
    uintvec (6),       -- Vector of 32-Bit Unsigned Integer [maps to INTEGER vector]
    double (7),        -- 64-Bit Float [maps to a REAL]
    doublevec (8),     -- Vector of Double [maps to REAL vector]
    bool (9),          -- Boolean or Binary value [maps to a BOOLEAN]
    boolvec (10),      -- Boolean Vector [maps to BOOLEAN vector]

    -- Specialized Data Types
    uint64 (11),       -- 64-Bit Unsigned Integer (Hex form) [maps to a VisibleString]
    binary (12),       -- Binary Data Blob [maps to an OCTET STRING]
    url (13),          -- URL [maps to a VisibleString]
    unicode (14),      -- UniCode String [maps to a VisibleString]
    date (15),         -- ISO8601 Date [maps to a Date]
    fingerprint (16),  -- Binary Fingerprint [maps to an OCTET STRING]
                       -- (Gzip'ped bit list w/ 4-Byte prefix denoting bit list length)
    unknown (255)      -- Unknown Data Type [maps to a set of
-- VisibleString]
}


-- Coordinates for the Compound of a given type
PC-Coordinates ::= SEQUENCE {
    type SEQUENCE OF PC-CoordinateType,             -- Coordinate Type Information (vector)
    aid SEQUENCE OF INTEGER,                        -- Conformer Atom IDs (vector)
                                                    -- (to be kept synchronized with Conformers)
                                                    -- Note: Atom ID's must be greater than "0"
    conformers SEQUENCE OF PC-Conformer OPTIONAL,   -- Conformers for this Coordinate Set
    atomlabels SEQUENCE OF PC-AtomString OPTIONAL,  -- Atom labels for Conformer Set
    data SEQUENCE OF PC-InfoData OPTIONAL           -- Data Associated with these Coordinates
}


-- Drawing/Conformer Definition (in Parallel Arrays, synchronized to aid integer list)
-- 3D coordinates are specified in a right-handed coordinate system. For 2D plots, Y axis leads upwards.
PC-Conformer ::= SEQUENCE {  -- [Note: Parallel Arrays must be kept Synchronized]
    x SEQUENCE OF REAL,                    -- X Coordinates (vector)
    y SEQUENCE OF REAL,                    -- Y Coordinates (vector)
    z SEQUENCE OF REAL OPTIONAL,           -- Z Coordinates (vector)
    style PC-DrawAnnotations OPTIONAL,     -- Structure Annotations
    data SEQUENCE OF PC-InfoData OPTIONAL  -- Data Associated with this Conformer
}


-- Holder for groups of Conformers
PC-Conformers ::= SEQUENCE OF PC-Conformer


-- Coordinate Set Type Distinctions
PC-CoordinateType ::= INTEGER {
    twod (1),          -- 2D Coordinates
    threed (2),        -- 3D Coordinates (should also indicate units, below)
    submitted (3),     -- Depositor Provided Coordinates
    experimental (4),  -- Experimentally Determined Coordinates
    computed (5),      -- Computed Coordinates
    standardized (6),  -- Standardized Coordinates
    augmented (7),     -- Hybrid Original with Computed Coordinates (e.g., explicit H)
    aligned (8),       -- Template used to align drawing
    compact (9),       -- Drawing uses shorthand forms (e.g., COOH, OCH3, Et, etc.)
units-angstroms (10),       -- (3D) Coordinate units are Angstroms
    units-nanometers (11),  -- (3D) Coordinate units are nanometers
    units-pixel (12),       -- (2D) Coordinate units are pixels
    units-points (13),      -- (2D) Coordinate units are points
    units-stdbonds (14),    -- (2D) Coordinate units are standard bond lengths (1.0)
    units-unknown (255)     -- Coordinate units are unknown or unspecified
}


-- Drawing Annotations (in Parallel Arrays)
-- [Note: A pair of atoms can have multiple annotations]
PC-DrawAnnotations ::= SEQUENCE {  -- [Note: Parallel Arrays must be kept Synchronized]
    annotation SEQUENCE OF PC-BondAnnotation,  -- Bond Annotations (vector)
    aid1 SEQUENCE OF INTEGER,                  -- Atom1 Identifier (vector)
                                               -- Note: Atom ID's must be greater than "0"
    aid2 SEQUENCE OF INTEGER                   -- Atom2 Identifier (vector)
                                               -- Note: Atom ID's must be greater than "0"
}


-- Atom-Atom Annotation Information
PC-BondAnnotation ::= INTEGER {
    crossed (1),        -- Double Bond that can be both Cis/Trans
    dashed (2),         -- Hydrogen-Bond (3D Only?)
    wavy (3),           -- Unknown Stereochemistry
    dotted (4),         -- Complex/Fractional
    wedge-up (5),       -- Above-Plane
    wedge-down (6),     -- Below-Plane
    arrow (7),          -- Dative
    aromatic (8),       -- Aromatic
    resonance (9),      -- Resonance
    bold (10),          -- Fat Bond (Non-Specific User Interpreted Information)
    fischer (11),       -- Interpret Bond Stereo using Fischer Conventions
    closeContact (12),  -- Identification of Atom-Atom Close Contacts (3D Only)
    unknown (255)       -- Unspecified or Unknown Atom-Atom Annotation
}


-- Atom Information (in Parallel Arrays)
PC-Atoms ::= SEQUENCE {  -- [Note: Parallel Arrays must be kept Synchronized]
    aid SEQUENCE OF INTEGER,         -- Atom Identifiers (vector)
                                     -- Note: Atom ID's must be greater than "0"
    element SEQUENCE OF PC-Element,  -- Atomic Numbers (vector)

    -- Independent Arrays of ID-Value Pairs (Technically allows multiple values per Atom)
    label SEQUENCE OF PC-AtomString OPTIONAL,  -- Atom labels
    isotope SEQUENCE OF PC-AtomInt OPTIONAL,   -- Isotopic Information
    charge SEQUENCE OF PC-AtomInt OPTIONAL,
-- Formal Charges
    radical SEQUENCE OF PC-AtomRadical OPTIONAL,  -- Radical Information
    source SEQUENCE OF PC-AtomSource OPTIONAL,    -- E.g. identity of MMDB "R" groups
    comment SEQUENCE OF PC-AtomString OPTIONAL    -- Atom Comments
}


-- Specification of an Association between an Atom Identifier and Source
PC-AtomSource ::= SEQUENCE {
    aid INTEGER,          -- Atom Identifier for the R-Group Source
                          -- Note: Atom ID's must be greater than "0"
    source PC-MMDBSource  -- Atom Specific MMDB Record
}


-- Specification of an Association between an Atom Identifier and an Integer Value
PC-AtomInt ::= SEQUENCE {
    aid INTEGER,   -- Atom Identifier for the Value
                   -- Note: Atom ID's must be greater than "0"
    value INTEGER  -- Value Associated to the ID
}


-- Specification of an Association between an Atom Identifier and a String Value
PC-AtomString ::= SEQUENCE {
    aid INTEGER,         -- Atom Identifier for the Value
                         -- Note: Atom ID's must be greater than "0"
    value VisibleString  -- Value Associated to the ID
}


-- Rudimentary Atom Electronic Configuration Designation
PC-AtomRadical ::= SEQUENCE {
    aid INTEGER,       -- Atom Identifier for the Value
                       -- Note: Atom ID's must be greater than "0"
    type INTEGER {     -- Type of Atom Radical
        singlet (1),   -- Open-Shell Singlet
        doublet (2),   -- Open-Shell Doublet
        triplet (3),   -- Open-Shell Triplet
        quartet (4),   -- Open-Shell Quartet
        quintet (5),   -- Open-Shell Quintet
        hextet (6),    -- Open-Shell Hextet
        heptet (7),    -- Open-Shell Heptet
        octet (8),     -- Open-Shell Octet
        none (255)     -- Closed-Shell Singlet
    }
}


-- Element Information [which may contain "illegal" element values]
PC-Element::= INTEGER {
    -- Illegal Atom Numbers that may be Interpreted to be something else
    a (255),   -- Unspecified Atom (Asterisk)
    d (254),   -- Dummy Atom
    r (253),   -- Rgroup Label
    lp (252),  -- Lone Pair

    -- Elements
    h (1), he (2), li (3), be (4), b (5), c (6), n (7), o (8), f (9), ne(10),
    na(11), mg(12), al(13), si(14), p (15), s (16), cl(17), ar(18), k (19), ca(20),
    sc(21), ti(22), v (23), cr(24), mn(25), fe(26), co(27),
ni(28), cu(29), zn(30),
    ga(31), ge(32), as(33), se(34), br(35), kr(36), rb(37), sr(38), y (39), zr(40),
    nb(41), mo(42), tc(43), ru(44), rh(45), pd(46), ag(47), cd(48), in(49), sn(50),
    sb(51), te(52), i (53), xe(54), cs(55), ba(56), la(57), ce(58), pr(59), nd(60),
    pm(61), sm(62), eu(63), gd(64), tb(65), dy(66), ho(67), er(68), tm(69), yb(70),
    lu(71), hf(72), ta(73), w (74), re(75), os(76), ir(77), pt(78), au(79), hg(80),
    tl(81), pb(82), bi(83), po(84), at(85), rn(86), fr(87), ra(88), ac(89), th(90),
    pa(91), u(92), np(93), pu(94), am(95), cm(96), bk(97), cf(98), es(99), fm(100),
    md(101), no(102), lr(103), rf(104), db(105), sg(106), bh(107), hs(108), mt(109), ds(110),
    rg(111)
}


-- Bond Description Information (in Parallel Arrays)
PC-Bonds ::= SEQUENCE {  -- [Note: Parallel Arrays must be kept Synchronized]
    aid1 SEQUENCE OF INTEGER,      -- Atom1 Identifier (vector)
                                   -- Note: Atom ID's must be greater than "0"
    aid2 SEQUENCE OF INTEGER,      -- Atom2 Identifier (vector)
                                   -- Note: Atom ID's must be greater than "0"
    order SEQUENCE OF PC-BondType  -- Bond Type Information (vector)
}


-- Bond Type Information
PC-BondType ::= INTEGER {
    single (1),     -- Single Bond
    double (2),     -- Double Bond
    triple (3),     -- Triple Bond
    quadruple (4),  -- Quadruple Bond
    dative (5),     -- Dative Bond
    complex (6),    -- Complex Bond
    ionic (7),      -- Ionic Bond
    unknown (255)   -- Unknown/Unspecified Connectivity
}


-- Allowed Stereogenic Center Types
-- [Using IUPAC Stereogenic Center recommendations and terminology]
PC-StereoCenter ::= CHOICE {
    tetrahedral PC-StereoTetrahedral,         -- Tetrahedral (SP3) StereoCenter
    planar PC-StereoPlanar,                   -- Planar (SP2) StereoCenter
    squareplanar PC-StereoSquarePlanar,       -- Square Planar (SP4) StereoCenter
    octahedral PC-StereoOctahedral,           -- Octahedral (OC-6) / Square Pyramid (SPY-5) StereoCenters
    bipyramid PC-StereoTrigonalBiPyramid,     -- Trigonal BiPyramid (TBPY-4 and TBPY-5) StereoCenters
    tshape PC-StereoTShape,                   -- T-Shaped (TS-3) StereoCenters
    pentagonal PC-StereoPentagonalBiPyramid   -- Pentagonal
-- BiPyramid (PBPY-7) StereoCenters
}


-- SP3 Tetrahedral StereoCenter, Trigonal Pyramid Stereogenic Center,
-- Cumulenic StereoCenter (Linear systems of an even number of double bonds),
-- or Hindered biaryl stereocenter (All biaryls have hindered rotation that
-- to some extent the ortho-hydrogens prevent coplanarity)
-- [Using IUPAC Stereogenic Center recommendations and terminology]
-- [Note: "-1" can be used for the Atom Identifier to represent a lone-pair or implicit hydrogen]
PC-StereoTetrahedral ::= SEQUENCE {
    center INTEGER,  -- Atom Identifier of Atom Center
                     -- Note: Atom ID's must be greater than "0"
    above INTEGER,   -- Atom Identifier of Atom Above the Plane
                     -- Note: Atom ID's must be greater than "0"
    top INTEGER,     -- Atom Identifier of Atom In-Plane and at the Top
                     -- Note: Atom ID's must be greater than "0"
    bottom INTEGER,  -- Atom Identifier of Atom In-Plane and at the Bottom
                     -- Note: Atom ID's must be greater than "0"
    below INTEGER,   -- Atom Identifier of Atom Below the Plane
                     -- Note: Atom ID's must be greater than "0"
    parity INTEGER {  -- StereoCenter Designation
        clockwise (1),
        counterclockwise (2),
        any (3),
        unknown (255)
    } OPTIONAL,
    type INTEGER {  -- Type of StereoCenter, Tetrahedral, if not specified
        tetrahedral (1),  -- Tetrahedral StereoCenter
        cumulenic (2),    -- Cumulenic StereoCenter
        biaryl (3)        -- Biaryl StereoCenter
    } OPTIONAL
}


-- SP2 Planar Stereogenic Center, Cumulenic StereoCenter (Linear systems on an odd
-- number of double bonds present planar stereochemistry)
-- [Using IUPAC Stereogenic Center recommendations and terminology]
-- [Note: "-1" can be used for the Atom Identifier to represent a lone-pair or implicit hydrogen]
PC-StereoPlanar ::= SEQUENCE {
    left INTEGER,     -- Atom ID of Left Double Bond Atom
                      -- Note: Atom ID's must be greater than "0"
    ltop INTEGER,     -- Atom ID of Top Atom attached to the Left Double Bond Atom
                      -- Note: Atom ID's must be greater than "0"
    lbottom INTEGER,  -- Atom ID of Bottom Atom attached to the Left Double Bond Atom --
-- Note: Atom ID's must be greater than "0"
    right INTEGER,    -- Atom ID of Right Double Bond Atom
                      -- Note: Atom ID's must be greater than "0"
    rtop INTEGER,     -- Atom ID of Top Atom attached to the Right Double Bond Atom
                      -- Note: Atom ID's must be greater than "0"
    rbottom INTEGER,  -- Atom ID of Bottom Atom attached to the Right Double Bond Atom
                      -- Note: Atom ID's must be greater than "0"
    parity INTEGER {  -- StereoCenter Designation
        same (1),
        opposite (2),
        any (3),
        unknown (255)
    } OPTIONAL,
    type INTEGER {  -- Type of StereoCenter, SP2 Planar, if not specified
        planar (1),    -- SP2 Planar StereoCenter
        cumulenic (2)  -- Cumulenic StereoCenter
    } OPTIONAL
}


-- Square Planar (SP4) StereoCenters
-- [Using IUPAC Stereogenic Center recommendations and terminology]
-- [Note: "-1" can be used for the Atom Identifier to represent a lone-pair or implicit hydrogen]
PC-StereoSquarePlanar ::= SEQUENCE {
    center INTEGER,  -- Atom ID of Atom Center
                     -- Note: Atom ID's must be greater than "0"
    lbelow INTEGER,  -- Atom ID of Left Below Plane Atom
                     -- Note: Atom ID's must be greater than "0"
    rbelow INTEGER,  -- Atom ID of Right Below Plane Atom
                     -- Note: Atom ID's must be greater than "0"
    labove INTEGER,  -- Atom ID of Left Above Plane Atom
                     -- Note: Atom ID's must be greater than "0"
    rabove INTEGER,  -- Atom ID of Right Above Plane Atom
                     -- Note: Atom ID's must be greater than "0"
    parity INTEGER {  -- StereoCenter Type
        u-shape (1),  -- U shaped isomer (labove-lbelow-rbelow-rabove)
        z-shape (2),  -- Z shaped isomer (labove-rabove-lbelow-rbelow)
        x-shape (3),  -- X shaped isomer (labove-rbelow-rabove-lbelow)
        any (4),      -- Nonspecific mixture of isomers
        unknown (255)
    } OPTIONAL
}


-- Octahedral (OC-6) and Square Pyramid (SPY-5) StereoCenters
-- [Using IUPAC Stereogenic Center recommendations and terminology]
-- [Note: "-1" can be used for the Atom Identifier to represent a lone-pair or implicit hydrogen]
PC-StereoOctahedral ::= SEQUENCE {
    center INTEGER,  -- Atom ID of Atom Center
                     -- Note: Atom ID's must be greater than "0"
    top INTEGER,     -- Atom ID of Atom In-Plane and at the Top
                     -- Note: Atom ID's must be greater than "0"
    bottom INTEGER,  -- Atom ID of Atom In-Plane and at the Bottom
                     -- Note: Atom ID's must be greater than "0"
    labove INTEGER,  -- Atom ID of Atom Above the Plane on the Left
                     -- Note: Atom ID's must be greater than "0"
    lbelow INTEGER,  -- Atom ID of Atom Below the Plane on the Left
                     -- Note: Atom ID's must be greater than "0"
    rabove INTEGER,  -- Atom ID of Atom Above the Plane on the Right
                     -- Note: Atom ID's must be greater than "0"
    rbelow INTEGER   -- Atom ID of Atom Below the Plane on the Right
                     -- Note: Atom ID's must be greater than "0"
}


-- Trigonal BiPyramid (TBPY-4 and TBPY-5) StereoCenters
-- [Using IUPAC Stereogenic Center recommendations and terminology]
-- [Note: "-1" can be used for the Atom Identifier to represent a lone-pair or implicit hydrogen]
PC-StereoTrigonalBiPyramid ::= SEQUENCE {
    center INTEGER,  -- Atom ID of Atom Center
                     -- Note: Atom ID's must be greater than "0"
    above INTEGER,   -- Atom ID of Atom Above the Plane
                     -- Note: Atom ID's must be greater than "0"
    below INTEGER,   -- Atom ID of Atom Below the Plane
                     -- Note: Atom ID's must be greater than "0"
    top INTEGER,     -- Atom ID of Atom In-Plane and at the Top
                     -- Note: Atom ID's must be greater than "0"
    bottom INTEGER,  -- Atom ID of Atom In-Plane and at the Bottom
                     -- Note: Atom ID's must be greater than "0"
    right INTEGER    -- Atom ID of Atom In-Plane and to the Right
                     -- Note: Atom ID's must be greater than "0"
}


-- T-Shaped (TS-3) StereoCenters
-- [Using IUPAC Stereogenic Center recommendations and terminology]
-- [Note: "-1" can be used for the Atom Identifier to represent a lone-pair or implicit hydrogen]
PC-StereoTShape ::= SEQUENCE {
    center INTEGER,  -- Atom ID of Atom Center
                     -- Note: Atom ID's must be greater than "0"
    top INTEGER,     -- Atom ID of Atom In-Plane and at the Top
                     -- Note: Atom ID's must be greater than "0"
    bottom INTEGER,  -- Atom ID of Atom In-Plane and at the Bottom
                     -- Note: Atom ID's must be
-- greater than "0"
    above INTEGER    -- Atom ID of Atom Above the Plane
                     -- Note: Atom ID's must be greater than "0"
}


-- Pentagonal BiPyramid (PBPY-7) StereoCenters
-- [Using IUPAC Stereogenic Center recommendations and terminology]
-- [Note: "-1" can be used for the Atom Identifier to represent a lone-pair or implicit hydrogen]
PC-StereoPentagonalBiPyramid ::= SEQUENCE {
    center INTEGER,  -- Atom ID of Atom Center
                     -- Note: Atom ID's must be greater than "0"
    top INTEGER,     -- Atom ID of Atom In-Plane and at the Top
                     -- Note: Atom ID's must be greater than "0"
    bottom INTEGER,  -- Atom ID of Atom In-Plane and at the Bottom
                     -- Note: Atom ID's must be greater than "0"
    left INTEGER,    -- Atom ID of Atom In-Plane and at the Left
                     -- Note: Atom ID's must be greater than "0"
    labove INTEGER,  -- Atom ID of Atom Above the Plane on the Left
                     -- Note: Atom ID's must be greater than "0"
    lbelow INTEGER,  -- Atom ID of Atom Below the Plane on the Left
                     -- Note: Atom ID's must be greater than "0"
    rabove INTEGER,  -- Atom ID of Atom Above the Plane on the Right
                     -- Note: Atom ID's must be greater than "0"
    rbelow INTEGER   -- Atom ID of Atom Below the Plane on the Right
                     -- Note: Atom ID's must be greater than "0"
}

END


-- proj.asn
--$Revision: 6.3 $
--****************************************************************
--
-- NCBI Project Definition Module
-- by Jim Ostell and Jonathan Kans, 1998
--
--****************************************************************

NCBI-Project DEFINITIONS ::=
BEGIN

EXPORTS Project, Project-item;

IMPORTS Date FROM NCBI-General
        PubMedId FROM NCBI-Biblio
        Seq-id, Seq-loc FROM NCBI-Seqloc
        Seq-annot, Pubdesc FROM NCBI-Sequence
        Seq-entry FROM NCBI-Seqset
        Pubmed-entry FROM NCBI-PubMed;

Project ::= SEQUENCE {
    descr Project-descr OPTIONAL ,
    data Project-item }

Project-item ::= CHOICE {
    pmuid SET OF INTEGER ,
    protuid SET OF INTEGER ,
    nucuid SET OF INTEGER ,
    sequid SET OF INTEGER ,
    genomeuid SET OF INTEGER ,
    structuid SET OF INTEGER ,
    pmid SET OF PubMedId ,
    protid SET OF Seq-id ,
    nucid
SET OF Seq-id ,
    seqid SET OF Seq-id ,
    genomeid SET OF Seq-id ,
    structid NULL ,
    pment SET OF Pubmed-entry ,
    protent SET OF Seq-entry ,
    nucent SET OF Seq-entry ,
    seqent SET OF Seq-entry ,
    genomeent SET OF Seq-entry ,
    structent NULL ,
    seqannot SET OF Seq-annot ,
    loc SET OF Seq-loc ,
    proj SET OF Project }

Project-descr ::= SEQUENCE {
    id SET OF Project-id ,
    name VisibleString OPTIONAL ,
    descr SET OF Projdesc OPTIONAL }

Projdesc ::= CHOICE {
    pub Pubdesc ,
    date Date ,
    comment VisibleString ,
    title VisibleString }

Project-id ::= VisibleString

END


-- pub.asn
--$Revision: 6.0 $
--********************************************************************
--
-- Publication common set
-- James Ostell, 1990
--
-- These are the base class definitions for Publications of all sorts
--
-- support for PubMedId added in 1996
--********************************************************************

NCBI-Pub DEFINITIONS ::=
BEGIN

EXPORTS Pub, Pub-set, Pub-equiv;

IMPORTS Medline-entry FROM NCBI-Medline
        Cit-art, Cit-jour, Cit-book, Cit-proc, Cit-pat, Id-pat, Cit-gen,
        Cit-let, Cit-sub, PubMedId FROM NCBI-Biblio;

Pub ::= CHOICE {
    gen Cit-gen ,            -- general or generic unparsed
    sub Cit-sub ,            -- submission
    medline Medline-entry ,
    muid INTEGER ,           -- medline uid
    article Cit-art ,
    journal Cit-jour ,
    book Cit-book ,
    proc Cit-proc ,          -- proceedings of a meeting
    patent Cit-pat ,
    pat-id Id-pat ,          -- identify a patent
    man Cit-let ,            -- manuscript, thesis, or letter
    equiv Pub-equiv,         -- to cite a variety of ways
    pmid PubMedId }          -- PubMedId

Pub-equiv ::= SET OF Pub     -- equivalent identifiers for same citation

Pub-set ::= CHOICE {
    pub SET OF Pub ,
    medline SET OF Medline-entry ,
    article SET OF Cit-art ,
    journal SET OF Cit-jour ,
    book SET OF Cit-book ,
    proc SET OF Cit-proc ,   -- proceedings of a meeting
    patent SET OF Cit-pat }

END


-- pubmed.asn
--$Revision: 6.0 $
--**********************************************************************
--
-- PUBMED data definitions
--
--**********************************************************************

NCBI-PubMed DEFINITIONS ::=
BEGIN

EXPORTS Pubmed-entry, Pubmed-url;

IMPORTS PubMedId FROM NCBI-Biblio
        Medline-entry FROM NCBI-Medline;

Pubmed-entry ::= SEQUENCE {  -- a PubMed entry
    -- PUBMED records must include the PubMedId
    pmid PubMedId,
    -- Medline entry information
    medent Medline-entry OPTIONAL,
    -- Publisher name
    publisher VisibleString OPTIONAL,
    -- List of URL to publisher cite
    urls SET OF Pubmed-url OPTIONAL,
    -- Publisher's article identifier
    pubid VisibleString OPTIONAL
}

Pubmed-url ::= SEQUENCE {
    location VisibleString OPTIONAL,  -- Location code
    url VisibleString                 -- Selected URL for location
}

END


-- remap.asn
--$Id: remap.asn,v 1.2 2004/07/28 13:43:33 jcherry Exp $********************************************
--
-- remap.asn
-- Version 1
--
-- API for remapping locations on sequences
--
-- Author: Josh Cherry
--
--***************************************************************

NCBI-Remap DEFINITIONS ::=
BEGIN

IMPORTS Seq-loc FROM NCBI-Seqloc;

Remap-dt ::= INTEGER           -- a date/time stamp
Remap-db-id ::= VisibleString  -- database name

--***************************************
-- Remap Request types
--***************************************

--****************************************
-- The basic request wrapper leaves space for a version which
-- allows the server to support older clients
-- The tool parameter allows us to log the client types for
-- debugging and tuning
--****************************************

Remap-request ::= SEQUENCE {       -- a standard request
    request RMRequest ,            -- the actual request
    version INTEGER ,              -- ASN1 spec version
    tool VisibleString OPTIONAL }  -- tool making request

RMRequest ::= CHOICE {                 -- request types
    remap Remap-query ,                -- do the actual remapping
    maps-to-builds VisibleString ,     -- what builds can this be mapped to?
    maps-from-builds VisibleString ,   -- what builds can be mapped to this?
all-builds NULL }  -- all the builds the server knows of

Remap-query ::= SEQUENCE {
    from-build VisibleString ,  -- build to map from
    to-build VisibleString ,    -- build to map to
    locs SEQUENCE OF Seq-loc }  -- the locations to remap

--**********************************************************
-- Replies from the server
-- all replies contain the date/time stamp when they were executed
--**********************************************************

Remap-reply ::= SEQUENCE {
    reply RMReply ,               -- the actual reply
    dt Remap-dt ,                 -- date/time stamp from server
    server VisibleString ,        -- server version info
    msg VisibleString OPTIONAL }  -- possibly a message to the user

RMReply ::= CHOICE {
    error VisibleString ,                      -- if nothing can be returned
    remap Remap-result ,                       -- result of actual remapping
    maps-to-builds SEQUENCE OF VisibleString , -- all the builds that the server
                                               -- knows how to map this build to
    maps-from-builds SEQUENCE OF VisibleString ,-- all the builds that the server
                                               -- knows how to map to this build
    all-builds SEQUENCE OF VisibleString }     -- all builds that the server knows of

Remap-result ::= SEQUENCE OF Seq-loc  -- remapped locations

END


-- scoremat.asn
--$Id: scoremat.asn 145226 2008-11-07 19:39:10Z camacho $
-- ===========================================================================
--
-- PUBLIC DOMAIN NOTICE
-- National Center for Biotechnology Information
--
-- This software/database is a "United States Government Work" under the
-- terms of the United States Copyright Act. It was written as part of
-- the author's official duties as a United States Government employee and
-- thus cannot be copyrighted. This software/database is freely available
-- to the public for use. The National Library of Medicine and the U.S.
-- Government have not placed any restriction on its use or reproduction.
--
-- Although all reasonable efforts have been taken to ensure the accuracy
-- and reliability of the software and data, the NLM and the U.S.
-- Government do not and cannot warrant the performance or results that
-- may be obtained by using this software or data. The NLM and the U.S.
-- Government disclaim all warranties, express or implied, including
-- warranties of performance, merchantability or fitness for any particular
-- purpose.
--
-- Please cite the author in any work or product based on this material.
--
-- ===========================================================================
--
-- Author: Christiam Camacho
--
-- File Description:
-- ASN.1 definitions for scoring matrix
--
-- ===========================================================================

NCBI-ScoreMat DEFINITIONS ::=
BEGIN

EXPORTS Pssm, PssmIntermediateData, PssmFinalData,
        PssmParameters, PssmWithParameters;

IMPORTS Object-id FROM NCBI-General
        Seq-entry FROM NCBI-Seqset;

-- a rudimentary block/core-model, to be used with block-based alignment
-- routines and threading

BlockProperty ::= SEQUENCE {
    type INTEGER {
        unassigned (0),
        threshold (1),     -- score threshold for heuristics
        minscore (2),      -- observed minimum score in CD
        maxscore (3),      -- observed maximum score in CD
        meanscore (4),     -- observed mean score in CD
        variance (5),      -- observed score variance
        name (10),         -- just name the block
        is-optional(20),   -- block may not have to be used
        other (255)
    },
    intvalue INTEGER OPTIONAL,
    textvalue VisibleString OPTIONAL
}

CoreBlock ::= SEQUENCE {
    start INTEGER,              -- begin of block on query
    stop INTEGER,               -- end of block on query
    minstart INTEGER OPTIONAL,  -- optional N-terminal extension
    maxstop INTEGER OPTIONAL,   -- optional C-terminal extension
    property SEQUENCE OF BlockProperty OPTIONAL
}

LoopConstraint ::= SEQUENCE {
    minlength INTEGER DEFAULT 0,      -- minimum length of unaligned region
    maxlength INTEGER DEFAULT 100000  -- maximum length of unaligned region
}

CoreDef ::= SEQUENCE {
    nblocks INTEGER,                 -- number of core elements/blocks
    blocks SEQUENCE OF CoreBlock,    -- nblocks locations
    loops SEQUENCE OF LoopConstraint -- (nblocks+1) constraints
}
-- ===========================================================================
-- PSI-BLAST, formatrpsdb, RPS-BLAST workflow:
-- ===========================================
--
-- Two possible inputs to PSI-BLAST and formatrpsdb:
-- 1) PssmWithParameters where pssm field contains intermediate PSSM data (matrix
--    of frequency ratios)
-- 2) PssmWithParameters where pssm field contains final PSSM data (matrix of
--    scores and statistical parameters) - such as written by cddumper
--
-- In case 1, PSI-BLAST's PSSM engine is invoked to create the PSSM and perform
-- the PSI-BLAST search or build the PSSM to then build the RPS-BLAST database.
-- In case 2, PSI-BLAST's PSSM engine is not invoked and the matrix of scores
-- and statistical parameters are used to perform the search in PSI-BLAST and the
-- same data and the data in PssmWithParameters::params::rpsdbparams is used to
-- build the PSSM and ultimately the RPS-BLAST database
--
--
--                     reads    ++++++++++++++ writes
-- PssmWithParameters  ====>    + PSI-BLAST  + =====> PssmWithParameters
--                              ++++++++++++++             |  ^
--             ^                                           |  |
--             |                                           |  |
--             +===========================================+  |
--                                                         |  |
--             +===========================================+  |
--             |                                              |
--       reads |                                              |
--             v                                              |
--      +++++++++++++++ writes +++++++++++++++++++++++        |
--      | formatrpsdb | =====> | RPS-BLAST databases |        |
--      +++++++++++++++        +++++++++++++++++++++++        |
--                                        ^                   |
--                                        |                   |
--                                        | reads             |
--                                  +++++++++++++             |
--                                  | RPS-BLAST |             |
--                                  +++++++++++++             |
--                                                            |
--           reads  ++++++++++++ writes                       |
--      Cdd ======> | cddumper | =============================+
--                  ++++++++++++
--
-- ===========================================================================

-- Contains the PSSM's scores and its associated statistical parameters.
-- Dimensions and order in which scores are stored must be the same as that
-- specified in Pssm::numRows, Pssm::numColumns, and Pssm::byRow
PssmFinalData ::= SEQUENCE {

    -- PSSM's scores
    scores SEQUENCE OF INTEGER,

    -- Karlin & Altschul parameter produced during the PSSM's calculation
    lambda REAL,

    -- Karlin & Altschul parameter produced during the PSSM's calculation
    kappa REAL,

    -- Karlin & Altschul parameter produced during the PSSM's calculation
    h REAL,

    -- scaling factor used to obtain more precision when building the PSSM.
    -- (i.e.: scores are scaled by this value). By default, PSI-BLAST's PSSM
    -- engine generates PSSMs which are not scaled-up, however, if PSI-BLAST is
    -- given a PSSM which contains a scaled-up PSSM (indicated by having a
    -- scalingFactor greater than 1), then it will scale down the PSSM to
    -- perform the initial stages of the search with it.
    -- N.B.: When building RPS-BLAST databases, if formatrpsdb is provided
    -- scaled-up PSSMs, it will ensure that all PSSMs used to build the
    -- RPS-BLAST database are scaled by the same factor (otherwise, RPS-BLAST
    -- will silently produce incorrect results).
    scalingFactor INTEGER DEFAULT 1,

    -- Karlin & Altschul parameter produced during the PSSM's calculation
    lambdaUngapped REAL OPTIONAL,

    -- Karlin & Altschul parameter produced during the PSSM's calculation
    kappaUngapped REAL OPTIONAL,

    -- Karlin & Altschul parameter produced during the PSSM's calculation
    hUngapped REAL OPTIONAL
}

-- Contains the PSSM's intermediate data used to create the PSSM's scores
-- and statistical parameters. Dimensions and order in which scores are
-- stored must be the same as that specified in Pssm::numRows,
-- Pssm::numColumns, and Pssm::byRow
PssmIntermediateData ::= SEQUENCE {

    -- observed residue frequencies (or counts) per position of the PSSM
    -- (prior to application of pseudocounts)
    resFreqsPerPos SEQUENCE OF INTEGER OPTIONAL,

    -- Weighted observed residue frequencies per position of the PSSM.
-- (N.B.: each position's weights should add up to 1.0).
    -- This field corresponds to f_i (f sub i) in equation 2 of
    -- Nucleic Acids Res. 2001 Jul 15;29(14):2994-3005.
    -- NOTE: this is needed for diagnostics information only (i.e.:
    -- -out_ascii_pssm option in psiblast)
    weightedResFreqsPerPos SEQUENCE OF REAL OPTIONAL,

    -- PSSM's frequency ratios
    freqRatios SEQUENCE OF REAL,

    -- Information content per position of the PSSM
    -- NOTE: this is needed for diagnostics information only (i.e.:
    -- -out_ascii_pssm option in psiblast)
    informationContent SEQUENCE OF REAL OPTIONAL,

    -- Weights for columns of the PSSM without gaps
    -- NOTE: this is needed for diagnostics information only (i.e.:
    -- -out_ascii_pssm option in psiblast)
    gaplessColumnWeights SEQUENCE OF REAL OPTIONAL,

    -- Used in sequence weights computation
    -- NOTE: this is needed for diagnostics information only (i.e.:
    -- -out_ascii_pssm option in psiblast)
    sigma SEQUENCE OF REAL OPTIONAL,

    -- Length of the aligned regions per position of the query sequence
    -- NOTE: this is needed for diagnostics information only (i.e.:
    -- -out_ascii_pssm option in psiblast)
    intervalSizes SEQUENCE OF INTEGER OPTIONAL,

    -- Number of matching sequences per position of the PSSM (including the
    -- query)
    -- NOTE: this is needed for diagnostics information only (i.e.:
    -- -out_ascii_pssm option in psiblast)
    numMatchingSeqs SEQUENCE OF INTEGER OPTIONAL
}

-- Position-specific scoring matrix
--
-- Column indices on the PSSM refer to the positions corresponding to the
-- query/master sequence, i.e. the number of columns (N) is the same
-- as the length of the query/master sequence.
-- Row indices refer to individual amino acid types, i.e. the number of
-- rows (M) is the same as the number of different residues in the
-- alphabet we use. Consequently, row labels are amino acid identifiers.
--
-- PSSMs are stored as linear arrays of integers.
-- By default, we store
-- them column-by-column, M values for the first column followed by M
-- values for the second column, and so on. In order to provide
-- flexibility for external applications, the boolean field "byRow" is
-- provided to specify the storage order.
Pssm ::= SEQUENCE {

    -- Is this a protein or nucleotide scoring matrix?
    isProtein BOOLEAN DEFAULT TRUE,

    -- PSSM identifier
    identifier Object-id OPTIONAL,

    -- The dimensions of the matrix are returned so the client can
    -- verify that all data was received.
    numRows INTEGER,     -- number of rows
    numColumns INTEGER,  -- number of columns

    -- row-labels is given to note the order of residue types so that it can
    -- be cross-checked between applications.
    -- If this field is not given, the matrix values are presented in
    -- order of the alphabet ncbistdaa is used for protein, ncbi4na for nucl.
    -- for proteins the values returned correspond to
    -- (-,-), (-,A), (-,B), (-,C) ... (A,-), (A,A), (A,B), (A,C) ...
    rowLabels SEQUENCE OF VisibleString OPTIONAL,

    -- are matrices stored row by row?
    byRow BOOLEAN DEFAULT FALSE,

    -- PSSM representative sequence (master)
    query Seq-entry OPTIONAL,

    -- both intermediateData and finalData can be provided, but at least one of
    -- them must be provided.
    -- N.B.: by default PSI-BLAST will return the PSSM in its PssmIntermediateData
    -- representation.

    -- Intermediate or final data for the PSSM
    intermediateData PssmIntermediateData OPTIONAL,

    -- Final representation for the PSSM
    finalData PssmFinalData OPTIONAL
}

-- This structure is used to create the RPS-BLAST database auxiliary file
-- (*.aux) and it contains parameters set at creation time of the PSSM.
-- Also, the matrixName field is used by formatrpsdb to build a PSSM from
-- a Pssm structure which only contains PssmIntermediateData.
FormatRpsDbParameters ::= SEQUENCE { -- name of the underlying score matrix whose frequency ratios were -- used in PSSM construction (e.g.: BLOSUM62) matrixName VisibleString, -- gap opening penalty corresponding to the matrix above gapOpen INTEGER OPTIONAL, -- gap extension penalty corresponding to the matrix above gapExtend INTEGER OPTIONAL } -- Populated by PSSM engine of PSI-BLAST, original source for these values -- are the PSI-BLAST options specified using the BLAST options API PssmParameters ::= SEQUENCE { -- pseudocount constant used for PSSM. This field corresponds to beta in -- equation 2 of Nucleic Acids Res. 2001 Jul 15;29(14):2994-3005. pseudocount INTEGER OPTIONAL, -- data needed by formatrpsdb to create RPS-BLAST databases. matrixName is -- populated by PSI-BLAST rpsdbparams FormatRpsDbParameters OPTIONAL, -- alignment constraints needed by sequence-structure threader -- and other global or local block-alignment algorithms constraints CoreDef OPTIONAL } -- Envelope containing PSSM and the parameters used to create it. -- Provided for use in PSI-BLAST, formatrpsdb, and for the structure group. PssmWithParameters ::= SEQUENCE { -- This field is applicable to PSI-BLAST and formatrpsdb. -- When both the intermediate and final PSSM data are provided in this -- field, the final data (matrix of scores and associated statistical -- parameters) takes precedence and that data is used for further -- processing. The rationale for this is that the PSSM's scores and -- statistical parameters might have been calculated by other applications -- and it might not be possible to recreate it by using PSI-BLAST's PSSM -- engine. pssm Pssm, -- This field's rpsdbparams is used to specify the values of options -- for processing by formatrpsdb. If these are not set, the command -- line defaults of formatrpsdb are applied. 
This field is used -- by PSI-BLAST to verify that the underlying score matrix used to BUILD -- the PSSM is the same as the one being specified through the BLAST -- Options API. If this field is omitted, no verification will be -- performed, so be careful to keep track of what matrix was used to build -- the PSSM or else the results produced by PSI-BLAST will be unreliable. params PssmParameters OPTIONAL } END -- seq.asn --$Revision: 138450 $ --********************************************************************** -- -- NCBI Sequence elements -- by James Ostell, 1990 -- Version 3.0 - June 1994 -- --********************************************************************** NCBI-Sequence DEFINITIONS ::= BEGIN EXPORTS Annotdesc, Annot-descr, Bioseq, GIBB-mol, Heterogen, MolInfo, Numbering, Pubdesc, Seq-annot, Seq-data, Seqdesc, Seq-descr, Seq-ext, Seq-hist, Seq-inst, Seq-literal, Seqdesc, Delta-ext; IMPORTS Date, Int-fuzz, Dbtag, Object-id, User-object FROM NCBI-General Seq-align FROM NCBI-Seqalign Seq-feat FROM NCBI-Seqfeat Seq-graph FROM NCBI-Seqres Pub-equiv FROM NCBI-Pub Org-ref FROM NCBI-Organism BioSource FROM NCBI-BioSource Seq-id, Seq-loc FROM NCBI-Seqloc GB-block FROM GenBank-General PIR-block FROM PIR-General EMBL-block FROM EMBL-General SP-block FROM SP-General PRF-block FROM PRF-General PDB-block FROM PDB-General Seq-table FROM NCBI-SeqTable; --*** Sequence ******************************** --* Bioseq ::= SEQUENCE { id SET OF Seq-id , -- equivalent identifiers descr Seq-descr OPTIONAL , -- descriptors inst Seq-inst , -- the sequence data annot SET OF Seq-annot OPTIONAL } --*** Descriptors ***************************** --* Seq-descr ::= SET OF Seqdesc Seqdesc ::= CHOICE { mol-type GIBB-mol , -- type of molecule modif SET OF GIBB-mod , -- modifiers method GIBB-method , -- sequencing method name VisibleString , -- a name for this sequence title VisibleString , -- a title for this sequence org Org-ref , -- if all from one organism comment VisibleString , -- a more
extensive comment num Numbering , -- a numbering system maploc Dbtag , -- map location of this sequence pir PIR-block , -- PIR specific info genbank GB-block , -- GenBank specific info pub Pubdesc , -- a reference to the publication region VisibleString , -- overall region (globin locus) user User-object , -- user defined object sp SP-block , -- SWISSPROT specific info dbxref Dbtag , -- xref to other databases embl EMBL-block , -- EMBL specific information create-date Date , -- date entry first created/released update-date Date , -- date of last update prf PRF-block , -- PRF specific information pdb PDB-block , -- PDB specific information het Heterogen , -- cofactor, etc associated but not bound source BioSource , -- source of materials, includes Org-ref molinfo MolInfo } -- info on the molecule and techniques --******* NOTE: --* mol-type, modif, method, and org are consolidated and expanded --* in Org-ref, BioSource, and MolInfo in this specification. They --* will be removed in later specifications. Do not use them in the --* future. Instead expect the new structures.
--* --*************************** --******************************************************************** -- -- MolInfo gives information on the -- classification of the type and quality of the sequence -- -- WARNING: this will replace GIBB-mol, GIBB-mod, GIBB-method -- --******************************************************************** MolInfo ::= SEQUENCE { biomol INTEGER { unknown (0) , genomic (1) , pre-RNA (2) , -- precursor RNA of any sort really mRNA (3) , rRNA (4) , tRNA (5) , snRNA (6) , scRNA (7) , peptide (8) , other-genetic (9) , -- other genetic material genomic-mRNA (10) , -- reported a mix of genomic and cdna sequence cRNA (11) , -- viral RNA genome copy intermediate snoRNA (12) , -- small nucleolar RNA transcribed-RNA (13) , -- transcribed RNA other than existing classes ncRNA (14) , tmRNA (15) , other (255) } DEFAULT unknown , tech INTEGER { unknown (0) , standard (1) , -- standard sequencing est (2) , -- Expressed Sequence Tag sts (3) , -- Sequence Tagged Site survey (4) , -- one-pass genomic sequence genemap (5) , -- from genetic mapping techniques physmap (6) , -- from physical mapping techniques derived (7) , -- derived from other data, not a primary entity concept-trans (8) , -- conceptual translation seq-pept (9) , -- peptide was sequenced both (10) , -- concept transl. w/ partial pept. seq. seq-pept-overlap (11) , -- sequenced peptide, ordered by overlap seq-pept-homol (12) , -- sequenced peptide, ordered by homology concept-trans-a (13) , -- conceptual transl. 
supplied by author htgs-1 (14) , -- unordered High Throughput sequence contig htgs-2 (15) , -- ordered High Throughput sequence contig htgs-3 (16) , -- finished High Throughput sequence fli-cdna (17) , -- full length insert cDNA htgs-0 (18) , -- single genomic reads for coordination htc (19) , -- high throughput cDNA wgs (20) , -- whole genome shotgun sequencing barcode (21) , -- barcode of life project composite-wgs-htgs (22) , -- composite of WGS and HTGS tsa (23) , -- transcriptome shotgun assembly other (255) } -- use Source.techexp DEFAULT unknown , techexp VisibleString OPTIONAL , -- explanation if tech not enough -- -- Completeness is not indicated in most records. For genomes, assume -- the sequences are incomplete unless specifically marked as complete. -- For mRNAs, assume the ends are not known exactly unless marked as -- having the left or right end. -- completeness INTEGER { unknown (0) , complete (1) , -- complete biological entity partial (2) , -- partial but no details given no-left (3) , -- missing 5' or NH3 end no-right (4) , -- missing 3' or COOH end no-ends (5) , -- missing both ends has-left (6) , -- 5' or NH3 end present has-right (7) , -- 3' or COOH end present other (255) } DEFAULT unknown , gbmoltype VisibleString OPTIONAL } -- identifies particular ncRNA GIBB-mol ::= ENUMERATED { -- type of molecule represented unknown (0) , genomic (1) , pre-mRNA (2) , -- precursor RNA of any sort really mRNA (3) , rRNA (4) , tRNA (5) , snRNA (6) , scRNA (7) , peptide (8) , other-genetic (9) , -- other genetic material genomic-mRNA (10) , -- reported a mix of genomic and cdna sequence other (255) } GIBB-mod ::= ENUMERATED { -- GenInfo Backbone modifiers dna (0) , rna (1) , extrachrom (2) , plasmid (3) , mitochondrial (4) , chloroplast (5) , kinetoplast (6) , cyanelle (7) , synthetic (8) , recombinant (9) , partial (10) , complete (11) , mutagen (12) , -- subject of mutagenesis ? natmut (13) , -- natural mutant ? 
transposon (14) , insertion-seq (15) , no-left (16) , -- missing left end (5' for na, NH2 for aa) no-right (17) , -- missing right end (3' or COOH) macronuclear (18) , proviral (19) , est (20) , -- expressed sequence tag sts (21) , -- sequence tagged site survey (22) , -- one pass survey sequence chromoplast (23) , genemap (24) , -- is a genetic map restmap (25) , -- is an ordered restriction map physmap (26) , -- is a physical map (not ordered restriction map) other (255) } GIBB-method ::= ENUMERATED { -- sequencing methods concept-trans (1) , -- conceptual translation seq-pept (2) , -- peptide was sequenced both (3) , -- concept transl. w/ partial pept. seq. seq-pept-overlap (4) , -- sequenced peptide, ordered by overlap seq-pept-homol (5) , -- sequenced peptide, ordered by homology concept-trans-a (6) , -- conceptual transl. supplied by author other (255) } Numbering ::= CHOICE { -- any display numbering system cont Num-cont , -- continuous numbering enum Num-enum , -- enumerated names for residues ref Num-ref , -- by reference to another sequence real Num-real } -- supports mapping to a float system Num-cont ::= SEQUENCE { -- continuous display numbering system refnum INTEGER DEFAULT 1, -- number assigned to first residue has-zero BOOLEAN DEFAULT FALSE , -- 0 used? ascending BOOLEAN DEFAULT TRUE } -- ascending numbers? 
Num-enum ::= SEQUENCE { -- any tags to residues num INTEGER , -- number of tags to follow names SEQUENCE OF VisibleString } -- the tags Num-ref ::= SEQUENCE { -- by reference to other sequences type ENUMERATED { -- type of reference not-set (0) , sources (1) , -- by segmented or const seq sources aligns (2) } , -- by alignments given below aligns Seq-align OPTIONAL } Num-real ::= SEQUENCE { -- mapping to floating point system a REAL , -- from an integer system used by Bioseq b REAL , -- position = (a * int_position) + b units VisibleString OPTIONAL } Pubdesc ::= SEQUENCE { -- how sequence presented in pub pub Pub-equiv , -- the citation(s) name VisibleString OPTIONAL , -- name used in paper fig VisibleString OPTIONAL , -- figure in paper num Numbering OPTIONAL , -- numbering from paper numexc BOOLEAN OPTIONAL , -- numbering problem with paper poly-a BOOLEAN OPTIONAL , -- poly A tail indicated in figure? maploc VisibleString OPTIONAL , -- map location reported in paper seq-raw StringStore OPTIONAL , -- original sequence from paper align-group INTEGER OPTIONAL , -- this seq aligned with others in paper comment VisibleString OPTIONAL, -- any comment on this pub in context reftype INTEGER { -- type of reference in a GenBank record seq (0) , -- refers to sequence sites (1) , -- refers to unspecified features feats (2) , -- refers to specified features no-target (3) } -- nothing specified (EMBL) DEFAULT seq } Heterogen ::= VisibleString -- cofactor, prosthetic group, inhibitor, etc --*** Instances of sequences ******************************* --* Seq-inst ::= SEQUENCE { -- the sequence data itself repr ENUMERATED { -- representation class not-set (0) , -- empty virtual (1) , -- no seq data raw (2) , -- continuous sequence seg (3) , -- segmented sequence const (4) , -- constructed sequence ref (5) , -- reference to another sequence consen (6) , -- consensus sequence or pattern map (7) , -- ordered map of any kind delta (8) , -- sequence made by changes (delta) to others 
other (255) } , mol ENUMERATED { -- molecule class in living organism not-set (0) , -- > cdna = rna dna (1) , rna (2) , aa (3) , na (4) , -- just a nucleic acid other (255) } , length INTEGER OPTIONAL , -- length of sequence in residues fuzz Int-fuzz OPTIONAL , -- length uncertainty topology ENUMERATED { -- topology of molecule not-set (0) , linear (1) , circular (2) , tandem (3) , -- some part of tandem repeat other (255) } DEFAULT linear , strand ENUMERATED { -- strandedness in living organism not-set (0) , ss (1) , -- single strand ds (2) , -- double strand mixed (3) , other (255) } OPTIONAL , -- default ds for DNA, ss for RNA, pept seq-data Seq-data OPTIONAL , -- the sequence ext Seq-ext OPTIONAL , -- extensions for special types hist Seq-hist OPTIONAL } -- sequence history --*** Sequence Extensions ********************************** --* for representing more complex types --* const type uses Seq-hist.assembly Seq-ext ::= CHOICE { seg Seg-ext , -- segmented sequences ref Ref-ext , -- hot link to another sequence (a view) map Map-ext , -- ordered map of markers delta Delta-ext } Seg-ext ::= SEQUENCE OF Seq-loc Ref-ext ::= Seq-loc Map-ext ::= SEQUENCE OF Seq-feat Delta-ext ::= SEQUENCE OF Delta-seq Delta-seq ::= CHOICE { loc Seq-loc , -- point to a sequence literal Seq-literal } -- a piece of sequence Seq-literal ::= SEQUENCE { length INTEGER , -- must give a length in residues fuzz Int-fuzz OPTIONAL , -- could be unsure seq-data Seq-data OPTIONAL } -- may have the data --*** Sequence History Record *********************************** --** assembly = records how seq was assembled from others --** replaces = records sequences made obsolete by this one --** replaced-by = this seq is made obsolete by another(s) Seq-hist ::= SEQUENCE { assembly SET OF Seq-align OPTIONAL ,-- how was this assembled? 
replaces Seq-hist-rec OPTIONAL , -- seq makes these seqs obsolete replaced-by Seq-hist-rec OPTIONAL , -- these seqs make this one obsolete deleted CHOICE { bool BOOLEAN , date Date } OPTIONAL } Seq-hist-rec ::= SEQUENCE { date Date OPTIONAL , ids SET OF Seq-id } --*** Various internal sequence representations ************ --* all are controlled, fixed length forms Seq-data ::= CHOICE { -- sequence representations iupacna IUPACna , -- IUPAC 1 letter nuc acid code iupacaa IUPACaa , -- IUPAC 1 letter amino acid code ncbi2na NCBI2na , -- 2 bit nucleic acid code ncbi4na NCBI4na , -- 4 bit nucleic acid code ncbi8na NCBI8na , -- 8 bit extended nucleic acid code ncbipna NCBIpna , -- nucleic acid probabilities ncbi8aa NCBI8aa , -- 8 bit extended amino acid codes ncbieaa NCBIeaa , -- extended ASCII 1 letter aa codes ncbipaa NCBIpaa , -- amino acid probabilities ncbistdaa NCBIstdaa, -- consecutive codes for std aas gap Seq-gap -- gap types } Seq-gap ::= SEQUENCE { type INTEGER { unknown(0), fragment(1), clone(2), short-arm(3), heterochromatin(4), centromere(5), telomere(6), repeat(7), contig(8), other(255) }, linkage INTEGER { unlinked(0), linked(1), other(255) } OPTIONAL } IUPACna ::= StringStore -- IUPAC 1 letter codes, no spaces IUPACaa ::= StringStore -- IUPAC 1 letter codes, no spaces NCBI2na ::= OCTET STRING -- 00=A, 01=C, 10=G, 11=T NCBI4na ::= OCTET STRING -- 1 bit each for agct -- 0001=A, 0010=C, 0100=G, 1000=T/U -- 0101=Purine, 1010=Pyrimidine, etc NCBI8na ::= OCTET STRING -- for modified nucleic acids NCBIpna ::= OCTET STRING -- 5 octets/base, prob for a,c,g,t,n -- probabilities are coded 0-255 = 0.0-1.0 NCBI8aa ::= OCTET STRING -- for modified amino acids NCBIeaa ::= StringStore -- ASCII extended 1 letter aa codes -- IUPAC codes + U=selenocysteine NCBIpaa ::= OCTET STRING -- 25 octets/aa, prob for IUPAC aas in order: -- A-Y,B,Z,X,(ter),anything -- probabilities are coded 0-255 = 0.0-1.0 NCBIstdaa ::= OCTET STRING -- codes 0-25, 1 per byte --*** Sequence Annotation 
************************************* --* -- This is a replica of Textseq-id -- This is specific for annotations, and exists to maintain a semantic -- difference between IDs assigned to annotations and IDs assigned to -- sequences Textannot-id ::= SEQUENCE { name VisibleString OPTIONAL , accession VisibleString OPTIONAL , release VisibleString OPTIONAL , version INTEGER OPTIONAL } Annot-id ::= CHOICE { local Object-id , ncbi INTEGER , general Dbtag, other Textannot-id } Annot-descr ::= SET OF Annotdesc Annotdesc ::= CHOICE { name VisibleString , -- a short name for this collection title VisibleString , -- a title for this collection comment VisibleString , -- a more extensive comment pub Pubdesc , -- a reference to the publication user User-object , -- user defined object create-date Date , -- date entry first created/released update-date Date , -- date of last update src Seq-id , -- source sequence from which annot came align Align-def, -- definition of the SeqAligns region Seq-loc } -- all contents cover this region Align-def ::= SEQUENCE { align-type INTEGER { -- class of align Seq-annot ref (1) , -- set of alignments to the same sequence alt (2) , -- set of alternate alignments of the same seqs blocks (3) , -- set of aligned blocks in the same seqs other (255) } , ids SET OF Seq-id OPTIONAL } -- used for the one ref seqid for now Seq-annot ::= SEQUENCE { id SET OF Annot-id OPTIONAL , db INTEGER { -- source of annotation genbank (1) , embl (2) , ddbj (3) , pir (4) , sp (5) , bbone (6) , pdb (7) , other (255) } OPTIONAL , name VisibleString OPTIONAL ,-- source if "other" above desc Annot-descr OPTIONAL , -- used only for stand alone Seq-annots data CHOICE { ftable SET OF Seq-feat , align SET OF Seq-align , graph SET OF Seq-graph , ids SET OF Seq-id , -- used for communication between tools locs SET OF Seq-loc , -- used for communication between tools seq-table Seq-table } } -- features in table form END -- seqalign.asn --$Revision: 142982 $ 
--********************************************************************** -- -- NCBI Sequence Alignment elements -- by James Ostell, 1990 -- --********************************************************************** NCBI-Seqalign DEFINITIONS ::= BEGIN EXPORTS Seq-align, Score, Score-set, Seq-align-set; IMPORTS Seq-id, Seq-loc , Na-strand FROM NCBI-Seqloc User-object, Object-id FROM NCBI-General; --*** Sequence Alignment ******************************** --* Seq-align-set ::= SET OF Seq-align Seq-align ::= SEQUENCE { type ENUMERATED { not-set (0) , global (1) , diags (2) , -- unbroken, but not ordered, diagonals partial (3) , -- mapping pieces together disc (4) , -- discontinuous alignment other (255) } , dim INTEGER OPTIONAL , -- dimensionality score SET OF Score OPTIONAL , -- for whole alignment segs CHOICE { -- alignment data dendiag SEQUENCE OF Dense-diag , denseg Dense-seg , std SEQUENCE OF Std-seg , packed Packed-seg , disc Seq-align-set, spliced Spliced-seg, sparse Sparse-seg } , -- regions of sequence over which align -- was computed bounds SET OF Seq-loc OPTIONAL, -- alignment id id SEQUENCE OF Object-id OPTIONAL, --extra info ext SEQUENCE OF User-object OPTIONAL } Dense-diag ::= SEQUENCE { -- for (multiway) diagonals dim INTEGER DEFAULT 2 , -- dimensionality ids SEQUENCE OF Seq-id , -- sequences in order starts SEQUENCE OF INTEGER , -- start OFFSETS in ids order len INTEGER , -- len of aligned segments strands SEQUENCE OF Na-strand OPTIONAL , scores SET OF Score OPTIONAL } -- Dense-seg: the densest packing for sequence alignments only. -- a start of -1 indicates a gap for that sequence of -- length lens.
-- -- id=100 AAGGCCTTTTAGAGATGATGATGATGATGA -- id=200 AAGGCCTTTTAG.......GATGATGATGA -- id=300 ....CCTTTTAGAGATGATGAT....ATGA -- -- dim = 3, numseg = 6, ids = { 100, 200, 300 } -- starts = { 0,0,-1, 4,4,0, 12,-1,8, 19,12,15, 22,15,-1, 26,19,18 } -- lens = { 4, 8, 7, 3, 4, 4 } -- Dense-seg ::= SEQUENCE { -- for (multiway) global or partial alignments dim INTEGER DEFAULT 2 , -- dimensionality numseg INTEGER , -- number of segments here ids SEQUENCE OF Seq-id , -- sequences in order starts SEQUENCE OF INTEGER , -- start OFFSETS in ids order within segs lens SEQUENCE OF INTEGER , -- lengths in ids order within segs strands SEQUENCE OF Na-strand OPTIONAL , scores SEQUENCE OF Score OPTIONAL } -- score for each seg Packed-seg ::= SEQUENCE { -- for (multiway) global or partial alignments dim INTEGER DEFAULT 2 , -- dimensionality numseg INTEGER , -- number of segments here ids SEQUENCE OF Seq-id , -- sequences in order starts SEQUENCE OF INTEGER , -- start OFFSETS in ids order for whole alignment present OCTET STRING , -- Boolean if each sequence present or absent in -- each segment lens SEQUENCE OF INTEGER , -- length of each segment strands SEQUENCE OF Na-strand OPTIONAL , scores SEQUENCE OF Score OPTIONAL } -- score for each segment Std-seg ::= SEQUENCE { dim INTEGER DEFAULT 2 , -- dimensionality ids SEQUENCE OF Seq-id OPTIONAL , loc SEQUENCE OF Seq-loc , scores SET OF Score OPTIONAL } Spliced-seg ::= SEQUENCE { -- product is either protein or transcript (cDNA) product-id Seq-id OPTIONAL, genomic-id Seq-id OPTIONAL, -- should be 'plus' or 'minus' product-strand Na-strand OPTIONAL , genomic-strand Na-strand OPTIONAL , product-type ENUMERATED { transcript(0), protein(1) }, -- set of segments involved -- each segment corresponds to one exon -- exons are always in biological order exons SEQUENCE OF Spliced-exon , -- optional poly(A) tail poly-a INTEGER OPTIONAL, -- length of the product, in bases/residues -- from this, a 3' unaligned length can be extracted; this also 
captures -- the case in which a protein aligns leaving a partial codon alignment -- at the 3' end product-length INTEGER OPTIONAL, -- alignment descriptors / modifiers -- this provides us a set for extension modifiers SET OF Spliced-seg-modifier OPTIONAL } Spliced-seg-modifier ::= CHOICE { -- protein aligns from the start and the first codon -- on both product and genomic is a start codon start-codon-found BOOLEAN, -- protein aligns to its end and there is a stop codon -- on the genomic right after the alignment stop-codon-found BOOLEAN } -- complete or partial exon -- two consecutive Spliced-exons may belong to one exon Spliced-exon ::= SEQUENCE { -- product-end >= product-start product-start Product-pos , product-end Product-pos , -- genomic-end >= genomic-start genomic-start INTEGER , genomic-end INTEGER , -- product is either protein or transcript (cDNA) product-id Seq-id OPTIONAL , genomic-id Seq-id OPTIONAL , -- should be 'plus' or 'minus' product-strand Na-strand OPTIONAL , -- genomic-strand represents the strand of translation genomic-strand Na-strand OPTIONAL , -- basic segments always are in biological order parts SEQUENCE OF Spliced-exon-chunk OPTIONAL , -- scores for this exon scores Score-set OPTIONAL , -- splice sites acceptor-before-exon Splice-site OPTIONAL, donor-after-exon Splice-site OPTIONAL, -- flag: is this exon complete or partial?
partial BOOLEAN OPTIONAL, --extra info ext SEQUENCE OF User-object OPTIONAL } Product-pos ::= CHOICE { nucpos INTEGER, protpos Prot-pos } -- codon based position on protein (1/3 of aminoacid) Prot-pos ::= SEQUENCE { -- standard protein position amin INTEGER , -- 0, 1, 2, or 3 as for Cdregion -- 0 = not set -- 1, 2, 3 = actual frame frame INTEGER DEFAULT 0 } -- Spliced-exon-chunk: piece of an exon -- lengths are given in nucleotide bases (1/3 of aminoacid when product is a -- protein) Spliced-exon-chunk ::= CHOICE { -- both sequences represented, product and genomic sequences match match INTEGER , -- both sequences represented, product and genomic sequences do not match mismatch INTEGER , -- both sequences are represented, there is sufficient similarity -- between product and genomic sequences. Can be used to replace stretches -- of matches and mismatches, mostly for protein to genomic where -- definition of match or mismatch depends on translation table diag INTEGER , -- insertion in product sequence (i.e. gap in the genomic sequence) product-ins INTEGER , -- insertion in genomic sequence (i.e. 
gap in the product sequence) genomic-ins INTEGER } -- site involved in splice Splice-site ::= SEQUENCE { -- typically two bases in the intronic region, always -- in IUPAC format bases VisibleString } -- ========================================================================== -- -- Sparse-seg follows the semantics of dense-seg and is more optimal for -- representing sparse multiple alignments -- -- ========================================================================== Sparse-seg ::= SEQUENCE { master-id Seq-id OPTIONAL, -- pairwise alignments constituting this multiple alignment rows SET OF Sparse-align, -- per-row scores row-scores SET OF Score OPTIONAL, -- index of extra items ext SET OF Sparse-seg-ext OPTIONAL } Sparse-align ::= SEQUENCE { first-id Seq-id, second-id Seq-id, numseg INTEGER, --number of segments first-starts SEQUENCE OF INTEGER , --starts on the first sequence [numseg] second-starts SEQUENCE OF INTEGER , --starts on the second sequence [numseg] lens SEQUENCE OF INTEGER , --lengths of segments [numseg] second-strands SEQUENCE OF Na-strand OPTIONAL , -- per-segment scores seg-scores SET OF Score OPTIONAL } Sparse-seg-ext ::= SEQUENCE { --seg-ext SET OF { -- index INTEGER, -- data User-field -- } index INTEGER } -- use of Score is discouraged for external ASN.1 specifications Score ::= SEQUENCE { id Object-id OPTIONAL , value CHOICE { real REAL , int INTEGER } } -- use of Score-set is encouraged for external ASN.1 specifications Score-set ::= SET OF Score END -- seqblock.asn --$Revision: 6.0 $ --********************************************************************* -- -- 1990 - J.Ostell -- Version 3.0 - June 1994 -- --********************************************************************* --********************************************************************* -- -- EMBL specific data -- This block of specifications was developed by Reiner Fuchs of EMBL -- Updated by J.Ostell, 1994 -- 
--********************************************************************* EMBL-General DEFINITIONS ::= BEGIN EXPORTS EMBL-dbname, EMBL-xref, EMBL-block; IMPORTS Date, Object-id FROM NCBI-General; EMBL-dbname ::= CHOICE { code ENUMERATED { embl(0), genbank(1), ddbj(2), geninfo(3), medline(4), swissprot(5), pir(6), pdb(7), epd(8), ecd(9), tfd(10), flybase(11), prosite(12), enzyme(13), mim(14), ecoseq(15), hiv(16) , other (255) } , name VisibleString } EMBL-xref ::= SEQUENCE { dbname EMBL-dbname, id SEQUENCE OF Object-id } EMBL-block ::= SEQUENCE { class ENUMERATED { not-set(0), standard(1), unannotated(2), other(255) } DEFAULT standard, div ENUMERATED { fun(0), inv(1), mam(2), org(3), phg(4), pln(5), pri(6), pro(7), rod(8), syn(9), una(10), vrl(11), vrt(12), pat(13), est(14), sts(15), other (255) } OPTIONAL, creation-date Date, update-date Date, extra-acc SEQUENCE OF VisibleString OPTIONAL, keywords SEQUENCE OF VisibleString OPTIONAL, xref SEQUENCE OF EMBL-xref OPTIONAL } END --********************************************************************* -- -- SWISSPROT specific data -- This block of specifications was developed by Mark Cavanaugh of -- NCBI working with Amos Bairoch of SWISSPROT -- --********************************************************************* SP-General DEFINITIONS ::= BEGIN EXPORTS SP-block; IMPORTS Date, Dbtag FROM NCBI-General Seq-id FROM NCBI-Seqloc; SP-block ::= SEQUENCE { -- SWISSPROT specific descriptions class ENUMERATED { not-set (0) , standard (1) , -- conforms to all SWISSPROT checks prelim (2) , -- only seq and biblio checked other (255) } , extra-acc SET OF VisibleString OPTIONAL , -- old SWISSPROT ids imeth BOOLEAN DEFAULT FALSE , -- seq known to start with Met plasnm SET OF VisibleString OPTIONAL, -- plasmid names carrying gene seqref SET OF Seq-id OPTIONAL, -- xref to other sequences dbref SET OF Dbtag OPTIONAL , -- xref to non-sequence dbases keywords SET OF VisibleString OPTIONAL , -- keywords created Date OPTIONAL , -- creation 
date sequpd Date OPTIONAL , -- sequence update annotupd Date OPTIONAL } -- annotation update END --********************************************************************* -- -- PIR specific data -- This block of specifications was developed by Jim Ostell of -- NCBI -- --********************************************************************* PIR-General DEFINITIONS ::= BEGIN EXPORTS PIR-block; IMPORTS Seq-id FROM NCBI-Seqloc; PIR-block ::= SEQUENCE { -- PIR specific descriptions had-punct BOOLEAN OPTIONAL , -- had punctuation in sequence ? host VisibleString OPTIONAL , source VisibleString OPTIONAL , -- source line summary VisibleString OPTIONAL , genetic VisibleString OPTIONAL , includes VisibleString OPTIONAL , placement VisibleString OPTIONAL , superfamily VisibleString OPTIONAL , keywords SEQUENCE OF VisibleString OPTIONAL , cross-reference VisibleString OPTIONAL , date VisibleString OPTIONAL , seq-raw VisibleString OPTIONAL , -- seq with punctuation seqref SET OF Seq-id OPTIONAL } -- xref to other sequences END --********************************************************************* -- -- GenBank specific data -- This block of specifications was developed by Jim Ostell of -- NCBI -- --********************************************************************* GenBank-General DEFINITIONS ::= BEGIN EXPORTS GB-block; IMPORTS Date FROM NCBI-General; GB-block ::= SEQUENCE { -- GenBank specific descriptions extra-accessions SEQUENCE OF VisibleString OPTIONAL , source VisibleString OPTIONAL , -- source line keywords SEQUENCE OF VisibleString OPTIONAL , origin VisibleString OPTIONAL, date VisibleString OPTIONAL , -- OBSOLETE old form Entry Date entry-date Date OPTIONAL , -- replaces date div VisibleString OPTIONAL , -- GenBank division taxonomy VisibleString OPTIONAL } -- continuation line of organism END --********************************************************************** -- PRF specific definition -- PRF is a protein sequence database created and maintained by -- Protein
Research Foundation, Minoo-city, Osaka, Japan. -- -- Written by A.Ogiwara, Inst.Chem.Res. (Dr.Kanehisa's Lab), -- Kyoto Univ., Japan -- --********************************************************************** PRF-General DEFINITIONS ::= BEGIN EXPORTS PRF-block; PRF-block ::= SEQUENCE { extra-src PRF-ExtraSrc OPTIONAL, keywords SEQUENCE OF VisibleString OPTIONAL } PRF-ExtraSrc ::= SEQUENCE { host VisibleString OPTIONAL, part VisibleString OPTIONAL, state VisibleString OPTIONAL, strain VisibleString OPTIONAL, taxon VisibleString OPTIONAL } END --********************************************************************* -- -- PDB specific data -- This block of specifications was developed by Jim Ostell and -- Steve Bryant of NCBI -- --********************************************************************* PDB-General DEFINITIONS ::= BEGIN EXPORTS PDB-block; IMPORTS Date FROM NCBI-General; PDB-block ::= SEQUENCE { -- PDB specific descriptions deposition Date , -- deposition date month,year class VisibleString , compound SEQUENCE OF VisibleString , source SEQUENCE OF VisibleString , exp-method VisibleString OPTIONAL , -- present if NOT X-ray diffraction replace PDB-replace OPTIONAL } -- replacement history PDB-replace ::= SEQUENCE { date Date , ids SEQUENCE OF VisibleString } -- entry ids replaced by this one END -- seqcode.asn --$Revision: 6.0 $ -- ********************************************************************* -- -- These are code and conversion tables for NCBI sequence codes -- ASN.1 for the sequences themselves is defined in seq.asn -- -- Seq-map-table and Seq-code-table REQUIRE that codes start with 0 -- and increase continuously. So IUPAC codes, which are upper case -- letters will always have 65 0 cells before the codes begin.
-- This allows all codes to do indexed lookups for things
--
-- Valid names for code tables are:
--    IUPACna
--    IUPACaa
--    IUPACeaa
--    IUPACaa3 3 letter amino acid codes : parallels IUPACeaa
--             display only, not a data exchange type
--    NCBI2na
--    NCBI4na
--    NCBI8na
--    NCBI8aa
--    NCBIstdaa
--    probability types map to IUPAC types for display as characters

NCBI-SeqCode DEFINITIONS ::=
BEGIN

EXPORTS Seq-code-table, Seq-map-table, Seq-code-set;

Seq-code-type ::= ENUMERATED { -- sequence representations
    iupacna (1) , -- IUPAC 1 letter nuc acid code
    iupacaa (2) , -- IUPAC 1 letter amino acid code
    ncbi2na (3) , -- 2 bit nucleic acid code
    ncbi4na (4) , -- 4 bit nucleic acid code
    ncbi8na (5) , -- 8 bit extended nucleic acid code
    ncbipna (6) , -- nucleic acid probabilities
    ncbi8aa (7) , -- 8 bit extended amino acid codes
    ncbieaa (8) , -- extended ASCII 1 letter aa codes
    ncbipaa (9) , -- amino acid probabilities
    iupacaa3 (10) , -- 3 letter code only for display
    ncbistdaa (11) } -- consecutive codes for std aas, 0-25

Seq-map-table ::= SEQUENCE { -- for tables of sequence mappings
    from Seq-code-type , -- code to map from
    to Seq-code-type , -- code to map to
    num INTEGER , -- number of rows in table
    start-at INTEGER DEFAULT 0 , -- index offset of first element
    table SEQUENCE OF INTEGER } -- table of values, in from-to order

Seq-code-table ::= SEQUENCE { -- for names of coded values
    code Seq-code-type , -- name of code
    num INTEGER , -- number of rows in table
    one-letter BOOLEAN , -- symbol is ALWAYS 1 letter?
    start-at INTEGER DEFAULT 0 , -- index offset of first element
    table SEQUENCE OF
        SEQUENCE {
            symbol VisibleString , -- the printed symbol or letter
            name VisibleString } , -- an explanatory name or string
    comps SEQUENCE OF INTEGER OPTIONAL } -- pointers to complement nuc acid

Seq-code-set ::= SEQUENCE { -- for distribution
    codes SET OF Seq-code-table OPTIONAL ,
    maps SET OF Seq-map-table OPTIONAL }

END

-- seqfeat.asn
--$Revision: 142746 $
--**********************************************************************
--
-- NCBI Sequence Feature elements
-- by James Ostell, 1990
-- Version 3.0 - June 1994
--
--**********************************************************************

NCBI-Seqfeat DEFINITIONS ::=
BEGIN

EXPORTS Seq-feat, Feat-id, Genetic-code;

IMPORTS Gene-ref FROM NCBI-Gene
        Prot-ref FROM NCBI-Protein
        Org-ref FROM NCBI-Organism
        BioSource FROM NCBI-BioSource
        RNA-ref FROM NCBI-RNA
        Seq-loc, Giimport-id FROM NCBI-Seqloc
        Pubdesc, Numbering, Heterogen FROM NCBI-Sequence
        Rsite-ref FROM NCBI-Rsite
        Txinit FROM NCBI-TxInit
        Pub-set FROM NCBI-Pub
        Object-id, Dbtag, User-object FROM NCBI-General;

--*** Feature identifiers ********************************
--*

Feat-id ::= CHOICE {
    gibb INTEGER , -- geninfo backbone
    giim Giimport-id , -- geninfo import
    local Object-id , -- for local software use
    general Dbtag } -- for use by various databases

--*** Seq-feat *******************************************
--* sequence feature generalization

Seq-feat ::= SEQUENCE {
    id Feat-id OPTIONAL ,
    data SeqFeatData , -- the specific data
    partial BOOLEAN OPTIONAL , -- incomplete in some way?
    except BOOLEAN OPTIONAL , -- something funny about this?
    comment VisibleString OPTIONAL ,
    product Seq-loc OPTIONAL , -- product of process
    location Seq-loc , -- feature made from
    qual SEQUENCE OF Gb-qual OPTIONAL , -- qualifiers
    title VisibleString OPTIONAL , -- for user defined label
    ext User-object OPTIONAL , -- user defined structure extension
    cit Pub-set OPTIONAL , -- citations for this feature
    exp-ev ENUMERATED { -- evidence for existence of feature
        experimental (1) , -- any reasonable experimental check
        not-experimental (2) } OPTIONAL , -- similarity, pattern, etc
    xref SET OF SeqFeatXref OPTIONAL , -- cite other relevant features
    dbxref SET OF Dbtag OPTIONAL , -- support for xref to other databases
    pseudo BOOLEAN OPTIONAL , -- annotated on pseudogene?
    except-text VisibleString OPTIONAL , -- explain if except=TRUE
    ids SET OF Feat-id OPTIONAL , -- set of Ids; will replace 'id' field
    exts SET OF User-object OPTIONAL } -- set of extensions; will replace 'ext' field

SeqFeatData ::= CHOICE {
    gene Gene-ref ,
    org Org-ref ,
    cdregion Cdregion ,
    prot Prot-ref ,
    rna RNA-ref ,
    pub Pubdesc , -- publication applies to this seq
    seq Seq-loc , -- to annotate origin from another seq
    imp Imp-feat ,
    region VisibleString, -- named region (globin locus)
    comment NULL , -- just a comment
    bond ENUMERATED {
        disulfide (1) ,
        thiolester (2) ,
        xlink (3) ,
        thioether (4) ,
        other (255) } ,
    site ENUMERATED {
        active (1) ,
        binding (2) ,
        cleavage (3) ,
        inhibit (4) ,
        modified (5),
        glycosylation (6) ,
        myristoylation (7) ,
        mutagenized (8) ,
        metal-binding (9) ,
        phosphorylation (10) ,
        acetylation (11) ,
        amidation (12) ,
        methylation (13) ,
        hydroxylation (14) ,
        sulfatation (15) ,
        oxidative-deamination (16) ,
        pyrrolidone-carboxylic-acid (17) ,
        gamma-carboxyglutamic-acid (18) ,
        blocked (19) ,
        lipid-binding (20) ,
        np-binding (21) ,
        dna-binding (22) ,
        signal-peptide (23) ,
        transit-peptide (24) ,
        transmembrane-region (25) ,
        nitrosylation (26) ,
        other (255) } ,
    rsite Rsite-ref , -- restriction site (for maps really)
    user User-object , -- user defined structure
    txinit Txinit , -- transcription initiation
    num Numbering , -- a numbering system
    psec-str ENUMERATED { -- protein secondary structure
        helix (1) , -- any helix
        sheet (2) , -- beta sheet
        turn (3) } , -- beta or gamma turn
    non-std-residue VisibleString , -- non-standard residue here in seq
    het Heterogen , -- cofactor, prosthetic grp, etc, bound to seq
    biosrc BioSource }

SeqFeatXref ::= SEQUENCE { -- both optional because can have one or both
    id Feat-id OPTIONAL , -- the feature copied
    data SeqFeatData OPTIONAL } -- the specific data

--*** CdRegion ***********************************************
--*
--* Instructions to translate from a nucleic acid to a peptide
--* conflict means it's supposed to translate but doesn't
--*

Cdregion ::= SEQUENCE {
    orf BOOLEAN OPTIONAL , -- just an ORF ?
    frame ENUMERATED {
        not-set (0) , -- not set, code uses one
        one (1) ,
        two (2) ,
        three (3) } DEFAULT not-set , -- reading frame
    conflict BOOLEAN OPTIONAL , -- conflict
    gaps INTEGER OPTIONAL , -- number of gaps on conflict/except
    mismatch INTEGER OPTIONAL , -- number of mismatches on above
    code Genetic-code OPTIONAL , -- genetic code used
    code-break SEQUENCE OF Code-break OPTIONAL , -- individual exceptions
    stops INTEGER OPTIONAL } -- number of stop codons on above

-- each code is 64 cells long, in the order where
-- T=0,C=1,A=2,G=3, TTT=0, TTC=1, TCA=4, etc
-- NOTE: this order does NOT correspond to a Seq-data
-- encoding. It is "natural" to codon usage instead.
-- the value in each cell is the AA coded for
-- start= AA coded only if first in peptide
-- in start array, if codon is not a legitimate start
-- codon, that cell will have the "gap" symbol for
-- that alphabet. Otherwise it will have the AA
-- encoded when that codon is used at the start.
Genetic-code ::= SET OF CHOICE {
    name VisibleString , -- name of a code
    id INTEGER , -- id in dbase
    ncbieaa VisibleString , -- indexed to IUPAC extended
    ncbi8aa OCTET STRING , -- indexed to NCBI8aa
    ncbistdaa OCTET STRING , -- indexed to NCBIstdaa
    sncbieaa VisibleString , -- start, indexed to IUPAC extended
    sncbi8aa OCTET STRING , -- start, indexed to NCBI8aa
    sncbistdaa OCTET STRING } -- start, indexed to NCBIstdaa

Code-break ::= SEQUENCE { -- specific codon exceptions
    loc Seq-loc , -- location of exception
    aa CHOICE { -- the amino acid
        ncbieaa INTEGER , -- ASCII value of NCBIeaa code
        ncbi8aa INTEGER , -- NCBI8aa code
        ncbistdaa INTEGER } } -- NCBIstdaa code

Genetic-code-table ::= SET OF Genetic-code -- table of genetic codes

--*** Import ***********************************************
--*
--* Features imported from other databases
--*

Imp-feat ::= SEQUENCE {
    key VisibleString ,
    loc VisibleString OPTIONAL , -- original location string
    descr VisibleString OPTIONAL } -- text description

Gb-qual ::= SEQUENCE {
    qual VisibleString ,
    val VisibleString }

END

--**********************************************************************
--
-- NCBI Restriction Sites
-- by James Ostell, 1990
-- version 0.8
--
--**********************************************************************

NCBI-Rsite DEFINITIONS ::=
BEGIN

EXPORTS Rsite-ref;

IMPORTS Dbtag FROM NCBI-General;

Rsite-ref ::= CHOICE {
    str VisibleString , -- may be unparsable
    db Dbtag } -- pointer to a restriction site database

END

--**********************************************************************
--
-- NCBI RNAs
-- by James Ostell, 1990
-- version 0.8
--
--**********************************************************************

NCBI-RNA DEFINITIONS ::=
BEGIN

EXPORTS RNA-ref, Trna-ext, RNA-gen, RNA-qual, RNA-qual-set;

IMPORTS Seq-loc FROM NCBI-Seqloc;

--*** rnas ***********************************************
--*
--* various rnas
--*
-- minimal RNA sequence

RNA-ref ::= SEQUENCE {
    type ENUMERATED { -- type of RNA feature
        unknown (0) ,
        premsg (1) ,
        mRNA (2) ,
        tRNA (3) ,
        rRNA (4) ,
        snRNA (5) , -- will become ncRNA, with RNA-gen.class = snRNA
        scRNA (6) , -- will become ncRNA, with RNA-gen.class = scRNA
        snoRNA (7) , -- will become ncRNA, with RNA-gen.class = snoRNA
        ncRNA (8) , -- non-coding RNA; subsumes snRNA, scRNA, snoRNA
        tmRNA (9) ,
        miscRNA (10) ,
        other (255) } ,
    pseudo BOOLEAN OPTIONAL ,
    ext CHOICE {
        name VisibleString , -- for naming "other" type
        tRNA Trna-ext , -- for tRNAs
        gen RNA-gen } OPTIONAL -- generic fields for ncRNA, tmRNA, miscRNA
}

Trna-ext ::= SEQUENCE { -- tRNA feature extensions
    aa CHOICE { -- aa this carries
        iupacaa INTEGER ,
        ncbieaa INTEGER ,
        ncbi8aa INTEGER ,
        ncbistdaa INTEGER } OPTIONAL ,
    codon SET OF INTEGER OPTIONAL , -- codon(s) as in Genetic-code
    anticodon Seq-loc OPTIONAL } -- location of anticodon

RNA-gen ::= SEQUENCE {
    class VisibleString OPTIONAL , -- for ncRNAs, the class of non-coding RNA:
                                   -- examples: antisense_RNA, guide_RNA, snRNA
    product VisibleString OPTIONAL ,
    quals RNA-qual-set OPTIONAL -- e.g., tag_peptide qualifier for tmRNAs
}

RNA-qual ::= SEQUENCE { -- Additional data values for RNA-gen,
    qual VisibleString , -- in a tag (qual), value (val) format
    val VisibleString }

RNA-qual-set ::= SEQUENCE OF RNA-qual

END

--**********************************************************************
--
-- NCBI Genes
-- by James Ostell, 1990
-- version 0.8
--
--**********************************************************************

NCBI-Gene DEFINITIONS ::=
BEGIN

EXPORTS Gene-ref, Gene-nomenclature;

IMPORTS Dbtag FROM NCBI-General;

--*** Gene ***********************************************
--*
--* reference to a gene
--*

Gene-ref ::= SEQUENCE {
    locus VisibleString OPTIONAL , -- Official gene symbol
    allele VisibleString OPTIONAL , -- Official allele designation
    desc VisibleString OPTIONAL , -- descriptive name
    maploc VisibleString OPTIONAL , -- descriptive map location
    pseudo BOOLEAN DEFAULT FALSE , -- pseudogene
    db SET OF Dbtag OPTIONAL , -- ids in other dbases
    syn SET OF VisibleString OPTIONAL , -- synonyms for locus
    locus-tag VisibleString OPTIONAL , -- systematic gene name (e.g., MI0001, ORF0069)
    formal-name Gene-nomenclature OPTIONAL }

Gene-nomenclature ::= SEQUENCE {
    status ENUMERATED {
        unknown (0) ,
        official (1) ,
        interim (2) } ,
    symbol VisibleString OPTIONAL ,
    name VisibleString OPTIONAL ,
    source Dbtag OPTIONAL }

END

--**********************************************************************
--
-- NCBI Organism
-- by James Ostell, 1994
-- version 3.0
--
--**********************************************************************

NCBI-Organism DEFINITIONS ::=
BEGIN

EXPORTS Org-ref;

IMPORTS Dbtag FROM NCBI-General;

--*** Org-ref ***********************************************
--*
--* Reference to an organism
--* defines only the organism.. lower levels of detail for biological
--* molecules are provided by the Source object
--*

Org-ref ::= SEQUENCE {
    taxname VisibleString OPTIONAL , -- preferred formal name
    common VisibleString OPTIONAL , -- common name
    mod SET OF VisibleString OPTIONAL , -- unstructured modifiers
    db SET OF Dbtag OPTIONAL , -- ids in taxonomic or culture dbases
    syn SET OF VisibleString OPTIONAL , -- synonyms for taxname or common
    orgname OrgName OPTIONAL }

OrgName ::= SEQUENCE {
    name CHOICE {
        binomial BinomialOrgName , -- genus/species type name
        virus VisibleString , -- virus names are different
        hybrid MultiOrgName , -- hybrid between organisms
        namedhybrid BinomialOrgName , -- some hybrids have genus x species name
        partial PartialOrgName } OPTIONAL , -- when genus not known
    attrib VisibleString OPTIONAL , -- attribution of name
    mod SEQUENCE OF OrgMod OPTIONAL ,
    lineage VisibleString OPTIONAL , -- lineage with semicolon separators
    gcode INTEGER OPTIONAL , -- genetic code (see CdRegion)
    mgcode INTEGER OPTIONAL , -- mitochondrial genetic code
    div VisibleString OPTIONAL } -- GenBank division code

OrgMod ::= SEQUENCE {
    subtype INTEGER {
        strain (2) ,
        substrain (3) ,
        type (4) ,
        subtype (5) ,
        variety (6) ,
        serotype (7) ,
        serogroup (8) ,
        serovar (9) ,
        cultivar (10) ,
        pathovar (11) ,
        chemovar (12) ,
        biovar (13) ,
        biotype (14) ,
        group (15) ,
        subgroup (16) ,
        isolate (17) ,
        common (18) ,
        acronym (19) ,
        dosage (20) , -- chromosome dosage of hybrid
        nat-host (21) , -- natural host of this specimen
        sub-species (22) ,
        specimen-voucher (23) ,
        authority (24) ,
        forma (25) ,
        forma-specialis (26) ,
        ecotype (27) ,
        synonym (28) ,
        anamorph (29) ,
        teleomorph (30) ,
        breed (31) ,
        gb-acronym (32) , -- used by taxonomy database
        gb-anamorph (33) , -- used by taxonomy database
        gb-synonym (34) , -- used by taxonomy database
        culture-collection (35) ,
        bio-material (36) ,
        metagenome-source (37) ,
        old-lineage (253) ,
        old-name (254) ,
        other (255) } , -- ASN5: old-name (254) will be added to next spec
    subname VisibleString ,
    attrib VisibleString OPTIONAL } -- attribution/source of name

BinomialOrgName ::= SEQUENCE {
    genus VisibleString , -- required
    species VisibleString OPTIONAL , -- species required if subspecies used
    subspecies VisibleString OPTIONAL }

MultiOrgName ::= SEQUENCE OF OrgName -- the first will be used to assign division

PartialOrgName ::= SEQUENCE OF TaxElement -- when we don't know the genus

TaxElement ::= SEQUENCE {
    fixed-level INTEGER {
        other (0) , -- level must be set in string
        family (1) ,
        order (2) ,
        class (3) } ,
    level VisibleString OPTIONAL ,
    name VisibleString }

END

--**********************************************************************
--
-- NCBI BioSource
-- by James Ostell, 1994
-- version 3.0
--
--**********************************************************************

NCBI-BioSource DEFINITIONS ::=
BEGIN

EXPORTS BioSource;

IMPORTS Org-ref FROM NCBI-Organism;

--********************************************************************
--
-- BioSource gives the source of the biological material
-- for sequences
--
--********************************************************************

BioSource ::= SEQUENCE {
    genome INTEGER { -- biological context
        unknown (0) ,
        genomic (1) ,
        chloroplast (2) ,
        chromoplast (3) ,
        kinetoplast (4) ,
        mitochondrion (5) ,
        plastid (6) ,
        macronuclear (7) ,
        extrachrom (8) ,
        plasmid (9) ,
        transposon (10) ,
        insertion-seq (11) ,
        cyanelle (12) ,
        proviral (13) ,
        virion (14) ,
        nucleomorph (15) ,
        apicoplast (16) ,
        leucoplast (17) ,
        proplastid (18) ,
        endogenous-virus (19) ,
        hydrogenosome (20) ,
        chromosome (21) ,
        chromatophore (22) } DEFAULT unknown ,
    origin INTEGER {
        unknown (0) ,
        natural (1) , -- normal biological entity
        natmut (2) , -- naturally occurring mutant
        mut (3) , -- artificially mutagenized
        artificial (4) , -- artificially engineered
        synthetic (5) , -- purely synthetic
        other (255) } DEFAULT unknown ,
    org Org-ref ,
    subtype SEQUENCE OF SubSource OPTIONAL ,
    is-focus NULL OPTIONAL , -- to distinguish biological focus
    pcr-primers PCRReactionSet OPTIONAL }

PCRReactionSet ::= SET OF PCRReaction

PCRReaction ::= SEQUENCE {
    forward PCRPrimerSet OPTIONAL ,
    reverse PCRPrimerSet OPTIONAL }

PCRPrimerSet ::= SET OF PCRPrimer

PCRPrimer ::= SEQUENCE {
    seq PCRPrimerSeq OPTIONAL ,
    name PCRPrimerName OPTIONAL }

PCRPrimerSeq ::= VisibleString

PCRPrimerName ::= VisibleString

SubSource ::= SEQUENCE {
    subtype INTEGER {
        chromosome (1) ,
        map (2) ,
        clone (3) ,
        subclone (4) ,
        haplotype (5) ,
        genotype (6) ,
        sex (7) ,
        cell-line (8) ,
        cell-type (9) ,
        tissue-type (10) ,
        clone-lib (11) ,
        dev-stage (12) ,
        frequency (13) ,
        germline (14) ,
        rearranged (15) ,
        lab-host (16) ,
        pop-variant (17) ,
        tissue-lib (18) ,
        plasmid-name (19) ,
        transposon-name (20) ,
        insertion-seq-name (21) ,
        plastid-name (22) ,
        country (23) ,
        segment (24) ,
        endogenous-virus-name (25) ,
        transgenic (26) ,
        environmental-sample (27) ,
        isolation-source (28) ,
        lat-lon (29) , -- +/- decimal degrees
        collection-date (30) , -- DD-MMM-YYYY format
        collected-by (31) , -- name of person who collected the sample
        identified-by (32) , -- name of person who identified the sample
        fwd-primer-seq (33) , -- sequence (possibly more than one; semicolon-separated)
        rev-primer-seq (34) , -- sequence (possibly more than one; semicolon-separated)
        fwd-primer-name (35) ,
        rev-primer-name (36) ,
        metagenomic (37) ,
        mating-type (38) ,
        linkage-group (39) ,
        haplogroup (40) ,
        other (255) } ,
    name VisibleString ,
    attrib VisibleString OPTIONAL } -- attribution/source of this name

END

--**********************************************************************
--
-- NCBI Protein
-- by James Ostell, 1990
-- version 0.8
--
--**********************************************************************

NCBI-Protein DEFINITIONS ::=
BEGIN

EXPORTS Prot-ref;

IMPORTS Dbtag FROM NCBI-General;

--*** Prot-ref ***********************************************
--*
--* Reference to a protein name
--*

Prot-ref ::= SEQUENCE {
    name SET OF VisibleString OPTIONAL , -- protein name
    desc VisibleString OPTIONAL , -- description (instead of name)
    ec SET OF VisibleString OPTIONAL , -- E.C. number(s)
    activity SET OF VisibleString OPTIONAL , -- activities
    db SET OF Dbtag OPTIONAL , -- ids in other dbases
    processed ENUMERATED { -- processing status
        not-set (0) ,
        preprotein (1) ,
        mature (2) ,
        signal-peptide (3) ,
        transit-peptide (4) } DEFAULT not-set }

END

--********************************************************************
--
-- Transcription Initiation Site Feature Data Block
-- James Ostell, 1991
-- Philip Bucher, David Ghosh
-- version 1.1
--
--
--
--********************************************************************

NCBI-TxInit DEFINITIONS ::=
BEGIN

EXPORTS Txinit;

IMPORTS Gene-ref FROM NCBI-Gene
        Prot-ref FROM NCBI-Protein
        Org-ref FROM NCBI-Organism;

Txinit ::= SEQUENCE {
    name VisibleString , -- descriptive name of initiation site
    syn SEQUENCE OF VisibleString OPTIONAL , -- synonyms
    gene SEQUENCE OF Gene-ref OPTIONAL , -- gene(s) transcribed
    protein SEQUENCE OF Prot-ref OPTIONAL , -- protein(s) produced
    rna SEQUENCE OF VisibleString OPTIONAL , -- rna(s) produced
    expression VisibleString OPTIONAL , -- tissue/time of expression
    txsystem ENUMERATED { -- transcription apparatus used at this site
        unknown (0) ,
        pol1 (1) , -- eukaryotic Pol I
        pol2 (2) , -- eukaryotic Pol II
        pol3 (3) , -- eukaryotic Pol III
        bacterial (4) ,
        viral (5) ,
        rna (6) , -- RNA replicase
        organelle (7) ,
        other (255) } ,
    txdescr VisibleString OPTIONAL , -- modifiers on txsystem
    txorg Org-ref OPTIONAL , -- organism supplying transcription apparatus
    mapping-precise BOOLEAN DEFAULT FALSE , -- mapping precise or approx
    location-accurate BOOLEAN DEFAULT FALSE , -- does Seq-loc reflect mapping
    inittype ENUMERATED {
        unknown (0) ,
        single (1) ,
        multiple (2) ,
        region (3) } OPTIONAL ,
    evidence SET OF Tx-evidence OPTIONAL }

Tx-evidence ::= SEQUENCE {
    exp-code ENUMERATED {
        unknown (0) ,
        rna-seq (1) , -- direct RNA sequencing
        rna-size (2) , -- RNA length measurement
        np-map (3) , -- nuclease protection mapping with homologous sequence ladder
        np-size (4) , -- nuclease protected fragment length measurement
        pe-seq (5) , -- dideoxy RNA sequencing
        cDNA-seq (6) , -- full-length cDNA sequencing
        pe-map (7) , -- primer extension mapping with homologous sequence ladder
        pe-size (8) , -- primer extension product length measurement
        pseudo-seq (9) , -- full-length processed pseudogene sequencing
        rev-pe-map (10) , -- see NOTE (1) below
        other (255) } ,
    expression-system ENUMERATED {
        unknown (0) ,
        physiological (1) ,
        in-vitro (2) ,
        oocyte (3) ,
        transfection (4) ,
        transgenic (5) ,
        other (255) } DEFAULT physiological ,
    low-prec-data BOOLEAN DEFAULT FALSE ,
    from-homolog BOOLEAN DEFAULT FALSE } -- experiment actually done on
                                         -- close homolog

-- NOTE (1) length measurement of a reverse direction primer-extension
--          product (blocked by RNA 5'end) by comparison with
--          homologous sequence ladder (J. Mol. Biol.
--          199, 587)

END

-- seqloc.asn
--$Revision: 140157 $
--**********************************************************************
--
-- NCBI Sequence location and identifier elements
-- by James Ostell, 1990
--
-- Version 3.0 - 1994
--
--**********************************************************************

NCBI-Seqloc DEFINITIONS ::=
BEGIN

EXPORTS Seq-id, Seq-loc, Seq-interval, Packed-seqint, Seq-point, Packed-seqpnt,
        Na-strand, Giimport-id;

IMPORTS Object-id, Int-fuzz, Dbtag, Date FROM NCBI-General
        Id-pat FROM NCBI-Biblio
        Feat-id FROM NCBI-Seqfeat;

--*** Sequence identifiers ********************************
--*

Seq-id ::= CHOICE {
    local Object-id , -- local use
    gibbsq INTEGER , -- Geninfo backbone seqid
    gibbmt INTEGER , -- Geninfo backbone moltype
    giim Giimport-id , -- Geninfo import id
    genbank Textseq-id ,
    embl Textseq-id ,
    pir Textseq-id ,
    swissprot Textseq-id ,
    patent Patent-seq-id ,
    other Textseq-id , -- for historical reasons, 'other' = 'refseq'
    general Dbtag , -- for other databases
    gi INTEGER , -- GenInfo Integrated Database
    ddbj Textseq-id , -- DDBJ
    prf Textseq-id , -- PRF SEQDB
    pdb PDB-seq-id , -- PDB sequence
    tpg Textseq-id , -- Third Party Annot/Seq Genbank
    tpe Textseq-id , -- Third Party Annot/Seq EMBL
    tpd Textseq-id , -- Third Party Annot/Seq DDBJ
    gpipe Textseq-id , -- Internal NCBI genome pipeline processing ID
    named-annot-track Textseq-id -- Internal named annotation tracking ID
}

Patent-seq-id ::= SEQUENCE {
    seqid INTEGER , -- number of sequence in patent
    cit Id-pat } -- patent citation

Textseq-id ::= SEQUENCE {
    name VisibleString OPTIONAL ,
    accession VisibleString OPTIONAL ,
    release VisibleString OPTIONAL ,
    version INTEGER OPTIONAL }

Giimport-id ::= SEQUENCE {
    id INTEGER , -- the id to use here
    db VisibleString OPTIONAL , -- dbase used in
    release VisibleString OPTIONAL } -- the release

PDB-seq-id ::= SEQUENCE {
    mol PDB-mol-id , -- the molecule name
    chain INTEGER DEFAULT 32 , -- a single ASCII character, chain id
    rel Date OPTIONAL } -- release date, month and year

PDB-mol-id ::= VisibleString -- name of mol, 4 chars

--*** Sequence locations **********************************
--*

Seq-loc ::= CHOICE {
    null NULL , -- not placed
    empty Seq-id , -- to NULL one Seq-id in a collection
    whole Seq-id , -- whole sequence
    int Seq-interval , -- from to
    packed-int Packed-seqint ,
    pnt Seq-point ,
    packed-pnt Packed-seqpnt ,
    mix Seq-loc-mix ,
    equiv Seq-loc-equiv , -- equivalent sets of locations
    bond Seq-bond ,
    feat Feat-id } -- indirect, through a Seq-feat

Seq-interval ::= SEQUENCE {
    from INTEGER ,
    to INTEGER ,
    strand Na-strand OPTIONAL ,
    id Seq-id , -- WARNING: this used to be optional
    fuzz-from Int-fuzz OPTIONAL ,
    fuzz-to Int-fuzz OPTIONAL }

Packed-seqint ::= SEQUENCE OF Seq-interval

Seq-point ::= SEQUENCE {
    point INTEGER ,
    strand Na-strand OPTIONAL ,
    id Seq-id , -- WARNING: this used to be optional
    fuzz Int-fuzz OPTIONAL }

Packed-seqpnt ::= SEQUENCE {
    strand Na-strand OPTIONAL ,
    id Seq-id ,
    fuzz Int-fuzz OPTIONAL ,
    points SEQUENCE OF INTEGER }

Na-strand ::= ENUMERATED { -- strand of nucleic acid
    unknown (0) ,
    plus (1) ,
    minus (2) ,
    both (3) , -- in forward orientation
    both-rev (4) , -- in reverse orientation
    other (255) }

Seq-bond ::= SEQUENCE { -- bond between residues
    a Seq-point , -- connection to at least one residue
    b Seq-point OPTIONAL } -- other end may not be available

Seq-loc-mix ::= SEQUENCE OF Seq-loc -- this will hold anything

Seq-loc-equiv ::= SET OF Seq-loc -- for a set of equivalent locations

END

-- seqres.asn
--$Revision: 6.0 $
--**********************************************************************
--
-- NCBI Sequence Analysis Results (other than alignments)
-- by James Ostell, 1990
--
--**********************************************************************

NCBI-Seqres DEFINITIONS ::=
BEGIN

EXPORTS Seq-graph;

IMPORTS Seq-loc FROM NCBI-Seqloc;

--*** Sequence Graph ********************************
--*
--* for values mapped by residue or range to sequence
--*

Seq-graph ::= SEQUENCE {
    title VisibleString OPTIONAL ,
    comment VisibleString OPTIONAL ,
    loc Seq-loc , -- region this applies to
    title-x VisibleString OPTIONAL , -- title for x-axis
    title-y VisibleString OPTIONAL ,
    comp INTEGER OPTIONAL , -- compression (residues/value)
    a REAL OPTIONAL , -- for scaling values
    b REAL OPTIONAL , -- display = (a x value) + b
    numval INTEGER , -- number of values in graph
    graph CHOICE {
        real Real-graph ,
        int Int-graph ,
        byte Byte-graph } }

Real-graph ::= SEQUENCE {
    max REAL , -- top of graph
    min REAL , -- bottom of graph
    axis REAL , -- value to draw axis on
    values SEQUENCE OF REAL }

Int-graph ::= SEQUENCE {
    max INTEGER ,
    min INTEGER ,
    axis INTEGER ,
    values SEQUENCE OF INTEGER }

Byte-graph ::= SEQUENCE { -- integer from 0-255
    max INTEGER ,
    min INTEGER ,
    axis INTEGER ,
    values OCTET STRING }

END

-- seqset.asn
--$Revision: 149840 $
--**********************************************************************
--
-- NCBI Sequence Collections
-- by James Ostell, 1990
--
-- Version 3.0 - 1994
--
--**********************************************************************

NCBI-Seqset DEFINITIONS ::=
BEGIN

EXPORTS Bioseq-set, Seq-entry;

IMPORTS Bioseq, Seq-annot, Seq-descr FROM NCBI-Sequence
        Object-id, Dbtag, Date FROM NCBI-General;

--*** Sequence Collections ********************************
--*

Bioseq-set ::= SEQUENCE { -- just a collection
    id Object-id OPTIONAL ,
    coll Dbtag OPTIONAL , -- to identify a collection
    level INTEGER OPTIONAL , -- nesting level
    class ENUMERATED {
        not-set (0) ,
        nuc-prot (1) , -- nuc acid and coded proteins
        segset (2) , -- segmented sequence + parts
        conset (3) , -- constructed sequence + parts
        parts (4) , -- parts for 2 or 3
        gibb (5) , -- geninfo backbone
        gi (6) , -- geninfo
        genbank (7) , -- converted genbank
        pir (8) , -- converted pir
        pub-set (9) , -- all the seqs from a single publication
        equiv (10) , -- a set of equivalent maps or seqs
        swissprot (11) , -- converted SWISSPROT
        pdb-entry (12) , -- a complete PDB entry
        mut-set (13) , -- set of mutations
        pop-set (14) , -- population study
        phy-set (15) , -- phylogenetic study
        eco-set (16) , -- ecological sample study
        gen-prod-set (17) , -- genomic products, chrom+mRNA+protein
        wgs-set (18) , -- whole genome shotgun project
        named-annot (19) , -- named annotation set
        named-annot-prod (20) , -- with instantiated mRNA+protein
        read-set (21) , -- set from a single read
        paired-end-reads (22) , -- paired sequences within a read-set
        other (255) } DEFAULT not-set ,
    release VisibleString OPTIONAL ,
    date Date OPTIONAL ,
    descr Seq-descr OPTIONAL ,
    seq-set SEQUENCE OF Seq-entry ,
    annot SET OF Seq-annot OPTIONAL }

Seq-entry ::= CHOICE {
    seq Bioseq ,
    set Bioseq-set }

END

-- seqsplit.asn
--$Revision: 1.9 $
--********************************************************************
--
-- Network Id server network access
-- Vasilchenko 2003
--
--
--*********************************************************************
--
-- seqsplit.asn
--
-- representation of split sequences
--
--*********************************************************************

NCBI-Seq-split DEFINITIONS ::=
BEGIN

EXPORTS ID2S-Chunk-Id, ID2S-Seq-annot-Info;

IMPORTS Seq-id FROM NCBI-Seqloc
        Seq-entry FROM NCBI-Seqset
        Bioseq, Seq-annot, Seq-descr, Seq-literal FROM NCBI-Sequence
        Seq-align FROM NCBI-Seqalign
        Feat-id FROM NCBI-Seqfeat;

----------------------------------------------------------------------------
-- Blob split info types
----------------------------------------------------------------------------

----------------------------------------------------------------------------
-- Chunks split description

ID2S-Split-Info ::= SEQUENCE {
    bioseqs-info SET OF ID2S-Bioseqs-Info OPTIONAL,
    chunks SET OF ID2S-Chunk-Info,
    skeleton Seq-entry OPTIONAL }

ID2S-Bioseqs-Info ::= SEQUENCE {
    info ID2S-Bioseq-Info,
    bioseqs ID2S-Bioseq-Ids }

ID2S-Bioseq-Info ::= SEQUENCE {
    gap-count INTEGER OPTIONAL,
    seq-map-has-ref BOOLEAN OPTIONAL }

ID2S-Chunk-Info ::= SEQUENCE {
    id ID2S-Chunk-Id,
    content SET OF ID2S-Chunk-Content }

-- Description of information in
-- this chunk
-- Place means id of Bioseq or Bioseq-set
ID2S-Chunk-Content ::= CHOICE {
    -- place of Seq-descrs
    seq-descr ID2S-Seq-descr-Info,

    -- locations and types of annotations
    seq-annot ID2S-Seq-annot-Info,

    -- place of assembly history
    seq-assembly ID2S-Seq-assembly-Info,

    -- place of sequence map
    seq-map ID2S-Seq-map-Info,

    -- place of sequence data
    seq-data ID2S-Seq-data-Info,

    -- place of Seq-annots
    seq-annot-place ID2S-Seq-annot-place-Info,

    -- place of Bioseqs
    bioseq-place SET OF ID2S-Bioseq-place-Info,

    -- ids of features
    feat-ids SET OF ID2S-Seq-feat-Ids-Info }

ID2S-Seq-descr-Info ::= SEQUENCE {
    type-mask INTEGER, -- mask of Seq-descr types,
    bioseqs ID2S-Bioseq-Ids OPTIONAL,
    bioseq-sets ID2S-Bioseq-set-Ids OPTIONAL }

ID2S-Seq-annot-Info ::= SEQUENCE {
    -- name is set if this is named annot
    -- name may be empty which differs from unnamed annot
    name VisibleString OPTIONAL,
    align NULL OPTIONAL,
    graph NULL OPTIONAL,
    feat SET OF ID2S-Feat-type-Info OPTIONAL,
    seq-loc ID2S-Seq-loc OPTIONAL }

ID2S-Seq-annot-place-Info ::= SEQUENCE {
    name VisibleString OPTIONAL,
    bioseqs ID2S-Bioseq-Ids OPTIONAL,
    bioseq-sets ID2S-Bioseq-set-Ids OPTIONAL }

ID2S-Seq-feat-Ids-Info ::= SEQUENCE {
    feat-types SET OF ID2S-Feat-type-Info OPTIONAL,
    xref-types SET OF ID2S-Feat-type-Info OPTIONAL,
    local-ids SET OF INTEGER OPTIONAL }

ID2S-Feat-type-Info ::= SEQUENCE {
    type INTEGER,
    subtypes SET OF INTEGER OPTIONAL }

ID2S-Seq-assembly-Info ::= SEQUENCE {
    bioseqs ID2S-Bioseq-Ids }

ID2S-Seq-map-Info ::= ID2S-Seq-loc

ID2S-Seq-data-Info ::= ID2S-Seq-loc

ID2S-Bioseq-place-Info ::= SEQUENCE {
    bioseq-set INTEGER,
    seq-ids ID2S-Bioseq-Ids }

ID2S-Chunk ::= SEQUENCE {
    data SET OF ID2S-Chunk-Data }

ID2S-Chunk-Data ::= SEQUENCE {
    -- place of data to insert
    id CHOICE {
        -- Bioseq-set id
        bioseq-set INTEGER,
        -- Bioseq id
        gi INTEGER,
        -- Bioseq id
        seq-id Seq-id },

    -- Seq-descr, for Bioseq and Bioseq-set
    descr Seq-descr OPTIONAL,

    -- Seq-annot, for Bioseq and Bioseq-set
    annots SET OF Seq-annot OPTIONAL,

    -- assembly history Seq-align, for Bioseq
    assembly SET OF Seq-align OPTIONAL,

    -- sequence map, for Bioseq
    seq-map SEQUENCE OF ID2S-Sequence-Piece OPTIONAL,

    -- sequence data, for Bioseq
    seq-data SEQUENCE OF ID2S-Sequence-Piece OPTIONAL,

    -- Bioseq, for Bioseq-set
    bioseqs SET OF Bioseq OPTIONAL }

ID2S-Sequence-Piece ::= SEQUENCE {
    start INTEGER, -- start position on sequence
    data SEQUENCE OF Seq-literal }

----------------------------------------------------------------------------
-- utility types
----------------------------------------------------------------------------

ID2S-Chunk-Id ::= INTEGER

ID2S-Bioseq-set-Ids ::= SET OF INTEGER

ID2S-Bioseq-Ids ::= SET OF CHOICE {
    gi INTEGER,
    seq-id Seq-id,
    gi-range ID2S-Gi-Range }

ID2S-Gi-Range ::= SEQUENCE {
    start INTEGER, -- start gi in this gi range
    count INTEGER DEFAULT 1 -- number of sequential gis
}

-- ID2S-Seq-loc is used to represent unordered and unstranded
-- set of intervals on set of sequences.
-- It's optimized for compact encoding of several common cases:
--   Seq-ids of type gi,
--   intervals covering whole sequences,
--   whole sequences with sequential gis,
--   set of intervals on the same sequence (Seq-id sharing).
ID2S-Seq-loc ::= CHOICE {
    whole-gi INTEGER, -- whole sequence by gi
    whole-seq-id Seq-id, -- whole sequence by Seq-id
    whole-gi-range ID2S-Gi-Range, -- set of whole sequences by gis
    gi-interval ID2S-Gi-Interval, -- interval on sequence by gi
    seq-id-interval ID2S-Seq-id-Interval, -- interval on sequence by Seq-id
    gi-ints ID2S-Gi-Ints, -- set of intervals on the same gi
    seq-id-ints ID2S-Seq-id-Ints, -- set of intervals on the same id
    loc-set SET OF ID2S-Seq-loc -- combination of locations
}

ID2S-Gi-Interval ::= SEQUENCE {
    gi INTEGER,
    start INTEGER,
    length INTEGER DEFAULT 1 }

ID2S-Seq-id-Interval ::= SEQUENCE {
    seq-id Seq-id,
    start INTEGER,
    length INTEGER DEFAULT 1 }

ID2S-Interval ::= SEQUENCE {
    start INTEGER,
    length INTEGER DEFAULT 1 }

ID2S-Gi-Ints ::= SEQUENCE {
    gi INTEGER,
    ints SET OF ID2S-Interval }

ID2S-Seq-id-Ints ::= SEQUENCE {
    seq-id Seq-id,
    ints SET OF ID2S-Interval }

END

-- seqtable.asn
--$Revision: 115572 $
-- ----------------------------------------------------------------------------
--
-- PUBLIC DOMAIN NOTICE
-- National Center for Biotechnology Information
--
-- This software/database is a "United States Government Work" under the terms
-- of the United States Copyright Act. It was written as part of the author's
-- official duties as a United States Government employee and thus cannot be
-- copyrighted. This software/database is freely available to the public for
-- use. The National Library of Medicine and the U.S. Government have not
-- placed any restriction on its use or reproduction.
--
-- Although all reasonable efforts have been taken to ensure the accuracy and
-- reliability of the software and data, the NLM and the U.S. Government do not
-- and cannot warrant the performance or results that may be obtained by using
-- this software or data. The NLM and the U.S. Government disclaim all
-- warranties, express or implied, including warranties of performance,
-- merchantability or fitness for any particular purpose.
--
--  Please cite the authors in any work or product based on this material.
--
-- ----------------------------------------------------------------------------
--
-- Authors:  Mike DiCuccio, Eugene Vasilchenko
--
-- ASN.1 interface to table readers
--
-- ----------------------------------------------------------------------------

NCBI-SeqTable DEFINITIONS ::=
BEGIN

EXPORTS
    SeqTable-column-info,
    SeqTable-column,
    Seq-table;

IMPORTS
    Seq-id, Seq-loc, Seq-interval FROM NCBI-Seqloc;


SeqTable-column-info ::= SEQUENCE {
    -- user friendly column name, can be skipped
    title VisibleString OPTIONAL,

    -- identification of the column data in the objects described by the table
    -- (known column data types)
    field-id INTEGER {
        -- position types
        location               (0),   -- location as Seq-loc
        location-id            (1),   -- location Seq-id
        location-gi            (2),   -- gi
        location-from          (3),   -- interval from
        location-to            (4),   -- interval to
        location-strand        (5),   -- location strand
        location-fuzz-from-lim (6),
        location-fuzz-to-lim   (7),

        product                (10),  -- product as Seq-loc
        product-id             (11),  -- product Seq-id
        product-gi             (12),  -- product gi
        product-from           (13),  -- product interval from
        product-to             (14),  -- product interval to
        product-strand         (15),  -- product strand
        product-fuzz-from-lim  (16),
        product-fuzz-to-lim    (17),

        -- main feature fields
        id-local               (20),  -- id.local.id
        xref-id-local          (21),  -- xref.id.local.id
        partial                (22),
        comment                (23),
        title                  (24),
        ext                    (25),  -- field-name must be "E.xxx", see below
        qual                   (26),  -- field-name must be "Q.xxx", see below
        dbxref                 (27),  -- field-name must be "D.xxx", see below

        -- various data fields
        data-imp-key           (30),
        data-region            (31),
        data-cdregion-frame    (32),

        -- extra fields, see also special values for str below
        ext-type               (40),
        qual-qual              (41),
        qual-val               (42),
        dbxref-db              (43),
        dbxref-tag             (44)
    } OPTIONAL,

    -- any column can be identified by ASN.1 text locator string
    -- with omitted object type.
    -- examples:
    --   "data.gene.locus" for Seq-feat.data.gene.locus
    --   "data.imp.key" for Seq-feat.data.imp.key
    --   "qual.qual"
    --      - Seq-feat.qual is SEQUENCE so several columns are allowed
    --        see also "Q.xxx" special value for shorter qual representation
    --   "ext.type.str"
    --   "ext.data.label.str"
    --   "ext.data.data.int"
    --        see also "E.xxx" special value for shorter ext representation
    -- special values start with capital letter:
    --   "E.xxx" - ext.data.label.str = xxx, ext.data.data = data
    --      - Seq-feat.ext.data is SEQUENCE so several columns are allowed
    --   "Q.xxx" - qual.qual = xxx, qual.val = data
    --      - Seq-feat.qual is SEQUENCE so several columns are allowed
    --   "D.xxx" - dbxref.id = xxx, dbxref.tag = data
    --      - Seq-feat.dbxref is SET so several columns are allowed
    field-name VisibleString OPTIONAL
}


CommonString-table ::= SEQUENCE {
    -- set of possible values
    strings SEQUENCE OF VisibleString,

    -- indexes of values
    indexes SEQUENCE OF INTEGER
}


CommonBytes-table ::= SEQUENCE {
    -- set of possible values
    bytes SEQUENCE OF OCTET STRING,

    -- indexes of values
    indexes SEQUENCE OF INTEGER
}


SeqTable-multi-data ::= CHOICE {
    -- a set of integers, one per row
    int SEQUENCE OF INTEGER,

    -- a set of reals, one per row
    real SEQUENCE OF REAL,

    -- a set of strings, one per row
    string SEQUENCE OF VisibleString,

    -- a set of byte arrays, one per row
    bytes SEQUENCE OF OCTET STRING,

    -- a set of string with small set of possible values
    common-string CommonString-table,

    -- a set of byte arrays with small set of possible values
    common-bytes CommonBytes-table,

    -- a set of bits, one per row
    -- this uses bm::bvector<> as its storage mechanism
    bit OCTET STRING,

    -- a set of locations, one per row
    loc SEQUENCE OF Seq-loc,
    id SEQUENCE OF Seq-id,
    interval SEQUENCE OF Seq-interval
}


SeqTable-single-data ::= CHOICE {
    -- integer
    int INTEGER,

    -- real
    real REAL,

    -- string
    string VisibleString,

    -- byte array
    bytes OCTET STRING,

    -- bit
    bit BOOLEAN,

    -- location
    loc Seq-loc,
    id Seq-id,
    interval Seq-interval
}
SeqTable-sparse-index ::= CHOICE {
    -- indexes of rows with values
    indexes SEQUENCE OF INTEGER,

    -- bitset of rows with values
    bit-set OCTET STRING
}


SeqTable-column ::= SEQUENCE {
    -- column description or reference to previously defined info
    -- (information about data)
    header SeqTable-column-info,

    -- row data
    data SeqTable-multi-data OPTIONAL,

    -- in case not all rows contain data this field will contain sparse info
    sparse SeqTable-sparse-index OPTIONAL,

    -- default value for sparse table, or if row data is too short
    default SeqTable-single-data OPTIONAL,

    -- single value for indexes not listed in sparse table
    sparse-other SeqTable-single-data OPTIONAL
}


Seq-table ::= SEQUENCE {
    -- type of features in this table, equal to Seq-feat.data variant index
    feat-type INTEGER,

    -- subtype of features in this table, defined in header SeqFeatData.hpp
    feat-subtype INTEGER OPTIONAL,

    -- number of rows
    num-rows INTEGER,

    -- data in columns
    columns SEQUENCE OF SeqTable-column
}


END


-- submit.asn
--$Revision: 6.1 $
--********************************************************************
--
--  Direct Submission of Sequence Data
--  James Ostell, 1991
--
--  This is a trial specification for direct submission of sequence
--    data worked out between NCBI and EMBL
--  Later revised to reflect work with GenBank and Integrated database
--
--  Version 3.0, 1994
--    This is the official NCBI sequence submission format now.
--
--********************************************************************

NCBI-Submit DEFINITIONS ::=
BEGIN

EXPORTS
    Seq-submit,
    Contact-info;

IMPORTS
    Cit-sub, Author FROM NCBI-Biblio
    Date, Object-id FROM NCBI-General
    Seq-annot FROM NCBI-Sequence
    Seq-id FROM NCBI-Seqloc
    Seq-entry FROM NCBI-Seqset;


Seq-submit ::= SEQUENCE {
    sub Submit-block,
    data CHOICE {
        -- sequence(s)
        entrys SET OF Seq-entry,
        -- annotation(s)
        annots SET OF Seq-annot,
        -- deletions of entries
        delete SET OF Seq-id
    }
}


Submit-block ::= SEQUENCE {
    -- who to contact
    contact Contact-info,
    -- citation for this submission
    cit Cit-sub,
    -- hold until publish
    hup BOOLEAN DEFAULT FALSE,
    -- release by date
    reldate Date OPTIONAL,
    -- type of submission
    subtype INTEGER {
        new      (1),     -- new data
        update   (2),     -- update by author
        revision (3),     -- 3rd party (non-author) update
        other    (255)
    } OPTIONAL,
    -- tool used to make submission
    tool VisibleString OPTIONAL,
    -- user supplied id for this submission
    user-tag VisibleString OPTIONAL,
    -- user comments/advice to database
    comment VisibleString OPTIONAL
}


-- who to contact to discuss the submission
Contact-info ::= SEQUENCE {
    -- OBSOLETE: will be removed
    name VisibleString OPTIONAL,
    address SEQUENCE OF VisibleString OPTIONAL,
    phone VisibleString OPTIONAL,
    fax VisibleString OPTIONAL,
    email VisibleString OPTIONAL,
    telex VisibleString OPTIONAL,
    -- for owner accounts
    owner-id Object-id OPTIONAL,
    password OCTET STRING OPTIONAL,
    -- structured to replace name above
    last-name VisibleString OPTIONAL,
    first-name VisibleString OPTIONAL,
    middle-initial VisibleString OPTIONAL,
    -- WARNING: this will replace the above
    contact Author OPTIONAL
}


END


-- tinyseq.asn
--$Revision: 6.1 $
--**********************************************************************
--
--  ASN.1 for a tiny Bioseq in XML
--    basically a structured FASTA file with a few extras
--    in this case we drop all modularity of components
--      All ids are Optional - simpler structure, less checking
--      Components of
--      organism are hard coded - can't easily add or change
--      sequence is just string whether DNA or protein
--  by James Ostell, 2000
--
--**********************************************************************

NCBI-TSeq DEFINITIONS ::=
BEGIN


TSeq ::= SEQUENCE {
    seqtype ENUMERATED {
        nucleotide (1),
        protein    (2)
    },
    gi INTEGER OPTIONAL,
    accver VisibleString OPTIONAL,
    sid VisibleString OPTIONAL,
    local VisibleString OPTIONAL,
    taxid INTEGER OPTIONAL,
    orgname VisibleString OPTIONAL,
    defline VisibleString,
    length INTEGER,
    sequence VisibleString
}


-- a bunch of them
TSeqSet ::= SEQUENCE OF TSeq


END