include/gui/objutils/snp_bitfield.hpp

Go to the documentation of this file.
00001 #ifndef GUI_OBJUTILS___SNP_BITFIELD__HPP
00002 #define GUI_OBJUTILS___SNP_BITFIELD__HPP
00003 
00004 /*  $Id: snp_bitfield.hpp 17219 2008-06-27 16:51:37Z dicuccio $
00005  * ===========================================================================
00006  *
00007  *                            PUBLIC DOMAIN NOTICE
00008  *               National Center for Biotechnology Information
00009  *
00010  *  This software/database is a "United States Government Work" under the
00011  *  terms of the United States Copyright Act.  It was written as part of
00012  *  the author's official duties as a United States Government employee and
00013  *  thus cannot be copyrighted.  This software/database is freely available
00014  *  to the public for use. The National Library of Medicine and the U.S.
00015  *  Government have not placed any restriction on its use or reproduction.
00016  *
00017  *  Although all reasonable efforts have been taken to ensure the accuracy
00018  *  and reliability of the software and data, the NLM and the U.S.
00019  *  Government do not and cannot warrant the performance or results that
00020  *  may be obtained by using this software or data. The NLM and the U.S.
00021  *  Government disclaim all warranties, express or implied, including
00022  *  warranties of performance, merchantability or fitness for any particular
00023  *  purpose.
00024  *
00025  *  Please cite the author in any work or product based on this material.
00026  *
00027  * ===========================================================================
00028  *
00029  * Authors:  Melvin Quintos
00030  *
00031  * File Description:
00032  *   Declares CSnpBitfield, a facade over the versioned SNP bitfield encodings.
00033  */
00034 
00035 #include <corelib/ncbistd.hpp>
00036 #include <gui/gui_export.h>
00037 
00038 #include <memory>
00039 
00040 BEGIN_NCBI_SCOPE
00041 
00042 class CSnpBitfieldFactory;
00043 
00044 /**
00045 *   CSnpBitfield is a facade for representing any version of the SNP
00046 *   bitfield.  A CSnpBitfield is created from a vector<char> data type.
00047 *
00048 *   Example:
00049 *      vector<char> data = <get data e.g. CUser_field::C_Data::GetOs >
00050 *      CSnpBitfield bitfield = data
00051 *
00052 *   Internally, the CSnpBitfield uses a Factory (CSnpBitfieldFactory)
00053 *   to determine the version/format of the bitfield to create and store.
00054 *   Although it is possible to create bitfields from the Factory, it is
00055 *   best to use this class, CSnpBitfield, instead.
00056 *
00057 *   CSnpBitfield is a facade to the CSnpBitfield::IEncoding interface.
00058 *   The CSnpBitfield::IEncoding and CSnpBitfield::EProperty will evolve to
00059 *   represent the latest SNP bitfield fields.  As newer bitfield versions
00060 *   are introduced, all subclasses of CSnpBitfield::IEncoding are recompiled
00061 *   to ensure the latest features of the bitfield are backwards compatible.  
00062 *   Developers that also modify CSnpBitfield and related classes should run the
00063 *   unit_test_snp project to test and make sure nothing was broken.
00064 *
00065 *   For example:
00066 *      CSnpBitfield2 (v2) introduced a byte for version number (Not found in v1.2).
00067 *      CSnpBitfield::IEncoding was modified to get version number (e.g. GetVersion).
00068 *      CSnpBitfield1_2 (v1.2) was forced to be recompiled.
00069 *         Calls to 1.2's implementation of 'GetVersion' return 1
00070 *
00071 **/
00072 class  CSnpBitfield
00073 {
00074 
00075 ///////////////////////////////////////////////////////////////////////////////
00076 // Public Structs/Inner-classes/ Enumerations
00077 ///////////////////////////////////////////////////////////////////////////////
00078 public:
00079     /// Individual boolean flags of the bitfield; query one with IsTrue(EProperty).
00080     enum EProperty
00081     {
00082         // Note: The order of the properties is important.  Explicitly
00083         //  assigned values are intended.
00084 
00085         // DO NOT MODIFY EXISTING ASSIGNED VALUES.
00086         // ADD NEW PROPERTIES TO END OF ENUMERATION
00087 
00088         // F1 Link
00089         eHasLinkOut         = 0,  ///< Has SubmitterLinkOut From SNP->SubSNP->Batch.link_out
00090         eHasSnp3D           = 1,  ///< Has 3D structure SNP3D
00091         eHasSTS             = 2,  ///< Has STS Query Entrez to get the current links
00092         eHasEntrez          = 3,  ///< Has EntrezGene Query Entrez to get the current links
00093         eHasProbeDB         = 4,  ///< Has ProbeDB Query Entrez to get the current links
00094         eHasGEO             = 5,  ///< Has GEO Query Entrez to get the current links
00095         eHasAssembly        = 6,  ///< Has Assembly Query Entrez to get the current links
00096         eHasTrace           = 7,  ///< Has Trace Query Entrez to get the current links
00097         eFromMgcClone       = 8,  ///< From MGC clone We have ~20K rs. This bit could be set from specific submitter handle/ batch_id
00098         eHasOrganism        = 9,  ///< Has OrganismDBLink (Ex. Jackson Lab for mouse)
00099 
00100         // F2 Gene Function is handled separately  See EFunctionClass
00101 
00102         // F3 Map
00103         eIsAssemblySpecific = 10, ///< Is Assembly specific. This bit is 1 if the snp only maps to one assembly
00104         eHasAssemblyConflict= 11, ///< Has Assembly conflict. This is for weight 1 and 2 snp that maps to different chromosomes on different assemblies
00105         eHasOtherSameSNP    = 12, ///< Has other snp with exactly the same set of mapping position on NCBI reference assembly
00106 
00107         // F4 Freq
00108         e5PctMinorAllele1Plus   = 13, ///< >5% minor allele frequency in 1+ populations
00109         e5PctMinorAlleleAll     = 14, ///< >5% minor allele frequency in each and all populations.
00110         eIsDoubleHit            = 15, ///< Deprecated in v4+.  This bit is set if the rs# is in Jim Mullikin's double hit submission which has been only on human snp.
00111         eIsMutation             = 16, ///< Is mutation (journal citation, explicit fact) low frequency variation that is cited in journal and other reputable sources.
00112 
00113         // F5 GTY
00114         eHasGenotype            = 17, ///< Genotypes available. The snp has individual genotype (in SubInd table).
00115         eInHaplotypeSet         = 18, ///< In Haplotype tagging set
00116         eInGenotypeKit          = 19, ///< Marker is on high density genotyping kit (50K density or greater). The snp may have phenotype associations present in dbGaP
00117 
00118         // F6 Hapmap
00119         ePhase1Attempted        = 20, ///< Phase 1 attempted all snp in HapMap unfiltered-redundant set
00120         ePhase1Genotyped        = 21, ///< Phase 1 genotyped a subset of above: filtered, non-redundant
00121         ePhase2Attempted        = 22, ///< Phase 2 attempted
00122         ePhase2Genotyped        = 23, ///< Phase 2 genotyped  filtered, non-redundant
00123         ePhase3Attempted        = 24, ///< Phase 3 attempted
00124         ePhase3Genotyped        = 25, ///< Phase 3 genotyped  filtered, non-redundant
00125 
00126         // F7 Phenotype
00127         eHasOMIM_OMIA           = 26, ///< Has OMIM/OMIA
00128         eHasSnpRIF              = 27, ///< Has SnpRIF
00129         eHasLodScore            = 28, ///< Has LOD score
00130         eHasPhenoDB             = 29, ///< Has significant association in dbGaP study
00131         eHasDiseaseInfo         = 30, ///< Submitted as a disease-related mutation and/or present in a locus-specific database
00132         eHasTranscriptionFactor = 31, ///< Has transcription factor
00133         eHasClinicalAssay       = 32, ///< Variation is interrogated in a clinical diagnostic assay Note: Used to be eHasMPO (Mammalian Phenotype Ontology), but never used
00134         eHasMeSH                = 33, ///< Has MeSH is linked to a disease
00135 
00136         // F8 Variation class is handled separately  See EVariationClass
00137 
00138         // F9 Quality Check
00139         eHasGenotypeConflict            = 34, ///< Has Genotype Conflict Same (rs, ind), different genotype. N/N is not included
00140         eIsStrainSpecific               = 35, ///< Is Strain Specific
00141         eHasMendelError                 = 36, ///< Has Mendelian Error
00142         eHasHardyWeinbergDeviation      = 37, ///< Has Hardy Weinberg deviation
00143         eHasMemberSsConflict            = 38, ///< Has member ss with conflict alleles
00144         eIsWithdrawn                    = 39, ///< Is Withdrawn by submitter If one member ss is withdrawn by submitter, then this bit is set. If all member ss' are withdrawn, then the rs is deleted to SNPHistory
00145 
00146         // Version 2 additions
00147         // F1 Link
00148         eHasShortReadArchive            = 40,  ///< Has Short Read Archive link
00149 
00150         // Version 3 additions
00151         // F9 Quality
00152         eIsContigAlleleAbsent           = 41,   ///< Contig allele not present in SNP allele list. The reference sequence allele at the mapped position is not present in the SNP allele list, adjusted for orientation
00153 
00154         // Version 2 & 3 (hidden in F2, gene function properties.  will be moved out of F2 in later bitfield versions)
00155         eHasReference                   = 42,   ///< A coding region variation where one allele in the set is identical to the reference sequence. FxnCode = 8
00156 
00157         // Version 4 Additions
00158         eIsValidated                    = 43    ///< This bit is set if the snp has 2+ minor allele count based on frequency of genotype data
00159         /// Add additional properties here.
00160 
00161     };
00162 
00163     /// Variation class of a SNP.  A SNP can only be one class of variation.
00164     enum EVariationClass
00165     {
00166         eUnknownVariation       = 0,
00167         eSingleBase             = 1,
00168         eDips                   = 2,
00169         eHeterozygous           = 3,
00170         eMicrosatellite         = 4,
00171         eNamedSNP               = 5,
00172         eNoVariation            = 6,
00173         eMixed                  = 7,
00174         eMultiBase              = 8
00175     };
00176 
00177     /// Gene function class (gene_prop in v1.2).
00178     /// A SNP can belong to more than one gene function class.
00179     enum EFunctionClass
00180     {
00181         eUnknownFxn             = 0,  ///< Unknown
00182         eIntron                 = 1,  ///< In Intron
00183         eDonor                  = 2,  ///< In donor splice-site
00184         eAcceptor               = 3,  ///< In acceptor splice site
00185         eUTR                    = 4,  ///< In Exon. location is in a spliced transcript. Is "untranslated region" (UTR) if "In CDS" is false
00186         eSynonymous             = 5,  ///< In coding region (CDS). A subset of "Exon" excluding "UTR": SYNONYMOUS if bits 5-7 are false
00187         eNonsense               = 6,  ///< Is non-synonymous Nonsense. Changes to STOP codon (TER)
00188         eMissense               = 7,  ///< Is non-synonymous Missense. Changes protein peptide
00189         eFrameshift             = 8,  ///< Is non-synonymous Frameshift. Changes all downstream amino acids
00190 
00191         // Version 2 additions
00192         eInGene                 = 9,  ///< In gene segment Defined as sequence intervals covered by a gene ID but not having an aligned transcript. FxnCode = 11
00193         eInGene5                = 10, ///< In 5' gene region FxnCode = 15
00194         eInGene3                = 11, ///< In 3' gene region FxnCode = 13
00195         eInUTR5                 = 12, ///< In 5' UTR Location is in an untranslated region (UTR). FxnCode = 55
00196         eInUTR3                 = 13, ///< In 3' UTR Location is in an untranslated region (UTR). FxnCode = 53
00197         eMultipleFxn            = 14  ///< Has multiple functions (i.e. fwd strand 5'near gene, rev strand 3'near gene)
00198                                       ///< use IsTrue(EFunctionClass) to determine function classes the snp belongs to.
00199     };
00200 
00201 ///////////////////////////////////////////////////////////////////////////////
00202 // Public Methods
00203 ///////////////////////////////////////////////////////////////////////////////
00204 public:
00205     // Static helpers; their definitions are not part of this header.
00206     static const char *     GetString(EVariationClass e);   ///< C string for a variation class (text defined outside this header)
00207     static const char *     GetString(EFunctionClass e);    ///< C string for a function class (text defined outside this header)
00208     static bool             IsCompatible(EFunctionClass e1, EFunctionClass e2); ///< NOTE(review): compatibility rules are not visible in this header
00209     // NOTE(review): std::vector is used below, but this header does not include <vector> itself.
00210     CSnpBitfield();
00211     CSnpBitfield(const CSnpBitfield &rhs);
00212     CSnpBitfield(const std::vector<char> &rhs);             ///< Builds the bitfield from raw bytes (see the class example)
00213 
00214     CSnpBitfield &          operator=( const CSnpBitfield &rhs );
00215     CSnpBitfield &          operator=( const std::vector<char> &rhs);
00216     // Accessors.  Those defined inline below this class forward to the stored IEncoding.
00217     bool                    IsTrue(EProperty prop) const;
00218     bool                    IsTrue(EFunctionClass fxn)  const;
00219     bool                    IsTrue(EVariationClass var) const;  ///< True when GetVariationClass() equals var
00220     int                     GetWeight()                 const;
00221     int                     GetVersion()                const;
00222     EVariationClass         GetVariationClass()         const;
00223     EFunctionClass          GetFunctionClass()          const;
00224     const char *            GetGenePropertyString()     const;  ///< Not defined in this header
00225     const char *            GetVariationClassString()   const;  ///< Not defined in this header
00226     const char *            GetString()                 const;
00227 
00228 private:
00229     void x_CreateString();  // not defined in this header
00230 
00231 ///////////////////////////////////////////////////////////////////////////////
00232 // Public Inner Classes
00233 ///////////////////////////////////////////////////////////////////////////////
00234 public:
00235 /// Interface implemented by each bitfield version; CSnpBitfield forwards its queries to it.
00236 class IEncoding
00237 {
00238 public:
00239     virtual bool                            IsTrue(EProperty e)      const = 0;
00240     virtual bool                            IsTrue(EFunctionClass e) const = 0;
00241     virtual int                             GetWeight()              const = 0;
00242     virtual int                             GetVersion()             const = 0;
00243     virtual CSnpBitfield::EFunctionClass    GetFunctionClass()       const = 0;
00244     virtual CSnpBitfield::EVariationClass   GetVariationClass()      const = 0;
00245     virtual const char *                    GetString()              const = 0;
00246     virtual IEncoding *                     Clone()                        = 0; ///< NOTE(review): presumably returns a new copy owned by the caller -- confirm
00247     virtual                                 ~IEncoding(){};  ///< Virtual so implementations can be destroyed through an IEncoding pointer
00248 };
00249 
00250 ///////////////////////////////////////////////////////////////////////////////
00251 // Private Data
00252 ///////////////////////////////////////////////////////////////////////////////
00253 private:
00254     // NOTE(review): std::auto_ptr was deprecated in C++11 and removed in C++17; a move to std::unique_ptr must be coordinated with the implementation file.
00255     std::auto_ptr<IEncoding>     m_bitfield; // inits to null object
00256     static CSnpBitfieldFactory   sm_Factory; // one shared factory
00257 };
00258 
00259 ///////////////////////////////////////////////////////////////////////////////
00260 // Inline methods
00261 ///////////////////////////////////////////////////////////////////////////////
00262 inline bool CSnpBitfield::IsTrue(EProperty prop) const {   // property query is answered by the stored encoding
00263     return (*m_bitfield).IsTrue(prop);
00264 }
00265 
00266 inline bool CSnpBitfield::IsTrue(EFunctionClass fxn) const {   // function-class query is answered by the stored encoding
00267     return (*m_bitfield).IsTrue(fxn);
00268 }
00269 
00270 inline bool CSnpBitfield::IsTrue(EVariationClass var) const {   // a SNP has exactly one variation class, so compare against it
00271     return var == (*m_bitfield).GetVariationClass();
00272 }
00273 
00274 inline int CSnpBitfield::GetWeight() const {   // weight as reported by the stored encoding
00275     return (*m_bitfield).GetWeight();
00276 }
00277 
00278 inline CSnpBitfield::EFunctionClass CSnpBitfield::GetFunctionClass() const {   // function class as reported by the stored encoding
00279     return (*m_bitfield).GetFunctionClass();
00280 }
00281 
00282 inline CSnpBitfield::EVariationClass CSnpBitfield::GetVariationClass() const {   // variation class as reported by the stored encoding
00283     return (*m_bitfield).GetVariationClass();
00284 }
00285 
00286 inline const char * CSnpBitfield::GetString() const {   // string as reported by the stored encoding
00287     return (*m_bitfield).GetString();
00288 }
00289 
00290 inline int CSnpBitfield::GetVersion() const {   // bitfield version as reported by the stored encoding
00291     return (*m_bitfield).GetVersion();
00292 }
00293 
00294 END_NCBI_SCOPE
00295 
00296 #endif // GUI_OBJUTILS___SNP_BITFIELD__HPP
00297 
00298 
00299 

Generated on Sun Mar 15 19:07:51 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Mon Mar 16 12:49:52 2009 by modify_doxy.py rev. 117643