src/algo/ms/omssa/mspeak.hpp

Go to the documentation of this file.
00001 /* $Id: mspeak.hpp 141656 2008-09-29 13:08:30Z lewisg $
00002  * ===========================================================================
00003  *
00004  *                            PUBLIC DOMAIN NOTICE
00005  *               National Center for Biotechnology Information
00006  *
00007  *  This software/database is a "United States Government Work" under the
00008  *  terms of the United States Copyright Act.  It was written as part of
00009  *  the author's official duties as a United States Government employee and
00010  *  thus cannot be copyrighted.  This software/database is freely available
00011  *  to the public for use. The National Library of Medicine and the U.S.
00012  *  Government have not placed any restriction on its use or reproduction.
00013  *
00014  *  Although all reasonable efforts have been taken to ensure the accuracy
00015  *  and reliability of the software and data, the NLM and the U.S.
00016  *  Government do not and cannot warrant the performance or results that
00017  *  may be obtained by using this software or data. The NLM and the U.S.
00018  *  Government disclaim all warranties, express or implied, including
00019  *  warranties of performance, merchantability or fitness for any particular
00020  *  purpose.
00021  *
00022  *  Please cite the authors in any work or product based on this material.
00023  *
00024  * ===========================================================================
00025  *
00026  * Authors:  Lewis Y. Geer, Douglas J. Slotta
00027  *
00028  * File Description:
00029  *    code to deal with spectra and m/z ladders
00030  *
00031  * ===========================================================================
00032  */
00033 
00034 #ifndef MSPEAK__HPP
00035 #define MSPEAK__HPP
00036 
00037 #include <corelib/ncbimisc.hpp>
00038 #include <objects/omssa/omssa__.hpp>
00039 #include <util/rangemap.hpp>
00040 #include <util/itree.hpp>
00041 
00042 #include <set>
00043 #include <iostream>
00044 #include <vector>
00045 #include <deque>
00046 #include <map>
00047 #include <string.h>
00048 
00049 #include "msms.hpp"
00050 #include "msladder.hpp"
00051 #include "SpectrumSet.hpp"
00052 #include "omssascore.hpp"
00053 
00054 
00055 BEGIN_NCBI_SCOPE
00056 BEGIN_SCOPE(objects)
00057 BEGIN_SCOPE(omssa)
00058 
00059 
00060 class CMSPeak;
00061 
/**
 * Identifies one of the peak lists kept for a spectrum.
 *
 * The first two entries are the unfiltered data and the top hits; they are
 * followed by one entry per charge, eMSPeakListCharge1 .. eMSPeakListCharge30.
 * eMSPeakListChargeMax is a past-the-end marker, so
 * (eMSPeakListChargeMax - eMSPeakListCharge1) is the number of per-charge
 * lists.  The names in parentheses are the older identifiers mentioned in
 * the original comments.
 */
enum EMSPeakListTypes {
    eMSPeakListOriginal = 0,  ///< original data (MSORIGINAL)
    eMSPeakListTop,           ///< top hits (MSTOPHITS)

    eMSPeakListCharge1,       ///< charge +1 (MSCULLED1)
    eMSPeakListCharge2,
    eMSPeakListCharge3,       ///< (MSCULLED2)
    eMSPeakListCharge4,  eMSPeakListCharge5,  eMSPeakListCharge6,
    eMSPeakListCharge7,  eMSPeakListCharge8,  eMSPeakListCharge9,
    eMSPeakListCharge10, eMSPeakListCharge11, eMSPeakListCharge12,
    eMSPeakListCharge13, eMSPeakListCharge14, eMSPeakListCharge15,
    eMSPeakListCharge16, eMSPeakListCharge17, eMSPeakListCharge18,
    eMSPeakListCharge19, eMSPeakListCharge20, eMSPeakListCharge21,
    eMSPeakListCharge22, eMSPeakListCharge23, eMSPeakListCharge24,
    eMSPeakListCharge25, eMSPeakListCharge26, eMSPeakListCharge27,
    eMSPeakListCharge28, eMSPeakListCharge29, eMSPeakListCharge30,

    eMSPeakListChargeMax      ///< one past the last per-charge list
};
00098 
00099 
/**
 *  Modification information for a hit: which modification it is, where in
 *  the peptide it sits, and whether it is a fixed modification.
 *
 *  Each field has a GetX() accessor returning the value and a SetX()
 *  accessor returning a writable reference (used as `SetX() = value`).
 *  The accessors are defined in the class body and are therefore inline.
 *
 *  No constructor is declared, so a default-initialized object holds
 *  indeterminate field values until they are assigned through SetX().
 */
class CMSModInfo {
public:
    /** @return the mod type */
    const int GetModEnum() const { return ModEnum; }
    /** @return writable reference to the mod type */
    int& SetModEnum() { return ModEnum; }

    /** @return the position in the peptide */
    const int GetSite() const { return Site; }
    /** @return writable reference to the position in the peptide */
    int& SetSite() { return Site; }

    /** @return the "is fixed" flag */
    const int GetIsFixed() const { return IsFixed; }
    /** @return writable reference to the "is fixed" flag */
    int& SetIsFixed() { return IsFixed; }

private:
    /** the mod type */
    int ModEnum;
    /** the position in the peptide */
    int Site;
    /** is it fixed */
    int IsFixed;
};
00165 
00166 
00167 
00168 /** AutoPtr (with ArrayDeleter) to an array of CMSModInfo; CMSHit keeps the modification info of a hit in one of these */
00169 typedef AutoPtr <CMSModInfo, ArrayDeleter<CMSModInfo> > TModInfo;
00170 
00171 
00172 /**
00173  *  class to contain preliminary hits.  memory footprint must be kept small.
00174  */
00175 class  CMSHit: public CMSSpectrumMatch {
00176 public:
00177     // tor's
00178     CMSHit(void);
00179     CMSHit(int StartIn, int StopIn, int IndexIn);
00180     CMSHit(int StartIn, int StopIn, int IndexIn, int MassIn, int HitsIn,
00181        int ChargeIn);
00182 
00183     /** get sequence start */
00184     const int GetStart(void) const;
00185 
00186     /** set sequence start */
00187     int& SetStart();
00188 
00189     /** get sequence stop */
00190     const int GetStop(void) const;
00191 
00192     /** set sequence stop */
00193     int& SetStop(void);
00194 
00195     /** get blast oid */
00196     const int GetSeqIndex(void) const;
00197 
00198     /** set blast oid */
00199     int& SetSeqIndex(void);
00200 
00201     /**
00202      * get modification info
00203      * 
00204      * @param n array index
00205      */
00206     const CMSModInfo& GetModInfo(int n) const;
00207 
00208     /**
00209      * set modification info
00210      * 
00211      * @param n array index
00212      */
00213     CMSModInfo& SetModInfo(int n);
00214 
00215     /**
00216      * get size of modification info array
00217      */
00218     const int GetNumModInfo(void) const;
00219 
00220     /**
00221      *  return number of hits above threshold
00222      */
00223     int CountHits(double Threshold, int MaxI);
00224 
00225 
00226     /**
00227      * count hits into two categories: independent hits and hits that are dependent on others
00228      * 
00229      * @param Independent count of independent hits
00230      * @param Dependent count of dependent hits
00231      */
00232     void CountHitsByType(int& Independent,
00233                          int& Dependent,
00234                          double Threshold, 
00235                          int MaxI) const;
00236 
00237     // for poisson test
00238     /**
00239      *  return number of hits above threshold scaled by m/z positions
00240      */
00241     int CountHits(double Threshold, int MaxI, int High);
00242 
00243     /**     
00244      * Make a record of the hits to the mass ladders
00245      * 
00246      * @param LadderContainer holds the theoretical ladders
00247      * @param iMods index into LadderContainer
00248      * @param Peaks the experimental spectrum
00249      * @param ModMask the bit array of modifications
00250      * @param ModList modification information
00251      * @param NumMod  number of modifications
00252      * @param PepStart starting position of peptide
00253      * @param Searchctermproduct search the c terminal ions
00254      * @param Searchb1 search the first forward ion?
00255      * @param TheoreticalMassIn the mass of the theoretical peptide
00256      */
00257     void RecordMatches(CLadderContainer& LadderContainer,
00258                        int iMod,
00259                        CMSPeak *Peaks,
00260                        unsigned ModMask,
00261                        CMod ModList[],
00262                        int NumMod,
00263                        const char *PepStart,
00264                        int Searchctermproduct,
00265                        int Searchb1,
00266                        int TheoreticalMassIn
00267                         );
00268 
00269 
00270     ///
00271     ///  Count Modifications in Mask
00272     ///
00273     
00274     int CountMods(unsigned ModMask, int NumMod);
00275 
00276 
00277     /**
00278      * Record the modifications used in the hit
00279      * Note that fixed aa modifications are *not* recorded
00280      * as these are dealt with by modifying the aa mass
00281      * and the positions are not recorded anywhere
00282      */
00283     void RecordModInfo(unsigned ModMask,
00284                        CMod ModList[],
00285                        int NumMod,
00286                        const char *PepStart
00287                        );
00288 
00289 
00290     /**
00291      * assignment operator
00292      * does a copy 
00293      */
00294 
00295     CMSHit& operator= (CMSHit& in);
00296 
00297 protected:
00298 
00299     /**
00300      *  helper function for RecordHits that scans thru a single ladder
00301      * 
00302      * @param Ladder the ladder to record
00303      * @param iHitInfo the index of the hit
00304      * @param Peaks the spectrum that is hit
00305      * @param Which which noise reduced spectrum to examine
00306      * @param NOffset the numbering offset for the ladder at n terminus
00307      * @param COffset the numbering offset for the ladder at c terminus
00308      */
00309     void RecordMatchesScan(CLadder& Ladder,
00310                            int& iHitInfo,
00311                            CMSPeak *Peaks,
00312                            EMSPeakListTypes Which,
00313                            int NOffset,
00314                            int COffset);
00315 
00316 private:
00317 
00318     // disallow copy
00319     CMSHit(const CMSHit& in) {}
00320 
00321     /**
00322      * start and stop positions, inclusive, on sequence
00323      */
00324     int Start, Stop;
00325 
00326     /**
00327      * blast ordinal
00328      */
00329     int Index;
00330 
00331     /** modification information array */
00332     TModInfo ModInfo;
00333 
00334     /** size of ModInfo */
00335     int NumModInfo;
00336 };
00337 
00338 
00339 /////////////////// CMSHit inline methods
00340 
00341 inline 
00342 CMSHit::CMSHit(void)
00343 {
00344     SetHits() = 0;
00345 }
00346 
00347 inline 
00348 CMSHit::CMSHit(int StartIn, int StopIn, int IndexIn):
00349     Start(StartIn), Stop(StopIn), Index(IndexIn)
00350 {
00351     SetHits() = 0;
00352 }
00353 
00354 inline 
00355 CMSHit::CMSHit(int StartIn, int StopIn, int IndexIn, int MassIn, int HitsIn,
00356               int ChargeIn):
00357     Start(StartIn), Stop(StopIn), Index(IndexIn)
00358 {
00359     SetHits() = HitsIn;
00360     SetExpMass() = MassIn;
00361     SetCharge() = ChargeIn;
00362 }
00363 
00364 inline 
00365 const int CMSHit::GetStart(void) const
00366 { 
00367     return Start;
00368 }
00369 
00370 inline 
00371 int& CMSHit::SetStart(void) 
00372 { 
00373     return Start;
00374 }
00375 
00376 inline 
00377 const int CMSHit::GetStop(void) const
00378 { 
00379     return Stop; 
00380 }
00381 
00382 inline 
00383 int& CMSHit::SetStop(void) 
00384 { 
00385     return Stop; 
00386 }
00387 
00388 inline 
00389 const int CMSHit::GetSeqIndex(void) const
00390 { 
00391     return Index; 
00392 }
00393 
00394 inline 
00395 int& CMSHit::SetSeqIndex(void) 
00396 { 
00397     return Index; 
00398 }
00399 
00400 inline 
00401 CMSModInfo& CMSHit::SetModInfo(int n)
00402 {
00403     return *(ModInfo.get() + n);
00404 }
00405 
00406 inline 
00407 const CMSModInfo& CMSHit::GetModInfo(int n) const
00408 {
00409     return *(ModInfo.get() + n);
00410 }
00411 
00412 inline 
00413 const int CMSHit::GetNumModInfo(void) const
00414 {
00415     return NumModInfo;
00416 }
00417 
00418 inline 
00419 CMSHit& CMSHit::operator= (CMSHit& in) 
00420 { 
00421     // handle self assignment
00422     if(this == &in) return *this;
00423 
00424     CMSSpectrumMatch::operator= (in);
00425     Start = in.Start; 
00426     Stop = in.Stop;
00427     Index = in.Index; 
00428     NumModInfo = in.NumModInfo;
00429     int i;
00430     ModInfo.reset();
00431     if(in.ModInfo) {
00432         ModInfo.reset(new CMSModInfo[NumModInfo]);
00433         for(i = 0; i < NumModInfo; i++) 
00434             SetModInfo(i) = in.SetModInfo(i);
00435     }
00436     return *this;
00437 }
00438 
00439 /////////////////// end of CMSHit inline methods
00440 
00441 
00442 
00443 /////////////////////////////////////////////////////////////////////////////
00444 //
00445 //  CMZI::
00446 //
00447 //  Used by CMSPeak class to hold spectral data
00448 //
00449 
00450 /**
00451  *  a class for holding an m/z value, intensity, and rank
00452  */
00453 class  CMZI: public CMSBasicPeak {
00454 public:
00455     CMZI(void);
00456     CMZI(int MZIn, unsigned IntensityIn);
00457     CMZI(double MZIn, double IntensityIn);
00458 
00459     /** get the peak rank */
00460     const TMSRank GetRank(void) const;
00461 
00462     /** set the peak rank */
00463     TMSRank& SetRank(void);
00464 
00465 private:
00466     /** The intensity rank of the peak. 1 = most intense */
00467     TMSRank Rank;
00468 };
00469 
00470 ///////////////////  CMZI inline methods
00471 
00472 inline 
00473 CMZI::CMZI(void) 
00474 {}
00475 
00476 inline 
00477 CMZI::CMZI(int MZIn, unsigned IntensityIn)
00478 {
00479     SetMZ() = MZIn;
00480     SetIntensity() = IntensityIn;
00481 }
00482 
00483 inline 
00484 CMZI::CMZI(double MZIn, double IntensityIn)
00485 {
00486     SetMZ() = MSSCALE2INT(MZIn);
00487     SetIntensity() = static_cast <unsigned> (IntensityIn);
00488 }
00489 
00490 inline
00491 const TMSRank CMZI::GetRank(void) const
00492 {
00493     return Rank;
00494 }
00495 
00496 inline
00497 TMSRank& CMZI::SetRank(void)
00498 {
00499     return Rank;
00500 }
00501 
00502 /////////////////// end of CMZI inline methods
00503 
00504 
00505 /////////////////////////////////////////////////////////////////////////////
00506 //
00507 //  CMSPeak::
00508 //
00509 //  Class used to hold spectra and convert to mass ladders
00510 //
00511 
00512 
00513 // hit list as kept by the CMSPeak class (see CMSPeak::GetHitList): a pointer to CMSHit
00514 // first index is charge
00515 typedef CMSHit * TMSHitList;
00516 
00517 // min number of peaks to be considered a hit
00518 #define MSHITMIN 2
00519 
00520 // min number of peaks needed to consider a spectrum
00521 // two is the absolute minimum in order to get an m/z range
00522 #define MSPEAKMIN 5
00523 
00524 // size of histogram bin in Daltons
00525 #define MSBIN 100
00526 
00527 // the maximum charge state that can be considered (macro currently disabled)
00528 //#define MSMAXCHARGE 10
00529 
00530 /** 
00531  * pointer to a test function on two peaks and a tolerance; passed to CMSPeak::CullIterate as its test
00532  */
00533 typedef bool (*TMZIbool) (const CMZI&, const CMZI&, int tol);
00534 
/**
 * Computed charge state of a spectrum.
 *
 * Note that the numeric value of an enumerator is not the charge itself:
 * eChargeNot1 sits between eCharge1 and eCharge2.
 */
enum EChargeState {
    eChargeUnknown = 0,  ///< charge has not been computed
    eCharge1,
    eChargeNot1,         ///< charge is not +1, but one of +2, +3 ...
    eCharge2,
    eCharge3,
    eCharge4,
    eCharge5
};
00543 
00544 // for statistical modelling
00545 // #define MSSTATRUN
00546 
00547 
/**
 * Sort order of a peak list.
 */
enum EMSPeakListSort {
    eMSPeakListSortNone = 0,   ///< no sort order
    eMSPeakListSortMZ,         ///< order by m/z
    eMSPeakListSortIntensity   ///< order by intensity
};
00557 
00558 /**
00559  * class for holding a set of peaks
00560  */
00561 
00562 
00563 class  CMSPeakList : public CObject {
00564 public:
00565     CMSPeakList(void);
00566 
00567     CMZI * const GetMZI(void) const;
00568     void SetMZI(CMZI *In);
00569 
00570     const int GetNum(void) const;
00571     int& SetNum(void);
00572 
00573     const EMSPeakListSort GetSorted(void) const;
00574     EMSPeakListSort& SetSorted(void);
00575 
00576     /** 
00577      * fill out the arrays and set defaults
00578      * 
00579      * @param Size size of the arrays
00580      */
00581     void CreateLists(int Size);
00582 
00583     /**
00584      * sort the peak by sort type
00585      * @param SortType which sort to perform
00586      */
00587     void Sort(EMSPeakListSort SortType);
00588 
00589     /**
00590      * Rank the given spectrum by intensity.
00591      * assumes the spectrum is sorted by intensity.
00592      * highest intensity is given rank 1.
00593      */
00594     void Rank(void);
00595 
00596 private:
00597     /**  m/z values and intensities */
00598     AutoPtr <CMZI, ArrayDeleter<CMZI> > MZI; 
00599 
00600     /** number of CMZI */
00601     int Num;
00602     /** have the CMZI been sorted? */
00603     EMSPeakListSort Sorted;
00604 };
00605 
00606 inline
00607 CMZI * const CMSPeakList::GetMZI(void) const
00608 {    
00609     return MZI.get();
00610 }
00611 
00612 inline
00613 void CMSPeakList::SetMZI(CMZI *In)
00614 {
00615     MZI.reset(In);
00616 }
00617 
00618 inline
00619 const int CMSPeakList::GetNum(void) const
00620 {
00621     return Num;
00622 }
00623 
00624 inline
00625 int& CMSPeakList::SetNum(void)
00626 {
00627     return Num;
00628 }
00629 
00630 inline
00631 const EMSPeakListSort CMSPeakList::GetSorted(void) const
00632 {
00633     return Sorted;
00634 }
00635 
00636 inline
00637 EMSPeakListSort& CMSPeakList::SetSorted(void)
00638 {
00639     return Sorted;
00640 }
00641 
00642 
00643 /**
00644  * class to hold spectral data
00645  * for filtering and statistical characterization
00646  */
00647 
00648 class  CMSPeak {
00649 public:
00650 
00651     /**
00652      * CMSPeak ctor
00653      */
00654     CMSPeak(void);
00655 
00656     /**
00657      * CMSPeak ctor
00658      * 
00659      * @param HitListSize size of the hit list allowed
00660      */
00661     CMSPeak(int HitListSize);
00662 
00663 
00664 private:
00665 
00666     /**
00667      *  shared c'tor code
00668      */
00669     void xCMSPeak(void);
00670 
00671     /** 
00672      * writes out dta format
00673      * 
00674      * @param FileOut output for dta file
00675      * @param Temp list of intensities and m/z
00676      * @param Num number of peaks
00677      */
00678     void xWrite(std::ostream& FileOut, const CMZI * const Temp, const int Num) const;
00679 
00680 public:
00681 
00682     ~CMSPeak(void);
00683 
00684     /**
00685      * Compare the ladder and peaks and return back rank statistics
00686      * @param Ladder the ladder to compare
00687      * @param Which which exp spectrum to use
00688      *
00689      */
00690     int CompareSortedRank(CLadder& Ladder,
00691                           EMSPeakListTypes Which,
00692                           vector<bool>& usedPeaks);
00693 
00694     /**
00695      * Read a spectrum set into a CMSPeak
00696      * 
00697      * @param Spectrum the spectrum itself
00698      * @param Settings search settings, e.g. experimental tolerances
00699      */
00700     int Read(const CMSSpectrum& Spectrum,
00701              const CMSSearchSettings& Settings);
00702 
00703     /**
00704      * Read and process a spectrum set into a CMSPeak
00705      * 
00706      * @param Spectrum the spectrum itself
00707      * @param Settings search settings, e.g. experimental tolerances
00708      */
00709     void ReadAndProcess(const CMSSpectrum& Spectrum,
00710                        const CMSSearchSettings& Settings);
00711 
00712     /**
00713      *  Write out a CMSPeak in dta format (useful for debugging)
00714      * 
00715      * @param FileOut the file to write out to
00716      * @param FileType file format to use
00717      * @param Which which MZI set to use
00718      */
00719     void Write(std::ostream& FileOut, const EMSSpectrumFileType FileType,
00720                const EMSPeakListTypes Which) const;
00721 
00722     // functions used in SmartCull
00723     
00724     /**
00725      * iterate thru peaks, deleting ones that pass the test
00726      * 
00727      * @param Temp MZI values to use
00728      * @param TempLen length of Temp
00729      * @param FCN function to use to do the test
00730      */
00731     void CullIterate(CMZI *Temp, int& TempLen, const TMZIbool FCN);
00732 
00733     /**
00734      *  cull precursors
00735      * 
00736      * @param Temp MZI values to use
00737      * @param TempLen length of Temp
00738      * @param Precursor scaled precursor value
00739      * @param Charge precursor charge
00740      * @param PrecursorCull charge reduced culling
00741      */
00742     void CullPrecursor(CMZI *Temp,
00743                        int& TempLen,
00744                        const int Precursor,
00745                        const int Charge,
00746                        bool PrecursorCull);
00747 
00748     /**
00749      *  take out peaks below a threshold
00750      * 
00751      * @param Temp MZI values to use
00752      * @param TempLen length of Temp
00753      * @param Threshold fraction of highest intensity used to cull
00754      */
00755     void CullBaseLine(const double Threshold, CMZI *Temp, int& TempLen);
00756 
00757     /**
00758      *  cull isotopes using the Markey Method
00759      * 
00760      * @param Temp MZI values to use
00761      * @param TempLen length of Temp
00762      */
00763     void CullIsotope(CMZI *Temp, int& TempLen);
00764 
00765     /**
00766      * cull peaks that are water or ammonia loss
00767      * note that this only culls the water or ammonia loss peaks if they have a
00768      * lesser intensity
00769      * 
00770      * @param Temp MZI values to use
00771      * @param TempLen length of Temp
00772      */
00773     void CullH20NH3(CMZI *Temp, int& TempLen);
00774 
00775     /**
00776      * recursively culls the peaks
00777      * 
00778      * @param ConsiderMultProduct assume multiply charged products?
00779      * @param Temp MZI values to use
00780      * @param TempLen length of Temp
00781      * @param Settings search settings, e.g. experimental tolerances
00782      */
00783     void SmartCull(const CMSSearchSettings& Settings,
00784                    CMZI *Temp,
00785                    int& TempLen,
00786                    const bool ConsiderMultProduct
00787                    );
00788 
00789     /**
00790      *  use smartcull on all charge states
00791      * 
00792      * @param Settings search settings, e.g. experimental tolerances
00793      */
00794     void CullAll(const CMSSearchSettings& Settings);
00795 
00796     /**
00797      * Performs culling based on whether to consider multiply charged ions or not
00798      * 
00799      * @param Settings search settings, e.g. experimental tolerances
00800      */
00801     void CullChargeAndWhich(const CMSSearchSettings& Settings
00802                             );
00803 
00804     /**
00805      * return the lowest culled peak and the highest culled peak less than the
00806      * precursor mass passed in
00807      * 
00808      * @param NumLo number of peak below mh/2
00809      * @param NumHi number of peaks above mh/2 and below mh
00810      */
00811     void HighLow(int& High,
00812                  int& Low,
00813                  int& NumPeaks, 
00814                  const int PrecursorMass,
00815                  const int Charge,
00816                  const double Threshold,
00817                  int& NumLo,
00818                  int& NumHi
00819                  );
00820 
00821     /**
00822      * count number of AA intervals in spectrum.
00823      */
00824     const int CountAAIntervals(const CMassArray& MassArray,
00825                                const bool Nodup/*=true*/,
00826                                const EMSPeakListTypes Which /*= MSCULLED1*/) const;
00827     
00828     /**
00829      *  counts the number of peaks above % of maximum peak
00830      */
00831     const int AboveThresh(const double Threshold,
00832                           const EMSPeakListTypes Which) const;
00833     
00834     /**
00835      *  the number of peaks at and below the precursor ion
00836      */
00837     const int PercentBelow(void) const;
00838 
00839     /**
00840      * return the number of peaks in a range. range is in fractions of MH
00841      */
00842     const int CountRange(const double StartFraction,
00843                          const double StopFraction) const;
00844 
00845     /**
00846      * return the number of peaks in a range
00847      * 
00848      * @param StartIn inclusive start of range in integer m/z
00849      * @param StopIn exclusive stop of range in integer m/z
00850      * @param MinIntensity the minimum intensity that a peak can have for counting
00851      * @param Which which noise filtered spectrum to use
00852      */
00853     const int CountMZRange(const int StartIn,
00854                            const int StopIn,
00855                            const double MinIntensity,
00856                            const int Which) const;
00857         
00858 
00859     /**
00860      *  takes the ratio, low/high, of two ranges in the spectrum
00861      */
00862     const double RangeRatio(const double Start,
00863                             const double Middle, 
00864                             const double Stop) const;
00865 
00866 
00867     // various charge functions
00868 
00869     
00870     void SetPlusOne(const double PlusIn);
00871 
00872     /**
00873      *  is the data charge +1?
00874      */
00875     const bool IsPlus1(const double PercentBelowIn) const;
00876 
00877     /**
00878      *  calculates charge based on threshold and sets charge value 
00879      * 
00880      * @param ChargeHandle contains info on how to deal with charge
00881      * @param Spectrum the spectrum
00882      */
00883     void SetComputedCharge(const CMSChargeHandle& ChargeHandle, 
00884                            const CMSSpectrum& Spectrum);
00885 
00886     /**
00887      * return the computed charge state
00888      */
00889     const EChargeState GetComputedCharge(void) const;
00890 
00891     /**
00892      *  return allowed computed charges
00893      */
00894     int * GetCharges(void);
00895 
00896     /**
00897      *  return number of allowed computed charges
00898      */
00899     const int GetNumCharges(void) const;
00900 
00901     /**
00902      * compare peaks to ladder using ContainsFast
00903      *
00904      * @param Ladder the ladder to compare
00905      * @param Which which experimental spectrum to use
00906      */
00907     const int Compare(CLadder& Ladder, 
00908                       const EMSPeakListTypes Which) const;
00909 
00910     /**
00911      * see if value is contained in peaks
00912      * 
00913      * @param value the m/z to compare
00914      * @param Which which experimental spectrum to use
00915      */
00916     const bool Contains(const int value,
00917                         const EMSPeakListTypes Which) const;
00918 
00919 
00920     /**
00921      * see if value is contained in peaks using binary search
00922      * 
00923      * @param value the m/z value to compare
00924      * @param Which which experimental spectrum to use
00925      */
00926     const bool ContainsFast(const int value,
00927                             const EMSPeakListTypes Which) const;
00928 
00929     /**
00930      * compares only the top hits
00931      * 
00932      * @param Ladder ladder to compare to
00933      */
00934     const bool CompareTop(CLadder& Ladder);
00935 
00936     /**
00937      * Get Maximum intensity
00938      * 
00939      * @param Which which experimental spectrum to use
00940      */
00941     const int GetMaxI(const EMSPeakListTypes Which) const;
00942 
00943     /**
00944      * returns the cull array index
00945      * 
00946      * @param Charge the charge whose cull array index is wanted
00947      */
00948     const EMSPeakListTypes GetWhich(const int Charge) const;
00949     
00950     /**
00951      * initializes arrays used to track hits
00952      * 
00953      * @param Minhitin minimal number of hits for a match
00954      */
00955     void InitHitList(const int Minhitin
00956               );
00957 
00958     /**
00959      * Get a hit list
00960      * 
00961      * @param Index which hit list
00962      */
00963     TMSHitList& GetHitList(const int Index);
00964 
00965     /**
00966      * Get minimum hit
00967      * 
00968      */
00969     const int GetMinhit() const;
00970 
00971     /**
00972      * Set minimum hit
00973      *
00974      */
00975     int & SetMinhit(void);
00976 
00977     /**
00978      * Get size of hit list
00979      *
00980      * @param Index which hit list
00981      */
00982     const int GetHitListIndex(const int Index) const;
00983 
00984 
00985     /**
00986      * add hit to hitlist.  returns true and the added hit if successful
00987      * 
00988      * @param in Hit to add
00989      * @param out the added hit
00990      */
00991     const bool AddHit(CMSHit& in, CMSHit*& out);
00992 
00993 
00994     /**
00995      * return number of peptides examined for each charge state
00996      * 
00997      * @param ChargeIn charge state
00998      */
00999     const int GetPeptidesExamined(const int ChargeIn) const;
01000 
01001     /**
01002      * set the number of peptides examined for each charge state
01003      * 
01004      * @param ChargeIn charge state
01005      */
01006     int& SetPeptidesExamined(const int ChargeIn);
01007 
01008 
01009     // getter-setters
01010    
01011     /**
01012      *  get precursor m/z
01013      */
01014     const int GetPrecursormz(void) const;
01015 
01016     /**
01017      * calculates neutral mass
01018      * 
01019      * @param PrecursorCharge the charge to assume
01020      */
01021     const int CalcPrecursorMass(const int PrecursorCharge) const;
01022 
01023     /**
01024      * gets min precursor charge to consider multiply charged product ions
01025      */
01026     const int GetConsiderMult(void) const;  
01027 
01028     /**
01029      * return any errors in computing on peaks
01030      */
01031     const EMSHitError GetError(void) const;
01032 
01033     /**
01034      * set any errors in computing on peaks
01035      * 
01036      * @param ErrorIn what was the error?
01037      */
01038     void SetError(const EMSHitError ErrorIn);
01039 
01040     /**
01041      * set the names of the spectrum
01042      */
01043     CMSSpectrum::TIds& SetName(void);
01044 
01045     /**
01046      * get the names of the spectrum
01047      */
01048     const CMSSpectrum::TIds& GetName(void) const;
01049 
01050     /**
01051      * set the spectrum number
01052      */
01053     int& SetNumber(void);
01054 
01055     /**
01056      * get the spectrum number
01057      */
01058     const int GetNumber(void) const;
01059 
01060     /**
01061      * set the product mass tolerance in Daltons.
01062      * 
01063      * @param tolin unscaled mass tolerance
01064      */
01065     void SetTolerance(const double tolin);
01066 
01067     /**
01068      * get the product mass tolerance in Daltons.
01069      */
01070     const int GetTol(void) const;
01071 
01072     /**
01073      * set the precursor mass tolerance in Daltons.
01074      * 
01075      * @param tolin precursor mass tolerance
01076      */
01077     void SetPrecursorTol(double tolin);
01078 
01079     /**
01080      * get the precursor mass tolerance in Daltons.
01081      */
01082     const int GetPrecursorTol(void) const;
01083 
01084     // functions for testing if peaks are H2O or NH3 losses
01085     
01086     /**
01087      * check to see if TestMZ is Diff away from BigMZ
01088      * (all arguments are ints; presumably MSSCALE2INT-scaled values -- TODO confirm)
01089      * @param BigMZ the major ion
01090      * @param TestMZ the minor ion
01091      * @param Diff distance between minor and major ions
01092      * @param tolin mass tolerance
01093      */
01094     const bool IsAtMZ(const int BigMZ, 
01095                       const int TestMZ, 
01096                       const int Diff, 
01097                       const int tolin) const;
01098 
01099     /**
01100      * see if TestMZ can be associated with BigMZ, e.g. water loss, etc.
01101      * (all arguments are ints; presumably MSSCALE2INT-scaled values -- TODO confirm)
01102      * @param BigMZ the major ion
01103      * @param TestMZ the minor ion
01104      * @param tolin mass tolerance
01105      */
01106     const bool IsMajorPeak(const int BigMZ, 
01107                            const int TestMZ, 
01108                            const int tolin) const;
01109 
01110     /**
01111      * list of CRef-held peak lists (CMSPeakList), e.g. one for each charge
01112      */
01113     typedef vector < CRef < CMSPeakList > > TPeakLists;
01114 
01115     /**
01116      * get read-only access to the peak lists
01117      */
01118     const TPeakLists& GetPeakLists(void) const;
01119 
01120     /**
01121      * get writable access to the peak lists, used to set them
01122      */
01123     TPeakLists& SetPeakLists(void);
01124 
01125 private:
01126     /** lists of peaks filtered at different precursor charges, etc. */
01127     TPeakLists PeakLists;
01128     int Precursormz;  // precursor m/z; combined with MSSCALE2INT values in CalcPrecursorMass, so presumably scaled the same way
01129     int Charges[eMSPeakListChargeMax - eMSPeakListCharge1];  // Computed allowed charges
01130     int NumCharges;  // array size of Charges[]
01131     
01132     //! product error tolerance of peptide, stored as MSSCALE2INT(Daltons) by SetTolerance
01133     int tol;
01134     //! precursor error tolerance, stored as MSSCALE2INT(Daltons) by SetPrecursorTol
01135     int PrecursorTol;
01136 
01137     double PlusOne;  // value used to determine if a spectrum is +1
01138     EChargeState ComputedCharge;  // algorithmically calculated charge state
01139     int ConsiderMult;  // at what precursor charge should multiply charged products be considered?
01140     int MaxCharge;  // maximum precursor charge to consider
01141     int MinCharge;  // minimum precursor charge to consider
01142     CAA AA;
01143 
01144     CMSSpectrum::TIds Name;  // name taken from spectrum
01145     int Number;  // spectrum number taken from spectrum
01146     int Minhit;  // minimum number of hit peaks to record hit
01147 
01148     // list of hits, one TMSHitList per array slot
01149     TMSHitList HitList[eMSPeakListChargeMax - eMSPeakListCharge1];
01150     int HitListSize;  // max size of hit list
01151     int HitListIndex[eMSPeakListChargeMax - eMSPeakListCharge1];  // current size of HitList
01152     int LastHitNum[eMSPeakListChargeMax - eMSPeakListCharge1];  // the smallest hit currently in List
01153     int PeptidesExamined[eMSPeakListChargeMax - eMSPeakListCharge1];  // the number of peptides examined in search; indexed by (charge - Charges[0])
01154 
01155     EMSHitError Error; // errors that have occurred in processing
01156 
01157 };
01158 
01159 
01160 ///////////////////   CMSPeak inline methods
01161 
01162 inline  // read-only accessor for the PeakLists member
01163 const CMSPeak::TPeakLists& CMSPeak::GetPeakLists(void) const
01164 {
01165     return this->PeakLists;
01166 }
01167 
01168 inline  // writable accessor for the PeakLists member
01169 CMSPeak::TPeakLists& CMSPeak::SetPeakLists(void)
01170 {
01171     return this->PeakLists;
01172 }
01173 
01174 inline  // stores the value used to decide whether a spectrum is +1
01175 void CMSPeak::SetPlusOne(const double PlusIn)
01176 {
01177     this->PlusOne = PlusIn;
01178 }
01179 
01180 inline  // returns the algorithmically calculated charge state
01181 const EChargeState CMSPeak::GetComputedCharge(void) const
01182 {
01183     return this->ComputedCharge;
01184 }
01185 
01186 inline  // writable access to one hit list; Index is not range-checked here
01187 TMSHitList& CMSPeak::GetHitList(const int Index)
01188 {
01189     return this->HitList[Index];
01190 }
01191 
01192 inline  // current size of the hit list in slot Index; Index is not range-checked here
01193 const int CMSPeak::GetHitListIndex(const int Index) const
01194 {
01195     return this->HitListIndex[Index];
01196 }
01197     
01198 inline  // minimum number of hit peaks needed to record a hit
01199 const int CMSPeak::GetMinhit() const
01200 {
01201     return this->Minhit;
01202 }
01203 
01204 inline  // writable reference to Minhit, used to set it
01205 int & CMSPeak::SetMinhit(void)
01206 {
01207     return this->Minhit;
01208 }
01209 
01210 inline  // peptides examined for ChargeIn; slot is ChargeIn - Charges[0], not range-checked
01211 const int CMSPeak::GetPeptidesExamined(const int ChargeIn) const
01212 {
01213     return this->PeptidesExamined[ChargeIn - this->Charges[0]];
01214 }
01215 
01216 inline  // writable counter for ChargeIn; slot is ChargeIn - Charges[0], not range-checked
01217 int& CMSPeak::SetPeptidesExamined(const int ChargeIn)
01218 {
01219     return this->PeptidesExamined[ChargeIn - this->Charges[0]];
01220 }
01221 
01222 inline  // returns the stored precursor m/z integer
01223 const int CMSPeak::GetPrecursormz(void) const
01224 {
01225     return this->Precursormz;
01226 }
01227 
01228 inline  // precursor m/z times charge, minus the scaled mass of PrecursorCharge protons
01229 const int CMSPeak::CalcPrecursorMass(const int PrecursorCharge) const
01230 {
01231     return PrecursorCharge * this->Precursormz - MSSCALE2INT(PrecursorCharge * kProton);
01232 }
01233 
01234 
01235 inline  // precursor charge at which multiply charged products are considered
01236 const int CMSPeak::GetConsiderMult(void) const
01237 {
01238     return this->ConsiderMult;
01239 }
01240 
01241 inline 
01242 const EMSHitError CMSPeak::GetError(void) const
01243 {
01244     return Error; 
01245 }
01246 
01247 inline 
01248 void CMSPeak::SetError(const EMSHitError ErrorIn) 
01249 {
01250     Error = ErrorIn; 
01251 }
01252 
01253 inline  // writable reference to the spectrum names
01254 CMSSpectrum::TIds& CMSPeak::SetName(void)
01255 {
01256     return this->Name;
01257 }
01258 
01259 inline  // read-only reference to the spectrum names
01260 const CMSSpectrum::TIds& CMSPeak::GetName(void) const
01261 {
01262     return this->Name;
01263 }
01264 
01265 inline  // writable reference to the spectrum number
01266 int& CMSPeak::SetNumber(void)
01267 {
01268     return this->Number;
01269 }
01270 
01271 inline  // returns the spectrum number by value
01272 const int CMSPeak::GetNumber(void) const
01273 {
01274     return this->Number;
01275 }
01276 
01277 inline  // stores the product tolerance as the scaled integer MSSCALE2INT(tolin)
01278 void CMSPeak::SetTolerance(const double tolin)
01279 {
01280     this->tol = MSSCALE2INT(tolin);
01281 }
01282 
01283 inline  // returns the scaled integer product tolerance stored by SetTolerance
01284 const int CMSPeak::GetTol(void) const
01285 {
01286     return this->tol;
01287 }
01288 
01289 inline  // stores the precursor tolerance as the scaled integer MSSCALE2INT(tolin)
01290 void CMSPeak::SetPrecursorTol(double tolin)
01291 {
01292     this->PrecursorTol = MSSCALE2INT(tolin);
01293 }
01294 
01295 inline  // returns the scaled integer precursor tolerance stored by SetPrecursorTol
01296 const int CMSPeak::GetPrecursorTol(void) const
01297 {
01298     return this->PrecursorTol;
01299 }
01300 
01301 // returns the cull array index: eMSPeakListCharge1 for Charge 1, counting upward from there
01302 inline
01303 const EMSPeakListTypes CMSPeak::GetWhich(const int Charge) const
01304 {
01305     return static_cast<EMSPeakListTypes>(eMSPeakListCharge1 + Charge - 1);
01306 }
01307 
01308 inline  // pointer to the first element of the Charges array (writable)
01309 int * CMSPeak::GetCharges(void)
01310 {
01311     return &this->Charges[0];
01312 }
01313 
01314 inline  // returns NumCharges, the size recorded for Charges[]
01315 const int CMSPeak::GetNumCharges(void) const
01316 {
01317     return this->NumCharges;
01318 }
01319 
01320 /////////////////// end of  CMSPeak  inline methods
01321 
01322 
01323 /////////////////////////////////////////////////////////////////////////////
01324 //
01325 //  CMSPeakSet::
01326 //
01327 //  Class used to hold sets of CMSPeak and access them quickly
01328 //
01329 
01330 
01331 typedef deque <CMSPeak *> TPeakSet;  // deque of raw CMSPeak pointers, the storage type of CMSPeakSet::PeakSet
01332 
01333 class  _MassPeak: public CObject {  // NOTE(review): names starting with underscore + capital are reserved in C++; use the TMassPeak alias
01334 public:
01335     int Mass, Peptol;  // a mass and its tolerance (presumably precursor values in scaled integer units -- confirm)
01336     int Charge;        // charge paired with Mass
01337     CMSPeak *Peak;     // the CMSPeak this entry refers to (raw pointer; ownership not visible here)
01338 };
01339 
01340 typedef _MassPeak TMassPeak;  // alias for the _MassPeak record type
01341 
01342 
01343 // range type for a peptide mass +/- some tolerance, expressed as a CRange over TSignedSeqPos
01344 typedef CRange<TSignedSeqPos> TMassRange;
01345 
01346 class  CMSPeakSet: public CObject {
01347 public:
01348     // constructor and destructor
01349     CMSPeakSet(void);
01350     ~CMSPeakSet();
01351 
01352     void AddPeak(CMSPeak *PeakIn);  // appends PeakIn to the end of the peak deque
01353 
01354     /**
01355      *  put the pointers into an array sorted by mass
01356      *
01357      * @param Peptol the precursor mass tolerance
01358      * @param Zdep should the tolerance be charge dependent?
01359      * @return maximum m/z value
01360      */
01361     int SortPeaks(
01362         int Peptol,
01363         int Zdep
01364         );
01365     
01366     TPeakSet& GetPeaks(void);  // writable access to the peak deque
01367     CIntervalTree& SetIntervalTree(void);  // writable access to the mass interval tree
01368     // NOTE(review): "for deletion" below suggests the destructor deletes the stored pointers; if so, copying this class is unsafe -- confirm
01369 private:
01370     TPeakSet PeakSet;  // peak list for deletion
01371     CIntervalTree MassIntervals;
01372 };
01373 
01374 ///////////////////   CMSPeakSet inline methods
01375 
01376 inline CMSPeakSet::CMSPeakSet(void)  // nothing to do: both members are default-constructed
01377 {}
01378 
01379 inline void CMSPeakSet::AddPeak(CMSPeak *PeakIn)  // appends the pointer; no null check
01380 {
01381     this->PeakSet.push_back(PeakIn);
01382 }
01383 
01384 inline  // writable reference to the MassIntervals tree
01385 CIntervalTree& CMSPeakSet::SetIntervalTree(void)
01386 {
01387     return this->MassIntervals;
01388 }
01389 
01390 inline  // writable reference to the deque of peak pointers
01391 TPeakSet& CMSPeakSet::GetPeaks(void)
01392 {
01393     return this->PeakSet;
01394 }
01395 
01396 /////////////////// end of CMSPeakSet inline methods
01397 
01398 END_SCOPE(omssa)
01399 END_SCOPE(objects)
01400 END_NCBI_SCOPE
01401 
01402 #endif
01403 
01404 
01405 

Generated on Sun Feb 15 02:00:05 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Sun Feb 15 15:27:13 2009 by modify_doxy.py rev. 117643