# main.R Contains R Functions described in ORNL/TM-2004/146 # # Statistical Analyis of Data with Non-Detectable Values # E. L. Frome and J. P. Watkins # Revision 6: 28 July 2004 # # mlndln() calculates mle estimates for left censored sample # extol() exact tolerance limit for Logmormal model # nptl() calculate index for Nonparametric tolerance limit # ########################### mlndln ################################ mlndln <- function(dd = ex1 ){ # ML estimates for lognormal sample with non-detects # see ORNL/TM-2004/146 Section 3 # USAGE: mlndln( dd ) # ARGUMENT: matrix dd with d[i] in column 1 and cen[i] in col 2 # d[i] is positive lognormal data cen[i]=0 for non-detect ; 1 for detect # y= log(d) is normal with mean mu and standard deviation sigma # E(dose) = exp( mu + 0.5*sig2)= exp(logE) where sig2 = sigma^2 # m is number of detects and Conver is convergence check # VALUE: mlndln returns estimates of following in 2 by 6 matrix format: # mu sigma logE sig2 -2Log(L) Conver # se.mu se.sigma se.logE se.sig2 cov(mu,sig) m # REFERENCE: Cohen, A.C (1991) Truncated and Censored Samples # Marcel Decker, New York # REQUIREs ndln() ndln2() loglikelihood function for optim() # see R help file for details on optim() and dlnorm() m <- sum(dd[,2]) # number of non-detects # initial estimate of mu and sig (sigma) yt <- ifelse(dd[,2]==0,dd[,1]/2,dd[,1] ) est <- c( mean(log(yt)), sd(log(yt)) ) # ML estimates mu and sig est <- optim(est,ndln, method = c("Nelder-Mead"),xd=dd )$par cont <- list(parscale=abs(est)) opt1 <- optim(est,ndln ,NULL, method ="L-BFGS-B",lower=c(-Inf,0.0), upper=c(Inf,Inf),cont, hessian=T,xd=dd ) conv1 <- opt1$conv # convergenc check from optim() mle <- opt1$par # ML estimate of mu and sig vcm <- solve(opt1$hessian) semle <- sqrt(diag( vcm )) # standard Errors of mu and sig cov <- vcm[1,2] # covariace(mu,sig) needed for Tolerance bound # # est logE(dose) and sig2 (sigmma^2) # # est[1] <- mle[1] - 0.5*mle[2]^2 est[1] <- mle[1] + 0.5*mle[2]^2 est[2] <- mle[2]^2 cont <- list(parscale=abs(est)) opt2 <- optim(est,ndln2 ,NULL, method ="L-BFGS-B",lower=c(-Inf,0.0), upper=c(Inf,Inf),cont, hessian=T,xd=dd ) # next line adds ML estimate of logE sig2 -2Log(L) and Conver # If Conver is not equal to 0 CHECK RESULTs--- see optim() help mle <- c(mle,opt2$par, 2*opt2$value,opt2$conv+conv1 ) semle <- c(semle,sqrt(diag(solve(opt2$hessian))),vcm[1,2],m) names(mle) <- c("mu","sigma","logE","sig2","-2Log(L)","Conver") out<-rbind( mle,semle) out } # ndln<-function(p=est,xd){ # - log liklihood for lognormal sample mu<-p[1];sig<-p[2];x<-xd[,1] xx<-ifelse(xd[,2]==1,dlnorm(x,mu,sig,log=T) , plnorm(x,mu,sig,log.p=T)) -sum(xx) } ndln2<-function(p=est,xd){ mu<-p[1] - 0.5*p[2]; sig<-sqrt(p[2]);x<-xd[,1] xx<-ifelse(xd[,2]==1,dlnorm(x,mu,sig,log=T) , plnorm(x,mu,sig,log.p=T)) -sum(xx) } ############################ extol #################################### extol<- function(n=50,p=0.95,gam=0.95) { # For random sample size n from normal distribution # ybar is sample mean and SD is standard deviation # calculate with confidence level gam that at least # 100p percent of population lies below the # tolerance limit = ybar + k*SD # USAGE: extol(n,p,gam) # ARGUMENTS: n: sample size p: defined above # gam: confidence level for one-sided interval # VALUE: factor k for exact tolerance limit # DETAILS: R function uniroot is used to find quantile # of noncentral t distribution # REFERENCES: # Johnson, N. L. and Welch, B. L. (1940), Applications # of the Non-Central T distribution, Biometrika, 362-389 # see Table 1 in # Odeh, R.E. and Owen, D.B.(1980) Table for Normal Tolerance Limits, # Sampling Plans, and Screening,Marcel Deker, New York # NOTE: second argument to uniroot may not be optimal tx<- function(x,nn=n,th=p,ga=gam) {pt(x,nn-1,(-sqrt(nn)*qnorm(th))) + ga- 1} uout<- uniroot(tx,sqrt(n)*c( -(1/(1- max(p,gam) )),50) ) u.tmp<<-uout k<- -uout$root/sqrt(n) k } ############################ nptl #################################### nptl<- function(n=100,p=0.95,gamma=0.95){ # function nptl(n,p,gam) given n p and gamma 8Oct2002 # For a random sample of size n calculate largest value # of m such that with confidence level gamma # 100p percent of population lies below the # mth largest data value in the sample # USAGE: nptl(n,p,gam) # ARGUMENTS: n: sample size p: defined above # gam: confidence level for one-sided interva # VALUE: m # DETAILS: Requries R function qbeata(p,par1,par2) # REFERENCES: # Sommerville, P.N. (1958) Annals Math Stat pp 599-601 k <- ceiling(n*p) pv <- qbeta(1-gamma,k,n+1-k) while( pv < p && k < n+1){ k <- k + 1 if( k == n + 1) next pv<-qbeta(1-gamma,k,n+1-k) } if( k <= n) m<- n+1-k else m<- NA m } ############################ end main.R ####################################