#! /usr/bin/Rscript library(reshape) library(ggplot2) timings <- data.frame(A=scan('data/C70.time'), B=scan('data/Static1209.time'), C=scan('data/C70Tip1209.time'), D=scan('data/Static.time'), E=scan('data/Stock1209.time'), F=scan('data/StockTip1209.time'), G=scan('data/Stock.time')) timings.flat <- melt(timings, id.vars=c(), variable_name='condition') # compute mean and standard deviation in each condition timings.msd <- cast(timings.flat, condition ~ ., fun.aggregate=function(v) list(mean=mean(v), sd=sd(v))) timings.norm <- data.frame(condition = timings.flat$condition, value = numeric(length(timings.flat$value))) for (c in timings.msd$condition) { M <- timings.msd[timings.msd$condition==c, 'mean'] S <- timings.msd[timings.msd$condition==c, 'sd'] ROWS <- timings.norm$condition == c timings.norm[ROWS, 'value'] <- (timings.flat[ROWS, 'value'] - M) / S } # overlay frame with the normal distribution overlay.norm <- data.frame(x=seq(-3.5,3.5,0.1), y=dnorm(seq(-3.5,3.5,0.1))) # diagnostic plot of the pdf of each condition p <- ggplot(timings.norm, aes(x=value)) + facet_wrap(~ condition, ncol=4) + geom_density(fill='gray', colour=NA) + geom_rug() + layer(data=overlay.norm, mapping=aes(x=x, y=y), geom='line', geom_params=list(colour='red', size=0.5)) + scale_x_continuous(name='Startup time, normalized per condition', limits=c(-9, 9), breaks=seq(-9, 9, 3)) + ylab('Observation density') pdf(file='diagnosis.pdf', width=16, height=8) print(p) dev.off()