The following R code simulates iter=1000 confidence intervals using the Wald formula.
# First scan in these functions to compute the upper and lower
# confidence limits of an interval
lcl <- function(x, z=qnorm(.975)) { # x will be a 0-1 vector; z is
# critical point depending on the
# confidence level.
n <- length(x)
p.hat <- mean(x)
return(p.hat - z*sqrt(p.hat*(1-p.hat)/n))
}
ucl <- function(x, z=qnorm(.975)) { # x will be a 0-1 vector; z is
# critical point depending on the
# confidence level.
n <- length(x)
p.hat <- mean(x)
return(p.hat + z*sqrt(p.hat*(1-p.hat)/n))
}
p <- .5 # we pretend that unknown value is .5
n <- 50 # the sample size
iter <- 1000 # the simulation will perform 1000 iterations
z <- qnorm(.975) # we simulate 95% confidence intervals
data <- replicate(iter, rbinom(n,1,p)) # each column is a random sample
# Now use the apply function to compute the confidence
# interval for each column, accumulating a collection of
# confidence intervals in the form of a lower confidence limit (lcl)
# and an upper confidence limit (ucl).
lcl.all <- apply(data,2,lcl)
ucl.all <- apply(data,2,ucl)
# Now check each interval for whether it contains the true p;
# we will let 'hit = 1' when the interval contains p
# (a 'successful interval') and 'hit = 0' otherwise.
hit <- lcl.all <= p & p <= ucl.all
sum(hit==1)/iter # this proportion gives the empirical “coverage probability.”
# Compare it to the “nominal” confidence level; e.g., 95%.
# The following code produces a graphic that shows successful
# and unsuccessful confidence intervals among the set of samples.
# The graphic will become unwieldy if iter is too large. We suggest
# using a value 100 or less for the graphic.
# If your iter is greater than 100, the first 100 intervals are shown.
plot(1,1,xlim=c(0,1),ylim=c(0,101),type='n')
for (i in 1:iter) {
# we'll plot CI schematics from bottom up
lines(c(lcl.all[i],ucl.all[i]),c(i,i))
points(x[i],i,pch=16,cex=.5,)
}
abline(v=p,lty=2)