Confidence interval simulation for an unknown binomial proportion

The following R code simulates iter=1000 confidence intervals using the Wald formula.


# First scan in these functions to compute the upper and lower 
# confidence limits of an interval
lcl <- function(x, z=qnorm(.975)) { # x will be a 0-1 vector; z is
                                    # critical point depending on the
                                    # confidence level.
       n <- length(x)
       p.hat <- mean(x)
       return(p.hat - z*sqrt(p.hat*(1-p.hat)/n))
}

ucl <- function(x, z=qnorm(.975)) { # x will be a 0-1 vector; z is
                                    # critical point depending on the
                                    # confidence level.
       n <- length(x)
       p.hat <- mean(x)
       return(p.hat + z*sqrt(p.hat*(1-p.hat)/n))
}

p <- .5  # we pretend that unknown value is .5
n <- 50  # the sample size
iter <- 1000 # the simulation will perform 1000 iterations
z <- qnorm(.975)  # we simulate 95% confidence intervals

data <- replicate(iter, rbinom(n,1,p)) # each column is a random sample

# Now use the  apply  function to compute the confidence 
# interval for each column, accumulating a collection of 
# confidence intervals in the form of a lower confidence limit (lcl) 
# and an upper confidence limit (ucl).

lcl.all <- apply(data,2,lcl)
ucl.all <- apply(data,2,ucl)

# Now check each interval for whether it contains the true p;
# we will let 'hit = 1' when the interval contains p 
# (a 'successful interval') and 'hit = 0' otherwise.

hit <- lcl.all <= p & p <= ucl.all

sum(hit==1)/iter # this proportion gives the empirical “coverage probability.” 
                 # Compare it to the “nominal” confidence level; e.g., 95%.
                 

# The following code produces a graphic that shows successful
# and unsuccessful confidence intervals among the set of samples.
# The graphic will become unwieldy if iter is too large.  We suggest
# using a value 100 or less for the graphic.
# If your iter is greater than 100, the first 100 intervals are shown.

plot(1,1,xlim=c(0,1),ylim=c(0,101),type='n')
for (i in 1:iter) {
     # we'll plot CI schematics from bottom up
     lines(c(lcl.all[i],ucl.all[i]),c(i,i))
     points(x[i],i,pch=16,cex=.5,)
     }
abline(v=p,lty=2)