############# ## Sheet 2 ## ############# #### question 7 x <- scan("http://www.stats.ox.ac.uk/~laws/partA-stats/data/quakes.txt") n <- length(x) k <- 1:n plot(-log(1 - k/(n+1)), sort(x), main = "Exponential Q-Q Plot", ylab = "Ordered data", xlab = "-log[1 - k/(n+1)]") abline(0, mean(x)) # abline above plots a line with intercept = 0 and gradient = mean(x) # - from lecture notes the exponential Q-Q plot should have intercept 0 # and gradient mu if the data are exponential with mean mu # use ?abline to see the help page for abline a <- qgamma(0.025, n) b <- qgamma(0.975, n) # interval using the gamma distribution c(a, b) / sum(x) xbar <- mean(x) # approx interval using lambda.hat +/- 1.96*I(lambda.hat)^{-1/2} c(1 - 1.96/sqrt(n), 1 + 1.96/sqrt(n)) / xbar # second approx interval from substituting I(lambda) = n/lambda^2 # and then solving the inequalities # i.e. not replacing lambda by lambda.hat in order to estimate a variance c(1/(1 + 1.96/sqrt(n)), 1/(1 - 1.96/sqrt(n))) / xbar #### question 8 # above we have three slightly different intervals # interval1 from gamma, interval2 from first approx method # and interval3 from second approx # n = 62 for the above data and the large sample properties are evident, # the three intervals are almost the same # now investigate how the three intervals perform in small samples, # e.g. n = 10, using data generated from an exponential, parameter 1 # generate the sample, calculate and plot the three intervals # repeat m times, e.g. m = 33 giving 99 intervals in total # cut-and-paste the following chunk into R, you don't need to work out # the details of what all the plotting commands are doing # ---begin chunk--- n <- 10 a <- qgamma(0.025, n) b <- qgamma(0.975, n) m <- 33 plot(1, 1, type = "n", yaxt = "n", xlim = c(0, 5), ylim = c(0, 4*m), xlab = "lambda", ylab = "", main = paste("95% CIs: samples of size", n, "from exponential, parameter 1")) abline(v = 1) legend("topright", c("interval1", "interval2", "interval3"), lty = 1, lwd = 2, col = c(1, "orange2", "steelblue2")) for (i in 1:m) { x <- rexp(n) ci1 <- c(a, b) / sum(x) ci2 <- c(1 - 1.96/sqrt(n), 1 + 1.96/sqrt(n)) / mean(x) ci3 <- c(1/(1 + 1.96/sqrt(n)), 1/(1 - 1.96/sqrt(n))) / mean(x) lines(ci1, rep(4*i-1, 2), lwd = 2) lines(ci2, rep(4*i-2, 2), lwd = 2, col = "orange2") lines(ci3, rep(4*i-3, 2), lwd = 2, col = "steelblue2") } # ---end chunk--- # x <- rexp(n) generates a sample of size n # use ?rexp to see the help page for rexp - when no rate parameter is # given, rate = 1 is the default, hence vertical line on the plot at # the true value lambda = 1 # the three intervals behave differently in small samples # try repeating with larger n, e.g. n = 20, 50 # - you only need to change the first line n <- 10 to a different value # at n = 50 the three intervals are close, especially intervals 1 & 2 # (and n = 62 for the data in question 3) #### question 9 x <- c(10, 11, 12, 13, 15, 16, 17, 18, 19, 25) # test statistic tobs <- sqrt(10)*(mean(x) - 13.1)/sd(x) # two-sided p-value 2*(1 - pt(tobs, df = 9)) # one-sided p-value 1 - pt(tobs, df = 9) # can check using t.test # see ?t.test, by default it assumes two-sided, and also uses a method for # unequal variances hence we want var.equal = TRUE t.test(x, mu = 13.1, var.equal = TRUE) # one-sided t.test(x, mu = 13.1, alternative = "greater", var.equal = TRUE) # or could compare tobs to the quantiles qt(0.975, df = 9) qt(0.95, df = 9)