############# ## Sheet 1 ## ############# #### question 7 x1 <- rnorm(100) qqnorm(x1) x2 <- rexp(100) qqnorm(x2) x3 <- runif(100) qqnorm(x3) x4 <- rt(100, df = 1) qqnorm(x4) #### question 8 # to see all four plots at once, # i.e. to arrange the plots in a 2 x 2 array, # use par(mfrow = c(2, 2)) and then the qqnorm commands par(mfrow = c(2, 2)) # from now on plots will be in a 2 x 2 array x1 <- rnorm(100) qqnorm(x1, main = "Normal Q-Q plot: normal data") x2 <- rexp(100) qqnorm(x2, main = "Normal Q-Q plot: exponential data") x3 <- runif(100) qqnorm(x3, main = "Normal Q-Q plot: uniform data") x4 <- rt(100, df = 1) qqnorm(x4, main = "Normal Q-Q plot: Cauchy data") # to get back to a 1 x 1 array of plots you would use # par(mfrow = c(1, 1)) # try multiple plots to see how much variation there is # from one sample to another # normal data, n = 100, try running this a few times for (i in 1:4) { x <- rnorm(100) qqnorm(x) } # and repeat but with x <- rexp(100) # and with x <- runif(100) # and with x <- rt(100, df = 1) # next, vary the sample size # normal data, n = 10 for (i in 1:4) { x <- rnorm(10) qqnorm(x) } # useful to also try n = 20, 50 # useful to also try exponential data (using rexp), # and uniform data (using runif), # and Cauchy, or t, data (using rt) # e.g. uniform distribution, n = 20 for (i in 1:4) { x <- runif(20) qqnorm(x) } # can also look at t-distributions with different numbers # of degrees of freedom # e.g. t-distribution with 5 dgrees of freedom, n = 10 for (i in 1:4) { x <- rt(10, df = 5) qqnorm(x) } #### question 9 n <- 100 x <- rnorm(n) k <- 1:n plot(-log(1 - k/(n+1)), sort(x), main = "Exponential Q-Q Plot", ylab = "Ordered data", xlab = "-log[1 - k/(n+1)]") # now try replacing x <- rnorm(n) by x <- rexp(n) x <- rexp(n) plot(-log(1 - k/(n+1)), sort(x), main = "Exponential Q-Q Plot", ylab = "Ordered data", xlab = "-log[1 - k/(n+1)]") # are interarrival times exponential? # exponential Q-Q plot with data on insurance claim interarrival times x <- scan("http://www.stats.ox.ac.uk/~laws/partA-stats/data/interarrivals.txt") n <- length(x) k <- 1:n plot(-log(1 - k/(n+1)), sort(x), main = "Exponential Q-Q Plot", ylab = "Ordered data", xlab = "-log[1 - k/(n+1)]") # are claim amounts exponential? # exponential Q-Q plot with data on insurance claim amounts x <- scan("http://www.stats.ox.ac.uk/~laws/partA-stats/data/amounts.txt") n <- length(x) k <- 1:n plot(-log(1 - k/(n+1)), sort(x), main = "Exponential Q-Q Plot", ylab = "Ordered data", xlab = "-log[1 - k/(n+1)]") # are interarrival times Pareto? # Pareto Q-Q plot for interarrival times x <- scan("http://www.stats.ox.ac.uk/~laws/partA-stats/data/interarrivals.txt") n <- length(x) k <- 1:n plot(-log(1 - k/(n+1)), sort(log(x)), main = "Pareto Q-Q Plot: interarrivals", ylab = "log(Ordered data)", xlab = "-log[1 - k/(n+1)]") # are claim amounts Pareto? # Pareto Q-Q plot for claim amounts x <- scan("http://www.stats.ox.ac.uk/~laws/partA-stats/data/amounts.txt") n <- length(x) k <- 1:n plot(-log(1 - k/(n+1)), sort(log(x)), main = "Pareto Q-Q Plot: amounts", ylab = "log(Ordered data)", xlab = "-log[1 - k/(n+1)]")