# BOOTSTRAP LECTURE ----
#
# The bootstrap approximates the sampling distribution of an estimator by
# resampling the observed data with replacement and recomputing the estimate
# on every resample. (The original note "This methodology calculates" was
# left unfinished.)
#
# I will exemplify with some grades data.
grade.data <- read.csv("bootstrapexample.csv", header = TRUE)
grade.data

# Description of data ----
#
# These are 78 gymnasium students (some have dropped out of the study),
# seventh grade.
#   GPA     - grade point average; when it was recorded is not specified.
#   IQ      - scores for a standard IQ test.
#   concept - scores for a self-concept test administered by the
#             psychologist doing the study.
#   gender  - coded 1 / 2; which code is which gender is not specified.

# I am going to look at the GPA variable only. In fact I will create a new
# vector gpa:
gpa <- grade.data$gpa

hist(gpa)
# Notice the skewed distribution. Remember that the mean is not a good
# measure of location when the distribution is skewed; the median is.
median(gpa)

# However, we do not know anything about a confidence interval for the
# median. Suppose we would like to construct one without assuming anything
# about the distribution of the variable gpa.

# Create ONE bootstrap sample ----
# With no size given, sample() draws length(gpa) values; replace = TRUE
# makes it a draw with replacement.
sample(gpa, replace = TRUE)

# Apply the estimator to a new bootstrap sample:
median(sample(gpa, replace = TRUE))

# Many bootstrap samples ----
# Generate many bootstrap samples and calculate the estimate for each one.
# vapply() fills a preallocated numeric vector instead of growing it with
# c() on every iteration; each replicate still makes exactly one sample()
# call, so the sequence of random draws matches the explicit for-loop.
# Call set.seed() beforehand if reproducible numbers are wanted.
n.boot <- 10000L
boot.median.estimate <- vapply(
  seq_len(n.boot),
  function(i) median(sample(gpa, replace = TRUE)),
  numeric(1)
)

hist(boot.median.estimate)

# Efron ----
# The new bootstrap estimator is the mean of the bootstrap estimates:
mean(boot.median.estimate)

# Confidence interval: the 2.5% and 97.5% quantiles of the bootstrap
# estimates (percentile interval).
quantile(boot.median.estimate, 0.025)
quantile(boot.median.estimate, 0.975)

# Hall method ----
# The better estimate is
#   2 * original.estimate - mean(bootstrap.vector)
# and the confidence interval runs from
#   2 * original.estimate - 97.5% quantile
# to
#   2 * original.estimate - 2.5% quantile.
# That is:
2 * median(gpa) - mean(boot.median.estimate)

# And the limits. The printed values keep the quantile names, so the LOWER
# limit is labelled "97.5%" and the UPPER limit is labelled "2.5%".
2 * median(gpa) - quantile(boot.median.estimate, 0.975)
2 * median(gpa) - quantile(boot.median.estimate, 0.025)

# THE ABOVE WORKS FOR ANY ESTIMATION METHOD: replace median() with the
# estimator of interest.