## BOOTSTRAP LECTURE

# This methodology estimates the sampling distribution of a statistic by resampling the observed data with replacement.


# I will exemplify with some grades data
# Load the example data set; read.csv() treats the first row as column
# names by default.
grade.data <- read.csv("bootstrapexample.csv")
# Show the data that was read in
grade.data
## Description of data
## These are 78 gymnasium students (some have dropped out of the study) (seventh grade)
#GPA - not specified when
# IQ - scores for a standard IQ test
# concept - scores for a self concept score administered by the psychologist doing the study.
## gender - 1=? 2= the other.

#I am going to look at the GPA variable only. In fact I will create a new vector gpa:
# Pull the gpa column out of the data frame into its own vector
gpa <- grade.data$gpa

# Look at the shape of the distribution
hist(gpa)
# Notice the skewed distribution. Recall that the mean is not a good measure
# of centre when the distribution is skewed, whereas the median is.
median(gpa)
#However we do not know anything about a confidence interval for this. 
#Suppose we would like to construct one without assuming anything about the distribution of the variable gpa.

# CREATE ONE BOOTSTRAP SAMPLE:

# One bootstrap sample: draw length(gpa) values from gpa with replacement.
# TRUE is spelled out because T is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.
sample(gpa, replace = TRUE)

# Apply the statistic of interest (the median) to a fresh bootstrap sample:
median(sample(gpa, replace = TRUE))

#so now generate many bootstrap samples and calculate the estimate for each:

# Draw 10000 bootstrap samples of gpa (each the same size as gpa, drawn with
# replacement) and keep the median of every one of them.
# replicate() builds the result vector in one go; growing a vector with c()
# inside a loop re-copies the whole vector on every iteration.
boot.median.estimate <- replicate(10000, median(sample(gpa, replace = TRUE)))

# Distribution of the bootstrap medians
hist(boot.median.estimate)

# Efron: the bootstrap estimator is the average of the bootstrap medians.
mean(x = boot.median.estimate)

# Confidence interval limits: the 2.5% and 97.5% quantiles of the
# bootstrap medians.
quantile(boot.median.estimate, probs = 0.025)
quantile(boot.median.estimate, probs = 0.975)



# Hall method: the better estimate is 2 * original.estimate - mean(bootstrap.vector).
# The confidence interval then likewise runs from 2 * original.estimate - (97.5% quantile)
# to 2 * original.estimate - (2.5% quantile).
# That is:

# Point estimate: twice the sample median minus the mean of the bootstrap medians.
2 * median(gpa) - mean(boot.median.estimate)

# Interval limits: subtracting the upper bootstrap quantile gives the lower
# limit and subtracting the lower quantile gives the upper limit.
# (The printed labels still show the quantile that was subtracted.)

2 * median(gpa) - quantile(boot.median.estimate, probs = 0.975)
2 * median(gpa) - quantile(boot.median.estimate, probs = 0.025)

## THE ABOVE WORKS FOR ANY ESTIMATION METHOD