# Descriptive statistics, correlations and scatterplots for the `usair` data.
# Each quantity is computed "by hand" first and then with the built-in
# function, so the two results can be compared.

# Read in the data (table format). Change the path below to wherever the file
# is located on your machine.
usair <- read.table("/Users/yasu/Desktop/ma/Data/usair.txt", header = TRUE)

# ---- Mean ------------------------------------------------------------------

# One way to obtain the mean: the sum divided by the number of observations.
sum(usair$SO2) / length(usair$SO2)
sum(usair$Pop) / length(usair$Pop)

# Easier way: the mean of every column at once.
# mean() no longer accepts a whole data frame (it returns NA with a warning),
# so restrict to the numeric columns and use colMeans().
usair_numeric <- Filter(is.numeric, usair)
colMeans(usair_numeric)

# ---- Standard deviation ----------------------------------------------------

# One way to obtain the standard deviation, written as a single expression.
sqrt(
  sum((usair$SO2 - (sum(usair$SO2) / length(usair$SO2)))^2) /
    (length(usair$SO2) - 1)
)

# Better to break things down.
# NOTE: `df` and `var` are also the names of functions in the stats package;
# calls such as var(x) still find the functions, but the names are kept here
# only so that anything relying on these variables continues to work.
xbar <- sum(usair$SO2) / length(usair$SO2)  # xbar is the sample mean
df <- length(usair$SO2) - 1                 # df is the degrees of freedom
sse <- sum((usair$SO2 - xbar)^2)            # sum of squared errors (deviations)
var <- sse / df                             # variance: mean squared error
s <- sqrt(var)                              # s is the standard deviation

# Easier way: the standard deviation of every column at once.
# sd() raises an error when given a whole data frame, so apply it column by
# column to the numeric columns instead.
vapply(usair_numeric, sd, numeric(1))

# ---- Correlation -----------------------------------------------------------

# Correlation the hard way: the sum of the products of the standardised
# scores, divided by the degrees of freedom.
sum(
  ((usair$SO2 - mean(usair$SO2)) / sd(usair$SO2)) *
    ((usair$Pop - mean(usair$Pop)) / sd(usair$Pop))
) / (length(usair$Pop) - 1)

# Breaking down the correlation.
df <- length(usair$Pop) - 1                          # degrees of freedom
dx <- (usair$SO2 - mean(usair$SO2)) / sd(usair$SO2)  # standardised scores, x
dy <- (usair$Pop - mean(usair$Pop)) / sd(usair$Pop)  # standardised scores, y
r <- sum(dx * dy) / df

# Or just use the built-in function.
cor(usair$SO2, usair$Pop)

# Attach the data frame so its columns can be accessed without the `usair$`
# prefix.
# NOTE(review): attach() modifies the search path and is generally discouraged;
# it is kept because code following this section may rely on the bare column
# names. Prefer with(usair, ...) or usair$col in new code.
attach(usair)
cor(SO2, Neg.Temp)
cor(SO2, Manuf)
cor(SO2, Pop)
cor(SO2, Wind)
cor(SO2, Precip)
cor(SO2, Days)

# ---- Scatterplots ----------------------------------------------------------

# Plotting a scatterplot is easy.
plot(SO2, Manuf)
# type = "b" draws both points and the lines connecting them.
plot(SO2, Manuf, type = "b")