########################################################## # Multiple regression and canonical correlation ########################################################## # Multiple regression # -------------------------------------------------------- # ::: generalization of a simple linear regression # ::: relationship between a dependent variable, y, # and some explanatory variables, x1, x2, ... # ::: y = b0 + b1x1 + b2x2 + b3x3 ... + e # ::: or y = Xb + e # ::: y = [y1, y2, ..., yn] n = # of observations # ::: X = matrix n*(q+1), q = # of explanatory variables # ::: b = [b0, b1, ..., bq] regression coefficient # ::: e = [e1, 12, ..., en] residual term ########################################################## # read in table data ########################################################## usair <- read.table("/Users/yasu/Desktop/ma/Data/usair.txt", header=TRUE) attach(usair) usair.fit <- lm(SO2~Neg.Temp + Manuf + Pop + Wind + Precip + Days) summary(usair.fit) ########################################################## # F statistic tests null that all b are 0 # t statistics show which ones are the most important predictors # Multiple R^2 shows the % of variation in y accounted for by xs ########################################################## # Canonical correlations # -------------------------------------------------------- # ::: extension of multiple regression - more than one y # ::: relationships between two sets of variables # ::: multiple dependent and independent variables # ::: correlation between linear functions # ::: x = [x1, x2, ..., xq1], y = [y1, y2, ..., yq2] # ::: R11 = cor matrix of variables in x # ::: R22 = cor matrix of variables in y # ::: R12 = cor between x and y (q1 by q2 matrix) # ::: E1 = R11inv R12 R22inv R21 ########################################################## R11*x = R12 R22*x = R12 #Ainv<-solve(A) # # Note that if you want to solve Ax=b you can do solve(A,b), which should be # quicker than solve(A)%*%b # Ainv <- qr.solve(A) ########################################################## # head measurement in millimeters for the first two sons in 25 families ########################################################## headsize<-source("/Users/yasu/Desktop/ma/Data/chap8headsize.dat")$value headsize # # standardize by dividing by sd # headsize.std<-sweep(headsize,2,sqrt(apply(headsize,2,var)),FUN="/") headsize.std # headsize1<-headsize.std[,1:2] headsize2<-headsize.std[,3:4] # headsize1 headsize2 # # find all the matrices necessary for calculation # r11<-cor(headsize1) r22<-cor(headsize2) r12<-c(cor(headsize1[,1],headsize2[,1]),cor(headsize1[,1],headsize2[,2]), cor(headsize1[,2],headsize2[,1]),cor(headsize1[,2],headsize2[,2])) # r12<-matrix(r12,ncol=2,byrow=T) r21<-t(r12) # E1<-solve(r11, r12)%*%solve(r22, r21) # solve(r11)%*%r12%*%solve(r22)%*%r21 E2<-solve(r22, r21)%*%solve(r11, r12) # solve(r22)%*%r21%*%solve(r11)%*%r12 E1 E2 # eigen(E1) eigen(E2) # sqrt(eigen(R1)$values) # # # r22<-matrix(c(1.0,0.044,-0.106,-0.180,0.044,1.0,-0.208,-0.192,-0.106,-0.208,1.0,0.492, -0.180,-0.192,0.492,1.0),ncol=4,byrow=T) r11<-matrix(c(1.0,0.212,0.212,1.0),ncol=2,byrow=2) r12<-matrix(c(0.124,-0.164,-0.101,-0.158,0.098,0.308,-0.270,-0.183),ncol=4,byrow=T) r21<-t(r12) # E1<-solve(r11)%*%r12%*%solve(r22)%*%r21 E2<-solve(r22)%*%r21%*%solve(r11)%*%r12 # E1 E2 # eigen(E1) eigen(E2)