# Factor analysis examines the interrelationships among a large number of
# variables to determine underlying dimensions (factors).

# Drug use data ----

# Data on drug use for 1634 students in the 7th to 9th grades in LA
# (around 1980).
# 5-point scale: never, once, a few times, many times, regularly.
#
# source() evaluates the file and returns a list whose `value` element is the
# result of the last expression in it (presumably the correlation matrix of
# the drug-use items, given how it is used below).
# NOTE(review): absolute, machine-specific path; point it at your own copy of
# druguse.dat before running.
druguse.cor <- source("/Users/yasu/Desktop/maFall2009/Data/druguse.dat")$value
druguse.cor

# Assumption: there are some underlying common factors (e.g., intelligence,
# social class, social/soft drug).
# These factors cannot be directly observed - latent variables - concepts that
# cannot be measured directly.
# Measurable variables - manifest variables - are expected to be related to
# the latent variables.
# Factor analysis looks at the relationships between assumed latent variables
# and the manifest variables.
# The model assumes that the observed relationships between manifest (or
# directly measured) variables (covariance or correlation) are due to the
# relationships of these variables to the latent variables (or factors that
# cannot be measured directly).
#
# NOTE(review): factanal() always fits by maximum likelihood; `method` is not
# one of its formal arguments, so method = "mle" appears to have no effect on
# the fit. It is kept as written.
f1 <- factanal(covmat = druguse.cor, factors = 1, method = "mle", n.obs = 1634)
f1
f2 <- factanal(covmat = druguse.cor, factors = 2, method = "mle", n.obs = 1634)
f2

# Exploratory factor analysis - no constraints on which of the manifest
# variables load on the common factors.
# Confirmatory factor analysis - specific constraints are introduced (i.e.,
# specify models) - Structural equation modeling.

# In principal components analysis we assume that all variability in a
# variable should be used in the analysis.
# In factor analysis we only use the variability in a manifest variable that
# it has in common with the other manifest variables.

# If latent variables or factors underlie relationships between manifest
# variables, we should not expect that the factors will extract all variance
# from the manifest variables; rather, only the proportion that is due to the
# common factors and shared by several manifest variables will be extracted.

# The proportion of variance of a particular variable that is due to common
# factors (shared with other variables) is called communality.
# The communality of a variable is the proportion of a variable's total
# variance explained by all of the factors on which it loads - it is the sum
# of the squared loadings for each variable on all factors.
# Subtracting the communality of a variable from 1 yields that variable's
# uniqueness.
# Uniqueness (which is the opposite of communality) is the proportion of
# variance of the variable that is not accounted for by all of the factors
# taken together, and a very high uniqueness may indicate that a variable does
# not belong with any of the factors.

# In most cases, principal components analysis and factor analysis yield very
# similar results, but principal components analysis is often preferred as a
# method for data reduction, whereas factor analysis is often preferred when
# the goal of the analysis is to detect structure.

# Factor loadings are often rotated in an attempt to make them more
# interpretable.
# The goal of a rotation is to achieve a pattern of loadings that is easy to
# interpret, with a few large and many small coefficients.
# rotation="varimax" is the default in R

# Deciding on number of factors ----

f3 <- factanal(covmat = druguse.cor, factors = 3, method = "mle", n.obs = 1634)
f3
f4 <- factanal(covmat = druguse.cor, factors = 4, method = "mle", n.obs = 1634)
f4
f5 <- factanal(covmat = druguse.cor, factors = 5, method = "mle", n.obs = 1634)
f5
f6 <- factanal(covmat = druguse.cor, factors = 6, method = "mle", n.obs = 1634)
f6

#druguse.fa<-lapply(1:6,function(nf)factanal(covmat=druguse.cor,factors=nf,method="mle",n.obs=1634))

# With large n, even small differences between the correlation matrix
# predicted by a proposed model and the observed correlation matrix
# may become significant.
# You can look at the goodness or lack of fit instead.
#
# For each fit, `pred` is the matrix implied by the model (loadings times
# their transpose, plus the uniquenesses on the diagonal). The first printed
# value is the residual matrix (observed minus predicted), rounded to 3
# digits; the second is the mean of the rounded absolute residuals.

# Six-factor model
pred <- f6$loadings %*% t(f6$loadings) + diag(f6$uniquenesses)
round(druguse.cor - pred, digits = 3)
mean(round(abs(druguse.cor - pred), digits = 3))

# Five-factor model
pred <- f5$loadings %*% t(f5$loadings) + diag(f5$uniquenesses)
round(druguse.cor - pred, digits = 3)
mean(round(abs(druguse.cor - pred), digits = 3))

# Four-factor model
pred <- f4$loadings %*% t(f4$loadings) + diag(f4$uniquenesses)
round(druguse.cor - pred, digits = 3)
mean(round(abs(druguse.cor - pred), digits = 3))

# Three-factor model
pred <- f3$loadings %*% t(f3$loadings) + diag(f3$uniquenesses)
round(druguse.cor - pred, digits = 3)
mean(round(abs(druguse.cor - pred), digits = 3))

# Two-factor model
pred <- f2$loadings %*% t(f2$loadings) + diag(f2$uniquenesses)
round(druguse.cor - pred, digits = 3)
mean(round(abs(druguse.cor - pred), digits = 3))

# One-factor model
pred <- f1$loadings %*% t(f1$loadings) + diag(f1$uniquenesses)
round(druguse.cor - pred, digits = 3)
mean(round(abs(druguse.cor - pred), digits = 3))

# Life expectancy data ----

# Life expectancy in the 1960's in years by country, age, and sex
# NOTE(review): absolute, machine-specific path (and a different directory
# from the drug-use data above); adjust before running.
life <- read.table("/Users/yasu/Desktop/ma/Data/life.txt", header = TRUE)
# NOTE(review): attach() puts the columns of `life` on the search path. Nothing
# in this section needs it; it is kept only in case later code refers to the
# columns by bare name. Prefer life$column or with(life, ...) and drop this
# call once that is confirmed.
attach(life)
life
life.fa1 <- factanal(life, factors = 1, method = "mle")
life.fa1

# Do factor analysis
# How many factors?
# Try to interpret each factor

# download sem package (use the package installer in R to download)