# Discriminant analysis is used to classify a case into one of two or more populations. # You need to know which population the individual belongs to for the initial sample. # You classify future individuals whose membership is unknown (prediction). # You identify which variables contribute to making the classification (description). # Mathematically identical to MANOVA depression <- read.table("/Users/yasu/Desktop/maFall2009/Data/depress.txt", header=FALSE) colnames(depression) <- c('id', 'sex', 'age', 'marital', 'education', 'employment', 'income', 'religion', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9', 'c10', 'c11', 'c12', 'c13', 'c14', 'c15', 'c16', 'c17', 'c18', 'c19', 'c20', 'cesd', 'cases', 'drink', 'health', 'regulardoc', 'treat', 'beddays', 'acuteill', 'chronicill') attach(depression) nsex <- tapply(sex[cases==0], id[cases==0], mean) dsex <- tapply(sex[cases==1], id[cases==1], mean) mean(nsex) mean(dsex) table(nsex) table(dsex) chisq.test(rbind(table(nsex),table(dsex))) nage <- tapply(age[cases==0], id[cases==0], mean) dage <- tapply(age[cases==1], id[cases==1], mean) mean(nage) mean(dage) t.test(nage, dage) nmarital <- tapply(marital[cases==0], id[cases==0], mean) dmarital <- tapply(marital[cases==1], id[cases==1], mean) mean(nmarital) mean(dmarital) table(nmarital) table(dmarital) chisq.test(rbind(table(nmarital),table(dmarital))) neducation <- tapply(education[cases==0], id[cases==0], mean) deducation <- tapply(education[cases==1], id[cases==1], mean) mean(neducation) mean(deducation) table(neducation) tabulate(deducation) chisq.test(rbind(table(neducation),tabulate(deducation))) nincome <- tapply(income[cases==0], id[cases==0], mean) dincome <- tapply(income[cases==1], id[cases==1], mean) mean(nincome) mean(dincome) t.test(nincome, dincome) nhealth <- tapply(health[cases==0], id[cases==0], mean) dhealth <- tapply(health[cases==1], id[cases==1], mean) mean(nhealth) mean(dhealth) table(nhealth) table(dhealth) chisq.test(rbind(table(nhealth),table(dhealth))) nbedbdays <- tapply(beddays[cases==0], id[cases==0], mean) dbeddays <- tapply(beddays[cases==1], id[cases==1], mean) mean(nbedbdays) mean(dbeddays) table(nbedbdays) table(dbeddays) chisq.test(rbind(table(nbedbdays),table(dbeddays))) depress.manova <- manova(cbind(age, income) ~ cases) summary(depress.manova, test='Pillai') summary(depress.manova, test='Wilks') summary(depress.manova, test='Hotelling') summary(depress.manova, test='Roy') library(MASS) disc <- lda(cases ~ age + income, na.action="na.omit") fit <- predict(disc) plot(disc) ct <- table(cases, fit$class) ct diag(prop.table(ct, 1)) disc <- lda(cases ~ age + income, prior=c(.5, .5), na.action="na.omit") fit <- predict(disc, method = c("plug-in")) plot(disc) ct <- table(cases, fit$class) ct diag(prop.table(ct, 1)) disc <- lda(cases ~ age + income + beddays + religion, prior=c(.5, .5), na.action="na.omit") fit <- predict(disc, method = c("plug-in")) ct <- table(cases, fit$class) ct plot(disc) diag(prop.table(ct, 1)) disc <- lda(cases ~ age + income + beddays + religion, na.action="na.omit") fit <- predict(disc, method = c("plug-in")) ct <- table(cases, fit$class) ct plot(disc) diag(prop.table(ct, 1))