### 
### Exploratory Factor Analysis

### First, set the working directory where R will find the data

# Point R at the folder that holds the course data (edit the path for your
# own machine), then echo it back as a sanity check.
setwd("H:/data/User/Classes/multiv_13/YTs/YTSplit/Week 09 wmv - 2016/Wk09.R")
getwd()
## [1] "H:/data/User/Classes/multiv_13/YTs/YTSplit/Week 09 wmv - 2016/Wk09.R"
### Read the data
### The CSV appears to begin with a UTF-8 byte-order mark: read with the
### default encoding, the first column name comes back mangled ("ï..const").
### fileEncoding = "UTF-8-BOM" strips the mark so the header is read cleanly.
### The analyses below select columns by position, so they are unaffected.
efa1 <- read.csv("Class10_practice_2014.csv", fileEncoding = "UTF-8-BOM")


library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
library(psych)
## 
## Attaching package: 'psych'
## The following object is masked from 'package:Hmisc':
##
## describe
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
### This week we'll focus on garden-variety factor analyses, the most
### common kind. Next week we'll think about special forms for ordinal
### and dichotomous items.

### Base R functions
### Efficient, but limited in output (no factor correlation; sort loadings
### doesn't work well)
# Peek at the first six rows to confirm the file was read as expected
head(efa1, n = 6L)
##   ï..const     CESD     Beck     POMS BetaAmyl WMVolume  FracAni AtrophyScore
## 1 1 34.73182 36.16655 44.57600 45.94785 46.97603 31.99389 49.99852
## 2 1 59.41073 62.15906 56.25899 54.69603 45.92990 21.98266 51.42699
## 3 1 31.59620 39.15886 43.89899 48.87836 57.16307 24.60464 50.18029
## 4 1 34.06906 39.94961 42.52231 45.45211 47.83405 31.62042 42.47116
## 5 1 52.53337 48.14276 46.14688 49.07371 49.41460 24.59134 49.87953
## 6 1 52.84505 54.42654 55.62126 50.31230 57.64351 29.76299 47.11341
## WorkingMemory Fluency Attention Reasoning Verbal
## 1 23.86472 13.46455 13.60931 12.60790 44.65840
## 2 20.97466 15.81759 15.78525 23.52436 15.05801
## 3 25.51209 32.41084 24.50479 30.24691 35.47969
## 4 30.13510 47.70895 36.03791 36.32771 32.01823
## 5 25.95482 26.15073 23.48610 20.98279 42.78060
## 6 20.83932 22.54041 21.76019 22.09729 32.99808
# Base-R maximum-likelihood EFA: 3 factors, orthogonal (varimax) rotation
fit.2 <- factanal(x = efa1[2:13], factors = 3, rotation = "varimax")
# A tiny cutoff keeps effectively every loading visible; sort groups the rows
# by the factor on which each variable loads most strongly
print(fit.2, digits = 3, cutoff = 1e-06, sort = TRUE)
## 
## Call:
## factanal(x = efa1[2:13], factors = 3, rotation = "varimax")
##
## Uniquenesses:
## CESD Beck POMS BetaAmyl WMVolume
## 0.236 0.285 0.191 0.150 0.239
## FracAni AtrophyScore WorkingMemory Fluency Attention
## 0.306 0.180 0.308 0.273 0.314
## Reasoning Verbal
## 0.174 0.995
##
## Loadings:
## Factor1 Factor2 Factor3
## BetaAmyl 0.680 0.622 -0.024
## WMVolume 0.869 0.030 -0.074
## FracAni -0.833 -0.014 0.016
## AtrophyScore 0.901 0.088 0.023
## CESD 0.005 0.873 -0.037
## Beck 0.057 0.843 0.006
## POMS 0.026 0.899 -0.002
## WorkingMemory -0.559 -0.022 0.616
## Fluency 0.025 0.038 0.851
## Attention -0.047 -0.055 0.825
## Reasoning -0.014 -0.034 0.908
## Verbal -0.049 0.035 0.041
##
## Factor1 Factor2 Factor3
## SS loadings 3.045 2.686 2.619
## Proportion Var 0.254 0.224 0.218
## Cumulative Var 0.254 0.478 0.696
##
## Test of the hypothesis that 3 factors are sufficient.
## The chi square statistic is 56.23 on 33 degrees of freedom.
## The p-value is 0.00708
### Enhanced FA output from psych
### Key benefits include more extensive fit information
### And for promax, we get a factor intercorrelation matrix
### Psych always presents the "factor pattern". This is identical
### To the factor structure for orthogonal rotations
### For oblique solutions, it may not be possible to see
### the factor structure, but this is not very important/useful anyway

#Basic scree plot, but also shows a scree plot based on the reduced
#eigenvalues from the SMC priors. Here, the Kaiser's rule wouldn't
#apply, but the elbow rule still would

# Draw both curves (factor and principal-component eigenvalues) on a new plot
scree(
  efa1[2:13],
  factors = TRUE,
  pc = TRUE,
  main = "Scree plot",
  hline = TRUE,
  add = FALSE
)

# Unrotated principal-axis solution (fm = "pa"), 3 factors, with squared
# multiple correlations as the starting communality estimates (SMC = TRUE)

fit.3 <- fa(
  r = efa1[2:13], nfactors = 3, rotate = "none", scores = "regression",
  residuals = FALSE, SMC = TRUE, fm = "pa",
  alpha = 0.05, p = 0.05, oblique.scores = FALSE, use = "pairwise", cor = "cor"
)
# psych's print method calls its loading threshold `cut`; `cutoff` is the name
# used when printing factanal() fits. An unrecognised `cutoff` is swallowed by
# `...` and echoed into the fit statistics, which is where the stray "1e-06"
# tokens in the transcript below came from.
print(fit.3, cut = 1e-06, sort = TRUE)
## Factor Analysis using method =  pa
## Call: fa(r = efa1[2:13], nfactors = 3, rotate = "none", scores = "regression",
## residuals = FALSE, SMC = TRUE, fm = "pa", alpha = 0.05, p = 0.05,
## oblique.scores = FALSE, use = "pairwise", cor = "cor")
## Standardized loadings (pattern matrix) based upon correlation matrix
## item PA1 PA2 PA3 h2 u2 com
## BetaAmyl 4 0.86 0.35 0.04 0.8545 0.15 1.3
## AtrophyScore 7 0.73 0.07 0.53 0.8274 0.17 1.8
## WMVolume 5 0.70 -0.04 0.50 0.7459 0.25 1.8
## WorkingMemory 8 -0.68 0.49 -0.07 0.7070 0.29 1.8
## FracAni 6 -0.65 0.01 -0.52 0.6932 0.31 1.9
## Reasoning 11 -0.36 0.72 0.42 0.8245 0.18 2.2
## Fluency 9 -0.28 0.71 0.37 0.7211 0.28 1.9
## Attention 10 -0.37 0.64 0.37 0.6832 0.32 2.3
## Beck 2 0.48 0.50 -0.49 0.7125 0.29 3.0
## Verbal 12 -0.03 0.06 -0.03 0.0055 0.99 2.4
## CESD 1 0.47 0.47 -0.56 0.7599 0.24 2.9
## POMS 3 0.49 0.52 -0.55 0.8118 0.19 3.0
##
## PA1 PA2 PA3
## SS loadings 3.69 2.55 2.11
## Proportion Var 0.31 0.21 0.18
## Cumulative Var 0.31 0.52 0.70
## Proportion Explained 0.44 0.30 0.25
## Cumulative Proportion 0.44 0.75 1.00
##
## Mean item complexity = 2.2
## Test of the hypothesis that 3 factors are sufficient.
##
## The degrees of freedom for the null model are 66 and the objective function was 8.44 1e-06 with Chi Square of 2652
## The degrees of freedom for the model are 33 and the objective function was 0.18
## 1e-06
## The root mean square of the residuals (RMSR) is 0.02
## The df corrected root mean square of the residuals is 0.02
## 1e-06
## The harmonic number of observations is 320 with the empirical chi square 9.81 with prob < 1
## 1e-06The total number of observations was 320 with Likelihood Chi Square = 57.29 with prob < 0.0055
## 1e-06
## Tucker Lewis Index of factoring reliability = 0.981
## RMSEA index = 0.048 and the 95 % confidence intervals are 0.02 0.072 1e-06
## BIC = -133.06
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## PA1 PA2 PA3
## Correlation of (regression) scores with factors 0.97 0.96 0.95
## Multiple R square of scores with factors 0.95 0.92 0.90
## Minimum correlation of possible factor scores 0.89 0.83 0.80
# Rotated, orthogonal, varimax: same principal-axis extraction as the
# unrotated fit, with the factors kept uncorrelated

fit.3.varimax <- fa(
  r = efa1[2:13], nfactors = 3, rotate = "varimax", scores = "regression",
  residuals = FALSE, SMC = TRUE, fm = "pa",
  alpha = 0.05, p = 0.05, oblique.scores = FALSE, use = "pairwise", cor = "cor"
)
# Use `cut` (psych's argument name) rather than `cutoff`: the latter is not
# recognised by psych's print method and leaks into the printed fit text as
# stray "1e-06" tokens, as seen in the transcript below.
print(fit.3.varimax, cut = 1e-06, sort = TRUE)
## Factor Analysis using method =  pa
## Call: fa(r = efa1[2:13], nfactors = 3, rotate = "varimax", scores = "regression",
## residuals = FALSE, SMC = TRUE, fm = "pa", alpha = 0.05, p = 0.05,
## oblique.scores = FALSE, use = "pairwise", cor = "cor")
## Standardized loadings (pattern matrix) based upon correlation matrix
## item PA1 PA3 PA2 h2 u2 com
## AtrophyScore 7 0.91 0.09 0.02 0.8274 0.17 1.0
## WMVolume 5 0.86 0.03 -0.08 0.7459 0.25 1.0
## FracAni 6 -0.83 -0.01 0.02 0.6932 0.31 1.0
## BetaAmyl 4 0.68 0.62 -0.02 0.8545 0.15 2.0
## Verbal 12 -0.05 0.04 0.04 0.0055 0.99 2.9
## POMS 3 0.03 0.90 0.00 0.8118 0.19 1.0
## CESD 1 0.01 0.87 -0.04 0.7599 0.24 1.0
## Beck 2 0.06 0.84 0.01 0.7125 0.29 1.0
## Reasoning 11 -0.01 -0.03 0.91 0.8245 0.18 1.0
## Fluency 9 0.03 0.04 0.85 0.7211 0.28 1.0
## Attention 10 -0.05 -0.05 0.82 0.6832 0.32 1.0
## WorkingMemory 8 -0.56 -0.02 0.62 0.7070 0.29 2.0
##
## PA1 PA3 PA2
## SS loadings 3.04 2.68 2.62
## Proportion Var 0.25 0.22 0.22
## Cumulative Var 0.25 0.48 0.70
## Proportion Explained 0.36 0.32 0.31
## Cumulative Proportion 0.36 0.69 1.00
##
## Mean item complexity = 1.3
## Test of the hypothesis that 3 factors are sufficient.
##
## The degrees of freedom for the null model are 66 and the objective function was 8.44 1e-06 with Chi Square of 2652
## The degrees of freedom for the model are 33 and the objective function was 0.18
## 1e-06
## The root mean square of the residuals (RMSR) is 0.02
## The df corrected root mean square of the residuals is 0.02
## 1e-06
## The harmonic number of observations is 320 with the empirical chi square 9.81 with prob < 1
## 1e-06The total number of observations was 320 with Likelihood Chi Square = 57.29 with prob < 0.0055
## 1e-06
## Tucker Lewis Index of factoring reliability = 0.981
## RMSEA index = 0.048 and the 95 % confidence intervals are 0.02 0.072 1e-06
## BIC = -133.06
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## PA1 PA3 PA2
## Correlation of (regression) scores with factors 0.96 0.96 0.96
## Multiple R square of scores with factors 0.93 0.92 0.91
## Minimum correlation of possible factor scores 0.86 0.84 0.83
# Rotated, oblique, promax: factors are allowed to correlate, so the printed
# output also includes the factor intercorrelation matrix

fit.3.promax <- fa(
  r = efa1[2:13], nfactors = 3, rotate = "promax", scores = "regression",
  residuals = FALSE, SMC = TRUE, fm = "pa",
  alpha = 0.05, p = 0.05, oblique.scores = TRUE, use = "pairwise", cor = "cor"
)
## Loading required namespace: GPArotation
# Use `cut` (psych's argument name) rather than `cutoff`: the latter is not
# recognised by psych's print method and leaks into the printed fit text as
# stray "1e-06" tokens, as seen in the transcript below.
print(fit.3.promax, cut = 1e-06, sort = TRUE)
## Factor Analysis using method =  pa
## Call: fa(r = efa1[2:13], nfactors = 3, rotate = "promax", scores = "regression",
## residuals = FALSE, SMC = TRUE, fm = "pa", alpha = 0.05, p = 0.05,
## oblique.scores = TRUE, use = "pairwise", cor = "cor")
## Standardized loadings (pattern matrix) based upon correlation matrix
## item PA1 PA2 PA3 h2 u2 com
## AtrophyScore 7 0.94 0.16 0.03 0.8274 0.17 1.1
## WMVolume 5 0.88 0.06 -0.02 0.7459 0.25 1.0
## FracAni 6 -0.86 -0.11 0.04 0.6932 0.31 1.0
## BetaAmyl 4 0.68 0.05 0.59 0.8545 0.15 2.0
## Verbal 12 -0.04 0.03 0.04 0.0055 0.99 2.9
## Reasoning 11 0.14 0.94 -0.08 0.8245 0.18 1.1
## Fluency 9 0.16 0.88 -0.01 0.7211 0.28 1.1
## Attention 10 0.08 0.85 -0.10 0.6832 0.32 1.0
## WorkingMemory 8 -0.48 0.56 -0.03 0.7070 0.29 2.0
## POMS 3 -0.01 -0.06 0.91 0.8118 0.19 1.0
## CESD 1 -0.04 -0.10 0.88 0.7599 0.24 1.0
## Beck 2 0.02 -0.04 0.84 0.7125 0.29 1.0
##
## PA1 PA2 PA3
## SS loadings 3.06 2.62 2.67
## Proportion Var 0.25 0.22 0.22
## Cumulative Var 0.25 0.47 0.70
## Proportion Explained 0.37 0.31 0.32
## Cumulative Proportion 0.37 0.68 1.00
##
## With factor correlations of
## PA1 PA2 PA3
## PA1 1.00 -0.29 0.07
## PA2 -0.29 1.00 0.10
## PA3 0.07 0.10 1.00
##
## Mean item complexity = 1.3
## Test of the hypothesis that 3 factors are sufficient.
##
## The degrees of freedom for the null model are 66 and the objective function was 8.44 1e-06 with Chi Square of 2652
## The degrees of freedom for the model are 33 and the objective function was 0.18
## 1e-06
## The root mean square of the residuals (RMSR) is 0.02
## The df corrected root mean square of the residuals is 0.02
## 1e-06
## The harmonic number of observations is 320 with the empirical chi square 9.81 with prob < 1
## 1e-06The total number of observations was 320 with Likelihood Chi Square = 57.29 with prob < 0.0055
## 1e-06
## Tucker Lewis Index of factoring reliability = 0.981
## RMSEA index = 0.048 and the 95 % confidence intervals are 0.02 0.072 1e-06
## BIC = -133.06
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## PA1 PA2 PA3
## Correlation of (regression) scores with factors 0.97 0.96 0.96
## Multiple R square of scores with factors 0.93 0.92 0.92
## Minimum correlation of possible factor scores 0.87 0.83 0.85
### The regression method of factor scores is fine for orthogonal solutions
### Package creator recommends method tenBerge for oblique solutions, as below
### For orthogonal solutions, change below to regression

# Estimate factor scores for every row from the promax solution, using the
# tenBerge method
fspro <- factor.scores(
  efa1[2:13], fit.3.promax,
  Phi = NULL, method = "tenBerge", rho = NULL
)

# Append the estimated scores to the working data frame as extra columns
efascore <- as.data.frame(fspro$scores)
efa1 <- cbind(efa1, efascore)