#One way to assess homoscedasticity is to categorize a continuous IV
#And then test homogeneity of residual variance (Levene's test) across levels of that IV

setwd("H:/data/User/Classes/stats01_13/Week10/Class10_2020/R.Week10")
getwd()
## [1] "H:/data/User/Classes/stats01_13/Week10/Class10_2020/R.Week10"
### Read the data
# Read an spss sav file

library(haven)
ass7 <- read_sav("ass7.sav")

#This data set has missing values, so it is best to make a no-miss data set first

ass7_nomiss<-na.omit(ass7)

#Run the basic regression

fit1 <- lm(AVLT1 ~ AGE, data=ass7_nomiss)
summary(fit1) # show results
## 
## Call:
## lm(formula = AVLT1 ~ AGE, data = ass7_nomiss)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.7633 -7.4408 0.0167 7.5982 25.9263
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 72.0102 10.0078 7.195 2.11e-11 ***
## AGE -0.3767 0.1344 -2.803 0.00567 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.55 on 164 degrees of freedom
## Multiple R-squared: 0.04573, Adjusted R-squared: 0.03991
## F-statistic: 7.859 on 1 and 164 DF, p-value: 0.005667
confint(fit1, level=0.95) # CIs for model parameters 
##                  2.5 %     97.5 %
## (Intercept) 52.2495099 91.7708450
## AGE -0.6419821 -0.1113722
#lm does not show standardized bweight values (beta), so we use lm.beta from the
#QuantPsyc package

library(QuantPsyc)
## Loading required package: boot
## Loading required package: MASS
## 
## Attaching package: 'QuantPsyc'
## The following object is masked from 'package:base':
##
## norm
lm.beta(fit1)
##        AGE 
## -0.2138465

#Save the residuals to your data frame. First, we define all values of the residual as NA
# Then we replace non-missing values of the residual with actual values
# This ensures that the vector length is unaltered

#Save the residuals to your data frame. First, we define all values of the residual as NA
# Then we replace non-missing values of the residual with actual values
# This ensures that the vector length is unaltered

ass7_nomiss$resid <- NA
ass7_nomiss$resid <- fit1$resid


#For Levene's test, first we categorize age. We'll do this in five year bands

ass7_nomiss$agecat<-cut(ass7_nomiss$AGE, seq(65,100,5))

#Next, compute the Levene's test

#install.packages("car")
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:boot':
##
## logit
# Levene's test with one independent variable
leveneTest(resid ~ agecat, data = ass7_nomiss)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 1.3843 0.2417
## 161
#To visualize variability by age category

library(psych)
## 
## Attaching package: 'psych'
## The following object is masked from 'package:car':
##
## logit
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## The following object is masked from 'package:boot':
##
## logit
describeBy(ass7_nomiss$resid,group=ass7_nomiss$agecat,type=3,mat=TRUE)
##     item   group1 vars  n       mean        sd     median    trimmed       mad
## X11 1 (65,70] 1 50 -0.2577897 10.586587 1.5449108 0.5956769 10.652395
## X12 2 (70,75] 1 48 -0.8292078 9.432295 -1.4059243 -1.0620084 10.167965
## X13 3 (75,80] 1 35 1.4464960 10.194832 -0.9650445 1.1878696 10.594801
## X14 4 (80,85] 1 22 2.0685726 13.567101 2.1466868 2.0988256 14.875692
## X15 5 (85,90] 1 11 -3.9494998 8.898729 -3.7030875 -3.6674497 4.070141
## X16 6 (90,95] NA NA NA NA NA NA NA
## X17 7 (95,100] NA NA NA NA NA NA NA
## min max range skew kurtosis se
## X11 -27.76327 16.31201 44.07528 -0.65487522 -0.2952607 1.497169
## X12 -17.18609 17.93354 35.11963 0.26028453 -1.0124852 1.361435
## X13 -17.97226 21.95658 39.92884 0.21508584 -1.0673955 1.723241
## X14 -25.68384 25.92633 51.61017 -0.06193491 -0.9028611 2.892516
## X15 -21.15203 10.71458 31.86662 -0.42287114 -0.6033906 2.683068
## X16 NA NA NA NA NA NA
## X17 NA NA NA NA NA NA