#Saving residual and evaluating residual normality

setwd("H:/data/User/Classes/stats01_13/Week10/Class10_2020/R.Week10")
getwd()
## [1] "H:/data/User/Classes/stats01_13/Week10/Class10_2020/R.Week10"
### Read the data
# Read an spss sav file

library(haven)
Wk10 <- read_sav("Week_11_Practice_2014.sav")

#Create a listwise deletion data set
Wk10nm<-na.omit(Wk10[,c(1:3,7:8)])

#Obtain multiple correlation from regression

fit1 <- lm(DV1 ~ IV1 + IV2, data=Wk10nm)
summary(fit1) # show results
## 
## Call:
## lm(formula = DV1 ~ IV1 + IV2, data = Wk10nm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.6493 -7.9209 0.5525 7.5321 24.5636
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 55.55112 2.68834 20.66 <2e-16 ***
## IV1 0.52110 0.03259 15.99 <2e-16 ***
## IV2 -0.63250 0.06117 -10.34 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.26 on 197 degrees of freedom
## Multiple R-squared: 0.5681, Adjusted R-squared: 0.5638
## F-statistic: 129.6 on 2 and 197 DF, p-value: < 2.2e-16
confint(fit1, level=0.95) # CIs for model parameters 
##                  2.5 %     97.5 %
## (Intercept) 50.2494952 60.8527396
## IV1 0.4568406 0.5853614
## IV2 -0.7531298 -0.5118763
library(QuantPsyc)
## Loading required package: boot
## Loading required package: MASS
## 
## Attaching package: 'QuantPsyc'
## The following object is masked from 'package:base':
##
## norm
lm.beta(fit1)
##        IV1        IV2 
## 0.8965188 -0.5796954

#To save residual to the data set, we first 

Wk10nm$resid <- NA
Wk10nm$resid <- fit1$resid

#Checking normality of residuals

#skewness and kurtosis

#Load helper function

spssSkewKurtosis=function(x) {
w=length(x)
m1=mean(x)
m2=sum((x-m1)^2)
m3=sum((x-m1)^3)
m4=sum((x-m1)^4)
s1=sd(x)
skew=w*m3/(w-1)/(w-2)/s1^3
sdskew=sqrt( 6*w*(w-1) / ((w-2)*(w+1)*(w+3)) )
kurtosis=(w*(w+1)*m4 - 3*m2^2*(w-1)) / ((w-1)*(w-2)*(w-3)*s1^4)
sdkurtosis=sqrt( 4*(w^2-1) * sdskew^2 / ((w-3)*(w+5)) )
mat=matrix(c(skew,kurtosis, sdskew,sdkurtosis), 2,
dimnames=list(c("skew","kurtosis"), c("estimate","se")))
return(mat)
}

#Evaluate skewness and kurtosis of the three variables.

skewkurt<-as.data.frame(spssSkewKurtosis(Wk10nm$resid))
skewkurt$z.ratio<-skewkurt$estimate / skewkurt$se
skewkurt
##             estimate        se    z.ratio
## skew -0.02929535 0.1719248 -0.1703963
## kurtosis -0.55829953 0.3422024 -1.6314893
#Kolmogorov-Smirnov and Shapiro-Wilk

shapiro.test(Wk10nm$resid)
## 
## Shapiro-Wilk normality test
##
## data: Wk10nm$resid
## W = 0.98978, p-value = 0.1658
library(nortest)
lillie.test(Wk10nm$resid)
## 
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: Wk10nm$resid
## D = 0.038042, p-value = 0.6839
#histogram

h1 <- hist(Wk10nm$resid, breaks = 25, density = 100,
col = "red", xlab = "DV1 residual", main = "Histogram of DV1 residual")
xfit1 <- seq(min(Wk10nm$resid), max(Wk10nm$resid), length = 40)
yfit1 <- dnorm(xfit1, mean = mean(Wk10nm$resid), sd = sd(Wk10nm$resid))
yfit1 <- yfit1 * diff(h1$mids[1:2]) * length(Wk10nm$resid)
lines(xfit1, yfit1, col = "black", lwd = 2)

#QQ plots 

qqnorm(Wk10nm$resid, pch = 1, col="red", frame = FALSE)
qqline(Wk10nm$resid, col = "red", lwd = 2)

boxplot(Wk10nm$resid, range = 1.5)