### 6. ANOVA with orthogonal polynomial contrasts
Wk01c<- read.csv("Class04c_2013.csv")
head(Wk01c)
##   ï..const NumMeds YearsLived
## 1 1 1 9
## 2 1 1 10
## 3 1 1 11
## 4 1 1 10
## 5 1 1 10
## 6 1 1 9
#A. Begin by Visualizing linear and non-linear relationship
#From last semester week 10
attach(Wk01c)
names(Wk01c)
## [1] "ï..const"   "NumMeds"    "YearsLived"
# First, let's set up a linear model, though really we should plot first and only 
#then perform the regression.

linear.model <-lm(Wk01c$YearsLived ~ Wk01c$NumMeds)
#We now obtain detailed information on our regression through the summary() command.

summary(linear.model)
## 
## Call:
## lm(formula = Wk01c$YearsLived ~ Wk01c$NumMeds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.495 -1.495 0.345 1.345 3.185
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.9750 0.2931 40.85 <2e-16 ***
## Wk01c$NumMeds -2.1600 0.1070 -20.18 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.514 on 158 degrees of freedom
## Multiple R-squared: 0.7205, Adjusted R-squared: 0.7187
## F-statistic: 407.3 on 1 and 158 DF, p-value: < 2.2e-16
#Let's plot DV over IV and superpose our linear model.

plot(Wk01c$NumMeds, Wk01c$YearsLived, pch=16, ylab = "Libido", cex.lab = 1.3,
col = "red" )

abline(lm(Wk01c$YearsLived ~ Wk01c$NumMeds), col = "blue")

#Now we fit a model that is quadratic in dose. We create a squared IV

Wk01c$NumMeds2 <- Wk01c$NumMeds^2

quadratic.model <-lm(Wk01c$YearsLived ~ Wk01c$NumMeds + Wk01c$NumMeds2)

#Note the syntax involved in fitting a linear model with two or more predictors.
#We include each predictor and put a plus sign between them.

#Now let's plot the quadratic model by setting up a grid of IV values running
#from 1-4 in increments of 1 unit.

Wk01c$groupvalues <- seq(1,4,1)
Wk01c$predictedcounts <- predict(quadratic.model,list(Group=Wk01c$groupvalues,
Group2=Wk01c$groupvalues^2))
plot(Wk01c$NumMeds, Wk01c$YearsLived, type="p", pch=16, xlab = "Number of Medications",
ylab = "YearsLived",
cex.lab = 1.3, col = "blue")
#Now we include the quadratic model to the plot using the lines() command.

lines(Wk01c$NumMeds, Wk01c$predictedcounts, col = "darkgreen", lwd = 3)

#Note that the line above is generated based on the regression model, and plots a random
#grid of IV values that we generated above. What you will get is a plot of the model-implied
#smooth quadratic, and not the actual plot of the group means.

#In contrast, if you only wanted to superimpose a line connecting IV means to better show the trend,
#You can use ggplot2. By default, ggplot is set up to plot lines for multiple groups
#To get only one line, you need to put everyone in one "group". We do this with the constant
#command below

Wk01c$const<-1

ggplot(Wk01c, aes(x = NumMeds, y = YearsLived)) +
geom_point() +
stat_summary(aes(group=const), fun.y = mean, geom="line")+
theme_bw()
## Warning: `fun.y` is deprecated. Use `fun` instead.

#B. Estimating linear and quadratic orthogonal polynomials


# Coding rules below show how to test up to the quintic:
#2 levels
#Linear -1 1

#3 levels
#Linear -1 0 1
#Quadratic 1 -2 1

#4 levels
#Linear -3 -1 1 3
#Quadratic 1 -1 -1 1
#Cubic -1 3 -3 1

#5 levels
#Linear -2 -1 0 1 2
#Quadratic 2 -1 -2 -1 2
#Cubic -1 2 0 -2 1
#Quartic 1 -4 6 -4 1

#6 levels
#Linear -5 -3 -1 1 3 5
#Quadratic 5 -1 -4 -4 -1 5
#Cubic -5 7 4 -4 -7 5
#Quartic 1 -3 2 2 -3 1
#Quintic -1 5 -10 10 -5 1


#The polynomial ANOVA
Lived_mod<-lm(YearsLived ~ NumMeds, Wk01c)
anova(Lived_mod)
## Analysis of Variance Table
##
## Response: YearsLived
## Df Sum Sq Mean Sq F value Pr(>F)
## NumMeds 1 933.12 933.12 407.3 < 2.2e-16 ***
## Residuals 158 361.98 2.29
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Need to assign contrast coefficients
# Our desired contrasts (from above):
#Linear -3 -1 1 3
#Quadratic 1 -1 -1 1
#Cubic -1 3 -3 1
contrastmatrix<-cbind( c(-3,-1,1,3),
c(1,-1,-1,1),
c(-1,3,-3,1))
contrastmatrix
##      [,1] [,2] [,3]
## [1,] -3 1 -1
## [2,] -1 -1 3
## [3,] 1 -1 -3
## [4,] 3 1 1
Wk01c$NumMeds<-as.factor(Wk01c$NumMeds)
contrasts(Wk01c$NumMeds)<-contrastmatrix
Wk01c$NumMeds
##   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [38] 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [75] 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [112] 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [149] 4 4 4 4 4 4 4 4 4 4 4 4
## attr(,"contrasts")
## [,1] [,2] [,3]
## 1 -3 1 -1
## 2 -1 -1 3
## 3 1 -1 -3
## 4 3 1 1
## Levels: 1 2 3 4
Lived_poly_mod<-aov(YearsLived ~ NumMeds, Wk01c)
summary(Lived_poly_mod, split = list(NumMeds = list("Lin" = 1,
"Quad" = 2,
"Cub"=3)))
##                  Df Sum Sq Mean Sq F value Pr(>F)    
## NumMeds 3 1191.5 397.2 598.4 <2e-16 ***
## NumMeds: Lin 1 933.1 933.1 1405.8 <2e-16 ***
## NumMeds: Quad 1 112.2 112.2 169.1 <2e-16 ***
## NumMeds: Cub 1 146.2 146.2 220.3 <2e-16 ***
## Residuals 156 103.6 0.7
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#In general, people do not report contrast SS; so in 
# practice you can simply use lm(), as
#usual:
Lived_poly2_mod<-lm(YearsLived ~ NumMeds, Wk01c)
summary(Lived_poly2_mod)
## 
## Call:
## lm(formula = YearsLived ~ NumMeds, data = Wk01c)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.225 -0.600 -0.100 0.625 2.775
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.57500 0.06441 102.08 <2e-16 ***
## NumMeds1 -1.08000 0.02880 -37.49 <2e-16 ***
## NumMeds2 0.83750 0.06441 13.00 <2e-16 ***
## NumMeds3 0.42750 0.02880 14.84 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8147 on 156 degrees of freedom
## Multiple R-squared: 0.92, Adjusted R-squared: 0.9185
## F-statistic: 598.4 on 3 and 156 DF, p-value: < 2.2e-16
]