In R, there are three different ways to supply the data to the function glm to fit a logistic regression model. We will first look at the two ways to analyze summarized (grouped) data, and then at the way to analyze non-summarized data.
# Summarized (grouped) data: one row per gender x ECG combination, where
# `cnt` is the number of successes out of `total` subjects in that cell.
ecg.dat <- data.frame(
  cnt   = c(4, 8, 9, 21),
  total = c(15, 18, 18, 27),
  gend  = rep(c("female_0", "male_1"), each = 2),
  ecg   = rep(c(0, 1), times = 2)
)
ecg.dat
## cnt total gend ecg
## 1 4 15 female_0 0
## 2 8 18 female_0 1
## 3 9 18 male_1 0
## 4 21 27 male_1 1
Method 1: (successes,failures) ~ covariates
# Grouped-data fit: the response is a two-column matrix holding the
# (successes, failures) counts for each row of ecg.dat.
ecg.logit <- glm(
  formula = cbind(cnt, total - cnt) ~ gend,
  family  = binomial(link = "logit"),
  data    = ecg.dat
)
summary(ecg.logit)
##
## Call:
## glm(formula = cbind(cnt, total - cnt) ~ gend, family = binomial(link = "logit"),
## data = ecg.dat)
##
## Deviance Residuals:
## 1 2 3 4
## -0.7994 0.7034 -1.4561 1.2684
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.5596 0.3619 -1.546 0.12200
## gendmale_1 1.2528 0.4806 2.607 0.00914 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 11.9835 on 3 degrees of freedom
## Residual deviance: 4.8626 on 2 degrees of freedom
## AIC: 21.958
##
## Number of Fisher Scoring iterations: 4
Method 2: successes/(successes + failures) ~ covariates with the weights option
# Grouped-data fit, proportion form: the response is the observed
# proportion cnt/total, and `weights` supplies the number of trials
# behind each proportion.
ecg.logit2 <- glm(
  formula = cnt / total ~ gend,
  family  = binomial(link = "logit"),
  data    = ecg.dat,
  weights = total
)
summary(ecg.logit2)
##
## Call:
## glm(formula = cnt/total ~ gend, family = binomial(link = "logit"),
## data = ecg.dat, weights = total)
##
## Deviance Residuals:
## 1 2 3 4
## -0.7994 0.7034 -1.4561 1.2684
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.5596 0.3619 -1.546 0.12200
## gendmale_1 1.2528 0.4806 2.607 0.00914 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 11.9835 on 3 degrees of freedom
## Residual deviance: 4.8626 on 2 degrees of freedom
## AIC: 21.958
##
## Number of Fisher Scoring iterations: 4
Method 3: Non-summarized data (one row per subject, binary response ~ covariates)
# Individual-level data: one row per subject (78 in total). The 0/1
# response CA alternates in runs whose lengths are the cell counts of
# failures and successes within each gender x ECG combination.
ecg.dat2 <- data.frame(
  CA   = rep(rep(c(0, 1), times = 4), times = c(11, 4, 10, 8, 9, 9, 6, 21)),
  gend = rep(c("female_0", "male_1"), times = c(33, 45)),
  ecg  = rep(c(0, 1, 0, 1), times = c(15, 18, 18, 27))
)
head(ecg.dat2)
## CA gend ecg
## 1 0 female_0 0
## 2 0 female_0 0
## 3 0 female_0 0
## 4 0 female_0 0
## 5 0 female_0 0
## 6 0 female_0 0
# Cross-tabulate gender against the binary response to check the counts.
table(gend = ecg.dat2$gend, CA = ecg.dat2$CA)
## CA
## gend 0 1
## female_0 21 12
## male_1 15 30
# Individual-level fit: the response is the 0/1 indicator CA, one
# observation per subject.
ecg.logit3 <- glm(
  formula = CA ~ gend,
  family  = binomial(link = "logit"),
  data    = ecg.dat2
)
summary(ecg.logit3)
##
## Call:
## glm(formula = CA ~ gend, family = binomial(link = "logit"), data = ecg.dat2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.4823 -0.9508 0.9005 0.9005 1.4224
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.5596 0.3619 -1.546 0.12200
## gendmale_1 1.2528 0.4806 2.607 0.00914 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 107.67 on 77 degrees of freedom
## Residual deviance: 100.55 on 76 degrees of freedom
## AIC: 104.55
##
## Number of Fisher Scoring iterations: 4
Estimated probabilities
# Fitted probabilities for row 1 (a female subject) and row 34 (the first
# male subject) of ecg.dat2.
fitted(ecg.logit3)[c(1, 34)]
## 1 34
## 0.3636364 0.6666667
# Same two subjects on the linear predictor (logit) scale; "link" is the
# default type for predict() on a glm fit.
predict(ecg.logit3, type = "link")[c(1, 34)]
## 1 34
## -0.5596158 0.6931472
# Predicted probabilities for new data: one row per gender level.
predict(
  ecg.logit3,
  newdata = data.frame(gend = c("female_0", "male_1")),
  type = "response"
)
## 1 2
## 0.3636364 0.6666667
Effects in terms of odds ratios with confidence intervals
# Exponentiate the logit-scale coefficients: the intercept becomes the
# baseline (female_0) odds and gendmale_1 becomes the odds ratio.
exp(coef(ecg.logit))
## (Intercept) gendmale_1
## 0.5714286 3.5000000
# 95% intervals based on asymptotic normality (estimate +/- z * SE),
# computed on the logit scale and then exponentiated.
exp(confint.default(ecg.logit, level = 0.95))
## 2.5 % 97.5 %
## (Intercept) 0.2811478 1.161420
## gendmale_1 1.3645889 8.977063
Likelihood ratio test that all X coefficients are equal to 0
# Compare the fitted model with the null model via the drop in deviance.
ecg.logit$null.deviance - ecg.logit$deviance #LR test statistic
## [1] 7.120893
ecg.logit$df.null - ecg.logit$df.residual #df
## [1] 1
# Upper-tail chi-squared probability of the deviance drop.
pchisq(
  ecg.logit$null.deviance - ecg.logit$deviance,
  df = ecg.logit$df.null - ecg.logit$df.residual,
  lower.tail = FALSE
) #p-value
## [1] 0.007619074