Measure of Association
Variable List
Variable | Description |
ID | Subject ID |
age | Subject age |
BMI | Subject BMI |
diabetes | Diabetes
0 – no 1 – yes |
race | Subject Race
0 – African-American 1 – Hispanic 2 – White |
smoke | Smoking Status
0 – no 1 – yes |
drinkany | Drink Alcohol
0 – no 1 – yes |
Solution
Code
mydata=read.csv(‘data5.csv’, header = TRUE, sep = “,”, quote = “\””,dec = “,”)
tbl=table(mydata$smoke, mydata$diabetes)
tbl <- cbind(tbl, rowSums(tbl), rowSums(tbl) / sum(tbl))
install.packages(“epitools”)
library(“epitools”)
epitab(tbl,method=”riskratio”)
tbl=table(mydata$race, mydata$diabetes)
tbl <- cbind(tbl, rowSums(tbl), rowSums(tbl) / sum(tbl))
chisq.test(tbl)
ci.mean <- function(x, alfa=0.05){
n <- length(x)
a<-qt(1-alfa/2, n-1)
m<-mean(x, na.rm=TRUE); s<-sd(x, na.rm=TRUE)
se<-s/sqrt(n)
res <- c(m, m-a*se,m+a*se)
setNames(res, c(“mean”, paste(100*alfa/2, “%”), paste(100*(1-alfa/2), “%”)))
}
ci.mean(with(mydata, age[diabetes==1&drinkany==1]))
ci.mean(with(mydata, age[diabetes==1&drinkany==0]))
ci.mean(with(mydata, age[diabetes==0&drinkany==1]))
ci.mean(with(mydata, age[diabetes==0&drinkany==0]))
ci.mean(with(mydata, BMI[diabetes==1&drinkany==1]))
ci.mean(with(mydata, BMI[diabetes==1&drinkany==0]))
ci.mean(with(mydata, BMI[diabetes==0&drinkany==1]))
ci.mean(with(mydata, BMI[diabetes==0&drinkany==0]))
Code with output
> mydata=read.csv(‘data5.csv’, header = TRUE, sep = “,”, quote = “\””,dec = “,”)
> tbl=table(mydata$smoke, mydata$diabetes)
> addmargins(tbl)
0 1 Sum
0 1895 444 2339
1 336 88 424
Sum 2231 532 2763
> install.packages(“epitools”)
Installing package into ‘C:/Users/Nagy Attila/Documents/R/win-library/3.3’
(as ‘lib’ is unspecified)
Warning: package ‘epitools’ is in use and will not be installed
> library(“epitools”)
> epitab(tbl,method=”riskratio”)
$tab
0 p0 1 p1 riskratio lower upper p.value
0 1895 0.8101753 444 0.1898247 1.000000 NA NA NA
1 336 0.7924528 88 0.2075472 1.093362 0.8916253 1.340744 0.3850902
$measure
[1] “wald”
$conf.level
[1] 0.95
$pvalue
[1] “fisher.exact”
>
> tbl=table(mydata$race, mydata$diabetes)
> addmargins(tbl)
0 1 Sum
0 456 99 555
1 439 101 540
2 1336 332 1668
Sum 2231 532 2763
> epitab(tbl,method=”riskratio”)
$tab
0 p0 1 p1 riskratio lower upper p.value
0 456 0.8216216 99 0.1783784 1.000000 NA NA NA
1 439 0.8129630 101 0.1870370 1.048541 0.8161129 1.347164 0.7544769
2 1336 0.8009592 332 0.1990408 1.115835 0.9109669 1.366775 0.2928858
$measure
[1] “wald”
$conf.level
[1] 0.95
$pvalue
[1] “fisher.exact”
> chisq.test(tbl)
Pearson’s Chi-squared test
data: tbl
X-squared = 1.2745, df = 2, p-value = 0.5287
>
> ci.mean(with(mydata, age[diabetes==1&drinkany==1]))
mean 2.5 % 97.5 %
65.73737 64.83957 66.63518
> ci.mean(with(mydata, age[diabetes==1&drinkany==0]))
mean 2.5 % 97.5 %
67.18563 66.46567 67.90558
> ci.mean(with(mydata, age[diabetes==0&drinkany==1]))
mean 2.5 % 97.5 %
66.54689 66.13083 66.96295
> ci.mean(with(mydata, age[diabetes==0&drinkany==0]))
mean 2.5 % 97.5 %
66.72465 66.35420 67.09510
> ci.mean(with(mydata, BMI[diabetes==1&drinkany==1]))
mean 2.5 % 97.5 %
28.51259 27.75659 29.26858
> ci.mean(with(mydata, BMI[diabetes==1&drinkany==0]))
mean 2.5 % 97.5 %
28.49535 27.92987 29.06082
> ci.mean(with(mydata, BMI[diabetes==0&drinkany==1]))
mean 2.5 % 97.5 %
28.66257 28.30434 29.02079
> ci.mean(with(mydata, BMI[diabetes==0&drinkany==0]))
mean 2.5 % 97.5 %
28.54972 28.24670 28.85274
Tables with interpretations
Diabetes | No Diabetes | Total | |
Smoke | 88 | 336 | 424 |
No Smoke | 444 | 1895 | 2339 |
Total | 532 | 2231 | 2763 |
The measure is relative risk:
1.093362 [0.8916253-1.340744]
Smokers have 1.09 times higher risk to have diabetes, but it is not significant, as 1 is inside the 95% CI. It makes sense, and it is crude measure, not adjusted for possible confounders.
Diabetes | No Diabetes | Total | |
African-American | 99 | 456 | 555 |
Hispanic | 101 | 439 | 540 |
White | 332 | 1336 | 1668 |
Total | 532 | 2231 | 2763 |
There are no significant differences. Even Chi-squared test show the same not significant result: p-value = 0.5287
Diabetes (n=532) | No Diabetes (n=2231) | |||
Drink (n=198) | No Drink (n=334) | Drink (n=949) | No Drink (n=1282) | |
Age (mean, 95% CI) | 65.74 [64.84-66.64] | 67.19 [66.47-67.91] | 66.55 [66.13-66.96] | 66.7 2[66.35-67.10] |
BMI (mean, 95% CI) | 28.51 [27.76-29.27] | 28.50 [27.93-29.06] | 28.66 [28.30-29.02] | 28.55 [28.25-28.85] |
There are no significant differences, as the 95% confidence intervals are overlapping.
Proportions
tbl=table(mydata$smoke, mydata$diabetes)
tbl <- rbind(tbl, colSums(tbl))
res<-cbind(tbl,rowSums(tbl),prop.table(tbl,1))
colnames(res) <- c(‘Diabetes No’,’Diabetes yes’, ‘Total’, ‘Prop No’, ‘Prop yes’)
rownames(res) <- c(‘Smoke No’,’Smoke yes’, ‘Total’)
res
0 1 0 1
0 1895 444 2339 0.8101753 0.1898247
1 336 88 424 0.7924528 0.2075472
2231 532 2763 0.8074557 0.1925443
rr<-tbl[2,2]/( tbl[2,2]+ tbl[2,1]) / (tbl[1,2]/( tbl[1,2]+ tbl[1,1]))
lrr <- log(tbl[2,2]/(tbl[2,2]+tbl[2,1]) / (tbl[1,2]/(tbl[1,2]+tbl[1,1]) ) )
vlrr <- 1/tbl[2,2] – 1/(tbl[2,2]+tbl[2,1]) + 1/tbl[1,2] – 1/(tbl[1,2]+tbl[1,1])
lower<-exp(lrr – 1.96*sqrt(vlrr))
upper<-exp(lrr + 1.96*sqrt(vlrr))
c (round(rr,2), ‘[‘,round(lower,2), ‘-‘,round(upper,2),’]’)
tbl=table(mydata$race, mydata$diabetes)
tbl <- rbind(tbl, colSums(tbl))
res<-cbind(tbl,rowSums(tbl),prop.table(tbl,1))
colnames(res) <- c(‘Diabetes No’,’Diabetes yes’, ‘Total’, ‘Prop No’, ‘Prop yes’)
rownames(res) <- c(‘African-American’,’Hispanic’, ‘White’,’Total’)
res
Diabetes No Diabetes yes Total Prop No Prop yes
African-American 456 99 555 0.8216216 0.1783784
Hispanic 439 101 540 0.8129630 0.1870370
White 1336 332 1668 0.8009592 0.1990408
Total 2231 532 2763 0.8074557 0.1925443
rr<-tbl[2,2]/( tbl[2,2]+ tbl[2,1]) / (tbl[1,2]/( tbl[1,2]+ tbl[1,1]))
lrr <- log(tbl[2,2]/(tbl[2,2]+tbl[2,1]) / (tbl[1,2]/(tbl[1,2]+tbl[1,1]) ) )
vlrr <- 1/tbl[2,2] – 1/(tbl[2,2]+tbl[2,1]) + 1/tbl[1,2] – 1/(tbl[1,2]+tbl[1,1])
lower<-exp(lrr – 1.96*sqrt(vlrr))
upper<-exp(lrr + 1.96*sqrt(vlrr))
c (round(rr,2), ‘[‘,round(lower,2), ‘-‘,round(upper,2),’]’)
rr<-tbl[3,2]/( tbl[3,2]+ tbl[3,1]) / (tbl[1,2]/( tbl[1,2]+ tbl[1,1]))
lrr <- log(tbl[3,2]/(tbl[3,2]+tbl[3,1]) / (tbl[1,2]/(tbl[1,2]+tbl[1,1]) ) )
vlrr <- 1/tbl[3,2] – 1/(tbl[3,2]+tbl[3,1]) + 1/tbl[1,2] – 1/(tbl[1,2]+tbl[1,1])
lower<-exp(lrr – 1.96*sqrt(vlrr))
upper<-exp(lrr + 1.96*sqrt(vlrr))
c (round(rr,2), ‘[‘,round(lower,2), ‘-‘,round(upper,2),’]’)