Measure of Association

Measure of Association



Variable List

Variable Description
ID Subject ID
age Subject age
BMI Subject BMI
diabetes Diabetes

0 – no

1 – yes

race Subject Race

0 – African-American

1 – Hispanic

2 – White

smoke Smoking Status

0 – no

1 – yes

drinkany Drink Alcohol

0 – no

1 – yes

Solution 

Code

mydata=read.csv(‘data5.csv’, header = TRUE, sep = “,”, quote = “\””,dec = “,”)

tbl=table(mydata$smoke, mydata$diabetes)

tbl <- cbind(tbl, rowSums(tbl), rowSums(tbl) / sum(tbl))

install.packages(“epitools”)

library(“epitools”)

epitab(tbl,method=”riskratio”)

tbl=table(mydata$race, mydata$diabetes)

tbl <- cbind(tbl, rowSums(tbl), rowSums(tbl) / sum(tbl))

chisq.test(tbl)

ci.mean <- function(x, alfa=0.05){

n <- length(x)

a<-qt(1-alfa/2, n-1)

m<-mean(x, na.rm=TRUE);          s<-sd(x, na.rm=TRUE)

se<-s/sqrt(n)

res <- c(m, m-a*se,m+a*se)

setNames(res, c(“mean”, paste(100*alfa/2, “%”), paste(100*(1-alfa/2), “%”)))

}

ci.mean(with(mydata, age[diabetes==1&drinkany==1]))

ci.mean(with(mydata, age[diabetes==1&drinkany==0]))

ci.mean(with(mydata, age[diabetes==0&drinkany==1]))

ci.mean(with(mydata, age[diabetes==0&drinkany==0]))

ci.mean(with(mydata, BMI[diabetes==1&drinkany==1]))

ci.mean(with(mydata, BMI[diabetes==1&drinkany==0]))

ci.mean(with(mydata, BMI[diabetes==0&drinkany==1]))

ci.mean(with(mydata, BMI[diabetes==0&drinkany==0]))

Code with output

> mydata=read.csv(‘data5.csv’, header = TRUE, sep = “,”, quote = “\””,dec = “,”)

> tbl=table(mydata$smoke, mydata$diabetes)

> addmargins(tbl)

0    1  Sum

0   1895  444 2339

1    336   88  424

Sum 2231  532 2763

> install.packages(“epitools”)

Installing package into ‘C:/Users/Nagy Attila/Documents/R/win-library/3.3’

(as ‘lib’ is unspecified)

Warning: package ‘epitools’ is in use and will not be installed

> library(“epitools”)

> epitab(tbl,method=”riskratio”)

$tab

0        p0   1        p1 riskratio     lower    upper   p.value

0 1895 0.8101753 444 0.1898247  1.000000        NA       NA        NA

1  336 0.7924528  88 0.2075472  1.093362 0.8916253 1.340744 0.3850902

$measure

[1] “wald”

$conf.level

[1] 0.95

$pvalue

[1] “fisher.exact”

>

> tbl=table(mydata$race, mydata$diabetes)

> addmargins(tbl)

0    1  Sum

0    456   99  555

1    439  101  540

2   1336  332 1668

Sum 2231  532 2763

> epitab(tbl,method=”riskratio”)

$tab

0        p0   1        p1 riskratio     lower    upper   p.value

0  456 0.8216216  99 0.1783784  1.000000        NA       NA        NA

1  439 0.8129630 101 0.1870370  1.048541 0.8161129 1.347164 0.7544769

2 1336 0.8009592 332 0.1990408  1.115835 0.9109669 1.366775 0.2928858

$measure

[1] “wald”

$conf.level

[1] 0.95

$pvalue

[1] “fisher.exact”

> chisq.test(tbl)

Pearson’s Chi-squared test

data:  tbl

X-squared = 1.2745, df = 2, p-value = 0.5287

>

> ci.mean(with(mydata, age[diabetes==1&drinkany==1]))

mean    2.5 %   97.5 %

65.73737 64.83957 66.63518

> ci.mean(with(mydata, age[diabetes==1&drinkany==0]))

mean    2.5 %   97.5 %

67.18563 66.46567 67.90558

> ci.mean(with(mydata, age[diabetes==0&drinkany==1]))

mean    2.5 %   97.5 %

66.54689 66.13083 66.96295

> ci.mean(with(mydata, age[diabetes==0&drinkany==0]))

mean    2.5 %   97.5 %

66.72465 66.35420 67.09510

> ci.mean(with(mydata, BMI[diabetes==1&drinkany==1]))

mean    2.5 %   97.5 %

28.51259 27.75659 29.26858

> ci.mean(with(mydata, BMI[diabetes==1&drinkany==0]))

mean    2.5 %   97.5 %

28.49535 27.92987 29.06082

> ci.mean(with(mydata, BMI[diabetes==0&drinkany==1]))

mean    2.5 %   97.5 %

28.66257 28.30434 29.02079

> ci.mean(with(mydata, BMI[diabetes==0&drinkany==0]))

mean    2.5 %   97.5 %

28.54972 28.24670 28.85274

Tables with interpretations

Diabetes No Diabetes Total
Smoke 88 336 424
No Smoke 444 1895 2339
Total 532 2231 2763

The measure is relative risk:

1.093362 [0.8916253-1.340744]

Smokers have 1.09 times higher risk to have diabetes, but it is not significant, as 1 is inside the 95% CI. It makes sense, and it is crude measure, not adjusted for possible confounders.

Diabetes No Diabetes Total
African-American 99 456 555
Hispanic 101 439 540
White 332 1336 1668
Total 532 2231 2763

There are no significant differences.  Even Chi-squared test show the same not significant result: p-value = 0.5287 

Diabetes (n=532) No Diabetes (n=2231)
Drink (n=198) No Drink (n=334) Drink (n=949) No Drink (n=1282)
Age (mean, 95% CI) 65.74 [64.84-66.64] 67.19 [66.47-67.91] 66.55 [66.13-66.96] 66.7 2[66.35-67.10]
BMI (mean, 95% CI) 28.51 [27.76-29.27] 28.50 [27.93-29.06] 28.66 [28.30-29.02] 28.55 [28.25-28.85]

 There are no significant differences, as the 95% confidence intervals are overlapping.

Proportions

tbl=table(mydata$smoke, mydata$diabetes)

tbl <- rbind(tbl, colSums(tbl))

res<-cbind(tbl,rowSums(tbl),prop.table(tbl,1))

colnames(res) <- c(‘Diabetes No’,’Diabetes yes’, ‘Total’, ‘Prop No’, ‘Prop yes’)

rownames(res) <- c(‘Smoke No’,’Smoke yes’, ‘Total’)

res

     0   1              0         1

0 1895 444 2339 0.8101753 0.1898247

1  336  88  424 0.7924528 0.2075472

  2231 532 2763 0.8074557 0.1925443

rr<-tbl[2,2]/( tbl[2,2]+ tbl[2,1]) / (tbl[1,2]/( tbl[1,2]+ tbl[1,1]))

lrr <- log(tbl[2,2]/(tbl[2,2]+tbl[2,1]) / (tbl[1,2]/(tbl[1,2]+tbl[1,1]) ) )
vlrr <- 1/tbl[2,2] – 1/(tbl[2,2]+tbl[2,1]) + 1/tbl[1,2] – 1/(tbl[1,2]+tbl[1,1])

lower<-exp(lrr – 1.96*sqrt(vlrr))

upper<-exp(lrr + 1.96*sqrt(vlrr))

c (round(rr,2), ‘[‘,round(lower,2), ‘-‘,round(upper,2),’]’) 

tbl=table(mydata$race, mydata$diabetes)

tbl <- rbind(tbl, colSums(tbl))

res<-cbind(tbl,rowSums(tbl),prop.table(tbl,1))

colnames(res) <- c(‘Diabetes No’,’Diabetes yes’, ‘Total’, ‘Prop No’, ‘Prop yes’)

rownames(res) <- c(‘African-American’,’Hispanic’, ‘White’,’Total’)

res 

                 Diabetes No Diabetes yes Total   Prop No  Prop yes

African-American         456           99   555 0.8216216 0.1783784

Hispanic                 439          101   540 0.8129630 0.1870370

White                   1336          332  1668 0.8009592 0.1990408

Total                   2231          532  2763 0.8074557 0.1925443 

rr<-tbl[2,2]/( tbl[2,2]+ tbl[2,1]) / (tbl[1,2]/( tbl[1,2]+ tbl[1,1]))

lrr <- log(tbl[2,2]/(tbl[2,2]+tbl[2,1]) / (tbl[1,2]/(tbl[1,2]+tbl[1,1]) ) )
vlrr <- 1/tbl[2,2] – 1/(tbl[2,2]+tbl[2,1]) + 1/tbl[1,2] – 1/(tbl[1,2]+tbl[1,1])

lower<-exp(lrr – 1.96*sqrt(vlrr))

upper<-exp(lrr + 1.96*sqrt(vlrr))

c (round(rr,2), ‘[‘,round(lower,2), ‘-‘,round(upper,2),’]’)

rr<-tbl[3,2]/( tbl[3,2]+ tbl[3,1]) / (tbl[1,2]/( tbl[1,2]+ tbl[1,1]))

lrr <- log(tbl[3,2]/(tbl[3,2]+tbl[3,1]) / (tbl[1,2]/(tbl[1,2]+tbl[1,1]) ) )
vlrr <- 1/tbl[3,2] – 1/(tbl[3,2]+tbl[3,1]) + 1/tbl[1,2] – 1/(tbl[1,2]+tbl[1,1])

lower<-exp(lrr – 1.96*sqrt(vlrr))

upper<-exp(lrr + 1.96*sqrt(vlrr))

c (round(rr,2), ‘[‘,round(lower,2), ‘-‘,round(upper,2),’]’)