-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathweek1_wcgs_r_lab_workbook.Rmd
More file actions
180 lines (117 loc) · 4.43 KB
/
week1_wcgs_r_lab_workbook.Rmd
File metadata and controls
180 lines (117 loc) · 4.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
---
title: "WCGS data lab workbook"
author: "Maureen Lahiff"
date: "March 6, 2020"
---
This R Markdown file assumes you have already worked through the Week 1 R Tutorial.
```{r set libraries and read in data, include=FALSE}
# Packages used by this workbook (each one is attached exactly once).
library(ggplot2)   # plotting
library(dplyr)     # %>%, group_by(), summarize(), count(), mutate()
library(readr)
library(epitools)  # supplies the wcgs data set loaded below
library(rmarkdown)
library(knitr)

# Load the wcgs data frame into the workspace.
data(wcgs)
```
```{r variables we made in the Week 1 R Tutorial, include=FALSE}
# Derived baseline variables (the same ones made in the Week 1 R Tutorial).

# Two-level behavior pattern as an ordered factor.
# NOTE(review): labels are matched to the sorted values of dibpat0. The
# epitools help page lists dibpat0 as 0 = Type B, 1 = Type A, which would make
# these labels reversed -- confirm against the data before relying on them.
wcgs$dibpat0_fact <- factor(wcgs$dibpat0, ordered = TRUE, labels = c("A", "B"))

# Smoking indicator: 1 when ncigs0 > 0, 0 when ncigs0 == 0 (NA stays NA).
# Built in one vectorized step so the new column always has one entry per
# row; filling a brand-new column through a logical subset can come up short
# and make the data-frame assignment fail.
wcgs$smoker0 <- as.numeric(wcgs$ncigs0 > 0)
wcgs$smoker0_fact <- factor(wcgs$smoker0, ordered = TRUE,
                            labels = c("Non-Smoker", "Smoker"))

# High systolic blood pressure indicator: 1 when sbp0 >= 140, 0 when
# sbp0 < 140 (NA stays NA).
wcgs$highsbp0 <- as.numeric(wcgs$sbp0 >= 140)

# Unit conversions (presumably inches -> cm and pounds -> kg, going by the
# factors 2.54 and 2.2 and the new column names).
wcgs$heightcm0 <- round(wcgs$height0 * 2.54, digits = 2)
wcgs$weightkg0 <- round(wcgs$weight0 / 2.2, digits = 2)

# Body mass index: weight in kg divided by squared height in metres.
wcgs$BMI0 <- round(wcgs$weightkg0 / ((wcgs$heightcm0 / 100)^2), digits = 1)
```
```{r average cholesterol for smokers and non-smokers }
# Question 1 to turn in
# average cholesterol at baseline for smokers and non-smokers
# chol0 is the baseline cholesterol column (this chunk previously averaged
# sbp0, which is blood pressure). na.rm = TRUE leaves out men with no
# recorded cholesterol so a single missing value cannot turn a group mean
# into NA.
wcgs %>%
  group_by(smoker0_fact) %>%
  summarize(average = mean(chol0, na.rm = TRUE))
```
```{r}
# Show the first rows of wcgs, including the variables added above.
wcgs %>% head()
```
```{r recode behavioral patterns}
# Build an ordered factor from behpat0, the 4-level behavioral pattern
# variable; the labels are matched to its sorted values.
wcgs$behpat0_fact <- factor(
  wcgs$behpat0,
  labels = c("A", "some A", "mix A and B", "B"),
  ordered = TRUE
)

# Example of an unordered factor built from smoker0 with two labels.
wcgs$smoker_example <- factor(wcgs$smoker0, labels = c("A", "B"))
```
```{r create bmi categories}
# cut gives us a lot of flexibility
# in our specifications of the interval endpoints
# the intervals here are the commonly used ones
# right = FALSE makes each interval closed on the left: [0, 18.5), [18.5, 25),
# [25, 30), [30, Inf]; include.lowest = TRUE closes the last interval on the
# right as well.
# The argument is spelled ordered_result (no trailing "s"); a misspelled name
# is silently ignored and the categories come back as an unordered factor.
wcgs$bmi_cat <- cut(
  wcgs$BMI0,
  breaks = c(0, 18.5, 25.0, 30.0, Inf),
  labels = c("underweight", "normal", "overweight", "obese"),
  include.lowest = TRUE,
  right = FALSE,
  ordered_result = TRUE
)
```
```{r sbp and BMI}
# Mean baseline systolic blood pressure (sbp0) within each BMI category.
wcgs %>%
  group_by(bmi_cat) %>%
  summarise(blood_pressure = mean(sbp0))
```
```{r}
```
```{r}
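# RStudio shortcut: Ctrl+Alt+I (Cmd+Option+I on a Mac) inserts a new code chunk
# RStudio shortcut: Ctrl+Shift+C (Cmd+Shift+C on a Mac) comments or uncomments the selected lines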
```
```{r be sure chd at 8 1/2 yr follow-up is a factor}
# Labelled factor version of chd69: the smaller value becomes "No CHD" and
# the larger one "CHD".
wcgs[["chd69_fact"]] <- factor(wcgs[["chd69"]], labels = c("No CHD", "CHD"))
```
```{r tables }
# these are examples for additional tables
# one-way table of behavioral pattern: counts with a total, then proportions
behpat_table <- table(wcgs$behpat0_fact)
addmargins(behpat_table)
round(prop.table(behpat_table), digits = 3)

# this is Question 2 to turn in
# making a two-way table chd and behavioral pattern
# (behavioral pattern in the rows, chd69 in the columns)
# the proportion option 1 asks for "row percents" in the two-way table
chd_behpat_table <- table(wcgs$behpat0_fact, wcgs$chd69)
addmargins(chd_behpat_table)
round(prop.table(chd_behpat_table, 1), digits = 3)

# making a two-way table chd and blood pressure
# highsbp0 is 0 when sbp0 < 140 and 1 when sbp0 >= 140, so the levels are
# spelled out to guarantee that 0 is labelled "Low Sbp" and 1 "High Sbp"
wcgs$highsbp0_fact <- factor(wcgs$highsbp0,
                             levels = c(0, 1),
                             labels = c("Low Sbp", "High Sbp"))
chd_highsbp_table <- table(wcgs$highsbp0_fact, wcgs$chd69)
addmargins(chd_highsbp_table)
round(prop.table(chd_highsbp_table, 1), digits = 3)
```
```{r Question 4}
```
```{r side by side bar charts for chd69 by behavior pattern group}
# this is Question 3 to turn in
# side by side bars: within each behavior pattern group, the proportion of
# men in each CHD group
# chd69_fact (the labelled factor made earlier) is used instead of the raw
# chd69 column so the fill is discrete: each CHD group gets its own colour
# and its own dodged bar
wcgs %>%
  count(behpat0_fact, chd69_fact) %>%
  group_by(behpat0_fact) %>%
  mutate(prop = n / sum(n)) %>%   # proportion within the behavior group
  ungroup() %>%
  ggplot(aes(x = behpat0_fact, y = prop, fill = chd69_fact)) +
  geom_bar(width = 0.5, stat = "identity", position = "dodge") +
  labs(
    y = "proportion",   # values run from 0 to 1, not 0 to 100
    x = "behavior type",
    fill = "CHD at follow-up",
    title = "Type A behavior and CHD at 8 1/2 year follow-up",
    subtitle = "n = 3154 men at baseline"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )
```
```{r summary, histogram, and box plots }
# summary for BMI by CHD at 8 1/2 yr follow-up
# the rows are picked with chd69_fact, whose levels are "No CHD" and "CHD";
# comparing the raw chd69 column with these labels would select no rows
no_chd_summary <- summary(subset(wcgs, chd69_fact == "No CHD", select = BMI0))
chd_summary <- summary(subset(wcgs, chd69_fact == "CHD", select = BMI0))
no_chd_summary
chd_summary

# use facet_wrap to get side by side histograms for baseline BMI by CHD
# the facet variable is named directly (no wcgs$ prefix) so ggplot looks it
# up in the plot's own data, and the panels are titled "No CHD" / "CHD"
ggplot(wcgs, aes(x = BMI0, y = ..density..)) +
  geom_histogram(binwidth = 1, color = "black", fill = "purple") +
  facet_wrap(~ chd69_fact) +
  labs(
    title = "WCGS baseline BMI by CHD group",
    subtitle = "men ages 39 to 59",
    x = "BMI ",
    y = "density"
  ) +
  # BMI values outside 18.5 to 40 are left out of the plot by these limits
  scale_x_continuous(
    limits = c(18.5, 40),
    breaks = c(20, 25, 30, 35),
    labels = c(20, 25, 30, 35)
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )
```