# Hi all! I've been asked to "create a bar chart of marijuana use by age group and sex with side-by-side bars." I've been able to create a bar chart of marijuana use by age group, but I cannot figure out how to subdivide each age group by gender (on top of the fact that it's already divided by "yes/no" use). I have included the exercises leading up to that one, but the issue is number 7. Thank you!!
################PROLOG#####################
# Project: Chapter 1 Coder Exercises
# Purpose: Practice R Studio
# Author: Camille
# Edit date: 09/08/2021
# Data: 2013-2014 NHANES data, codebook at
# https://www.cdc.gov/nchs/nhanes/index.htm
###########################################
# 1) Open the 2013-2014 NHANES data file saved as nhanes_2013_ch1.csv
# Choose one of the ways of importing data and delete the others
# import method 1
# bring in directly from the internet (most reproducible but also most
# time consuming during the import)
# NOTE: read.csv() below is base R; data.table is attached here exactly as in
# the original script in case later code relies on it.
library(package = "data.table")
# temporary file that will hold the downloaded zip archive
temp <- tempfile(fileext = ".zip")
# mode = "wb" requests a binary transfer explicitly, so the archive cannot be
# corrupted by text-mode line-ending translation on Windows
download.file(url = "https://edge.sagepub.com/system/files/datasets_7.zip",
              destfile = temp,
              mode = "wb")
# read the CSV straight out of the archive without extracting it to disk
nhanes <- read.csv(file = unz(description = temp,
                              filename = "Datasets/data/nhanes_2013_ch1.csv"))
# 2) Examine the data types for DUQ200, RIDAGEYR, and RIAGENDR, and
# fix data types if needed based on the NHANES codebook.
#
# Codebook summary:
#   DUQ200   - "Have you ever, even once, used marijuana or hashish?"
#              categorical -> factor
#              1 = Yes, 2 = No, 7 = Refused, 9 = Don't know, . = Missing
#   RIDAGEYR - age in years of the participant at the time of screening
#              numerical -> numeric (individuals 80 and over are topcoded at 80)
#   RIAGENDR - gender of the participant
#              categorical -> factor
#              1 = Male, 2 = Female

# marijuana use: store as a factor
nhanes[["DUQ200"]] <- as.factor(nhanes[["DUQ200"]])
# age in years: store as numeric
nhanes[["RIDAGEYR"]] <- as.numeric(nhanes[["RIDAGEYR"]])
# gender: store as a factor
nhanes[["RIAGENDR"]] <- as.factor(nhanes[["RIAGENDR"]])

# confirm each conversion, then look at a summary of the whole data frame
class(nhanes[["DUQ200"]])
class(nhanes[["RIDAGEYR"]])
class(nhanes[["RIAGENDR"]])
summary(nhanes)
# 3) Based on the online NHANES codebook, code missing values
# appropriately for DUQ200, RIDAGEYR, and RIAGENDR.
# the tidyverse supplies mutate(), na_if() and the plotting functions used below
library(tidyverse)
# Turn "don't know" (9) and "refused" (7) into NA, then drop the factor levels
# that are left empty. The arguments of a single mutate() are evaluated in
# order, so each step sees the result of the one before it.
nhanes.cleaned <- mutate(
  nhanes,
  DUQ200 = as.factor(DUQ200),
  DUQ200 = na_if(DUQ200, "9"),
  DUQ200 = na_if(DUQ200, "7"),
  DUQ200 = droplevels(DUQ200)
)
# inspect the result
summary(nhanes.cleaned)
# 4) Create a bar chart showing the percentage of NHANES participants
# answering yes and no to marijuana use.
# Responses coded NA (missing, refused, don't know) are dropped before
# plotting, so the percentages are out of valid yes/no answers only.
marijuana.use.bar <- nhanes.cleaned %>%
  mutate(DUQ200 = recode_factor(.x = DUQ200,
                                '1' = "YES",
                                '2' = "NO")) %>%
  drop_na(DUQ200) %>%
  # after_stat(count) replaces the deprecated ..count.. notation; dividing by
  # the sum of all bar counts converts each bar to a percentage
  ggplot(aes(x = DUQ200,
             y = 100 * after_stat(count) / sum(after_stat(count)),
             fill = DUQ200)) +
  geom_bar() +
  # the x axis already identifies the bars, so the fill legend is suppressed
  scale_fill_manual(values = c("#78A678", "#7463AC"),
                    guide = "none") +
  theme_minimal() +
  labs(x = "Have you ever tried marijuana?",
       y = "Percent of responses")
# show the chart
marijuana.use.bar
# 5) Recode age into a new variable called age.cat with 4
# categories: 18-29, 30-39, 40-49, 50-59.
# The cleaning steps from exercise 3 are repeated here so that this block
# rebuilds nhanes.cleaned from the raw nhanes data frame.
nhanes.cleaned <- nhanes %>%
  mutate(DUQ200 = as.factor(x = DUQ200)) %>%
  mutate(DUQ200 = na_if(x = DUQ200, y = "9")) %>%
  mutate(DUQ200 = na_if(x = DUQ200, y = "7")) %>%
  mutate(DUQ200 = droplevels(x = DUQ200)) %>%
  mutate(RIAGENDR = as.factor(x = RIAGENDR)) %>%
  mutate(RIDAGEYR = as.numeric(x = RIDAGEYR)) %>%
  # cut() intervals are right-closed: (17,29], (29,39], (39,49], (49,59].
  # Finite outer breaks are used on purpose: with -Inf/Inf, anyone younger
  # than 18 would be labelled "18-29" and anyone older than 59 would be
  # labelled "50-59". Ages outside 18-59 now become NA in age.cat instead.
  mutate(age.cat = cut(x = RIDAGEYR,
                       breaks = c(17, 29, 39, 49, 59),
                       labels = c("18-29", "30-39", "40-49", "50-59")))
# check the summary
summary(object = nhanes.cleaned)
# 6) Create a bar chart of marijuana use by age group (% of total responses).
# Note: this overwrites the marijuana.use.bar object created in exercise 4.
marijuana.use.bar <- nhanes.cleaned %>%
  # drop rows missing either plotted variable; age.cat (not just RIDAGEYR) is
  # checked because it is the variable actually mapped to the x axis
  drop_na(DUQ200, age.cat) %>%
  mutate(DUQ200 = recode_factor(.x = DUQ200,
                                '1' = "YES",
                                '2' = "NO")) %>%
  # after_stat(count) replaces the deprecated ..count.. notation; the
  # denominator sums every bar, so each bar is a percent of ALL responses
  ggplot(aes(x = age.cat,
             y = 100 * after_stat(count) / sum(after_stat(count)),
             fill = DUQ200)) +
  # dodge places the YES and NO bars side by side within each age group
  geom_bar(position = "dodge") +
  scale_fill_manual(values = c("#78A678", "#7463AC"),
                    name = "Have you tried marijuana?") +
  theme_minimal() +
  labs(x = "Age group (in years)",
       y = "Percent of total responses")
# show the chart
marijuana.use.bar
# 7) now create a bar chart of marijuana use by age group and sex with side-by-side bars
# Approach: age group stays on the x axis, the YES/NO answer stays as the
# dodged (side-by-side) fill colour, and sex is added as one panel per sex
# with facet_wrap(). Percentages are computed up front with count() rather
# than inside ggplot, because statistics computed by ggplot are calculated
# separately for each facet panel; pre-computing keeps every bar a percent of
# ALL valid responses, matching exercise 6.
# (Alternative layout: map fill = RIAGENDR and facet on DUQ200 instead.)
marijuana.use.sex.bar <- nhanes.cleaned %>%
  # keep only rows with a valid answer, sex, and age group
  drop_na(DUQ200, RIAGENDR, age.cat) %>%
  mutate(DUQ200 = recode_factor(.x = DUQ200,
                                '1' = "YES",
                                '2' = "NO")) %>%
  # codebook: 1 = Male, 2 = Female
  mutate(RIAGENDR = recode_factor(.x = RIAGENDR,
                                  '1' = "Male",
                                  '2' = "Female")) %>%
  # one row per age group x sex x answer combination
  count(age.cat, RIAGENDR, DUQ200, name = "n.responses") %>%
  mutate(perc.responses = 100 * n.responses / sum(n.responses)) %>%
  ggplot(aes(x = age.cat,
             y = perc.responses,
             fill = DUQ200)) +
  # geom_col() plots the pre-computed percentages; dodge puts YES/NO side by side
  geom_col(position = "dodge") +
  facet_wrap(facets = vars(RIAGENDR)) +
  scale_fill_manual(values = c("#78A678", "#7463AC"),
                    name = "Have you tried marijuana?") +
  theme_minimal() +
  labs(x = "Age group (in years)",
       y = "Percent of total responses")
# show the chart
marijuana.use.sex.bar