I'm trying to make a side-by-side bar chart of gender with percentages on the y-axis instead of counts. I can plot the counts, but I can't work out how to convert them to percentages.
#library(tidyverse) # may or may not need.
library(dplyr)
library(ggplot2)
# National data set ----
# Twenty rows: a category label, five 0/1 indicator columns (two of which
# contain one NA each), a gender code (one NA), and a continuous measure.
df_1 <- data.frame(
  categorical = c(
    "A", "B", "C", "A", "B", "A", "C", "C", "C", "A",
    "A", "C", "C", "C", "A", "C", "A", "B", "A", "C"
  ),
  indicator1 = c(1, 0, 1, NA, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1),
  indicator2 = c(1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0),
  indicator3 = c(1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0),
  indicator4 = c(0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, NA, 1, 0, 0, 0, 0, 0),
  indicator5 = c(0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0),
  gender = c(
    "M", "M", NA, "F", "F", "F", "F", "F", "M", "U",
    "U", "F", "M", "M", "F", "F", "F", "U", "M", "F"
  ),
  continuous1 = c(
    2.3, 3.4, 6.6, 5.5, 6, 7, 11, 12.3, 13, 5,
    2.4, 3.6, 6.3, 5.2, 5, 6.6, 11.3, 12, 14, 5
  )
)
print(df_1)
summary(df_1)

# Overwrite every missing cell with 0. This is applied to all columns, not
# just the indicators, so the single missing gender entry ends up as "0"
# (assuming gender is stored as character, the default from R 4.0 onwards).
df_1 <- replace(df_1, is.na(df_1), 0)

# Flag these rows as the national audience (0).
df_1$audience <- 0
print(df_1)
# Client data set ----
# Ten rows with the same columns as the national data (gender and continuous1
# appear in the opposite order here); indicator2 and indicator3 each hold one
# NA.
df_2 <- data.frame(
  categorical = c("A", "B", "C", "A", "B", "A", "C", "C", "C", "A"),
  indicator1 = c(1, 1, 1, 1, 1, 1, 1, 0, 0, 0),
  indicator2 = c(1, 1, 1, 1, NA, 1, 1, 1, 1, 1),
  indicator3 = c(0, NA, 1, 0, 1, 0, 0, 0, 0, 0),
  indicator4 = c(1, 1, 1, 1, 1, 0, 0, 0, 0, 1),
  indicator5 = c(1, 1, 1, 1, 1, 0, 1, 0, 1, 1),
  continuous1 = c(2.3, 3.4, 6.6, 5.5, 6, 7, 11, 12.3, 13, 5),
  gender = c("M", "F", "F", "F", "F", "M", "M", "M", "F", "F")
)
print(df_2)
summary(df_2)

# Overwrite every missing cell with 0 (applied across all columns).
df_2 <- replace(df_2, is.na(df_2), 0)

# Flag these rows as the client audience (1).
df_2$audience <- 1
print(df_2)
# Stack the national rows on top of the client rows. rbind() on data frames
# matches columns by name, so the differing column order of the two inputs is
# not a problem.
df1_df2 <- rbind(df_1, df_2)
print(df1_df2)
# Dodged bar chart of raw counts: one cluster per gender, one bar per audience.
# This renders fine, but the bar heights are row counts. The goal is
# percentages instead, such that all the dark bars add up to 100% and all the
# orange bars add up to 100%.
a <- ggplot(
  data = df1_df2,
  aes(x = factor(gender), fill = factor(audience))
) +
  scale_fill_manual(values = c("#0B2632", "#FF5D00")) +
  geom_bar(
    stat = "count",
    # Keep every bar the width of a single element, even for genders that
    # occur in only one audience.
    position = position_dodge(preserve = "single")
  )
a
# Percentage version of the chart: within each audience the bars sum to 100%.
#
# Why the earlier attempt failed:
#   * ddply() belongs to plyr, which this script does not load;
#   * the data has no `count` column, so sum(count) received the count()
#     function itself ("invalid 'type' (closure) of argument");
#   * grouping by gender would make each gender, not each audience, add up to
#     100%;
#   * `position = position_dodge` passed the function rather than the position
#     object returned by calling it.
#
# Step 1: one row per audience/gender pair with its row count, then express
# that count as a share of the audience total.
percent <- df1_df2 %>%
  count(audience, gender, name = "count") %>%
  group_by(audience) %>%
  mutate(percent_gender = count / sum(count) * 100) %>%
  ungroup()
percent

# Step 2: plot the precomputed percentages. geom_col() draws bar heights
# straight from `y`; preserve = "single" keeps the bar widths equal for genders
# that appear in only one audience, matching the count chart.
a <- ggplot(
  data = percent,
  aes(x = factor(gender), y = percent_gender, fill = factor(audience))
) +
  scale_fill_manual(values = c("#0B2632", "#FF5D00")) +
  geom_col(position = position_dodge(preserve = "single"))
a