ggplot Pie Chart Help

Hi,

I am analyzing the name "Liam" by ethnicity in New York. I exported the following data set to Excel and imported it into R.
https://data.cityofnewyork.us/Health/Popular-Baby-Names/25th-nujf/data

Then, I attempted to create a pie chart. The pie chart percentages do not correspond to their respective pie slices. How should I edit my code?
I have posted my code below. Thanks for your help.

# Load the exported "Popular Baby Names" workbook. The call is
# namespace-qualified because readxl is not among the packages attached
# with library() in this script, so a bare read_excel() fails in a fresh
# session.
NY_Popular_Baby_Names <- readxl::read_excel("NY_Popular_Baby_Names.xlsx")

# View() opens a data viewer, so only call it from an interactive session.
if (interactive()) {
  View(NY_Popular_Baby_Names)
}

library(ggplot2)
library(dplyr)
library(scales)
library(tidyr)
library(magrittr)

# Calendar years 2000 through 2018, one entry per year.
sequence_of_years_NY <- seq(2000, 2018, by = 1)

# Copy the imported table and give its six columns short names.
NY_Popular_Baby_Names.df <- NY_Popular_Baby_Names
names(NY_Popular_Baby_Names.df) <- c(
  "year", "gender", "ethnicity", "name", "count", "rank"
)

# Grand total of every count in the table, and each row's share of it.
NY_Popular_Baby_Names.df[["total"]] <- sum(NY_Popular_Baby_Names.df[["count"]])
NY_Popular_Baby_Names.df[["perc"]] <-
  NY_Popular_Baby_Names.df[["count"]] / NY_Popular_Baby_Names.df[["total"]]

# Keep the rows for the name "Liam", then only those from 2013.
# %in% never yields NA, so rows with a missing name/year are dropped.
Liam_NY.df <- NY_Popular_Baby_Names.df[
  NY_Popular_Baby_Names.df[["name"]] %in% "Liam",
]
Liam_2013.df <- Liam_NY.df[Liam_NY.df[["year"]] %in% 2013, ]

# Drop the first and sixth of the 2013 rows (selected by position).
Liam_2013.df <- Liam_2013.df[-c(1, 6), ]

# Recompute the total and the shares within the remaining 2013 rows, so
# that perc sums to 1 across the slices of the pie.
Liam_2013.df[["total"]] <- sum(Liam_2013.df[["count"]])
Liam_2013.df[["perc"]] <- Liam_2013.df[["count"]] / Liam_2013.df[["total"]]

# Minimal theme with axis titles, ticks, grid lines and the panel border
# removed; only a bold 14-point plot title is kept.
blank_theme <- theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid = element_blank(),
    panel.border = element_blank()
  )

# Pie chart: a single stacked bar of `perc`, filled by ethnicity and wrapped
# into polar coordinates, with one percentage label per row.
ggplot(
  data = Liam_2013.df,
  mapping = aes(x = "", y = perc, fill = ethnicity)
) +
  geom_col(width = 1) +
  # The label inherits y = perc from the plot-level mapping.
  geom_text(aes(label = percent(perc)), size = 5) +
  coord_polar(theta = "y", start = 0) +
  scale_fill_brewer("Blues") +
  blank_theme +
  theme(axis.text.x = element_blank()) +
  labs(
    title = "William in New York by Ethnicity",
    caption = "Source: Data from the New York Department of Health",
    x = "",
    y = "Percentage of Total"
  )

Hi @liam.monahan.tx, could you post a sample of the contents of Liam_2013.df by pasting the output of dput(Liam_2013.df %>% head(50)) here? That would help folks reproduce your context more easily.

    gender     ethnicity                                          name  count     total   perc

1 MALE ASIAN AND PACIFIC ISLANDER Liam 72 498 0.1445783
2 MALE BLACK NON HISPANIC Liam 94 498 0.1887550
3 MALE HISPANIC Liam 192 498 0.3855422
4 MALE WHITE NON HISPANIC Liam 140 498 0.2811245

Thanks, but that's an image, not the output of the command dput(Liam_2013.df %>% head(50)) -- could you paste that output here, between a pair of triple backticks (```), like this?

```
<--- paste output of dput(Liam_2013.df %>% head(50)) here
```

Is this what you are trying to accomplish?

library(tidyverse)
library(scales)

# Copy-paste-friendly reconstruction of the four 2013 "Liam" rows (one per
# ethnicity). Row names are the original row labels; perc is count / total.
Liam_2013.df <- data.frame(
    year = rep(2013L, 4L),
    count = c(72L, 94L, 192L, 140L),
    rank = c(10L, 10L, 11L, 21L),
    total = rep(498L, 4L),
    perc = c(
        0.144578313253012, 0.188755020080321,
        0.385542168674699, 0.281124497991968
    ),
    gender = factor(rep("MALE", 4L)),
    ethnicity = factor(c(
        "ASIAN AND PACIFIC ISLANDER",
        "BLACK NON HISPANIC",
        "HISPANIC",
        "WHITE NON HISPANIC"
    )),
    name = factor(rep("Liam", 4L)),
    row.names = c("9369", "9865", "10652", "11419")
)

# Stripped-down theme for the pie chart: theme_minimal() with axis titles,
# axis ticks, grid lines and the panel border blanked, plus a bold title.
blank_theme <- theme_minimal() +
    theme(
        plot.title = element_text(face = "bold", size = 14),
        panel.grid = element_blank(),
        panel.border = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank(),
        axis.title.x = element_blank()
    )

# Pie chart of each ethnicity's share of the 2013 "Liam" counts: a single
# stacked bar of `perc` wrapped into polar coordinates.
ggplot(Liam_2013.df, aes(x = "", y = perc, fill = ethnicity)) +
    geom_bar(width = 1, stat = "identity") +
    coord_polar("y", start = 0) +
    # Name the palette explicitly: an unnamed first argument is used as the
    # legend title, which labelled the legend "Blues".
    scale_fill_brewer(palette = "Blues") +
    blank_theme +
    theme(axis.text.x = element_blank()) +
    # position_stack(vjust = 0.5) puts each label at the middle of its own
    # stacked segment, so the percentages line up with their slices;
    # x = 1.1 nudges the labels outward from the centre.
    geom_text(aes(x = 1.1, label = percent(perc)),
              position = position_stack(vjust = 0.5),
              size = 5) +
    # The data are filtered to the name "Liam", so the title says so.
    labs(title = "Liam in New York by Ethnicity",
         x = "",
         y = "Percentage of Total",
         caption = "Source: Data from the New York Department of Health")

Created on 2020-03-15 by the reprex package (v0.3.0.9001)

2 Likes

Is this what you had in mind?

# dput() representation of a 4-row data.frame with columns year, count,
# rank, total, perc, gender, ethnicity and name (the last three as factors).
structure(list(year = c(2013L, 2013L, 2013L, 2013L), count = c(72L, 
94L, 192L, 140L), rank = c(10L, 10L, 11L, 21L), total = c(498L, 
498L, 498L, 498L), perc = c(0.144578313253012, 0.188755020080321, 
0.385542168674699, 0.281124497991968), gender = structure(c(1L, 
1L, 1L, 1L), .Label = "MALE", class = "factor"), ethnicity = structure(1:4, .Label = c("ASIAN AND PACIFIC ISLANDER", 
"BLACK NON HISPANIC", "HISPANIC", "WHITE NON HISPANIC"), class = "factor"), 
    name = structure(c(1L, 1L, 1L, 1L), .Label = "Liam", class = "factor")), row.names = c("9369", 
"9865", "10652", "11419"), class = "data.frame")

Exactly! Recreating a sample of the data in copy-paste-friendly form, as @andresrcs did for yours, is very helpful for folks who'd like to help you. :slight_smile:

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.