Hi!
I have a data set with more than 17000 observations which contain many NA
s. I compare different values from different columns with each other.
Although ggplot removes the incomplete rows, ggplot2 writes "NA
" into the legend. For that reason, I'm trying to remove the NAs from my geom_point, but only for the values I'm using in the specific graph. I tried it with "na.rm
", "na.omit
", "!is.na
", "complete.cases
", and with "filter
", but nothing worked until now.
Here is my code:
library(ggplot2)
library(dplyr)
library(tidyverse)
#sample data
Vergleich <- data.frame(
mean_hh_wohnbdl = c(6,6,3,3,3,1,9,1,7,2,4,9,3,3,9),
gleich_entf_fakt = c(16.00,66.67,50.00,14.29,42.86,17.14,33.33,116.67,200.00,20.00,66.67,40.00,70.00,NA,33.33)
)
Vergleich <- mutate(Vergleich,gleich_entf_kat = cut(gleich_entf_fakt,breaks = c(0,30,50,70,90,110,130,150,170,200000),labels = c("5","4","3","2","1","2","3","4","5")))
#geom_point
ggplot(Vergleich, aes(x=as.numeric(mean_hh_wohnbdl),y=as.numeric(gleich_entf_fakt))) +
geom_point(aes(color = factor(gleich_entf_kat)),na.rm=TRUE) +
scale_color_manual(values=c("red4","red3","orange","green3","green4"),labels=c("sehr schlecht","schlecht","mäßig","gut","sehr gut")) +
scale_x_continuous(breaks = 1:9,name="Bundesland",labels = c("B","K","NÖ","OÖ","S","ST","T","V","W")) +
scale_fill_manual(breaks = 1:9,name="Bundesland",labels = c("B","K","NÖ","OÖ","S","ST","T","V","W")) +
labs(title = "Übereinstimmung nach Bundesland",
x = "Bundesland",
y = "Übereinstimmungsgrad",
color = "gleich_entf_kat") +
theme(plot.title = element_text(hjust = 0.5,face = "bold")) +
ggsave("04.01-Uebereinstimmung_Bundesland.png")
And my graph with NA
in the legend:
Hope I was clear enough by explaining my problem.
Thanks in advance.