Hi!
I have a data set with more than 17000 observations which contain many NAs. I compare different values from different columns with each other.
Although ggplot removes the incomplete rows, ggplot2 writes "NA" into the legend. For that reason, I'm trying to remove the NAs from my geom_point, but only for the values I'm using in the specific graph. I tried it with "na.rm", "na.omit", "!is.na", "complete.cases", and with "filter", but nothing worked until now.
Here is my code:
library(ggplot2)
library(dplyr)
library(tidyverse)
#sample data
Vergleich <- data.frame(
mean_hh_wohnbdl = c(6,6,3,3,3,1,9,1,7,2,4,9,3,3,9),
gleich_entf_fakt = c(16.00,66.67,50.00,14.29,42.86,17.14,33.33,116.67,200.00,20.00,66.67,40.00,70.00,NA,33.33)
)
Vergleich <- mutate(Vergleich,gleich_entf_kat = cut(gleich_entf_fakt,breaks = c(0,30,50,70,90,110,130,150,170,200000),labels = c("5","4","3","2","1","2","3","4","5")))
#geom_point
ggplot(Vergleich, aes(x=as.numeric(mean_hh_wohnbdl),y=as.numeric(gleich_entf_fakt))) +
geom_point(aes(color = factor(gleich_entf_kat)),na.rm=TRUE) +
scale_color_manual(values=c("red4","red3","orange","green3","green4"),labels=c("sehr schlecht","schlecht","mäßig","gut","sehr gut")) +
scale_x_continuous(breaks = 1:9,name="Bundesland",labels = c("B","K","NÖ","OÖ","S","ST","T","V","W")) +
scale_fill_manual(breaks = 1:9,name="Bundesland",labels = c("B","K","NÖ","OÖ","S","ST","T","V","W")) +
labs(title = "Übereinstimmung nach Bundesland",
x = "Bundesland",
y = "Übereinstimmungsgrad",
color = "gleich_entf_kat") +
theme(plot.title = element_text(hjust = 0.5,face = "bold")) +
ggsave("04.01-Uebereinstimmung_Bundesland.png")
And my graph with NA in the legend:
Hope I was clear enough by explaining my problem.
Thanks in advance.