Can't remove redundant level names

I have a really weird Excel file that seems to trigger lots of bugs, and when I import it as a data frame into Posit there is one issue I can't resolve. For one of the variables in the data frame, R gives me two factor levels with exactly the same name (there are no differences in spacing, as trimws() didn't help), and this distorts further analyses and plots. I have tried merging the two levels with forcats::fct_collapse(), but it didn't work: they are still separate levels afterwards when I inspect them with unique(factor(df$Variable)). I feel like this is not the right command for that, but levels(df$Variable) returns NULL.

I guess a possible reason for why Posit treats the two levels differently is because originally they were separate and then two levels were combined into the one new level. But after the merge nothing in the new data frame structure which I can observe is hinting at the original separation.

Can you work with the given information? I really don't know how I can give you the necessary files/data frame structure so that you can see what's going on.

To help us help you, could you please prepare a reproducible example (reprex) illustrating your issue? Please have a look at this guide, to see how to create one:

Note: Using dput() to provide sample data should preserve any weird data structure that is giving you trouble.

Yes, of course! I had to cut down some variable levels, but I hope this works:

df =structure(list(Fraktion = structure(c(2L, 7L, 2L, 7L, 2L, 7L, 
2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 
2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 
2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 
2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 
2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 
2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 
2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 
2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 
2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 
2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 
2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 
2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 2L, 7L, 
2L, 7L, 2L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 
1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 
1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 
1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 
1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 
1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 1L, 7L, 7L, 1L, 7L, 1L, 7L, 
1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 
1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 
1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 
1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 
1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 
1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 
1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L, 1L, 7L), levels = c("Die Mitte", 
"Die Mitte", "FDP", "GLP", "Grüne", "SP", "SVP"), class = "factor"), 
    VoteRegistrationNumber = c("24692", "20249", "24691", "20251", 
    "24581", "20261", "24471", "20264", "24429", "20265", "24388", 
    "20281", "24288", "20283", "24274", "20287", "24273", "20291", 
    "24197", "20300", "24166", "20314", "24164", "20319", "24161", 
    "20336", "24151", "20338", "24148", "20410", "24145", "20424", 
    "24130", "20428", "24126", "20429", "24121", "20610", "24118", 
    "21602", "24117", "21714", "24109", "21744", "24104", "21750", 
    "24102", "21771", "24069", "21778", "23878", "21811", "23765", 
    "21814", "23573", "21816", "23421", "21824", "23390", "21967", 
    "23264", "22082", "23108", "22085", "23088", "22089", "23086", 
    "22093", "23081", "22346", "23080", "22409", "23061", "22410", 
    "22781", "22411", "22668", "22419", "22667", "22422", "22665", 
    "22435", "22624", "22436", "22619", "22439", "22616", "22440", 
    "22614", "22447", "22516", "22451", "22507", "22458", "22495", 
    "22462", "22494", "22465", "22491", "22467", "22467", "22491", 
    "22465", "22494", "22462", "22495", "22458", "22507", "22451", 
    "22516", "22447", "22614", "22440", "22616", "22439", "22619", 
    "22436", "22624", "22435", "22665", "22422", "22667", "22419", 
    "22668", "22411", "22781", "22410", "23061", "22409", "23080", 
    "22346", "23081", "22093", "23086", "22089", "23088", "22085", 
    "23108", "22082", "23264", "21967", "23390", "21824", "23421", 
    "21816", "23573", "21814", "23765", "21811", "23878", "21778", 
    "24069", "21771", "24102", "21750", "24104", "21744", "24109", 
    "21714", "24117", "21602", "24118", "20610", "24121", "20429", 
    "24126", "20428", "24130", "20424", "24145", "20410", "24148", 
    "20338", "24151", "20336", "24161", "20319", "24164", "20314", 
    "24166", "20300", "24197", "20291", "24273", "20287", "24274", 
    "20283", "24288", "20281", "24388", "20265", "24429", "20264", 
    "24471", "20261", "24581", "20251", "24691", "20249", "24692", 
    "22655", "22655", "20028", "20028", "20056", "20056", "24228", 
    "24228", "21618", "21618", "20090", "20090", "20163", "20163", 
    "20205", "20205", "20360", "20360", "20391", "20391", "20475", 
    "20475", "20565", "20565", "20700", "20700", "21798", "21798", 
    "20609", "20609", "24675", "24675", "21036", "21036", "22173", 
    "22173", "23207", "23207", "23260", "23260", "20685", "20685", 
    "22576", "22576", "20719", "20719", "22556", "22556", "20791", 
    "20791", "23364", "23364", "20848", "20848", "20869", "20869", 
    "20927", "20927", "20997", "20997", "21049", "21049", "21079", 
    "21079", "21091", "21091", "21112", "21112", "21148", "21148", 
    "21183", "21183", "22280", "22280", "23724", "23724", "21146", 
    "21146", "20858", "20858", "21191", "21191", "21197", "21197", 
    "21586", "21586", "24657", "24659", "24657", "24659", "21199", 
    "21199", "23513", "23513", "21205", "21205", "22164", "22164", 
    "21265", "21265", "21227", "21227", "23687", "23687", "21254", 
    "21254", "21304", "21304", "21369", "21369", "21449", "21449", 
    "21521", "21521", "21567", "21567", "21869", "21869", "21901", 
    "21901", "22298", "22298", "23346", "23346", "24329", "24329", 
    "23738", "23738", "22304", "22304", "22314", "22314", "22317", 
    "22317", "22611", "22611", "24688", "24688", "22722", "22722", 
    "22838", "22838", "22144", "22144", "23033", "23033", "23044", 
    "23044", "23057", "23057", "23173", "23173", "23206", "23206", 
    "23387", "23387", "23600", "23600", "23643", "23643", "24505", 
    "24505", "23644", "23644", "23675", "23675", "23676", "23676", 
    "23829", "23829", "23875", "23875", "24076", "24076", "24220", 
    "24220", "22373", "22373", "24229", "24229", "24266", "24266", 
    "24272", "24272", "24338", "24338", "22130", "22130", "24393", 
    "24393", "24397", "24397", "24601", "24601", "24770", "24770", 
    "24774", "24774", "24714", "24714"), AI = c(1, 0.94545455, 
    1, 1, 1, 1, 0.84482759, 0.97272727, 1, 0.97272727, 0.5862069, 
    1, 1, 0.89090909, 0.85, 0.8909091, 1, 1, 1, 1, 0.8965517, 
    1, 0.9482759, 0.83636364, 0.8448276, 0.8055556, 1, 0.77358491, 
    1, 1, 1, 0.75, 0.51785714, 0.80555556, 0.88888889, 0.13888889, 
    0.42307692, 0.2358491, 0.76, 1, 1, 1, 1, 0.6666667, 0.88461538, 
    1, 0.88461538, 1, 0.94444444, 1, 0.637931, 0.96938776, 1, 
    0.7244898, 0.7857143, 1, 1, 1, 0.9423077, 0.9189189, 0.2741935, 
    1, 1, 1, 0.4, 1, 0.65, 1, 1, 0.9722222, 0.7857143, 0.7545455, 
    1, 0.9444444, 0.85, 0.9722222, 0.9, 0.6181818, 0.85, 0.2924528, 
    1, 0.8055556, 0.9, 1, 0.8, 0.8888889, 0.7580645, 0.7818182, 
    0.7096774, 1, 1, 1, 0.95, 1, 1, 0.9727273, 0.9482759, 0.9166667, 
    0.9482759, 1, 1, 0.9166667, 1, 0.4339623, 1, 0.3207547, 0.6785714, 
    0.75, 1, 0.79807692, 1, 1, 0.9482759, 1, 0.9482759, 1, 0.8448276, 
    0.9150943, 0.2758621, 0.3055556, 1, 1, 1, 0.9150943, 0.7931034, 
    1, 1, 0.97, 1, 1, 0.4193548, 0.2058824, 0.6129032, 0.9411765, 
    0.9516129, 1, 1, 0.625, 0.7096774, 1, 1, 0.6170213, 1, 0.3942308, 
    0.75806452, 0.9361702, 0.90322581, 0.2641509, 1, 1, 1, 1, 
    1, 1, 0.8965517, 1, 0.8448276, 1, 0.7857143, 0.97058824, 
    0.4827586, 1, 0.9482759, 0.96938776, 1, 1, 1, 1, 1, 0.79, 
    1, 0.65625, 0.9, 0.41836735, 1, 1, 0.8888889, 0.9166667, 
    1, 0.9444444, 0.6612903, 1, 1, 0.6111111, 0.7580645, 0.9722222, 
    1, 0.93877551, 1, 1, 0.7096774, 0.74528302, 0.8548387, 0.97169811, 
    0.9032258, 1, 0.8, 1, 0.9516129, 1, 1, 1, 1, 0.972222222222222, 
    0.370967741935484, 0.916666666666667, 1, 1, 1, 0.73, 1, 1, 
    0.758064516129032, 1, 0.903225806451613, 1, 1, 1, 1, 1, 1, 
    0.688679245283019, 0.844827586206897, 1, 1, 1, 1, 1, 0.777777777777778, 
    0.972222222222222, 0.826923076923077, 1, 0.4, 0.941176470588235, 
    0.95, 0.911764705882353, 0.482758620689655, 1, 1, 0.938775510204082, 
    0.85, 1, 1, 0.882352941176471, 0.95, 1, 0.946428571428571, 
    0.970588235294118, 0.5, 0.972222222222222, 0.7, 0.80188679245283, 
    1, 1, 0.948275862068966, 1, 1, 1, 0.5, 1, 0.942307692307692, 
    1, 1, 0.972727272727273, 0.948275862068966, 1, 1, 0.945454545454545, 
    1, 0.641304347826087, 0.951612903225806, 0.94, 0.951612903225806, 
    0.913461538461538, 0.637931034482759, 0.235294117647059, 
    0.95, 0.944444444444444, 0.793103448275862, 0.861111111111111, 
    1, 0.941176470588235, 0.55, 1, 0.769230769230769, 1, 1, 0.571428571428571, 
    1, 1, 1, 1, 1, 0.73, 1, 1, 1, 0.971153846153846, 1, 1, 0.467741935483871, 
    1, 0.482758620689655, 1, 1, 1, 1, 1, 1, 0.972222222222222, 
    0.85, 1, 0.661290322580645, 0.972727272727273, 0.277777777777778, 
    0.888888888888889, 0.75, 0.73, 0.661290322580645, 1, 1, 0.971153846153846, 
    1, 0.823529411764706, 0.732142857142857, 1, 0.8, 0.970588235294118, 
    0.854838709677419, 1, 0.95, 1, 1, 1, 1, 0.943396226415094, 
    1, 1, 0.758064516129032, 1, 0.946428571428571, 1, 1, 0.855769230769231, 
    1, 1, 0.75, 0.943396226415094, 0.944444444444444, 0.911764705882353, 
    1, 1, 0.85, 0.80188679245283, 1, 0.970588235294118, 1, 0.96875, 
    1, 0.638888888888889, 0.347826086956522, 0.625, 1, 1, 1, 
    0.852941176470588, 0.896551724137931, 0.852941176470588, 
    1, 0.716981132075472, 1, 1, 1, 1, 0.948275862068966, 1, 0.785714285714286, 
    1, 0.379310344827586, 0.970588235294118, 0.951612903225806, 
    1, 0.854838709677419, 1, 0.839285714285714, 1, 1, 1, 1, 1, 
    0.833333333333333, 1, 0.85, 1, 1, 1, 0.9, 0.941176470588235, 
    1, 0.97)), row.names = c(NA, -400L), class = c("tbl_df", 
"tbl", "data.frame"))

Your dput created a structure with an explicitly duplicated factor level, as you can see in the middle of your code: levels = c("Die Mitte", "Die Mitte", "FDP", "GLP", "Grüne", "SP", "SVP"). It is hard to say how this happened without following your steps from the Excel file. How did you import it? Did you use the readxl package?

The simplest way of correcting factor levels is to convert the variable into character and then back into factor:

# Rebuild Fraktion from its character labels so that levels sharing the same
# label collapse into a single level. as.factor() on a character vector takes
# its levels from the sorted unique values, so levels used by no row are
# dropped as well.
# The result is assigned back to df: without the assignment the corrected
# tibble would only be printed and df itself would keep the duplicated levels.
# dplyr::mutate() is namespaced so the snippet works without library(dplyr).
df <- df |>
  dplyr::mutate(Fraktion = as.factor(as.character(Fraktion)))

But you should check if the values in this column correspond exactly to what you have in the Excel file.

There could be invisible Unicode characters which would get lost in the rendering of the text to the forum...
Try this, assuming your data frame is called df:

# Store the level labels of the Fraktion factor and display them.
lf <- levels(df[["Fraktion"]])
print(lf)
# Convert every label to its raw bytes, one list element per level, so that
# labels which look the same on screen can be compared byte by byte.
lapply(X = lf, FUN = charToRaw)

I expect that the two Die Mitte entries will have different charToRaw values in their level strings

This topic was automatically closed 42 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.