Difficulties to convert my dataframe (character to numeric)

Hi,

Need help to re organize my data. I don't know why, because there is no difference in excel but when i import my data to R, some of them are character and some are numeric.
I need to convert all in numeric except for dates (t) and country (i)

Looking for a global solution if it's possible, column by column is long ^^

Thanks :slight_smile:

structure(list(GEO/TIME = c("Union européenne - 28 pays", "Union européenne - 15 pays (1995-2004)",
"Zone euro (19 pays)", "Belgique", "Bulgarie", "Tchéquie", "Danemark",
"Allemagne (jusqu'en 1990, ancien territoire de la RFA)", "Estonie",
"Irlande", "Grèce", "Espagne", "France", "Croatie", "Italie",
"Chypre", "Lettonie", "Lituanie", "Luxembourg", "Hongrie", "Malte",
"Pays-Bas", "Autriche", "Pologne", "Portugal", "Roumanie", "Slovénie",
"Slovaquie", "Finlande", "Suède", "Royaume-Uni", "Islande", "Norvège",
"Suisse", "Monténégro", "Macédoine du Nord", "Serbie", "Turquie"
), 2000 = c(":", "19", "17.600000000000001", "23.800000000000001",
"15.1", "9.5", "22", "21.399999999999999", "24.100000000000001",
"19.199999999999999", "14.1", "21.199999999999999", "19.800000000000001",
":", "8.0999999999999996", "22.100000000000001", "15.1", "35.299999999999997",
"16.699999999999999", "11.699999999999999", "4.9000000000000004",
"20.600000000000001", ":", "9.1999999999999993", "7.5", "7.5",
"12.9", "8.1999999999999993", "27.100000000000001", "26.800000000000001",
"25.699999999999999", "19", "28.600000000000001", "20.5", ":",
":", ":", ":"), 2001 = c(":", "19", "17.5", "24.399999999999999",
"17.699999999999999", "9.6999999999999993", "24.100000000000001",
"20", "24.800000000000001", "20.899999999999999", "14.5", "22",
"20.699999999999999", ":", "8.3000000000000007", "23.600000000000001",
"14.9", "19.199999999999999", "16", "11.699999999999999", "8.5",
"20.800000000000001", ":", "9.5999999999999996", "7.9000000000000004",
"7.9000000000000004", "11.6", "8.5999999999999996", "27", "22.5",
"25.899999999999999", "19.100000000000001", "30.699999999999999",
"21.399999999999999", ":", ":", ":", ":"), 2002 = c("17.399999999999999",
"19.300000000000001", "17.699999999999999", "24.899999999999999",
"17.5", "9.9000000000000004", "25.100000000000001", "18.899999999999999",
"24.399999999999999", "22", "15.199999999999999", "23", "21.5",
"13.1", "8.5999999999999996", "25.699999999999999", "15.800000000000001",
"19", "16.199999999999999", "12.1", "8.1999999999999993", "21.600000000000001",
":", "10.199999999999999", "8.0999999999999996", "8", "12.300000000000001",
"8.8000000000000007", "26.899999999999999", "23.199999999999999",
"26.699999999999999", "20.600000000000001", "30.199999999999999",
"21.399999999999999", ":", ":", ":", ":"), 2003 = c("18.100000000000001",
"20", "18.600000000000001", "25.600000000000001", "17.699999999999999",
"10", "27.199999999999999", "20.300000000000001", "24.800000000000001",
"23.5", "15.699999999999999", "23.600000000000001", "22.199999999999999",
"13.199999999999999", "9.0999999999999996", "26.100000000000001",
"14.9", "19.800000000000001", "12.6", "13.1", "8.5", "23.800000000000001",
":", "11.6", "9.4000000000000004", "7.9000000000000004", "14.4",
"9.5999999999999996", "27.5", "23.899999999999999", "25.5", "23.399999999999999",
"27.5", "22.699999999999999", ":", ":", ":", ":"), 2004 = c("19.100000000000001",
"21", "19.600000000000001", "26.800000000000001", "17.899999999999999",
"10.4", "27.899999999999999", "21.100000000000001", "25.600000000000001",
"24.899999999999999", "17.5", "24.5", "22.699999999999999", "13.199999999999999",
"10", "25.699999999999999", "16.5", "21.600000000000001", "20.800000000000001",
"14.199999999999999", "10.199999999999999", "25.699999999999999",
"15.4", "12.800000000000001", "10.800000000000001", "8.6999999999999993",
"15.699999999999999", "10.4", "28.300000000000001", "24.699999999999999",
"26.300000000000001", "23.899999999999999", "28.199999999999999",
"23.699999999999999", ":", ":", ":", ":"), 2005 = c("19.600000000000001",
"21.399999999999999", "20", "27.199999999999999", "17.800000000000001",
"11", "28.5", "20.600000000000001", "27.699999999999999", "26.100000000000001",
"17.600000000000001", "26.100000000000001", "23.399999999999999",
"13.4", "10.800000000000001", "25.699999999999999", "16.899999999999999",
"22.399999999999999", "23", "14.5", "10.300000000000001", "24.5",
"15", "13.9", "11.1", "9.0999999999999996", "16.699999999999999",
"11.4", "28.600000000000001", "25.899999999999999", "26.800000000000001",
"24.899999999999999", "28.399999999999999", "24.199999999999999",
":", ":", ":", ":"), 2006 = c("20", "21.699999999999999", "20.300000000000001",
"27.899999999999999", "18.199999999999999", "11.4", "29.300000000000001",
"20.100000000000001", "27.399999999999999", "28.100000000000001",
"18.699999999999999", "26.399999999999999", "24", "13.6", "11.4",
"27.600000000000001", "17.399999999999999", "22", "20.5", "15",
"11.1", "24.800000000000001", "14.699999999999999", "14.9", "11.699999999999999",
"9.5999999999999996", "17.800000000000001", "11.9", "29", "25.899999999999999",
"27.600000000000001", "24", "28.199999999999999", "25.199999999999999",
":", "10.6", ":", "8.4000000000000004"), 2007 = c("20.5", "22.199999999999999",
"20.800000000000001", "28.100000000000001", "18.5", "11.6", "26",
"20.399999999999999", "27.5", "30.300000000000001", "19.100000000000001",
"26.800000000000001", "24.399999999999999", "13.4", "12", "29.699999999999999",
"18.5", "23.699999999999999", "22.699999999999999", "15.4", "11.699999999999999",
"25.5", "14.6", "15.699999999999999", "12", "9.9000000000000004",
"18.5", "11.9", "30", "26.399999999999999", "28.699999999999999",
"24.800000000000001", "29.199999999999999", "26.5", ":", "11.699999999999999",
":", "8.8000000000000007"), 2008 = c("21.199999999999999",
"22.800000000000001", "21.399999999999999", "28.399999999999999",
"18.899999999999999", "12.4", "26.300000000000001", "21.399999999999999",
"28.300000000000001", "31.699999999999999", "19.800000000000001",
"27", "24.800000000000001", "13.6", "12.699999999999999", "31",
"20.699999999999999", "25.300000000000001", "23.699999999999999",
"16.399999999999999", "12.1", "26.300000000000001", "15", "16.5",
"12.6", "10.699999999999999", "19", "12.300000000000001", "30.199999999999999",
"26.899999999999999", "28.699999999999999", "25.5", "30.199999999999999",
"28.5", ":", "11", ":", "9.5"), 2009 = c("22", "23.600000000000001",
"22.100000000000001", "29.399999999999999", "19.199999999999999",
"13.4", "26.899999999999999", "22.300000000000001", "30.199999999999999",
"32.600000000000001", "19.899999999999999", "27.399999999999999",
"25.899999999999999", "14.5", "12.800000000000001", "30.5", "21.399999999999999",
"25.5", "30.199999999999999", "16.899999999999999", "12.800000000000001",
"26.800000000000001", "16", "18.100000000000001", "13.1", "11.199999999999999",
"19.600000000000001", "13.4", "30.899999999999999", "27.600000000000001",
"30", "26.5", "30.699999999999999", "29.600000000000001", ":",
"12.5", ":", "10"), 2010 = c("22.800000000000001", "24.300000000000001",
"22.699999999999999", "30.699999999999999", "19.699999999999999",
"14.5", "27.5", "22.699999999999999", "30", "33.899999999999999",
"20.899999999999999", "28.399999999999999", "26.199999999999999",
"15.699999999999999", "13", "32.100000000000001", "22.600000000000001",
"26.899999999999999", "30.300000000000001", "17.100000000000001",
"14.199999999999999", "27.699999999999999", "16.199999999999999",
"19.399999999999999", "13.9", "11.9", "20.199999999999999", "15.1",
"31.600000000000001", "28.199999999999999", "31.600000000000001",
"26.300000000000001", "31.399999999999999", "28.800000000000001",
":", "13.300000000000001", "14.6", "10.5"), 2011 = c(23.7,
25.2, 23.5, 30.4, 20.1, 15.8, 27.9, 24.3, 31.3, 34.4, 22.2, 29.3,
26.7, 15.4, 13.2, 33.7, 23.6, 27.9, 31.7, 18, 15.1, 28, 16.3,
20.3, 15.5, 12.9, 21.6, 16.4, 32.5, 29.1, 33.2, 27.4, 32.1, 28.9,
16.4, 15, 15, 11.3), 2012 = c(24.6, 26.1, 24.2, 31.3, 20.7,
17, 28.6, 24.9, 32.1, 35.7, 22.9, 30, 27.7, 15.8, 13.9, 35, 25.2,
28.6, 33.4, 19, 16.6, 28.6, 16.9, 21.5, 16.7, 13.5, 23, 17, 32.8,
30.1, 34.6, 28.5, 33, 30.1, 17.6, 15.7, 15.8, 12.4), 2013 = c(25.4,
26.9, 25, 31.5, 22.2, 18.1, 29.1, 25.2, 32.3, 37.3, 24, 30.9,
28.9, 17, 14.4, 35.4, 27, 29.8, 35.2, 19.5, 17.9, 29.3, 17.7,
22.6, 17.6, 13.8, 24.4, 17.7, 33.6, 31.4, 35.6, 29.3, 34.2, 31.9,
18.4, 15.1, 16.8, 13), 2014 = c(26, 27.3, 25.3, 32.6, 23.6,
19.1, 29.8, 23.2, 32.6, 38, 24.6, 31.7, 29.8, 18.5, 15, 36.4,
26.9, 31.4, 39.6, 20.2, 19.4, 29.7, 27.4, 23.8, 19.7, 14.2, 25.1,
18.1, 34.7, 32.8, 36.6, 30.2, 36.1, 32.9, 20.2, 15.2, 17.7, 13.6
), 2015 = c(26.7, 28, 25.9, 32.7, 24.1, 19.8, 30.7, 23.8, 33.3,
39, 25.4, 32.1, 30.4, 19.7, 15.5, 36.4, 28.1, 33.2, 35.2, 20.9,
19.9, 30.5, 28.1, 24.4, 20.7, 15, 26.6, 18.9, 35.5, 34, 37.6,
31.7, 36.7, 34.2, 20, 16.2, 18.7, 14.6), 2016 = c(27.3, 28.6,
26.5, 33.2, 24.4, 20.6, 31.2, 24.4, 34.1, 39.5, 26.4, 32.7, 30.9,
20, 15.7, 37.6, 29.5, 34.1, 36.4, 20.6, 20.3, 31, 28.9, 25.2,
21.5, 15.1, 27.2, 19.7, 35.9, 35.3, 38.3, 33.4, 36.8, 35.4, 21.2,
17.9, 19.4, 16), 2017 = c(27.9, 29.2, 27.1, 35.6, 24.5, 21.4,
32.4, 24.8, 34.7, 40.4, 27.2, 33.2, 31.4, 20.6, 16.5, 38.1, 30,
34.8, 34.1, 20.9, 22.1, 32.1, 29.7, 26.3, 21.7, 15.3, 28.7, 20.7,
36.4, 36, 38.7, 35.3, 36.8, 36.8, 20.6, 18, 19.8, 16.6), 2018 = c(28.7,
29.9, 27.9, 36, 24.8, 21.7, 32.7, 25.2, 35.9, 40.5, 27.7, 34,
32.8, 22, 17.1, 39.4, 30.1, 36.1, 38.3, 21.7, 24.6, 33, 30.1,
27.2, 22.5, 15.5, 28.7, 22, 37.3, 37.1, 39.3, 36.5, 37.5, 37.7,
20.3, 17.6, 20.4, 17.3)), row.names = c(NA, -38L), class = c("tbl_df",
"tbl", "data.frame"))

There are colons, :, embedded among the numbers. This will cause that column to be interpreted as text. Are those intentional or are they there by mistake?

Probably a mistake ^^

You can convert columns using mutate_if(is.character, as numeric). This does throw warnings if there are typos though, so you might want to use suppressMessages().

Alternatively you can read all your data in as character and clean it up in R. read_excel(col_types = cols(.default = "c")

Yes it's work ! Thanks :slight_smile: , but is it possible to select columns (because countries become N/A) ?

df %>% mutate_at(setdiff(names(df), c("countries")), as.numeric)

It works ! Thanks :slight_smile:

1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.