The function works for me with invented data. Please post the output of
summary(DF)
where you replace DF with the data you are using.
Here is what I ran.
# Invent data: 173 identical rows that mimic the layout of the real table.
# check.names = FALSE keeps the non-syntactic column names exactly as written.
DF <- data.frame(
  Ref. = rep("A", 173),
  `Units[b]` = rep("B", 173),
  `Country or region` = rep("C", 173),
  date = rep("D", 173),
  tested = rep("1,234", 173),
  confirmed = rep("2,345", 173),
  confirmed.tested.ratio = rep("3,456", 173),
  tested.population.ratio = rep("4,567", 173),
  confirmed.population.ratio = rep("7,234", 173),
  check.names = FALSE
)
head(DF)
#> Ref. Units[b] Country or region date tested confirmed confirmed.tested.ratio
#> 1 A B C D 1,234 2,345 3,456
#> 2 A B C D 1,234 2,345 3,456
#> 3 A B C D 1,234 2,345 3,456
#> 4 A B C D 1,234 2,345 3,456
#> 5 A B C D 1,234 2,345 3,456
#> 6 A B C D 1,234 2,345 3,456
#> tested.population.ratio confirmed.population.ratio
#> 1 4,567 7,234
#> 2 4,567 7,234
#> 3 4,567 7,234
#> 4 4,567 7,234
#> 5 4,567 7,234
#> 6 4,567 7,234
#' Clean the raw COVID-19 testing table.
#'
#' Drops the "World" row, keeps at most the first 172 remaining rows, removes
#' the `Ref.` and `Units[b]` columns, renames the seven remaining columns by
#' position, and converts their types (factors for country/date, numerics
#' with thousands separators stripped for the rest).
#'
#' @param data_frame A data frame containing the columns `Ref.`, `Units[b]`
#'   and `Country or region`. After `Ref.` and `Units[b]` are removed, exactly
#'   seven columns must remain, in the order country, date, tested,
#'   confirmed, confirmed/tested ratio, tested/population ratio,
#'   confirmed/population ratio.
#' @return A data frame with columns `country`, `date` (factors) and
#'   `tested`, `confirmed`, `confirmed.tested.ratio`,
#'   `tested.population.ratio`, `confirmed.population.ratio` (numeric).
preprocess_covid_data_frame <- function(data_frame) {
  new_names <- c(
    "country", "date", "tested", "confirmed", "confirmed.tested.ratio",
    "tested.population.ratio", "confirmed.population.ratio"
  )
  numeric_cols <- setdiff(new_names, c("country", "date"))

  # Remove the World row. `%in%` never returns NA, so a row with a missing
  # country is kept as-is instead of being turned into an all-NA row.
  is_world <- data_frame$`Country or region` %in% "World"
  data_frame <- data_frame[!is_world, ]

  # Keep at most the first 172 rows. Unlike `[1:172, ]`, head() does not pad
  # the result with NA rows when fewer than 172 rows are available.
  data_frame <- head(data_frame, 172L)

  # We don't need the Units and Ref columns, so they can be removed
  data_frame["Ref."] <- NULL
  data_frame["Units[b]"] <- NULL

  # Renaming is positional, so fail loudly if the layout is not as expected.
  if (ncol(data_frame) != length(new_names)) {
    stop(
      "Expected ", length(new_names), " columns after dropping `Ref.` and ",
      "`Units[b]`, found ", ncol(data_frame), ".",
      call. = FALSE
    )
  }
  names(data_frame) <- new_names

  # Convert column data types
  data_frame$country <- as.factor(data_frame$country)
  data_frame$date <- as.factor(data_frame$date)
  # Strip thousands separators ("1,234" -> 1234) before converting to numeric.
  data_frame[numeric_cols] <- lapply(
    data_frame[numeric_cols],
    function(column) as.numeric(gsub(",", "", column, fixed = TRUE))
  )

  data_frame
}
# Run the preprocessing on the invented data and summarise the cleaned result.
OUT <- preprocess_covid_data_frame(DF)
summary(OUT)
#> country date tested confirmed confirmed.tested.ratio
#> C:172 D:172 Min. :1234 Min. :2345 Min. :3456
#> 1st Qu.:1234 1st Qu.:2345 1st Qu.:3456
#> Median :1234 Median :2345 Median :3456
#> Mean :1234 Mean :2345 Mean :3456
#> 3rd Qu.:1234 3rd Qu.:2345 3rd Qu.:3456
#> Max. :1234 Max. :2345 Max. :3456
#> tested.population.ratio confirmed.population.ratio
#> Min. :4567 Min. :7234
#> 1st Qu.:4567 1st Qu.:7234
#> Median :4567 Median :7234
#> Mean :4567 Mean :7234
#> 3rd Qu.:4567 3rd Qu.:7234
#> Max. :4567 Max. :7234
Created on 2022-01-02 by the reprex package (v2.0.1)