Replacing NA with column mean

I have a data set column which contains data in hour_minuet_seconds (Ex-03:20:00)format. I want to fill the values in the column with the mean value of the column.

like this example?

library(dplyr)
library(lubridate)

df <- data.frame(
    id = c(1L, 2L, 3L, 4L, 5L, 6L, 7L),
    time = c("07:45:00", "09:45:00", NA, "08:45:00", "23:50:00",
                   "07:45:00", "16:45:00")
)

df %>% 
    mutate(time = as.numeric(hms(time)),
           time = if_else(is.na(time), mean(time, na.rm=TRUE), time),
           time = seconds_to_period(time))

#>   id        time
#> 1  1   7H 45M 0S
#> 2  2   9H 45M 0S
#> 3  3 12H 25M 50S
#> 4  4   8H 45M 0S
#> 5  5  23H 50M 0S
#> 6  6   7H 45M 0S
#> 7  7  16H 45M 0S

Created on 2019-09-06 by the reprex package (v0.3.0.9000)

2 Likes

Another solution:

df <- data.frame(id = 1:7,
                 time = c("07:45:00", "09:45:00", NA, "08:45:00", "23:50:00", "07:45:00", "16:45:00"),
                 stringsAsFactors = FALSE)

within(data = df,
       expr =
         {
           time <- replace(x = time,
                           list = is.na(x = time),
                           values = format(x = mean(x = as.POSIXct(x = time,
                                                                   format = "%H:%M:%S"),
                                                    na.rm = TRUE),
                                           format = "%H:%M:%S"))
         })
#>   id     time
#> 1  1 07:45:00
#> 2  2 09:45:00
#> 3  3 12:25:50
#> 4  4 08:45:00
#> 5  5 23:50:00
#> 6  6 07:45:00
#> 7  7 16:45:00

Created on 2019-09-07 by the reprex package (v0.3.0)

1 Like

One more alternative using the lesser known chron package along with dplyr. Personally, I think this is more concise and easier to read and interpret.

library(dplyr)

df <- data.frame(
  id = c(1L, 2L, 3L, 4L, 5L, 6L, 7L),
  time = c("07:45:00", "09:45:00", NA, "08:45:00", "23:50:00", "07:45:00", "16:45:00")
)

df %>%
  mutate(
    time = chron::times(time),
    time = if_else(is.na(time), mean(time), time)
    )
#>    id    time
#> 1  1 07:45:00
#> 2  2 09:45:00
#> 3  3 12:25:50
#> 4  4 08:45:00
#> 5  5 23:50:00
#> 6  6 07:45:00
#> 7  7 16:45:00
2 Likes