Unquoting issue in purrr::map_df function.

fredericparrot · June 4, 2021, 7:35am

### function fill_forward ran well with R v.3.6.1. Now I have R v.4.0.5 and there seems to be an unquoting issue

library('rlang')
library('dplyr')
library('purrr')

### This function nowcasts a time series (.x) based on another, more timely one (.y).
###
### Arguments:
###   df  data frame containing both series as columns
###   .x  column to extend; passed unquoted and captured as a symbol by ensym()
###   .y  indicator column; passed unquoted, captured by ensym(), and removed
###       from the result
###
### Returns df with the columns obs_status and conf_status added and the .y
### column dropped; on the fill path, rows that still contain NA are removed.
###
### NOTE: .x and .y remain symbols. Indexing with a symbol works on a plain
### data.frame, but the assignment df[[.x]][i] <- ... fails on a tibble with
### "Can't use character names to index an unnamed vector."
fill_forward <- function(df, .x, .y) {
  .x <- ensym(.x)
  .y <- ensym(.y)
  ### no gaps in .x: flag every row, drop the indicator column and return early
  if (!anyNA(df[[.x]])) {
    df[["obs_status"]]  <- "A"
    df[["conf_status"]] <- "F"
    df[[.y]] <- NULL
    return(df)
  }
  
  ### position of the first missing value in .x
  firstempty <- min(which(is.na(df[[.x]])))
  ### placeholder only; the for loop below overwrites i
  i <- numeric(0)
  df[["obs_status"]]  <- "A"
  df[["conf_status"]] <- "F"
  ### every row from the first gap to the end is recomputed (also rows after
  ### the gap that already hold a value): previous .x value times the ratio of
  ### the current to the previous .y value
  for (i in firstempty:length(df[[.x]])) {
    df[[.x]][i] <- df[[.y]][i] / df[[.y]][i - 1] * df[[.x]][i - 1]
    ### recomputed rows are flagged "E" instead of "A"
    df[["obs_status"]][i]  <- "E"
  }
  df[[.y]] <- NULL
  ### drop rows that still have an NA in any remaining column
  df <- na.omit(df)
  df 
}

### single-country example: value.x ends with an NA that is to be nowcast
### from value.y
df1 <- data.frame(
  time = c(1995, 1996, 1997, 1998),
  value.x = c(2, 3, 4, NA),
  value.y = c(3, 4, 7, 10.5)
)

### two-country panel (AUT and BEL); each country's value.x ends with an NA
df <- data.frame(
country = c("AUT", "AUT", "AUT", "AUT", "BEL", "BEL", "BEL", "BEL"),
  time = c(1995, 1996, 1997, 1998, 1995, 1996, 1997, 1998),
  value.x = c(2, 3, 4, NA, 5, 7, 8, NA),
  value.y = c(3, 4, 7, 10.5, 5.5, 9, 12, 16)
)

### 1 country: fill_forward is called directly on a plain data.frame
test1 <- fill_forward(df1, value.x, value.y)
### code works, and I don't have to quote 'value.x' and 'value.y'

### more than 1 country: group_split() yields one piece per country and
### map_df() applies fill_forward to each piece and row-binds the results
test2 <- df %>%
  group_split(country) %>%
  map_df(fill_forward, value.x, value.y) ### this line no longer supports unquoting?

### Error: Can't use character names to index an unnamed vector.
### Called from: vec_as_location(i, n, names = names, arg = arg)

### if I remove .x <- ensym(.x), .y <- ensym(.y) in the function (see below)
### and quote the argument inside map_df, it works.  How do I now unquote the arguments?

### Quoted-name variant of fill_forward: nowcasts the series in column .x from
### the growth of the more timely series in column .y.
###
### Arguments:
###   df  data.frame or tibble holding both series as columns
###   .x  name of the column to extend, given as a character string
###   .y  name of the indicator column, given as a character string; this
###       column is removed from the result
###
### Returns df without the .y column and with two added columns: obs_status
### ("E" for recomputed rows, "A" otherwise) and conf_status ("F"). When .x
### had gaps, every row from the first gap to the end is recomputed and rows
### that still contain an NA in any column are dropped.
###
### Stops with an explicit message when the very first value of .x is NA,
### because there is no earlier observation to extrapolate from.
fill_forward1 <- function(df, .x, .y) {
  ### the ensym() capture is switched off on purpose: .x and .y are strings
  # .x <- ensym(.x)
  # .y <- ensym(.y)

  ### rep() instead of a recycled scalar so that a zero-row df is accepted too
  n_rows <- nrow(df)
  df[["obs_status"]]  <- rep("A", n_rows)
  df[["conf_status"]] <- rep("F", n_rows)

  target <- df[[.x]]

  ### no gaps: only the indicator column has to go
  if (!anyNA(target)) {
    df[[.y]] <- NULL
    return(df)
  }

  first_empty <- which(is.na(target))[1L]
  if (first_empty == 1L) {
    stop(
      "fill_forward1(): the first value of column '", .x,
      "' is NA, so there is no earlier observation to extrapolate from",
      call. = FALSE
    )
  }

  ### work on plain vectors: same arithmetic as indexing df on every step,
  ### but without modifying the whole data frame inside the loop
  indicator <- df[[.y]]
  filled <- first_empty:length(target)
  for (i in filled) {
    ### previous value of .x scaled by the growth of .y between the two rows
    target[i] <- indicator[i] / indicator[i - 1L] * target[i - 1L]
  }
  df[[.x]] <- target
  df[["obs_status"]][filled] <- "E"

  df[[.y]] <- NULL
  ### rows that could not be filled (still NA) are removed
  na.omit(df)
}

### now, code works: the column names are passed as quoted strings, so
### fill_forward1 indexes each piece returned by group_split() by character name
test2 <- df %>%
  group_split(country) %>%
  map_df(fill_forward1, "value.x", "value.y")

mara · June 4, 2021, 1:02pm

I'm guessing that when you updated R versions you also updated packages (which is par for the course when it's a major version change), and that the issue is coming from there. I don't have an answer, but wanted to point you to a couple hints (which led me to think that there's been a change in vctrs that's causing the error you're seeing).

Two helpful things to look at here are rlang::last_error() which gives you a sort of highlight reel of the backtrace (the most-likely-to-be-relevant items), and rlang::last_trace() which gives you the full backtrace in sequential order as a tree.

So, after running your reprex up to the error, here's what that looks like:

rlang::last_error()
# <error/rlang_error>
# Error: Can't use character names to index an unnamed vector.
Backtrace:
  1. `%>%`(...)
  2. purrr::map_df(., fill_forward, value.x, value.y)
  3. purrr::map(.x, .f, ...)
  4. global::.f(.x[[i]], ...)
  6. tibble:::`[[<-.tbl_df`(`*tmp*`, .x, value = c(2, 3, 4, 6))
  7. tibble:::vectbl_as_col_location2(...)
 10. vctrs::vec_as_location2(j, n, names)
 12. vctrs:::vec_as_location2_result(...)
 17. vctrs::vec_as_location(i, n, names = names, arg = arg)
# Run `rlang::last_trace()` to see the full context.

rlang::last_trace()
# <error/rlang_error>
# Error: Can't use character names to index an unnamed vector.
Backtrace:
     ▆
  1. ├─`%>%`(...)
  2. └─purrr::map_df(., fill_forward, value.x, value.y)
  3.   └─purrr::map(.x, .f, ...)
  4.     └─global::.f(.x[[i]], ...)
  5.       ├─base::`[[<-`(`*tmp*`, .x, value = c(2, 3, 4, 6))
  6.       └─tibble:::`[[<-.tbl_df`(`*tmp*`, .x, value = c(2, 3, 4, 6))
  7.         └─tibble:::vectbl_as_col_location2(...)
  8.           ├─tibble:::subclass_col_index_errors(...)
  9.           │ └─base::withCallingHandlers(...)
 10.           └─vctrs::vec_as_location2(j, n, names)
 11.             ├─vctrs:::result_get(...)
 12.             └─vctrs:::vec_as_location2_result(...)
 13.               ├─base::tryCatch(...)
 14.               │ └─base:::tryCatchList(expr, classes, parentenv, handlers)
 15.               │   └─base:::tryCatchOne(expr, names, parentenv, handlers[[1L]])
 16.               │     └─base:::doTryCatch(return(expr), name, parentenv, handler)
 17.               └─vctrs::vec_as_location(i, n, names = names, arg = arg)

I'll pass this on to someone who might be able to help you troubleshoot more directly.

davis · June 4, 2021, 2:32pm

I've managed to simplify this down to the fact that you can sub assign with a symbol when using a data frame, but not with a tibble.

library(tibble)
library(rlang)

# The same one-column data, once as a base data.frame and once as a tibble
df <- data.frame(x = 1)
tbl <- tibble(x = 1)

# The column name as a symbol and as a character string
x_sym <- sym("x")
x_chr <- "x"

# Subset-assignment by character name works for both classes
df[[x_chr]] <- 2
tbl[[x_chr]] <- 2

# Can subset-assign with a symbol for data frames
df[[x_sym]] <- 2

# Can't subset-assign with a symbol for tibbles
tbl[[x_sym]] <- 2
#> Error: Can't use character names to index an unnamed vector.

I'll report this upstream.

I don't think this has anything to do with the version of R you are using. It is just that you started with a data frame, but group_split() returns tibbles.

To patch your function in the meantime, you can wrap ensym(.x) in as_name() to convert the symbol to a character, which you can sub assign with.

library('rlang')
library('dplyr')
library('purrr')

### This function nowcasts a time series (.x) based on another, more timely
### one (.y).
###
### Arguments:
###   df  data.frame or tibble holding both series as columns
###   .x  column to extend; may be passed unquoted
###   .y  indicator column; may be passed unquoted; it is removed from the
###       result
###
### Returns df without the .y column and with two added columns: obs_status
### ("E" for recomputed rows, "A" otherwise) and conf_status ("F"). When .x
### had gaps, every row from the first gap to the end is recomputed and rows
### that still contain an NA in any column are dropped.
###
### Stops with an explicit message when the very first value of .x is NA,
### because there is no earlier observation to extrapolate from.
fill_forward <- function(df, .x, .y) {
  ### as_name() turns the captured symbols into strings, which can be used to
  ### sub-assign into tibbles as well as data.frames
  .x <- as_name(ensym(.x))
  .y <- as_name(ensym(.y))

  ### rep() instead of a recycled scalar so that a zero-row df is accepted too
  n_rows <- nrow(df)
  df[["obs_status"]]  <- rep("A", n_rows)
  df[["conf_status"]] <- rep("F", n_rows)

  target <- df[[.x]]

  ### no gaps: only the indicator column has to go
  if (!anyNA(target)) {
    df[[.y]] <- NULL
    return(df)
  }

  first_empty <- which(is.na(target))[1L]
  if (first_empty == 1L) {
    stop(
      "fill_forward(): the first value of column '", .x,
      "' is NA, so there is no earlier observation to extrapolate from",
      call. = FALSE
    )
  }

  ### work on plain vectors: same arithmetic as indexing df on every step,
  ### but without modifying the whole data frame inside the loop
  indicator <- df[[.y]]
  filled <- first_empty:length(target)
  for (i in filled) {
    ### previous value of .x scaled by the growth of .y between the two rows
    target[i] <- indicator[i] / indicator[i - 1L] * target[i - 1L]
  }
  df[[.x]] <- target
  df[["obs_status"]][filled] <- "E"

  df[[.y]] <- NULL
  ### rows that could not be filled (still NA) are removed
  na.omit(df)
}

### single-country example: value.x ends with an NA that is to be nowcast
### from value.y
df1 <- data.frame(
  time = c(1995, 1996, 1997, 1998),
  value.x = c(2, 3, 4, NA),
  value.y = c(3, 4, 7, 10.5)
)

### two-country panel (AUT and BEL); each country's value.x ends with an NA
df <- data.frame(
  country = c("AUT", "AUT", "AUT", "AUT", "BEL", "BEL", "BEL", "BEL"),
  time = c(1995, 1996, 1997, 1998, 1995, 1996, 1997, 1998),
  value.x = c(2, 3, 4, NA, 5, 7, 8, NA),
  value.y = c(3, 4, 7, 10.5, 5.5, 9, 12, 16)
)

### 1 country: plain data.frame input, column names passed unquoted
test1 <- fill_forward(df1, value.x, value.y)

### more than 1 country: the pieces returned by group_split() are tibbles;
### the patched fill_forward indexes them by character name, so the unquoted
### column names can be used here as well
test2 <- df %>%
  group_split(country) %>%
  map_df(fill_forward, value.x, value.y)

^{Created on 2021-06-04 by the reprex package (v2.0.0)}

system · June 25, 2021, 2:32pm

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.