Using filter and pmap functions with dtplyr package

Hi there,

I'm experimenting the new data.table backend for dplyr - dtplyr package with filter, map and pmap functions.

filter_custom function below worked with regular data frame but didn't work with data.table object.
The same happened to purrr::pmap_dfr function.

What should be the right way to use these functions with dtplyr?

Thank you!


library(data.table)
library(purrr)
library(dtplyr)
library(dplyr, warn.conflicts = FALSE)

mtcars2 <- lazy_dt(mtcars)
mtcars2 %>% 
  filter(wt < 5 & wt > 1) %>% 
  mutate(l100k = 235.21 / mpg) %>% # liters / 100 km
  group_by(cyl) %>% 
  summarise(l100k = mean(l100k)) %>% 
  as_tibble()
#> # A tibble: 3 x 2
#>     cyl l100k
#>   <dbl> <dbl>
#> 1     4  9.05
#> 2     6 12.0 
#> 3     8 14.9

# define custom function
filter_custom <- function(df, filter_value1, filter_value2) {
  out <- df %>% 
    filter(wt < filter_value1 & wt > filter_value2) %>% 
    mutate(l100k = 235.21 / mpg) %>%
    group_by(cyl) %>% 
    summarise(l100k = mean(l100k)) %>% 
    as_tibble()
  return(out)
}

# error but there is still output ???
map(mtcars2, ~ print(.x$hp))
#>  [1] 110 110  93 110 175 105 245  62  95 123 123 180 180 180 205 215 230  66  52
#> [20]  65  97 150 150 245 175  66  91 113 264 175 335 109
#> Error: $ operator is invalid for atomic vectors

# create a filter data frame
filter_data <- tibble(filter_value1 = c(4, 5, 6),
                      filter_value2 = c(1, 2, 3))
filter_data2 <- lazy_dt(filter_data)

# nothing showup
filter_data2 %>% 
  pmap_dfr(~ filter_custom(mtcars2, ..1, ..2), .id = 'id') %>% 
  as_tibble()
#> # A tibble: 0 x 0

# but works with data frame object
filter_data %>% 
  set_names() %>% 
  pmap_dfr(~ filter_custom(mtcars, ..1, ..2), .id = 'id') 
#> # A tibble: 9 x 3
#>     cyl l100k id   
#>   <dbl> <dbl> <chr>
#> 1     4  9.05 1    
#> 2     6 12.0  1    
#> 3     8 14.9  1    
#> 4     4  9.79 2    
#> 5     6 12.0  2    
#> 6     8 14.9  2    
#> 7     4  9.98 3    
#> 8     6 12.4  3    
#> 9     8 16.1  3

The function didn't even work for this simple case. Why was that? Thank you!

filter_custom(mtcars2, 4, 2)
#> Error in .checkTypos(e, names_x): Object 'filter_value1' not found amongst 
#  mpg, cyl, disp, hp, drat and 6 more

If I used enquo & !! in filter_custom2 function, it worked with directly specified values. pmap still gave me: Error: ..1 used in an incorrect context, no ... to look in

library(data.table)
library(purrr)
library(dtplyr)
library(dplyr, warn.conflicts = FALSE)

mtcars2 <- lazy_dt(mtcars)
# create a filter data frame
filter_data <- data.frame(filter_value1 = c(4, 5, 6),
                          filter_value2 = c(1, 2, 3))
filter_data2 <- lazy_dt(filter_data)

filter_custom2 <- function(df, filter_value1, filter_value2) {
  filter_value1 <- enquo(filter_value1)
  filter_value2 <- enquo(filter_value2)
  out <- df %>% 
    filter(wt < !!filter_value1 & wt > !!filter_value2) %>% 
    mutate(l100k = 235.21 / mpg) %>%
    group_by(cyl) %>% 
    summarise(l100k = mean(l100k)) %>% 
    as_tibble()
  return(out)
}

# test
filter_custom2(mtcars2, 4, 2)
#> # A tibble: 3 x 2
#>     cyl l100k
#>   <dbl> <dbl>
#> 1     4  9.79
#> 2     6 12.0 
#> 3     8 14.9

# not working
filter_data %>% 
  pmap_dfr(~ filter_custom2(mtcars2, ..1, ..2), .id = 'id') %>% 
  as_tibble()
#> Error: ..1 used in an incorrect context, no ... to look in

# not working either
filter_data2 %>% 
  pmap_dfr(~ filter_custom2(mtcars2, ..1, ..2), .id = 'id') %>% 
  as_tibble()
#> # A tibble: 0 x 0

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.