Hi Everyone and @javierluraschi,
I want to rename columns in a spark table, but I need the flexibility to pass arbitrary names.
Can someone help with Attempt 3 in the reprex below, please?
library(sparklyr)
library(rlang)
#>
#> Attaching package: 'rlang'
#> The following object is masked from 'package:sparklyr':
#>
#> invoke
options(sparklyr.java9 = TRUE)
sc <- spark_connect(master = "local")
data <- dplyr::select(mtcars, mpg, cyl, disp) %>% head(5)
remote_tbl <- dplyr::copy_to(dest = sc,
df = data,
name = "remote_tbl",
overwrite = TRUE)
remote_tbl
#> # Source: spark<remote_tbl> [?? x 3]
#> mpg cyl disp
#> <dbl> <dbl> <dbl>
#> 1 21 6 160
#> 2 21 6 160
#> 3 22.8 4 108
#> 4 21.4 6 258
#> 5 18.7 8 360
# Attempt 1: -------------------------------------------------------------
# Standard renaming works fine
x <- c(mpg_version2 = "mpg")
dplyr::rename(remote_tbl, !!!x)
#> # Source: spark<?> [?? x 3]
#> mpg_version2 cyl disp
#> <dbl> <dbl> <dbl>
#> 1 21 6 160
#> 2 21 6 160
#> 3 22.8 4 108
#> 4 21.4 6 258
#> 5 18.7 8 360
# Attempt 2: -------------------------------------------------------------
# This also works fine
fn <- function(data, new_name) {
data %>% dplyr::rename(!!new_name := 'mpg')
}
fn(remote_tbl, new_name = "mpg_version2")
#> # Source: spark<?> [?? x 3]
#> mpg_version2 cyl disp
#> <dbl> <dbl> <dbl>
#> 1 21 6 160
#> 2 21 6 160
#> 3 22.8 4 108
#> 4 21.4 6 258
#> 5 18.7 8 360
# Attempt 3: -------------------------------------------------------------
# Any idea why this fails although Attempt 1 works?
# How can i make it work?
fn <- function(data, new_name) {
x <- glue::glue("c({new_name} = 'mpg')") %>%
rlang::parse_expr() %>%
eval()
dplyr::rename(data, !!x)
}
fn(remote_tbl, new_name = "mpg_version2")
#> Error: All arguments must be named
Created on 2020-01-17 by the reprex package (v0.3.0)
Thank you