construct dplyr expressions, then evaluate them

MayaGans · April 28, 2020, 4:30pm

I have a crazy idea where I'd like to construct dplyr logic and THEN apply it to a pipeline. Here's an example of what I'm trying to do:

# create a single function for input data
# mutated column
# then select columns
#
# Illustrative sketch of the goal (the thread asks how to make this work):
#   data                - input data frame
#   mutated_column_name - intended name of the new column; note that it is
#                         not referenced anywhere in the body below
#   mutation_logic      - logic for the new column; handed to mutate() as-is
#   select_vector       - columns to keep; handed to select() as-is
eval_data <- function(data, mutated_column_name, mutation_logic, select_vector) {
  data %>%
    mutate(
      mutation_logic
    ) %>%
    select(
      select_vector
    )
}

# user sets name of mutated column
col_name <- "col_y"

# user creates some mutation logic
# NOTE(review): inside the function body, `mutated_column_name = case_when(...)`
# is an ordinary assignment to a local variable, so the value passed in
# ("col_y") is never used as a column name.
my_mutation <- function(mutated_column_name) {
  mutated_column_name = case_when(
    Sepal.Length > 7 ~'Big',
    Sepal.Length > 6 ~ 'Medium',
    TRUE ~ 'Small'
  )
}

# user selects columns they want
selects <- c("col_y", "Species")

# NOTE(review): eval_data() is declared with four parameters, but only three
# arguments are supplied in this call.
eval_data(iris, my_mutation(col_name), selects)

Desired output:

# Hand-written target: label each row by Sepal.Length, then keep only the
# new label column and Species.
iris %>%
  mutate(col_y = case_when(
    Sepal.Length > 7 ~ "Big",
    Sepal.Length > 6 ~ "Medium",
    TRUE ~ "Small"
  )) %>%
  select("col_y", "Species")

I've played with turning the mutation logic into a string to be evaluated in the pipeline, but that seems a little fragile - I was wondering if people had better ideas or suggestions for this?

nirgrahamuk · April 28, 2020, 5:01pm

# create a single function for input data
# mutated column
# then select columns
library(rlang)
library(tidyverse)

# Add one computed column to `data`, then keep a chosen set of columns.
#
# Arguments:
#   data                - a data frame
#   mutated_column_name - string: name of the column to create
#   mutation_logic      - an unevaluated expression (e.g. from rlang::expr())
#                         that computes the new column; it may refer to
#                         columns of `data`
#   select_vector       - character vector of column names to keep
# Returns `data` with the new column added, restricted to `select_vector`.
eval_data <- function(data, mutated_column_name, mutation_logic,
                      select_vector) {
  data %>%
    mutate(
      # `!!` injects the captured expression into mutate(), so dplyr evaluates
      # it against the columns of `data` exactly like hand-written code,
      # instead of going through a nested base eval().
      !!sym(mutated_column_name) := !!mutation_logic
    ) %>%
    # all_of() states explicitly that select_vector is an external character
    # vector of column names rather than a column of `data`.
    select(all_of(select_vector))
}

# Name of the column that the mutation will create
col_name <- "col_y"

# Mutation logic, captured unevaluated so it can be run later inside mutate()
my_mutation <- rlang::expr(
  case_when(
    Sepal.Length > 7 ~ "Big",
    Sepal.Length > 6 ~ "Medium",
    TRUE ~ "Small"
  )
)

# Columns to keep in the result
selects <- c("col_y", "Species")

eval_data(iris, col_name, my_mutation, selects)

MayaGans · April 28, 2020, 8:20pm

Hi @nirgrahamuk, thank you so much! Do you think I can bother you with a slightly more complex example? There's a list of datasets, a flexible list of cleaning steps, and finally a list of selects for each dataset - so I'd like a function that can do:

data %>%
MIDDLE LOGIC LIST %>%
select(select_list)

where the middle logic list can be nothing, one mutate, two mutates, or a filter

# Four copies of iris; each name hints at the cleaning step meant for it.
dd <- list(
  data = list(mutate0 = iris, mutate1 = iris, mutate2 = iris, filter1 = iris)
)

# Columns to keep for each dataset, position by position: always Species,
# plus the extra column(s) listed here.
select_vec <- lapply(
  list(
    "Sepal.Length",
    "New_Column1",
    c("New_Column2", "New_Column3"),
    "Sepal.Width"
  ),
  function(extra) c("Species", extra)
)

# Cleaning steps, one entry per dataset in dd$data (same order). Each step is
# captured unevaluated with rlang::expr() so it can be applied later.
logic <- list(
  # do nothing: NULL marks "no cleaning step" (I() cannot be called without
  # an argument, so it cannot serve as the placeholder here)
  NULL,
  # mutate1
  rlang::expr(
    mutate(
      New_Column1 = case_when(
        Sepal.Length > 7 ~ "Big",
        Sepal.Length > 6 ~ "Medium",
        TRUE ~ "Small"
      )
    )
  ),
  # mutate2: two mutate() steps chained inside one captured expression
  rlang::expr(
    mutate(
      New_Column2 = case_when(
        Sepal.Length > 7 ~ "Big2",
        Sepal.Length > 6 ~ "Medium2",
        TRUE ~ "Small2"
      )
    ) %>%
      mutate(
        New_Column3 = case_when(
          Sepal.Length > 7 ~ "Big3",
          Sepal.Length > 6 ~ "Medium3",
          TRUE ~ "Small3"
        )
      )
  ),
  # filter1
  rlang::expr(filter(Sepal.Width > 3))
)
# Apply captured cleaning steps to a data frame, then select columns.
#
# Arguments:
#   data           - a data frame
#   mutation_logic - NULL for "no cleaning", or an unevaluated step such as
#                    rlang::expr(mutate(x = 1)); several steps may be chained
#                    with %>% inside the captured expression
#   select_vector  - character vector of column names to keep
# Returns the cleaned data restricted to `select_vector`.
eval_data <- function(data, mutation_logic, select_vector) {
  # Functions named in the steps (mutate, filter, ...) are looked up starting
  # from the environment eval_data() was called from.
  cleaned <- .apply_steps(data, mutation_logic, env = parent.frame())
  # all_of() marks select_vector as an external character vector of names.
  select(cleaned, all_of(select_vector))
}

# Run one captured step, or a `%>%` chain of steps, against `data`.
.apply_steps <- function(data, steps, env) {
  if (is.null(steps)) {
    return(data)
  }
  if (!is.call(steps)) {
    stop("`mutation_logic` must be NULL or an unevaluated call.", call. = FALSE)
  }
  if (identical(steps[[1]], quote(`%>%`))) {
    # A captured chain `a %>% b` is applied left to right: first `a`, then `b`.
    data <- .apply_steps(data, steps[[2]], env)
    return(.apply_steps(data, steps[[3]], env))
  }
  # Single verb call: insert the data as its first argument (what
  # `data %>% verb(...)` would do) and evaluate the completed call.
  completed <- as.call(
    append(as.list(steps), list(quote(.step_input)), after = 1L)
  )
  eval(completed, list(.step_input = data), env)
}

# WORKS !
# eval_data(dd$data[[1]], logic[[1]], select_vec[[1]])

# TODO
# eval_data(dd$data[[2]], logic[[2]], select_vec[[2]])
# eval_data(dd$data[[3]], logic[[3]], select_vec[[3]])
# eval_data(dd$data[[4]], logic[[4]], select_vec[[4]])

# GOAL
# pmap(list(dd$data, logic, select_vec), eval_data)

Is this possible? Thank you SO much!!

system · May 5, 2020, 8:20pm

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.