Drop variables based on condition

budugulo · August 4, 2020, 4:25pm

How do I drop variables (columns) that contain certain words, e.g. "ERROR1" or "ERROR2" or both? I am looking for a general solution. Thanks in advance!

library(tidyverse)
# Toy data for the question: two years x four categories, one clean numeric
# column (i) and three columns (k, l, m) that contain "ERROR1"/"ERROR2".
# Combining numbers and a string inside c() yields a character vector, which
# is why k, l and m come out as <chr>.
toy_data <- tibble(
  year = rep(c(2010L, 2011L), each = 4L),
  cat = rep(LETTERS[1:4], times = 2L),
  i = rnorm(n = 8L),
  k = c(rnorm(n = 7L), "ERROR1"),
  l = c(rnorm(n = 7L), "ERROR2"),
  m = c("ERROR1", "ERROR2", rnorm(n = 6L))
)
toy_data
#> # A tibble: 8 x 6
#>    year cat        i k                  l                 m                  
#>   <int> <chr>  <dbl> <chr>              <chr>             <chr>              
#> 1  2010 A     -0.255 0.17487171060379   0.53752746125903  ERROR1             
#> 2  2010 B      0.947 2.20960747642375   -1.07459398632459 ERROR2             
#> 3  2010 C      0.402 0.331472426766173  -1.26145314640879 0.109379293657667  
#> 4  2010 D      0.764 -0.257007016841951 -0.51664541999114 -0.634155097786941 
#> 5  2011 A     -1.02  1.54227000076291   2.42946294561708  -0.0146312521180938
#> 6  2011 B      1.11  -0.666488302277402 0.354761773034822 0.0051401757579631 
#> 7  2011 C     -1.44  0.738880132017502  0.834126780022647 -1.39864301101319  
#> 8  2011 D     -1.09  ERROR1             ERROR2            0.53178497895577

^{Created on 2020-08-04 by the reprex package (v0.3.0)}

nirgrahamuk · August 4, 2020, 4:57pm

library(tidyverse)
# Rebuild the toy data from the question. Mixing rnorm() output with a string
# inside c() coerces the whole vector to character, so k, l and m are <chr>.
toy_data <- tibble(
  year = rep(2010:2011, each = 4),
  cat = rep(c("A", "B", "C", "D"), times = 2),
  i = rnorm(8),
  k = c(rnorm(7), "ERROR1"),
  l = c(rnorm(7), "ERROR2"),
  m = c("ERROR1", "ERROR2", rnorm(6))
)

# Exploratory step (printed, not assigned): for every character column, add a
# `<column>_grep` flag that is TRUE when any value in that column contains
# "ERROR". across() is used in place of the superseded mutate_if().
toy_data %>%
  mutate(across(
    where(is.character),
    list(grep = ~ any(grepl(pattern = "ERROR", x = .x)))
  ))

# One logical per character column: TRUE means no value contains "ERROR"
# (keep the column), FALSE means at least one value does (drop the column).
# unlist() flattens the one-row summary into a named logical vector, and the
# outer parentheses print the result as well as assigning it.
(char_cols_to_drop_df <- toy_data %>%
  select(where(is.character)) %>%
  summarise(across(
    everything(),
    function(x) !any(grepl(pattern = "ERROR", x = x))
  )) %>%
  unlist())

# Names of the columns flagged FALSE above, i.e. the ones to drop.
(char_cols_to_drop <- names(which(!char_cols_to_drop_df)))

# all_of() makes it explicit that the names come from an external character
# vector rather than from columns of toy_data.
(final <- toy_data %>% select(-all_of(char_cols_to_drop)))
# A tibble: 8 x 3
# year cat        i
# <int> <chr>  <dbl>
# 1  2010 A      0.397
# 2  2010 B     -1.29 
# 3  2010 C     -0.363
# 4  2010 D      1.94 
# 5  2011 A      0.149
# 6  2011 B      1.12 
# 7  2011 C      0.406
# 8  2011 D     -0.626

budugulo · August 6, 2020, 12:19am

@nirgrahamuk thanks a lot!

Could you please help me understand the following chunk of code? I am a little lost, as I'm a beginner.

# Builds a named logical vector with one entry per character column:
# TRUE when no value in the column contains "ERROR", FALSE otherwise.
# The outer parentheses make R print the value as well as assign it.
(char_cols_to_drop_df <- toy_data %>%
  # keep only the character columns
  select(where(is.character)) %>%
  # collapse each remaining column to a single TRUE/FALSE
  summarise_all(function(x) {
    # grepl() tests each value against "ERROR"; any() is TRUE if at least one
    # matched; ! negates, so TRUE here means "no ERROR found in this column"
    !any(grepl(
      x = x,
      pattern = "ERROR"
    ))
  # unlist() flattens the one-row result into a named logical vector
  }) %>% unlist())

system · August 27, 2020, 12:19am

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.