ifelse elseif on a string appearing between date range

regex of my data frame:

library(tidyverse)

game_date <- as.Date(c('2014-10-01', '2014-10-02', '2014-10-02',
                       '2014-10-03', '2014-10-03', '2014-10-03',
                       '2014-10-04', '2014-10-04', '2014-10-04'))

t1 <- c('LDN', 'PBO', 'BAR',
        'BAR', 'BEL', 'LDN',
        'BAR', 'PBO', 'GUE')

t2 <- c('GUE', 'BEL', 'SUD',
        'GUE', 'PBO', 'KIT',
        'LDN', 'NB', 'BEL')

df <- data.frame(game_date, t1, t2)

My goal is to create two columns that produce either "rested", "fatigue" or "tired" based on if the string appears in previous days.

Conditions:
rested = does not appear in any row defined by game_date -1
fatigue = string appears in any row defined by game_date-1
tired = string appears in any row defined by game_date-1 & game_date-2

Here is picture of ultimate goal.

image

Thank you in advance,

df <- data.frame(game_date, t1, t2) %>%
  mutate(game_id = row_number())

(df1 <- df %>% pivot_longer(cols = c(-game_date, -game_id)))

(df1_short <- df1 %>%
  select(-name) %>%
  group_by(game_date) %>%
  summarise(
    data = list(value)
  )
)

(df2 <- df1 %>%
  mutate(
    gd_1 = game_date - 1,
    gd_2 = game_date - 2
  ) %>% left_join(df1_short,
    by = c("gd_1" = "game_date")
  ) %>%
  left_join(df1_short,
    by = c("gd_2" = "game_date")
  ))

(df3 <- df2 %>% rowwise() %>%
  mutate(status = case_when(
    value %in% data.y & value %in% data.x ~ "tired",
    value %in% data.x ~ "fatigued",
    TRUE ~ "rested"
  )))

df3 %>%
  mutate(
    label_name = paste0("team_name_", name),
    label_status = paste0("team_status_", name)
  ) %>%
  select(game_date, game_id,
         value, status, 
         label_name, label_status) %>%
  pivot_wider(
    names_from = c(label_name, label_status),
    values_from = c(value, status)
  )

Wow... thank you nirgrahamuk!

I'll need to spend some time studying your code. Nice solution.

This is a bit verbose, but it works. (I'd probably have formatted the data differently in the beginning, a column for Home/Away and a match id, would simplify the tidying)

library(tidyverse)

game_date <- as.Date(
  c(
    '2014-10-01',
    '2014-10-02',
    '2014-10-02',
    '2014-10-03',
    '2014-10-03',
    '2014-10-03',
    '2014-10-04',
    '2014-10-04',
    '2014-10-04'
  )
)

t1 <- c('LDN', 'PBO', 'BAR',
        'BAR', 'BEL', 'LDN',
        'BAR', 'PBO', 'GUE')

t2 <- c('GUE', 'BEL', 'SUD',
        'GUE', 'PBO', 'KIT',
        'LDN', 'NB', 'BEL')

df <- data.frame(game_date, t1, t2)

library(sjmisc)

df <- df %>%
  pivot_longer(cols = c(t1, t2),
               names_to = "t1_t2",
               values_to = "teams") %>%
  group_by(teams) %>%
  arrange(game_date) %>%
  mutate(
    status = case_when (
      lag(game_date, 1) == game_date - 1 &
        lag(game_date, 2) == game_date - 2 ~ "tired",
      lag(game_date, 1) == game_date - 1 ~ "fatigue",
      TRUE ~ "rested"
    )
  ) %>%
  ungroup() %>%
  mutate(
    t1_status = case_when(t1_t2 == "t1" ~ status,
                          TRUE ~ NA_character_),
    t2_status = case_when(t1_t2 == "t2" ~ status,
                          TRUE ~ NA_character_),
    t1 = case_when(t1_t2 == "t1" ~ teams,
                   TRUE ~ NA_character_),
    t2 = case_when(t1_t2 == "t2" ~ teams,
                   TRUE ~ NA_character_)
  ) %>%
  mutate(
    t1 = case_when (is_even(row_number()) ~ lag(t1, 1),
                    is_odd(row_number()) ~ t1),
    t2 = case_when (is_odd(row_number()) ~ lead(t2, 1),
                    is_even(row_number()) ~ t2),
    t1_status = case_when (
      is_even(row_number()) ~ lag(t1_status, 1),
      is_odd(row_number()) ~ t1_status
    ),
    t2_status = case_when (
      is_odd(row_number()) ~ lead(t2_status, 1),
      is_even(row_number()) ~ t2_status
    )
  ) %>%
  select(game_date, t1, t2, t1_status, t2_status) %>%
  distinct()

The problem becomes more elegant if you pivot the data so each player is in a column and the value is TRUE/FALSE if they played. Then, the dplyr::lag function can be applied directly.

library(tidyverse)

game_date <- as.Date(c('2014-10-01', '2014-10-02', '2014-10-02',
                       '2014-10-03', '2014-10-03', '2014-10-03',
                       '2014-10-04', '2014-10-04', '2014-10-04'))

t1 <- c('LDN', 'PBO', 'BAR',
        'BAR', 'BEL', 'LDN',
        'BAR', 'PBO', 'GUE')

t2 <- c('GUE', 'BEL', 'SUD',
        'GUE', 'PBO', 'KIT',
        'LDN', 'NB', 'BEL')

df <- data.frame(game_date, t1, t2)

df2 <- df %>% 
  mutate(id = 1:n()) %>%
  pivot_longer(c(t1, t2)) %>%
  mutate(played = TRUE) %>%
  pivot_wider(names_from = value, values_from = played, id_cols = c(id, game_date)) %>%
  mutate_if(is.logical, replace_na, FALSE) 

df2 
#> # A tibble: 9 x 10
#>      id game_date  LDN   GUE   PBO   BEL   BAR   SUD   KIT   NB   
#>   <int> <date>     <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl>
#> 1     1 2014-10-01 TRUE  TRUE  FALSE FALSE FALSE FALSE FALSE FALSE
#> 2     2 2014-10-02 FALSE FALSE TRUE  TRUE  FALSE FALSE FALSE FALSE
#> 3     3 2014-10-02 FALSE FALSE FALSE FALSE TRUE  TRUE  FALSE FALSE
#> 4     4 2014-10-03 FALSE TRUE  FALSE FALSE TRUE  FALSE FALSE FALSE
#> 5     5 2014-10-03 FALSE FALSE TRUE  TRUE  FALSE FALSE FALSE FALSE
#> 6     6 2014-10-03 TRUE  FALSE FALSE FALSE FALSE FALSE TRUE  FALSE
#> 7     7 2014-10-04 TRUE  FALSE FALSE FALSE TRUE  FALSE FALSE FALSE
#> 8     8 2014-10-04 FALSE FALSE TRUE  FALSE FALSE FALSE FALSE TRUE 
#> 9     9 2014-10-04 FALSE TRUE  FALSE TRUE  FALSE FALSE FALSE FALSE

df2 %>%
  bind_cols(
    select(., -c(id, game_date)) %>%
      mutate_all(
        ~case_when(
          lag(., 1, FALSE) ~ "fatigue",
          lag(., 2, FALSE) ~ "tired",
          TRUE             ~ "rested")
      ) %>%
      setNames(paste0(names(.), "_status"))
  )
#> # A tibble: 9 x 18
#>      id game_date  LDN   GUE   PBO   BEL   BAR   SUD   KIT   NB    LDN_status
#>   <int> <date>     <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <chr>     
#> 1     1 2014-10-01 TRUE  TRUE  FALSE FALSE FALSE FALSE FALSE FALSE rested    
#> 2     2 2014-10-02 FALSE FALSE TRUE  TRUE  FALSE FALSE FALSE FALSE fatigue   
#> 3     3 2014-10-02 FALSE FALSE FALSE FALSE TRUE  TRUE  FALSE FALSE tired     
#> 4     4 2014-10-03 FALSE TRUE  FALSE FALSE TRUE  FALSE FALSE FALSE rested    
#> 5     5 2014-10-03 FALSE FALSE TRUE  TRUE  FALSE FALSE FALSE FALSE rested    
#> 6     6 2014-10-03 TRUE  FALSE FALSE FALSE FALSE FALSE TRUE  FALSE rested    
#> 7     7 2014-10-04 TRUE  FALSE FALSE FALSE TRUE  FALSE FALSE FALSE fatigue   
#> 8     8 2014-10-04 FALSE FALSE TRUE  FALSE FALSE FALSE FALSE TRUE  fatigue   
#> 9     9 2014-10-04 FALSE TRUE  FALSE TRUE  FALSE FALSE FALSE FALSE tired     
#> # ... with 7 more variables: GUE_status <chr>, PBO_status <chr>,
#> #   BEL_status <chr>, BAR_status <chr>, SUD_status <chr>, KIT_status <chr>,
#> #   NB_status <chr>

Created on 2021-06-17 by the reprex package (v1.0.0)

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.