ifelse function on a dataframe

Hello everyone,

I have a dataframe with a certain number of participants, and a column with their age, as well as several other columns with some scores I have measured.
First, I would like to copy the measured scores of the youngest participants into a new column called "score_youngs", and then do the same for the oldest participants (basically, separate the measured scores of the young and the old participants while keeping the original score column).
I have already created an empty column score_youngs full of NAs, and if the participants are younger than 16, I would like to report their scores from a previous column to the newly created column score_youngs.
Here is the code I have made:

# Pre-create the two destination columns, filled with NA.
frame$score_youngs <- NA
frame$score_olds <- NA

# Attempt to copy `score` into `score_youngs` for participants aged 16 or less.
# NOTE(review): the `age` parameter is never used in the body; the cutoff is
# the hard-coded 16.0 and the ages are read from `frame$age`.
f <- function(frame, age) {
  # NOTE(review): the assignment is passed as the `yes` argument of ifelse().
  # When any test is TRUE it is evaluated once as a whole (not row by row), so
  # every score is copied into `score_youngs` of the function's local `frame`;
  # the caller's data frame is not modified. The function's value is the
  # vector returned by ifelse().
  ifelse(frame$age <= 16.0, frame$score_youngs <- frame$score, NA)
}

When I do this, it just copies ALL the scores into the score_youngs column, ignoring the condition I specified! What is my mistake?

Thank you very much

Here are two ways to do that. The key points are:

  1. ifelse returns a value; you do not do assignment within ifelse. Instead, assign the result of ifelse to the column.

  2. Functions create their own environments so that modifying objects inside of a function does not modify an object outside of the function. There are ways to do that but it is not usually a good idea.

# Small example: seven participants, each with an age and a score.
DF <- data.frame(
  Age = c(33, 21, 15, 67, 55, 9, 78),
  Score = 1:7
)
DF
#>   Age Score
#> 1  33     1
#> 2  21     2
#> 3  15     3
#> 4  67     4
#> 5  55     5
#> 6   9     6
#> 7  78     7
# ifelse() returns a vector; that vector is what gets assigned to the column.
DF$score_young <- ifelse(test = DF$Age <= 16, yes = DF$Score, no = NA)
DF
#>   Age Score score_young
#> 1  33     1          NA
#> 2  21     2          NA
#> 3  15     3           3
#> 4  67     4          NA
#> 5  55     5          NA
#> 6   9     6           6
#> 7  78     7          NA

# Return the Score of every row whose Age is at or below `age`, NA elsewhere.
# Frame: a data frame with Age and Score columns; age: the cutoff value.
MyFunc <- function(Frame, age) {
  at_or_below <- Frame$Age <= age
  ifelse(test = at_or_below, yes = Frame$Score, no = NA)
}
# Call MyFunc with a cutoff of 16 and store the returned vector as a new column.
DF$score_young2 <- MyFunc(DF, 16) 
DF
#>   Age Score score_young score_young2
#> 1  33     1          NA           NA
#> 2  21     2          NA           NA
#> 3  15     3           3            3
#> 4  67     4          NA           NA
#> 5  55     5          NA           NA
#> 6   9     6           6            6
#> 7  78     7          NA           NA

Created on 2020-02-19 by the reprex package (v0.3.0)

2 Likes

# Example data: ages 1 to 50, each with a random score between 1 and 1000.
frame_raw <- data.frame(
  age = seq_len(50),
  score = sample.int(1000, size = 50)
)

# Start both destination columns as all-NA placeholders.
frame_raw$score_youngs <- NA
frame_raw$score_olds <- NA

frame1 <- frame_raw

# First approach: fill the two columns directly, without a helper function.
frame1$score_youngs <- ifelse(frame1$age <= 16, frame1$score, NA)
frame1$score_olds <- ifelse(frame1$age > 16, frame1$score, NA)

# The same split wrapped in a function: takes a data frame with `age` and
# `score` columns and returns a copy with `score_youngs` and `score_olds`
# filled in. The data frame passed by the caller is left untouched.
f <- function(inframe) {
  cutoff <- 16
  inframe$score_youngs <- ifelse(inframe$age <= cutoff, inframe$score, NA)
  inframe$score_olds <- ifelse(inframe$age > cutoff, inframe$score, NA)
  inframe
}

# Apply f() to frame_raw and keep the returned data frame as frame2.
frame2 <- f(inframe=frame_raw)
2 Likes

FJCC, oh snap :laughing:!

Thank you, that worked perfectly fine!
If I want to generalize this to all the score columns of my dataframe, what should I do?
I built this for loop: (frame_j stands for frame_youngs and frame_v for frame_olds)

# Pre-allocate two 348 x 160 data frames; matrix() without data fills with NA.
frame_j <- data.frame(matrix(ncol = 160, nrow = 348))
frame_v <- data.frame(matrix(ncol = 160, nrow = 348))

# Attempt to split every column of `frame` by age into frame_j and frame_v.
# NOTE(review): `i` holds a column name, but `$i` looks up a column literally
# named "i"; `[[i]]` is the form that uses the name stored in `i`.
for (i in colnames(frame)) {
# NOTE(review): `j` is never used in the loop body, and ifelse() already works
# on whole columns, so this inner loop only repeats the same assignments.
for (j in 1:nrow(frame)) {
  # NOTE(review): with cutoffs <= 16 and >= 19, ages strictly between 16 and
  # 19 end up as NA in both frames.
  frame_j$i <- ifelse(frame$age<=16, frame$i,NA)
  frame_v$i <- ifelse(frame$age>=19, frame$i,NA)
  # NOTE(review): return() is only meaningful inside a function; no enclosing
  # function is visible here.
  return(frame_j)
  return(frame_v)
}
} 

As a result, I just have a frame with 3 columns that looks like this:

Name     Type         Value
frame_v  list[349]    list of length 349
[[1]]      integer[1]     NA

To generalise the approach, I suggest something like the following structure:

library(rlang)  # for sym() to evaluate chars as symbols
library(tidyverse)

# Example data: one age column plus three score columns of random values.
frame_raw <- data.frame(
  age = seq_len(50),
  alpha_score = sample.int(1000, size = 50),
  beta_score = sample.int(1000, size = 50),
  theta_score = sample.int(1000, size = 50)
)

# Collect the names to operate over: the parts of the column names that match
# "<word characters>_score"; non-matching names come back as NA and are dropped.
varnames <- names(frame_raw)
scorenames <- na.omit(
  str_extract(string = varnames, pattern = "\\w*_score\\b")
)

#' Split one score column into a "low" and a "high" column.
#'
#' For every row, the score is copied to the low column when the condition
#' variable is at or below the cutoff, and to the high column when it is above
#' the cutoff; the other column gets `NA`. Rows whose condition value is `NA`
#' get `NA` in both columns.
#'
#' @param df A data frame.
#' @param score_to_split Character string naming the score column to split.
#' @param splitname_low Character string naming the column that receives the
#'   scores of rows at or below the cutoff.
#' @param splitname_high Character string naming the column that receives the
#'   scores of rows above the cutoff.
#' @param condition_var Character string naming the column the condition is
#'   evaluated on.
#' @param condition_cutoff A number giving the split point.
#' @return `df` with the two split columns added (or overwritten if they
#'   already exist).
split_score <- function(df,
                        score_to_split,
                        splitname_low,
                        splitname_high,
                        condition_var,
                        condition_cutoff) {
  # Fail early on a misspelled column name: `!!sym()` would otherwise fall back
  # to any variable of that name visible from an enclosing environment.
  missing_cols <- setdiff(c(score_to_split, condition_var), names(df))
  if (length(missing_cols) > 0) {
    stop(
      "Column(s) not found in `df`: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Return the mutate() result directly so it is visible, rather than ending
  # on an assignment (whose value is returned invisibly).
  dplyr::mutate(
    df,
    !!splitname_low := ifelse(
      !!sym(condition_var) <= condition_cutoff,
      !!sym(score_to_split),
      NA
    ),
    !!splitname_high := ifelse(
      !!sym(condition_var) > condition_cutoff,
      !!sym(score_to_split),
      NA
    )
  )
}

# Split every score column in turn; each call receives the frame produced by
# the previous one, so the new columns accumulate in frame_raw.
for (scr in scorenames) {
  frame_raw <- split_score(
    df = frame_raw,
    score_to_split = scr,
    splitname_low = paste0(scr, "_young"),
    splitname_high = paste0(scr, "_old"),
    condition_var = "age",
    condition_cutoff = 16
  )
}

# the result is now in frame_raw
1 Like

I see that @nirgrahamuk has given an elegant general solution. I will offer a much less general but simpler version. It looks from your for loop code that you want to modify all of the columns in each data frame. For that, you can use the mutate_all function from the dplyr package. I put in some extra print statements to make it easy to compare the data frames before and after the processing.
(If you want to modify just some columns, there are functions called mutate_at and mutate_if that may be useful.)

library(dplyr)

# Two identical example frames with three numeric columns each.
frame_j <- data.frame(A = c(23, 14, 45), B = c(16, 19, 12), C = c(23, 32, 6))
frame_v <- frame_j

# Keep the values that are 16 or less; replace every other value with NA.
jFunc <- function(x) {
  ifelse(test = x <= 16, yes = x, no = NA)
}
# Print frame_j before the transformation, for comparison.
frame_j
#>    A  B  C
#> 1 23 16 23
#> 2 14 19 32
#> 3 45 12  6
# Apply jFunc to every column of frame_j and print the result.
frame_j <- mutate_all(frame_j, jFunc)      
frame_j
#>    A  B  C
#> 1 NA 16 NA
#> 2 14 NA NA
#> 3 NA 12  6

# Keep the values that are 19 or more; replace every other value with NA.
vFunc <- function(x) {
  ifelse(test = x >= 19, yes = x, no = NA)
}
# Print frame_v before the transformation, for comparison.
frame_v
#>    A  B  C
#> 1 23 16 23
#> 2 14 19 32
#> 3 45 12  6
# Apply vFunc to every column of frame_v and print the result.
frame_v <- mutate_all(frame_v, vFunc)      
frame_v
#>    A  B  C
#> 1 23 NA 23
#> 2 NA 19 32
#> 3 45 NA NA

Created on 2020-02-20 by the reprex package (v0.3.0)

1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.