Creating columns based on condition in the previous column

I have a dataset of students' assessments. The first dataframe data1 is how the data looks like. But the template that is needed is like the second dataframe data2. The score must appear to the column adjacent to the response column. The scoring will change as per the question. Here in this case, it is as follows:
l1c1_identify_pic1,l1c1_identify_pic2,l1c1_identify_pic3= 2 marks when response is 2, 1 when response is 1 and 0 for 0 and -99 (no response).
l1cq_identify_col1,l1c1_identify_col2,l1c1_cards_order= 1 mark when response is 1, 0 when response is 0 and -99 (No response).
If this process can be done with looping and functions, it could be very useful for me. I tried but couldn't figure it out.

library(tidyverse)

data1<-tibble::tribble(
  ~student_id, ~age, ~demo_tv, ~demo_regrigerator, ~demo_phone, ~demo_laptop, ~l1c1_identify_pic1, ~l1c1_identify_pic2, ~l1c1_identify_pic3, ~l1c1_identify_col1, ~l1c1_identify_col2, ~l1c1_identify_shape1, ~l1c1_cards_order,
       "S001",   8L,       0L,                 1L,          0L,           0L,                  2L,                  1L,                  0L,                  1L,                  1L,                    1L,                0L,
       "S002",   6L,       1L,                 0L,          0L,           1L,                  1L,                  0L,                  2L,                  0L,                  0L,                    1L,              -99L,
       "S003",   8L,       1L,                 0L,          0L,           1L,                  1L,                  1L,                -99L,                  1L,                -99L,                    1L,                1L,
       "S004",   8L,       1L,                 1L,          1L,           1L,                  0L,                  2L,                  0L,                  1L,                  0L,                    0L,                1L,
       "S005",   8L,       1L,                 1L,          0L,           1L,                  2L,                  0L,                  2L,                  1L,                  0L,                    1L,                0L,
       "S006",   8L,       1L,                 0L,          1L,           1L,                  0L,                  0L,                  0L,                  1L,                  1L,                    1L,              -99L,
       "S007",   7L,       1L,                 0L,          1L,           1L,                  1L,                  1L,                  0L,                  0L,                  0L,                  -99L,                1L,
       "S008",   6L,       0L,                 0L,          0L,           0L,                  1L,                  1L,                  0L,                -99L,                  0L,                    0L,                0L,
       "S009",   6L,       1L,                 1L,          1L,           1L,                  2L,                  1L,                -99L,                  0L,                  0L,                    1L,                1L,
       "S010",   6L,       1L,                 1L,          1L,           1L,                  2L,                  0L,                  0L,                  0L,                  0L,                    1L,                0L,
       "S011",   7L,       1L,                 0L,          0L,           0L,                -99L,                  0L,                  2L,                  0L,                  0L,                    0L,              -99L,
       "S012",   8L,       1L,                 0L,          1L,           1L,                  0L,                  1L,                  0L,                  0L,                -99L,                    0L,                1L,
       "S013",   8L,       0L,                 1L,          0L,           1L,                  2L,                -99L,                  2L,                  1L,                  1L,                    0L,                0L,
       "S014",   6L,       0L,                 1L,          0L,           1L,                  1L,                  0L,                  1L,                  1L,                  0L,                    0L,                0L,
       "S015",   6L,       0L,                 0L,          0L,           1L,                  2L,                  0L,                  1L,                  0L,                  0L,                    1L,              -99L,
       "S016",   6L,       0L,                 0L,          1L,           0L,                  0L,                  1L,                  0L,                  1L,                  1L,                  -99L,                0L,
       "S017",   6L,       1L,                 1L,          0L,           1L,                  0L,                -99L,                  2L,                  0L,                  0L,                    0L,                1L,
       "S018",   8L,       0L,                 0L,          1L,           1L,                  2L,                  1L,                  1L,                  0L,                  1L,                    1L,                1L,
       "S019",   6L,       1L,                 0L,          0L,           1L,                  1L,                  0L,                  0L,                -99L,                  0L,                    0L,              -99L,
       "S020",   8L,       0L,                 0L,          0L,           1L,                  0L,                  2L,                  1L,                  1L,                  1L,                    0L,                0L,
       "S021",   8L,       1L,                 0L,          0L,           1L,                  1L,                  0L,                  1L,                  0L,                  1L,                    1L,                1L,
       "S022",   7L,       1L,                 0L,          1L,           1L,                  0L,                  1L,                  2L,                  0L,                  1L,                  -99L,                1L,
       "S023",   6L,       1L,                 0L,          0L,           0L,                  1L,                  2L,                -99L,                  1L,                  1L,                    0L,                1L,
       "S024",   6L,       0L,                 1L,          0L,           1L,                  0L,                  2L,                  2L,                  1L,                -99L,                    0L,                1L
  )



data2<-tibble::tribble(
         ~student_id, ~age, ~demo_tv, ~demo_regrigerator, ~demo_phone, ~demo_laptop, ~l1c1_identify_pic1, ~score, ~l1c1_identify_pic2, ~score, ~l1c1_identify_pic3, ~score, ~l1c1_identify_col1, ~score, ~l1c1_identify_col2, ~score, ~l1c1_identify_shape1, ~score, ~l1c1_cards_order, ~score,
              "S001",   8L,       0L,                 1L,          0L,           0L,                  2L,     2L,                  1L,     1L,                  0L,     0L,                  1L,     1L,                  1L,     1L,                    1L,     1L,                0L,     0L,
              "S002",   6L,       1L,                 0L,          0L,           1L,                  1L,     1L,                  0L,     0L,                  2L,     2L,                  0L,     0L,                  0L,     0L,                    1L,     1L,              -99L,     0L,
              "S003",   8L,       1L,                 0L,          0L,           1L,                  1L,     1L,                  1L,     1L,                -99L,     0L,                  1L,     1L,                -99L,     0L,                    1L,     1L,                1L,     1L,
              "S004",   8L,       1L,                 1L,          1L,           1L,                  0L,     0L,                  2L,     2L,                  0L,     0L,                  1L,     1L,                  0L,     0L,                    0L,     0L,                1L,     1L,
              "S005",   8L,       1L,                 1L,          0L,           1L,                  2L,     2L,                  0L,     0L,                  2L,     2L,                  1L,     1L,                  0L,     0L,                    1L,     1L,                0L,     0L,
              "S006",   8L,       1L,                 0L,          1L,           1L,                  0L,     0L,                  0L,     0L,                  0L,     0L,                  1L,     1L,                  1L,     1L,                    1L,     1L,              -99L,     0L,
              "S007",   7L,       1L,                 0L,          1L,           1L,                  1L,     1L,                  1L,     1L,                  0L,     0L,                  0L,     0L,                  0L,     0L,                  -99L,     0L,                1L,     1L,
              "S008",   6L,       0L,                 0L,          0L,           0L,                  1L,     1L,                  1L,     1L,                  0L,     0L,                -99L,     0L,                  0L,     0L,                    0L,     0L,                0L,     0L,
              "S009",   6L,       1L,                 1L,          1L,           1L,                  2L,     2L,                  1L,     1L,                -99L,     0L,                  0L,     0L,                  0L,     0L,                    1L,     1L,                1L,     1L,
              "S010",   6L,       1L,                 1L,          1L,           1L,                  2L,     2L,                  0L,     0L,                  0L,     0L,                  0L,     0L,                  0L,     0L,                    1L,     1L,                0L,     0L,
              "S011",   7L,       1L,                 0L,          0L,           0L,                -99L,     0L,                  0L,     0L,                  2L,     2L,                  0L,     0L,                  0L,     0L,                    0L,     0L,              -99L,     0L,
              "S012",   8L,       1L,                 0L,          1L,           1L,                  0L,     0L,                  1L,     1L,                  0L,     0L,                  0L,     0L,                -99L,     0L,                    0L,     0L,                1L,     1L,
              "S013",   8L,       0L,                 1L,          0L,           1L,                  2L,     2L,                -99L,     0L,                  2L,     2L,                  1L,     1L,                  1L,     1L,                    0L,     0L,                0L,     0L,
              "S014",   6L,       0L,                 1L,          0L,           1L,                  1L,     1L,                  0L,     0L,                  1L,     1L,                  1L,     1L,                  0L,     0L,                    0L,     0L,                0L,     0L,
              "S015",   6L,       0L,                 0L,          0L,           1L,                  2L,     2L,                  0L,     0L,                  1L,     1L,                  0L,     0L,                  0L,     0L,                    1L,     1L,              -99L,     0L,
              "S016",   6L,       0L,                 0L,          1L,           0L,                  0L,     0L,                  1L,     1L,                  0L,     0L,                  1L,     1L,                  1L,     1L,                  -99L,     0L,                0L,     0L,
              "S017",   6L,       1L,                 1L,          0L,           1L,                  0L,     0L,                -99L,     0L,                  2L,     2L,                  0L,     0L,                  0L,     0L,                    0L,     0L,                1L,     1L,
              "S018",   8L,       0L,                 0L,          1L,           1L,                  2L,     2L,                  1L,     1L,                  1L,     1L,                  0L,     0L,                  1L,     1L,                    1L,     1L,                1L,     1L,
              "S019",   6L,       1L,                 0L,          0L,           1L,                  1L,     1L,                  0L,     0L,                  0L,     0L,                -99L,     0L,                  0L,     0L,                    0L,     0L,              -99L,     0L,
              "S020",   8L,       0L,                 0L,          0L,           1L,                  0L,     0L,                  2L,     2L,                  1L,     1L,                  1L,     1L,                  1L,     1L,                    0L,     0L,                0L,     0L,
              "S021",   8L,       1L,                 0L,          0L,           1L,                  1L,     1L,                  0L,     0L,                  1L,     1L,                  0L,     0L,                  1L,     1L,                    1L,     1L,                1L,     1L,
              "S022",   7L,       1L,                 0L,          1L,           1L,                  0L,     0L,                  1L,     1L,                  2L,     2L,                  0L,     0L,                  1L,     1L,                  -99L,     0L,                1L,     1L,
              "S023",   6L,       1L,                 0L,          0L,           0L,                  1L,     1L,                  2L,     2L,                -99L,     0L,                  1L,     1L,                  1L,     1L,                    0L,     0L,                1L,     1L,
              "S024",   6L,       0L,                 1L,          0L,           1L,                  0L,     0L,                  2L,     2L,                  2L,     2L,                  1L,     1L,                -99L,     0L,                    0L,     0L,                1L,     1L
         )
Created on 2022-03-12 by the reprex package (v2.0.1)

Hi there,

You can do this with some dplyr magic and some sorting.
I shortened your dataframe and column names for better display here

library(tidyverse)

#The data
data1<-tibble::tribble(
  ~student_id, ~age, ~pic1, ~pic2, ~order,
  "S001",      8L,   2L,    1L,      0L,
  "S002",      6L,   1L,    0L,    -99L,
  "S003",      8L,   1L,    1L,      1L,
  "S004",      8L,   0L,    2L,      1L,
  "S005",      8L,   2L,  -99L,      0L,
  "S006",      8L,   0L,    0L,    -99L,
  "S007",      7L,   1L,    1L,      1L,
  "S008",      6L,   1L,    1L,      0L,
  "S009",      6L,   2L,    1L,      1L,
  "S010",      6L,   2L,    0L,      0L
)

#From which column onwards do we need additional scoring columns
firstColToScore = 3

#Temp change the column names to ensure later sorting is preserving the order
colnames(data1) = 
  paste(1:ncol(data1) %>% as.character() %>% sort(), colnames(data1), sep = "_")

#Create the scoring columns
scores = data1 %>% select(-(1:(firstColToScore - 1))) %>% 
  mutate(across(everything(), function(x){ifelse(x < 0, 0, x)}))

#Edit the names
colnames(scores) = paste(colnames(scores), "score", sep = "_")

#Bind the data together and sort the scoring columns next to the original
data2 = bind_cols(data1, scores)
data2 = data2 %>% select(1:(firstColToScore - 1), sort(colnames(data2)))

#Remove the temp sorting parameter from the columns names
colnames(data2) = str_remove(colnames(data2), "^\\d+_")

data2
#> # A tibble: 10 x 8
#>    student_id   age  pic1 pic1_score  pic2 pic2_score order order_score
#>    <chr>      <int> <int>      <int> <int>      <dbl> <int>       <dbl>
#>  1 S001           8     2          2     1          1     0           0
#>  2 S002           6     1          1     0          0   -99           0
#>  3 S003           8     1          1     1          1     1           1
#>  4 S004           8     0          0     2          2     1           1
#>  5 S005           8     2          2   -99          0     0           0
#>  6 S006           8     0          0     0          0   -99           0
#>  7 S007           7     1          1     1          1     1           1
#>  8 S008           6     1          1     1          1     0           0
#>  9 S009           6     2          2     1          1     1           1
#> 10 S010           6     2          2     0          0     0           0

Created on 2022-03-12 by the reprex package (v2.0.1.9000)

Note that you have to put the firstColToScore = 3 to firstColToScore = 7 in your example using the full dataset (i.e., from that columns onwards the scoring will be added).

Also, having columns with identical names is bad practice, so I changed it that the scoring column have the original name with _score appended.

Hope this helps,
PJ

But I have a problem here. The scoring here will change. For few questions, the scoring will be different. Can I specify from which column to which column I have to follow a particular scoring pattern?

Hi,

Sorry I did not read about the part for different rules.
Here is a way of doing that, you can create more rules if you like:

library(tidyverse)

#The data
data1<-tibble::tribble(
  ~student_id, ~age, ~pic1, ~pic2, ~order,
  "S001",      8L,   2L,    1L,      0L,
  "S002",      6L,   1L,    0L,    -99L,
  "S003",      8L,   1L,    1L,      1L,
  "S004",      8L,   0L,    2L,      1L,
  "S005",      8L,   2L,  -99L,      0L,
  "S006",      8L,   0L,    0L,    -99L,
  "S007",      7L,   1L,    1L,      1L,
  "S008",      6L,   1L,    1L,      0L,
  "S009",      6L,   2L,    1L,      1L,
  "S010",      6L,   2L,    0L,      0L
)

#Greate groups of columns with different scoring rules
scoring1 = c("pic1", "pic2")
scoring2 = c("order")

#Create the scoring columns
scoringCols = c(scoring1, scoring2)

scores = data1 %>% select(all_of(scoringCols)) %>% 
  #Set the rules for each scoring group
  mutate(across(all_of(scoring1), function(x){ifelse(x < 0, 0, x)}),
         across(all_of(scoring2), function(x){ifelse(x == 1, 1, 0)}))

#Edit the new col names
colnames(scores) = paste(colnames(scores), "score", sep = "_")

#Bind the data together and sort the scoring columns next to the original
data2 = bind_cols(data1, scores)
data2 = data2 %>% 
  select(-all_of(c(scoringCols, colnames(scores))), sort(colnames(data2)))

data2
#> # A tibble: 10 x 8
#>    student_id   age order order_score  pic1 pic1_score  pic2 pic2_score
#>    <chr>      <int> <int>       <dbl> <int>      <int> <int>      <dbl>
#>  1 S001           8     0           0     2          2     1          1
#>  2 S002           6   -99           0     1          1     0          0
#>  3 S003           8     1           1     1          1     1          1
#>  4 S004           8     1           1     0          0     2          2
#>  5 S005           8     0           0     2          2   -99          0
#>  6 S006           8   -99           0     0          0     0          0
#>  7 S007           7     1           1     1          1     1          1
#>  8 S008           6     0           0     1          1     1          1
#>  9 S009           6     1           1     2          2     1          1
#> 10 S010           6     0           0     2          2     0          0

Created on 2022-03-15 by the reprex package (v2.0.1.9000)

PJ

This was super useful. But just one more thing. Can it be made that the adjacent columns can be made just "score" and not "pic1_score", "pic2_score". I know that's a bad practice, but unfortunately, that is the requirement.

Hi,

That is a weird requirement indeed, but you can force this by removing the part before 'score' from every scoring column at the end by adding this line

colnames(data2) = str_remove(colnames(data2), "(.*)(?=score$)")

It will remove the text before the word 'score' if present at the end of a column name

PJ

Thanks a lot for this. It was really helpful..

Regards,
Nithin

An additional query. While I did this, a few columns are getting interchanged. The data columns have to remain in the same order. Is that possible?
For e.g, after each question there is the total score of that question. That variable is coming to the from of the dataset after running the codes.

There are a few columns which are not taken for calculation of scores. That is now coming in front of the data.

What I mean to say is as follows:
l1c_total must come after l1_c1_identify_pic4.
But here it comes at the front itself.

library(tidyverse)
data1<-tibble::tribble(
  ~Student_ID, ~l1c1_identify_pic1, ~l1c1_identify_pic2, ~l1c1_identify_pic3, ~l1c1_identify_pic4, ~l1c1_total,
       "S001",                  1L,                  1L,                  1L,                  1L,          6L,
       "S002",                  1L,                  0L,                  1L,                  1L,          4L,
       "S003",                  1L,                  0L,                  1L,                  0L,          6L,
       "S004",                  1L,                  0L,                  1L,                  0L,          4L,
       "S005",                  0L,                  1L,                  1L,                  0L,          2L,
       "S006",                  0L,                  0L,                  1L,                  0L,          4L,
       "S007",                  1L,                  1L,                  1L,                  0L,          6L,
       "S008",                  0L,                  1L,                  1L,                  0L,          6L,
       "S009",                  1L,                  1L,                  0L,                -99L,          4L,
       "S010",                  0L,                  0L,                -99L,                  0L,          4L,
       "S011",                  1L,                  0L,                 99L,                  0L,          4L,
       "S012",                  0L,                  1L,                -99L,                  0L,          6L,
       "S013",                  0L,                  0L,                  1L,                  0L,          6L,
       "S014",                  0L,                  0L,                  1L,                  0L,          6L,
       "S015",                  1L,                  1L,                  1L,                  1L,          6L
  )
#Greate groups of columns with different scoring rules
scoring1 = c("l1c1_identify_pic1","l1c1_identify_pic2")
scoring2 = c("l1c1_identify_pic3","l1c1_identify_pic4")

#Create the scoring columns
scoringCols = c(scoring1, scoring2)

scores = data1 %>% select(all_of(scoringCols)) %>% 
  #Set the rules for each scoring group
  mutate(across(all_of(scoring1), function(x){ifelse(x==1, 2, 0)}),
         across(all_of(scoring2), function(x){ifelse(x == 1, 1, 0)}))

#Edit the new col names
colnames(scores) = paste(colnames(scores), "score", sep = "_")

#Bind the data together and sort the scoring columns next to the original
data2 = bind_cols(data1, scores)
data2 = data2 %>% 
  select(-all_of(c(scoringCols, colnames(scores))), sort(colnames(data2)))

data2
#> # A tibble: 15 x 10
#>    Student_ID l1c1_total l1c1_identify_pic1 l1c1_identify_pic1~ l1c1_identify_p~
#>    <chr>           <int>              <int>               <dbl>            <int>
#>  1 S001                6                  1                   2                1
#>  2 S002                4                  1                   2                0
#>  3 S003                6                  1                   2                0
#>  4 S004                4                  1                   2                0
#>  5 S005                2                  0                   0                1
#>  6 S006                4                  0                   0                0
#>  7 S007                6                  1                   2                1
#>  8 S008                6                  0                   0                1
#>  9 S009                4                  1                   2                1
#> 10 S010                4                  0                   0                0
#> 11 S011                4                  1                   2                0
#> 12 S012                6                  0                   0                1
#> 13 S013                6                  0                   0                0
#> 14 S014                6                  0                   0                0
#> 15 S015                6                  1                   2                1
#> # ... with 5 more variables: l1c1_identify_pic2_score <dbl>,
#> #   l1c1_identify_pic3 <int>, l1c1_identify_pic3_score <dbl>,
#> #   l1c1_identify_pic4 <int>, l1c1_identify_pic4_score <dbl>
Created on 2022-03-21 by the reprex package (v2.0.1)

Hi,

Well this is possible but the solution is not the most easy to read code as I needed several rlang tricks to get this working. Maybe someone else has a nicer implementation

library(tidyverse)

#The data
data1<-tibble::tribble(
  ~student_id, ~age, ~pic1, ~pic2, ~order,
  "S001",      8L,   2L,    1L,      0L,
  "S002",      6L,   1L,    0L,    -99L,
  "S003",      8L,   1L,    1L,      1L,
  "S004",      8L,   0L,    2L,      1L,
  "S005",      8L,   2L,  -99L,      0L,
  "S006",      8L,   0L,    0L,    -99L,
  "S007",      7L,   1L,    1L,      1L,
  "S008",      6L,   1L,    1L,      0L,
  "S009",      6L,   2L,    1L,      1L,
  "S010",      6L,   2L,    0L,      0L
)

#Create vector of columns and which scoring rules need to be applied
scoring = c("pic1" = 1, "pic2" = 1, "order" = 2)

#Create the scoring columns
for(i in 1:length(scoring)){

  var = sym(names(scoring)[i])
  
  if(scoring[i] == 1){
    data1 = data1 %>% mutate(
      "{{var}}_score" := ifelse({{var}} < 0, 0, {{var}}), .after = {{var}}
    )
  } else {
    data1 = data1 %>% mutate(
      "{{var}}_score" := ifelse({{var}} == 1, 1, 0), .after = {{var}}
    )
  }
 
}

#Edit the new col names
colnames(data1) = str_remove(colnames(data1), "(.*)(?=score$)")

data1
#> # A tibble: 10 x 8
#>    student_id   age  pic1 score  pic2 score order score
#>    <chr>      <int> <int> <int> <int> <dbl> <int> <dbl>
#>  1 S001           8     2     2     1     1     0     0
#>  2 S002           6     1     1     0     0   -99     0
#>  3 S003           8     1     1     1     1     1     1
#>  4 S004           8     0     0     2     2     1     1
#>  5 S005           8     2     2   -99     0     0     0
#>  6 S006           8     0     0     0     0   -99     0
#>  7 S007           7     1     1     1     1     1     1
#>  8 S008           6     1     1     1     1     0     0
#>  9 S009           6     2     2     1     1     1     1
#> 10 S010           6     2     2     0     0     0     0

Created on 2022-03-21 by the reprex package (v2.0.1)

PJ

This looks a bit difficult. I will revert to the earlier solution. Thanks a lot.

Regards,
NP

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.