Adding suffix to variable names

kuttan98 · January 9, 2023, 7:22am

Hi,
I have a dataset in which I have combined the surveys that were run at baseline and at endline. I need to differentiate them by adding a suffix to the variable names: for baseline, _b has to be added, and for endline, _e has to be added.
Here in the reprex, from c1_identify_pic1 to el2_sent2, _b needs to be added. Then after that the variables repeat themselves. For them _e has to be added. How can I do this?

library(tidyverse)

# Reprex data: one row per student_id, followed by 22 integer score columns.
# The same 11 column names appear twice: columns 2-12 (c1_identify_pic1 ..
# el2_sent2) and again, under identical names, in columns 13-23. Per the
# question, the first set is the baseline survey and the second the endline.
data<-tibble::tribble(
  ~student_id, ~c1_identify_pic1, ~c1_identify_pic2, ~c1_identify_pic3, ~c1_identify_pic4, ~c2_identify_col1, ~c2_identify_col2, ~c3_identify_shap1, ~c3_identify_shap2, ~el1_recog1, ~el2_sent1, ~el2_sent2, ~c1_identify_pic1, ~c1_identify_pic2, ~c1_identify_pic3, ~c1_identify_pic4, ~c2_identify_col1, ~c2_identify_col2, ~c3_identify_shap1, ~c3_identify_shap2, ~el1_recog1, ~el2_sent1, ~el2_sent2,
      "DFGHG",                1L,                6L,                5L,                4L,                3L,                2L,                 3L,                 6L,          6L,         2L,         2L,                5L,                2L,                6L,                6L,                4L,                3L,                 5L,                 5L,          2L,         0L,         5L,
       "DBFF",                6L,                5L,                4L,                0L,                4L,                6L,                 4L,                 3L,          2L,         3L,         3L,                2L,                3L,                5L,                3L,                6L,                6L,                 0L,                 3L,          6L,         4L,         4L,
        "NDH",                5L,                3L,                0L,                0L,                1L,                0L,                 0L,                 4L,          4L,         4L,         1L,                6L,                6L,                6L,                2L,                6L,                4L,                 6L,                 6L,          5L,         4L,         0L,
     "BSJJFK",                4L,                3L,                4L,                1L,                1L,                3L,                 5L,                 1L,          6L,         5L,         2L,                2L,                5L,                1L,                5L,                6L,                0L,                 6L,                 6L,          5L,         5L,         1L,
     "BNSKII",                2L,                2L,                2L,                1L,                4L,                2L,                 0L,                 6L,          2L,         1L,         2L,                1L,                3L,                0L,                4L,                2L,                6L,                 3L,                 1L,          4L,         2L,         6L,
      "BBD56",                2L,                6L,                0L,                1L,                0L,                5L,                 4L,                 2L,          5L,         0L,         1L,                0L,                6L,                6L,                2L,                6L,                6L,                 2L,                 6L,          6L,         0L,         3L,
      "BBDJD",                5L,                0L,                2L,                3L,                5L,                4L,                 1L,                 6L,          2L,         1L,         5L,                2L,                2L,                1L,                0L,                6L,                3L,                 0L,                 2L,          2L,         1L,         2L
  )

nirgrahamuk · January 9, 2023, 11:43am


# Build a small example whose measurement columns repeat: id, x, y, x, y.
prep <- matrix(rep(1:3, 5), nrow = 3, ncol = 5)
colnames(prep) <- c("id", "x", "y", "x", "y")
# tibbles don't support duplicate names and I would expect an error,
# therefore go with data.frame to get close to the issue ...
(myframe <- as.data.frame(prep))

# Columns that keep their original name (no counter appended).
skiplist <- c("id")
# Get the unique names, not including id.
(u_nms <- setdiff(unique(names(myframe)), skiplist))

# Per-name counter: how many times each name has been encountered so far.
(u_cnt <- setNames(rep(0, length(u_nms)), u_nms))

# Walk the columns left to right and append the running occurrence number,
# so the first "x" becomes "x_1", the second "x_2", and so on.
# seq_len() is used instead of 1:ncol(): for a zero-column frame 1:0 would
# iterate over c(1, 0) and index out of bounds.
for (i in seq_len(ncol(myframe))) {
  curr_col_nm <- names(myframe)[[i]]
  if (!curr_col_nm %in% skiplist) {
    u_cnt[[curr_col_nm]] <- u_cnt[[curr_col_nm]] + 1
    colnames(myframe)[i] <- paste0(curr_col_nm, "_", u_cnt[[curr_col_nm]])
    print(colnames(myframe)[i])
  }
}

# Turn the occurrence counters into survey-round suffixes: a trailing "_1"
# (first occurrence) becomes "_b" and a trailing "_2" (second occurrence)
# becomes "_e". The patterns are anchored with "$" so only the end of each
# name is touched. The renamed frame is printed, not assigned.
myframe |>
  rename_with(.fn = \(nm) gsub("_1$", "_b", nm)) |>
  rename_with(.fn = \(nm) gsub("_2$", "_e", nm))

technocrat · January 10, 2023, 12:17am

It's simple if it is just a matter of renaming specific columns.

# Example data: student_id followed by 22 integer score columns. The same 11
# column names are used twice, first in columns 2-12 and again in columns
# 13-23.
Data <- tibble::tribble(
  ~student_id, ~c1_identify_pic1, ~c1_identify_pic2, ~c1_identify_pic3, ~c1_identify_pic4, ~c2_identify_col1, ~c2_identify_col2, ~c3_identify_shap1, ~c3_identify_shap2, ~el1_recog1, ~el2_sent1, ~el2_sent2, ~c1_identify_pic1, ~c1_identify_pic2, ~c1_identify_pic3, ~c1_identify_pic4, ~c2_identify_col1, ~c2_identify_col2, ~c3_identify_shap1, ~c3_identify_shap2, ~el1_recog1, ~el2_sent1, ~el2_sent2,
  "DFGHG", 1L, 6L, 5L, 4L, 3L, 2L, 3L, 6L, 6L, 2L, 2L, 5L, 2L, 6L, 6L, 4L, 3L, 5L, 5L, 2L, 0L, 5L,
  "DBFF", 6L, 5L, 4L, 0L, 4L, 6L, 4L, 3L, 2L, 3L, 3L, 2L, 3L, 5L, 3L, 6L, 6L, 0L, 3L, 6L, 4L, 4L,
  "NDH", 5L, 3L, 0L, 0L, 1L, 0L, 0L, 4L, 4L, 4L, 1L, 6L, 6L, 6L, 2L, 6L, 4L, 6L, 6L, 5L, 4L, 0L,
  "BSJJFK", 4L, 3L, 4L, 1L, 1L, 3L, 5L, 1L, 6L, 5L, 2L, 2L, 5L, 1L, 5L, 6L, 0L, 6L, 6L, 5L, 5L, 1L,
  "BNSKII", 2L, 2L, 2L, 1L, 4L, 2L, 0L, 6L, 2L, 1L, 2L, 1L, 3L, 0L, 4L, 2L, 6L, 3L, 1L, 4L, 2L, 6L,
  "BBD56", 2L, 6L, 0L, 1L, 0L, 5L, 4L, 2L, 5L, 0L, 1L, 0L, 6L, 6L, 2L, 6L, 6L, 2L, 6L, 6L, 0L, 3L,
  "BBDJD", 5L, 0L, 2L, 3L, 5L, 4L, 1L, 6L, 2L, 1L, 5L, 2L, 2L, 1L, 0L, 6L, 3L, 0L, 2L, 2L, 1L, 2L
)

# Baseline block: columns 2-12 hold the first occurrence of each variable.
# paste0() appends the suffix directly; no regular expression is needed.
oldnames <- colnames(Data)[2:12]
newnames <- paste0(oldnames, "_b")
colnames(Data)[2:12] <- newnames

# Endline block: columns 13-23 repeat the same variable names, so give them
# the "_e" suffix; otherwise they would stay unsuffixed.
colnames(Data)[13:23] <- paste0(colnames(Data)[13:23], "_e")

# Show the renamed baseline columns.
colnames(Data)[2:12]
#>  [1] "c1_identify_pic1_b"  "c1_identify_pic2_b"  "c1_identify_pic3_b" 
#>  [4] "c1_identify_pic4_b"  "c2_identify_col1_b"  "c2_identify_col2_b" 
#>  [7] "c3_identify_shap1_b" "c3_identify_shap2_b" "el1_recog1_b"       
#> [10] "el2_sent1_b"         "el2_sent2_b"

Created on 2023-01-09 with reprex v2.0.2

system · January 31, 2023, 12:18am

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.