Horizontal to vertical transformation with variable selection.

Hi. I am having a dataset of student class performance with selected parameters. The questionnaire coding is as given in data1. The student names appear horizontally and the respective parameter scores come after that.
For example, the first student is Sinchana. Her scores is given in q_1_1, q_1_2, etc.. till q_1_11. The numbers 1-11 signify different parameters. The second students is Swati. Her scores are given in q_2_1, q_2_2, etc.. till q_2_11.
I want this data format to change and appear as given in data2. Is this possible to do?

library(tidyverse)
library(janitor)
#> 
#> Attaching package: 'janitor'
#> The following objects are masked from 'package:stats':
#> 
#>     chisq.test, fisher.test
data1<-tibble::tribble(
  ~enumerator,              ~en_name, ~confirm_en_name, ~tchidentrychoice, ~clusterid, ~schoolid, ~teacherid,            ~cal_name1,       ~cal_name2,       ~cal_name3,          ~cal_name4,       ~cal_name5,          ~cal_name6,          ~cal_name7,         ~cal_name8,         ~cal_name9,           ~cal_name10, ~q_1_1, ~q_1_2, ~q_1_3, ~q_1_4, ~q_1_5, ~q_1_6, ~q_1_7, ~q_1_8, ~q_1_9, ~q_1_10, ~q_1_11, ~q_2, ~q_2_1, ~q_2_2, ~q_2_3, ~q_2_4, ~q_2_5, ~q_2_6, ~q_2_7, ~q_2_8, ~q_2_9, ~q_2_10, ~q_2_11,  ~q_3, ~q_3_1, ~q_3_2, ~q_3_3, ~q_3_4, ~q_3_5, ~q_3_6, ~q_3_7, ~q_3_8, ~q_3_9, ~q_3_10, ~q_3_11, ~q_4, ~q_4_1, ~q_4_2, ~q_4_3, ~q_4_4, ~q_4_5, ~q_4_6, ~q_4_7, ~q_4_8, ~q_4_9, ~q_4_10, ~q_4_11, ~q_5, ~q_5_1, ~q_5_2, ~q_5_3, ~q_5_4, ~q_5_5, ~q_5_6, ~q_5_7, ~q_5_8, ~q_5_9, ~q_5_10, ~q_5_11, ~q_6, ~q_6_1, ~q_6_2, ~q_6_3, ~q_6_4, ~q_6_5, ~q_6_6, ~q_6_7, ~q_6_8, ~q_6_9, ~q_6_10, ~q_6_11, ~q_7, ~q_7_1, ~q_7_2, ~q_7_3, ~q_7_4, ~q_7_5, ~q_7_6, ~q_7_7, ~q_7_8, ~q_7_9, ~q_7_10, ~q_7_11,  ~q_8, ~q_8_1, ~q_8_2, ~q_8_3, ~q_8_4, ~q_8_5, ~q_8_6, ~q_8_7, ~q_8_8, ~q_8_9, ~q_8_10, ~q_8_11, ~q_9, ~q_9_1, ~q_9_2, ~q_9_3, ~q_9_4, ~q_9_5, ~q_9_6, ~q_9_7, ~q_9_8, ~q_9_9, ~q_9_10, ~q_9_11, ~q_10, ~q_10_1, ~q_10_2, ~q_10_3, ~q_10_4, ~q_10_5, ~q_10_6, ~q_10_7, ~q_10_8, ~q_10_9, ~q_10_10, ~q_10_11,
     "PEN001", "Gangavva B Hiremath",               1L,                1L, "Sattapur",     "ABC",     "TCH1", "Sinchana Hiregoudar", "Swathi Ganiger", "Deepa Vantetin", "Chaitra Kusugalla", "Veeranna Gouda", "Basavaraj Madiwal", "Apoorva Doddamani", "Ashmita R kamble", "Ara■■■■ R kamble", "Basavaraj madivalar",     0L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,      0L,      0L,   4L,     0L,     0L,     0L,     1L,     0L,     0L,     0L,     0L,     0L,      0L,      0L, "7 9",     0L,     0L,     0L,     0L,     0L,     0L,     1L,     0L,     1L,      0L,      0L,   2L,     0L,     1L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,      0L,      0L,   NA,     0L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,      0L,      0L,   NA,     0L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,      0L,      0L,   4L,     0L,     0L,     0L,     1L,     0L,     0L,     0L,     0L,     0L,      0L,      0L, "3 8",     0L,     0L,     1L,     0L,     0L,     0L,     0L,     1L,     0L,      0L,      0L,   NA,     0L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,      0L,      0L,    NA,      0L,      0L,      0L,      0L,      0L,      0L,      0L,      0L,      0L,       0L,       0L
  )


data2<-tibble::tribble(
                     ~cal_name, ~reading, ~demo, ~reprimand_phy, ~reprimand_word, ~speaking, ~discipline, ~respond, ~answer, ~writing, ~sports, ~academics,
         "Sinchana Hiregoudar",       0L,    0L,             0L,              0L,        0L,          0L,       0L,      0L,       0L,      0L,         0L,
              "Swathi Ganiger",       0L,    0L,             0L,              1L,        0L,          0L,       0L,      0L,       0L,      0L,         0L,
              "Deepa Vantetin",       0L,    0L,             0L,              0L,        0L,          0L,       1L,      0L,       1L,      0L,         0L,
           "Chaitra Kusugalla",       0L,    1L,             0L,              0L,        0L,          0L,       0L,      0L,       0L,      0L,         0L,
              "Veeranna Gouda",       0L,    0L,             0L,              0L,        0L,          0L,       0L,      0L,       0L,      0L,         0L,
           "Basavaraj Madiwal",       0L,    0L,             0L,              0L,        0L,          0L,       0L,      0L,       0L,      0L,         0L,
           "Apoorva Doddamani",       0L,    0L,             0L,              1L,        0L,          0L,       0L,      0L,       0L,      0L,         0L,
            "Ashmita R kamble",       0L,    0L,             1L,              0L,        0L,          0L,       0L,      1L,       0L,      0L,         0L,
            "Ara■■■■ R kamble",       0L,    0L,             0L,              0L,        0L,          0L,       0L,      0L,       0L,      0L,         0L,
         "Basavaraj madivalar",       0L,    0L,             0L,              0L,        0L,          0L,       0L,      0L,       0L,      0L,         0L
         )
Created on 2022-06-14 by the reprex package (v2.0.1)

Is this what you want? I did not set the column names.

library(tidyverse)
#> Warning: package 'tibble' was built under R version 4.1.2
data1<-tibble::tribble(
  ~enumerator,              ~en_name, ~confirm_en_name, ~tchidentrychoice, ~clusterid, ~schoolid, ~teacherid,            ~cal_name1,       ~cal_name2,       ~cal_name3,          ~cal_name4,       ~cal_name5,          ~cal_name6,          ~cal_name7,         ~cal_name8,         ~cal_name9,           ~cal_name10, ~q_1_1, ~q_1_2, ~q_1_3, ~q_1_4, ~q_1_5, ~q_1_6, ~q_1_7, ~q_1_8, ~q_1_9, ~q_1_10, ~q_1_11, ~q_2, ~q_2_1, ~q_2_2, ~q_2_3, ~q_2_4, ~q_2_5, ~q_2_6, ~q_2_7, ~q_2_8, ~q_2_9, ~q_2_10, ~q_2_11,  ~q_3, ~q_3_1, ~q_3_2, ~q_3_3, ~q_3_4, ~q_3_5, ~q_3_6, ~q_3_7, ~q_3_8, ~q_3_9, ~q_3_10, ~q_3_11, ~q_4, ~q_4_1, ~q_4_2, ~q_4_3, ~q_4_4, ~q_4_5, ~q_4_6, ~q_4_7, ~q_4_8, ~q_4_9, ~q_4_10, ~q_4_11, ~q_5, ~q_5_1, ~q_5_2, ~q_5_3, ~q_5_4, ~q_5_5, ~q_5_6, ~q_5_7, ~q_5_8, ~q_5_9, ~q_5_10, ~q_5_11, ~q_6, ~q_6_1, ~q_6_2, ~q_6_3, ~q_6_4, ~q_6_5, ~q_6_6, ~q_6_7, ~q_6_8, ~q_6_9, ~q_6_10, ~q_6_11, ~q_7, ~q_7_1, ~q_7_2, ~q_7_3, ~q_7_4, ~q_7_5, ~q_7_6, ~q_7_7, ~q_7_8, ~q_7_9, ~q_7_10, ~q_7_11,  ~q_8, ~q_8_1, ~q_8_2, ~q_8_3, ~q_8_4, ~q_8_5, ~q_8_6, ~q_8_7, ~q_8_8, ~q_8_9, ~q_8_10, ~q_8_11, ~q_9, ~q_9_1, ~q_9_2, ~q_9_3, ~q_9_4, ~q_9_5, ~q_9_6, ~q_9_7, ~q_9_8, ~q_9_9, ~q_9_10, ~q_9_11, ~q_10, ~q_10_1, ~q_10_2, ~q_10_3, ~q_10_4, ~q_10_5, ~q_10_6, ~q_10_7, ~q_10_8, ~q_10_9, ~q_10_10, ~q_10_11,
  "PEN001", "Gangavva B Hiremath",               1L,                1L, "Sattapur",     "ABC",     "TCH1", "Sinchana Hiregoudar", "Swathi Ganiger", "Deepa Vantetin", "Chaitra Kusugalla", "Veeranna Gouda", "Basavaraj Madiwal", "Apoorva Doddamani", "Ashmita R kamble", "Ara¦¦¦¦ R kamble", "Basavaraj madivalar",     0L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,      0L,      0L,   4L,     0L,     0L,     0L,     1L,     0L,     0L,     0L,     0L,     0L,      0L,      0L, "7 9",     0L,     0L,     0L,     0L,     0L,     0L,     1L,     0L,     1L,      0L,      0L,   2L,     0L,     1L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,      0L,      0L,   NA,     0L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,      0L,      0L,   NA,     0L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,      0L,      0L,   4L,     0L,     0L,     0L,     1L,     0L,     0L,     0L,     0L,     0L,      0L,      0L, "3 8",     0L,     0L,     1L,     0L,     0L,     0L,     0L,     1L,     0L,      0L,      0L,   NA,     0L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,      0L,      0L,    NA,      0L,      0L,      0L,      0L,      0L,      0L,      0L,      0L,      0L,       0L,       0L
)
Students <- data1 |> select(starts_with("cal_name"))
Students <- Students |> 
  pivot_longer(cols = everything(),names_to = "ID",values_to = "Name") |> 
  mutate(ID=str_remove(ID,"cal_name"))

Scores <- data1 |> select(matches("q_\\d+_\\d+"))
Scores <- Scores |> 
  mutate(across(.fns = as.character)) |> 
  pivot_longer(cols = everything(),names_to = "ID",values_to = "Score") |> 
  mutate(Q=str_remove(ID,"_\\d+"),
         ID=str_extract(ID,"\\d+")) |> 
  pivot_wider(names_from = "Q",values_from = "Score")

Final <- inner_join(Students,Scores,by="ID")
Final
#> # A tibble: 10 x 13
#>    ID    Name  q_1   q_2   q_3   q_4   q_5   q_6   q_7   q_8   q_9   q_10  q_11 
#>    <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#>  1 1     Sinc~ 0     0     0     0     0     0     0     0     0     0     0    
#>  2 2     Swat~ 0     0     0     1     0     0     0     0     0     0     0    
#>  3 3     Deep~ 0     0     0     0     0     0     1     0     1     0     0    
#>  4 4     Chai~ 0     1     0     0     0     0     0     0     0     0     0    
#>  5 5     Veer~ 0     0     0     0     0     0     0     0     0     0     0    
#>  6 6     Basa~ 0     0     0     0     0     0     0     0     0     0     0    
#>  7 7     Apoo~ 0     0     0     1     0     0     0     0     0     0     0    
#>  8 8     Ashm~ 0     0     1     0     0     0     0     1     0     0     0    
#>  9 9     Ara¦~ 0     0     0     0     0     0     0     0     0     0     0    
#> 10 10    Basa~ 0     0     0     0     0     0     0     0     0     0     0

Created on 2022-06-14 by the reprex package (v2.0.1)

Thanks a lot for this. I have two more queries:

  1. Can I maintain the data type as numeric itself?
  2. And when more data is added, suppose another school's students data is given (which will appear in the second row) and it has let's say 5 students. Will the same code work or should I modify it for varying number of students?

Regards,
Nithin

  1. Yes, just remove this line.
mutate(across(.fns = as.character)) |> 

I included it by mistake.
2. The code relies on patterns in the column names. As long as those patterns remain, the code should work. Be sure to understand the code before you use it for anything important.

Thank you.
I have a fair idea of what you have done by creating data frames and then later joining it.
My doubt is whether the code has to modify with change in number of students.
I am putting the code and sample data below.

library(tidyverse)
library(janitor)
#> 
#> Attaching package: 'janitor'
#> The following objects are masked from 'package:stats':
#> 
#>     chisq.test, fisher.test
library(readxl)

data1<-tibble::tribble(
  ~enumerator,                ~en_name, ~confirm_en_name, ~tchidentrychoice, ~clusterid,      ~schoolid,      ~teacherid,         ~cal_name1,           ~cal_name2,                  ~cal_name3,       ~cal_name4,               ~cal_name5, ~q_1_1, ~q_1_2, ~q_1_3, ~q_1_4, ~q_1_5, ~q_1_6, ~q_1_7, ~q_1_8, ~q_1_9, ~q_1_10, ~q_1_11,   ~q_2, ~q_2_1, ~q_2_2, ~q_2_3, ~q_2_4, ~q_2_5, ~q_2_6, ~q_2_7, ~q_2_8, ~q_2_9, ~q_2_10, ~q_2_11,    ~q_3, ~q_3_1, ~q_3_2, ~q_3_3, ~q_3_4, ~q_3_5, ~q_3_6, ~q_3_7, ~q_3_8, ~q_3_9, ~q_3_10, ~q_3_11,  ~q_4, ~q_4_1, ~q_4_2, ~q_4_3, ~q_4_4, ~q_4_5, ~q_4_6, ~q_4_7, ~q_4_8, ~q_4_9, ~q_4_10, ~q_4_11, ~q_5, ~q_5_1, ~q_5_2, ~q_5_3, ~q_5_4, ~q_5_5, ~q_5_6, ~q_5_7, ~q_5_8, ~q_5_9, ~q_5_10, ~q_5_11,  ~q_6, ~q_6_1, ~q_6_2, ~q_6_3, ~q_6_4, ~q_6_5, ~q_6_6, ~q_6_7, ~q_6_8, ~q_6_9, ~q_6_10, ~q_6_11,
     "Ben001",   "Gangavva B Hiremath",               1L,                1L,   "Katnur", "GHPS Giriyal", "J B Savadatti", "Pallavi.R.Ingale",    "Sachin.B.Jadhav",      "Shravani Muraganavar", "Pavitra Salgar", "Sannidhi Haranashikari",     1L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,      0L,      0L,    "4",     0L,     0L,     0L,     1L,     0L,     0L,     0L,     0L,     0L,      0L,      0L,      NA,     0L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,      0L,      0L, "5 8",     0L,     0L,     0L,     0L,     1L,     0L,     0L,     1L,     0L,      0L,      0L,   6L,     0L,     0L,     0L,     0L,     0L,     1L,     0L,     0L,     0L,      0L,      0L, "1 2",     1L,     1L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,      0L,      0L,
     "Ben002", "Hemalata B. Bhajantri",               1L,                1L, "Byahatti", "DPEP Kusugal",     "Manjula A",   "Anand Byahatti", "Apsana. S Hubballi", "Fathima Begum.R.Kawalikai",               NA,                       NA,     0L,     0L,     0L,     1L,     0L,     0L,     0L,     1L,     0L,      0L,      0L, "7 10",     0L,     0L,     0L,     0L,     0L,     0L,     1L,     0L,     0L,      1L,      0L, "3 7 9",     0L,     0L,     1L,     0L,     0L,     0L,     1L,     0L,     1L,      0L,      0L,    NA,     NA,     NA,     NA,     NA,     NA,     NA,     NA,     NA,     NA,      NA,      NA,   NA,     NA,     NA,     NA,     NA,     NA,     NA,     NA,     NA,     NA,      NA,      NA,    NA,     NA,     NA,     NA,     NA,     NA,     NA,     NA,     NA,     NA,      NA,      NA
  )


Students <- data1 |> select(starts_with("cal_name"))
Students <- Students |> 
  pivot_longer(cols = everything(),names_to = "ID",values_to = "Name") |> 
  mutate(ID=str_remove(ID,"cal_name"))

Scores <- data1 |> select(matches("q_\\d+_\\d+"))
Scores <- Scores |> 
  mutate(across(.fns = as.character)) |> 
  pivot_longer(cols = everything(),names_to = "ID",values_to = "Score") |> 
  mutate(Q=str_remove(ID,"_\\d+"),
         ID=str_extract(ID,"\\d+")) |> 
  pivot_wider(names_from = "Q",values_from = "Score")
#> Warning: Values are not uniquely identified; output will contain list-cols.
#> * Use `values_fn = list` to suppress this warning.
#> * Use `values_fn = length` to identify where the duplicates arise
#> * Use `values_fn = {summary_fun}` to summarise duplicates
(Final <- inner_join(Students,Scores,by="ID"))
#> # A tibble: 10 x 13
#>    ID    Name  q_1   q_2   q_3   q_4   q_5   q_6   q_7   q_8   q_9   q_10  q_11 
#>    <chr> <chr> <lis> <lis> <lis> <lis> <lis> <lis> <lis> <lis> <lis> <lis> <lis>
#>  1 1     Pall~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~
#>  2 2     Sach~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~
#>  3 3     Shra~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~
#>  4 4     Pavi~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~
#>  5 5     Sann~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~
#>  6 1     Anan~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~
#>  7 2     Apsa~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~
#>  8 3     Fath~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~
#>  9 4     <NA>  <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~
#> 10 5     <NA>  <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~ <chr~
Created on 2022-06-15 by the reprex package (v2.0.1)

The problem was not the different number of students. It was that ID values in Scores were repeated with no other column available to distinguish them. I included the schoolid in the Scores data frame to provide a unique identity to each score.

data1<-tibble::tribble(
  ~enumerator,                ~en_name, ~confirm_en_name, ~tchidentrychoice, ~clusterid,      ~schoolid,      ~teacherid,         ~cal_name1,           ~cal_name2,                  ~cal_name3,       ~cal_name4,               ~cal_name5, ~q_1_1, ~q_1_2, ~q_1_3, ~q_1_4, ~q_1_5, ~q_1_6, ~q_1_7, ~q_1_8, ~q_1_9, ~q_1_10, ~q_1_11,   ~q_2, ~q_2_1, ~q_2_2, ~q_2_3, ~q_2_4, ~q_2_5, ~q_2_6, ~q_2_7, ~q_2_8, ~q_2_9, ~q_2_10, ~q_2_11,    ~q_3, ~q_3_1, ~q_3_2, ~q_3_3, ~q_3_4, ~q_3_5, ~q_3_6, ~q_3_7, ~q_3_8, ~q_3_9, ~q_3_10, ~q_3_11,  ~q_4, ~q_4_1, ~q_4_2, ~q_4_3, ~q_4_4, ~q_4_5, ~q_4_6, ~q_4_7, ~q_4_8, ~q_4_9, ~q_4_10, ~q_4_11, ~q_5, ~q_5_1, ~q_5_2, ~q_5_3, ~q_5_4, ~q_5_5, ~q_5_6, ~q_5_7, ~q_5_8, ~q_5_9, ~q_5_10, ~q_5_11,  ~q_6, ~q_6_1, ~q_6_2, ~q_6_3, ~q_6_4, ~q_6_5, ~q_6_6, ~q_6_7, ~q_6_8, ~q_6_9, ~q_6_10, ~q_6_11,
  "Ben001",   "Gangavva B Hiremath",               1L,                1L,   "Katnur", "GHPS Giriyal", "J B Savadatti", "Pallavi.R.Ingale",    "Sachin.B.Jadhav",      "Shravani Muraganavar", "Pavitra Salgar", "Sannidhi Haranashikari",     1L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,      0L,      0L,    "4",     0L,     0L,     0L,     1L,     0L,     0L,     0L,     0L,     0L,      0L,      0L,      NA,     0L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,      0L,      0L, "5 8",     0L,     0L,     0L,     0L,     1L,     0L,     0L,     1L,     0L,      0L,      0L,   6L,     0L,     0L,     0L,     0L,     0L,     1L,     0L,     0L,     0L,      0L,      0L, "1 2",     1L,     1L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,      0L,      0L,
  "Ben002", "Hemalata B. Bhajantri",               1L,                1L, "Byahatti", "DPEP Kusugal",     "Manjula A",   "Anand Byahatti", "Apsana. S Hubballi", "Fathima Begum.R.Kawalikai",               NA,                       NA,     0L,     0L,     0L,     1L,     0L,     0L,     0L,     1L,     0L,      0L,      0L, "7 10",     0L,     0L,     0L,     0L,     0L,     0L,     1L,     0L,     0L,      1L,      0L, "3 7 9",     0L,     0L,     1L,     0L,     0L,     0L,     1L,     0L,     1L,      0L,      0L,    NA,     NA,     NA,     NA,     NA,     NA,     NA,     NA,     NA,     NA,      NA,      NA,   NA,     NA,     NA,     NA,     NA,     NA,     NA,     NA,     NA,     NA,      NA,      NA,    NA,     NA,     NA,     NA,     NA,     NA,     NA,     NA,     NA,     NA,      NA,      NA
)

Students <- data1 |> select(starts_with("cal_name"))
Students <- Students |> 
  pivot_longer(cols = everything(),names_to = "ID",values_to = "Name") |> 
  mutate(ID=str_remove(ID,"cal_name")) |> 
  filter(!is.na(Name))

Scores <- data1 |> select(schoolid,matches("q_\\d+_\\d+"))
Scores <- Scores |> 
  pivot_longer(cols = -schoolid,names_to = "ID",values_to = "Score") |> 
  mutate(Q=str_remove(ID,"_\\d+"),
         ID=str_extract(ID,"\\d+")) |> 
  filter(!is.na(Score)) |> 
  pivot_wider(names_from = "Q",values_from = "Score")

Final <- inner_join(Students,Scores,by="ID")

Thank you for this.
But the "Final" data frame is creating duplicates. 14 observations are being shown.

Students <- data1 |> select(schoolid, starts_with("cal_name"))
Students <- Students |> 
  pivot_longer(cols = -schoolid,names_to = "ID",values_to = "Name") |> 
  mutate(ID=str_remove(ID,"cal_name")) |> 
  filter(!is.na(Name))

Scores <- data1 |> select(schoolid,matches("q_\\d+_\\d+"))
Scores <- Scores |> 
  pivot_longer(cols = -schoolid,names_to = "ID",values_to = "Score") |> 
  mutate(Q=str_remove(ID,"_\\d+"),
         ID=str_extract(ID,"\\d+")) |> 
  filter(!is.na(Score)) |> 
  pivot_wider(names_from = "Q",values_from = "Score")

Final <- inner_join(Students,Scores,by=c("ID","schoolid"))

Thanks a lot for this. I am getting an error message. Will post it in separate topic.

Regards,
Nithin

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.