Merge two data frames

I am having two different data frames. In data1, there is student test data with their total marks.
In data2, there is classroom observation data with each student being observed and marking yes/no (1/0) responses.
I want the data formatted as in data3 data frame. The challenge here is that the "id" variable is not common to both files. Is there some way I can overcome this problem?

library(tidyverse)
library(janitor)
#> 
#> Attaching package: 'janitor'
#> The following objects are masked from 'package:stats':
#> 
#>     chisq.test, fisher.test
data1=tibble::tribble(
  ~student_id, ~age, ~l1c1, ~l1c2, ~l1c3, ~l1el1, ~l1el2, ~total,
       "S001",   6L,    5L,    6L,    4L,    10L,    10L,    35L,
       "S002",   6L,   10L,   10L,    7L,     5L,     4L,    36L,
       "S003",   6L,    7L,    6L,    6L,     9L,     8L,    36L,
       "S004",   7L,    6L,    6L,   10L,    10L,     6L,    38L,
       "S005",   8L,    5L,   10L,    4L,     9L,     8L,    36L,
       "S006",   7L,    2L,    2L,    6L,     0L,     6L,    16L,
       "S007",   7L,    3L,    3L,    3L,     7L,     5L,    21L,
       "S008",   7L,    1L,    1L,    3L,     2L,     7L,    14L,
       "S009",   8L,   10L,    9L,   10L,     5L,     9L,    43L,
       "S010",   6L,    4L,   10L,    6L,     2L,     6L,    28L,
       "S011",   6L,   10L,    8L,    1L,     3L,     7L,    29L,
       "S012",   7L,    7L,    3L,    6L,     2L,     5L,    23L,
       "S013",   6L,    1L,    3L,    4L,     9L,     9L,    26L,
       "S014",   8L,   10L,   10L,    9L,    10L,     1L,    40L
  )


data2=tibble::tribble(
          ~school, ~student1, ~reading, ~demo, ~reprimand, ~student2, ~reading, ~demo, ~reprimand, ~student3, ~reading, ~demo, ~reprimand, ~student4, ~reading, ~demo, ~reprimand,
   "GHPS KUSUGAL",    "S001",       0L,    0L,         0L,    "S002",       0L,    1L,         1L,    "S003",       0L,    1L,         0L,    "S004",       0L,    1L,         1L,
   "DPEP KUSUGAL",    "S005",       0L,    0L,         0L,    "S006",       1L,    0L,         0L,    "S007",       0L,    1L,         1L,    "S008",       0L,    0L,         1L,
  "GHPS NARENDRA",    "S009",       0L,    1L,         0L,    "S010",       0L,    0L,         1L,    "S011",       1L,    0L,         1L,    "S012",       1L,    1L,         1L,
     "GHPS HUBLI",    "S013",       0L,    0L,         1L,    "S014",       0L,    0L,         1L,        NA,       NA,    NA,         NA,        NA,       NA,    NA,         NA
  )

data3=tibble::tribble(
        ~student_id, ~age, ~l1c1, ~l1c2, ~l1c3, ~l1el1, ~l1el2, ~total, ~reading, ~demo, ~reprimand,
             "S001",   6L,    5L,    6L,    4L,    10L,    10L,    35L,       0L,    0L,         0L,
             "S002",   6L,   10L,   10L,    7L,     5L,     4L,    36L,       0L,    0L,         0L,
             "S003",   6L,    7L,    6L,    6L,     9L,     8L,    36L,       0L,    1L,         0L,
             "S004",   7L,    6L,    6L,   10L,    10L,     6L,    38L,       0L,    0L,         1L,
             "S005",   8L,    5L,   10L,    4L,     9L,     8L,    36L,       0L,    0L,         0L,
             "S006",   7L,    2L,    2L,    6L,     0L,     6L,    16L,       1L,    1L,         0L,
             "S007",   7L,    3L,    3L,    3L,     7L,     5L,    21L,       1L,    1L,         1L,
             "S008",   7L,    1L,    1L,    3L,     2L,     7L,    14L,       0L,    0L,         0L,
             "S009",   8L,   10L,    9L,   10L,     5L,     9L,    43L,       0L,    1L,         0L,
             "S010",   6L,    4L,   10L,    6L,     2L,     6L,    28L,       1L,    1L,         1L,
             "S011",   6L,   10L,    8L,    1L,     3L,     7L,    29L,       1L,    0L,         0L,
             "S012",   7L,    7L,    3L,    6L,     2L,     5L,    23L,       0L,    0L,         0L,
             "S013",   6L,    1L,    3L,    4L,     9L,     9L,    26L,       0L,    0L,         1L,
             "S014",   8L,   10L,   10L,    9L,    10L,     1L,    40L,       0L,    1L,         1L
        )
Created on 2022-06-11 by the reprex package (v2.0.1)


data2_named_better <- data2

names(data2_named_better) <- c(
  "school",
  paste0(
    rep(c("student", "reading", "demo", "reprimand"), 4), "_",
    rep(1:4, 1, each = 4)
  )
)

(d2_long <- pivot_longer(data2_named_better |>
      mutate_all(as.character),
    cols = -"school",
    names_sep = "_",
    names_to = c("type", "id")
    ))

(d2_next <- pivot_wider(d2_long,
          id_cols = c("school", "id"),
          names_from = "type"
        ) |> mutate(across(
          c(
            reading,
            demo,
            reprimand
          ),
          as.integer
        )) |>
          rename(student_id = student))

(d3x <- data1 |> left_join(d2_next) |>
       select(-school, -id))

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.