full join or merge

I have 2 datasets, one contains school/subject quarter rate with 6 rows
and one contains subject national quarter rate with 8 rows. I want to combine
national quarter rate to the school subject rate, regardless of the existing of
school subject rate in that quarter. After join or merge, the class dataset should be 8 rows and qtr_rate_school should be NA in 2020Q4 and 2022Q2 rows AND sch_id should be 10001? Thanks

class <- data.frame(
  stringsAsFactors = FALSE,
            sch_id = c("10001", "10001", "10001", "10001", "10001", "10001"),
        subj = c("40610", "40610", "40610", "40610", "40610", "40610"),
             YrQtr = c("2020Q3","2021Q1","2021Q2",
                       "2021Q3","2021Q4","2022Q1"),
      qtr_rate_school = c(224, 252.333333333333, 188, 144, NA, 295.666666666667)
)

nat <- data.frame(
  stringsAsFactors = FALSE,
        subj = c("40610","40610","40610",
                       "40610","40610","40610","40610","40610"),
             YrQtr = c("2020Q3","2020Q4","2021Q1",
                       "2021Q2","2021Q3","2021Q4","2022Q1","2022Q2"),
      qtr_rate_nat = c(152.132977487291,
                       198.50825617284,110.760384271671,129.086600051908,140.185767587582,
                       123.179853793799,159.337869537275,116.569458074953)
)

all <- full_join(class,nat,by=c('subj','YrQtr'))
# avoid built-in function names like class, df and the
# like because sometimes the function name takes 
# precedence over your object name

Class <- data.frame(
  stringsAsFactors = FALSE,
  sch_id = c("10001", "10001", "10001", "10001", "10001", "10001"),
  subj = c("40610", "40610", "40610", "40610", "40610", "40610"),
  YrQtr = c("2020Q3","2021Q1","2021Q2",
            "2021Q3","2021Q4","2022Q1"),
  qtr_rate_school = c(224, 252.333333333333, 188, 144, NA, 295.666666666667)
)

nat <- data.frame(
  stringsAsFactors = FALSE,
  subj = c("40610","40610","40610",
           "40610","40610","40610","40610","40610"),
  YrQtr = c("2020Q3","2020Q4","2021Q1",
            "2021Q2","2021Q3","2021Q4","2022Q1","2022Q2"),
  qtr_rate_nat = c(152.132977487291,
                   198.50825617284,110.760384271671,129.086600051908,140.185767587582,
                   123.179853793799,159.337869537275,116.569458074953)
)

all <- dplyr::full_join(Class,nat)
#> Joining, by = c("subj", "YrQtr")
all
#>   sch_id  subj  YrQtr qtr_rate_school qtr_rate_nat
#> 1  10001 40610 2020Q3        224.0000     152.1330
#> 2  10001 40610 2021Q1        252.3333     110.7604
#> 3  10001 40610 2021Q2        188.0000     129.0866
#> 4  10001 40610 2021Q3        144.0000     140.1858
#> 5  10001 40610 2021Q4              NA     123.1799
#> 6  10001 40610 2022Q1        295.6667     159.3379
#> 7   <NA> 40610 2020Q4              NA     198.5083
#> 8   <NA> 40610 2022Q2              NA     116.5695

This topic was automatically closed 42 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.