Join data with different structures

Hi, I have these two data sets. I'm trying to unite them by "country" and "year" with full_join(). But it seems impossible (at least on my level) because they are really untidy.

library(tidyverse)

data1 <- data.frame(country = c("Australia", "Australia", "Australia", "Canada", "Canada", "Canada"),
                    series_name = c("var_1", "var_2", "var_3"),
                    YR1970 = c(20,65,50,20,65,50),
                    YR1971 = c(1,0,1,1,1,1),
                    YR1972 = c(36,37,38,50,50,52)
                    )

data2 <- data.frame(country = c("Australia", "Australia", "Australia", "Canada", "Canada", "Canada"),
                    year = c(1970, 1971, 1972, 1970, 1971, 1972),
                    var_x = c(175, 105, 100, 508, 588, 559),
                    var_y = c(10,10,30, 40,45,50),
                    var_z = c(0,10,3, 0,5,5)
                    )

make them both tidy; then they could vertically stack.

library(tidyverse)
(tidy_data1 <- pivot_longer(data1,
                           cols=starts_with("YR"),
                           names_sep = "R",
                           names_to = c("_","year"))|> 
    mutate(year=parse_integer(year)) |> select(-"_"))

(tidy_data2 <- pivot_longer(data2,
                           cols=starts_with("Var"),
                           names_to="series_name") |> 
    select(all_of(names(tidy_data1))))

(tidy_data_together <- bind_rows(tidy_data1,
                                tidy_data2))
1 Like

Thanks!

One question the names of the columns on my dataset1 are in this weird format:
1970[YR1970] = c(20,65,50,20,65,50),
1971[YR1971] = c(1,0,1,1,1,1),
1972[YR1972] = c(36,37,38,50,50,52)

I dind´t know how to reproduce it in the original example because the initial number. In this case how would yoy change the code in the tidy_data1 example?
Thanks again.

like this ?

library(tidyverse)
data1 <- tibble(country = c("Australia", "Australia", "Australia", "Canada", "Canada", "Canada"),
                    series_name = rep(c("var_1", "var_2", "var_3"),2),
                `[YR1970]` = c(20,65,50,20,65,50),
                `[YR1971]` = c(1,0,1,1,1,1),
                `[YR1972]` = c(36,37,38,50,50,52)
)

(tidy_data1 <- pivot_longer(data1,
                            cols=starts_with("[YR"),
                            names_pattern = "\\[(\\w{2})(\\d{4})\\]",
                            names_to = c("_","year"))|> 
    mutate(year=parse_integer(year)) |> select(-"_"))
1 Like

No, they have the number plus the brackets

Ok! I think changing:
cols =start_with ("[YR") by cols = contains ("[YR")

It´s done
I am a bit newby and I didn't know how to do the example accurately enough
Thanks!

library(tidyverse)
data1 <- tibble(country = c("Australia", "Australia", "Australia", "Canada", "Canada", "Canada"),
                series_name = rep(c("var_1", "var_2", "var_3"),2),
                `1970[YR1970]` = c(20,65,50,20,65,50),
                `1971[YR1971]` = c(1,0,1,1,1,1),
                `1972[YR1972]` = c(36,37,38,50,50,52)
)

(tidy_data1 <- pivot_longer(data1,
                            cols=contains("[YR"),
                            names_pattern = "\\[(\\w{2})(\\d{4})\\]",
                            names_to = c("_","year"))|> 
    mutate(year=parse_integer(year)) |> select(-"_"))

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.