Join two datasets

i want to join two datasets about the number of employers, but excluding the row that appears 0 in one year or another. Example:

Data_Frame_2003 <- data.frame (
  "occ_id" = c( 1010, 1010, 1010, 1010, 1010,1234,1234,1234,1234, 4321, 4321,4321,4321,4321,6521, 4777,8521),
  "Ind_id" = c(28339,27138,31224,33103,1112,27138,31224,1112,52418,33103,31224,1112,52417,26301,20201,60010,31224),
  "Number_emp_2003" = c( 8 ,9 ,  11 ,15 ,16 ,50,10,30,5,20,10,50,30,50,0,10,0)
)
Data_Frame_2013 <- data.frame (
  "occ_id" = c( 1010, 1010, 1010, 1010, 1010,1234,1234,1234,1234, 4321, 4321,4321,4321,4321,6521, 4777,8521),
  "Ind_id" = c(28339,27138,31224,33103,1112,27138,31224,1112,52418,33103,31224,1112,52417,26301,20201,60010,31224),
  "Number_emp_2013" = c( 18 ,29 ,  21 ,25 ,16 ,50,30,30,5,20,10,50,30,0,50,0,50)
)

When I do an inner join or left join the lines with zero remain


Data_Frame_2003 %>% left_join(Data_Frame_2013, by=c("occ_id", "Ind_id"))

I want to join side by side and get this:


Data_Frame <- data.frame (
  "occ_id" = c( 1010, 1010, 1010, 1010, 1010,1234,1234,1234,1234, 4321, 4321,4321,4321),
  "Ind_id" = c(28339,27138,31224,33103,1112,27138,31224,1112,52418,33103,31224,1112,52417),
  "Number_emp_2003" = c( 8 ,9 ,  11 ,15 ,16 ,50,10,30,5,20,10,50,30),   
  "occ_id" = c( 1010, 1010, 1010, 1010, 1010,1234,1234,1234,1234, 4321, 4321,4321,4321),
  "Ind_id" = c(28339,27138,31224,33103,1112,27138,31224,1112,52418,33103,31224,1112,52417),
  "Number_emp_2013" = c( 18 ,29 ,  21 ,25 ,16 ,50,30,30,5,20,10,50,30)
)


Can you guys help me?

Try this. Note that left_join adds suffixes to the column names to avoid duplicates.

NewDF <- Data_Frame_2003 %>% 
  left_join(Data_Frame_2013, by=c("occ_id", "Ind_id"), keep = TRUE) |> 
  filter(Number_emp_2003 != 0, Number_emp_2013 != 0)

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.