Conditional clause for cross-observations

Hey,

this thread relates to my last one posted, but with more data (see below). I want to match the partner ID with the partner (e.g. Susi should have the same pid as Hans). I would like to have a code, that is replicable for more data and more couples. Technocrat already recommended the "pair-function", with the limitation that it is only applicable for one couple.

I thought about the if-clause, but couldnt figure out what to insert to refer to that specific partners id:

Testfile1$pid[Testfile1$surname_p %in% Testfile1$surname & Testfile1$prename_p %in% Testfile1$prename] <-"partnerID"

I appreciate any help! Thanks :slight_smile:

head(Testfile, 9)
did pid partner prename surname prename_p surname surname_p
1 111 yes Susi Bauer Hans Maurer
2 222 no Peter Müller
3 333 no Christian Schneider
4 444 yes Hans Maurer Susi Bauer
5 555 no Maja Bäcker
6 666 no Robin Maler
7 777 yes Maria Spinner Ludwig Feuer
8 888 no Katrin Klopfer
9 999 yes Ludwig Feuer Maria Spinner

library(tidyverse)
(df <- tibble::tribble(
  ~did, ~pid, ~partner,    ~prename,    ~surname, ~prename_p, ~surname_p,
  1L, 111L,    "yes",      "Susi",     "Bauer",     "Hans",   "Maurer",
  2L, 222L,     "no",     "Peter",    "Muller",         NA,         NA,
  3L, 333L,     "no", "Christian", "Schneider",         NA,         NA,
  4L, 444L,    "yes",      "Hans",    "Maurer",     "Susi",    "Bauer",
  5L, 555L,     "no",      "Maja",    "Backer",         NA,         NA,
  6L, 666L,     "no",     "Robin",     "Maler",         NA,         NA,
  7L, 777L,    "yes",     "Maria",   "Spinner",   "Ludwig",    "Feuer",
  8L, 888L,     "no",    "Katrin",   "Klopfer",         NA,         NA,
  9L, 999L,    "yes",    "Ludwig",     "Feuer",    "Maria",  "Spinner"
))

(with_partners_df <- filter(
  df,
  partner == "yes"
))



(with_partners_df_x <- left_join(with_partners_df,
  rename_with(with_partners_df,
    .fn = \(x)paste0("m_", x)
  ),
  by = c(
    "prename" = "m_prename_p",
    "surname" = "m_surname_p"
  )
) |> rowwise() |> mutate(
  sorted_pids_str = factor(toString(sort(c(pid, m_pid)))),
  grpid = as.integer(sorted_pids_str)
) |> ungroup())


(with_partners_df_clean <- select(
  with_partners_df_x,
  all_of(c(names(with_partners_df), "sorted_pids_str", "grpid"))
))

(fin_df <- left_join(df, with_partners_df_clean,
  by = c("did", "pid", "partner", "prename", "surname", "prename_p", "surname_p")
))
# A tibble: 9 x 9
    did   pid partner prename   surname   prename_p surname_p sorted_pids_str grpid
  <int> <int> <chr>   <chr>     <chr>     <chr>     <chr>     <fct>           <int>
1     1   111 yes     Susi      Bauer     Hans      Maurer    111, 444            1
2     2   222 no      Peter     Muller    NA        NA        NA                 NA
3     3   333 no      Christian Schneider NA        NA        NA                 NA
4     4   444 yes     Hans      Maurer    Susi      Bauer     111, 444            1
5     5   555 no      Maja      Backer    NA        NA        NA                 NA
6     6   666 no      Robin     Maler     NA        NA        NA                 NA
7     7   777 yes     Maria     Spinner   Ludwig    Feuer     777, 999            2
8     8   888 no      Katrin    Klopfer   NA        NA        NA                 NA
9     9   999 yes     Ludwig    Feuer     Maria     Spinner   777, 999            2

Rows 1&2 should both have a pid of 111 and 7&9 should both have 777

Here's a solution, still limited to the data provided, which do not have the case that after subseting out partner == "yes", the pairs of partners do not have consecutive appearances, such as row 1 being followed by row 4 then by row 9 (these are the row indices, which are the same as did.

d <- data.frame(did = 1:9, pid = c(
  111L, 222L, 333L, 111L, 555L,
  666L, 777L, 888L, 777L
), partner = c(
  "yes", "no", "no", "yes",
  "no", "no", "yes", "no", "yes"
), prename = c(
  "Susi", "Peter",
  "Christian", "Hans", "Maja", "Robin", "Maria", "Katrin", "Ludwig"
), surname = c(
  "Bauer", "Müller", "Schneider", "Maurer", "Bäcker",
  "Maler", "Spinner", "Klopfer", "Feuer"
), prename_p = c(
  "Hans",
  "", "", "Susi", "", "", "Ludwig", "", "Maria"
), surname_p = c(
  "Maurer",
  "", "", "Bauer", "", "", "Feuer", "", "Spinner"
))
# subset only entries with a partner
paired <- d[which(d$partner == "yes"), ]
# create single name
paired$partner1 <- paste(paired$prename, paired$surname)
paired$partner2 <- paste(paired$prename_p, paired$surname_p)
paired <- paired[-c(3:7)]
# find partners
make_pairs <- function(x) split(x, ceiling((seq_along(x)) / 2))
# assign the lowest pid to each partner in a pair
make_pid <- function(x) min(x$pid)
assign_pid <- function(x) pairs[x][[1]][2] <<- make_pid(pairs[x][[1]])

pairs <- make_pairs(paired)
for (i in seq_along(pairs)) assign_pid(i)
pairs
#> ```
1`
#>   did pid    partner1    partner2
#> 1   1 111  Susi Bauer Hans Maurer
#> 4   4 111 Hans Maurer  Susi Bauer
#> 
#> ```
2`
#>   did pid      partner1      partner2
#> 7   7 777 Maria Spinner  Ludwig Feuer
#> 9   9 777  Ludwig Feuer Maria Spinner
correct_pairs <- function(x) {
  d[pairs[x][[1]][[1]], 2] <- pairs[x][[1]][[2]]
}
for (i in seq_along(pairs)) correct_pairs(i)

d
#>   did pid partner   prename   surname prename_p surname_p
#> 1   1 111     yes      Susi     Bauer      Hans    Maurer
#> 2   2 222      no     Peter    Müller                    
#> 3   3 333      no Christian Schneider                    
#> 4   4 111     yes      Hans    Maurer      Susi     Bauer
#> 5   5 555      no      Maja    Bäcker                    
#> 6   6 666      no     Robin     Maler                    
#> 7   7 777     yes     Maria   Spinner    Ludwig     Feuer
#> 8   8 888      no    Katrin   Klopfer                    
#> 9   9 777     yes    Ludwig     Feuer     Maria   Spinner

Created on 2023-01-14 with reprex v2.0.2