So I think that the code below will generally address the issue you're having. However, I wasn't sure about the two year 6 entries for Claudias in your dataset: Shouldn't both Claudias have a last initial included in the name column based on issue number 1 that you provide? If that's the case, then I think the method below should be useful!
I used an inner_join to replicate the table you provide, but I'm not sure if this is the type of join you're targeting.
library(tidyverse)
dataset <- tibble::tribble(
~year, ~name, ~text,
3L, "Adam A.", "fubar",
3L, "Adam B.", "asdsdasd",
3L, "Laura B.", "blah",
3L, "Laura C.", "brown hairball",
3L, "Laura C.", "black hairball",
3L, "John", "quick brown fox",
3L, "Zeke", "over lazy dog",
6L, "Adam", "different person same initials",
6L, "Jack", "birds are cool",
6L, "Laura", "appear again",
6L, "Claudia", "foo",
6L, "Claudia M.", "bar"
)
# initials field is created
demographics <- tibble::tribble(
~year, ~full_name, ~name, ~initials, ~personality,
3L, "John Green", "John", "John G.", "INTP",
3L, "Adam Apple", "Adam", "Adam A.", "INTJ",
3L, "Adam Banana", "Adam B.", "Adam B.", "ESFJ",
3L, "Laura Bosch", "Laura", "Laura B.", "ESFJ",
3L, "Laura Caley", "Laura C.", "Laura C.", "ISFP",
3L, "Zeke Wong", "Zeke", "Zeke W.", "ENFP",
6L, "Adam Another", "Adam", "Adam A.", "ESTP",
6L, "Jack Sparrow", "Jack", "Jack S.", "INTJ",
6L, "Laura Caley", "Laura", "Laura C.", "ISFP",
6L, "Abi-Maria", "Abi-Maria", "Abi-Maria", "ENFJ",
6L, "Douglas Orange", "Douglas", "Douglas O.", "ISFJ",
6L, "Claudia Jane", "Claudia", "Claudia J.", "ISFP",
6L, "Claudia Miley", "Claudia M.", "Claudia M.", "INFP"
)
update_demographics <- demographics %>%
# Split demo name into first & last cols
separate(col = name, into = c("first", "last_init"), sep = " ") %>%
# Are there first name duplicates in the year?
add_count(year, first) %>%
mutate(
# If there's duplicates...
join_id = if_else(
condition = n != 1,
# Then use name with last initial
true = initials,
# Otherwise just first name will do
false = first)) %>%
select(-c(last_init, first, n))
#> Warning: Expected 2 pieces. Missing pieces filled with `NA` in 10 rows [1, 2, 4,
#> 6, 7, 8, 9, 10, 11, 12].
inner_join(
x = dataset,
y = update_demographics,
by = c("year", "name" = "join_id")
)
#> # A tibble: 11 x 6
#> year name text full_name initials personality
#> <int> <chr> <chr> <chr> <chr> <chr>
#> 1 3 Adam A. fubar Adam Apple Adam A. INTJ
#> 2 3 Adam B. asdsdasd Adam Banana Adam B. ESFJ
#> 3 3 Laura B. blah Laura Bosch Laura B. ESFJ
#> 4 3 Laura C. brown hairball Laura Caley Laura C. ISFP
#> 5 3 Laura C. black hairball Laura Caley Laura C. ISFP
#> 6 3 John quick brown fox John Green John G. INTP
#> 7 3 Zeke over lazy dog Zeke Wong Zeke W. ENFP
#> 8 6 Adam different person same in~ Adam Another Adam A. ESTP
#> 9 6 Jack birds are cool Jack Sparrow Jack S. INTJ
#> 10 6 Laura appear again Laura Caley Laura C. ISFP
#> 11 6 Claudia M. bar Claudia Mil~ Claudia ~ INFP
Created on 2021-07-07 by the reprex package (v2.0.0)