Hi 
I found a solution with vecsets to sort it.
library(tidyverse)
#> Warning: package 'tibble' was built under R version 4.0.3
#> Warning: package 'dplyr' was built under R version 4.0.5
library(vecsets)
#> Warning: package 'vecsets' was built under R version 4.0.5
df_a <-
data.frame(
stringsAsFactors = FALSE,
name = c("a","b","c","d","e",
"f","g","h","i","j","k","l","m","n","o","p",
"q","r","s","t","u","v","w","x","y","z","aa",
"bb","cc"),
value = c(1,4,4,4,1,1,1,
1,1,2,4,1,3,2,3,3,4,2,2,4,4,
3,4,4,1,3,3,2,3)
)
df_b <-
data.frame(
stringsAsFactors = FALSE,
name = c("e","f","g","h","i","j",
"k","l","m","n","o","p","q","r","s","t","u","v",
"w","x","y","z","aa","bb","cc","dd","ee"),
value = c(1,4,4,4,1,4,1,4,
1,2,3,1,3,2,3,4,1,2,2,3,4,3,
4,4,4,3,3)
)
sum(df_a$value)
#> [1] 75
sum(df_b$value)
#> [1] 75
set_a <- rep(df_a$name,df_a$value)
set_a
#> [1] "a" "b" "b" "b" "b" "c" "c" "c" "c" "d" "d" "d" "d" "e" "f"
#> [16] "g" "h" "i" "j" "j" "k" "k" "k" "k" "l" "m" "m" "m" "n" "n"
#> [31] "o" "o" "o" "p" "p" "p" "q" "q" "q" "q" "r" "r" "s" "s" "t"
#> [46] "t" "t" "t" "u" "u" "u" "u" "v" "v" "v" "w" "w" "w" "w" "x"
#> [61] "x" "x" "x" "y" "z" "z" "z" "aa" "aa" "aa" "bb" "bb" "cc" "cc" "cc"
set_b <- rep(df_b$name,df_b$value)
set_b
#> [1] "e" "f" "f" "f" "f" "g" "g" "g" "g" "h" "h" "h" "h" "i" "j"
#> [16] "j" "j" "j" "k" "l" "l" "l" "l" "m" "n" "n" "o" "o" "o" "p"
#> [31] "q" "q" "q" "r" "r" "s" "s" "s" "t" "t" "t" "t" "u" "v" "v"
#> [46] "w" "w" "x" "x" "x" "y" "y" "y" "y" "z" "z" "z" "aa" "aa" "aa"
#> [61] "aa" "bb" "bb" "bb" "bb" "cc" "cc" "cc" "cc" "dd" "dd" "dd" "ee" "ee" "ee"
a_diff <-
vsetdiff(set_a,set_b, multiple = TRUE) %>% length()
b_diff <-
vsetdiff(set_b,set_a, multiple = TRUE) %>% length()
total_diff <- a_diff + b_diff
total_agreement_unduplicated <-
vintersect(set_a,set_b, multiple = TRUE) %>% length()
total_agreement_unduplicated
#> [1] 47
similarity <- (length(set_a) + length(set_b) - total_diff)/ (length(set_a) + length(set_b)) * 100
similarity
#> [1] 62.66667
Created on 2021-04-24 by the reprex package (v0.3.0)