Although inconsequential at this point, I'd suggest keeping the data consistent; both the "database" and the desired outcome are not aligned.
The following uses base R and data.table.
library(data.table)
# juandmaz's "database"
dt_1 <- data.table(
id = c(1:15),
age = c(16, 19, 16, 17, 17,
17, 17, 16, 16, 19,
17, 16, 16, 16, 20),
demo = c(rep(1, 11), rep(0, 4)),
result = c("POS", "POS", "NEG", "NEG", "NEG",
"POS", "POS", "POS", "POS", "NEG",
"POS", "NEG", "NEG", "NEG", "NEG"),
category = c("A", "A", "C", "A", "B",
"C", "A", "A", "A", "B",
"A", "A", "B", "B", "A"),
nationality = c("Usa", "Spain", "Spain", "Brasil", "Spain",
"Usa", "Brasil", "Spain", "France", "Brasil",
"Brasil", "Brasil", "France", "Usa", "France"),
sport = c("soccer", "soccer", "basquet", "golf", "voley",
"soccer", "voley", "soccer", "soccer", "golf",
"golf", "voley", "basquet", "basquet", "basquet")
)
# juandmaz's reference data
dt_2 <- data.table(
id = c(1:15),
age = c(16, 19, 21, 20, 17,
19, 17, 16, 19, 19,
21, 20, 20, 16, 20),
demo = c(1, 1, 0, 1, 0,
0, 0, 1, 0, 0,
0, 0, 0, 1, 0),
result = c("POS", "POS", "POS", "POS", "NEG",
"POS", "POS", "POS", "POS", "POS",
"POS", "POS", "POS", "POS", "NEG"),
category = c("A", "A", "C", "A", "B",
"C", "A", "A", "A", "B",
"A", "A", "B", "B", "A"),
nationality = c("Usa", "Spain", "Spain", "Brasil", "Spain",
"Usa", "Brasil", "Spain", "France", "Brasil",
"Brasil", "Brasil", "France", "Usa", "France"),
sport = c("soccer", "soccer", "basquet", "golf", "voley",
"voley", "voley", "soccer", "golf", "soccer",
"golf", "voley", "basquet", "soccer", "soccer")
)
# Inconsistancies in references (omitting [exclusion] and [reason])
dt_1 == dt_2
# Rules for [exclusion] and [reason]
# 1. [age] < 18
# 2. [demo] == 1
# 3. [result] == "NEG"
# 4. [sport] == "soccer"
# Seperate Columns for reference
# dt_2[age < 18, i.age := "under 18"][demo == 1, i.demo := "demo equal 1"][result == "NEG", i.result := "resul negative"][sport == "soccer", i.sport := "chose soccer"][]
# Combine and clean (getting rid of tailing space and ",")
dt_2[
age < 18 | demo == 1 | result == "NEG" | sport == "soccer", exclusion := 1
][
, reason := gsub(
"NA, |, $|^.\\s+",
"",
paste0(
ifelse(age < 18, "under 18, ", ""),
ifelse(demo == 1, "demo equal 1, ", ""),
ifelse(result == "NEG", "resul negative, ", ""),
ifelse(sport == "soccer", "chose soccer, ", ""
)
)
)
]