Some ideas for you
## setup
library(dplyr)
library(stringr)
# Sample data: nine rows with a department name, a city and an email address,
# built column by column (position i of each vector forms row i).
samp_df <- tibble::tibble(
  DEPT = c(
    "Human Resource", "Infrastructure", "Human Resource",
    "Infrastructure", "Human Resources", "Infrastructure",
    "Human Resource", "Infrastructure", "Infrastructure"
  ),
  city = c(
    "del", "mum", "nav",
    "pun", "bang", "chen",
    "triv", "vish", "city"
  ),
  email = c(
    "ddd.dev@xyzcomp.com", "frv.xxt@xyzcomp.com", "xini.abbas@xyzcomp.com",
    "busu.lal@xyzcomp.com", "tink.lal@xyzcomp.com", "frv.xxt@yahoo.com",
    "ratr.ali@xyzcomp.com", "sritu.singh@xyzcomp.com", "frv.xxt@thirdemail.com"
  )
)
## Handling department misspellings
# Reference list of the department names that count as correctly spelled.
lex <- c("Human Resource", "Infrastructure")
# Dept_Disc is TRUE for every row whose DEPT has no exact match in `lex`
# (match() returns NA when a value is not found in the table).
samp_df <- mutate(samp_df, Dept_Disc = is.na(match(DEPT, lex)))
## Handling dup emails
# The part of an address before the "@" is compared across rows; a row is
# flagged in `two_mail` when that part also occurs on at least one other row,
# whatever the domain is.
# str_extract - https://stringr.tidyverse.org/reference/str_extract.html
# duplicated - https://www.rdocumentation.org/packages/base/versions/3.6.2/topics/duplicated
samp_df <- samp_df %>%
  mutate(
    # First run of non-"@" characters, i.e. the text before the "@".
    two_mail = str_extract(email, "[^@]+"),
    # duplicated() on its own always leaves one member of each duplicate set
    # unflagged (the first, or with fromLast = TRUE the last), so both
    # directions are combined to mark every occurrence. Rows where nothing
    # could be extracted (NA) are never reported as duplicates of each other.
    two_mail = !is.na(two_mail) &
      (duplicated(two_mail) | duplicated(two_mail, fromLast = TRUE))
  )
samp_df
#> # A tibble: 9 x 5
#> DEPT city email Dept_Disc two_mail
#> <chr> <chr> <chr> <lgl> <lgl>
#> 1 Human Resource del ddd.dev@xyzcomp.com FALSE FALSE
#> 2 Infrastructure mum frv.xxt@xyzcomp.com FALSE TRUE
#> 3 Human Resource nav xini.abbas@xyzcomp.com FALSE FALSE
#> 4 Infrastructure pun busu.lal@xyzcomp.com FALSE FALSE
#> 5 Human Resources bang tink.lal@xyzcomp.com TRUE FALSE
#> 6 Infrastructure chen frv.xxt@yahoo.com FALSE TRUE
#> 7 Human Resource triv ratr.ali@xyzcomp.com FALSE FALSE
#> 8 Infrastructure vish sritu.singh@xyzcomp.com FALSE FALSE
#> 9 Infrastructure city frv.xxt@thirdemail.com FALSE TRUE
Created on 2020-09-08 by the reprex package (v0.3.0)