checking duplicate entries in rows in data frame

my data can have many columns from ID2 to ID1000 and for these columns on data frame i want to create a functionality which can check in every rows if any entry comes more than one.

No entry should come more than once.

so in my data frame A0012 comes twice in second row. and in eighth row AB982 comes twice. whenever any entry comes more than one then it should show as duplicate entry.

it can be a subset of first column showing duplicates and then all columns from ID2 to IDN

I can have columns from ID2 to ID5000 in my data frame.

df <- data.frame(ID =c("DEV2962","KTN2252","ANA2719","ITI2624","DEV2698","HRT2921",NA,"KTN2624","ANA2548","ITI2535","DEV2732","HRT2837","ERV2951","KTN2542","ANA2813","ITI2210"),
                 city=c("DEL","mum","DEL","MUM","DEL","del","MUM","DEL","del","MUM","mum","mum","mum","mum","DEL","DEL"),
                 Name= c("dev,akash","singh,Ajay","abbas,salman","lal,ram","singh,nkunj","garg,prabal","ali,sanu","singh,kunal","tomar,lakhan","thakur,praveen","ali,sarman","khan,zuber","singh,giriraj","sharma,lokesh","pawar,pooja","sharma,nikita"),
                 ID2 = c("A0011","A0011","A0011","A0011","A0011","A0012","AB702","AB328","AC728","AC314","AC742","AC919","AC062","AD712","AD021","AD920"),
                 ID3 = c("A0012","A0012","A0012","A0012","A0012","A0013","AB712","AB712","AB702","AB328","AC314","AC728","AB702","AB712","AC742","AC919"),
                 ID4 = c(NA,"A0013","A0013","A0013","AB982","AB982",NA,"AB982","A0013","A0012","A0012","A0012","A0012",NA,"A0013","A0012"),
                 ID5 =c(NA,"A0012","AB012","AB012",NA,"AB702",NA,"A0013",NA,"A0011","A0011",NA,"A0011",NA,NA,NA),
                 ID6 = c(NA,NA,NA,"AB982",NA,NA,NA,"A0012",NA,NA,NA,NA,NA,NA,NA,NA),
                 ID7 = c(NA,NA,NA,NA,NA,NA,NA,"A0011",NA,NA,NA,NA,NA,NA,NA,NA),
                 ID8 =c(NA,NA,NA,NA,NA,NA,NA,"AB982",NA,NA,NA,NA,NA,NA,NA,NA))

#dupl <- function(df){
  
  #checkDup <- function(x){
    x <- x %>%  select(matches("^L[1]"):ncol(x))

    ifelse(max(table(x)) > 1, "duplicate available", "")
    
  #}

The output should be look like

df <- data.frame(ID =c("DEV2962","KTN2252","ANA2719","ITI2624","DEV2698","HRT2921",NA,"KTN2624","ANA2548","ITI2535","DEV2732","HRT2837","ERV2951","KTN2542","ANA2813","ITI2210"),
                 city=c("DEL","mum","DEL","MUM","DEL","del","MUM","DEL","del","MUM","mum","mum","mum","mum","DEL","DEL"),
                 Name= c("dev,akash","singh,Ajay","abbas,salman","lal,ram","singh,nkunj","garg,prabal","ali,sanu","singh,kunal","tomar,lakhan","thakur,praveen","ali,sarman","khan,zuber","singh,giriraj","sharma,lokesh","pawar,pooja","sharma,nikita"),
                 ID2 = c("A0011","A0011","A0011","A0011","A0011","A0012","AB702","AB328","AC728","AC314","AC742","AC919","AC062","AD712","AD021","AD920"),
                 ID3 = c("A0012","A0012","A0012","A0012","A0012","A0013","AB712","AB712","AB702","AB328","AC314","AC728","AB702","AB712","AC742","AC919"),
                 ID4 = c(NA,"A0013","A0013","A0013","AB982","AB982",NA,"AB982","A0013","A0012","A0012","A0012","A0012",NA,"A0013","A0012"),
                 ID5 =c(NA,"A0012","AB012","AB012",NA,"AB702",NA,"A0013",NA,"A0011","A0011",NA,"A0011",NA,NA,NA),
                 ID6 = c(NA,NA,NA,"AB982",NA,NA,NA,"A0012",NA,NA,NA,NA,NA,NA,NA,NA),
                 ID7 = c(NA,NA,NA,NA,NA,NA,NA,"A0011",NA,NA,NA,NA,NA,NA,NA,NA),
                 ID8 =c(NA,NA,NA,NA,NA,NA,NA,"AB982",NA,NA,NA,NA,NA,NA,NA,NA),
                 stringsAsFactors = FALSE)

library(tidyverse)

dupcheck <- function(x){
  ifelse( any(duplicated(x,incomparables = NA))
          ,"duplicate available"
          ,"")
}

idlist <- syms(paste0("ID",2:8)) # make it 2:5000 if you have 5000 IDs


df %>%  
    rowwise() %>% 
    mutate(check =dupcheck(c(!!!idlist))) %>% 
    ungroup() %>% 
    relocate(check,.after="Name")

i want to make it dynamically so that it can select the columns from ID1 to ncol

idlist <- syms(paste0("ID",2:(ncol(df)-2)))

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.