validate two dataframes have same records based on two column

I am trying to validate two data frame records and if they are not same then mutate new column with text.

so i want to check if ID and city is same in both data frame and also ID and name is same in both data frame .

city is lower case and upper case so i want to convert the same on lower case and then validate.

df <- data.frame(ID =c("DEV2962","KTN2252","ANA2719","ITI2624","DEV2698","HRT2921","","KTN2624","ANA2548","ITI2535","DEV2732","HRT2837","ERV2951","KTN2542","ANA2813","ITI2210"),
                 city=c("DEL","mum","DEL","MUM","DEL","del","MUM","DEL","del","MUM","mum","mum","mum","mum","DEL","DEL"),
                 Name= c("dev,akash","singh,rahul","abbas,salman","lal,ram","singh,nkunj","garg,prabal","ali,sanu","singh,kunal","tomar,lakhan","thakur,praveen","ali,sarman","khan,zuber","singh,giriraj","sharma,lokesh","pawar,pooja","sharma,nikita"))


df1 <- data.frame(ID =c("DEV2962","KTN2251","ANA2719","ITI2624","DEV2698","HRT2923","","KTN2624","ANA2548","ITI2535","DEV2732","HRT2837","ERV2951","KTN2542","ANA2813","ITI2210"),
                  city=c("DEL","mum","DEL","MUM","DEL","del","MUM","DEL","del","MUM","mum","mum","mum","mum","DEL","DEL"),
                  Name= c("dev,akash","singh,rahul","abbas,salman","lal","singh,nkunj","garg","ali,sanu","singh,kunal","tomar,lakhan","thakur,praveen","ali,sarman","khan,zuber","singh,giriraj","sharma,lokesh","pawar,pooja","sharma,nikita"))



df[[ID_avail]] <- factor(as.integer(!df[[ID]] %in% df1[[ID]]))

df[[ID_Name]] <- factor(as.integer(!df[[Name]] %in% df1[[Name]]))

The output should be like

ID city Name ID_not same Name_not_same
DEV2962 DEL dev,akash
KTN2252 mum singh,rahul not same
ANA2719 DEL abbas,salman
ITI2624 MUM lal,ram not same
DEV2698 DEL singh,nkunj
HRT2921 del garg,prabal not same not same
MUM
KTN2624 DEL singh,kunal
ANA2548 del tomar,lakhan
ITI2535 MUM thakur,praveen
DEV2732 mum ali,sarman
HRT2837 mum khan,zuber
ERV2951 mum singh,giriraj
KTN2542 mum sharma,lokesh
ANA2813 DEL pawar,pooja
ITI2210 MUM sharma,nikita not same not same

Hi @shoaibali,
If you know that both dataframes are in the correct row-order for comparison, then the following should do what you need, by using an introduced rec_num column for matching:

suppressPackageStartupMessages(library(tidyverse))

df <- data.frame(ID =c("DEV2962","KTN2252","ANA2719","ITI2624","DEV2698","HRT2921","","KTN2624","ANA2548","ITI2535","DEV2732","HRT2837","ERV2951","KTN2542","ANA2813","ITI2210"),
                 city=c("DEL","mum","DEL","MUM","DEL","del","MUM","DEL","del","MUM","mum","mum","mum","mum","DEL","DEL"),
                 Name= c("dev,akash","singh,rahul","abbas,salman","lal,ram","singh,nkunj","garg,prabal","ali,sanu","singh,kunal","tomar,lakhan","thakur,praveen","ali,sarman","khan,zuber","singh,giriraj","sharma,lokesh","pawar,pooja","sharma,nikita"))


df1 <- data.frame(ID =c("DEV2962","KTN2251","ANA2719","ITI2624","DEV2698","HRT2923","","KTN2624","ANA2548","ITI2535","DEV2732","HRT2837","ERV2951","KTN2542","ANA2813","ITI2210"),
                  city=c("DEL","mum","DEL","MUM","DEL","del","MUM","DEL","del","MUM","mum","mum","mum","mum","DEL","DEL"),
                  Name= c("dev,akash","singh,rahul","abbas,salman","lal","singh,nkunj","garg","ali,sanu","singh,kunal","tomar,lakhan","thakur,praveen","ali,sarman","khan,zuber","singh,giriraj","sharma,lokesh","pawar,pooja","sharma,nikita"))

dim(df)
#> [1] 16  3
dim(df1)
#> [1] 16  3

# Add record number to each dataframe
df$rec_num <- seq(1:length(df$ID))
df1$rec_num <- seq(1:length(df1$ID))

# Make sure all city abbreviations are uppercase
df$city <- toupper(df$city)
df1$city <- toupper(df$city)

head(df)
#>        ID city         Name rec_num
#> 1 DEV2962  DEL    dev,akash       1
#> 2 KTN2252  MUM  singh,rahul       2
#> 3 ANA2719  DEL abbas,salman       3
#> 4 ITI2624  MUM      lal,ram       4
#> 5 DEV2698  DEL  singh,nkunj       5
#> 6 HRT2921  DEL  garg,prabal       6
head(df1)
#>        ID city         Name rec_num
#> 1 DEV2962  DEL    dev,akash       1
#> 2 KTN2251  MUM  singh,rahul       2
#> 3 ANA2719  DEL abbas,salman       3
#> 4 ITI2624  MUM          lal       4
#> 5 DEV2698  DEL  singh,nkunj       5
#> 6 HRT2923  DEL         garg       6

full <- full_join(df, df1, by="rec_num")

full %>% 
  mutate(id_not_same = ifelse(ID.x != ID.y, "not same", ""),
         city_not_same = ifelse(city.x != city.y, "not same", ""),
         Name_not_same = ifelse(Name.x != Name.y, "not same", "")) -> full
full
#>       ID.x city.x         Name.x rec_num    ID.y city.y         Name.y
#> 1  DEV2962    DEL      dev,akash       1 DEV2962    DEL      dev,akash
#> 2  KTN2252    MUM    singh,rahul       2 KTN2251    MUM    singh,rahul
#> 3  ANA2719    DEL   abbas,salman       3 ANA2719    DEL   abbas,salman
#> 4  ITI2624    MUM        lal,ram       4 ITI2624    MUM            lal
#> 5  DEV2698    DEL    singh,nkunj       5 DEV2698    DEL    singh,nkunj
#> 6  HRT2921    DEL    garg,prabal       6 HRT2923    DEL           garg
#> 7             MUM       ali,sanu       7            MUM       ali,sanu
#> 8  KTN2624    DEL    singh,kunal       8 KTN2624    DEL    singh,kunal
#> 9  ANA2548    DEL   tomar,lakhan       9 ANA2548    DEL   tomar,lakhan
#> 10 ITI2535    MUM thakur,praveen      10 ITI2535    MUM thakur,praveen
#> 11 DEV2732    MUM     ali,sarman      11 DEV2732    MUM     ali,sarman
#> 12 HRT2837    MUM     khan,zuber      12 HRT2837    MUM     khan,zuber
#> 13 ERV2951    MUM  singh,giriraj      13 ERV2951    MUM  singh,giriraj
#> 14 KTN2542    MUM  sharma,lokesh      14 KTN2542    MUM  sharma,lokesh
#> 15 ANA2813    DEL    pawar,pooja      15 ANA2813    DEL    pawar,pooja
#> 16 ITI2210    DEL  sharma,nikita      16 ITI2210    DEL  sharma,nikita
#>    id_not_same city_not_same Name_not_same
#> 1                                         
#> 2     not same                            
#> 3                                         
#> 4                                 not same
#> 5                                         
#> 6     not same                    not same
#> 7                                         
#> 8                                         
#> 9                                         
#> 10                                        
#> 11                                        
#> 12                                        
#> 13                                        
#> 14                                        
#> 15                                        
#> 16

# Remove any unwanted columns in output
full %>% 
  select(-c(4:7))
#>       ID.x city.x         Name.x id_not_same city_not_same Name_not_same
#> 1  DEV2962    DEL      dev,akash                                        
#> 2  KTN2252    MUM    singh,rahul    not same                            
#> 3  ANA2719    DEL   abbas,salman                                        
#> 4  ITI2624    MUM        lal,ram                                not same
#> 5  DEV2698    DEL    singh,nkunj                                        
#> 6  HRT2921    DEL    garg,prabal    not same                    not same
#> 7             MUM       ali,sanu                                        
#> 8  KTN2624    DEL    singh,kunal                                        
#> 9  ANA2548    DEL   tomar,lakhan                                        
#> 10 ITI2535    MUM thakur,praveen                                        
#> 11 DEV2732    MUM     ali,sarman                                        
#> 12 HRT2837    MUM     khan,zuber                                        
#> 13 ERV2951    MUM  singh,giriraj                                        
#> 14 KTN2542    MUM  sharma,lokesh                                        
#> 15 ANA2813    DEL    pawar,pooja                                        
#> 16 ITI2210    DEL  sharma,nikita

Created on 2021-06-16 by the reprex package (v2.0.0)

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.