comparing the names and options of two similar meta data

I have two similar meta data of approx. 1000 records and more than 500 columns . and I want to check the consistency between two data frame. now i want to create a new data frame for which it would show all column names of df1 in first row and same with columns names of df2 in second row and also their option in column 3 and 4 respectively. and then mutate new columns to show TRUE or FALSE if column names and their options are matching.

basically i have to check if column names in df1 is excact with df 2 and if options in all columns of df1 is exactly matching with df2

df1 <- data.frame(ID =c("DEV2962","KTN2252","ANA2719","ITI2624","DEV2698","HRT2921",NA,"KTN2624","ANA2548","ITI2535","DEV2732","HRT2837","ERV2951","KTN2542","ANA2813","ITI2210"),
                  city=c("del","mum","nav","pun","bang","chen","triv","vish","del","mum","bang","vish","bhop","kol","noi","gurg"),
                  Name= c("dev,akash","singh,rahul","abbas,salman","lal,ram","singh,nkunj","garg,prabal","ali,sanu","singh,kunal","tomar,lakhan","thakur,praveen","ali,sarman","khan,zuber","singh,giriraj","sharma,lokesh","pawar,pooja","sharma,nikita"),
                  gender =c("m","f","m","f","m","m","m","m","m","m","m","f","m","f","m","m"))

df2 <- data.frame(ID =c("DEV2962","KTN2152","ANA2719","ITs2624","DEV2698","HRT2921",NA,"KTN2624","ANA2548","ITI2535","DEV2732","HRT2837","ERV2951","KTN2542","ANA2813","ITI2210"),
                  city=c("del","MUm"," nav","pun","bang","chen"," ddgy ","vish","del","mum","bang","vish","bhol","nhus","huay","gurg"),
                  Name= c("dev","singh,rahul","abbas,salman","lal,ram","singh,nkunj","huna,ghalak","khan,fhalt","singh,kunal","tomar,lakhan","thakur,praveen","ali,sarman","khan,zuber","singh,giriraj","sharma,lokesh","pawar,pooja","sharma,nikita"),
                  gender =c("m","f","m","f","m","m","m","male","m","male","m","f","m","f","m","m"))


df <- select(df1,matches("^[A-Z]"))
comp <- do.call(rbind, lapply(df[, 4:ncol(df)], function(option) as.data.frame(table(option)))) 
comp$variable <- gsub("[.](.*)","", rownames(comp)) 
rownames(comp) <- NULL
comp <- comp[, c(3,1,2)] 
comp <- comp[order(-comp$Freq), ]

I am unable to attach the file but i have showed the pic below the required output should look like.

The output should be look like .

Hi @str_guru,
Using full_join() on your two dataframes allows you to line them up based on "ID" and then check each of the other column pairs to see if they are the same. In addition, it allows you to check if all the chosen column pairs are matching. I realize this is not the exact output you requested but it may get you started.

suppressPackageStartupMessages(library(tidyverse))

df1 <- data.frame(ID =c("DEV2962","KTN2252","ANA2719","ITI2624","DEV2698","HRT2921",NA,"KTN2624","ANA2548","ITI2535","DEV2732","HRT2837","ERV2951","KTN2542","ANA2813","ITI2210"),
                  city=c("del","mum","nav","pun","bang","chen","triv","vish","del","mum","bang","vish","bhop","kol","noi","gurg"),
                  Name= c("dev,akash","singh,rahul","abbas,salman","lal,ram","singh,nkunj","garg,prabal","ali,sanu","singh,kunal","tomar,lakhan","thakur,praveen","ali,sarman","khan,zuber","singh,giriraj","sharma,lokesh","pawar,pooja","sharma,nikita"),
                  gender =c("m","f","m","f","m","m","m","m","m","m","m","f","m","f","m","m"))

df2 <- data.frame(ID =c("DEV2962","KTN2152","ANA2719","ITs2624","DEV2698","HRT2921",NA,"KTN2624","ANA2548","ITI2535","DEV2732","HRT2837","ERV2951","KTN2542","ANA2813","ITI2210"),
                  city=c("del","MUm"," nav","pun","bang","chen"," ddgy","vish","del","mum","bang","vish","bhol","nhus","huay","gurg"),
                  Name= c("dev","singh,rahul","abbas,salman","lal,ram","singh,nkunj","huna,ghalak","khan,fhalt","singh,kunal","tomar,lakhan","thakur,praveen","ali,sarman","khan,zuber","singh,giriraj","sharma,lokesh","pawar,pooja","sharma,nikita"),
                  gender =c("m","f","m","f","m","m","m","male","m","male","m","f","m","f","m","m"))

df1
#>         ID city           Name gender
#> 1  DEV2962  del      dev,akash      m
#> 2  KTN2252  mum    singh,rahul      f
#> 3  ANA2719  nav   abbas,salman      m
#> 4  ITI2624  pun        lal,ram      f
#> 5  DEV2698 bang    singh,nkunj      m
#> 6  HRT2921 chen    garg,prabal      m
#> 7     <NA> triv       ali,sanu      m
#> 8  KTN2624 vish    singh,kunal      m
#> 9  ANA2548  del   tomar,lakhan      m
#> 10 ITI2535  mum thakur,praveen      m
#> 11 DEV2732 bang     ali,sarman      m
#> 12 HRT2837 vish     khan,zuber      f
#> 13 ERV2951 bhop  singh,giriraj      m
#> 14 KTN2542  kol  sharma,lokesh      f
#> 15 ANA2813  noi    pawar,pooja      m
#> 16 ITI2210 gurg  sharma,nikita      m
df2
#>         ID  city           Name gender
#> 1  DEV2962   del            dev      m
#> 2  KTN2152   MUm    singh,rahul      f
#> 3  ANA2719   nav   abbas,salman      m
#> 4  ITs2624   pun        lal,ram      f
#> 5  DEV2698  bang    singh,nkunj      m
#> 6  HRT2921  chen    huna,ghalak      m
#> 7     <NA>  ddgy     khan,fhalt      m
#> 8  KTN2624  vish    singh,kunal   male
#> 9  ANA2548   del   tomar,lakhan      m
#> 10 ITI2535   mum thakur,praveen   male
#> 11 DEV2732  bang     ali,sarman      m
#> 12 HRT2837  vish     khan,zuber      f
#> 13 ERV2951  bhol  singh,giriraj      m
#> 14 KTN2542  nhus  sharma,lokesh      f
#> 15 ANA2813  huay    pawar,pooja      m
#> 16 ITI2210  gurg  sharma,nikita      m

comp <- full_join(df1, df2, by="ID", keep=TRUE)

comp$ID_matching <- ifelse(comp$ID.x == comp$ID.y, TRUE, FALSE)
comp$name_matching <- ifelse(comp$Name.x == comp$Name.y, TRUE, FALSE)
comp$city_matching <- ifelse(comp$city.x == comp$city.y, TRUE, FALSE)
comp$gender_matching <- ifelse(comp$gender.x == comp$gender.y, TRUE, FALSE)

comp %>% 
  mutate(ALL_matching = ifelse(ID_matching & 
                                 name_matching & 
                                 city_matching & 
                                 gender_matching, TRUE, FALSE)) -> comp

comp
#>       ID.x city.x         Name.x gender.x    ID.y city.y         Name.y
#> 1  DEV2962    del      dev,akash        m DEV2962    del            dev
#> 2  KTN2252    mum    singh,rahul        f    <NA>   <NA>           <NA>
#> 3  ANA2719    nav   abbas,salman        m ANA2719    nav   abbas,salman
#> 4  ITI2624    pun        lal,ram        f    <NA>   <NA>           <NA>
#> 5  DEV2698   bang    singh,nkunj        m DEV2698   bang    singh,nkunj
#> 6  HRT2921   chen    garg,prabal        m HRT2921   chen    huna,ghalak
#> 7     <NA>   triv       ali,sanu        m    <NA>   ddgy     khan,fhalt
#> 8  KTN2624   vish    singh,kunal        m KTN2624   vish    singh,kunal
#> 9  ANA2548    del   tomar,lakhan        m ANA2548    del   tomar,lakhan
#> 10 ITI2535    mum thakur,praveen        m ITI2535    mum thakur,praveen
#> 11 DEV2732   bang     ali,sarman        m DEV2732   bang     ali,sarman
#> 12 HRT2837   vish     khan,zuber        f HRT2837   vish     khan,zuber
#> 13 ERV2951   bhop  singh,giriraj        m ERV2951   bhol  singh,giriraj
#> 14 KTN2542    kol  sharma,lokesh        f KTN2542   nhus  sharma,lokesh
#> 15 ANA2813    noi    pawar,pooja        m ANA2813   huay    pawar,pooja
#> 16 ITI2210   gurg  sharma,nikita        m ITI2210   gurg  sharma,nikita
#> 17    <NA>   <NA>           <NA>     <NA> KTN2152    MUm    singh,rahul
#> 18    <NA>   <NA>           <NA>     <NA> ITs2624    pun        lal,ram
#>    gender.y ID_matching name_matching city_matching gender_matching
#> 1         m        TRUE         FALSE          TRUE            TRUE
#> 2      <NA>          NA            NA            NA              NA
#> 3         m        TRUE          TRUE         FALSE            TRUE
#> 4      <NA>          NA            NA            NA              NA
#> 5         m        TRUE          TRUE          TRUE            TRUE
#> 6         m        TRUE         FALSE          TRUE            TRUE
#> 7         m          NA         FALSE         FALSE            TRUE
#> 8      male        TRUE          TRUE          TRUE           FALSE
#> 9         m        TRUE          TRUE          TRUE            TRUE
#> 10     male        TRUE          TRUE          TRUE           FALSE
#> 11        m        TRUE          TRUE          TRUE            TRUE
#> 12        f        TRUE          TRUE          TRUE            TRUE
#> 13        m        TRUE          TRUE         FALSE            TRUE
#> 14        f        TRUE          TRUE         FALSE            TRUE
#> 15        m        TRUE          TRUE         FALSE            TRUE
#> 16        m        TRUE          TRUE          TRUE            TRUE
#> 17        f          NA            NA            NA              NA
#> 18        f          NA            NA            NA              NA
#>    ALL_matching
#> 1         FALSE
#> 2            NA
#> 3         FALSE
#> 4            NA
#> 5          TRUE
#> 6         FALSE
#> 7         FALSE
#> 8         FALSE
#> 9          TRUE
#> 10        FALSE
#> 11         TRUE
#> 12         TRUE
#> 13        FALSE
#> 14        FALSE
#> 15        FALSE
#> 16         TRUE
#> 17           NA
#> 18           NA

Created on 2021-09-29 by the reprex package (v2.0.1)

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.