if city name is same for unique ID in two dataframe

I want to check if city name is same for all ID in df and df2 . if city name is different then mutate new column different city name and show as "different name"

I want to do two things :::

  1. I want to check if id is present in df and df2 then check if city is same so if city is same then mutate df as different city name and show as differente city name.

  2. if ID is present in df2 but not present in df then pull those ID as output.

df <- data.frame(ID =c("DEV2962","KTN2252","ITI2624","DEV2698","HRT2921","","KTN2624","ANA2548","ITI2535","DEV2732","HRT2837","ERV2951","KTN2542","ANA2813","ITI2210","KTN2542"),
                 city=c("DEL","mum","MUM","DEL","del","MUM","DEL","del","MUM","mum","mum","mum","mum","DEL","DEL","mum"),
                 Name= c("dev,akash","singh,rahul","lal,ram","singh,nkunj","garg,prabal","ali,sanu","singh,kunal","tomar,lakhan","thakur,praveen","ali,sarman","khan,zuber","singh,giriraj","sharma,lokesh","pawar,pooja","sharma,nikita","shivam"))


df2 <- data.frame(ID =c("DEV2962","KTN2251","ANA2719","ITI2624","DEV2698","HRT2923","","KTN2624","ANA2548","ITI2535","DEV2732","HRT2837","ERV2951","KTN2542","ANA2813","ITI2210"),
                  city=c("DEL","mum","DEL","MUM","DEL","del","MUM","DEL","del","MUM","mum","mum","mum","mum","DEL","DEL"),
                  Name= c("dev,akash","singh,rahul","abbas,salman","lal","singh,nkunj","garg","ali,sanu","singh,kunal","tomar,lakhan","thakur,praveen","ali,sarman","khan,zuber","singh,giriraj","sharma,lokesh","pawar,pooja","sharma,nikita"))

anti_join(df2, df)
anti_join(df, df2)

When you're comparing information in two data frames, typically you want to make a join, and use the join to populate columns that provide the answers you need.

library(tidyverse)

df <- data.frame(ID =c("DEV2962","KTN2252","ITI2624","DEV2698","HRT2921","","KTN2624","ANA2548","ITI2535","DEV2732","HRT2837","ERV2951","KTN2542","ANA2813","ITI2210","KTN2542"),
                 city=c("DEL","mum","MUM","DEL","del","MUM","DEL","del","MUM","mum","mum","mum","mum","DEL","DEL","mum"),
                 Name= c("dev,akash","singh,rahul","lal,ram","singh,nkunj","garg,prabal","ali,sanu","singh,kunal","tomar,lakhan","thakur,praveen","ali,sarman","khan,zuber","singh,giriraj","sharma,lokesh","pawar,pooja","sharma,nikita","shivam"))


df2 <- data.frame(ID =c("DEV2962","KTN2251","ANA2719","ITI2624","DEV2698","HRT2923","","KTN2624","ANA2548","ITI2535","DEV2732","HRT2837","ERV2951","KTN2542","ANA2813","ITI2210"),
                  city=c("DEL","mum","DEL","MUM","DEL","del","MUM","DEL","del","MUM","mum","mum","mum","mum","DEL","DEL"),
                  Name= c("dev,akash","singh,rahul","abbas,salman","lal","singh,nkunj","garg","ali,sanu","singh,kunal","tomar,lakhan","thakur,praveen","ali,sarman","khan,zuber","singh,giriraj","sharma,lokesh","pawar,pooja","sharma,nikita"))

dfj <- full_join(df, df2, by = "ID", suffix = c("1", "2"), keep = TRUE) %>%
  as_tibble() %>%
  mutate(same_city = (city1 == city2) %>% replace_na(FALSE),
         missing2 = is.na(ID2)) 
dfj
#> # A tibble: 19 x 8
#>    ID1       city1 Name1         ID2       city2 Name2        same_city missing2
#>    <chr>     <chr> <chr>         <chr>     <chr> <chr>        <lgl>     <lgl>   
#>  1 "DEV2962" DEL   dev,akash     "DEV2962" DEL   dev,akash    TRUE      FALSE   
#>  2 "KTN2252" mum   singh,rahul    <NA>     <NA>  <NA>         FALSE     TRUE    
#>  3 "ITI2624" MUM   lal,ram       "ITI2624" MUM   lal          TRUE      FALSE   
#>  4 "DEV2698" DEL   singh,nkunj   "DEV2698" DEL   singh,nkunj  TRUE      FALSE   
#>  5 "HRT2921" del   garg,prabal    <NA>     <NA>  <NA>         FALSE     TRUE    
#>  6 ""        MUM   ali,sanu      ""        MUM   ali,sanu     TRUE      FALSE   
#>  7 "KTN2624" DEL   singh,kunal   "KTN2624" DEL   singh,kunal  TRUE      FALSE   
#>  8 "ANA2548" del   tomar,lakhan  "ANA2548" del   tomar,lakhan TRUE      FALSE   
#>  9 "ITI2535" MUM   thakur,prave~ "ITI2535" MUM   thakur,prav~ TRUE      FALSE   
#> 10 "DEV2732" mum   ali,sarman    "DEV2732" mum   ali,sarman   TRUE      FALSE   
#> 11 "HRT2837" mum   khan,zuber    "HRT2837" mum   khan,zuber   TRUE      FALSE   
#> 12 "ERV2951" mum   singh,giriraj "ERV2951" mum   singh,girir~ TRUE      FALSE   
#> 13 "KTN2542" mum   sharma,lokesh "KTN2542" mum   sharma,loke~ TRUE      FALSE   
#> 14 "ANA2813" DEL   pawar,pooja   "ANA2813" DEL   pawar,pooja  TRUE      FALSE   
#> 15 "ITI2210" DEL   sharma,nikita "ITI2210" DEL   sharma,niki~ TRUE      FALSE   
#> 16 "KTN2542" mum   shivam        "KTN2542" mum   sharma,loke~ TRUE      FALSE   
#> 17  <NA>     <NA>  <NA>          "KTN2251" mum   singh,rahul  FALSE     FALSE   
#> 18  <NA>     <NA>  <NA>          "ANA2719" DEL   abbas,salman FALSE     FALSE   
#> 19  <NA>     <NA>  <NA>          "HRT2923" del   garg         FALSE     FALSE

# not the same city
dfj %>% filter(!same_city)
#> # A tibble: 5 x 8
#>   ID1     city1 Name1       ID2     city2 Name2        same_city missing2
#>   <chr>   <chr> <chr>       <chr>   <chr> <chr>        <lgl>     <lgl>   
#> 1 KTN2252 mum   singh,rahul <NA>    <NA>  <NA>         FALSE     TRUE    
#> 2 HRT2921 del   garg,prabal <NA>    <NA>  <NA>         FALSE     TRUE    
#> 3 <NA>    <NA>  <NA>        KTN2251 mum   singh,rahul  FALSE     FALSE   
#> 4 <NA>    <NA>  <NA>        ANA2719 DEL   abbas,salman FALSE     FALSE   
#> 5 <NA>    <NA>  <NA>        HRT2923 del   garg         FALSE     FALSE

# missing in 2
dfj %>% filter(missing2)
#> # A tibble: 2 x 8
#>   ID1     city1 Name1       ID2   city2 Name2 same_city missing2
#>   <chr>   <chr> <chr>       <chr> <chr> <chr> <lgl>     <lgl>   
#> 1 KTN2252 mum   singh,rahul <NA>  <NA>  <NA>  FALSE     TRUE    
#> 2 HRT2921 del   garg,prabal <NA>  <NA>  <NA>  FALSE     TRUE

Created on 2021-06-19 by the reprex package (v1.0.0)

this is working correct , but in my original data I have many data columns so this will change the requirement. also how can I keep the or change the same names of column as it is.

the output required is just df with columns same_city and missing2

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.