validation column of two data frame

I have two dataframe, df8, df9, now i am checking if "name "column in df8 is blank or space or NA then mutate blank_name as 1, else 0 but i don't want to change the original "Name" column for city in (CY) . i am trying the below way but doesn't work for me.

also i want to check if id of df8 present in df9 then i want to check if name is consistant with name in df9

df9 <- data.frame(id=c(3109,2357,4339,8927,9143,4285,2683,8217,3702,7857,3255,4262,8501,7111,2681,6970),
                  name=c("try,xab","xab,Lan","mhy,mun","vgtu,mmc","dgsy,aaf","kull,nnhu","hula,njam","mund,jiha","htfy,ntha","","sgyu,hytb","vdti,kula","mftyu,huta","","cday,bhsue","ajtu,nudj"))
                  
df8 <- data.frame(id=c(3109,2357,4339,8927,9143,4285,2683,8217,3702,7857,3255,4262,8501,7111,2681,6970),
                  city = c("CY","NY","DA","CY","MN","GA","MN","CY","NY","DA","CY","CY","GA","CY","LA","DA"),
                  name=c("try,xab","xab,Lan","mhy,mun","vgtu,mmc","   ","kull,nnhu","hula,njam","mund,jiha","htfy,ntha",NA,"sgyu,hytb","vdti,kula","mftyu,huta","","cday,bhsue","ajtu,nudj"))



df8["blank_name"] <- 0
df8 <- df8 %>% 
  if(city == "CY"){ 
    mutate(name = trimws(name, which = "both")) %>%
      mutate(blank_name = ifelse(name == "" | is.na(name), 1, 0))
  } else {blank_name = 0}

the out put should be like

id name blank if_name_ok
3109 try,xab 0 0
2357 xab,Lan 0 0
4339 jhtu,hjui 0 1
8927 vgtu,mmc 0 0
9143 1 0
4285 kull,nnhu 0 0
2683 hula,njam 0 0
8217 mund,jiha 0 0
3702 htfy,ntha 0 0
7857 NA 1 0
3255 sgyu,hytb 0 0
4262 vdti,kula 0 0
8501 mftyu,huta 0 0
7111 1 0
2681 cday,bhsue 0 0
6970 ajtu,nudj 0 0

If() is not vectorised you would use ifelse() and it would be inside the mutate call

This may not be the most efficient way but I broke it down into many steps:

library(tidyverse)

df9 <- data.frame(id=c(3109,2357,4339,8927,9143,4285,2683,8217,3702,7857,3255,4262,8501,7111,2681,6970),
                  name=c("try,xab","xab,Lan","mhy,mun","vgtu,mmc","dgsy,aaf","kull,nnhu","hula,njam","mund,jiha","htfy,ntha","","sgyu,hytb","vdti,kula","mftyu,huta","","cday,bhsue","ajtu,nudj"))


df8 <- data.frame(id=c(3109,2357,4339,8927,9143,4285,2683,8217,3702,7857,3255,4262,8501,7111,2681,6970),
                  city = c("CY","NY","DA","CY","MN","GA","MN","CY","NY","DA","CY","CY","GA","CY","LA","DA"),
                  name=c("try,xab","xab,Lan","mhy,mun","vgtu,mmc","   ","kull,nnhu","hula,njam","mund,jiha","htfy,ntha",NA,"sgyu,hytb","vdti,kula","mftyu,huta","","cday,bhsue","ajtu,nudj"))

df8_update <- df8 %>%
  mutate(
    name=if_else(city=="CY", name, str_trim(name)),
    blank = case_when(
      is.na(name)~1,
      str_length(name)==0~1,
      TRUE~0
    )
  )

df9 %>%
  rename(name.9=name) %>%
  right_join(df8_update, by="id") %>%
  mutate(if_name_ok= case_when(
    is.na(name) & is.na(name.9)~ 0,
    is.na(name) & blank==1 ~0,
    name == name.9 ~0,
    name != name.9~1,
    TRUE ~ NA_real_
  )) 
#>      id     name.9 city       name blank if_name_ok
#> 1  3109    try,xab   CY    try,xab     0          0
#> 2  2357    xab,Lan   NY    xab,Lan     0          0
#> 3  4339    mhy,mun   DA    mhy,mun     0          0
#> 4  8927   vgtu,mmc   CY   vgtu,mmc     0          0
#> 5  9143   dgsy,aaf   MN                1          1
#> 6  4285  kull,nnhu   GA  kull,nnhu     0          0
#> 7  2683  hula,njam   MN  hula,njam     0          0
#> 8  8217  mund,jiha   CY  mund,jiha     0          0
#> 9  3702  htfy,ntha   NY  htfy,ntha     0          0
#> 10 7857              DA       <NA>     1          0
#> 11 3255  sgyu,hytb   CY  sgyu,hytb     0          0
#> 12 4262  vdti,kula   CY  vdti,kula     0          0
#> 13 8501 mftyu,huta   GA mftyu,huta     0          0
#> 14 7111              CY                1          0
#> 15 2681 cday,bhsue   LA cday,bhsue     0          0
#> 16 6970  ajtu,nudj   DA  ajtu,nudj     0          0

Created on 2020-09-17 by the reprex package (v0.3.0)

actually , i want to keep the names of column as it is, don't want to change the original database
thanks it works, but if there is NA in the name of df8, if_name_ok should also be 1
what should i change here for this,

also i am using right_join(df8, by=c("ID1"="ID2")) will it work

also please make me understand name.9 why we are using .9 here

I've updated the code below. You can't use right_join(df8, by=c("ID1"="ID2")) because there is no variable called ID1 (df9) or ID2 (df8) only a variable called ID.

I rename the name variable of df9 to be name.9 just to remind us it comes from df9.

library(tidyverse)

df9 <- data.frame(id=c(3109,2357,4339,8927,9143,4285,2683,8217,3702,7857,3255,4262,8501,7111,2681,6970),
                  name=c("try,xab","xab,Lan","mhy,mun","vgtu,mmc","dgsy,aaf","kull,nnhu","hula,njam","mund,jiha","htfy,ntha","","sgyu,hytb","vdti,kula","mftyu,huta","","cday,bhsue","ajtu,nudj"))


df8 <- data.frame(id=c(3109,2357,4339,8927,9143,4285,2683,8217,3702,7857,3255,4262,8501,7111,2681,6970),
                  city = c("CY","NY","DA","CY","MN","GA","MN","CY","NY","DA","CY","CY","GA","CY","LA","DA"),
                  name=c("try,xab","xab,Lan","mhy,mun","vgtu,mmc","   ","kull,nnhu","hula,njam","mund,jiha","htfy,ntha",NA,"sgyu,hytb","vdti,kula","mftyu,huta","","cday,bhsue","ajtu,nudj"))

df8_update <- df8 %>%
  mutate(
    name=if_else(city=="CY", name, str_trim(name)),
    blank = case_when(
      is.na(name)~1,
      str_length(name)==0~1,
      TRUE~0
    )
  )

df9 %>%
  rename(name.9=name) %>% # rename the name variable on df9 to be df.9. Using 9 to remind us it comes from df9
  right_join(df8_update, by="id") %>%
  mutate(if_name_ok= case_when(
    is.na(name) ~ 1, # NA in name of df8 then if_name_ok is 1
    name == name.9 ~0,
    name != name.9~1,
    TRUE ~ NA_real_
  )) 
#>      id     name.9 city       name blank if_name_ok
#> 1  3109    try,xab   CY    try,xab     0          0
#> 2  2357    xab,Lan   NY    xab,Lan     0          0
#> 3  4339    mhy,mun   DA    mhy,mun     0          0
#> 4  8927   vgtu,mmc   CY   vgtu,mmc     0          0
#> 5  9143   dgsy,aaf   MN                1          1
#> 6  4285  kull,nnhu   GA  kull,nnhu     0          0
#> 7  2683  hula,njam   MN  hula,njam     0          0
#> 8  8217  mund,jiha   CY  mund,jiha     0          0
#> 9  3702  htfy,ntha   NY  htfy,ntha     0          0
#> 10 7857              DA       <NA>     1          1
#> 11 3255  sgyu,hytb   CY  sgyu,hytb     0          0
#> 12 4262  vdti,kula   CY  vdti,kula     0          0
#> 13 8501 mftyu,huta   GA mftyu,huta     0          0
#> 14 7111              CY                1          0
#> 15 2681 cday,bhsue   LA cday,bhsue     0          0
#> 16 6970  ajtu,nudj   DA  ajtu,nudj     0          0

Created on 2020-09-18 by the reprex package (v0.3.0)

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.