compare two rows

I need to compare two data.frames of one column each, row by row. To explain my issue, here is a short reprex:
# Reprex input: two single-column data frames of segment IDs to be
# compared row by row (seven rows each, character data).
segID_1 <- data.frame(
  SegID_1 = c(
    "*WHA_RBV", "!DHqI_*PUT", "!BXhV_!BXhW", "SIN_SUNUM",
    "!Auql_KARDE", "ADUXO_DISVU", "INB_MIMTO"
  ),
  stringsAsFactors = FALSE
)
segID_2 <- data.frame(
  SegID_2 = c(
    "*WHA_RBV", "GHKJ_GDIU", "!BXhV_!BXhW", "*WHA_RBV",
    "!BXhV_!BXhW", "ADUXO_DISVU", "GHKJ_GDIU"
  ),
  stringsAsFactors = FALSE
)

# Desired result: "YES" where the two IDs in a row match, "NO" otherwise.
Conflicto <- data.frame(
  Conflicto = c("YES", "NO", "YES", "NO", "NO", "YES", "NO"),
  stringsAsFactors = FALSE
)

# Desired totals: the count sits in the first row, the rest is NA padding.
number_of_yes <- data.frame(
  Nº.YES = c(3, NA, NA, NA, NA, NA, NA)
)
number_of_no <- data.frame(
  Nº.NO = c(4, NA, NA, NA, NA, NA, NA)
)

The data.frames "segID_1" and "segID_2" hold the data I would like to compare row by row. Both have the same number of rows and contain non-numeric data. What I would like the code to do is compare the values of these two data.frames row by row and return the result in another column or data.frame: YES if the rows are equal, NO if they are different. Finally, I would like to count the number of YES and NO results. For the example given, the result I would like to obtain is shown in the data.frames "Conflicto", "number_of_yes" and "number_of_no".
Thanks in advance!

Hi

You could approach this in a number of ways, but based on your description you could write a simple ifelse() statement that compares the two variables and updates your third data frame, e.g.

# Compare the two ID columns element by element and store the YES/NO flags
# in the Conflicto data frame. The result of ifelse() is what gets assigned:
# an assignment placed inside its yes/no arguments would only leave a single
# scalar value in Conflicto.
Conflicto <- data.frame(
  Conflicto = ifelse(segID_1$SegID_1 == segID_2$SegID_2, "YES", "NO"),
  stringsAsFactors = FALSE
)

You wouldn't need to create the separate number_of_yes and number_of_no data frames either; you could then just do something like length(Conflicto[Conflicto == "YES"]) and length(Conflicto[Conflicto == "NO"])

or

# Count the rows for each Conflicto value (requires dplyr to be loaded).
# Grouping by the Conflicto column gives one count per YES/NO value rather
# than a single overall total.
Conflicto %>%
  group_by(Conflicto) %>%
  tally()

Another approach, using the dplyr package:

library(dplyr)

# Input: two single-column data frames of segment IDs with the same number
# of rows.
segID_1 <- data.frame(
  stringsAsFactors = FALSE,
  SegID_1 = c(
    "*WHA_RBV", "!DHqI_*PUT", "!BXhV_!BXhW", "SIN_SUNUM",
    "!Auql_KARDE", "ADUXO_DISVU", "INB_MIMTO"
  )
)
segID_2 <- data.frame(
  stringsAsFactors = FALSE,
  SegID_2 = c(
    "*WHA_RBV", "GHKJ_GDIU", "!BXhV_!BXhW", "*WHA_RBV",
    "!BXhV_!BXhW", "ADUXO_DISVU", "GHKJ_GDIU"
  )
)

# Put both ID columns side by side, then flag each row as "YES" when the IDs
# match and "NO" otherwise. The comparison uses the columns SegID_1 and
# SegID_2 of the combined table, not the global data frames segID_1 and
# segID_2, so if_else() receives a plain logical vector.
conflicto <- segID_1 %>%
  bind_cols(segID_2) %>%
  mutate(Conflicto = if_else(SegID_1 == SegID_2, "YES", "NO"))

conflicto
#>       SegID_1     SegID_2 Conflicto
#> 1    *WHA_RBV    *WHA_RBV       YES
#> 2  !DHqI_*PUT   GHKJ_GDIU        NO
#> 3 !BXhV_!BXhW !BXhV_!BXhW       YES
#> 4   SIN_SUNUM    *WHA_RBV        NO
#> 5 !Auql_KARDE !BXhV_!BXhW        NO
#> 6 ADUXO_DISVU ADUXO_DISVU       YES
#> 7   INB_MIMTO   GHKJ_GDIU        NO

# Number of matching (YES) and non-matching (NO) rows.
conflicto %>%
  count(Conflicto)
#> # A tibble: 2 x 2
#>   Conflicto     n
#>   <chr>     <int>
#> 1 NO            4
#> 2 YES           3

Created on 2019-12-06 by the reprex package (v0.3.0.9000)

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.