Calculate how many times R has replaced data

I have a data.frame called "trayectory" in which some rows have the same meaning but are named in opposite directions. For context, the columns hold line names (e.g. "%%EDH_WSN"). A line can be named in one direction or the other, which is why some lines are named one way and others the opposite way, even though both names represent the same line. In the example below, one direction would be "%%WSN_EDH" and the opposite "%%EDH_WSN". I have already written code that identifies the names written in the opposite direction and rewrites them in another column (trayectory$ID_Name_New) so that both use the same direction; that is, "%%EDH_WSN" and "%%WSN_EDH" are the same line, and after my code one of them has been changed so that both are written the same way: "%%EDH_WSN" == "%%EDH_WSN". What I would like to do now is count how many times the code had to change those names (Trafico_Enfrentado == "YES") and identify the names written the opposite way (Which_Segment).
To expose better my issue I present a short reprex:
# Reprex input: ID_Name is the line name as recorded, ID_Name_New is the
# name after rewriting every line to a single direction.
trayectory <- data.frame(
  ID_Name = c(
    "%%EDH_WSN", "%DIPA_PITES", "%DIPI_LADAT", "%DRSI_SITET",
    "%200_BAKER", "%%WSN_EDH", "%PITES_DIPA", "%BAKER_200"
  ),
  ID_Name_New = c(
    "%%EDH_WSN", "%DIPA_PITES", "%DIPI_LADAT", "%DRSI_SITET",
    "%200_BAKER", "%%EDH_WSN", "%DIPA_PITES", "%200_BAKER"
  ),
  stringsAsFactors = FALSE
)
# Expected result, written by hand for the example. Each string literal is
# kept on a single line: a literal split across lines would embed a newline
# in the value and stop it matching the names in the input.
# Rows without an opposite-named counterpart carry the literal string "NA"
# in Which_Segment.
solution <- data.frame(
  stringsAsFactors = FALSE,
  ID_Name = c(
    "%%EDH_WSN", "%DIPA_PITES", "%DIPI_LADAT", "%DRSI_SITET",
    "%200_BAKER", "%%WSN_EDH", "%PITES_DIPA", "%BAKER_200"
  ),
  ID_Name_New = c(
    "%%EDH_WSN", "%DIPA_PITES", "%DIPI_LADAT", "%DRSI_SITET",
    "%200_BAKER", "%%EDH_WSN", "%DIPA_PITES", "%200_BAKER"
  ),
  Trafico_Enfrentado = c("YES", "YES", "NO", "NO", "YES", "YES", "YES", "YES"),
  Which_Segment = c(
    "%%WSN_EDH", "%PITES_DIPA", "NA", "NA",
    "%BAKER_200", "%%EDH_WSN", "%DIPA_PITES", "%200_BAKER"
  )
)

Thanks in advance for your help!

Thanks for including the code. I've reproduced it in reprex form using the reprex addin to RStudio, which is always helpful to peeps looking at the question.

If I have understood the question correctly, a solution is at the end

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
# Input from the question: recorded line names (ID_Name) next to the
# single-direction names (ID_Name_New).
trayectory <- data.frame(
  ID_Name = c(
    "%%EDH_WSN", "%DIPA_PITES", "%DIPI_LADAT", "%DRSI_SITET",
    "%200_BAKER", "%%WSN_EDH", "%PITES_DIPA", "%BAKER_200"
  ),
  ID_Name_New = c(
    "%%EDH_WSN", "%DIPA_PITES", "%DIPI_LADAT", "%DRSI_SITET",
    "%200_BAKER", "%%EDH_WSN", "%DIPA_PITES", "%200_BAKER"
  ),
  stringsAsFactors = FALSE
)
# Hand-built expected result from the question: the two input columns plus
# the flag column and the name of the opposite-direction segment.
solution <- data.frame(
  ID_Name = c(
    "%%EDH_WSN", "%DIPA_PITES", "%DIPI_LADAT", "%DRSI_SITET",
    "%200_BAKER", "%%WSN_EDH", "%PITES_DIPA", "%BAKER_200"
  ),
  ID_Name_New = c(
    "%%EDH_WSN", "%DIPA_PITES", "%DIPI_LADAT", "%DRSI_SITET",
    "%200_BAKER", "%%EDH_WSN", "%DIPA_PITES", "%200_BAKER"
  ),
  Trafico_Enfrentado = c("YES", "YES", "NO", "NO", "YES", "YES", "YES", "YES"),
  Which_Segment = c(
    "%%WSN_EDH", "%PITES_DIPA", "NA", "NA",
    "%BAKER_200", "%%EDH_WSN", "%DIPA_PITES", "%200_BAKER"
  ),
  stringsAsFactors = FALSE
)
# Number of rows for each value of Trafico_Enfrentado.
count(group_by(solution, Trafico_Enfrentado))
#> # A tibble: 2 x 2
#> # Groups:   Trafico_Enfrentado [2]
#>   Trafico_Enfrentado     n
#>   <chr>              <int>
#> 1 NO                     2
#> 2 YES                    6

Created on 2019-12-15 by the reprex package (v0.3.0)

First of all, thank you so much for your quick response. However, that code isn't actually what I need. The data.frame "solution" is what I would like R to give me (it was made by hand in Excel, only for the example). The issue is that I don't know how to compute that solution data.frame in R; that is to say, how to count how many times R identifies a duplicated item — in other words, how many times R identifies a line as the same line but named in the opposite direction and therefore rewrites it in the column "ID_Name_New". Apart from that, I would also like to know which data it has identified as duplicated; I mean, I would like the code to produce the "Which_Segment" column shown in the solution data.frame. Do you think that would be possible?

1 Like

Thanks for the clarification. I was taking solution as a given. I'll take another look.

Thank you so much in advance for your help @technocrat. Do not hesitate to ask me for whatever you need; perhaps I haven't explained myself very well.

1 Like

This will get you the reversals, I think, and you can use the group_by in my earlier answer to get the counts.

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(stringr)
# Input: ID_Name is the line name as recorded; ID_Name_New is the same line
# rewritten to a single direction, so both spellings of one line share it.
trayectory <- data.frame(
  ID_Name = c(
    "%%EDH_WSN", "%DIPA_PITES", "%DIPI_LADAT", "%DRSI_SITET",
    "%200_BAKER", "%%WSN_EDH", "%PITES_DIPA", "%BAKER_200"
  ),
  ID_Name_New = c(
    "%%EDH_WSN", "%DIPA_PITES", "%DIPI_LADAT", "%DRSI_SITET",
    "%200_BAKER", "%%EDH_WSN", "%DIPA_PITES", "%200_BAKER"
  ),
  stringsAsFactors = FALSE
)

# Every distinct recorded spelling found for each single-direction name.
spellings_by_line <- lapply(
  split(trayectory$ID_Name, trayectory$ID_Name_New),
  unique
)

# For row i, the spelling(s) of the same line that differ from the row's own
# ID_Name; NA when the line only ever appears under one spelling.
opposite_spelling <- function(i) {
  others <- setdiff(
    spellings_by_line[[trayectory$ID_Name_New[i]]],
    trayectory$ID_Name[i]
  )
  if (length(others) == 0L) NA_character_ else paste(others, collapse = ", ")
}

solution <- trayectory
# seq_len() keeps this safe for a data frame with zero rows.
solution$Which_Segment <- vapply(
  seq_len(nrow(trayectory)), opposite_spelling, character(1)
)
# A row is flagged "YES" when its line also appears under the opposite name,
# whichever of the two spellings the row itself carries.
solution$Trafico_Enfrentado <- ifelse(
  is.na(solution$Which_Segment), "NO", "YES"
)
solution <- solution[
  , c("ID_Name", "ID_Name_New", "Trafico_Enfrentado", "Which_Segment")
]
solution
#>       ID_Name ID_Name_New Trafico_Enfrentado Which_Segment
#> 1   %%EDH_WSN   %%EDH_WSN                YES     %%WSN_EDH
#> 2 %DIPA_PITES %DIPA_PITES                YES   %PITES_DIPA
#> 3 %DIPI_LADAT %DIPI_LADAT                 NO          <NA>
#> 4 %DRSI_SITET %DRSI_SITET                 NO          <NA>
#> 5  %200_BAKER  %200_BAKER                YES    %BAKER_200
#> 6   %%WSN_EDH   %%EDH_WSN                YES     %%EDH_WSN
#> 7 %PITES_DIPA %DIPA_PITES                YES   %DIPA_PITES
#> 8  %BAKER_200  %200_BAKER                YES    %200_BAKER

Created on 2019-12-16 by the reprex package (v0.3.0)

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.