Split strings in a few columns of a data frame

Hey Community,

How can we split the top1, top2 and top3 columns, which contain strings, on the separator "("? I only need the element to the left of the separator. I tried `separate()` and `str_split()`, but I need your help with feeding them to dplyr.

library(stringr)
library(dplyr)
library(rebus)
library(tidyr)

# Example data: six numeric columns of uniform random draws and three
# character columns (top1-top3) whose values look like "<label>(<score>)".
# stringsAsFactors = FALSE keeps top1-top3 as character when the data frame
# is built, so no later conversion by column position (df[7:9]) is needed.
df <- data.frame(
  AA = runif(10), AB = runif(10), AC = runif(10),
  BA = runif(10), BB = runif(10), BC = runif(10),
  top1 = c(
    "AA(.9)", "AB(.6)", "AA(.7)", "AA(.2)", "AA(.2)",
    "AA(.5)", "AA(.4)", "AA(.8)", "AA(.7)", "AC(.3)"
  ),
  top2 = c(
    "BA(.9)", "BC(.6)", "AC(.7)", "BB(.2)", "AB(.2)",
    "AB(.5)", "BA(.4)", "BB(.8)", "AC(.7)", "AB(.3)"
  ),
  top3 = c(
    "BB(.9)", "AA(.6)", "BA(.7)", "BA(.2)", "BC(.2)",
    "BA(.5)", "BB(.4)", "BA(.8)", "BA(.7)", "AB(.3)"
  ),
  stringsAsFactors = FALSE
)

# Expected output for only 1 column
`separate()` works when I feed it a single column:
# Split top3 into two new columns, "a" and "b", using separate()'s default
# separator, and keep the result in top3sep.
top3sep <- separate(data = df, col = top3, into = c("a", "b"))
But it drops the column that was fed in.
I need all of them in df: top3, top3sep$a and top3sep$b.

I tried to add three more columns containing just the string value, as in top3sep$a.

I tried to use stringr and dplyr like this:
# Attempt: for top1 and top2, add a "vital" variant built by splitting each
# value into at most two pieces (n = 2) with str_split().
# NOTE(review): stringr patterns are regular expressions by default, so the
# unescaped "(" in "( " is presumably what makes this call fail - confirm.
# The sample values (e.g. "AA(.9)") also have no space after "(".
# NOTE(review): simplify = T asks str_split() for a character matrix rather
# than one vector per column, so the left-hand piece would still have to be
# picked out (e.g. with [, 1]) - verify against the stringr documentation.
df %>% mutate_at(.vars = vars(top1,top2),
                 .funs = funs(vital = str_split(., pattern = "( ", n = 2, simplify = T)))

It seems I am missing a step.
Could you please guide me on this?

Thanks in advance

Not sure if I understand correctly but is this what you want?

library(dplyr)
library(tidyr)

# Example data for the answer: six numeric columns filled with uniform random
# draws, followed by three character columns (top1-top3) whose values have
# the form "<label>(<score>)". Strings are kept as character, not factors.
df <- data.frame(
  AA = runif(10),
  AB = runif(10),
  AC = runif(10),
  BA = runif(10),
  BB = runif(10),
  BC = runif(10),
  top1 = c(
    "AA(.9)", "AB(.6)", "AA(.7)", "AA(.2)", "AA(.2)",
    "AA(.5)", "AA(.4)", "AA(.8)", "AA(.7)", "AC(.3)"
  ),
  top2 = c(
    "BA(.9)", "BC(.6)", "AC(.7)", "BB(.2)", "AB(.2)",
    "AB(.5)", "BA(.4)", "BB(.8)", "AC(.7)", "AB(.3)"
  ),
  top3 = c(
    "BB(.9)", "AA(.6)", "BA(.7)", "BA(.2)", "BC(.2)",
    "BA(.5)", "BB(.4)", "BA(.8)", "BA(.7)", "AB(.3)"
  ),
  stringsAsFactors = FALSE
)

# Split each topN column into a label column (aN) and a score column (bN).
# remove = FALSE keeps the original topN column next to the new ones, and
# convert = TRUE turns the ".9"-style score text into numbers.
# The closing ")" also matches the separator, which leaves an empty third
# piece per row; extra = "drop" discards it explicitly instead of relying on
# the default extra = "warn", which drops it as well but emits a warning.
df %>%
  separate(top1, c("a1", "b1"), sep = "\\(|\\)", remove = FALSE,
           convert = TRUE, extra = "drop") %>%
  separate(top2, c("a2", "b2"), sep = "\\(|\\)", remove = FALSE,
           convert = TRUE, extra = "drop") %>%
  separate(top3, c("a3", "b3"), sep = "\\(|\\)", remove = FALSE,
           convert = TRUE, extra = "drop")

#>            AA         AB         AC        BA         BB         BC   top1
#> 1  0.30185531 0.11034818 0.85785075 0.1084743 0.56488219 0.99779461 AA(.9)
#> 2  0.19632403 0.09951909 0.02639554 0.8965831 0.76539544 0.92592199 AB(.6)
#> 3  0.72708296 0.01253129 0.74237674 0.4797273 0.37098165 0.57786245 AA(.7)
#> 4  0.98479168 0.23361551 0.35101986 0.6861378 0.27116869 0.41128713 AA(.2)
#> 5  0.98624849 0.52964390 0.55693394 0.9742142 0.73188099 0.23690660 AA(.2)
#> 6  0.11747877 0.28660234 0.46222577 0.1010138 0.25290742 0.09545162 AA(.5)
#> 7  0.63861473 0.40034127 0.28586732 0.8256375 0.15813582 0.09631863 AA(.4)
#> 8  0.79784259 0.10923271 0.01887175 0.4715794 0.08371871 0.75640180 AA(.8)
#> 9  0.06997231 0.72184796 0.34415773 0.5377777 0.32428036 0.43252057 AA(.7)
#> 10 0.87634085 0.05418213 0.90934887 0.1482573 0.37152917 0.54754731 AC(.3)
#>    a1  b1   top2 a2  b2   top3 a3  b3
#> 1  AA 0.9 BA(.9) BA 0.9 BB(.9) BB 0.9
#> 2  AB 0.6 BC(.6) BC 0.6 AA(.6) AA 0.6
#> 3  AA 0.7 AC(.7) AC 0.7 BA(.7) BA 0.7
#> 4  AA 0.2 BB(.2) BB 0.2 BA(.2) BA 0.2
#> 5  AA 0.2 AB(.2) AB 0.2 BC(.2) BC 0.2
#> 6  AA 0.5 AB(.5) AB 0.5 BA(.5) BA 0.5
#> 7  AA 0.4 BA(.4) BA 0.4 BB(.4) BB 0.4
#> 8  AA 0.8 BB(.8) BB 0.8 BA(.8) BA 0.8
#> 9  AA 0.7 AC(.7) AC 0.7 BA(.7) BA 0.7
#> 10 AC 0.3 AB(.3) AB 0.3 AB(.3) AB 0.3

Created on 2019-03-28 by the reprex package (v0.2.1)

1 Like

Perfect solution @andresrcs

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.