updating dataframe on the basis on text in one column

I want to transform the data frame on the basis on column house. so if the columns house have text seprated by ";" the add the text in new row. and also add the new column with head1, head2

library(dplyr)
library(tidyr)

df <-  data.frame(Region = c("AU","USA","CA","UK","GE","AU","USA","CA","UK"),
                  lock = c(1,1,NA,1,NA,1,NA,1,NA),
                  type= c("sale",NA,NA,"target","target",NA,"sale",NA,"target"),
                  House =c("Tagore house","Gandhi house",NA,"Flexible;Tagore house;Gandhi house","Tagore house",NA,"Flexible;Gandhi house","Gandhi house","Gandhi house"))

df %>%
  mutate(House = strsplit(House, ";"),
         head= head) %>%
  unnest(House)

the output should look like

image

 library(tidyverse)
df <-  tibble(Region = c("AU","USA","CA","UK","GE","AU","USA","CA","UK"),
                  lock = c(1,1,NA,1,NA,1,NA,1,NA),
                  type= c("sale",NA,NA,"target","target",NA,"sale",NA,"target"),
                  House =c("Tagore house","Gandhi house",NA,"Flexible;Tagore house;Gandhi house","Tagore house",NA,"Flexible;Gandhi house","Gandhi house","Gandhi house"))

(step_1 <- df %>% mutate(g_l=replace_na(str_detect(House,
                                  ";"),0),
              g_num = if_else(g_l,cumsum(g_l),0)) |>
  separate_longer_delim(cols="House",delim=";") |> group_by(g_num) |>
  mutate(rn1=if_else(g_num==0,0,row_number()),
         rn = replace_na(if_else(dplyr::lead(rn1)==0,0,rn1),0),
         head = if_else(rn>0,paste0("head",rn),"")))

(fin <- ungroup(step_1) |> select(Region,lock,type,House,head))

but i dont want to change the column region, lock, type , want to keep them as it is

oh I didnt even notice. but its easy enough;
add

Region = if_else(rn>0,"",Region)

to the last mutate in the code to blank region for the new stuff we made, repeat this approach for lock and type

step_1 <- df %>% mutate(g_l=replace_na(str_detect(House,
";"),0),
g_num = if_else(g_l,cumsum(g_l),0)) |>
separate_longer_delim(cols="House",delim=";") |> group_by(g_num) |>
mutate(rn1=if_else(g_num==0,0,row_number(),),
rn = replace_na(if_else(dplyr::lead(rn1)==0,0,rn1),0),
head = if_else(rn>0,paste0("head",rn),""),
Region = if_else(rn>0,"",Region,lock=ifelse(rn>0,"",lock)))

seems like i am doing something wrong

Yes, an dplyr::if_else should only have 2 commas separating the 3 clauses, of what to test, what to do if succeed and what to do if fail. you didnt close region and layered lock into it.
Also lock is numeric and not character so can not be an empty string and preserve numeric, the closest thing would be NA I suppose.


step_1 <- df %>%
  mutate(
    g_l = replace_na(str_detect(House,";"), 0),
    g_num = if_else(g_l, cumsum(g_l), 0)
  ) |>
  separate_longer_delim(cols = "House", delim = ";") |>
  group_by(g_num) |>
  mutate(
    rn1 = if_else(g_num == 0, 0, row_number(), ),
    rn = replace_na(if_else(dplyr::lead(rn1) == 0, 0, rn1), 0),
    head = if_else(rn > 0, paste0("head", rn), ""),
    Region = if_else(rn > 0, "", Region),
    lock = if_else(rn > 0, NA, lock))

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.