In that case, I think this strategy will work:
library(tidyverse)
#> Warning: package 'ggplot2' was built under R version 4.2.2
# Toy input: 12 observations, one row per (gene symbol, region) pair.
# `value` is drawn at random without replacement from 1..15; no seed is set
# in this snippet, so the numbers (and any printed results) vary between runs.
df <- data.frame(
  SYMBOL = rep(c("RET", "ROS", "ALK"), times = c(4, 5, 3)),
  region = c(
    "Promoter1", "Promoter2", "intronic", "intronic",
    "NCR", "intronic", "Promoter1", "intronic", "NCR",
    "intronic", "Promoter1", "Promoter2"
  ),
  value = sample(x = seq_len(15), size = 12)
)
# Per-SYMBOL ratio of the mean promoter value to the mean of everything else.
#   1. Label each row "promoter" (region contains "Promoter") or "other".
#   2. Average `value` within each SYMBOL x group cell.
#   3. Spread the two groups into columns and divide one by the other.
df %>%
  mutate(
    group = case_when(
      str_detect(region, "Promoter") ~ "promoter",
      # Optional finer-grained categories; enable to split "other" further:
      # str_detect(region, "intronic") ~ "intronic",
      # str_detect(region, "NCR") ~ "ncr",
      TRUE ~ "other"
    )
  ) %>%
  group_by(SYMBOL, group) %>%
  summarise(average = mean(value)) %>%
  # summarise() leaves the result grouped by SYMBOL; drop that grouping.
  ungroup() %>%
  # One row per SYMBOL, one column per group level ("other", "promoter").
  pivot_wider(names_from = group, values_from = average) %>%
  # Despite its name, `symbol_mean` is the promoter-to-other ratio of means.
  mutate(symbol_mean = promoter / other)
#> `summarise()` has grouped output by 'SYMBOL'. You can override using the
#> `.groups` argument.
#> # A tibble: 3 × 4
#> SYMBOL other promoter symbol_mean
#> <chr> <dbl> <dbl> <dbl>
#> 1 ALK 6 7.5 1.25
#> 2 RET 14 2.5 0.179
#> 3 ROS 7.75 1 0.129
Created on 2022-11-18 with reprex v2.0.2