The ivs package was created for problems like this one. You can turn your start/end columns into a single column representing the interval. Depending on whether `Gene_end` is itself included in the range, you may need to add 1 to it.
library(tidyverse)
library(ivs)
# Gene table: one row per gene with its start and end coordinates.
df <- tribble(
  ~Gene_name, ~Gene_start, ~Gene_end,
  "geneA",    20684560,    20701216,
  "geneB",    22971177,    22982551,
  "geneC",    31944853,    31950382,
  "geneD",    32074946,    32076793
)
# Query table: the named positions to look up against the gene intervals.
df2 <- tribble(
  ~name,       ~position,
  "position1", 20684565,
  "position2", 31944858,
  "position3", 33076793,
  "position4", 52076793,
  "position5", 62076793,
  "position6", 72076793
)
# Collapse the start/end columns into one interval column. The intervals are
# half-open, `[start, end)`, so `Gene_end` is treated as an exclusive boundary.
# If `Gene_end` is inclusive in your data, use `Gene_end + 1` instead.
df <- mutate(df, Gene_range = iv(Gene_start, Gene_end), .keep = "unused")
df
#> # A tibble: 4 × 2
#> Gene_name Gene_range
#> <chr> <iv<dbl>>
#> 1 geneA [20684560, 20701216)
#> 2 geneB [22971177, 22982551)
#> 3 geneC [31944853, 31950382)
#> 4 geneD [32074946, 32076793)
# Flag every position that falls inside at least one of the gene intervals.
df2 <- mutate(df2, in_any_range = iv_between(position, df$Gene_range))
df2
#> # A tibble: 6 × 3
#> name position in_any_range
#> <chr> <dbl> <lgl>
#> 1 position1 20684565 TRUE
#> 2 position2 31944858 TRUE
#> 3 position3 33076793 FALSE
#> 4 position4 52076793 FALSE
#> 5 position5 62076793 FALSE
#> 6 position6 72076793 FALSE
# Keep only the positions that landed inside some gene interval.
df2 %>%
  filter(in_any_range)
#> # A tibble: 2 × 3
#> name position in_any_range
#> <chr> <dbl> <lgl>
#> 1 position1 20684565 TRUE
#> 2 position2 31944858 TRUE
Created on 2023-01-06 with reprex v2.0.2