Here's an approach that joins nested data frames to avoid duplicating data, then uses findInterval to pick the label for each row. It still uses map2, but it only iterates over the groups (the findInterval call within each group is vectorized), so it should still be reasonably efficient.
library(tidyverse)
# Car name, mpg and cylinder count from mtcars. Columns are selected by name so
# the result does not depend on the column order of mtcars.
cars <- mtcars %>%
  rownames_to_column("car") %>%
  select(car, mpg, cyl) %>%
  as_tibble()

# Lookup table: within each cyl_l group, mpg_l is the upper break point of the
# mpg range that receives label_l. Break points must be in ascending order
# within a group because findInterval() requires sorted breaks.
mpg_label <- tibble(
  cyl_l = rep(c(4, 6, 8), each = 3),
  label_l = rep(c("bad", "medium", "good"), 3),
  mpg_l = c(25, 28, Inf, 18, 20, Inf, 15, 18, Inf)
)

# Nest both tables by cylinder count so the join produces one row per group
# (no duplicated car rows), then label every car in a group with a single
# vectorized findInterval() call.
inner_join(
  nest(cars, data = -cyl),
  nest(mpg_label, data = -cyl_l),
  by = c("cyl" = "cyl_l")
) %>%
  mutate(
    label = map2(
      data.x, data.y,
      function(obs, bins) {
        # Intervals need a left bound; -Inf (rather than 0) keeps the index
        # >= 1 for any non-missing mpg, so no element is silently dropped.
        # Intervals are left-closed: an mpg equal to a break point gets the
        # label of the higher range.
        bins$label_l[findInterval(obs$mpg, c(-Inf, bins$mpg_l))]
      }
    )
  ) %>%
  # Drop the nested lookup table and fix the column order before unnesting.
  select(cyl, label, data.x) %>%
  unnest(cols = c(label, data.x))
#> # A tibble: 32 x 4
#> cyl label car mpg
#> <dbl> <chr> <chr> <dbl>
#> 1 6 good Mazda RX4 21.0
#> 2 6 good Mazda RX4 Wag 21.0
#> 3 6 good Hornet 4 Drive 21.4
#> 4 6 medium Valiant 18.1
#> 5 6 medium Merc 280 19.2
#> 6 6 bad Merc 280C 17.8
#> 7 6 medium Ferrari Dino 19.7
#> 8 4 bad Datsun 710 22.8
#> 9 4 bad Merc 240D 24.4
#> 10 4 bad Merc 230 22.8
#> # ... with 22 more rows
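This problem sounds a lot like a job for data.table's non-equi joins, e.g. the following. Note that this join uses mpg_l >= mpg, so a car whose mpg equals a threshold exactly (the Maserati Bora at 15.0) gets the lower label, whereas findInterval's left-closed intervals would give it the higher one.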
library(data.table)
# mpg and cyl from mtcars, with the row names carried along as a `car` column.
cars <- mtcars[c("mpg", "cyl")]
cars[["car"]] <- row.names(mtcars)
cars <- as.data.table(cars)

# Lookup table: for each cylinder count, three labels with ascending upper
# mpg bounds (the last bound is Inf so every car finds a match).
mpg_label <- data.table(
  cyl_l = rep(c(4, 6, 8), times = c(3, 3, 3)),
  label_l = rep(c("bad", "medium", "good"), times = 3),
  mpg_l = c(25, 28, Inf, 18, 20, Inf, 15, 18, Inf)
)

# Non-equi join: for every car, take the first lookup row with the same
# cylinder count whose bound is >= the car's mpg. In the result, the mpg_l
# column carries the car's mpg value rather than the bound.
mpg_label[
  cars,
  on = .(cyl_l == cyl, mpg_l >= mpg),
  mult = "first"
]
#> cyl_l label_l mpg_l car
#> 1: 6 good 21.0 Mazda RX4
#> 2: 6 good 21.0 Mazda RX4 Wag
#> 3: 4 bad 22.8 Datsun 710
#> 4: 6 good 21.4 Hornet 4 Drive
#> 5: 8 good 18.7 Hornet Sportabout
#> 6: 6 medium 18.1 Valiant
#> 7: 8 bad 14.3 Duster 360
#> 8: 4 bad 24.4 Merc 240D
#> 9: 4 bad 22.8 Merc 230
#> 10: 6 medium 19.2 Merc 280
#> 11: 6 bad 17.8 Merc 280C
#> 12: 8 medium 16.4 Merc 450SE
#> 13: 8 medium 17.3 Merc 450SL
#> 14: 8 medium 15.2 Merc 450SLC
#> 15: 8 bad 10.4 Cadillac Fleetwood
#> 16: 8 bad 10.4 Lincoln Continental
#> 17: 8 bad 14.7 Chrysler Imperial
#> 18: 4 good 32.4 Fiat 128
#> 19: 4 good 30.4 Honda Civic
#> 20: 4 good 33.9 Toyota Corolla
#> 21: 4 bad 21.5 Toyota Corona
#> 22: 8 medium 15.5 Dodge Challenger
#> 23: 8 medium 15.2 AMC Javelin
#> 24: 8 bad 13.3 Camaro Z28
#> 25: 8 good 19.2 Pontiac Firebird
#> 26: 4 medium 27.3 Fiat X1-9
#> 27: 4 medium 26.0 Porsche 914-2
#> 28: 4 good 30.4 Lotus Europa
#> 29: 8 medium 15.8 Ford Pantera L
#> 30: 6 medium 19.7 Ferrari Dino
#> 31: 8 bad 15.0 Maserati Bora
#> 32: 4 bad 21.4 Volvo 142E
#> cyl_l label_l mpg_l car
Support for non-equi joins in dplyr had been teased for a long time without much news; it has since been added in dplyr 1.1.0 via join_by().