Here's an example using the built-in mtcars
data frame. We use the rescale
function from the scales
package
library(tidyverse)
library(scales)
mtcars %>%
mutate_all(list(~ifelse(. < median(.), 0, rescale(., to=c(0, 11), from=c(median(.), max(.))))))
Output
mpg cyl disp hp drat wt qsec vs am gear carb
1 1.3469388 0 0.000000 0.000000 1.82591093 0.0000000 0.0000000 0 11 0 3.666667
2 1.3469388 0 0.000000 0.000000 1.82591093 0.0000000 0.0000000 0 11 0 3.666667
3 2.6938776 0 0.000000 0.000000 1.38056680 0.0000000 1.9075145 11 11 0 0.000000
4 1.6462585 0 2.461734 0.000000 0.00000000 0.0000000 3.6666667 11 0 0 0.000000
5 0.0000000 11 6.531375 2.698113 0.00000000 0.6026679 0.0000000 0 0 0 0.000000
6 0.0000000 0 1.145085 0.000000 0.00000000 0.7074798 5.3198459 11 0 0 0.000000
7 0.0000000 11 6.531375 6.330189 0.00000000 1.2839447 0.0000000 0 0 0 3.666667
8 3.8911565 0 0.000000 0.000000 0.00000000 0.0000000 4.8535645 11 0 0 0.000000
9 2.6938776 0 0.000000 0.000000 2.00404858 0.0000000 11.0000000 11 0 0 0.000000
10 0.0000000 0 0.000000 0.000000 2.00404858 0.6026679 1.2504817 11 0 0 3.666667
11 0.0000000 0 0.000000 0.000000 2.00404858 0.6026679 2.5221580 11 0 0 3.666667
12 0.0000000 11 3.171926 2.957547 0.00000000 3.9042401 0.0000000 0 0 0 1.833333
13 0.0000000 11 3.171926 2.957547 0.00000000 2.1224393 0.0000000 0 0 0 1.833333
14 0.0000000 11 3.171926 2.957547 0.00000000 2.3844688 0.6146435 0 0 0 1.833333
15 0.0000000 11 11.000000 4.254717 0.00000000 10.0881372 0.5722543 0 0 0 3.666667
16 0.0000000 11 10.521219 4.773585 0.00000000 11.0000000 0.2331407 0 0 0 3.666667
17 0.0000000 11 9.723250 5.551887 0.00000000 10.5859933 0.0000000 0 0 0 3.666667
18 9.8775510 0 0.000000 0.000000 3.42914980 0.0000000 3.7302505 11 11 0 0.000000
19 8.3809524 0 0.000000 0.000000 11.00000000 0.0000000 1.7167630 11 11 0 0.000000
20 11.0000000 0 0.000000 0.000000 4.67611336 0.0000000 4.6416185 11 11 0 0.000000
21 1.7210884 0 0.000000 0.000000 0.04453441 0.0000000 4.8747592 11 0 0 0.000000
22 0.0000000 11 4.855640 1.400943 0.00000000 1.0219152 0.0000000 0 0 0 0.000000
23 0.0000000 11 4.297062 1.400943 0.00000000 0.5764650 0.0000000 0 0 0 0.000000
24 0.0000000 11 6.132390 6.330189 0.31174089 2.6989042 0.0000000 0 0 0 3.666667
25 0.0000000 11 8.127312 2.698113 0.00000000 2.7251072 0.0000000 0 0 0 0.000000
26 6.0612245 0 0.000000 0.000000 3.42914980 0.0000000 2.5221580 11 11 0 0.000000
27 5.0884354 0 0.000000 0.000000 6.54655870 0.0000000 0.0000000 0 11 11 0.000000
28 8.3809524 0 0.000000 0.000000 0.66801619 0.0000000 0.0000000 11 11 11 0.000000
29 0.0000000 11 6.172289 7.316038 4.67611336 0.0000000 0.0000000 0 11 11 3.666667
30 0.3741497 0 0.000000 2.698113 0.00000000 0.0000000 0.0000000 0 11 11 7.333333
31 0.0000000 11 4.177367 11.000000 0.00000000 1.2839447 0.0000000 0 11 11 11.000000
32 1.6462585 0 0.000000 0.000000 3.69635628 0.0000000 1.8863198 11 11 0 0.000000
For a single vector of values, this would be:
# Fake data
set.seed(2)
x = runif(20, 0, 1)
xscaled = ifelse(x < median(x), 0, rescale(x, c(0, 11), from=c(median(x), max(x))))
You can look at what rescale
is doing by typing getAnywhere(rescale.numeric)
or scales:::rescale.numeric
in the console, which gives:
function (x, to = c(0, 1), from = range(x, na.rm = TRUE, finite = TRUE), ...) {
if (zero_range(from) || zero_range(to)) {
return(ifelse(is.na(x), NA, mean(to)))
}
(x - from[1])/diff(from) * diff(to) + to[1]
}
To implement this directly:
from = c(median(x), max(x))
to = c(0, 11)
xscaled2 = ifelse(x < median(x), 0, (x - from[1])/(from[2] - from[1]) * (to[2] - to[1]) + to[1])
from[2] - from[1]
is the same as diff(from)
, but I've used the longer version to make everything explicit.