Hi there,
Why do lm(y ~ x)
and lm(x ~ y)
return seemingly unrelated coefficients? See the reprex below.
suppressWarnings(library(tidyverse))

# Fix the RNG state so every run draws the same slope, intercept and noise.
set.seed(651)

# Parameters of a random line y = a * x + b:
# slope `a` drawn around 2 (default sd of 1), intercept `b` around 100 (sd 50).
a <- rnorm(n = 1, mean = 2)
b <- rnorm(n = 1, mean = 100, sd = 50)

# 1000 points on that line, with Gaussian noise (sd 500) added to y only.
df <- tibble(
  x = seq_len(1000),
  y = a * x + b + rnorm(n = 1000, sd = 500)
)
# Ordinary least-squares regression of y on x; printing the fit shows the
# call and the estimated intercept and slope.
lin_mod <- lm(formula = y ~ x, data = df)
print(lin_mod)
#>
#> Call:
#> lm(formula = y ~ x, data = df)
#>
#> Coefficients:
#> (Intercept) x
#> 130.636 1.299
# Algebraically rearranging the fitted line for x:
#   y = a*x + b   =>   x = (1/a) * y - (b/a)
# so the inverted line has slope 1/a and intercept -(b/a), where a and b are
# the slope and intercept estimated by lin_mod.
(expected_slope <- 1 / coef(lin_mod)[["x"]])
#> [1] 0.7698294
(expected_intercept <- -(coef(lin_mod)[["(Intercept)"]] / coef(lin_mod)[["x"]]))
#> [1] -100.5674
# Same data, roles swapped: regress x on y and print the fitted coefficients.
lin_mod_swap <- lm(formula = x ~ y, data = df)
print(lin_mod_swap)
#>
#> Call:
#> lm(formula = x ~ y, data = df)
#>
#> Coefficients:
#> (Intercept) y
#> 281.092 0.281
# Check whether the fitted x ~ y coefficients match the algebraically
# inverted ones. Compare with a numeric tolerance (all.equal) rather than
# `==`: exact equality on doubles can be FALSE for values that differ only by
# floating-point rounding, which would make a FALSE here uninformative.
#
# NOTE: a mismatch is expected. lm(x ~ y) minimises squared errors in x, so
# its slope is cov(x, y) / var(y), not the reciprocal of the y ~ x slope; the
# two slopes multiply to r^2 and are reciprocal only when the fit is perfect.
isTRUE(all.equal(lin_mod_swap$coefficients[[1]], expected_intercept))
#> [1] FALSE
isTRUE(all.equal(lin_mod_swap$coefficients[[2]], expected_slope))
#> [1] FALSE
# Scatter plot with y on the horizontal axis and x on the vertical axis,
# overlaid with the fitted x ~ y line (red) and the algebraically inverted
# y ~ x line (blue).
ggplot(data = df, mapping = aes(x = y, y = x)) +
  geom_point(size = 0.5) +
  geom_abline(
    intercept = coef(lin_mod_swap)[["(Intercept)"]],
    slope = coef(lin_mod_swap)[["y"]],
    color = 'red'
  ) +
  geom_abline(
    intercept = expected_intercept,
    slope = expected_slope,
    color = 'blue'
  ) +
  theme_classic()
Created on 2023-11-13 with reprex v2.0.2