I found this tutorial over here : RPubs - gaussian-process-greta
This tutorial shows you how to make a certain type of regression model ("gaussian process") between one response variable and one predictor variable. I am trying to extend the code from this tutorial so that it works between one response variable and multiple predictor variables.
From the tutorial, the following code works perfectly on my computer:
#PART 1
#load libraries
library(MASS)
library(tidyverse)
#set seed
set.seed(12345)
#create initial data
x_predict <- seq(-5,5,len=50)
l <- 1
#define functions for evaluating the covariance
SE <- function(Xi,Xj, l) exp(-0.5 * (Xi - Xj) ^ 2 / l ^ 2)
cov <- function(X, Y) outer(X, Y, SE, l)
COV <- cov(x_predict, x_predict)
#sample these functions, place them into a data frame and plot
values <- mvrnorm(200, rep(0, length=length(x_predict)), COV)
dat <- data.frame(x=x_predict, t(values)) %>%
tidyr::pivot_longer(-x, names_to = "rep", values_to = "value") %>%
mutate(rep = as.numeric(as.factor(rep)))
ggplot(dat,aes(x=x,y=value)) +
geom_line(aes(group=rep), color = rgb(0.7, 0.1, 0.4), alpha = 0.4)
#PART2
#create new data
obs <- data.frame(x = c(-4, -3, -1, 0, 2),
y = c(-2, 0, 1, 2, -1))
#repeat steps from part 1
cov_xx_inv <- solve(cov(obs$x, obs$x))
Ef <- cov(x_predict, obs$x) %*% cov_xx_inv %*% obs$y
Cf <- cov(x_predict, x_predict) - cov(x_predict, obs$x) %*% cov_xx_inv %*% cov(obs$x, x_predict)
values <- mvrnorm(200, Ef, Cf)
dat <- data.frame(x=x_predict, t(values)) %>%
tidyr::pivot_longer(-x, names_to = "rep", values_to = "value") %>%
mutate(rep = as.numeric(as.factor(rep)))
gp <- data.frame(x = x_predict, Ef = Ef, sigma = 2*sqrt(diag(Cf)) )
ggplot(dat,aes(x=x,y=value)) +
geom_line(aes(group=rep), color = rgb(0.7, 0.1, 0.4), alpha = 0.2) + #REPLICATES
geom_ribbon(data = gp,
aes(x,
y = Ef,
ymin = Ef - sigma,
ymax = Ef + sigma),
fill="grey", alpha = 0.4) +
geom_line(dat = gp, aes(x=x,y=Ef), size=1) + #MEAN
geom_point(data=obs,aes(x=x,y=y)) + #OBSERVED DATA
scale_y_continuous(lim=c(-3,3), name="output, f(x)") +
xlab("input, x")
Now, I am trying to modify this code so that it works for 3 predictor variables and 1 response variable (in the end, there should be 3 graphs : response vs var1, response vs var2, response vs var3).
I tried to create some new data:
x_predict_1 <- seq(-5,5,len=50)
x_predict_2 <- seq(-6,6,len=50)
x_predict_3 <- seq(-7,7,len=50)
l <- 1
x_predict <- data.frame(x_predict_1, x_predict_2, x_predict_3 )
COV <- cov(x_predict, x_predict)
But this produces the following error:
Error in Xi - Xj : non-numeric argument to binary operator
This error is preventing me from creating the "values" and the "dat" objects from part 1, and I can not create the desired graphs (e.g. x_predict_1 vs values and x_predict_2 vs values). This is also preventing me from creating the desired graphs in part 2:
#part 1
#sample these functions, place them into a data frame and plot
values <- mvrnorm(200, rep(0, length=length(x_predict)), COV)
dat <- data.frame(x=x_predict, t(values)) %>%
tidyr::pivot_longer(-x, names_to = "rep", values_to = "value") %>%
mutate(rep = as.numeric(as.factor(rep)))
ggplot(dat,aes(x=x,y=value)) +
geom_line(aes(group=rep), color = rgb(0.7, 0.1, 0.4), alpha = 0.4)
#PART2
#create new data
obs <- data.frame(x = c(-4, -3, -1, 0, 2),
y = c(-2, 0, 1, 2, -1),
z = c(-3, -1, -1, 2, 2))
#repeat steps from part 1 (I am not sure if this is correct)
cov_xx_inv <- solve(cov(obs$x, obs$x))
Ef <- cov(x_predict, obs$x) %*% cov_xx_inv %*% obs$y
Cf <- cov(x_predict, x_predict) - cov(x_predict, obs$x) %*% cov_xx_inv %*% cov(obs$x, x_predict)
cov_yy_inv <- solve(cov(obs$y, obs$y))
Ef_1 <- cov(y_predict, obs$y) %*% cov_yy_inv %*% obs$y
Cf_1 <- cov(y_predict, y_predict) - cov(y_predict, obs$y) %*% cov_yy_inv %*% cov(obs$y, y_predict)
values <- mvrnorm(200, Ef, Cf, Ef1, Cf1)
dat <- data.frame(x=x_predict, t(values)) %>%
tidyr::pivot_longer(-x, names_to = "rep", values_to = "value") %>%
mutate(rep = as.numeric(as.factor(rep)))
gp <- data.frame(x = x_predict, Ef = Ef, sigma = 2*sqrt(diag(Cf)) )
ggplot(dat,aes(x=x,y=value)) +
geom_line(aes(group=rep), color = rgb(0.7, 0.1, 0.4), alpha = 0.2) + #REPLICATES
geom_ribbon(data = gp,
aes(x,
y = Ef,
ymin = Ef - sigma,
ymax = Ef + sigma),
fill="grey", alpha = 0.4) +
geom_line(dat = gp, aes(x=x,y=Ef), size=1) + #MEAN
geom_point(data=obs,aes(x=x,y=y)) + #OBSERVED DATA
scale_y_continuous(lim=c(-3,3), name="output, f(x)") +
xlab("input, x")
Can someone please show me what I am doing wrong and how to fix this?
Thanks