Thanks Orson.
probit.nll <- function (beta) {
# linear predictor
eta <- W %*% beta
# probability
p <- pnorm(eta)
# negative log-likelihood
-sum((1 - Z) * log(1 - p) + Z * log(p))
}
probit.gr <- function (beta) {
# linear predictor
eta <- W %*% beta
# probability
p <- pnorm(eta)
# chain rule
u <- dnorm(eta) * (Z - p) / (p * (1 - p))
# gradient
-crossprod(W, u)
}
fit <- optim(beta, probit.nll, gr = probit.gr, method = "BFGS", hessian = TRUE)
I found this in another post somewhere, so if anyone needs to do a probit model manually this worked for me.