R plumber API scaling/monitoring

I have been running multiple plumber APIs at scale using docker and traefik to load balance. I hope to look at "future" for some larger jobs but most calls only take 1-3 seconds due to datatables already loaded. I'm trying to determine how I might monitor performance other than setting expected response times on each call after each code update. I've used AB load testing in the past to try and determine the optimal scaling. Is there a way to monitor "wait" time in the R process or log wait time while processing multiple calls? I want to be able to determine # of APIs to spin up if necessary.

Here are a couple strategies we used to access the required sizing of APIs in the past.

Load test API using jmeter / loadtest package
Log each API request timings using GCP logEntry format. Basically you log the system.time in a preroute hook and do the same in a postserialize hook. We store this information in the plumber data env.
By logging each request to your API you are better able to assess ressource needs.

Hopefully you can adapt some of that

library(plumber)

pr <- plumb() |>
  pr_get(path = "/",
         handler = function() {"OK"},
         serializer = serializer_text(),
         preempt = "__first__",
         comments = "Health check",
         tags = "Status")

preroute <- function(data, req) {

  data$verbose <- TRUE
  if (req$PATH_INFO %in% c("/", "/openapi.json") || grepl("__docs__", req$PATH_INFO, fixed = TRUE)) {
    data$verbose <- FALSE
  }

  if (data$verbose) {

    data$start <- proc.time()[[3L]]
    data$requestSize <- object.size(req$bodyRaw)
    data$logEntry$httpRequest <- list(
      "requestMethod" = req$REQUEST_METHOD,
      "requestUrl" = paste0(req$rook.url_scheme, "://", req$HTTP_HOST, req$PATH_INFO, req$QUERY_STRING),
      "requestSize" = as.character(data$requestSize),
      "remoteIp" = paste0(req$REMOTE_ADDR, ":", req$REMOTE_PORT)
    )

    if (isTRUE(nchar(req$HTTP_REFERER) > 0)) {
      data$logEntry$httpRequest$referer <- req$HTTP_REFERER
    }

    if (isTRUE(nchar(req$HTTP_USER_AGENT) > 0)) {
      data$logEntry$httpRequest$userAgent <- req$HTTP_USER_AGENT
    }

  }

}

postroute <- function(data, req) {

  if (data$verbose) {

    # Set debugging either by passing debug via query string arg `?debug=T` or in post body
    # Alternatively use ENV DBG_ENABLE
    if (isTRUE(as.logical(req$args$debug)) || Sys.getenv("DBG_ENABLE", FALSE) == TRUE) {
      req$pr$setDebug(TRUE)
    }

  }

}

postserialize <- function(data, req, res) {

  if (data$verbose) {

    data$logEntry$httpRequest$status <- res$status
    data$responseSize <- object.size(res$body)
    data$logEntry$httpRequest$responseSize <- as.character(data$responseSize)
    data$logEntry$httpRequest$latency <- sprintf("%fs", proc.time()[["elapsed"]] - data[["start"]])

    data$logEntry$severity <- "INFO"
    if (res$status >= 400) { data$logEntry$severity <- "WARNING" }
    if (res$status >= 500) { data$logEntry$severity <- "ERROR" }

    if (!is.null(req$extradata)) {
      data$logEntry$extradata <- req$extradata
    }

    if (req$pr$getDebug()) {

      maxSizeLogEntry <- 10485760
      oversize <- "Size greater than 10Mb."

      data$logEntry$request$headers <- as.list(req$HEADERS)

      if (length(req$argsPath)) {
        data$logEntry$request$argsPath <- req$argsPath
      }

      if (length(req$argsQuery)) {
        data$logEntry$request$argsQuery <- req$argsQuery
      }

      if (data$requestSize <= maxSizeLogEntry) {
        data$logEntry$request$body <- rawToChar(req$bodyRaw)
        if (plumber:::looks_like_json(req$bodyRaw[1])) {
          class(data$logEntry$request$body) <- "json"
        }
      } else {
        data$logEntry$request$body <-  oversize
      }

      if (data$responseSize <= maxSizeLogEntry) {
        data$logEntry$response$body <- res$body
      } else {
        data$logEntry$response$body <- oversize
      }

    }

    cat(jsonlite::toJSON(data$logEntry, auto_unbox = TRUE, json_verbatim = TRUE), sep = "\n")

    req$pr$setDebug(FALSE)
  }

}

pr$registerHooks(list(preroute = preroute, postroute = postroute, postserialize = postserialize))


pr$run(host = "0.0.0.0", port = 8004, debug = FALSE)
2 Likes

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.