Isolating database query

I'm trying to implement the following workflow:

  • the user select the categorical and numerical filters it wants.
  • once a button is pressed, based on these filters, a complex query is run against a Presto database and it is stored in memory as a dataframe.
  • successive changes to the filters should not be calling the database, but filtering the in-memory dataframe.

How can this be implemented ? I unsuccessfully tried the following:

tableMod <- function(input, output, session, dims, thresholds, cats, search) {
  observeEvent(search(), {
    output$table <- renderDataTable({
      if (search() == TRUE) {
        baseTable <- genBaseTable(dims, thresholds, cats)
        table <- reactifyTable(baseTable, dims, thresholds, cats)
      }
      else {
        table <- reactive(empty_table)
      }
      datatable(table(), rownames = FALSE, selection = "none", options = list(pageLength = 100, lengthChange = FALSE, searching = FALSE))
    })
  })
}
genBaseTable <- function(dims, thresholds, cats) {
  isolate({
    print("querying database")
    pool %>% tbl(TableSitesDPerfo) %>%
      filter(
        imps >= thresholds$imps(),
        clicks >= thresholds$clicks(),
        ctr >= thresholds$ctr()
      ) %>%
      group_by_at(dims()) %>%
      summarise(
        imps = sum(imps),
        clicks = sum(clicks),
        jcost = sum(jcost)
      ) %>%
      mutate(
        ctr = clicks / imps
      ) %>%
      ungroup() %>%
      collect()
  })
}
filterPerformance <- function(table, operator, imps, clicks, ctr, jcost) {
  reactive({
    if (operator() == "AND") {
      filter(table, (imps >= imps()) & (clicks >= clicks()) &
                    (ctr >= ctr() / 100) & (jcost >= jcost()))
    }
    else if (operator() == "OR") {
      filter(table, (imps >= imps()) | (clicks >= clicks()) |
                    (ctr >= ctr() / 100) | (jcost >= jcost()))
    }
  })
}

reactifyTable <- function(table, dims, thresholds, cats) {
  table_perfo <- filterPerformance(
    table, thresholds$operator, thresholds$imps, thresholds$clicks, thresholds$ctr, thresholds$jcost
  )
}

I expected genBaseTable not to be called after the first time since it is isolated and filtering work at the dataframe level on baseTable. However, genBaseTable keeps getting called when I make changes to the filters.

There are any number of ways to query external database and assign the results to a data frame object locally. Take a look, for example, at dblyr. To relieve the user of a cheatsheet on the three filters, shiny would be a good adjunct.

Here's some p_code

local_result <- dbplyr::{appropriate query}
revised_local <- local_result %>% {filter, select, mutate, summarise, etc.)

Without some proxy data (see reproducible example, called a reprex), I can't help with debugging. Overall there's nothing wrong to the eye with your code, it's simply not idiomatic and imports procedural/imperative constructs to an environment that is fundamentally functional. If you think of it as f(x) = fetch_query and g(x) modify_results in place, it goes much more smoothly.

library(shiny)
library(shinyWidgets)
library(DT)
library(tidyverse)
library(tidytidbits)
library(pool)
library(DBI)


pool <- dbPool(
  drv = RMySQL::MySQL(),
  dbname = "shinydemo",
  host = "shiny-demo.csa7qlmguqrf.us-east-1.rds.amazonaws.com",
  username = "guest",
  password = "guest",
  minSize = 1,
  maxSize = 3,
  idleTimeout = 2 * 60 * 100
)

onStop(function() {
  poolClose(pool)
})


ui <- fluidPage(
  titlePanel("Basic DataTable"),

  # Create a new Row in the UI for selectInputs
  fluidRow(
    column(4,
        selectInput("name",
                    "Name:",
                    c("all", "Kabul", "Amsterdam"))
    ),
    column(4,
        selectInput("nistrict",
                    "District:",
                    c("all", "Kabol", "Noord-Holland"))
    ),
    column(4,
        switchInput("search", label = icon("search"), value = FALSE)
    )
  ),
  # Create a new row for the table.
  DT::dataTableOutput("table")
)


server <- function(input, output) {

  # Filter data based on selections
  observeEvent(search(), {
    output$table <- renderDataTable({
      if (input$search == TRUE) {
        baseTable <- isolate({
          print("querying database")
          pool %>% tbl("City") %>%
            execute_if(input$name != "all", filter(Name == input$name)) %>%
            collect()
        })
        data <- reactive(baseTable %>%
          execute_if(input$name != "all", filter(Name == input$name)))
      }
      else {
        data <- reactive('i want you to fail')
      }
      datatable(data())
    })
  })
}

shinyApp(ui, server)

Here is a reprex to the issue I'm describing in the original post.

I haven't tried this in Shiny, but I think the key is that you need an object (I used a data frame, but that's not crucial) to hold the results of the query. Here, I loaded the City table into an object called res that I can modify to my heart's content without ever bothering the database.


library(shiny)
library(pool)
library(shinyWidgets)
library(DT)
#> 
#> Attaching package: 'DT'
#> The following objects are masked from 'package:shiny':
#> 
#>     dataTableOutput, renderDataTable

library(pool)
library(RMySQL)
#> Loading required package: DBI
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union


pool <- dbPool(
  drv = RMySQL::MySQL(),
  dbname = "shinydemo",
  host = "shiny-demo.csa7qlmguqrf.us-east-1.rds.amazonaws.com",
  username = "guest",
  password = "guest",
  minSize = 1,
  maxSize = 3,
  idleTimeout = 2 * 60 * 100
)

res <- dbGetQuery(pool, "SELECT * FROM City")

Created on 2019-04-16 by the reprex package (v0.2.1)

baseTable in my code is an object.

True, but isn't the embedded

pool %>% tbl("City")

statement circle back to the remote source?

I don't think I follow you. Would you mind helping me with the reproducible example I elaborated ?

1 Like

Well, I'm not exactly covering myself with glory so far, but let's see. When I run the reprex, the app comes up with the error message

Warning: Error in datatable: 'data' must be 2-dimensional (e.g. data frame or matrix)

and when I toggle the app on

Error: could not find function "execute_if"

from (tidytidbits) but only from the IDE, not the vanilla R console, which presents a table.

I'm going to have a late lunch. When I get back, I'll play with with table and see if I can reproduce the issue.

suppressPackageStartupMessages(library(data.table))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(DT))
suppressPackageStartupMessages(library(pool))
suppressPackageStartupMessages(library(RMySQL))
suppressPackageStartupMessages(library(shiny))
suppressPackageStartupMessages(library(shinyWidgets))


pool <- dbPool(
  drv = RMySQL::MySQL(),
  dbname = "shinydemo",
  host = "shiny-demo.csa7qlmguqrf.us-east-1.rds.amazonaws.com",
  username = "guest",
  password = "guest",
  minSize = 1,
  maxSize = 3,
  idleTimeout = 2 * 60 * 100
)

res <- dbGetQuery(pool, "SELECT * FROM City")
result <- as.data.table(res)

Created on 2019-04-16 by the reprex package (v0.2.1)

result is a data.table

> result <- as.data.table(res)
> result
        ID           Name CountryCode      District Population
   1:    1          Kabul         AFG         Kabol    1780000
   2:    2       Qandahar         AFG      Qandahar     237500
   3:    3          Herat         AFG         Herat     186800
   4:    4 Mazar-e-Sharif         AFG         Balkh     127800
   5:    5      Amsterdam         NLD Noord-Holland     731200
  ---                                                         
3423: 4075     Khan Yunis         PSE    Khan Yunis     123175
3424: 4076         Hebron         PSE        Hebron     119401
3425: 4077       Jabaliya         PSE    North Gaza     113901
3426: 4078         Nablus         PSE        Nablus     100231
3427: 4079          Rafah         PSE         Rafah      92020

that is not going to read from the City table again, nor write to it. I don't have enough Shiny-jo to debug your example, but from the R console, every time the user changes either option it displays the message that the database (pool?) has been queried. Hopefully, having a stable data.table in memory will fix the problem.

Wish I could help further.

This topic was automatically closed 54 days after the last reply. New replies are no longer allowed.