r shiny - apply custom function to datatable

I would like to be able to apply a user defined function to the set of columns chosen via pickerInput . The problem is I am unsure where I should apply it. The data I will use in the app is text and the custom function basically just cleans it up, removing special characters, stripping white space etc. The app requires the user to load one or two datafiles (csv or xls) and then i want to apply the function after the columns from each data file are chosen. The code is below.

X <- c("plyr", "dplyr", "tm", "readxl", "wordcloud", "SnowballC", "stringdist", "tidytext",
   "rmarkdown", "knitr", "quanteda", "reshape", "stringr", "RecordLinkage", 
   "data.table", "rvest", "qdap", "shiny", "shinydashboard", "shinyWidgets", "DT") 

lapply(X, FUN = function(X){
do.call("library", list(X))
})

# begin defining custom function

removeSPE <- function(x) gsub("[[:punct:]]", "", x)

removeStopWords <- function(x, stop) {
stop <- c("inc", "company", "co", "corporation", "corp", "incorporated", "llc", 
          "llp", "ltd", stopwords("english"))
x <- removeWords(x, stop)
return(x)
}

cleanup <- function(x){
x <- as.character(x) # convert to character
x <- tolower(x) # make all lowercase
x <- sapply(x, removeSPE) # remove special characters
x <- trimws(x, "both") # since stopwords have been removed, there is extra white space left, this removes it
x <- sapply(x, removeStopWords)
x <- gsub("(?<=\\b\\w)\\s(?=\\w\\b)", "", x, perl = TRUE) # removes whitespace btwn two single chars
return(x)
}

##### APP BEGINS HERE WITH UI #####

ui <- dashboardPage(
dashboardHeader(title = "Record Linkage App"),
dashboardSidebar(
    sidebarMenu(
        ## Tab 1 -- Specify Task
        menuItem("Select Task And Upload Files", tabName = "task", icon = icon("file-text-o")),
        ## Tab 2 -- View Raw Data Files
        menuItem("View Raw Data", tabName = "raw", icon = icon("file-text-o")),
        ## Tab 3 -- View Processed Data Files
        menuItem("View Processed Data", tabName = "processed", icon = icon("file-text-o")),
        ## Tab 4 -- Select Training Set
        menuItem("Select Training Set", tabName = "mltrain", icon = icon("file-text-o")),
        ## Tab 5 -- View Weight & Probabilities (choose which chart to view or both?)
        menuItem("Visualize Distributions", tabName = "distributions", icon = icon("bar-chart-o")),
        ## Tab 6 -- View Results (review, match and trash files--need to be able to choose dataset)
        ## Want to be able to add checkboxes to select rows for inclusion in deletion later on
        menuItem("View Result Files", tabName = "fileview", icon = icon("file-text-o"))
        
            )), # close dashboard sidebar

        #### Dashboard Body starts here

    dashboardBody(
    tabItems(
        ### Specify Task & Upload Files Tab
        tabItem(tabName = "task",
                radioButtons("task", "Select a Task:", c("Frame Deduplication", "Frame Record Linkage")),
                fileInput("selection", "Upload Files:", multiple = T, 
                          accept = c(".xlsx", ".xls", "text/csv", "text/comma-separated-values, text/plain", ".csv")),
                helpText(paste("Please upload a file.  Supported file types are:  .txt, .csv and .xls.")),
                helpText(paste("Note:  Record Linkage requires two data frames."))
                
        ), # close first tabItem
        
        tabItem(tabName = "raw",
                helpText(paste("This tab displays the raw, unprocessed data frames selected in the previous tab.")),
                helpText(paste("Select the columns you wish to display.  These columns will be used for string comparisons")),
                fluidRow(
                    column(width = 6,
                           uiOutput("pick_col1"),
                           dataTableOutput("content1")
                    ),
                    column(width = 6,
                           uiOutput("pick_col2"),
                           dataTableOutput("content2")
                    )
                )
                
        ) # close tabItem
    ) # close tabItems
) # close dashboardBody 
) # closes dashboardpage
options(shiny.maxRequestSize = 100*1024^2)


server <- function(input, output, session) {

data <- reactiveValues(file1 = NULL,
                       file2 = NULL)

observe({
    if (!is.null(input$selection$datapath[1]))
        
        if (grepl(".csv$", input$selection$datapath[1])) {
            
            data$file1 <- read.csv(input$selection$datapath[1], header = TRUE, sep = ",")
            
        } else if (grepl(".xls$|.xlsx$", input$selection$datapath[1])) {
            
            data$file1 <- read_excel(input$selection$datapath[1], col_names = TRUE)    
        } 
})

observe({
    if (!is.null(input$selection$datapath[2]))
        
        if (grepl(".csv$", input$selection$datapath[2])) {
            
            data$file2 <- read.csv(input$selection$datapath[2], header = TRUE, sep = ",")
            
        } else if (grepl(".xls$|.xlsx$", input$selection$datapath[2])) {
            
            data$file2 <- read_excel(input$selection$datapath[2], col_names = TRUE)    
        } 
})

output$pick_col1 <- renderUI({
    
    pickerInput(
        inputId = "pick_col1",
        label = "Select the columns of table 1 you wish to display:",
        choices = colnames(data$file1),
        selected = colnames(data$file1),
        options = list(`actions-box` = TRUE,
                       `selected-text-format` = paste0("count > ", length(colnames(data$file1)) - 1),
                       `count-selected-text` = "Alle",
                       liveSearch = TRUE,
                       liveSearchPlaceholder = TRUE),   # build buttons for collective selection
        multiple = TRUE)
})

output$pick_col2 <- renderUI({
    
    pickerInput(
        inputId = "pick_col2",
        label = "Select the columns of table 2 you wish to display:",
        choices = colnames(data$file2),
        selected = colnames(data$file2),
        options = list(`actions-box` = TRUE,
                       `selected-text-format` = paste0("count > ", length(colnames(data$file2)) - 1),
                       `count-selected-text` = "Alle",
                       liveSearch = TRUE,
                       liveSearchPlaceholder = TRUE),   # build buttons for collective selection
        multiple = TRUE)
})


clean1 <- reactive({
    t <- cleanup(input$pick_col1)
    
})


output$content1 <- renderDataTable({
    
    data$file1[, req(input$pick_col1)]
    
    
})

output$content2 <- renderDataTable({
    
    data$file2[, req(input$pick_col2)]
    
})

}

shinyApp(ui, server)

I would like to apply the function cleanup to what is chosen from pickerInput. I have tried creating a different output, say output$clean1 <- reactive({ cleanup(input$pick_col1)}) but I keep getting errors, usually 'Error in datatable: 'data' must be 2-dimensional'.

Any help would be much appreciated. Thanks.

I'd recommend starting by creating a simple reprex, as described in https://mastering-shiny.org/action-workflow.html#reprex.

In particular, for your case, I'd start by weeding down the 21 (!!) packages that you currently need. I'd also recommend loading the packages using library(), since your current adhoc way of loading packages means tools can't easily tell what packages you're loading, and hence offer to install them automatically.

I'd also recommend using styler or similar to apply a consistent style to your code. Inconsistent indentation makes it much harder to see the structure of your code at a glance.

I noticed that you're using reactiveValues() and an observer() when a reactive would do. These techniques tend to make your reactive graph more complicated, and hence harder to reason about. I'd recommend replacing this:

observe({
  if (!is.null(input$selection$datapath[1])) {
    if (grepl(".csv$", input$selection$datapath[1])) {
      data$file1 <- read.csv(input$selection$datapath[1], header = TRUE, sep = ",")
    } else if (grepl(".xls$|.xlsx$", input$selection$datapath[1])) {
      data$file1 <- read_excel(input$selection$datapath[1], col_names = TRUE)
    }
  }
})

with

file1 <- reactive({
  path <- input$selection$datapath[[1]]
  req(path)
  
  ext <- tools::file_ext(path)
  
  if (ext == "csv") {
    read.csv(path, header = TRUE)
  } else if (ext == "xls" || ext == "xlsx") {
    read_excel(path)
  } else {
    stop("Unknown extension: '.", ext, "'")
  }
})

Note that I've pulled out both path and ext into their own variables, and I throw an error for unknown extensions. This makes the code easier to read.

Next you could pull out the repeated code into a function:

load_path <- function(path) {
  req(path)
  
  ext <- tools::file_ext(path)
  
  if (ext == "csv") {
    read.csv(path, header = TRUE)
  } else if (ext == "xls" || ext == "xlsx") {
    read_excel(path)
  } else {
    stop("Unknown extension: '.", ext, "'")
  }
}

file1 <- reactive(load_path(input$selection$datapath[[1]]))
file2 <- reactive(load_path(input$selection$datapath[[2]]))

(load_path() would go outside the server function because it no longer depends on anything specific to a shiny app).

2 Likes