This is more of a general question: assuming that, for the most part, a function contains only the "essential" components it needs to do its job, is it okay for that function to be "large" in size?
Or should I try to separate the components further and split it into several smaller functions?
Here's an example of a "large" function (1.2 MB) I have that more or less needs to be the size that it is.
library(tidyverse)
library(RSelenium)
library(rvest)
library(progress)
#' Scrape goal-by-goal box scores for a set of games
#'
#' Starts a Selenium session with `rsDriver()`, navigates to each game URL,
#' parses every element matching `.gamecentre-playbyplay-event--goal`, and
#' row-binds the per-game results into one tibble.
#'
#' @param ... Forwarded unchanged to `nrow()` (for the progress-bar total) and
#'   to `purrr::pmap_dfr()` as the input to iterate over. NOTE(review):
#'   presumably a single data frame with `url`, `league` and `season`
#'   columns -- confirm against callers.
#' @param progress If `TRUE`, display a progress bar that ticks once for each
#'   game that is scraped successfully.
#' @return A tibble with one row per goal and the columns `time`, `period`,
#'   `game_strength`, `team`, `goal`, `primary_assist`, `secondary_assist`,
#'   `season`, `league` and `game_url`. A game whose scrape ultimately raises
#'   an error or warning contributes zero rows.
get_box_score <- function(..., progress = TRUE) {
  if (progress) {
    pb <- progress::progress_bar$new(
      format = "get_box_score() [:bar] :percent eta: :eta",
      clear = FALSE,
      total = nrow(...),
      show_after = 0
    )
    pb$tick(0)
  }

  driver <- rsDriver(verbose = FALSE)
  # Register both cleanups in a single handler with `add = TRUE`. Two separate
  # `on.exit()` calls without `add = TRUE` would leave only the last one
  # registered, so the browser client would never be closed.
  on.exit(
    {
      # Best effort: a failure to close the client must not prevent the
      # server from being stopped.
      try(driver$client$close(), silent = TRUE)
      driver$server$stop()
    },
    add = TRUE
  )

  # Scrape a single game page and return one row per goal.
  .get_box_score <- function(url, league, season, ...) {
    # Random pause of 2-5 seconds before each request.
    seq(2, 5, by = 0.001) %>%
      sample(1) %>%
      Sys.sleep()

    driver$client$navigate(url)
    # Fixed wait after navigation before reading the page source.
    Sys.sleep(3)

    page <- driver$client$getPageSource() %>%
      purrr::pluck(1) %>%
      read_html()

    # League token as it appears in the page's CSS class names.
    league_alternative_name <- case_when(
      league == "OHL" ~ "ohl",
      league == "WHL" ~ "whl",
      league == "QMJHL" ~ "lhjmq"
    )

    # The scoring team is read from the raw HTML of each goal node: it is the
    # text between the "team-border--<league>-" prefix and the goal class.
    teams <- page %>%
      html_nodes(".gamecentre-playbyplay-event--goal") %>%
      {tibble(team = as(., "character"))} %>%
      mutate(team = str_split(team, str_c('<div class="gamecentre-playbyplay-event team-border--', league_alternative_name, '-', sep = ""), simplify = TRUE, n = 2)[,2]) %>%
      mutate(team = str_split(team, 'gamecentre-playbyplay-event--goal', simplify = TRUE, n = 2)[,1]) %>%
      mutate(team = toupper(team))

    # Everything else is carved out of the node's visible text.
    goal_info <- page %>%
      html_nodes(".gamecentre-playbyplay-event--goal") %>%
      html_text() %>%
      as_tibble() %>%
      set_names("messy_data") %>%
      mutate(period = str_split(messy_data, " ", simplify = TRUE, n = 2)[,1]) %>%
      mutate(period = str_split(period, "Goal", simplify = TRUE, n = 2)[,2]) %>%
      mutate(period = str_replace_all(period, c("ST" = "", "ND" = "", "RD" = ""))) %>%
      mutate(time = str_split(messy_data, " ", simplify = TRUE, n = 2)[,2]) %>%
      mutate(time = str_split(time, "\\#", simplify = TRUE, n = 2)[,1]) %>%
      mutate(goal = str_split(messy_data, " ", simplify = TRUE, n = 3)[,3]) %>%
      mutate(goal = str_split(goal, "\\(", simplify = TRUE, n = 2)[,1]) %>%
      mutate(assists = str_split(messy_data, "Assists\\:", simplify = TRUE, n = 2)[,2]) %>%
      mutate(assists = str_split(assists, "\\+/-", simplify = TRUE, n = 2)[,1]) %>%
      # Combined situations are listed first so they win over the single-label
      # cases; anything unmatched is labelled "EV".
      mutate(game_strength = case_when(
        str_detect(messy_data, "Short Handed") & str_detect(messy_data, "Empty Net") ~ "SH EN",
        str_detect(messy_data, "Power Play") & str_detect(messy_data, "Empty Net") ~ "PP EN",
        str_detect(messy_data, "Short Handed") & str_detect(messy_data, "Penalty Shot") ~ "SH PS",
        str_detect(messy_data, "Power Play") & str_detect(messy_data, "Penalty Shot") ~ "PP PS",
        str_detect(messy_data, "Empty Net") ~ "EN",
        str_detect(messy_data, "Short Handed") ~ "SH",
        str_detect(messy_data, "Power Play") ~ "PP",
        str_detect(messy_data, "Penalty Shot") ~ "PS",
        TRUE ~ "EV"
      )) %>%
      # Remove goal-type labels from the assists text.
      mutate(assists = str_replace_all(assists, c(
        "Power Play" = "",
        "Short Handed" = "",
        "Empty Net" = "",
        "Penalty Shot" = "",
        "Game Winning" = "",
        "Game Tying" = "",
        "Insurance Goal" = ""
      ))) %>%
      mutate(primary_assist = str_split(assists, ",", simplify = TRUE, n = 2)[,1]) %>%
      mutate(primary_assist = str_replace_all(primary_assist, "\\#[0-9]{1,2}", "")) %>%
      mutate(secondary_assist = str_split(assists, ",", simplify = TRUE, n = 2)[,2]) %>%
      mutate(secondary_assist = str_replace_all(secondary_assist, "\\#[0-9]{1,2}", ""))

    box_score_data <- teams %>%
      bind_cols(goal_info) %>%
      mutate(season = season) %>%
      mutate(league = league) %>%
      mutate(game_url = url) %>%
      select(time, period, game_strength, team, goal, primary_assist, secondary_assist, season, league, game_url) %>%
      # Normalise whitespace, then turn empty strings into NA.
      mutate_all(str_squish) %>%
      mutate_all(~na_if(., ""))

    if (progress) {
      pb$tick()
    }

    box_score_data
  }

  # Wrap the scraper with elite::persistently() (up to 10 attempts).
  persistently_get_box_score <- elite::persistently(.get_box_score, max_attempts = 10, wait_seconds = 0.0001)

  # On an error or warning, print the condition and the URL, then return an
  # empty tibble so the remaining games are still processed.
  try_get_box_score <- function(url, league, season, ...) {
    tryCatch(
      persistently_get_box_score(url, league, season, ...),
      error = function(e) {
        print(e)
        print(url)
        tibble()
      },
      warning = function(w) {
        print(w)
        print(url)
        tibble()
      }
    )
  }

  all_box_score_data <- pmap_dfr(..., try_get_box_score)
  all_box_score_data
}