# Okay, I think I have something safer to run! I don't know whether this will
# solve whatever problems were occurring before, but it should at least be
# certain not to repeat a page. Hope it helps!
#
# This time I left the call to view() in, so that you can watch what is
# happening as it goes. On one of my runs the code itself was running fine but
# the page simply wasn't loading, so you should be able to see that if it
# happens.
library(rvest)
library(chromote)
library(tidyverse)
# Scrape the results grid as it is currently rendered in the browser.
#
# chromote_obj: chromote session whose page is showing the grid.
#
# Asks the page for the outer HTML of the `#sc-ui-grid-body-c4716e4a` element,
# parses that snippet with rvest, and returns whatever `html_table()` yields
# for the matching node set.
scrape_table <- function(chromote_obj) {
  grid_js <- 'document.querySelector("#sc-ui-grid-body-c4716e4a").outerHTML'
  grid_markup <- chromote_obj$Runtime$evaluate(grid_js)$result$value
  grid_doc <- read_html(grid_markup)
  grid_nodes <- html_nodes(grid_doc, "#sc-ui-grid-body-c4716e4a")
  html_table(grid_nodes)
}
# Click the grid's "next page" link and block until the page has changed.
#
# chromote_obj:  chromote session to drive.
# timeout:       maximum number of seconds to wait for the page to change.
# poll_interval: seconds to pause between checks of the page state.
#
# The page counts as changed once either the bottom navigation text or the
# position of the empty navigation slot differs from what it was before the
# click. Raises an error if no change is seen within `timeout` seconds, so the
# caller never goes on to scrape a page that did not advance. Returns `NULL`
# invisibly on success.
click_next <- function(chromote_obj, timeout = 60, poll_interval = 0.25) {
  nav_before <- nav_text(chromote_obj)
  null_before <- null_loc(chromote_obj)

  js_click <- '$("#sc_grid_toobar_bot > table > tbody > tr > td:nth-child(2) > a:nth-child(8)")[0].click()'
  # Drive the session that was passed in, not the global `consult` object.
  chromote_obj$Runtime$evaluate(js_click)

  deadline <- Sys.time() + timeout
  repeat {
    # `identical()` always yields a single TRUE/FALSE, even when a lookup comes
    # back NULL or with a length other than one; `!=` inside `||` does not.
    page_changed <- !identical(nav_before, nav_text(chromote_obj)) ||
      !identical(null_before, null_loc(chromote_obj))
    if (page_changed) {
      break
    }
    if (Sys.time() >= deadline) {
      stop(
        "Page did not change within ", timeout,
        " seconds of clicking 'next'.",
        call. = FALSE
      )
    }
    # Pause between polls instead of spinning on the browser connection.
    Sys.sleep(poll_interval)
  }
  invisible(NULL)
}
# Report whether the "next page" link is currently clickable.
#
# chromote_obj: chromote session to query.
#
# Reads the inner HTML of the `a:nth-child(8)` link in the bottom toolbar cell
# and returns the result of testing that markup for the text "enabled"
# (presumably the link's icon markup names its enabled/disabled state).
next_enabled <- function(chromote_obj) {
  next_js <- '$("#sc_grid_toobar_bot > table > tbody > tr > td:nth-child(2) > a:nth-child(8)")[0].innerHTML'
  reply <- chromote_obj$Runtime$evaluate(next_js)
  str_detect(reply$result$value, "enabled")
}
# Fetch the text shown in the grid's bottom navigation cell.
#
# chromote_obj: chromote session to query.
#
# Returns the `innerText` of the second cell of the bottom toolbar, as
# reported by the browser.
nav_text <- function(chromote_obj) {
  nav_js <- '$("#sc_grid_toobar_bot > table > tbody > tr > td:nth-child(2)")[0].innerText'
  reply <- chromote_obj$Runtime$evaluate(nav_js)
  reply$result$value
}
# Fetch the inner HTML of one link in the grid's bottom navigation cell.
#
# chromote_obj: chromote session to query.
# child:        position passed to the CSS `:nth-child()` selector; only an
#               `<a>` element sitting at that position in the cell matches.
#
# Returns the browser-reported `innerHTML` value, or `NULL` when the
# evaluation result carries no value.
nav_html <- function(chromote_obj, child) {
  link_js <- paste0(
    '$("#sc_grid_toobar_bot > table > tbody > tr > td:nth-child(2) > a:nth-child(',
    child,
    ')")[0].innerHTML'
  )
  # Query the session that was passed in rather than the global `consult`.
  chromote_obj$Runtime$evaluate(link_js)$result$value
}
# Locate the navigation slot(s) that hold no link.
#
# chromote_obj: chromote session to query.
#
# Probes child positions 1 through 9 of the bottom navigation cell with
# `nav_html()` and returns, as a character vector, the positions for which it
# came back `NULL`. (Presumably the current page number is rendered as plain
# text rather than a link, which is why its slot is the empty one.)
null_loc <- function(chromote_obj) {
  slots <- 1:9
  is_empty <- vapply(
    slots,
    function(child) is.null(nav_html(chromote_obj, child)),
    logical(1)
  )
  as.character(slots[is_empty])
}
# NOTE: there is no separate helper that checks whether a new page has
# actually loaded; that check is the `repeat` block inside click_next().
# Initialize a `chromote` session
consult <- ChromoteSession$new()

# Show the session (not necessary for running, but it shows what's happening)
consult$view()

# Navigate to the page, then give it time to load
message(consult$Page$navigate("http://www.css.gob.pa/p/grid_defensoria/"))
Sys.sleep(30)

# Set the number of records to 50, then fire a `change` event on the same
# control
message(consult$Runtime$evaluate('document.querySelector("#quant_linhas_f0_bot").value = 50'))
Sys.sleep(10)
message(consult$Runtime$evaluate('document.querySelector("#quant_linhas_f0_bot").dispatchEvent(new Event("change"))'))
Sys.sleep(10)

# Initialize a tibble to store results and a counter for the log
t <- tibble()
i <- 0

# Start from an empty log file
cat("", file = "log.csv", append = FALSE)

# NOTE(review): the first two pages are scraped and advanced without consulting
# next_enabled(); presumably that check is not reliable on the first pages --
# confirm. This assumes the table has at least three pages.
t <- bind_rows(t, scrape_table(consult))
click_next(consult)
t <- bind_rows(t, scrape_table(consult))
click_next(consult)

# While the next button is clickable, scrape the table and click the "next"
# button. Wait 4 seconds between requests to be polite
while (next_enabled(consult)) {
  t <- bind_rows(t, scrape_table(consult))
  click_next(consult)
  i <- i + 1
  # Log line: iteration number, bottom navigation text, "next" link HTML
  cat(i, ",",
      nav_text(consult),
      ",",
      consult$
        Runtime$
        evaluate('$("#sc_grid_toobar_bot > table > tbody > tr > td:nth-child(2) > a:nth-child(8)")[0].innerHTML')$
        result$
        value,
      "\n",
      sep = "", file = "log.csv", append = TRUE)
  # Checkpoint everything scraped so far
  write_csv(t, "t.csv")
  Sys.sleep(4)
}

# Scrape the table on the last page
t <- bind_rows(t, scrape_table(consult))

# Write the file once more so that it also contains the last page
write_csv(t, "t.csv")