RSelenium Scraping Task

I'm trying to scrape match data from flashscore.pt and I keep running into a problem. This is the code I'm using:

library(RSelenium)
library(rvest)

# Start a Selenium server
driver <- rsDriver(browser = "firefox")

# Connect to the remote driver
remote_driver <- driver$client

# Specify the URL of the website
url <- "https://www.flashscore.pt/futebol/eua/mls-2022/resultados/"

# Navigate to the website
remote_driver$navigate(url)

# Find and click the "Mostrar mais jogos" button using JavaScript
while (TRUE) {
  # Execute JavaScript to check if the button is present on the page
  is_button_present <- remote_driver$executeScript(
    "return document.querySelector('.event__more') !== null;"
  )
  
  # Check the result of the JavaScript execution
  if (as.logical(is_button_present)) {
    # Click the button using JavaScript
    remote_driver$executeScript("document.querySelector('.event__more').click();")
    
    # Wait for some time to allow the page to load new content
    Sys.sleep(5)
  } else {
    # If the button is not present, break the loop
    break
  }
}

# Find the match links
match_links <- remote_driver$findElements(using = "css selector", value = ".event__match")

# Extract the match URLs
match_urls <- sapply(match_links, function(link) {
  match_id <- gsub("^.*_(\\w+)$", "\\1", link$getElementAttribute("id")[[1]])
  paste0("https://www.flashscore.pt/jogo/", match_id, "/#/sumario-do-jogo/sumario-do-jogo")
})

# Function to extract match data from a URL
extract_match_data <- function(url) {
  # Navigate to the match URL
  remote_driver$navigate(url)
  
  # Extract the league name
  league_name <- remote_driver$findElement(using = "css selector", value = ".tournamentHeader__country")$getElementText()
  
  # Extract the team names
  team_names <- remote_driver$findElements(using = "css selector", value = "div.participant__participantName a")$getElementText()
  
  # Extract the date
  date <- remote_driver$findElement(using = "css selector", value = "div.duelParticipant__startTime > div:nth-child(1)")$getElementText()
  
  # Extract the home and away goals
  home_goals <- remote_driver$findElement(using = "css selector", value = "div.event__score.event__score--home")$getElementText()
  away_goals <- remote_driver$findElement(using = "css selector", value = "div.event__score.event__score--away")$getElementText()
  
  # Extract the odds
  odds_rows <- remote_driver$findElements(using = "css selector", value = ".oddsRow")
  odds <- lapply(odds_rows, function(row) row$getElementText())
  
  # Create a data frame with the extracted data
  match_data <- data.frame(
    League = league_name,
    HomeTeam = team_names[1],
    AwayTeam = team_names[2],
    Date = date,
    HomeGoals = home_goals,
    AwayGoals = away_goals,
    Odds = odds,
    stringsAsFactors = FALSE
  )
  
  return(match_data)
}

# Extract match data for each URL
all_match_data <- lapply(match_urls, extract_match_data)

# Print the extracted match data
for (i in seq_along(all_match_data)) {
  cat("Match URL:", match_urls[i], "\n")
  print(all_match_data[[i]])
  cat("\n")
}

and I get this error:

> # Extract match data for each URL
> all_match_data <- lapply(match_urls, extract_match_data)
Error in FUN(X[[i]], ...) : attempt to apply non-function
Called from: FUN(X[[i]], ...)
Browse[1]> 

How can I fix this problem?

1 Like

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.