Hi there! I have been trying to scrape a website.
I get this error
" Error: 'movie_links' does not exist in current working directory ('F:/Data_Science/Casestudies/Webscrape_IMDB')."
Below is the code:
get_cast = function(movie_links)
{
movie_page = read_html("movie_links")
movie_cast = movie_page %>% html_nodes(".primary_photo+ td a") %>% html_text() %>% paste(collapse=(","))
return(movie_cast)
}
movies = data.frame()
for(page_result in seq(from=1,to=50,by=51))
{
next_page_link = paste0("https://www.imdb.com/search/title/?title_type=feature&year=2020-01-01,2020-12-31&start=",page_result,"&ref_=adv_nxt")
next_page_infor = read_html(next_page_link)
name = next_page_infor %>% html_nodes(".lister-item-header a") %>% html_text()
year = next_page_infor %>% html_nodes(".text-muted.unbold") %>% html_text() %>% str_trim()
synopsis = next_page_infor %>% html_nodes(".ratings-bar+ .text-muted") %>% html_text() %>% str_trim() %>% as.character()
rate = next_page_infor %>% html_nodes(".ratings-imdb-rating strong") %>% html_text() %>% as.numeric()
# Extract Page url section and paste with domain name for entire list.
movie_links = next_page_infor %>% html_nodes(".lister-item-header a") %>% html_attr("href") %>%
paste0("https://www.imdb.com",., sep="")
cast = sapply(movie_links,FUN = get_cast,USE.NAMES = FALSE)
rbind(movies,data.frame(name,year,synopsis,rate,cast,stringsAsFactors = FALSE))
print(paste0("Page No:",page_result))
}
Any suggestions? Thanks in advance.
Regards
Karthik