📅  最后修改于: 2022-03-11 14:57:12.940000             🧑  作者: Mango
url_name <- 'https://scholar.google.com/scholar?hl=en&as_sdt=0%2C38&q=apex+predator+conservation&btnG=&oq=apex+predator+c'
wp <- xml2::read_html(url_name)
# Extract raw data
titles <- rvest::html_text(rvest::html_nodes(wp, '.gs_rt'))
authors_years <- rvest::html_text(rvest::html_nodes(wp, '.gs_a'))
# Process data
authors <- gsub('^(.*?)\\W+-\\W+.*', '\\1', authors_years, perl = TRUE)
years <- gsub('^.*(\\d{4}).*', '\\1', authors_years, perl = TRUE)
# Make data frame
df <- data.frame(titles = titles, authors = authors, years = years, stringsAsFactors = FALSE)