diff --git a/DESCRIPTION b/DESCRIPTION index 1ca5378..50f6cac 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: espnscrapeR Type: Package Title: Scrapes Or Collects NFL Data From ESPN -Version: 0.8.0 +Version: 0.9.0 Author: Thomas Mock Maintainer: Thomas Mock Description: Main use case is to collect ESPN QBR for NFL and college football. @@ -9,7 +9,7 @@ Description: Main use case is to collect ESPN QBR for NFL and college football. License: MIT + file LICENSE Encoding: UTF-8 LazyData: true -RoxygenNote: 7.2.1 +RoxygenNote: 7.3.2 URL: https://github.com/jthomasmock/espnscrapeR BugReports: https://github.com/jthomasmock/espnscrapeR/issues Imports: diff --git a/NAMESPACE b/NAMESPACE index 77d74a5..3b13652 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -53,5 +53,6 @@ importFrom(scales,col_numeric) importFrom(stringr,str_detect) importFrom(stringr,str_remove) importFrom(tibble,enframe) +importFrom(tidyr,pivot_longer) importFrom(tidyr,separate) importFrom(xml2,read_html) diff --git a/R/scrape_espn_win_rate.R b/R/scrape_espn_win_rate.R index fdf0472..bc278e9 100644 --- a/R/scrape_espn_win_rate.R +++ b/R/scrape_espn_win_rate.R @@ -14,9 +14,9 @@ #' # Get off and def pass/run win rates #' scrape_espn_win_rate() -scrape_espn_win_rate <- function(season = 2023){ +scrape_espn_win_rate <- function(season = 2024){ - if(!(as.numeric(season) %in% c(2019:2023))) stop("Data available for 2019-2023") + if(!(as.numeric(season) %in% c(2019:2024))) stop("Data available for 2019-2024") pbwr_2022 <- "https://www.espn.com/nfl/story/_/id/34536376/2022-nfl-pass-rushing-run-stopping-blocking-leaderboard-win-rate-rankings-top-players-teams" pbwr_2021 <- "https://www.espn.com/nfl/story/_/id/32176833/2021-nfl-pass-rushing-run-stopping-blocking-leaderboard-win-rate-rankings" @@ -35,38 +35,41 @@ scrape_espn_win_rate <- function(season = 2023){ "Pass Block Win Rate" ) - # 2023 specific code: + # 2023+ specific code: - if(season == 2023){ - url_2023 <- "https://www.espn.com/nfl/story/_/id/38356170/2023-nfl-pass-rush-run-stop-blocking-win-rate-rankings-top-players-teams" + if(season >= 2023){ + load_from <- switch (as.character(season), + "2023" = "https://www.espn.com/nfl/story/_/id/38356170/2023-nfl-pass-rush-run-stop-blocking-win-rate-rankings-top-players-teams", + "2024" = "https://www.espn.com/nfl/story/_/id/41040723/2024-nfl-win-rates-top-teams-players-rankings" + ) -raw_html <- read_html(url_2023) + raw_html <- read_html(load_from) -tab_23 <- raw_html %>% - html_table() %>% - .[[9]] %>% - pivot_longer(cols = -1, names_to = "stat", values_to = "win_rate") %>% - mutate( - stat = case_when( - stat == "PRWR" ~ "Pass Rush Win Rate", - stat == "RSWR" ~ "Run Stop Win Rate", - stat == "PBWR" ~ "Pass Block Win Rate", - stat == "RBWR" ~ "Run Block Win Rate" - )) %>% + tab <- raw_html %>% + html_table() %>% + .[[9]] %>% + pivot_longer(cols = -1, names_to = "stat", values_to = "win_rate") %>% + mutate( + stat = case_when( + stat == "PRWR" ~ "Pass Rush Win Rate", + stat == "RSWR" ~ "Run Stop Win Rate", + stat == "PBWR" ~ "Pass Block Win Rate", + stat == "RBWR" ~ "Run Block Win Rate" + )) %>% mutate( - # extract just the string that is before a '%' - win_rate = str_extract(win_rate, "^[^%]+"), - # convert to a number - win_pct = as.numeric(win_rate) - ) %>% - mutate(date_updated = NA, season = 2023) %>% - arrange(stat, desc(win_pct)) %>% - group_by(stat) %>% - mutate(stat_rank = row_number()) %>% - ungroup() %>% - select(stat, stat_rank, team = Team, win_pct, date_updated, season) + # extract just the string that is before a '%' + win_rate = str_extract(win_rate, "^[^%]+"), + # convert to a number + win_pct = as.numeric(win_rate) + ) %>% + mutate(date_updated = NA, season = season) %>% + arrange(stat, desc(win_pct)) %>% + group_by(stat) %>% + mutate(stat_rank = row_number()) %>% + ungroup() %>% + select(stat, stat_rank, team = Team, win_pct, date_updated, season) - return(tab_23) + return(tab) } @@ -76,8 +79,8 @@ tab_23 <- raw_html %>% season == 2020 ~ pbwr_2020, season == 2021 ~ pbwr_2021, season == 2022 ~ pbwr_2022 - ) ) + ) date_updated <- raw_html %>% html_node("#article-feed > article:nth-child(1) > div > div.article-body > div.article-meta > span > span") %>% diff --git a/espnscrapeR.Rproj b/espnscrapeR.Rproj index 497f8bf..270314b 100644 --- a/espnscrapeR.Rproj +++ b/espnscrapeR.Rproj @@ -18,3 +18,4 @@ StripTrailingWhitespace: Yes BuildType: Package PackageUseDevtools: Yes PackageInstallArgs: --no-multiarch --with-keep.source +PackageRoxygenize: rd,collate,namespace diff --git a/man/scrape_espn_win_rate.Rd b/man/scrape_espn_win_rate.Rd index 11b35fc..0163498 100644 --- a/man/scrape_espn_win_rate.Rd +++ b/man/scrape_espn_win_rate.Rd @@ -4,7 +4,7 @@ \alias{scrape_espn_win_rate} \title{Scrape ESPN Pass/Run Block/Rush Win Rates ratings for a specific season from ESPN's site} \usage{ -scrape_espn_win_rate(season = 2022) +scrape_espn_win_rate(season = 2024) } \value{ Returns a tibble