Show the code
library(hoopR)
library(tidyverse)
library(knitr)
# Seasons under study: 2002 through whatever most_recent_nba_season() returns.
seasons <- 2002:most_recent_nba_season()

# Download the game-level schedule for every game in those seasons and keep
# only standard NBA games (type "STD"), which excludes ALLSTAR, USA/WORLD,
# EAST/WEST, etc.
sched <- filter(
  load_nba_schedule(seasons = seasons),
  type_abbreviation == "STD"
)
# One-column tibble listing every distinct team abbreviation that appears as
# either the home or the away side in the filtered schedule.
nba_abbrevs <- distinct(
  pivot_longer(
    select(sched, home_abbreviation, away_abbreviation),
    cols = everything(),
    values_to = "team_abbreviation"
  ),
  team_abbreviation
)
# Games played at Madison Square Garden only. The venue name is carried in the
# schedule data, so an exact match on venue_full_name is enough. Only the
# columns needed downstream are kept, in the order listed.
msg_games <- sched %>%
  filter(venue_full_name == "Madison Square Garden") %>%
  # Pure column selection: select() is the idiomatic verb (transmute() is
  # superseded and is meant for computing new columns).
  select(
    game_id,
    season,
    season_type,
    game_date,
    venue_full_name,
    home_abbreviation,
    away_abbreviation,
    home_score,
    away_score,
    home_winner,
    neutral_site
  )
# Cleaning the MSG schedule data down to Knicks regular season and playoff
# games. First step: tally the MSG games by season_type, most frequent first.
count(msg_games, season_type, sort = TRUE)
# A tibble: 1 × 2
season_type n
<int> <int>
1 2 1001
Show the code
# Which teams are listed as the home side in MSG games? Show at most the ten
# most frequent.
head(count(msg_games, home_abbreviation, sort = TRUE), 10)
# A tibble: 3 × 2
home_abbreviation n
<chr> <int>
1 NY 999
2 EAST 1
3 IND 1
Show the code
# Keep only games where "NY" is the listed home team and the game is not
# flagged as a neutral-site game.
msg_knicks_home_games <- filter(
  msg_games,
  home_abbreviation == "NY" & neutral_site == FALSE
)

# Re-run the season_type tally on the cleaned set.
count(msg_knicks_home_games, season_type, sort = TRUE)
# A tibble: 1 × 2
season_type n
<int> <int>
1 2 999
Show the code
# Player box scores for all games in all seasons under study.
pb <- load_nba_player_box(seasons = seasons)

# Check which abbreviation the Knicks carry in the box-score data by counting
# rows under "NY" and "NYK".
count(
  filter(pb, team_abbreviation %in% c("NY", "NYK")),
  team_abbreviation,
  sort = TRUE
)
# A tibble: 1 × 2
team_abbreviation n
<chr> <int>
1 NY 27134
Show the code
# Add composite measures of offensive and defensive stat creation.
# Rows marked did_not_play, or without minutes above zero, are dropped first.
pb <- filter(pb, !did_not_play, minutes > 0)
pb <- mutate(
  pb,
  # True Shooting Percentage: points / (2 * (FGA + 0.44 * FTA)).
  # The denominator is kept as its own column; ts is NA wherever that
  # denominator is not positive, so no division by zero occurs.
  denom = 2 * (field_goals_attempted + 0.44 * free_throws_attempted),
  ts = if_else(denom > 0, points / denom, NA_real_),
  # Composite performance metrics.
  offensive_output = points + rebounds + assists,
  defensive_output = steals + blocks
)
# Player box scores restricted to Knicks home games at MSG. The inner join
# keeps only box-score rows whose game appears in msg_knicks_home_games and
# brings in that table's schedule columns. Flags categorize each row as a
# Knicks player and as a home or away player.
pb_msg <- pb %>%
  inner_join(
    msg_knicks_home_games,
    by = c("game_id", "season", "season_type", "game_date")
  ) %>%
  mutate(
    at_msg = TRUE,
    is_knicks = (team_abbreviation == "NY"),
    is_home = (home_away == "home"),
    is_away = (home_away == "away")
    # TS% is intentionally NOT recomputed here. `pb` already carries `ts`,
    # computed with a guard that yields NA when the shooting denominator is
    # not positive. Recomputing it as a bare division would overwrite that
    # with NaN (0 / 0) or Inf for players with no shot attempts.
  )
# Box scores for away-team players in every game, flagged by whether the game
# was one of the Knicks home games at MSG.
pb_road_flagged <- pb %>%
  # The did_not_play / minutes conditions repeat the filter already applied
  # when `pb` was built; they are harmless and keep this step self-contained.
  filter(home_away == "away", !did_not_play, minutes > 0) %>%
  left_join(
    msg_knicks_home_games %>% transmute(game_id, at_msg = TRUE),
    by = "game_id"
  ) %>%
  # Games with no match in msg_knicks_home_games come back from the join with
  # at_msg = NA; treat those as "not at MSG".
  # `ts` is left as inherited from `pb`, where it is guarded against a
  # non-positive denominator; a bare division here would reintroduce NaN/Inf.
  mutate(at_msg = coalesce(at_msg, FALSE))