-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathinitial_analysis.R
More file actions
122 lines (104 loc) · 4.96 KB
/
initial_analysis.R
File metadata and controls
122 lines (104 loc) · 4.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# ---- Setup ----------------------------------------------------------------
# The two GitHub-hosted packages are installed *before* they are attached and
# only when they are missing. Previously the install calls came after the
# library() calls (too late to help on a fresh machine) and ran on every
# execution of the script.
# To pick up a newer GitHub version, run devtools::install_github() by hand.
if (!requireNamespace("itscalledsoccer", quietly = TRUE)) {
  devtools::install_github("American-soccer-analysis/itscalledsoccer-r")
}
if (!requireNamespace("worldfootballR", quietly = TRUE)) {
  devtools::install_github("JaseZiv/worldfootballR")
}

library(tidyverse)
library(worldfootballR)
library(itscalledsoccer)
library(ggplot2)
library(ggrepel)
library(ggbeeswarm)
library(janitor)
# ---- Load MLS data through the itscalledsoccer client ----------------------
# Client object; every asa$get_*() call below goes through it.
asa <- AmericanSoccerAnalysis$new()

# Player table with birth_date converted to Date. The as.Date() generic is
# used rather than calling the as.Date.character() method directly, so the
# conversion dispatches on whatever type the column actually has.
mls_players <- asa$get_players(leagues = "mls") |>
  mutate(birth_date = as.Date(birth_date))

mls_teams <- asa$get_teams(leagues = "mls")

# Salaries: within each player/team/season keep only the row(s) whose
# mlspa_release equals the latest release in that group.
mls_salaries <- asa$get_player_salaries(leagues = "mls") |>
  group_by(player_id, team_id, season_name) |>
  filter(mlspa_release == max(mlspa_release)) |>
  ungroup()

# Goals added, split by team and season, regular season only. The nested
# `data` column is unnested and season_name is cast to integer.
mls_goals_added <- asa$get_player_goals_added(
  leagues = "mls",
  split_by_teams = TRUE,
  split_by_seasons = TRUE,
  stage_name = "Regular Season"
) |>
  unnest(data) |>
  mutate(season_name = as.integer(season_name))
# Interactive help lookups left over from exploration, commented out so the
# script can run from top to bottom. A bare `difftime()` is a function call
# with its required argument missing, not a help request, and it stops the
# script with an error (presumably `?difftime` was intended).
# ?AmericanSoccerAnalysis
# ?difftime
# ---- Goals added per player/team/season, joined to team, player, salary ----
# Sums goals_added_raw and goals_added_above_avg over the rows of each
# player/team/season/position, scales both to a per-96-minutes rate, then
# attaches team, player and salary columns and a whole-year player age.
Salary_GA <- mls_goals_added |>
  # minutes_played is in the grouping key so it survives summarize();
  # presumably it is constant within a player/team/season - TODO confirm.
  group_by(
    player_id, team_id, season_name, general_position, minutes_played
  ) |>
  summarize(
    goals_added_raw = sum(goals_added_raw),
    goals_added_above_avg = sum(goals_added_above_avg),
    # Same grouping as the implicit default (result stays grouped by
    # player_id, team_id, season_name, general_position), stated explicitly
    # so summarize() does not emit its regrouping message.
    .groups = "drop_last"
  ) |>
  mutate(
    ga96 = goals_added_raw * 96 / minutes_played,
    gaaa96 = goals_added_above_avg * 96 / minutes_played
  ) |>
  # NOTE(review): the joins below use whatever columns the two tables share
  # (no `by`), and the player columns are picked by position (1:4) - verify
  # both against the API output; birth_date must be among those columns.
  left_join(mls_teams) |>
  left_join(mls_players |> select(1:4)) |>
  left_join(
    mls_salaries |>
      ungroup() |>
      select(
        player_id, team_id, season_name, mlspa_release,
        base_salary, guaranteed_compensation
      )
  ) |>
  # Put the most-read columns first; everything() keeps the rest.
  select(
    season_name, team_name, player_name, team_short_name, team_abbreviation,
    general_position, minutes_played,
    base_salary, guaranteed_compensation, goals_added_above_avg, gaaa96,
    everything()
  ) |>
  # Whole years between birth_date and the salary release date.
  mutate(player_age = interval(birth_date, mlspa_release) %/% years())
# ---- 2024 players with a known base salary ---------------------------------
# Adds:
#   mp_pct      - percent rank of minutes_played
#   sal_outlier - TRUE when base_salary is above Q3 + 1.5 * IQR
#   cap_hit     - base_salary capped at 683750 (presumably the 2024 maximum
#                 budget charge - TODO confirm)
Salary_GA_24 <- Salary_GA |>
  ungroup() |>
  filter(!is.na(base_salary), season_name == 2024) |>
  mutate(
    mp_pct = percent_rank(minutes_played),
    sal_outlier = base_salary >
      quantile(base_salary, probs = 0.75, na.rm = FALSE) +
        1.5 * IQR(base_salary),
    cap_hit = pmin(base_salary, 683750)
  )

# The outlier threshold itself, for inspection.
quantile(Salary_GA_24$base_salary, probs = 0.75, na.rm = FALSE) +
  1.5 * IQR(Salary_GA_24$base_salary)
# Number of 2024 player rows in each base-salary band.
# case_when() takes the first matching branch, so each branch only needs its
# lower bound; anything not above 100000 falls through to the default band.
Salary_GA_24 |>
  mutate(
    salary_group = case_when(
      base_salary > 683750 ~ "$684k+",
      base_salary > 350000 ~ "$350k - $684k",
      base_salary > 100000 ~ "$100k - $350k",
      .default = " $67k - $100k"
    )
  ) |>
  # filter(minutes_played > 450) |>
  count(salary_group)
# ---- Beeswarm: position-relative gaaa96 by salary band (2024) --------------
# x: salary band; y: gaaa96 mapped through a normal CDF fitted per
# general_position; colour: position; point size: minutes played.
# Charlotte FC players are labelled. The label column was previously called
# `sounders` although it is filled for "Charlotte FC"; it is renamed to say
# what it holds.
Salary_GA_24 |>
  ungroup() |>
  filter(!is.na(base_salary)) |>
  mutate(
    salary_group = case_when(
      base_salary > 683750 ~ "$684k+",
      base_salary > 350000 & base_salary <= 683750 ~ "$350k - $684k",
      base_salary > 100000 & base_salary <= 350000 ~ "$100k - $350k",
      .default = " $67k - $100k"
    )
  ) |>
  # Percent rank of salary within each band (not used by the plot below).
  mutate(
    base_salary_pct = percent_rank(base_salary) * 100,
    .by = salary_group
  ) |>
  # `.by` keeps the grouping local to this step, so nothing stays grouped.
  mutate(
    gaaa96_pct = pnorm(gaaa96, mean = mean(gaaa96), sd = sd(gaaa96)),
    .by = general_position
  ) |>
  mutate(
    highlight_label = if_else(
      team_name == "Charlotte FC", player_name, NA_character_
    )
  ) |>
  ggplot(
    aes(
      x = salary_group, y = gaaa96_pct,
      color = general_position, size = minutes_played
    )
  ) +
  ggbeeswarm::geom_beeswarm() +
  ggrepel::geom_label_repel(aes(label = highlight_label))
# Histogram of gaaa96 for 2024 players with more than 900 minutes.
Salary_GA_24 |>
  ungroup() |>
  filter(minutes_played > 900) |>
  ggplot(aes(x = gaaa96)) +
  geom_histogram()

# Same subset with gaaa96 standardised (z-score over the filtered rows),
# opened in the data viewer.
Salary_GA_24 |>
  filter(minutes_played > 900) |>
  select(team_name, player_name, minutes_played, gaaa96) |>
  mutate(GA_Z = (gaaa96 - mean(gaaa96)) / sd(gaaa96)) |>
  View()
# ---- Distinct players per cap_hit value ------------------------------------
# `Salary_GA_23` was used here without being defined anywhere in this script.
# It is rebuilt from Salary_GA in the same way Salary_GA_24 is built.
# NOTE(review): season 2023 is inferred from the object name - confirm.
Salary_GA_23 <- Salary_GA |>
  ungroup() |>
  filter(season_name == 2023)

Salary_GA_23 |>
  ungroup() |>
  filter(!is.na(base_salary)) |>
  mutate(
    mp_pct = percent_rank(minutes_played),
    sal_outlier = base_salary >
      quantile(base_salary, probs = 0.75, na.rm = FALSE) +
        1.5 * IQR(base_salary),
    # base_salary capped at 651250 (presumably that season's maximum budget
    # charge - TODO confirm).
    cap_hit = pmin(base_salary, 651250)
  ) |>
  group_by(cap_hit) |>
  summarize(n = n_distinct(player_id)) |>
  arrange(cap_hit) |>
  print(n = 20)
# ---- Scatter: normalised salary vs position-relative gaaa96 ----------------
# This pipeline originally began at mutate() with no data piped in, so it
# could not run. It needs base_salary, sal_outlier, gaaa96 and
# general_position; the input is rebuilt from Salary_GA the same way the
# preceding pipeline prepares it.
# NOTE(review): season 2023 is assumed from the `Salary_GA_23` name used
# just above - confirm the intended season.
Salary_GA |>
  ungroup() |>
  filter(season_name == 2023, !is.na(base_salary)) |>
  mutate(
    sal_outlier = base_salary >
      quantile(base_salary, probs = 0.75, na.rm = FALSE) +
        1.5 * IQR(base_salary)
  ) |>
  mutate(
    # Mean and sd are taken over non-outlier salaries only.
    mean_salary2 = mean(base_salary[!sal_outlier]),
    sd_salary2 = sd(base_salary[!sal_outlier]),
    # Outliers get a fixed score of 10 instead of a z-score, which puts
    # their pnorm() value at effectively 1.
    salary_norm = ifelse(
      !sal_outlier,
      (base_salary - mean_salary2) / sd_salary2,
      10
    ),
    sal_pnorm = pnorm(salary_norm)
  ) |>
  group_by(general_position) |>
  mutate(
    gaaa96_norm = pnorm(gaaa96, mean = mean(gaaa96), sd = sd(gaaa96)),
    high_asset = ifelse(!sal_outlier, "Rest of MLS", "DP/High-Paid TAM")
  ) |>
  ungroup() |>
  ggplot(aes(x = sal_pnorm, y = gaaa96_norm, color = high_asset)) +
  geom_point()
# NOTE(review): neither call below has a data frame piped into it, so they
# cannot run as written. They look like alternative endings that were swapped
# onto the pipeline above by hand - confirm, then attach them to a pipeline
# or remove them.
arrange(-salary_norm, -base_salary)
# NOTE(review): `sal_sd` is not created anywhere in this file - TODO confirm
# which column was intended.
arrange(-sal_sd) |> print(n = 30, scipen = 6)
# Scatter of sal_pct against gaaa96 expressed in standard deviations from the
# mean of each general_position, for rows with at least 180 minutes played.
# NOTE(review): this pipeline starts at filter() with no data frame piped in,
# and `sal_pct` is not created anywhere in this file - it cannot run as
# written. TODO confirm the intended input table and salary column.
filter(minutes_played >= 180) |>
group_by(general_position) |>
# Per-position mean and sd of gaaa96, its normal-CDF value, and its z-score.
mutate(gaaa96_mean = mean(gaaa96),
gaaa96_sd = sd(gaaa96),
gaaa96_pct = pnorm(gaaa96, mean = gaaa96_mean, sd = gaaa96_sd),
gaaa96_sigma = (gaaa96-gaaa96_mean)/gaaa96_sd) |>
ggplot(aes(x = sal_pct, y = gaaa96_sigma)) + geom_point()