-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTesteDuarte_V2.py
More file actions
3512 lines (3098 loc) · 322 KB
/
TesteDuarte_V2.py
File metadata and controls
3512 lines (3098 loc) · 322 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Install in case it's necessary
# pip install actionable-recourse
# pip install dice_ml
# pip install -U git+https://github.com/joaopfonseca/ml-research.git
# pip install -U recourse-game
# pip install git+https://github.com/joaopfonseca/recourse-game
# pip install shap
# pip install -U git+https://github.com/joaopfonseca/ShaRP.git
# pip install -U git+https://github.com/DataResponsibly/ShaRP
# Imports
import dash
import dice_ml
import math
import matplotlib.pyplot as plt
import mlresearch
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import random
import recgame # Import the recgame library
import requests as rq
import scipy.stats as stats
import seaborn as sns
import sklearn
import streamlit as st
import shap
import warnings
warnings.filterwarnings("ignore")
warnings.filterwarnings("ignore", category=UserWarning, module="matplotlib")
#st.set_option('deprecation.showPyplotGlobalUse', False)
# From other libraries
from dash import dcc, html
from dash.dependencies import Input, Output
from itertools import cycle,islice, product
from io import BytesIO
from math import ceil, pi
from mlresearch.utils import set_matplotlib_style, parallel_loop
from os.path import join
from pandas.plotting import parallel_coordinates
from pathlib import Path
from plotly.subplots import make_subplots
from recgame.recourse import ActionableRecourse, NFeatureRecourse
from recgame.recourse import DiCE, NFeatureRecourse # We will mainly follow DiCE results.
from sharp import ShaRP
#from sharp.qoi import QOI_OBJECTS
from sharp.qoi import get_qoi_names
from sharp.qoi import get_qoi
from sharp.utils import scores_to_ordering
from sharp.utils import check_inputs
from sharp.visualization._waterfall import _waterfall
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.utils import check_random_state
# ---------------------------------------------------------------------------
# Global plotting theme: project matplotlib style (base size 18), serif font.
# ---------------------------------------------------------------------------
set_matplotlib_style(18)
plt.rcParams.update({'font.family': 'serif'})

# ---------------------------------------------------------------------------
# Page header: banner image, title and the list of datasets covered.
# ---------------------------------------------------------------------------
st.image("Sports.png")
st.title("What must a basketball, football or tennis player do to improve their overall ranking?")
st.subheader("Datasets used in our research:")

# One line of text per dataset, rendered in the order listed here.
items = [
    "(1) Basket Team NBA 2022-23 (Regular Season)",
    "(2) Basket Player NBA 2022 (Regular Season)",
    "(3) Football Team 2023",
    "(4) Football Player FIFA 2022",
    "(5) ATP Season 2022",
    "(6) WTA Season 2023",
]
for item in items:
    st.write(item)

# Usage hint shown above the tabs.
st.info("Scroll down to get insights according to your selection. Select your preferences in the left side section and navegate throughout the different tabs. Note that it may take a while to process your filters.")

# ---------------------------------------------------------------------------
# Sidebar (part I): sport selection. The tabs are created once here and are
# filled in by whichever sport/type branch runs further down the script.
# ---------------------------------------------------------------------------
st.sidebar.title("SECTIONS")
st.sidebar.header("Personalize your choice:")
Sport = st.sidebar.radio("Sport", ["Basketball", "Football", "Tennis"])

tab_titles = [
    'I. DiCE: General',
    'II. DiCE: Individual Selection',
    'III. SHAP: General',
    'IV. SHAP: Individual Selection',
    'V. SHARP',
    'VI. Conclusion: DiCE vs SHAP vs SHARP',
]
tabs = st.tabs(tab_titles)
# Display specific information based on the selected option: Basket.
if Sport == 'Basketball':
# Open a sidebar for additional Basketball options
st.sidebar.subheader("Basketball Options")
# Create a radio button for selecting the type (team or player)
Team_vs_Player = st.sidebar.radio('Type Preference:', ["Team", "Player"])
# Check if the user selects the type as Team
if Team_vs_Player == 'Team':
# ---------------------------------------------------------------------------
# Basketball / Team: sidebar selection, data loading and feature preparation.
# Widgets are created in a fixed order (team, feature, ranking, comparison
# team) because that is the order in which Streamlit lays them out.
# ---------------------------------------------------------------------------

# Single option list shared by both team select boxes so they cannot drift apart.
nba_teams = (
    'Atlanta Hawks', 'Boston Celtics', 'Brooklyn Nets', 'Charlotte Hornets',
    'Chicago Bulls', 'Cleveland Cavaliers', 'Dallas Mavericks', 'Denver Nuggets',
    'Detroit Pistons', 'Golden State Warriors', 'Houston Rockets', 'Indiana Pacers',
    'Los Angeles Clippers', 'Los Angeles Lakers', 'Memphis Grizzlies', 'Miami Heat',
    'Milwaukee Bucks', 'Minnesota Timberwolves', 'New Orleans Pelicans', 'New York Knicks',
    'Oklahoma City Thunder', 'Orlando Magic', 'Philadelphia 76ers', 'Phoenix Suns',
    'Portland Trail Blazers', 'Sacramento Kings', 'San Antonio Spurs', 'Toronto Raptors',
    'Utah Jazz', 'Washington Wizards',
)
Team = st.sidebar.selectbox('Select the Team:', nba_teams)

# df_summaries | df_totals: the two source workbooks.
df_summaries = pd.read_excel('1_NBA_Team_Stats_Regular_Season_Team_Summaries.xlsx', sheet_name='PBC_NBA_1947_Team_Summaries')
df_totals = pd.read_excel('1_NBA_Team_Stats_Regular_Season_Team_Totals.xlsx', sheet_name='PBC_NBA_1947_Team_Totals')

# Keep only rows with season == 2023 and drop the aggregate 'League Average' row.
df_summaries = df_summaries[df_summaries['season'] == 2023]
df_totals = df_totals[df_totals['season'] == 2023]
df_summaries = df_summaries[df_summaries['team'] != 'League Average']
df_totals = df_totals[df_totals['team'] != 'League Average']

# Normalise column labels to lower snake_case. regex=False makes every
# pattern a literal; "." in particular must never be read as the regex
# wildcard, whatever default the installed pandas version uses.
df_summaries.columns = (
    df_summaries.columns
    .str.replace("%", "perc", regex=False)
    .str.replace("/", "_", regex=False)
    .str.replace(" ", "_", regex=False)
    .str.replace(".", "", regex=False)
    .str.lower()
)
df_totals.columns = (
    df_totals.columns
    .str.replace("%", "perc", regex=False)
    .str.replace("/", "_", regex=False)
    .str.replace(" ", "_", regex=False)
    .str.replace(".", "", regex=False)
    .str.lower()
)

# Drop columns that are not used as model features, then index by team name.
df_summaries = df_summaries.drop(columns=[
    "season",
    "lg",            # Not informative.
    "abbreviation",  # Already filtered information. Discarded.
    "playoffs",      # Irrelevant.
    "w",             # Already filtered information. Discarded.
    "l",             # Already filtered information. Discarded.
    "pw",            # It is an immutable feature. It can not be changed.
    "pl",            # It is an immutable feature. It can not be changed.
    "arena",         # Purely informative.
    "attend",        # Not a game related variable.
    "attend_g",      # Not a game related variable.
]).set_index("team")
df_totals = df_totals.drop(columns=[
    "season",
    "lg",            # Not informative.
    "abbreviation",  # Already filtered information. Discarded.
    "playoffs",      # Irrelevant.
    "g",             # Already filtered information. Discarded.
    "mp",            # Already filtered information. Discarded.
    "fg",            # Information already reflected in 'fg_percent'.
    "fga",           # Information already reflected in 'fg_percent'.
    "x3p",           # Information already reflected in 'x3p_percent'.
    "x3pa",          # Information already reflected in 'x3p_percent'.
    "x2p",           # Information already reflected in 'x2p_percent'.
    "x2pa",          # Information already reflected in 'x2p_percent'.
    "ft",            # Information already reflected in 'ft_percent'.
    "fta",           # Information already reflected in 'ft_percent'.
]).set_index("team")

# Divide these four summary columns by 100 (presumably converting a 0-100
# percentage into a fraction like the other *_percent columns - confirm
# against the workbook).
percent_columns = ['tov_percent', 'orb_percent', 'opp_tov_percent', 'opp_drb_percent']
df_summaries[percent_columns] = df_summaries[percent_columns] / 100

# One row per team with both feature sets, ordered by points (highest first).
df = pd.merge(df_summaries, df_totals, on='team', how='inner')
df = df.sort_values(by='pts', ascending=False)

# Features and target; the target is points divided by the maximum, so the
# top-scoring team has y == 1.
X = df.drop(columns=["pts"])
y = df.pts / df.pts.max()

# Define the dictionary mapping short names to full names
variable_names = {"age": "Age (Team average age on 1 February 2023)",
                  "mov": "Margin of Victory",
                  "sos": "Strength of Schedule",
                  "srs": "Simple Rating System",
                  "o_rtg": "Offensive Rating",
                  "d_rtg": "Defensive Rating",
                  "n_rtg": "Net Rating",
                  "pace": "Pace Factor",
                  "f_tr": "Free Throw Rate",
                  "x3p_ar": "3-Point Attempt Rate",
                  "ts_percent": "True Shooting Percentage",
                  "e_fg_percent": "Effective Field Goal Percentage",
                  "tov_percent": "Turnover Percentage",
                  "orb_percent": "Offensive Rebound Percentage",
                  "ft_fga": "Free Throw Attempt Rate",
                  "opp_e_fg_percent": "Opponent Effective Field Goal Percentage",
                  "opp_tov_percent": "Opponent Turnover Percentage",
                  "opp_drb_percent": "Opponent Defensive Rebound Percentage",
                  "opp_ft_fga": "Opponent Free Throw Attempt Rate",
                  "fg_percent": "Field Goal Percentage",
                  "x3p_percent": "3-Point Percentage",
                  "x2p_percent": "2-Point Percentage",
                  "ft_percent": "Free Throw Percentage",
                  "orb": "Offensive Rebounds",
                  "drb": "Defensive Rebounds",
                  "trb": "Total Rebounds",
                  "ast": "Assists",
                  "stl": "Steals",
                  "blk": "Blocks",
                  "tov": "Turnovers",
                  "pf": "Personal Fouls"}

# Sidebar: feature in focus. The select box shows the full names; the short
# (column) name is recovered through a reverse mapping.
Basketball_team_list = list(variable_names)
Basketball_team_list_full = list(variable_names.values())
Basketball_team_feature_full_name = st.sidebar.selectbox('Feature in focus:', Basketball_team_list_full)
full_name_to_short = {full_name: short_name for short_name, full_name in variable_names.items()}
Basketball_team_feature = full_name_to_short[Basketball_team_feature_full_name]

# Sidebar: desired top-ranking share.
Decil = st.sidebar.selectbox('Top Ranking (%) you desire to achieve (where 0,05 means top 5%):', ('0.05','0.1', '0.15', '0.2', '0.25', '0.3', '0.35', '0.4', '0.45', '0.5',
                                                                                             '0.55', '0.6', '0.65', '0.7', '0.75', '0.8', '0.85', '0.9'))
# Rounded so the value interpolated into the file names below carries no
# floating-point noise (e.g. 0.2 rather than 0.19999999999999996).
Decil_final = round(1 - float(Decil), 2)

# Sidebar: second team, used for comparisons.
Team_2 = st.sidebar.selectbox('Select a Team to compare:', nba_teams)

# Pre-computed counterfactuals and differences for the chosen ranking level.
cfs = pd.read_excel(f'cfs_1_{Decil_final}.xlsx')
differences = pd.read_excel(f'differences_1_{Decil_final}.xlsx')

# Vertical spacer before the tab contents.
st.write("<div style='height: 650px;'></div>", unsafe_allow_html=True)
#if tabs == "1. General Sport Analysis":
with tabs[0]:
# ---------------------------------------------------------------------------
# Tab I - DiCE: General. Shows the dataset and three dataset-wide views of
# the suggested changes (heatmap, histogram, radar of per-feature means).
# Every chart is drawn on its own explicit figure, handed to st.pyplot and
# then closed: calling st.pyplot() without a figure is deprecated and relies
# on global pyplot state, and unclosed figures pile up across reruns.
# NOTE: Figures 4, 5 and 7 (violin, density and swarm plots) are disabled;
# the remaining captions keep their figure numbers.
# ---------------------------------------------------------------------------
st.markdown("<h4 style='text-align: center;'>Dataset in focus</h4>", unsafe_allow_html=True)
st.write(df)
st.markdown("**Figure 1**: Representation of the DataFrame used. It aggregates all data used in our research.")
st.write("<div style='height: 150px;'></div>", unsafe_allow_html=True)

# Concepts to take into account.
st.info("DICE: method used to generate diverse counterfactual explanations for machine learning models. In simple words, it provides 'what-if' explanations for the model output. 'Counterfactuals' represent the desired values. 'X' represent the initial values. 'Differences' will be lead from now onwards, represent SUGGESTED CHANGES (recommendations) between the counterfactuals and the initial values.")

# 1. Index both DiCE tables by their first column so rows can be looked up by
# label (done in place: later tabs rely on the re-indexed frames).
cfs.set_index(cfs.columns[0], inplace=True)
differences.set_index(differences.columns[0], inplace=True)
# Row of suggested changes for the selected team; plotted in the next tab.
Team_differences = differences.loc[Team]

# 2. Heatmap: Insights from SUGGESTED CHANGES
heatmap_fig, heatmap_ax = plt.subplots(figsize=(10, 10))
sns.heatmap(differences, cmap='coolwarm', ax=heatmap_ax)
st.markdown("<h4 style='text-align: center;'>Heatmap: Insights from SUGGESTED CHANGES</h4>", unsafe_allow_html=True)
st.pyplot(heatmap_fig)
plt.close(heatmap_fig)
st.markdown("**Figure 2**: Results from DICE. Representation of all the differences obtained in our dataset, per instance. Visual representation of how the features would need to be altered in the counterfactual scenarios compared to the original data to achieve the desired outcomes predicted by the model. Teams (in Y-axis) vs Features (in X-axis), with variations in absolute values: \n - **Positive values** indicate an increase recommendation for that feature; \n - **Negative values** indicate a decrease recommendation for that feature.")
st.write("<div style='height: 150px;'></div>", unsafe_allow_html=True)

# 3. Histograms: Insights from SUGGESTED CHANGES
# All differences flattened into one array (every team, every feature).
differences_array = differences.values.flatten()
# 10x10 canvas, matching the heatmap above.
hist_fig, hist_ax = plt.subplots(figsize=(10, 10))
hist_ax.hist(differences_array, bins=20, edgecolor='black')
hist_ax.set_xlabel('Differences')
hist_ax.set_ylabel('Frequency')
st.markdown("<h4 style='text-align: center;'>Histograms: Insights from SUGGESTED CHANGES</h4>", unsafe_allow_html=True)
st.pyplot(hist_fig)
plt.close(hist_fig)
st.markdown("**Figure 3**: Results from DICE. It helps to understand the the overall pattern and where most of the differences are concentrated in. It indicates the frequency (in absolute values), per each difference value. \n - **Positive values** indicate an increase recommendation for that feature; \n - **Negative values** indicate a decrease recommendation for that feature.")
st.write("<div style='height: 150px;'></div>", unsafe_allow_html=True)

# 6. Radar Chart: Average SUGGESTED CHANGES per feature
categories = list(differences.columns)           # One spoke per feature.
values = differences.mean().values.tolist()      # Mean difference per feature.
values += values[:1]                             # Repeat the first point to close the loop.
feature_count = len(categories)
angles = [spoke / float(feature_count) * 2 * pi for spoke in range(feature_count)]  # Evenly spaced spokes.
angles += angles[:1]                             # Close the loop for the angles as well.
radar_fig = plt.figure(figsize=(8, 8))
radar_ax = radar_fig.add_subplot(projection='polar')
radar_ax.plot(angles, values)
radar_ax.fill(angles, values, alpha=0.25)        # Semi-transparent fill inside the outline.
radar_ax.set_xticks(angles[:-1])                 # One tick per feature (the closing point is skipped).
radar_ax.set_xticklabels(categories)
st.markdown("<h4 style='text-align: center;'>Radar Chart: Average SUGGESTED CHANGES per feature</h4>", unsafe_allow_html=True)
st.pyplot(radar_fig)
plt.close(radar_fig)
st.markdown("**Figure 6**: Results from DICE. Another method to represent the differences obtained. **The axis defines each difference magnitude per feature.**")
st.write("<div style='height: 150px;'></div>", unsafe_allow_html=True)
#else:
with tabs[1]:
# ---------------------------------------------------------------------------
# Tab II (part 1) - per-team bar plot of suggested changes and the KDE of the
# suggested changes for the feature in focus. Each chart uses an explicit
# figure that is passed to st.pyplot and closed afterwards.
# ---------------------------------------------------------------------------

# 8. Bar Plot: one bar per feature for the selected team.
fig, ax = plt.subplots()
ax.bar(Team_differences.index, Team_differences.values)
ax.tick_params(axis='x', labelrotation=90)  # Vertical labels so the feature names stay readable.
ax.set_xlabel('Columns')
ax.set_ylabel('Values')
st.markdown(f"<h4 style='text-align: center;'>Bar Plot for {Team}</h4>", unsafe_allow_html=True)
st.pyplot(fig)
plt.close(fig)
st.markdown(f"**Figure 8**: Results from DICE for **{Team}**. As described in the previous tab, it provides 'what-if' explanations for the model output, by stating **which features would need to be altered in the counterfactual scenarios** compared to the original data to achieve the desired outcomes predicted by the model. \n - **Positive values** indicate an increase recommendation for that feature; \n - **Negative values** indicate a decrease recommendation for that feature.")
st.write("<div style='height: 150px;'></div>", unsafe_allow_html=True)

# 9. KDE: distribution, across all teams, of the suggested change for the
# feature in focus.
differences_array = differences[Basketball_team_feature].values
kde_fig, kde_ax = plt.subplots(figsize=(8, 6))
# `fill=True` replaces the deprecated `shade=True` keyword.
sns.kdeplot(differences_array, fill=True, ax=kde_ax)
kde_ax.set_xlabel('Differences')
kde_ax.set_ylabel('Density')
st.markdown(f"<h4 style='text-align: center;'>KDE: Insights from SUGGESTED CHANGES for variable {Basketball_team_feature_full_name}</h4>", unsafe_allow_html=True)
st.pyplot(kde_fig)
plt.close(kde_fig)
# An empty chart means no density could be estimated because the suggested
# change does not vary across teams (typically it is zero everywhere), i.e.
# the feature is NOT being changed - the caption states exactly that.
st.markdown(f"**Figure 9**: Results from DICE regarding variable **{Basketball_team_feature}**. Provides the distribution of differences across all instances on this specific feature. In case the graph is empty, it means **{Basketball_team_feature} is not recommended to change**.")
st.write("<div style='height: 150px;'></div>", unsafe_allow_html=True)
# ---------------------------------------------------------------------------
# Tab II (part 2) - min-max normalisation and the overlaid radar chart of the
# selected team's INITIAL (X) and RECOMMENDED (counterfactual) feature values.
# NOTE: the stand-alone radar charts (Figures 10-13) are disabled; only the
# overlaid chart (Figure 14) is rendered.
# ---------------------------------------------------------------------------
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
# Learn each feature's range from the initial values only ...
normalized_data_X = scaler.fit_transform(X.to_numpy())
# ... and apply that SAME range to the counterfactuals. Fitting a second
# scaler on `cfs` would put the two radar traces on different scales, so a
# recommended change could not be read off by comparing them. Plain arrays
# are used because the radar below already pairs the columns of X and cfs by
# position. Counterfactual values outside the observed range of X map
# outside [0, 1].
normalized_data_cfs = scaler.transform(cfs.to_numpy())
# The differences have their own scale and are normalised independently.
normalized_data_differences = scaler.fit_transform(differences)
X_normalized = pd.DataFrame(normalized_data_X, columns=X.columns, index=X.index)
cfs_normalized = pd.DataFrame(normalized_data_cfs, columns=cfs.columns, index=cfs.index)
differences_normalized = pd.DataFrame(normalized_data_differences, columns=differences.columns, index=differences.index)

# 14. Radar (per team) - INITIAL and RECOMMENDED STATE overlapped
selected_player = Team

# Normalised rows for the selected team; the first value is appended again so
# each outline closes on itself.
player_cfs_normalized = cfs_normalized.loc[selected_player]
player_values_cfs = player_cfs_normalized.values.tolist()
player_values_cfs += player_values_cfs[:1]
player_X_normalized = X_normalized.loc[selected_player]
player_values_X = player_X_normalized.values.tolist()
player_values_X += player_values_X[:1]

# One evenly spaced spoke per feature, closed the same way.
categories = list(player_cfs_normalized.index)
feature_count = len(categories)
angles = [spoke / float(feature_count) * 2 * pi for spoke in range(feature_count)]
angles += angles[:1]

radar_fig = plt.figure(figsize=(8, 8))
radar_ax = radar_fig.add_subplot(projection='polar')
# 'cfs' trace: the desired (recommended) values.
radar_ax.plot(angles, player_values_cfs, label='recommended', color='blue')
radar_ax.fill(angles, player_values_cfs, alpha=0.25, color='blue')
# 'X' trace: the initial values.
radar_ax.plot(angles, player_values_X, label='initial', color='green')
radar_ax.fill(angles, player_values_X, alpha=0.25, color='green')
radar_ax.set_xticks(angles[:-1])
radar_ax.set_xticklabels(categories)
st.markdown(f"<h4 style='text-align: center;'>INITIAL STATE and RECOMMENDED STATE: for {selected_player} - NORMALIZED</h4>", unsafe_allow_html=True)
radar_ax.legend()
# Explicit figure: st.pyplot() without arguments is deprecated.
st.pyplot(radar_fig)
plt.close(radar_fig)
st.markdown("**Figure 14**: To obtain clear insights, we overlapped previous **INITIAL** and **RECOMMENDADED STATES** visualizations. Recapping: \n - **Blue line** represent **DESIRED** feature values (Counterfactuals); \n - **Green line** represent **INITIAL** feature values.")
st.write("<div style='height: 150px;'></div>", unsafe_allow_html=True)
#else:
with tabs[2]:
# ---------------------------------------------------------------------------
# Tab III - SHAP: General. Fits the linear model, computes SHAP values for
# every row of X and renders the beeswarm summary.
# NOTE: Figure 15 (SHAP bar plot) is disabled; the beeswarm keeps number 16.
# ---------------------------------------------------------------------------
# Concepts to take into account
st.info("SHAP: (SHapley Additive exPlanations) can be defined as a game theoretic approach to explain the output of a machine learning model. It explains the impact and the importance of each feature on model output/predictions for a specific instance. \n It provides a more interpretable view of the model's behavior and these values can be used to gain insights on which factors mostly influence specific predictions. \n Looks at the average value and give us information.")

# Linear regression without intercept on the normalised points target. The
# model, explainer and SHAP values are module-level names reused by the next tab.
lr = LinearRegression(fit_intercept=False).fit(X, y)
explainer = shap.Explainer(lr, X)
shap_values = explainer(X)

# 16. SHAP Beeswarm Plot
st.markdown("<h4 style='text-align: center;'>SHAP Beeswarm Plot</h4>", unsafe_allow_html=True)
# Start from a fresh figure so the plot never depends on whatever figure an
# earlier chart left current. show=False stops SHAP from calling plt.show(),
# leaving the figure intact to hand to Streamlit explicitly (st.pyplot()
# without arguments is deprecated).
plt.figure()
shap.plots.beeswarm(shap_values, max_display=15, show=False)
beeswarm_fig = plt.gcf()
st.pyplot(beeswarm_fig)
plt.close(beeswarm_fig)
st.markdown("**Figure 16**: Beeswarm Plot summarizes what are the **most relevant features** impact model output. Each instance is represented at the graph by a single point. The plot below sorts features by their SHAP value magnitudes. \n - In the X-axis, **positive SHAP values represent a positive impact** from the feature to the model output (positive SHAP values means that that feature contribute positively to its model outcome) (Features whose variance contribute positively to the player overall improvement have positive absolute values); \n - In the X-axis, **negative SHAP values represent a negative impact** from the feature to the model output (negative SHAP values means that that feature contributely negatively to its model outcome)(Features whose variance contribute negatively to the player overall improvement have negative absolute values); \n - **The red color code** for a specific instance, means that it a value above the dataset average for that specific feature; \n - **The blue color code** for a specific instance, means that it a value bellow the dataset average for that specific feature.")
st.markdown("For example, for features with mostly blue dot at the right side of the graph, it means that the lower the feature value, the higher it tends to be the outcome.")
st.write("<div style='height: 150px;'></div>", unsafe_allow_html=True)
#else:
with tabs[3]:
# 17. Scatter Plot
basketball_team_index_feature = Basketball_team_list.index(Basketball_team_feature)
st.markdown(f"<h4 style='text-align: center;'>SHAP Scatter Plot for feature {Basketball_team_feature_full_name}</h1>", unsafe_allow_html=True)
#st.set_option('deprecation.showPyplotGlobalUse', False)
shap.plots.scatter(shap_values[:, basketball_team_index_feature])
st.pyplot()
st.markdown(f"**Figure 17**: Scatter plot on feature **{Basketball_team_feature_full_name}**, which shows its effect on model predictions. Each point represents an instance from the dataset. \n - **X-axis** represents the feature input value; \n - **y-axis** represents the SHAP values for {Basketball_team_feature_full_name} feature, which means **'how much must {Basketball_team_feature_full_name} change the model output value'**; \n - **The gray area** represents, through an histogram, dataset distribution for **{Basketball_team_feature_full_name}**.")
st.markdown(f"This means that, for positive SHAP values, **{Basketball_team_feature_full_name} must impact positively** the model output, while for negative SHAP values, **{Basketball_team_feature_full_name} must impact negatively** the model output.")
st.write("<div style='height: 150px;'></div>", unsafe_allow_html=True)
# # 18. SHAP Partial Dependence Plot
# st.markdown(f"<h4 style='text-align: center;'>SHAP Partial Dependence Plot for feature {Basketball_team_feature_full_name}</h1>", unsafe_allow_html=True)
# st.set_option('deprecation.showPyplotGlobalUse', False)
# shap.partial_dependence_plot(
# Basketball_team_feature, lr.predict, X, ice=False,
# model_expected_value=True, feature_expected_value=True)
# st.pyplot()
# st.markdown(f"**Figure 18**: Model's dependence on the feature {Basketball_team_feature_full_name}, now in the new original feature space (X). It explains **how SHAP values of {Basketball_team_feature_full_name} vary across a dataset** and how changes in the {Basketball_team_feature_full_name} values impact model's predictions. \n - **X-axis** represents SHAP values for the {Basketball_team_feature_full_name} feature; \n - **Y-axis** represents the variation per player; \n - **Gray horizontal line** represents the final expected value for the model; \n - **Gray vertical line** represents {Basketball_team_feature_full_name} average value; \n - **The blue line with positive slope** represents the model average value when we define **{Basketball_team_feature_full_name}** as a certain value;")
# st.write("<div style='height: 150px;'></div>", unsafe_allow_html=True)
# 19. SHAP Waterfall Plot
X_indexes = X.index.tolist()
basketball_team_index_player = X_indexes.index(Team)
st.markdown(f"<h4 style='text-align: center;'>SHAP Waterfall Plot for {Team}</h1>", unsafe_allow_html=True)
#st.set_option('deprecation.showPyplotGlobalUse', False)
shap.plots.waterfall(shap_values[basketball_team_index_player], max_display=15)
st.pyplot()
st.markdown(f"**Figure 19**: Waterfall plot attempts to explain the predictions for {Team}, instead of, as in the previous two graphs, focusing on feature {Basketball_team_feature_full_name}. In the X-axis, we have information of the entire model expected output value. The color code, along with its respective magnitude indication, inform if: \n - The **red features** are pushing the **prediction higher**; \n - The **blue features** are pushing the **prediction lower**; \n - The **gray values** before the feature name, indicate each feature value for **{Team}**; \n - The **gray value** on top of the graph, indicates the model prediction for **{Team}**.")
st.write("<div style='height: 150px;'></div>", unsafe_allow_html=True)
#else:
with tabs[4]:
# # Concepts to take into account
# st.info("SHARP: (SHapley for Rankings and Preferences), a framework that attemps to explain the contribution of features to different decils of an output in 'a ranking format' and can be base either on ShaPley or Unary values (we used the last one). According to recent studies, ShaRP claims that the weght of each feature does not correspond to its ShaPley value contribution (analyzed on tabs 3 and 4). Researches appoint that it depends on feature distribution (varying according to the decil in focus) and to local interactions between scoring features. ShaRP, derived from Quantitative Input Influence framework, can contribute to explain score-based and ranking type models.")
# # 20. SHARP: Rank vs Score
# import os
# st.markdown(f"<h4 style='text-align: center;'>SHARP: Rank vs Score</h1>", unsafe_allow_html=True)
# st.image("Rank_vs_Score_(1) Basket Team.png")
# st.markdown("**Figure 20**: Relationship between Score and Rank. Score function, which provides a certain weight to each variable in the dataset, was defined by us, acccording to our knowledge of the sport. We tend to see an **inverse relationship between Score and Rank**.")
# st.write("<div style='height: 150px;'></div>", unsafe_allow_html=True)
# # 21. SHARP: Top and Bottom 3 Individuals
# st.markdown(f"<h4 style='text-align: center;'>SHARP: Top and Bottom 3 Individuals</h1>", unsafe_allow_html=True)
# st.image("Top_bottom_feature_importance_1.png")
# st.markdown("**Figure 21**: Top 3 and Bottom 3 instances with their respective aggregate feature importance, providing insights on which are the most and the **least relevant features for their ranking**. For example: \n - Features with a **high positive values among the top 3**, means that it was a **key feature** for these instances to achieve this **high/good ranking**; \n - Features with a **considerable negative values among the bottom 3**, means that it was a **key feature** for these instances to achieve this **low/bad ranking;** ")
# st.write("<div style='height: 150px;'></div>", unsafe_allow_html=True)
# # 22. SHARP: Feature Importance
# st.markdown(f"<h4 style='text-align: center;'>SHARP: Feature Importance</h1>", unsafe_allow_html=True)
# st.image("Strata_boxplot_1.png")
# st.markdown("**Figure 22**: Visualization on how feature importance varies **across strata (different decil categories)**. \n - There are 5 decil categories, represented at the bottom of the graph. \n - All the features are legended at the top of the graph. \n - At the left side of the graph, we have indication of the importance magnitude. \n - Each feature importance is distributed thorugh a boxplot, indicating us Q1, Q2 (median) and Q3. The higher the position of the boxplot, **the higher the relevancy of that specific feature in that decil**. \n - **The longer the boxplot**, the **more different importances that feature acquire** in the dataset.")
# st.markdown("We highly recommend you to open the figure (at the top right corner of the figure) and zoom it, so that you can have a better understanding of the main insights.")
# st.write("<div style='height: 150px;'></div>", unsafe_allow_html=True)
# 23. Unary values in focus
def scorer(dataset, columns=None):
    """Score each row of ``dataset`` as an equally weighted sum of its features.

    Passed to ShaRP as ``target_function``; the ranking explained by the
    SHARP plots is derived from these scores.

    Parameters
    ----------
    dataset : array-like
        Feature matrix with one row per instance. It is passed through
        ``check_inputs`` first (presumably converting it to a 2-D array --
        confirm against the ShaRP utilities).
    columns : optional
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    The per-row weighted sum (reduction over ``axis=1``).
    """
    X, _ = check_inputs(dataset)
    # Every feature carries the same 0.2 weight. The weight vector is sized
    # from the data instead of a hand-written 31-entry list, so the scorer
    # keeps working if a feature is added or removed.
    weights = np.full(X.shape[1], 0.2)
    # Weighted sum across the feature axis gives one score per row.
    return np.sum(X * weights, axis=1)
X_sharp = X
X_sharp_np = X_sharp.values
y = scorer(X_sharp_np)
xai = ShaRP(
qoi="rank",
target_function=scorer,
measure="unary",
sample_size=None, # sample_size=None,
replace=False,
random_state=42,
verbose=1,
n_jobs=-1)
xai.fit(X_sharp)
st.markdown(f"<h4 style='text-align: center;'>Unary values in focus</h1>", unsafe_allow_html=True)
unary_values = pd.read_csv("cs_rankqoi_unary_values_(1)_basket_team.csv")
unary_values.set_index(unary_values.columns[0], inplace=True)
unary_values_player = unary_values.loc[Team].drop(["Score", "Ranking"])
st.write(unary_values) #st.write(unary_values_player), if we want to filter by the player we chose.
st.markdown("**Figure 23**: Representation of all Unary Values computed and used in our research.")
st.write("<div style='height: 150px;'></div>", unsafe_allow_html=True)
# 24. SHARP: Waterfall
st.markdown(f"<h4 style='text-align: center;'>SHARP: Waterfall Plot</h1>", unsafe_allow_html=True)
#st.set_option('deprecation.showPyplotGlobalUse', False)
rank_dict = {
"upper_bounds": None,
"lower_bounds": None,
"features": None,
"data": None,
"base_values": 0,
"feature_names": unary_values_player.index.tolist(),
"values": unary_values_player}
_waterfall(rank_dict, max_display=15)
st.pyplot()
st.markdown(f"**Figure 24**: Waterfall plot for the selected {Team}. Similarly to SHAP Waterfall, it attempts to explain {Team} ranking. In the X-axis, we have information of the entire model expected output value. The color code, along with its respective magnitude indication, inform if: \n - The **red features** are pushing the **prediction higher**; \n - The **blue features** are pushing the **prediction lower**; \n - The **gray values** before the feature name, indicate each feature value for **{Team}**; \n - The **gray value** on top of the graph, indicates the model prediction for **{Team}**.")
st.write("<div style='height: 150px;'></div>", unsafe_allow_html=True)
# 25. SHARP: Pairwise Comparison
st.markdown(f"<h4 style='text-align: center;'>SHARP: Pairwise Comparison</h1>", unsafe_allow_html=True)
# X_sharp and the fitted explainer `xai` are reused from the "Unary values in
# focus" step; recomputing X_sharp / X_sharp_np / y here produced the same
# values again, so that duplicate work is dropped.
values = xai.pairwise(
    X_sharp.loc[Team].values,
    X_sharp.loc[Team_2].values)
fig, ax = plt.subplots(1, 1, figsize=(8, 8))
xai.plot.bar(values, ax=ax)
ax.set_ylabel("Contribution to Rank")
ax.set_xlabel("")
plt.xticks(rotation=90)
# Hand the figure to Streamlit explicitly: calling st.pyplot() with no
# argument falls back to the global pyplot figure, which Streamlit deprecates.
st.pyplot(fig)
st.markdown(f"**Figure 25**: Pairwise comparison between {Team} and {Team_2}. It provides insights on which variables mostly contribute and which variables mostly harm each one. \n - **Positive values** for a certain feature, means that it **favors {Team} instead of {Team_2}**. \n - **Negative values** for a certain feature, means that it **favors {Team_2} instead of {Team}**.")
st.write("<div style='height: 150px;'></div>", unsafe_allow_html=True)
#else:
with tabs[5]:
# Extracting values per feature
# Combine values from the main plots for each feature (DiCE, SHAP and SHARP).
# NOTE(review): SHAP values are looked up by the feature's position in
# rank_dict["feature_names"]; this assumes that list is ordered like the
# columns the SHAP explainer saw -- confirm.
team_shap_values = shap_values[basketball_team_index_player].values
all_values = []
for position, feature in enumerate(rank_dict["feature_names"]):
    # The loop already iterates rank_dict["feature_names"], so the feature is
    # always present there: no membership test or .index() lookup is needed
    # for the SHAP / SHARP columns. Only DiCE can lack an entry.
    if feature in Team_differences.index:  # Plot 1 (DiCE: Team_differences)
        dice_value = Team_differences[feature]
    else:
        dice_value = None
    all_values.append({
        "Feature": feature,
        "Team_differences": dice_value,
        # Plot 2 (SHAP values for the selected team)
        "SHAP_values": team_shap_values[position],
        # Plot 3 (SHARP: rank_dict). .iloc makes the positional access
        # explicit; plain [] with an integer on a label-indexed Series is a
        # deprecated fallback in pandas.
        "Rank_dict_values": rank_dict["values"].iloc[position],
    })
# 26. DiCE vs SHAP vs SHARP: Comparing Methods
# Convert to DataFrame and displaying the table.
st.markdown(f"<h4 style='text-align: center;'>DiCE vs SHAP vs SHARP: Comparing Methods</h1>", unsafe_allow_html=True)
#st.set_option('deprecation.showPyplotGlobalUse', False)
df_values_2 = pd.DataFrame(all_values)
df_values_2.set_index('Feature', inplace=True)
df_values_2.columns = ["DiCE Counterfactuals", "SHAP Values", "SHARP Values"] # Renaming columns. Replacing Rank Dict Values for SHARP Values.
# Highlight the largest and smallest values per column
light_green = 'background-color: rgb(144, 238, 144)'  # Light green.
light_red = 'background-color: rgba(255, 99, 71, 0.5)'  # Light red color (with transparency)


def highlight_top1(s):
    """Return one CSS style string per element of the Series ``s``.

    Elements equal to the largest value get the light-green background,
    elements equal to the smallest value get the light-red background, and
    every other element (including NaN) gets an empty style. When the largest
    and smallest values coincide, green wins because it is checked first.

    Parameters
    ----------
    s : pandas.Series
        One column of the DataFrame being styled (as supplied by
        ``DataFrame.style.apply``).

    Returns
    -------
    list of str
        Style strings, in the same order as ``s``.
    """
    # Extract the extreme values once instead of on every loop iteration.
    top_values = s.nlargest(1).values
    bottom_values = s.nsmallest(1).values

    def cell_style(v):
        # Green takes precedence over red when a value is both max and min.
        if v in top_values:
            return light_green
        if v in bottom_values:
            return light_red
        return ''

    return [cell_style(v) for v in s]
# Apply the highlight_top1 function to each column of the DataFrame and display it
df_styled_2 = df_values_2.style.apply(highlight_top1)
st.dataframe(df_styled_2, width=900)
st.markdown(f"**Figure 26**: Table aggregating the main insights from DiCE, SHAP and SHARP applied to {Team} and according to the selected decil. \n - **In green** is represent the highest positive value. \n - **In red** is represent the lowest negative value. \n - Note that highest DiCE values does not necessary mean worse features. DiCE can impact either the best features or the worst features. But overall, the lowest the player ranking, the higher amount tend to be the player average DiCE values.")
st.write("<div style='height: 150px;'></div>", unsafe_allow_html=True)
# 27. DiCE vs SHAP vs SHARP: Comparing Methods Graphically
st.markdown(f"<h4 style='text-align: center;'>DiCE vs SHAP vs SHARP: Comparing Methods Graphically</h1>", unsafe_allow_html=True)
#st.set_option('deprecation.showPyplotGlobalUse', False)
st.line_chart(df_values_2, width=800, height=600)
st.markdown(f"**Figure 27**: Graphic representation of the previous table.")
st.write("<div style='height: 150px;'></div>", unsafe_allow_html=True)
# 28. Create a Statistics DataFrame
st.markdown(f"<h4 style='text-align: center;'>DiCE vs SHAP vs SHARP: Statistics Comparison</h1>", unsafe_allow_html=True)
average_abs_values = df_values_2.abs().mean() # Calculate the average of the absolute values for each column
variance_values = df_values_2.var() # Calculate the variance for each column
diff_max_min_values = df_values_2.max() - df_values_2.min() # Calculate the difference between the maximum and minimum values for each column
df_stats = pd.DataFrame({
'Average Absolute Value': average_abs_values,
'Variance': variance_values,
'Max-Min Difference (Amplitude)': diff_max_min_values})
st.dataframe(df_stats, width=900)
st.markdown(f"**Figure 28**: Table aggregating the average values, the variance and the amplitude from DiCE, SHAP and SHARP applied to {Team} and according to the selected decil.")
st.write("<div style='height: 150px;'></div>", unsafe_allow_html=True)
# 29. DiCE vs SHAP vs SHARP: Correlation Matrix
st.markdown(f"<h4 style='text-align: center;'>DiCE vs SHAP vs SHARP: Correlation Matrix</h1>", unsafe_allow_html=True)
#st.set_option('deprecation.showPyplotGlobalUse', False)
correlation_matrix = df_values_2.corr()
st.write(correlation_matrix)
st.markdown(f"**Figure 29**: Correlation matrix between DiCE, SHAP and SHARP applied to {Team} and according to the selected decil. \n - **Positive values** represent a direct relationship, meaning that features increase and decrese together; \n - **Negative values** represent an indirect relationship, meaning when one of the methods increases, the other decreases; \n - **The highest the absolute value**, the most relevant the feature is.")
st.write("<div style='height: 150px;'></div>", unsafe_allow_html=True)
# Check if the user selects the type as Player
elif Team_vs_Player == 'Player':
Player = st.sidebar.selectbox('Select the Player:', ('Aaron Gordon', 'Aaron Nesmith', 'Aaron Wiggins', 'AJ Griffin', 'Al Horford', 'Alec Burks', 'Aleksej Pokusevski', 'Alex Caruso', 'Alperen Þengün', 'Andrew Nembhard', 'Andrew Wiggins', 'Anfernee Simons', 'Anthony Davis', 'Anthony Edwards', 'Anthony Lamb', 'Austin Reaves', 'Austin Rivers', 'Ayo Dosunmu', 'Bam Adebayo', 'Ben Simmons', 'Bennedict Mathurin', 'Blake Wesley', 'Bobby Portis', 'Bogdan Bogdanovi?', 'Bojan Bogdanovi?', 'Bol Bol', 'Bones Hyland', 'Bradley Beal', 'Brandon Clarke', 'Brandon Ingram', 'Brook Lopez', 'Bruce Brown', 'Bryce McGowens', 'Buddy Hield', 'Cade Cunningham', 'Caleb Houstan', 'Caleb Martin', 'Cam Reddish', 'Cam Thomas', 'Cameron Johnson', 'Cameron Payne', 'Caris LeVert', 'Cedi Osman', 'Chance Comanche', 'Chris Boucher', 'Chris Duarte', 'Chris Paul', 'Christian Braun', 'Christian Wood', 'Chuma Okeke', 'CJ McCollum', 'Clint Capela', 'Coby White', 'Cody Martin', 'Cole Anthony', 'Collin Sexton', 'Corey Kispert', 'Cory Joseph', 'Daishen Nix', 'Damian Lillard', 'Damion Lee', 'Daniel Gafford', 'Daniel Theis', 'Darius Garland', 'David Roddy', 'Davion Mitchell', 'Dean Wade', 'Deandre Ayton', 'Dejounte Murray', 'Delon Wright', 'DeMar DeRozan', 'Deni Avdija', 'Dennis Schröder', 'Dennis Smith Jr.', 'Derrick White', 'Desmond Bane', 'Devin Booker', 'Devin Vassell', 'Dillon Brooks', 'Domantas Sabonis', 'Donovan Mitchell', 'Donte DiVincenzo', 'Dorian Finney-Smith', 'Doug McDermott', 'Draymond Green', 'Drew Eubanks', 'Duncan Robinson', 'Dwight Powell', 'Dyson Daniels', 'Eric Gordon', 'Eugene Omoruyi', 'Evan Fournier', 'Evan Mobley', 'Franz Wagner', 'Fred VanVleet', 'Gabe Vincent', 'Gabe York', 'Gary Harris', 'Gary Payton II', 'Gary Trent Jr.', 'George Hill', 'Georges Niang', 'Giannis Antetokounmpo', 'Gordon Hayward', 'Grant Williams', 'Grayson Allen', 'Hamidou Diallo', 'Harrison Barnes', 'Haywood Highsmith', 'Herbert Jones', 'Immanuel Quickley', 'Isaac Okoro', 'Isaiah Hartenstein', 'Isaiah Jackson', 
'Isaiah Joe', 'Isaiah Livers', 'Isaiah Stewart', 'Ish Wainright', 'Ivica Zubac', 'Ja Morant', 'Jabari Smith Jr.', 'Jacob Gilyard', 'Jaden Ivey', 'Jaden McDaniels', 'Jae Crowder', 'Jakob Poeltl', 'Jalen Brunson', 'Jalen Duren', 'Jalen Green', 'Jalen McDaniels', 'Jalen Smith', 'Jalen Suggs', 'Jalen Williams', 'Jamal Murray', 'James Bouknight', 'James Harden', 'James Wiseman', 'Jaren Jackson Jr.', 'Jarred Vanderbilt', 'Jarrett Allen', 'Jaylen Brown', 'Jaylen Nowell', 'Jaylin Williams', 'Jayson Tatum', 'Jeenathan Williams', 'Jeff Green', 'Jerami Grant', 'Jeremiah Robinson-Earl', 'Jeremy Sochan', 'Jericho Sims', 'Jevon Carter', 'Jimmy Butler', 'Joe Harris', 'Joe Ingles', 'Joel Embiid', 'John Collins', 'John Konchar', 'John Wall', 'Johnny Davis', 'Jonas Valan?i?nas', 'Jonathan Kuminga', 'Jordan Clarkson', 'Jordan Goodwin', 'Jordan McLaughlin', 'Jordan Nwora', 'Jordan Poole', 'Jose Alvarado', 'Josh Giddey', 'Josh Green', 'Josh Hart', 'Josh Okogie', 'Josh Richardson', 'Joshua Primo', 'Jrue Holiday', 'Julius Randle', 'Justin Holiday', 'Justin Minaya', 'Justise Winslow', 'Jusuf Nurki?', 'Karl-Anthony Towns', 'Kawhi Leonard', 'Keegan Murray', 'Keita Bates-Diop', 'Keldon Johnson', 'Kelly Olynyk', 'Kelly Oubre Jr.', 'Kemba Walker', 'Kenrich Williams', 'Kentavious Caldwell-Pope', 'Kenyon Martin Jr.', 'Kevin Durant', 'Kevin Huerter', 'Kevin Knox', 'Kevin Love', 'Kevin Porter Jr.', 'Kevon Looney', 'Khris Middleton', 'Killian Hayes', 'Klay Thompson', 'Kris Dunn', 'Kristaps Porzi??is', 'Kyle Anderson', 'Kyle Kuzma', 'Kyle Lowry', 'Kyrie Irving', 'Lamar Stevens', 'LaMelo Ball', 'Landry Shamet', 'Larry Nance Jr.', 'Lauri Markkanen', 'LeBron James', 'Lonnie Walker IV', 'Louis King', 'Luguentz Dort', 'Luka Don?i?', 'Luka Šamani?', 'Luke Kennard', 'Mac McClung', 'Malaki Branham', 'Malcolm Brogdon', 'Malik Beasley', 'Malik Monk', 'Marcus Morris', 'Marcus Smart', 'Mark Williams', 'Markelle Fultz', 'Marvin Bagley III', 'Mason Plumlee', 'Matisse Thybulle', 'Max Strus', 'Maxi Kleber', 
'Michael Porter Jr.', 'Mikal Bridges', 'Mike Conley', 'Mike Muscala', 'Mitchell Robinson', 'Monte Morris', 'Moritz Wagner', 'Myles Turner', 'Naji Marshall', 'Nassir Little', 'Naz Reid', 'Nic Claxton', 'Nick Richards', 'Nickeil Alexander-Walker', 'Nicolas Batum', 'Nikola Joki?', 'Nikola Vu?evi?', 'Norman Powell', 'Obi Toppin', 'Ochai Agbaji', 'OG Anunoby', 'Onyeka Okongwu', 'Oshae Brissett', 'Otto Porter Jr.', 'P.J. Tucker', 'P.J. Washington', 'Paolo Banchero', 'Pascal Siakam', 'Pat Connaughton', 'Patrick Beverley', 'Patrick Williams', 'Paul George', 'Precious Achiuwa', 'Quentin Grimes', 'R.J. Hampton', 'RaiQuan Gray', 'Reggie Bullock', 'Reggie Jackson', 'Ricky Rubio', 'RJ Barrett', 'Robert Covington', 'Robert Williams', 'Rodney McGruder', 'Romeo Langford', 'Rudy Gobert', 'Rui Hachimura', 'Russell Westbrook', 'Saddiq Bey', 'Sam Hauser', 'Sandro Mamukelashvili', 'Santi Aldama', 'Scottie Barnes', 'Seth Curry', 'Shaedon Sharpe', 'Shai Gilgeous-Alexander', 'Shake Milton', 'Shaquille Harrison', 'Skylar Mays', 'Spencer Dinwiddie', 'Stanley Johnson', 'Stephen Curry', 'Steven Adams', 'T.J. McConnell', 'T.J. Warren', 'Talen Horton-Tucker', 'Tari Eason', 'Taurean Prince', 'Terance Mann', 'Terrence Ross', 'Terry Rozier', 'Théo Maledon', 'Thomas Bryant', 'Tim Hardaway Jr.', 'Tobias Harris', 'Torrey Craig', 'Trae Young', 'Tre Jones', 'Tre Mann', 'Trendon Watford', 'Trey Lyles', 'Trey Murphy III', 'Troy Brown Jr.', 'Ty Jerome', 'Tyler Herro', 'Tyrese Haliburton', 'Tyrese Maxey', 'Tyus Jones', 'Victor Oladipo', 'Walker Kessler', 'Wendell Carter Jr.', 'Wenyen Gabriel', 'Wesley Matthews', 'Will Barton', 'Xavier Tillman Sr.', 'Yuta Watanabe', 'Zach Collins', 'Zach LaVine', 'Ziaire Williams', 'Zion Williamson'))
# df
# Load the player statistics sheet.
df = pd.read_excel('2_NBA_Player_Stats_Regular_Season_2022_2023.xlsx', sheet_name= 'PBC 2022_23 NBA Player Stat')
df.info()
# Players appearing on more than one row: keep only their 'TOT' row
# (presumably the season total across teams -- confirm against the sheet).
df_duplicate = df[df.duplicated(subset="Player", keep=False)]
df_duplicate = df_duplicate[df_duplicate['Tm']=='TOT']
df_double_duplicate = df_duplicate[df_duplicate.duplicated(subset="Player", keep=False)]
# Drop every multi-row player, then add back their single 'TOT' row.
df = df[~df['Player'].duplicated(keep=False)]
#df = df.append(df_duplicate, ignore_index=True)
df = pd.concat([df, df_duplicate], ignore_index=True)
df = df[df['MP'] > 15]
# Normalise column names. regex=False is explicit so that "." is always a
# literal dot: the default of `regex` differs between pandas versions, and as
# a regular expression "." would match every character.
df.columns = (
    df.columns
    .str.replace("%", "_perc", regex=False)
    .str.replace("/", "_", regex=False)
    .str.replace(" ", "_", regex=False)
    .str.replace(".", "", regex=False)
    .str.lower()
)
df = df.drop(columns=["rk", # Not informative.
                      "pos", # Already existed information.
                      "tm" # Not informative.
                      ]).set_index("player")
# Eliminating redundant columns.
df = df.drop(columns=["fg", # Information already present in fg_perc.
"fga", # Information already present in fg_perc.
"3p", # Information already present in 3p_perc.
"3pa", # Information already present in 3p_perc.
"2p", # Information already present in 2p_perc.
"2pa", # Information already present in 2p_perc.
"ft", # Information already present in ft_perc.
"fta" # Information already present in ft_perc.
])
df = df.sort_values(by='pts', ascending=False)
X = df.drop(columns=["pts"])
y = df.pts / df.pts.max()
# Define the dictionary mapping short names to full names
variable_names = {
"age": "Player's age",
"g": "Games played",
"gs": "Games started",
"mp": "Minutes played per game",
"fg_perc": "Field goal percentage",
"3p_perc": "3-point field goal percentage",
"2p_perc": "2-point field goal percentage",
"efg_perc": "Effective field goal percentage",
"ft_perc": "Free throw percentage",
"orb": "Offensive rebounds per game",
"drb": "Defensive rebounds per game",
"trb": "Total rebounds per game",
"ast": "Assists per game",
"stl": "Steals per game",
"blk": "Blocks per game",
"tov": "Turnovers per game",
"pf": "Personal fouls per game"}
# Open a sidebar for a different feature option
Basketball_player_list = list(variable_names.keys()) # Basketball_player_list = X.columns.tolist()
Basketball_player_list_full = list(variable_names.values())
Basketball_player_feature_full_name = st.sidebar.selectbox('Feature in focus:', Basketball_player_list_full)
Basketball_player_feature = [key for key, value in variable_names.items() if value == Basketball_player_feature_full_name][0] # Get the corresponding short name from the dictionary
# Open a sidebar for a different feature option
Decil = st.sidebar.selectbox('Top Ranking (%) you desire to achieve (where 0,05 means top 5%):', ('0.05','0.1', '0.15', '0.2', '0.25', '0.3', '0.35', '0.4', '0.45', '0.5',
'0.55', '0.6', '0.65', '0.7', '0.75', '0.8', '0.85', '0.9'))
#Decil_final = 1 - float(Decil)
Decil_final = round(1 - float(Decil), 2)
Player_2 = st.sidebar.selectbox('Select a Team to compare:', ('Aaron Gordon', 'Aaron Nesmith', 'Aaron Wiggins', 'AJ Griffin', 'Al Horford', 'Alec Burks', 'Aleksej Pokusevski', 'Alex Caruso', 'Alperen Þengün', 'Andrew Nembhard', 'Andrew Wiggins', 'Anfernee Simons', 'Anthony Davis', 'Anthony Edwards', 'Anthony Lamb', 'Austin Reaves', 'Austin Rivers', 'Ayo Dosunmu', 'Bam Adebayo', 'Ben Simmons', 'Bennedict Mathurin', 'Blake Wesley', 'Bobby Portis', 'Bogdan Bogdanovi?', 'Bojan Bogdanovi?', 'Bol Bol', 'Bones Hyland', 'Bradley Beal', 'Brandon Clarke', 'Brandon Ingram', 'Brook Lopez', 'Bruce Brown', 'Bryce McGowens', 'Buddy Hield', 'Cade Cunningham', 'Caleb Houstan', 'Caleb Martin', 'Cam Reddish', 'Cam Thomas', 'Cameron Johnson', 'Cameron Payne', 'Caris LeVert', 'Cedi Osman', 'Chance Comanche', 'Chris Boucher', 'Chris Duarte', 'Chris Paul', 'Christian Braun', 'Christian Wood', 'Chuma Okeke', 'CJ McCollum', 'Clint Capela', 'Coby White', 'Cody Martin', 'Cole Anthony', 'Collin Sexton', 'Corey Kispert', 'Cory Joseph', 'Daishen Nix', 'Damian Lillard', 'Damion Lee', 'Daniel Gafford', 'Daniel Theis', 'Darius Garland', 'David Roddy', 'Davion Mitchell', 'Dean Wade', 'Deandre Ayton', 'Dejounte Murray', 'Delon Wright', 'DeMar DeRozan', 'Deni Avdija', 'Dennis Schröder', 'Dennis Smith Jr.', 'Derrick White', 'Desmond Bane', 'Devin Booker', 'Devin Vassell', 'Dillon Brooks', 'Domantas Sabonis', 'Donovan Mitchell', 'Donte DiVincenzo', 'Dorian Finney-Smith', 'Doug McDermott', 'Draymond Green', 'Drew Eubanks', 'Duncan Robinson', 'Dwight Powell', 'Dyson Daniels', 'Eric Gordon', 'Eugene Omoruyi', 'Evan Fournier', 'Evan Mobley', 'Franz Wagner', 'Fred VanVleet', 'Gabe Vincent', 'Gabe York', 'Gary Harris', 'Gary Payton II', 'Gary Trent Jr.', 'George Hill', 'Georges Niang', 'Giannis Antetokounmpo', 'Gordon Hayward', 'Grant Williams', 'Grayson Allen', 'Hamidou Diallo', 'Harrison Barnes', 'Haywood Highsmith', 'Herbert Jones', 'Immanuel Quickley', 'Isaac Okoro', 'Isaiah Hartenstein', 'Isaiah 
Jackson', 'Isaiah Joe', 'Isaiah Livers', 'Isaiah Stewart', 'Ish Wainright', 'Ivica Zubac', 'Ja Morant', 'Jabari Smith Jr.', 'Jacob Gilyard', 'Jaden Ivey', 'Jaden McDaniels', 'Jae Crowder', 'Jakob Poeltl', 'Jalen Brunson', 'Jalen Duren', 'Jalen Green', 'Jalen McDaniels', 'Jalen Smith', 'Jalen Suggs', 'Jalen Williams', 'Jamal Murray', 'James Bouknight', 'James Harden', 'James Wiseman', 'Jaren Jackson Jr.', 'Jarred Vanderbilt', 'Jarrett Allen', 'Jaylen Brown', 'Jaylen Nowell', 'Jaylin Williams', 'Jayson Tatum', 'Jeenathan Williams', 'Jeff Green', 'Jerami Grant', 'Jeremiah Robinson-Earl', 'Jeremy Sochan', 'Jericho Sims', 'Jevon Carter', 'Jimmy Butler', 'Joe Harris', 'Joe Ingles', 'Joel Embiid', 'John Collins', 'John Konchar', 'John Wall', 'Johnny Davis', 'Jonas Valan?i?nas', 'Jonathan Kuminga', 'Jordan Clarkson', 'Jordan Goodwin', 'Jordan McLaughlin', 'Jordan Nwora', 'Jordan Poole', 'Jose Alvarado', 'Josh Giddey', 'Josh Green', 'Josh Hart', 'Josh Okogie', 'Josh Richardson', 'Joshua Primo', 'Jrue Holiday', 'Julius Randle', 'Justin Holiday', 'Justin Minaya', 'Justise Winslow', 'Jusuf Nurki?', 'Karl-Anthony Towns', 'Kawhi Leonard', 'Keegan Murray', 'Keita Bates-Diop', 'Keldon Johnson', 'Kelly Olynyk', 'Kelly Oubre Jr.', 'Kemba Walker', 'Kenrich Williams', 'Kentavious Caldwell-Pope', 'Kenyon Martin Jr.', 'Kevin Durant', 'Kevin Huerter', 'Kevin Knox', 'Kevin Love', 'Kevin Porter Jr.', 'Kevon Looney', 'Khris Middleton', 'Killian Hayes', 'Klay Thompson', 'Kris Dunn', 'Kristaps Porzi??is', 'Kyle Anderson', 'Kyle Kuzma', 'Kyle Lowry', 'Kyrie Irving', 'Lamar Stevens', 'LaMelo Ball', 'Landry Shamet', 'Larry Nance Jr.', 'Lauri Markkanen', 'LeBron James', 'Lonnie Walker IV', 'Louis King', 'Luguentz Dort', 'Luka Don?i?', 'Luka Šamani?', 'Luke Kennard', 'Mac McClung', 'Malaki Branham', 'Malcolm Brogdon', 'Malik Beasley', 'Malik Monk', 'Marcus Morris', 'Marcus Smart', 'Mark Williams', 'Markelle Fultz', 'Marvin Bagley III', 'Mason Plumlee', 'Matisse Thybulle', 'Max Strus', 'Maxi 
Kleber', 'Michael Porter Jr.', 'Mikal Bridges', 'Mike Conley', 'Mike Muscala', 'Mitchell Robinson', 'Monte Morris', 'Moritz Wagner', 'Myles Turner', 'Naji Marshall', 'Nassir Little', 'Naz Reid', 'Nic Claxton', 'Nick Richards', 'Nickeil Alexander-Walker', 'Nicolas Batum', 'Nikola Joki?', 'Nikola Vu?evi?', 'Norman Powell', 'Obi Toppin', 'Ochai Agbaji', 'OG Anunoby', 'Onyeka Okongwu', 'Oshae Brissett', 'Otto Porter Jr.', 'P.J. Tucker', 'P.J. Washington', 'Paolo Banchero', 'Pascal Siakam', 'Pat Connaughton', 'Patrick Beverley', 'Patrick Williams', 'Paul George', 'Precious Achiuwa', 'Quentin Grimes', 'R.J. Hampton', 'RaiQuan Gray', 'Reggie Bullock', 'Reggie Jackson', 'Ricky Rubio', 'RJ Barrett', 'Robert Covington', 'Robert Williams', 'Rodney McGruder', 'Romeo Langford', 'Rudy Gobert', 'Rui Hachimura', 'Russell Westbrook', 'Saddiq Bey', 'Sam Hauser', 'Sandro Mamukelashvili', 'Santi Aldama', 'Scottie Barnes', 'Seth Curry', 'Shaedon Sharpe', 'Shai Gilgeous-Alexander', 'Shake Milton', 'Shaquille Harrison', 'Skylar Mays', 'Spencer Dinwiddie', 'Stanley Johnson', 'Stephen Curry', 'Steven Adams', 'T.J. McConnell', 'T.J. Warren', 'Talen Horton-Tucker', 'Tari Eason', 'Taurean Prince', 'Terance Mann', 'Terrence Ross', 'Terry Rozier', 'Théo Maledon', 'Thomas Bryant', 'Tim Hardaway Jr.', 'Tobias Harris', 'Torrey Craig', 'Trae Young', 'Tre Jones', 'Tre Mann', 'Trendon Watford', 'Trey Lyles', 'Trey Murphy III', 'Troy Brown Jr.', 'Ty Jerome', 'Tyler Herro', 'Tyrese Haliburton', 'Tyrese Maxey', 'Tyus Jones', 'Victor Oladipo', 'Walker Kessler', 'Wendell Carter Jr.', 'Wenyen Gabriel', 'Wesley Matthews', 'Will Barton', 'Xavier Tillman Sr.', 'Yuta Watanabe', 'Zach Collins', 'Zach LaVine', 'Ziaire Williams', 'Zion Williamson'))
# Opening our datasets
cfs = pd.read_excel(f'cfs_2_{Decil_final}.xlsx')
differences = pd.read_excel(f'differences_2_{Decil_final}.xlsx')
st.write("<div style='height: 650px;'></div>", unsafe_allow_html=True)
#if tabs == "1. General Sport Analysis":
with tabs[0]:
    # Tab 1 - dataset-wide view of the DiCE results. Renders an explanatory
    # banner, a heatmap of every suggested change (Figure 2) and a radar chart
    # of the mean suggested change per feature (Figure 6).
    #
    # NOTE: Figures 1, 3-5 and 7 (raw DataFrame, histogram, violin, density and
    # swarm plots) were commented out in the previous revision and are left
    # out here; restore them from version control if they are needed again.

    # Concepts to take into account.
    st.info("DICE: method used to generate diverse counterfactual explanations for machine learning models. In simple words, it provides 'what-if' explanations for the model output. 'Counterfactuals' represent the desired values. 'X' represent the initial values. 'Differences' will be lead from now onwards, represent SUGGESTED CHANGES (recommendations) between the counterfactuals and the initial values.")

    # Use the first column of each sheet as the row index so that rows can be
    # selected by label with .loc (done in place; later sections rely on it).
    cfs.set_index(cfs.columns[0], inplace=True)
    differences.set_index(differences.columns[0], inplace=True)

    # Suggested changes for the selected player. Not drawn in this tab: the
    # bar plot in the second tab reads this name, so it must stay defined here.
    Player_differences = differences.loc[Player]

    # 2. Heatmap: insights from SUGGESTED CHANGES.
    # An explicit Figure is created and handed to st.pyplot() instead of
    # relying on pyplot's implicit "current figure", which Streamlit deprecated
    # because the global figure is shared state across reruns/sessions.
    heatmap_fig, heatmap_ax = plt.subplots(figsize=(10, 10))
    sns.heatmap(differences, cmap='coolwarm', ax=heatmap_ax)
    st.markdown("<h4 style='text-align: center;'>Heatmap: Insights from SUGGESTED CHANGES</h4>", unsafe_allow_html=True)
    st.pyplot(heatmap_fig)  # Displaying plot in Streamlit
    plt.close(heatmap_fig)  # Release the figure; otherwise one leaks per rerun.
    st.markdown("**Figure 2**: Results from DICE. Representation of all the differences obtained in our dataset, per instance. Visual representation of how the features would need to be altered in the counterfactual scenarios compared to the original data to achieve the desired outcomes predicted by the model. Players (in Y-axis) vs Features (in X-axis), with variations in absolute values: \n - **Positive values** indicate an increase recommendation for that feature; \n - **Negative values** indicate a decrease recommendation for that feature.")
    st.write("<div style='height: 150px;'></div>", unsafe_allow_html=True)

    # 6. Radar chart: average SUGGESTED CHANGES per feature.
    categories = list(differences.columns)        # One spoke per feature.
    values = differences.mean().values.tolist()   # Mean difference per feature.
    values += values[:1]                          # Repeat the first point to close the polygon.
    n_categories = len(categories)
    angles = [n / n_categories * 2 * pi for n in range(n_categories)]  # Evenly spaced spoke angles.
    angles += angles[:1]                          # Close the loop for the angles as well.

    radar_fig = plt.figure(figsize=(8, 8))
    plt.polar(angles, values)                     # Outline in polar coordinates.
    plt.fill(angles, values, alpha=0.25)          # Semi-transparent fill of the enclosed area.
    plt.xticks(angles[:-1], categories)           # Label each spoke with its feature name.
    st.markdown("<h4 style='text-align: center;'>Radar Chart: Average SUGGESTED CHANGES per feature</h4>", unsafe_allow_html=True)
    st.pyplot(radar_fig)  # Displaying plot in Streamlit
    plt.close(radar_fig)
    st.markdown("**Figure 6**: Results from DICE. Another method to represent the differences obtained. **The axis defines each difference magnitude per feature.**")
    st.write("<div style='height: 150px;'></div>", unsafe_allow_html=True)
#else:
with tabs[1]:
    # Tab 2 - per-player view of the DiCE results. Renders a bar plot of the
    # suggested changes for the selected player (Figure 8) and a radar chart
    # that overlays the player's initial and recommended feature values on a
    # common normalized scale (Figure 14).
    #
    # NOTE: Figures 9-13 (KDE and the individual radar charts) were commented
    # out in the previous revision and are left out here; restore them from
    # version control if they are needed again.

    # 8. Bar plot of the suggested change per feature.
    # `Player_differences` is assigned earlier in the script (first tab).
    fig, ax = plt.subplots()
    ax.bar(Player_differences.index, Player_differences.values)
    ax.tick_params(axis='x', labelrotation=90)  # Vertical labels so feature names stay readable.
    ax.set_xlabel('Columns')
    ax.set_ylabel('Values')
    st.markdown(f"<h4 style='text-align: center;'>Bar Plot for {Player}</h4>", unsafe_allow_html=True)
    st.pyplot(fig)  # Displaying plot in Streamlit
    plt.close(fig)  # Release the figure; otherwise one leaks per rerun.
    st.markdown(f"**Figure 8**: Results from DICE for **{Player}**. As described in the previous tab, it provides 'what-if' explanations for the model output, by stating **which features would need to be altered in the counterfactual scenarios** compared to the original data to achieve the desired outcomes predicted by the model. \n - **Positive values** indicate an increase recommendation for that feature; \n - **Negative values** indicate a decrease recommendation for that feature.")
    st.write("<div style='height: 150px;'></div>", unsafe_allow_html=True)

    # 9. KDE (plot currently disabled).
    # The per-feature array is still computed: nothing in this tab reads it,
    # but code further down the script may - TODO confirm, then remove.
    differences_array = differences[Basketball_player_feature].values

    from sklearn.preprocessing import MinMaxScaler

    # X (initial) and cfs (recommended) are drawn on the SAME radar below, so
    # they must share one min/max per feature. Scaling each frame with its own
    # fit would map both to [0, 1] independently and make the overlay
    # meaningless. The scaler therefore accumulates the range over both frames
    # before transforming either. Arrays are passed positionally, matching the
    # plot, which also pairs X and cfs features by position.
    state_scaler = MinMaxScaler()
    state_scaler.partial_fit(X.to_numpy())
    state_scaler.partial_fit(cfs.to_numpy())
    normalized_data_X = state_scaler.transform(X.to_numpy())
    normalized_data_cfs = state_scaler.transform(cfs.to_numpy())

    # Differences are deltas rather than states, so they keep their own scale.
    scaler = MinMaxScaler()
    normalized_data_differences = scaler.fit_transform(differences)

    X_normalized = pd.DataFrame(normalized_data_X, columns=X.columns, index=X.index)
    cfs_normalized = pd.DataFrame(normalized_data_cfs, columns=cfs.columns, index=cfs.index)
    differences_normalized = pd.DataFrame(normalized_data_differences, columns=differences.columns, index=differences.index)

    # 14. Radar (per player) - INITIAL and RECOMMENDED STATE overlapped.
    selected_player = Player

    # Recommended (counterfactual) values; first point repeated to close the polygon.
    player_cfs_normalized = cfs_normalized.loc[selected_player]
    player_values_cfs = player_cfs_normalized.values.tolist()
    player_values_cfs += player_values_cfs[:1]

    # Initial values, closed the same way.
    player_X_normalized = X_normalized.loc[selected_player]
    player_values_X = player_X_normalized.values.tolist()
    player_values_X += player_values_X[:1]

    # One evenly spaced spoke per feature, closed back onto the first spoke.
    categories = list(player_cfs_normalized.index)
    n_categories = len(categories)
    angles = [n / n_categories * 2 * pi for n in range(n_categories)]
    angles += angles[:1]

    overlay_fig = plt.figure(figsize=(8, 8))
    # 'cfs' represents the desired values.
    plt.polar(angles, player_values_cfs, label='recommended', color='blue')
    plt.fill(angles, player_values_cfs, alpha=0.25, color='blue')
    # 'X' represents the initial values.
    plt.polar(angles, player_values_X, label='initial', color='green')
    plt.fill(angles, player_values_X, alpha=0.25, color='green')
    plt.xticks(angles[:-1], categories)
    plt.legend()
    st.markdown(f"<h4 style='text-align: center;'>INITIAL STATE and RECOMMENDED STATE: for {selected_player} - NORMALIZED</h4>", unsafe_allow_html=True)
    st.pyplot(overlay_fig)  # Displaying plot in Streamlit
    plt.close(overlay_fig)
    st.markdown(f"**Figure 14**: To obtain clear insights, we overlapped previous **INITIAL** and **RECOMMENDADED STATES** visualizations. Recapping: \n - **Blue line** represent **DESIRED** feature values (Counterfactuals); \n - **Green line** represent **INITIAL** feature values.")
    st.write("<div style='height: 150px;'></div>", unsafe_allow_html=True)
#else:
with tabs[2]:
# Concepts to take into account
st.info("SHAP: (SHapley Additive exPlanations) can be defined as a game theoretic approach to explain the output of a machine learning model. It explains the impact and the importance of each feature on model output/predictions for a specific instance. \n It provides a more interpretable view of the model's behavior and these values can be used to gain insights on which factors mostly influence specific predictions. \n Looks at the average value and give us information.")
# 15. SHAP Bar Plot
lr = LinearRegression(fit_intercept=False).fit(X, y)
explainer = shap.Explainer(lr, X)
shap_values = explainer(X)
# st.markdown(f"<h4 style='text-align: center;'>SHAP Bar Plot</h1>", unsafe_allow_html=True)
# st.set_option('deprecation.showPyplotGlobalUse', False)
# shap.plots.bar(shap_values, max_display=15)
# st.pyplot()
# st.markdown("**Figure 15**: Overview of the impact of **each feature on the model output/predictions**. It represents the **mean absolute value of each feature** for the overall dataset. \n - **The higher the SHAP Value mean**, the **higher its feature importance**.")
# st.write("<div style='height: 150px;'></div>", unsafe_allow_html=True)
# # 16. SHAP Beeswarm Plot