-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathUKPlayers.py
More file actions
150 lines (128 loc) · 5.44 KB
/
UKPlayers.py
File metadata and controls
150 lines (128 loc) · 5.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import geopandas as gpd
import os
import matplotlib.pyplot as plt
import matplotlib.pylab as pl
from matplotlib.ticker import LogFormatter
from matplotlib.ticker import ScalarFormatter
import matplotlib.colors as colors
# Path of the shapefile holding the area polygons used as the base map.
fp = 'geoassets/Areas.shp'
# Read the polygons; `all_df` is later left-merged with the survey totals on
# its 'name' column before being plotted.
all_df = gpd.read_file(fp)
# Now the map is sorted out, add the data
import pandas as pd
import numpy as np
# Load the questionnaire responses and reduce them to the UK rows, with one
# integer player count per age band, a per-response total, and an upper-case
# area name derived from the Q2 answer.
df = pd.read_csv("data/Questionaire_Responces.csv")

# Columns that play no part in this analysis.
df = df.drop(columns=['UserID', 'UserNo', 'Ended', 'Q1', 'Q4', 'Q5', 'Q6'])

# Index the responses by start time so they can be sliced by date later on.
df['Started'] = pd.to_datetime(df['Started'], dayfirst=True)
df = df.set_index(['Started'])

# Short column name -> questionnaire column holding that age band's count.
_AGE_BAND_COLUMNS = {
    'Q3.1': 'Q3.1. 0 - 7 years old',
    'Q3.2': 'Q3.2. 8 - 14 years old',
    'Q3.3': 'Q3.3. 15 - 19 years old',
    'Q3.4': 'Q3.4. 20 +',
}
for _short, _long in _AGE_BAND_COLUMNS.items():
    # '-' and blank answers both count as zero players.  to_numeric accepts
    # the column whether it arrived as numbers or as text (including text
    # such as '3.0'), and still raises on anything that is not a number
    # instead of hiding it.
    df[_short] = pd.to_numeric(df[_long].replace('-', '0')).fillna(0).astype(int)
df['total'] = df[list(_AGE_BAND_COLUMNS)].sum(axis=1)

# Keep UK responses only.  Copy the selection so the column added below is
# written to an independent frame rather than to a slice of the full table
# (which triggers pandas' SettingWithCopy warning).
df = df[df['UK'] == 1.0].copy()

# The area name is whatever precedes the first digit of the Q2 answer,
# upper-cased.  Surrounding whitespace is stripped so that ' ab12' and
# 'AB 12' both give 'AB' and can match the map on merge.
# NOTE(review): assumes the shapefile's 'name' values carry no surrounding
# whitespace themselves -- confirm against geoassets/Areas.shp.
df['name'] = df['Q2'].str.split('[0-9]').str[0].str.strip().str.upper()
# Work on a time-ordered copy of the responses so they can be sliced by date.
clean_df = df.copy().sort_index()

# Reporting window.  The start is set early on purpose so that the earliest
# responses, which are awkward to plot, are still picked up.
start_date, end_date = (pd.to_datetime(day) for day in ('2020-6-1', '2022-6-9'))

# Label-based slice on the datetime index; both end points are included.
df = clean_df.loc[slice(start_date, end_date)]
# Responses covering at most five people are treated as personal, anything
# larger as institutional.
_count_columns = ['Q3.1', 'Q3.2', 'Q3.3', 'Q3.4', 'total']
_personal_rows = df.loc[df['total'] <= 5]
_institutional_rows = df.loc[df['total'] > 5]

# Sum every count column within each area name.
df_ind = _personal_rows.groupby('name')[_count_columns].sum()
df_big = _institutional_rows.groupby('name')[_count_columns].sum()
print(df_ind)

# Left-join onto the map data so that areas without any responses are kept;
# their count columns come out as NaN.
df_ind = all_df.merge(df_ind, on='name', how='left')
df_big = all_df.merge(df_big, on='name', how='left')
# Draw two side-by-side choropleth maps -- personal responses on the left,
# institutional on the right -- each with its own logarithmic colour scale
# and colourbar, then save the figure as a PNG.


def _log_norm(totals):
    """Return a LogNorm spanning *totals* with its lower bound kept >= 1.

    A logarithmic scale cannot start at zero, so an area whose responses sum
    to 0 must not be allowed to set ``vmin``.  NaN entries are skipped by
    ``min()`` / ``max()``.
    """
    return colors.LogNorm(vmin=max(1, totals.min()), vmax=totals.max())


def _draw_map(gdf, ax, norm):
    """Draw *gdf* on *ax*, colouring each area by its 'total' column."""
    return gdf.plot(
        column='total',
        cmap='viridis_r',
        norm=norm,
        ax=ax,
        linewidth=0.5,
        edgecolor='0.0',
        # Areas whose 'total' is missing are drawn in grey.
        missing_kwds={
            "color": "lightgrey",
            "edgecolor": "black",
            "label": "Missing values",
        },
    )


def _add_colorbar(ax, norm):
    """Attach a colourbar for *norm* next to *ax*.

    Returns:
        (mappable, colorbar): the ScalarMappable backing the bar and the bar.
    """
    mappable = plt.cm.ScalarMappable(cmap='viridis_r', norm=norm)
    # The mappable only carries the colour scale; it has no data of its own.
    mappable.set_array([])
    colorbar = ax.get_figure().colorbar(
        mappable,
        ax=ax,
        shrink=0.4,
        # A formatter binds itself to a single axis, so every colourbar gets
        # its own instance instead of sharing one.
        format=ScalarFormatter(),
        ticks=[1, 2, 5, 10, 20, 50, 100, 200, 500, 1000, 2000],
    )
    return mappable, colorbar


# create figure and axes for Matplotlib
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 8), dpi=300)

# One norm per dataset, shared by the map and its colourbar so that the two
# can never disagree.
norm1 = _log_norm(df_ind['total'])
norm2 = _log_norm(df_big['total'])

# create maps
plt1 = _draw_map(df_ind, ax1, norm1)
plt2 = _draw_map(df_big, ax2, norm2)

# The lon/lat axes add nothing to a choropleth, so hide them.
ax1.axis('off')
ax2.axis('off')

# add titles
fig.suptitle(f'Boardgame players in the United Kingdom\nfrom {start_date} to {end_date}', fontsize=16)
ax1.set_title('Personal', fontdict={'fontsize': '14'})
ax2.set_title('Institutional', fontdict={'fontsize': '14'})

# Create colourbars as legends
sm1, cbar1 = _add_colorbar(ax1, norm1)
sm2, cbar2 = _add_colorbar(ax2, norm2)

# this will save the figure as a high-res png. you can also save as svg
os.makedirs('figures', exist_ok=True)  # savefig does not create directories
fig.savefig('figures/UKPlayers.png', dpi=300)
# Plot how the number of players grew over time, once for personal and once
# for institutional responses, and save each chart as a PNG.


def _plot_cumulative(totals, title, out_path):
    """Save a line chart of the running sum of *totals* over its index.

    Args:
        totals: Series of per-response player counts.
        title: Chart title.
        out_path: File the chart is written to; its directory is created if
            it does not exist yet.

    Returns:
        (running, ax, fig): the cumulative series, its axes and its figure.
    """
    running = totals.sort_index().cumsum()
    # Draw on a figure of our own rather than on whatever pyplot currently
    # considers the "current" figure.
    fig, ax = plt.subplots(figsize=(8, 6))
    running.plot(kind='line', ax=ax, rot=90, colormap=pl.cm.viridis, title=title)
    ax.set_ylabel("Number of People")
    fig.tight_layout()
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)  # savefig does not create directories
    fig.savefig(out_path, dpi=300)
    return running, ax, fig


# Go back to the per-response rows (not the per-area aggregates used for the
# maps) and separate personal from institutional data again.
df_ind = df[df['total'] <= 5]
df_big = df[df['total'] > 5]

# New figures are created at 300 dpi from here on.
plt.rcParams["figure.dpi"] = 300

# for the individual data, make a plot of total vs time
title = "Personal players over time in the UK"
df_ind_sum, ax, fig = _plot_cumulative(
    df_ind['total'], title, 'figures/TotalUKPlayersOverTime.png')
print(df_ind_sum)

# for the group data, make a plot of total vs time
title = "Institutional players over time in the UK"
_, ax, fig = _plot_cumulative(
    df_big['total'], title, 'figures/TotalUKGroupsOverTime.png')