-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgithub_graph_test.py
More file actions
79 lines (64 loc) · 2.04 KB
/
github_graph_test.py
File metadata and controls
79 lines (64 loc) · 2.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import pandas as pd
import matplotlib.pyplot as plt
# Single lookup table for commit authors. Each row is
#     (raw name as it appears in the `username` column,
#      canonical display name,
#      organisation)
# Several raw names may point at the same person. Both public dicts below
# are derived from this table so their key sets cannot drift apart.
_AUTHOR_ALIASES = (
    ('Peter Borissow', 'Peter Borissow', 'Kartographia'),
    ('Jordan.Dobson', 'Jordan Dobson', 'Culmen'),
    ('JohnKim', 'John Kim', 'Culmen'),
    ('pborissow', 'Peter Borissow', 'Kartographia'),
    ('Peter.Borissow', 'Peter Borissow', 'Kartographia'),
    ('Logan Mohseni', 'Logan Mohseni', 'Culmen'),
    ('Peter', 'Peter Borissow', 'Kartographia'),
    ('Kenneth', 'Ken McQuade', 'Kartographia'),
    ('Jan', 'Jan', 'Kartographia'),
    ('Jordan', 'Jordan Dobson', 'Culmen'),
    ('jhkcm0219', 'John Kim', 'Culmen'),
    ('sashatrubetskoy', 'Sasha Trubetskoy', 'Kartographia'),
    ('Erik Raith', 'Erik Raith', 'Kartographia'),
    ('Velazquez', 'Ryan Velazquez', 'Azimuth1'),
    ('swordysrepo', 'Ken McQuade', 'Kartographia'),
    ('Sasha Trubetskoy', 'Sasha Trubetskoy', 'Kartographia'),
    ('Ryan V', 'Ryan Velazquez', 'Azimuth1'),
    ('Amanda W', 'Amanda West', 'Azimuth1'),
    ('Sasha T', 'Sasha Trubetskoy', 'Kartographia'),
)

# Raw username -> organisation.
user_to_org = {alias: org for alias, _person, org in _AUTHOR_ALIASES}

# Raw username -> canonical display name.
user_to_user = {alias: person for alias, person, _org in _AUTHOR_ALIASES}
# ---------------------------------------------------------------------------
# Script: read the commit log, tag each commit with a canonical user and
# organisation, write the tagged table back out, then plot the cumulative
# number of changed lines per group over time.
# ---------------------------------------------------------------------------

# Commits that changed this many lines or more are excluded from every output
# (presumably bulk imports / generated files -- confirm with the data owner).
MAX_LINES_CHANGED = 10000
# Optional lower bound on the commit date for the printouts and the plot,
# e.g. '2021-02-01'. None keeps the full history.
MIN_DATE = None
# Column the cumulative totals are grouped by: 'user' or 'org'.
GROUPCOL = 'user'

df = pd.read_csv('allCommits.csv')
df['org'] = df['username'].map(user_to_org)
df['user'] = df['username'].map(user_to_user)

# .copy() makes the filtered frame independent of the one read from disk, so
# the column assignments below do not write to a slice of another frame.
df = df[df['num_lines_changed'] < MAX_LINES_CHANGED].copy()

# Usernames missing from the lookup tables map to NaN, and groupby() drops
# NaN keys by default, so those commits would vanish from the plot without
# any hint. Report them instead of failing silently.
missing = df[GROUPCOL].isna()
if missing.any():
    print(
        missing.sum(),
        'commit(s) have no', repr(GROUPCOL),
        'value and are left out of the plot; usernames:',
        sorted(df.loc[missing, 'username'].dropna().astype(str).unique()),
    )

# Written before the date conversion and sort below, so the file keeps the
# input's row order and its original date text.
df.to_csv('allCommits-users-orgs.csv')

df['date'] = pd.to_datetime(df['date'])
df = df.sort_values('date')
if MIN_DATE is not None:
    df = df[df['date'] > MIN_DATE].copy()

# Running total per group; relies on the rows already being in date order.
cumlines = df.groupby(GROUPCOL)['num_lines_changed'].cumsum()
df['cum_lines_changed'] = cumlines
print(df)
print(df.sort_values('num_lines_changed')[['date', 'username', 'num_lines_changed']])

fig, ax = plt.subplots()
for user_name, user_df in df.groupby(GROUPCOL):
    # label= sets the legend entry directly; no need to copy the column onto
    # the group slice under the group's name.
    user_df.plot(x='date', y='cum_lines_changed', ax=ax, label=user_name)
ax.grid()
ax.set_title('Cumulative lines changed')
plt.show()