-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathNotebookDataClock.py
More file actions
230 lines (185 loc) · 10.2 KB
/
NotebookDataClock.py
File metadata and controls
230 lines (185 loc) · 10.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from matplotlib.cm import get_cmap
from matplotlib.markers import MarkerStyle
import ipywidgets as widgets
from IPython.display import display, FileLink
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import DBSCAN
from datetime import datetime
# ---------------------------------------------------------------------------
# Upload and action controls
# ---------------------------------------------------------------------------
# A single CSV may be uploaded; the two buttons trigger processing and the
# download link, and `output` collects everything the callbacks print or draw.
file_upload = widgets.FileUpload(multiple=False, accept='.csv')
upload_button = widgets.Button(description="Process File")
download_button = widgets.Button(description="Download Image")
output = widgets.Output()

# ---------------------------------------------------------------------------
# Tunable parameters (clustering, appearance, output file)
# ---------------------------------------------------------------------------
eps_slider = widgets.FloatSlider(
    description="DBSCAN eps:",
    value=0.4,
    min=0.1,
    max=2.0,
    step=0.05,
)
min_samples_slider = widgets.IntSlider(
    description="Min Samples:",
    value=5,
    min=1,
    max=20,
    step=1,
)
marker_size_slider = widgets.IntSlider(
    description="Marker Size:",
    value=200,
    min=50,
    max=500,
    step=10,
)
color_map_dropdown = widgets.Dropdown(
    description="Color Map:",
    options=['tab10', 'viridis', 'plasma', 'inferno', 'coolwarm'],
    value='tab10',
)
save_path_text = widgets.Text(
    description="Save Path:",
    value='polar_plot_with_dbscan_clusters.png',
)

# Column pickers start empty; their options are filled from the CSV header
# once a file has been processed.
timestamp_column_dropdown = widgets.Dropdown(options=[], description="Timestamp Column")
event_type_column_dropdown = widgets.Dropdown(options=[], description="Event Type Column")

# ---------------------------------------------------------------------------
# Layout
# ---------------------------------------------------------------------------
config_panel = widgets.VBox(
    [
        eps_slider,
        min_samples_slider,
        marker_size_slider,
        color_map_dropdown,
        timestamp_column_dropdown,
        event_type_column_dropdown,
        save_path_text,
    ]
)
display(
    widgets.VBox(
        [
            file_upload,
            upload_button,
            config_panel,
            download_button,
            output,
        ]
    )
)

# Path of the most recently saved plot; rebound by process_file and read by
# download_plot.
saved_plot_path = "polar_plot_with_dbscan_clusters.png"
# Every day is drawn this many rings further out so the centre stays blank.
_DAY_OFFSET = 5
# Accepted timestamp layouts; the seconds field is optional.
_TIMESTAMP_FORMATS = ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M")


def _read_uploaded_csv(upload_value):
    """Load the first uploaded file into a DataFrame without writing to disk."""
    import io  # local import: only this helper needs it

    # ipywidgets >= 8 exposes a tuple of dicts; ipywidgets 7 exposes a dict
    # keyed by file name. Accept both layouts.
    if isinstance(upload_value, dict):
        first_file = next(iter(upload_value.values()))
    else:
        first_file = upload_value[0]
    return pd.read_csv(io.BytesIO(bytes(first_file['content'])))


def _sync_column_options(dropdown, columns):
    """Refresh a column dropdown only when the column list actually changed."""
    # Assigning `options` resets the selection to the first entry, so skip the
    # assignment when nothing changed and restore the previous choice when the
    # new column list still contains it.
    if list(dropdown.options) == columns:
        return
    previous = dropdown.value
    dropdown.options = columns
    if previous in columns:
        dropdown.value = previous


def _parse_timestamp(raw):
    """Return ``(day_of_month, fractional_hour)`` for one timestamp cell.

    Raises:
        ValueError: if *raw* matches none of ``_TIMESTAMP_FORMATS``.
    """
    text = str(raw).strip()  # str() also turns NaN / numeric cells into text
    for fmt in _TIMESTAMP_FORMATS:
        try:
            moment = datetime.strptime(text, fmt)
        except ValueError:
            continue
        return moment.day, moment.hour + moment.minute / 60
    raise ValueError("expected 'YYYY-MM-DD HH:MM[:SS]'")


def _draw_clock_face(ax):
    """Draw the 24-hour dial: tick labels, hour/quarter-hour spokes, day labels."""
    quarter_hours = np.arange(0, 24, 0.25)

    # Angular ticks every 15 minutes; whole hours get an empty tick label
    # because they are labelled separately (in bold) below.
    ax.set_xticks(quarter_hours * np.pi / 12)
    ax.set_xticklabels([
        f'{int(number)}:{int((number % 1) * 60):02d}' if (number % 1) != 0 else ''
        for number in quarter_hours
    ])
    ax.set_theta_zero_location("N")
    ax.set_theta_direction(-1)

    # Day numbers 1-31 along the spokes at 0, 90, 180 and 270 degrees. The
    # spokes themselves are drawn by the hour loop below (hours 0, 6, 12, 18).
    for angle in (0, np.pi / 2, np.pi, 3 * np.pi / 2):
        for day in range(1, 32):
            ax.text(angle, day + _DAY_OFFSET, str(day),
                    horizontalalignment='center', verticalalignment='center',
                    fontsize=10)

    # Solid black spoke and bold label for every full hour.
    for hour in range(24):
        angle = hour * np.pi / 12
        ax.axvline(angle, color='black', linewidth=2, alpha=1.0)
        ax.text(angle, 38, f'{hour}:00',
                horizontalalignment='center', verticalalignment='center',
                fontsize=12, fontweight='bold')

    # Faint grey spoke for each quarter hour; full hours are skipped so the
    # grey line does not paint over the black hour spoke.
    for quarter in quarter_hours:
        if quarter % 1 == 0:
            continue
        ax.axvline(quarter * np.pi / 12, color='gray', linewidth=1, alpha=0.5)

    # White disc over the innermost (blank) rings.
    hub = plt.Circle((0, 0), _DAY_OFFSET, color='white', fill=True, zorder=5,
                     transform=ax.transData._b)
    ax.add_patch(hub)


def process_file(change):
    """Read the uploaded CSV, cluster its events and draw the polar "data clock".

    Registered as the click handler of ``upload_button``. Each valid row is
    placed at an angle given by its time of day and a radius given by its day
    of month. Points are clustered with DBSCAN on the standardised
    (angle, day) pairs, and the figure is saved to the path in
    ``save_path_text``.

    Args:
        change: argument supplied by ``on_click``; not used.

    Side effects:
        Writes all messages and the figure into ``output``, refreshes the two
        column dropdowns, and rebinds the module-level ``saved_plot_path``
        after a successful save.
    """
    global saved_plot_path
    with output:
        output.clear_output()  # Clear previous output

        if not file_upload.value:
            print("Please upload a file first.")
            return

        try:
            data = _read_uploaded_csv(file_upload.value)
        except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as err:
            print(f"Could not read the uploaded CSV: {err}")
            return
        print("File uploaded and processed successfully!")

        # Offer the CSV's columns without discarding an existing selection.
        columns = data.columns.tolist()
        _sync_column_options(timestamp_column_dropdown, columns)
        _sync_column_options(event_type_column_dropdown, columns)

        timestamp_column = timestamp_column_dropdown.value
        event_column = event_type_column_dropdown.value
        if timestamp_column is None or event_column is None:
            print("Please select both the Timestamp and Event Type columns.")
            return
        if timestamp_column == event_column:
            print("Note: the Timestamp and Event Type dropdowns point to the same column; "
                  "adjust them and click 'Process File' again if that is not intended.")

        event_types = data[event_column]

        # Parse each row exactly once so clustering and plotting always see
        # the same set of valid rows.
        angles, days, events = [], [], []
        for raw, event in zip(data[timestamp_column], event_types):
            try:
                day, hour = _parse_timestamp(raw)
            except ValueError as err:
                print(f"Skipping invalid timestamp: {raw} ({err})")
                continue
            angles.append(np.pi / 12 * hour)  # 24 h map onto a full turn
            days.append(day)
            events.append(event)

        if not angles:
            print("No valid timestamps found. Please check your data.")
            return

        points = pd.DataFrame({"angle": angles, "day": days, "event": events})
        polar_coordinates = points[["angle", "day"]].to_numpy(dtype=float)

        # One marker/colour pair per event type, in order of first appearance.
        unique_event_types = event_types.unique()
        cmap = plt.colormaps[color_map_dropdown.value]
        markers = ['o', 's', '^', 'D', 'P', '*', 'X']
        event_styles = {
            event: (markers[i % len(markers)], cmap(i / len(unique_event_types)))
            for i, event in enumerate(unique_event_types)
        }

        # Standardise both axes so eps applies comparably to angle and day.
        scaled_polar_coordinates = StandardScaler().fit_transform(polar_coordinates)
        dbscan = DBSCAN(eps=eps_slider.value, min_samples=min_samples_slider.value)
        labels = dbscan.fit_predict(scaled_polar_coordinates)
        unique_labels = {int(label) for label in labels}
        print(f"Identified clusters: {unique_labels}")

        fig = plt.figure(figsize=(30, 30))
        ax = fig.add_subplot(111, projection='polar')
        _draw_clock_face(ax)

        # One scatter call per event type instead of one per row.
        for event, group in points.groupby("event", sort=False):
            marker, color = event_styles[event]
            ax.scatter(group["angle"], group["day"] + _DAY_OFFSET, color=color,
                       s=marker_size_slider.value, marker=MarkerStyle(marker),
                       zorder=10)

        # Mark every DBSCAN cluster (label -1 is noise) at its mean position.
        # NOTE: the centre is the arithmetic mean of the angles, so a cluster
        # that straddles midnight is not averaged across the wrap-around.
        for label in unique_labels:
            if label == -1:
                continue
            cluster_points = polar_coordinates[labels == label]
            cluster_angle, cluster_radius = np.mean(cluster_points, axis=0)
            ax.plot(cluster_angle, cluster_radius + _DAY_OFFSET, 'ro', markersize=10)
            circle = plt.Circle((cluster_angle, cluster_radius + _DAY_OFFSET), 1,
                                color='red', fill=False, linewidth=3, alpha=0.5)
            ax.add_patch(circle)

        # Legend built from proxy handles so each event type appears once.
        handles = [
            plt.Line2D([0], [0], marker=MarkerStyle(event_styles[event][0]), color='w',
                       markerfacecolor=event_styles[event][1], markersize=12)
            for event in unique_event_types
        ]
        ax.legend(handles, [str(event) for event in unique_event_types],
                  title="Event Types", bbox_to_anchor=(1.05, 1), loc='upper left')

        # Radial axis: 31 day rings plus the blank inner rings.
        ax.set_ylim(0, 37)
        ring_radii = range(1, 37)
        ax.set_yticks(np.arange(1, 37))
        # Label each ring with the day it represents (radius minus the shift);
        # the blank inner rings get no label.
        ax.set_yticklabels([
            str(radius - _DAY_OFFSET) if radius > _DAY_OFFSET else ''
            for radius in ring_radii
        ])
        ax.grid(True, which='both', linestyle='-', linewidth=1.5, color='gray', alpha=0.5)

        # Save first, then show; remember the path only if the save succeeded
        # so download_plot never points at a file that was not written.
        target_path = save_path_text.value
        save_error = None
        try:
            fig.savefig(target_path, bbox_inches='tight')
        except (OSError, ValueError) as err:
            save_error = err
        plt.show()
        plt.close(fig)  # Close the figure to avoid display issues

        if save_error is not None:
            print(f"Could not save plot to {target_path!r}: {save_error}")
            return
        saved_plot_path = target_path
        print(f"Plot saved as {saved_plot_path}")
def download_plot(change):
    """Show a download link for the most recently saved plot.

    Registered as the click handler of ``download_button``. The link (or a
    hint, when the file at ``saved_plot_path`` does not exist) is written
    into ``output``.

    Args:
        change: argument supplied by ``on_click``; not used.
    """
    with output:
        plot_exists = os.path.exists(saved_plot_path)
        if plot_exists:
            # Render a clickable link to the saved image.
            link = FileLink(saved_plot_path, result_html_prefix="Click here to download the plot: ")
            display(link)
        else:
            print("No plot to download. Please generate the plot first.")
# Register the click callbacks: "Download Image" shows the download link,
# "Process File" runs the CSV-to-plot pipeline.
download_button.on_click(download_plot)
upload_button.on_click(process_file)