Skip to content

Commit 5457771

Browse files
Add scatter plot
1 parent 8d79749 commit 5457771

1 file changed

Lines changed: 80 additions & 4 deletions

File tree

sections/analyze.py

Lines changed: 80 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@ def is_university_ip(ip):
3737

3838

3939
# Créer les onglets principaux
40-
tab1, tab2, tab3, tab4 = st.tabs(
41-
["Explore data", "Analysis", "Foreign IP addresses", "Sankey"]
40+
tab1, tab2, tab3, tab4, tab5 = st.tabs(
41+
["Explore data", "Dataviz", "Analysis", "Foreign IP addresses", "Sankey"]
4242
)
4343

4444
# Onglet Analysis
@@ -188,6 +188,82 @@ def set_dynamic():
188188

189189
# Onglet Dataviz
190190
with tab2:
191+
st.subheader("Dataviz")
192+
193+
# Créer ici un scatter plot permettant une visualisation interactive des données (IP source avec le nombre
194+
# d’occurrences de destination contactées, incluant le nombre de flux rejetés et autorisés).
195+
196+
# Agréger les données par IP source
197+
df_agg = data.group_by("ipsrc").agg(
198+
[
199+
pl.col("ipdst").n_unique().alias("distinct_destinations"),
200+
((pl.col("action") == "PERMIT").cast(pl.Int64)).sum().alias("count_permit"),
201+
((pl.col("action") == "DENY").cast(pl.Int64)).sum().alias("count_deny"),
202+
]
203+
)
204+
205+
# Créer un scatter plot
206+
if not df_agg.is_empty():
207+
# We need to recreate the aggregation to count distinct destinations per action type
208+
permit_agg = (
209+
data.filter(pl.col("action") == "PERMIT")
210+
.group_by("ipsrc")
211+
.agg(
212+
[
213+
pl.col("ipdst").n_unique().alias("distinct_destinations"),
214+
pl.count("ipsrc").alias("connections"),
215+
]
216+
)
217+
.with_columns(pl.lit("PERMIT").alias("action"))
218+
)
219+
220+
deny_agg = (
221+
data.filter(pl.col("action") == "DENY")
222+
.group_by("ipsrc")
223+
.agg(
224+
[
225+
pl.col("ipdst").n_unique().alias("distinct_destinations"),
226+
pl.count("ipsrc").alias("connections"),
227+
]
228+
)
229+
.with_columns(pl.lit("DENY").alias("action"))
230+
)
231+
232+
# Combine both datasets
233+
combined_df = pl.concat([permit_agg, deny_agg])
234+
235+
# Convert to pandas
236+
df_pandas = combined_df.to_pandas()
237+
238+
# Create the scatter plot with up to two points per IP source (one for PERMIT, one for DENY, when such flows exist)
239+
fig = px.scatter(
240+
df_pandas,
241+
x="ipsrc",
242+
y="distinct_destinations",
243+
color="action",
244+
size="connections",
245+
color_discrete_map={"PERMIT": "blue", "DENY": "red"},
246+
hover_data=["connections", "action"],
247+
title="Number of Distinct Destinations Contacted by Each IP Source",
248+
labels={
249+
"ipsrc": "Source IP Address",
250+
"distinct_destinations": "Number of Distinct Destinations",
251+
"connections": "Number of Connections",
252+
"action": "Action",
253+
},
254+
)
255+
256+
# Improve layout for better readability
257+
fig.update_layout(
258+
xaxis={"categoryorder": "total descending"}, legend_title="Action Type"
259+
)
260+
261+
st.plotly_chart(fig, use_container_width=True)
262+
else:
263+
st.info("No data available for scatter plot.")
264+
265+
# Onglet Analysis
266+
with tab3:
191267
st.subheader("Analysis")
192268

193269
# Afficher ici le top 10 des ports inférieurs à 1024 avec accès autorisé
@@ -322,7 +398,7 @@ def set_dynamic():
322398

323399

324400
# Onglet Foreign IP addresses
325-
with tab3:
401+
with tab4:
326402
st.subheader("🚫 List of access outside the university network")
327403

328404
if "ipsrc" in data.columns and "action" in data.columns:
@@ -363,7 +439,7 @@ def set_dynamic():
363439

364440

365441
# Onglet Sankey
366-
with tab4:
442+
with tab5:
367443
st.subheader("Sankey Diagram")
368444

369445
def create_sankey(df, source_col, target_col):

0 commit comments

Comments
 (0)