@@ -37,8 +37,8 @@ def is_university_ip(ip):
3737
3838
# Create the main tabs. The labels are shown verbatim in the UI, so they must
# not carry stray whitespace; the order here fixes which tabN variable maps to
# which section below.
tab1, tab2, tab3, tab4, tab5 = st.tabs(
    ["Explore data", "Dataviz", "Analysis", "Foreign IP addresses", "Sankey"]
)
4343
4444# Onglet Analysis
@@ -188,6 +188,82 @@ def set_dynamic():
188188
# Dataviz tab
with tab2:
    st.subheader("Dataviz")

    # Interactive scatter plot of the data: for every source IP, the number of
    # distinct destinations it contacted, split into permitted and denied
    # flows (one point per source IP and action).

    # Run the same aggregation once per action so both frames share a schema
    # and can be concatenated; PERMIT rows come first, then DENY rows.
    combined_df = pl.concat(
        [
            data.filter(pl.col("action") == action)
            .group_by("ipsrc")
            .agg(
                [
                    pl.col("ipdst").n_unique().alias("distinct_destinations"),
                    pl.count("ipsrc").alias("connections"),
                ]
            )
            .with_columns(pl.lit(action).alias("action"))
            for action in ("PERMIT", "DENY")
        ]
    )

    # Guard on the frame that is actually plotted: data that has rows but no
    # PERMIT/DENY action must show the info message, not an empty chart.
    if combined_df.is_empty():
        st.info("No data available for scatter plot.")
    else:
        # Plotly Express is fed a pandas frame here.
        df_pandas = combined_df.to_pandas()

        # Up to two points per source IP (one for PERMIT, one for DENY),
        # sized by the number of flows behind each point.
        fig = px.scatter(
            df_pandas,
            x="ipsrc",
            y="distinct_destinations",
            color="action",
            size="connections",
            color_discrete_map={"PERMIT": "blue", "DENY": "red"},
            hover_data=["connections", "action"],
            title="Number of Distinct Destinations Contacted by Each IP Source",
            labels={
                "ipsrc": "Source IP Address",
                "distinct_destinations": "Number of Distinct Destinations",
                "connections": "Number of Connections",
                "action": "Action",
            },
        )

        # Order source IPs by their total y value, largest first, for
        # readability.
        fig.update_layout(
            xaxis={"categoryorder": "total descending"}, legend_title="Action Type"
        )

        st.plotly_chart(fig, use_container_width=True)
265+ # Onglet Analysis
266+ with tab3 :
191267 st .subheader ("Analysis" )
192268
193269 # Afficher ici le top 10 des ports inférieurs à 1024 avec accès autorisé
@@ -322,7 +398,7 @@ def set_dynamic():
322398
323399
324400# Onglet Foreign IP addresses
325- with tab3 :
401+ with tab4 :
326402 st .subheader ("🚫 List of access outside the university network" )
327403
328404 if "ipsrc" in data .columns and "action" in data .columns :
@@ -363,7 +439,7 @@ def set_dynamic():
363439
364440
365441# Onglet Sankey
366- with tab4 :
442+ with tab5 :
367443 st .subheader ("Sankey Diagram" )
368444
369445 def create_sankey (df , source_col , target_col ):
0 commit comments