|
1 | 1 | import polars as pl |
2 | 2 | import streamlit as st |
| 3 | +import ipaddress |
| 4 | +import plotly.express as px |
| 5 | +import plotly.graph_objs as go |
| 6 | +import pandas as pd |
3 | 7 |
|
# Make sure the "parsed_df" session key exists before anything reads it.
if "parsed_df" not in st.session_state:
    st.session_state.parsed_df = None
|
14 | 18 |
|
# Frame read by the tabs below, taken from the Streamlit session state.
data = st.session_state.parsed_df
16 | 20 |
|
# Address ranges treated as the university addressing plan.
university_subnets = [
    ipaddress.ip_network("192.168.0.0/16"),
    ipaddress.ip_network("10.79.0.0/16"),
    ipaddress.ip_network("159.84.0.0/16"),
]


def is_university_ip(ip):
    """Return True when *ip* falls inside one of ``university_subnets``.

    Args:
        ip: Address to classify, normally a string such as "10.79.3.4".
            Leading/trailing whitespace in string input is ignored.

    Returns:
        True if the value parses as an IP address and belongs to one of the
        listed subnets; False for addresses outside them and for any value
        that cannot be parsed as an IP address.
    """
    if isinstance(ip, str):
        # ip_address() rejects padded strings, which would otherwise make a
        # university address look external.
        ip = ip.strip()
    try:
        ip_obj = ipaddress.ip_address(ip)
    except ValueError:
        return False
    return any(ip_obj in subnet for subnet in university_subnets)
| 34 | + |
17 | 35 | # Créer les onglets principaux |
18 | 36 | tab1, tab2, tab3, tab4 = st.tabs( |
19 | 37 | ["Dataviz", "Analysis", "Foreign IP addresses", "Sankey"] |
@@ -172,18 +190,165 @@ def set_dynamic(): |
172 | 190 | ) |
173 | 191 | st.dataframe(top_ips, use_container_width=True) |
174 | 192 |
|
| 193 | + # Graphique |
| 194 | + |
| 195 | + st.write("### 🔴 Analysis of Blocked Attempts") |
| 196 | + |
| 197 | + if "ipsrc" in data.columns and "action" in data.columns: |
| 198 | + # Filtrer uniquement les tentatives bloquées |
| 199 | + blocked_attempts = data.filter(pl.col("action") == "DENY") |
| 200 | + |
| 201 | + # Compter les occurrences des IP sources bloquées |
| 202 | + blocked_ips = ( |
| 203 | + blocked_attempts |
| 204 | + .group_by("ipsrc") |
| 205 | + .agg(pl.count("ipsrc").alias("count")) |
| 206 | + .sort("count", descending=True) |
| 207 | + ) |
| 208 | + |
| 209 | + |
| 210 | + top_n = st.slider(" ", 5, 20, 10, key="top_n_slider") |
| 211 | + |
| 212 | + # Sélectionner le Top N des IP bloquées |
| 213 | + top_blocked_ips = blocked_ips.head(top_n) |
| 214 | + |
| 215 | + |
| 216 | + # ---- GRAPHIQUE AVEC PLOTLY ---- |
| 217 | + color_palette = px.colors.sequential.Blues |
| 218 | + if not top_blocked_ips.is_empty(): |
| 219 | + fig = px.bar( |
| 220 | + top_blocked_ips.to_pandas(), # Convertir en DataFrame Pandas pour Plotly |
| 221 | + x="count", |
| 222 | + y="ipsrc", |
| 223 | + orientation="h", |
| 224 | + text="count", |
| 225 | + title=f"Top {top_n} Most Blocked IPs", |
| 226 | + labels={"ipsrc": "IP Source", "count": "Number of Blocked Attempts"}, |
| 227 | + color_discrete_sequence=["#3d85c6"] |
| 228 | + ) |
| 229 | + |
| 230 | + # Amélioration du layout |
| 231 | + fig.update_traces(texttemplate='%{text}', textposition='inside') |
| 232 | + fig.update_layout(yaxis=dict(categoryorder="total ascending")) |
| 233 | + |
| 234 | + # Afficher le graphique interactif |
| 235 | + st.plotly_chart(fig, use_container_width=True) |
| 236 | + else: |
| 237 | + st.info("No blocked attempts found.") |
| 238 | + else: |
| 239 | + st.warning("Columns 'ipsrc' or 'action' not found.") |
| 240 | + |
| 241 | + # Graphique de série temporelle des connexions par heure |
| 242 | + st.write("### 📊 Hourly Connection Activity") |
| 243 | + |
| 244 | + if "timestamp" in data.columns: |
| 245 | + # Extraire uniquement les connexions autorisées (PERMIT) et valider le format datetime |
| 246 | + activity_data = ( |
| 247 | + data |
| 248 | + .filter(pl.col("action") == "PERMIT") # Ne garder que les connexions autorisées |
| 249 | + .with_columns(pl.col("timestamp").dt.strftime("%Y-%m-%d %H:00:00").alias("hour")) # Normaliser à l'heure |
| 250 | + .group_by("hour") |
| 251 | + .agg(pl.count("hour").alias("connection_count")) # Compter les connexions par heure |
| 252 | + .sort("hour") # Trier chronologiquement |
| 253 | + ) |
| 254 | + |
| 255 | + # Vérifier si on a des données après filtrage |
| 256 | + if not activity_data.is_empty(): |
| 257 | + # Convertir en DataFrame Pandas pour Plotly |
| 258 | + df_activity = activity_data.to_pandas() |
| 259 | + df_activity["hour"] = pd.to_datetime(df_activity["hour"]) # Assurer le bon format datetime |
| 260 | + |
| 261 | + # Tracer le graphique |
| 262 | + fig = px.line( |
| 263 | + df_activity, |
| 264 | + x="hour", |
| 265 | + y="connection_count", |
| 266 | + markers=True, # Ajouter des points pour bien voir les pics |
| 267 | + title="Hourly Connection Activity", |
| 268 | + labels={"hour": "Hour", "connection_count": "Number of Connections"}, |
| 269 | + line_shape="spline" # Rendre les courbes lisses |
| 270 | + ) |
| 271 | + |
| 272 | + # Afficher le graphique |
| 273 | + st.plotly_chart(fig, use_container_width=True) |
| 274 | + else: |
| 275 | + st.info("No connection data found for the selected period.") |
| 276 | + else: |
| 277 | + st.warning("Column 'timestamp' not found.") |
| 278 | + |
| 279 | + |
175 | 280 |
|
176 | 281 | # Onglet Foreign IP addresses |
with tab3:
    st.subheader("🚫 List of access outside the university network")

    if "ipsrc" in data.columns and "action" in data.columns:
        # Use a tab-local frame: rebinding the shared `data` would leak the
        # helper column and the dtype casts into the tabs rendered afterwards.
        flagged = data.with_columns([
            # Cast to text so the IP check and the action comparison below
            # operate on strings.
            pl.col("ipsrc").cast(pl.Utf8),
            pl.col("action").cast(pl.Utf8),
        ]).with_columns([
            pl.col("ipsrc")
            .map_elements(is_university_ip, return_dtype=pl.Boolean)
            .alias("is_src_university_ip")
        ])

        # Keep the rows whose source address is outside the university subnets.
        intrusion_attempts = flagged.filter(~pl.col("is_src_university_ip"))

        # Optional narrowing by firewall action.
        selected_action = st.selectbox("Select action type", ["All", "PERMIT", "DENY"])
        if selected_action != "All":
            intrusion_attempts = intrusion_attempts.filter(
                pl.col("action") == selected_action
            )

        # Show the matching rows without the internal helper column.
        st.write(f"### 🔍 External accesses: {intrusion_attempts.shape[0]} entries")
        st.dataframe(
            intrusion_attempts.drop(["is_src_university_ip"]),
            use_container_width=True,
        )
    else:
        st.warning("Columns 'ipsrc' or 'action' not found.")
| 314 | + |
| 315 | + |
186 | 316 |
|
187 | 317 | # Onglet Sankey |
with tab4:
    st.subheader("Sankey Diagram")

    def create_sankey(df, source_col, target_col):
        """Render a Sankey diagram of the flows between two columns.

        Args:
            df: Polars DataFrame holding the connection rows.
            source_col: Name of the column used for the link sources.
            target_col: Name of the column used for the link targets.

        Returns:
            None. The figure is drawn with ``st.plotly_chart``; a warning or
            info message is shown instead when a column is missing or there
            is nothing to plot.
        """
        missing = [col for col in (source_col, target_col) if col not in df.columns]
        if missing:
            st.warning(f"Columns {', '.join(missing)} not found.")
            return

        # Cast both columns to text so values such as ports can be used as
        # node labels, then count the rows of every (source, target) pair.
        df_grouped = (
            df.select([
                pl.col(source_col).cast(pl.Utf8),
                pl.col(target_col).cast(pl.Utf8),
            ])
            .drop_nulls()  # a null value cannot be used as a node label
            .group_by([source_col, target_col])
            .len()
            .to_pandas()
        )
        if df_grouped.empty:
            st.info("No data available for the Sankey diagram.")
            return

        # Nodes: every distinct value seen in either column.
        labels = list(pd.concat([df_grouped[source_col], df_grouped[target_col]]).unique())
        label_to_index = {label: i for i, label in enumerate(labels)}

        # Links: node indices of each pair, weighted by the pair's row count.
        sources = df_grouped[source_col].map(label_to_index).tolist()
        targets = df_grouped[target_col].map(label_to_index).tolist()
        values = df_grouped["len"].tolist()

        fig = go.Figure(go.Sankey(
            node=dict(
                pad=15, thickness=20, line=dict(color="black", width=0.5),
                label=labels,
            ),
            link=dict(source=sources, target=targets, value=values),
        ))

        fig.update_layout(title_text=f"Flux entre {source_col} et {target_col}", font_size=10)
        st.plotly_chart(fig, use_container_width=True)

    # Sankey between source IP and destination IP. The other tabs read these
    # columns from the same frame as "ipsrc" / "ipdst".
    create_sankey(data, "ipsrc", "ipdst")

    # Sankey between source IP and destination port.
    # NOTE(review): the destination-port column name is not visible in this
    # part of the file; "port_destination" is kept as written — confirm it
    # against the parsed schema. A missing column only triggers a warning.
    create_sankey(data, "ipsrc", "port_destination")
| 354 | + |
0 commit comments