From 46399881a13be93075f7d0f9efb2ba56c0f71a34 Mon Sep 17 00:00:00 2001 From: Simone Rodigari Date: Tue, 12 Aug 2025 20:59:31 +0100 Subject: [PATCH 1/7] add Kubernetes support --- Makefile | 240 +++++++++++++- README.md | 107 +++++- bpf/connection.o | Bin 15152 -> 15152 bytes bpf/packet_drop.o | Bin 10784 -> 10792 bytes cmd/aggregator/main.go | 94 ++++++ docker/Dockerfile | 45 +++ docker/Dockerfile.aggregator | 40 +++ internal/aggregator/aggregator.go | 312 ++++++++++++++++++ internal/aggregator/health.go | 50 +++ internal/client/aggregator.go | 191 +++++++++++ internal/events/events.go | 24 ++ .../events/kubernetes_integration_test.go | 108 ++++++ internal/kubernetes/metadata.go | 104 ++++++ internal/kubernetes/metadata_test.go | 107 ++++++ internal/programs/base.go | 7 +- internal/storage/forwarding.go | 51 +++ internal/system/system.go | 22 +- kind-config.yaml | 33 ++ kubernetes/README.md | 295 +++++++++++++++++ kubernetes/aggregator-deployment.yaml | 52 +++ kubernetes/configmap.yaml | 23 ++ kubernetes/daemonset.yaml | 95 ++++++ kubernetes/namespace.yaml | 6 + kubernetes/rbac.yaml | 61 ++++ kubernetes/services.yaml | 38 +++ scripts/create-kind-cluster.sh | 42 +++ scripts/deploy-to-kind.sh | 43 +++ scripts/deploy.sh | 149 +++++++++ scripts/load-kind-images.sh | 23 ++ scripts/test-kind-deployment.sh | 56 ++++ scripts/test-kind.sh | 198 +++++++++++ 31 files changed, 2589 insertions(+), 27 deletions(-) create mode 100644 cmd/aggregator/main.go create mode 100644 docker/Dockerfile create mode 100644 docker/Dockerfile.aggregator create mode 100644 internal/aggregator/aggregator.go create mode 100644 internal/aggregator/health.go create mode 100644 internal/client/aggregator.go create mode 100644 internal/events/kubernetes_integration_test.go create mode 100644 internal/kubernetes/metadata.go create mode 100644 internal/kubernetes/metadata_test.go create mode 100644 internal/storage/forwarding.go create mode 100644 kind-config.yaml create mode 100644 
kubernetes/README.md create mode 100644 kubernetes/aggregator-deployment.yaml create mode 100644 kubernetes/configmap.yaml create mode 100644 kubernetes/daemonset.yaml create mode 100644 kubernetes/namespace.yaml create mode 100644 kubernetes/rbac.yaml create mode 100644 kubernetes/services.yaml create mode 100755 scripts/create-kind-cluster.sh create mode 100755 scripts/deploy-to-kind.sh create mode 100755 scripts/deploy.sh create mode 100755 scripts/load-kind-images.sh create mode 100755 scripts/test-kind-deployment.sh create mode 100755 scripts/test-kind.sh diff --git a/Makefile b/Makefile index 0bb47d1..5b901dd 100644 --- a/Makefile +++ b/Makefile @@ -1,11 +1,25 @@ # ebpf-server - HTTP API Server with eBPF Network Monitoring +# +# Project Structure: +# - docker/: Docker build files (Dockerfile, Dockerfile.aggregator) +# - scripts/: Deployment and testing scripts +# - kubernetes/: Kubernetes manifests +# - bpf/: eBPF source code and compiled objects +# # Variables BINARY_NAME := ebpf-server +AGGREGATOR_NAME := ebpf-aggregator BPF_SOURCES := $(wildcard bpf/*.c) BPF_OBJECTS := $(BPF_SOURCES:.c=.o) GO_SOURCES := $(shell find . -name '*.go' -not -path './vendor/*') +# Container settings +REGISTRY ?= localhost:5000 +TAG ?= latest +AGENT_IMAGE := $(REGISTRY)/$(BINARY_NAME):$(TAG) +AGGREGATOR_IMAGE := $(REGISTRY)/$(AGGREGATOR_NAME):$(TAG) + # Detect architecture and OS ARCH := $(shell uname -m) OS := $(shell uname -s) @@ -65,17 +79,38 @@ vmlinux: -Wall \ -g -c $< -o $@ -# Build the Go binary (HTTP transport) -.PHONY: build -build: generate +# Build all eBPF programs +.PHONY: bpf +bpf: $(BPF_OBJECTS) + @echo "All eBPF programs compiled successfully" + +# Build the Go binary (server) +.PHONY: build-server +build-server: generate @echo "Building $(BINARY_NAME)..." go build -o bin/$(BINARY_NAME) ./cmd/server +# Build the aggregator binary +.PHONY: build-aggregator +build-aggregator: generate + @echo "Building $(AGGREGATOR_NAME)..." 
+ go build -o bin/$(AGGREGATOR_NAME) ./cmd/aggregator + +# Build the aggregator binary without eBPF dependencies (for Docker) +.PHONY: build-aggregator-only +build-aggregator-only: + @echo "Building $(AGGREGATOR_NAME) (no eBPF dependencies)..." + go build -o bin/$(AGGREGATOR_NAME) ./cmd/aggregator + +# Build both binaries +.PHONY: build +build: build-server build-aggregator + # Build development version with debug symbols and verbose debug logging .PHONY: build-dev build-dev: generate @echo "Building $(BINARY_NAME) with debug symbols and debug logging..." - go build -race -tags debug -o bin/$(BINARY_NAME)-dev ./cmd/server + CGO_ENABLED=1 CC=clang go build -race -tags debug -o bin/$(BINARY_NAME)-dev ./cmd/server # Run the server (requires root for eBPF) - HTTP transport .PHONY: run @@ -139,6 +174,29 @@ clean: rm -rf bin/ rm -rf bpf/*.o +# Clean only eBPF objects (for fresh compilation) +.PHONY: clean-bpf +clean-bpf: + @echo "Cleaning eBPF object files..." + rm -rf bpf/*.o + +# Fresh build - clean eBPF objects and rebuild everything +.PHONY: fresh-build +fresh-build: clean-bpf bpf docker-build + @echo "Fresh build completed with new eBPF objects" + +# Fresh build for kind testing - builds everything from scratch +.PHONY: fresh-kind-build +fresh-kind-build: + @echo "πŸ”„ Starting fresh build for kind testing..." +# @echo "1️⃣ Compiling fresh eBPF objects on host..." +# $(MAKE) bpf + @echo "2️⃣ Building fresh Docker images..." + $(MAKE) docker-build + @echo "3️⃣ Loading images to kind..." + $(MAKE) kind-load-images + @echo "βœ… Fresh kind build completed!" 
+ # Generate API documentation using Swagger .PHONY: docs docs: @@ -190,24 +248,174 @@ check-deps: .PHONY: help help: @echo "Available targets:" - @echo " all - Generate eBPF bindings and build the binary" + @echo " all - Generate eBPF bindings and build binaries" @echo " generate - Generate eBPF Go bindings" + @echo " bpf - Compile eBPF programs" @echo " vmlinux - Generate vmlinux.h from running kernel (Linux only)" - @echo " build - Build the release binary" + @echo " build - Build both server and aggregator binaries" + @echo " build-server - Build the server binary" + @echo " build-aggregator - Build the aggregator binary" @echo " build-dev - Build development binary with debug symbols" @echo " run - Run the server (requires root)" @echo " run-dev - Run development server" - @echo " deps - Install Go dependencies" - @echo " test - Run tests" - @echo " test-race - Run tests with race detection" - @echo " lint - Run linters" - @echo " fmt - Format code" + @echo " test - Run all tests" + @echo " test-unit - Run unit tests only" + @echo " test-integration - Run integration tests only" + @echo " check - Run linting and static analysis" + @echo " deps - Install dependencies" + @echo " check-deps - Check if dependencies are installed" @echo " clean - Clean build artifacts" - @echo " install - Install binary system-wide" - @echo " uninstall - Remove binary from system" - @echo " dev-setup - Setup development tools" - @echo " check-deps - Check system dependencies" - @echo " help - Show this help message" + @echo " clean-bpf - Clean only eBPF object files" + @echo " fresh-build - Clean eBPF objects and rebuild everything" + @echo " fresh-kind-build - Fresh build for kind testing" + @echo "" + @echo "Container targets:" + @echo " docker-build - Build Docker images (using docker/Dockerfile*)" + @echo " docker-push - Push Docker images to registry" + @echo " docker-build-agent - Build agent Docker image" + @echo " docker-build-aggregator - Build aggregator Docker image" + 
@echo "" + @echo "Kubernetes targets:" + @echo " k8s-deploy - Deploy to Kubernetes" + @echo " k8s-undeploy - Remove from Kubernetes" + @echo " k8s-logs - Show logs from pods" + @echo " k8s-status - Show pod status" + @echo "" + @echo "Kind (local testing) targets (using scripts/):" + @echo " kind-check-deps - Check prerequisites for kind testing" + @echo " kind-cluster-create - Create kind cluster with local registry" + @echo " kind-cluster-delete - Delete kind cluster and registry" + @echo " kind-deploy - Build and deploy to kind cluster" + @echo " kind-test - Deploy and run basic tests in kind" + @echo " kind-full-test - Create cluster, deploy, and test" + @echo " kind-integration-test - Run comprehensive integration tests" + @echo " kind-cleanup - Clean up kind deployment" + +# Container build targets +.PHONY: docker-build +docker-build: docker-build-agent docker-build-aggregator + +.PHONY: docker-build-agent +docker-build-agent: + @echo "Building agent Docker image..." + docker build -t $(AGENT_IMAGE) -f docker/Dockerfile . + +.PHONY: docker-build-aggregator +docker-build-aggregator: + @echo "Building aggregator Docker image..." + docker build -t $(AGGREGATOR_IMAGE) -f docker/Dockerfile.aggregator . + +.PHONY: docker-push +docker-push: + @echo "Pushing Docker images..." + docker push $(AGENT_IMAGE) + docker push $(AGGREGATOR_IMAGE) + +# Kubernetes targets +.PHONY: k8s-deploy +k8s-deploy: + @echo "Deploying to Kubernetes..." + kubectl apply -f kubernetes/namespace.yaml + kubectl apply -f kubernetes/rbac.yaml + kubectl apply -f kubernetes/configmap.yaml + kubectl apply -f kubernetes/services.yaml + kubectl apply -f kubernetes/aggregator-deployment.yaml + kubectl apply -f kubernetes/daemonset.yaml + @echo "Waiting for deployments to be ready..." 
+ kubectl wait --for=condition=available --timeout=300s deployment/ebpf-aggregator -n ebpf-system + kubectl rollout status daemonset/ebpf-monitor -n ebpf-system --timeout=300s + +.PHONY: k8s-undeploy +k8s-undeploy: + @echo "Removing from Kubernetes..." + -kubectl delete -f kubernetes/daemonset.yaml + -kubectl delete -f kubernetes/aggregator-deployment.yaml + -kubectl delete -f kubernetes/services.yaml + -kubectl delete -f kubernetes/configmap.yaml + -kubectl delete -f kubernetes/rbac.yaml + -kubectl delete -f kubernetes/namespace.yaml + +.PHONY: k8s-logs +k8s-logs: + @echo "Showing aggregator logs..." + kubectl logs -l app=ebpf-aggregator -n ebpf-system --tail=100 -f + +.PHONY: k8s-logs-agents +k8s-logs-agents: + @echo "Showing agent logs..." + kubectl logs -l app=ebpf-monitor -n ebpf-system --tail=50 + +.PHONY: k8s-status +k8s-status: + @echo "Checking deployment status..." + kubectl get pods -n ebpf-system + kubectl get services -n ebpf-system + kubectl get daemonset -n ebpf-system + kubectl get deployment -n ebpf-system + +# Combined build and deploy target +.PHONY: k8s-build-deploy +k8s-build-deploy: docker-build docker-push k8s-deploy + +# Kind (Kubernetes in Docker) targets for local testing +KIND_CLUSTER_NAME ?= ebpf-test +KIND_REGISTRY_NAME ?= kind-registry +KIND_REGISTRY_PORT ?= 5001 +KIND_NODE_IMAGE ?= kindest/node:v1.28.0 + +.PHONY: kind-check-deps +kind-check-deps: + @echo "πŸ” Checking Kind testing prerequisites..." + @which kind > /dev/null || (echo "❌ Kind not found. Install from: https://kind.sigs.k8s.io/docs/user/quick-start/" && exit 1) + @which docker > /dev/null || (echo "❌ Docker not found. Install Docker Desktop or Docker Engine" && exit 1) + @which kubectl > /dev/null || (echo "❌ kubectl not found. Install from: https://kubernetes.io/docs/tasks/tools/" && exit 1) + @which jq > /dev/null || (echo "⚠️ jq not found. Install for better test output: https://stedolan.github.io/jq/") + @which curl > /dev/null || (echo "❌ curl not found. 
Install curl" && exit 1) + @docker info > /dev/null 2>&1 || (echo "❌ Docker is not running. Start Docker Desktop or Docker daemon" && exit 1) + @echo "βœ… All prerequisites available!" + +.PHONY: kind-cluster-create +kind-cluster-create: kind-check-deps + @./scripts/create-kind-cluster.sh + +.PHONY: kind-cluster-delete +kind-cluster-delete: + @echo "πŸ—‘οΈ Deleting kind cluster and registry..." + -kind delete cluster --name $(KIND_CLUSTER_NAME) + -docker rm -f $(KIND_REGISTRY_NAME) + @echo "βœ… Kind cluster and registry deleted" + +.PHONY: kind-load-images +kind-load-images: docker-build + @./scripts/load-kind-images.sh + +.PHONY: kind-deploy +kind-deploy: kind-load-images + @./scripts/deploy-to-kind.sh + +.PHONY: kind-test +kind-test: fresh-kind-build kind-deploy + @./scripts/test-kind-deployment.sh + +.PHONY: kind-full-test +kind-full-test: kind-cluster-create kind-test + @echo "πŸŽ‰ Full kind cluster test completed!" + +.PHONY: kind-integration-test +kind-integration-test: kind-deploy + @echo "πŸ§ͺ Running comprehensive integration tests..." + ./scripts/test-kind.sh --automated + +.PHONY: kind-cleanup +kind-cleanup: + @echo "🧹 Cleaning up kind deployment..." + -kubectl delete -f kubernetes/daemonset.yaml + -kubectl delete -f kubernetes/aggregator-deployment.yaml + -kubectl delete -f kubernetes/services.yaml + -kubectl delete -f kubernetes/configmap.yaml + -kubectl delete -f kubernetes/rbac.yaml + -kubectl delete -f kubernetes/namespace.yaml + @echo "βœ… Kind deployment cleaned up" # Create necessary directories bin: diff --git a/README.md b/README.md index bbc1936..b00daba 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,39 @@ # eBPF Network Monitor -[![CI Pipeline](https://github.com/srodi/ebpf-server/actions/workflows/ci.yml/badge.svg)](https://github.com/srodi/ebpf-server/actions/workflows/ci.yml) +[### Kubernetes Deployment (New!) 
+```bash +# Quick deployment with built-in script +./scripts/deploy.sh all --registry your-registry.com + +# Or step by step +make docker-build +make docker-push REGISTRY=your-registry.com +make k8s-deploy +``` + +### Local Testing with Kind +```bash +# Test locally with Kind (Kubernetes in Docker) +make kind-full-test + +# Or step by step: +make kind-cluster-create # Create local cluster +make kind-deploy # Deploy to kind cluster +make kind-integration-test # Run comprehensive tests +``` + +**πŸ“– [Kubernetes Deployment Guide](kubernetes/README.md)** - Complete setup and configuration guide + +**πŸ§ͺ [Kind Testing Guide](KIND_TESTING.md)** - Local testing with Kind clusterse](https://github.com/srodi/ebpf-server/actions/workflows/ci.yml/badge.svg)](https://github.com/srodi/ebpf-server/actions/workflows/ci.yml) [![API Documentation](https://img.shields.io/badge/API-Documentation-blue?style=for-the-badge&logo=swagger)](https://petstore.swagger.io/?url=https://raw.githubusercontent.com/srodi/ebpf-server/main/docs/swagger/swagger.json) [![OpenAPI Spec](https://img.shields.io/badge/OpenAPI-3.0-green?style=for-the-badge&logo=openapiinitiative)](docs/swagger.json) [![Go Version](https://img.shields.io/badge/Go-1.23+-00ADD8?style=for-the-badge&logo=go)](https://golang.org) -A modular eBPF monitoring system with HTTP API server for real-time network and system event monitoring. Features a clean, interface-based architecture for easy extension with new monitoring programs. +A modular eBPF monitoring system with HTTP API server for real-time network and system event monitoring. **Supports both VM and Kubernetes deployments** with automatic metadata enrichment. 
-## Quick Start +## πŸš€ Deployment Options +### VM Deployment (Traditional) ```bash # Install dependencies (Ubuntu/Debian) sudo apt install -y golang-go clang libbpf-dev linux-headers-$(uname -r) @@ -20,13 +45,26 @@ sudo ./bin/ebpf-server # Test the API curl http://localhost:8080/health curl "http://localhost:8080/api/events?type=connection&limit=10" -curl "http://localhost:8080/api/programs" ``` -**πŸ“š [View Interactive API Documentation](https://petstore.swagger.io/?url=https://raw.githubusercontent.com/srodi/ebpf-server/main/docs/swagger/swagger.json)** - Test APIs directly in your browser +### Kubernetes Deployment (New!) +```bash +# Quick deployment with built-in script +./scripts/deploy.sh all --registry your-registry.com + +# Or step by step +make docker-build +make docker-push REGISTRY=your-registry.com +make k8s-deploy +``` + +**οΏ½ [Kubernetes Deployment Guide](kubernetes/README.md)** - Complete setup and configuration guide + +**οΏ½πŸ“š [View Interactive API Documentation](https://petstore.swagger.io/?url=https://raw.githubusercontent.com/srodi/ebpf-server/main/docs/swagger/swagger.json)** - Test APIs directly in your browser ## Architecture +### VM Mode **Modular, interface-based monitoring system** with clean separation of concerns: ``` @@ -49,6 +87,31 @@ curl "http://localhost:8080/api/programs" β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ ``` +### Kubernetes Mode +**Distributed monitoring with centralized aggregation**: + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Kubernetes Cluster β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ Node 1 β”‚ β”‚ Node 2 β”‚ β”‚ Node N β”‚ β”‚ +β”‚ β”‚ 
β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ +β”‚ β”‚ β”‚ eBPF β”‚ β”‚ β”‚ β”‚ eBPF β”‚ β”‚ β”‚ β”‚ eBPF β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚ Agent β”‚ β”‚ β”‚ β”‚ Agent β”‚ β”‚ β”‚ β”‚ Agent β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚+K8s Metaβ”‚ β”‚ β”‚ β”‚+K8s Metaβ”‚ β”‚ β”‚ β”‚+K8s Metaβ”‚ β”‚ β”‚ +β”‚ β”‚ β””β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”˜ β”‚ β”‚ β””β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”˜ β”‚ β”‚ β””β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”˜ β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ β”‚ β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β–Όβ”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ eBPF β”‚ β”‚ +β”‚ β”‚ Aggregator│◄─── Unified API β”‚ +β”‚ β”‚ β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + **Core Components:** - **Core Interfaces**: Define contracts for Events, Programs, Managers, and Storage - **Event System**: Unified event creation, streaming, and storage with `BaseEvent` @@ -254,10 +317,42 @@ int trace_your_event(void *ctx) { char LICENSE[] SEC("license") = "GPL"; ``` +## ✨ Key Features + +### πŸ”„ Dual Deployment Support +- **VM Mode**: Traditional single-server deployment (unchanged) +- **Kubernetes Mode**: DaemonSet + Aggregator architecture for cluster-wide monitoring +- **Automatic Detection**: Seamlessly detects environment and adapts behavior + +### 🏷️ Kubernetes Metadata Enrichment +Events in Kubernetes include rich metadata: +```json +{ + "id": "abc123", + "type": "connection", + "k8s_node_name": "worker-node-1", + "k8s_pod_name": "ebpf-monitor-xyz", + "k8s_namespace": 
"ebpf-system", + ... +} +``` + +### πŸ—οΈ Scalable Architecture +- **DaemonSet Agents**: One monitoring pod per cluster node +- **Centralized Aggregator**: Single API endpoint for cluster-wide events +- **Event Forwarding**: Automatic forwarding from agents to aggregator +- **Backward Compatible**: Existing VM deployments continue unchanged + +### πŸ“Š Unified Monitoring +- **Cross-Node Correlation**: View events across entire Kubernetes cluster +- **Node-Specific Filtering**: Query events by specific nodes or pods +- **Aggregated Statistics**: Cluster-wide event statistics and metrics + ## API Features - **Unified Event API**: Single `/api/events` endpoint for all monitoring data -- **Flexible Filtering**: Filter by event type, PID, command, and time windows +- **Flexible Filtering**: Filter by event type, PID, command, and time windows +- **Kubernetes Filtering**: Filter by node name, pod name, or namespace - **Program Status**: View program status and metrics via `/api/programs` - **Auto-Generated Documentation**: OpenAPI 3.0 spec from code annotations - **Interactive Testing**: Built-in Swagger UI for API exploration diff --git a/bpf/connection.o b/bpf/connection.o index 562208372563b79b82b6317b470c0558b43ab2aa..d4d8c384bb2c0aef55b7886093503de893801db1 100644 GIT binary patch delta 1362 zcmZ9~O=wd=5C`ztYcNz-bYG@`t;IpknMq*7XFC7?nN1&eriJ;b(POTkMIT6&PY zD1IOaR-#z&gP<4j^GQLf6!9X6AoL`~Q!h$I@Z(Sv3*vv%d1YxJ%>HKf%}(~kpZ2GH z71>wn5kyH!$bK666v9#@8V=HlabzhON~l9rd5U$rtRRs(Oc&<6BafOrN6-QifARf} zaw9y7XdFeCri`EpyBpq<6*OyE!8P_t)MG<}e6|O-4x&H%2HY|r=neZJ?8T1ZKrGCG zuMrEU1WmHn5x36>nqgy0{ZnB#N}>p+d$7em`#9?D=P`ig1iZ{HOY*4Yil9|a1PAE&xav}zgg+nxBIS$vh*k_-` z{%kTt1w=Ez6U1C5L_L;=;iBa@JjM3R113@7L{by=KE{n$&cMFq4tT+`slT&q>dWj) z=(l$fuMp7;Snpp5QFg;#J&{48$J)>V7cHCmkY!UJW!Der3(>4?9JpW`^%dKw(>5~@ z-ha@*sVCUjP_&H(qqb46*hal(+YDeE^(EV=uNwE_N`e8x;f*T_{xn&3P#>_pSZz(8 zj$Mw9-?z88*Z@uHkpO3M!- zufzc@pMzsso&)P0|Lrb2Dy!v1?9Q#JVQ%>w* IU7hjde|){+NB{r; delta 1362 
zcmZ9~K}b|V7zglg-Z+z?tE;i@(=6VahlSi_D-(r^g{(XXbzF;$H86xac*#7(z848W z6i7=HQM8bUD2fLfZ5^cOB6RT-(jkO8*r5nY$h!2O^;;QSSibpv-@I?Wowwm+I2o$Q zzBB!T$fdYEO!-d^mLgtvl+KMIORf`FgH(Khb-N6Z=r~5#=K3PfntfML1&P1-{y@0_ zE+85=p-W?@pn%;6@Ad@ESXOYEeF638kf4z5!>xnp&%Oh4R9Jqj( zxhQCy{SR^bgrF%lwp2d>yFn5~rUX6N;*fn7_4X?mz;Ya3W)~&-)G{q-l@qlC{;(gT zLH?$odPFmzaSYCEahBa&N95sp3hV=L>K=ZOvsIS;Yowk{Q@cwHJ zylR4t4O!c0P_T`9U>o(aZ8Lyv)R%0dzG^&xE2#}oU%zohwLeXgU8@h-o-ef~E=6zb zeYt-7_rZ6^Ph5UA=APbPx|_(0L}@LN^k;ofMYQ}(X`NH;I-%m)Q!RQ}k(Q7Aw90Gw zh0-df(>eJNdp#|`lBZI`Du<0R@4UQ^Y=@TbMNYXXEf*S7K#Y6Iz$Vo&kzY_*1&F$P1 GbN>VSD&L3z diff --git a/bpf/packet_drop.o b/bpf/packet_drop.o index de4bee5cbd46a75054beb07145970fa689b32ff5..fc221dd3d8fdbc5c3c7e56584461472f7092ada8 100644 GIT binary patch delta 678 zcmZXQK}b|#5XWcsa~7Yn`PAp`cL?`wn)?Hz^|L{9Rb@ z12h^APqv8uz2Un4(*-?$agwQ@;-b!7?n{?`i<9AJ!g$J8Iuia>1Y3mR-MjKRE`m0E zDA~G!;2HiUIhI41HY_c$ZX?VZmKNAE2t~t>gy|tdS#szf&fdC!7k2fXCH%kKiHxP6 z%RpwC+Ol$B$8c>~(eE3s>&u4g7gh|%B-4uco3(4t@Cv~-EYF|-*SIG>ibK2@zh%FD z{Pb_(-p%7b+f!#x)~f~HiZ3N~RE@jjL{+~_I!P@_avC3y9wJitoNDG=LXUj}B)dtI z(??cURDJMWibd5|aLJk{J5BV4S|gq~ZAeF~en?06UiK;^UY9y`#KNOeDs_OExc=X>9qdFzF>!ez{K9Hj3j(-cQeSo;; z=wzelH#JxF`}g#GWT)GX#l^UD8J8}diIe6x!uW?9T2g_HV2Log`=@-iLQt9`12c=@ zYQB)1=s}p#{77aopCQa?9u&3;2d!S-FTB>AkW35Wuh*_T!xDm{S)RcF{J<)aVeBRj0`AA~O>+ zwgZO}G2;*%GGCFEBU+}W8dq$-Nk@E+rK2AIZ?u~FRPxkK98XJL+b_f(#>@_uIm`rj zQu4C_;$IHicW{!69Z|#21!!?7v73E2z(o$-G3;m4>%&d= c.batchSize + c.mu.Unlock() + + if shouldFlush { + return c.flush(ctx) + } + + return nil +} + +// SendEvents sends multiple events to the aggregator. +func (c *AggregatorClient) SendEvents(ctx context.Context, events []core.Event) error { + if !c.enabled || len(events) == 0 { + return nil + } + + c.mu.Lock() + c.buffer = append(c.buffer, events...) + shouldFlush := len(c.buffer) >= c.batchSize + c.mu.Unlock() + + if shouldFlush { + return c.flush(ctx) + } + + return nil +} + +// Flush sends all buffered events to the aggregator. 
+func (c *AggregatorClient) Flush(ctx context.Context) error {
+	if !c.enabled {
+		return nil
+	}
+	return c.flush(ctx)
+}
+
+// Close stops the client and flushes any remaining events.
+//
+// It closes stopCh unconditionally, so calling it twice on an enabled client
+// panics. The final flush is bounded by a 5-second timeout.
+func (c *AggregatorClient) Close() error {
+	if !c.enabled {
+		return nil
+	}
+
+	close(c.stopCh)
+	c.flushTimer.Stop()
+
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	return c.flush(ctx)
+}
+
+// flush sends buffered events to the aggregator.
+//
+// The buffer is drained under c.mu before the request is made, so a batch
+// whose POST fails is not re-queued. Events that cannot be marshaled are
+// logged and left out of the payload. It returns an error when the request
+// cannot be built or sent, or when the response status is not 200 OK.
+func (c *AggregatorClient) flush(ctx context.Context) error {
+	c.mu.Lock()
+	if len(c.buffer) == 0 {
+		c.mu.Unlock()
+		return nil
+	}
+	events := make([]core.Event, len(c.buffer))
+	copy(events, c.buffer)
+	c.buffer = c.buffer[:0] // keep capacity for the next batch
+	c.mu.Unlock()
+
+	// Append only successfully encoded events: an index left unset would be
+	// a nil json.RawMessage, which encodes as a JSON null in the payload.
+	jsonEvents := make([]json.RawMessage, 0, len(events))
+	for _, event := range events {
+		data, err := json.Marshal(event)
+		if err != nil {
+			logger.Errorf("Failed to marshal event: %v", err)
+			continue
+		}
+		jsonEvents = append(jsonEvents, data)
+	}
+
+	jsonData, err := json.Marshal(map[string]interface{}{"events": jsonEvents})
+	if err != nil {
+		return fmt.Errorf("failed to marshal request: %w", err)
+	}
+
+	req, err := http.NewRequestWithContext(ctx, "POST", c.baseURL+"/api/events/ingest", bytes.NewReader(jsonData))
+	if err != nil {
+		return fmt.Errorf("failed to create request: %w", err)
+	}
+	req.Header.Set("Content-Type", "application/json")
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return fmt.Errorf("failed to send request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return fmt.Errorf("aggregator returned status %d", resp.StatusCode)
+	}
+
+	logger.Debugf("Sent %d events to aggregator", len(jsonEvents))
+	return nil
+}
+
+// flushRoutine periodically flushes buffered events.
+func (c *AggregatorClient) flushRoutine() { + for { + select { + case <-c.flushTimer.C: + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + if err := c.flush(ctx); err != nil { + logger.Errorf("Failed to flush events to aggregator: %v", err) + } + cancel() + case <-c.stopCh: + return + } + } +} diff --git a/internal/events/events.go b/internal/events/events.go index c83500c..509851a 100644 --- a/internal/events/events.go +++ b/internal/events/events.go @@ -14,6 +14,7 @@ import ( "time" "github.com/srodi/ebpf-server/internal/core" + "github.com/srodi/ebpf-server/internal/kubernetes" "github.com/srodi/ebpf-server/pkg/logger" ) @@ -22,6 +23,10 @@ var ( systemBootTime time.Time bootTimeCalculated bool bootTimeMutex sync.Mutex + + // Global Kubernetes metadata provider + k8sProvider *kubernetes.Provider + k8sOnce sync.Once ) // calculateSystemBootTime calculates the system boot time. @@ -106,6 +111,14 @@ type BaseEvent struct { metadata map[string]interface{} } +// getKubernetesProvider returns the global Kubernetes metadata provider. +func getKubernetesProvider() *kubernetes.Provider { + k8sOnce.Do(func() { + k8sProvider = kubernetes.NewProvider() + }) + return k8sProvider +} + // NewBaseEvent creates a new base event. 
func NewBaseEvent(eventType string, pid uint32, command string, timestamp uint64, metadata map[string]interface{}) *BaseEvent { // Generate a unique ID @@ -122,6 +135,17 @@ func NewBaseEvent(eventType string, pid uint32, command string, timestamp uint64 // eBPF timestamps are nanoseconds since boot (from bpf_ktime_get_ns()) eventTime := convertEBPFTimestamp(timestamp) + // Ensure metadata map exists + if metadata == nil { + metadata = make(map[string]interface{}) + } + + // Add Kubernetes metadata if available + k8sProvider := getKubernetesProvider() + if k8sProvider.IsEnabled() { + k8sProvider.AddToMap(metadata) + } + return &BaseEvent{ id: id, eventType: eventType, diff --git a/internal/events/kubernetes_integration_test.go b/internal/events/kubernetes_integration_test.go new file mode 100644 index 0000000..a1b1461 --- /dev/null +++ b/internal/events/kubernetes_integration_test.go @@ -0,0 +1,108 @@ +package events + +import ( + "os" + "sync" + "testing" + "time" +) + +func TestKubernetesMetadataIntegration(t *testing.T) { + // Save original env vars + originalMode := os.Getenv("DEPLOYMENT_MODE") + originalNodeName := os.Getenv("NODE_NAME") + originalPodName := os.Getenv("POD_NAME") + originalNamespace := os.Getenv("POD_NAMESPACE") + + // Cleanup function + defer func() { + os.Setenv("DEPLOYMENT_MODE", originalMode) + os.Setenv("NODE_NAME", originalNodeName) + os.Setenv("POD_NAME", originalPodName) + os.Setenv("POD_NAMESPACE", originalNamespace) + // Reset the provider for future tests + k8sProvider = nil + k8sOnce = sync.Once{} + }() + + t.Run("VM mode - no Kubernetes metadata", func(t *testing.T) { + // Clear all Kubernetes env vars + os.Unsetenv("DEPLOYMENT_MODE") + os.Unsetenv("NODE_NAME") + os.Unsetenv("POD_NAME") + os.Unsetenv("POD_NAMESPACE") + + // Reset provider + k8sProvider = nil + k8sOnce = sync.Once{} + + metadata := map[string]interface{}{ + "custom_field": "test_value", + } + + event := NewBaseEvent("test", 1234, "test-cmd", 
uint64(time.Now().UnixNano()), metadata) + + // Should not have Kubernetes metadata + eventMetadata := event.Metadata() + if _, exists := eventMetadata["k8s_node_name"]; exists { + t.Error("Event should not have k8s_node_name in VM mode") + } + if _, exists := eventMetadata["k8s_pod_name"]; exists { + t.Error("Event should not have k8s_pod_name in VM mode") + } + if _, exists := eventMetadata["k8s_namespace"]; exists { + t.Error("Event should not have k8s_namespace in VM mode") + } + + // Should still have custom metadata + if eventMetadata["custom_field"] != "test_value" { + t.Error("Custom metadata should be preserved") + } + }) + + t.Run("Kubernetes mode - with metadata", func(t *testing.T) { + // Set Kubernetes environment + os.Setenv("DEPLOYMENT_MODE", "kubernetes") + os.Setenv("NODE_NAME", "test-node-1") + os.Setenv("POD_NAME", "ebpf-monitor-abcde") + os.Setenv("POD_NAMESPACE", "ebpf-system") + + // Reset provider to pick up new env vars + k8sProvider = nil + k8sOnce = sync.Once{} + + metadata := map[string]interface{}{ + "custom_field": "test_value", + } + + event := NewBaseEvent("connection", 5678, "curl", uint64(time.Now().UnixNano()), metadata) + + // Should have Kubernetes metadata + eventMetadata := event.Metadata() + if eventMetadata["k8s_node_name"] != "test-node-1" { + t.Errorf("Expected k8s_node_name 'test-node-1', got '%v'", eventMetadata["k8s_node_name"]) + } + if eventMetadata["k8s_pod_name"] != "ebpf-monitor-abcde" { + t.Errorf("Expected k8s_pod_name 'ebpf-monitor-abcde', got '%v'", eventMetadata["k8s_pod_name"]) + } + if eventMetadata["k8s_namespace"] != "ebpf-system" { + t.Errorf("Expected k8s_namespace 'ebpf-system', got '%v'", eventMetadata["k8s_namespace"]) + } + + // Should still have custom metadata + if eventMetadata["custom_field"] != "test_value" { + t.Error("Custom metadata should be preserved") + } + + // Verify basic event properties + if event.Type() != "connection" { + t.Errorf("Expected event type 'connection', got '%s'", 
event.Type()) + } + if event.PID() != 5678 { + t.Errorf("Expected PID 5678, got %d", event.PID()) + } + if event.Command() != "curl" { + t.Errorf("Expected command 'curl', got '%s'", event.Command()) + } + }) +} diff --git a/internal/kubernetes/metadata.go b/internal/kubernetes/metadata.go new file mode 100644 index 0000000..e6b5af2 --- /dev/null +++ b/internal/kubernetes/metadata.go @@ -0,0 +1,104 @@ +// Package kubernetes provides Kubernetes-specific metadata and utilities. +package kubernetes + +import ( + "os" + "sync" +) + +// Metadata represents Kubernetes-specific metadata for events. +type Metadata struct { + NodeName string `json:"node_name,omitempty"` + PodName string `json:"pod_name,omitempty"` + Namespace string `json:"namespace,omitempty"` +} + +// Provider provides Kubernetes metadata for events. +type Provider struct { + metadata *Metadata + mu sync.RWMutex + enabled bool +} + +// NewProvider creates a new Kubernetes metadata provider. +func NewProvider() *Provider { + p := &Provider{ + enabled: isKubernetesEnvironment(), + } + + if p.enabled { + p.metadata = &Metadata{ + NodeName: os.Getenv("NODE_NAME"), + PodName: os.Getenv("POD_NAME"), + Namespace: os.Getenv("POD_NAMESPACE"), + } + } + + return p +} + +// IsEnabled returns true if running in Kubernetes environment. +func (p *Provider) IsEnabled() bool { + p.mu.RLock() + defer p.mu.RUnlock() + return p.enabled +} + +// GetMetadata returns the Kubernetes metadata. +func (p *Provider) GetMetadata() *Metadata { + p.mu.RLock() + defer p.mu.RUnlock() + + if !p.enabled || p.metadata == nil { + return nil + } + + // Return a copy to avoid race conditions + return &Metadata{ + NodeName: p.metadata.NodeName, + PodName: p.metadata.PodName, + Namespace: p.metadata.Namespace, + } +} + +// AddToMap adds Kubernetes metadata to a map. 
+func (p *Provider) AddToMap(data map[string]interface{}) { + if !p.IsEnabled() { + return + } + + metadata := p.GetMetadata() + if metadata == nil { + return + } + + if metadata.NodeName != "" { + data["k8s_node_name"] = metadata.NodeName + } + if metadata.PodName != "" { + data["k8s_pod_name"] = metadata.PodName + } + if metadata.Namespace != "" { + data["k8s_namespace"] = metadata.Namespace + } +} + +// isKubernetesEnvironment checks if we're running in Kubernetes. +func isKubernetesEnvironment() bool { + // Check for standard Kubernetes environment variables + if os.Getenv("KUBERNETES_SERVICE_HOST") != "" { + return true + } + + // Check deployment mode environment variable + if os.Getenv("DEPLOYMENT_MODE") == "kubernetes" { + return true + } + + // Check if we can find Kubernetes service account token + if _, err := os.Stat("/var/run/secrets/kubernetes.io/serviceaccount/token"); err == nil { + return true + } + + return false +} diff --git a/internal/kubernetes/metadata_test.go b/internal/kubernetes/metadata_test.go new file mode 100644 index 0000000..31b10f6 --- /dev/null +++ b/internal/kubernetes/metadata_test.go @@ -0,0 +1,107 @@ +package kubernetes + +import ( + "os" + "testing" +) + +func TestKubernetesProvider(t *testing.T) { + // Save original env vars + originalHost := os.Getenv("KUBERNETES_SERVICE_HOST") + originalMode := os.Getenv("DEPLOYMENT_MODE") + originalNodeName := os.Getenv("NODE_NAME") + originalPodName := os.Getenv("POD_NAME") + originalNamespace := os.Getenv("POD_NAMESPACE") + + // Cleanup function + defer func() { + os.Setenv("KUBERNETES_SERVICE_HOST", originalHost) + os.Setenv("DEPLOYMENT_MODE", originalMode) + os.Setenv("NODE_NAME", originalNodeName) + os.Setenv("POD_NAME", originalPodName) + os.Setenv("POD_NAMESPACE", originalNamespace) + }() + + t.Run("VM environment", func(t *testing.T) { + // Clear all Kubernetes env vars + os.Unsetenv("KUBERNETES_SERVICE_HOST") + os.Unsetenv("DEPLOYMENT_MODE") + os.Unsetenv("NODE_NAME") + 
os.Unsetenv("POD_NAME") + os.Unsetenv("POD_NAMESPACE") + + provider := NewProvider() + if provider.IsEnabled() { + t.Error("Provider should not be enabled in VM environment") + } + + metadata := provider.GetMetadata() + if metadata != nil { + t.Error("Metadata should be nil in VM environment") + } + }) + + t.Run("Kubernetes environment with deployment mode", func(t *testing.T) { + os.Setenv("DEPLOYMENT_MODE", "kubernetes") + os.Setenv("NODE_NAME", "test-node-1") + os.Setenv("POD_NAME", "ebpf-monitor-12345") + os.Setenv("POD_NAMESPACE", "ebpf-system") + + provider := NewProvider() + if !provider.IsEnabled() { + t.Error("Provider should be enabled in Kubernetes environment") + } + + metadata := provider.GetMetadata() + if metadata == nil { + t.Fatal("Metadata should not be nil in Kubernetes environment") + } + + if metadata.NodeName != "test-node-1" { + t.Errorf("Expected node name 'test-node-1', got '%s'", metadata.NodeName) + } + + if metadata.PodName != "ebpf-monitor-12345" { + t.Errorf("Expected pod name 'ebpf-monitor-12345', got '%s'", metadata.PodName) + } + + if metadata.Namespace != "ebpf-system" { + t.Errorf("Expected namespace 'ebpf-system', got '%s'", metadata.Namespace) + } + }) + + t.Run("Kubernetes environment with service host", func(t *testing.T) { + os.Unsetenv("DEPLOYMENT_MODE") + os.Setenv("KUBERNETES_SERVICE_HOST", "10.96.0.1") + os.Setenv("NODE_NAME", "test-node-2") + + provider := NewProvider() + if !provider.IsEnabled() { + t.Error("Provider should be enabled when KUBERNETES_SERVICE_HOST is set") + } + }) + + t.Run("AddToMap functionality", func(t *testing.T) { + os.Setenv("DEPLOYMENT_MODE", "kubernetes") + os.Setenv("NODE_NAME", "test-node") + os.Setenv("POD_NAME", "test-pod") + os.Setenv("POD_NAMESPACE", "test-ns") + + provider := NewProvider() + data := make(map[string]interface{}) + + provider.AddToMap(data) + + if data["k8s_node_name"] != "test-node" { + t.Errorf("Expected k8s_node_name 'test-node', got '%v'", data["k8s_node_name"]) + } + 
+ if data["k8s_pod_name"] != "test-pod" { + t.Errorf("Expected k8s_pod_name 'test-pod', got '%v'", data["k8s_pod_name"]) + } + + if data["k8s_namespace"] != "test-ns" { + t.Errorf("Expected k8s_namespace 'test-ns', got '%v'", data["k8s_namespace"]) + } + }) +} diff --git a/internal/programs/base.go b/internal/programs/base.go index cef0b44..749801e 100644 --- a/internal/programs/base.go +++ b/internal/programs/base.go @@ -76,7 +76,12 @@ func (p *BaseProgram) Load(ctx context.Context) error { logger.Debugf("Loading eBPF program %s from %s", p.name, p.objectPath) - collection, err := ebpf.LoadCollection(p.objectPath) + spec, err := ebpf.LoadCollectionSpec(p.objectPath) + if err != nil { + return fmt.Errorf("failed to load eBPF collection spec: %w", err) + } + + collection, err := ebpf.NewCollection(spec) if err != nil { return fmt.Errorf("failed to load eBPF collection: %w", err) } diff --git a/internal/storage/forwarding.go b/internal/storage/forwarding.go new file mode 100644 index 0000000..1581916 --- /dev/null +++ b/internal/storage/forwarding.go @@ -0,0 +1,51 @@ +package storage + +import ( + "context" + + "github.com/srodi/ebpf-server/internal/client" + "github.com/srodi/ebpf-server/internal/core" + "github.com/srodi/ebpf-server/pkg/logger" +) + +// ForwardingStorage wraps another storage and forwards events to an aggregator. +type ForwardingStorage struct { + primary core.EventSink + aggregatorClient *client.AggregatorClient +} + +// NewForwardingStorage creates a new forwarding storage. +func NewForwardingStorage(primary core.EventSink, aggregatorClient *client.AggregatorClient) *ForwardingStorage { + return &ForwardingStorage{ + primary: primary, + aggregatorClient: aggregatorClient, + } +} + +// Store saves an event to primary storage and forwards to aggregator. 
+func (s *ForwardingStorage) Store(ctx context.Context, event core.Event) error { + // Store in primary storage + if err := s.primary.Store(ctx, event); err != nil { + return err + } + + // Forward to aggregator if enabled + if s.aggregatorClient != nil && s.aggregatorClient.IsEnabled() { + if err := s.aggregatorClient.SendEvent(ctx, event); err != nil { + logger.Debugf("Failed to forward event to aggregator: %v", err) + // Don't return error - primary storage succeeded + } + } + + return nil +} + +// Query delegates to primary storage. +func (s *ForwardingStorage) Query(ctx context.Context, query core.Query) ([]core.Event, error) { + return s.primary.Query(ctx, query) +} + +// Count delegates to primary storage. +func (s *ForwardingStorage) Count(ctx context.Context, query core.Query) (int, error) { + return s.primary.Count(ctx, query) +} diff --git a/internal/system/system.go b/internal/system/system.go index 8750f2b..2673fc6 100644 --- a/internal/system/system.go +++ b/internal/system/system.go @@ -5,6 +5,7 @@ import ( "context" "fmt" + "github.com/srodi/ebpf-server/internal/client" "github.com/srodi/ebpf-server/internal/core" "github.com/srodi/ebpf-server/internal/programs" "github.com/srodi/ebpf-server/internal/programs/connection" @@ -15,18 +16,24 @@ import ( // System is the main orchestrator for the eBPF monitoring system. type System struct { - manager core.Manager - storage core.EventSink + manager core.Manager + storage core.EventSink + aggregatorClient *client.AggregatorClient } // NewSystem creates a new eBPF monitoring system. 
func NewSystem() *System { manager := programs.NewManager() memStorage := storage.NewMemoryStorage() + aggregatorClient := client.NewAggregatorClient() + + // Wrap storage with forwarding to aggregator + forwardingStorage := storage.NewForwardingStorage(memStorage, aggregatorClient) return &System{ - manager: manager, - storage: memStorage, + manager: manager, + storage: forwardingStorage, + aggregatorClient: aggregatorClient, } } @@ -88,6 +95,13 @@ func (s *System) Stop(ctx context.Context) error { storageWithSink.Close() } + // Close aggregator client + if s.aggregatorClient != nil { + if err := s.aggregatorClient.Close(); err != nil { + logger.Errorf("Failed to close aggregator client: %v", err) + } + } + // Detach all programs if err := s.manager.DetachAll(ctx); err != nil { return fmt.Errorf("failed to detach programs: %w", err) diff --git a/kind-config.yaml b/kind-config.yaml new file mode 100644 index 0000000..5754c46 --- /dev/null +++ b/kind-config.yaml @@ -0,0 +1,33 @@ +# Kind cluster configuration for eBPF monitoring testing +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 + +# Configure containerd registry mirror for local registry +containerdConfigPatches: +- |- + [plugins."io.containerd.grpc.v1.cri".registry.mirrors."localhost:5001"] + endpoint = ["http://kind-registry:5000"] + +# Cluster nodes configuration +nodes: +# Control plane node with ingress support +- role: control-plane + kubeadmConfigPatches: + - | + kind: InitConfiguration + nodeRegistration: + kubeletExtraArgs: + node-labels: "ingress-ready=true" + extraPortMappings: + # Aggregator API port + - containerPort: 30080 + hostPort: 8081 + protocol: TCP + # Agent monitoring port + - containerPort: 30081 + hostPort: 8080 + protocol: TCP + +# Worker nodes for testing DaemonSet deployment +- role: worker +- role: worker diff --git a/kubernetes/README.md b/kubernetes/README.md new file mode 100644 index 0000000..4104725 --- /dev/null +++ b/kubernetes/README.md @@ -0,0 +1,295 @@ +# eBPF Monitor - 
Kubernetes Deployment Guide + +This guide explains how to deploy the eBPF monitoring system in Kubernetes alongside the existing VM deployment option. + +## Architecture Overview + +The Kubernetes deployment consists of two main components: + +1. **eBPF Monitor Agent (DaemonSet)**: Runs on every node to collect eBPF events with Kubernetes metadata +2. **eBPF Aggregator (Deployment)**: Centralized service that collects and aggregates events from all agents + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Kubernetes Cluster β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ Node 1 β”‚ β”‚ Node 2 β”‚ β”‚ Node N β”‚ β”‚ +β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ +β”‚ β”‚ β”‚ eBPF β”‚ β”‚ β”‚ β”‚ eBPF β”‚ β”‚ β”‚ β”‚ eBPF β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚ Agent β”‚ β”‚ β”‚ β”‚ Agent β”‚ β”‚ β”‚ β”‚ Agent β”‚ β”‚ β”‚ +β”‚ β”‚ β””β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”˜ β”‚ β”‚ β””β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”˜ β”‚ β”‚ β””β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”˜ β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ β”‚ β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β–Όβ”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ eBPF β”‚ β”‚ +β”‚ β”‚ Aggregatorβ”‚ β”‚ +β”‚ β”‚ β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ 
+β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +## Features + +- **Dual Deployment**: Works both on VMs (unchanged) and Kubernetes +- **Kubernetes Metadata**: Events include node name, pod name, and namespace +- **DaemonSet Architecture**: Ensures monitoring on every cluster node +- **Centralized Aggregation**: Single API endpoint for all cluster events +- **Backward Compatible**: Existing VM deployments continue to work unchanged + +## Quick Start + +### Prerequisites + +- Kubernetes cluster (1.20+) +- `kubectl` configured for your cluster +- Docker registry access (or use local registry) +- `make` and `docker` installed + +### Option 1: Using the Deployment Script (Recommended) + +```bash +# Build, push, and deploy everything +./scripts/deploy.sh all --registry your-registry.com --tag v1.0.0 + +# Or step by step: +./scripts/deploy.sh build +./scripts/deploy.sh push --registry your-registry.com +./scripts/deploy.sh deploy +``` + +### Option 2: Using Make Targets + +```bash +# Build Docker images +make docker-build + +# Push to registry (update REGISTRY variable) +make docker-push REGISTRY=your-registry.com TAG=v1.0.0 + +# Deploy to Kubernetes +make k8s-deploy +``` + +### Option 3: Manual kubectl + +```bash +# Apply manifests in order +kubectl apply -f kubernetes/namespace.yaml +kubectl apply -f kubernetes/rbac.yaml +kubectl apply -f kubernetes/configmap.yaml +kubectl apply -f kubernetes/services.yaml +kubectl apply -f kubernetes/aggregator-deployment.yaml +kubectl apply -f kubernetes/daemonset.yaml +``` + +## Configuration + +### Environment Variables + +The system detects Kubernetes environment automatically but can be controlled with: + +| Variable | Description | Default | +|----------|-------------|---------| +| `DEPLOYMENT_MODE` | Set to "kubernetes" to force K8s mode | Auto-detected | +| `NODE_NAME` | 
Kubernetes node name | From downward API | +| `POD_NAME` | Pod name | From downward API | +| `POD_NAMESPACE` | Pod namespace | From downward API | +| `AGGREGATOR_URL` | Aggregator service URL | Auto-configured | + +### Resource Limits + +Default resource limits per pod: + +**Agent (per node):** +- CPU: 100m request, 500m limit +- Memory: 128Mi request, 512Mi limit + +**Aggregator:** +- CPU: 200m request, 1000m limit +- Memory: 256Mi request, 1Gi limit + +Adjust in the manifest files as needed for your cluster size. + +## API Access + +### Aggregator API + +The aggregator provides a unified API for all cluster events: + +```bash +# Port-forward to access aggregator +kubectl port-forward -n ebpf-system svc/ebpf-aggregator 8081:8081 + +# Query all events +curl http://localhost:8081/api/events + +# Query by type +curl 'http://localhost:8081/api/events?type=connection' + +# Query by node +curl 'http://localhost:8081/api/events' | jq '.events[] | select(.k8s_node_name=="node1")' + +# Get aggregation stats +curl http://localhost:8081/api/stats +``` + +### Individual Agent APIs + +Each agent also exposes its local API: + +```bash +# List all agent pods +kubectl get pods -n ebpf-system -l app=ebpf-monitor + +# Port-forward to specific agent +kubectl port-forward -n ebpf-system ebpf-monitor-xxxxx 8080:8080 + +# Query local events +curl http://localhost:8080/api/events +``` + +## Event Format with Kubernetes Metadata + +Events in Kubernetes mode include additional metadata: + +```json +{ + "id": "1a2b3c4d5e6f7890", + "type": "connection", + "pid": 1234, + "command": "curl", + "timestamp": 1703764800000000000, + "time": "2023-12-28T12:00:00.000000000Z", + "k8s_node_name": "worker-node-1", + "k8s_pod_name": "ebpf-monitor-abcde", + "k8s_namespace": "ebpf-system", + "source_ip": "10.244.1.5", + "dest_ip": "142.250.191.14", + "dest_port": 443 +} +``` + +## Monitoring and Troubleshooting + +### Check Deployment Status + +```bash +# Quick status check +./scripts/deploy.sh status + +#
Or manually: +kubectl get pods -n ebpf-system +kubectl get daemonset -n ebpf-system +kubectl get deployment -n ebpf-system +``` + +### View Logs + +```bash +# Aggregator logs +kubectl logs -l app=ebpf-aggregator -n ebpf-system -f + +# Agent logs from all nodes +kubectl logs -l app=ebpf-monitor -n ebpf-system --tail=50 + +# Specific agent logs +kubectl logs -n ebpf-system ebpf-monitor-xxxxx -f +``` + +### Common Issues + +**1. Agents not starting:** +- Check if nodes have required kernel headers: `kubectl describe pod -n ebpf-system` +- Verify privileged security context is allowed + +**2. No events from agents:** +- Check eBPF program loading: `kubectl logs -n ebpf-system ebpf-monitor-xxxxx` +- Verify kernel version compatibility (4.18+ required) + +**3. Aggregator connection issues:** +- Check service DNS resolution: `kubectl exec -n ebpf-system deploy/ebpf-aggregator -- nslookup ebpf-aggregator` +- Verify network policies don't block communication + +## Scaling Considerations + +### Large Clusters (100+ nodes) + +1. **Increase aggregator resources:** + ```yaml + resources: + requests: + cpu: 500m + memory: 1Gi + limits: + cpu: 2000m + memory: 4Gi + ``` + +2. **Configure event batching** (environment variables in daemonset): + ```yaml + - name: BATCH_SIZE + value: "100" + - name: FLUSH_INTERVAL + value: "30s" + ``` + +3. 
**Consider multiple aggregator replicas** for high availability: + ```yaml + spec: + replicas: 2 # Add load balancer configuration + ``` + +## VM Compatibility + +The system maintains full backward compatibility with VM deployments: + +```bash +# VM deployment (unchanged) +make build +sudo ./bin/ebpf-server + +# Events in VM mode (no Kubernetes metadata): +{ + "id": "1a2b3c4d5e6f7890", + "type": "connection", + "pid": 1234, + "command": "curl", + "timestamp": 1703764800000000000, + "time": "2023-12-28T12:00:00.000000000Z", + "source_ip": "192.168.1.100", + "dest_ip": "142.250.191.14", + "dest_port": 443 +} +``` + +## Cleanup + +```bash +# Remove everything +./scripts/deploy.sh undeploy + +# Or manually: +make k8s-undeploy +``` + +## Security Considerations + +- Agents run with privileged security context (required for eBPF) +- RBAC limits permissions to necessary resources only +- Network policies can be added to restrict aggregator access +- Consider Pod Security Standards in restricted environments + +## Performance + +Expected overhead per node: +- CPU: ~50-100m under normal load +- Memory: ~100-200Mi +- Network: ~1-5MB/min to aggregator (depends on event volume) + +The aggregator provides significant benefits: +- Single API endpoint for cluster-wide monitoring +- Event correlation across nodes +- Reduced client connections to individual agents +- Centralized storage and querying diff --git a/kubernetes/aggregator-deployment.yaml b/kubernetes/aggregator-deployment.yaml new file mode 100644 index 0000000..99add8e --- /dev/null +++ b/kubernetes/aggregator-deployment.yaml @@ -0,0 +1,52 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ebpf-aggregator + namespace: ebpf-system + labels: + app: ebpf-aggregator + component: aggregator +spec: + replicas: 1 + selector: + matchLabels: + app: ebpf-aggregator + component: aggregator + template: + metadata: + labels: + app: ebpf-aggregator + component: aggregator + spec: + serviceAccount: ebpf-aggregator + 
containers: + - name: aggregator + image: ebpf-aggregator:latest + imagePullPolicy: Always + ports: + - containerPort: 8081 + name: http + env: + - name: DEPLOYMENT_MODE + value: "kubernetes" + - name: LOG_LEVEL + value: "info" + resources: + requests: + memory: "256Mi" + cpu: "200m" + limits: + memory: "1Gi" + cpu: "1000m" + livenessProbe: + httpGet: + path: /health + port: 8081 + initialDelaySeconds: 30 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /health + port: 8081 + initialDelaySeconds: 5 + periodSeconds: 5 diff --git a/kubernetes/configmap.yaml b/kubernetes/configmap.yaml new file mode 100644 index 0000000..41259c8 --- /dev/null +++ b/kubernetes/configmap.yaml @@ -0,0 +1,23 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: ebpf-config + namespace: ebpf-system +data: + config.yaml: | + # eBPF Monitor Configuration + server: + port: 8080 + debug: false + + aggregator: + url: "http://ebpf-aggregator.ebpf-system.svc.cluster.local:8081" + timeout: "30s" + retry_attempts: 3 + + kubernetes: + enabled: true + metadata: + include_node_name: true + include_pod_name: true + include_namespace: true diff --git a/kubernetes/daemonset.yaml b/kubernetes/daemonset.yaml new file mode 100644 index 0000000..7547f15 --- /dev/null +++ b/kubernetes/daemonset.yaml @@ -0,0 +1,95 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: ebpf-monitor + namespace: ebpf-system + labels: + app: ebpf-monitor + component: agent +spec: + selector: + matchLabels: + app: ebpf-monitor + component: agent + template: + metadata: + labels: + app: ebpf-monitor + component: agent + spec: + serviceAccount: ebpf-monitor + hostNetwork: true + hostPID: true + dnsPolicy: ClusterFirstWithHostNet + tolerations: + - key: node-role.kubernetes.io/control-plane + operator: Exists + effect: NoSchedule + - key: node-role.kubernetes.io/master + operator: Exists + effect: NoSchedule + containers: + - name: ebpf-monitor + image: ebpf-monitor:latest + imagePullPolicy: Always + securityContext: + 
privileged: true + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: DEPLOYMENT_MODE + value: "kubernetes" + - name: AGGREGATOR_URL + value: "http://ebpf-aggregator.ebpf-system.svc.cluster.local:8081" + ports: + - containerPort: 8080 + name: http + resources: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "512Mi" + cpu: "500m" + volumeMounts: + - name: proc + mountPath: /host/proc + readOnly: true + - name: sys + mountPath: /host/sys + readOnly: true + - name: debugfs + mountPath: /sys/kernel/debug + livenessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 30 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 5 + periodSeconds: 5 + volumes: + - name: proc + hostPath: + path: /proc + - name: sys + hostPath: + path: /sys + - name: debugfs + hostPath: + path: /sys/kernel/debug + terminationGracePeriodSeconds: 30 diff --git a/kubernetes/namespace.yaml b/kubernetes/namespace.yaml new file mode 100644 index 0000000..248f6a1 --- /dev/null +++ b/kubernetes/namespace.yaml @@ -0,0 +1,6 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: ebpf-system + labels: + name: ebpf-system diff --git a/kubernetes/rbac.yaml b/kubernetes/rbac.yaml new file mode 100644 index 0000000..5c35117 --- /dev/null +++ b/kubernetes/rbac.yaml @@ -0,0 +1,61 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: ebpf-monitor + namespace: ebpf-system +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: ebpf-aggregator + namespace: ebpf-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: ebpf-monitor +rules: +- apiGroups: [""] + resources: ["nodes", "pods"] + verbs: ["get", "list", "watch"] +- apiGroups: [""] + resources: ["events"] + verbs: ["create"] +--- +apiVersion: 
rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: ebpf-monitor +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: ebpf-monitor +subjects: +- kind: ServiceAccount + name: ebpf-monitor + namespace: ebpf-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: ebpf-aggregator +rules: +- apiGroups: [""] + resources: ["nodes", "pods"] + verbs: ["get", "list", "watch"] +- apiGroups: [""] + resources: ["events"] + verbs: ["create", "get", "list"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: ebpf-aggregator +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: ebpf-aggregator +subjects: +- kind: ServiceAccount + name: ebpf-aggregator + namespace: ebpf-system diff --git a/kubernetes/services.yaml b/kubernetes/services.yaml new file mode 100644 index 0000000..c68637f --- /dev/null +++ b/kubernetes/services.yaml @@ -0,0 +1,38 @@ +apiVersion: v1 +kind: Service +metadata: + name: ebpf-aggregator + namespace: ebpf-system + labels: + app: ebpf-aggregator + component: aggregator +spec: + type: ClusterIP + ports: + - name: http + port: 8081 + targetPort: 8081 + protocol: TCP + selector: + app: ebpf-aggregator + component: aggregator +--- +apiVersion: v1 +kind: Service +metadata: + name: ebpf-monitor + namespace: ebpf-system + labels: + app: ebpf-monitor + component: agent +spec: + type: ClusterIP + clusterIP: None # Headless service for DaemonSet + ports: + - name: http + port: 8080 + targetPort: 8080 + protocol: TCP + selector: + app: ebpf-monitor + component: agent diff --git a/scripts/create-kind-cluster.sh b/scripts/create-kind-cluster.sh new file mode 100755 index 0000000..95c03a4 --- /dev/null +++ b/scripts/create-kind-cluster.sh @@ -0,0 +1,42 @@ +#!/bin/bash +set -e + +# Configuration from environment or defaults +KIND_CLUSTER_NAME=${KIND_CLUSTER_NAME:-ebpf-test} +KIND_REGISTRY_NAME=${KIND_REGISTRY_NAME:-kind-registry} 
+KIND_REGISTRY_PORT=${KIND_REGISTRY_PORT:-5001} + +echo "πŸš€ Creating kind cluster with local registry..." + +# Check if cluster already exists +if kind get clusters | grep -q "^${KIND_CLUSTER_NAME}$"; then + echo "βœ… Cluster ${KIND_CLUSTER_NAME} already exists" + exit 0 +fi + +echo "πŸ“¦ Creating local container registry..." +docker run -d --restart=always \ + -p "127.0.0.1:${KIND_REGISTRY_PORT}:5000" \ + --name "${KIND_REGISTRY_NAME}" \ + registry:2 || true + +echo "πŸ—οΈ Creating kind cluster..." +kind create cluster --name "${KIND_CLUSTER_NAME}" --config kind-config.yaml + +echo "πŸ”— Connecting registry to cluster network..." +docker network connect "kind" "${KIND_REGISTRY_NAME}" || true + +echo "πŸ“‹ Documenting local registry..." +kubectl apply -f - < kubernetes/daemonset.yaml + +# Backup and update aggregator manifest +cp kubernetes/aggregator-deployment.yaml kubernetes/aggregator-deployment.yaml.bak +sed "s|ebpf-aggregator:latest|localhost:${KIND_REGISTRY_PORT}/${AGGREGATOR_NAME}:${TAG}|g" \ + kubernetes/aggregator-deployment.yaml.bak > kubernetes/aggregator-deployment.yaml + +# Apply manifests in order +echo "πŸ“‹ Applying manifests..." +kubectl apply -f kubernetes/namespace.yaml +kubectl apply -f kubernetes/rbac.yaml +kubectl apply -f kubernetes/configmap.yaml +kubectl apply -f kubernetes/services.yaml +kubectl apply -f kubernetes/aggregator-deployment.yaml +kubectl apply -f kubernetes/daemonset.yaml + +# Restore original manifests +echo "πŸ”„ Restoring original manifests..." +mv kubernetes/daemonset.yaml.bak kubernetes/daemonset.yaml +mv kubernetes/aggregator-deployment.yaml.bak kubernetes/aggregator-deployment.yaml + +echo "⏳ Waiting for deployments to be ready..." +kubectl wait --for=condition=available --timeout=300s deployment/ebpf-aggregator -n ebpf-system +kubectl rollout status daemonset/ebpf-monitor -n ebpf-system --timeout=300s + +echo "βœ… Deployment completed!" 
diff --git a/scripts/deploy.sh b/scripts/deploy.sh new file mode 100755 index 0000000..0461ab3 --- /dev/null +++ b/scripts/deploy.sh @@ -0,0 +1,153 @@ +#!/bin/bash +set -e + +# eBPF Monitor Kubernetes Deployment Script + +REGISTRY=${REGISTRY:-"localhost:5000"} +TAG=${TAG:-"latest"} +NAMESPACE=${NAMESPACE:-"ebpf-system"} + +function usage() { + echo "Usage: $0 [COMMAND] [OPTIONS]" + echo "" + echo "Commands:" + echo " build Build Docker images" + echo " push Push Docker images to registry" + echo " deploy Deploy to Kubernetes" + echo " undeploy Remove from Kubernetes" + echo " status Check deployment status" + echo " logs Show logs" + echo " all Build, push, and deploy" + echo "" + echo "Options:" + echo " --registry REGISTRY Docker registry (default: $REGISTRY)" + echo " --tag TAG Image tag (default: $TAG)" + echo " --namespace NS Kubernetes namespace (default: $NAMESPACE)" + echo "" + echo "Examples:" + echo " $0 all --registry my-registry.com --tag v1.0.0" + echo " $0 deploy --namespace monitoring" + exit "${1:-1}" # status defaults to 1 (usage error); --help passes 0 +} + +function build() { + echo "🔨 Building Docker images..." + make docker-build REGISTRY="$REGISTRY" TAG="$TAG" +} + +function push() { + echo "📤 Pushing Docker images..." + make docker-push REGISTRY="$REGISTRY" TAG="$TAG" +} + +function deploy() { + echo "🚀 Deploying to Kubernetes namespace: $NAMESPACE..." + + # Update image references in manifests if needed + if [[ "$REGISTRY" != "localhost:5000" ]] || [[ "$TAG" != "latest" ]]; then + echo "📝 Updating image references..."
+ sed -i.bak "s|ebpf-monitor:latest|$REGISTRY/ebpf-server:$TAG|g" kubernetes/daemonset.yaml + sed -i.bak "s|ebpf-aggregator:latest|$REGISTRY/ebpf-aggregator:$TAG|g" kubernetes/aggregator-deployment.yaml + fi + + # Record the status instead of letting `set -e` abort before the manifests are restored + local rc=0 + make k8s-deploy || rc=$? + + # Restore original manifests + if [[ -f kubernetes/daemonset.yaml.bak ]]; then + mv kubernetes/daemonset.yaml.bak kubernetes/daemonset.yaml + mv kubernetes/aggregator-deployment.yaml.bak kubernetes/aggregator-deployment.yaml + fi + + return "$rc" +} + +function undeploy() { + echo "🗑️ Removing from Kubernetes..." + make k8s-undeploy +} + +function status() { + echo "📊 Checking deployment status..." + make k8s-status +} + +function logs() { + echo "📋 Showing logs..." + echo "" + echo "Aggregator logs:" + make k8s-logs & + sleep 2 + echo "" + echo "Agent logs (first 50 lines):" + make k8s-logs-agents +} + +function all() { + build + push + deploy +} + +# Parse command line arguments +COMMAND="" +while [[ $# -gt 0 ]]; do + case $1 in + build|push|deploy|undeploy|status|logs|all) + COMMAND="$1" + shift + ;; + --registry) + REGISTRY="${2:?--registry requires a value}" + shift 2 + ;; + --tag) + TAG="${2:?--tag requires a value}" + shift 2 + ;; + --namespace) + NAMESPACE="${2:?--namespace requires a value}" + shift 2 + ;; + -h|--help) + usage 0 + ;; + *) + echo "Unknown option: $1" + usage + ;; + esac +done + +if [[ -z "$COMMAND" ]]; then + echo "Error: No command specified" + usage +fi + +# Execute command +case $COMMAND in + build) + build + ;; + push) + push + ;; + deploy) + deploy + ;; + undeploy) + undeploy + ;; + status) + status + ;; + logs) + logs + ;; + all) + all + ;; +esac + +echo "✅ Command '$COMMAND' completed successfully!"
diff --git a/scripts/load-kind-images.sh b/scripts/load-kind-images.sh new file mode 100755 index 0000000..7963a80 --- /dev/null +++ b/scripts/load-kind-images.sh @@ -0,0 +1,23 @@ +#!/bin/bash +set -e + +# Configuration from environment or defaults +KIND_CLUSTER_NAME=${KIND_CLUSTER_NAME:-ebpf-test} +KIND_REGISTRY_NAME=${KIND_REGISTRY_NAME:-kind-registry} +KIND_REGISTRY_PORT=${KIND_REGISTRY_PORT:-5001} +BINARY_NAME=${BINARY_NAME:-ebpf-server} +AGGREGATOR_NAME=${AGGREGATOR_NAME:-ebpf-aggregator} +TAG=${TAG:-latest} + +echo "πŸ“¦ Loading Docker images into kind cluster..." + +# Build images with localhost registry tag for kind +docker tag "localhost:5000/${BINARY_NAME}:${TAG}" "localhost:${KIND_REGISTRY_PORT}/${BINARY_NAME}:${TAG}" +docker tag "localhost:5000/${AGGREGATOR_NAME}:${TAG}" "localhost:${KIND_REGISTRY_PORT}/${AGGREGATOR_NAME}:${TAG}" + +# Push to local registry +echo "Pushing images to local registry..." +docker push "localhost:${KIND_REGISTRY_PORT}/${BINARY_NAME}:${TAG}" +docker push "localhost:${KIND_REGISTRY_PORT}/${AGGREGATOR_NAME}:${TAG}" + +echo "βœ… Images pushed to kind local registry" diff --git a/scripts/test-kind-deployment.sh b/scripts/test-kind-deployment.sh new file mode 100755 index 0000000..013419f --- /dev/null +++ b/scripts/test-kind-deployment.sh @@ -0,0 +1,56 @@ +#!/bin/bash +set -e + +echo "πŸ§ͺ Testing eBPF system on Kind cluster..." + +# Configuration +NAMESPACE=${NAMESPACE:-ebpf-system} + +echo "πŸ“Š Checking deployment status..." 
+ +# Check aggregator deployment +echo "Aggregator deployment status:" +kubectl get deployment ebpf-aggregator -n ${NAMESPACE} + +# Check daemonset +echo "DaemonSet status:" +kubectl get daemonset ebpf-monitor -n ${NAMESPACE} + +# Get pods +echo "📋 Pods:" +kubectl get pods -n ${NAMESPACE} + +# Check aggregator logs +echo "🔍 Aggregator logs (last 20 lines):" +kubectl logs -l app=ebpf-aggregator -n ${NAMESPACE} --tail=20 + +# Check DaemonSet logs +echo "🔍 Monitor logs (last 20 lines from one pod):" +POD_NAME=$(kubectl get pods -n ${NAMESPACE} -l app=ebpf-monitor -o jsonpath='{.items[0].metadata.name}') +if [ -n "$POD_NAME" ]; then + kubectl logs "$POD_NAME" -n ${NAMESPACE} --tail=20 +fi + +# Test aggregator API via the "ebpf-aggregator" Service (port 8081); local port 18081 sidesteps host ports 8080/8081, which kind-config.yaml maps +echo "🌐 Testing aggregator API..." +kubectl port-forward service/ebpf-aggregator 18081:8081 -n ${NAMESPACE} & +PORT_FORWARD_PID=$! + +sleep 3 + +# Test health endpoint +echo "Health check:" +curl -s http://localhost:18081/health || echo "Health endpoint not accessible" + +# Test events endpoint +echo "Events check:" +curl -s http://localhost:18081/api/events | jq '.' || echo "Events endpoint not accessible" + +# Test stats endpoint +echo "Stats check:" +curl -s http://localhost:18081/api/stats | jq '.' || echo "Stats endpoint not accessible" + +# Cleanup port-forward +kill "$PORT_FORWARD_PID" 2>/dev/null || true + +echo "✅ Testing completed!" diff --git a/scripts/test-kind.sh b/scripts/test-kind.sh new file mode 100755 index 0000000..4325f5a --- /dev/null +++ b/scripts/test-kind.sh @@ -0,0 +1,198 @@ +#!/bin/bash +set -e + +echo "🧪 eBPF Monitoring Kind Cluster Test Suite" +echo "==========================================" + +# Configuration +NAMESPACE="ebpf-system" +AGGREGATOR_SERVICE="ebpf-aggregator" +PORT=8081 + +# Function to check if pods are ready +check_pods_ready() { + echo "⏳ Waiting for all pods to be ready..."
+ kubectl wait --for=condition=ready --timeout=300s pod -l app=ebpf-aggregator -n $NAMESPACE + kubectl wait --for=condition=ready --timeout=300s pod -l app=ebpf-monitor -n $NAMESPACE + echo "βœ… All pods are ready" +} + +# Function to test aggregator APIs +test_apis() { + echo "πŸ” Testing aggregator APIs..." + + # Start port forwarding in background + kubectl port-forward -n $NAMESPACE svc/$AGGREGATOR_SERVICE $PORT:$PORT > /dev/null 2>&1 & + PF_PID=$! + + # Wait for port forward to be ready + sleep 5 + + # Test health endpoint + echo "Testing health endpoint..." + if curl -s -f http://localhost:$PORT/health > /dev/null; then + echo "βœ… Health endpoint working" + curl -s http://localhost:$PORT/health | jq . + else + echo "❌ Health endpoint failed" + return 1 + fi + + echo "" + + # Test events endpoint + echo "Testing events endpoint..." + if curl -s -f http://localhost:$PORT/api/events > /dev/null; then + echo "βœ… Events endpoint working" + echo "Sample response:" + curl -s http://localhost:$PORT/api/events | head -c 300 + echo "..." + else + echo "❌ Events endpoint failed" + fi + + echo "" + + # Test stats endpoint + echo "Testing stats endpoint..." + if curl -s -f http://localhost:$PORT/api/stats > /dev/null; then + echo "βœ… Stats endpoint working" + curl -s http://localhost:$PORT/api/stats | jq . + else + echo "❌ Stats endpoint failed" + fi + + # Clean up port forwarding + kill $PF_PID > /dev/null 2>&1 || true + + echo "" +} + +# Function to generate some network activity for testing +generate_test_events() { + echo "🌐 Generating network activity to test eBPF monitoring..." + + # Get one of the agent pods to exec into + AGENT_POD=$(kubectl get pods -n $NAMESPACE -l app=ebpf-monitor -o jsonpath='{.items[0].metadata.name}') + + if [[ -z "$AGENT_POD" ]]; then + echo "❌ No agent pods found" + return 1 + fi + + echo "Using agent pod: $AGENT_POD" + + # Generate some network connections + echo "Making HTTP requests to generate connection events..." 
+ kubectl exec -n $NAMESPACE $AGENT_POD -- curl -s -m 5 http://httpbin.org/get > /dev/null 2>&1 || true + kubectl exec -n $NAMESPACE $AGENT_POD -- curl -s -m 5 https://httpbin.org/get > /dev/null 2>&1 || true + kubectl exec -n $NAMESPACE $AGENT_POD -- nslookup google.com > /dev/null 2>&1 || true + + echo "βœ… Test network activity generated" + + # Give some time for events to be processed + sleep 5 +} + +# Function to check for events +check_events() { + echo "πŸ“Š Checking for captured events..." + + # Start port forwarding in background + kubectl port-forward -n $NAMESPACE svc/$AGGREGATOR_SERVICE $PORT:$PORT > /dev/null 2>&1 & + PF_PID=$! + + # Wait for port forward to be ready + sleep 3 + + # Query events + echo "Querying recent events..." + EVENTS_RESPONSE=$(curl -s http://localhost:$PORT/api/events?limit=5) + + if [[ -n "$EVENTS_RESPONSE" ]]; then + echo "βœ… Events captured successfully" + echo "$EVENTS_RESPONSE" | jq . || echo "$EVENTS_RESPONSE" + else + echo "⚠️ No events found (this might be expected in some environments)" + fi + + # Clean up port forwarding + kill $PF_PID > /dev/null 2>&1 || true + + echo "" +} + +# Function to show cluster status +show_status() { + echo "πŸ“‹ Cluster Status" + echo "==================" + + echo "Nodes:" + kubectl get nodes -o wide + + echo "" + echo "Pods in $NAMESPACE:" + kubectl get pods -n $NAMESPACE -o wide + + echo "" + echo "Services in $NAMESPACE:" + kubectl get services -n $NAMESPACE + + echo "" + echo "DaemonSet status:" + kubectl get daemonset -n $NAMESPACE + + echo "" + echo "Deployment status:" + kubectl get deployment -n $NAMESPACE + + echo "" +} + +# Function to show logs +show_logs() { + echo "πŸ“ Recent Logs" + echo "==============" + + echo "Aggregator logs:" + kubectl logs -n $NAMESPACE -l app=ebpf-aggregator --tail=20 --timestamps + + echo "" + echo "Agent logs (from first pod):" + AGENT_POD=$(kubectl get pods -n $NAMESPACE -l app=ebpf-monitor -o jsonpath='{.items[0].metadata.name}') + if [[ -n 
"$AGENT_POD" ]]; then + kubectl logs -n $NAMESPACE $AGENT_POD --tail=20 --timestamps + fi + + echo "" +} + +# Main test execution +main() { + echo "Starting comprehensive test suite..." + echo "" + + show_status + check_pods_ready + test_apis + generate_test_events + check_events + show_logs + + echo "πŸŽ‰ Test suite completed!" + echo "" + echo "πŸ’‘ To manually test:" + echo " kubectl port-forward -n $NAMESPACE svc/$AGGREGATOR_SERVICE $PORT:$PORT" + echo " curl http://localhost:$PORT/health" + echo " curl http://localhost:$PORT/api/events" +} + +# Check if running in automation or interactive mode +if [[ "${1:-}" == "--automated" ]]; then + # In automated mode, exit on any failure + main +else + # In interactive mode, continue on failures for debugging + set +e + main +fi From 1cf27e019183f163889e17caa6294e6a7dafc010 Mon Sep 17 00:00:00 2001 From: Simone Rodigari Date: Tue, 12 Aug 2025 21:16:09 +0100 Subject: [PATCH 2/7] update README.md --- README.md | 417 +++++++++++++----------------------------------------- 1 file changed, 101 insertions(+), 316 deletions(-) diff --git a/README.md b/README.md index b00daba..6ea9f0d 100644 --- a/README.md +++ b/README.md @@ -1,30 +1,6 @@ # eBPF Network Monitor -[### Kubernetes Deployment (New!) 
-```bash -# Quick deployment with built-in script -./scripts/deploy.sh all --registry your-registry.com - -# Or step by step -make docker-build -make docker-push REGISTRY=your-registry.com -make k8s-deploy -``` - -### Local Testing with Kind -```bash -# Test locally with Kind (Kubernetes in Docker) -make kind-full-test - -# Or step by step: -make kind-cluster-create # Create local cluster -make kind-deploy # Deploy to kind cluster -make kind-integration-test # Run comprehensive tests -``` - -**📖 [Kubernetes Deployment Guide](kubernetes/README.md)** - Complete setup and configuration guide - -**🧪 [Kind Testing Guide](KIND_TESTING.md)** - Local testing with Kind clusterse](https://github.com/srodi/ebpf-server/actions/workflows/ci.yml/badge.svg)](https://github.com/srodi/ebpf-server/actions/workflows/ci.yml) +[![CI](https://github.com/srodi/ebpf-server/actions/workflows/ci.yml/badge.svg)](https://github.com/srodi/ebpf-server/actions/workflows/ci.yml) [![API Documentation](https://img.shields.io/badge/API-Documentation-blue?style=for-the-badge&logo=swagger)](https://petstore.swagger.io/?url=https://raw.githubusercontent.com/srodi/ebpf-server/main/docs/swagger/swagger.json) [![OpenAPI Spec](https://img.shields.io/badge/OpenAPI-3.0-green?style=for-the-badge&logo=openapiinitiative)](docs/swagger.json) [![Go Version](https://img.shields.io/badge/Go-1.23+-00ADD8?style=for-the-badge&logo=go)](https://golang.org) @@ -33,21 +9,10 @@ A modular eBPF monitoring system with HTTP API server for real-time network and ## 🚀 Deployment Options -### VM Deployment (Traditional) -```bash -# Install dependencies (Ubuntu/Debian) -sudo apt install -y golang-go clang libbpf-dev linux-headers-$(uname -r) - -# Build and run -make build -sudo ./bin/ebpf-server +### Kubernetes Deployment (Recommended) -# Test the API -curl http://localhost:8080/health -curl "http://localhost:8080/api/events?type=connection&limit=10" -``` +Deploy across your entire Kubernetes cluster with automatic node
metadata enrichment: -### Kubernetes Deployment (New!) ```bash # Quick deployment with built-in script ./scripts/deploy.sh all --registry your-registry.com @@ -58,270 +23,46 @@ make docker-push REGISTRY=your-registry.com make k8s-deploy ``` -**οΏ½ [Kubernetes Deployment Guide](kubernetes/README.md)** - Complete setup and configuration guide - -**οΏ½πŸ“š [View Interactive API Documentation](https://petstore.swagger.io/?url=https://raw.githubusercontent.com/srodi/ebpf-server/main/docs/swagger/swagger.json)** - Test APIs directly in your browser - -## Architecture - -### VM Mode -**Modular, interface-based monitoring system** with clean separation of concerns: - -``` -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ System Layer β”‚ -β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ -β”‚ β”‚ Manager β”‚ β”‚ -β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ -β”‚ β”‚ β”‚ Connection β”‚ β”‚ Packet Drop β”‚ β”‚ Your New β”‚ β”‚ β”‚ -β”‚ β”‚ β”‚ Program β”‚ β”‚ Program β”‚ β”‚ Program β”‚ β”‚ β”‚ -β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ -β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - β”‚ - β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β” - β”‚ β”‚ - 
β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β–Όβ”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β–Όβ”€β”€β”€β”€β”€β” - β”‚ Event Storage β”‚ β”‚ HTTP API β”‚ - β”‚ (Unified) β”‚ β”‚ Handlers β”‚ - β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ -``` - -### Kubernetes Mode -**Distributed monitoring with centralized aggregation**: - -``` -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ Kubernetes Cluster β”‚ -β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ -β”‚ β”‚ Node 1 β”‚ β”‚ Node 2 β”‚ β”‚ Node N β”‚ β”‚ -β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ -β”‚ β”‚ β”‚ eBPF β”‚ β”‚ β”‚ β”‚ eBPF β”‚ β”‚ β”‚ β”‚ eBPF β”‚ β”‚ β”‚ -β”‚ β”‚ β”‚ Agent β”‚ β”‚ β”‚ β”‚ Agent β”‚ β”‚ β”‚ β”‚ Agent β”‚ β”‚ β”‚ -β”‚ β”‚ β”‚+K8s Metaβ”‚ β”‚ β”‚ β”‚+K8s Metaβ”‚ β”‚ β”‚ β”‚+K8s Metaβ”‚ β”‚ β”‚ -β”‚ β”‚ β””β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”˜ β”‚ β”‚ β””β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”˜ β”‚ β”‚ β””β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”˜ β”‚ β”‚ -β”‚ β””β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ -β”‚ β”‚ β”‚ β”‚ β”‚ -β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ -β”‚ β”‚ β”‚ -β”‚ β”Œβ”€β”€β”€β”€β”€β–Όβ”€β”€β”€β”€β”€β” β”‚ -β”‚ β”‚ eBPF β”‚ β”‚ -β”‚ β”‚ Aggregator│◄─── Unified API β”‚ -β”‚ β”‚ β”‚ β”‚ -β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ 
-β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ -``` - -**Core Components:** -- **Core Interfaces**: Define contracts for Events, Programs, Managers, and Storage -- **Event System**: Unified event creation, streaming, and storage with `BaseEvent` -- **Program Manager**: Coordinates program lifecycle and provides unified event streams -- **Storage Layer**: Persistent event storage with query capabilities -- **API Layer**: HTTP endpoints for querying events and program status -- **System Layer**: Top-level coordination and initialization +**πŸ“– [Complete Kubernetes Guide](kubernetes/README.md)** - Detailed setup and configuration -## Event Flow Architecture - -The system processes events through a real-time streaming pipeline that ensures low latency and high throughput: +### Local Testing with Kind -``` -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ eBPF Program β”‚ β”‚ Ring Buffer β”‚ β”‚ Event Parser β”‚ β”‚ Event Stream β”‚ -β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ -β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ -β”‚ β”‚ sys_connect β”‚ │───▢│ β”‚ events β”‚ │───▢│ β”‚ Connection β”‚ │───▢│ β”‚ Channel β”‚ β”‚ -β”‚ β”‚ tracepoint β”‚ β”‚ β”‚ β”‚ (16MB) β”‚ β”‚ β”‚ β”‚ Parser β”‚ β”‚ β”‚ β”‚ (buffered) β”‚ β”‚ -β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ -β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ -β”‚ 
β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ β”‚ -β”‚ β”‚ kfree_skb β”‚ │───▢│ β”‚drop_events β”‚ │───▢│ β”‚ PacketDrop β”‚ │───▢│ β”‚ -β”‚ β”‚ tracepoint β”‚ β”‚ β”‚ β”‚ (256KB) β”‚ β”‚ β”‚ β”‚ Parser β”‚ β”‚ β”‚ β”‚ -β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - β”‚ β”‚ β”‚ β”‚ - Kernel Space Ring Buffer Go Application Event Stream - (eBPF Programs) (Temporary) (Event Parsing) (Buffered) - β”‚ β”‚ - β–Ό β–Ό - β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” - β”‚ Always Empty β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ - β”‚ β”‚ β”‚ β”‚ Memory Storage β”‚ β”‚ - β”‚ Events consumed β”‚ β”‚ β”‚ β”‚ β”‚ - β”‚ immediately by β”‚ β”‚ β”‚ β€’ Query Events β”‚ β”‚ - β”‚ userspace readers β”‚ β”‚ β”‚ β€’ Time Filters β”‚ β”‚ - β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ β€’ PID Grouping β”‚ β”‚ - β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ - β”‚ β”‚ β”‚ - β”‚ β–Ό β”‚ - β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ - β”‚ β”‚ HTTP API β”‚ β”‚ - β”‚ β”‚ β”‚ β”‚ - β”‚ β”‚ /api/list- β”‚ β”‚ - β”‚ β”‚ connections β”‚ β”‚ - β”‚ β”‚ β”‚ β”‚ - β”‚ β”‚ /api/list- β”‚ β”‚ - β”‚ β”‚ packet-drops β”‚ β”‚ - β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ - β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +Test the full Kubernetes deployment locally: +```bash +# Full automated test +make 
kind-full-test +# Or step by step: +make kind-cluster-create # Create local cluster +make kind-deploy # Deploy to kind cluster +make kind-integration-test # Run comprehensive tests ``` -### Ring buffers - -Ring buffers in eBPF are designed for real-time streaming: - -1. **eBPF programs** write events to ring buffers using `bpf_ringbuf_reserve()` and `bpf_ringbuf_submit()` -2. **Userspace readers** immediately consume events using `ringbuf.NewReader()` -3. **Events are parsed** and sent to Go event streams -4. **Ring buffers become empty** as events are consumed in real-time -5. **Events are stored** in memory for API queries - -Events flow through the pipeline without accumulating in kernel space. - -## Extending the System - -📚 **[Complete Development Guide](docs/program-development.md)** - Detailed guide for creating new eBPF monitoring programs - -### Quick Example: Create a New Monitoring Program +### VM Deployment (Traditional) -### 1. Create a New Monitoring Program +For single-server deployments: ```bash -mkdir -p internal/programs/your_monitor -``` - -### 2.
Implement Your Program - -Create `internal/programs/your_monitor/your_monitor.go`: - -```go -package your_monitor - -import ( - "context" - "encoding/binary" - "fmt" - - "github.com/srodi/ebpf-server/internal/core" - "github.com/srodi/ebpf-server/internal/events" - "github.com/srodi/ebpf-server/internal/programs" - "github.com/srodi/ebpf-server/pkg/logger" -) - -const ( - ProgramName = "your_monitor" - ProgramDescription = "Monitors your custom events" - ObjectPath = "bpf/your_monitor.o" - TracepointProgram = "trace_your_event" - EventsMapName = "events" -) - -type Program struct { - *programs.BaseProgram -} - -func NewProgram() *Program { - base := programs.NewBaseProgram(ProgramName, ProgramDescription, ObjectPath) - return &Program{BaseProgram: base} -} - -func (p *Program) Attach(ctx context.Context) error { - if !p.IsLoaded() { - return fmt.Errorf("program not loaded") - } - - logger.Debugf("Attaching %s program", ProgramName) - - if err := p.AttachTracepoint("syscalls", "your_event", TracepointProgram); err != nil { - return fmt.Errorf("failed to attach: %w", err) - } - - if err := p.StartEventProcessing(ctx, EventsMapName, p.parseEvent); err != nil { - return fmt.Errorf("failed to start processing: %w", err) - } - - p.SetAttached(true) - return nil -} - -func (p *Program) parseEvent(data []byte) (core.Event, error) { - if len(data) < 24 { - return nil, fmt.Errorf("insufficient data: %d bytes", len(data)) - } - - pid := binary.LittleEndian.Uint32(data[0:4]) - timestamp := binary.LittleEndian.Uint64(data[4:12]) - command := extractNullTerminatedString(data[12:]) - - metadata := map[string]interface{}{ - "custom_field": "custom_value", - } - - return events.NewBaseEvent(ProgramName, pid, command, timestamp, metadata), nil -} -``` -### 3. 
Register Your Program +# Install dependencies (Ubuntu/Debian) +sudo apt install -y golang-go clang libbpf-dev linux-headers-$(uname -r) -Add to `internal/system/system.go` in the `Initialize()` method: +# Build and run +make build +sudo ./bin/ebpf-server -```go -// Register your program -yourProgram := your_monitor.NewProgram() -if err := s.manager.RegisterProgram(yourProgram); err != nil { - return fmt.Errorf("failed to register your_monitor: %w", err) -} -logger.Debugf("βœ… Registered your monitoring program") +# Test the API +curl http://localhost:8080/health +curl "http://localhost:8080/api/events?type=connection&limit=10" ``` -### 4. Create eBPF C Code - -Create `bpf/your_monitor.c`: - -```c -#include "vmlinux.h" -#include "bpf_helpers.h" -#include "bpf_tracing.h" - -struct your_event { - u32 pid; - u64 timestamp; - char comm[16]; - char custom_field[64]; -}; - -struct { - __uint(type, BPF_MAP_TYPE_RINGBUF); - __uint(max_entries, 256 * 1024); -} events SEC(".maps"); - -SEC("tracepoint/syscalls/your_event") -int trace_your_event(void *ctx) { - struct your_event *event; - - event = bpf_ringbuf_reserve(&events, sizeof(*event), 0); - if (!event) { - return 0; - } - - event->pid = bpf_get_current_pid_tgid() >> 32; - event->timestamp = bpf_ktime_get_ns(); - bpf_get_current_comm(&event->comm, sizeof(event->comm)); - - // Add your custom logic here - - bpf_ringbuf_submit(event, 0); - return 0; -} - -char LICENSE[] SEC("license") = "GPL"; -``` +**πŸ“š [Interactive API Documentation](https://petstore.swagger.io/?url=https://raw.githubusercontent.com/srodi/ebpf-server/main/docs/swagger/swagger.json)** - Test APIs in your browser ## ✨ Key Features ### πŸ”„ Dual Deployment Support -- **VM Mode**: Traditional single-server deployment (unchanged) - **Kubernetes Mode**: DaemonSet + Aggregator architecture for cluster-wide monitoring +- **VM Mode**: Traditional single-server deployment - **Automatic Detection**: Seamlessly detects environment and adapts behavior ### 🏷️ 
Kubernetes Metadata Enrichment @@ -338,17 +79,60 @@ Events in Kubernetes include rich metadata: ``` ### πŸ—οΈ Scalable Architecture -- **DaemonSet Agents**: One monitoring pod per cluster node -- **Centralized Aggregator**: Single API endpoint for cluster-wide events -- **Event Forwarding**: Automatic forwarding from agents to aggregator -- **Backward Compatible**: Existing VM deployments continue unchanged + +**Kubernetes Mode**: Distributed monitoring with centralized aggregation +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Kubernetes Cluster β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ Node 1 β”‚ β”‚ Node 2 β”‚ β”‚ Node N β”‚ β”‚ +β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ +β”‚ β”‚ β”‚ eBPF β”‚ β”‚ β”‚ β”‚ eBPF β”‚ β”‚ β”‚ β”‚ eBPF β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚ Agent β”‚ β”‚ β”‚ β”‚ Agent β”‚ β”‚ β”‚ β”‚ Agent β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚+K8s Metaβ”‚ β”‚ β”‚ β”‚+K8s Metaβ”‚ β”‚ β”‚ β”‚+K8s Metaβ”‚ β”‚ β”‚ +β”‚ β”‚ β””β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”˜ β”‚ β”‚ β””β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”˜ β”‚ β”‚ β””β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”˜ β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ β”‚ β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β–Όβ”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ eBPF β”‚ β”‚ +β”‚ β”‚ Aggregator│◄─── Unified API β”‚ +β”‚ β”‚ β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ 
+β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +**VM Mode**: Modular, interface-based monitoring system +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ eBPF Programs β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ Connection β”‚ β”‚ Packet Drop β”‚ β”‚ Custom β”‚ β”‚ +β”‚ β”‚ Monitor β”‚ β”‚ Monitor β”‚ β”‚ Monitors β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”˜ β”‚ +└─────────┼─────────────────┼──────────────────┼-β”€β”€β”€β”€β”€β”˜ + β”‚ β”‚ β”‚ + β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β–Ό + β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + β”‚ Event Processing β”‚ + β”‚ (Manager + Storage) β”‚ + β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β–Ό + β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + β”‚ HTTP API β”‚ + β”‚ (/api/events) β”‚ + β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` ### πŸ“Š Unified Monitoring - **Cross-Node Correlation**: View events across entire Kubernetes cluster - **Node-Specific Filtering**: Query events by specific nodes or pods - **Aggregated Statistics**: Cluster-wide event statistics and metrics +- **Backward Compatible**: Existing VM deployments continue unchanged -## API Features +## πŸ“‘ API Features - **Unified Event API**: Single `/api/events` endpoint for all monitoring data - **Flexible Filtering**: Filter by event type, PID, 
command, and time windows @@ -357,15 +141,13 @@ Events in Kubernetes include rich metadata: - **Auto-Generated Documentation**: OpenAPI 3.0 spec from code annotations - **Interactive Testing**: Built-in Swagger UI for API exploration -## API Endpoints - ### Core Endpoints - **`GET /health`** - System health and status - **`GET /api/events`** - Query events with filtering support - **`GET /api/programs`** - List all programs and their status -### Event Query Examples +### Query Examples ```bash # Get all connection events from the last hour @@ -374,8 +156,8 @@ curl "http://localhost:8080/api/events?type=connection&since=2023-01-01T00:00:00 # Get events for a specific process curl "http://localhost:8080/api/events?pid=1234&limit=50" -# Get packet drop events with command filter -curl "http://localhost:8080/api/events?type=packet_drop&command=curl" +# Kubernetes: Get events from specific node +curl "http://localhost:8080/api/events?k8s_node_name=worker-1" ``` ### Query Parameters @@ -383,11 +165,11 @@ curl "http://localhost:8080/api/events?type=packet_drop&command=curl" - `type`: Event type filter (e.g., "connection", "packet_drop") - `pid`: Process ID filter - `command`: Command name filter -- `since`: RFC3339 timestamp for start time -- `until`: RFC3339 timestamp for end time +- `k8s_node_name`, `k8s_pod_name`, `k8s_namespace`: Kubernetes filters +- `since`, `until`: RFC3339 timestamp filters - `limit`: Maximum results (default: 100) -## Development +## πŸ› οΈ Development ```bash # Development build with debug logging @@ -403,34 +185,37 @@ make test make build-bpf ``` -## Project Structure +**πŸ“š [Complete Development Guide](docs/program-development.md)** - Detailed guide for creating new eBPF monitoring programs + +## πŸ“ Project Structure ``` -β”œβ”€β”€ cmd/server/ # Main application entry point +β”œβ”€β”€ cmd/ # Application entry points +β”‚ β”œβ”€β”€ server/ # eBPF monitoring server +β”‚ └── aggregator/ # Kubernetes aggregator β”œβ”€β”€ internal/ -β”‚ 
├── core/ # Core interfaces and types -│ ├── events/ # Event system (BaseEvent, streams) -│ ├── programs/ # eBPF program implementations -│ │ ├── base.go # BaseProgram foundation -│ │ ├── manager.go # Program manager -│ │ ├── connection/ # Network connection monitoring -│ │ └── packet_drop/ # Packet drop monitoring -│ ├── storage/ # Event storage and querying -│ ├── api/ # HTTP API handlers -│ └── system/ # System initialization and coordination -├── bpf/ # eBPF C programs and headers -├── docs/ # Documentation and API specs -└── pkg/logger/ # Logging utilities +│ ├── core/ # Core interfaces and types +│ ├── events/ # Event system (BaseEvent, streams) +│ ├── programs/ # eBPF program implementations +│ ├── storage/ # Event storage and querying +│ ├── api/ # HTTP API handlers +│ ├── kubernetes/ # Kubernetes metadata integration +│ └── system/ # System initialization +├── bpf/ # eBPF C programs and headers +├── kubernetes/ # Kubernetes manifests +├── scripts/ # Deployment and testing scripts +└── docs/ # Documentation and API specs ``` -## Requirements +## 🔧 Requirements - **Linux kernel 4.18+** with eBPF support -- **Root privileges** for eBPF program loading +- **Root privileges** for eBPF program loading - **Dependencies**: Go 1.23+, Clang, libbpf-dev, kernel headers +- **Kubernetes**: 1.20+ (for K8s deployment) -For detailed setup: [docs/setup.md](docs/setup.md) | Development guide: [docs/program-development.md](docs/program-development.md) +**📖 Setup Guide**: [docs/setup.md](docs/setup.md) -## License +## 📄 License MIT License - see [LICENSE](LICENSE) file.
From 76780e1cfc8c92397332c02beb4972652086bed8 Mon Sep 17 00:00:00 2001 From: Simone Rodigari Date: Tue, 12 Aug 2025 21:30:43 +0100 Subject: [PATCH 3/7] add cross-compile support --- Makefile | 28 +++++++++++++++++++++++++++- docker/Dockerfile.cross-compile | 29 +++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 docker/Dockerfile.cross-compile diff --git a/Makefile b/Makefile index 5b901dd..1a9fdf4 100644 --- a/Makefile +++ b/Makefile @@ -250,7 +250,9 @@ help: @echo "Available targets:" @echo " all - Generate eBPF bindings and build binaries" @echo " generate - Generate eBPF Go bindings" - @echo " bpf - Compile eBPF programs" + @echo " bpf - Compile eBPF programs (native)" + @echo " bpf-cross-compile - Cross-compile eBPF programs for ARM64 Linux (macOS only)" + @echo " bpf-cross-clean - Clean cross-compilation Docker images" @echo " vmlinux - Generate vmlinux.h from running kernel (Linux only)" @echo " build - Build both server and aggregator binaries" @echo " build-server - Build the server binary" @@ -291,12 +293,36 @@ help: @echo " kind-integration-test - Run comprehensive integration tests" @echo " kind-cleanup - Clean up kind deployment" +# Cross-compilation targets for macOS +.PHONY: bpf-cross-compile +bpf-cross-compile: + @if [ "$(OS)" = "Darwin" ]; then \ + echo "πŸ”„ Cross-compiling eBPF programs for ARM64 Linux using Docker..."; \ + docker build -t ebpf-cross-compiler -f docker/Dockerfile.cross-compile .; \ + docker run --name ebpf-temp ebpf-cross-compiler sleep 1; \ + docker cp ebpf-temp:/workspace/bpf/ ./; \ + docker rm ebpf-temp; \ + echo "βœ… Cross-compilation completed - eBPF objects ready for ARM64 Linux"; \ + else \ + echo "Cross-compilation target is for macOS only. Use 'make bpf' on Linux."; \ + fi + +.PHONY: bpf-cross-clean +bpf-cross-clean: + @echo "🧹 Cleaning cross-compilation Docker images..." 
+ -docker rmi ebpf-cross-compiler + -docker rm ebpf-temp + # Container build targets .PHONY: docker-build docker-build: docker-build-agent docker-build-aggregator .PHONY: docker-build-agent docker-build-agent: + @if [ "$(OS)" = "Darwin" ]; then \ + echo "πŸ”„ Cross-compiling eBPF programs before Docker build..."; \ + $(MAKE) bpf-cross-compile; \ + fi @echo "Building agent Docker image..." docker build -t $(AGENT_IMAGE) -f docker/Dockerfile . diff --git a/docker/Dockerfile.cross-compile b/docker/Dockerfile.cross-compile new file mode 100644 index 0000000..0f31aa8 --- /dev/null +++ b/docker/Dockerfile.cross-compile @@ -0,0 +1,29 @@ +# Cross-compilation Docker image for eBPF programs +FROM --platform=linux/arm64 ubuntu:22.04 + +# Install dependencies for eBPF compilation +RUN apt-get update && apt-get install -y \ + build-essential \ + clang \ + llvm \ + libbpf-dev \ + linux-headers-generic \ + pkg-config \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /workspace + +# Copy eBPF source files +COPY bpf/ ./bpf/ + +# Compile eBPF programs +RUN for file in bpf/*.c; do \ + echo "Compiling $file..."; \ + clang -O2 -target bpf -D__TARGET_ARCH_arm64 \ + -I./bpf/include \ + -Wall \ + -g -c "$file" -o "${file%.c}.o" || exit 1; \ + done + +# Keep the container running so we can copy files out +CMD ["tail", "-f", "/dev/null"] From 4db9918148a0b2f69d0a2358e63e8c0aee1a1af6 Mon Sep 17 00:00:00 2001 From: Simone Rodigari Date: Tue, 12 Aug 2025 21:53:30 +0100 Subject: [PATCH 4/7] fix kind test --- Makefile | 19 +++++-- docker/Dockerfile | 3 +- scripts/test-kind-deployment.sh | 87 ++++++++++++++++++++++++++++++--- 3 files changed, 99 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index 1a9fdf4..eccf38f 100644 --- a/Makefile +++ b/Makefile @@ -182,15 +182,28 @@ clean-bpf: # Fresh build - clean eBPF objects and rebuild everything .PHONY: fresh-build -fresh-build: clean-bpf bpf docker-build +fresh-build: clean-bpf + @if [ "$(OS)" = "Darwin" ]; then \ + $(MAKE) 
bpf-cross-compile; \ + else \ + $(MAKE) bpf; \ + fi + $(MAKE) docker-build @echo "Fresh build completed with new eBPF objects" # Fresh build for kind testing - builds everything from scratch .PHONY: fresh-kind-build fresh-kind-build: @echo "πŸ”„ Starting fresh build for kind testing..." -# @echo "1️⃣ Compiling fresh eBPF objects on host..." -# $(MAKE) bpf + @if [ "$(OS)" = "Darwin" ]; then \ + echo "1️⃣ Cross-compiling fresh eBPF objects for ARM64 Linux..."; \ + $(MAKE) clean-bpf; \ + $(MAKE) bpf-cross-compile; \ + else \ + echo "1️⃣ Compiling fresh eBPF objects on host..."; \ + $(MAKE) clean-bpf; \ + $(MAKE) bpf; \ + fi @echo "2️⃣ Building fresh Docker images..." $(MAKE) docker-build @echo "3️⃣ Loading images to kind..." diff --git a/docker/Dockerfile b/docker/Dockerfile index 6049334..1a27fa7 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -17,7 +17,8 @@ RUN go mod download COPY . . # Build the application (eBPF objects should already be compiled on host) -RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o ebpf-server ./cmd/server +RUN mkdir -p bin +RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o bin/ebpf-server ./cmd/server # Runtime stage FROM ubuntu:22.04 diff --git a/scripts/test-kind-deployment.sh b/scripts/test-kind-deployment.sh index 013419f..20935d5 100755 --- a/scripts/test-kind-deployment.sh +++ b/scripts/test-kind-deployment.sh @@ -8,6 +8,22 @@ NAMESPACE=${NAMESPACE:-ebpf-system} echo "πŸ“Š Checking deployment status..." +# Wait for deployments to be ready +echo "⏳ Waiting for deployments to be ready..." 
+kubectl wait --for=condition=available --timeout=300s deployment/ebpf-aggregator -n ${NAMESPACE} || { + echo "❌ Aggregator deployment failed to become available" + kubectl describe deployment ebpf-aggregator -n ${NAMESPACE} + exit 1 +} + +kubectl rollout status daemonset/ebpf-monitor -n ${NAMESPACE} --timeout=300s || { + echo "❌ Monitor daemonset failed to roll out" + kubectl describe daemonset ebpf-monitor -n ${NAMESPACE} + exit 1 +} + +echo "βœ… All deployments are ready!" + # Check aggregator deployment echo "Aggregator deployment status:" kubectl get deployment ebpf-aggregator -n ${NAMESPACE} @@ -33,24 +49,83 @@ fi # Test aggregator API echo "🌐 Testing aggregator API..." -kubectl port-forward service/ebpf-aggregator-service 8080:8080 -n ${NAMESPACE} & +echo "⏳ Waiting for service to be ready..." +kubectl wait --for=condition=ready pod -l app=ebpf-aggregator -n ${NAMESPACE} --timeout=60s || { + echo "⚠️ Warning: Aggregator pods may not be fully ready" +} + +# Clean up any existing port-forwards +echo "🧹 Cleaning up any existing port-forwards..." +pkill -f "kubectl.*port-forward.*ebpf-aggregator" 2>/dev/null || true +sleep 2 + +# Find an available port +LOCAL_PORT=8082 +check_port() { + if command -v netstat >/dev/null 2>&1; then + netstat -an | grep -q ":${1}.*LISTEN" 2>/dev/null + elif command -v lsof >/dev/null 2>&1; then + lsof -i ":${1}" >/dev/null 2>&1 + else + # Fallback: try to connect to the port + (echo >/dev/tcp/localhost/${1}) >/dev/null 2>&1 + fi +} + +while check_port ${LOCAL_PORT}; do + echo "⚠️ Port ${LOCAL_PORT} is in use, trying $((LOCAL_PORT + 1))" + LOCAL_PORT=$((LOCAL_PORT + 1)) + if [ ${LOCAL_PORT} -gt 8090 ]; then + echo "❌ Could not find available port between 8082-8090" + exit 1 + fi +done + +echo "πŸ“‘ Starting port-forward on localhost:${LOCAL_PORT}..." +kubectl port-forward service/ebpf-aggregator ${LOCAL_PORT}:8081 -n ${NAMESPACE} & PORT_FORWARD_PID=$! -sleep 3 +echo "⏳ Waiting for port-forward to establish..." 
+sleep 5 + +# Verify port-forward is working +if ! ps -p ${PORT_FORWARD_PID} > /dev/null 2>&1; then + echo "❌ Port-forward failed to start" + exit 1 +fi # Test health endpoint echo "Health check:" -curl -s http://localhost:8080/health || echo "Health endpoint not accessible" +for i in {1..5}; do + if curl -s http://localhost:${LOCAL_PORT}/health >/dev/null 2>&1; then + echo "✅ Health endpoint is accessible" + curl -s http://localhost:${LOCAL_PORT}/health + break + else + echo "⏳ Attempt $i/5: Health endpoint not ready, waiting..." + sleep 2 + fi + if [ $i -eq 5 ]; then + echo "❌ Health endpoint not accessible after 5 attempts" + fi +done # Test events endpoint echo "Events check:" -curl -s http://localhost:8080/events | jq '.' || echo "Events endpoint not accessible" +curl -s http://localhost:${LOCAL_PORT}/api/events?limit=1 | jq '.' 2>/dev/null || echo "⚠️ Events endpoint not accessible or no data" # Test stats endpoint echo "Stats check:" -curl -s http://localhost:8080/stats | jq '.' || echo "Stats endpoint not accessible" +curl -s http://localhost:${LOCAL_PORT}/api/stats | jq '.' 2>/dev/null || echo "⚠️ Stats endpoint not accessible or no data" # Cleanup port-forward -kill $PORT_FORWARD_PID 2>/dev/null || true +echo "🧹 Cleaning up port-forward..." +if [ ! -z "$PORT_FORWARD_PID" ]; then + kill $PORT_FORWARD_PID 2>/dev/null || true + wait $PORT_FORWARD_PID 2>/dev/null || true +fi + +# Additional cleanup - kill any remaining port-forwards +pkill -f "kubectl.*port-forward.*ebpf-aggregator" 2>/dev/null || true echo "✅ Testing completed!"
From 3eb01eb0d81bb2610f4a7543363e80e74407959d Mon Sep 17 00:00:00 2001 From: Simone Rodigari Date: Tue, 12 Aug 2025 22:17:25 +0100 Subject: [PATCH 5/7] add aggregator swagger --- Makefile | 16 +- cmd/aggregator/main.go | 2 +- docs/swagger-aggregator/docs.go | 849 +++++++++++++++++++++++++++ docs/swagger-aggregator/swagger.json | 825 ++++++++++++++++++++++++++ docs/swagger-aggregator/swagger.yaml | 565 ++++++++++++++++++ internal/aggregator/aggregator.go | 47 ++ internal/aggregator/health.go | 10 + 7 files changed, 2311 insertions(+), 3 deletions(-) create mode 100644 docs/swagger-aggregator/docs.go create mode 100644 docs/swagger-aggregator/swagger.json create mode 100644 docs/swagger-aggregator/swagger.yaml diff --git a/Makefile b/Makefile index eccf38f..614b0cd 100644 --- a/Makefile +++ b/Makefile @@ -92,13 +92,13 @@ build-server: generate # Build the aggregator binary .PHONY: build-aggregator -build-aggregator: generate +build-aggregator: generate docs-aggregator @echo "Building $(AGGREGATOR_NAME)..." go build -o bin/$(AGGREGATOR_NAME) ./cmd/aggregator # Build the aggregator binary without eBPF dependencies (for Docker) .PHONY: build-aggregator-only -build-aggregator-only: +build-aggregator-only: docs-aggregator @echo "Building $(AGGREGATOR_NAME) (no eBPF dependencies)..." 
go build -o bin/$(AGGREGATOR_NAME) ./cmd/aggregator @@ -219,6 +219,18 @@ docs: @echo "Interactive docs: http://localhost:8080/docs/ (when server is running)" @echo "External docs: https://petstore.swagger.io/?url=https://raw.githubusercontent.com/srodi/ebpf-server/main/docs/swagger.json" +# Generate aggregator API documentation using Swagger +.PHONY: docs-aggregator +docs-aggregator: + @command -v $(shell go env GOPATH)/bin/swag >/dev/null 2>&1 || { echo "Installing swag..."; go install github.com/swaggo/swag/cmd/swag@latest; } + $(shell go env GOPATH)/bin/swag init -g internal/aggregator/aggregator.go -o docs/swagger-aggregator --parseDependency --parseInternal + @echo "Aggregator API documentation generated at docs/swagger-aggregator/" + @echo "Interactive docs: http://localhost:8081/swagger/ (when aggregator is running)" + +# Generate all documentation +.PHONY: docs-all +docs-all: docs docs-aggregator + # Install the binary system-wide .PHONY: install install: build diff --git a/cmd/aggregator/main.go b/cmd/aggregator/main.go index aaf2a16..843b868 100644 --- a/cmd/aggregator/main.go +++ b/cmd/aggregator/main.go @@ -12,7 +12,7 @@ import ( "github.com/srodi/ebpf-server/internal/aggregator" "github.com/srodi/ebpf-server/pkg/logger" - _ "github.com/srodi/ebpf-server/docs/swagger" // Import generated docs + _ "github.com/srodi/ebpf-server/docs/swagger-aggregator" // Import generated aggregator docs httpSwagger "github.com/swaggo/http-swagger" ) diff --git a/docs/swagger-aggregator/docs.go b/docs/swagger-aggregator/docs.go new file mode 100644 index 0000000..72f782b --- /dev/null +++ b/docs/swagger-aggregator/docs.go @@ -0,0 +1,849 @@ +// Package swagger_aggregator Code generated by swaggo/swag. 
DO NOT EDIT +package swagger_aggregator + +import "github.com/swaggo/swag" + +const docTemplate = `{ + "schemes": {{ marshal .Schemes }}, + "swagger": "2.0", + "info": { + "description": "{{escape .Description}}", + "title": "{{.Title}}", + "contact": { + "name": "API Support", + "url": "https://github.com/srodi/ebpf-server/issues", + "email": "support@example.com" + }, + "license": { + "name": "MIT", + "url": "https://github.com/srodi/ebpf-server/blob/main/LICENSE" + }, + "version": "{{.Version}}" + }, + "host": "{{.Host}}", + "basePath": "{{.BasePath}}", + "paths": { + "/api/connection-summary": { + "get": { + "description": "Get count of connection events filtered by PID, command, and time window", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "connections" + ], + "summary": "Get connection statistics", + "parameters": [ + { + "type": "integer", + "description": "Process ID (GET only)", + "name": "pid", + "in": "query" + }, + { + "type": "string", + "description": "Command name (GET only)", + "name": "command", + "in": "query" + }, + { + "type": "integer", + "description": "Duration in seconds (GET only, default: 60)", + "name": "duration_seconds", + "in": "query" + }, + { + "description": "Connection summary request (POST only)", + "name": "request", + "in": "body", + "schema": { + "$ref": "#/definitions/api.ConnectionSummaryRequest" + } + } + ], + "responses": { + "200": { + "description": "Connection statistics", + "schema": { + "$ref": "#/definitions/api.ConnectionSummaryResponse" + } + }, + "400": { + "description": "Bad request", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "500": { + "description": "Internal server error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "503": { + "description": "Service unavailable", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + 
} + } + }, + "post": { + "description": "Get count of connection events filtered by PID, command, and time window", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "connections" + ], + "summary": "Get connection statistics", + "parameters": [ + { + "type": "integer", + "description": "Process ID (GET only)", + "name": "pid", + "in": "query" + }, + { + "type": "string", + "description": "Command name (GET only)", + "name": "command", + "in": "query" + }, + { + "type": "integer", + "description": "Duration in seconds (GET only, default: 60)", + "name": "duration_seconds", + "in": "query" + }, + { + "description": "Connection summary request (POST only)", + "name": "request", + "in": "body", + "schema": { + "$ref": "#/definitions/api.ConnectionSummaryRequest" + } + } + ], + "responses": { + "200": { + "description": "Connection statistics", + "schema": { + "$ref": "#/definitions/api.ConnectionSummaryResponse" + } + }, + "400": { + "description": "Bad request", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "500": { + "description": "Internal server error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "503": { + "description": "Service unavailable", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/events": { + "get": { + "description": "Get events filtered by type, PID, command, time range, and limit", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "events" + ], + "summary": "Query events", + "parameters": [ + { + "type": "string", + "description": "Event type (connection, packet_drop)", + "name": "type", + "in": "query" + }, + { + "type": "integer", + "description": "Process ID", + "name": "pid", + "in": "query" + }, + { + "type": "string", + "description": "Command name", + "name": "command", + "in": 
"query" + }, + { + "type": "string", + "description": "Start time (RFC3339 format)", + "name": "since", + "in": "query" + }, + { + "type": "string", + "description": "End time (RFC3339 format)", + "name": "until", + "in": "query" + }, + { + "type": "integer", + "description": "Maximum number of events to return (default: 100)", + "name": "limit", + "in": "query" + } + ], + "responses": { + "200": { + "description": "Filtered events", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "500": { + "description": "Internal server error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "503": { + "description": "Service unavailable", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/events/ingest": { + "post": { + "description": "Accept events from eBPF agents for aggregation and storage", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "events" + ], + "summary": "Ingest events from agents", + "parameters": [ + { + "description": "Events to ingest", + "name": "events", + "in": "body", + "required": true, + "schema": { + "type": "object" + } + } + ], + "responses": { + "200": { + "description": "Ingestion result", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Bad request", + "schema": { + "type": "string" + } + }, + "405": { + "description": "Method not allowed", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal server error", + "schema": { + "type": "string" + } + } + } + } + }, + "/api/list-connections": { + "get": { + "description": "Get recent connection events grouped by PID", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "connections" + ], + "summary": "List connection events", + "responses": { + "200": { + "description": "Connection events", + 
"schema": { + "$ref": "#/definitions/api.ConnectionListResponse" + } + }, + "500": { + "description": "Internal server error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "503": { + "description": "Service unavailable", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/list-packet-drops": { + "get": { + "description": "Get recent packet drop events grouped by PID", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "packet_drops" + ], + "summary": "List packet drop events", + "responses": { + "200": { + "description": "Packet drop events", + "schema": { + "$ref": "#/definitions/api.PacketDropListResponse" + } + }, + "500": { + "description": "Internal server error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "503": { + "description": "Service unavailable", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/packet-drop-summary": { + "get": { + "description": "Get count of packet drop events filtered by PID, command, and time window", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "packet_drops" + ], + "summary": "Get packet drop statistics", + "parameters": [ + { + "type": "integer", + "description": "Process ID (GET only)", + "name": "pid", + "in": "query" + }, + { + "type": "string", + "description": "Command name (GET only)", + "name": "command", + "in": "query" + }, + { + "type": "integer", + "description": "Duration in seconds (GET only, default: 60)", + "name": "duration_seconds", + "in": "query" + }, + { + "description": "Packet drop summary request (POST only)", + "name": "request", + "in": "body", + "schema": { + "$ref": "#/definitions/api.PacketDropSummaryRequest" + } + } + ], + "responses": { + "200": { + "description": "Packet 
drop statistics", + "schema": { + "$ref": "#/definitions/api.PacketDropSummaryResponse" + } + }, + "400": { + "description": "Bad request", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "500": { + "description": "Internal server error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "503": { + "description": "Service unavailable", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + }, + "post": { + "description": "Get count of packet drop events filtered by PID, command, and time window", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "packet_drops" + ], + "summary": "Get packet drop statistics", + "parameters": [ + { + "type": "integer", + "description": "Process ID (GET only)", + "name": "pid", + "in": "query" + }, + { + "type": "string", + "description": "Command name (GET only)", + "name": "command", + "in": "query" + }, + { + "type": "integer", + "description": "Duration in seconds (GET only, default: 60)", + "name": "duration_seconds", + "in": "query" + }, + { + "description": "Packet drop summary request (POST only)", + "name": "request", + "in": "body", + "schema": { + "$ref": "#/definitions/api.PacketDropSummaryRequest" + } + } + ], + "responses": { + "200": { + "description": "Packet drop statistics", + "schema": { + "$ref": "#/definitions/api.PacketDropSummaryResponse" + } + }, + "400": { + "description": "Bad request", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "500": { + "description": "Internal server error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "503": { + "description": "Service unavailable", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/programs": { + "get": { + "description": 
"Get the status and information of all loaded eBPF programs", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "programs" + ], + "summary": "List eBPF programs", + "responses": { + "200": { + "description": "List of eBPF programs", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "500": { + "description": "Internal server error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "503": { + "description": "Service unavailable", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/stats": { + "get": { + "description": "Retrieve statistics about event aggregation including counts by type and node", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "stats" + ], + "summary": "Get aggregation statistics", + "responses": { + "200": { + "description": "Aggregation statistics", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "405": { + "description": "Method not allowed", + "schema": { + "type": "string" + } + } + } + } + }, + "/health": { + "get": { + "description": "Get the health status of the eBPF monitoring system", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "health" + ], + "summary": "Health check", + "responses": { + "200": { + "description": "Health status", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "503": { + "description": "Service unavailable", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + } + }, + "definitions": { + "api.ConnectionListResponse": { + "type": "object", + "properties": { + "events_by_pid": { + "description": "Events grouped by PID", + "type": "object", + "additionalProperties": { + "type": "array", + "items": {} + } + }, + "query_time": { + 
"description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "total_events": { + "description": "Total number of events", + "type": "integer", + "example": 10 + }, + "total_pids": { + "description": "Number of unique PIDs", + "type": "integer", + "example": 3 + } + } + }, + "api.ConnectionSummaryRequest": { + "type": "object", + "properties": { + "command": { + "description": "Command name", + "type": "string", + "example": "curl" + }, + "duration_seconds": { + "description": "Duration in seconds", + "type": "integer", + "example": 60 + }, + "pid": { + "description": "Process ID", + "type": "integer", + "example": 1234 + } + } + }, + "api.ConnectionSummaryResponse": { + "type": "object", + "properties": { + "command": { + "description": "Command name", + "type": "string", + "example": "curl" + }, + "count": { + "description": "Number of connection events", + "type": "integer", + "example": 5 + }, + "duration_seconds": { + "description": "Duration in seconds", + "type": "integer", + "example": 60 + }, + "pid": { + "description": "Process ID", + "type": "integer", + "example": 1234 + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + } + } + }, + "api.PacketDropListResponse": { + "type": "object", + "properties": { + "events_by_pid": { + "description": "Events grouped by PID", + "type": "object", + "additionalProperties": { + "type": "array", + "items": {} + } + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "total_events": { + "description": "Total number of events", + "type": "integer", + "example": 7 + }, + "total_pids": { + "description": "Number of unique PIDs", + "type": "integer", + "example": 2 + } + } + }, + "api.PacketDropSummaryRequest": { + "type": "object", + "properties": { + "command": { + "description": "Command name", + "type": "string", + "example": "nginx" + }, + 
"duration_seconds": { + "description": "Duration in seconds", + "type": "integer", + "example": 60 + }, + "pid": { + "description": "Process ID", + "type": "integer", + "example": 1234 + } + } + }, + "api.PacketDropSummaryResponse": { + "type": "object", + "properties": { + "command": { + "description": "Command name", + "type": "string", + "example": "nginx" + }, + "count": { + "description": "Number of packet drop events", + "type": "integer", + "example": 3 + }, + "duration_seconds": { + "description": "Duration in seconds", + "type": "integer", + "example": 60 + }, + "pid": { + "description": "Process ID", + "type": "integer", + "example": 1234 + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + } + } + }, + "internal_aggregator.HealthCheck": { + "type": "object", + "properties": { + "component": { + "type": "string" + }, + "stats": { + "type": "object", + "additionalProperties": true + }, + "status": { + "type": "string" + }, + "uptime": { + "type": "string" + } + } + } + } +}` + +// SwaggerInfo holds exported Swagger Info so clients can modify it +var SwaggerInfo = &swag.Spec{ + Version: "1.0.0", + Host: "localhost:8081", + BasePath: "/", + Schemes: []string{}, + Title: "eBPF Event Aggregator API", + Description: "HTTP API for aggregating and querying eBPF events from multiple agents", + InfoInstanceName: "swagger", + SwaggerTemplate: docTemplate, + LeftDelim: "{{", + RightDelim: "}}", +} + +func init() { + swag.Register(SwaggerInfo.InstanceName(), SwaggerInfo) +} diff --git a/docs/swagger-aggregator/swagger.json b/docs/swagger-aggregator/swagger.json new file mode 100644 index 0000000..47b9556 --- /dev/null +++ b/docs/swagger-aggregator/swagger.json @@ -0,0 +1,825 @@ +{ + "swagger": "2.0", + "info": { + "description": "HTTP API for aggregating and querying eBPF events from multiple agents", + "title": "eBPF Event Aggregator API", + "contact": { + "name": "API Support", + "url": 
"https://github.com/srodi/ebpf-server/issues", + "email": "support@example.com" + }, + "license": { + "name": "MIT", + "url": "https://github.com/srodi/ebpf-server/blob/main/LICENSE" + }, + "version": "1.0.0" + }, + "host": "localhost:8081", + "basePath": "/", + "paths": { + "/api/connection-summary": { + "get": { + "description": "Get count of connection events filtered by PID, command, and time window", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "connections" + ], + "summary": "Get connection statistics", + "parameters": [ + { + "type": "integer", + "description": "Process ID (GET only)", + "name": "pid", + "in": "query" + }, + { + "type": "string", + "description": "Command name (GET only)", + "name": "command", + "in": "query" + }, + { + "type": "integer", + "description": "Duration in seconds (GET only, default: 60)", + "name": "duration_seconds", + "in": "query" + }, + { + "description": "Connection summary request (POST only)", + "name": "request", + "in": "body", + "schema": { + "$ref": "#/definitions/api.ConnectionSummaryRequest" + } + } + ], + "responses": { + "200": { + "description": "Connection statistics", + "schema": { + "$ref": "#/definitions/api.ConnectionSummaryResponse" + } + }, + "400": { + "description": "Bad request", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "500": { + "description": "Internal server error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "503": { + "description": "Service unavailable", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + }, + "post": { + "description": "Get count of connection events filtered by PID, command, and time window", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "connections" + ], + "summary": "Get connection statistics", + "parameters": [ + { + 
"type": "integer", + "description": "Process ID (GET only)", + "name": "pid", + "in": "query" + }, + { + "type": "string", + "description": "Command name (GET only)", + "name": "command", + "in": "query" + }, + { + "type": "integer", + "description": "Duration in seconds (GET only, default: 60)", + "name": "duration_seconds", + "in": "query" + }, + { + "description": "Connection summary request (POST only)", + "name": "request", + "in": "body", + "schema": { + "$ref": "#/definitions/api.ConnectionSummaryRequest" + } + } + ], + "responses": { + "200": { + "description": "Connection statistics", + "schema": { + "$ref": "#/definitions/api.ConnectionSummaryResponse" + } + }, + "400": { + "description": "Bad request", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "500": { + "description": "Internal server error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "503": { + "description": "Service unavailable", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/events": { + "get": { + "description": "Get events filtered by type, PID, command, time range, and limit", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "events" + ], + "summary": "Query events", + "parameters": [ + { + "type": "string", + "description": "Event type (connection, packet_drop)", + "name": "type", + "in": "query" + }, + { + "type": "integer", + "description": "Process ID", + "name": "pid", + "in": "query" + }, + { + "type": "string", + "description": "Command name", + "name": "command", + "in": "query" + }, + { + "type": "string", + "description": "Start time (RFC3339 format)", + "name": "since", + "in": "query" + }, + { + "type": "string", + "description": "End time (RFC3339 format)", + "name": "until", + "in": "query" + }, + { + "type": "integer", + "description": "Maximum number of events 
to return (default: 100)", + "name": "limit", + "in": "query" + } + ], + "responses": { + "200": { + "description": "Filtered events", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "500": { + "description": "Internal server error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "503": { + "description": "Service unavailable", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/events/ingest": { + "post": { + "description": "Accept events from eBPF agents for aggregation and storage", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "events" + ], + "summary": "Ingest events from agents", + "parameters": [ + { + "description": "Events to ingest", + "name": "events", + "in": "body", + "required": true, + "schema": { + "type": "object" + } + } + ], + "responses": { + "200": { + "description": "Ingestion result", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Bad request", + "schema": { + "type": "string" + } + }, + "405": { + "description": "Method not allowed", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal server error", + "schema": { + "type": "string" + } + } + } + } + }, + "/api/list-connections": { + "get": { + "description": "Get recent connection events grouped by PID", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "connections" + ], + "summary": "List connection events", + "responses": { + "200": { + "description": "Connection events", + "schema": { + "$ref": "#/definitions/api.ConnectionListResponse" + } + }, + "500": { + "description": "Internal server error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "503": { + "description": "Service unavailable", + "schema": { + "type": 
"object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/list-packet-drops": { + "get": { + "description": "Get recent packet drop events grouped by PID", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "packet_drops" + ], + "summary": "List packet drop events", + "responses": { + "200": { + "description": "Packet drop events", + "schema": { + "$ref": "#/definitions/api.PacketDropListResponse" + } + }, + "500": { + "description": "Internal server error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "503": { + "description": "Service unavailable", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/packet-drop-summary": { + "get": { + "description": "Get count of packet drop events filtered by PID, command, and time window", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "packet_drops" + ], + "summary": "Get packet drop statistics", + "parameters": [ + { + "type": "integer", + "description": "Process ID (GET only)", + "name": "pid", + "in": "query" + }, + { + "type": "string", + "description": "Command name (GET only)", + "name": "command", + "in": "query" + }, + { + "type": "integer", + "description": "Duration in seconds (GET only, default: 60)", + "name": "duration_seconds", + "in": "query" + }, + { + "description": "Packet drop summary request (POST only)", + "name": "request", + "in": "body", + "schema": { + "$ref": "#/definitions/api.PacketDropSummaryRequest" + } + } + ], + "responses": { + "200": { + "description": "Packet drop statistics", + "schema": { + "$ref": "#/definitions/api.PacketDropSummaryResponse" + } + }, + "400": { + "description": "Bad request", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "500": { + "description": "Internal server error", + "schema": 
{ + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "503": { + "description": "Service unavailable", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + }, + "post": { + "description": "Get count of packet drop events filtered by PID, command, and time window", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "packet_drops" + ], + "summary": "Get packet drop statistics", + "parameters": [ + { + "type": "integer", + "description": "Process ID (GET only)", + "name": "pid", + "in": "query" + }, + { + "type": "string", + "description": "Command name (GET only)", + "name": "command", + "in": "query" + }, + { + "type": "integer", + "description": "Duration in seconds (GET only, default: 60)", + "name": "duration_seconds", + "in": "query" + }, + { + "description": "Packet drop summary request (POST only)", + "name": "request", + "in": "body", + "schema": { + "$ref": "#/definitions/api.PacketDropSummaryRequest" + } + } + ], + "responses": { + "200": { + "description": "Packet drop statistics", + "schema": { + "$ref": "#/definitions/api.PacketDropSummaryResponse" + } + }, + "400": { + "description": "Bad request", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "500": { + "description": "Internal server error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "503": { + "description": "Service unavailable", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/programs": { + "get": { + "description": "Get the status and information of all loaded eBPF programs", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "programs" + ], + "summary": "List eBPF programs", + "responses": { + "200": { + "description": "List of eBPF programs", + "schema": { + 
"type": "object", + "additionalProperties": true + } + }, + "500": { + "description": "Internal server error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "503": { + "description": "Service unavailable", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/stats": { + "get": { + "description": "Retrieve statistics about event aggregation including counts by type and node", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "stats" + ], + "summary": "Get aggregation statistics", + "responses": { + "200": { + "description": "Aggregation statistics", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "405": { + "description": "Method not allowed", + "schema": { + "type": "string" + } + } + } + } + }, + "/health": { + "get": { + "description": "Get the health status of the eBPF monitoring system", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "health" + ], + "summary": "Health check", + "responses": { + "200": { + "description": "Health status", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "503": { + "description": "Service unavailable", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + } + }, + "definitions": { + "api.ConnectionListResponse": { + "type": "object", + "properties": { + "events_by_pid": { + "description": "Events grouped by PID", + "type": "object", + "additionalProperties": { + "type": "array", + "items": {} + } + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "total_events": { + "description": "Total number of events", + "type": "integer", + "example": 10 + }, + "total_pids": { + "description": "Number of unique PIDs", + "type": "integer", + "example": 3 + } + } + 
}, + "api.ConnectionSummaryRequest": { + "type": "object", + "properties": { + "command": { + "description": "Command name", + "type": "string", + "example": "curl" + }, + "duration_seconds": { + "description": "Duration in seconds", + "type": "integer", + "example": 60 + }, + "pid": { + "description": "Process ID", + "type": "integer", + "example": 1234 + } + } + }, + "api.ConnectionSummaryResponse": { + "type": "object", + "properties": { + "command": { + "description": "Command name", + "type": "string", + "example": "curl" + }, + "count": { + "description": "Number of connection events", + "type": "integer", + "example": 5 + }, + "duration_seconds": { + "description": "Duration in seconds", + "type": "integer", + "example": 60 + }, + "pid": { + "description": "Process ID", + "type": "integer", + "example": 1234 + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + } + } + }, + "api.PacketDropListResponse": { + "type": "object", + "properties": { + "events_by_pid": { + "description": "Events grouped by PID", + "type": "object", + "additionalProperties": { + "type": "array", + "items": {} + } + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "total_events": { + "description": "Total number of events", + "type": "integer", + "example": 7 + }, + "total_pids": { + "description": "Number of unique PIDs", + "type": "integer", + "example": 2 + } + } + }, + "api.PacketDropSummaryRequest": { + "type": "object", + "properties": { + "command": { + "description": "Command name", + "type": "string", + "example": "nginx" + }, + "duration_seconds": { + "description": "Duration in seconds", + "type": "integer", + "example": 60 + }, + "pid": { + "description": "Process ID", + "type": "integer", + "example": 1234 + } + } + }, + "api.PacketDropSummaryResponse": { + "type": "object", + "properties": { + "command": { + "description": "Command name", 
+ "type": "string", + "example": "nginx" + }, + "count": { + "description": "Number of packet drop events", + "type": "integer", + "example": 3 + }, + "duration_seconds": { + "description": "Duration in seconds", + "type": "integer", + "example": 60 + }, + "pid": { + "description": "Process ID", + "type": "integer", + "example": 1234 + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + } + } + }, + "internal_aggregator.HealthCheck": { + "type": "object", + "properties": { + "component": { + "type": "string" + }, + "stats": { + "type": "object", + "additionalProperties": true + }, + "status": { + "type": "string" + }, + "uptime": { + "type": "string" + } + } + } + } +} \ No newline at end of file diff --git a/docs/swagger-aggregator/swagger.yaml b/docs/swagger-aggregator/swagger.yaml new file mode 100644 index 0000000..6168b51 --- /dev/null +++ b/docs/swagger-aggregator/swagger.yaml @@ -0,0 +1,565 @@ +basePath: / +definitions: + api.ConnectionListResponse: + properties: + events_by_pid: + additionalProperties: + items: {} + type: array + description: Events grouped by PID + type: object + query_time: + description: Query timestamp + example: "2023-01-01T12:00:00Z" + type: string + total_events: + description: Total number of events + example: 10 + type: integer + total_pids: + description: Number of unique PIDs + example: 3 + type: integer + type: object + api.ConnectionSummaryRequest: + properties: + command: + description: Command name + example: curl + type: string + duration_seconds: + description: Duration in seconds + example: 60 + type: integer + pid: + description: Process ID + example: 1234 + type: integer + type: object + api.ConnectionSummaryResponse: + properties: + command: + description: Command name + example: curl + type: string + count: + description: Number of connection events + example: 5 + type: integer + duration_seconds: + description: Duration in seconds + example: 60 + type: 
integer + pid: + description: Process ID + example: 1234 + type: integer + query_time: + description: Query timestamp + example: "2023-01-01T12:00:00Z" + type: string + type: object + api.PacketDropListResponse: + properties: + events_by_pid: + additionalProperties: + items: {} + type: array + description: Events grouped by PID + type: object + query_time: + description: Query timestamp + example: "2023-01-01T12:00:00Z" + type: string + total_events: + description: Total number of events + example: 7 + type: integer + total_pids: + description: Number of unique PIDs + example: 2 + type: integer + type: object + api.PacketDropSummaryRequest: + properties: + command: + description: Command name + example: nginx + type: string + duration_seconds: + description: Duration in seconds + example: 60 + type: integer + pid: + description: Process ID + example: 1234 + type: integer + type: object + api.PacketDropSummaryResponse: + properties: + command: + description: Command name + example: nginx + type: string + count: + description: Number of packet drop events + example: 3 + type: integer + duration_seconds: + description: Duration in seconds + example: 60 + type: integer + pid: + description: Process ID + example: 1234 + type: integer + query_time: + description: Query timestamp + example: "2023-01-01T12:00:00Z" + type: string + type: object + internal_aggregator.HealthCheck: + properties: + component: + type: string + stats: + additionalProperties: true + type: object + status: + type: string + uptime: + type: string + type: object +host: localhost:8081 +info: + contact: + email: support@example.com + name: API Support + url: https://github.com/srodi/ebpf-server/issues + description: HTTP API for aggregating and querying eBPF events from multiple agents + license: + name: MIT + url: https://github.com/srodi/ebpf-server/blob/main/LICENSE + title: eBPF Event Aggregator API + version: 1.0.0 +paths: + /api/connection-summary: + get: + consumes: + - application/json + 
description: Get count of connection events filtered by PID, command, and time + window + parameters: + - description: Process ID (GET only) + in: query + name: pid + type: integer + - description: Command name (GET only) + in: query + name: command + type: string + - description: 'Duration in seconds (GET only, default: 60)' + in: query + name: duration_seconds + type: integer + - description: Connection summary request (POST only) + in: body + name: request + schema: + $ref: '#/definitions/api.ConnectionSummaryRequest' + produces: + - application/json + responses: + "200": + description: Connection statistics + schema: + $ref: '#/definitions/api.ConnectionSummaryResponse' + "400": + description: Bad request + schema: + additionalProperties: + type: string + type: object + "500": + description: Internal server error + schema: + additionalProperties: + type: string + type: object + "503": + description: Service unavailable + schema: + additionalProperties: + type: string + type: object + summary: Get connection statistics + tags: + - connections + post: + consumes: + - application/json + description: Get count of connection events filtered by PID, command, and time + window + parameters: + - description: Process ID (GET only) + in: query + name: pid + type: integer + - description: Command name (GET only) + in: query + name: command + type: string + - description: 'Duration in seconds (GET only, default: 60)' + in: query + name: duration_seconds + type: integer + - description: Connection summary request (POST only) + in: body + name: request + schema: + $ref: '#/definitions/api.ConnectionSummaryRequest' + produces: + - application/json + responses: + "200": + description: Connection statistics + schema: + $ref: '#/definitions/api.ConnectionSummaryResponse' + "400": + description: Bad request + schema: + additionalProperties: + type: string + type: object + "500": + description: Internal server error + schema: + additionalProperties: + type: string + type: object + 
"503": + description: Service unavailable + schema: + additionalProperties: + type: string + type: object + summary: Get connection statistics + tags: + - connections + /api/events: + get: + consumes: + - application/json + description: Get events filtered by type, PID, command, time range, and limit + parameters: + - description: Event type (connection, packet_drop) + in: query + name: type + type: string + - description: Process ID + in: query + name: pid + type: integer + - description: Command name + in: query + name: command + type: string + - description: Start time (RFC3339 format) + in: query + name: since + type: string + - description: End time (RFC3339 format) + in: query + name: until + type: string + - description: 'Maximum number of events to return (default: 100)' + in: query + name: limit + type: integer + produces: + - application/json + responses: + "200": + description: Filtered events + schema: + additionalProperties: true + type: object + "500": + description: Internal server error + schema: + additionalProperties: + type: string + type: object + "503": + description: Service unavailable + schema: + additionalProperties: + type: string + type: object + summary: Query events + tags: + - events + /api/events/ingest: + post: + consumes: + - application/json + description: Accept events from eBPF agents for aggregation and storage + parameters: + - description: Events to ingest + in: body + name: events + required: true + schema: + type: object + produces: + - application/json + responses: + "200": + description: Ingestion result + schema: + additionalProperties: true + type: object + "400": + description: Bad request + schema: + type: string + "405": + description: Method not allowed + schema: + type: string + "500": + description: Internal server error + schema: + type: string + summary: Ingest events from agents + tags: + - events + /api/list-connections: + get: + consumes: + - application/json + description: Get recent connection events grouped 
by PID + produces: + - application/json + responses: + "200": + description: Connection events + schema: + $ref: '#/definitions/api.ConnectionListResponse' + "500": + description: Internal server error + schema: + additionalProperties: + type: string + type: object + "503": + description: Service unavailable + schema: + additionalProperties: + type: string + type: object + summary: List connection events + tags: + - connections + /api/list-packet-drops: + get: + consumes: + - application/json + description: Get recent packet drop events grouped by PID + produces: + - application/json + responses: + "200": + description: Packet drop events + schema: + $ref: '#/definitions/api.PacketDropListResponse' + "500": + description: Internal server error + schema: + additionalProperties: + type: string + type: object + "503": + description: Service unavailable + schema: + additionalProperties: + type: string + type: object + summary: List packet drop events + tags: + - packet_drops + /api/packet-drop-summary: + get: + consumes: + - application/json + description: Get count of packet drop events filtered by PID, command, and time + window + parameters: + - description: Process ID (GET only) + in: query + name: pid + type: integer + - description: Command name (GET only) + in: query + name: command + type: string + - description: 'Duration in seconds (GET only, default: 60)' + in: query + name: duration_seconds + type: integer + - description: Packet drop summary request (POST only) + in: body + name: request + schema: + $ref: '#/definitions/api.PacketDropSummaryRequest' + produces: + - application/json + responses: + "200": + description: Packet drop statistics + schema: + $ref: '#/definitions/api.PacketDropSummaryResponse' + "400": + description: Bad request + schema: + additionalProperties: + type: string + type: object + "500": + description: Internal server error + schema: + additionalProperties: + type: string + type: object + "503": + description: Service unavailable + 
schema: + additionalProperties: + type: string + type: object + summary: Get packet drop statistics + tags: + - packet_drops + post: + consumes: + - application/json + description: Get count of packet drop events filtered by PID, command, and time + window + parameters: + - description: Process ID (GET only) + in: query + name: pid + type: integer + - description: Command name (GET only) + in: query + name: command + type: string + - description: 'Duration in seconds (GET only, default: 60)' + in: query + name: duration_seconds + type: integer + - description: Packet drop summary request (POST only) + in: body + name: request + schema: + $ref: '#/definitions/api.PacketDropSummaryRequest' + produces: + - application/json + responses: + "200": + description: Packet drop statistics + schema: + $ref: '#/definitions/api.PacketDropSummaryResponse' + "400": + description: Bad request + schema: + additionalProperties: + type: string + type: object + "500": + description: Internal server error + schema: + additionalProperties: + type: string + type: object + "503": + description: Service unavailable + schema: + additionalProperties: + type: string + type: object + summary: Get packet drop statistics + tags: + - packet_drops + /api/programs: + get: + consumes: + - application/json + description: Get the status and information of all loaded eBPF programs + produces: + - application/json + responses: + "200": + description: List of eBPF programs + schema: + additionalProperties: true + type: object + "500": + description: Internal server error + schema: + additionalProperties: + type: string + type: object + "503": + description: Service unavailable + schema: + additionalProperties: + type: string + type: object + summary: List eBPF programs + tags: + - programs + /api/stats: + get: + consumes: + - application/json + description: Retrieve statistics about event aggregation including counts by + type and node + produces: + - application/json + responses: + "200": + description: 
Aggregation statistics + schema: + additionalProperties: true + type: object + "405": + description: Method not allowed + schema: + type: string + summary: Get aggregation statistics + tags: + - stats + /health: + get: + consumes: + - application/json + description: Get the health status of the eBPF monitoring system + produces: + - application/json + responses: + "200": + description: Health status + schema: + additionalProperties: true + type: object + "503": + description: Service unavailable + schema: + additionalProperties: + type: string + type: object + summary: Health check + tags: + - health +swagger: "2.0" diff --git a/internal/aggregator/aggregator.go b/internal/aggregator/aggregator.go index 32b2f45..c9ab3d6 100644 --- a/internal/aggregator/aggregator.go +++ b/internal/aggregator/aggregator.go @@ -1,4 +1,15 @@ // Package aggregator provides event aggregation functionality for eBPF monitoring. +// +// @title eBPF Event Aggregator API +// @description HTTP API for aggregating and querying eBPF events from multiple agents +// @version 1.0.0 +// @host localhost:8081 +// @BasePath / +// @contact.name API Support +// @contact.url https://github.com/srodi/ebpf-server/issues +// @contact.email support@example.com +// @license.name MIT +// @license.url https://github.com/srodi/ebpf-server/blob/main/LICENSE package aggregator import ( @@ -94,6 +105,21 @@ func (a *Aggregator) IsRunning() bool { } // HandleEvents handles HTTP requests for querying aggregated events. 
+// +// @Summary Query aggregated events +// @Description Retrieve aggregated events with optional filtering by type, node, and time range +// @Tags events +// @Accept json +// @Produce json +// @Param type query string false "Event type filter" +// @Param node query string false "Node name filter" +// @Param since query string false "Start time (RFC3339 format)" +// @Param until query string false "End time (RFC3339 format)" +// @Param limit query int false "Maximum number of events to return" +// @Success 200 {object} map[string]interface{} "Events and count" +// @Failure 405 {string} string "Method not allowed" +// @Failure 500 {string} string "Internal server error" +// @Router /api/events [get] func (a *Aggregator) HandleEvents(w http.ResponseWriter, r *http.Request) { if r.Method != http.MethodGet { http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) @@ -122,6 +148,18 @@ func (a *Aggregator) HandleEvents(w http.ResponseWriter, r *http.Request) { } // HandleIngest handles HTTP requests for ingesting events from agents. +// +// @Summary Ingest events from agents +// @Description Accept events from eBPF agents for aggregation and storage +// @Tags events +// @Accept json +// @Produce json +// @Param events body object true "Events to ingest" +// @Success 200 {object} map[string]interface{} "Ingestion result" +// @Failure 400 {string} string "Bad request" +// @Failure 405 {string} string "Method not allowed" +// @Failure 500 {string} string "Internal server error" +// @Router /api/events/ingest [post] func (a *Aggregator) HandleIngest(w http.ResponseWriter, r *http.Request) { if r.Method != http.MethodPost { http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) @@ -161,6 +199,15 @@ func (a *Aggregator) HandleIngest(w http.ResponseWriter, r *http.Request) { } // HandleStats handles HTTP requests for aggregation statistics. 
+// +// @Summary Get aggregation statistics +// @Description Retrieve statistics about event aggregation including counts by type and node +// @Tags stats +// @Accept json +// @Produce json +// @Success 200 {object} map[string]interface{} "Aggregation statistics" +// @Failure 405 {string} string "Method not allowed" +// @Router /api/stats [get] func (a *Aggregator) HandleStats(w http.ResponseWriter, r *http.Request) { if r.Method != http.MethodGet { http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) diff --git a/internal/aggregator/health.go b/internal/aggregator/health.go index a9eb60a..6b79494 100644 --- a/internal/aggregator/health.go +++ b/internal/aggregator/health.go @@ -15,6 +15,16 @@ type HealthCheck struct { } // HandleHealth handles health check requests. +// +// @Summary Health check +// @Description Get the health status and basic statistics of the aggregator +// @Tags health +// @Accept json +// @Produce json +// @Success 200 {object} HealthCheck "Health status" +// @Failure 405 {string} string "Method not allowed" +// @Failure 503 {object} HealthCheck "Service unavailable" +// @Router /health [get] func (a *Aggregator) HandleHealth(w http.ResponseWriter, r *http.Request) { if r.Method != http.MethodGet { http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) From 744354999630d6d28899275f9b8cee7fcc0cf923 Mon Sep 17 00:00:00 2001 From: Simone Rodigari Date: Tue, 12 Aug 2025 22:25:02 +0100 Subject: [PATCH 6/7] sync k8sProvider acces --- internal/events/events.go | 38 ++++++++++++++++--- .../events/kubernetes_integration_test.go | 16 +++----- 2 files changed, 39 insertions(+), 15 deletions(-) diff --git a/internal/events/events.go b/internal/events/events.go index 509851a..68d0896 100644 --- a/internal/events/events.go +++ b/internal/events/events.go @@ -19,14 +19,15 @@ import ( ) var ( - // Cached boot time to avoid recalculating it for every event + // System boot time calculation (cached) systemBootTime time.Time 
bootTimeCalculated bool bootTimeMutex sync.Mutex - // Global Kubernetes metadata provider + // Global Kubernetes metadata provider with proper synchronization k8sProvider *kubernetes.Provider - k8sOnce sync.Once + k8sMutex sync.RWMutex // Protects both k8sProvider and initialization + k8sInit bool // Tracks if provider is initialized ) // calculateSystemBootTime calculates the system boot time. @@ -112,13 +113,40 @@ type BaseEvent struct { } // getKubernetesProvider returns the global Kubernetes metadata provider. +// This function is thread-safe and ensures proper synchronization. func getKubernetesProvider() *kubernetes.Provider { - k8sOnce.Do(func() { + // Fast path: check if already initialized with read lock + k8sMutex.RLock() + if k8sInit { + provider := k8sProvider + k8sMutex.RUnlock() + return provider + } + k8sMutex.RUnlock() + + // Slow path: need to initialize, acquire write lock + k8sMutex.Lock() + defer k8sMutex.Unlock() + + // Double-check after acquiring write lock + if !k8sInit { k8sProvider = kubernetes.NewProvider() - }) + k8sInit = true + } + return k8sProvider } +// resetKubernetesProvider resets the global Kubernetes provider for testing. +// This should only be used in test code. +func resetKubernetesProvider() { + k8sMutex.Lock() + defer k8sMutex.Unlock() + + k8sProvider = nil + k8sInit = false +} + // NewBaseEvent creates a new base event. 
func NewBaseEvent(eventType string, pid uint32, command string, timestamp uint64, metadata map[string]interface{}) *BaseEvent { // Generate a unique ID diff --git a/internal/events/kubernetes_integration_test.go b/internal/events/kubernetes_integration_test.go index a1b1461..b190cd7 100644 --- a/internal/events/kubernetes_integration_test.go +++ b/internal/events/kubernetes_integration_test.go @@ -2,7 +2,6 @@ package events import ( "os" - "sync" "testing" "time" ) @@ -20,9 +19,8 @@ func TestKubernetesMetadataIntegration(t *testing.T) { os.Setenv("NODE_NAME", originalNodeName) os.Setenv("POD_NAME", originalPodName) os.Setenv("POD_NAMESPACE", originalNamespace) - // Reset the provider for future tests - k8sProvider = nil - k8sOnce = sync.Once{} + // Reset the provider for future tests using the safe method + resetKubernetesProvider() }() t.Run("VM mode - no Kubernetes metadata", func(t *testing.T) { @@ -32,9 +30,8 @@ func TestKubernetesMetadataIntegration(t *testing.T) { os.Unsetenv("POD_NAME") os.Unsetenv("POD_NAMESPACE") - // Reset provider - k8sProvider = nil - k8sOnce = sync.Once{} + // Reset provider using the safe method + resetKubernetesProvider() metadata := map[string]interface{}{ "custom_field": "test_value", @@ -67,9 +64,8 @@ func TestKubernetesMetadataIntegration(t *testing.T) { os.Setenv("POD_NAME", "ebpf-monitor-abcde") os.Setenv("POD_NAMESPACE", "ebpf-system") - // Reset provider to pick up new env vars - k8sProvider = nil - k8sOnce = sync.Once{} + // Reset provider to pick up new env vars using the safe method + resetKubernetesProvider() metadata := map[string]interface{}{ "custom_field": "test_value", From 05702d00458b12cc2a5ae695364806fcd748e625 Mon Sep 17 00:00:00 2001 From: Simone Rodigari Date: Tue, 12 Aug 2025 22:27:36 +0100 Subject: [PATCH 7/7] fix linting --- internal/aggregator/aggregator.go | 6 ++++-- internal/aggregator/health.go | 6 +++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/internal/aggregator/aggregator.go 
b/internal/aggregator/aggregator.go index c9ab3d6..0f0611d 100644 --- a/internal/aggregator/aggregator.go +++ b/internal/aggregator/aggregator.go @@ -191,11 +191,13 @@ func (a *Aggregator) HandleIngest(w http.ResponseWriter, r *http.Request) { // Return success response w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]interface{}{ + if err := json.NewEncoder(w).Encode(map[string]interface{}{ "status": "success", "processed": processed, "total": len(requestData.Events), - }) + }); err != nil { + logger.Errorf("Failed to encode ingest response: %v", err) + } } // HandleStats handles HTTP requests for aggregation statistics. diff --git a/internal/aggregator/health.go b/internal/aggregator/health.go index 6b79494..7541b37 100644 --- a/internal/aggregator/health.go +++ b/internal/aggregator/health.go @@ -4,6 +4,8 @@ import ( "encoding/json" "net/http" "time" + + "github.com/srodi/ebpf-server/pkg/logger" ) // HealthCheck represents the aggregator health status. @@ -56,5 +58,7 @@ func (a *Aggregator) HandleHealth(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusServiceUnavailable) } - json.NewEncoder(w).Encode(health) + if err := json.NewEncoder(w).Encode(health); err != nil { + logger.Errorf("Failed to encode health response: %v", err) + } }