From b4234820045ceee5532e3ff95cf253934d5a4853 Mon Sep 17 00:00:00 2001 From: Simone Rodigari Date: Wed, 13 Aug 2025 07:24:11 +0100 Subject: [PATCH 01/11] add API endpoints to aggregator for compatibility --- bpf/connection.o | Bin 15152 -> 0 bytes bpf/packet_drop.o | Bin 10792 -> 0 bytes cmd/aggregator/main.go | 7 + internal/aggregator/aggregator.go | 367 ++++++++++++++++++++++++++++++ 4 files changed, 374 insertions(+) delete mode 100644 bpf/connection.o delete mode 100644 bpf/packet_drop.o diff --git a/bpf/connection.o b/bpf/connection.o deleted file mode 100644 index d4d8c384bb2c0aef55b7886093503de893801db1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 15152 zcmcIq3vgUlc|LdV-qn>?kCo)r!}84X{K>Qh&_~0Lkr_6nNCVmMiAP%&^D$ikTxB= z-+vyfyT@`SGYn_;zyJCE|3Clv&-0#p@1ESbXIH>5m`Vow6u0gwZbn17~2BX3#QON2=+8{Px0snA9b5;eW|Ba^YVz*zd=IrB`oXV^3Y=)r$uE z?IT<+0`o#aUXJ1=4_=6(1b@6*a_SYrm6BI*e(pZbSQ(m2{;0o+Q#-l;$2W5W@9`TY z-y}J;n*7}$dAsColDA5}M)K8?w@B_t-Yt1d@-E5ON&Y^`*Gk?gIrSpdzh3e_$$KU5 z;r!TX>OIDywy-<*42QhESFT>YO6{c?aE>Mw616)EIskG&YW(*y(T>zJSFS?L&ehTt zdKBjU-M*1yuYQ2@sL{=6* zDW_2<{cn0a#l*GPD|{Z`cb^!~17H;4eK{Tm`;Zu>;yiyHlZX1r6U)6s9*3OP0pAC2 z5dK`Fo?V?{%r#cArBJxW0K1B%7&ShK3-LXS)ze;j8?W#;7snDAN3D#bL&kxicpR8t zJPxtHxke4U`V7U}&aVCr#R+|yIQE~9Q@mn+yGBIXwMskeU)-)u+V${uxkfd+dJ~n8 zu_#q(pksk8Cib@leY@1Jkopc;zLf=jMCFz-_AK!>W_*px)2yaAmrFew!SySo-r@SF zQODTLRBj~;psS+>{0~w2H$Ob#Q;%#QhSc73bBXvyd$I0H3E(Bf!ib zkza#V{FL_9AT!?}y;v{T>KWIQKm1Dk)5=@$Kh6DrU<0pdho24+rf_L1UN& zaDNEbnGhUYC_8Nve9bPD8y{xDw=F7!!ou9BG1}VdEVC`w5NT*`t7)zcc@6o7hiQNU z{mg9JUHAT`8^fE!0mNw<#%>IJu!Zfe+lU>>YK6hJa2VMTWy9fZ$d=U^;V>1!+OOmu z2n+(2SEC?fOL%#-JuE5^wiQu0zX5|^X>qG68sh3G@+6G31Jy(g%GcHzRF^1LR|yMM z!P16e)zO$ijfpqj-(JkvqJcc=9F@x{Gm_9NGMIdk*LXy*LBnp@VxUx{-8D{Q4Cb)|(%nF8#kY$$zLVHQQ zE%1y{7Gaz1vPptlm|gw=WwzR^oNln@Cvf3Ko0(->!SaWca0&TCGFW)YQ^YIEv1(bx z3%FGB%UR0Ng;iPMGCN%HO=OLl@^6DxlRR9>CDCxz4@py7j%C0qs7B%7E##P0T^0;S zXlfhF%7a(|nn5K$CzP**CK9fpP1~p}Uk6sR5CtpW3PEi&!fNc=y~wg!W-r}GnYsY8 z>dFEOT~x)&?r}MU#xSg@yF~d`wlu<8Z3nS2*2e7BYbevs z>^0q#p&OUo%8RdUWacW!_y|Rq)nXeaRlqTVAtO2PQ9eVAyW81z>fw60+5QC-Z7|le z9RbX%hQJQCe-MQmS%bNiC4K;9qtOt&iH*AyRb!yRx|tRJ1o_Kuxsr+eJ z*Y?E&jdlkW3^Xx&?T;z5EMTwWnP$`O`YB142kma2Sz+5f|3#9O5!Os$cLu7OsX6Zp zR5p|6SYWAn1(-J>#H`*jyPVFMwcEF_F1tUAEQ>LF;3#FfgZ3bo^i+iR>^2~+-dmDwv1_tf-^UTIgcATpQZ)DBXfg1wV%`{y%1|kd7 zB*ILxmL1Ou(9`ZpmC)*=d(y3-;M>7V7B;}sHg><$HMAF=DGL_Drz?Upm1ir1^Oc3F z;B3|Ts^EN8p*lEQeYQGyvHDVV@IquhLObT$OM<5>W-5bcE9WbNg{s-A;Q6Zgs$ijd zwmNvW`eJqPQgtB`JQFz|Vaw5j;tVF{$RO(fEzrk6W1u6T6QECmJ_ULd^oO7qLEi`c z1oS_k5d16yb%8d5c7bjO-31y29RlS+9|xTR&48W+{Q>BC(9582fnEju5JYqOmmm}U zRRfBGT0m`}KF|ixt)RW2eV}_lNl*s#Y0zgukANNpJr4R^5KXt|Kwkkp4|*B&3g~;F z*Ff{2OQ8P%{Q^X@Xz%EBIzR1mOqwtSt^}pQ^}(VNOCFHqq-8gRQkZ^G`Pz>vgx#! zI*@k9oyiR1qzgAY;N}x!)43dqCbFsVME*c(oGT9HQ&Vn&)M??+Z#kBknp*Hk)l-$( zTxQfog~{>6G%76#!MJt~-^*2xKsa zib8Ua3c->&GWU<1tc#C+B9}ap@Q!&j?&xlpVF1}QN@8A25A?H~D|}+7_40{SR`CIm z;aw_614Eb3jAbTyyF}Po^bh8cm}rSSSW0WSc;$cXpg#HnhPq;mwsmOGa}*3 zSh+-3LQ0FH1+oxP!s(SUrwGL%7K@EP`!|Ej9ZA_i!c$gYXnXsj3VY84KY6PWP-A1pmP}_-!DZ19`d&S9;1NzvD9B2!0GiM?VL2O6KF> zv`f+%z5sd{gk;n1KEt!jpmvh4F9~c{fj=wq{7oYD^_&;mAaGrp}ti8 zf;LrBnQwsT$@^O%$SUaRQK$+ewIQ{euyp!yDECpA*0K{;MCOOJfQf0>67E> zizh12_p8PBG`?K?8=2bV`1)FG&imI{$r_X3x(lqhxUe{j3ruW9Se`|R*bT6xiW0ta z(h@IS^c-a~*b2L`BFB=GlOAQn(kyNb@4DxAUvrCqH>aeU4lkdZ9#g_aI>cFv&^n!u zGY3f;3$whl)^)Zzi`*JI+C?|7bG+33Zf2rmt=qZI>FM0eounq5juyAm!Dcd@OFKP6 z>UM3zW+VOi?+of5($VQ`+vcqAE1Ks$Y3;wUbjV8J4*p@#v)SqHc6O&z`BZWe(NML{ zM9Q5U=fP0){@Q4i5q{S;$`s?5xO2g|+{<_c+!0J@2j@FoiBxTbB|^D>F34ohs+)kb zmi9xa)%YwZIx!wy+h`yEXz%`o9wmD9?%GNKkan3 zqWAnR$t{{GN+33o5h0L{5>=}VwmRE)C3fGw^G@A~GF&u1)F6AO7~_IS#fduxLX|h~ zn8)wDoleUl0rRMES@5yY=}PyW;-Q<{H4%~UnwYq7(O^^%WN~d6N}I}->Xf{+lz-ic zvrvaC7TG75zhUpGajERUHEn~;*R%oN-{KS#lV_NB?{TVFg$`dIP3BuC*~W4CFZPWX)~!x||N6zm zeSIVq$98pwk4+`B9-B;!xoOYE?T^ku4&4Wl=)OwyI&O@Y`OQ(>LtlkV-GJ+mr~4_% z@OGG||HZYF4DjxPbcDx&n{0!h0SJE?xV*w(w@5u0PeZtpp1ze;Ej8G%4_5%s`fw-k z86PJ7oW%c$@|BLk=qZ-`5e@^7fD^95L8GG8UfLC@H{K<~=ge~A%sXqi98G?U_ zPXgEOfq#h~1FpHnVCN8sqOg zf%Zte9=HpgbI^x}f$8M~`6GQ4cvi!6603MFO8g}3mn}8fyu>d8H%3er0VjWd4_@D4 zGDl)nzDHu3pH{!gZjrbOhh}4o$qq^!0Z!ThcGQQXz_S{jllTGHcQgiA4>;BT$KYM9 z0XFEv`+(y<90ks5xFB&O&YPRQivIB7R^T}wUJZQKhe?0Ihe`jM#Git{buVH3z$rfN zzbC*te7F^O(1%w8$9&H~TA47l9kDX2B)cZ(mR{6Wp0%F*)=J~v;*w*qD# z5A!`y`6GTBWhi|Q&8MeL7kS$--l?i&1`-em8G6r$T?S+#Uu~^4*OB1URqoSIyi@US ze>$;#YvTAFRa+r5f@OzJ-2S5T)SEA7wjcYa)sNFNopEy4OJ`ra>Dl~WkN@DxciP{g z2cB~7liZl+nPD|8>9)!bSS)0I{iI`6h0Ir&V^xG4t8>PxaIDJE8Y>J`5wdTyR^Db= zp_mm3nM-0;ZI@NE%8IP9dhDC6@|KYKEXr4g%y||I4O>m27OMf}YC?O+_fsch*8Y(B z&n#v|L$P7=i?DS<=D(n>u07T;45QA3`2?iP5P>mlHFkx}mre{@^&#^+hC?p?;zSI! zo8L8Jma#2l{uy90Wd1fF_ju)mV|9eghhW_G3u~YB_$x>^h0LF!p4QN<*6Ntm>dYzKB#cQ|NL3Vg*GL@b_irjTm1k)tQe$KyX z=|(2Q#;7I!vK(^S^ZeyL8x}(dwO)_KxQNQB|+zhkr+)c45!)r+1`1?N$EtH7Tb(%Aat=K{EN?;+MZ6 zTVxpw%I|M9Ze)sd~do+E&h6gpgMZ<9o z->l(VG)&`2NyRrLU=iP`;b9Hmqu~(^k8Aj#h9@Tw0!vzhW((q{wKdj*y z4S!z4PiXi_4bN)$_cVM)!(Y|#oQ9v*@L3JNq~Y@#{-%a6X!vh6d{M)%YWOt`zpmkV z4gW;Lmo)tE8osRI|JLx^8vZ{G(=n5hn$JNEd+rE~F!@^tqC}X=$3c`79ulyKM>L$(a6!W}8lKhg zoQBV9_@ajAHGEmaYP?kZs{a*seEmV~?a^>t!$TS#(QsD71r5(=cvi!68a}V#iyEHS z@MR7A$Cug{p$k&6{TjyOX^B3r;UNu=XgI6kf`(@_Jgeb34WHNWMGenu__BuiLi4p3 zuRBWG=V-V`!*LA{X?R4#Sq&F7Jfq=R4bN%#yoN7ocwWPoHOvRv-+mf+A9ggXo;G4UcFztKou%XEZ#k;W-VT*YHIR&ujRyhSiaf#56UQQsa@Ro7zD7ez}^INTjE; z9i0IwDcmPu5r06#_h^{@|451IChAz*$ztQ~==6a^a&$E39%iw;do<5tId?L)V~S#geas%5g4zU^3<5PXMx_$;_C*;ya(HOT>T=S&7WVgy)hqe@!jRKv687O1lY; zDMlGPzCQW=@PAdrCMOS1@sB(Bt4Z;FH*c9Jg{O1ZUuq(koO1EE2j9P9VyGye9Az=@ z*c9ioUy6T-*|TFuPhu0}Y(VmU$=6HXEBT-_lYE2ZeUfjKYRT6tJRsy7B=48JPjY-Q z7PW5>e7)rO#|4-7OTJ$6UdcD~34NdB1HH1m^X}!E#dG#Ep3aImp!nFSAe+h9Vkh~xtv?3DIe?X|6 zs=Si_D`Zr^-UkVX+gs(yveGnR5M#kTevNutFvHIs^{Ugv* zqWbA=nqL2xAtRsq{9U~ULZrQd6|bs(#s2{sbI;WbSfOZyOhn#4)bywNDXDS-;YI)L2= zl78PjmY1WI3bg&vA34I=bG|d@%$diXxign~^5EgSV~!&XIpS@xgp5^)a~qcQj8rpX zEj&LCy%~I|UK+)tkO9=r`m8iu@RdDfReB*N#10f)a8)_$`55?8ox+`U4@>3JM-+Y( z@hPZbmkNm5lhTd~Dd(h+?O^I~P9+W(Z0)~?#8g>QvrBwa9q=s2=o(82|<%`<; zq+R8Y+BpMZmugSc9!R@tU(~Li1J&NBT|Ec7{dKC{kD?CN5Z0Og+-x3e&)=mSfEZtlM~Y8f4)wR$@u7cP~-EPb;5xO7yo~(6RsOR zBEMNL@^O{xh}zB8%RNU9D@W`1D6oeW%~j6lJOFbi9{|4xd;sjk;>ncrFlFkjqm*N3 z>YRs!KjC0oj_nri=0ol8UHP8G zo0RyqHrtT4o#iNLZ#W0?k(KmlbsGtt!bxqbz5B~_1H>xGvl_dWCMk0^N@)K zCTQO+VjGrVM;Gc*5HILtLf(8uICF4I0b11CnDd$6D0IUJ&lx zP~z|BkMQDkPMzo1$LbzoaaZgGr#>m#{rVDNhwvLX-J;VM4YQE$NAV*=R}0Vg;tfw5 z;|kWt8bxBo3s9Wah8H0kzl)^1A)1&`xxNiecrEquMACKO(A3~VB$w(Dt+5{Lq0Wa}#VUri1!5JGwBz5J$Kc!U zbc)-4mrAT#tbQ7bXm$H!NSoNFL-xHv2e(}ukRkC-@je;ib&LHnq=U`vh$TDNpsrX; z!?OtJT(?6s_?`3=D}VzbP`%@U@ z2>YVrpP;XeHF>xBPQx#ucfRYK+vu<7o6Bty{zj=Z`~D`Wv?Tq_Qc0%#EmCO>{H;=H z>+yS}(%$E9lS;>+zg;Sw8UJ>vblvOkkV^Lvf2UMZ$NaQZR-W*CrLtovU-*^L659 zvH(9Ip6=caei+P?=_q&}d@f1ilCS0q}>w z4}c#59|z~a)8JX~6X0jSzYqQl_<8Ubz+VA>4g3=Lo8ZgfZ-c)J=H&hj_y^$Ez?{(^ zfqx9X2EGpFB>n=-349y;@8AcAXN#5DU}P#+91rI5<*+bQ4ElGc`_g?o`)6fgf4W}` zm&STaxsl0yB|BQ4DWyk5@5Ib>zIQG+Ud|OOy znL&26oX>?bMS&W!Q~9DOk zTmt_+J&y;7?@dw#E1x`|j*2wJsN#_#IdS|1RVbtv+~p$UZN>wGjGscN18+1#%VJcd ze5b^Dc3uhAxp0?M^<^NpgF($a;hS=(Of3nvCW1S`yfyG$1Y6?@NKG-`ex~7s%^`jc z`Yd#HGZNyY)-$j_q4fpmAJzJ0=pQ%!sQ4-45B(YA5B*u=5B*cdAM!b^XCObV^##b! zYW*_g^IGSqeqQSt=wH(M0`zlQzYP7n@yDe6nem7I=f)rU%f=u2CF74t`WAGMe5Yrm zD#xojhJ2%6fe*UT$TLPa@`2=60X$PMOOjTercu4*HOjA$7)ht>1Eo>%DPD3_xZ>2<{4Qwt@9* z+at6qys801L&2^ARZ>qI}VR0s#_&@R{w?t5J{a|X)$+fl zvB~$Q#@y_@YpB>&@rQ{2896kb1a^L@#_EP~S>woWJS8mJC@6FnQ)emOUbE@!y9O4Y6|UvH2y8{=LL>fjwAfJpA#GZ z>l({(LB4l1mgDj`>UWjD9G9o?jA<;#zR88e(~m49{g4Q3;%Xz z2sS5PKlwZNeLeWx8?VglJNe@c&mE@m$-7^^`1J#yuY7m(N5A^(EpLg3Xk7o+if@d+ z^ziqdJ^TEXXMd~kpLpvu$V4a%5kAUWb3!OYQMI_vBtTk?!}Lp;!zZSYGpGv8?K^Jt^z{TX93> zFQNpszmAgB&RbM}%lDW_%Qsn}I5IUmn&%PNI_Gn5oXAg=a5YFzh`H&hLUDE;8tr_Q z;v5TfYr-_e#lP@6bOX6uR2GeJYGLR4vdJ|k$4CK|1jv)mUBoO^SDx~0D{GIO_#(xw zP~W~ICS8v?0WL3bS-0~TR8pj97^JTMB=cr4jcO{1aW#8ZT383o~e1MCJ}Qq zHxht|`A*?|nncXHZYKZ{^ZXyJpR>gMgT!)T(=UU~zF}~R&LD%;Um2EgAWN6_;Cv_SooBMpR({LEPUF+e_-J=7XGY-&szA47CvX;Ke6z63xCzZ7cKm< zg)dw9I~IP`!r!;>6$`&+;j0#Y-NM%_{4)#t8uM+(`&lxAsiz{CK0yS>p-<`8Bz5|q zieT2a159E$i}n(Lh{;C?K&JeJL`%42;ROqyw(waCpSSR33tzGDH4DFKVYyMlX4;$N z#u>qZh5Ia=u`rHU%i?iFt77Juw=j-e%gWDK_?(3=TKH89U$yXc3%_Gwp0i188#!g+ z9t#gz_+ASiv+#t4JD3c_{jCvfKab}5GUa`A2C*8pZvz2{<$SAeApjXXpkNi>W8vc# z{)mO2(U|$j{}92<-_qKa6zS3Y@GL&f4G)*|b0S^I&sRjcoS#bXKYDjs0_9LHj!zXr zd`;8=Q!^t9N3Q)UQ2B?Ia&~5H43~F=$P1tjL_xKVLJ^-ciJ4G0WOTG_-$Gez-$WxJ zh2q$ZNKZ}8O=s~z6Q7R9tC!7)7Y>a1Xri%lZaR-opwW-X_BR=ZOIxk)GHJBDk{cH3 z@Z_|Vdeo{Poe%Hd-=EzjAo?Kw_r&`LpADP#u2+`d*6YEPTTp(~qAl`mQ@UnMfF=2i zL}>$pZxwIOefZF3l-zf3hN3Hm?I^N_lF{b|pb3d@_WEuWU zBj$&OguRrvGMK zHo#`r$&hdBE0nH=CP}vM{jl0LI=-&sugj>VAKUjn6xC|~u~jN&K|dI5KlgLH{gbe< z&OUIIzQfLfa0y>VCclwSqtxh8$G_B9#h$3KhVLS5wc3A`4F+Z8uaN7HcPYF5{99qI z{BP>~uW->s$X*X0Kya=6DSk2m1@c$r=QnmcKj*hr{`2~S{Jb@Oc7EOmYvmu*`7=76 z!$Ds=|64c7-_wNxP)7bP{g`-Tc>_wV{PThG$S5JlpSN=pZ=V03!d9#Q6FUEUtwIZm zpCG$IQWKhmEPMs>UqCclw?g#!2RuV)|@j1T4DdWFn< f>BsSgz8*kD))dRYq&|3=^CR=|4>+d%=3Mq)WzMTz diff --git a/cmd/aggregator/main.go b/cmd/aggregator/main.go index 843b868..1a66aca 100644 --- a/cmd/aggregator/main.go +++ b/cmd/aggregator/main.go @@ -52,6 +52,13 @@ func main() { mux.HandleFunc("/api/events", agg.HandleEvents) mux.HandleFunc("/api/events/ingest", agg.HandleIngest) mux.HandleFunc("/api/stats", agg.HandleStats) + mux.HandleFunc("/api/programs", agg.HandlePrograms) + + // Connection and packet drop API endpoints (aggregator-specific) + mux.HandleFunc("/api/list-connections", agg.HandleListConnections) + mux.HandleFunc("/api/list-packet-drops", agg.HandleListPacketDrops) + mux.HandleFunc("/api/connection-summary", agg.HandleConnectionSummary) + mux.HandleFunc("/api/packet-drop-summary", agg.HandlePacketDropSummary) // Swagger documentation mux.HandleFunc("/swagger/", httpSwagger.WrapHandler) diff --git a/internal/aggregator/aggregator.go b/internal/aggregator/aggregator.go index 0f0611d..24862af 100644 --- a/internal/aggregator/aggregator.go +++ b/internal/aggregator/aggregator.go @@ -104,6 +104,64 @@ func (a *Aggregator) IsRunning() bool { return a.running } +// HandleHealth responds with aggregator health information. +// +// @Summary Health check +// @Description Get the health status of the eBPF event aggregator +// @Tags health +// @Accept json +// @Produce json +// @Success 200 {object} map[string]interface{} "Health status" +// @Failure 503 {object} map[string]string "Service unavailable" +// @Router /health [get] +func (a *Aggregator) HandleHealth(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) + return + } + + a.mu.RLock() + running := a.running + a.mu.RUnlock() + + if !running { + http.Error(w, "Aggregator not running", http.StatusServiceUnavailable) + return + } + + // Get current stats + a.stats.mu.RLock() + totalEvents := a.stats.TotalEvents + eventsByType := make(map[string]int64) + for k, v := range a.stats.EventsByType { + eventsByType[k] = v + } + eventsByNode := make(map[string]int64) + for k, v := range a.stats.EventsByNode { + eventsByNode[k] = v + } + lastEventTime := a.stats.LastEventTime + startTime := a.stats.StartTime + a.stats.mu.RUnlock() + + health := map[string]interface{}{ + "status": "healthy", + "running": running, + "uptime": time.Since(startTime).String(), + "total_events": totalEvents, + "events_by_type": eventsByType, + "events_by_node": eventsByNode, + "last_event_time": lastEventTime, + "timestamp": time.Now().Format(time.RFC3339), + } + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(health); err != nil { + logger.Errorf("Failed to encode health response: %v", err) + http.Error(w, "Internal server error", http.StatusInternalServerError) + } +} + // HandleEvents handles HTTP requests for querying aggregated events. // // @Summary Query aggregated events @@ -232,6 +290,39 @@ func (a *Aggregator) HandleStats(w http.ResponseWriter, r *http.Request) { } } +// HandlePrograms handles HTTP requests for program information. +// Since the aggregator doesn't run eBPF programs directly, it returns program status from connected agents. +// +// @Summary Get program information +// @Description Get information about eBPF programs running on connected agents +// @Tags programs +// @Accept json +// @Produce json +// @Success 200 {object} map[string]interface{} "Program information" +// @Failure 405 {string} string "Method not allowed" +// @Router /api/programs [get] +func (a *Aggregator) HandlePrograms(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) + return + } + + // The aggregator doesn't directly manage eBPF programs + // In a real implementation, this could query connected agents for their program status + response := map[string]interface{}{ + "message": "Aggregator does not run eBPF programs directly", + "description": "This endpoint shows program status from connected agents", + "connected_agents": 0, // This would be populated based on active agent connections + "programs": []interface{}{}, // This would contain programs from all connected agents + "query_time": time.Now().Format(time.RFC3339), + } + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(response); err != nil { + logger.Errorf("Failed to encode programs response: %v", err) + } +} + // ingestEvent processes a single event from an agent. func (a *Aggregator) ingestEvent(ctx context.Context, eventData json.RawMessage) error { // Parse event data into a generic event @@ -359,3 +450,279 @@ func (e *SimpleEvent) Metadata() map[string]interface{} { func (e *SimpleEvent) MarshalJSON() ([]byte, error) { return json.Marshal(e.data) } + +// QueryEvents retrieves events matching the criteria (for API compatibility). +func (a *Aggregator) QueryEvents(ctx context.Context, query core.Query) ([]core.Event, error) { + return a.storage.Query(ctx, query) +} + +// CountEvents returns the number of events matching the criteria (for API compatibility). +func (a *Aggregator) CountEvents(ctx context.Context, query core.Query) (int, error) { + return a.storage.Count(ctx, query) +} + +// GetPrograms returns program status (for API compatibility). +// The aggregator doesn't manage eBPF programs directly, so returns empty slice. +func (a *Aggregator) GetPrograms() []core.ProgramStatus { + return []core.ProgramStatus{} +} + +// HandleListConnections returns recent connection events from aggregated data. +// +// @Summary List connection events +// @Description Get recent connection events grouped by PID from aggregated data +// @Tags connections +// @Accept json +// @Produce json +// @Success 200 {object} map[string]interface{} "Connection events" +// @Failure 500 {object} map[string]string "Internal server error" +// @Failure 503 {object} map[string]string "Service unavailable" +// @Router /api/list-connections [get] +func (a *Aggregator) HandleListConnections(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) + return + } + + query := core.Query{ + EventType: "connection", + Limit: 100, + Since: time.Now().Add(-1 * time.Hour), // Last hour by default + } + + events, err := a.storage.Query(r.Context(), query) + if err != nil { + logger.Errorf("Error querying connection events: %v", err) + http.Error(w, "Internal server error", http.StatusInternalServerError) + return + } + + // Group by PID for compatibility + eventsByPID := make(map[uint32][]core.Event) + for _, event := range events { + pid := event.PID() + eventsByPID[pid] = append(eventsByPID[pid], event) + } + + response := map[string]interface{}{ + "total_pids": len(eventsByPID), + "total_events": len(events), + "events_by_pid": eventsByPID, + "query_time": time.Now().Format(time.RFC3339), + } + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(response); err != nil { + logger.Errorf("Error encoding list connections response: %v", err) + http.Error(w, "Internal server error", http.StatusInternalServerError) + } +} + +// HandleListPacketDrops returns recent packet drop events from aggregated data. +// +// @Summary List packet drop events +// @Description Get recent packet drop events grouped by PID from aggregated data +// @Tags packet_drops +// @Accept json +// @Produce json +// @Success 200 {object} map[string]interface{} "Packet drop events" +// @Failure 500 {object} map[string]string "Internal server error" +// @Failure 503 {object} map[string]string "Service unavailable" +// @Router /api/list-packet-drops [get] +func (a *Aggregator) HandleListPacketDrops(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) + return + } + + query := core.Query{ + EventType: "packet_drop", + Limit: 100, + Since: time.Now().Add(-1 * time.Hour), // Last hour by default + } + + events, err := a.storage.Query(r.Context(), query) + if err != nil { + logger.Errorf("Error querying packet drop events: %v", err) + http.Error(w, "Internal server error", http.StatusInternalServerError) + return + } + + // Group by PID for compatibility + eventsByPID := make(map[uint32][]core.Event) + for _, event := range events { + pid := event.PID() + eventsByPID[pid] = append(eventsByPID[pid], event) + } + + response := map[string]interface{}{ + "total_pids": len(eventsByPID), + "total_events": len(events), + "events_by_pid": eventsByPID, + "query_time": time.Now().Format(time.RFC3339), + } + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(response); err != nil { + logger.Errorf("Error encoding list packet drops response: %v", err) + http.Error(w, "Internal server error", http.StatusInternalServerError) + } +} + +// HandleConnectionSummary provides connection event summaries from aggregated data. +// +// @Summary Get connection statistics +// @Description Get count of connection events filtered by PID, command, and time window from aggregated data +// @Tags connections +// @Accept json +// @Produce json +// @Param pid query int false "Process ID (GET only)" +// @Param command query string false "Command name (GET only)" +// @Param duration_seconds query int false "Duration in seconds (GET only, default: 60)" +// @Param request body map[string]interface{} false "Connection summary request (POST only)" +// @Success 200 {object} map[string]interface{} "Connection statistics" +// @Failure 400 {object} map[string]string "Bad request" +// @Failure 500 {object} map[string]string "Internal server error" +// @Router /api/connection-summary [get] +// @Router /api/connection-summary [post] +func (a *Aggregator) HandleConnectionSummary(w http.ResponseWriter, r *http.Request) { + // Parse request body for POST requests + var request struct { + PID uint32 `json:"pid"` + Command string `json:"command"` + Duration int `json:"duration_seconds"` + } + + if r.Method == "POST" { + if err := json.NewDecoder(r.Body).Decode(&request); err != nil { + http.Error(w, "Invalid JSON", http.StatusBadRequest) + return + } + } else { + // Handle GET request with query parameters + if pidStr := r.URL.Query().Get("pid"); pidStr != "" { + if pid, err := strconv.ParseUint(pidStr, 10, 32); err == nil { + request.PID = uint32(pid) + } + } + request.Command = r.URL.Query().Get("command") + if durationStr := r.URL.Query().Get("duration_seconds"); durationStr != "" { + if duration, err := strconv.Atoi(durationStr); err == nil { + request.Duration = duration + } + } + } + + // Default duration to 60 seconds + if request.Duration == 0 { + request.Duration = 60 + } + + // Build query + query := core.Query{ + EventType: "connection", + PID: request.PID, + Command: request.Command, + Since: time.Now().Add(-time.Duration(request.Duration) * time.Second), + } + + count, err := a.storage.Count(r.Context(), query) + if err != nil { + logger.Errorf("Error counting connection events: %v", err) + http.Error(w, "Internal server error", http.StatusInternalServerError) + return + } + + response := map[string]interface{}{ + "count": count, + "pid": request.PID, + "command": request.Command, + "duration_seconds": request.Duration, + "query_time": time.Now().Format(time.RFC3339), + } + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(response); err != nil { + logger.Errorf("Error encoding connection summary response: %v", err) + http.Error(w, "Internal server error", http.StatusInternalServerError) + } +} + +// HandlePacketDropSummary provides packet drop event summaries from aggregated data. +// +// @Summary Get packet drop statistics +// @Description Get count of packet drop events filtered by PID, command, and time window from aggregated data +// @Tags packet_drops +// @Accept json +// @Produce json +// @Param pid query int false "Process ID (GET only)" +// @Param command query string false "Command name (GET only)" +// @Param duration_seconds query int false "Duration in seconds (GET only, default: 60)" +// @Param request body map[string]interface{} false "Packet drop summary request (POST only)" +// @Success 200 {object} map[string]interface{} "Packet drop statistics" +// @Failure 400 {object} map[string]string "Bad request" +// @Failure 500 {object} map[string]string "Internal server error" +// @Router /api/packet-drop-summary [get] +// @Router /api/packet-drop-summary [post] +func (a *Aggregator) HandlePacketDropSummary(w http.ResponseWriter, r *http.Request) { + // Parse request body for POST requests + var request struct { + PID uint32 `json:"pid"` + Command string `json:"command"` + Duration int `json:"duration_seconds"` + } + + if r.Method == "POST" { + if err := json.NewDecoder(r.Body).Decode(&request); err != nil { + http.Error(w, "Invalid JSON", http.StatusBadRequest) + return + } + } else { + // Handle GET request with query parameters + if pidStr := r.URL.Query().Get("pid"); pidStr != "" { + if pid, err := strconv.ParseUint(pidStr, 10, 32); err == nil { + request.PID = uint32(pid) + } + } + request.Command = r.URL.Query().Get("command") + if durationStr := r.URL.Query().Get("duration_seconds"); durationStr != "" { + if duration, err := strconv.Atoi(durationStr); err == nil { + request.Duration = duration + } + } + } + + // Default duration to 60 seconds + if request.Duration == 0 { + request.Duration = 60 + } + + // Build query + query := core.Query{ + EventType: "packet_drop", + PID: request.PID, + Command: request.Command, + Since: time.Now().Add(-time.Duration(request.Duration) * time.Second), + } + + count, err := a.storage.Count(r.Context(), query) + if err != nil { + logger.Errorf("Error counting packet drop events: %v", err) + http.Error(w, "Internal server error", http.StatusInternalServerError) + return + } + + response := map[string]interface{}{ + "count": count, + "pid": request.PID, + "command": request.Command, + "duration_seconds": request.Duration, + "query_time": time.Now().Format(time.RFC3339), + } + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(response); err != nil { + logger.Errorf("Error encoding packet drop summary response: %v", err) + http.Error(w, "Internal server error", http.StatusInternalServerError) + } +} From 621a316bdf812a799188ccde4da37414f878258b Mon Sep 17 00:00:00 2001 From: Simone Rodigari Date: Wed, 13 Aug 2025 07:30:36 +0100 Subject: [PATCH 02/11] add API doc link for aggregator --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 6ea9f0d..3119e21 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,8 @@ make kind-deploy # Deploy to kind cluster make kind-integration-test # Run comprehensive tests ``` +To get detailed API documentation for the aggregator, available only in Kubernetes mode [see API Aggregator Documentation](https://petstore.swagger.io/?url=https://raw.githubusercontent.com/srodi/ebpf-server/main/docs/swagger-aggregator/swagger.json) + ### VM Deployment (Traditional) For single-server deployments: From 9b081ccac74fe24cba23c8cc73a0feb78473f427 Mon Sep 17 00:00:00 2001 From: Simone Rodigari Date: Wed, 13 Aug 2025 07:34:43 +0100 Subject: [PATCH 03/11] fix health handler --- internal/aggregator/aggregator.go | 58 ------------------------------- 1 file changed, 58 deletions(-) diff --git a/internal/aggregator/aggregator.go b/internal/aggregator/aggregator.go index 24862af..69c2099 100644 --- a/internal/aggregator/aggregator.go +++ b/internal/aggregator/aggregator.go @@ -104,64 +104,6 @@ func (a *Aggregator) IsRunning() bool { return a.running } -// HandleHealth responds with aggregator health information. -// -// @Summary Health check -// @Description Get the health status of the eBPF event aggregator -// @Tags health -// @Accept json -// @Produce json -// @Success 200 {object} map[string]interface{} "Health status" -// @Failure 503 {object} map[string]string "Service unavailable" -// @Router /health [get] -func (a *Aggregator) HandleHealth(w http.ResponseWriter, r *http.Request) { - if r.Method != http.MethodGet { - http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) - return - } - - a.mu.RLock() - running := a.running - a.mu.RUnlock() - - if !running { - http.Error(w, "Aggregator not running", http.StatusServiceUnavailable) - return - } - - // Get current stats - a.stats.mu.RLock() - totalEvents := a.stats.TotalEvents - eventsByType := make(map[string]int64) - for k, v := range a.stats.EventsByType { - eventsByType[k] = v - } - eventsByNode := make(map[string]int64) - for k, v := range a.stats.EventsByNode { - eventsByNode[k] = v - } - lastEventTime := a.stats.LastEventTime - startTime := a.stats.StartTime - a.stats.mu.RUnlock() - - health := map[string]interface{}{ - "status": "healthy", - "running": running, - "uptime": time.Since(startTime).String(), - "total_events": totalEvents, - "events_by_type": eventsByType, - "events_by_node": eventsByNode, - "last_event_time": lastEventTime, - "timestamp": time.Now().Format(time.RFC3339), - } - - w.Header().Set("Content-Type", "application/json") - if err := json.NewEncoder(w).Encode(health); err != nil { - logger.Errorf("Failed to encode health response: %v", err) - http.Error(w, "Internal server error", http.StatusInternalServerError) - } -} - // HandleEvents handles HTTP requests for querying aggregated events. // // @Summary Query aggregated events From f53048201e2e6ab3bc762292b73e5a48118a97e6 Mon Sep 17 00:00:00 2001 From: Simone Rodigari Date: Wed, 13 Aug 2025 07:53:41 +0100 Subject: [PATCH 04/11] update programs handler --- internal/aggregator/aggregator.go | 105 ++++++++++++++++++++++++++++-- 1 file changed, 98 insertions(+), 7 deletions(-) diff --git a/internal/aggregator/aggregator.go b/internal/aggregator/aggregator.go index 69c2099..354f3b7 100644 --- a/internal/aggregator/aggregator.go +++ b/internal/aggregator/aggregator.go @@ -249,14 +249,105 @@ func (a *Aggregator) HandlePrograms(w http.ResponseWriter, r *http.Request) { return } - // The aggregator doesn't directly manage eBPF programs - // In a real implementation, this could query connected agents for their program status + // Query recent events to infer connected agents and their programs + query := core.Query{ + Limit: 1000, // Get a good sample of recent events + Since: time.Now().Add(-10 * time.Minute), // Last 10 minutes + } + + events, err := a.storage.Query(r.Context(), query) + if err != nil { + logger.Errorf("Failed to query events for program info: %v", err) + http.Error(w, "Internal server error", http.StatusInternalServerError) + return + } + + // Aggregate information about connected agents and their programs + agents := make(map[string]map[string]interface{}) // node_name -> agent info + eventTypes := make(map[string]bool) // unique event types (indicate programs) + + for _, event := range events { + metadata := event.Metadata() + + // Extract agent information + nodeName, hasNode := metadata["k8s_node_name"].(string) + podName, _ := metadata["k8s_pod_name"].(string) + namespace, _ := metadata["k8s_namespace"].(string) + + if hasNode && nodeName != "" { + if agents[nodeName] == nil { + agents[nodeName] = map[string]interface{}{ + "node_name": nodeName, + "pod_name": podName, + "namespace": namespace, + "event_types": make(map[string]bool), + "last_seen": event.Time(), + "event_count": 0, + } + } + + // Update agent info + agent := agents[nodeName] + eventTypesMap := agent["event_types"].(map[string]bool) + eventTypesMap[event.Type()] = true + agent["event_types"] = eventTypesMap + agent["event_count"] = agent["event_count"].(int) + 1 + + // Update last seen if this event is more recent + if event.Time().After(agent["last_seen"].(time.Time)) { + agent["last_seen"] = event.Time() + } + } + + // Track unique event types across all agents + eventTypes[event.Type()] = true + } + + // Convert agents map to slice and format programs + var connectedAgents []map[string]interface{} + var allPrograms []map[string]interface{} + + for nodeName, agentInfo := range agents { + eventTypesMap := agentInfo["event_types"].(map[string]bool) + var programs []string + for eventType := range eventTypesMap { + programs = append(programs, eventType) + } + + agentData := map[string]interface{}{ + "node_name": nodeName, + "pod_name": agentInfo["pod_name"], + "namespace": agentInfo["namespace"], + "programs": programs, + "last_seen": agentInfo["last_seen"].(time.Time).Format(time.RFC3339), + "event_count": agentInfo["event_count"], + } + connectedAgents = append(connectedAgents, agentData) + + // Add programs to the global list + for _, program := range programs { + allPrograms = append(allPrograms, map[string]interface{}{ + "program_type": program, + "node_name": nodeName, + "status": "active", // Inferred from recent events + }) + } + } + + // Get unique program types + var uniquePrograms []string + for eventType := range eventTypes { + uniquePrograms = append(uniquePrograms, eventType) + } + response := map[string]interface{}{ - "message": "Aggregator does not run eBPF programs directly", - "description": "This endpoint shows program status from connected agents", - "connected_agents": 0, // This would be populated based on active agent connections - "programs": []interface{}{}, // This would contain programs from all connected agents - "query_time": time.Now().Format(time.RFC3339), + "connected_agents": len(connectedAgents), + "unique_programs": uniquePrograms, + "agents": connectedAgents, + "all_programs": allPrograms, + "total_events_analyzed": len(events), + "query_time": time.Now().Format(time.RFC3339), + "description": "Program information inferred from events received from connected agents", } w.Header().Set("Content-Type", "application/json") From 0849191afc4d5683758bad1896412623ee2ed791 Mon Sep 17 00:00:00 2001 From: Simone Rodigari Date: Wed, 13 Aug 2025 08:17:13 +0100 Subject: [PATCH 05/11] update response types and swagger doc --- docs/swagger-aggregator/docs.go | 152 +++++++++++++++- docs/swagger-aggregator/swagger.json | 152 +++++++++++++++- docs/swagger-aggregator/swagger.yaml | 118 ++++++++++++- docs/swagger/docs.go | 250 ++++++++++++++++++++++++++- docs/swagger/swagger.json | 250 ++++++++++++++++++++++++++- docs/swagger/swagger.yaml | 181 ++++++++++++++++++- internal/aggregator/aggregator.go | 91 +++++++++- internal/api/handlers.go | 130 ++++++++++++-- 8 files changed, 1261 insertions(+), 63 deletions(-) diff --git a/docs/swagger-aggregator/docs.go b/docs/swagger-aggregator/docs.go index 72f782b..438ca16 100644 --- a/docs/swagger-aggregator/docs.go +++ b/docs/swagger-aggregator/docs.go @@ -11,7 +11,6 @@ const docTemplate = `{ "title": "{{.Title}}", "contact": { "name": "API Support", - "url": "https://github.com/srodi/ebpf-server/issues", "email": "support@example.com" }, "license": { @@ -232,8 +231,7 @@ const docTemplate = `{ "200": { "description": "Filtered events", "schema": { - "type": "object", - "additionalProperties": true + "$ref": "#/definitions/api.EventsResponse" } }, "500": { @@ -563,8 +561,7 @@ const docTemplate = `{ "200": { "description": "List of eBPF programs", "schema": { - "type": "object", - "additionalProperties": true + "$ref": "#/definitions/api.ProgramsResponse" } }, "500": { @@ -635,8 +632,7 @@ const docTemplate = `{ "200": { "description": "Health status", "schema": { - "type": "object", - "additionalProperties": true + "$ref": "#/definitions/api.HealthResponse" } }, "503": { @@ -731,6 +727,99 @@ const docTemplate = `{ } } }, + "api.EventFilters": { + "type": "object", + "properties": { + "command": { + "description": "Command filter", + "type": "string", + "example": "curl" + }, + "limit": { + "description": "Limit filter", + "type": "integer", + "example": 100 + }, + "pid": { + "description": "Process ID filter", + "type": "integer", + "example": 1234 + }, + "since": { + "description": "Start time filter", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "type": { + "description": "Event type filter", + "type": "string", + "example": "connection" + }, + "until": { + "description": "End time filter", + "type": "string", + "example": "2023-01-01T13:00:00Z" + } + } + }, + "api.EventsResponse": { + "type": "object", + "properties": { + "count": { + "description": "Number of events returned", + "type": "integer", + "example": 25 + }, + "events": { + "description": "List of events", + "type": "array", + "items": {} + }, + "filters": { + "description": "Applied filters", + "allOf": [ + { + "$ref": "#/definitions/api.EventFilters" + } + ] + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "total_count": { + "description": "Total number of matching events", + "type": "integer", + "example": 150 + } + } + }, + "api.HealthResponse": { + "type": "object", + "properties": { + "component": { + "description": "Component name", + "type": "string", + "example": "eBPF Monitor API" + }, + "status": { + "description": "Service status", + "type": "string", + "example": "healthy" + }, + "uptime": { + "description": "Service uptime", + "type": "string", + "example": "1h30m" + }, + "version": { + "description": "API version", + "type": "string", + "example": "1.0.0" + } + } + }, "api.PacketDropListResponse": { "type": "object", "properties": { @@ -809,6 +898,53 @@ const docTemplate = `{ } } }, + "api.ProgramInfo": { + "type": "object", + "properties": { + "id": { + "description": "Program ID", + "type": "integer", + "example": 123 + }, + "name": { + "description": "Program name", + "type": "string", + "example": "connection_tracer" + }, + "status": { + "description": "Program status", + "type": "string", + "example": "loaded" + }, + "type": { + "description": "Program type", + "type": "string", + "example": "kprobe" + } + } + }, + "api.ProgramsResponse": { + "type": "object", + "properties": { + "programs": { + "description": "List of eBPF programs", + "type": "array", + "items": { + "$ref": "#/definitions/api.ProgramInfo" + } + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "total_count": { + "description": "Total number of programs", + "type": "integer", + "example": 2 + } + } + }, "internal_aggregator.HealthCheck": { "type": "object", "properties": { @@ -837,7 +973,7 @@ var SwaggerInfo = &swag.Spec{ BasePath: "/", Schemes: []string{}, Title: "eBPF Event Aggregator API", - Description: "HTTP API for aggregating and querying eBPF events from multiple agents", + Description: "HTTP API for aggregating and querying eBPF events from multiple //\t@Success\t\t200\t\t\t{object}\tAggregatedE//\t@Success\t\t200\t\t{object}\tIngestResponse\t\t\t\t\"Ingest//\t@Success\t\t200\t{object}\tAggrega//\t@Success\t\t200\t{object}\tAggregatorProgramsResponse\t\"Program information\"ionStatsResponse\t\"Aggregation statistics\"on result\"entsResponse\t\"Events and count\"gents", InfoInstanceName: "swagger", SwaggerTemplate: docTemplate, LeftDelim: "{{", diff --git a/docs/swagger-aggregator/swagger.json b/docs/swagger-aggregator/swagger.json index 47b9556..8f01fcd 100644 --- a/docs/swagger-aggregator/swagger.json +++ b/docs/swagger-aggregator/swagger.json @@ -1,11 +1,10 @@ { "swagger": "2.0", "info": { - "description": "HTTP API for aggregating and querying eBPF events from multiple agents", + "description": "HTTP API for aggregating and querying eBPF events from multiple //\t@Success\t\t200\t\t\t{object}\tAggregatedE//\t@Success\t\t200\t\t{object}\tIngestResponse\t\t\t\t\"Ingest//\t@Success\t\t200\t{object}\tAggrega//\t@Success\t\t200\t{object}\tAggregatorProgramsResponse\t\"Program information\"ionStatsResponse\t\"Aggregation statistics\"on result\"entsResponse\t\"Events and count\"gents", "title": "eBPF Event Aggregator API", "contact": { "name": "API Support", - "url": "https://github.com/srodi/ebpf-server/issues", "email": "support@example.com" }, "license": { @@ -226,8 +225,7 @@ "200": { "description": "Filtered events", "schema": { - "type": "object", - "additionalProperties": true + "$ref": "#/definitions/api.EventsResponse" } }, "500": { @@ -557,8 +555,7 @@ "200": { "description": "List of eBPF programs", "schema": { - "type": "object", - "additionalProperties": true + "$ref": "#/definitions/api.ProgramsResponse" } }, "500": { @@ -629,8 +626,7 @@ "200": { "description": "Health status", "schema": { - "type": "object", - "additionalProperties": true + "$ref": "#/definitions/api.HealthResponse" } }, "503": { @@ -725,6 +721,99 @@ } } }, + "api.EventFilters": { + "type": "object", + "properties": { + "command": { + "description": "Command filter", + "type": "string", + "example": "curl" + }, + "limit": { + "description": "Limit filter", + "type": "integer", + "example": 100 + }, + "pid": { + "description": "Process ID filter", + "type": "integer", + "example": 1234 + }, + "since": { + "description": "Start time filter", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "type": { + "description": "Event type filter", + "type": "string", + "example": "connection" + }, + "until": { + "description": "End time filter", + "type": "string", + "example": "2023-01-01T13:00:00Z" + } + } + }, + "api.EventsResponse": { + "type": "object", + "properties": { + "count": { + "description": "Number of events returned", + "type": "integer", + "example": 25 + }, + "events": { + "description": "List of events", + "type": "array", + "items": {} + }, + "filters": { + "description": "Applied filters", + "allOf": [ + { + "$ref": "#/definitions/api.EventFilters" + } + ] + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "total_count": { + "description": "Total number of matching events", + "type": "integer", + "example": 150 + } + } + }, + "api.HealthResponse": { + "type": "object", + "properties": { + "component": { + "description": "Component name", + "type": "string", + "example": "eBPF Monitor API" + }, + "status": { + "description": "Service status", + "type": "string", + "example": "healthy" + }, + "uptime": { + "description": "Service uptime", + "type": "string", + "example": "1h30m" + }, + "version": { + "description": "API version", + "type": "string", + "example": "1.0.0" + } + } + }, "api.PacketDropListResponse": { "type": "object", "properties": { @@ -803,6 +892,53 @@ } } }, + "api.ProgramInfo": { + "type": "object", + "properties": { + "id": { + "description": "Program ID", + "type": "integer", + "example": 123 + }, + "name": { + "description": "Program name", + "type": "string", + "example": "connection_tracer" + }, + "status": { + "description": "Program status", + "type": "string", + "example": "loaded" + }, + "type": { + "description": "Program type", + "type": "string", + "example": "kprobe" + } + } + }, + "api.ProgramsResponse": { + "type": "object", + "properties": { + "programs": { + "description": "List of eBPF programs", + "type": "array", + "items": { + "$ref": "#/definitions/api.ProgramInfo" + } + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "total_count": { + "description": "Total number of programs", + "type": "integer", + "example": 2 + } + } + }, "internal_aggregator.HealthCheck": { "type": "object", "properties": { diff --git a/docs/swagger-aggregator/swagger.yaml b/docs/swagger-aggregator/swagger.yaml index 6168b51..f5ff4ef 100644 --- a/docs/swagger-aggregator/swagger.yaml +++ b/docs/swagger-aggregator/swagger.yaml @@ -59,6 +59,75 @@ definitions: example: "2023-01-01T12:00:00Z" type: string type: object + api.EventFilters: + properties: + command: + description: Command filter + example: curl + type: string + limit: + description: Limit filter + example: 100 + type: integer + pid: + description: Process ID filter + example: 1234 + type: integer + since: + description: Start time filter + example: "2023-01-01T12:00:00Z" + type: string + type: + description: Event type filter + example: connection + type: string + until: + description: End time filter + example: "2023-01-01T13:00:00Z" + type: string + type: object + api.EventsResponse: + properties: + count: + description: Number of events returned + example: 25 + type: integer + events: + description: List of events + items: {} + type: array + filters: + allOf: + - $ref: '#/definitions/api.EventFilters' + description: Applied filters + query_time: + description: Query timestamp + example: "2023-01-01T12:00:00Z" + type: string + total_count: + description: Total number of matching events + example: 150 + type: integer + type: object + api.HealthResponse: + properties: + component: + description: Component name + example: eBPF Monitor API + type: string + status: + description: Service status + example: healthy + type: string + uptime: + description: Service uptime + example: 1h30m + type: string + version: + description: API version + example: 1.0.0 + type: string + type: object api.PacketDropListResponse: properties: events_by_pid: @@ -118,6 +187,41 @@ definitions: example: "2023-01-01T12:00:00Z" type: string type: object + api.ProgramInfo: + properties: + id: + description: Program ID + example: 123 + type: integer + name: + description: Program name + example: connection_tracer + type: string + status: + description: Program status + example: loaded + type: string + type: + description: Program type + example: kprobe + type: string + type: object + api.ProgramsResponse: + properties: + programs: + description: List of eBPF programs + items: + $ref: '#/definitions/api.ProgramInfo' + type: array + query_time: + description: Query timestamp + example: "2023-01-01T12:00:00Z" + type: string + total_count: + description: Total number of programs + example: 2 + type: integer + type: object internal_aggregator.HealthCheck: properties: component: @@ -135,8 +239,9 @@ info: contact: email: support@example.com name: API Support - url: https://github.com/srodi/ebpf-server/issues - description: HTTP API for aggregating and querying eBPF events from multiple agents + description: "HTTP API for aggregating and querying eBPF events from multiple //\t@Success\t\t200\t\t\t{object}\tAggregatedE//\t@Success\t\t200\t\t{object}\tIngestResponse\t\t\t\t\"Ingest//\t@Success\t\t200\t{object}\tAggrega//\t@Success\t\t200\t{object}\tAggregatorProgramsResponse\t\"Program + information\"ionStatsResponse\t\"Aggregation statistics\"on result\"entsResponse\t\"Events + and count\"gents" license: name: MIT url: https://github.com/srodi/ebpf-server/blob/main/LICENSE @@ -282,8 +387,7 @@ paths: "200": description: Filtered events schema: - additionalProperties: true - type: object + $ref: '#/definitions/api.EventsResponse' "500": description: Internal server error schema: @@ -502,8 +606,7 @@ paths: "200": description: List of eBPF programs schema: - additionalProperties: true - type: object + $ref: '#/definitions/api.ProgramsResponse' "500": description: Internal server error schema: @@ -551,8 +654,7 @@ paths: "200": description: Health status schema: - additionalProperties: true - type: object + $ref: '#/definitions/api.HealthResponse' "503": description: Service unavailable schema: diff --git a/docs/swagger/docs.go b/docs/swagger/docs.go index d5e136a..5dda5a6 100644 --- a/docs/swagger/docs.go +++ b/docs/swagger/docs.go @@ -232,8 +232,7 @@ const docTemplate = `{ "200": { "description": "Filtered events", "schema": { - "type": "object", - "additionalProperties": true + "$ref": "#/definitions/internal_api.EventsResponse" } }, "500": { @@ -257,6 +256,59 @@ const docTemplate = `{ } } }, + "/api/events/ingest": { + "post": { + "description": "Accept events from eBPF agents for aggregation and storage", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "events" + ], + "summary": "Ingest events from agents", + "parameters": [ + { + "description": "Events to ingest", + "name": "events", + "in": "body", + "required": true, + "schema": { + "type": "object" + } + } + ], + "responses": { + "200": { + "description": "Ingestion result", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Bad request", + "schema": { + "type": "string" + } + }, + "405": { + "description": "Method not allowed", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal server error", + "schema": { + "type": "string" + } + } + } + } + }, "/api/list-connections": { "get": { "description": "Get recent connection events grouped by PID", @@ -510,8 +562,7 @@ const docTemplate = `{ "200": { "description": "List of eBPF programs", "schema": { - "type": "object", - "additionalProperties": true + "$ref": "#/definitions/internal_api.ProgramsResponse" } }, "500": { @@ -535,6 +586,36 @@ const docTemplate = `{ } } }, + "/api/stats": { + "get": { + "description": "Retrieve statistics about event aggregation including counts by type and node", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "stats" + ], + "summary": "Get aggregation statistics", + "responses": { + "200": { + "description": "Aggregation statistics", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "405": { + "description": "Method not allowed", + "schema": { + "type": "string" + } + } + } + } + }, "/health": { "get": { "description": "Get the health status of the eBPF monitoring system", @@ -552,8 +633,7 @@ const docTemplate = `{ "200": { "description": "Health status", "schema": { - "type": "object", - "additionalProperties": true + "$ref": "#/definitions/internal_api.HealthResponse" } }, "503": { @@ -570,6 +650,24 @@ const docTemplate = `{ } }, "definitions": { + "aggregator.HealthCheck": { + "type": "object", + "properties": { + "component": { + "type": "string" + }, + "stats": { + "type": "object", + "additionalProperties": true + }, + "status": { + "type": "string" + }, + "uptime": { + "type": "string" + } + } + }, "internal_api.ConnectionListResponse": { "type": "object", "properties": { @@ -648,6 +746,99 @@ const docTemplate = `{ } } }, + "internal_api.EventFilters": { + "type": "object", + "properties": { + "command": { + "description": "Command filter", + "type": "string", + "example": "curl" + }, + "limit": { + "description": "Limit filter", + "type": "integer", + "example": 100 + }, + "pid": { + "description": "Process ID filter", + "type": "integer", + "example": 1234 + }, + "since": { + "description": "Start time filter", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "type": { + "description": "Event type filter", + "type": "string", + "example": "connection" + }, + "until": { + "description": "End time filter", + "type": "string", + "example": "2023-01-01T13:00:00Z" + } + } + }, + "internal_api.EventsResponse": { + "type": "object", + "properties": { + "count": { + "description": "Number of events returned", + "type": "integer", + "example": 25 + }, + "events": { + "description": "List of events", + "type": "array", + "items": {} + }, + "filters": { + "description": "Applied filters", + "allOf": [ + { + "$ref": "#/definitions/internal_api.EventFilters" + } + ] + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "total_count": { + "description": "Total number of matching events", + "type": "integer", + "example": 150 + } + } + }, + "internal_api.HealthResponse": { + "type": "object", + "properties": { + "component": { + "description": "Component name", + "type": "string", + "example": "eBPF Monitor API" + }, + "status": { + "description": "Service status", + "type": "string", + "example": "healthy" + }, + "uptime": { + "description": "Service uptime", + "type": "string", + "example": "1h30m" + }, + "version": { + "description": "API version", + "type": "string", + "example": "1.0.0" + } + } + }, "internal_api.PacketDropListResponse": { "type": "object", "properties": { @@ -725,6 +916,53 @@ const docTemplate = `{ "example": "2023-01-01T12:00:00Z" } } + }, + "internal_api.ProgramInfo": { + "type": "object", + "properties": { + "id": { + "description": "Program ID", + "type": "integer", + "example": 123 + }, + "name": { + "description": "Program name", + "type": "string", + "example": "connection_tracer" + }, + "status": { + "description": "Program status", + "type": "string", + "example": "loaded" + }, + "type": { + "description": "Program type", + "type": "string", + "example": "kprobe" + } + } + }, + "internal_api.ProgramsResponse": { + "type": "object", + "properties": { + "programs": { + "description": "List of eBPF programs", + "type": "array", + "items": { + "$ref": "#/definitions/internal_api.ProgramInfo" + } + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "total_count": { + "description": "Total number of programs", + "type": "integer", + "example": 2 + } + } } } }` diff --git a/docs/swagger/swagger.json b/docs/swagger/swagger.json index 8365979..64b7d3d 100644 --- a/docs/swagger/swagger.json +++ b/docs/swagger/swagger.json @@ -226,8 +226,7 @@ "200": { "description": "Filtered events", "schema": { - "type": "object", - "additionalProperties": true + "$ref": "#/definitions/internal_api.EventsResponse" } }, "500": { @@ -251,6 +250,59 @@ } } }, + "/api/events/ingest": { + "post": { + "description": "Accept events from eBPF agents for aggregation and storage", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "events" + ], + "summary": "Ingest events from agents", + "parameters": [ + { + "description": "Events to ingest", + "name": "events", + "in": "body", + "required": true, + "schema": { + "type": "object" + } + } + ], + "responses": { + "200": { + "description": "Ingestion result", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Bad request", + "schema": { + "type": "string" + } + }, + "405": { + "description": "Method not allowed", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal server error", + "schema": { + "type": "string" + } + } + } + } + }, "/api/list-connections": { "get": { "description": "Get recent connection events grouped by PID", @@ -504,8 +556,7 @@ "200": { "description": "List of eBPF programs", "schema": { - "type": "object", - "additionalProperties": true + "$ref": "#/definitions/internal_api.ProgramsResponse" } }, "500": { @@ -529,6 +580,36 @@ } } }, + "/api/stats": { + "get": { + "description": "Retrieve statistics about event aggregation including counts by type and node", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "stats" + ], + "summary": "Get aggregation statistics", + "responses": { + "200": { + "description": "Aggregation statistics", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "405": { + "description": "Method not allowed", + "schema": { + "type": "string" + } + } + } + } + }, "/health": { "get": { "description": "Get the health status of the eBPF monitoring system", @@ -546,8 +627,7 @@ "200": { "description": "Health status", "schema": { - "type": "object", - "additionalProperties": true + "$ref": "#/definitions/internal_api.HealthResponse" } }, "503": { @@ -564,6 +644,24 @@ } }, "definitions": { + "aggregator.HealthCheck": { + "type": "object", + "properties": { + "component": { + "type": "string" + }, + "stats": { + "type": "object", + "additionalProperties": true + }, + "status": { + "type": "string" + }, + "uptime": { + "type": "string" + } + } + }, "internal_api.ConnectionListResponse": { "type": "object", "properties": { @@ -642,6 +740,99 @@ } } }, + "internal_api.EventFilters": { + "type": "object", + "properties": { + "command": { + "description": "Command filter", + "type": "string", + "example": "curl" + }, + "limit": { + "description": "Limit filter", + "type": "integer", + "example": 100 + }, + "pid": { + "description": "Process ID filter", + "type": "integer", + "example": 1234 + }, + "since": { + "description": "Start time filter", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "type": { + "description": "Event type filter", + "type": "string", + "example": "connection" + }, + "until": { + "description": "End time filter", + "type": "string", + "example": "2023-01-01T13:00:00Z" + } + } + }, + "internal_api.EventsResponse": { + "type": "object", + "properties": { + "count": { + "description": "Number of events returned", + "type": "integer", + "example": 25 + }, + "events": { + "description": "List of events", + "type": "array", + "items": {} + }, + "filters": { + "description": "Applied filters", + "allOf": [ + { + "$ref": "#/definitions/internal_api.EventFilters" + } + ] + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "total_count": { + "description": "Total number of matching events", + "type": "integer", + "example": 150 + } + } + }, + "internal_api.HealthResponse": { + "type": "object", + "properties": { + "component": { + "description": "Component name", + "type": "string", + "example": "eBPF Monitor API" + }, + "status": { + "description": "Service status", + "type": "string", + "example": "healthy" + }, + "uptime": { + "description": "Service uptime", + "type": "string", + "example": "1h30m" + }, + "version": { + "description": "API version", + "type": "string", + "example": "1.0.0" + } + } + }, "internal_api.PacketDropListResponse": { "type": "object", "properties": { @@ -719,6 +910,53 @@ "example": "2023-01-01T12:00:00Z" } } + }, + "internal_api.ProgramInfo": { + "type": "object", + "properties": { + "id": { + "description": "Program ID", + "type": "integer", + "example": 123 + }, + "name": { + "description": "Program name", + "type": "string", + "example": "connection_tracer" + }, + "status": { + "description": "Program status", + "type": "string", + "example": "loaded" + }, + "type": { + "description": "Program type", + "type": "string", + "example": "kprobe" + } + } + }, + "internal_api.ProgramsResponse": { + "type": "object", + "properties": { + "programs": { + "description": "List of eBPF programs", + "type": "array", + "items": { + "$ref": "#/definitions/internal_api.ProgramInfo" + } + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "total_count": { + "description": "Total number of programs", + "type": "integer", + "example": 2 + } + } } } } \ No newline at end of file diff --git a/docs/swagger/swagger.yaml b/docs/swagger/swagger.yaml index 2e84124..6da84b1 100644 --- a/docs/swagger/swagger.yaml +++ b/docs/swagger/swagger.yaml @@ -1,5 +1,17 @@ basePath: / definitions: + aggregator.HealthCheck: + properties: + component: + type: string + stats: + additionalProperties: true + type: object + status: + type: string + uptime: + type: string + type: object internal_api.ConnectionListResponse: properties: events_by_pid: @@ -59,6 +71,75 @@ definitions: example: "2023-01-01T12:00:00Z" type: string type: object + internal_api.EventFilters: + properties: + command: + description: Command filter + example: curl + type: string + limit: + description: Limit filter + example: 100 + type: integer + pid: + description: Process ID filter + example: 1234 + type: integer + since: + description: Start time filter + example: "2023-01-01T12:00:00Z" + type: string + type: + description: Event type filter + example: connection + type: string + until: + description: End time filter + example: "2023-01-01T13:00:00Z" + type: string + type: object + internal_api.EventsResponse: + properties: + count: + description: Number of events returned + example: 25 + type: integer + events: + description: List of events + items: {} + type: array + filters: + allOf: + - $ref: '#/definitions/internal_api.EventFilters' + description: Applied filters + query_time: + description: Query timestamp + example: "2023-01-01T12:00:00Z" + type: string + total_count: + description: Total number of matching events + example: 150 + type: integer + type: object + internal_api.HealthResponse: + properties: + component: + description: Component name + example: eBPF Monitor API + type: string + status: + description: Service status + example: healthy + type: string + uptime: + description: Service uptime + example: 1h30m + type: string + version: + description: API version + example: 1.0.0 + type: string + type: object internal_api.PacketDropListResponse: properties: events_by_pid: @@ -118,6 +199,41 @@ definitions: example: "2023-01-01T12:00:00Z" type: string type: object + internal_api.ProgramInfo: + properties: + id: + description: Program ID + example: 123 + type: integer + name: + description: Program name + example: connection_tracer + type: string + status: + description: Program status + example: loaded + type: string + type: + description: Program type + example: kprobe + type: string + type: object + internal_api.ProgramsResponse: + properties: + programs: + description: List of eBPF programs + items: + $ref: '#/definitions/internal_api.ProgramInfo' + type: array + query_time: + description: Query timestamp + example: "2023-01-01T12:00:00Z" + type: string + total_count: + description: Total number of programs + example: 2 + type: integer + type: object host: localhost:8080 info: contact: @@ -270,8 +386,7 @@ paths: "200": description: Filtered events schema: - additionalProperties: true - type: object + $ref: '#/definitions/internal_api.EventsResponse' "500": description: Internal server error schema: @@ -287,6 +402,41 @@ paths: summary: Query events tags: - events + /api/events/ingest: + post: + consumes: + - application/json + description: Accept events from eBPF agents for aggregation and storage + parameters: + - description: Events to ingest + in: body + name: events + required: true + schema: + type: object + produces: + - application/json + responses: + "200": + description: Ingestion result + schema: + additionalProperties: true + type: object + "400": + description: Bad request + schema: + type: string + "405": + description: Method not allowed + schema: + type: string + "500": + description: Internal server error + schema: + type: string + summary: Ingest events from agents + tags: + - events /api/list-connections: get: consumes: @@ -455,8 +605,7 @@ paths: "200": description: List of eBPF programs schema: - additionalProperties: true - type: object + $ref: '#/definitions/internal_api.ProgramsResponse' "500": description: Internal server error schema: @@ -472,6 +621,27 @@ paths: summary: List eBPF programs tags: - programs + /api/stats: + get: + consumes: + - application/json + description: Retrieve statistics about event aggregation including counts by + type and node + produces: + - application/json + responses: + "200": + description: Aggregation statistics + schema: + additionalProperties: true + type: object + "405": + description: Method not allowed + schema: + type: string + summary: Get aggregation statistics + tags: + - stats /health: get: consumes: @@ -483,8 +653,7 @@ paths: "200": description: Health status schema: - additionalProperties: true - type: object + $ref: '#/definitions/internal_api.HealthResponse' "503": description: Service unavailable schema: diff --git a/internal/aggregator/aggregator.go b/internal/aggregator/aggregator.go index 354f3b7..c75a5e6 100644 --- a/internal/aggregator/aggregator.go +++ b/internal/aggregator/aggregator.go @@ -1,12 +1,12 @@ // Package aggregator provides event aggregation functionality for eBPF monitoring. // // @title eBPF Event Aggregator API -// @description HTTP API for aggregating and querying eBPF events from multiple agents +// @description HTTP API for aggregating and querying eBPF events from multiple // @Success 200 {object} AggregatedE// @Success 200 {object} IngestResponse "Ingest// @Success 200 {object} Aggrega// @Success 200 {object} AggregatorProgramsResponse "Program information"ionStatsResponse "Aggregation statistics"on result"entsResponse "Events and count"gents // @version 1.0.0 // @host localhost:8081 // @BasePath / // @contact.name API Support -// @contact.url https://github.com/srodi/ebpf-server/issues +// @contact.url// @Success 200 {object} AggregatedListResponse "// @Success 200 {object} AggregatedListResponse "Pack// @Success 200 {object} AggregatedSummaryResponse "Connection statistics"t// @Success 200 {object} AggregatedSummaryResponse "Packet drop statistics"drop events"onnection events"https://github.com/srodi/ebpf-server/issues // @contact.email support@example.com // @license.name MIT // @license.url https://github.com/srodi/ebpf-server/blob/main/LICENSE @@ -26,6 +26,93 @@ import ( "github.com/srodi/ebpf-server/pkg/logger" ) +// Response types for aggregator API endpoints + +// AggregatedEventsResponse represents the response for querying aggregated events +type AggregatedEventsResponse struct { + Events []core.Event `json:"events"` // List of aggregated events + Count int `json:"count" example:"50"` // Number of events returned + TotalCount int `json:"total_count" example:"1250"` // Total number of matching events + QueryTime string `json:"query_time" example:"2023-01-01T12:00:00Z"` // Query timestamp + Filters AggregatedEventFilters `json:"filters"` // Applied filters +} + +// AggregatedEventFilters represents the filters applied to aggregated event queries +type AggregatedEventFilters struct { + Type string `json:"type,omitempty" example:"connection"` // Event type filter + Node string `json:"node,omitempty" example:"worker-1"` // Node name filter + Since string `json:"since,omitempty" example:"2023-01-01T12:00:00Z"` // Start time filter + Until string `json:"until,omitempty" example:"2023-01-01T13:00:00Z"` // End time filter + Limit int `json:"limit,omitempty" example:"100"` // Limit filter +} + +// IngestResponse represents the response for event ingestion +type IngestResponse struct { + EventsProcessed int `json:"events_processed" example:"25"` // Number of events processed + Success bool `json:"success" example:"true"` // Ingestion success status + Message string `json:"message" example:"Events ingested successfully"` // Status message + Timestamp string `json:"timestamp" example:"2023-01-01T12:00:00Z"` // Processing timestamp +} + +// AggregationStatsResponse represents the response for aggregation statistics +type AggregationStatsResponse struct { + TotalEvents int64 `json:"total_events" example:"12500"` // Total events stored + EventsByType map[string]int64 `json:"events_by_type"` // Events grouped by type + EventsByNode map[string]int64 `json:"events_by_node"` // Events grouped by node + ConnectedAgents int `json:"connected_agents" example:"5"` // Number of connected agents + LastEventTime string `json:"last_event_time" example:"2023-01-01T12:00:00Z"` // Timestamp of last event + AggregationStart string `json:"aggregation_start" example:"2023-01-01T10:00:00Z"` // When aggregation started + QueryTime string `json:"query_time" example:"2023-01-01T12:00:00Z"` // Query timestamp +} + +// AggregatorProgramsResponse represents the response for aggregator programs information +type AggregatorProgramsResponse struct { + ConnectedAgents []AgentInfo `json:"connected_agents"` // List of connected agents + AllPrograms []ProgramInfo `json:"all_programs"` // All programs across agents + TotalAgents int `json:"total_agents" example:"3"` // Total number of agents + TotalPrograms int `json:"total_programs" example:"6"` // Total number of programs + QueryTime string `json:"query_time" example:"2023-01-01T12:00:00Z"` // Query timestamp +} + +// AgentInfo represents information about a connected agent +type AgentInfo struct { + NodeName string `json:"node_name" example:"worker-1"` // Node name + LastSeen string `json:"last_seen" example:"2023-01-01T12:00:00Z"` // Last seen timestamp + EventCount int64 `json:"event_count" example:"2500"` // Number of events from this agent + Programs []ProgramInfo `json:"programs"` // Programs running on this agent + Status string `json:"status" example:"active"` // Agent status +} + +// ProgramInfo represents information about an eBPF program +type ProgramInfo struct { + Name string `json:"name" example:"connection_tracer"` // Program name + Type string `json:"type" example:"kprobe"` // Program type + Status string `json:"status" example:"active"` // Program status + Node string `json:"node" example:"worker-1"` // Node where program is running + EventCount int64 `json:"event_count" example:"1250"` // Events generated by this program +} + +// AggregatedListResponse represents the response for listing aggregated connection/packet drop events +type AggregatedListResponse struct { + TotalPIDs int `json:"total_pids" example:"8"` // Number of unique PIDs across all nodes + TotalEvents int `json:"total_events" example:"45"` // Total number of events + TotalNodes int `json:"total_nodes" example:"3"` // Number of nodes with events + EventsByPID map[uint32][]core.Event `json:"events_by_pid"` // Events grouped by PID + EventsByNode map[string]int `json:"events_by_node"` // Event count by node + QueryTime string `json:"query_time" example:"2023-01-01T12:00:00Z"` // Query timestamp +} + +// AggregatedSummaryResponse represents the response for aggregated connection/packet drop summaries +type AggregatedSummaryResponse struct { + Count int `json:"count" example:"15"` // Total count across all nodes + CountByNode map[string]int `json:"count_by_node"` // Count by node + PID uint32 `json:"pid,omitempty" example:"1234"` // Process ID (if filtered) + Command string `json:"command,omitempty" example:"curl"` // Command name (if filtered) + DurationSeconds int `json:"duration_seconds" example:"60"` // Duration in seconds + TotalNodes int `json:"total_nodes" example:"3"` // Number of nodes with events + QueryTime string `json:"query_time" example:"2023-01-01T12:00:00Z"` // Query timestamp +} + // Config represents aggregator configuration. type Config struct { HTTPAddr string diff --git a/internal/api/handlers.go b/internal/api/handlers.go index f6c3029..1e5041a 100644 --- a/internal/api/handlers.go +++ b/internal/api/handlers.go @@ -39,25 +39,24 @@ func Initialize(sys *system.System) { // @Tags health // @Accept json // @Produce json -// @Success 200 {object} map[string]interface{} "Health status" -// @Failure 503 {object} map[string]string "Service unavailable" +// @Success 200 {object} HealthResponse "Health status" +// @Failure 503 {object} map[string]string "Service unavailable" // @Router /health [get] func HandleHealth(w http.ResponseWriter, r *http.Request) { - if globalSystem == nil { - http.Error(w, "System not initialized", http.StatusServiceUnavailable) - return - } - - health := map[string]interface{}{ - "status": "healthy", - "running": globalSystem.IsRunning(), - "time": time.Now().Format(time.RFC3339), + w.Header().Set("Content-Type", "application/json") + + health := HealthResponse{ + Status: "healthy", + Component: "eBPF Monitor API", + Uptime: "active", // Since we don't have access to start time, use a generic status + Version: "1.0.0", } - w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) if err := json.NewEncoder(w).Encode(health); err != nil { logger.Errorf("Error encoding health response: %v", err) http.Error(w, "Internal server error", http.StatusInternalServerError) + return } } @@ -68,7 +67,7 @@ func HandleHealth(w http.ResponseWriter, r *http.Request) { // @Tags programs // @Accept json // @Produce json -// @Success 200 {object} map[string]interface{} "List of eBPF programs" +// @Success 200 {object} ProgramsResponse "List of eBPF programs" // @Failure 500 {object} map[string]string "Internal server error" // @Failure 503 {object} map[string]string "Service unavailable" // @Router /api/programs [get] @@ -79,9 +78,36 @@ func HandlePrograms(w http.ResponseWriter, r *http.Request) { } programs := globalSystem.GetPrograms() + + // Convert to structured response + var programList []ProgramInfo + for _, prog := range programs { + status := "unknown" + if prog.Loaded && prog.Attached { + status = "active" + } else if prog.Loaded { + status = "loaded" + } else { + status = "inactive" + } + + programInfo := ProgramInfo{ + Name: prog.Name, + Type: "eBPF", // Generic type, could be enhanced + Status: status, + ID: int(prog.EventCount), // Use event count as an ID placeholder + } + programList = append(programList, programInfo) + } + + response := ProgramsResponse{ + Programs: programList, + TotalCount: len(programList), + QueryTime: time.Now().Format(time.RFC3339), + } w.Header().Set("Content-Type", "application/json") - if err := json.NewEncoder(w).Encode(programs); err != nil { + if err := json.NewEncoder(w).Encode(response); err != nil { logger.Errorf("Error encoding programs response: %v", err) http.Error(w, "Internal server error", http.StatusInternalServerError) } @@ -100,7 +126,7 @@ func HandlePrograms(w http.ResponseWriter, r *http.Request) { // @Param since query string false "Start time (RFC3339 format)" // @Param until query string false "End time (RFC3339 format)" // @Param limit query int false "Maximum number of events to return (default: 100)" -// @Success 200 {object} map[string]interface{} "Filtered events" +// @Success 200 {object} EventsResponse "Filtered events" // @Failure 500 {object} map[string]string "Internal server error" // @Failure 503 {object} map[string]string "Service unavailable" // @Router /api/events [get] @@ -158,10 +184,34 @@ func HandleEvents(w http.ResponseWriter, r *http.Request) { return } - response := map[string]interface{}{ - "events": events, - "count": len(events), - "query": query, + // Get total count for the same query without limit + totalQuery := query + totalQuery.Limit = 0 + totalCount, err := globalSystem.CountEvents(ctx, totalQuery) + if err != nil { + totalCount = len(events) // Fallback to returned count + } + + // Build filters struct for response + filters := EventFilters{ + Type: query.EventType, + PID: query.PID, + Command: query.Command, + Limit: query.Limit, + } + if !query.Since.IsZero() { + filters.Since = query.Since.Format(time.RFC3339) + } + if !query.Until.IsZero() { + filters.Until = query.Until.Format(time.RFC3339) + } + + response := EventsResponse{ + Events: events, + Count: len(events), + TotalCount: totalCount, + QueryTime: time.Now().Format(time.RFC3339), + Filters: filters, } w.Header().Set("Content-Type", "application/json") @@ -504,3 +554,45 @@ type PacketDropListResponse struct { EventsByPID map[uint32][]core.Event `json:"events_by_pid"` // Events grouped by PID QueryTime string `json:"query_time" example:"2023-01-01T12:00:00Z"` // Query timestamp } + +// HealthResponse represents the response for health check +type HealthResponse struct { + Status string `json:"status" example:"healthy"` // Service status + Component string `json:"component" example:"eBPF Monitor API"` // Component name + Uptime string `json:"uptime" example:"1h30m"` // Service uptime + Version string `json:"version" example:"1.0.0"` // API version +} + +// ProgramsResponse represents the response for listing eBPF programs +type ProgramsResponse struct { + Programs []ProgramInfo `json:"programs"` // List of eBPF programs + TotalCount int `json:"total_count" example:"2"` // Total number of programs + QueryTime string `json:"query_time" example:"2023-01-01T12:00:00Z"` // Query timestamp +} + +// ProgramInfo represents information about an eBPF program +type ProgramInfo struct { + Name string `json:"name" example:"connection_tracer"` // Program name + Type string `json:"type" example:"kprobe"` // Program type + Status string `json:"status" example:"loaded"` // Program status + ID int `json:"id" example:"123"` // Program ID +} + +// EventsResponse represents the response for querying events +type EventsResponse struct { + Events []core.Event `json:"events"` // List of events + Count int `json:"count" example:"25"` // Number of events returned + TotalCount int `json:"total_count" example:"150"` // Total number of matching events + QueryTime string `json:"query_time" example:"2023-01-01T12:00:00Z"` // Query timestamp + Filters EventFilters `json:"filters"` // Applied filters +} + +// EventFilters represents the filters applied to the event query +type EventFilters struct { + Type string `json:"type,omitempty" example:"connection"` // Event type filter + PID uint32 `json:"pid,omitempty" example:"1234"` // Process ID filter + Command string `json:"command,omitempty" example:"curl"` // Command filter + Since string `json:"since,omitempty" example:"2023-01-01T12:00:00Z"` // Start time filter + Until string `json:"until,omitempty" example:"2023-01-01T13:00:00Z"` // End time filter + Limit int `json:"limit,omitempty" example:"100"` // Limit filter +} From a3f1fadae91dfc158041796fd3ea613e6b897ed9 Mon Sep 17 00:00:00 2001 From: Simone Rodigari Date: Wed, 13 Aug 2025 08:17:58 +0100 Subject: [PATCH 06/11] run go fmt --- internal/aggregator/aggregator.go | 128 +++++++++--------- internal/aggregator/health.go | 6 +- internal/api/handlers.go | 36 ++--- internal/events/events.go | 10 +- .../events/kubernetes_integration_test.go | 4 +- internal/kubernetes/metadata.go | 18 +-- internal/kubernetes/metadata_test.go | 2 +- internal/storage/forwarding.go | 4 +- internal/system/system.go | 8 +- 9 files changed, 108 insertions(+), 108 deletions(-) diff --git a/internal/aggregator/aggregator.go b/internal/aggregator/aggregator.go index c75a5e6..d41477f 100644 --- a/internal/aggregator/aggregator.go +++ b/internal/aggregator/aggregator.go @@ -30,11 +30,11 @@ import ( // AggregatedEventsResponse represents the response for querying aggregated events type AggregatedEventsResponse struct { - Events []core.Event `json:"events"` // List of aggregated events - Count int `json:"count" example:"50"` // Number of events returned - TotalCount int `json:"total_count" example:"1250"` // Total number of matching events - QueryTime string `json:"query_time" example:"2023-01-01T12:00:00Z"` // Query timestamp - Filters AggregatedEventFilters `json:"filters"` // Applied filters + Events []core.Event `json:"events"` // List of aggregated events + Count int `json:"count" example:"50"` // Number of events returned + TotalCount int `json:"total_count" example:"1250"` // Total number of matching events + QueryTime string `json:"query_time" example:"2023-01-01T12:00:00Z"` // Query timestamp + Filters AggregatedEventFilters `json:"filters"` // Applied filters } // AggregatedEventFilters represents the filters applied to aggregated event queries @@ -48,69 +48,69 @@ type AggregatedEventFilters struct { // IngestResponse represents the response for event ingestion type IngestResponse struct { - EventsProcessed int `json:"events_processed" example:"25"` // Number of events processed - Success bool `json:"success" example:"true"` // Ingestion success status + EventsProcessed int `json:"events_processed" example:"25"` // Number of events processed + Success bool `json:"success" example:"true"` // Ingestion success status Message string `json:"message" example:"Events ingested successfully"` // Status message - Timestamp string `json:"timestamp" example:"2023-01-01T12:00:00Z"` // Processing timestamp + Timestamp string `json:"timestamp" example:"2023-01-01T12:00:00Z"` // Processing timestamp } // AggregationStatsResponse represents the response for aggregation statistics type AggregationStatsResponse struct { - TotalEvents int64 `json:"total_events" example:"12500"` // Total events stored - EventsByType map[string]int64 `json:"events_by_type"` // Events grouped by type - EventsByNode map[string]int64 `json:"events_by_node"` // Events grouped by node - ConnectedAgents int `json:"connected_agents" example:"5"` // Number of connected agents - LastEventTime string `json:"last_event_time" example:"2023-01-01T12:00:00Z"` // Timestamp of last event - AggregationStart string `json:"aggregation_start" example:"2023-01-01T10:00:00Z"` // When aggregation started - QueryTime string `json:"query_time" example:"2023-01-01T12:00:00Z"` // Query timestamp + TotalEvents int64 `json:"total_events" example:"12500"` // Total events stored + EventsByType map[string]int64 `json:"events_by_type"` // Events grouped by type + EventsByNode map[string]int64 `json:"events_by_node"` // Events grouped by node + ConnectedAgents int `json:"connected_agents" example:"5"` // Number of connected agents + LastEventTime string `json:"last_event_time" example:"2023-01-01T12:00:00Z"` // Timestamp of last event + AggregationStart string `json:"aggregation_start" example:"2023-01-01T10:00:00Z"` // When aggregation started + QueryTime string `json:"query_time" example:"2023-01-01T12:00:00Z"` // Query timestamp } // AggregatorProgramsResponse represents the response for aggregator programs information type AggregatorProgramsResponse struct { - ConnectedAgents []AgentInfo `json:"connected_agents"` // List of connected agents - AllPrograms []ProgramInfo `json:"all_programs"` // All programs across agents - TotalAgents int `json:"total_agents" example:"3"` // Total number of agents - TotalPrograms int `json:"total_programs" example:"6"` // Total number of programs + ConnectedAgents []AgentInfo `json:"connected_agents"` // List of connected agents + AllPrograms []ProgramInfo `json:"all_programs"` // All programs across agents + TotalAgents int `json:"total_agents" example:"3"` // Total number of agents + TotalPrograms int `json:"total_programs" example:"6"` // Total number of programs QueryTime string `json:"query_time" example:"2023-01-01T12:00:00Z"` // Query timestamp } // AgentInfo represents information about a connected agent type AgentInfo struct { - NodeName string `json:"node_name" example:"worker-1"` // Node name - LastSeen string `json:"last_seen" example:"2023-01-01T12:00:00Z"` // Last seen timestamp - EventCount int64 `json:"event_count" example:"2500"` // Number of events from this agent - Programs []ProgramInfo `json:"programs"` // Programs running on this agent - Status string `json:"status" example:"active"` // Agent status + NodeName string `json:"node_name" example:"worker-1"` // Node name + LastSeen string `json:"last_seen" example:"2023-01-01T12:00:00Z"` // Last seen timestamp + EventCount int64 `json:"event_count" example:"2500"` // Number of events from this agent + Programs []ProgramInfo `json:"programs"` // Programs running on this agent + Status string `json:"status" example:"active"` // Agent status } // ProgramInfo represents information about an eBPF program type ProgramInfo struct { - Name string `json:"name" example:"connection_tracer"` // Program name - Type string `json:"type" example:"kprobe"` // Program type - Status string `json:"status" example:"active"` // Program status - Node string `json:"node" example:"worker-1"` // Node where program is running - EventCount int64 `json:"event_count" example:"1250"` // Events generated by this program + Name string `json:"name" example:"connection_tracer"` // Program name + Type string `json:"type" example:"kprobe"` // Program type + Status string `json:"status" example:"active"` // Program status + Node string `json:"node" example:"worker-1"` // Node where program is running + EventCount int64 `json:"event_count" example:"1250"` // Events generated by this program } // AggregatedListResponse represents the response for listing aggregated connection/packet drop events type AggregatedListResponse struct { - TotalPIDs int `json:"total_pids" example:"8"` // Number of unique PIDs across all nodes - TotalEvents int `json:"total_events" example:"45"` // Total number of events - TotalNodes int `json:"total_nodes" example:"3"` // Number of nodes with events - EventsByPID map[uint32][]core.Event `json:"events_by_pid"` // Events grouped by PID - EventsByNode map[string]int `json:"events_by_node"` // Event count by node - QueryTime string `json:"query_time" example:"2023-01-01T12:00:00Z"` // Query timestamp + TotalPIDs int `json:"total_pids" example:"8"` // Number of unique PIDs across all nodes + TotalEvents int `json:"total_events" example:"45"` // Total number of events + TotalNodes int `json:"total_nodes" example:"3"` // Number of nodes with events + EventsByPID map[uint32][]core.Event `json:"events_by_pid"` // Events grouped by PID + EventsByNode map[string]int `json:"events_by_node"` // Event count by node + QueryTime string `json:"query_time" example:"2023-01-01T12:00:00Z"` // Query timestamp } // AggregatedSummaryResponse represents the response for aggregated connection/packet drop summaries type AggregatedSummaryResponse struct { - Count int `json:"count" example:"15"` // Total count across all nodes - CountByNode map[string]int `json:"count_by_node"` // Count by node - PID uint32 `json:"pid,omitempty" example:"1234"` // Process ID (if filtered) - Command string `json:"command,omitempty" example:"curl"` // Command name (if filtered) - DurationSeconds int `json:"duration_seconds" example:"60"` // Duration in seconds - TotalNodes int `json:"total_nodes" example:"3"` // Number of nodes with events - QueryTime string `json:"query_time" example:"2023-01-01T12:00:00Z"` // Query timestamp + Count int `json:"count" example:"15"` // Total count across all nodes + CountByNode map[string]int `json:"count_by_node"` // Count by node + PID uint32 `json:"pid,omitempty" example:"1234"` // Process ID (if filtered) + Command string `json:"command,omitempty" example:"curl"` // Command name (if filtered) + DurationSeconds int `json:"duration_seconds" example:"60"` // Duration in seconds + TotalNodes int `json:"total_nodes" example:"3"` // Number of nodes with events + QueryTime string `json:"query_time" example:"2023-01-01T12:00:00Z"` // Query timestamp } // Config represents aggregator configuration. @@ -338,7 +338,7 @@ func (a *Aggregator) HandlePrograms(w http.ResponseWriter, r *http.Request) { // Query recent events to infer connected agents and their programs query := core.Query{ - Limit: 1000, // Get a good sample of recent events + Limit: 1000, // Get a good sample of recent events Since: time.Now().Add(-10 * time.Minute), // Last 10 minutes } @@ -352,40 +352,40 @@ func (a *Aggregator) HandlePrograms(w http.ResponseWriter, r *http.Request) { // Aggregate information about connected agents and their programs agents := make(map[string]map[string]interface{}) // node_name -> agent info eventTypes := make(map[string]bool) // unique event types (indicate programs) - + for _, event := range events { metadata := event.Metadata() - + // Extract agent information nodeName, hasNode := metadata["k8s_node_name"].(string) podName, _ := metadata["k8s_pod_name"].(string) namespace, _ := metadata["k8s_namespace"].(string) - + if hasNode && nodeName != "" { if agents[nodeName] == nil { agents[nodeName] = map[string]interface{}{ - "node_name": nodeName, - "pod_name": podName, - "namespace": namespace, - "event_types": make(map[string]bool), - "last_seen": event.Time(), - "event_count": 0, + "node_name": nodeName, + "pod_name": podName, + "namespace": namespace, + "event_types": make(map[string]bool), + "last_seen": event.Time(), + "event_count": 0, } } - + // Update agent info agent := agents[nodeName] eventTypesMap := agent["event_types"].(map[string]bool) eventTypesMap[event.Type()] = true agent["event_types"] = eventTypesMap agent["event_count"] = agent["event_count"].(int) + 1 - + // Update last seen if this event is more recent if event.Time().After(agent["last_seen"].(time.Time)) { agent["last_seen"] = event.Time() } } - + // Track unique event types across all agents eventTypes[event.Type()] = true } @@ -393,14 +393,14 @@ func (a *Aggregator) HandlePrograms(w http.ResponseWriter, r *http.Request) { // Convert agents map to slice and format programs var connectedAgents []map[string]interface{} var allPrograms []map[string]interface{} - + for nodeName, agentInfo := range agents { eventTypesMap := agentInfo["event_types"].(map[string]bool) var programs []string for eventType := range eventTypesMap { programs = append(programs, eventType) } - + agentData := map[string]interface{}{ "node_name": nodeName, "pod_name": agentInfo["pod_name"], @@ -410,7 +410,7 @@ func (a *Aggregator) HandlePrograms(w http.ResponseWriter, r *http.Request) { "event_count": agentInfo["event_count"], } connectedAgents = append(connectedAgents, agentData) - + // Add programs to the global list for _, program := range programs { allPrograms = append(allPrograms, map[string]interface{}{ @@ -428,13 +428,13 @@ func (a *Aggregator) HandlePrograms(w http.ResponseWriter, r *http.Request) { } response := map[string]interface{}{ - "connected_agents": len(connectedAgents), - "unique_programs": uniquePrograms, - "agents": connectedAgents, - "all_programs": allPrograms, + "connected_agents": len(connectedAgents), + "unique_programs": uniquePrograms, + "agents": connectedAgents, + "all_programs": allPrograms, "total_events_analyzed": len(events), - "query_time": time.Now().Format(time.RFC3339), - "description": "Program information inferred from events received from connected agents", + "query_time": time.Now().Format(time.RFC3339), + "description": "Program information inferred from events received from connected agents", } w.Header().Set("Content-Type", "application/json") diff --git a/internal/aggregator/health.go b/internal/aggregator/health.go index 7541b37..c23dce6 100644 --- a/internal/aggregator/health.go +++ b/internal/aggregator/health.go @@ -10,9 +10,9 @@ import ( // HealthCheck represents the aggregator health status. type HealthCheck struct { - Status string `json:"status"` - Component string `json:"component"` - Uptime string `json:"uptime"` + Status string `json:"status"` + Component string `json:"component"` + Uptime string `json:"uptime"` Stats map[string]interface{} `json:"stats"` } diff --git a/internal/api/handlers.go b/internal/api/handlers.go index 1e5041a..5167573 100644 --- a/internal/api/handlers.go +++ b/internal/api/handlers.go @@ -44,7 +44,7 @@ func Initialize(sys *system.System) { // @Router /health [get] func HandleHealth(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "application/json") - + health := HealthResponse{ Status: "healthy", Component: "eBPF Monitor API", @@ -78,7 +78,7 @@ func HandlePrograms(w http.ResponseWriter, r *http.Request) { } programs := globalSystem.GetPrograms() - + // Convert to structured response var programList []ProgramInfo for _, prog := range programs { @@ -90,7 +90,7 @@ func HandlePrograms(w http.ResponseWriter, r *http.Request) { } else { status = "inactive" } - + programInfo := ProgramInfo{ Name: prog.Name, Type: "eBPF", // Generic type, could be enhanced @@ -557,34 +557,34 @@ type PacketDropListResponse struct { // HealthResponse represents the response for health check type HealthResponse struct { - Status string `json:"status" example:"healthy"` // Service status - Component string `json:"component" example:"eBPF Monitor API"` // Component name - Uptime string `json:"uptime" example:"1h30m"` // Service uptime - Version string `json:"version" example:"1.0.0"` // API version + Status string `json:"status" example:"healthy"` // Service status + Component string `json:"component" example:"eBPF Monitor API"` // Component name + Uptime string `json:"uptime" example:"1h30m"` // Service uptime + Version string `json:"version" example:"1.0.0"` // API version } // ProgramsResponse represents the response for listing eBPF programs type ProgramsResponse struct { - Programs []ProgramInfo `json:"programs"` // List of eBPF programs - TotalCount int `json:"total_count" example:"2"` // Total number of programs - QueryTime string `json:"query_time" example:"2023-01-01T12:00:00Z"` // Query timestamp + Programs []ProgramInfo `json:"programs"` // List of eBPF programs + TotalCount int `json:"total_count" example:"2"` // Total number of programs + QueryTime string `json:"query_time" example:"2023-01-01T12:00:00Z"` // Query timestamp } // ProgramInfo represents information about an eBPF program type ProgramInfo struct { - Name string `json:"name" example:"connection_tracer"` // Program name - Type string `json:"type" example:"kprobe"` // Program type - Status string `json:"status" example:"loaded"` // Program status - ID int `json:"id" example:"123"` // Program ID + Name string `json:"name" example:"connection_tracer"` // Program name + Type string `json:"type" example:"kprobe"` // Program type + Status string `json:"status" example:"loaded"` // Program status + ID int `json:"id" example:"123"` // Program ID } // EventsResponse represents the response for querying events type EventsResponse struct { - Events []core.Event `json:"events"` // List of events - Count int `json:"count" example:"25"` // Number of events returned - TotalCount int `json:"total_count" example:"150"` // Total number of matching events + Events []core.Event `json:"events"` // List of events + Count int `json:"count" example:"25"` // Number of events returned + TotalCount int `json:"total_count" example:"150"` // Total number of matching events QueryTime string `json:"query_time" example:"2023-01-01T12:00:00Z"` // Query timestamp - Filters EventFilters `json:"filters"` // Applied filters + Filters EventFilters `json:"filters"` // Applied filters } // EventFilters represents the filters applied to the event query diff --git a/internal/events/events.go b/internal/events/events.go index 68d0896..ebb07c8 100644 --- a/internal/events/events.go +++ b/internal/events/events.go @@ -23,7 +23,7 @@ var ( systemBootTime time.Time bootTimeCalculated bool bootTimeMutex sync.Mutex - + // Global Kubernetes metadata provider with proper synchronization k8sProvider *kubernetes.Provider k8sMutex sync.RWMutex // Protects both k8sProvider and initialization @@ -123,17 +123,17 @@ func getKubernetesProvider() *kubernetes.Provider { return provider } k8sMutex.RUnlock() - + // Slow path: need to initialize, acquire write lock k8sMutex.Lock() defer k8sMutex.Unlock() - + // Double-check after acquiring write lock if !k8sInit { k8sProvider = kubernetes.NewProvider() k8sInit = true } - + return k8sProvider } @@ -142,7 +142,7 @@ func getKubernetesProvider() *kubernetes.Provider { func resetKubernetesProvider() { k8sMutex.Lock() defer k8sMutex.Unlock() - + k8sProvider = nil k8sInit = false } diff --git a/internal/events/kubernetes_integration_test.go b/internal/events/kubernetes_integration_test.go index b190cd7..7260311 100644 --- a/internal/events/kubernetes_integration_test.go +++ b/internal/events/kubernetes_integration_test.go @@ -29,7 +29,7 @@ func TestKubernetesMetadataIntegration(t *testing.T) { os.Unsetenv("NODE_NAME") os.Unsetenv("POD_NAME") os.Unsetenv("POD_NAMESPACE") - + // Reset provider using the safe method resetKubernetesProvider() @@ -63,7 +63,7 @@ func TestKubernetesMetadataIntegration(t *testing.T) { os.Setenv("NODE_NAME", "test-node-1") os.Setenv("POD_NAME", "ebpf-monitor-abcde") os.Setenv("POD_NAMESPACE", "ebpf-system") - + // Reset provider to pick up new env vars using the safe method resetKubernetesProvider() diff --git a/internal/kubernetes/metadata.go b/internal/kubernetes/metadata.go index e6b5af2..531be23 100644 --- a/internal/kubernetes/metadata.go +++ b/internal/kubernetes/metadata.go @@ -25,7 +25,7 @@ func NewProvider() *Provider { p := &Provider{ enabled: isKubernetesEnvironment(), } - + if p.enabled { p.metadata = &Metadata{ NodeName: os.Getenv("NODE_NAME"), @@ -33,7 +33,7 @@ func NewProvider() *Provider { Namespace: os.Getenv("POD_NAMESPACE"), } } - + return p } @@ -48,11 +48,11 @@ func (p *Provider) IsEnabled() bool { func (p *Provider) GetMetadata() *Metadata { p.mu.RLock() defer p.mu.RUnlock() - + if !p.enabled || p.metadata == nil { return nil } - + // Return a copy to avoid race conditions return &Metadata{ NodeName: p.metadata.NodeName, @@ -66,12 +66,12 @@ func (p *Provider) AddToMap(data map[string]interface{}) { if !p.IsEnabled() { return } - + metadata := p.GetMetadata() if metadata == nil { return } - + if metadata.NodeName != "" { data["k8s_node_name"] = metadata.NodeName } @@ -89,16 +89,16 @@ func isKubernetesEnvironment() bool { if os.Getenv("KUBERNETES_SERVICE_HOST") != "" { return true } - + // Check deployment mode environment variable if os.Getenv("DEPLOYMENT_MODE") == "kubernetes" { return true } - + // Check if we can find Kubernetes service account token if _, err := os.Stat("/var/run/secrets/kubernetes.io/serviceaccount/token"); err == nil { return true } - + return false } diff --git a/internal/kubernetes/metadata_test.go b/internal/kubernetes/metadata_test.go index 31b10f6..31dcabd 100644 --- a/internal/kubernetes/metadata_test.go +++ b/internal/kubernetes/metadata_test.go @@ -89,7 +89,7 @@ func TestKubernetesProvider(t *testing.T) { provider := NewProvider() data := make(map[string]interface{}) - + provider.AddToMap(data) if data["k8s_node_name"] != "test-node" { diff --git a/internal/storage/forwarding.go b/internal/storage/forwarding.go index 1581916..a1a0231 100644 --- a/internal/storage/forwarding.go +++ b/internal/storage/forwarding.go @@ -10,8 +10,8 @@ import ( // ForwardingStorage wraps another storage and forwards events to an aggregator. type ForwardingStorage struct { - primary core.EventSink - aggregatorClient *client.AggregatorClient + primary core.EventSink + aggregatorClient *client.AggregatorClient } // NewForwardingStorage creates a new forwarding storage. diff --git a/internal/system/system.go b/internal/system/system.go index 2673fc6..8c4ab19 100644 --- a/internal/system/system.go +++ b/internal/system/system.go @@ -16,9 +16,9 @@ import ( // System is the main orchestrator for the eBPF monitoring system. type System struct { - manager core.Manager - storage core.EventSink - aggregatorClient *client.AggregatorClient + manager core.Manager + storage core.EventSink + aggregatorClient *client.AggregatorClient } // NewSystem creates a new eBPF monitoring system. @@ -26,7 +26,7 @@ func NewSystem() *System { manager := programs.NewManager() memStorage := storage.NewMemoryStorage() aggregatorClient := client.NewAggregatorClient() - + // Wrap storage with forwarding to aggregator forwardingStorage := storage.NewForwardingStorage(memStorage, aggregatorClient) From ca062aa6c4b8e8bb4f3c01ecf284f17fd47cc4f0 Mon Sep 17 00:00:00 2001 From: Simone Rodigari Date: Wed, 13 Aug 2025 08:20:13 +0100 Subject: [PATCH 07/11] fix linting issue --- internal/api/handlers.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/api/handlers.go b/internal/api/handlers.go index 5167573..82125c5 100644 --- a/internal/api/handlers.go +++ b/internal/api/handlers.go @@ -82,7 +82,7 @@ func HandlePrograms(w http.ResponseWriter, r *http.Request) { // Convert to structured response var programList []ProgramInfo for _, prog := range programs { - status := "unknown" + var status string if prog.Loaded && prog.Attached { status = "active" } else if prog.Loaded { From 0534d059b4c603c7b43278ff28e4f54c54a5a769 Mon Sep 17 00:00:00 2001 From: Simone Rodigari Date: Wed, 13 Aug 2025 08:34:25 +0100 Subject: [PATCH 08/11] fix health endpoint --- internal/api/handlers.go | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/internal/api/handlers.go b/internal/api/handlers.go index 82125c5..487c908 100644 --- a/internal/api/handlers.go +++ b/internal/api/handlers.go @@ -43,8 +43,13 @@ func Initialize(sys *system.System) { // @Failure 503 {object} map[string]string "Service unavailable" // @Router /health [get] func HandleHealth(w http.ResponseWriter, r *http.Request) { - w.Header().Set("Content-Type", "application/json") + if globalSystem == nil { + http.Error(w, "System not initialized", http.StatusServiceUnavailable) + return + } + w.Header().Set("Content-Type", "application/json") + health := HealthResponse{ Status: "healthy", Component: "eBPF Monitor API", @@ -58,9 +63,7 @@ func HandleHealth(w http.ResponseWriter, r *http.Request) { http.Error(w, "Internal server error", http.StatusInternalServerError) return } -} - -// HandlePrograms returns the status of all eBPF programs. +}// HandlePrograms returns the status of all eBPF programs. // // @Summary List eBPF programs // @Description Get the status and information of all loaded eBPF programs From bce7af78e9229089765b9dab6db217a429881483 Mon Sep 17 00:00:00 2001 From: Simone Rodigari Date: Wed, 13 Aug 2025 23:02:22 +0100 Subject: [PATCH 09/11] address comments --- docs/swagger-aggregator/docs.go | 345 +++++++++++++++++++++++++-- docs/swagger-aggregator/swagger.json | 345 +++++++++++++++++++++++++-- docs/swagger-aggregator/swagger.yaml | 269 +++++++++++++++++++-- docs/swagger/docs.go | 342 ++++++++++++++++++++++++-- docs/swagger/swagger.json | 342 ++++++++++++++++++++++++-- docs/swagger/swagger.yaml | 264 ++++++++++++++++++-- internal/aggregator/aggregator.go | 32 +-- internal/api/handlers.go | 40 ++-- 8 files changed, 1843 insertions(+), 136 deletions(-) diff --git a/docs/swagger-aggregator/docs.go b/docs/swagger-aggregator/docs.go index 438ca16..7cc53a3 100644 --- a/docs/swagger-aggregator/docs.go +++ b/docs/swagger-aggregator/docs.go @@ -11,6 +11,7 @@ const docTemplate = `{ "title": "{{.Title}}", "contact": { "name": "API Support", + "url": "https://github.com/srodi/ebpf-server/issues", "email": "support@example.com" }, "license": { @@ -38,19 +39,19 @@ const docTemplate = `{ "parameters": [ { "type": "integer", - "description": "Process ID (GET only)", + "description": "Process ID", "name": "pid", "in": "query" }, { "type": "string", - "description": "Command name (GET only)", + "description": "Command name", "name": "command", "in": "query" }, { "type": "integer", - "description": "Duration in seconds (GET only, default: 60)", + "description": "Duration in seconds (default: 60)", "name": "duration_seconds", "in": "query" }, @@ -114,19 +115,19 @@ const docTemplate = `{ "parameters": [ { "type": "integer", - "description": "Process ID (GET only)", + "description": "Process ID", "name": "pid", "in": "query" }, { "type": "string", - "description": "Command name (GET only)", + "description": "Command name", "name": "command", "in": "query" }, { "type": "integer", - "description": "Duration in seconds (GET only, default: 60)", + "description": "Duration in seconds (default: 60)", "name": "duration_seconds", "in": "query" }, @@ -283,8 +284,7 @@ const docTemplate = `{ "200": { "description": "Ingestion result", "schema": { - "type": "object", - "additionalProperties": true + "$ref": "#/definitions/internal_aggregator.IngestResponse" } }, "400": { @@ -406,19 +406,19 @@ const docTemplate = `{ "parameters": [ { "type": "integer", - "description": "Process ID (GET only)", + "description": "Process ID", "name": "pid", "in": "query" }, { "type": "string", - "description": "Command name (GET only)", + "description": "Command name", "name": "command", "in": "query" }, { "type": "integer", - "description": "Duration in seconds (GET only, default: 60)", + "description": "Duration in seconds (default: 60)", "name": "duration_seconds", "in": "query" }, @@ -482,19 +482,19 @@ const docTemplate = `{ "parameters": [ { "type": "integer", - "description": "Process ID (GET only)", + "description": "Process ID", "name": "pid", "in": "query" }, { "type": "string", - "description": "Command name (GET only)", + "description": "Command name", "name": "command", "in": "query" }, { "type": "integer", - "description": "Duration in seconds (GET only, default: 60)", + "description": "Duration in seconds (default: 60)", "name": "duration_seconds", "in": "query" }, @@ -602,8 +602,7 @@ const docTemplate = `{ "200": { "description": "Aggregation statistics", "schema": { - "type": "object", - "additionalProperties": true + "$ref": "#/definitions/internal_aggregator.AggregationStatsResponse" } }, "405": { @@ -945,6 +944,263 @@ const docTemplate = `{ } } }, + "internal_aggregator.AgentInfo": { + "type": "object", + "properties": { + "event_count": { + "description": "Number of events from this agent", + "type": "integer", + "example": 2500 + }, + "last_seen": { + "description": "Last seen timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "node_name": { + "description": "Node name", + "type": "string", + "example": "worker-1" + }, + "programs": { + "description": "Programs running on this agent", + "type": "array", + "items": { + "$ref": "#/definitions/internal_aggregator.ProgramInfo" + } + }, + "status": { + "description": "Agent status", + "type": "string", + "example": "active" + } + } + }, + "internal_aggregator.AggregatedEventFilters": { + "type": "object", + "properties": { + "limit": { + "description": "Limit filter", + "type": "integer", + "example": 100 + }, + "node": { + "description": "Node name filter", + "type": "string", + "example": "worker-1" + }, + "since": { + "description": "Start time filter", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "type": { + "description": "Event type filter", + "type": "string", + "example": "connection" + }, + "until": { + "description": "End time filter", + "type": "string", + "example": "2023-01-01T13:00:00Z" + } + } + }, + "internal_aggregator.AggregatedEventsResponse": { + "type": "object", + "properties": { + "count": { + "description": "Number of events returned", + "type": "integer", + "example": 50 + }, + "events": { + "description": "List of aggregated events", + "type": "array", + "items": {} + }, + "filters": { + "description": "Applied filters", + "allOf": [ + { + "$ref": "#/definitions/internal_aggregator.AggregatedEventFilters" + } + ] + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "total_count": { + "description": "Total number of matching events", + "type": "integer", + "example": 1250 + } + } + }, + "internal_aggregator.AggregatedListResponse": { + "type": "object", + "properties": { + "events_by_node": { + "description": "Event count by node", + "type": "object", + "additionalProperties": { + "type": "integer" + } + }, + "events_by_pid": { + "description": "Events grouped by PID", + "type": "object", + "additionalProperties": { + "type": "array", + "items": {} + } + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "total_events": { + "description": "Total number of events", + "type": "integer", + "example": 45 + }, + "total_nodes": { + "description": "Number of nodes with events", + "type": "integer", + "example": 3 + }, + "total_pids": { + "description": "Number of unique PIDs across all nodes", + "type": "integer", + "example": 8 + } + } + }, + "internal_aggregator.AggregatedSummaryResponse": { + "type": "object", + "properties": { + "command": { + "description": "Command name (if filtered)", + "type": "string", + "example": "curl" + }, + "count": { + "description": "Total count across all nodes", + "type": "integer", + "example": 15 + }, + "count_by_node": { + "description": "Count by node", + "type": "object", + "additionalProperties": { + "type": "integer" + } + }, + "duration_seconds": { + "description": "Duration in seconds", + "type": "integer", + "example": 60 + }, + "pid": { + "description": "Process ID (if filtered)", + "type": "integer", + "example": 1234 + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "total_nodes": { + "description": "Number of nodes with events", + "type": "integer", + "example": 3 + } + } + }, + "internal_aggregator.AggregationStatsResponse": { + "type": "object", + "properties": { + "aggregation_start": { + "description": "When aggregation started", + "type": "string", + "example": "2023-01-01T10:00:00Z" + }, + "connected_agents": { + "description": "Number of connected agents", + "type": "integer", + "example": 5 + }, + "events_by_node": { + "description": "Events grouped by node", + "type": "object", + "additionalProperties": { + "type": "integer", + "format": "int64" + } + }, + "events_by_type": { + "description": "Events grouped by type", + "type": "object", + "additionalProperties": { + "type": "integer", + "format": "int64" + } + }, + "last_event_time": { + "description": "Timestamp of last event", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "total_events": { + "description": "Total events stored", + "type": "integer", + "example": 12500 + } + } + }, + "internal_aggregator.AggregatorProgramsResponse": { + "type": "object", + "properties": { + "all_programs": { + "description": "All programs across agents", + "type": "array", + "items": { + "$ref": "#/definitions/internal_aggregator.ProgramInfo" + } + }, + "connected_agents": { + "description": "List of connected agents", + "type": "array", + "items": { + "$ref": "#/definitions/internal_aggregator.AgentInfo" + } + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "total_agents": { + "description": "Total number of agents", + "type": "integer", + "example": 3 + }, + "total_programs": { + "description": "Total number of programs", + "type": "integer", + "example": 6 + } + } + }, "internal_aggregator.HealthCheck": { "type": "object", "properties": { @@ -962,6 +1218,61 @@ const docTemplate = `{ "type": "string" } } + }, + "internal_aggregator.IngestResponse": { + "type": "object", + "properties": { + "events_processed": { + "description": "Number of events processed", + "type": "integer", + "example": 25 + }, + "message": { + "description": "Status message", + "type": "string", + "example": "Events ingested successfully" + }, + "success": { + "description": "Ingestion success status", + "type": "boolean", + "example": true + }, + "timestamp": { + "description": "Processing timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + } + } + }, + "internal_aggregator.ProgramInfo": { + "type": "object", + "properties": { + "event_count": { + "description": "Events generated by this program", + "type": "integer", + "example": 1250 + }, + "name": { + "description": "Program name", + "type": "string", + "example": "connection_tracer" + }, + "node": { + "description": "Node where program is running", + "type": "string", + "example": "worker-1" + }, + "status": { + "description": "Program status", + "type": "string", + "example": "active" + }, + "type": { + "description": "Program type", + "type": "string", + "example": "kprobe" + } + } } } }` @@ -973,7 +1284,7 @@ var SwaggerInfo = &swag.Spec{ BasePath: "/", Schemes: []string{}, Title: "eBPF Event Aggregator API", - Description: "HTTP API for aggregating and querying eBPF events from multiple //\t@Success\t\t200\t\t\t{object}\tAggregatedE//\t@Success\t\t200\t\t{object}\tIngestResponse\t\t\t\t\"Ingest//\t@Success\t\t200\t{object}\tAggrega//\t@Success\t\t200\t{object}\tAggregatorProgramsResponse\t\"Program information\"ionStatsResponse\t\"Aggregation statistics\"on result\"entsResponse\t\"Events and count\"gents", + Description: "HTTP API for aggregating and querying eBPF events from multiple agents", InfoInstanceName: "swagger", SwaggerTemplate: docTemplate, LeftDelim: "{{", diff --git a/docs/swagger-aggregator/swagger.json b/docs/swagger-aggregator/swagger.json index 8f01fcd..db7e831 100644 --- a/docs/swagger-aggregator/swagger.json +++ b/docs/swagger-aggregator/swagger.json @@ -1,10 +1,11 @@ { "swagger": "2.0", "info": { - "description": "HTTP API for aggregating and querying eBPF events from multiple //\t@Success\t\t200\t\t\t{object}\tAggregatedE//\t@Success\t\t200\t\t{object}\tIngestResponse\t\t\t\t\"Ingest//\t@Success\t\t200\t{object}\tAggrega//\t@Success\t\t200\t{object}\tAggregatorProgramsResponse\t\"Program information\"ionStatsResponse\t\"Aggregation statistics\"on result\"entsResponse\t\"Events and count\"gents", + "description": "HTTP API for aggregating and querying eBPF events from multiple agents", "title": "eBPF Event Aggregator API", "contact": { "name": "API Support", + "url": "https://github.com/srodi/ebpf-server/issues", "email": "support@example.com" }, "license": { @@ -32,19 +33,19 @@ "parameters": [ { "type": "integer", - "description": "Process ID (GET only)", + "description": "Process ID", "name": "pid", "in": "query" }, { "type": "string", - "description": "Command name (GET only)", + "description": "Command name", "name": "command", "in": "query" }, { "type": "integer", - "description": "Duration in seconds (GET only, default: 60)", + "description": "Duration in seconds (default: 60)", "name": "duration_seconds", "in": "query" }, @@ -108,19 +109,19 @@ "parameters": [ { "type": "integer", - "description": "Process ID (GET only)", + "description": "Process ID", "name": "pid", "in": "query" }, { "type": "string", - "description": "Command name (GET only)", + "description": "Command name", "name": "command", "in": "query" }, { "type": "integer", - "description": "Duration in seconds (GET only, default: 60)", + "description": "Duration in seconds (default: 60)", "name": "duration_seconds", "in": "query" }, @@ -277,8 +278,7 @@ "200": { "description": "Ingestion result", "schema": { - "type": "object", - "additionalProperties": true + "$ref": "#/definitions/internal_aggregator.IngestResponse" } }, "400": { @@ -400,19 +400,19 @@ "parameters": [ { "type": "integer", - "description": "Process ID (GET only)", + "description": "Process ID", "name": "pid", "in": "query" }, { "type": "string", - "description": "Command name (GET only)", + "description": "Command name", "name": "command", "in": "query" }, { "type": "integer", - "description": "Duration in seconds (GET only, default: 60)", + "description": "Duration in seconds (default: 60)", "name": "duration_seconds", "in": "query" }, @@ -476,19 +476,19 @@ "parameters": [ { "type": "integer", - "description": "Process ID (GET only)", + "description": "Process ID", "name": "pid", "in": "query" }, { "type": "string", - "description": "Command name (GET only)", + "description": "Command name", "name": "command", "in": "query" }, { "type": "integer", - "description": "Duration in seconds (GET only, default: 60)", + "description": "Duration in seconds (default: 60)", "name": "duration_seconds", "in": "query" }, @@ -596,8 +596,7 @@ "200": { "description": "Aggregation statistics", "schema": { - "type": "object", - "additionalProperties": true + "$ref": "#/definitions/internal_aggregator.AggregationStatsResponse" } }, "405": { @@ -939,6 +938,263 @@ } } }, + "internal_aggregator.AgentInfo": { + "type": "object", + "properties": { + "event_count": { + "description": "Number of events from this agent", + "type": "integer", + "example": 2500 + }, + "last_seen": { + "description": "Last seen timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "node_name": { + "description": "Node name", + "type": "string", + "example": "worker-1" + }, + "programs": { + "description": "Programs running on this agent", + "type": "array", + "items": { + "$ref": "#/definitions/internal_aggregator.ProgramInfo" + } + }, + "status": { + "description": "Agent status", + "type": "string", + "example": "active" + } + } + }, + "internal_aggregator.AggregatedEventFilters": { + "type": "object", + "properties": { + "limit": { + "description": "Limit filter", + "type": "integer", + "example": 100 + }, + "node": { + "description": "Node name filter", + "type": "string", + "example": "worker-1" + }, + "since": { + "description": "Start time filter", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "type": { + "description": "Event type filter", + "type": "string", + "example": "connection" + }, + "until": { + "description": "End time filter", + "type": "string", + "example": "2023-01-01T13:00:00Z" + } + } + }, + "internal_aggregator.AggregatedEventsResponse": { + "type": "object", + "properties": { + "count": { + "description": "Number of events returned", + "type": "integer", + "example": 50 + }, + "events": { + "description": "List of aggregated events", + "type": "array", + "items": {} + }, + "filters": { + "description": "Applied filters", + "allOf": [ + { + "$ref": "#/definitions/internal_aggregator.AggregatedEventFilters" + } + ] + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "total_count": { + "description": "Total number of matching events", + "type": "integer", + "example": 1250 + } + } + }, + "internal_aggregator.AggregatedListResponse": { + "type": "object", + "properties": { + "events_by_node": { + "description": "Event count by node", + "type": "object", + "additionalProperties": { + "type": "integer" + } + }, + "events_by_pid": { + "description": "Events grouped by PID", + "type": "object", + "additionalProperties": { + "type": "array", + "items": {} + } + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "total_events": { + "description": "Total number of events", + "type": "integer", + "example": 45 + }, + "total_nodes": { + "description": "Number of nodes with events", + "type": "integer", + "example": 3 + }, + "total_pids": { + "description": "Number of unique PIDs across all nodes", + "type": "integer", + "example": 8 + } + } + }, + "internal_aggregator.AggregatedSummaryResponse": { + "type": "object", + "properties": { + "command": { + "description": "Command name (if filtered)", + "type": "string", + "example": "curl" + }, + "count": { + "description": "Total count across all nodes", + "type": "integer", + "example": 15 + }, + "count_by_node": { + "description": "Count by node", + "type": "object", + "additionalProperties": { + "type": "integer" + } + }, + "duration_seconds": { + "description": "Duration in seconds", + "type": "integer", + "example": 60 + }, + "pid": { + "description": "Process ID (if filtered)", + "type": "integer", + "example": 1234 + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "total_nodes": { + "description": "Number of nodes with events", + "type": "integer", + "example": 3 + } + } + }, + "internal_aggregator.AggregationStatsResponse": { + "type": "object", + "properties": { + "aggregation_start": { + "description": "When aggregation started", + "type": "string", + "example": "2023-01-01T10:00:00Z" + }, + "connected_agents": { + "description": "Number of connected agents", + "type": "integer", + "example": 5 + }, + "events_by_node": { + "description": "Events grouped by node", + "type": "object", + "additionalProperties": { + "type": "integer", + "format": "int64" + } + }, + "events_by_type": { + "description": "Events grouped by type", + "type": "object", + "additionalProperties": { + "type": "integer", + "format": "int64" + } + }, + "last_event_time": { + "description": "Timestamp of last event", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "total_events": { + "description": "Total events stored", + "type": "integer", + "example": 12500 + } + } + }, + "internal_aggregator.AggregatorProgramsResponse": { + "type": "object", + "properties": { + "all_programs": { + "description": "All programs across agents", + "type": "array", + "items": { + "$ref": "#/definitions/internal_aggregator.ProgramInfo" + } + }, + "connected_agents": { + "description": "List of connected agents", + "type": "array", + "items": { + "$ref": "#/definitions/internal_aggregator.AgentInfo" + } + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "total_agents": { + "description": "Total number of agents", + "type": "integer", + "example": 3 + }, + "total_programs": { + "description": "Total number of programs", + "type": "integer", + "example": 6 + } + } + }, "internal_aggregator.HealthCheck": { "type": "object", "properties": { @@ -956,6 +1212,61 @@ "type": "string" } } + }, + "internal_aggregator.IngestResponse": { + "type": "object", + "properties": { + "events_processed": { + "description": "Number of events processed", + "type": "integer", + "example": 25 + }, + "message": { + "description": "Status message", + "type": "string", + "example": "Events ingested successfully" + }, + "success": { + "description": "Ingestion success status", + "type": "boolean", + "example": true + }, + "timestamp": { + "description": "Processing timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + } + } + }, + "internal_aggregator.ProgramInfo": { + "type": "object", + "properties": { + "event_count": { + "description": "Events generated by this program", + "type": "integer", + "example": 1250 + }, + "name": { + "description": "Program name", + "type": "string", + "example": "connection_tracer" + }, + "node": { + "description": "Node where program is running", + "type": "string", + "example": "worker-1" + }, + "status": { + "description": "Program status", + "type": "string", + "example": "active" + }, + "type": { + "description": "Program type", + "type": "string", + "example": "kprobe" + } + } } } } \ No newline at end of file diff --git a/docs/swagger-aggregator/swagger.yaml b/docs/swagger-aggregator/swagger.yaml index f5ff4ef..0d5a17f 100644 --- a/docs/swagger-aggregator/swagger.yaml +++ b/docs/swagger-aggregator/swagger.yaml @@ -222,6 +222,198 @@ definitions: example: 2 type: integer type: object + internal_aggregator.AgentInfo: + properties: + event_count: + description: Number of events from this agent + example: 2500 + type: integer + last_seen: + description: Last seen timestamp + example: "2023-01-01T12:00:00Z" + type: string + node_name: + description: Node name + example: worker-1 + type: string + programs: + description: Programs running on this agent + items: + $ref: '#/definitions/internal_aggregator.ProgramInfo' + type: array + status: + description: Agent status + example: active + type: string + type: object + internal_aggregator.AggregatedEventFilters: + properties: + limit: + description: Limit filter + example: 100 + type: integer + node: + description: Node name filter + example: worker-1 + type: string + since: + description: Start time filter + example: "2023-01-01T12:00:00Z" + type: string + type: + description: Event type filter + example: connection + type: string + until: + description: End time filter + example: "2023-01-01T13:00:00Z" + type: string + type: object + internal_aggregator.AggregatedEventsResponse: + properties: + count: + description: Number of events returned + example: 50 + type: integer + events: + description: List of aggregated events + items: {} + type: array + filters: + allOf: + - $ref: '#/definitions/internal_aggregator.AggregatedEventFilters' + description: Applied filters + query_time: + description: Query timestamp + example: "2023-01-01T12:00:00Z" + type: string + total_count: + description: Total number of matching events + example: 1250 + type: integer + type: object + internal_aggregator.AggregatedListResponse: + properties: + events_by_node: + additionalProperties: + type: integer + description: Event count by node + type: object + events_by_pid: + additionalProperties: + items: {} + type: array + description: Events grouped by PID + type: object + query_time: + description: Query timestamp + example: "2023-01-01T12:00:00Z" + type: string + total_events: + description: Total number of events + example: 45 + type: integer + total_nodes: + description: Number of nodes with events + example: 3 + type: integer + total_pids: + description: Number of unique PIDs across all nodes + example: 8 + type: integer + type: object + internal_aggregator.AggregatedSummaryResponse: + properties: + command: + description: Command name (if filtered) + example: curl + type: string + count: + description: Total count across all nodes + example: 15 + type: integer + count_by_node: + additionalProperties: + type: integer + description: Count by node + type: object + duration_seconds: + description: Duration in seconds + example: 60 + type: integer + pid: + description: Process ID (if filtered) + example: 1234 + type: integer + query_time: + description: Query timestamp + example: "2023-01-01T12:00:00Z" + type: string + total_nodes: + description: Number of nodes with events + example: 3 + type: integer + type: object + internal_aggregator.AggregationStatsResponse: + properties: + aggregation_start: + description: When aggregation started + example: "2023-01-01T10:00:00Z" + type: string + connected_agents: + description: Number of connected agents + example: 5 + type: integer + events_by_node: + additionalProperties: + format: int64 + type: integer + description: Events grouped by node + type: object + events_by_type: + additionalProperties: + format: int64 + type: integer + description: Events grouped by type + type: object + last_event_time: + description: Timestamp of last event + example: "2023-01-01T12:00:00Z" + type: string + query_time: + description: Query timestamp + example: "2023-01-01T12:00:00Z" + type: string + total_events: + description: Total events stored + example: 12500 + type: integer + type: object + internal_aggregator.AggregatorProgramsResponse: + properties: + all_programs: + description: All programs across agents + items: + $ref: '#/definitions/internal_aggregator.ProgramInfo' + type: array + connected_agents: + description: List of connected agents + items: + $ref: '#/definitions/internal_aggregator.AgentInfo' + type: array + query_time: + description: Query timestamp + example: "2023-01-01T12:00:00Z" + type: string + total_agents: + description: Total number of agents + example: 3 + type: integer + total_programs: + description: Total number of programs + example: 6 + type: integer + type: object internal_aggregator.HealthCheck: properties: component: @@ -234,14 +426,55 @@ definitions: uptime: type: string type: object + internal_aggregator.IngestResponse: + properties: + events_processed: + description: Number of events processed + example: 25 + type: integer + message: + description: Status message + example: Events ingested successfully + type: string + success: + description: Ingestion success status + example: true + type: boolean + timestamp: + description: Processing timestamp + example: "2023-01-01T12:00:00Z" + type: string + type: object + internal_aggregator.ProgramInfo: + properties: + event_count: + description: Events generated by this program + example: 1250 + type: integer + name: + description: Program name + example: connection_tracer + type: string + node: + description: Node where program is running + example: worker-1 + type: string + status: + description: Program status + example: active + type: string + type: + description: Program type + example: kprobe + type: string + type: object host: localhost:8081 info: contact: email: support@example.com name: API Support - description: "HTTP API for aggregating and querying eBPF events from multiple //\t@Success\t\t200\t\t\t{object}\tAggregatedE//\t@Success\t\t200\t\t{object}\tIngestResponse\t\t\t\t\"Ingest//\t@Success\t\t200\t{object}\tAggrega//\t@Success\t\t200\t{object}\tAggregatorProgramsResponse\t\"Program - information\"ionStatsResponse\t\"Aggregation statistics\"on result\"entsResponse\t\"Events - and count\"gents" + url: https://github.com/srodi/ebpf-server/issues + description: HTTP API for aggregating and querying eBPF events from multiple agents license: name: MIT url: https://github.com/srodi/ebpf-server/blob/main/LICENSE @@ -255,15 +488,15 @@ paths: description: Get count of connection events filtered by PID, command, and time window parameters: - - description: Process ID (GET only) + - description: Process ID in: query name: pid type: integer - - description: Command name (GET only) + - description: Command name in: query name: command type: string - - description: 'Duration in seconds (GET only, default: 60)' + - description: 'Duration in seconds (default: 60)' in: query name: duration_seconds type: integer @@ -306,15 +539,15 @@ paths: description: Get count of connection events filtered by PID, command, and time window parameters: - - description: Process ID (GET only) + - description: Process ID in: query name: pid type: integer - - description: Command name (GET only) + - description: Command name in: query name: command type: string - - description: 'Duration in seconds (GET only, default: 60)' + - description: 'Duration in seconds (default: 60)' in: query name: duration_seconds type: integer @@ -421,8 +654,7 @@ paths: "200": description: Ingestion result schema: - additionalProperties: true - type: object + $ref: '#/definitions/internal_aggregator.IngestResponse' "400": description: Bad request schema: @@ -499,15 +731,15 @@ paths: description: Get count of packet drop events filtered by PID, command, and time window parameters: - - description: Process ID (GET only) + - description: Process ID in: query name: pid type: integer - - description: Command name (GET only) + - description: Command name in: query name: command type: string - - description: 'Duration in seconds (GET only, default: 60)' + - description: 'Duration in seconds (default: 60)' in: query name: duration_seconds type: integer @@ -550,15 +782,15 @@ paths: description: Get count of packet drop events filtered by PID, command, and time window parameters: - - description: Process ID (GET only) + - description: Process ID in: query name: pid type: integer - - description: Command name (GET only) + - description: Command name in: query name: command type: string - - description: 'Duration in seconds (GET only, default: 60)' + - description: 'Duration in seconds (default: 60)' in: query name: duration_seconds type: integer @@ -634,8 +866,7 @@ paths: "200": description: Aggregation statistics schema: - additionalProperties: true - type: object + $ref: '#/definitions/internal_aggregator.AggregationStatsResponse' "405": description: Method not allowed schema: diff --git a/docs/swagger/docs.go b/docs/swagger/docs.go index 5dda5a6..63eb0aa 100644 --- a/docs/swagger/docs.go +++ b/docs/swagger/docs.go @@ -39,19 +39,19 @@ const docTemplate = `{ "parameters": [ { "type": "integer", - "description": "Process ID (GET only)", + "description": "Process ID", "name": "pid", "in": "query" }, { "type": "string", - "description": "Command name (GET only)", + "description": "Command name", "name": "command", "in": "query" }, { "type": "integer", - "description": "Duration in seconds (GET only, default: 60)", + "description": "Duration in seconds (default: 60)", "name": "duration_seconds", "in": "query" }, @@ -115,19 +115,19 @@ const docTemplate = `{ "parameters": [ { "type": "integer", - "description": "Process ID (GET only)", + "description": "Process ID", "name": "pid", "in": "query" }, { "type": "string", - "description": "Command name (GET only)", + "description": "Command name", "name": "command", "in": "query" }, { "type": "integer", - "description": "Duration in seconds (GET only, default: 60)", + "description": "Duration in seconds (default: 60)", "name": "duration_seconds", "in": "query" }, @@ -284,8 +284,7 @@ const docTemplate = `{ "200": { "description": "Ingestion result", "schema": { - "type": "object", - "additionalProperties": true + "$ref": "#/definitions/aggregator.IngestResponse" } }, "400": { @@ -407,19 +406,19 @@ const docTemplate = `{ "parameters": [ { "type": "integer", - "description": "Process ID (GET only)", + "description": "Process ID", "name": "pid", "in": "query" }, { "type": "string", - "description": "Command name (GET only)", + "description": "Command name", "name": "command", "in": "query" }, { "type": "integer", - "description": "Duration in seconds (GET only, default: 60)", + "description": "Duration in seconds (default: 60)", "name": "duration_seconds", "in": "query" }, @@ -483,19 +482,19 @@ const docTemplate = `{ "parameters": [ { "type": "integer", - "description": "Process ID (GET only)", + "description": "Process ID", "name": "pid", "in": "query" }, { "type": "string", - "description": "Command name (GET only)", + "description": "Command name", "name": "command", "in": "query" }, { "type": "integer", - "description": "Duration in seconds (GET only, default: 60)", + "description": "Duration in seconds (default: 60)", "name": "duration_seconds", "in": "query" }, @@ -603,8 +602,7 @@ const docTemplate = `{ "200": { "description": "Aggregation statistics", "schema": { - "type": "object", - "additionalProperties": true + "$ref": "#/definitions/aggregator.AggregationStatsResponse" } }, "405": { @@ -650,6 +648,263 @@ const docTemplate = `{ } }, "definitions": { + "aggregator.AgentInfo": { + "type": "object", + "properties": { + "event_count": { + "description": "Number of events from this agent", + "type": "integer", + "example": 2500 + }, + "last_seen": { + "description": "Last seen timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "node_name": { + "description": "Node name", + "type": "string", + "example": "worker-1" + }, + "programs": { + "description": "Programs running on this agent", + "type": "array", + "items": { + "$ref": "#/definitions/aggregator.ProgramInfo" + } + }, + "status": { + "description": "Agent status", + "type": "string", + "example": "active" + } + } + }, + "aggregator.AggregatedEventFilters": { + "type": "object", + "properties": { + "limit": { + "description": "Limit filter", + "type": "integer", + "example": 100 + }, + "node": { + "description": "Node name filter", + "type": "string", + "example": "worker-1" + }, + "since": { + "description": "Start time filter", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "type": { + "description": "Event type filter", + "type": "string", + "example": "connection" + }, + "until": { + "description": "End time filter", + "type": "string", + "example": "2023-01-01T13:00:00Z" + } + } + }, + "aggregator.AggregatedEventsResponse": { + "type": "object", + "properties": { + "count": { + "description": "Number of events returned", + "type": "integer", + "example": 50 + }, + "events": { + "description": "List of aggregated events", + "type": "array", + "items": {} + }, + "filters": { + "description": "Applied filters", + "allOf": [ + { + "$ref": "#/definitions/aggregator.AggregatedEventFilters" + } + ] + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "total_count": { + "description": "Total number of matching events", + "type": "integer", + "example": 1250 + } + } + }, + "aggregator.AggregatedListResponse": { + "type": "object", + "properties": { + "events_by_node": { + "description": "Event count by node", + "type": "object", + "additionalProperties": { + "type": "integer" + } + }, + "events_by_pid": { + "description": "Events grouped by PID", + "type": "object", + "additionalProperties": { + "type": "array", + "items": {} + } + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "total_events": { + "description": "Total number of events", + "type": "integer", + "example": 45 + }, + "total_nodes": { + "description": "Number of nodes with events", + "type": "integer", + "example": 3 + }, + "total_pids": { + "description": "Number of unique PIDs across all nodes", + "type": "integer", + "example": 8 + } + } + }, + "aggregator.AggregatedSummaryResponse": { + "type": "object", + "properties": { + "command": { + "description": "Command name (if filtered)", + "type": "string", + "example": "curl" + }, + "count": { + "description": "Total count across all nodes", + "type": "integer", + "example": 15 + }, + "count_by_node": { + "description": "Count by node", + "type": "object", + "additionalProperties": { + "type": "integer" + } + }, + "duration_seconds": { + "description": "Duration in seconds", + "type": "integer", + "example": 60 + }, + "pid": { + "description": "Process ID (if filtered)", + "type": "integer", + "example": 1234 + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "total_nodes": { + "description": "Number of nodes with events", + "type": "integer", + "example": 3 + } + } + }, + "aggregator.AggregationStatsResponse": { + "type": "object", + "properties": { + "aggregation_start": { + "description": "When aggregation started", + "type": "string", + "example": "2023-01-01T10:00:00Z" + }, + "connected_agents": { + "description": "Number of connected agents", + "type": "integer", + "example": 5 + }, + "events_by_node": { + "description": "Events grouped by node", + "type": "object", + "additionalProperties": { + "type": "integer", + "format": "int64" + } + }, + "events_by_type": { + "description": "Events grouped by type", + "type": "object", + "additionalProperties": { + "type": "integer", + "format": "int64" + } + }, + "last_event_time": { + "description": "Timestamp of last event", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "total_events": { + "description": "Total events stored", + "type": "integer", + "example": 12500 + } + } + }, + "aggregator.AggregatorProgramsResponse": { + "type": "object", + "properties": { + "all_programs": { + "description": "All programs across agents", + "type": "array", + "items": { + "$ref": "#/definitions/aggregator.ProgramInfo" + } + }, + "connected_agents": { + "description": "List of connected agents", + "type": "array", + "items": { + "$ref": "#/definitions/aggregator.AgentInfo" + } + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "total_agents": { + "description": "Total number of agents", + "type": "integer", + "example": 3 + }, + "total_programs": { + "description": "Total number of programs", + "type": "integer", + "example": 6 + } + } + }, "aggregator.HealthCheck": { "type": "object", "properties": { @@ -668,6 +923,61 @@ const docTemplate = `{ } } }, + "aggregator.IngestResponse": { + "type": "object", + "properties": { + "events_processed": { + "description": "Number of events processed", + "type": "integer", + "example": 25 + }, + "message": { + "description": "Status message", + "type": "string", + "example": "Events ingested successfully" + }, + "success": { + "description": "Ingestion success status", + "type": "boolean", + "example": true + }, + "timestamp": { + "description": "Processing timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + } + } + }, + "aggregator.ProgramInfo": { + "type": "object", + "properties": { + "event_count": { + "description": "Events generated by this program", + "type": "integer", + "example": 1250 + }, + "name": { + "description": "Program name", + "type": "string", + "example": "connection_tracer" + }, + "node": { + "description": "Node where program is running", + "type": "string", + "example": "worker-1" + }, + "status": { + "description": "Program status", + "type": "string", + "example": "active" + }, + "type": { + "description": "Program type", + "type": "string", + "example": "kprobe" + } + } + }, "internal_api.ConnectionListResponse": { "type": "object", "properties": { diff --git a/docs/swagger/swagger.json b/docs/swagger/swagger.json index 64b7d3d..44661cc 100644 --- a/docs/swagger/swagger.json +++ b/docs/swagger/swagger.json @@ -33,19 +33,19 @@ "parameters": [ { "type": "integer", - "description": "Process ID (GET only)", + "description": "Process ID", "name": "pid", "in": "query" }, { "type": "string", - "description": "Command name (GET only)", + "description": "Command name", "name": "command", "in": "query" }, { "type": "integer", - "description": "Duration in seconds (GET only, default: 60)", + "description": "Duration in seconds (default: 60)", "name": "duration_seconds", "in": "query" }, @@ -109,19 +109,19 @@ "parameters": [ { "type": "integer", - "description": "Process ID (GET only)", + "description": "Process ID", "name": "pid", "in": "query" }, { "type": "string", - "description": "Command name (GET only)", + "description": "Command name", "name": "command", "in": "query" }, { "type": "integer", - "description": "Duration in seconds (GET only, default: 60)", + "description": "Duration in seconds (default: 60)", "name": "duration_seconds", "in": "query" }, @@ -278,8 +278,7 @@ "200": { "description": "Ingestion result", "schema": { - "type": "object", - "additionalProperties": true + "$ref": "#/definitions/aggregator.IngestResponse" } }, "400": { @@ -401,19 +400,19 @@ "parameters": [ { "type": "integer", - "description": "Process ID (GET only)", + "description": "Process ID", "name": "pid", "in": "query" }, { "type": "string", - "description": "Command name (GET only)", + "description": "Command name", "name": "command", "in": "query" }, { "type": "integer", - "description": "Duration in seconds (GET only, default: 60)", + "description": "Duration in seconds (default: 60)", "name": "duration_seconds", "in": "query" }, @@ -477,19 +476,19 @@ "parameters": [ { "type": "integer", - "description": "Process ID (GET only)", + "description": "Process ID", "name": "pid", "in": "query" }, { "type": "string", - "description": "Command name (GET only)", + "description": "Command name", "name": "command", "in": "query" }, { "type": "integer", - "description": "Duration in seconds (GET only, default: 60)", + "description": "Duration in seconds (default: 60)", "name": "duration_seconds", "in": "query" }, @@ -597,8 +596,7 @@ "200": { "description": "Aggregation statistics", "schema": { - "type": "object", - "additionalProperties": true + "$ref": "#/definitions/aggregator.AggregationStatsResponse" } }, "405": { @@ -644,6 +642,263 @@ } }, "definitions": { + "aggregator.AgentInfo": { + "type": "object", + "properties": { + "event_count": { + "description": "Number of events from this agent", + "type": "integer", + "example": 2500 + }, + "last_seen": { + "description": "Last seen timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "node_name": { + "description": "Node name", + "type": "string", + "example": "worker-1" + }, + "programs": { + "description": "Programs running on this agent", + "type": "array", + "items": { + "$ref": "#/definitions/aggregator.ProgramInfo" + } + }, + "status": { + "description": "Agent status", + "type": "string", + "example": "active" + } + } + }, + "aggregator.AggregatedEventFilters": { + "type": "object", + "properties": { + "limit": { + "description": "Limit filter", + "type": "integer", + "example": 100 + }, + "node": { + "description": "Node name filter", + "type": "string", + "example": "worker-1" + }, + "since": { + "description": "Start time filter", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "type": { + "description": "Event type filter", + "type": "string", + "example": "connection" + }, + "until": { + "description": "End time filter", + "type": "string", + "example": "2023-01-01T13:00:00Z" + } + } + }, + "aggregator.AggregatedEventsResponse": { + "type": "object", + "properties": { + "count": { + "description": "Number of events returned", + "type": "integer", + "example": 50 + }, + "events": { + "description": "List of aggregated events", + "type": "array", + "items": {} + }, + "filters": { + "description": "Applied filters", + "allOf": [ + { + "$ref": "#/definitions/aggregator.AggregatedEventFilters" + } + ] + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "total_count": { + "description": "Total number of matching events", + "type": "integer", + "example": 1250 + } + } + }, + "aggregator.AggregatedListResponse": { + "type": "object", + "properties": { + "events_by_node": { + "description": "Event count by node", + "type": "object", + "additionalProperties": { + "type": "integer" + } + }, + "events_by_pid": { + "description": "Events grouped by PID", + "type": "object", + "additionalProperties": { + "type": "array", + "items": {} + } + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "total_events": { + "description": "Total number of events", + "type": "integer", + "example": 45 + }, + "total_nodes": { + "description": "Number of nodes with events", + "type": "integer", + "example": 3 + }, + "total_pids": { + "description": "Number of unique PIDs across all nodes", + "type": "integer", + "example": 8 + } + } + }, + "aggregator.AggregatedSummaryResponse": { + "type": "object", + "properties": { + "command": { + "description": "Command name (if filtered)", + "type": "string", + "example": "curl" + }, + "count": { + "description": "Total count across all nodes", + "type": "integer", + "example": 15 + }, + "count_by_node": { + "description": "Count by node", + "type": "object", + "additionalProperties": { + "type": "integer" + } + }, + "duration_seconds": { + "description": "Duration in seconds", + "type": "integer", + "example": 60 + }, + "pid": { + "description": "Process ID (if filtered)", + "type": "integer", + "example": 1234 + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "total_nodes": { + "description": "Number of nodes with events", + "type": "integer", + "example": 3 + } + } + }, + "aggregator.AggregationStatsResponse": { + "type": "object", + "properties": { + "aggregation_start": { + "description": "When aggregation started", + "type": "string", + "example": "2023-01-01T10:00:00Z" + }, + "connected_agents": { + "description": "Number of connected agents", + "type": "integer", + "example": 5 + }, + "events_by_node": { + "description": "Events grouped by node", + "type": "object", + "additionalProperties": { + "type": "integer", + "format": "int64" + } + }, + "events_by_type": { + "description": "Events grouped by type", + "type": "object", + "additionalProperties": { + "type": "integer", + "format": "int64" + } + }, + "last_event_time": { + "description": "Timestamp of last event", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "total_events": { + "description": "Total events stored", + "type": "integer", + "example": 12500 + } + } + }, + "aggregator.AggregatorProgramsResponse": { + "type": "object", + "properties": { + "all_programs": { + "description": "All programs across agents", + "type": "array", + "items": { + "$ref": "#/definitions/aggregator.ProgramInfo" + } + }, + "connected_agents": { + "description": "List of connected agents", + "type": "array", + "items": { + "$ref": "#/definitions/aggregator.AgentInfo" + } + }, + "query_time": { + "description": "Query timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + }, + "total_agents": { + "description": "Total number of agents", + "type": "integer", + "example": 3 + }, + "total_programs": { + "description": "Total number of programs", + "type": "integer", + "example": 6 + } + } + }, "aggregator.HealthCheck": { "type": "object", "properties": { @@ -662,6 +917,61 @@ } } }, + "aggregator.IngestResponse": { + "type": "object", + "properties": { + "events_processed": { + "description": "Number of events processed", + "type": "integer", + "example": 25 + }, + "message": { + "description": "Status message", + "type": "string", + "example": "Events ingested successfully" + }, + "success": { + "description": "Ingestion success status", + "type": "boolean", + "example": true + }, + "timestamp": { + "description": "Processing timestamp", + "type": "string", + "example": "2023-01-01T12:00:00Z" + } + } + }, + "aggregator.ProgramInfo": { + "type": "object", + "properties": { + "event_count": { + "description": "Events generated by this program", + "type": "integer", + "example": 1250 + }, + "name": { + "description": "Program name", + "type": "string", + "example": "connection_tracer" + }, + "node": { + "description": "Node where program is running", + "type": "string", + "example": "worker-1" + }, + "status": { + "description": "Program status", + "type": "string", + "example": "active" + }, + "type": { + "description": "Program type", + "type": "string", + "example": "kprobe" + } + } + }, "internal_api.ConnectionListResponse": { "type": "object", "properties": { diff --git a/docs/swagger/swagger.yaml b/docs/swagger/swagger.yaml index 6da84b1..5bd2e03 100644 --- a/docs/swagger/swagger.yaml +++ b/docs/swagger/swagger.yaml @@ -1,5 +1,197 @@ basePath: / definitions: + aggregator.AgentInfo: + properties: + event_count: + description: Number of events from this agent + example: 2500 + type: integer + last_seen: + description: Last seen timestamp + example: "2023-01-01T12:00:00Z" + type: string + node_name: + description: Node name + example: worker-1 + type: string + programs: + description: Programs running on this agent + items: + $ref: '#/definitions/aggregator.ProgramInfo' + type: array + status: + description: Agent status + example: active + type: string + type: object + aggregator.AggregatedEventFilters: + properties: + limit: + description: Limit filter + example: 100 + type: integer + node: + description: Node name filter + example: worker-1 + type: string + since: + description: Start time filter + example: "2023-01-01T12:00:00Z" + type: string + type: + description: Event type filter + example: connection + type: string + until: + description: End time filter + example: "2023-01-01T13:00:00Z" + type: string + type: object + aggregator.AggregatedEventsResponse: + properties: + count: + description: Number of events returned + example: 50 + type: integer + events: + description: List of aggregated events + items: {} + type: array + filters: + allOf: + - $ref: '#/definitions/aggregator.AggregatedEventFilters' + description: Applied filters + query_time: + description: Query timestamp + example: "2023-01-01T12:00:00Z" + type: string + total_count: + description: Total number of matching events + example: 1250 + type: integer + type: object + aggregator.AggregatedListResponse: + properties: + events_by_node: + additionalProperties: + type: integer + description: Event count by node + type: object + events_by_pid: + additionalProperties: + items: {} + type: array + description: Events grouped by PID + type: object + query_time: + description: Query timestamp + example: "2023-01-01T12:00:00Z" + type: string + total_events: + description: Total number of events + example: 45 + type: integer + total_nodes: + description: Number of nodes with events + example: 3 + type: integer + total_pids: + description: Number of unique PIDs across all nodes + example: 8 + type: integer + type: object + aggregator.AggregatedSummaryResponse: + properties: + command: + description: Command name (if filtered) + example: curl + type: string + count: + description: Total count across all nodes + example: 15 + type: integer + count_by_node: + additionalProperties: + type: integer + description: Count by node + type: object + duration_seconds: + description: Duration in seconds + example: 60 + type: integer + pid: + description: Process ID (if filtered) + example: 1234 + type: integer + query_time: + description: Query timestamp + example: "2023-01-01T12:00:00Z" + type: string + total_nodes: + description: Number of nodes with events + example: 3 + type: integer + type: object + aggregator.AggregationStatsResponse: + properties: + aggregation_start: + description: When aggregation started + example: "2023-01-01T10:00:00Z" + type: string + connected_agents: + description: Number of connected agents + example: 5 + type: integer + events_by_node: + additionalProperties: + format: int64 + type: integer + description: Events grouped by node + type: object + events_by_type: + additionalProperties: + format: int64 + type: integer + description: Events grouped by type + type: object + last_event_time: + description: Timestamp of last event + example: "2023-01-01T12:00:00Z" + type: string + query_time: + description: Query timestamp + example: "2023-01-01T12:00:00Z" + type: string + total_events: + description: Total events stored + example: 12500 + type: integer + type: object + aggregator.AggregatorProgramsResponse: + properties: + all_programs: + description: All programs across agents + items: + $ref: '#/definitions/aggregator.ProgramInfo' + type: array + connected_agents: + description: List of connected agents + items: + $ref: '#/definitions/aggregator.AgentInfo' + type: array + query_time: + description: Query timestamp + example: "2023-01-01T12:00:00Z" + type: string + total_agents: + description: Total number of agents + example: 3 + type: integer + total_programs: + description: Total number of programs + example: 6 + type: integer + type: object aggregator.HealthCheck: properties: component: @@ -12,6 +204,48 @@ definitions: uptime: type: string type: object + aggregator.IngestResponse: + properties: + events_processed: + description: Number of events processed + example: 25 + type: integer + message: + description: Status message + example: Events ingested successfully + type: string + success: + description: Ingestion success status + example: true + type: boolean + timestamp: + description: Processing timestamp + example: "2023-01-01T12:00:00Z" + type: string + type: object + aggregator.ProgramInfo: + properties: + event_count: + description: Events generated by this program + example: 1250 + type: integer + name: + description: Program name + example: connection_tracer + type: string + node: + description: Node where program is running + example: worker-1 + type: string + status: + description: Program status + example: active + type: string + type: + description: Program type + example: kprobe + type: string + type: object internal_api.ConnectionListResponse: properties: events_by_pid: @@ -254,15 +488,15 @@ paths: description: Get count of connection events filtered by PID, command, and time window parameters: - - description: Process ID (GET only) + - description: Process ID in: query name: pid type: integer - - description: Command name (GET only) + - description: Command name in: query name: command type: string - - description: 'Duration in seconds (GET only, default: 60)' + - description: 'Duration in seconds (default: 60)' in: query name: duration_seconds type: integer @@ -305,15 +539,15 @@ paths: description: Get count of connection events filtered by PID, command, and time window parameters: - - description: Process ID (GET only) + - description: Process ID in: query name: pid type: integer - - description: Command name (GET only) + - description: Command name in: query name: command type: string - - description: 'Duration in seconds (GET only, default: 60)' + - description: 'Duration in seconds (default: 60)' in: query name: duration_seconds type: integer @@ -420,8 +654,7 @@ paths: "200": description: Ingestion result schema: - additionalProperties: true - type: object + $ref: '#/definitions/aggregator.IngestResponse' "400": description: Bad request schema: @@ -498,15 +731,15 @@ paths: description: Get count of packet drop events filtered by PID, command, and time window parameters: - - description: Process ID (GET only) + - description: Process ID in: query name: pid type: integer - - description: Command name (GET only) + - description: Command name in: query name: command type: string - - description: 'Duration in seconds (GET only, default: 60)' + - description: 'Duration in seconds (default: 60)' in: query name: duration_seconds type: integer @@ -549,15 +782,15 @@ paths: description: Get count of packet drop events filtered by PID, command, and time window parameters: - - description: Process ID (GET only) + - description: Process ID in: query name: pid type: integer - - description: Command name (GET only) + - description: Command name in: query name: command type: string - - description: 'Duration in seconds (GET only, default: 60)' + - description: 'Duration in seconds (default: 60)' in: query name: duration_seconds type: integer @@ -633,8 +866,7 @@ paths: "200": description: Aggregation statistics schema: - additionalProperties: true - type: object + $ref: '#/definitions/aggregator.AggregationStatsResponse' "405": description: Method not allowed schema: diff --git a/internal/aggregator/aggregator.go b/internal/aggregator/aggregator.go index d41477f..ab48200 100644 --- a/internal/aggregator/aggregator.go +++ b/internal/aggregator/aggregator.go @@ -1,12 +1,12 @@ // Package aggregator provides event aggregation functionality for eBPF monitoring. // // @title eBPF Event Aggregator API -// @description HTTP API for aggregating and querying eBPF events from multiple // @Success 200 {object} AggregatedE// @Success 200 {object} IngestResponse "Ingest// @Success 200 {object} Aggrega// @Success 200 {object} AggregatorProgramsResponse "Program information"ionStatsResponse "Aggregation statistics"on result"entsResponse "Events and count"gents +// @description HTTP API for aggregating and querying eBPF events from multiple agents // @version 1.0.0 // @host localhost:8081 // @BasePath / // @contact.name API Support -// @contact.url// @Success 200 {object} AggregatedListResponse "// @Success 200 {object} AggregatedListResponse "Pack// @Success 200 {object} AggregatedSummaryResponse "Connection statistics"t// @Success 200 {object} AggregatedSummaryResponse "Packet drop statistics"drop events"onnection events"https://github.com/srodi/ebpf-server/issues +// @contact.url https://github.com/srodi/ebpf-server/issues // @contact.email support@example.com // @license.name MIT // @license.url https://github.com/srodi/ebpf-server/blob/main/LICENSE @@ -203,7 +203,7 @@ func (a *Aggregator) IsRunning() bool { // @Param since query string false "Start time (RFC3339 format)" // @Param until query string false "End time (RFC3339 format)" // @Param limit query int false "Maximum number of events to return" -// @Success 200 {object} map[string]interface{} "Events and count" +// @Success 200 {object} AggregatedEventsResponse "Events and count" // @Failure 405 {string} string "Method not allowed" // @Failure 500 {string} string "Internal server error" // @Router /api/events [get] @@ -242,7 +242,7 @@ func (a *Aggregator) HandleEvents(w http.ResponseWriter, r *http.Request) { // @Accept json // @Produce json // @Param events body object true "Events to ingest" -// @Success 200 {object} map[string]interface{} "Ingestion result" +// @Success 200 {object} IngestResponse "Ingestion result" // @Failure 400 {string} string "Bad request" // @Failure 405 {string} string "Method not allowed" // @Failure 500 {string} string "Internal server error" @@ -294,7 +294,7 @@ func (a *Aggregator) HandleIngest(w http.ResponseWriter, r *http.Request) { // @Tags stats // @Accept json // @Produce json -// @Success 200 {object} map[string]interface{} "Aggregation statistics" +// @Success 200 {object} AggregationStatsResponse "Aggregation statistics" // @Failure 405 {string} string "Method not allowed" // @Router /api/stats [get] func (a *Aggregator) HandleStats(w http.ResponseWriter, r *http.Request) { @@ -327,7 +327,7 @@ func (a *Aggregator) HandleStats(w http.ResponseWriter, r *http.Request) { // @Tags programs // @Accept json // @Produce json -// @Success 200 {object} map[string]interface{} "Program information" +// @Success 200 {object} AggregatorProgramsResponse "Program information" // @Failure 405 {string} string "Method not allowed" // @Router /api/programs [get] func (a *Aggregator) HandlePrograms(w http.ResponseWriter, r *http.Request) { @@ -594,7 +594,7 @@ func (a *Aggregator) GetPrograms() []core.ProgramStatus { // @Tags connections // @Accept json // @Produce json -// @Success 200 {object} map[string]interface{} "Connection events" +// @Success 200 {object} AggregatedListResponse "Connection events" // @Failure 500 {object} map[string]string "Internal server error" // @Failure 503 {object} map[string]string "Service unavailable" // @Router /api/list-connections [get] @@ -645,7 +645,7 @@ func (a *Aggregator) HandleListConnections(w http.ResponseWriter, r *http.Reques // @Tags packet_drops // @Accept json // @Produce json -// @Success 200 {object} map[string]interface{} "Packet drop events" +// @Success 200 {object} AggregatedListResponse "Packet drop events" // @Failure 500 {object} map[string]string "Internal server error" // @Failure 503 {object} map[string]string "Service unavailable" // @Router /api/list-packet-drops [get] @@ -696,11 +696,11 @@ func (a *Aggregator) HandleListPacketDrops(w http.ResponseWriter, r *http.Reques // @Tags connections // @Accept json // @Produce json -// @Param pid query int false "Process ID (GET only)" -// @Param command query string false "Command name (GET only)" -// @Param duration_seconds query int false "Duration in seconds (GET only, default: 60)" +// @Param pid query int false "Process ID" +// @Param command query string false "Command name" +// @Param duration_seconds query int false "Duration in seconds (default: 60)" // @Param request body map[string]interface{} false "Connection summary request (POST only)" -// @Success 200 {object} map[string]interface{} "Connection statistics" +// @Success 200 {object} AggregatedSummaryResponse "Connection statistics" // @Failure 400 {object} map[string]string "Bad request" // @Failure 500 {object} map[string]string "Internal server error" // @Router /api/connection-summary [get] @@ -775,11 +775,11 @@ func (a *Aggregator) HandleConnectionSummary(w http.ResponseWriter, r *http.Requ // @Tags packet_drops // @Accept json // @Produce json -// @Param pid query int false "Process ID (GET only)" -// @Param command query string false "Command name (GET only)" -// @Param duration_seconds query int false "Duration in seconds (GET only, default: 60)" +// @Param pid query int false "Process ID" +// @Param command query string false "Command name" +// @Param duration_seconds query int false "Duration in seconds (default: 60)" // @Param request body map[string]interface{} false "Packet drop summary request (POST only)" -// @Success 200 {object} map[string]interface{} "Packet drop statistics" +// @Success 200 {object} AggregatedSummaryResponse "Packet drop statistics" // @Failure 400 {object} map[string]string "Bad request" // @Failure 500 {object} map[string]string "Internal server error" // @Router /api/packet-drop-summary [get] diff --git a/internal/api/handlers.go b/internal/api/handlers.go index 487c908..0526989 100644 --- a/internal/api/handlers.go +++ b/internal/api/handlers.go @@ -14,6 +14,7 @@ package api import ( "context" + "crypto/sha256" "encoding/json" "net/http" "strconv" @@ -49,7 +50,7 @@ func HandleHealth(w http.ResponseWriter, r *http.Request) { } w.Header().Set("Content-Type", "application/json") - + health := HealthResponse{ Status: "healthy", Component: "eBPF Monitor API", @@ -63,17 +64,16 @@ func HandleHealth(w http.ResponseWriter, r *http.Request) { http.Error(w, "Internal server error", http.StatusInternalServerError) return } -}// HandlePrograms returns the status of all eBPF programs. -// -// @Summary List eBPF programs -// @Description Get the status and information of all loaded eBPF programs -// @Tags programs -// @Accept json -// @Produce json -// @Success 200 {object} ProgramsResponse "List of eBPF programs" -// @Failure 500 {object} map[string]string "Internal server error" -// @Failure 503 {object} map[string]string "Service unavailable" -// @Router /api/programs [get] +} // HandlePrograms returns the status of all eBPF programs. +// @Summary List eBPF programs +// @Description Get the status and information of all loaded eBPF programs +// @Tags programs +// @Accept json +// @Produce json +// @Success 200 {object} ProgramsResponse "List of eBPF programs" +// @Failure 500 {object} map[string]string "Internal server error" +// @Failure 503 {object} map[string]string "Service unavailable" +// @Router /api/programs [get] func HandlePrograms(w http.ResponseWriter, r *http.Request) { if globalSystem == nil { http.Error(w, "System not initialized", http.StatusServiceUnavailable) @@ -94,11 +94,13 @@ func HandlePrograms(w http.ResponseWriter, r *http.Request) { status = "inactive" } + // Use a hash of the program name as a unique ID + hash := sha256.Sum256([]byte(prog.Name)) programInfo := ProgramInfo{ Name: prog.Name, Type: "eBPF", // Generic type, could be enhanced Status: status, - ID: int(prog.EventCount), // Use event count as an ID placeholder + ID: int(hash[0]), // Use first byte of hash as ID } programList = append(programList, programInfo) } @@ -231,9 +233,9 @@ func HandleEvents(w http.ResponseWriter, r *http.Request) { // @Tags connections // @Accept json // @Produce json -// @Param pid query int false "Process ID (GET only)" -// @Param command query string false "Command name (GET only)" -// @Param duration_seconds query int false "Duration in seconds (GET only, default: 60)" +// @Param pid query int false "Process ID" +// @Param command query string false "Command name" +// @Param duration_seconds query int false "Duration in seconds (default: 60)" // @Param request body ConnectionSummaryRequest false "Connection summary request (POST only)" // @Success 200 {object} ConnectionSummaryResponse "Connection statistics" // @Failure 400 {object} map[string]string "Bad request" @@ -317,9 +319,9 @@ func HandleConnectionSummary(w http.ResponseWriter, r *http.Request) { // @Tags packet_drops // @Accept json // @Produce json -// @Param pid query int false "Process ID (GET only)" -// @Param command query string false "Command name (GET only)" -// @Param duration_seconds query int false "Duration in seconds (GET only, default: 60)" +// @Param pid query int false "Process ID" +// @Param command query string false "Command name" +// @Param duration_seconds query int false "Duration in seconds (default: 60)" // @Param request body PacketDropSummaryRequest false "Packet drop summary request (POST only)" // @Success 200 {object} PacketDropSummaryResponse "Packet drop statistics" // @Failure 400 {object} map[string]string "Bad request" From 293612d34e8923ed1f8ac8463a68e2bf110c96ff Mon Sep 17 00:00:00 2001 From: Simone Rodigari Date: Wed, 13 Aug 2025 23:04:15 +0100 Subject: [PATCH 10/11] update licence --- LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index 2643d79..5989c2e 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2025 eBPF Network Monitor +Copyright (c) 2025 Simone Rodigari Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal From 36fc30cb515bd2792c916a31c9f87eff97434cea Mon Sep 17 00:00:00 2001 From: Simone Rodigari Date: Thu, 14 Aug 2025 21:23:26 +0100 Subject: [PATCH 11/11] fix comments --- internal/aggregator/aggregator.go | 314 ++++++++++++++++++++------ internal/api/handlers.go | 8 +- kubernetes/aggregator-deployment.yaml | 2 +- 3 files changed, 254 insertions(+), 70 deletions(-) diff --git a/internal/aggregator/aggregator.go b/internal/aggregator/aggregator.go index ab48200..29ae8f0 100644 --- a/internal/aggregator/aggregator.go +++ b/internal/aggregator/aggregator.go @@ -118,13 +118,21 @@ type Config struct { HTTPAddr string } +// ProgramCache caches program information to avoid expensive queries +type ProgramCache struct { + data *AggregatorProgramsResponse + lastCheck time.Time + mu sync.RWMutex +} + // Aggregator collects and aggregates events from multiple eBPF agents. type Aggregator struct { - config *Config - storage core.EventSink - stats *Stats - mu sync.RWMutex - running bool + config *Config + storage core.EventSink + stats *Stats + programCache *ProgramCache + mu sync.RWMutex + running bool } // Stats represents aggregation statistics. @@ -154,6 +162,7 @@ func New(config *Config) (*Aggregator, error) { EventsByNode: make(map[string]int64), StartTime: time.Now(), }, + programCache: &ProgramCache{}, }, nil } @@ -168,6 +177,10 @@ func (a *Aggregator) Start(ctx context.Context) error { logger.Info("Starting event aggregator") a.running = true + + // Start cleanup routine for memory storage + go a.cleanupRoutine(ctx) + return nil } @@ -276,6 +289,12 @@ func (a *Aggregator) HandleIngest(w http.ResponseWriter, r *http.Request) { // Update stats a.updateStats(int64(processed), requestData.Events) + // Invalidate program cache if we processed events successfully + // This ensures the cache reflects newly ingested data + if processed > 0 { + a.invalidateProgramCache() + } + // Return success response w.Header().Set("Content-Type", "application/json") if err := json.NewEncoder(w).Encode(map[string]interface{}{ @@ -313,6 +332,20 @@ func (a *Aggregator) HandleStats(w http.ResponseWriter, r *http.Request) { } a.stats.mu.RUnlock() + // Add current storage info for debugging + if memStorage, ok := a.storage.(*storage.MemoryStorage); ok { + // Get a rough count of current events (last hour) + query := core.Query{ + Since: time.Now().Add(-1 * time.Hour), + } + currentEvents, _ := memStorage.Count(context.Background(), query) + statsData["current_events_last_hour"] = currentEvents + + // Get total events in storage + allEvents, _ := memStorage.Count(context.Background(), core.Query{}) + statsData["total_events_in_storage"] = allEvents + } + w.Header().Set("Content-Type", "application/json") if err := json.NewEncoder(w).Encode(statsData); err != nil { logger.Errorf("Failed to encode stats: %v", err) @@ -321,6 +354,7 @@ func (a *Aggregator) HandleStats(w http.ResponseWriter, r *http.Request) { // HandlePrograms handles HTTP requests for program information. // Since the aggregator doesn't run eBPF programs directly, it returns program status from connected agents. +// This endpoint uses caching to avoid expensive queries on each request. // // @Summary Get program information // @Description Get information about eBPF programs running on connected agents @@ -336,17 +370,53 @@ func (a *Aggregator) HandlePrograms(w http.ResponseWriter, r *http.Request) { return } + const cacheDuration = 2 * time.Minute // Cache for 2 minutes + + // Check if we have cached data that's still fresh + a.programCache.mu.RLock() + if a.programCache.data != nil && time.Since(a.programCache.lastCheck) < cacheDuration { + response := a.programCache.data + a.programCache.mu.RUnlock() + + logger.Debugf("Serving cached program information (age: %v)", time.Since(a.programCache.lastCheck)) + w.Header().Set("Content-Type", "application/json") + w.Header().Set("Cache-Control", fmt.Sprintf("max-age=%d", int(cacheDuration.Seconds()))) + if err := json.NewEncoder(w).Encode(response); err != nil { + logger.Errorf("Failed to encode cached programs response: %v", err) + } + return + } + a.programCache.mu.RUnlock() + + // Cache is stale or empty, refresh it + response, err := a.refreshProgramCache() + if err != nil { + logger.Errorf("Failed to refresh program cache: %v", err) + http.Error(w, "Internal server error", http.StatusInternalServerError) + return + } + + logger.Debugf("Serving fresh program information (%d agents, %d programs)", + response.TotalAgents, response.TotalPrograms) + + w.Header().Set("Content-Type", "application/json") + w.Header().Set("Cache-Control", fmt.Sprintf("max-age=%d", int(cacheDuration.Seconds()))) + if err := json.NewEncoder(w).Encode(response); err != nil { + logger.Errorf("Failed to encode programs response: %v", err) + } +} + +// refreshProgramCache refreshes the program information cache by querying recent events +func (a *Aggregator) refreshProgramCache() (*AggregatorProgramsResponse, error) { // Query recent events to infer connected agents and their programs query := core.Query{ Limit: 1000, // Get a good sample of recent events Since: time.Now().Add(-10 * time.Minute), // Last 10 minutes } - events, err := a.storage.Query(r.Context(), query) + events, err := a.storage.Query(context.Background(), query) if err != nil { - logger.Errorf("Failed to query events for program info: %v", err) - http.Error(w, "Internal server error", http.StatusInternalServerError) - return + return nil, fmt.Errorf("failed to query events for program info: %v", err) } // Aggregate information about connected agents and their programs @@ -391,55 +461,85 @@ func (a *Aggregator) HandlePrograms(w http.ResponseWriter, r *http.Request) { } // Convert agents map to slice and format programs - var connectedAgents []map[string]interface{} - var allPrograms []map[string]interface{} + var connectedAgents []AgentInfo + var allPrograms []ProgramInfo for nodeName, agentInfo := range agents { eventTypesMap := agentInfo["event_types"].(map[string]bool) - var programs []string + var programs []ProgramInfo + eventCount := int64(agentInfo["event_count"].(int)) + for eventType := range eventTypesMap { - programs = append(programs, eventType) + program := ProgramInfo{ + Name: eventType + "_tracer", + Type: eventType, + Status: "active", // Inferred from recent events + Node: nodeName, + EventCount: eventCount, + } + programs = append(programs, program) + allPrograms = append(allPrograms, program) } - agentData := map[string]interface{}{ - "node_name": nodeName, - "pod_name": agentInfo["pod_name"], - "namespace": agentInfo["namespace"], - "programs": programs, - "last_seen": agentInfo["last_seen"].(time.Time).Format(time.RFC3339), - "event_count": agentInfo["event_count"], + agentData := AgentInfo{ + NodeName: nodeName, + LastSeen: agentInfo["last_seen"].(time.Time).Format(time.RFC3339), + EventCount: eventCount, + Programs: programs, + Status: "active", } connectedAgents = append(connectedAgents, agentData) - - // Add programs to the global list - for _, program := range programs { - allPrograms = append(allPrograms, map[string]interface{}{ - "program_type": program, - "node_name": nodeName, - "status": "active", // Inferred from recent events - }) - } } - // Get unique program types - var uniquePrograms []string - for eventType := range eventTypes { - uniquePrograms = append(uniquePrograms, eventType) + response := &AggregatorProgramsResponse{ + ConnectedAgents: connectedAgents, + AllPrograms: allPrograms, + TotalAgents: len(connectedAgents), + TotalPrograms: len(allPrograms), + QueryTime: time.Now().Format(time.RFC3339), } - response := map[string]interface{}{ - "connected_agents": len(connectedAgents), - "unique_programs": uniquePrograms, - "agents": connectedAgents, - "all_programs": allPrograms, - "total_events_analyzed": len(events), - "query_time": time.Now().Format(time.RFC3339), - "description": "Program information inferred from events received from connected agents", - } + // Update cache + a.programCache.mu.Lock() + a.programCache.data = response + a.programCache.lastCheck = time.Now() + a.programCache.mu.Unlock() - w.Header().Set("Content-Type", "application/json") - if err := json.NewEncoder(w).Encode(response); err != nil { - logger.Errorf("Failed to encode programs response: %v", err) + return response, nil +} + +// invalidateProgramCache invalidates the program information cache +func (a *Aggregator) invalidateProgramCache() { + a.programCache.mu.Lock() + a.programCache.data = nil + a.programCache.lastCheck = time.Time{} + a.programCache.mu.Unlock() +} + +// cleanupRoutine runs periodic cleanup of old events to prevent memory bloat +func (a *Aggregator) cleanupRoutine(ctx context.Context) { + ticker := time.NewTicker(5 * time.Minute) // Cleanup every 5 minutes + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + logger.Debug("Cleanup routine stopping due to context cancellation") + return + case <-ticker.C: + if !a.IsRunning() { + continue + } + + // Clean up events older than 2 hours + maxAge := 2 * time.Hour + + if memStorage, ok := a.storage.(*storage.MemoryStorage); ok { + logger.Debugf("Running cleanup: removing events older than %v", maxAge) + memStorage.Cleanup(maxAge) + logger.Debugf("Cleanup completed") + } + } } } @@ -619,16 +719,29 @@ func (a *Aggregator) HandleListConnections(w http.ResponseWriter, r *http.Reques // Group by PID for compatibility eventsByPID := make(map[uint32][]core.Event) + eventsByNode := make(map[string]int) + nodeSet := make(map[string]bool) + for _, event := range events { pid := event.PID() eventsByPID[pid] = append(eventsByPID[pid], event) + + // Extract node information from event metadata + if metadata := event.Metadata(); metadata != nil { + if nodeName, ok := metadata["k8s_node_name"].(string); ok && nodeName != "" { + eventsByNode[nodeName]++ + nodeSet[nodeName] = true + } + } } - response := map[string]interface{}{ - "total_pids": len(eventsByPID), - "total_events": len(events), - "events_by_pid": eventsByPID, - "query_time": time.Now().Format(time.RFC3339), + response := AggregatedListResponse{ + TotalPIDs: len(eventsByPID), + TotalEvents: len(events), + TotalNodes: len(nodeSet), + EventsByPID: eventsByPID, + EventsByNode: eventsByNode, + QueryTime: time.Now().Format(time.RFC3339), } w.Header().Set("Content-Type", "application/json") @@ -670,16 +783,29 @@ func (a *Aggregator) HandleListPacketDrops(w http.ResponseWriter, r *http.Reques // Group by PID for compatibility eventsByPID := make(map[uint32][]core.Event) + eventsByNode := make(map[string]int) + nodeSet := make(map[string]bool) + for _, event := range events { pid := event.PID() eventsByPID[pid] = append(eventsByPID[pid], event) + + // Extract node information from event metadata + if metadata := event.Metadata(); metadata != nil { + if nodeName, ok := metadata["k8s_node_name"].(string); ok && nodeName != "" { + eventsByNode[nodeName]++ + nodeSet[nodeName] = true + } + } } - response := map[string]interface{}{ - "total_pids": len(eventsByPID), - "total_events": len(events), - "events_by_pid": eventsByPID, - "query_time": time.Now().Format(time.RFC3339), + response := AggregatedListResponse{ + TotalPIDs: len(eventsByPID), + TotalEvents: len(events), + TotalNodes: len(nodeSet), + EventsByPID: eventsByPID, + EventsByNode: eventsByNode, + QueryTime: time.Now().Format(time.RFC3339), } w.Header().Set("Content-Type", "application/json") @@ -753,12 +879,39 @@ func (a *Aggregator) HandleConnectionSummary(w http.ResponseWriter, r *http.Requ return } - response := map[string]interface{}{ - "count": count, - "pid": request.PID, - "command": request.Command, - "duration_seconds": request.Duration, - "query_time": time.Now().Format(time.RFC3339), + logger.Debugf("🔍 CONNECTION SUMMARY: query=%+v count=%d", query, count) + + // Get events to analyze by node for detailed response + eventsQuery := query + eventsQuery.Limit = 1000 // Reasonable limit for analysis + events, err := a.storage.Query(r.Context(), eventsQuery) + if err != nil { + logger.Errorf("Error querying connection events for node analysis: %v", err) + // Fall back to basic response without node breakdown + events = nil + } + + // Analyze events by node + countByNode := make(map[string]int) + nodeSet := make(map[string]bool) + + for _, event := range events { + if metadata := event.Metadata(); metadata != nil { + if nodeName, ok := metadata["k8s_node_name"].(string); ok && nodeName != "" { + countByNode[nodeName]++ + nodeSet[nodeName] = true + } + } + } + + response := AggregatedSummaryResponse{ + Count: count, + CountByNode: countByNode, + PID: request.PID, + Command: request.Command, + DurationSeconds: request.Duration, + TotalNodes: len(nodeSet), + QueryTime: time.Now().Format(time.RFC3339), } w.Header().Set("Content-Type", "application/json") @@ -832,12 +985,39 @@ func (a *Aggregator) HandlePacketDropSummary(w http.ResponseWriter, r *http.Requ return } - response := map[string]interface{}{ - "count": count, - "pid": request.PID, - "command": request.Command, - "duration_seconds": request.Duration, - "query_time": time.Now().Format(time.RFC3339), + logger.Debugf("🔍 PACKET DROP SUMMARY: query=%+v count=%d", query, count) + + // Get events to analyze by node for detailed response + eventsQuery := query + eventsQuery.Limit = 1000 // Reasonable limit for analysis + events, err := a.storage.Query(r.Context(), eventsQuery) + if err != nil { + logger.Errorf("Error querying packet drop events for node analysis: %v", err) + // Fall back to basic response without node breakdown + events = nil + } + + // Analyze events by node + countByNode := make(map[string]int) + nodeSet := make(map[string]bool) + + for _, event := range events { + if metadata := event.Metadata(); metadata != nil { + if nodeName, ok := metadata["k8s_node_name"].(string); ok && nodeName != "" { + countByNode[nodeName]++ + nodeSet[nodeName] = true + } + } + } + + response := AggregatedSummaryResponse{ + Count: count, + CountByNode: countByNode, + PID: request.PID, + Command: request.Command, + DurationSeconds: request.Duration, + TotalNodes: len(nodeSet), + QueryTime: time.Now().Format(time.RFC3339), } w.Header().Set("Content-Type", "application/json") diff --git a/internal/api/handlers.go b/internal/api/handlers.go index 0526989..3748de1 100644 --- a/internal/api/handlers.go +++ b/internal/api/handlers.go @@ -64,7 +64,9 @@ func HandleHealth(w http.ResponseWriter, r *http.Request) { http.Error(w, "Internal server error", http.StatusInternalServerError) return } -} // HandlePrograms returns the status of all eBPF programs. +} + +// HandlePrograms returns the status of all eBPF programs. // @Summary List eBPF programs // @Description Get the status and information of all loaded eBPF programs // @Tags programs @@ -96,11 +98,13 @@ func HandlePrograms(w http.ResponseWriter, r *http.Request) { // Use a hash of the program name as a unique ID hash := sha256.Sum256([]byte(prog.Name)) + // Use first 4 bytes of hash as ID for better uniqueness (2^32 possible values) + id := int(hash[0])<<24 | int(hash[1])<<16 | int(hash[2])<<8 | int(hash[3]) programInfo := ProgramInfo{ Name: prog.Name, Type: "eBPF", // Generic type, could be enhanced Status: status, - ID: int(hash[0]), // Use first byte of hash as ID + ID: id, } programList = append(programList, programInfo) } diff --git a/kubernetes/aggregator-deployment.yaml b/kubernetes/aggregator-deployment.yaml index 99add8e..9278f42 100644 --- a/kubernetes/aggregator-deployment.yaml +++ b/kubernetes/aggregator-deployment.yaml @@ -30,7 +30,7 @@ spec: - name: DEPLOYMENT_MODE value: "kubernetes" - name: LOG_LEVEL - value: "info" + value: "debug" resources: requests: memory: "256Mi"