diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index bf5f317..3f1be7d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -274,6 +274,7 @@ make check - `internal/config` - Configuration parsing + validation (8 table-driven tests) - `internal/bpf` - Event binary round-trip, helper methods, type consistency - `internal/collector` - Registry lifecycle, signal aggregation +- `internal/preflight` - Host prerequisite validation (kernel, BTF, caps, port) --- @@ -305,6 +306,7 @@ kerno/ │ ├── cli/ # Cobra CLI commands │ │ ├── root.go # Root command, flags, logger init │ │ ├── doctor.go # `kerno doctor` command +│ │ ├── preflight.go # `kerno preflight` command │ │ ├── version.go # `kerno version` command │ │ └── start.go # `kerno start` daemon command │ ├── collector/ # Signal collection + aggregation @@ -314,6 +316,9 @@ kerno/ │ │ └── config.go # Config struct, defaults, validation │ └── version/ # Build metadata │ └── version.go # Version, commit, date via ldflags +│ └── preflight/ # Host prerequisite validation +│ ├── checks.go # 10 check functions + RunAll +│ └── checks_test.go # Fixture-based unit tests ├── .github/ │ ├── workflows/ │ │ ├── ci.yml # Lint, test, build, BPF, Docker jobs diff --git a/README.md b/README.md index f71c00b..a046b96 100644 --- a/README.md +++ b/README.md @@ -146,6 +146,7 @@ The same binary, the same command. No Kubernetes required. ```bash curl -sfL https://raw.githubusercontent.com/optiqor/kerno/main/scripts/install.sh | sudo bash +sudo kerno preflight # validate host prerequisites sudo kerno doctor ``` diff --git a/deploy/helm/kerno/templates/daemonset.yaml b/deploy/helm/kerno/templates/daemonset.yaml index 84e0762..18c84c8 100644 --- a/deploy/helm/kerno/templates/daemonset.yaml +++ b/deploy/helm/kerno/templates/daemonset.yaml @@ -43,6 +43,28 @@ spec: affinity: {{- toYaml . | nindent 8 }} {{- end }} + {{- if .Values.preflight.initContainer }} + initContainers: + - name: preflight + image: {{ include "kerno.image" . 
}} + imagePullPolicy: {{ .Values.image.pullPolicy }} + args: ["preflight"] + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + volumeMounts: + - name: sys-kernel-btf + mountPath: /sys/kernel/btf + readOnly: true + - name: sys-kernel-debug + mountPath: /sys/kernel/debug + readOnly: true + - name: proc + mountPath: /proc + readOnly: true + - name: sys-fs-cgroup + mountPath: /sys/fs/cgroup + readOnly: true + {{- end }} containers: - name: kerno image: {{ include "kerno.image" . }} diff --git a/deploy/helm/kerno/templates/preflight-job.yaml b/deploy/helm/kerno/templates/preflight-job.yaml new file mode 100644 index 0000000..c0787ca --- /dev/null +++ b/deploy/helm/kerno/templates/preflight-job.yaml @@ -0,0 +1,75 @@ +{{- if .Values.preflight.enabled }} +apiVersion: batch/v1 +kind: Job +metadata: + name: kerno-preflight + namespace: {{ .Release.Namespace }} + labels: + {{- include "kerno.labels" . | nindent 4 }} + app.kubernetes.io/component: preflight + annotations: + "helm.sh/hook": pre-install,pre-upgrade + "helm.sh/hook-weight": "-5" + "helm.sh/hook-delete-policy": hook-succeeded,before-hook-creation +spec: + backoffLimit: 0 + template: + spec: + restartPolicy: Never + serviceAccountName: kerno + hostPID: true + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: preflight + image: {{ include "kerno.image" . 
}} + imagePullPolicy: {{ .Values.image.pullPolicy }} + args: ["preflight", "--output", "json"] + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + resources: + requests: + cpu: 50m + memory: 32Mi + limits: + cpu: 100m + memory: 64Mi + volumeMounts: + - name: sys-kernel-btf + mountPath: /sys/kernel/btf + readOnly: true + - name: sys-kernel-debug + mountPath: /sys/kernel/debug + readOnly: true + - name: proc + mountPath: /proc + readOnly: true + - name: sys-fs-cgroup + mountPath: /sys/fs/cgroup + readOnly: true + volumes: + - name: sys-kernel-btf + hostPath: + path: /sys/kernel/btf + type: Directory + - name: sys-kernel-debug + hostPath: + path: /sys/kernel/debug + type: Directory + - name: proc + hostPath: + path: /proc + type: Directory + - name: sys-fs-cgroup + hostPath: + path: /sys/fs/cgroup + type: Directory +{{- end }} diff --git a/deploy/helm/kerno/values.yaml b/deploy/helm/kerno/values.yaml index 371ba23..5650b70 100644 --- a/deploy/helm/kerno/values.yaml +++ b/deploy/helm/kerno/values.yaml @@ -120,3 +120,15 @@ securityContext: # Host path prefix — Kerno mounts host paths under /host/ to avoid conflicts. # All pollers read from /host/proc, /host/sys/fs/cgroup, etc. hostPathPrefix: /host + +# Preflight checks — validate host prerequisites before starting kerno. +# +# NOTE: The pre-install hook Job runs on a single scheduled node, so it +# validates only that one node — not every node the DaemonSet lands on. On a +# mixed-kernel cluster the hook can pass while some nodes still fail at start. +# Enable initContainer for a per-node gate that checks every node. +preflight: + # Enable the Helm pre-install hook Job (validates one node). + enabled: false + # Add preflight as an init container in the DaemonSet (validates every node). 
+ initContainer: false diff --git a/deploy/systemd/kerno.service b/deploy/systemd/kerno.service index 237ebe4..d3166e5 100644 --- a/deploy/systemd/kerno.service +++ b/deploy/systemd/kerno.service @@ -21,6 +21,7 @@ Wants=network-online.target [Service] Type=simple +ExecStartPre=/usr/local/bin/kerno preflight ExecStart=/usr/local/bin/kerno start Restart=on-failure RestartSec=5s diff --git a/internal/cli/preflight.go b/internal/cli/preflight.go new file mode 100644 index 0000000..7aff0fa --- /dev/null +++ b/internal/cli/preflight.go @@ -0,0 +1,195 @@ +// Copyright 2026 Optiqor contributors +// SPDX-License-Identifier: Apache-2.0 + +package cli + +import ( + "encoding/json" + "fmt" + "os" + + "github.com/spf13/cobra" + + "github.com/optiqor/kerno/internal/preflight" +) + +func newPreflightCmd() *cobra.Command { + var ( + output string + outputDir string + promAddr string + ) + + cmd := &cobra.Command{ + Use: "preflight", + Short: "Validate host prerequisites for running kerno", + Long: `Preflight runs every host prerequisite check and reports each as PASS / FAIL / WARN. + +Use this before "kerno doctor" to verify the host can run kerno. Each check +includes a remediation hint when it fails. + +Exit codes: + 0 All checks passed (warnings are printed to stderr but don't block) + 1 One or more checks failed`, + Example: ` # Check if this host can run kerno + sudo kerno preflight + + # Machine-readable for Helm hooks and CI + kerno preflight --output json + + # Custom output directory and Prometheus port + sudo kerno preflight --output-dir /data/kerno --prom-addr :9091`, + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, _ []string) error { + // Inherit --output from root if not set via preflight flag. + if output == "" { + output, _ = cmd.Root().PersistentFlags().GetString("output") + } + + // Build CheckOptions from flags + config. 
+ opts := preflight.CheckOptions{} + if outputDir != "" { + opts.OutputDir = outputDir + } + if promAddr != "" { + opts.PromAddr = promAddr + } else if cfg != nil && cfg.Prometheus.Addr != "" { + opts.PromAddr = cfg.Prometheus.Addr + } + + // Run all preflight checks. + results := preflight.RunAll(opts) + + // Render output. + switch output { + case "json": + return renderPreflightJSON(cmd, results) + default: + return renderPreflightPretty(cmd, results) + } + }, + } + + flags := cmd.Flags() + flags.StringVarP(&output, "output", "o", "", "output format: pretty, json") + flags.StringVar(&outputDir, "output-dir", "", "output directory to check (default: /var/log/kerno)") + flags.StringVar(&promAddr, "prom-addr", "", "Prometheus address to check (default from config)") + + return cmd +} + +// preflightSummary counts the results by status. +type preflightSummary struct { + Pass int `json:"pass"` + Warn int `json:"warn"` + Fail int `json:"fail"` +} + +// preflightReport is the JSON output structure. 
+type preflightReport struct { + Checks []preflight.Result `json:"checks"` + Summary preflightSummary `json:"summary"` + Ready bool `json:"ready"` +} + +func summarize(results []preflight.Result) preflightSummary { + var s preflightSummary + for i := range results { + switch results[i].Status { + case preflight.StatusPass: + s.Pass++ + case preflight.StatusWarn: + s.Warn++ + case preflight.StatusFail: + s.Fail++ + } + } + return s +} + +func renderPreflightJSON(cmd *cobra.Command, results []preflight.Result) error { + s := summarize(results) + report := preflightReport{ + Checks: results, + Summary: s, + Ready: s.Fail == 0, + } + + enc := json.NewEncoder(cmd.OutOrStdout()) + enc.SetIndent("", " ") + if err := enc.Encode(report); err != nil { + return fmt.Errorf("encoding JSON: %w", err) + } + + if s.Fail > 0 { + return &exitError{code: 1} + } + return nil +} + +func renderPreflightPretty(cmd *cobra.Command, results []preflight.Result) error { + w := cmd.OutOrStdout() + noColor := os.Getenv("NO_COLOR") != "" || !isTerminal() + + fmt.Fprintln(w, "==> Kerno preflight check") + fmt.Fprintln(w) + + for i := range results { + r := &results[i] + tag := formatStatusTag(r.Status, noColor) + fmt.Fprintf(w, "%s %s\n", tag, r.Message) + } + + s := summarize(results) + fmt.Fprintln(w) + + var verdict string + if s.Fail == 0 { + verdict = "ready to start" + } else { + verdict = "not ready" + } + fmt.Fprintf(w, "Result: %d PASS, %d WARN, %d FAIL → %s\n", s.Pass, s.Warn, s.Fail, verdict) + + // Print remediation hints for failures and warnings to stderr. 
+ var hints []string + for i := range results { + if results[i].Detail != "" && results[i].Status != preflight.StatusPass { + hints = append(hints, fmt.Sprintf(" %s: %s", results[i].Name, results[i].Detail)) + } + } + if len(hints) > 0 { + fmt.Fprintln(os.Stderr) + fmt.Fprintln(os.Stderr, "Remediation hints:") + for _, h := range hints { + fmt.Fprintln(os.Stderr, h) + } + } + + if s.Fail > 0 { + return &exitError{code: 1} + } + return nil +} + +// formatStatusTag returns a colored [PASS], [WARN], or [FAIL] tag. +func formatStatusTag(s preflight.Status, noColor bool) string { + label := s.String() + + if noColor { + return "[" + label + "]" + } + + var color string + switch s { + case preflight.StatusPass: + color = "\033[32m" // green + case preflight.StatusWarn: + color = "\033[33m" // yellow + case preflight.StatusFail: + color = "\033[31m" // red + } + reset := "\033[0m" + + return color + "[" + label + "]" + reset +} diff --git a/internal/cli/root.go b/internal/cli/root.go index ae81213..68cc788 100644 --- a/internal/cli/root.go +++ b/internal/cli/root.go @@ -82,6 +82,7 @@ and copy-paste fix steps.`, watchCmd := newWatchCmd() auditCmd := newAuditCmd() chaosCmd := newChaosCmd() + preflightCmd := newPreflightCmd() versionCmd := newVersionCmd() completionCmd := newCompletionCmd() @@ -98,10 +99,11 @@ and copy-paste fix steps.`, auditCmd.GroupID = "observe" startCmd.GroupID = "ops" chaosCmd.GroupID = "ops" + preflightCmd.GroupID = "ops" versionCmd.GroupID = "ops" completionCmd.GroupID = "ops" - root.AddCommand(doctorCmd, explainCmd, predictCmd, traceCmd, watchCmd, auditCmd, startCmd, chaosCmd, versionCmd, completionCmd) + root.AddCommand(doctorCmd, explainCmd, predictCmd, traceCmd, watchCmd, auditCmd, startCmd, chaosCmd, preflightCmd, versionCmd, completionCmd) return root } diff --git a/internal/preflight/checks.go b/internal/preflight/checks.go new file mode 100644 index 0000000..be59e53 --- /dev/null +++ b/internal/preflight/checks.go @@ -0,0 +1,472 @@ +// 
Copyright 2026 Optiqor contributors +// SPDX-License-Identifier: Apache-2.0 + +// Package preflight validates host prerequisites for running kerno. +// Each check is a small function returning a Result; the RunAll function +// aggregates them into a single report. +package preflight + +import ( + "context" + "fmt" + "net" + "os" + "path/filepath" + "strconv" + "strings" + "syscall" + + "golang.org/x/sys/unix" +) + +// Status is the outcome of a single preflight check. +type Status int + +const ( + // StatusPass indicates the check passed. + StatusPass Status = iota + // StatusWarn indicates a non-fatal issue (kerno can degrade gracefully). + StatusWarn + // StatusFail indicates a blocking issue (kerno cannot start). + StatusFail +) + +// String returns the human-readable label for a Status. +func (s Status) String() string { + switch s { + case StatusPass: + return "PASS" + case StatusWarn: + return "WARN" + case StatusFail: + return "FAIL" + default: + return "UNKNOWN" + } +} + +// MarshalText implements encoding.TextMarshaler for JSON output. +func (s Status) MarshalText() ([]byte, error) { + return []byte(s.String()), nil +} + +// Result captures one preflight check outcome. +type Result struct { + Name string `json:"name"` + Status Status `json:"status"` + Message string `json:"message"` + Detail string `json:"detail,omitempty"` // remediation hint +} + +// CheckOptions carries configuration for filesystem-path overrides, +// enabling testability via fixtures (inject t.TempDir() paths instead of /proc). 
type CheckOptions struct {
	ProcPath    string // empty means "/proc"
	SysPath     string // empty means "/sys"
	CgroupPath  string // empty means "/sys/fs/cgroup"
	BTFPath     string // empty means "/sys/kernel/btf/vmlinux"
	DebugFSPath string // empty means "/sys/kernel/debug"
	TraceFSPath string // empty means "/sys/kernel/tracing"
	OutputDir   string // empty means "/var/log/kerno"
	PromAddr    string // empty means ":9090"
}

// withDefaults returns a copy of o in which every empty field has been
// replaced by its production default. Fields the caller already set are left
// untouched, and the receiver itself is never modified (value receiver).
func (o CheckOptions) withDefaults() CheckOptions {
	// Each entry pairs a field of the local copy with its fallback value.
	fallbacks := []struct {
		field *string
		value string
	}{
		{&o.ProcPath, "/proc"},
		{&o.SysPath, "/sys"},
		{&o.CgroupPath, "/sys/fs/cgroup"},
		{&o.BTFPath, "/sys/kernel/btf/vmlinux"},
		{&o.DebugFSPath, "/sys/kernel/debug"},
		{&o.TraceFSPath, "/sys/kernel/tracing"},
		{&o.OutputDir, "/var/log/kerno"},
		{&o.PromAddr, ":9090"},
	}
	for _, fb := range fallbacks {
		if *fb.field == "" {
			*fb.field = fb.value
		}
	}
	return o
}

// Bit positions of the capabilities the preflight checks look for in the
// effective capability mask.
const (
	capNetAdmin = 12
	capPerfmon  = 38
	capBPF      = 39
)

// CheckKernelVersion verifies the running kernel is >= 5.8 (required for
// CAP_BPF and modern BTF support).
+func CheckKernelVersion() Result { + var utsname syscall.Utsname + if err := syscall.Uname(&utsname); err != nil { + return Result{ + Name: "Linux kernel version", + Status: StatusFail, + Message: fmt.Sprintf("failed to read kernel version: %v", err), + Detail: "unable to call uname(2) — this should not happen on Linux", + } + } + + release := utsNameToString(utsname.Release) + major, minor, err := parseKernelVersion(release) + if err != nil { + return Result{ + Name: "Linux kernel version", + Status: StatusFail, + Message: fmt.Sprintf("failed to parse kernel version %q: %v", release, err), + } + } + + if major < 5 || (major == 5 && minor < 8) { + return Result{ + Name: "Linux kernel version", + Status: StatusFail, + Message: fmt.Sprintf("kernel %d.%d is too old (need >= 5.8, running %s)", major, minor, release), + Detail: "upgrade to kernel 5.8+ for CAP_BPF and BTF support", + } + } + + return Result{ + Name: "Linux kernel version", + Status: StatusPass, + Message: fmt.Sprintf("Linux kernel >= 5.8 (running %s)", release), + } +} + +// CheckBTF verifies that /sys/kernel/btf/vmlinux is readable. It opens the +// file (not just stat) so a root-readable-only or zero-perm vmlinux fails +// here instead of later at eBPF load time. +func CheckBTF(opts CheckOptions) Result { + f, err := os.Open(opts.BTFPath) //nolint:gosec // path is a controlled default or test fixture + if err != nil { + return Result{ + Name: "BTF (BPF Type Format)", + Status: StatusFail, + Message: fmt.Sprintf("%s not readable: %v", opts.BTFPath, err), + Detail: "kernel needs CONFIG_DEBUG_INFO_BTF=y (kernel >= 5.8 with BTF)", + } + } + f.Close() + return Result{ + Name: "BTF (BPF Type Format)", + Status: StatusPass, + Message: fmt.Sprintf("%s readable", opts.BTFPath), + } +} + +// CheckCgroupV2 verifies that cgroup v2 is mounted at the expected path. 
+func CheckCgroupV2(opts CheckOptions) Result { + var stat unix.Statfs_t + if err := unix.Statfs(opts.CgroupPath, &stat); err != nil { + return Result{ + Name: "cgroup v2", + Status: StatusFail, + Message: fmt.Sprintf("failed to stat %s: %v", opts.CgroupPath, err), + Detail: "mount cgroup v2: sudo mount -t cgroup2 none /sys/fs/cgroup", + } + } + + const cgroup2Magic = 0x63677270 + if stat.Type != cgroup2Magic { + return Result{ + Name: "cgroup v2", + Status: StatusFail, + Message: fmt.Sprintf("%s is not cgroup v2 (type=0x%x)", opts.CgroupPath, stat.Type), + Detail: "kerno requires cgroup v2; add systemd.unified_cgroup_hierarchy=1 to kernel cmdline", + } + } + + return Result{ + Name: "cgroup v2", + Status: StatusPass, + Message: fmt.Sprintf("cgroup v2 mounted at %s", opts.CgroupPath), + } +} + +// CheckCapBPF verifies that CAP_BPF is in the effective capability set. +func CheckCapBPF(caps uint64, capErr error) Result { + if capErr != nil { + return Result{ + Name: "CAP_BPF", + Status: StatusFail, + Message: fmt.Sprintf("failed to read capabilities: %v", capErr), + Detail: "run with sudo or grant CAP_BPF to the binary", + } + } + if !hasCap(caps, capBPF) { + return Result{ + Name: "CAP_BPF", + Status: StatusFail, + Message: "CAP_BPF not available", + Detail: "run with sudo or: sudo setcap cap_bpf+ep /usr/local/bin/kerno", + } + } + return Result{ + Name: "CAP_BPF", + Status: StatusPass, + Message: "CAP_BPF available", + } +} + +// CheckCapPerfmon verifies that CAP_PERFMON is in the effective capability set. 
+func CheckCapPerfmon(caps uint64, capErr error) Result { + if capErr != nil { + return Result{ + Name: "CAP_PERFMON", + Status: StatusFail, + Message: fmt.Sprintf("failed to read capabilities: %v", capErr), + Detail: "run with sudo or grant CAP_PERFMON to the binary", + } + } + if !hasCap(caps, capPerfmon) { + return Result{ + Name: "CAP_PERFMON", + Status: StatusFail, + Message: "CAP_PERFMON not available", + Detail: "run with sudo or: sudo setcap cap_perfmon+ep /usr/local/bin/kerno", + } + } + return Result{ + Name: "CAP_PERFMON", + Status: StatusPass, + Message: "CAP_PERFMON available", + } +} + +// CheckProcReadable verifies that /proc/version is readable. +func CheckProcReadable(opts CheckOptions) Result { + path := filepath.Join(opts.ProcPath, "version") + f, err := os.Open(path) //nolint:gosec // path is constructed from a controlled prefix + constant + if err != nil { + return Result{ + Name: "/proc readable", + Status: StatusFail, + Message: fmt.Sprintf("%s not readable: %v", path, err), + Detail: "mount proc: sudo mount -t proc proc /proc", + } + } + f.Close() + return Result{ + Name: "/proc readable", + Status: StatusPass, + Message: "/proc readable", + } +} + +// CheckCapNetAdmin verifies that CAP_NET_ADMIN is in the effective capability +// set. This is a WARN (not FAIL) because kerno can degrade gracefully — only +// the TCP retransmit collector requires it. 
+func CheckCapNetAdmin(caps uint64, capErr error) Result { + if capErr != nil { + return Result{ + Name: "CAP_NET_ADMIN", + Status: StatusWarn, + Message: fmt.Sprintf("failed to read capabilities: %v", capErr), + Detail: "TCP retransmit collector will degrade; run with sudo for full coverage", + } + } + if !hasCap(caps, capNetAdmin) { + return Result{ + Name: "CAP_NET_ADMIN", + Status: StatusWarn, + Message: "CAP_NET_ADMIN missing — TCP retransmit collector will degrade", + Detail: "run with sudo or: sudo setcap cap_net_admin+ep /usr/local/bin/kerno", + } + } + return Result{ + Name: "CAP_NET_ADMIN", + Status: StatusPass, + Message: "CAP_NET_ADMIN available", + } +} + +// CheckOutputDir verifies that the output directory exists and is writable. +func CheckOutputDir(opts CheckOptions) Result { + info, err := os.Stat(opts.OutputDir) + if err != nil { + return Result{ + Name: "Output directory", + Status: StatusFail, + Message: fmt.Sprintf("output directory %s does not exist", opts.OutputDir), + Detail: fmt.Sprintf("create it: sudo mkdir -p %s && sudo chmod 755 %s", opts.OutputDir, opts.OutputDir), + } + } + if !info.IsDir() { + return Result{ + Name: "Output directory", + Status: StatusFail, + Message: fmt.Sprintf("%s exists but is not a directory", opts.OutputDir), + } + } + + // Write test file to verify write access. 
+ probe := filepath.Join(opts.OutputDir, ".kerno-preflight-probe") + f, err := os.Create(probe) //nolint:gosec // path is constructed from a controlled prefix + constant + if err != nil { + return Result{ + Name: "Output directory", + Status: StatusFail, + Message: fmt.Sprintf("output directory %s not writable: %v", opts.OutputDir, err), + Detail: fmt.Sprintf("fix permissions: sudo chown $USER %s", opts.OutputDir), + } + } + f.Close() + _ = os.Remove(probe) // best-effort cleanup + + return Result{ + Name: "Output directory", + Status: StatusPass, + Message: fmt.Sprintf("output directory %s writable", opts.OutputDir), + } +} + +// CheckPortFree verifies that the Prometheus metrics port is available. +// This is a point-in-time check (TOCTOU) — the port may be claimed between +// the check and kerno startup. +func CheckPortFree(opts CheckOptions) Result { + var lc net.ListenConfig + ln, err := lc.Listen(context.Background(), "tcp", opts.PromAddr) + if err != nil { + return Result{ + Name: "Prometheus port", + Status: StatusFail, + Message: fmt.Sprintf("port %s already in use", opts.PromAddr), + Detail: "another process is listening on this port; change prometheus.addr in config or stop the conflicting process", + } + } + ln.Close() + + // Extract port number for display. + _, port, _ := net.SplitHostPort(opts.PromAddr) + return Result{ + Name: "Prometheus port", + Status: StatusPass, + Message: fmt.Sprintf("port %s available (snapshot; may be claimed before startup)", port), + } +} + +// CheckTracefs verifies that tracefs or debugfs/tracing is mounted. +// Modern kernels (5.1+) mount tracefs at /sys/kernel/tracing independently +// of debugfs. Older setups expose it via /sys/kernel/debug/tracing. +// Both are valid — we check both paths and WARN only if neither exists. 
+func CheckTracefs(opts CheckOptions) Result { + if _, err := os.Stat(opts.TraceFSPath); err == nil { + return Result{ + Name: "tracefs", + Status: StatusPass, + Message: fmt.Sprintf("tracefs mounted at %s", opts.TraceFSPath), + } + } + debugTracing := filepath.Join(opts.DebugFSPath, "tracing") + if _, err := os.Stat(debugTracing); err == nil { + return Result{ + Name: "tracefs", + Status: StatusPass, + Message: fmt.Sprintf("tracefs available via debugfs at %s", debugTracing), + } + } + return Result{ + Name: "tracefs", + Status: StatusWarn, + Message: "tracefs not found at /sys/kernel/tracing or /sys/kernel/debug/tracing", + Detail: "mount tracefs: sudo mount -t tracefs tracefs /sys/kernel/tracing", + } +} + +// RunAll executes all preflight checks and returns aggregated results. +func RunAll(opts CheckOptions) []Result { + opts = opts.withDefaults() + + // Read capabilities once, share across capability checks. + caps, capErr := getEffectiveCaps() + + return []Result{ + CheckKernelVersion(), + CheckBTF(opts), + CheckCgroupV2(opts), + CheckCapBPF(caps, capErr), + CheckCapPerfmon(caps, capErr), + CheckProcReadable(opts), + CheckCapNetAdmin(caps, capErr), + CheckOutputDir(opts), + CheckPortFree(opts), + CheckTracefs(opts), + } +} + +// getEffectiveCaps returns the combined effective capability bitmask +// for the current process using the stable Linux capability ABI. +func getEffectiveCaps() (uint64, error) { + hdr := unix.CapUserHeader{ + Version: unix.LINUX_CAPABILITY_VERSION_3, + Pid: 0, // current process + } + var data [2]unix.CapUserData + if err := unix.Capget(&hdr, &data[0]); err != nil { + return 0, fmt.Errorf("capget: %w", err) + } + return uint64(data[0].Effective) | (uint64(data[1].Effective) << 32), nil +} + +// hasCap checks whether a specific capability bit is set. 
+func hasCap(caps uint64, bit int) bool { + return caps&(1< 0 { + clean = clean[:idx] + } + } + parts := strings.Split(clean, ".") + if len(parts) < 2 { + return 0, 0, fmt.Errorf("invalid kernel release format: %q", release) + } + major, err = strconv.Atoi(parts[0]) + if err != nil { + return 0, 0, fmt.Errorf("parsing major version from %q: %w", release, err) + } + minor, err = strconv.Atoi(parts[1]) + if err != nil { + return 0, 0, fmt.Errorf("parsing minor version from %q: %w", release, err) + } + return major, minor, nil +} + +// utsNameToString converts a [65]int8 (or [65]byte on some archs) utsname +// field to a Go string, trimming at the first null byte. +func utsNameToString(arr [65]int8) string { + buf := make([]byte, 0, len(arr)) + for _, b := range arr { + if b == 0 { + break + } + buf = append(buf, byte(b)) + } + return string(buf) +} diff --git a/internal/preflight/checks_test.go b/internal/preflight/checks_test.go new file mode 100644 index 0000000..87983e9 --- /dev/null +++ b/internal/preflight/checks_test.go @@ -0,0 +1,247 @@ +// Copyright 2026 Optiqor contributors +// SPDX-License-Identifier: Apache-2.0 + +package preflight + +import ( + "context" + "net" + "os" + "path/filepath" + "testing" +) + +func TestParseKernelVersion(t *testing.T) { + tests := []struct { + release string + wantMajor int + wantMinor int + expectError bool + }{ + {"6.17.0", 6, 17, false}, + {"5.15.0-91-generic", 5, 15, false}, // Ubuntu + {"5.14.0-70.13.1.el9_0.x86_64", 5, 14, false}, // RHEL + {"6.1.0-rc4", 6, 1, false}, // release candidate + {"5.8.0+debug", 5, 8, false}, // custom build + {"5.4.0", 5, 4, false}, + {"4.19.0", 4, 19, false}, + {"invalid", 0, 0, true}, + {"", 0, 0, true}, + {"abc.def", 0, 0, true}, + } + + for _, tt := range tests { + t.Run(tt.release, func(t *testing.T) { + major, minor, err := parseKernelVersion(tt.release) + if tt.expectError { + if err == nil { + t.Errorf("expected error for %q, got (%d, %d)", tt.release, major, minor) + } + return + } + 
if err != nil { + t.Fatalf("unexpected error for %q: %v", tt.release, err) + } + if major != tt.wantMajor || minor != tt.wantMinor { + t.Errorf("parseKernelVersion(%q) = (%d, %d), want (%d, %d)", + tt.release, major, minor, tt.wantMajor, tt.wantMinor) + } + }) + } +} + +func TestCheckBTF_Present(t *testing.T) { + dir := t.TempDir() + btfPath := filepath.Join(dir, "vmlinux") + if err := os.WriteFile(btfPath, []byte("fake-btf"), 0o644); err != nil { + t.Fatal(err) + } + + r := CheckBTF(CheckOptions{BTFPath: btfPath}) + if r.Status != StatusPass { + t.Errorf("expected PASS, got %v: %s", r.Status, r.Message) + } +} + +func TestCheckBTF_Missing(t *testing.T) { + r := CheckBTF(CheckOptions{BTFPath: "/nonexistent/vmlinux"}) + if r.Status != StatusFail { + t.Errorf("expected FAIL, got %v: %s", r.Status, r.Message) + } +} + +func TestCheckProcReadable(t *testing.T) { + dir := t.TempDir() + versionPath := filepath.Join(dir, "version") + if err := os.WriteFile(versionPath, []byte("Linux version 6.17.0"), 0o644); err != nil { + t.Fatal(err) + } + + r := CheckProcReadable(CheckOptions{ProcPath: dir}) + if r.Status != StatusPass { + t.Errorf("expected PASS, got %v: %s", r.Status, r.Message) + } +} + +func TestCheckProcUnreadable(t *testing.T) { + r := CheckProcReadable(CheckOptions{ProcPath: "/nonexistent"}) + if r.Status != StatusFail { + t.Errorf("expected FAIL, got %v: %s", r.Status, r.Message) + } +} + +func TestCheckOutputDir_Writable(t *testing.T) { + dir := t.TempDir() + r := CheckOutputDir(CheckOptions{OutputDir: dir}) + if r.Status != StatusPass { + t.Errorf("expected PASS, got %v: %s", r.Status, r.Message) + } +} + +func TestCheckOutputDir_NonExistent(t *testing.T) { + r := CheckOutputDir(CheckOptions{OutputDir: "/nonexistent/output"}) + if r.Status != StatusFail { + t.Errorf("expected FAIL, got %v: %s", r.Status, r.Message) + } +} + +func TestCheckPortFree_Available(t *testing.T) { + // Use port 0 to get an available port. 
+ r := CheckPortFree(CheckOptions{PromAddr: "127.0.0.1:0"}) + if r.Status != StatusPass { + t.Errorf("expected PASS, got %v: %s", r.Status, r.Message) + } +} + +func TestCheckPortFree_InUse(t *testing.T) { + // Bind a port, then check it reports FAIL. + var lc net.ListenConfig + ln, err := lc.Listen(context.Background(), "tcp", "127.0.0.1:0") + if err != nil { + t.Fatal(err) + } + defer ln.Close() + + addr := ln.Addr().String() + r := CheckPortFree(CheckOptions{PromAddr: addr}) + if r.Status != StatusFail { + t.Errorf("expected FAIL for in-use port %s, got %v: %s", addr, r.Status, r.Message) + } +} + +func TestCheckTracefs_TraceFSPresent(t *testing.T) { + dir := t.TempDir() + traceFSPath := filepath.Join(dir, "tracing") + if err := os.Mkdir(traceFSPath, 0o755); err != nil { + t.Fatal(err) + } + + r := CheckTracefs(CheckOptions{ + TraceFSPath: traceFSPath, + DebugFSPath: filepath.Join(dir, "debug"), + }) + if r.Status != StatusPass { + t.Errorf("expected PASS, got %v: %s", r.Status, r.Message) + } +} + +func TestCheckTracefs_DebugFSPresent(t *testing.T) { + dir := t.TempDir() + debugTracing := filepath.Join(dir, "debug", "tracing") + if err := os.MkdirAll(debugTracing, 0o755); err != nil { + t.Fatal(err) + } + + r := CheckTracefs(CheckOptions{ + TraceFSPath: filepath.Join(dir, "nonexistent"), + DebugFSPath: filepath.Join(dir, "debug"), + }) + if r.Status != StatusPass { + t.Errorf("expected PASS, got %v: %s", r.Status, r.Message) + } +} + +func TestCheckTracefs_NeitherPresent(t *testing.T) { + dir := t.TempDir() + r := CheckTracefs(CheckOptions{ + TraceFSPath: filepath.Join(dir, "nonexistent"), + DebugFSPath: filepath.Join(dir, "also-nonexistent"), + }) + if r.Status != StatusWarn { + t.Errorf("expected WARN, got %v: %s", r.Status, r.Message) + } +} + +func TestCheckCap_HasCap(t *testing.T) { + // Synthetic caps with CAP_BPF set (bit 39). 
+ caps := uint64(1) << capBPF + r := CheckCapBPF(caps, nil) + if r.Status != StatusPass { + t.Errorf("expected PASS for CAP_BPF, got %v: %s", r.Status, r.Message) + } +} + +func TestCheckCap_MissingCap(t *testing.T) { + caps := uint64(0) + + // CAP_BPF missing should FAIL. + r := CheckCapBPF(caps, nil) + if r.Status != StatusFail { + t.Errorf("expected FAIL for missing CAP_BPF, got %v: %s", r.Status, r.Message) + } + + // CAP_NET_ADMIN missing should WARN (degraded, not blocked). + r = CheckCapNetAdmin(caps, nil) + if r.Status != StatusWarn { + t.Errorf("expected WARN for missing CAP_NET_ADMIN, got %v: %s", r.Status, r.Message) + } +} + +func TestCheckCap_CapgetError(t *testing.T) { + err := os.ErrPermission + r := CheckCapBPF(0, err) + if r.Status != StatusFail { + t.Errorf("expected FAIL on capget error, got %v: %s", r.Status, r.Message) + } +} + +func TestCheckCapPerfmon_HasCap(t *testing.T) { + caps := uint64(1) << capPerfmon + r := CheckCapPerfmon(caps, nil) + if r.Status != StatusPass { + t.Errorf("expected PASS for CAP_PERFMON, got %v: %s", r.Status, r.Message) + } +} + +func TestCheckCapPerfmon_MissingCap(t *testing.T) { + r := CheckCapPerfmon(0, nil) + if r.Status != StatusFail { + t.Errorf("expected FAIL for missing CAP_PERFMON, got %v: %s", r.Status, r.Message) + } +} + +func TestStatusString(t *testing.T) { + tests := []struct { + status Status + want string + }{ + {StatusPass, "PASS"}, + {StatusWarn, "WARN"}, + {StatusFail, "FAIL"}, + {Status(99), "UNKNOWN"}, + } + for _, tt := range tests { + if got := tt.status.String(); got != tt.want { + t.Errorf("Status(%d).String() = %q, want %q", tt.status, got, tt.want) + } + } +} + +func TestRunAll_ResultCount(t *testing.T) { + // RunAll with defaults will try real system paths. We just verify the + // count is correct (10 checks). + results := RunAll(CheckOptions{}) + if len(results) != 10 { + t.Errorf("RunAll returned %d results, want 10", len(results)) + } +}