From d133cbb38864a98e738bdf1c4f71b11671a8b8ac Mon Sep 17 00:00:00 2001 From: lukefoster11 Date: Thu, 30 Apr 2026 16:20:38 -0700 Subject: [PATCH] support agent-sandbox service Made-with: Cursor --- charts/retool/Chart.lock | 4 +- charts/retool/Chart.yaml | 4 +- charts/retool/files/gvisor-seccomp.json | 447 +++++++++++ charts/retool/templates/_helpers.tpl | 166 ++++ charts/retool/templates/_workers.tpl | 22 +- .../agent_sandbox_device_plugin.yaml | 91 +++ .../agent_sandbox_networkpolicy.yaml | 216 ++++++ .../templates/agent_sandbox_prepuller.yaml | 84 +++ .../templates/agent_sandbox_seccomp.yaml | 91 +++ .../templates/deployment_agent_sandbox.yaml | 714 ++++++++++++++++++ .../retool/templates/deployment_backend.yaml | 1 + .../templates/deployment_code_executor.yaml | 22 + .../templates/deployment_workflows.yaml | 1 + charts/retool/values.yaml | 249 ++++++ values.yaml | 249 ++++++ 15 files changed, 2353 insertions(+), 8 deletions(-) create mode 100644 charts/retool/files/gvisor-seccomp.json create mode 100644 charts/retool/templates/agent_sandbox_device_plugin.yaml create mode 100644 charts/retool/templates/agent_sandbox_networkpolicy.yaml create mode 100644 charts/retool/templates/agent_sandbox_prepuller.yaml create mode 100644 charts/retool/templates/agent_sandbox_seccomp.yaml create mode 100644 charts/retool/templates/deployment_agent_sandbox.yaml diff --git a/charts/retool/Chart.lock b/charts/retool/Chart.lock index 832b6f5e..7a8aeec9 100644 --- a/charts/retool/Chart.lock +++ b/charts/retool/Chart.lock @@ -5,5 +5,5 @@ dependencies: - name: retool-temporal-services-helm repository: "" version: 1.1.5 -digest: sha256:6b027cb2d661c436127fe34c4a5e14c820c691d4ec9e0c08609f416e6fe5af21 -generated: "2024-03-26T15:39:11.463027-04:00" +digest: sha256:7b9440db4914c56407c98faace390fd00374820b0f87987903912de7ac899ce8 +generated: "2026-04-22T17:14:51.109299-07:00" diff --git a/charts/retool/Chart.yaml b/charts/retool/Chart.yaml index d3e6b9bb..dbedb808 100644 --- 
a/charts/retool/Chart.yaml +++ b/charts/retool/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: retool description: A Helm chart for Kubernetes type: application -version: 6.10.2 +version: 6.11.0 maintainers: - name: Retool Engineering email: engineering+helm@retool.com @@ -13,4 +13,4 @@ dependencies: condition: postgresql.enabled - name: retool-temporal-services-helm version: 1.1.5 - condition: retool-temporal-services-helm.enabled,workflows.enabled + condition: retool-temporal-services-helm.enabled diff --git a/charts/retool/files/gvisor-seccomp.json b/charts/retool/files/gvisor-seccomp.json new file mode 100644 index 00000000..9b2a1de2 --- /dev/null +++ b/charts/retool/files/gvisor-seccomp.json @@ -0,0 +1,447 @@ +{ + "comment": "Docker default seccomp profile extended with syscalls required by gVisor runsc (systrap platform, rootless mode). Use with: docker run --security-opt seccomp=gvisor-seccomp.json", + "defaultAction": "SCMP_ACT_ERRNO", + "defaultErrnoRet": 1, + "archMap": [ + { + "architecture": "SCMP_ARCH_X86_64", + "subArchitectures": ["SCMP_ARCH_X86", "SCMP_ARCH_X32"] + }, + { + "architecture": "SCMP_ARCH_AARCH64", + "subArchitectures": [] + } + ], + "syscalls": [ + { + "comment": "Docker default allowlist (Docker 27.x, x86_64 + aarch64)", + "names": [ + "_llseek", + "_newselect", + "accept", + "accept4", + "access", + "acct", + "adjtimex", + "alarm", + "arch_prctl", + "bind", + "bpf", + "brk", + "cachestat", + "capget", + "capset", + "chdir", + "chmod", + "chown", + "chown32", + "chroot", + "clock_adjtime", + "clock_adjtime64", + "clock_getres", + "clock_getres_time64", + "clock_gettime", + "clock_gettime64", + "clock_nanosleep", + "clock_nanosleep_time64", + "clock_settime", + "clock_settime64", + "close", + "close_range", + "connect", + "copy_file_range", + "creat", + "delete_module", + "dup", + "dup2", + "dup3", + "epoll_create", + "epoll_create1", + "epoll_ctl", + "epoll_ctl_old", + "epoll_pwait", + "epoll_pwait2", + "epoll_wait", + 
"epoll_wait_old", + "eventfd", + "eventfd2", + "execve", + "execveat", + "exit", + "exit_group", + "faccessat", + "faccessat2", + "fadvise64", + "fadvise64_64", + "fallocate", + "fanotify_init", + "fanotify_mark", + "fchdir", + "fchmod", + "fchmodat", + "fchmodat2", + "fchown", + "fchown32", + "fchownat", + "fcntl", + "fcntl64", + "fdatasync", + "fgetxattr", + "finit_module", + "flistxattr", + "flock", + "fork", + "fremovexattr", + "fsconfig", + "fsetxattr", + "fsmount", + "fsopen", + "fspick", + "fstat", + "fstat64", + "fstatat64", + "fstatfs", + "fstatfs64", + "fsync", + "ftruncate", + "ftruncate64", + "futex", + "futex_requeue", + "futex_time64", + "futex_wait", + "futex_waitv", + "futex_wake", + "futimesat", + "get_mempolicy", + "get_robust_list", + "get_thread_area", + "getcpu", + "getcwd", + "getdents", + "getdents64", + "getegid", + "getegid32", + "geteuid", + "geteuid32", + "getgid", + "getgid32", + "getgroups", + "getgroups32", + "getitimer", + "getpeername", + "getpgid", + "getpgrp", + "getpid", + "getppid", + "getpriority", + "getrandom", + "getresgid", + "getresgid32", + "getresuid", + "getresuid32", + "getrlimit", + "getrusage", + "getsid", + "getsockname", + "getsockopt", + "gettid", + "gettimeofday", + "getuid", + "getuid32", + "getxattr", + "init_module", + "inotify_add_watch", + "inotify_init", + "inotify_init1", + "inotify_rm_watch", + "io_cancel", + "io_destroy", + "io_getevents", + "io_pgetevents", + "io_pgetevents_time64", + "io_setup", + "io_submit", + "io_uring_enter", + "io_uring_register", + "io_uring_setup", + "ioctl", + "ioperm", + "iopl", + "ioprio_get", + "ioprio_set", + "ipc", + "kcmp", + "kill", + "landlock_add_rule", + "landlock_create_ruleset", + "landlock_restrict_self", + "lchown", + "lchown32", + "lgetxattr", + "link", + "linkat", + "listen", + "listxattr", + "llistxattr", + "lookup_dcookie", + "lremovexattr", + "lseek", + "lsetxattr", + "lstat", + "lstat64", + "madvise", + "map_shadow_stack", + "mbind", + "membarrier", + 
"memfd_create", + "memfd_secret", + "mincore", + "mkdir", + "mkdirat", + "mknod", + "mknodat", + "mlock", + "mlock2", + "mlockall", + "mmap", + "mmap2", + "modify_ldt", + "mount_setattr", + "move_mount", + "mprotect", + "mq_getsetattr", + "mq_notify", + "mq_open", + "mq_timedreceive", + "mq_timedreceive_time64", + "mq_timedsend", + "mq_timedsend_time64", + "mq_unlink", + "mremap", + "msgctl", + "msgget", + "msgrcv", + "msgsnd", + "msync", + "munlock", + "munlockall", + "munmap", + "name_to_handle_at", + "nanosleep", + "newfstatat", + "open", + "open_by_handle_at", + "open_tree", + "openat", + "openat2", + "pause", + "perf_event_open", + "pidfd_getfd", + "pidfd_open", + "pidfd_send_signal", + "pipe", + "pipe2", + "pkey_alloc", + "pkey_free", + "pkey_mprotect", + "poll", + "ppoll", + "ppoll_time64", + "prctl", + "pread64", + "preadv", + "preadv2", + "prlimit64", + "process_madvise", + "process_mrelease", + "process_vm_readv", + "process_vm_writev", + "pselect6", + "pselect6_time64", + "pwrite64", + "pwritev", + "pwritev2", + "quotactl", + "quotactl_fd", + "read", + "readahead", + "readlink", + "readlinkat", + "readv", + "reboot", + "recv", + "recvfrom", + "recvmmsg", + "recvmmsg_time64", + "recvmsg", + "remap_file_pages", + "removexattr", + "rename", + "renameat", + "renameat2", + "restart_syscall", + "rmdir", + "rseq", + "rt_sigaction", + "rt_sigpending", + "rt_sigprocmask", + "rt_sigqueueinfo", + "rt_sigreturn", + "rt_sigsuspend", + "rt_sigtimedwait", + "rt_sigtimedwait_time64", + "rt_tgsigqueueinfo", + "sched_get_priority_max", + "sched_get_priority_min", + "sched_getaffinity", + "sched_getattr", + "sched_getparam", + "sched_getscheduler", + "sched_rr_get_interval", + "sched_rr_get_interval_time64", + "sched_setaffinity", + "sched_setattr", + "sched_setparam", + "sched_setscheduler", + "sched_yield", + "seccomp", + "select", + "semctl", + "semget", + "semop", + "semtimedop", + "semtimedop_time64", + "send", + "sendfile", + "sendfile64", + "sendmmsg", + "sendmsg", 
+ "sendto", + "set_mempolicy", + "set_mempolicy_home_node", + "set_robust_list", + "set_thread_area", + "set_tid_address", + "set_tls", + "setdomainname", + "setfsgid", + "setfsgid32", + "setfsuid", + "setfsuid32", + "setgid", + "setgid32", + "setgroups", + "setgroups32", + "setitimer", + "setpgid", + "setpriority", + "setregid", + "setregid32", + "setresgid", + "setresgid32", + "setresuid", + "setresuid32", + "setreuid", + "setreuid32", + "setrlimit", + "setsid", + "setsockopt", + "settimeofday", + "setuid", + "setuid32", + "setxattr", + "shmat", + "shmctl", + "shmdt", + "shmget", + "shutdown", + "sigaltstack", + "signalfd", + "signalfd4", + "sigprocmask", + "sigreturn", + "socket", + "socketcall", + "socketpair", + "splice", + "stat", + "stat64", + "statfs", + "statfs64", + "statx", + "stime", + "symlink", + "symlinkat", + "sync", + "sync_file_range", + "sync_file_range2", + "syncfs", + "sysinfo", + "syslog", + "tee", + "tgkill", + "time", + "timer_create", + "timer_delete", + "timer_getoverrun", + "timer_gettime", + "timer_gettime64", + "timer_settime", + "timer_settime64", + "timerfd_create", + "timerfd_gettime", + "timerfd_gettime64", + "timerfd_settime", + "timerfd_settime64", + "times", + "tkill", + "truncate", + "truncate64", + "ugetrlimit", + "umask", + "umount", + "uname", + "unlink", + "unlinkat", + "utime", + "utimensat", + "utimensat_time64", + "utimes", + "vfork", + "vhangup", + "vmsplice", + "wait4", + "waitid", + "waitpid", + "write", + "writev" + ], + "action": "SCMP_ACT_ALLOW" + }, + { + "comment": "gVisor + pasta: namespace creation and entry (clone/unshare with CLONE_NEW* flags, setns to join namespaces)", + "names": ["clone", "clone3", "unshare", "setns"], + "action": "SCMP_ACT_ALLOW" + }, + { + "comment": "pasta: set hostname inside namespace (cosmetic, avoids warning)", + "names": ["sethostname"], + "action": "SCMP_ACT_ALLOW" + }, + { + "comment": "gVisor: sandbox filesystem setup (tmpfs, proc, bind mounts)", + "names": ["mount", "umount2"], 
+ "action": "SCMP_ACT_ALLOW" + }, + { + "comment": "gVisor: filesystem root isolation for sentry and gofer", + "names": ["pivot_root"], + "action": "SCMP_ACT_ALLOW" + }, + { + "comment": "gVisor systrap platform: workload executor thread initialization", + "names": ["ptrace"], + "action": "SCMP_ACT_ALLOW" + } + ] +} diff --git a/charts/retool/templates/_helpers.tpl b/charts/retool/templates/_helpers.tpl index fde306d2..7716728a 100644 --- a/charts/retool/templates/_helpers.tpl +++ b/charts/retool/templates/_helpers.tpl @@ -289,6 +289,18 @@ Usage: (include "retool.agents.enabled" .) {{- $output -}} {{- end -}} +{{/* +Set R2 agent enabled +Usage: (include "retool.r2Agent.enabled" .) +*/}} +{{- define "retool.r2Agent.enabled" -}} +{{- $output := "" -}} +{{- if (eq (toString .Values.r2Agent.enabled) "true") -}} + {{- $output = "1" -}} +{{- end -}} +{{- $output -}} +{{- end -}} + {{/* Global Temporal configuration */}} {{- define "retool.temporalConfig" -}} {{- .Values.workflows.temporal | default .Values.temporal | toYaml -}} @@ -379,6 +391,160 @@ Set agent eval worker service name {{ template "retool.fullname" . }}-agent-eval-worker {{- end -}} +{{/* +Set R2 agent worker service name +*/}} +{{- define "retool.r2AgentWorker.name" -}} +{{ template "retool.fullname" . }}-r2-agent-worker +{{- end -}} + +{{/* +Selector labels for R2 agent worker. Note changes here will require manual +deployment recreation and incur downtime, so should be avoided. +*/}} +{{- define "retool.r2AgentWorker.selectorLabels" -}} +retoolService: {{ include "retool.r2AgentWorker.name" . }} +{{- end }} + +{{/* +Extra (non-selector) labels for R2 agent worker. +*/}} +{{- define "retool.r2AgentWorker.labels" -}} +app.kubernetes.io/name: {{ include "retool.r2AgentWorker.name" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +telemetry.retool.com/service-name: r2-agent-worker +{{- end }} + +{{/* +Set agent sandbox base name +*/}} +{{- define "retool.agentSandbox.name" -}} +{{ template "retool.fullname" . }}-agent-sandbox +{{- end -}} + +{{/* +Set agent sandbox controller name +*/}} +{{- define "retool.agentSandbox.controller.name" -}} +{{ template "retool.fullname" . }}-agent-sandbox-controller +{{- end -}} + +{{/* +Set agent sandbox proxy name +*/}} +{{- define "retool.agentSandbox.proxy.name" -}} +{{ template "retool.fullname" . }}-agent-sandbox-proxy +{{- end -}} + +{{/* +Secret name for agent sandbox. +Uses externalSecret.name if set, otherwise the auto-generated name. +*/}} +{{- define "retool.agentSandbox.secretName" -}} +{{- if .Values.agentSandbox.externalSecret.name -}} +{{ .Values.agentSandbox.externalSecret.name }} +{{- else -}} +{{ template "retool.agentSandbox.name" . }} +{{- end -}} +{{- end -}} + +{{/* +Selector labels for agent sandbox (sandbox pods / headless service). +*/}} +{{- define "retool.agentSandbox.selectorLabels" -}} +retoolService: {{ include "retool.agentSandbox.name" . }} +{{- end -}} + +{{/* +Extra labels for agent sandbox. +*/}} +{{- define "retool.agentSandbox.labels" -}} +app.kubernetes.io/name: {{ include "retool.agentSandbox.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +telemetry.retool.com/service-name: agent-sandbox +{{- end -}} + +{{/* +Selector labels for agent sandbox controller. +*/}} +{{- define "retool.agentSandbox.controller.selectorLabels" -}} +retoolService: {{ include "retool.agentSandbox.controller.name" . }} +{{- end -}} + +{{/* +Extra labels for agent sandbox controller. +*/}} +{{- define "retool.agentSandbox.controller.labels" -}} +app.kubernetes.io/name: {{ include "retool.agentSandbox.controller.name" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +app.kubernetes.io/component: controller +telemetry.retool.com/service-name: agent-sandbox-controller +{{- end -}} + +{{/* +Selector labels for agent sandbox proxy. +*/}} +{{- define "retool.agentSandbox.proxy.selectorLabels" -}} +retoolService: {{ include "retool.agentSandbox.proxy.name" . }} +{{- end -}} + +{{/* +Extra labels for agent sandbox proxy. +*/}} +{{- define "retool.agentSandbox.proxy.labels" -}} +app.kubernetes.io/name: {{ include "retool.agentSandbox.proxy.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +app.kubernetes.io/component: proxy +telemetry.retool.com/service-name: agent-sandbox-proxy +{{- end -}} + +{{/* +Agent sandbox env vars for the Retool backend, workflow backend, and workers. +Outputs env entries that tell the backend how to reach the agent sandbox services. +Usage: {{- include "retool.agentSandbox.backendEnvVars" . | nindent 10 }} +*/}} +{{- define "retool.agentSandbox.backendEnvVars" -}} +{{- if .Values.agentSandbox.enabled }} +- name: AGENT_EXECUTOR_ENABLED + value: "true" +- name: RR_AGENT_PUBSUB_BACKEND + value: "postgres" +- name: AGENT_EXECUTOR_CONTROLLER_INGRESS_DOMAIN + value: {{ .Values.agentSandbox.controllerUrl | default (printf "http://%s:%s" (include "retool.agentSandbox.controller.name" .) (toString .Values.agentSandbox.controller.port)) | quote }} +- name: AGENT_EXECUTOR_PROXY_INGRESS_DOMAIN + value: {{ .Values.agentSandbox.proxyUrl | default (printf "http://%s:%s" (include "retool.agentSandbox.proxy.name" .) 
(toString .Values.agentSandbox.proxy.port)) | quote }} +{{- if .Values.agentSandbox.frontendWsProxyDomain }} +- name: AGENT_EXECUTOR_FRONTEND_WS_PROXY_DOMAIN + value: {{ .Values.agentSandbox.frontendWsProxyDomain | quote }} +{{- end }} +{{- if or .Values.agentSandbox.proxyDomain .Values.agentSandbox.frontendWsProxyDomain }} +- name: AGENT_EXECUTOR_PROXY_DOMAIN + value: {{ .Values.agentSandbox.proxyDomain | default .Values.agentSandbox.frontendWsProxyDomain | quote }} +{{- end }} +{{- if or .Values.agentSandbox.jwtPrivateKey .Values.agentSandbox.externalSecret.name }} +- name: AGENT_EXECUTOR_JWT_PRIVATE_KEY + valueFrom: + secretKeyRef: + name: {{ include "retool.agentSandbox.secretName" . }} + key: jwt-private-key +{{- end }} +{{- if or .Values.agentSandbox.jwtPublicKey .Values.agentSandbox.externalSecret.name }} +- name: AGENT_EXECUTOR_JWT_PUBLIC_KEY + valueFrom: + secretKeyRef: + name: {{ include "retool.agentSandbox.secretName" . }} + key: jwt-public-key +{{- end }} +{{- if or .Values.agentSandbox.encryptionKey .Values.agentSandbox.externalSecret.name }} +- name: AGENT_EXECUTOR_ENCRYPTION_KEY + valueFrom: + secretKeyRef: + name: {{ include "retool.agentSandbox.secretName" . }} + key: encryption-key +{{- end }} +{{- end }} +{{- end -}} + {{/* Set code executor image tag Usage: (template "retool.codeExecutor.image.tag" .) 
diff --git a/charts/retool/templates/_workers.tpl b/charts/retool/templates/_workers.tpl index 22026911..d006cf40 100644 --- a/charts/retool/templates/_workers.tpl +++ b/charts/retool/templates/_workers.tpl @@ -3,6 +3,8 @@ type: agent - parent: agents type: agentEval +- parent: r2Agent + type: r2Agent - parent: workflows type: workflow {{- end -}} @@ -36,9 +38,20 @@ {{- end }} {{- end -}} -{{- $healthcheckPort := ternary 3012 3005 (eq $workerType "agentEval") -}} -{{- $serviceType := ternary "AGENT_EVAL_TEMPORAL_WORKER" "WORKFLOW_TEMPORAL_WORKER" (eq $workerType "agentEval") -}} -{{- $taskqueue := ternary "agent-eval" (ternary "agent" "" (eq $workerType "agent")) (eq $workerType "agentEval") -}} +{{- $healthcheckPort := 3005 -}} +{{- $serviceType := "WORKFLOW_TEMPORAL_WORKER" -}} +{{- $taskqueue := "" -}} +{{- if eq $workerType "agentEval" -}} + {{- $healthcheckPort = 3012 -}} + {{- $serviceType = "AGENT_EVAL_TEMPORAL_WORKER" -}} + {{- $taskqueue = "agent-eval" -}} +{{- else if eq $workerType "r2Agent" -}} + {{- $healthcheckPort = 3016 -}} + {{- $serviceType = "R2_AGENT_TEMPORAL_WORKER" -}} + {{- $taskqueue = "r2-agent" -}} +{{- else if eq $workerType "agent" -}} + {{- $taskqueue = "agent" -}} +{{- end -}} {{/* yaml starts here */}} apiVersion: apps/v1 @@ -100,7 +113,7 @@ spec: {{- end }} {{- end }} containers: - - name: {{ if eq $workerType "agentEval" }}agent-eval-worker{{ else }}{{ $workerType }}-worker{{ end }} + - name: {{ if eq $workerType "agentEval" }}agent-eval-worker{{ else if eq $workerType "r2Agent" }}r2-agent-worker{{ else }}{{ $workerType }}-worker{{ end }} image: "{{ $.Values.image.repository }}:{{ required "Please set a value for .Values.image.tag" $.Values.image.tag }}" imagePullPolicy: {{ $.Values.image.pullPolicy }} args: @@ -200,6 +213,7 @@ spec: value: {{ template "retool.postgresql.ssl_enabled" $ }} - name: CODE_EXECUTOR_INGRESS_DOMAIN value: http://{{ template "retool.codeExecutor.name" $ }} + {{- include "retool.agentSandbox.backendEnvVars" 
$ | nindent 10 }} {{- include "retool.telemetry.includeEnvVars" $ | nindent 10 }} diff --git a/charts/retool/templates/agent_sandbox_device_plugin.yaml b/charts/retool/templates/agent_sandbox_device_plugin.yaml new file mode 100644 index 00000000..c936ae28 --- /dev/null +++ b/charts/retool/templates/agent_sandbox_device_plugin.yaml @@ -0,0 +1,91 @@ +{{- if and .Values.agentSandbox.enabled .Values.agentSandbox.sandboxNetwork.devicePlugin }} +{{- $as := .Values.agentSandbox -}} +{{- $nodeSelector := $as.nodeSelector | default .Values.nodeSelector -}} +{{- $tolerations := $as.tolerations | default .Values.tolerations -}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "retool.agentSandbox.name" . }}-device-plugin + labels: + {{- include "retool.agentSandbox.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} + app.kubernetes.io/component: device-plugin +data: + conf.yaml: | + - devicematch: ^net/tun$ + nummaxdevices: {{ $as.devicePlugin.maxDevices | default 130 }} +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: {{ include "retool.agentSandbox.name" . }}-device-plugin + labels: + {{- include "retool.agentSandbox.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} + app.kubernetes.io/component: device-plugin +spec: + selector: + matchLabels: + retoolService: {{ include "retool.agentSandbox.name" . }}-device-plugin + template: + metadata: + labels: + retoolService: {{ include "retool.agentSandbox.name" . }}-device-plugin + app.kubernetes.io/name: {{ include "retool.agentSandbox.name" . }}-device-plugin + app.kubernetes.io/instance: {{ .Release.Name }} + {{- include "retool.labels" . 
| nindent 8 }} +{{- if .Values.podLabels }} +{{ toYaml .Values.podLabels | indent 8 }} +{{- end }} + spec: + automountServiceAccountToken: false + priorityClassName: system-node-critical +{{- if $nodeSelector }} + nodeSelector: +{{ toYaml $nodeSelector | indent 8 }} +{{- end }} + tolerations: +{{ toYaml $tolerations | indent 8 }} + containers: + - name: smarter-device-manager + image: "{{ $as.devicePlugin.image.repository }}:{{ $as.devicePlugin.image.tag }}" + imagePullPolicy: IfNotPresent + terminationMessagePath: /tmp/termination-log + terminationMessagePolicy: FallbackToLogsOnError + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: ["ALL"] + volumeMounts: + - name: device-plugin + mountPath: /var/lib/kubelet/device-plugins + - name: dev + mountPath: /dev + readOnly: true + - name: sys + mountPath: /sys + readOnly: true + - name: config + mountPath: /root/config + resources: + requests: + cpu: 10m + memory: 16Mi + limits: + cpu: 100m + memory: 32Mi + volumes: + - name: device-plugin + hostPath: + path: /var/lib/kubelet/device-plugins + - name: dev + hostPath: + path: /dev + - name: sys + hostPath: + path: /sys + - name: config + configMap: + name: {{ include "retool.agentSandbox.name" . 
}}-device-plugin +{{- end }} diff --git a/charts/retool/templates/agent_sandbox_networkpolicy.yaml b/charts/retool/templates/agent_sandbox_networkpolicy.yaml new file mode 100644 index 00000000..cc6d07e2 --- /dev/null +++ b/charts/retool/templates/agent_sandbox_networkpolicy.yaml @@ -0,0 +1,216 @@ +{{- if and .Values.agentSandbox.enabled .Values.agentSandbox.networkPolicy.enabled }} +{{- $as := .Values.agentSandbox -}} +{{- /* +======================================================================= + Sandbox Pod NetworkPolicy — restrict ingress/egress for executor Jobs +======================================================================= +*/}} +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ include "retool.agentSandbox.name" . }} + labels: + {{- include "retool.agentSandbox.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} +spec: + podSelector: + matchLabels: + {{- include "retool.agentSandbox.selectorLabels" . | nindent 6 }} + policyTypes: + - Ingress + - Egress + ingress: + - from: + {{- if $as.networkPolicy.ingressFrom }} + {{- toYaml $as.networkPolicy.ingressFrom | nindent 8 }} + {{- else }} + - podSelector: + matchLabels: + {{- include "retool.selectorLabels" . | nindent 14 }} + {{- end }} + ports: + - port: {{ $as.sandbox.port }} + protocol: TCP + - from: + - podSelector: + matchLabels: + {{- include "retool.agentSandbox.controller.selectorLabels" . | nindent 14 }} + ports: + - port: {{ $as.sandbox.port }} + protocol: TCP + - from: + - podSelector: + matchLabels: + {{- include "retool.agentSandbox.proxy.selectorLabels" . 
| nindent 14 }} + ports: + - port: {{ $as.sandbox.port }} + protocol: TCP + egress: + {{- if $as.networkPolicy.dnsSelector }} + - to: + - namespaceSelector: + {{- toYaml $as.networkPolicy.dnsSelector.namespaceSelector | nindent 12 }} + podSelector: + {{- toYaml $as.networkPolicy.dnsSelector.podSelector | nindent 12 }} + ports: + - port: 53 + protocol: UDP + - port: 53 + protocol: TCP + {{- end }} + - to: + - podSelector: + matchLabels: + {{- include "retool.agentSandbox.proxy.selectorLabels" . | nindent 14 }} + ports: + - port: {{ $as.proxy.port }} + protocol: TCP + {{- with $as.networkPolicy.extraEgress }} + {{- toYaml . | nindent 4 }} + {{- end }} +--- +{{- /* +======================================================================= + Controller NetworkPolicy +======================================================================= +*/}} +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ include "retool.agentSandbox.controller.name" . }} + labels: + {{- include "retool.agentSandbox.controller.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} +spec: + podSelector: + matchLabels: + {{- include "retool.agentSandbox.controller.selectorLabels" . | nindent 6 }} + policyTypes: + - Ingress + - Egress + ingress: + - from: + - podSelector: + matchLabels: + {{- include "retool.selectorLabels" . | nindent 14 }} + ports: + - port: {{ $as.controller.port }} + protocol: TCP + egress: + {{- if $as.networkPolicy.dnsSelector }} + - to: + - namespaceSelector: + {{- toYaml $as.networkPolicy.dnsSelector.namespaceSelector | nindent 12 }} + podSelector: + {{- toYaml $as.networkPolicy.dnsSelector.podSelector | nindent 12 }} + ports: + - port: 53 + protocol: UDP + - port: 53 + protocol: TCP + {{- end }} + - to: + - podSelector: + matchLabels: + {{- include "retool.agentSandbox.selectorLabels" . 
| nindent 14 }} + ports: + - port: {{ $as.sandbox.port }} + protocol: TCP + - to: + - ipBlock: + cidr: 0.0.0.0/0 + ports: + - port: 443 + protocol: TCP + - port: 6443 + protocol: TCP +--- +{{- /* +======================================================================= + Proxy NetworkPolicy +======================================================================= +*/}} +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ include "retool.agentSandbox.proxy.name" . }} + labels: + {{- include "retool.agentSandbox.proxy.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} +spec: + podSelector: + matchLabels: + {{- include "retool.agentSandbox.proxy.selectorLabels" . | nindent 6 }} + policyTypes: + - Ingress + - Egress + ingress: + - from: + - podSelector: + matchLabels: + {{- include "retool.agentSandbox.selectorLabels" . | nindent 14 }} + ports: + - port: {{ $as.proxy.port }} + protocol: TCP + - from: + - podSelector: + matchLabels: + {{- include "retool.selectorLabels" . | nindent 14 }} + ports: + - port: {{ $as.proxy.port }} + protocol: TCP + {{- if ($as.proxy.ingress).enabled }} + {{- if $as.proxy.ingress.networkPolicy }} + - from: + - podSelector: + matchLabels: + {{- toYaml $as.proxy.ingress.networkPolicy.podSelector | nindent 14 }} + ports: + - port: {{ $as.proxy.port }} + protocol: TCP + {{- end }} + {{- end }} + egress: + {{- if $as.networkPolicy.dnsSelector }} + - to: + - namespaceSelector: + {{- toYaml $as.networkPolicy.dnsSelector.namespaceSelector | nindent 12 }} + podSelector: + {{- toYaml $as.networkPolicy.dnsSelector.podSelector | nindent 12 }} + ports: + - port: 53 + protocol: UDP + - port: 53 + protocol: TCP + {{- end }} + - to: + - podSelector: + matchLabels: + {{- include "retool.agentSandbox.selectorLabels" . 
| nindent 14 }} + ports: + - port: {{ $as.sandbox.port }} + protocol: TCP + {{- if $as.networkPolicy.backendAllowlist }} + - to: + {{- range $as.networkPolicy.backendAllowlist }} + - ipBlock: + cidr: {{ . }} + {{- end }} + {{- end }} + - to: + - ipBlock: + cidr: 0.0.0.0/0 + except: + {{- range $as.networkPolicy.blockedRanges }} + - {{ . }} + {{- end }} + {{- if $as.networkPolicy.blockedRanges6 }} + - to: + - ipBlock: + cidr: ::/0 + except: + {{- range $as.networkPolicy.blockedRanges6 }} + - {{ . }} + {{- end }} + {{- end }} +{{- end }} diff --git a/charts/retool/templates/agent_sandbox_prepuller.yaml b/charts/retool/templates/agent_sandbox_prepuller.yaml new file mode 100644 index 00000000..d47072d2 --- /dev/null +++ b/charts/retool/templates/agent_sandbox_prepuller.yaml @@ -0,0 +1,84 @@ +{{- if .Values.agentSandbox.enabled }} +{{- $as := .Values.agentSandbox -}} +{{- $nodeSelector := $as.nodeSelector | default .Values.nodeSelector -}} +{{- $tolerations := $as.tolerations | default .Values.tolerations -}} +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: {{ include "retool.agentSandbox.name" . }}-image-prepuller + labels: + {{- include "retool.agentSandbox.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} + app.kubernetes.io/component: image-prepuller +spec: + selector: + matchLabels: + retoolService: {{ include "retool.agentSandbox.name" . }}-image-prepuller + updateStrategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: 100% + template: + metadata: + labels: + retoolService: {{ include "retool.agentSandbox.name" . }}-image-prepuller + app.kubernetes.io/name: {{ include "retool.agentSandbox.name" . }}-image-prepuller + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/component: image-prepuller + {{- include "retool.labels" . 
| nindent 8 }} +{{- if .Values.podLabels }} +{{ toYaml .Values.podLabels | indent 8 }} +{{- end }} + spec: + automountServiceAccountToken: false +{{- if $nodeSelector }} + nodeSelector: +{{ toYaml $nodeSelector | indent 8 }} +{{- end }} + tolerations: +{{ toYaml $tolerations | indent 8 }} + terminationGracePeriodSeconds: 5 + initContainers: + - name: pull-image + image: "{{ $as.image.repository }}:{{ $as.image.tag }}" + imagePullPolicy: {{ $as.image.pullPolicy }} + command: ["true"] + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1001 + capabilities: + drop: ["ALL"] + resources: + requests: + cpu: 1m + memory: 4Mi + limits: + cpu: 10m + memory: 16Mi + containers: + - name: pause + image: "{{ $as.initImage.repository }}:{{ $as.initImage.tag }}{{- if $as.initImage.digest }}@{{ $as.initImage.digest }}{{- end }}" + command: ["sleep", "infinity"] + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1001 + capabilities: + drop: ["ALL"] + seccompProfile: + type: RuntimeDefault + resources: + requests: + cpu: 1m + memory: 4Mi + limits: + cpu: 10m + memory: 16Mi +{{- if .Values.image.pullSecrets }} + imagePullSecrets: +{{ toYaml .Values.image.pullSecrets | indent 8 }} +{{- end }} +{{- end }} diff --git a/charts/retool/templates/agent_sandbox_seccomp.yaml b/charts/retool/templates/agent_sandbox_seccomp.yaml new file mode 100644 index 00000000..c6149e64 --- /dev/null +++ b/charts/retool/templates/agent_sandbox_seccomp.yaml @@ -0,0 +1,91 @@ +{{- if .Values.agentSandbox.enabled }} +{{- $as := .Values.agentSandbox -}} +{{- $nodeSelector := $as.nodeSelector | default .Values.nodeSelector -}} +{{- $tolerations := $as.tolerations | default .Values.tolerations -}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "retool.agentSandbox.name" . }}-seccomp + labels: + {{- include "retool.agentSandbox.labels" . 
| nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} +data: + gvisor-seccomp.json: | + {{- .Files.Get "files/gvisor-seccomp.json" | nindent 4 }} +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: {{ include "retool.agentSandbox.name" . }}-node-installer + labels: + {{- include "retool.agentSandbox.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} + app.kubernetes.io/component: node-installer +spec: + selector: + matchLabels: + retoolService: {{ include "retool.agentSandbox.name" . }}-node-installer + template: + metadata: + labels: + retoolService: {{ include "retool.agentSandbox.name" . }}-node-installer + app.kubernetes.io/name: {{ include "retool.agentSandbox.name" . }}-node-installer + app.kubernetes.io/instance: {{ .Release.Name }} + {{- include "retool.labels" . | nindent 8 }} +{{- if .Values.podLabels }} +{{ toYaml .Values.podLabels | indent 8 }} +{{- end }} + spec: + automountServiceAccountToken: false +{{- if $nodeSelector }} + nodeSelector: +{{ toYaml $nodeSelector | indent 8 }} +{{- end }} + tolerations: +{{ toYaml $tolerations | indent 8 }} + initContainers: + - name: install + image: "{{ $as.initImage.repository }}:{{ $as.initImage.tag }}{{- if $as.initImage.digest }}@{{ $as.initImage.digest }}{{- end }}" + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: ["ALL"] + command: + - /bin/sh + - -c + - | + DEST="/host-seccomp/{{ $as.seccompProfile }}" + mkdir -p "$(dirname "$DEST")" + cp /seccomp-profile/gvisor-seccomp.json "$DEST" + echo "seccomp profile installed at $DEST" + volumeMounts: + - name: seccomp-profile + mountPath: /seccomp-profile + - name: host-seccomp + mountPath: /host-seccomp + containers: + - name: pause + image: "{{ $as.initImage.repository }}:{{ $as.initImage.tag }}{{- if $as.initImage.digest }}@{{ $as.initImage.digest }}{{- end }}" + command: ["sleep", "infinity"] + securityContext: + allowPrivilegeEscalation: false + 
readOnlyRootFilesystem: true + capabilities: + drop: ["ALL"] + resources: + requests: + cpu: 1m + memory: 4Mi + limits: + cpu: 10m + memory: 16Mi + volumes: + - name: seccomp-profile + configMap: + name: {{ include "retool.agentSandbox.name" . }}-seccomp + - name: host-seccomp + hostPath: + path: /var/lib/kubelet/seccomp + type: DirectoryOrCreate +{{- end }} diff --git a/charts/retool/templates/deployment_agent_sandbox.yaml b/charts/retool/templates/deployment_agent_sandbox.yaml new file mode 100644 index 00000000..00e06fa5 --- /dev/null +++ b/charts/retool/templates/deployment_agent_sandbox.yaml @@ -0,0 +1,714 @@ +{{- if .Values.agentSandbox.enabled }} +{{- $as := .Values.agentSandbox -}} +{{- $secretName := include "retool.agentSandbox.secretName" . -}} +{{- $nodeSelector := $as.nodeSelector | default .Values.nodeSelector -}} +{{- $tolerations := $as.tolerations | default .Values.tolerations -}} +{{- /* +======================================================================= + Secret (skipped when externalSecret.name is set) +======================================================================= +*/}} +{{- if not $as.externalSecret.name }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ template "retool.agentSandbox.name" . }} + labels: + {{- include "retool.agentSandbox.labels" . | nindent 4 }} + {{- include "retool.labels" . 
| nindent 4 }} +type: Opaque +data: + jwt-public-key: {{ $as.jwtPublicKey | default "" | b64enc | quote }} + jwt-private-key: {{ $as.jwtPrivateKey | default "" | b64enc | quote }} + encryption-key: {{ $as.encryptionKey | default "" | b64enc | quote }} + api-secret: {{ $as.apiSecret | default "" | b64enc | quote }} + postgres-url: {{ $as.postgres.url | default "" | b64enc | quote }} +--- +{{- end }} +{{- /* +======================================================================= + RBAC for the controller (needs to manage Jobs, Pods, ConfigMaps) +======================================================================= +*/}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "retool.agentSandbox.controller.name" . }} + labels: + {{- include "retool.agentSandbox.controller.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "retool.agentSandbox.controller.name" . }} + labels: + {{- include "retool.agentSandbox.controller.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} +rules: + - apiGroups: ["apps"] + resources: ["deployments"] + resourceNames: ["{{ include "retool.agentSandbox.name" . }}"] + verbs: ["get", "list", "watch", "update", "patch"] + - apiGroups: ["apps"] + resources: ["deployments/scale"] + resourceNames: ["{{ include "retool.agentSandbox.name" . }}"] + verbs: ["get", "patch"] + - apiGroups: ["apps"] + resources: ["daemonsets"] + verbs: ["get", "list", "watch"] + - apiGroups: ["batch"] + resources: ["jobs"] + verbs: ["get", "list", "watch", "create", "delete"] + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: ["configmaps"] + verbs: ["get"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "retool.agentSandbox.controller.name" . }} + labels: + {{- include "retool.agentSandbox.controller.labels" . 
| nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ include "retool.agentSandbox.controller.name" . }} +subjects: + - kind: ServiceAccount + name: {{ include "retool.agentSandbox.controller.name" . }} + namespace: {{ .Release.Namespace }} +--- +{{- /* +======================================================================= + Job Template ConfigMap — defines the K8s Job spec the controller uses + to create sandbox pods. +======================================================================= +*/}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "retool.agentSandbox.name" . }}-job-template + labels: + {{- include "retool.agentSandbox.controller.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} +data: + job-template.json: | + { + "apiVersion": "batch/v1", + "kind": "Job", + "metadata": { + "labels": { + "retoolService": "{{ include "retool.agentSandbox.name" . }}", + "app.kubernetes.io/name": "{{ include "retool.agentSandbox.name" . }}" + } + }, + "spec": { + "backoffLimit": 0, + "ttlSecondsAfterFinished": {{ $as.controller.scaling.jobRetentionSeconds }}, + "template": { + "metadata": { + "annotations": { + "karpenter.sh/do-not-disrupt": "true" + }, + "labels": { + "retoolService": "{{ include "retool.agentSandbox.name" . }}", + "app.kubernetes.io/name": "{{ include "retool.agentSandbox.name" . }}" + } + }, + "spec": { + "restartPolicy": "Never", + "subdomain": "{{ include "retool.agentSandbox.name" . }}-pods", + "automountServiceAccountToken": false, + {{- if $nodeSelector }} + "nodeSelector": {{ toJson $nodeSelector }}, + {{- end }} + {{- if $tolerations }} + "tolerations": {{ toJson $tolerations }}, + {{- end }} + "initContainers": [ + { + "name": "rootfs-etc-copy", + "image": "{{ $as.image.repository }}:__IMAGE_TAG__", + "command": ["/bin/sh", "-c", "cp -r /opt/sandbox-env/rootfs/etc/. 
/mnt/etc/"], + "securityContext": { + "runAsUser": 0, + "allowPrivilegeEscalation": false, + "readOnlyRootFilesystem": true, + "capabilities": {"drop": ["ALL"], "add": ["DAC_READ_SEARCH"]} + }, + "volumeMounts": [ + {"name": "rootfs-etc", "mountPath": "/mnt/etc"} + ], + "resources": { + "requests": {"cpu": "10m", "memory": "16Mi"}, + "limits": {"cpu": "100m", "memory": "32Mi"} + } + } + ], + "containers": [ + { + "name": "agent-sandbox", + "image": "{{ $as.image.repository }}:__IMAGE_TAG__", + "ports": [{"containerPort": {{ $as.sandbox.port }}, "protocol": "TCP"}], + "securityContext": { + "runAsUser": 1001, + "runAsGroup": 1001, + "allowPrivilegeEscalation": false, + "readOnlyRootFilesystem": true, + "capabilities": {"drop": ["ALL"]}, + "seccompProfile": {"type": "Localhost", "localhostProfile": "{{ $as.seccompProfile }}"} + }, + "env": [ + {"name": "NODE_ENV", "value": "production"}, + {"name": "EXECUTOR_PORT", "value": "{{ $as.sandbox.port }}"}, + {"name": "POD_NAME", "valueFrom": {"fieldRef": {"fieldPath": "metadata.name"}}}, + {"name": "POD_UID", "valueFrom": {"fieldRef": {"fieldPath": "metadata.uid"}}}, + {"name": "POD_IP", "valueFrom": {"fieldRef": {"fieldPath": "status.podIP"}}}, + {"name": "SANDBOX_NETWORK_ENABLED", "value": "{{ $as.sandboxNetwork.enabled }}"}, + {"name": "SANDBOX_IDLE_TIMEOUT_MS", "value": "{{ $as.sandbox.sandboxIdleTimeoutMs }}"}, + {"name": "SANDBOX_MEMORY_LIMIT", "value": "{{ $as.sandbox.sandboxMemoryLimit }}"} + {{- if or $as.jwtPublicKey $as.externalSecret.name }} + ,{"name": "AGENT_EXECUTOR_JWT_PUBLIC_KEY", "valueFrom": {"secretKeyRef": {"name": "{{ $secretName }}", "key": "jwt-public-key"}}} + {{- end }} + {{- if $as.proxy.backendDomainSuffixes }} + ,{"name": "BACKEND_DOMAIN_SUFFIXES", "value": "{{ $as.proxy.backendDomainSuffixes }}"} + {{- end }} + {{- if $as.sandboxNetwork.enabled }} + ,{"name": "SANDBOX_HTTP_PROXY", "value": "{{ $as.sandboxNetwork.httpProxy | default (printf "http://%s:%s" (include 
"retool.agentSandbox.proxy.name" .) (toString $as.proxy.port)) }}"} + {{- end }} + {{- if $as.snapshotStorage.s3Bucket }} + ,{"name": "S3_BUCKET", "value": "{{ $as.snapshotStorage.s3Bucket }}"} + ,{"name": "S3_ENDPOINT", "value": "{{ $as.snapshotStorage.s3Endpoint }}"} + ,{"name": "S3_REGION", "value": "{{ $as.snapshotStorage.s3Region }}"} + ,{"name": "AWS_ACCESS_KEY_ID", "valueFrom": {"secretKeyRef": {"name": "{{ $as.snapshotStorage.credentialsSecretName | default $secretName }}", "key": "awsAccessKeyId"}}} + ,{"name": "AWS_SECRET_ACCESS_KEY", "valueFrom": {"secretKeyRef": {"name": "{{ $as.snapshotStorage.credentialsSecretName | default $secretName }}", "key": "awsSecretAccessKey"}}} + {{- end }} + {{- range $as.sandbox.extraEnv }} + ,{{ toJson . }} + {{- end }} + ], + "volumeMounts": [ + {{- if and $as.sandboxNetwork.enabled (not $as.sandboxNetwork.devicePlugin) }} + {"name": "dev-tun", "mountPath": "/dev/net/tun"}, + {{- end }} + {"name": "run", "mountPath": "/run"}, + {"name": "tmp", "mountPath": "/tmp"}, + {"name": "rootfs-appjob", "mountPath": "/opt/sandbox-env/rootfs/app/job"}, + {"name": "rootfs-etc", "mountPath": "/opt/sandbox-env/rootfs/etc"} + ], + {{- $res := deepCopy $as.sandbox.resources }} + {{- if $as.sandboxNetwork.devicePlugin }} + {{- $_ := set $res.limits "smarter-devices/net_tun" 1 }} + {{- end }} + "resources": {{ toJson $res }} + } + ], + "volumes": [ + {{- if and $as.sandboxNetwork.enabled (not $as.sandboxNetwork.devicePlugin) }} + {"name": "dev-tun", "hostPath": {"path": "/dev/net/tun", "type": "CharDevice"}}, + {{- end }} + {"name": "run", "emptyDir": {"medium": "Memory", "sizeLimit": "64Mi"}}, + {"name": "tmp", "emptyDir": {"sizeLimit": "{{ $as.sandbox.tmpDirSizeLimit | default "20Gi" }}"}}, + {"name": "rootfs-appjob", "emptyDir": {"sizeLimit": "{{ $as.sandbox.rootfsSizeLimit | default "2Gi" }}"}}, + {"name": "rootfs-etc", "emptyDir": {"medium": "Memory", "sizeLimit": "4Mi"}} + ] + } + } + } + } +--- +{{- /* 
+======================================================================= + Controller Deployment +======================================================================= +*/}} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "retool.agentSandbox.controller.name" . }} + labels: + {{- include "retool.agentSandbox.controller.selectorLabels" . | nindent 4 }} + {{- include "retool.agentSandbox.controller.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} +{{- if .Values.deployment.labels }} +{{ toYaml .Values.deployment.labels | indent 4 }} +{{- end }} +{{- if .Values.deployment.annotations }} + annotations: +{{ toYaml .Values.deployment.annotations | indent 4 }} +{{- end }} +spec: + replicas: {{ $as.controller.replicaCount }} + selector: + matchLabels: + {{- include "retool.agentSandbox.controller.selectorLabels" . | nindent 6 }} + revisionHistoryLimit: {{ .Values.revisionHistoryLimit }} + template: + metadata: + annotations: +{{- if .Values.podAnnotations }} +{{ toYaml .Values.podAnnotations | indent 8 }} +{{- end }} +{{- if $as.annotations }} +{{ toYaml $as.annotations | indent 8 }} +{{- end }} + labels: + {{- include "retool.agentSandbox.controller.selectorLabels" . | nindent 8 }} + {{- include "retool.agentSandbox.controller.labels" . | nindent 8 }} + {{- include "retool.labels" . | nindent 8 }} +{{- if .Values.podLabels }} +{{ toYaml .Values.podLabels | indent 8 }} +{{- end }} +{{- if $as.labels }} +{{ toYaml $as.labels | indent 8 }} +{{- end }} + spec: + serviceAccountName: {{ include "retool.agentSandbox.controller.name" . 
}} + automountServiceAccountToken: true + {{- if .Values.priorityClassName }} + priorityClassName: "{{ .Values.priorityClassName }}" + {{- end }} + containers: + - name: controller + image: "{{ $as.image.repository }}:{{ $as.image.tag }}" + imagePullPolicy: {{ $as.image.pullPolicy }} + ports: + - name: http + containerPort: {{ $as.controller.port }} + protocol: TCP + securityContext: + runAsUser: 1001 + runAsGroup: 1001 + runAsNonRoot: true + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: ["ALL"] + env: + - name: NODE_ENV + value: "production" + - name: AGENT_EXECUTOR_ROLE + value: "controller" + - name: CONTROLLER_PORT + value: {{ $as.controller.port | quote }} + - name: STATE_BACKEND + value: "postgres" + - name: AGENT_EXECUTOR_POSTGRES_URL + valueFrom: + secretKeyRef: + name: {{ $secretName }} + key: postgres-url + - name: AGENT_EXECUTOR_POSTGRES_SCHEMA + value: {{ $as.postgres.schema | quote }} + - name: AGENT_EXECUTOR_POSTGRES_POOL_MAX + value: {{ $as.postgres.poolMax | quote }} + - name: STATE_SWEEPER_INTERVAL_MS + value: {{ $as.postgres.sweeperIntervalMs | quote }} + - name: K8S_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: JOB_NAME_PREFIX + value: {{ include "retool.agentSandbox.name" . }}-job + - name: JOB_APP_LABEL + value: {{ include "retool.agentSandbox.name" . }} + - name: EXECUTOR_DEPLOYMENT_NAME + value: {{ include "retool.agentSandbox.name" . }} + - name: EXECUTOR_SERVICE_NAME + value: {{ include "retool.agentSandbox.name" . 
}}-pods + - name: SLOTS_PER_POD + value: {{ $as.controller.scaling.slotsPerPod | quote }} + - name: EXECUTOR_MIN_REPLICAS + value: {{ $as.controller.scaling.minReplicas | quote }} + - name: EXECUTOR_MAX_REPLICAS + value: {{ $as.controller.scaling.maxReplicas | quote }} + - name: SCALE_UP_THRESHOLD + value: {{ $as.controller.scaling.scaleUpThreshold | quote }} + - name: SCALE_DOWN_THRESHOLD + value: {{ $as.controller.scaling.scaleDownThreshold | quote }} + - name: SCALE_DOWN_GRACE_PERIOD_MS + value: {{ $as.controller.scaling.scaleDownGracePeriodMs | quote }} + - name: PREWARM_POOL_SIZE + value: {{ $as.controller.scaling.prewarmPoolSize | quote }} + - name: MAX_TOTAL_JOBS + value: {{ $as.controller.scaling.maxTotalJobs | quote }} + - name: MAX_CONCURRENT_CREATES + value: {{ $as.controller.scaling.maxConcurrentCreates | quote }} + - name: JOB_RETENTION_SECONDS + value: {{ $as.controller.scaling.jobRetentionSeconds | quote }} + - name: ASSIGNED_SANDBOX_TTL_SECONDS + value: {{ $as.controller.scaling.assignedSandboxTtlSeconds | quote }} + - name: RECONCILE_INTERVAL_MS + value: {{ $as.controller.scaling.reconcileIntervalMs | quote }} + - name: LEADER_TTL_MS + value: {{ $as.controller.scaling.leaderTtlMs | quote }} + - name: LEADER_RENEW_MS + value: {{ $as.controller.scaling.leaderRenewMs | quote }} + - name: DEPLOYED_IMAGE_TAG + value: {{ $as.image.tag | quote }} + - name: JOB_TEMPLATE_CONFIGMAP + value: {{ include "retool.agentSandbox.name" . }}-job-template + - name: DAEMONSET_NAME + value: {{ include "retool.agentSandbox.name" . 
}}-image-prepuller + {{- if or $as.jwtPublicKey $as.externalSecret.name }} + - name: AGENT_EXECUTOR_JWT_PUBLIC_KEY + valueFrom: + secretKeyRef: + name: {{ $secretName }} + key: jwt-public-key + {{- end }} + livenessProbe: + httpGet: + path: /livez + port: http + initialDelaySeconds: 3 + periodSeconds: 10 + timeoutSeconds: 3 + readinessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 1 + periodSeconds: 2 + timeoutSeconds: 3 + resources: + {{- toYaml $as.controller.resources | nindent 12 }} +{{- if .Values.image.pullSecrets }} + imagePullSecrets: +{{ toYaml .Values.image.pullSecrets | indent 8 }} +{{- end }} +{{- if $as.affinity }} + affinity: +{{ toYaml $as.affinity | indent 8 }} +{{- end }} +{{- if $nodeSelector }} + nodeSelector: +{{ toYaml $nodeSelector | indent 8 }} +{{- end }} + tolerations: +{{ toYaml $tolerations | indent 8 }} +--- +{{- /* +======================================================================= + Controller Service (ClusterIP) +======================================================================= +*/}} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "retool.agentSandbox.controller.name" . }} + labels: + {{- include "retool.agentSandbox.controller.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} +spec: + type: ClusterIP + ports: + - port: {{ $as.controller.port }} + targetPort: http + protocol: TCP + name: http + selector: + {{- include "retool.agentSandbox.controller.selectorLabels" . | nindent 4 }} +--- +{{- /* +======================================================================= + Proxy Deployment +======================================================================= +*/}} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "retool.agentSandbox.proxy.name" . }} + labels: + {{- include "retool.agentSandbox.proxy.selectorLabels" . | nindent 4 }} + {{- include "retool.agentSandbox.proxy.labels" . | nindent 4 }} + {{- include "retool.labels" . 
| nindent 4 }} +{{- if .Values.deployment.labels }} +{{ toYaml .Values.deployment.labels | indent 4 }} +{{- end }} +{{- if .Values.deployment.annotations }} + annotations: +{{ toYaml .Values.deployment.annotations | indent 4 }} +{{- end }} +spec: + replicas: {{ $as.proxy.replicaCount }} + selector: + matchLabels: + {{- include "retool.agentSandbox.proxy.selectorLabels" . | nindent 6 }} + revisionHistoryLimit: {{ .Values.revisionHistoryLimit }} + template: + metadata: + annotations: +{{- if .Values.podAnnotations }} +{{ toYaml .Values.podAnnotations | indent 8 }} +{{- end }} +{{- if $as.annotations }} +{{ toYaml $as.annotations | indent 8 }} +{{- end }} + labels: + {{- include "retool.agentSandbox.proxy.selectorLabels" . | nindent 8 }} + {{- include "retool.agentSandbox.proxy.labels" . | nindent 8 }} + {{- include "retool.labels" . | nindent 8 }} +{{- if .Values.podLabels }} +{{ toYaml .Values.podLabels | indent 8 }} +{{- end }} +{{- if $as.labels }} +{{ toYaml $as.labels | indent 8 }} +{{- end }} + spec: + automountServiceAccountToken: false + {{- if .Values.priorityClassName }} + priorityClassName: "{{ .Values.priorityClassName }}" + {{- end }} + containers: + - name: proxy + image: "{{ $as.image.repository }}:{{ $as.image.tag }}" + imagePullPolicy: {{ $as.image.pullPolicy }} + ports: + - name: http + containerPort: {{ $as.proxy.port }} + protocol: TCP + securityContext: + runAsUser: 1001 + runAsGroup: 1001 + runAsNonRoot: true + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: ["ALL"] + env: + - name: NODE_ENV + value: "production" + - name: AGENT_EXECUTOR_ROLE + value: "proxy" + - name: PROXY_PORT + value: {{ $as.proxy.port | quote }} + - name: STATE_BACKEND + value: "postgres" + - name: AGENT_EXECUTOR_POSTGRES_URL + valueFrom: + secretKeyRef: + name: {{ $secretName }} + key: postgres-url + - name: AGENT_EXECUTOR_POSTGRES_SCHEMA + value: {{ $as.postgres.schema | quote }} + - name: AGENT_EXECUTOR_POSTGRES_POOL_MAX + value: {{ 
$as.postgres.poolMax | quote }} + - name: STATE_SWEEPER_INTERVAL_MS + value: {{ $as.postgres.sweeperIntervalMs | quote }} + {{- if $as.proxy.allowedDomains }} + - name: ALLOWED_DOMAINS + value: {{ $as.proxy.allowedDomains | quote }} + {{- end }} + - name: BACKEND_URL + value: {{ $as.proxy.backendUrl | default (printf "http://%s:%s" (include "retool.fullname" .) (toString .Values.service.internalPort)) | quote }} + {{- if $as.proxy.backendDomainSuffixes }} + - name: BACKEND_DOMAIN_SUFFIXES + value: {{ $as.proxy.backendDomainSuffixes | quote }} + {{- end }} + {{- if or $as.encryptionKey $as.externalSecret.name }} + - name: AGENT_EXECUTOR_ENCRYPTION_KEY + valueFrom: + secretKeyRef: + name: {{ $secretName }} + key: encryption-key + {{- end }} + {{- if or $as.jwtPublicKey $as.externalSecret.name }} + - name: AGENT_EXECUTOR_JWT_PUBLIC_KEY + valueFrom: + secretKeyRef: + name: {{ $secretName }} + key: jwt-public-key + {{- end }} + - name: EXECUTOR_PORT + value: {{ $as.sandbox.port | quote }} + - name: EXECUTOR_SERVICE_NAME + value: {{ include "retool.agentSandbox.name" . 
}}-pods + - name: K8S_NAMESPACE + value: {{ .Release.Namespace | quote }} + {{- if $as.proxy.sandboxProxyTimeoutMs }} + - name: SANDBOX_PROXY_TIMEOUT_MS + value: {{ $as.proxy.sandboxProxyTimeoutMs | quote }} + {{- end }} + livenessProbe: + httpGet: + path: /livez + port: http + initialDelaySeconds: 3 + periodSeconds: 10 + timeoutSeconds: 3 + readinessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 1 + periodSeconds: 2 + timeoutSeconds: 3 + resources: + {{- toYaml $as.proxy.resources | nindent 12 }} +{{- if .Values.image.pullSecrets }} + imagePullSecrets: +{{ toYaml .Values.image.pullSecrets | indent 8 }} +{{- end }} +{{- if $as.affinity }} + affinity: +{{ toYaml $as.affinity | indent 8 }} +{{- end }} +{{- if $nodeSelector }} + nodeSelector: +{{ toYaml $nodeSelector | indent 8 }} +{{- end }} + tolerations: +{{ toYaml $tolerations | indent 8 }} +--- +{{- /* +======================================================================= + Proxy Service +======================================================================= +*/}} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "retool.agentSandbox.proxy.name" . }} + labels: + {{- include "retool.agentSandbox.proxy.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} +{{- with ($as.proxy.service).annotations }} + annotations: + {{- toYaml . | nindent 4 }} +{{- end }} +spec: + type: {{ ($as.proxy.service).type | default "ClusterIP" }} + ports: + - port: {{ $as.proxy.port }} + targetPort: http + protocol: TCP + name: http + selector: + {{- include "retool.agentSandbox.proxy.selectorLabels" . 
| nindent 4 }} +--- +{{- /* +======================================================================= + Proxy Ingress (optional — exposes proxy to frontend for WebSocket) +======================================================================= +*/}} +{{- if ($as.proxy.ingress).enabled }} +{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.Version }} +apiVersion: networking.k8s.io/v1 +{{- else }} +apiVersion: networking.k8s.io/v1beta1 +{{- end }} +kind: Ingress +metadata: + name: {{ include "retool.agentSandbox.proxy.name" . }} + labels: + {{- include "retool.agentSandbox.proxy.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} +{{- with $as.proxy.ingress.annotations }} + annotations: + {{- toYaml . | nindent 4 }} +{{- end }} +spec: + {{- if and $as.proxy.ingress.ingressClassName (semverCompare ">=1.18-0" .Capabilities.KubeVersion.Version) }} + ingressClassName: {{ $as.proxy.ingress.ingressClassName }} + {{- end }} + rules: + - host: {{ $as.proxy.ingress.host | quote }} + http: + paths: + - path: / + {{- if semverCompare ">=1.18-0" .Capabilities.KubeVersion.Version }} + pathType: Prefix + {{- end }} + backend: + {{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.Version }} + service: + name: {{ include "retool.agentSandbox.proxy.name" . }} + port: + number: {{ $as.proxy.port }} + {{- else }} + serviceName: {{ include "retool.agentSandbox.proxy.name" . }} + servicePort: {{ $as.proxy.port }} + {{- end }} +{{- with $as.proxy.ingress.tls }} + tls: + {{- toYaml . | nindent 4 }} +{{- end }} +--- +{{- end }} +{{- /* +======================================================================= + Headless Service for direct pod addressing (sandbox routing). + Executor Job pods use subdomain to register DNS: + <pod-hostname>.<subdomain>.<namespace>.svc.cluster.local:<port> +======================================================================= +*/}} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "retool.agentSandbox.name" . 
}}-pods + labels: + {{- include "retool.agentSandbox.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} +spec: + clusterIP: None + ports: + - port: {{ $as.sandbox.port }} + targetPort: {{ $as.sandbox.port }} + protocol: TCP + name: http + selector: + {{- include "retool.agentSandbox.selectorLabels" . | nindent 4 }} +--- +{{- /* +======================================================================= + PodDisruptionBudget for controller (when replicas > 1). The version switch below must not use right-trim markers: they would glue apiVersion onto the preceding "---" and merge this PDB into the previous document. +======================================================================= +*/}} +{{- if gt (int $as.controller.replicaCount) 1 }} +{{- if semverCompare ">=1.21-0" .Capabilities.KubeVersion.Version }} +apiVersion: policy/v1 +{{- else }} +apiVersion: policy/v1beta1 +{{- end }} +kind: PodDisruptionBudget +metadata: + name: {{ include "retool.agentSandbox.controller.name" . }} + labels: + {{- include "retool.agentSandbox.controller.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} +spec: + maxUnavailable: 1 + selector: + matchLabels: + {{- include "retool.agentSandbox.controller.selectorLabels" . | nindent 6 }} +--- +{{- end }} +{{- if .Values.podDisruptionBudget }}{{- /* PodDisruptionBudget for proxy; spec is the chart-wide podDisruptionBudget value. No right-trim on the version switch, so apiVersion starts on its own line after "---". */}} +{{- if semverCompare ">=1.21-0" .Capabilities.KubeVersion.Version }} +apiVersion: policy/v1 +{{- else }} +apiVersion: policy/v1beta1 +{{- end }} +kind: PodDisruptionBudget +metadata: + name: {{ include "retool.agentSandbox.proxy.name" . }} + labels: + {{- include "retool.agentSandbox.proxy.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} +spec: + {{- toYaml .Values.podDisruptionBudget | nindent 2 }} + selector: + matchLabels: + {{- include "retool.agentSandbox.proxy.selectorLabels" . 
| nindent 6 }} +{{- end }} +{{- end }} diff --git a/charts/retool/templates/deployment_backend.yaml b/charts/retool/templates/deployment_backend.yaml index 57206e55..d938d733 100644 --- a/charts/retool/templates/deployment_backend.yaml +++ b/charts/retool/templates/deployment_backend.yaml @@ -161,6 +161,7 @@ spec: - name: CODE_EXECUTOR_INGRESS_DOMAIN value: http://{{ template "retool.codeExecutor.name" . }} {{- end }} + {{- include "retool.agentSandbox.backendEnvVars" . | nindent 10 }} {{- if ($temporalConfig).sslEnabled }} - name: WORKFLOW_TEMPORAL_TLS_ENABLED value: "true" diff --git a/charts/retool/templates/deployment_code_executor.yaml b/charts/retool/templates/deployment_code_executor.yaml index b5ff877d..7b080207 100644 --- a/charts/retool/templates/deployment_code_executor.yaml +++ b/charts/retool/templates/deployment_code_executor.yaml @@ -61,6 +61,9 @@ spec: {{ else }} privileged: true {{ end }} + {{- if .Values.securityContext.extraContainerSecurityContext }} +{{ toYaml .Values.securityContext.extraContainerSecurityContext | indent 10 }} + {{- end }} env: - name: DEPLOYMENT_TEMPLATE_TYPE value: {{ template "retool.deploymentTemplateType" . }} @@ -115,11 +118,30 @@ spec: volumeMounts: {{- if .Values.codeExecutor.volumeMounts }} {{ toYaml .Values.codeExecutor.volumeMounts | indent 10 }} +{{- end }} +{{- if .Values.extraVolumeMounts }} +{{ toYaml .Values.extraVolumeMounts | indent 10 }} +{{- end }} +{{- range .Values.extraConfigMapMounts }} + - name: {{ .name }} + mountPath: {{ .mountPath }} + subPath: {{ .subPath }} +{{- end }} +{{- with .Values.extraContainers }} +{{ tpl . 
$ | indent 6 }} {{- end }} volumes: {{- if .Values.codeExecutor.volumes }} {{ toYaml .Values.codeExecutor.volumes | indent 8 }} {{- end }} +{{- range .Values.extraConfigMapMounts }} + - name: {{ .name }} + configMap: + name: {{ .configMap }} +{{- end }} +{{- if .Values.extraVolumes }} +{{ toYaml .Values.extraVolumes | indent 8 }} +{{- end }} {{- if .Values.image.pullSecrets }} imagePullSecrets: {{ toYaml .Values.image.pullSecrets | indent 8 }} diff --git a/charts/retool/templates/deployment_workflows.yaml b/charts/retool/templates/deployment_workflows.yaml index a03f741b..29e781b6 100644 --- a/charts/retool/templates/deployment_workflows.yaml +++ b/charts/retool/templates/deployment_workflows.yaml @@ -176,6 +176,7 @@ spec: value: http://{{ include "retool.workflowBackend.name" . }} - name: CODE_EXECUTOR_INGRESS_DOMAIN value: http://{{ template "retool.codeExecutor.name" . }} + {{- include "retool.agentSandbox.backendEnvVars" . | nindent 10 }} {{- if include "shouldIncludeConfigSecretsEnvVars" . }} - name: LICENSE_KEY valueFrom: diff --git a/charts/retool/values.yaml b/charts/retool/values.yaml index 1c925d2b..a28f72c6 100644 --- a/charts/retool/values.yaml +++ b/charts/retool/values.yaml @@ -643,6 +643,255 @@ agents: # Annotations for agent worker pods annotations: {} +# R2 Agent: server-side agent loop worker (independent from agents above). +r2Agent: + enabled: false + + # Labels for R2 agent worker pods + labels: {} + + # R2 agent configuration + config: {} + + # Annotations for R2 agent worker pods + annotations: {} + + # R2 agent worker configuration + worker: + replicaCount: 1 + + resources: + limits: + cpu: 2000m + memory: 4096Mi + requests: + cpu: 1000m + memory: 2048Mi + +# Agent Sandbox Service: sandboxed code execution for AI agents. +# Deploys a controller (manages sandbox lifecycle), proxy (HTTP proxy for sandbox egress), +# and ephemeral Job-based sandboxes. Uses Postgres for controller/proxy state. 
+agentSandbox: + enabled: false + + image: + repository: tryretool/agent-sandbox-service + tag: latest + pullPolicy: IfNotPresent + + # Lightweight init image used by the prepuller and seccomp DaemonSets. + # Pinning by digest is recommended for production. + initImage: + repository: busybox + tag: '1.37.0' + # Manifest list digest — set to '' in test environments where images are + # pre-loaded (containerd 2.0 can't resolve digest references for side-loaded images). + digest: '' + + # Annotations for agent sandbox pods + annotations: {} + + # Labels for agent sandbox pods + labels: {} + + # Pre-existing K8s Secret. When set, the chart skips creating its own Secret + # and references this for keys: jwt-public-key, jwt-private-key, encryption-key, + # api-secret, postgres-url. + externalSecret: + name: '' + + # Secrets (ignored when externalSecret.name is set) + # JWT key pair (ES256) for sandbox token authentication. + jwtPublicKey: '' + jwtPrivateKey: '' + # Hex-encoded 256-bit key for encrypting credentials stored in state backend. + # Must match the backend's AGENT_EXECUTOR_ENCRYPTION_KEY. + encryptionKey: '' + # API secret for admin/test endpoints. + apiSecret: '' + + # Postgres state backend (shared by controller and proxy for state coordination). + # Connection string for the agent sandbox's state database. + postgres: + url: '' + schema: 'agent_executor' + poolMax: 10 + sweeperIntervalMs: 60000 + + # Sandbox network access via pasta userspace networking. + # When enabled, sandboxes get isolated outbound access with L7 filtering. + sandboxNetwork: + enabled: true + # Deploy smarter-device-manager to register /dev/net/tun with the kubelet. + # Required because containerd's default device cgroup blocks /dev/net/tun; + # the device plugin's DeviceSpec path is the only reliable way to grant + # device cgroup access without privileged mode. + devicePlugin: true + # HTTP proxy for sandbox egress L7 filtering. 
Defaults to the in-cluster + # agent-sandbox-proxy service URL when empty. + httpProxy: '' + + # smarter-device-manager: registers /dev/net/tun with the kubelet so sandbox + # pods can request it via resources.limits. + devicePlugin: + image: + repository: ghcr.io/smarter-project/smarter-device-manager + tag: v1.20.12 + # Number of /dev/net/tun device slots to register. + # Set high enough to accommodate maxTotalJobs + prewarm pool. + maxDevices: 130 + + # Seccomp profile path relative to /var/lib/kubelet/seccomp/. + # The seccomp node-installer DaemonSet copies the profile to this path + # on every node automatically. + seccompProfile: retool/gvisor-seccomp.json + + # S3-compatible snapshot storage. + # When s3Bucket is set, snapshots are persisted to S3 and survive pod restarts. + snapshotStorage: + s3Bucket: '' + s3Endpoint: '' + s3Region: 'us-east-1' + # Name of a K8s Secret containing keys awsAccessKeyId and awsSecretAccessKey. + # If empty, falls back to the main agent sandbox secret. + credentialsSecretName: '' + + # Sandbox (Job) configuration + sandbox: + port: 3017 + resources: + requests: + cpu: 500m + memory: 512Mi + limits: + cpu: '2' + memory: 4Gi + # Per-sandbox cgroup memory.max limit in bytes. + sandboxMemoryLimit: '1610612736' # 1.5 GB + # Idle timeout (ms) before an unassigned sandbox self-terminates. + sandboxIdleTimeoutMs: 300000 + tmpDirSizeLimit: 20Gi + # Separate limit for the rootfs-appjob volume — the sandbox root filesystem + # is a static ~600MB extraction, so 2Gi provides headroom without the 20Gi + # allocated for /tmp. + rootfsSizeLimit: 2Gi + # Additional environment variables for sandbox containers. 
+ extraEnv: [] + + # Controller: tracks capacity, assigns sandbox pods, manages scaling + controller: + replicaCount: 1 + port: 3018 + resources: + requests: + cpu: 250m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + scaling: + slotsPerPod: 4 + minReplicas: 1 + maxReplicas: 10 + scaleUpThreshold: 2 + scaleDownThreshold: 8 + scaleDownGracePeriodMs: 300000 + prewarmPoolSize: 5 + maxTotalJobs: 50 + maxConcurrentCreates: 3 + jobRetentionSeconds: 300 + assignedSandboxTtlSeconds: 3600 + reconcileIntervalMs: 5000 + leaderTtlMs: 10000 + leaderRenewMs: 3000 + + # Proxy: HTTP proxy for sandbox egress with credential injection. + # The proxy must be reachable by frontend browsers for WebSocket connections. + proxy: + replicaCount: 1 + port: 3019 + resources: + requests: + cpu: 250m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + allowedDomains: '' + # URL the proxy uses to reach the Retool backend for token exchange. + # Defaults to http://<retool-fullname>:3000 (same-cluster backend service). + backendUrl: '' + backendDomainSuffixes: '' + sandboxProxyTimeoutMs: '' + service: + # Set to LoadBalancer or NodePort to expose the proxy externally. + type: ClusterIP + annotations: {} + # Optional ingress to expose the proxy to frontend browsers for WebSocket connections. + # This is separate from the main Retool ingress since the proxy typically runs on its own domain. + ingress: + enabled: false + # ingressClassName: + annotations: {} + # kubernetes.io/ingress.class: nginx + # nginx.ingress.kubernetes.io/proxy-read-timeout: "3600" + # nginx.ingress.kubernetes.io/proxy-send-timeout: "3600" + host: '' + # e.g. sandbox.yourdomain.com + tls: [] + # - secretName: sandbox-tls + # hosts: + # - sandbox.yourdomain.com + + # Backend integration: these tell the Retool backend how to reach agent executor. + # controllerUrl and proxyUrl default to internal service URLs when empty. 
+ controllerUrl: '' + proxyUrl: '' + # Required: public URL for frontend browsers to reach the proxy via WebSocket. + # e.g. https://sandbox.yourdomain.com + frontendWsProxyDomain: '' + # Public URL for proxy domain. Defaults to frontendWsProxyDomain if empty. + proxyDomain: '' + + # NetworkPolicy: restrict sandbox, controller, and proxy pod traffic. + # Strongly recommended for production to isolate sandbox egress. + networkPolicy: + enabled: false + # CIDR ranges to block in proxy egress rules. Must stay in sync with + # DEFAULT_BLOCKED_CIDRS in the agent-executor source. + blockedRanges: + - 169.254.0.0/16 # link-local / cloud metadata + - 10.0.0.0/8 # private (RFC 1918) + - 172.16.0.0/12 # private (RFC 1918) + - 192.168.0.0/16 # private (RFC 1918) + - 100.64.0.0/10 # carrier-grade NAT (RFC 6598) + - 127.0.0.0/8 # loopback + - 0.0.0.0/8 # "this network" (RFC 791) + blockedRanges6: + - fc00::/7 # IPv6 unique local addresses + - fe80::/10 # IPv6 link-local + - '::1/128' # IPv6 loopback + # Restrict DNS egress to pods matching this selector (typically kube-dns/coredns). + # Set to empty to allow DNS to any destination (not recommended). + dnsSelector: + namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: kube-system + podSelector: + matchLabels: + k8s-app: kube-dns + extraEgress: [] + # CIDRs allowed in proxy egress even if they fall within blockedRanges + # (e.g. private backend endpoint). + backendAllowlist: [] + # Override sandbox ingress source selector (defaults to retool backend pods). + ingressFrom: [] + + # Node placement overrides (falls back to global nodeSelector/tolerations if empty) + nodeSelector: {} + tolerations: [] + affinity: {} + # SHARED TEMPORAL CONFIGURATION # This configuration is shared between all workers. # In order to use workers, temporal must be configured. 
diff --git a/values.yaml b/values.yaml index 1c925d2b..a28f72c6 100644 --- a/values.yaml +++ b/values.yaml @@ -643,6 +643,255 @@ agents: # Annotations for agent worker pods annotations: {} +# R2 Agent: server-side agent loop worker (independent from agents above). +r2Agent: + enabled: false + + # Labels for R2 agent worker pods + labels: {} + + # R2 agent configuration + config: {} + + # Annotations for R2 agent worker pods + annotations: {} + + # R2 agent worker configuration + worker: + replicaCount: 1 + + resources: + limits: + cpu: 2000m + memory: 4096Mi + requests: + cpu: 1000m + memory: 2048Mi + +# Agent Sandbox Service: sandboxed code execution for AI agents. +# Deploys a controller (manages sandbox lifecycle), proxy (HTTP proxy for sandbox egress), +# and ephemeral Job-based sandboxes. Uses Postgres for controller/proxy state. +agentSandbox: + enabled: false + + image: + repository: tryretool/agent-sandbox-service + tag: latest + pullPolicy: IfNotPresent + + # Lightweight init image used by the prepuller and seccomp DaemonSets. + # Pinning by digest is recommended for production. + initImage: + repository: busybox + tag: '1.37.0' + # Manifest list digest — set to '' in test environments where images are + # pre-loaded (containerd 2.0 can't resolve digest references for side-loaded images). + digest: '' + + # Annotations for agent sandbox pods + annotations: {} + + # Labels for agent sandbox pods + labels: {} + + # Pre-existing K8s Secret. When set, the chart skips creating its own Secret + # and references this for keys: jwt-public-key, jwt-private-key, encryption-key, + # api-secret, postgres-url. + externalSecret: + name: '' + + # Secrets (ignored when externalSecret.name is set) + # JWT key pair (ES256) for sandbox token authentication. + jwtPublicKey: '' + jwtPrivateKey: '' + # Hex-encoded 256-bit key for encrypting credentials stored in state backend. + # Must match the backend's AGENT_EXECUTOR_ENCRYPTION_KEY. 
+ encryptionKey: '' + # API secret for admin/test endpoints. + apiSecret: '' + + # Postgres state backend (shared by controller and proxy for state coordination). + # Connection string for the agent sandbox's state database. + postgres: + url: '' + schema: 'agent_executor' + poolMax: 10 + sweeperIntervalMs: 60000 + + # Sandbox network access via pasta userspace networking. + # When enabled, sandboxes get isolated outbound access with L7 filtering. + sandboxNetwork: + enabled: true + # Deploy smarter-device-manager to register /dev/net/tun with the kubelet. + # Required because containerd's default device cgroup blocks /dev/net/tun; + # the device plugin's DeviceSpec path is the only reliable way to grant + # device cgroup access without privileged mode. + devicePlugin: true + # HTTP proxy for sandbox egress L7 filtering. Defaults to the in-cluster + # agent-sandbox-proxy service URL when empty. + httpProxy: '' + + # smarter-device-manager: registers /dev/net/tun with the kubelet so sandbox + # pods can request it via resources.limits. + devicePlugin: + image: + repository: ghcr.io/smarter-project/smarter-device-manager + tag: v1.20.12 + # Number of /dev/net/tun device slots to register. + # Set high enough to accommodate maxTotalJobs + prewarm pool. + maxDevices: 130 + + # Seccomp profile path relative to /var/lib/kubelet/seccomp/. + # The seccomp node-installer DaemonSet copies the profile to this path + # on every node automatically. + seccompProfile: retool/gvisor-seccomp.json + + # S3-compatible snapshot storage. + # When s3Bucket is set, snapshots are persisted to S3 and survive pod restarts. + snapshotStorage: + s3Bucket: '' + s3Endpoint: '' + s3Region: 'us-east-1' + # Name of a K8s Secret containing keys awsAccessKeyId and awsSecretAccessKey. + # If empty, falls back to the main agent sandbox secret. 
+ credentialsSecretName: '' + + # Sandbox (Job) configuration + sandbox: + port: 3017 + resources: + requests: + cpu: 500m + memory: 512Mi + limits: + cpu: '2' + memory: 4Gi + # Per-sandbox cgroup memory.max limit in bytes. + sandboxMemoryLimit: '1610612736' # 1.5 GiB (1.5 * 1024^3 bytes) + # Idle timeout (ms) before an unassigned sandbox self-terminates. + sandboxIdleTimeoutMs: 300000 + tmpDirSizeLimit: 20Gi + # Separate limit for the rootfs-appjob volume — the sandbox root filesystem + # is a static ~600MB extraction, so 2Gi provides headroom without the 20Gi + # allocated for /tmp. + rootfsSizeLimit: 2Gi + # Additional environment variables for sandbox containers. + extraEnv: [] + + # Controller: tracks capacity, assigns sandbox pods, manages scaling + controller: + replicaCount: 1 + port: 3018 + resources: + requests: + cpu: 250m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + scaling: + slotsPerPod: 4 + minReplicas: 1 + maxReplicas: 10 + scaleUpThreshold: 2 + scaleDownThreshold: 8 + scaleDownGracePeriodMs: 300000 + prewarmPoolSize: 5 + maxTotalJobs: 50 + maxConcurrentCreates: 3 + jobRetentionSeconds: 300 + assignedSandboxTtlSeconds: 3600 + reconcileIntervalMs: 5000 + leaderTtlMs: 10000 + leaderRenewMs: 3000 + + # Proxy: HTTP proxy for sandbox egress with credential injection. + # The proxy must be reachable by frontend browsers for WebSocket connections. + proxy: + replicaCount: 1 + port: 3019 + resources: + requests: + cpu: 250m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + allowedDomains: '' + # URL the proxy uses to reach the Retool backend for token exchange. + # Defaults to http://{backend-service-name}:3000 (same-cluster backend service). + backendUrl: '' + backendDomainSuffixes: '' + sandboxProxyTimeoutMs: '' + service: + # Set to LoadBalancer or NodePort to expose the proxy externally. + type: ClusterIP + annotations: {} + # Optional ingress to expose the proxy to frontend browsers for WebSocket connections.
+ # This is separate from the main Retool ingress since the proxy typically runs on its own domain. + ingress: + enabled: false + # ingressClassName: + annotations: {} + # kubernetes.io/ingress.class: nginx + # nginx.ingress.kubernetes.io/proxy-read-timeout: "3600" + # nginx.ingress.kubernetes.io/proxy-send-timeout: "3600" + host: '' + # e.g. sandbox.yourdomain.com + tls: [] + # - secretName: sandbox-tls + # hosts: + # - sandbox.yourdomain.com + + # Backend integration: these tell the Retool backend how to reach agent executor. + # controllerUrl and proxyUrl default to internal service URLs when empty. + controllerUrl: '' + proxyUrl: '' + # Required: public URL for frontend browsers to reach the proxy via WebSocket. + # e.g. https://sandbox.yourdomain.com + frontendWsProxyDomain: '' + # Public URL for proxy domain. Defaults to frontendWsProxyDomain if empty. + proxyDomain: '' + + # NetworkPolicy: restrict sandbox, controller, and proxy pod traffic. + # Strongly recommended for production to isolate sandbox egress. + networkPolicy: + enabled: false + # CIDR ranges to block in proxy egress rules. Must stay in sync with + # DEFAULT_BLOCKED_CIDRS in the agent-executor source. + blockedRanges: + - 169.254.0.0/16 # link-local / cloud metadata + - 10.0.0.0/8 # private (RFC 1918) + - 172.16.0.0/12 # private (RFC 1918) + - 192.168.0.0/16 # private (RFC 1918) + - 100.64.0.0/10 # carrier-grade NAT (RFC 6598) + - 127.0.0.0/8 # loopback + - 0.0.0.0/8 # "this network" (RFC 791) + blockedRanges6: + - fc00::/7 # IPv6 unique local addresses + - fe80::/10 # IPv6 link-local + - '::1/128' # IPv6 loopback + # Restrict DNS egress to pods matching this selector (typically kube-dns/coredns). + # Set to empty to allow DNS to any destination (not recommended). 
+ dnsSelector: + namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: kube-system + podSelector: + matchLabels: + k8s-app: kube-dns + extraEgress: [] + # CIDRs allowed in proxy egress even if they fall within blockedRanges + # (e.g. private backend endpoint). + backendAllowlist: [] + # Override sandbox ingress source selector (defaults to retool backend pods). + ingressFrom: [] + + # Node placement overrides (falls back to global nodeSelector/tolerations if empty) + nodeSelector: {} + tolerations: [] + affinity: {} + # SHARED TEMPORAL CONFIGURATION # This configuration is shared between all workers. # In order to use workers, temporal must be configured.