From 2b94436ff82e9e43466f665a442256b1a6eeb9f8 Mon Sep 17 00:00:00 2001
From: Antonio Paolillo
Date: Tue, 3 Feb 2026 09:53:37 +0100
Subject: [PATCH] Add image squashing via multi-stage build

Introduce optional image squashing support in DockerBuilder. When enabled,
the generated Dockerfile uses a multi-stage build where the final stage
starts FROM scratch and copies the full filesystem from the build stage,
eliminating layer bloat from intermediate steps.

The implementation preserves ARG, ENV, WORKDIR, USER, CMD, and ENTRYPOINT
metadata in the final image, and defaults to CMD ["/bin/bash"] when neither
CMD nor ENTRYPOINT is specified. ARG and ENV instructions are re-applied in
their original order, and an existing stage name on the FROM line is
detected case-insensitively.

Signed-off-by: Antonio Paolillo
---
 src/pythainer/builders/__init__.py | 113 ++++++++++++++++++++++++++++-
 1 file changed, 112 insertions(+), 1 deletion(-)

diff --git a/src/pythainer/builders/__init__.py b/src/pythainer/builders/__init__.py
index c0dd451..b7963fd 100644
--- a/src/pythainer/builders/__init__.py
+++ b/src/pythainer/builders/__init__.py
@@ -355,6 +355,101 @@ def __init__(
         self._tag = tag
         self._package_manager = package_manager
         self._use_buildkit = use_buildkit
+        self._squash = False
+
+    def squash(self) -> None:
+        """
+        Enable squashing of the Docker image.
+
+        When enabled, the build will generate a multi-stage Dockerfile where the final
+        stage starts FROM scratch and copies the entire filesystem from the build stage.
+        This eliminates layer bloat from intermediate build steps (e.g., files that were
+        created and then deleted).
+
+        ARG and ENV instructions, as well as the last WORKDIR, USER, ENTRYPOINT, and CMD
+        instructions, are preserved in the final image. When neither CMD nor ENTRYPOINT
+        was specified, the final image defaults to CMD ["/bin/bash"].
+        """
+        self._squash = True
+
+    def _extract_dockerfile_instructions(self, prefixes: tuple[str, ...]) -> List[str]:
+        """
+        Extract Dockerfile instructions matching given prefixes.
+
+        Only StrDockerBuildCommand entries are inspected; other command types are skipped.
+
+        Parameters:
+            prefixes: Tuple of instruction prefixes to match (e.g., ("ARG ", "ENV ")).
+
+        Returns:
+            List of matching instruction strings, in build-command order.
+        """
+        results = []
+        for cmd in self._build_commands:
+            if not isinstance(cmd, StrDockerBuildCommand):
+                continue
+            cmd_str = cmd.get_str_for_dockerfile()
+            if cmd_str.startswith(prefixes):
+                results.append(cmd_str)
+        return results
+
+    def _find_last_dockerfile_instruction(self, prefix: str) -> str | None:
+        """
+        Find the last Dockerfile instruction matching a given prefix.
+
+        Parameters:
+            prefix: Instruction prefix to match (e.g., "WORKDIR ").
+
+        Returns:
+            The last matching instruction string, or None if not found.
+        """
+        # Reuse the extraction helper so the matching rules live in one place.
+        matches = self._extract_dockerfile_instructions((prefix,))
+        return matches[-1] if matches else None
+
+    def _generate_squash_suffix(self) -> str:
+        """
+        Generate the second stage of a multi-stage Dockerfile for squashing.
+
+        The stage starts FROM scratch, copies the whole filesystem of the build stage,
+        and re-applies metadata found in the build commands: every ARG and ENV
+        instruction (in their original order) and the last WORKDIR, USER, ENTRYPOINT,
+        and CMD instructions. Other instructions are not carried over.
+
+        NOTE: The filesystem is copied from stage 0 (the first FROM), so this assumes
+              the build commands describe a single build stage.
+
+        Returns:
+            str: Dockerfile content for the squash stage.
+
+        TODO: This implementation relies on string parsing of Dockerfile commands,
+              which is fragile. Once pythainer has a proper AST representation for
+              Dockerfile instructions (instead of raw strings), this should be
+              refactored to use typed command objects directly.
+        """
+        lines = [
+            "",
+            "# Squash stage: copy filesystem and reapply metadata",
+            "FROM scratch",
+            "COPY --from=0 / /",
+        ]
+
+        # Re-apply ARG and ENV in their original relative order: an ENV may reference an
+        # ARG declared before it, and ARGs are needed for variable references in USER/WORKDIR.
+        lines.extend(self._extract_dockerfile_instructions(("ARG ", "ENV ")))
+
+        # Find the last WORKDIR, USER, ENTRYPOINT, and CMD (one scan per instruction)
+        last_by_prefix = {
+            prefix: self._find_last_dockerfile_instruction(prefix)
+            for prefix in ("WORKDIR ", "USER ", "ENTRYPOINT ", "CMD ")
+        }
+        lines.extend(instruction for instruction in last_by_prefix.values() if instruction)
+
+        # Default to /bin/bash if no CMD or ENTRYPOINT was specified
+        if not last_by_prefix["CMD "] and not last_by_prefix["ENTRYPOINT "]:
+            lines.append('CMD ["/bin/bash"]')
+
+        return "\n".join(lines) + "\n"
 
     def generate_dockerfile(self, dockerfile_paths: List[PathType]) -> None:
         """
@@ -364,11 +459,27 @@ def generate_dockerfile(self, dockerfile_paths: List[PathType]) -> None:
         Parameters:
             dockerfile_paths (List[PathType]): A list of paths where the Dockerfile should be saved.
         """
+        # For squash mode, add "AS builder" to the FROM line
+        commands = self._build_commands
+        if self._squash:
+            commands = []
+            for cmd in self._build_commands:
+                if isinstance(cmd, StrDockerBuildCommand):
+                    cmd_str = cmd.get_str_for_dockerfile()
+                    # Dockerfile keywords are case-insensitive, so "as" must be detected too.
+                    if cmd_str.startswith("FROM ") and " AS " not in cmd_str.upper():
+                        commands.append(StrDockerBuildCommand(cmd_str + " AS builder"))
+                        continue
+                commands.append(cmd)
+
         dockerfile_content = render_dockerfile_content(
             package_manager=self._package_manager,
-            commands=self._build_commands,
+            commands=commands,
         )
 
+        if self._squash:
+            dockerfile_content += self._generate_squash_suffix()
+
         all_dockerfile_paths = dockerfile_paths + [
             "/tmp/Dockerfile",
             "/tmp/pythainer/docker/latest/Dockerfile",