Edison-Watch · IliaMManolov · Jun 20, 2025 · Jul 8, 2025 · Jul 10, 2025 · Jul 10, 2025
diff --git a/common/config/global_config.yaml b/common/config/global_config.yaml
@@ -234,6 +234,10 @@ scope:
     force_filter_libs: false
     force_filter_interfaces: false
 
-# Sentry error reporting
+build_agent:
+  enabled: false
+  approved_commands: ["forge", "npm", "solc", "foundryup", "pnpm", "yarn", "git submodule", "nvm"]
+
+# error reporting
 sentry:
-  enabled: true
+  enabled: true
diff --git a/hackbot/src/pocgen/build_agent.py b/hackbot/src/pocgen/build_agent.py
@@ -0,0 +1,116 @@
+from hackbot.src.pocgen.runtime_environment import RuntimeEnvironment, ExecutionResults
+from typing import Sequence
+from hackbot.utils.dspy.tools import enrich_tools, dspy_tool, async_execute_command_tool, async_common_tools
+from common.config import global_config
+
+import dspy
+from hackbot.utils.dspy.dspy_inference import DSPYInference
+
+
+@dspy_tool(runtime_env=RuntimeEnvironment, approved_commands=Sequence[str])
+async def final_check_before_completion(*, runtime_env: RuntimeEnvironment, approved_commands: Sequence[str]) -> str:
+    """Always run this tool before finishing. This tool will tell you whether you are done or not."""
+    # NOTE(review): the docstring above is presumably what the agent sees as the tool description — keep it verbatim.
+    print("Trying to finish up...")
+
+    # Only `forge build` is verified here; output starting with "Error" is treated as a failed build.
+    build_output = await async_execute_command_tool(
+        "forge build", runtime_env=runtime_env, approved_commands=approved_commands
+    )
+    build_failed = build_output.startswith("Error")
+    if not build_failed:
+        print("Forge build passed!")
+        return "Success: Forge build passed!"
+
+    failure_prefix = "Failure: Forge build failed! Please continue getting the repo to build. Here is your output: "
+    return failure_prefix + build_output
+
+
+class RunCommands(dspy.Signature):
+    """
+    Run commands in the given directory.
+
+    IMPORTANT: Your response MUST strictly follow the format below. Each field, including the headers, must be on a new line.
+
+    Example:
+    [[ ## next_thought ## ]]
+    I need to see what files are in the current directory.
+    [[ ## next_tool_name ## ]]
+    async_list_files_tool
+    [[ ## next_tool_args ## ]]
+    {"relative_path": "."}
+    """
+    # Fields: one input (the task prompt), then three outputs the agent reports. The docstring above is prompt text, not a comment.
+    high_level_instructions: str = dspy.InputField()  # type: ignore # noqa
+    forge_build_runs: bool = dspy.OutputField()  # type: ignore # noqa
+    forge_test_runs: bool = dspy.OutputField()  # type: ignore # noqa
+    shell_commands_ran: list[str] = dspy.OutputField()  # type: ignore # noqa
+
+
+async def compile_codebase(runtime_env: RuntimeEnvironment) -> ExecutionResults:
+    """Build the Foundry project by letting a dspy tool-using LLM agent run commands.
+
+    The working tree is hard-reset and cleaned before the agent starts.
+
+    Args:
+        runtime_env: Environment used for the git cleanup and handed to the agent tools.
+
+    Returns:
+        ExecutionResults with the agent-reported commands comma-joined in stdout;
+        exit_code is 0 when the agent reports `forge_build_runs`, otherwise 1.
+    """
+    approved_commands = global_config.build_agent.approved_commands
+    # Start from a pristine checkout: drop local edits plus untracked and ignored files.
+    for git_args in (["reset", "--hard"], ["clean", "-ffdx"]):
+        await runtime_env.execute_command(["git", *git_args])
+
+    # All dspy inference and tool execution must happen within this block
+    with enrich_tools(runtime_env=runtime_env, approved_commands=approved_commands):
+        agent = DSPYInference(
+            pred_signature=RunCommands,
+            tools=[*async_common_tools, final_check_before_completion],
+            max_iters=50,
+        )
+
+        prediction = await agent.run(
+            high_level_instructions=f"""
+                You are a professional smart contract security engineer.
+                Your current directory is inside of a git repository of a Foundry Forge Solidity project.
+                Your job is to build the project and run the tests.
+                You are allowed to read through files and folders in the project to understand it.
+                You can additionally only run the following commands at the root of the project:
+                {', '.join(approved_commands)}
+                You will not combine commands with && or ||.
+                You will always try to use the non-interactive or json versions of the commands. This is because the output is passed back to you in the same format.
+                Sometimes repos fail to give instructions on how to set up dependencies. In this case you should figure out how to pull them from one of the supplied commands.
+                For example, if you find out that forge-std doesn't exist, you can pull it in via `forge install foundry-rs/forge-std`.
+                Please be mindful of required versions and stick to them.
+                You should run a command with `--help` if you are unsure about its capabilities and don't have any ideas of how to proceed.
+                When supplying any relative path to a tool you will never use `..`. This is forbidden and will cause the tool to fail.
+                You are not allowed to move freely using `cd`.
+                You can additionally modify any file in the project that you have already read.
+                Your goal is to run `forge build` successfully and to run `forge test` (which must complete but may have failing tests so it may not necessarily "succeed").
+                Keep in mind that the `forge soldeer` subcommand exists and is the dependency manager for some projects. You should expect to see a `soldeer.lock` in these cases.
+                You will not give up until these commands run successfully.
+                You will never run any commands that open ports or a shell.
+                You only have one shot at this so make sure to do it right.
+                You should start by finding and opening the readme. Then, you should see what other files and folders are in the project.
+                Your response will always finish with a call to `final_check_before_completion` that returns a success.
+                You will not execute any further commands after you have verified that `forge build` and `forge test` work.
+            """
+        )
+        print(prediction.shell_commands_ran)
+
+    # The agent's self-reported build flag decides the outcome; forge_test_runs is not consulted.
+    succeeded = bool(prediction.forge_build_runs)
+    return ExecutionResults(
+        stdout=",".join(prediction.shell_commands_ran),
+        stderr="" if succeeded else "Build agent failed to build the repository.",
+        exit_code=0 if succeeded else 1,
+        cmd_full="build_agent",
+    )
+
+
+
+
+
diff --git a/hackbot/src/pocgen/repo.py b/hackbot/src/pocgen/repo.py
@@ -19,6 +19,7 @@
 from hackbot.src.pocgen.utils import BugProcessingEnvInfo
 from common.utils.spinner import EnhancedSpinner as Spinner
 from common.utils.logging_utils import logger as log
+from hackbot.src.pocgen.build_agent import compile_codebase as agentic_compile
 
 
 @dataclass
@@ -550,11 +551,16 @@ async def deps_and_compile(self) -> ExecutionResults:
         Returns:
             ExecutionResults from the compilation
         """
-        # Install dependencies first
-        await self.install_dependencies(run_npm_install=True)
-
-        # Then compile
-        return await self.compile_codebase()
+        if global_config.build_agent.enabled:
+            log.info("Using the build agent to compile the codebase.")
+            return await agentic_compile(self.runtime_env)
+        else:
+            log.info("Using the classic deterministic method to compile the codebase.")
+            # Install dependencies first
+            await self.install_dependencies(run_npm_install=True)
+
+            # Then compile
+            return await self.compile_codebase()
 
     async def copy_to(self, target_dir: Path) -> None:
         """Copy the current runtime environment to a target directory (on real disk)."""

diff --git a/hackbot/src/pocgen/runtime_environment.py b/hackbot/src/pocgen/runtime_environment.py
@@ -3,7 +3,7 @@
 import subprocess
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Any, ContextManager, Protocol
+from typing import Any, ContextManager, Protocol, runtime_checkable
 
 from common.utils import tempfiles
 from hackbot.utils.docker.docker_runtime import DockerWrapper
@@ -22,6 +22,7 @@ class ExecutionResults:
     cmd_full: str  # noqa
 
 
+@runtime_checkable
 class RuntimeEnvironment(Protocol):
     """Protocol defining the interface for runtime environments."""
 

diff --git a/hackbot/tests/pocgen/test_repo.py b/hackbot/tests/pocgen/test_repo.py
@@ -0,0 +1,26 @@
+import pytest
+# import asyncio
+from hackbot.src.pocgen.repo import Repository
+from hackbot.tests.test_template import slow_and_nondeterministic_test, TestTemplate
+# from hackbot.src.pocgen.runtime_environment import LocalEnvironment
+from common.utils import tempfiles
+from hackbot.utils.git.clone import clone_repository
+
+class TestRepo(TestTemplate):
+    """End-to-end check that a cloned Foundry repository can be set up and compiled."""
+    @pytest.fixture(autouse=True)
+    def setup_method(self, setup):
+        """Request the `setup` fixture for every test in this class; nothing else to do."""
+
+    @pytest.mark.asyncio
+    @pytest.mark.parametrize("repo_url", ["https://github.com/code-423n4/2022-04-backed"])
+    @slow_and_nondeterministic_test
+    async def test_deps_and_compile_with_agent(self, repo_url: str):
+        """Clone `repo_url` into a temp dir and assert that `deps_and_compile` exits with code 0."""
+        # NOTE(review): the agent path runs only if global_config.build_agent.enabled is true (YAML default: false).
+        with tempfiles.TemporaryDirectory() as temp_dir:
+            clone_repository(repo_url, target_dir=temp_dir)
+
+            async with Repository(from_dir=temp_dir) as repo:
+                result = await repo.deps_and_compile()
+                assert result.exit_code == 0, f"Compilation failed with exit code {result.exit_code}\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
diff --git a/hackbot/utils/dspy/demo.py b/hackbot/utils/dspy/demo.py
@@ -3,6 +3,7 @@
 import sys
 from typing import Sequence
 from hackbot.utils.dspy.tools import common_tools, execute_command_tool, enrich_tools, dspy_tool
+from hackbot.utils.dspy.dspy_structs import dspy_to_llm_struct
 from pathlib import Path
 
 import dspy
@@ -80,7 +81,7 @@ class RunCommands(dspy.Signature):
         max_iters=50,
     )
 
-    result = asyncio.run(
+    raw_result: dspy.Prediction = asyncio.run(
         inf_module.run(
             high_level_instructions=f"""
             You are a professional smart contract security engineer.
@@ -109,4 +110,6 @@ class RunCommands(dspy.Signature):
         """
         )
     )
-    print(result.shell_command_ran)
+    result = dspy_to_llm_struct(raw_result, RunCommands)
+    shell_command_ran = result.shell_command_ran
+    print(shell_command_ran)
diff --git a/hackbot/utils/dspy/dspy_inference.py b/hackbot/utils/dspy/dspy_inference.py
@@ -1,6 +1,5 @@
-from typing import Callable, Awaitable, Any, get_origin
+from typing import Callable, Any, get_origin
 import dspy
-from dspy.utils.asyncify import asyncify as dspy_asyncify
 from langfuse.decorators import observe
 from litellm.exceptions import ServiceUnavailableError
 from loguru import logger as log
@@ -111,9 +110,6 @@ def __init__(
             )
         else:
             self.inference_module = dspy.Predict(pred_signature)
-        self.inference_module_async: Callable[..., Awaitable[Any]] = dspy_asyncify(
-            self.inference_module
-        )
 
     @observe()
     @retry(
@@ -130,13 +126,14 @@ def __init__(
     async def run(
         self,
         **kwargs: Any,
-    ) -> Any:
+    ) -> dspy.Prediction:
         try:
+            result: dspy.Prediction
             if self.callback:
                 with dspy.context(lm=self.lm, callbacks=[self.callback]):
-                    result = await self.inference_module_async(**kwargs, lm=self.lm)
+                    result = await self.inference_module.acall(**kwargs, lm=self.lm)
             else:
-                result = await self.inference_module_async(**kwargs, lm=self.lm)
+                result = await self.inference_module.acall(**kwargs, lm=self.lm)
             return result
         except Exception as e:
             log.error(f"Error in run: {str(e)}")