Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions common/config/global_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,10 @@ scope:
force_filter_libs: false
force_filter_interfaces: false

# Sentry error reporting
build_agent:
enabled: false
approved_commands: ["forge", "npm", "solc", "foundryup", "pnpm", "yarn", "git submodule", "nvm"]

# error reporting
sentry:
enabled: true
enabled: true
116 changes: 116 additions & 0 deletions hackbot/src/pocgen/build_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
from hackbot.src.pocgen.runtime_environment import RuntimeEnvironment, ExecutionResults
from typing import Sequence
from hackbot.utils.dspy.tools import enrich_tools, dspy_tool, async_execute_command_tool, async_common_tools
from common.config import global_config

import dspy
from hackbot.utils.dspy.dspy_inference import DSPYInference


@dspy_tool(runtime_env=RuntimeEnvironment, approved_commands=Sequence[str])
async def final_check_before_completion(*, runtime_env: RuntimeEnvironment, approved_commands: Sequence[str]) -> str:
    """Always run this tool before finishing. This tool will tell you whether you are done or not."""
    # NOTE(review): the docstring above is kept verbatim because it is presumably
    # exposed to the agent as this tool's description — confirm before rewording.
    print("Trying to finish up...")

    build_output = await async_execute_command_tool(
        "forge build",
        runtime_env=runtime_env,
        approved_commands=approved_commands,
    )

    # Output that does not begin with "Error" is treated as a successful build.
    if not build_output.startswith("Error"):
        print("Forge build passed!")
        return "Success: Forge build passed!"

    # Hand the raw build output back so the agent can keep fixing the repo.
    return (
        "Failure: Forge build failed! Please continue getting the repo to build. Here is your output: "
        f"{build_output}"
    )


class RunCommands(dspy.Signature):
    """
    Run commands in the given directory.

    IMPORTANT: Your response MUST strictly follow the format below. Each field, including the headers, must be on a new line.

    Example:
    [[ ## next_thought ## ]]
    I need to see what files are in the current directory.
    [[ ## next_tool_name ## ]]
    async_list_files_tool
    [[ ## next_tool_args ## ]]
    {"relative_path": "."}
    """

    # NOTE(review): the docstring above is presumably sent to the model as the
    # signature's instructions, so treat it as prompt text rather than as
    # developer documentation — confirm before editing it.

    # Input: free-form task description; compile_codebase passes its full prompt here.
    high_level_instructions: str = dspy.InputField() # type: ignore # noqa
    # Output: whether `forge build` ran successfully, as reported by the model;
    # compile_codebase derives its exit code from this flag.
    forge_build_runs: bool = dspy.OutputField() # type: ignore # noqa
    # Output: whether `forge test` ran, as reported by the model.
    forge_test_runs: bool = dspy.OutputField() # type: ignore # noqa
    # Output: the shell commands the agent reports having executed;
    # compile_codebase prints them and returns them comma-joined as stdout.
    shell_commands_ran: list[str] = dspy.OutputField() # type: ignore # noqa


def _join_reported_commands(reported: "Sequence[str] | str | None") -> str:
    """Render the agent-reported command list as one comma-separated string.

    The value comes from model output, so it is normalised defensively:
    ``None`` becomes an empty string, a bare string is returned unchanged
    (joining it would split it into characters), and every item of a
    sequence is coerced with ``str`` before joining.
    """
    if reported is None:
        return ""
    if isinstance(reported, str):
        return reported
    return ",".join(str(command) for command in reported)


async def compile_codebase(runtime_env: RuntimeEnvironment) -> ExecutionResults:
    """Compile the codebase using a dspy-based LLM agent.

    The working tree is first reset with ``git reset --hard`` and
    ``git clean -ffdx``. An agent built from ``RunCommands`` is then given the
    common tools plus ``final_check_before_completion`` and asked to get
    ``forge build`` and ``forge test`` running, using only the commands listed
    in ``global_config.build_agent.approved_commands``.

    Args:
        runtime_env: Environment in which the git commands and the agent's
            tools are executed.

    Returns:
        An ``ExecutionResults`` whose ``stdout`` is the comma-joined list of
        commands the agent reports having run and whose ``cmd_full`` is
        ``"build_agent"``. ``exit_code`` is 0 when the agent reports that
        ``forge build`` runs, otherwise 1 with an explanatory ``stderr``.
        The flag is the model's own report; it is not re-verified here.
    """

    approved_commands = global_config.build_agent.approved_commands

    # Drop local modifications and untracked/ignored files so the agent starts
    # from a clean checkout. The results of these two commands are not inspected.
    await runtime_env.execute_command(["git", "reset", "--hard"])
    await runtime_env.execute_command(["git", "clean", "-ffdx"])

    # All dspy inference and tool execution must happen within this block
    with enrich_tools(runtime_env=runtime_env, approved_commands=approved_commands):
        inf_module = DSPYInference(
            pred_signature=RunCommands,
            tools=[
                *async_common_tools,
                final_check_before_completion,
            ],
            max_iters=50,
        )

        result = await inf_module.run(
            high_level_instructions=f"""
You are a professional smart contract security engineer.
Your current directory is inside of a git repository of a Foundry Forge Solidity project.
Your job is to build the project and run the tests.
You are allowed to read through files and folders in the project to understand it.
You can additionally only run the following commands at the root of the project:
{', '.join(approved_commands)}
You will not combine commands with && or ||.
You will always try to use the non-interactive or json versions of the commands. This is because the output is passed back to you in the same format.
Sometimes repos fail to give instructions on how to set up dependencies. In this case you should figure out how to pull them from one of the supplied commands.
For example, if you find out that forge-std doesn't exist, you can pull it in via `forge install foundry-rs/forge-std`.
Please be mindful of required versions and stick to them.
You should run a command with `--help` if you are unsure about its capabilities and don't have any ideas of how to proceed.
When supplying any relative path to a tool you will never use `..`. This is forbidden and will cause the tool to fail.
You are not allowed to move freely using `cd`.
You can additionally modify any file in the project that you have already read.
Your goal is to run `forge build` successfully and to run `forge test` (which must complete but may have failing tests so it may not necessarily "succeed").
Keep in mind that the `forge soldeer` subcommand exists and is the dependency manager for some projects. You should expect to see a `soldeer.lock` in these cases.
You will not give up until these commands run successfully.
You will never run any commands that open ports or a shell.
You only have one shot at this so make sure to do it right.
You should start by finding and opening the readme. Then, you should see what other files and folders are in the project.
Your response will always finish with a call to `final_check_before_completion` that returns a success.
You will not execute any further commands after you have verified that `forge build` and `forge test` work.
"""
        )
        print(result.shell_commands_ran)

        # Model output is not guaranteed to be a clean list[str]; normalise it
        # once so building the result below cannot raise.
        commands_ran = _join_reported_commands(result.shell_commands_ran)
        build_succeeded = bool(result.forge_build_runs)

        return ExecutionResults(
            stdout=commands_ran,
            stderr="" if build_succeeded else "Build agent failed to build the repository.",
            exit_code=0 if build_succeeded else 1,
            cmd_full="build_agent",
        )





16 changes: 11 additions & 5 deletions hackbot/src/pocgen/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from hackbot.src.pocgen.utils import BugProcessingEnvInfo
from common.utils.spinner import EnhancedSpinner as Spinner
from common.utils.logging_utils import logger as log
from hackbot.src.pocgen.build_agent import compile_codebase as agentic_compile


@dataclass
Expand Down Expand Up @@ -550,11 +551,16 @@ async def deps_and_compile(self) -> ExecutionResults:
Returns:
ExecutionResults from the compilation
"""
# Install dependencies first
await self.install_dependencies(run_npm_install=True)

# Then compile
return await self.compile_codebase()
if global_config.build_agent.enabled:
log.info("Using the build agent to compile the codebase.")
return await agentic_compile(self.runtime_env)
else:
log.info("Using the classic deterministic method to compile the codebase.")
# Install dependencies first
await self.install_dependencies(run_npm_install=True)

# Then compile
return await self.compile_codebase()

async def copy_to(self, target_dir: Path) -> None:
"""Copy the current runtime environment to a target directory (on real disk)."""
Expand Down
3 changes: 2 additions & 1 deletion hackbot/src/pocgen/runtime_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import subprocess
from dataclasses import dataclass
from pathlib import Path
from typing import Any, ContextManager, Protocol
from typing import Any, ContextManager, Protocol, runtime_checkable

from common.utils import tempfiles
from hackbot.utils.docker.docker_runtime import DockerWrapper
Expand All @@ -22,6 +22,7 @@ class ExecutionResults:
cmd_full: str # noqa


@runtime_checkable
class RuntimeEnvironment(Protocol):
"""Protocol defining the interface for runtime environments."""

Expand Down
26 changes: 26 additions & 0 deletions hackbot/tests/pocgen/test_repo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import pytest
# import asyncio
from hackbot.src.pocgen.repo import Repository
from hackbot.tests.test_template import slow_and_nondeterministic_test, TestTemplate
# from hackbot.src.pocgen.runtime_environment import LocalEnvironment
from common.utils import tempfiles
from hackbot.utils.git.clone import clone_repository

class TestRepo(TestTemplate):
    """Integration tests for ``Repository.deps_and_compile``."""

    @pytest.fixture(autouse=True)
    def setup_method(self, setup):
        """Request the ``setup`` fixture for every test in this class.

        ``setup`` is defined outside this file (presumably by ``TestTemplate``
        or a conftest — TODO confirm); this method itself does nothing.
        """
        pass

    @pytest.mark.asyncio
    @pytest.mark.parametrize("repo_url", [
        "https://github.com/code-423n4/2022-04-backed"
    ])
    @slow_and_nondeterministic_test
    async def test_deps_and_compile_with_agent(self, repo_url: str):
        """Clone ``repo_url`` into a temporary directory and check that it compiles.

        Passes when ``Repository.deps_and_compile`` returns an exit code of 0.
        """
        # NOTE(review): deps_and_compile only takes the build-agent path when
        # global_config.build_agent.enabled is true, and the YAML default is
        # false — confirm the test environment enables it, otherwise this test
        # exercises the classic compile path despite its name.
        with tempfiles.TemporaryDirectory() as temp_dir:
            clone_repository(repo_url, target_dir=temp_dir)

            async with Repository(from_dir=temp_dir) as repo:
                result = await repo.deps_and_compile()

            # Real newlines (not a literal backslash-n) keep the captured
            # output readable in the failure report.
            assert result.exit_code == 0, (
                f"Compilation failed with exit code {result.exit_code}\n"
                f"STDOUT:\n{result.stdout}\n"
                f"STDERR:\n{result.stderr}"
            )
7 changes: 5 additions & 2 deletions hackbot/utils/dspy/demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import sys
from typing import Sequence
from hackbot.utils.dspy.tools import common_tools, execute_command_tool, enrich_tools, dspy_tool
from hackbot.utils.dspy.dspy_structs import dspy_to_llm_struct
from pathlib import Path

import dspy
Expand Down Expand Up @@ -80,7 +81,7 @@ class RunCommands(dspy.Signature):
max_iters=50,
)

result = asyncio.run(
raw_result: dspy.Prediction = asyncio.run(
inf_module.run(
high_level_instructions=f"""
You are a professional smart contract security engineer.
Expand Down Expand Up @@ -109,4 +110,6 @@ class RunCommands(dspy.Signature):
"""
)
)
print(result.shell_command_ran)
result = dspy_to_llm_struct(raw_result, RunCommands)
shell_command_ran = result.shell_command_ran
print(shell_command_ran)
13 changes: 5 additions & 8 deletions hackbot/utils/dspy/dspy_inference.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from typing import Callable, Awaitable, Any, get_origin
from typing import Callable, Any, get_origin
import dspy
from dspy.utils.asyncify import asyncify as dspy_asyncify
from langfuse.decorators import observe
from litellm.exceptions import ServiceUnavailableError
from loguru import logger as log
Expand Down Expand Up @@ -111,9 +110,6 @@ def __init__(
)
else:
self.inference_module = dspy.Predict(pred_signature)
self.inference_module_async: Callable[..., Awaitable[Any]] = dspy_asyncify(
self.inference_module
)

@observe()
@retry(
Expand All @@ -130,13 +126,14 @@ def __init__(
async def run(
self,
**kwargs: Any,
) -> Any:
) -> dspy.Prediction:
try:
result: dspy.Prediction
if self.callback:
with dspy.context(lm=self.lm, callbacks=[self.callback]):
result = await self.inference_module_async(**kwargs, lm=self.lm)
result = await self.inference_module.acall(**kwargs, lm=self.lm)
else:
result = await self.inference_module_async(**kwargs, lm=self.lm)
result = await self.inference_module.acall(**kwargs, lm=self.lm)
return result
except Exception as e:
log.error(f"Error in run: {str(e)}")
Expand Down
Loading
Loading