open-s4c · apaolillo · Apr 2, 2026 · Mar 31, 2026
diff --git a/ROADMAP.md b/ROADMAP.md
@@ -176,6 +176,17 @@ future.
 
 - Refactor logging
 
+- Hierarchical record_dir across all phases:
+  Currently, record_dir is only set on RunContext/CollectContext (the per-run
+  data directory). When the new API is finalized, extend record_dir to
+  FetchContext (results root) and BuildContext (build-variable subdirectory),
+  so each phase can store artifacts at the appropriate level in the hierarchy.
+  Example:
+    - FetchContext:   results/
+    - BuildContext:   results/build_type-Release/
+    - RunContext:     results/build_type-Release/bench_name-readrandom/nb_threads-4/run-01/
+    - CollectContext: (same as RunContext, inherited)
+
 - Save stdout, stderr of command in the record hierarchy and put additional
   information in the record:
   - The json file with the record, detailing input, output and perf values

diff --git a/benchkit/core/bktypes/contexts.py b/benchkit/core/bktypes/contexts.py
@@ -142,6 +142,7 @@ def from_fetch(
             ctx: The FetchContext from the previous phase.
             fetch_result: The result returned by the fetch phase.
             build_args: Build-specific arguments (e.g., compiler, optimization level).
+            default_args: Optional default arguments.
 
         Returns:
             A BuildContext instance ready for building.
@@ -203,6 +204,7 @@ def from_build(
         run_args: Vars,
         default_args: Vars | None = None,
         duration_s: int | None = None,
+        record_dir: Path | None = None,
     ) -> "RunContext":
         """
         Create a RunContext from a completed build phase.
@@ -211,7 +213,9 @@ def from_build(
             ctx: The BuildContext from the previous phase.
             build_result: The result returned by the build phase.
             run_args: Run-specific arguments (e.g., benchmark name, thread count).
+            default_args: Optional default arguments, merged over ctx.default_args.
             duration_s: Optional duration limit in seconds.
+            record_dir: Optional per-run artifact directory; defaults to ctx.record_dir.
 
         Returns:
             A RunContext instance ready for execution.
@@ -221,7 +225,7 @@ def from_build(
             exec=ctx.exec,
             vars=ctx.vars,
             default_args=ctx.default_args | (default_args or {}),
-            record_dir=ctx.record_dir,
+            record_dir=ctx.record_dir if record_dir is None else record_dir,
             fetch_args=ctx.fetch_args,
             fetch_result=ctx.fetch_result,
             build_args=ctx.build_args,

diff --git a/benchkit/core/compat/new2old.py b/benchkit/core/compat/new2old.py
@@ -352,7 +352,7 @@ def bootstrap(
         validate_benchmark(bench=self.benchmark)
 
         fetch_args = _check_fetch_args(benchmark=self.benchmark, parameter_space=args)
-        self._session_fetch = self._stepper.fetch(args=fetch_args, record_dir=record_dir)
+        self._session_fetch = self._stepper.fetch(args=fetch_args)
 
     @property
     def bench_src_path(self) -> Path:
@@ -448,6 +448,7 @@ def _transform_run_ctx(run_ctx: RunContext) -> RunContext:
             session=self._last_session_build,
             args=run_args,
             duration_s=duration_s,
+            record_dir=record_data_dir,
             ctx_transform=_transform_run_ctx,
         )
         return self._last_session_run.run_result.outputs[-1].stdout

diff --git a/benchkit/engine/stepper.py b/benchkit/engine/stepper.py
@@ -168,6 +168,7 @@ def run(
         args: Vars,
         duration_s: int | None,
         *,
+        record_dir: Path | None = None,
         ctx_transform: RunCtxTransform | None = None,
     ) -> StepSession:
         run_args, default_args = _get_step_args(step_fn=self.bench.run, args=args)
@@ -177,6 +178,7 @@ def run(
             run_args=run_args,
             default_args=default_args,
             duration_s=duration_s,
+            record_dir=record_dir,
         )
 
         if ctx_transform is not None: