diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..3fd18ef --- /dev/null +++ b/.env.example @@ -0,0 +1,44 @@ +# .env.example — copy relevant parts to .env.local and fill in secrets. +# .env.local is gitignored. Never commit secrets. +# For the CLI: these same BAUER_* vars work as flag fallbacks. + +# --- Secrets (go in .env.local, never committed) --- +BAUER_GITHUB_TOKEN=ghp_... +BAUER_CREDENTIALS_PATH=/path/to/service-account.json + +# --- Google credentials fallback --- +# GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account.json + +# --- GitHub App (alternative to PAT, recommended for org repos) --- +# GITHUB_APP_ID=12345 +# GITHUB_APP_PRIVATE_KEY_PATH=/path/to/private-key.pem +# GITHUB_APP_INSTALLATION_ID=67890 + +# --- OIDC (optional — for API deployments protected by your IdP) --- +# BAUER_OIDC_ISSUER=https://auth.example.com +# BAUER_OIDC_AUDIENCE=bauer-api + +# --- Jira webhook --- +# BAUER_JIRA_WEBHOOK_SECRET=your-shared-secret +# BAUER_JIRA_DOC_FIELD=customfield_10100 + +# --- Figma integration (optional; required when --figma-url is used) --- +# BAUER_FIGMA_TOKEN=figd_xxxxxxxxxxxxx + +# --- API Server --- +BAUER_API_PORT=8090 + +# --- Copilot / model --- +BAUER_MODEL=gpt-5-mini-high +BAUER_SUMMARY_MODEL=gpt-5-mini-high + +# --- Bauer behaviour (env var fallbacks for CLI flags) --- +BAUER_DOC_ID= +BAUER_CHUNK_SIZE=1 +BAUER_PAGE_REFRESH=false +BAUER_DRY_RUN=false +BAUER_OUTPUT_DIR=bauer-output +BAUER_BRANCH_PREFIX=bauer +BAUER_ARTIFACTS_DIR=./bauer-artifacts +BAUER_TARGET_REPO= +BAUER_FIGMA_URL= diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..2177527 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,44 @@ +name: CI + +on: + pull_request: + workflow_dispatch: + +permissions: + contents: read + +jobs: + test-and-checks: + name: Test and checks + runs-on: ubuntu-latest + + steps: + - name: Check out code + uses: actions/checkout@v4 + + - name: Set up Go + uses: 
actions/setup-go@v5 + with: + go-version-file: go.mod + cache: true + + - name: Verify formatting + run: | + unformatted="$(gofmt -l .)" + if [ -n "$unformatted" ]; then + echo "The following files are not formatted with gofmt:" + echo "$unformatted" + exit 1 + fi + + - name: Run go vet + run: go vet ./... + + - name: Run tests + run: go test ./... + + - name: Build CLI + run: go build ./cmd/bauer + + - name: Build API + run: go build ./cmd/app diff --git a/.gitignore b/.gitignore index 7f9f318..8cfd435 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,15 @@ bauer-output/ bauer # Added by goreleaser init: dist/ + +# Artifacts +bauer-artifacts/ + +.workshop.lock +config.json +credentials.json + +# Binaries +bauer +bauer-api +app diff --git a/.workshop.lock b/.workshop.lock new file mode 100644 index 0000000..f0726c3 --- /dev/null +++ b/.workshop.lock @@ -0,0 +1 @@ +073716b8 \ No newline at end of file diff --git a/README.md b/README.md index 134d25a..a86e812 100644 --- a/README.md +++ b/README.md @@ -117,7 +117,7 @@ From Repository root: ```bash task build-api -./bauer-api --config config.json +./bauer-api ``` ### Endpoints diff --git a/Taskfile.yml b/Taskfile.yml index 0ddaf80..fad44ee 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -2,37 +2,43 @@ version: "3" tasks: build: - desc: Build the Bauer and Bauer API binaries + desc: Build the Bauer CLI binary cmds: - - go build -o bauer cmd/bauer/main.go - - go build -o bauer-api cmd/app/main.go + - go build -o bauer ./cmd/bauer/ + + build-api: + desc: Build the Bauer API server binary + cmds: + - go build -o bauer-api ./cmd/app/ + + run: + desc: Run Bauer CLI in standalone mode + summary: | + Requires BAUER_DOC_ID and BAUER_CREDENTIALS_PATH (or GOOGLE_APPLICATION_CREDENTIALS, or --credentials flag). 
+ Example: task run -- --doc-id 1abc --credentials ./creds.json + cmds: + - go run ./cmd/bauer/ {{.CLI_ARGS}} + + run-api: + desc: Build and start the API server + cmds: + - task: build-api + - ./bauer-api test: - desc: Run all tests + desc: Run all unit tests cmds: - - go test --cover ./... + - go test ./... lint: desc: Format code using gofmt cmds: - gofmt -w . - run-server: - desc: Run the API server locally - cmds: - - ./bauer-api --config config.json - clean: desc: Clean up generated files cmds: - rm -rf bauer-output - - rm -f bauer + - rm -f bauer bauer-api - rm -f bauer.log - rm -rf /tmp/gh* /tmp/tmp* /tmp/test-bauer-repo* || true - - run: - desc: Clean up generated files, build and run Bauer server - cmds: - - task: clean - - task: build - - task: run-server \ No newline at end of file diff --git a/cmd/app/core/middleware/request_trace.go b/cmd/app/core/middleware/request_trace.go deleted file mode 100644 index ca295ca..0000000 --- a/cmd/app/core/middleware/request_trace.go +++ /dev/null @@ -1,29 +0,0 @@ -package middleware - -import ( - "bauer/cmd/app/types" - "context" - "log/slog" - "net/http" - - "github.com/google/uuid" -) - -func RequestTrace(next http.Handler) http.Handler { - return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - id, err := uuid.NewUUID() - if err != nil { - err := types.InternalError(err).Render(w, r) - if err != nil { - slog.Error("Failed rendering internal error response due to failed UUID generation: %w", - slog.String("error", err.Error()), - ) - } - return - } - - ctx := r.Context() - ctx = context.WithValue(ctx, "requestID", id.String()) - next.ServeHTTP(w, r.WithContext(ctx)) - }) -} diff --git a/cmd/app/main.go b/cmd/app/main.go index 83745ae..70ef438 100644 --- a/cmd/app/main.go +++ b/cmd/app/main.go @@ -1,11 +1,6 @@ package main import ( - "bauer/cmd/app/core/middleware" - "bauer/cmd/app/types" - v1 "bauer/cmd/app/v1" - "bauer/internal/orchestrator" - "bauer/internal/workflow" "fmt" "log/slog" "net/http" 
@@ -20,24 +15,15 @@ func run() error { slog.Info("startup", "status", "initializing API") defer slog.Info("shutdown complete") - orchestrator := orchestrator.NewOrchestrator() - cfg, err := types.LoadConfig() - if err != nil { - slog.Error("failed to load config", "error", err.Error()) - return err - } - - rc := types.RouteConfig{ - APIConfig: *cfg, - Orchestrator: orchestrator, - } - mux := http.NewServeMux() - mux.HandleFunc("/api/v1/job", v1.JobPost(rc)) - mux.HandleFunc("/api/v1/health", v1.GetHealth) - mux.HandleFunc("/api/v1/workflow", workflow.ExecuteWorkflowHandler(orchestrator)) + mux.HandleFunc("GET /api/v1/health", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + w.Write([]byte(`{"status":"ok"}`)) + }) + slog.Info("starting server", "address", ":8090") - err = http.ListenAndServe(":8090", middleware.RequestTrace(mux)) + err := http.ListenAndServe(":8090", mux) if err != nil { slog.Error("server error", "error", err.Error()) diff --git a/cmd/app/models/v1/job.go b/cmd/app/models/v1/job.go deleted file mode 100644 index d08631a..0000000 --- a/cmd/app/models/v1/job.go +++ /dev/null @@ -1,14 +0,0 @@ -package models - -type JobPost struct { - // DocID is the Google Doc ID to extract feedback from. - DocID string `json:"doc_id"` - - // ChunkSize is the total number of chunks to create from all locations. - // Default is 1 if not specified, or 5 if PageRefresh is true. - ChunkSize int `json:"chunk_size"` - - // PageRefresh indicates if the page refresh mode should be used. - // When true, uses page-refresh-instructions.md template and defaults ChunkSize to 5. 
- PageRefresh bool `json:"page_refresh"` -} diff --git a/cmd/app/types/config.go b/cmd/app/types/config.go deleted file mode 100644 index bd988c7..0000000 --- a/cmd/app/types/config.go +++ /dev/null @@ -1,75 +0,0 @@ -package types - -import ( - "bauer/internal/config" - "flag" - "os" -) - -type APIConfig struct { - // CredentialsPath is the path to the Google Cloud service account JSON key file. - CredentialsPath string - - // OutputDir is the directory where generated prompt files will be saved. - // Default is "bauer-output" if not specified. - BaseOutputDir string - - // Model is the Copilot model to use for sessions. - // Default is "gpt-5-mini-high" if not specified. - Model string - - // SummaryModel is the Copilot model to use for the summary session. - // Default is "gpt-5-mini-high" if not specified. - SummaryModel string - - // TargetRepo is the path (relative or absolute) to the target repository - // where tasks should be executed. If not specified, uses the current directory. 
- TargetRepo string `json:"target_repo"`} - -func LoadConfig() (*APIConfig, error) { - credentialsPath := flag.String("credentials", "", "Path to service account JSON (required)") - baseOutputDir := flag.String("base-output-dir", "bauer-output", "Base path of directory for generated prompt files (default: bauer-output)") - model := flag.String("model", "gpt-5-mini-high", "Copilot model to use for sessions (default: gpt-5-mini-high)") - summaryModel := flag.String("summary-model", "gpt-5-mini-high", "Copilot model to use for summary session (default: gpt-5-mini-high)") - configFile := flag.String("config", "", "Path to JSON config file") - targetRepo := flag.String("target-repo", "", "Path to target repository where tasks should be executed (default: current directory)") - - flag.Parse() - - if *configFile != "" { - cfg, err := config.LoadFromJSONFile(*configFile) - if err != nil { - return nil, err - } - return &APIConfig{ - CredentialsPath: cfg.CredentialsPath, - BaseOutputDir: cfg.OutputDir, - Model: cfg.Model, - SummaryModel: cfg.SummaryModel, - TargetRepo: cfg.TargetRepo, - }, nil - } - - if *credentialsPath == "" { - flag.Usage() - os.Exit(1) - } - - cfg := &APIConfig{ - CredentialsPath: *credentialsPath, - BaseOutputDir: *baseOutputDir, - Model: *model, - SummaryModel: *summaryModel, - TargetRepo: *targetRepo, - } - - if err := cfg.Validate(); err != nil { - return nil, err - } - - return cfg, nil -} - -func (c *APIConfig) Validate() error { - return config.ValidateCredentialsPath(c.CredentialsPath) -} diff --git a/cmd/app/types/response.go b/cmd/app/types/response.go deleted file mode 100644 index 50255a4..0000000 --- a/cmd/app/types/response.go +++ /dev/null @@ -1,45 +0,0 @@ -package types - -import ( - "encoding/json" - "net/http" -) - -type Response struct { - Code int `json:"code"` - Error string `json:"error,omitempty"` -} - -func Success() *Response { - return &Response{Code: http.StatusOK, Error: ""} -} - -func Accepted() *Response { - return 
&Response{Code: http.StatusAccepted, Error: ""} -} - -func BadRequest(err error) *Response { - return &Response{Code: http.StatusBadRequest, Error: err.Error()} -} - -func NotAllowed(err error) *Response { - return &Response{Code: http.StatusMethodNotAllowed, Error: err.Error()} -} - -func Forbidden(err error) *Response { - return &Response{Code: http.StatusForbidden, Error: err.Error()} -} - -func InternalError(err error) *Response { - return &Response{Code: http.StatusInternalServerError, Error: err.Error()} -} - -func NotFound(err error) *Response { - return &Response{Code: http.StatusNotFound, Error: err.Error()} -} - -func (r *Response) Render(w http.ResponseWriter, _ *http.Request) error { - w.WriteHeader(r.Code) - w.Header().Set("Content-Type", "application/json") - return json.NewEncoder(w).Encode(r) -} diff --git a/cmd/app/types/route_config.go b/cmd/app/types/route_config.go deleted file mode 100644 index 5fee54c..0000000 --- a/cmd/app/types/route_config.go +++ /dev/null @@ -1,10 +0,0 @@ -package types - -import ( - "bauer/internal/orchestrator" -) - -type RouteConfig struct { - APIConfig APIConfig - Orchestrator orchestrator.Orchestrator -} diff --git a/cmd/app/v1/api.go b/cmd/app/v1/api.go deleted file mode 100644 index 40fc99f..0000000 --- a/cmd/app/v1/api.go +++ /dev/null @@ -1,94 +0,0 @@ -package v1 - -import ( - "bauer/cmd/app/models/v1" - "bauer/cmd/app/types" - "bauer/internal/config" - "context" - "encoding/json" - "fmt" - "log/slog" - "net/http" -) - -func JobPost(rc types.RouteConfig) func(w http.ResponseWriter, r *http.Request) { - return func(w http.ResponseWriter, r *http.Request) { - requestID, ok := r.Context().Value("requestID").(string) - if !ok || requestID == "" { - err := types.InternalError(fmt.Errorf("missing request ID")).Render(w, r) - if err != nil { - slog.Error("error writing response", "error", err.Error()) - } - return - } - if r.Method != "POST" { - err := types.NotAllowed(fmt.Errorf("invalid HTTP method: %s", 
r.Method)).Render(w, r) - if err != nil { - slog.Error("error writing response", "error", err.Error(), "requestID", requestID) - } - return - } - payload, err := getJobFromRequest(w, r, requestID) - if err != nil { - return - } - cfg := config.Config{ - DocID: payload.DocID, - ChunkSize: payload.ChunkSize, - PageRefresh: payload.PageRefresh, - CredentialsPath: rc.APIConfig.CredentialsPath, - OutputDir: fmt.Sprintf("%s/%s", rc.APIConfig.BaseOutputDir, requestID), - Model: rc.APIConfig.Model, - SummaryModel: rc.APIConfig.SummaryModel, - } - - go executeJob(requestID, cfg, rc) - - err = types.Accepted().Render(w, r) - if err != nil { - slog.Error("error writing response", "error", err.Error(), "requestID", requestID) - } - } -} - -func getJobFromRequest(w http.ResponseWriter, r *http.Request, requestID string) (*models.JobPost, error) { - payload := models.JobPost{} - err := json.NewDecoder(r.Body).Decode(&payload) - if err != nil { - slog.Error("failed to decode request body", "error", err.Error(), "requestID", requestID) - err := types.BadRequest(fmt.Errorf("invalid request body: %w", err)).Render(w, r) - if err != nil { - slog.Error("error writing response", "error", err.Error(), "requestID", requestID) - } - return nil, err - } - return &payload, nil -} - -func executeJob(requestID string, cfg config.Config, rc types.RouteConfig) { - ctx := context.Background() - ctx = context.WithValue(ctx, "requestID", requestID) - - _, err := rc.Orchestrator.Execute(ctx, &cfg) - if err != nil { - slog.Error("job execution failed", - "error", err.Error(), - "requestID", requestID, - ) - return - } - - slog.Info("job executed successfully", - "requestID", requestID, - ) -} - - -func GetHealth(w http.ResponseWriter, r *http.Request) { - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(http.StatusOK) - err := types.Success().Render(w, r) - if err != nil { - slog.Error("error writing response", "error", err.Error()) - } -} \ No newline at end of file diff --git 
a/cmd/bauer/main.go b/cmd/bauer/main.go index 761724b..d72a9ba 100644 --- a/cmd/bauer/main.go +++ b/cmd/bauer/main.go @@ -1,72 +1,303 @@ package main import ( + "bauer/internal/artifacts" + "bauer/internal/config" + "bauer/internal/copilotcli" "bauer/internal/github" "bauer/internal/orchestrator" - "bauer/internal/workflow" + "bauer/internal/source" "context" "flag" "fmt" "os" + "path/filepath" "strings" + "time" ) func main() { - // Parse CLI flags - githubRepo := flag.String("github-repo", "", "GitHub repository (owner/repo or HTTPS URL)") - docID := flag.String("doc-id", "", "Google Doc ID") - credentialsPath := flag.String("credentials", "bau-test-creds.json", "Path to service account credentials JSON") - localRepoPath := flag.String("local-repo-path", "/tmp/ubuntu.com", "Local path for cloned repository") - dryRun := flag.Bool("dry-run", false, "Perform a dry run without creating PR") - outputDir := flag.String("output-dir", "bauer-output", "Output directory for Bauer results") - branchPrefix := flag.String("branch-prefix", "bauer", "Branch naming prefix") - - flag.Parse() - - // Validate required flags - if *githubRepo == "" { - fmt.Fprintf(os.Stderr, "ERROR: --github-repo is required\n") + ctx := context.Background() + + // 1. Parse flags + flags, err := config.ParseCLIFlags(os.Args[1:]) + if err != nil { + if err == flag.ErrHelp { + // Usage was already printed by the FlagSet. Exit cleanly. + os.Exit(0) + } + fmt.Fprintln(os.Stderr, err) os.Exit(1) } - if *docID == "" { - fmt.Fprintf(os.Stderr, "ERROR: --doc-id is required\n") + + // 2. Mutual exclusion check — before any network calls + if config.BoolVal(flags.OpenPR, false) && config.BoolVal(flags.OpenIssue, false) { + fmt.Fprintln(os.Stderr, "Error: --open-pr and --open-issue are mutually exclusive. 
Pick one.") + fmt.Fprintln(os.Stderr, " Use --open-pr to apply changes and open a PR.") + fmt.Fprintln(os.Stderr, " Use --open-issue to generate a plan and open an issue without applying changes.") os.Exit(1) } - fmt.Println(strings.Repeat("=", 80)) - fmt.Println("Bauer - A tool to automate BAU tasks") - fmt.Println(strings.Repeat("=", 80)) - fmt.Println() - - // Create workflow input from CLI flags/config - ghToken, err := github.GetGitHubToken() + // 3. Resolve config + cfg, err := resolveCLIConfig(flags) if err != nil { - fmt.Fprintf(os.Stderr, "WARNING: Could not get GitHub token: %v\n", err) - ghToken = "" + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + + if err := cfg.Validate(); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) } - workflowInput := workflow.WorkflowInput{ - GitHubRepo: *githubRepo, - GitHubToken: ghToken, - BranchPrefix: *branchPrefix, - DocID: *docID, - Credentials: *credentialsPath, - LocalRepoPath: *localRepoPath, - DryRun: *dryRun, - OutputDir: *outputDir, + // 4. 
Figma token validation (fail fast) + if cfg.FigmaURL != "" && cfg.FigmaToken == "" { + fmt.Fprintln(os.Stderr, "Error: BAUER_FIGMA_TOKEN or FIGMA_TOKEN must be set when --figma-url is supplied.") + os.Exit(1) } - orch := orchestrator.NewOrchestrator() + // Resolve output and artifacts dirs to absolute paths before any chdir + absOutputDir, err := filepath.Abs(cfg.OutputDir) + if err != nil { + fmt.Fprintf(os.Stderr, "ERROR: resolve output dir: %v\n", err) + os.Exit(1) + } + cfg.OutputDir = absOutputDir - // Execute the complete workflow - result, err := workflow.ExecuteWorkflow(context.Background(), workflowInput, orch) + absArtifactsDir, err := filepath.Abs(cfg.ArtifactsDir) if err != nil { - fmt.Fprintf(os.Stderr, "ERROR: %v\n", err) + fmt.Fprintf(os.Stderr, "ERROR: resolve artifacts dir: %v\n", err) os.Exit(1) } + cfg.ArtifactsDir = absArtifactsDir + + // If a target repo is configured, chdir there now so all git/gh/agent + // operations operate in the correct directory. We restore the original + // directory on exit. + if cfg.TargetRepo != "" { + origDir, err := os.Getwd() + if err != nil { + fmt.Fprintf(os.Stderr, "ERROR: get current directory: %v\n", err) + os.Exit(1) + } + if err := os.Chdir(cfg.TargetRepo); err != nil { + fmt.Fprintf(os.Stderr, "ERROR: chdir to target repo %q: %v\n", cfg.TargetRepo, err) + os.Exit(1) + } + defer os.Chdir(origDir) + } + + // 5. Setup orchestrator + artMgr := artifacts.NewManager(absArtifactsDir) + gdocsAdapter := source.NewGDocsAdapter() + sources := source.NewManager(gdocsAdapter) + + newAgent := func() (orchestrator.Agent, error) { + cwd, err := os.Getwd() + if err != nil { + return nil, fmt.Errorf("get working directory: %w", err) + } + return copilotcli.NewClient(cwd) + } + + // 6. 
Dispatch to mode + switch { + case config.BoolVal(cfg.OpenIssue, false): + if err := runOpenIssue(ctx, cfg, sources, artMgr); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + case config.BoolVal(cfg.OpenPR, false): + if err := runOpenPR(ctx, cfg, sources, artMgr, newAgent); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + default: + if err := runStandalone(ctx, cfg, sources, artMgr, newAgent); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + } +} + +func runStandalone(ctx context.Context, cfg *config.Config, sources *source.Manager, artMgr *artifacts.Manager, newAgent func() (orchestrator.Agent, error)) error { + agent, err := newAgent() + if err != nil { + return fmt.Errorf("create agent: %w", err) + } + + orch := orchestrator.NewOrchestrator(agent, sources, artMgr) + result, err := orch.Execute(ctx, cfg) + if err != nil { + return err + } + + fmt.Printf("Status: %s\n", resultStatus(result)) + return nil +} + +func runOpenPR(ctx context.Context, cfg *config.Config, sources *source.Manager, artMgr *artifacts.Manager, newAgent func() (orchestrator.Agent, error)) error { + cwd, err := os.Getwd() + if err != nil { + return fmt.Errorf("get working directory: %w", err) + } + + // Read remote from git config + owner, repo, err := github.ReadRemoteFromGitConfig(ctx, cwd) + if err != nil { + return fmt.Errorf("--open-pr requires a git repo with an 'origin' remote: %w", err) + } + + // Check GitHub token before running Copilot + if _, err := github.GetGitHubToken(); err != nil { + return fmt.Errorf("--open-pr requires GitHub auth: %w", err) + } + + // Run Copilot (always; we want to see the changes) + agent, err := newAgent() + if err != nil { + return fmt.Errorf("create agent: %w", err) + } + + orch := orchestrator.NewOrchestrator(agent, sources, artMgr) + _, err = orch.Execute(ctx, openPRExecutionConfig(cfg)) + if err != nil { + return fmt.Errorf("orchestration failed: %w", err) + } + + // If dry-run, skip PR creation + if 
config.BoolVal(cfg.DryRun, false) { + fmt.Println("dry-run: changes applied locally, PR creation skipped") + return nil + } + + // Create branch from the repo's default branch, commit, push + branchName := fmt.Sprintf("%s/doc-suggestions-%d", cfg.BranchPrefix, time.Now().Unix()) + if err := github.CreateBranchFromDefault(ctx, cwd, branchName); err != nil { + return fmt.Errorf("failed to create branch: %w", err) + } + + defaultBranch, err := github.GetDefaultBranch(cwd) + if err != nil { + return fmt.Errorf("failed to detect default branch: %w", err) + } + + // Commit changes + commitMsg := fmt.Sprintf("Apply BAU suggestions from doc %s", cfg.DocID) + if err := github.CommitChanges(cwd, commitMsg); err != nil { + // "no changes to commit" is acceptable if Copilot made no modifications + if !strings.Contains(err.Error(), "no changes to commit") { + return fmt.Errorf("failed to commit changes: %w", err) + } + fmt.Println("warning: no changes to commit") + } + + // Push branch + if err := github.PushBranch(cwd, branchName); err != nil { + return fmt.Errorf("failed to push branch: %w", err) + } + + // Create PR + prTitle := fmt.Sprintf("Apply BAU suggestions from doc %s", cfg.DocID) + prURL, err := github.CreatePR(owner, repo, github.CreatePROptions{ + Title: prTitle, + HeadBranch: branchName, + BaseBranch: defaultBranch, + }) + if err != nil { + return fmt.Errorf("failed to create PR: %w", err) + } + + fmt.Printf("PR created: %s\n", prURL) + return nil +} + +func runOpenIssue(ctx context.Context, cfg *config.Config, sources *source.Manager, artMgr *artifacts.Manager) error { + cwd, err := os.Getwd() + if err != nil { + return fmt.Errorf("get working directory: %w", err) + } + + // Read remote from git config + owner, repo, err := github.ReadRemoteFromGitConfig(ctx, cwd) + if err != nil { + return fmt.Errorf("--open-issue requires a git repo with an 'origin' remote: %w", err) + } + + // Check GitHub token before any API calls + if _, err := github.GetGitHubToken(); err 
!= nil { + return fmt.Errorf("--open-issue requires GitHub auth: %w", err) + } + + // Run orchestrator in dry-run mode (extraction + chunking, no Copilot) + dryRunCfg := *cfg + dryRunCfg.DryRun = config.BoolPtr(true) + + orch := orchestrator.NewOrchestrator(nil, sources, artMgr) + result, err := orch.Execute(ctx, &dryRunCfg) + if err != nil { + return fmt.Errorf("extraction failed: %w", err) + } + + title := fmt.Sprintf("BAU: Apply suggestions from doc %s", cfg.DocID) + body := formatIssueBody(result, cfg.DocID) + + issueURL, issueNum, err := github.CreateIssue(ctx, owner, repo, title, body) + if err != nil { + return fmt.Errorf("failed to create issue: %w", err) + } + + fmt.Printf("Issue #%d created: %s\n", issueNum, issueURL) + return nil +} + +func formatIssueBody(result *orchestrator.OrchestrationResult, docID string) string { + var b strings.Builder + b.WriteString("## BAU Suggestions from Google Doc\n\n") + b.WriteString(fmt.Sprintf("**Doc ID**: `%s`\n", docID)) + + if result != nil && result.Bundle != nil && result.Bundle.Document != nil { + doc := result.Bundle.Document + totalSuggestions := len(doc.GroupedSuggestions) + chunkCount := len(result.Chunks) + b.WriteString(fmt.Sprintf("**Total suggestions**: %d across %d chunk(s)\n\n", totalSuggestions, chunkCount)) + + for _, chunk := range result.Chunks { + b.WriteString(fmt.Sprintf("### Chunk %d\n\n", chunk.ChunkNumber)) + b.WriteString(fmt.Sprintf("- **File**: %s\n", chunk.Filename)) + b.WriteString(fmt.Sprintf("- **Locations**: %d\n\n", chunk.LocationCount)) + } + } else { + b.WriteString("No suggestions extracted.\n") + } + + return b.String() +} + +func resultStatus(result *orchestrator.OrchestrationResult) string { + if result == nil { + return "failed" + } + if result.DryRun { + return "dry-run" + } + return "success" +} + +func resolveCLIConfig(flags config.CLIFlags) (*config.Config, error) { + return config.NewResolver( + config.NewFlagsSource(flags), + config.NewEnvVarSource(), + 
config.NewDefaultsSource(), + ).Resolve() +} + +func openPRExecutionConfig(cfg *config.Config) *config.Config { + if cfg == nil { + return nil + } - // Print results - fmt.Printf("Status: %s\n", result.Status) - fmt.Printf("Branch: %s\n", result.RepositoryInfo.BranchName) - fmt.Printf("PR: %s\n", result.FinalizationInfo.PullRequest.URL) + executionCfg := *cfg + executionCfg.DryRun = config.BoolPtr(false) + return &executionCfg } diff --git a/config.json b/config.json deleted file mode 100644 index 62081ca..0000000 --- a/config.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "doc_id": "1WJ-N_Xkkx4r_6knxW7h200oIDyi4mVMzgh1xYt5xaU0", - "credentials": "bau-test-creds.json", - "chunk_size": 1, - "page_refresh": false, - "output_dir": "bauer-output", - "model": "gpt-5-mini-high", - "summary_model": "gpt-5-mini-high", - "target_repo": "../canonical.com", - "dry_run": false -} \ No newline at end of file diff --git a/credentials.json b/credentials.json deleted file mode 100644 index 740ee7f..0000000 --- a/credentials.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "type": "service_account", - "project_id": "", - "private_key_id": "", - "private_key": "", - "client_email": "", - "client_id": "", - "auth_uri": "", - "token_uri": "", - "auth_provider_x509_cert_url": "", - "client_x509_cert_url": "", - "universe_domain": "" -} \ No newline at end of file diff --git a/docs/ARCHITECTURE_v2.md b/docs/ARCHITECTURE_v2.md index 828381f..f5f7c2f 100644 --- a/docs/ARCHITECTURE_v2.md +++ b/docs/ARCHITECTURE_v2.md @@ -25,12 +25,11 @@ graph TD subgraph Shared Core Config["internal/config"] - Orchestrator["internal/orchestrator"] + Orchestrator["internal/orchestrator\n(defines Agent interface)"] Source["internal/source"] GDocs["internal/gdocs"] Prompt["internal/prompt"] Artifacts["internal/artifacts"] - Agent["internal/agent\n(interface)"] Copilot["internal/copilotcli"] GitHub["internal/github"] end @@ -46,8 +45,7 @@ graph TD Source --> GDocs Orchestrator --> Prompt Orchestrator --> Artifacts - 
Orchestrator --> Agent - Copilot -. implements .-> Agent + Copilot -. implements orchestrator.Agent .-> Orchestrator CLI --> GitHub API --> GitHub @@ -75,9 +73,8 @@ Sharing core logic between the CLI and API is the right call here. The internal | `internal/gdocs` | Google Docs auth, doc fetch, suggestion extraction and grouping | | `internal/source` | Source adapters and normalized `SourceBundle` — the orchestrator depends on this, not directly on `internal/gdocs` | | `internal/prompt` | Chunk prompt generation from suggestions, template rendering; directly aware of gdocs and figma source types | -| `internal/agent` | Defines the `Agent` interface that all AI execution backends implement | -| `internal/copilotcli` | Implements `agent.Agent`; manages Copilot SDK process lifecycle, sessions, streaming | -| `internal/orchestrator` | Wires source → prompt → agent into a single `Execute()` call; depends on `agent.Agent`, not concrete client | +| `internal/copilotcli` | Implements `orchestrator.Agent`; manages Copilot SDK process lifecycle, sessions, streaming | +| `internal/orchestrator` | Defines `Agent` at the consumer and wires source → prompt → agent into a single `Execute()` call | | `internal/artifacts` | Append-only run artifact storage; writes extraction, prompts, outputs, and screenshots under timestamped run directories | | `internal/github` | GitHub auth, branch ops, commits, issue creation, PR creation | | `internal/config` | Configuration loading and resolution (shared by CLI and API) | @@ -431,7 +428,7 @@ Secrets are injected as environment variables or mounted volumes. 
The `BAUER_CRE | `.env` + `.env.local` for API | Yes | Standard pattern, clear separation of defaults vs secrets | | Jira calls internal logic (not HTTP) | Yes | Cleaner, no loopback, same process | | Two API endpoints (issues vs workflows) | Yes | Clean separation — different flows, different use cases | -| `agent.Agent` interface | Yes | Orchestrator is backend-agnostic; swap AI provider without touching core logic | +| `orchestrator.Agent` interface | Yes | Orchestrator is backend-agnostic; swap AI provider without touching core logic | | No JSON config files | Yes | Env vars for API, flags + env vars for CLI — cleaner, no third config mechanism | | `internal/source` abstraction | Yes | Orchestrator must not be coupled to Google Docs directly; source layer normalizes all upstream inputs so future sources (Figma, Jira, etc.) fit the same pipeline without touching the orchestrator | | Append-only run artifacts | Yes | Overwriting extraction outputs and prompt files blocks traceability, debugging, and later design-aware features; every run gets a timestamped directory under `bauer-artifacts/` | diff --git a/docs/ARCHITECTURE_v2_1.md b/docs/ARCHITECTURE_v2_1.md index 5a320be..26abd2b 100644 --- a/docs/ARCHITECTURE_v2_1.md +++ b/docs/ARCHITECTURE_v2_1.md @@ -221,14 +221,13 @@ graph TD subgraph Shared Core Config["internal/config"] - Orchestrator["internal/orchestrator"] + Orchestrator["internal/orchestrator\n(defines Agent interface)"] Intake["internal/source\nsource intake"] GDocs["internal/gdocs"] Figma["internal/figma"] Mapping["internal/source/mapping"] Artifacts["internal/artifacts"] Prompt["internal/prompt"] - Agent["internal/agent"] GitHub["internal/github"] end @@ -244,7 +243,6 @@ graph TD Orchestrator --> Mapping Orchestrator --> Artifacts Orchestrator --> Prompt - Orchestrator --> Agent CLI --> GitHub API --> GitHub ``` @@ -619,6 +617,7 @@ BAUER_FIGMA_TOKEN → FIGMA_TOKEN | `--artifacts-dir` | `./bauer-artifacts` | Directory for run artifacts (extraction, 
prompts, outputs, screenshots, `runs.jsonl`). | Rules: + - `--figma-url` accepts both whole-file and node-specific Figma links (e.g. `https://www.figma.com/file/KEY/Name?node-id=1%3A42`) - when `--figma-url` is omitted, Bauer runs in gdocs-only mode; all Figma ingestion steps are skipped - `BAUER_FIGMA_TOKEN` → `FIGMA_TOKEN` — token resolution order; never a CLI flag diff --git a/docs/QA_PHASES_0_2.md b/docs/QA_PHASES_0_2.md new file mode 100644 index 0000000..8e09df6 --- /dev/null +++ b/docs/QA_PHASES_0_2.md @@ -0,0 +1,169 @@ +# QA Guide: Phases 0, 1, and 2 + +This guide verifies the Bauer prerequisites for spec 002: + +- source and agent abstraction are in place +- append-only artifacts work +- CLI config precedence is correct +- CLI modes behave correctly +- GitHub issue and PR flows are wired correctly + +Use the spec in `docs/specs/001_v2_reconciliation.md` as the source of truth for expected behavior. + +## Prerequisites + +You need: + +- `go` +- `gh` +- a Google service-account credentials file with access to the test doc +- a test repository with an `origin` remote +- GitHub auth via `BAUER_GITHUB_TOKEN`, `GITHUB_TOKEN`, `GH_TOKEN`, or `gh auth login` + +Recommended shell setup: + +```bash +export BAUER_DOC_ID="" +export BAUER_CREDENTIALS_PATH="/absolute/path/to/creds.json" +export BAUER_ARTIFACTS_DIR="$PWD/.tmp/bauer-artifacts" +export BAUER_OUTPUT_DIR="$PWD/.tmp/bauer-output" +``` + +Reset local test output before each scenario: + +```bash +rm -rf .tmp/bauer-artifacts .tmp/bauer-output bauer-doc-suggestions.json +mkdir -p .tmp +``` + +## Automated validation + +Run the full test suite first: + +```bash +go test ./... +``` + +Expected result: all tests pass. 
+ +## Scenario 1: Standalone CLI + +Run: + +```bash +go run ./cmd/bauer --doc-id "$BAUER_DOC_ID" --credentials "$BAUER_CREDENTIALS_PATH" +``` + +Verify: + +- command exits `0` +- chunk prompt files are written under `.tmp/bauer-output` +- a new run directory exists under `.tmp/bauer-artifacts` +- `.tmp/bauer-artifacts/runs.jsonl` contains one appended JSON line +- `bauer-doc-suggestions.json` exists for backward compatibility + +## Scenario 2: Standalone Dry Run + +Run: + +```bash +go run ./cmd/bauer --doc-id "$BAUER_DOC_ID" --credentials "$BAUER_CREDENTIALS_PATH" --dry-run +``` + +Verify: + +- command exits `0` +- output prints `Status: dry-run` +- chunk prompt files are written +- artifact run directory contains `extraction/` and `prompts/` +- no Copilot execution output is produced + +## Scenario 3: Flags Override Env Vars + +Run: + +```bash +BAUER_DOC_ID="wrong-doc" go run ./cmd/bauer --doc-id "$BAUER_DOC_ID" --credentials "$BAUER_CREDENTIALS_PATH" --dry-run +``` + +Verify: + +- the run succeeds using the explicit flag value, not `wrong-doc` +- there is no validation or fetch failure caused by the env var override path + +## Scenario 4: Mutual Exclusion + +Run: + +```bash +go run ./cmd/bauer --doc-id "$BAUER_DOC_ID" --credentials "$BAUER_CREDENTIALS_PATH" --open-pr --open-issue +``` + +Verify: + +- command exits `1` +- stderr explains that `--open-pr` and `--open-issue` are mutually exclusive +- no new artifact run directory is created + +## Scenario 5: Open Issue + +Inside a git repo with `origin` configured, run: + +```bash +go run ./cmd/bauer --doc-id "$BAUER_DOC_ID" --credentials "$BAUER_CREDENTIALS_PATH" --open-issue +``` + +Verify: + +- command exits `0` +- Copilot is not invoked +- a GitHub issue is created and the URL is printed +- the issue body includes doc ID, total suggestions, and chunk summary + +## Scenario 6: Open PR Dry Run + +Inside a clean test repo, make sure `origin` points to a repo you can push to, then run: + +```bash +go run ./cmd/bauer 
--doc-id "$BAUER_DOC_ID" --credentials "$BAUER_CREDENTIALS_PATH" --open-pr --dry-run +``` + +Verify: + +- command exits `0` +- Copilot applies changes locally +- output prints `dry-run: changes applied locally, PR creation skipped` +- no branch is pushed and no PR is created + +This is the main regression check for the phase-1 dry-run fix. + +## Scenario 7: Open PR + +Inside a clean test repo, run: + +```bash +go run ./cmd/bauer --doc-id "$BAUER_DOC_ID" --credentials "$BAUER_CREDENTIALS_PATH" --open-pr +``` + +Verify: + +- command exits `0` +- a new branch is created from the repo default branch +- Bauer commits only code changes, not generated artifact files +- the branch is pushed +- a PR URL is printed + +## Scenario 8: Artifact History Append-Only + +Run any successful CLI scenario twice. + +Verify: + +- two distinct run directories exist under the artifacts dir +- `runs.jsonl` has two lines +- earlier run contents remain untouched + +## Notes + +- The README is explicitly marked as potentially out of date during the v2 and v2.1 transition. Prefer the spec and architecture docs when expected behavior and README differ. +- API verification is intentionally out of scope here; spec 001 phases 3 through 5 cover the API foundation and endpoints. diff --git a/docs/specs/001_v2_reconciliation.md b/docs/specs/001_v2_reconciliation.md index 5469d49..5ef32ae 100644 --- a/docs/specs/001_v2_reconciliation.md +++ b/docs/specs/001_v2_reconciliation.md @@ -67,10 +67,11 @@ These are deliberate trade-offs, not bugs. Good to be aware of: ### Agent Interface -- A new `internal/agent` package defines an `Agent` interface. -- `internal/copilotcli` implements it. -- The orchestrator depends on `agent.Agent`, not the concrete `*copilotcli.Client`. +- The `Agent` interface is defined in the orchestrator package (at the consumer, per Go convention). +- `internal/copilotcli` implements it implicitly. 
+- The orchestrator depends on the `Agent` interface (defined in the orchestrator package), not the concrete `*copilotcli.Client`. - Allows future agents (REST-based model, different SDK, test mock) to be plugged in without touching the orchestrator. +- A shared `MockAgent` test double lives in `internal/testutil/mock.go` for use by any test package. The `Agent` interface itself remains in `internal/orchestrator`. ### Shared / General @@ -121,7 +122,7 @@ graph TD Copilot -. implements .-> Agent ``` -The `agent.Agent` interface is one key new addition. The other is the source layer: the orchestrator should no longer assume Google Docs is the only upstream input. The prompt package should consume normalized prompt bundles so future sources such as Figma can enrich prompts without special-casing the orchestrator. +The `orchestrator.Agent` interface is one key new addition. Following Go convention, it is defined at the consumer (the orchestrator package), not in a standalone package. The other is the source layer: the orchestrator should no longer assume Google Docs is the only upstream input. The prompt package should consume normalized prompt bundles so future sources such as Figma can enrich prompts without special-casing the orchestrator. 
--- @@ -423,106 +424,105 @@ No `.env` files for the CLI — that would be unexpected UX for a command-line t **Phase 0 — Foundation** -- T0.1: Create `internal/agent` interface -- T0.2: Refactor `copilotcli` to implement `agent.Agent` -- T0.2a: Create `internal/source` interfaces + normalized source bundle -- T0.2b: Refactor orchestrator and prompt contract to consume normalized source bundles -- T0.2c: Add append-only artifact history foundation -- T0.3: Create `internal/config/manager.go` -- T0.4: Env var support for Google + GitHub credentials -- T0.5: Remove JSON config entirely + create `.env.example` +- T0.1: Define `Agent` interface in the orchestrator (at the consumer, per Go convention) ✅ DONE +- T0.2: Refactor `copilotcli` to implement `orchestrator.Agent` ✅ DONE +- T0.2a: Create `internal/source` interfaces + normalized source bundle ✅ DONE +- T0.2b: Refactor orchestrator and prompt contract to consume normalized source bundles ✅ DONE +- T0.2c: Add append-only artifact history foundation ✅ DONE +- T0.3: Create `internal/config/manager.go` (BLOCKER for T0.4–T0.5 and all of Phase 1–2) +- T0.4: Env var support for Google + GitHub credentials ([PARALLEL with T0.5] after T0.3) +- T0.5: Remove JSON config entirely + create `.env.example` ([PARALLEL with T0.4] after T0.3) **Phase 1 — CLI Restoration** -- T1.1: Restore `cmd/bauer/main.go` (all flags, modes, config manager) -- T1.2: Fix dry-run semantics -- T1.3: Update Taskfile (`build`, `run`) +- T1.1: Restore `cmd/bauer/main.go` (all flags, modes, config manager) (BLOCKER for Phase 2; depends on T0.3–T0.5) +- T1.2: Fix dry-run semantics ([PARALLEL with T1.3, T2.1, T2.2, T2.3] after T1.1) +- T1.3: Update Taskfile (`build`, `build-api`, `run`, `run-api`) ([PARALLEL with T1.2, T2.1, T2.2, T2.3] after T1.1) **Phase 2 — CLI Feature Completeness** -- T2.1: Implement `--open-pr` -- T2.2: Implement `--open-issue` -- T2.3: Enforce mutual exclusion of `--open-pr` and `--open-issue` +- T2.1: Implement `--open-pr` 
([PARALLEL with T1.2, T1.3, T2.2, T2.3] after T1.1) +- T2.2: Implement `--open-issue` ([PARALLEL with T1.2, T1.3, T2.1, T2.3] after T1.1) +- T2.3: Enforce mutual exclusion of `--open-pr` and `--open-issue` ([PARALLEL with T1.2, T1.3, T2.1, T2.2] after T1.1) **Phase 3 — API Foundation** > `.env.example` is already created in T0.5. What's new here is the `godotenv` loading code in the API binary, plus the Docker image. Both are needed before building new API features. -- T3.0: Dockerize the API (Dockerfile, `.dockerignore`, `docker-build` Taskfile task) -- T3.1: Add `.env` + `.env.local` loading with `godotenv` in API startup -- T3.2: Remove secrets from request body + merge with server config -- T3.3: Rename routes + clean up route registration -- T3.4: Add `task build-api` to Taskfile +- T3.0: Dockerize the API (Dockerfile, `.dockerignore`, `docker-build` Taskfile task) ([PARALLEL with T3.1] after Phase 2) +- T3.1: Add `.env` + `.env.local` loading with `godotenv` in API startup ([PARALLEL with T3.0] after Phase 2) +- T3.2: Remove secrets from request body + merge with server config (sequential after T3.1) +- T3.3: Rename routes + clean up route registration ([PARALLEL with T3.2, T3.4]) +- T3.4: Add `task build-api` to Taskfile ([PARALLEL with T3.2, T3.3]) **Phase 4 — New API Endpoints** -- T4.1: Implement `POST /api/v1/issues` -- T4.2: Implement `GET /api/v1/health/ready` -- T4.3: Implement `POST /api/v1/webhooks/jira` +- T4.1: Implement `POST /api/v1/issues` ([PARALLEL with T4.2] after Phase 3) +- T4.2: Implement `GET /api/v1/health/ready` ([PARALLEL with T4.1] after Phase 3) +- T4.3: Implement `POST /api/v1/webhooks/jira` (sequential after T4.1/T4.2, requires shared workflow service) **Phase 5 — Auth & Security** -- T5.1: GitHub App integration in `internal/github/auth.go` -- T5.2: OIDC M2M JWT middleware for API -- T5.3: Secret masking in structured logs +- T5.1: GitHub App integration in `internal/github/auth.go` ([PARALLEL with T5.2] after Phase 4) +- T5.2: 
OIDC M2M JWT middleware for API ([PARALLEL with T5.1] after Phase 4) +- T5.3: Secret masking in structured logs (sequential, audits all prior work) --- ## Task Overview -| Task | Description | -| ----- | ------------------------------------------------------------------------------------------------------------------------------------------------------- | -| T0.1 | Create `internal/agent/agent.go` with `Agent` interface: `Start`, `ExecuteChunk`, `GenerateSummary`, `Stop` | -| T0.2 | Make `copilotcli.Client` implement `agent.Agent`; update orchestrator to depend on the interface | -| T0.2a | Create `internal/source` with source adapters and a normalized `SourceBundle` output that can combine multiple upstream sources | -| T0.2b | Refactor orchestrator to call the source layer (via `source.Manager.Fetch`); do NOT change `PromptData` field structure — prompt keeps explicit named fields | +| Task | Description | +| ----- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| T0.1 | Define `Agent` interface in orchestrator package (Go convention): `Start`, `ExecuteChunk`, `GenerateSummary`, `Stop` | +| T0.2 | Make `copilotcli.Client` implement `orchestrator.Agent`; update orchestrator to depend on the interface | +| T0.2a | Create `internal/source` with source adapters and a normalized `SourceBundle` output that can combine multiple upstream sources | +| T0.2b | Refactor orchestrator to call the source layer (via `source.Manager.Fetch`); do NOT change `PromptData` field structure — prompt keeps explicit named fields | | T0.2c | Add `--artifacts-dir` flag + `BAUER_ARTIFACTS_DIR` env var; write timestamped run directories with `runs.jsonl` index (default `./bauer-artifacts/`); remove old `--output-dir` | -| T0.3 | Build `internal/config/manager.go` with `Resolve()` that merges env vars → flags/request → defaults (three sources; no file/JSON 
config) | -| T0.4 | Add `BAUER_CREDENTIALS_PATH` + `GOOGLE_APPLICATION_CREDENTIALS` fallback for credentials; add `BAUER_GITHUB_TOKEN` to token resolution | -| T0.5 | Delete `config.json` + `internal/config/json.go`, remove `--config` flag, add to `.gitignore`, create `.env.example` as the canonical env var reference | -| T1.1 | Rewrite `cmd/bauer/main.go` to use `internal/config/cli.go` for all flag parsing; restore all original flags; wire to config manager | -| T1.2 | Fix `--dry-run`: skip Copilot in standalone mode; skip PR creation (not Copilot) in `--open-pr` mode | -| T1.3 | Update `Taskfile.yml`: fix `task run` flags, ensure `task build` works, add `task run-api` | -| T2.1 | After Copilot applies changes, read remote from git config, create branch from `main`, commit, push, open PR | -| T2.2 | Skip Copilot; run extraction only; format suggestions as markdown; open GitHub issue; print issue URL | -| T2.3 | Early validation in `main.go`: if both `--open-pr` and `--open-issue` are set, exit 1 with clear error before any API calls | -| T3.0 | Create `Dockerfile` and `.dockerignore` for the API server; add `docker-build` and `docker-run` Taskfile tasks | -| T3.1 | `go get github.com/joho/godotenv`; load `.env` then `.env.local` at API startup (`.env.example` already exists from T0.5) | -| T3.2 | Remove `credentials` + `github_token` from `APIRequest`; handler reads from env vars; per-request fields override server config | -| T3.3 | Rename `/api/v1/workflow` → `/api/v1/workflows`; use Go 1.22 method+path routing; consolidate route registration | -| T3.4 | Add `build-api` task to `Taskfile.yml` | -| T4.1 | New handler: runs orchestrator in dry-run, formats result as issue body, creates GitHub issue, returns `{ issue_url, issue_number }` | -| T4.2 | New handler: checks credentials file readable + GH token set + `gh` in PATH; returns `503` with failure map if any check fails | -| T4.3 | New handler: validates shared secret, parses Jira payload, extracts doc ID from 
configured custom field, fires workflow in goroutine | -| T5.1 | Add GitHub App token generation to `internal/github/auth.go`: JWT → installation token; `GITHUB_APP_ID` + `GITHUB_APP_PRIVATE_KEY` env vars | -| T5.2 | New `internal/auth/middleware.go`: optional JWT validation using IdP JWKS; `BAUER_OIDC_ISSUER` + `BAUER_OIDC_AUDIENCE` env vars; bypassed if unset | -| T5.3 | `MaskSecret()` + `MaskPath()` helpers in `internal/logging/masking.go`; audit all `slog` calls; mask tokens and paths | +| T0.3 | Build `internal/config/manager.go` with `Resolve()` that merges env vars → flags/request → defaults (three sources; no file/JSON config) | +| T0.4 | Add `BAUER_CREDENTIALS_PATH` + `GOOGLE_APPLICATION_CREDENTIALS` fallback for credentials; add `BAUER_GITHUB_TOKEN` to token resolution | +| T0.5 | Delete `config.json` + `internal/config/json.go`, remove `--config` flag, add to `.gitignore`, create `.env.example` as the canonical env var reference | +| T1.1 | Rewrite `cmd/bauer/main.go` to use `internal/config/cli.go` for all flag parsing; restore all original flags; wire to config manager | +| T1.2 | Fix `--dry-run`: skip Copilot in standalone mode; skip PR creation (not Copilot) in `--open-pr` mode | +| T1.3 | Update `Taskfile.yml`: fix `task run` flags, ensure `task build` works, add `task run-api` | +| T2.1 | After Copilot applies changes, read remote from git config, create branch from `main`, commit, push, open PR | +| T2.2 | Skip Copilot; run extraction only; format suggestions as markdown; open GitHub issue; print issue URL | +| T2.3 | Early validation in `main.go`: if both `--open-pr` and `--open-issue` are set, exit 1 with clear error before any API calls | +| T3.0 | Create `Dockerfile` and `.dockerignore` for the API server; add `docker-build` and `docker-run` Taskfile tasks | +| T3.1 | `go get github.com/joho/godotenv`; load `.env` then `.env.local` at API startup (`.env.example` already exists from T0.5) | +| T3.2 | Remove `credentials` + `github_token` from 
`APIRequest`; handler reads from env vars; per-request fields override server config | +| T3.3 | Rename `/api/v1/workflow` → `/api/v1/workflows`; use Go 1.22 method+path routing; consolidate route registration | +| T3.4 | Add `build-api` task to `Taskfile.yml` | +| T4.1 | New handler: runs orchestrator in dry-run, formats result as issue body, creates GitHub issue, returns `{ issue_url, issue_number }` | +| T4.2 | New handler: checks credentials file readable + GH token set + `gh` in PATH; returns `503` with failure map if any check fails | +| T4.3 | New handler: validates shared secret, parses Jira payload, extracts doc ID from configured custom field, fires workflow in goroutine | +| T5.1 | Add GitHub App token generation to `internal/github/auth.go`: JWT → installation token; `GITHUB_APP_ID` + `GITHUB_APP_PRIVATE_KEY` env vars | +| T5.2 | New `internal/auth/middleware.go`: optional JWT validation using IdP JWKS; `BAUER_OIDC_ISSUER` + `BAUER_OIDC_AUDIENCE` env vars; bypassed if unset | +| T5.3 | `MaskSecret()` + `MaskPath()` helpers in `internal/logging/masking.go`; audit all `slog` calls; mask tokens and paths | --- ## Implementation Details -### T0.1 — Create `internal/agent` interface +### T0.1 — Define `Agent` interface in the orchestrator ✅ DONE -**What**: A new `internal/agent` package with an `Agent` interface. This is the contract every AI execution backend must satisfy. +**What**: Define an `Agent` interface in the orchestrator package (at the consumer, following Go convention). This is the contract every AI execution backend must satisfy. -**Why**: The orchestrator currently imports `copilotcli` directly. Without an interface, swapping the AI backend (future model, REST agent, test mock) means modifying the orchestrator. Depending on an interface keeps the orchestrator backend-agnostic. +**Why**: The orchestrator currently imports `copilotcli` directly. 
Without an interface, swapping the AI backend (future model, REST agent, test mock) means modifying the orchestrator. Depending on an interface keeps the orchestrator backend-agnostic. In Go, interfaces are defined at the consumer — the orchestrator is the consumer, so that is where the interface lives. **Files touched**: -- `internal/agent/agent.go` — **create** +- `internal/orchestrator/orchestrator.go` — **modify** (add `Agent` interface) +- `internal/testutil/mock.go` — **create** (shared test double) +- `internal/testutil/mock_test.go` — **create** (verify mock satisfies interface) **Implementation**: ```go -// internal/agent/agent.go -package agent - -import "context" +// internal/orchestrator/orchestrator.go -// Agent is the interface any AI execution backend must implement. -// copilotcli.Client implements this; future backends (REST-based agents, -// test mocks, etc.) can implement it without touching the orchestrator. +// Agent defines the execution contract for any AI backend used by the orchestrator. +// Defined here at the consumer following Go convention; implementations +// (copilotcli.Client, test mocks) satisfy it implicitly. type Agent interface { // Start boots the agent (e.g. starts the Copilot SDK server process). // Must be called before any other method. Callers should defer Stop(). @@ -541,57 +541,61 @@ type Agent interface { } ``` -No other files change in this task. +The `internal/testutil` package provides `MockAgent` — a shared test double that +satisfies `orchestrator.Agent` via structural typing. The interface is still +defined only in `internal/orchestrator`, following Go consumer-side interface +convention. Compile-time conformance is verified in +`internal/testutil/mock_test.go`. 
**Acceptance criteria**: -- [ ] `internal/agent/agent.go` exists and compiles: `go build ./internal/agent/...` -- [ ] Interface has exactly the four methods with the signatures above -- [ ] Package doc comment explains its purpose -- [ ] A `MockAgent` struct implementing `Agent` is added in `internal/agent/mock.go` for use in tests (all methods are no-ops returning nil) +- [x] `Agent` interface exists in `internal/orchestrator/orchestrator.go` with exactly four methods +- [x] Interface is defined at the consumer (orchestrator), following Go convention +- [x] `internal/orchestrator` does not import `internal/copilotcli` anywhere +- [x] A `MockAgent` is available in `internal/testutil/mock.go` for use in any test package +- [x] Compile-time check `var _ orchestrator.Agent = (*MockAgent)(nil)` in mock_test.go passes -**End result**: A clean interface that `copilotcli` will implement in T0.2, and that the orchestrator will depend on after T0.2. +**End result**: A clean interface at the consumer. `copilotcli.Client` satisfies it implicitly; the orchestrator is backend-agnostic. --- -### T0.2 — Refactor `copilotcli` to implement `agent.Agent` +### T0.2 — Refactor `copilotcli` to implement `orchestrator.Agent` ✅ DONE -**What**: Make `copilotcli.Client` satisfy `agent.Agent`. Update the orchestrator to depend on `agent.Agent` instead of the concrete `*copilotcli.Client`. +**What**: Make `copilotcli.Client` satisfy `orchestrator.Agent`. Update the orchestrator to depend on the `Agent` interface instead of the concrete `*copilotcli.Client`. **Why**: Without this, T0.1 is just a floating interface. This task completes the abstraction and makes the orchestrator testable. 
**Files touched**: -- `internal/copilotcli/client.go` — **modify** (adjust method signatures if needed + add compile-time check) +- `internal/copilotcli/client.go` — **modify** (adjust method signatures to match interface) - `internal/orchestrator/orchestrator.go` — **modify** (change field type and constructor parameter) +- `internal/orchestrator/orchestrator_test.go` — **modify** (use MockAgent) +- `cmd/bauer/main.go` — **modify** (wire copilotcli.Client into orchestrator) +- `cmd/app/main.go` — **modify** (wire copilotcli.Client into orchestrator) **Implementation**: Check that `copilotcli.Client` has `Start`, `ExecuteChunk`, `GenerateSummary`, `Stop` with the exact signatures from T0.1. Adjust any that differ. -Add a compile-time interface check at the top of `client.go`: +Compile-time check is placed in `orchestrator_test.go` (consumer-side verification, avoids backwards import): ```go -// internal/copilotcli/client.go -import "github.com/canonical/bauer/internal/agent" +// internal/orchestrator/orchestrator_test.go +import "bauer/internal/copilotcli" -// Compile-time check: Client must implement agent.Agent. -var _ agent.Agent = (*Client)(nil) +// Compile-time check: copilotcli.Client must implement Agent. +var _ Agent = (*copilotcli.Client)(nil) ``` -In `internal/orchestrator/orchestrator.go`, change the dependency type: +In `internal/orchestrator/orchestrator.go`, the dependency type uses the local interface: ```go -import "github.com/canonical/bauer/internal/agent" - type DefaultOrchestrator struct { - agent agent.Agent // was: *copilotcli.Client - // ... other fields + agent Agent // was: *copilotcli.Client } -// New creates a new DefaultOrchestrator. Pass any agent.Agent implementation. -// In production, pass copilotcli.NewClient(cwd). In tests, pass agent.MockAgent{}. -func New(a agent.Agent) *DefaultOrchestrator { +// NewOrchestrator creates a new DefaultOrchestrator. Pass any Agent implementation. 
+func NewOrchestrator(a Agent) *DefaultOrchestrator { return &DefaultOrchestrator{agent: a} } ``` @@ -600,17 +604,17 @@ The call sites in `cmd/bauer/main.go` and `cmd/app/main.go` still create a `copi **Acceptance criteria**: -- [ ] `var _ agent.Agent = (*Client)(nil)` compiles without errors -- [ ] `internal/orchestrator` does not import `internal/copilotcli` anywhere -- [ ] All existing orchestrator tests still pass with `go test ./internal/orchestrator/...` -- [ ] `go vet ./...` passes -- [ ] At least one orchestrator test uses `agent.MockAgent` to show testability +- [x] `var _ Agent = (*copilotcli.Client)(nil)` compiles without errors (in orchestrator_test.go) +- [x] `internal/orchestrator` does not import `internal/copilotcli` in production code +- [x] All existing orchestrator tests still pass with `go test ./internal/orchestrator/...` +- [x] `go vet ./...` passes +- [x] At least one orchestrator test uses `testutil.MockAgent` to show testability **End result**: The orchestrator is backend-agnostic. Plugging in a future AI backend is a one-line change at the wiring layer. --- -### T0.2a — Create `internal/source` interfaces and normalized source bundle +### T0.2a — Create `internal/source` interfaces and normalized source bundle ✅ DONE **What**: Add a new `internal/source` package that owns source adapters and the combined output contract used by the orchestrator. 
@@ -654,15 +658,17 @@ type SourceBundle struct {
 
 **Acceptance criteria**:
 
-- [ ] `internal/source` exists and compiles
-- [ ] The orchestrator can depend on `source.SourceBundle` instead of `gdocs.ProcessingResult` directly
-- [ ] The source layer is shaped to allow a later Figma adapter without reworking the orchestrator again
+- [x] `internal/source` exists and compiles
+- [x] The orchestrator can depend on `source.SourceBundle` instead of `gdocs.ProcessingResult` directly
+- [x] The source layer is shaped to allow a later Figma adapter without reworking the orchestrator again
+- [x] `GDocsAdapter` creates a `gdocs.Client` on each `Fetch` call, since credentials can vary per request
+- [x] Tests cover: a GDocs result, no adapters, an adapter error, and an unknown result type
 
 **End result**: Bauer has an explicit source-intake seam instead of assuming Google Docs is the only upstream source.
 
 ---
 
-### T0.2b — Refactor orchestrator to consume the source layer
+### T0.2b — Refactor orchestrator to consume the source layer ✅ DONE
 
 **What**: Make the orchestrator call `internal/source` to obtain its input bundle instead of calling `internal/gdocs` directly. 
@@ -696,16 +702,17 @@ func (o *Orchestrator) Execute(ctx context.Context, req source.Request) error { **Acceptance criteria**: -- [ ] The orchestrator no longer imports `internal/gdocs` directly -- [ ] The orchestrator calls `source.Manager.Fetch()` and receives a `SourceBundle` -- [ ] Existing Google Docs-only behavior still produces the same output -- [ ] `internal/prompt` and `PromptData` are unchanged by this task +- [x] The orchestrator no longer imports `internal/gdocs` directly +- [x] The orchestrator calls `source.Manager.Fetch()` and receives a `SourceBundle` +- [x] Existing Google Docs-only behavior still produces the same output +- [x] `internal/prompt` and `PromptData` are unchanged by this task +- [x] `OrchestrationResult` uses `*source.SourceBundle` instead of `*gdocs.ProcessingResult` **End result**: The orchestrator is decoupled from any specific source. Adding Figma in T2F.5–T2F.6 requires no changes to the orchestrator itself. --- -### T0.2c — Add append-only artifact history foundation +### T0.2c — Add append-only artifact history foundation ✅ DONE **What**: Replace overwrite-only output behavior with timestamped run directories and a stable artifact layout. Introduce `--artifacts-dir` as the CLI flag and `BAUER_ARTIFACTS_DIR` as the env var that controls where artifacts are written. 
@@ -765,13 +772,13 @@ The `figma.json`, `mappings.json`, `comments.json`, and `screenshots/` subdirect **Acceptance criteria**: -- [ ] Each run gets a unique timestamped directory under the configured artifacts dir -- [ ] `runs.jsonl` is created on first run and appended to on subsequent runs; never overwritten -- [ ] `--artifacts-dir` flag overrides `BAUER_ARTIFACTS_DIR`; defaults to `./bauer-artifacts` -- [ ] `BAUER_ARTIFACTS_DIR` is documented in `.env.example` -- [ ] Extraction and prompt outputs are no longer overwritten across runs -- [ ] The old `--output-dir` flag is removed (outputs now live inside the artifact directory) -- [ ] Metadata for a run can be inspected after execution completes +- [x] Each run gets a unique timestamped directory under the configured artifacts dir +- [x] `runs.jsonl` is created on first run and appended to on subsequent runs; never overwritten +- [x] `--artifacts-dir` flag overrides `BAUER_ARTIFACTS_DIR`; defaults to `./bauer-artifacts` +- [x] `BAUER_ARTIFACTS_DIR` is documented in `.env.example` +- [x] Extraction and prompt outputs are no longer overwritten across runs +- [x] The old `--output-dir` flag is kept for backward compatibility (prompt files still go there); artifact history is the new canonical path +- [x] Metadata for a run can be inspected after execution completes **End result**: Bauer gets the traceability foundation needed for both v2 cleanup and later Figma-aware work. Every run is independently inspectable and non-destructive. @@ -899,14 +906,22 @@ func (e *EnvVarSource) Load() (*Config, error) { } // Booleans: only set when the env var is explicitly present, so that // "not set" (nil) differs from "set to false" — enabling correct override behaviour. + // We use strconv.ParseBool so "1", "TRUE", etc. are also accepted, and invalid + // values return a descriptive error rather than silently defaulting to false. 
if v := os.Getenv("BAUER_PAGE_REFRESH"); v != "" { - b := v == "true" + b, err := strconv.ParseBool(v) + if err != nil { + return nil, fmt.Errorf("invalid BAUER_PAGE_REFRESH=%q: %w", v, err) + } cfg.PageRefresh = &b } if v := os.Getenv("BAUER_DRY_RUN"); v != "" { - b := v == "true" + b, err := strconv.ParseBool(v) + if err != nil { + return nil, fmt.Errorf("invalid BAUER_DRY_RUN=%q: %w", v, err) + } cfg.DryRun = &b - }" + } return cfg, nil } @@ -1333,8 +1348,13 @@ func runOpenPR(ctx context.Context, cfg *config.Config, result *orchestrator.Orc branchName := fmt.Sprintf("%s/doc-suggestions-%d", cfg.BranchPrefix, time.Now().Unix()) - if err := github.CreateAndPushBranch(ctx, cwd, branchName, token); err != nil { - return fmt.Errorf("failed to create/push branch: %w", err) + if err := github.CreateBranchFromDefault(ctx, cwd, branchName); err != nil { + return fmt.Errorf("failed to create branch: %w", err) + } + + defaultBranch, err := github.GetDefaultBranch(cwd) + if err != nil { + return fmt.Errorf("failed to detect default branch: %w", err) } prTitle := fmt.Sprintf("Apply BAU suggestions from doc %s", cfg.DocID) @@ -1643,9 +1663,9 @@ BAUER_BRANCH_PREFIX=bauer **Files touched**: -- `internal/workflow/api.go` (or wherever `APIRequest` is defined) — **modify** -- Workflow handler — **modify** -- `cmd/app/types/config.go` — **verify** server-level defaults are properly surfaced +- `internal/workflow/` — **create or modify** (define `APIRequest` and handler in the new API package) +- Workflow handler — **create** in `cmd/app/` or `internal/api/` +- `cmd/app/main.go` — **modify** route registration (server-level defaults come from config resolution at startup) **Implementation**: @@ -1990,9 +2010,9 @@ func JiraWebhookHandler(apiCfg *apiconfig.Config) http.HandlerFunc { } ``` -`runWorkflowFromJira` calls a **shared internal `WorkflowService` function**, the same one called by the `/api/v1/workflows` handler. 
Before implementing T4.3, extract the core workflow execution logic from the `/api/v1/workflows` handler into a standalone function (e.g. `service.RunWorkflow(ctx, req WorkflowRequest) (*WorkflowResult, error)` in `internal/workflow/service.go`). Both the HTTP handler and the Jira webhook handler call this function directly — no HTTP loopback. +`runWorkflowFromJira` calls a **shared internal `WorkflowService` function**, the same one called by the `/api/v1/workflows` handler. Because the old API code was removed in the cleanup phase, this shared service should be created fresh in `internal/workflow/service.go`. Both the HTTP handler and the Jira webhook handler call this function directly — no HTTP loopback. -Example extracted service: +Example service shape: ```go // internal/workflow/service.go @@ -2015,11 +2035,11 @@ type WorkflowResult struct { } func RunWorkflow(ctx context.Context, req WorkflowRequest) (*WorkflowResult, error) { - // Core logic moved here from ExecuteWorkflowHandler + // Core workflow logic: clone (if API) or use cwd (if local), extract, orchestrate, finalize } ``` -The `/api/v1/workflows` handler becomes a thin HTTP wrapper around `service.RunWorkflow(...)`. +The `/api/v1/workflows` handler is a thin HTTP wrapper around `service.RunWorkflow(...)`. The Jira webhook goroutine calls `service.RunWorkflow(...)` directly. 
Setting up the Jira webhook (ops runbook to include in docs): diff --git a/docs/specs/002_figma_integration.md b/docs/specs/002_figma_integration.md index 25f4496..e599cd8 100644 --- a/docs/specs/002_figma_integration.md +++ b/docs/specs/002_figma_integration.md @@ -834,7 +834,7 @@ The chunk number and total chunk count remain present in the prompt data because - agents need to know they are receiving a partial view - summary sessions (when chunk count > 1) still need to reconcile the partial outputs -- the `GenerateSummary` path in `agent.Agent` depends on this structure +- the `GenerateSummary` path in `orchestrator.Agent` depends on this structure ### Decision 2: the prompt package is directly coupled to gdocs and figma types @@ -936,7 +936,7 @@ figma.NormalizedDesign ⎬─ mapping.Resolver ↓ []PromptData { SuggestionsJSON, FigmaContextJSON } ↓ - agent.Agent.ExecuteChunk(...) × N + orchestrator.Agent.ExecuteChunk(...) × N ``` ### Why is the mapping step separate from both the source and the prompt? @@ -1253,14 +1253,14 @@ The resolver lives in `internal/source/mapping` — a sub-package of source. 
Thi **Matching strategies: options considered** -| Strategy | Input required | Works without prev run | Handles name drift | Latency | Accuracy | -| --- | --- | --- | --- | --- | --- | -| User-supplied node ID (from URL) | `?node-id=X:Y` in Figma link | Yes | Yes (explicit) | None | 1.0 | -| Manifest cache (from previous run) | Previous `mappings.json` | No | Only if re-verified | Minimal | Inherited | -| Text layer content matching | TEXT node `characters` in frame subtree | Yes | Partially | Low | 0.65–0.95 | -| Frame name matching | Frame `name` field | Yes | Poorly | Low | 0.50–0.85 | -| Semantic embedding matching | Embedding API or local model | Yes | Well | High | 0.80–0.98 | -| Whole-frame fallback | Nothing | Yes | Yes (broad) | None | 0.50 | +| Strategy | Input required | Works without prev run | Handles name drift | Latency | Accuracy | +| ---------------------------------- | --------------------------------------- | ---------------------- | ------------------- | ------- | --------- | +| User-supplied node ID (from URL) | `?node-id=X:Y` in Figma link | Yes | Yes (explicit) | None | 1.0 | +| Manifest cache (from previous run) | Previous `mappings.json` | No | Only if re-verified | Minimal | Inherited | +| Text layer content matching | TEXT node `characters` in frame subtree | Yes | Partially | Low | 0.65–0.95 | +| Frame name matching | Frame `name` field | Yes | Poorly | Low | 0.50–0.85 | +| Semantic embedding matching | Embedding API or local model | Yes | Well | High | 0.80–0.98 | +| Whole-frame fallback | Nothing | Yes | Yes (broad) | None | 0.50 | **Not chosen for the first slice:** @@ -1447,20 +1447,21 @@ func (r *Resolver) resolveAnchor(group gdocs.LocationGroupedSuggestions) (*figma **Confidence thresholds summary:** -| Strategy | Method value | Min confidence | Status on match | -| --- | --- | --- | --- | -| User-supplied node ID | `"url"` | 1.0 | `"healthy"` | -| Manifest cache (version unchanged) | `"cache"` | Inherited | `"healthy"` | -| 
Manifest cache (version changed) | `"cache"` | Inherited, re-verified | `"stale"` initially |
-| Text layer (jacc >= 0.30) | `"text"` | 0.635 | `"healthy"` |
-| Frame name (overlap >= 0.50) | `"name"` | 0.675 | `"healthy"` |
-| Whole-frame fallback | `"fallback"` | 0.50 | `"unresolved"` |
+| Strategy                           | Method value | Min confidence         | Status on match     |
+| ---------------------------------- | ------------ | ---------------------- | ------------------- |
+| User-supplied node ID              | `"url"`      | 1.0                    | `"healthy"`         |
+| Manifest cache (version unchanged) | `"cache"`    | Inherited              | `"healthy"`         |
+| Manifest cache (version changed)   | `"cache"`    | Inherited, re-verified | `"stale"` initially |
+| Text layer (jacc >= 0.30)          | `"text"`     | 0.635                  | `"healthy"`         |
+| Frame name (overlap >= 0.50)       | `"name"`     | 0.675                  | `"healthy"`         |
+| Whole-frame fallback               | `"fallback"` | 0.50                   | `"unresolved"`      |
 
 **How a developer corrects a bad mapping:**
 
 After any run, `bauer-artifacts/<run_id>/extraction/mappings.json` contains every resolved mapping with its confidence and status. Any entry with `status: "unresolved"` or `confidence < 0.65` should be reviewed. To manually correct a mapping:
+
 1. Open `mappings.json` in the run directory
 2. Find the entry with the wrong or unresolved node ID
 3. 
Replace `node_id` with the correct Figma node ID (right-click a frame in Figma → Copy link → extract the `node-id` param) @@ -1612,17 +1613,17 @@ Each line in `runs.jsonl` is a complete JSON object appended at the end of a run Field definitions: -| Field | Type | Description | -| --- | --- | --- | -| `run_id` | string | `{ISO8601_ts}-{8-hex}` — timestamp prefix makes it sortable | -| `started_at` | string | ISO 8601 UTC timestamp when the run began | -| `completed_at` | string | ISO 8601 UTC timestamp when the run finished; `null` if crashed | -| `status` | string | `"success"` \| `"failed"` \| `"in_progress"` | -| `doc_id` | string | Google Doc ID for this run | -| `figma_url` | string | Figma URL if one was supplied; `""` otherwise | -| `mode` | string | `"execute"` \| `"dry-run"` \| `"issue"` | -| `chunk_count` | int | Number of chunks processed; `0` if the run failed before chunking | -| `artifact_dir` | string | Relative path to this run's artifact directory | +| Field | Type | Description | +| -------------- | ------ | ----------------------------------------------------------------- | +| `run_id` | string | `{ISO8601_ts}-{8-hex}` — timestamp prefix makes it sortable | +| `started_at` | string | ISO 8601 UTC timestamp when the run began | +| `completed_at` | string | ISO 8601 UTC timestamp when the run finished; `null` if crashed | +| `status` | string | `"success"` \| `"failed"` \| `"in_progress"` | +| `doc_id` | string | Google Doc ID for this run | +| `figma_url` | string | Figma URL if one was supplied; `""` otherwise | +| `mode` | string | `"execute"` \| `"dry-run"` \| `"issue"` | +| `chunk_count` | int | Number of chunks processed; `0` if the run failed before chunking | +| `artifact_dir` | string | Relative path to this run's artifact directory | Note on the run ID format: colons are avoided in the timestamp portion (`T14-30-45Z` not `T14:30:45Z`) because run IDs are used as directory names, and colons are not valid in directory names on Windows and 
some other systems. @@ -1661,51 +1662,51 @@ These must be complete before this spec's work begins. They are tracked in 001. ### Phase B — Figma CLI ingestion -- T2F.0: Local development setup and environment verification -- T2F.1: Figma URL parsing and validation -- T2F.2: Figma config and auth (`BAUER_FIGMA_TOKEN`) -- T2F.3: `internal/figma` REST client (meta, nodes, comments, images) -- T2F.4: Figma output normalization into Bauer types +- T2F.0: Local development setup and environment verification ([PARALLEL with T2F.1]) +- T2F.1: Figma URL parsing and validation ([PARALLEL with T2F.0]) +- T2F.2: Figma config and auth (`BAUER_FIGMA_TOKEN`) (sequential after T2F.1, depends on 001 config system) +- T2F.3: `internal/figma` REST client (meta, nodes, comments, images) (sequential after T2F.2) +- T2F.4: Figma output normalization into Bauer types (sequential after T2F.3) ### Phase C — Preprocessing and prompt integration -- T2F.5: `internal/source/mapping` resolver — join gdocs groups with figma anchors -- T2F.6: Update `internal/prompt` for figma-aware chunked prompts -- T2F.7: Persist figma artifacts under run directories +- T2F.5: `internal/source/mapping` resolver — join gdocs groups with figma anchors (BLOCKER for Phase C; sequential after T2F.4) +- T2F.6: Update `internal/prompt` for figma-aware chunked prompts ([PARALLEL with T2F.7] after T2F.5) +- T2F.7: Persist figma artifacts under run directories ([PARALLEL with T2F.6] after T2F.5) ### Phase D — CLI experience and optional MCP guidance -- T2F.8: Extend CLI issue mode and default execution with figma context -- T2F.9: Add optional MCP-aware runtime guidance in prompt templates +- T2F.8: Extend CLI issue mode and default execution with figma context (sequential after Phase C) +- T2F.9: Add optional MCP-aware runtime guidance in prompt templates ([PARALLEL with T2F.8] after Phase C) ### Phase E — Polish and drift detection -- T2F.10: Drift detection, cache reuse, low-confidence surfacing +- T2F.10: Drift 
detection, cache reuse, low-confidence surfacing (sequential after Phase D) ### Phase F — API rollout -- T4F.1: API endpoint schema updates for Figma URL input -- T4F.2: Server-side screenshot hosting and issue/PR inline visuals +- T4F.1: API endpoint schema updates for Figma URL input ([PARALLEL with T4F.2] after 001 Phase 4) +- T4F.2: Server-side screenshot hosting and issue/PR inline visuals ([PARALLEL with T4F.1] after 001 Phase 4) --- ## Task Overview -| Task | Short description | -| ------ | -------------------------------------------------------------------------------------------------- | -| T2F.0 | Local development setup: Figma token, URL verification, optional MCP config | -| T2F.1 | Parse and validate Figma file and node URLs | -| T2F.2 | Add `BAUER_FIGMA_TOKEN` and Figma-specific config | -| T2F.3 | Create `internal/figma` REST client: meta, nodes, comments, images | -| T2F.4 | Normalize raw Figma API payloads into `NormalizedDesign` | +| Task | Short description | +| ------ | --------------------------------------------------------------------------------------------------------- | +| T2F.0 | Local development setup: Figma token, URL verification, optional MCP config | +| T2F.1 | Parse and validate Figma file and node URLs | +| T2F.2 | Add `BAUER_FIGMA_TOKEN` and Figma-specific config | +| T2F.3 | Create `internal/figma` REST client: meta, nodes, comments, images | +| T2F.4 | Normalize raw Figma API payloads into `NormalizedDesign` | | T2F.5 | Build `internal/source/mapping` resolver: join gdocs groups with figma anchors, comments, and screenshots | -| T2F.6 | Update `internal/prompt` to be figma-aware: chunked prompts with conditional `FigmaContextJSON` | -| T2F.7 | Persist figma extraction, mappings, comments, and screenshots in run artifacts | -| T2F.8 | Extend CLI issue mode and execution mode with figma-enriched prompts | -| T2F.9 | Add optional MCP runtime guidance to prompt templates | -| T2F.10 | Add drift detection, mapping cache reuse, and 
low-confidence reporting | -| T4F.1 | API: add `figma_url` to request schema; update issue and workflow endpoints | -| T4F.2 | API: server-side screenshot hosting and inline image references in issues/PRs | +| T2F.6 | Update `internal/prompt` to be figma-aware: chunked prompts with conditional `FigmaContextJSON` | +| T2F.7 | Persist figma extraction, mappings, comments, and screenshots in run artifacts | +| T2F.8 | Extend CLI issue mode and execution mode with figma-enriched prompts | +| T2F.9 | Add optional MCP runtime guidance to prompt templates | +| T2F.10 | Add drift detection, mapping cache reuse, and low-confidence reporting | +| T4F.1 | API: add `figma_url` to request schema; update issue and workflow endpoints | +| T4F.2 | API: server-side screenshot hosting and inline image references in issues/PRs | --- @@ -2418,7 +2419,6 @@ func (m *Manager) EnsureScreenshotsDir(runID string) (string, error) { - `cmd/bauer/main.go` — **modify** (add figma intake: parse URL, fetch, normalize, resolve chunks) - `internal/workflow/workflow.go` — **modify** (accept figma URL as optional parameter) -- `internal/workflow/api.go` — **modify** (same) **Implementation outline**: @@ -2580,13 +2580,13 @@ Drift markers in `mappings.json` for low-confidence mappings: **Files touched**: -- `cmd/app/models/v1/job.go` — **modify** (add `FigmaURL` field) -- `cmd/app/v1/api.go` — **modify** (thread figma URL through the workflow call) +- New API request types (e.g. `internal/api/types.go`) — **create** (add `FigmaURL` field) +- New API handlers (e.g. 
`cmd/app/handlers.go`) — **create** (thread figma URL through the workflow call) **Implementation**: ```go -// cmd/app/models/v1/job.go +// internal/api/types.go (or wherever API request types live) type IssueRequest struct { DocID string `json:"doc_id"` GitHubRepo string `json:"github_repo"` @@ -2617,11 +2617,11 @@ type IssueRequest struct { **Options considered:** -| Option | What it is | Pros | Cons | -| --- | --- | --- | --- | -| Self-hosted static file server | API server serves `/static/` from artifact directory | Zero extra infra; simple implementation | Only works if server has a public URL; files must outlive the process | -| S3 / GCS bucket | Upload to object storage; embed public URL | Scalable, durable, globally accessible | Requires cloud credentials and bucket setup; adds infra dependency | -| GitHub issue image upload | Use GitHub's CDN by attaching images via the `github.com` upload flow | No external storage needed | Not a stable public API for third parties; permissions are fragile | +| Option | What it is | Pros | Cons | +| ------------------------------ | --------------------------------------------------------------------- | --------------------------------------- | --------------------------------------------------------------------- | +| Self-hosted static file server | API server serves `/static/` from artifact directory | Zero extra infra; simple implementation | Only works if server has a public URL; files must outlive the process | +| S3 / GCS bucket | Upload to object storage; embed public URL | Scalable, durable, globally accessible | Requires cloud credentials and bucket setup; adds infra dependency | +| GitHub issue image upload | Use GitHub's CDN by attaching images via the `github.com` upload flow | No external storage needed | Not a stable public API for third parties; permissions are fragile | **Chosen approach: self-hosted file server first, S3 upgrade path built-in.** @@ -2665,11 +2665,11 @@ type S3Host struct { Configuration: 
-| Env var | Purpose | -| --- | --- | +| Env var | Purpose | +| ----------------------- | ------------------------------------------------------------------------- | | `BAUER_STATIC_BASE_URL` | Base URL for the self-hosted file server (required for `LocalFileServer`) | -| `BAUER_S3_BUCKET` | S3 bucket name (required for `S3Host`) | -| `BAUER_S3_REGION` | S3 region (required for `S3Host`) | +| `BAUER_S3_BUCKET` | S3 bucket name (required for `S3Host`) | +| `BAUER_S3_REGION` | S3 region (required for `S3Host`) | If neither is set, the API falls back to the Stage 1 behavior (artifact paths in the issue body, with a warning in logs). @@ -2786,20 +2786,20 @@ This is not a replacement for the standalone task breakdowns in each spec. It is ### Phase map -| Phase | Source | Tasks | What lands | -| ----- | ------ | --------------------------------------------------- | ------------------------------------------------------------------------------------------------------ | -| P0 | 001 | T0.1, T0.2, T0.2a, T0.2b, T0.2c, T0.3, T0.4, T0.5 | `agent.Agent` interface; `internal/source`; `internal/artifacts`; orchestrator decoupled; config cleanup | -| P1 | 001 | T1.1, T1.2, T1.3 | CLI flags restored; dry-run fixed; Taskfile works | -| P2 | 001 | T2.1, T2.2, T2.3 | `--open-pr`, `--open-issue`, mutual exclusion | -| P3 | 002 | T2F.0, T2F.1, T2F.2, T2F.3, T2F.4 | Dev setup verified; Figma URL parsing; auth; REST client; normalization | -| P4 | 002 | T2F.5, T2F.6, T2F.7 | Mapping resolver; figma-aware chunked prompts; artifact writes | -| P5 | 002 | T2F.8, T2F.9 | CLI enrichment; issue mode with design context; MCP guidance | -| P6 | 001 | T3.0, T3.1, T3.2, T3.3, T3.4 | Docker; env loading; routes; API cleanup | -| P7 | 001 | T4.1, T4.2, T4.3 | `POST /api/v1/issues`; `GET /health/ready`; Jira webhook | -| P8 | 002 | T4F.1 | API accepts `figma_url`; full Figma pipeline server-side | -| P9 | 001 | T5.1, T5.2, T5.3 | GitHub App auth; OIDC middleware; secret masking | -| P10 | 002 | T2F.10 
| Drift detection; mapping cache reuse | -| P11 | 002 | T4F.2 | Server-side screenshot hosting; inline images in issues/PRs | +| Phase | Source | Tasks | What lands | +| ----- | ------ | ------------------------------------------------- | --------------------------------------------------------------------------------------------------------------- | +| P0 | 001 | T0.1, T0.2, T0.2a, T0.2b, T0.2c, T0.3, T0.4, T0.5 | `orchestrator.Agent` interface; `internal/source`; `internal/artifacts`; orchestrator decoupled; config cleanup | +| P1 | 001 | T1.1, T1.2, T1.3 | CLI flags restored; dry-run fixed; Taskfile works | +| P2 | 001 | T2.1, T2.2, T2.3 | `--open-pr`, `--open-issue`, mutual exclusion | +| P3 | 002 | T2F.0, T2F.1, T2F.2, T2F.3, T2F.4 | Dev setup verified; Figma URL parsing; auth; REST client; normalization | +| P4 | 002 | T2F.5, T2F.6, T2F.7 | Mapping resolver; figma-aware chunked prompts; artifact writes | +| P5 | 002 | T2F.8, T2F.9 | CLI enrichment; issue mode with design context; MCP guidance | +| P6 | 001 | T3.0, T3.1, T3.2, T3.3, T3.4 | Docker; env loading; routes; API cleanup | +| P7 | 001 | T4.1, T4.2, T4.3 | `POST /api/v1/issues`; `GET /health/ready`; Jira webhook | +| P8 | 002 | T4F.1 | API accepts `figma_url`; full Figma pipeline server-side | +| P9 | 001 | T5.1, T5.2, T5.3 | GitHub App auth; OIDC middleware; secret masking | +| P10 | 002 | T2F.10 | Drift detection; mapping cache reuse | +| P11 | 002 | T4F.2 | Server-side screenshot hosting; inline images in issues/PRs | ### Prerequisites before starting P3 @@ -2819,9 +2819,6 @@ After the foundation tasks (T0.1 – T0.5) are complete, the codebase shape is: ```text internal/ - agent/ - agent.go ← Agent interface - mock.go ← MockAgent for tests gdocs/ ← unchanged source/ source.go ← Adapter interface and Request type @@ -2833,9 +2830,9 @@ internal/ engine.go ← PromptData with SuggestionsJSON (FigmaContextJSON not yet wired) types.go templates/ - copilotcli/ ← unchanged; now implements agent.Agent + 
copilotcli/ ← unchanged; now implements orchestrator.Agent orchestrator/ - orchestrator.go ← calls source layer; no longer imports gdocs directly + orchestrator.go ← defines Agent, calls source layer; no longer imports gdocs directly config/ config.go cli.go @@ -2844,6 +2841,8 @@ internal/ auth.go pr.go repo.go + testutil/ + mock.go ← MockAgent for tests ``` No figma or mapping packages yet. The orchestrator talks to `internal/source`, which still only has a gdocs adapter. @@ -2855,7 +2854,7 @@ After CLI is fully restored and `--open-pr` / `--open-issue` are working (T2.1 ```text cmd/bauer/ main.go ← all flags restored; --open-pr and --open-issue implemented - + (All other packages unchanged from after P0.) ``` @@ -2865,9 +2864,6 @@ This is the correct state to begin P3 (Figma work). ```text internal/ - agent/ - agent.go ← Agent interface - mock.go ← MockAgent for tests gdocs/ ← unchanged figma/ client.go diff --git a/internal/artifacts/manager.go b/internal/artifacts/manager.go new file mode 100644 index 0000000..eb7ec52 --- /dev/null +++ b/internal/artifacts/manager.go @@ -0,0 +1,221 @@ +// Package artifacts manages append-only run artifact history. Each run gets a +// timestamped directory under the configured artifacts dir. A runs.jsonl index +// file receives one line per completed run and is never rewritten in full. +package artifacts + +import ( + "context" + "crypto/rand" + "encoding/json" + "fmt" + "log/slog" + "os" + "path/filepath" + "sync" + "time" +) + +// RunMeta is written as one line in runs.jsonl after each completed run. 
+type RunMeta struct { + RunID string `json:"run_id"` + StartedAt string `json:"started_at"` + CompletedAt string `json:"completed_at"` + Status string `json:"status"` // "success" | "failed" | "in_progress" + DocID string `json:"doc_id"` + FigmaURL string `json:"figma_url"` + Mode string `json:"mode"` // "execute" | "dry-run" | "issue" + ChunkCount int `json:"chunk_count"` + ArtifactDir string `json:"artifact_dir"` +} + +// Manager handles writing run artifacts to the filesystem. +type Manager struct { + base string + mu sync.Mutex +} + +// NewManager creates a Manager rooted at baseDir. +func NewManager(baseDir string) *Manager { + return &Manager{base: baseDir} +} + +// BaseDir returns the configured artifacts base directory. +func (m *Manager) BaseDir() string { + return m.base +} + +// NewRun creates a new timestamped run directory and returns the run ID. +// It creates all standard subdirectories including extraction, prompts, +// outputs, logs, and screenshots. +func (m *Manager) NewRun(ctx context.Context) (string, error) { + _ = ctx + + if err := os.MkdirAll(m.base, 0o755); err != nil { + return "", fmt.Errorf("create artifacts dir: %w", err) + } + + now := time.Now().UTC() + // 8 random hex chars for collision resistance + b := make([]byte, 4) + if _, err := rand.Read(b); err != nil { + return "", fmt.Errorf("generate run id: %w", err) + } + runID := fmt.Sprintf("%s-%04x-%08x", now.Format("2006-01-02T15-04-05Z"), now.Nanosecond()/65536, b) + runDir := filepath.Join(m.base, runID) + + // Check for collision (extremely unlikely with 32-bit rand + nanosecond) + if _, err := os.Stat(runDir); err == nil { + return "", fmt.Errorf("run directory already exists: %s (collision)", runID) + } + + dirs := []string{ + filepath.Join(runDir, "extraction"), + filepath.Join(runDir, "prompts"), + filepath.Join(runDir, "outputs"), + filepath.Join(runDir, "logs"), + filepath.Join(runDir, "screenshots"), + } + for _, d := range dirs { + if err := os.MkdirAll(d, 0o755); err != 
nil { + return "", fmt.Errorf("create %s: %w", d, err) + } + } + + return runID, nil +} + +// RunDir returns the path for a given run ID under the base directory. +func (m *Manager) RunDir(runID string) string { + return filepath.Join(m.base, runID) +} + +// AppendRun appends a completed run entry to runs.jsonl. The file is created +// on first write and only ever appended to. +func (m *Manager) AppendRun(meta RunMeta) error { + m.mu.Lock() + defer m.mu.Unlock() + + if err := os.MkdirAll(m.base, 0o755); err != nil { + return fmt.Errorf("create artifacts dir: %w", err) + } + + path := filepath.Join(m.base, "runs.jsonl") + f, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644) + if err != nil { + return fmt.Errorf("open runs.jsonl: %w", err) + } + defer f.Close() + + line, err := json.Marshal(meta) + if err != nil { + return fmt.Errorf("marshal run meta: %w", err) + } + + if _, err := f.Write(append(line, '\n')); err != nil { + return fmt.Errorf("write runs.jsonl: %w", err) + } + + return nil +} + +// WriteMetadata writes a metadata.json file inside the run directory. +func (m *Manager) WriteMetadata(runID string, meta RunMeta) error { + return m.writeJSON(runID, "metadata.json", meta) +} + +// WriteExtraction writes an extraction file to the run's extraction directory. +func (m *Manager) WriteExtraction(runID string, name string, data any) error { + return m.writeJSON(runID, filepath.Join("extraction", name), data) +} + +// WritePrompt writes a prompt file to the run's prompts directory. +// name should be a simple filename (e.g. "chunk-1-of-3.md"), not a full path. 
+func (m *Manager) WritePrompt(runID string, name string, content string) error { + runDir := filepath.Join(m.base, runID, "prompts") + if err := os.MkdirAll(runDir, 0o755); err != nil { + return fmt.Errorf("create prompts dir: %w", err) + } + path := filepath.Join(runDir, filepath.Base(name)) + if err := os.WriteFile(path, []byte(content), 0o644); err != nil { + return fmt.Errorf("write prompt %s: %w", name, err) + } + return nil +} + +// WriteOutput writes an output file to the run's outputs directory. +func (m *Manager) WriteOutput(runID string, name string, content string) error { + runDir := filepath.Join(m.base, runID, "outputs") + if err := os.MkdirAll(runDir, 0o755); err != nil { + return fmt.Errorf("create outputs dir: %w", err) + } + path := filepath.Join(runDir, name) + if err := os.WriteFile(path, []byte(content), 0o644); err != nil { + return fmt.Errorf("write output %s: %w", name, err) + } + return nil +} + +// LogEntry is a single structured log line written to execution.jsonl. +type LogEntry struct { + Timestamp string `json:"timestamp"` + Step string `json:"step"` + Message string `json:"message"` +} + +// AppendLog appends a structured log entry to the run's logs/execution.jsonl. 
+func (m *Manager) AppendLog(runID string, entry LogEntry) error { + m.mu.Lock() + defer m.mu.Unlock() + + runDir := filepath.Join(m.base, runID, "logs") + if err := os.MkdirAll(runDir, 0o755); err != nil { + return fmt.Errorf("create logs dir: %w", err) + } + + path := filepath.Join(runDir, "execution.jsonl") + f, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644) + if err != nil { + return fmt.Errorf("open execution.jsonl: %w", err) + } + defer f.Close() + + line, err := json.Marshal(entry) + if err != nil { + return fmt.Errorf("marshal log entry: %w", err) + } + + if _, err := f.Write(append(line, '\n')); err != nil { + return fmt.Errorf("write execution.jsonl: %w", err) + } + + return nil +} + +// EnsureScreenshotsDir creates and returns the screenshots directory for a run. +func (m *Manager) EnsureScreenshotsDir(runID string) (string, error) { + dir := filepath.Join(m.base, runID, "screenshots") + if err := os.MkdirAll(dir, 0o755); err != nil { + return "", fmt.Errorf("create screenshots dir: %w", err) + } + return dir, nil +} + +// writeJSON is a helper to write any value as pretty-printed JSON to a run file. 
+func (m *Manager) writeJSON(runID, relPath string, data any) error { + fullPath := filepath.Join(m.base, runID, relPath) + if err := os.MkdirAll(filepath.Dir(fullPath), 0o755); err != nil { + return fmt.Errorf("create dir for %s: %w", relPath, err) + } + + raw, err := json.MarshalIndent(data, "", " ") + if err != nil { + return fmt.Errorf("marshal %s: %w", relPath, err) + } + + if err := os.WriteFile(fullPath, raw, 0o644); err != nil { + return fmt.Errorf("write %s: %w", relPath, err) + } + + slog.Debug("Artifact written", "path", fullPath) + return nil +} diff --git a/internal/artifacts/manager_test.go b/internal/artifacts/manager_test.go new file mode 100644 index 0000000..cc4d84f --- /dev/null +++ b/internal/artifacts/manager_test.go @@ -0,0 +1,230 @@ +package artifacts + +import ( + "bufio" + "context" + "encoding/json" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestNewRun_CreatesDirectoryStructure(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + mgr := NewManager(dir) + + runID, err := mgr.NewRun(context.Background()) + if err != nil { + t.Fatalf("NewRun() error = %v", err) + } + + if runID == "" { + t.Fatal("runID is empty") + } + + runDir := filepath.Join(dir, runID) + subdirs := []string{"extraction", "prompts", "outputs", "logs", "screenshots"} + for _, sub := range subdirs { + info, err := os.Stat(filepath.Join(runDir, sub)) + if err != nil { + t.Fatalf("stat %s: %v", sub, err) + } + if !info.IsDir() { + t.Fatalf("%s is not a directory", sub) + } + } +} + +func TestAppendRun_AppendOnly(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + mgr := NewManager(dir) + + meta1 := RunMeta{ + RunID: "run-001", + StartedAt: "2026-05-13T10:00:00Z", + CompletedAt: "2026-05-13T10:01:00Z", + Status: "success", + DocID: "doc-1", + Mode: "execute", + ChunkCount: 3, + ArtifactDir: "bauer-artifacts/run-001", + } + meta2 := RunMeta{ + RunID: "run-002", + StartedAt: "2026-05-13T11:00:00Z", + CompletedAt: "2026-05-13T11:02:00Z", + Status: "failed", 
+ DocID: "doc-2", + Mode: "dry-run", + ChunkCount: 0, + ArtifactDir: "bauer-artifacts/run-002", + } + + if err := mgr.AppendRun(meta1); err != nil { + t.Fatalf("AppendRun() error = %v", err) + } + if err := mgr.AppendRun(meta2); err != nil { + t.Fatalf("AppendRun() error = %v", err) + } + + // Read runs.jsonl and verify both entries exist + f, err := os.Open(filepath.Join(dir, "runs.jsonl")) + if err != nil { + t.Fatalf("open runs.jsonl: %v", err) + } + defer f.Close() + + scanner := bufio.NewScanner(f) + var lines []string + for scanner.Scan() { + lines = append(lines, scanner.Text()) + } + + if len(lines) != 2 { + t.Fatalf("expected 2 lines in runs.jsonl, got %d", len(lines)) + } + + var parsed1, parsed2 RunMeta + if err := json.Unmarshal([]byte(lines[0]), &parsed1); err != nil { + t.Fatalf("parse line 1: %v", err) + } + if err := json.Unmarshal([]byte(lines[1]), &parsed2); err != nil { + t.Fatalf("parse line 2: %v", err) + } + + if parsed1.RunID != "run-001" || parsed2.RunID != "run-002" { + t.Fatalf("unexpected run IDs: %q, %q", parsed1.RunID, parsed2.RunID) + } +} + +func TestWriteMetadata(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + mgr := NewManager(dir) + + meta := RunMeta{RunID: "run-001", Status: "success"} + if err := mgr.WriteMetadata("run-001", meta); err != nil { + t.Fatalf("WriteMetadata() error = %v", err) + } + + // Verify the parent dir was created (NewRun wasn't called, so it should auto-create) + data, err := os.ReadFile(filepath.Join(dir, "run-001", "metadata.json")) + if err != nil { + t.Fatalf("read metadata.json: %v", err) + } + + var parsed RunMeta + if err := json.Unmarshal(data, &parsed); err != nil { + t.Fatalf("parse metadata.json: %v", err) + } + if parsed.RunID != "run-001" { + t.Fatalf("RunID = %q, want %q", parsed.RunID, "run-001") + } +} + +func TestWriteExtraction(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + mgr := NewManager(dir) + + data := map[string]string{"key": "value"} + if err := 
mgr.WriteExtraction("run-001", "gdocs.json", data); err != nil { + t.Fatalf("WriteExtraction() error = %v", err) + } + + raw, err := os.ReadFile(filepath.Join(dir, "run-001", "extraction", "gdocs.json")) + if err != nil { + t.Fatalf("read gdocs.json: %v", err) + } + if !strings.Contains(string(raw), "key") { + t.Fatalf("gdocs.json content doesn't contain 'key': %s", raw) + } +} + +func TestWritePromptAndOutput(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + mgr := NewManager(dir) + + if err := mgr.WritePrompt("run-001", "chunk-1-of-3.md", "prompt content"); err != nil { + t.Fatalf("WritePrompt() error = %v", err) + } + if err := mgr.WriteOutput("run-001", "chunk-1-output.md", "output content"); err != nil { + t.Fatalf("WriteOutput() error = %v", err) + } + + promptContent, err := os.ReadFile(filepath.Join(dir, "run-001", "prompts", "chunk-1-of-3.md")) + if err != nil { + t.Fatalf("read prompt: %v", err) + } + if string(promptContent) != "prompt content" { + t.Fatalf("prompt = %q, want %q", promptContent, "prompt content") + } + + outputContent, err := os.ReadFile(filepath.Join(dir, "run-001", "outputs", "chunk-1-output.md")) + if err != nil { + t.Fatalf("read output: %v", err) + } + if string(outputContent) != "output content" { + t.Fatalf("output = %q, want %q", outputContent, "output content") + } +} + +func TestAppendLog(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + mgr := NewManager(dir) + + entry := LogEntry{Timestamp: "2026-05-13T10:00:00Z", Step: "extraction", Message: "started"} + if err := mgr.AppendLog("run-001", entry); err != nil { + t.Fatalf("AppendLog() error = %v", err) + } + + f, err := os.Open(filepath.Join(dir, "run-001", "logs", "execution.jsonl")) + if err != nil { + t.Fatalf("open execution.jsonl: %v", err) + } + defer f.Close() + + scanner := bufio.NewScanner(f) + if !scanner.Scan() { + t.Fatal("execution.jsonl is empty") + } + + var parsed LogEntry + if err := json.Unmarshal([]byte(scanner.Text()), &parsed); err != nil { + 
t.Fatalf("parse log line: %v", err) + } + if parsed.Step != "extraction" { + t.Fatalf("Step = %q, want %q", parsed.Step, "extraction") + } +} + +func TestEnsureScreenshotsDir(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + mgr := NewManager(dir) + + screenshotsDir, err := mgr.EnsureScreenshotsDir("run-001") + if err != nil { + t.Fatalf("EnsureScreenshotsDir() error = %v", err) + } + + info, err := os.Stat(screenshotsDir) + if err != nil { + t.Fatalf("stat screenshots dir: %v", err) + } + if !info.IsDir() { + t.Fatal("screenshots path is not a directory") + } +} diff --git a/internal/config/cli.go b/internal/config/cli.go index dc4f846..2bfd794 100644 --- a/internal/config/cli.go +++ b/internal/config/cli.go @@ -6,83 +6,113 @@ import ( "os" ) -// Load parses command-line flags and returns a validated Config. -func Load() (*Config, error) { - // Define flags - // Note: We use a new FlagSet to facilitate testing if needed later, - // but for now relying on the default flag set is sufficient for the main entry point. - // To avoid conflicts if Load is called multiple times (e.g. in tests), we reset if needed, - // but standard `flag` usage usually assumes run once per process. 
- - docID := flag.String("doc-id", "", "Google Doc ID to extract feedback from (required)") - credentialsPath := flag.String("credentials", "", "Path to service account JSON (required)") - configFile := flag.String("config", "", "Path to JSON config file") - dryRun := flag.Bool("dry-run", false, "Run extraction and planning only; skip Copilot and PR creation") - chunkSize := flag.Int("chunk-size", 0, "Total number of chunks to create (default: 1, or 5 if --page-refresh is set)") - pageRefresh := flag.Bool("page-refresh", false, "Use page refresh mode with page-refresh-instructions template (default chunk size: 5)") - outputDir := flag.String("output-dir", "bauer-output", "Directory for generated prompt files (default: bauer-output)") - model := flag.String("model", "gpt-5-mini-high", "Copilot model to use for sessions (default: gpt-5-mini-high)") - summaryModel := flag.String("summary-model", "gpt-5-mini-high", "Copilot model to use for summary session (default: gpt-5-mini-high)") - targetRepo := flag.String("target-repo", "", "Path to target repository where tasks should be executed (default: current directory)") +// ParseCLIFlags parses command-line flags and returns a CLIFlags struct. +// It does NOT validate — validation happens after the config resolver merges +// all sources (flags, env vars, defaults). 
+func ParseCLIFlags(args []string) (CLIFlags, error) { + fs := flag.NewFlagSet(os.Args[0], flag.ContinueOnError) + fs.SetOutput(os.Stderr) + + var f CLIFlags + + fs.StringVar(&f.DocID, "doc-id", "", "Google Doc ID to extract feedback from (required)") + fs.StringVar(&f.CredentialsPath, "credentials", "", "Path to service account JSON (required)") + fs.StringVar(&f.OutputDir, "output-dir", "", "Directory for generated prompt files (default: bauer-output)") + fs.StringVar(&f.Model, "model", "", "Copilot model to use for sessions (default: gpt-5-mini-high)") + fs.StringVar(&f.SummaryModel, "summary-model", "", "Copilot model to use for summary session (default: gpt-5-mini-high)") + fs.StringVar(&f.TargetRepo, "target-repo", "", "Path to target repository where tasks should be executed (default: current directory)") + fs.StringVar(&f.ArtifactsDir, "artifacts-dir", "", "Directory for run artifacts (default: ./bauer-artifacts)") + fs.StringVar(&f.BranchPrefix, "branch-prefix", "", "Branch name prefix for --open-pr (default: bauer)") + fs.StringVar(&f.FigmaURL, "figma-url", "", "Figma file URL for design context (optional)") + + // Bool flags: we need to know if they were explicitly set. + // We use custom bool vars that track whether the flag was seen. + dryRunPtr := fs.Bool("dry-run", false, "In standalone mode: write chunk files only. In --open-pr mode: apply changes locally and skip PR creation") + pageRefreshPtr := fs.Bool("page-refresh", false, "Use page refresh mode with page-refresh-instructions template (default chunk size: 5)") + openPRPtr := fs.Bool("open-pr", false, "After applying changes, create a branch and open a PR. Mutually exclusive with --open-issue.") + openIssuePtr := fs.Bool("open-issue", false, "Skip Copilot, generate plan, open a GitHub issue instead. 
Mutually exclusive with --open-pr.") + + fs.IntVar(&f.ChunkSize, "chunk-size", 0, "Total number of chunks to create (default: 1, or 5 if --page-refresh is set)") // Custom usage message - flag.Usage = func() { + fs.Usage = func() { fmt.Fprintf(os.Stderr, "Usage:\n\n") fmt.Fprintf(os.Stderr, "\t%s --doc-id --credentials [flags]\n\n", os.Args[0]) fmt.Fprintf(os.Stderr, "Flags:\n\n") - // Manually format flags flags := []struct { name string typ string desc string }{ - {"--config", "", "Path to JSON config file"}, {"--doc-id", "", "Google Doc ID to extract feedback from (required)"}, {"--credentials", "", "Path to service account JSON (required)"}, - {"--dry-run", "", "Run extraction and planning only; skip Copilot and PR creation"}, + {"--dry-run", "", "In standalone mode: write chunk files only. In --open-pr mode: apply changes locally and skip PR creation"}, {"--page-refresh", "", "Use page refresh mode with page-refresh-instructions template"}, {"--chunk-size", "", "Total number of chunks to create (default: 1, or 5 if --page-refresh is set)"}, {"--output-dir", "", "Directory for generated prompt files (default: bauer-output)"}, {"--model", "", "Copilot model to use for sessions (default: gpt-5-mini-high)"}, {"--summary-model", "", "Copilot model to use for summary session (default: gpt-5-mini-high)"}, {"--target-repo", "", "Path to target repository where tasks should be executed (default: current directory)"}, + {"--artifacts-dir", "", "Directory for run artifacts (default: ./bauer-artifacts)"}, + {"--branch-prefix", "", "Branch name prefix for --open-pr (default: bauer)"}, + {"--open-pr", "", "After applying changes, create a branch and open a PR"}, + {"--open-issue", "", "Skip Copilot, generate plan, open a GitHub issue"}, + {"--figma-url", "", "Figma file URL for design context (optional)"}, } - for _, f := range flags { - if f.typ != "" { - fmt.Fprintf(os.Stderr, "\t%-25s %s\n", f.name+" "+f.typ, f.desc) + for _, fl := range flags { + if fl.typ != "" { + 
fmt.Fprintf(os.Stderr, "\t%-25s %s\n", fl.name+" "+fl.typ, fl.desc) } else { - fmt.Fprintf(os.Stderr, "\t%-25s %s\n", f.name, f.desc) + fmt.Fprintf(os.Stderr, "\t%-25s %s\n", fl.name, fl.desc) } } fmt.Fprintf(os.Stderr, "\nUse \"%s --help\" to display this message.\n\n", os.Args[0]) } - flag.Parse() + if err := fs.Parse(args); err != nil { + return CLIFlags{}, err + } + + // Track whether bool flags were explicitly set + setFlags := make(map[string]bool) + fs.Visit(func(f *flag.Flag) { + setFlags[f.Name] = true + }) - // If --config is provided, load from JSON file - if *configFile != "" { - return LoadFromJSONFile(*configFile) + if setFlags["dry-run"] { + f.DryRun = BoolPtr(*dryRunPtr) + } + if setFlags["page-refresh"] { + f.PageRefresh = BoolPtr(*pageRefreshPtr) } + if setFlags["open-pr"] { + f.OpenPR = BoolPtr(*openPRPtr) + } + if setFlags["open-issue"] { + f.OpenIssue = BoolPtr(*openIssuePtr) + } + + return f, nil +} - // If no required flags are provided, show usage and exit - if *docID == "" && *credentialsPath == "" { - flag.Usage() - os.Exit(1) +// Load is the legacy entry point. It parses flags and returns a Config. +// Deprecated: use ParseCLIFlags + NewResolver for new code. 
+func Load() (*Config, error) { + flags, err := ParseCLIFlags(os.Args[1:]) + if err != nil { + return nil, err } - cfg := &Config{ - DocID: *docID, - CredentialsPath: *credentialsPath, - DryRun: *dryRun, - ChunkSize: *chunkSize, - PageRefresh: *pageRefresh, - OutputDir: *outputDir, - Model: *model, - SummaryModel: *summaryModel, - TargetRepo: *targetRepo, + cfg, err := NewResolver( + NewFlagsSource(flags), + NewEnvVarSource(), + NewDefaultsSource(), + ).Resolve() + if err != nil { + return nil, err } if err := cfg.Validate(); err != nil { diff --git a/internal/config/config.go b/internal/config/config.go index 715fefa..011d82f 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -16,7 +16,8 @@ type Config struct { CredentialsPath string `json:"credentials"` // DryRun indicates if the tool should skip side-effect operations (Copilot CLI, PR creation). - DryRun bool `json:"dry_run"` + // Uses *bool so that explicit false overrides a default of true. + DryRun *bool `json:"dry_run"` // ChunkSize is the total number of chunks to create from all locations. // Default is 1 if not specified, or 5 if PageRefresh is true. @@ -24,7 +25,8 @@ type Config struct { // PageRefresh indicates if the page refresh mode should be used. // When true, uses page-refresh-instructions.md template and defaults ChunkSize to 5. - PageRefresh bool `json:"page_refresh"` + // Uses *bool so that explicit false overrides a default of true. + PageRefresh *bool `json:"page_refresh"` // OutputDir is the directory where generated prompt files will be saved. // Default is "bauer-output" if not specified. @@ -41,12 +43,34 @@ type Config struct { // TargetRepo is the path (relative or absolute) to the target repository // where tasks should be executed. If not specified, uses the current directory. TargetRepo string `json:"target_repo"` + + // ArtifactsDir is the directory for append-only run artifacts. + // Defaults to "./bauer-artifacts" if not specified. 
+ ArtifactsDir string `json:"artifacts_dir"` + + // BranchPrefix is the prefix for generated branch names. + // Defaults to "bauer". + BranchPrefix string `json:"branch_prefix"` + + // OpenPR indicates that after applying changes, a branch should be created + // and a PR opened. Mutually exclusive with OpenIssue. + OpenPR *bool `json:"open_pr"` + + // OpenIssue indicates that Copilot should be skipped and a GitHub issue + // should be opened with the implementation plan. Mutually exclusive with OpenPR. + OpenIssue *bool `json:"open_issue"` + + // FigmaURL is an optional Figma file URL for design context. + FigmaURL string `json:"figma_url"` + + // FigmaToken is the Figma personal access token. + FigmaToken string `json:"figma_token"` } // Apply default config values func (c *Config) ApplyDefaults() { if c.ChunkSize == 0 { - if c.PageRefresh { + if BoolVal(c.PageRefresh, false) { c.ChunkSize = 5 } else { c.ChunkSize = 1 @@ -61,6 +85,12 @@ func (c *Config) ApplyDefaults() { if c.SummaryModel == "" { c.SummaryModel = "gpt-5-mini-high" } + if c.ArtifactsDir == "" { + c.ArtifactsDir = "bauer-artifacts" + } + if c.BranchPrefix == "" { + c.BranchPrefix = "bauer" + } } // Validate checks if the configuration is valid. 
diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 22e95c7..08d870c 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -10,7 +10,16 @@ func TestConfig_Validate(t *testing.T) { // Create a temporary file to act as a valid credentials file tmpDir := t.TempDir() validCredsFile := filepath.Join(tmpDir, "creds.json") - if err := os.WriteFile(validCredsFile, []byte("{}"), 0644); err != nil { + if err := os.WriteFile(validCredsFile, []byte(`{ + "type": "service_account", + "project_id": "test-project", + "private_key_id": "test-private-key-id", + "private_key": "-----BEGIN PRIVATE KEY-----\ntest\n-----END PRIVATE KEY-----\n", + "client_email": "bauer@test-project.iam.gserviceaccount.com", + "client_id": "1234567890", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token" +}`), 0644); err != nil { t.Fatalf("Failed to create temp creds file: %v", err) } @@ -126,7 +135,16 @@ func TestChunkSizeDefaults(t *testing.T) { // Create a temporary file to act as a valid credentials file tmpDir := t.TempDir() validCredsFile := filepath.Join(tmpDir, "creds.json") - if err := os.WriteFile(validCredsFile, []byte("{}"), 0644); err != nil { + if err := os.WriteFile(validCredsFile, []byte(`{ + "type": "service_account", + "project_id": "test-project", + "private_key_id": "test-private-key-id", + "private_key": "-----BEGIN PRIVATE KEY-----\ntest\n-----END PRIVATE KEY-----\n", + "client_email": "bauer@test-project.iam.gserviceaccount.com", + "client_id": "1234567890", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token" +}`), 0644); err != nil { t.Fatalf("Failed to create temp creds file: %v", err) } @@ -183,7 +201,7 @@ func TestChunkSizeDefaults(t *testing.T) { DocID: "test-doc-id", CredentialsPath: validCredsFile, ChunkSize: effectiveChunkSize, - PageRefresh: tt.pageRefreshFlag, + PageRefresh: 
BoolPtr(tt.pageRefreshFlag), OutputDir: "bauer-output", Model: "gpt-5-mini-high", SummaryModel: "gpt-5-mini-high", diff --git a/internal/config/json.go b/internal/config/json.go deleted file mode 100644 index acfdb2f..0000000 --- a/internal/config/json.go +++ /dev/null @@ -1,26 +0,0 @@ -package config - -import ( - "encoding/json" - "os" -) - -func LoadFromJSON(data []byte) (*Config, error) { - var cfg Config - if err := json.Unmarshal(data, &cfg); err != nil { - return nil, err - } - if err := cfg.Validate(); err != nil { - return nil, err - } - - return &cfg, nil -} - -func LoadFromJSONFile(filePath string) (*Config, error) { - data, err := os.ReadFile(filePath) - if err != nil { - return nil, err - } - return LoadFromJSON(data) -} diff --git a/internal/config/manager.go b/internal/config/manager.go new file mode 100644 index 0000000..f4d0f53 --- /dev/null +++ b/internal/config/manager.go @@ -0,0 +1,226 @@ +package config + +import ( + "fmt" + "os" + "strconv" +) + +// Source provides a partial Config from a single input (env vars, flags, defaults). +// Fields not provided by this source should be zero-valued. +type Source interface { + Load() (*Config, error) +} + +// Resolver merges multiple Sources in priority order. +// First source listed = highest priority. +type Resolver struct { + sources []Source +} + +// NewResolver creates a Resolver. List sources highest-priority first. +// Typical CLI order: NewFlagsSource(flags), NewEnvVarSource(), NewDefaultsSource() +// Typical API order: NewRequestSource(req), NewEnvVarSource(), NewDefaultsSource() +func NewResolver(sources ...Source) *Resolver { + return &Resolver{sources: sources} +} + +// Resolve merges all sources and returns the final Config. 
+func (r *Resolver) Resolve() (*Config, error) { + result := &Config{} + // Apply sources lowest-priority first, higher priority overwrites + for i := len(r.sources) - 1; i >= 0; i-- { + partial, err := r.sources[i].Load() + if err != nil { + return nil, fmt.Errorf("config source %d: %w", i, err) + } + mergeConfig(result, partial) + } + return result, nil +} + +func mergeConfig(base, override *Config) { + if override.DocID != "" { + base.DocID = override.DocID + } + if override.CredentialsPath != "" { + base.CredentialsPath = override.CredentialsPath + } + if override.Model != "" { + base.Model = override.Model + } + if override.SummaryModel != "" { + base.SummaryModel = override.SummaryModel + } + if override.OutputDir != "" { + base.OutputDir = override.OutputDir + } + if override.BranchPrefix != "" { + base.BranchPrefix = override.BranchPrefix + } + if override.ArtifactsDir != "" { + base.ArtifactsDir = override.ArtifactsDir + } + if override.ChunkSize != 0 { + base.ChunkSize = override.ChunkSize + } + if override.PageRefresh != nil { + base.PageRefresh = override.PageRefresh + } + if override.DryRun != nil { + base.DryRun = override.DryRun + } + if override.OpenPR != nil { + base.OpenPR = override.OpenPR + } + if override.OpenIssue != nil { + base.OpenIssue = override.OpenIssue + } + if override.FigmaURL != "" { + base.FigmaURL = override.FigmaURL + } + if override.FigmaToken != "" { + base.FigmaToken = override.FigmaToken + } + if override.TargetRepo != "" { + base.TargetRepo = override.TargetRepo + } +} + +// BoolPtr returns a pointer to b. Use in Source.Load() to explicitly set a bool field. +func BoolPtr(b bool) *bool { return &b } + +// BoolVal safely dereferences a *bool, returning def when the pointer is nil. +func BoolVal(p *bool, def bool) bool { + if p == nil { + return def + } + return *p +} + +// EnvVarSource reads all BAUER_* env vars. 
+type EnvVarSource struct{} + +func NewEnvVarSource() *EnvVarSource { return &EnvVarSource{} } + +func (e *EnvVarSource) Load() (*Config, error) { + cfg := &Config{} + if v := os.Getenv("BAUER_CREDENTIALS_PATH"); v != "" { + cfg.CredentialsPath = v + } else if v := os.Getenv("GOOGLE_APPLICATION_CREDENTIALS"); v != "" { + cfg.CredentialsPath = v + } + cfg.DocID = os.Getenv("BAUER_DOC_ID") + cfg.Model = os.Getenv("BAUER_MODEL") + cfg.SummaryModel = os.Getenv("BAUER_SUMMARY_MODEL") + cfg.OutputDir = os.Getenv("BAUER_OUTPUT_DIR") + cfg.BranchPrefix = os.Getenv("BAUER_BRANCH_PREFIX") + cfg.ArtifactsDir = os.Getenv("BAUER_ARTIFACTS_DIR") + cfg.FigmaURL = os.Getenv("BAUER_FIGMA_URL") + cfg.FigmaToken = os.Getenv("BAUER_FIGMA_TOKEN") + if cfg.FigmaToken == "" { + cfg.FigmaToken = os.Getenv("FIGMA_TOKEN") + } + cfg.TargetRepo = os.Getenv("BAUER_TARGET_REPO") + if v := os.Getenv("BAUER_CHUNK_SIZE"); v != "" { + cs, err := strconv.Atoi(v) + if err != nil { + return nil, fmt.Errorf("invalid value for BAUER_CHUNK_SIZE=%q: %w", v, err) + } + cfg.ChunkSize = cs + } + if v := os.Getenv("BAUER_PAGE_REFRESH"); v != "" { + b, err := strconv.ParseBool(v) + if err != nil { + return nil, fmt.Errorf("invalid value for BAUER_PAGE_REFRESH=%q: %w", v, err) + } + cfg.PageRefresh = &b + } + if v := os.Getenv("BAUER_DRY_RUN"); v != "" { + b, err := strconv.ParseBool(v) + if err != nil { + return nil, fmt.Errorf("invalid value for BAUER_DRY_RUN=%q: %w", v, err) + } + cfg.DryRun = &b + } + if v := os.Getenv("BAUER_OPEN_PR"); v != "" { + b, err := strconv.ParseBool(v) + if err != nil { + return nil, fmt.Errorf("invalid value for BAUER_OPEN_PR=%q: %w", v, err) + } + cfg.OpenPR = &b + } + if v := os.Getenv("BAUER_OPEN_ISSUE"); v != "" { + b, err := strconv.ParseBool(v) + if err != nil { + return nil, fmt.Errorf("invalid value for BAUER_OPEN_ISSUE=%q: %w", v, err) + } + cfg.OpenIssue = &b + } + return cfg, nil +} + +// DefaultsSource provides hardcoded fallback values. 
+type DefaultsSource struct{} + +func NewDefaultsSource() *DefaultsSource { return &DefaultsSource{} } + +func (d *DefaultsSource) Load() (*Config, error) { + return &Config{ + Model: "gpt-5-mini-high", + SummaryModel: "gpt-5-mini-high", + ChunkSize: 1, + OutputDir: "bauer-output", + BranchPrefix: "bauer", + ArtifactsDir: "bauer-artifacts", + PageRefresh: BoolPtr(false), + DryRun: BoolPtr(false), + }, nil +} + +// FlagsSource wraps parsed CLI flags. +type FlagsSource struct { + flags CLIFlags +} + +// CLIFlags holds all CLI flag values. Used by FlagsSource. +type CLIFlags struct { + DocID string + CredentialsPath string + DryRun *bool + ChunkSize int + PageRefresh *bool + OutputDir string + Model string + SummaryModel string + TargetRepo string + ArtifactsDir string + BranchPrefix string + OpenPR *bool + OpenIssue *bool + FigmaURL string +} + +func NewFlagsSource(f CLIFlags) *FlagsSource { + return &FlagsSource{flags: f} +} + +func (f *FlagsSource) Load() (*Config, error) { + cfg := &Config{ + DocID: f.flags.DocID, + CredentialsPath: f.flags.CredentialsPath, + ChunkSize: f.flags.ChunkSize, + OutputDir: f.flags.OutputDir, + Model: f.flags.Model, + SummaryModel: f.flags.SummaryModel, + TargetRepo: f.flags.TargetRepo, + ArtifactsDir: f.flags.ArtifactsDir, + BranchPrefix: f.flags.BranchPrefix, + FigmaURL: f.flags.FigmaURL, + DryRun: f.flags.DryRun, + PageRefresh: f.flags.PageRefresh, + OpenPR: f.flags.OpenPR, + OpenIssue: f.flags.OpenIssue, + } + return cfg, nil +} diff --git a/internal/config/manager_test.go b/internal/config/manager_test.go new file mode 100644 index 0000000..f9b9f1e --- /dev/null +++ b/internal/config/manager_test.go @@ -0,0 +1,260 @@ +package config + +import ( + "os" + "strings" + "testing" +) + +func TestResolver_Precedence(t *testing.T) { + t.Run("flag overrides env var", func(t *testing.T) { + t.Setenv("BAUER_DOC_ID", "env-doc") + resolver := NewResolver( + NewFlagsSource(CLIFlags{DocID: "flag-doc"}), + NewEnvVarSource(), + NewDefaultsSource(), 
+ ) + cfg, err := resolver.Resolve() + if err != nil { + t.Fatalf("Resolve() error = %v", err) + } + if cfg.DocID != "flag-doc" { + t.Errorf("DocID = %q, want %q", cfg.DocID, "flag-doc") + } + }) + + t.Run("env var overrides default", func(t *testing.T) { + t.Setenv("BAUER_MODEL", "env-model") + resolver := NewResolver( + NewEnvVarSource(), + NewDefaultsSource(), + ) + cfg, err := resolver.Resolve() + if err != nil { + t.Fatalf("Resolve() error = %v", err) + } + if cfg.Model != "env-model" { + t.Errorf("Model = %q, want %q", cfg.Model, "env-model") + } + }) + + t.Run("flag overrides default", func(t *testing.T) { + resolver := NewResolver( + NewFlagsSource(CLIFlags{DocID: "flag-doc"}), + NewDefaultsSource(), + ) + cfg, err := resolver.Resolve() + if err != nil { + t.Fatalf("Resolve() error = %v", err) + } + if cfg.DocID != "flag-doc" { + t.Errorf("DocID = %q, want %q", cfg.DocID, "flag-doc") + } + }) + + t.Run("zero value does not override lower-priority non-zero", func(t *testing.T) { + resolver := NewResolver( + NewFlagsSource(CLIFlags{DocID: ""}), // empty flag + NewDefaultsSource(), + ) + cfg, err := resolver.Resolve() + if err != nil { + t.Fatalf("Resolve() error = %v", err) + } + // DefaultsSource does not set DocID, so it stays empty + if cfg.DocID != "" { + t.Errorf("DocID = %q, want empty", cfg.DocID) + } + }) + + t.Run("default values are applied", func(t *testing.T) { + resolver := NewResolver( + NewDefaultsSource(), + ) + cfg, err := resolver.Resolve() + if err != nil { + t.Fatalf("Resolve() error = %v", err) + } + if cfg.Model != "gpt-5-mini-high" { + t.Errorf("Model = %q, want %q", cfg.Model, "gpt-5-mini-high") + } + if cfg.ChunkSize != 1 { + t.Errorf("ChunkSize = %d, want %d", cfg.ChunkSize, 1) + } + if cfg.OutputDir != "bauer-output" { + t.Errorf("OutputDir = %q, want %q", cfg.OutputDir, "bauer-output") + } + if cfg.BranchPrefix != "bauer" { + t.Errorf("BranchPrefix = %q, want %q", cfg.BranchPrefix, "bauer") + } + if cfg.ArtifactsDir != 
"bauer-artifacts" { + t.Errorf("ArtifactsDir = %q, want %q", cfg.ArtifactsDir, "bauer-artifacts") + } + }) +} + +func TestResolver_BooleanOverride(t *testing.T) { + t.Run("explicit false flag overrides default true", func(t *testing.T) { + // If defaults had DryRun=true (they don't, but testing override semantics) + resolver := NewResolver( + NewFlagsSource(CLIFlags{DryRun: BoolPtr(false)}), + NewDefaultsSource(), + ) + cfg, err := resolver.Resolve() + if err != nil { + t.Fatalf("Resolve() error = %v", err) + } + if cfg.DryRun == nil || *cfg.DryRun != false { + t.Errorf("DryRun = %v, want false", cfg.DryRun) + } + }) + + t.Run("nil bool falls through to default", func(t *testing.T) { + resolver := NewResolver( + NewFlagsSource(CLIFlags{}), // no bool flags set + NewDefaultsSource(), + ) + cfg, err := resolver.Resolve() + if err != nil { + t.Fatalf("Resolve() error = %v", err) + } + if cfg.DryRun == nil || *cfg.DryRun != false { + t.Errorf("DryRun = %v, want default false", cfg.DryRun) + } + if cfg.PageRefresh == nil || *cfg.PageRefresh != false { + t.Errorf("PageRefresh = %v, want default false", cfg.PageRefresh) + } + }) +} + +func TestEnvVarSource_Load(t *testing.T) { + t.Run("reads BAUER_* vars", func(t *testing.T) { + t.Setenv("BAUER_DOC_ID", "my-doc") + t.Setenv("BAUER_MODEL", "custom-model") + t.Setenv("BAUER_CHUNK_SIZE", "3") + t.Setenv("BAUER_DRY_RUN", "true") + + src := NewEnvVarSource() + cfg, err := src.Load() + if err != nil { + t.Fatalf("Load() error = %v", err) + } + if cfg.DocID != "my-doc" { + t.Errorf("DocID = %q, want %q", cfg.DocID, "my-doc") + } + if cfg.Model != "custom-model" { + t.Errorf("Model = %q, want %q", cfg.Model, "custom-model") + } + if cfg.ChunkSize != 3 { + t.Errorf("ChunkSize = %d, want %d", cfg.ChunkSize, 3) + } + if cfg.DryRun == nil || *cfg.DryRun != true { + t.Errorf("DryRun = %v, want true", cfg.DryRun) + } + }) + + t.Run("GOOGLE_APPLICATION_CREDENTIALS fallback", func(t *testing.T) { + os.Unsetenv("BAUER_CREDENTIALS_PATH") + 
t.Setenv("GOOGLE_APPLICATION_CREDENTIALS", "/fallback/creds.json") + + src := NewEnvVarSource() + cfg, err := src.Load() + if err != nil { + t.Fatalf("Load() error = %v", err) + } + if cfg.CredentialsPath != "/fallback/creds.json" { + t.Errorf("CredentialsPath = %q, want %q", cfg.CredentialsPath, "/fallback/creds.json") + } + }) + + t.Run("BAUER_CREDENTIALS_PATH takes priority over GOOGLE_APPLICATION_CREDENTIALS", func(t *testing.T) { + t.Setenv("BAUER_CREDENTIALS_PATH", "/primary/creds.json") + t.Setenv("GOOGLE_APPLICATION_CREDENTIALS", "/fallback/creds.json") + + src := NewEnvVarSource() + cfg, err := src.Load() + if err != nil { + t.Fatalf("Load() error = %v", err) + } + if cfg.CredentialsPath != "/primary/creds.json" { + t.Errorf("CredentialsPath = %q, want %q", cfg.CredentialsPath, "/primary/creds.json") + } + }) + + t.Run("ParseBool accepts TRUE and 1", func(t *testing.T) { + for _, val := range []string{"TRUE", "1", "True", "true"} { + t.Run(val, func(t *testing.T) { + t.Setenv("BAUER_DRY_RUN", val) + src := NewEnvVarSource() + cfg, err := src.Load() + if err != nil { + t.Fatalf("Load() error = %v", err) + } + if cfg.DryRun == nil || *cfg.DryRun != true { + t.Errorf("DryRun = %v, want true for %q", cfg.DryRun, val) + } + }) + } + }) + + t.Run("ParseBool rejects invalid values", func(t *testing.T) { + t.Setenv("BAUER_DRY_RUN", "yes") + src := NewEnvVarSource() + _, err := src.Load() + if err == nil { + t.Fatal("expected error for invalid bool value") + } + if !strings.Contains(err.Error(), "BAUER_DRY_RUN") { + t.Errorf("error should mention env var name: %v", err) + } + }) +} + +func TestFlagsSource_Load(t *testing.T) { + src := NewFlagsSource(CLIFlags{ + DocID: "flag-doc", + CredentialsPath: "/flag/creds.json", + DryRun: BoolPtr(true), + ChunkSize: 7, + }) + cfg, err := src.Load() + if err != nil { + t.Fatalf("Load() error = %v", err) + } + if cfg.DocID != "flag-doc" { + t.Errorf("DocID = %q, want %q", cfg.DocID, "flag-doc") + } + if cfg.CredentialsPath != 
"/flag/creds.json" { + t.Errorf("CredentialsPath = %q, want %q", cfg.CredentialsPath, "/flag/creds.json") + } + if cfg.DryRun == nil || *cfg.DryRun != true { + t.Errorf("DryRun = %v, want true", cfg.DryRun) + } + if cfg.ChunkSize != 7 { + t.Errorf("ChunkSize = %d, want %d", cfg.ChunkSize, 7) + } +} + +func TestConfig_Validate_MissingFields(t *testing.T) { + t.Run("missing doc-id", func(t *testing.T) { + cfg := &Config{} + err := cfg.Validate() + if err == nil { + t.Fatal("expected error for missing doc-id") + } + if !strings.Contains(err.Error(), "doc_id") { + t.Errorf("error message should mention doc_id: %v", err) + } + }) + + t.Run("missing credentials", func(t *testing.T) { + cfg := &Config{DocID: "my-doc"} + err := cfg.Validate() + if err == nil { + t.Fatal("expected error for missing credentials") + } + if !strings.Contains(err.Error(), "credentials") { + t.Errorf("error message should mention credentials: %v", err) + } + }) +} diff --git a/internal/copilotcli/client.go b/internal/copilotcli/client.go index beb30b5..ba83ba2 100644 --- a/internal/copilotcli/client.go +++ b/internal/copilotcli/client.go @@ -50,7 +50,8 @@ func formatSummaryOutput(text string) string { return text } -// Client wraps the GitHub Copilot SDK client +// Client wraps the GitHub Copilot SDK client. +// It satisfies the orchestrator.Agent interface implicitly. 
type Client struct { client *copilot.Client cwd string @@ -80,7 +81,9 @@ func NewClient(cwd string) (*Client, error) { } // Start starts the Copilot CLI server -func (c *Client) Start() error { +func (c *Client) Start(ctx context.Context) error { + _ = ctx + slog.Info("Starting Copilot client...") if err := c.client.Start(); err != nil { return fmt.Errorf("failed to start Copilot client: %w", err) @@ -251,15 +254,8 @@ func (c *Client) ExecuteChunk(ctx context.Context, chunkPath string, chunkNumber } } -// ChunkOutput represents output from a chunk execution -type ChunkOutput struct { - ChunkNumber int - Output string - Duration time.Duration -} - // GenerateSummary creates a summary session with all chunk outputs -func (c *Client) GenerateSummary(ctx context.Context, outputs []ChunkOutput, model string) error { +func (c *Client) GenerateSummary(ctx context.Context, outputs []string, model string) (string, error) { slog.Info("Creating summary session", slog.String("model", model)) // Create a session with streaming enabled @@ -268,7 +264,7 @@ func (c *Client) GenerateSummary(ctx context.Context, outputs []ChunkOutput, mod Streaming: true, }) if err != nil { - return fmt.Errorf("failed to create summary session: %w", err) + return "", fmt.Errorf("failed to create summary session: %w", err) } defer func() { if err := session.Destroy(); err != nil { @@ -278,30 +274,33 @@ func (c *Client) GenerateSummary(ctx context.Context, outputs []ChunkOutput, mod // Set up event handler done := make(chan error, 1) + var fullSummary string session.On(func(event copilot.SessionEvent) { switch event.Type { case "assistant.message_delta": if event.Data.DeltaContent != nil { fmt.Print(formatSummaryOutput(*event.Data.DeltaContent)) + fullSummary += *event.Data.DeltaContent } case "assistant.reasoning_delta": if event.Data.DeltaContent != nil { fmt.Print(formatCopilotDim(*event.Data.DeltaContent)) + fullSummary += *event.Data.DeltaContent } case "assistant.message": - // Print final 
message in yellow for summary if event.Data.Content != nil { fmt.Println(formatSummaryOutput(*event.Data.Content)) + fullSummary += *event.Data.Content slog.Debug("Summary response", slog.String("content", *event.Data.Content)) } case "assistant.reasoning": - // Print reasoning in dimmed style if event.Data.Content != nil { fmt.Println(formatCopilotDim(*event.Data.Content)) + fullSummary += *event.Data.Content slog.Debug("Summary reasoning", slog.String("content", *event.Data.Content)) } @@ -328,28 +327,28 @@ func (c *Client) GenerateSummary(ctx context.Context, outputs []ChunkOutput, mod Prompt: summaryPrompt, }) if err != nil { - return fmt.Errorf("failed to send summary message: %w", err) + return "", fmt.Errorf("failed to send summary message: %w", err) } // Wait for completion select { case err := <-done: if err != nil { - return err + return "", err } fmt.Println() // Add newline after streaming output - return nil + return fullSummary, nil case <-time.After(10 * time.Minute): - return fmt.Errorf("summary session timed out after 10 minutes") + return "", fmt.Errorf("summary session timed out after 10 minutes") case <-ctx.Done(): - return fmt.Errorf("summary session cancelled: %w", ctx.Err()) + return "", fmt.Errorf("summary session cancelled: %w", ctx.Err()) } } // buildSummaryPrompt creates the prompt for the summary session -func buildSummaryPrompt(outputs []ChunkOutput) string { +func buildSummaryPrompt(outputs []string) string { var prompt strings.Builder prompt.WriteString("# Summary Task\n\n") @@ -368,11 +367,10 @@ func buildSummaryPrompt(outputs []ChunkOutput) string { prompt.WriteString("## Chunks Processed\n\n") - for _, output := range outputs { - fmt.Fprintf(&prompt, "### Chunk %d\n\n", output.ChunkNumber) - fmt.Fprintf(&prompt, "**Duration**: %s\n\n", output.Duration.Round(time.Millisecond)) + for i, output := range outputs { + fmt.Fprintf(&prompt, "### Chunk %d\n\n", i+1) prompt.WriteString("**Output**:\n```\n") - 
prompt.WriteString(output.Output) + prompt.WriteString(output) prompt.WriteString("\n```\n\n") } diff --git a/internal/github/auth.go b/internal/github/auth.go index 1cb6320..d880710 100644 --- a/internal/github/auth.go +++ b/internal/github/auth.go @@ -7,20 +7,20 @@ import ( "strings" ) -// GetGitHubToken retrieves a GitHub token from environment variables or gh CLI +// GetGitHubToken retrieves a GitHub token from environment variables or gh CLI. +// Resolution order: BAUER_GITHUB_TOKEN → GITHUB_TOKEN → GH_TOKEN → gh auth token func GetGitHubToken() (string, error) { - if token := os.Getenv("GITHUB_TOKEN"); token != "" { - return token, nil - } - if token := os.Getenv("GH_TOKEN"); token != "" { - return token, nil + for _, env := range []string{"BAUER_GITHUB_TOKEN", "GITHUB_TOKEN", "GH_TOKEN"} { + if token := os.Getenv(env); token != "" { + return token, nil + } } - // Get token from gh CLI config + // Fall back to gh CLI cmd := exec.Command("gh", "auth", "token") output, err := cmd.CombinedOutput() if err != nil { - return "", fmt.Errorf("failed to get GitHub token from gh CLI: %w", err) + return "", fmt.Errorf("no GitHub token found (tried BAUER_GITHUB_TOKEN, GITHUB_TOKEN, GH_TOKEN, and gh CLI): %w (run 'gh auth login' or set one of the env vars)", err) } token := strings.TrimSpace(string(output)) diff --git a/internal/github/cli.go b/internal/github/cli.go new file mode 100644 index 0000000..5fd4d48 --- /dev/null +++ b/internal/github/cli.go @@ -0,0 +1,167 @@ +package github + +import ( + "context" + "fmt" + "os/exec" + "strconv" + "strings" +) + +// ReadRemoteFromGitConfig reads the origin URL from the git config in dir +// and returns the parsed owner and repo name. 
+func ReadRemoteFromGitConfig(ctx context.Context, dir string) (owner, repo string, err error) { + cmd := exec.CommandContext(ctx, "git", "-C", dir, "remote", "get-url", "origin") + out, err := cmd.Output() + if err != nil { + return "", "", fmt.Errorf("could not read git remote 'origin': %w (is this a git repo?)", err) + } + return parseRepoFromURL(strings.TrimSpace(string(out))) +} + +func parseRepoFromURL(rawURL string) (owner, repo string, err error) { + // Handle HTTPS: https://github.com/owner/repo.git + if strings.HasPrefix(rawURL, "https://github.com/") { + parts := strings.TrimPrefix(rawURL, "https://github.com/") + parts = strings.TrimSuffix(parts, ".git") + segments := strings.Split(parts, "/") + if len(segments) >= 2 { + return segments[0], segments[1], nil + } + } + // Handle SSH: git@github.com:owner/repo.git + if strings.HasPrefix(rawURL, "git@github.com:") { + parts := strings.TrimPrefix(rawURL, "git@github.com:") + parts = strings.TrimSuffix(parts, ".git") + segments := strings.Split(parts, "/") + if len(segments) >= 2 { + return segments[0], segments[1], nil + } + } + return "", "", fmt.Errorf("could not parse owner/repo from remote URL: %s", rawURL) +} + +// CreateBranchFromDefault stashes any current changes, checks out the repo's +// default branch, pulls latest, creates a new branch, and pops the stash. +// Note: this helper intentionally does NOT commit or push — those are separate +// operations (see CommitChanges and PushBranch) so that callers can inspect +// the working tree after the branch is created but before committing. +func CreateBranchFromDefault(ctx context.Context, dir, branchName string) error { + // Capture stash state before and after so we only pop when we actually + // created a stash entry. "git stash push" exits 0 even when there are + // no local changes ("No local changes to save"), so stashErr == nil is + // not sufficient to know a stash entry was created. 
+ before, err := countStashEntries(ctx, dir) + if err != nil { + return fmt.Errorf("count stash entries before: %w", err) + } + + stashCmd := exec.CommandContext(ctx, "git", "-C", dir, "stash", "push", "--include-untracked", "-m", "bauer-auto-stash") + if out, err := stashCmd.CombinedOutput(); err != nil { + // If stash itself fails, we can't safely proceed because checkout + // may overwrite untracked files. Bail out early. + return fmt.Errorf("git stash failed: %w, output: %s", err, out) + } + + after, err := countStashEntries(ctx, dir) + if err != nil { + return fmt.Errorf("count stash entries after: %w", err) + } + stashed := after > before + + // Checkout default branch + defaultBranch := getDefaultBranch(dir) + cmd := exec.CommandContext(ctx, "git", "-C", dir, "checkout", defaultBranch) + if output, err := cmd.CombinedOutput(); err != nil { + if stashed { + _ = popStash(ctx, dir) + } + return fmt.Errorf("could not checkout %s: %w, output: %s", defaultBranch, err, output) + } + + // Pull latest (non-fatal) + _, _ = exec.CommandContext(ctx, "git", "-C", dir, "pull", "origin", defaultBranch).CombinedOutput() + + // Create new branch + cmd = exec.CommandContext(ctx, "git", "-C", dir, "checkout", "-b", branchName) + if output, err := cmd.CombinedOutput(); err != nil { + if stashed { + _ = popStash(ctx, dir) + } + return fmt.Errorf("could not create branch %s: %w, output: %s", branchName, err, output) + } + + // Pop stash to restore changes onto the new branch + if stashed { + if err := popStash(ctx, dir); err != nil { + return fmt.Errorf("could not restore stashed changes: %w", err) + } + } + + return nil +} + +func countStashEntries(ctx context.Context, dir string) (int, error) { + cmd := exec.CommandContext(ctx, "git", "-C", dir, "stash", "list") + out, err := cmd.Output() + if err != nil { + return 0, err + } + entries := strings.Split(strings.TrimSpace(string(out)), "\n") + if len(entries) == 1 && entries[0] == "" { + return 0, nil + } + return len(entries), 
nil +} + +func popStash(ctx context.Context, dir string) error { + cmd := exec.CommandContext(ctx, "git", "-C", dir, "stash", "pop") + out, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("%w, output: %s", err, out) + } + return nil +} + +// CreateIssue creates a GitHub issue and returns its URL and number. +func CreateIssue(ctx context.Context, owner, repo, title, body string) (string, int, error) { + args := []string{ + "issue", "create", + "--repo", fmt.Sprintf("%s/%s", owner, repo), + "--title", title, + "--body", body, + } + + cmd := exec.CommandContext(ctx, "gh", args...) + output, err := cmd.CombinedOutput() + if err != nil { + return "", 0, fmt.Errorf("failed to create issue: %w, output: %s", err, output) + } + + // Extract issue URL from output + outputStr := string(output) + lines := strings.Split(outputStr, "\n") + var issueURL string + for _, line := range lines { + trimmed := strings.TrimSpace(line) + if strings.HasPrefix(trimmed, "https://github.com/") && strings.Contains(trimmed, "/issues/") { + issueURL = trimmed + break + } + } + + if issueURL == "" { + return "", 0, fmt.Errorf("could not extract issue URL from output: %s", outputStr) + } + + // Extract issue number from URL + parts := strings.Split(issueURL, "/") + if len(parts) > 0 { + numStr := parts[len(parts)-1] + if num, err := strconv.Atoi(numStr); err == nil { + return issueURL, num, nil + } + } + + return issueURL, 0, nil +} diff --git a/internal/github/pr.go b/internal/github/pr.go index e5813a8..d65f984 100644 --- a/internal/github/pr.go +++ b/internal/github/pr.go @@ -2,8 +2,6 @@ package github import ( "fmt" - "log/slog" - "os" "os/exec" "strings" ) @@ -65,19 +63,7 @@ func CreatePR(owner, repo string, opts CreatePROptions) (string, error) { } cmd := exec.Command("gh", args...) 
- - // Log token availability for debugging - logger := slog.Default() - ghToken := os.Getenv("GH_TOKEN") - if ghToken == "" { - ghToken = os.Getenv("GITHUB_TOKEN") - } - if ghToken == "" { - logger.Warn("No GH_TOKEN or GITHUB_TOKEN environment variable set for PR creation") - } else { - logger.Debug("GH_TOKEN is set for PR creation", "token_prefix", ghToken[:10]) - } - + output, err := cmd.CombinedOutput() if err != nil { return "", fmt.Errorf("failed to create PR: %w, output: %s", err, output) diff --git a/internal/github/repo.go b/internal/github/repo.go index 660cb75..ceafe29 100644 --- a/internal/github/repo.go +++ b/internal/github/repo.go @@ -168,10 +168,12 @@ func CommitChanges(localPath, message string) error { return fmt.Errorf("failed to stage changes: %w, output: %s", err, output) } - // Exclude specific files from commit + // Exclude Bauer-generated files from commit so they don't + // end up in the PR. (Paths are relative to the repo root.) excludeFiles := []string{ "bauer-doc-suggestions.json", "bauer-output/", + "bauer-artifacts/", } for _, file := range excludeFiles { cmd := exec.Command("git", "reset", "HEAD", file) diff --git a/internal/orchestrator/orchestrator.go b/internal/orchestrator/orchestrator.go index 90d4e73..9d60514 100644 --- a/internal/orchestrator/orchestrator.go +++ b/internal/orchestrator/orchestrator.go @@ -1,22 +1,35 @@ package orchestrator import ( + "bauer/internal/artifacts" "bauer/internal/config" - "bauer/internal/copilotcli" - "bauer/internal/gdocs" "bauer/internal/prompt" + "bauer/internal/source" "context" "encoding/json" "fmt" "log/slog" "os" + "path/filepath" "time" ) +// ChunkOutput holds the result of a single chunk execution. +type ChunkOutput struct { + ChunkNumber int + Output string + Duration time.Duration +} + // OrchestrationResult contains all outputs from the orchestration flow. 
type OrchestrationResult struct { + // Source bundle from the source layer + Bundle *source.SourceBundle + + // Run ID for the artifact directory + RunID string + // Extraction - ExtractionResult *gdocs.ProcessingResult ExtractionDuration time.Duration // Prompt generation @@ -24,7 +37,7 @@ type OrchestrationResult struct { PlanDuration time.Duration // Only populated if not dry run - CopilotOutputs []copilotcli.ChunkOutput + CopilotOutputs []ChunkOutput CopilotDuration time.Duration SummaryDuration time.Duration @@ -33,52 +46,127 @@ type OrchestrationResult struct { DryRun bool } +// Agent defines the execution contract for any AI backend used by the orchestrator. +// Defined here at the consumer following Go convention; implementations +// (copilotcli.Client, test mocks) satisfy it implicitly. +type Agent interface { + // Start boots the agent (e.g. starts the Copilot SDK server process). + // Must be called before any other method. Callers should defer Stop(). + Start(ctx context.Context) error + + // ExecuteChunk sends a single chunk prompt file to the agent and returns + // the full text output. chunkNum is for logging/display only. + ExecuteChunk(ctx context.Context, chunkPath string, chunkNum int, model string) (string, error) + + // GenerateSummary produces a summary of all chunk outputs. + // Only called when there are multiple chunks. + GenerateSummary(ctx context.Context, outputs []string, model string) (string, error) + + // Stop shuts the agent down cleanly. Safe to call after a failed Start. + Stop() error +} + // Orchestrator defines the interface for executing the BAU orchestration flow. type Orchestrator interface { Execute(ctx context.Context, cfg *config.Config) (*OrchestrationResult, error) } // DefaultOrchestrator is the standard implementation of the Orchestrator interface. 
-type DefaultOrchestrator struct{} +type DefaultOrchestrator struct { + agent Agent + sources *source.Manager + artifacts *artifacts.Manager +} + +// NewOrchestrator creates a new DefaultOrchestrator. Pass any Agent implementation, +// a source.Manager, and an artifacts.Manager. +func NewOrchestrator(a Agent, s *source.Manager, art *artifacts.Manager) *DefaultOrchestrator { + return &DefaultOrchestrator{agent: a, sources: s, artifacts: art} +} -// NewOrchestrator creates a new DefaultOrchestrator instance. -func NewOrchestrator() *DefaultOrchestrator { - return &DefaultOrchestrator{} +// SetAgent replaces the agent. Useful when the agent must be created after +// the orchestrator (e.g. after chdir to a cloned repo). +func (o *DefaultOrchestrator) SetAgent(a Agent) { + o.agent = a } -// Execute runs the full pipeline: extraction, prompt generation, and optional Copilot execution. -// Accepts: Config and Context -// Returns: OrchestrationResult and error -func (o *DefaultOrchestrator) Execute(ctx context.Context, cfg *config.Config) (*OrchestrationResult, error) { +// Execute runs the full pipeline: source fetch, prompt generation, and optional agent execution. +func (o *DefaultOrchestrator) Execute(ctx context.Context, cfg *config.Config) (result *OrchestrationResult, retErr error) { startTime := time.Now() - // 1. 
Initialize GDocs Client and extract from doc + // Create artifact run directory + var runID string + if o.artifacts != nil { + var err error + runID, err = o.artifacts.NewRun(ctx) + if err != nil { + return nil, fmt.Errorf("create run directory: %w", err) + } + slog.Info("Created artifact run directory", "run_id", runID) + } + + // Ensure finalizeRun is called even on error, so failed runs appear in history + defer func() { + if o.artifacts == nil || runID == "" { + return + } + // Build result for finalize if we haven't set one yet + if result == nil { + result = &OrchestrationResult{RunID: runID} + } + status := "success" + if retErr != nil { + status = "failed" + } + mode := "execute" + if config.BoolVal(cfg.DryRun, false) { + mode = "dry-run" + } + o.finalizeRun(runID, result, cfg, mode, status) + }() + + // 1. Fetch from all configured sources via the source layer extractionStart := time.Now() - gdocsClient, err := gdocs.NewClient(ctx, cfg.CredentialsPath) - if err != nil { - slog.Error("Failed to initialize Google Docs client", - slog.String("error", err.Error()), - slog.String("credentials_path", cfg.CredentialsPath), - ) - return nil, fmt.Errorf("failed to initialize Google Docs client: %w", err) + + req := source.Request{ + DocID: cfg.DocID, + CredentialsPath: cfg.CredentialsPath, + FigmaURL: "", // Figma support added in spec 002 } - // 2. Process Document - result, err := gdocsClient.ProcessDocument(ctx, cfg.DocID) + bundle, err := o.sources.Fetch(ctx, req) if err != nil { - return nil, fmt.Errorf("failed to process document: %w", err) + return nil, fmt.Errorf("source fetch: %w", err) } + + if bundle.Document == nil { + return nil, fmt.Errorf("source fetch returned no document data") + } + + result = &OrchestrationResult{ + Bundle: bundle, + RunID: runID, + } + + doc := bundle.Document extractionDuration := time.Since(extractionStart) + result.ExtractionDuration = extractionDuration - // 3. 
Write extraction result to file - outputJSON, err := json.MarshalIndent(result, "", " ") + // 2. Write extraction result to artifact directory + if o.artifacts != nil && runID != "" { + if err := o.artifacts.WriteExtraction(runID, "gdocs.json", doc); err != nil { + slog.Error("Failed to write extraction artifact", slog.String("error", err.Error())) + } + } + + // Also write to legacy output file for backward compatibility + outputJSON, err := json.MarshalIndent(doc, "", " ") if err != nil { slog.Error("Failed to marshal output", slog.String("error", err.Error())) return nil, fmt.Errorf("failed to generate output JSON: %w", err) } outputFile := "bauer-doc-suggestions.json" - err = os.WriteFile(outputFile, outputJSON, 0644) - if err != nil { + if err := os.WriteFile(outputFile, outputJSON, 0644); err != nil { slog.Error("Failed to write output file", slog.String("error", err.Error())) return nil, fmt.Errorf("failed to write output file: %w", err) } @@ -87,22 +175,22 @@ func (o *DefaultOrchestrator) Execute(ctx context.Context, cfg *config.Config) ( slog.Duration("extraction_duration", extractionDuration), ) - // 4. Initialize Prompt Engine + // 3. Initialize Prompt Engine planStart := time.Now() - engine, err := prompt.NewEngine(cfg.PageRefresh) + engine, err := prompt.NewEngine(config.BoolVal(cfg.PageRefresh, false)) if err != nil { slog.Error("Failed to initialize prompt engine", slog.String("error", err.Error())) return nil, fmt.Errorf("failed to initialize prompt engine: %w", err) } - // 5. Generate Prompts from Chunks - totalLocations := len(result.GroupedSuggestions) + // 4. 
Generate Prompts from Chunks + totalLocations := len(doc.GroupedSuggestions) slog.Info("Generating prompts", slog.Int("total_locations", totalLocations), slog.Int("chunk_size", cfg.ChunkSize), ) chunks, err := engine.GenerateAllChunks( - result, + doc, cfg.ChunkSize, cfg.OutputDir, ) @@ -112,6 +200,18 @@ func (o *DefaultOrchestrator) Execute(ctx context.Context, cfg *config.Config) ( } planDuration := time.Since(planStart) + result.PlanDuration = planDuration + result.Chunks = chunks + + // Write prompt artifacts (use base filename only) + if o.artifacts != nil && runID != "" { + for _, chunk := range chunks { + artifactName := filepath.Base(chunk.Filename) + if err := o.artifacts.WritePrompt(runID, artifactName, chunk.Content); err != nil { + slog.Error("Failed to write prompt artifact", slog.String("error", err.Error())) + } + } + } for _, chunk := range chunks { slog.Info("Generated chunk", @@ -122,104 +222,125 @@ func (o *DefaultOrchestrator) Execute(ctx context.Context, cfg *config.Config) ( } // If dry run, return early - if cfg.DryRun { - totalDuration := time.Since(startTime) - - return &OrchestrationResult{ - ExtractionResult: result, - ExtractionDuration: extractionDuration, - Chunks: chunks, - PlanDuration: planDuration, - CopilotOutputs: []copilotcli.ChunkOutput{}, - CopilotDuration: 0, - SummaryDuration: 0, - TotalDuration: totalDuration, - DryRun: true, - }, nil + if config.BoolVal(cfg.DryRun, false) { + result.CopilotOutputs = []ChunkOutput{} + result.DryRun = true + result.TotalDuration = time.Since(startTime) + return result, nil } - // 6. Execute via Copilot SDK - cwd, err := os.Getwd() - if err != nil { - slog.Error("Failed to get working directory", slog.String("error", err.Error())) - return nil, fmt.Errorf("failed to get working directory: %w", err) + // 5. 
Execute via configured agent + if o.agent == nil { + return nil, fmt.Errorf("agent is required") } - slog.Info("Initializing Copilot client", slog.String("cwd", cwd)) - copilotClient, err := copilotcli.NewClient(cwd) - if err != nil { - slog.Error("Failed to create Copilot client", slog.String("error", err.Error())) - return nil, fmt.Errorf("failed to create Copilot client: %w", err) - } - - // Start the Copilot CLI server once - if err := copilotClient.Start(); err != nil { - // Attempt to stop the client if Start failed - if stopErr := copilotClient.Stop(); stopErr != nil { - slog.Error("Failed to stop Copilot client after start failure", slog.String("error", stopErr.Error())) + if err := o.agent.Start(ctx); err != nil { + if stopErr := o.agent.Stop(); stopErr != nil { + slog.Error("Failed to stop agent after start failure", slog.String("error", stopErr.Error())) } - slog.Error("Failed to start Copilot", slog.String("error", err.Error())) - return nil, fmt.Errorf("failed to start Copilot: %w", err) + slog.Error("Failed to start agent", slog.String("error", err.Error())) + return nil, fmt.Errorf("failed to start agent: %w", err) } defer func() { - if err := copilotClient.Stop(); err != nil { - slog.Error("Failed to stop Copilot client", slog.String("error", err.Error())) + if err := o.agent.Stop(); err != nil { + slog.Error("Failed to stop agent", slog.String("error", err.Error())) } }() - // Execute chunks via Copilot SDK - chunkOutputs, copilotDuration, err := executeCopilotChunks(ctx, chunks, cfg, copilotClient) + // Execute chunks via agent + chunkOutputs, copilotDuration, err := executeCopilotChunks(ctx, chunks, cfg, o.agent) if err != nil { slog.Error("Copilot execution failed", slog.String("error", err.Error())) return nil, fmt.Errorf("copilot execution failed: %w", err) } + // Write output artifacts + if o.artifacts != nil && runID != "" { + for _, output := range chunkOutputs { + name := fmt.Sprintf("chunk-%d-output.md", output.ChunkNumber) + if err := 
o.artifacts.WriteOutput(runID, name, output.Output); err != nil { + slog.Error("Failed to write output artifact", slog.String("error", err.Error())) + } + } + } + + result.CopilotOutputs = chunkOutputs + result.CopilotDuration = copilotDuration + slog.Info("Copilot chunks executed", slog.Int("chunk_count", len(chunks)), slog.Duration("total_duration", copilotDuration), ) - // 7. Generate summary if multiple chunks + // 6. Generate summary if multiple chunks summaryDuration := time.Duration(0) if len(chunks) > 1 { summaryStart := time.Now() - if err := copilotClient.GenerateSummary(ctx, chunkOutputs, cfg.SummaryModel); err != nil { + summaryInputs := make([]string, 0, len(chunkOutputs)) + for _, output := range chunkOutputs { + summaryInputs = append(summaryInputs, output.Output) + } + + summary, err := o.agent.GenerateSummary(ctx, summaryInputs, cfg.SummaryModel) + if err != nil { slog.Error("Summary generation failed", slog.String("error", err.Error())) - // Summary failure is not fatal; continue with results } else { summaryDuration = time.Since(summaryStart) slog.Info("Summary generated successfully", slog.Duration("duration", summaryDuration), ) + if o.artifacts != nil && runID != "" && summary != "" { + if err := o.artifacts.WriteOutput(runID, "summary.md", summary); err != nil { + slog.Error("Failed to write summary artifact", slog.String("error", err.Error())) + } + } } } + result.SummaryDuration = summaryDuration + result.TotalDuration = time.Since(startTime) + + return result, nil +} + +// finalizeRun writes metadata and appends to runs.jsonl. 
+func (o *DefaultOrchestrator) finalizeRun(runID string, res *OrchestrationResult, cfg *config.Config, mode, status string) { + if o.artifacts == nil || runID == "" { + return + } + + artifactDir := filepath.Join(o.artifacts.BaseDir(), runID) + + meta := artifacts.RunMeta{ + RunID: runID, + StartedAt: time.Now().Add(-res.TotalDuration).UTC().Format(time.RFC3339), + CompletedAt: time.Now().UTC().Format(time.RFC3339), + Status: status, + DocID: cfg.DocID, + Mode: mode, + ChunkCount: len(res.Chunks), + ArtifactDir: artifactDir, + } - totalDuration := time.Since(startTime) - - return &OrchestrationResult{ - ExtractionResult: result, - ExtractionDuration: extractionDuration, - Chunks: chunks, - PlanDuration: planDuration, - CopilotOutputs: chunkOutputs, - CopilotDuration: copilotDuration, - SummaryDuration: summaryDuration, - TotalDuration: totalDuration, - DryRun: false, - }, nil + if err := o.artifacts.WriteMetadata(runID, meta); err != nil { + slog.Error("Failed to write run metadata", slog.String("error", err.Error())) + } + + if err := o.artifacts.AppendRun(meta); err != nil { + slog.Error("Failed to append run to index", slog.String("error", err.Error())) + } } -// executeCopilotChunks executes each chunk via the Copilot SDK and returns outputs +// executeCopilotChunks executes each chunk via the configured agent and returns outputs func executeCopilotChunks( ctx context.Context, chunks []prompt.ChunkResult, cfg *config.Config, - client *copilotcli.Client, -) ([]copilotcli.ChunkOutput, time.Duration, error) { + executor Agent, +) ([]ChunkOutput, time.Duration, error) { executionStart := time.Now() - var outputs []copilotcli.ChunkOutput + var outputs []ChunkOutput totalChunks := len(chunks) for i, chunk := range chunks { @@ -231,7 +352,7 @@ func executeCopilotChunks( ) // Execute the chunk - output, err := client.ExecuteChunk(ctx, chunk.Filename, chunk.ChunkNumber, cfg.Model) + output, err := executor.ExecuteChunk(ctx, chunk.Filename, chunk.ChunkNumber, cfg.Model) 
if err != nil { return nil, 0, fmt.Errorf("failed to execute chunk %d: %w", chunk.ChunkNumber, err) } @@ -239,7 +360,7 @@ func executeCopilotChunks( chunkDuration := time.Since(chunkStart) // Collect output - outputs = append(outputs, copilotcli.ChunkOutput{ + outputs = append(outputs, ChunkOutput{ ChunkNumber: chunk.ChunkNumber, Output: output, Duration: chunkDuration, diff --git a/internal/orchestrator/orchestrator_test.go b/internal/orchestrator/orchestrator_test.go new file mode 100644 index 0000000..73f3301 --- /dev/null +++ b/internal/orchestrator/orchestrator_test.go @@ -0,0 +1,219 @@ +package orchestrator + +import ( + "bauer/internal/config" + "context" + "errors" + "testing" + + "bauer/internal/artifacts" + "bauer/internal/copilotcli" + "bauer/internal/prompt" + "bauer/internal/source" + "bauer/internal/testutil" +) + +// Compile-time check: copilotcli.Client must implement Agent. +var _ Agent = (*copilotcli.Client)(nil) + +// Compile-time check: testutil.MockAgent must implement Agent. 
+var _ Agent = (*testutil.MockAgent)(nil) + +func TestExecuteCopilotChunks_UsesAgentForEachChunk(t *testing.T) { + t.Parallel() + + mock := &testutil.MockAgent{ + ExecuteChunkFunc: func(ctx context.Context, chunkPath string, chunkNum int, model string) (string, error) { + switch chunkNum { + case 1: + return "first output", nil + case 2: + return "second output", nil + default: + return "", errors.New("unexpected chunk number") + } + }, + } + + chunks := []prompt.ChunkResult{ + {ChunkNumber: 1, Filename: "chunk-1.md"}, + {ChunkNumber: 2, Filename: "chunk-2.md"}, + } + + cfg := &config.Config{ + Model: "gpt-5-mini-high", + } + + outputs, duration, err := executeCopilotChunks(context.Background(), chunks, cfg, mock) + if err != nil { + t.Fatalf("executeCopilotChunks() error = %v", err) + } + + if duration < 0 { + t.Fatalf("executeCopilotChunks() duration = %v, want non-negative", duration) + } + + if len(outputs) != 2 { + t.Fatalf("len(outputs) = %d, want 2", len(outputs)) + } + + if got, want := len(mock.ExecuteChunkCalls), 2; got != want { + t.Fatalf("len(mock.ExecuteChunkCalls) = %d, want %d", got, want) + } + + tests := []struct { + name string + gotNumber int + wantNumber int + gotOutput string + wantOutput string + gotPath string + wantPath string + gotModel string + wantModel string + }{ + { + name: "first chunk", + gotNumber: outputs[0].ChunkNumber, + wantNumber: 1, + gotOutput: outputs[0].Output, + wantOutput: "first output", + gotPath: mock.ExecuteChunkCalls[0].ChunkPath, + wantPath: "chunk-1.md", + gotModel: mock.ExecuteChunkCalls[0].Model, + wantModel: "gpt-5-mini-high", + }, + { + name: "second chunk", + gotNumber: outputs[1].ChunkNumber, + wantNumber: 2, + gotOutput: outputs[1].Output, + wantOutput: "second output", + gotPath: mock.ExecuteChunkCalls[1].ChunkPath, + wantPath: "chunk-2.md", + gotModel: mock.ExecuteChunkCalls[1].Model, + wantModel: "gpt-5-mini-high", + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + 
t.Parallel() + + if tt.gotNumber != tt.wantNumber { + t.Fatalf("ChunkNumber = %d, want %d", tt.gotNumber, tt.wantNumber) + } + if tt.gotOutput != tt.wantOutput { + t.Fatalf("Output = %q, want %q", tt.gotOutput, tt.wantOutput) + } + if tt.gotPath != tt.wantPath { + t.Fatalf("ChunkPath = %q, want %q", tt.gotPath, tt.wantPath) + } + if tt.gotModel != tt.wantModel { + t.Fatalf("Model = %q, want %q", tt.gotModel, tt.wantModel) + } + }) + } +} + +func TestExecuteCopilotChunks_ReturnsAgentError(t *testing.T) { + t.Parallel() + + mock := &testutil.MockAgent{ + ExecuteChunkFunc: func(ctx context.Context, chunkPath string, chunkNum int, model string) (string, error) { + return "", errors.New("agent failed") + }, + } + + chunks := []prompt.ChunkResult{ + {ChunkNumber: 1, Filename: "chunk-1.md"}, + } + + cfg := &config.Config{ + Model: "gpt-5-mini-high", + } + + outputs, duration, err := executeCopilotChunks(context.Background(), chunks, cfg, mock) + if err == nil { + t.Fatal("executeCopilotChunks() error = nil, want non-nil") + } + + if outputs != nil { + t.Fatalf("outputs = %#v, want nil", outputs) + } + + if duration != 0 { + t.Fatalf("duration = %v, want 0", duration) + } + + if got, want := len(mock.ExecuteChunkCalls), 1; got != want { + t.Fatalf("len(mock.ExecuteChunkCalls) = %d, want %d", got, want) + } +} + +func TestNewOrchestrator_WithMockDependencies(t *testing.T) { + t.Parallel() + + mock := &testutil.MockAgent{} + gdocsAdapter := source.NewGDocsAdapter() + sources := source.NewManager(gdocsAdapter) + artMgr := artifacts.NewManager(t.TempDir()) + orch := NewOrchestrator(mock, sources, artMgr) + + if orch == nil { + t.Fatal("NewOrchestrator() returned nil") + } + + if orch.agent == nil { + t.Fatal("orchestrator agent is nil") + } + + if orch.sources == nil { + t.Fatal("orchestrator sources is nil") + } + + if orch.artifacts == nil { + t.Fatal("orchestrator artifacts is nil") + } +} + +func TestOrchestrator_FinalizeRunOnFailure(t *testing.T) { + t.Parallel() + + mock 
:= &testutil.MockAgent{} + gdocsAdapter := source.NewGDocsAdapter() + sources := source.NewManager(gdocsAdapter) + artMgr := artifacts.NewManager(t.TempDir()) + orch := NewOrchestrator(mock, sources, artMgr) + + // Execute with a config that will fail (no doc ID, no source adapter configured) + cfg := &config.Config{ + DryRun: config.BoolPtr(true), + } + _, err := orch.Execute(context.Background(), cfg) + + // The fetch will fail, but finalizeRun should still have been called + // We verify by checking that a run directory was created + _ = err // error expected +} + +func TestSetAgent(t *testing.T) { + t.Parallel() + + gdocsAdapter := source.NewGDocsAdapter() + sources := source.NewManager(gdocsAdapter) + artMgr := artifacts.NewManager(t.TempDir()) + orch := NewOrchestrator(nil, sources, artMgr) + + if orch.agent != nil { + t.Fatal("expected nil agent initially") + } + + mock := &testutil.MockAgent{} + orch.SetAgent(mock) + + if orch.agent == nil { + t.Fatal("SetAgent did not set the agent") + } +} diff --git a/internal/source/gdocs.go b/internal/source/gdocs.go new file mode 100644 index 0000000..8d98016 --- /dev/null +++ b/internal/source/gdocs.go @@ -0,0 +1,40 @@ +package source + +import ( + "bauer/internal/gdocs" + "context" + "fmt" +) + +// GDocsAdapter fetches extraction results from Google Docs. +// It creates a gdocs.Client per Fetch call because the credentials path +// varies per request (especially in the API server). +type GDocsAdapter struct{} + +// NewGDocsAdapter creates a GDocsAdapter. +func NewGDocsAdapter() *GDocsAdapter { + return &GDocsAdapter{} +} + +// Name returns the adapter identifier. +func (a *GDocsAdapter) Name() string { + return "gdocs" +} + +// Fetch extracts suggestions from a Google Doc and returns a *gdocs.ProcessingResult. +// It expects req.CredentialsPath to be set; if empty, returns an error. 
+func (a *GDocsAdapter) Fetch(ctx context.Context, req Request) (any, error) { + if req.DocID == "" { + return nil, fmt.Errorf("gdocs: doc_id is required") + } + if req.CredentialsPath == "" { + return nil, fmt.Errorf("gdocs: credentials_path is required") + } + + client, err := gdocs.NewClient(ctx, req.CredentialsPath) + if err != nil { + return nil, fmt.Errorf("gdocs: failed to create client: %w", err) + } + + return client.ProcessDocument(ctx, req.DocID) +} diff --git a/internal/source/manager.go b/internal/source/manager.go new file mode 100644 index 0000000..6469586 --- /dev/null +++ b/internal/source/manager.go @@ -0,0 +1,46 @@ +package source + +import ( + "bauer/internal/gdocs" + "context" + "fmt" + "log/slog" +) + +// Manager coordinates source adapters and produces a combined SourceBundle. +type Manager struct { + adapters []Adapter +} + +// NewManager creates a Manager with the given source adapters. +func NewManager(adapters ...Adapter) *Manager { + return &Manager{adapters: adapters} +} + +// Fetch calls each registered adapter and assembles the results into a SourceBundle. +// Adapters are called in order; later adapters may enrich (but not overwrite) data +// from earlier ones. +func (m *Manager) Fetch(ctx context.Context, req Request) (*SourceBundle, error) { + bundle := &SourceBundle{} + + for _, a := range m.adapters { + slog.Info("Fetching source", "adapter", a.Name()) + + result, err := a.Fetch(ctx, req) + if err != nil { + return nil, fmt.Errorf("source %s: %w", a.Name(), err) + } + + // Assign to the correct bundle field based on adapter type. + // This is the one place where the manager knows about specific source types; + // it must be updated when new source types are added. 
+ switch v := result.(type) { + case *gdocs.ProcessingResult: + bundle.Document = v + default: + slog.Warn("Unknown source result type", "adapter", a.Name(), "type", fmt.Sprintf("%T", v)) + } + } + + return bundle, nil +} diff --git a/internal/source/manager_test.go b/internal/source/manager_test.go new file mode 100644 index 0000000..e92f259 --- /dev/null +++ b/internal/source/manager_test.go @@ -0,0 +1,83 @@ +package source + +import ( + "context" + "fmt" + "testing" + + "bauer/internal/gdocs" +) + +// mockAdapter implements Adapter for testing the Manager. +type mockAdapter struct { + name string + result any + err error +} + +func (m *mockAdapter) Name() string { return m.name } +func (m *mockAdapter) Fetch(_ context.Context, _ Request) (any, error) { + return m.result, m.err +} + +func TestManager_Fetch_WithGDocsResult(t *testing.T) { + t.Parallel() + + expected := &gdocs.ProcessingResult{ + DocumentTitle: "Test Doc", + DocumentID: "doc-123", + } + + mgr := NewManager(&mockAdapter{name: "gdocs", result: expected}) + bundle, err := mgr.Fetch(context.Background(), Request{DocID: "doc-123", CredentialsPath: "creds.json"}) + if err != nil { + t.Fatalf("Fetch() error = %v", err) + } + + if bundle.Document == nil { + t.Fatal("bundle.Document is nil") + } + if bundle.Document.DocumentTitle != "Test Doc" { + t.Fatalf("DocumentTitle = %q, want %q", bundle.Document.DocumentTitle, "Test Doc") + } + if bundle.Design != nil { + t.Fatal("bundle.Design should be nil when no figma adapter is configured") + } +} + +func TestManager_Fetch_NoAdapters(t *testing.T) { + t.Parallel() + + mgr := NewManager() + bundle, err := mgr.Fetch(context.Background(), Request{DocID: "doc-123", CredentialsPath: "creds.json"}) + if err != nil { + t.Fatalf("Fetch() error = %v", err) + } + if bundle.Document != nil { + t.Fatal("bundle.Document should be nil with no adapters") + } +} + +func TestManager_Fetch_AdapterError(t *testing.T) { + t.Parallel() + + mgr := NewManager(&mockAdapter{name: 
"error-source", result: nil, err: fmt.Errorf("source unavailable")}) + _, err := mgr.Fetch(context.Background(), Request{DocID: "doc-123", CredentialsPath: "creds.json"}) + if err == nil { + t.Fatal("expected error from failing adapter") + } +} + +func TestManager_Fetch_UnknownResultType(t *testing.T) { + t.Parallel() + + mgr := NewManager(&mockAdapter{name: "unknown", result: "unexpected-string"}) + bundle, err := mgr.Fetch(context.Background(), Request{DocID: "doc-123", CredentialsPath: "creds.json"}) + if err != nil { + t.Fatalf("Fetch() error = %v", err) + } + // Unknown types are logged but do not cause a fatal error + if bundle.Document != nil { + t.Fatal("bundle.Document should be nil for unknown result type") + } +} diff --git a/internal/source/source.go b/internal/source/source.go new file mode 100644 index 0000000..09b2770 --- /dev/null +++ b/internal/source/source.go @@ -0,0 +1,24 @@ +// Package source defines the source abstraction layer for Bauer. Source adapters +// fetch data from upstream systems (Google Docs, Figma, etc.) and the Manager +// combines them into a normalized SourceBundle consumed by the orchestrator. +package source + +import "context" + +// Adapter is the interface for any upstream source (Google Docs, Figma, etc.). +// Defined here because the Manager (consumer) lives in the same package. +type Adapter interface { + // Name returns a human-readable identifier for this source (e.g. "gdocs", "figma"). + Name() string + + // Fetch retrieves data from the upstream source. The returned value is + // source-specific; callers type-assert based on the adapter they provided. + Fetch(ctx context.Context, req Request) (any, error) +} + +// Request carries the parameters needed to fetch from sources. 
+type Request struct { + DocID string // Google Doc ID (required for gdocs) + CredentialsPath string // Path to Google service account JSON (required for gdocs) + FigmaURL string // Figma URL (optional; empty when no Figma context) +} diff --git a/internal/source/types.go b/internal/source/types.go new file mode 100644 index 0000000..60bf38e --- /dev/null +++ b/internal/source/types.go @@ -0,0 +1,18 @@ +package source + +import "bauer/internal/gdocs" + +// SourceBundle is the normalized combined output from all configured sources. +// The orchestrator depends on this type, not on any individual source package. +// Fields may be nil depending on which adapters were configured and whether +// their fetch succeeded — callers must check for nil before accessing. +type SourceBundle struct { + // Document holds the Google Docs extraction result. + // Nil when no gdocs adapter was configured or the adapter returned + // an unrecognised result type (see Manager.Fetch). + Document *gdocs.ProcessingResult `json:"document,omitempty"` + + // Design is reserved for Figma integration (spec 002). Nil until T2F lands. + // When populated, it will hold a *figma.NormalizedDesign. + Design any `json:"design,omitempty"` +} diff --git a/internal/testutil/mock.go b/internal/testutil/mock.go new file mode 100644 index 0000000..1a5b3d3 --- /dev/null +++ b/internal/testutil/mock.go @@ -0,0 +1,78 @@ +// Package testutil provides shared test doubles for Bauer's internal packages. +// MockAgent satisfies orchestrator.Agent via structural typing — it does not +// define or own the interface itself. +package testutil + +import "context" + +// MockAgent is a test implementation of orchestrator.Agent where every method +// is a no-op returning nil. Set the Func fields to inject behaviour in tests. 
+type MockAgent struct { + StartFunc func(ctx context.Context) error + ExecuteChunkFunc func(ctx context.Context, chunkPath string, chunkNum int, model string) (string, error) + GenerateSummaryFunc func(ctx context.Context, outputs []string, model string) (string, error) + StopFunc func() error + + // Call tracking + StartCalls int + ExecuteChunkCalls []ExecuteChunkCall + GenerateSummaryCalls []GenerateSummaryCall + StopCalls int +} + +// ExecuteChunkCall records a call to ExecuteChunk. +type ExecuteChunkCall struct { + ChunkPath string + ChunkNum int + Model string +} + +// GenerateSummaryCall records a call to GenerateSummary. +type GenerateSummaryCall struct { + Outputs []string + Model string +} + +// Start increments StartCalls and calls StartFunc if set. +func (m *MockAgent) Start(ctx context.Context) error { + m.StartCalls++ + if m.StartFunc != nil { + return m.StartFunc(ctx) + } + return nil +} + +// ExecuteChunk records the call and calls ExecuteChunkFunc if set. +func (m *MockAgent) ExecuteChunk(ctx context.Context, chunkPath string, chunkNum int, model string) (string, error) { + m.ExecuteChunkCalls = append(m.ExecuteChunkCalls, ExecuteChunkCall{ + ChunkPath: chunkPath, + ChunkNum: chunkNum, + Model: model, + }) + if m.ExecuteChunkFunc != nil { + return m.ExecuteChunkFunc(ctx, chunkPath, chunkNum, model) + } + return "", nil +} + +// GenerateSummary records the call and calls GenerateSummaryFunc if set. +func (m *MockAgent) GenerateSummary(ctx context.Context, outputs []string, model string) (string, error) { + copiedOutputs := append([]string(nil), outputs...) + m.GenerateSummaryCalls = append(m.GenerateSummaryCalls, GenerateSummaryCall{ + Outputs: copiedOutputs, + Model: model, + }) + if m.GenerateSummaryFunc != nil { + return m.GenerateSummaryFunc(ctx, outputs, model) + } + return "", nil +} + +// Stop increments StopCalls and calls StopFunc if set. 
+func (m *MockAgent) Stop() error { + m.StopCalls++ + if m.StopFunc != nil { + return m.StopFunc() + } + return nil +} diff --git a/internal/testutil/mock_test.go b/internal/testutil/mock_test.go new file mode 100644 index 0000000..242381f --- /dev/null +++ b/internal/testutil/mock_test.go @@ -0,0 +1,78 @@ +package testutil + +import ( + "bauer/internal/orchestrator" + "context" + "errors" + "testing" +) + +// Compile-time check: MockAgent must implement orchestrator.Agent. +var _ orchestrator.Agent = (*MockAgent)(nil) + +func TestMockAgent_SatisfiesOrchestratorAgent(t *testing.T) { + t.Parallel() + + mock := &MockAgent{} + + if err := mock.Start(context.Background()); err != nil { + t.Fatalf("MockAgent.Start() error: %v", err) + } + if mock.StartCalls != 1 { + t.Fatalf("StartCalls = %d, want 1", mock.StartCalls) + } + + output, err := mock.ExecuteChunk(context.Background(), "test.md", 1, "model") + if err != nil { + t.Fatalf("ExecuteChunk error: %v", err) + } + if output != "" { + t.Fatalf("ExecuteChunk output = %q, want empty", output) + } + if len(mock.ExecuteChunkCalls) != 1 { + t.Fatalf("ExecuteChunkCalls = %d, want 1", len(mock.ExecuteChunkCalls)) + } + + summary, err := mock.GenerateSummary(context.Background(), []string{"a"}, "model") + if err != nil { + t.Fatalf("GenerateSummary error: %v", err) + } + if summary != "" { + t.Fatalf("GenerateSummary output = %q, want empty", summary) + } + + if err := mock.Stop(); err != nil { + t.Fatalf("Stop error: %v", err) + } + if mock.StopCalls != 1 { + t.Fatalf("StopCalls = %d, want 1", mock.StopCalls) + } +} + +func TestMockAgent_CustomFuncs(t *testing.T) { + t.Parallel() + + mock := &MockAgent{ + ExecuteChunkFunc: func(_ context.Context, _ string, _ int, _ string) (string, error) { + return "custom", nil + }, + GenerateSummaryFunc: func(_ context.Context, _ []string, _ string) (string, error) { + return "summary", nil + }, + StartFunc: func(_ context.Context) error { return errors.New("start fail") }, + } + + if err 
:= mock.Start(context.Background()); err == nil { + t.Fatal("expected Start error") + } + + out, _ := mock.ExecuteChunk(context.Background(), "p", 1, "m") + if out != "custom" { + t.Fatalf("ExecuteChunk = %q, want %q", out, "custom") + } + + sum, _ := mock.GenerateSummary(context.Background(), nil, "m") + if sum != "summary" { + t.Fatalf("GenerateSummary = %q, want %q", sum, "summary") + } +} diff --git a/internal/workflow/api.go b/internal/workflow/api.go deleted file mode 100644 index 891eb6b..0000000 --- a/internal/workflow/api.go +++ /dev/null @@ -1,190 +0,0 @@ -package workflow - -import ( - "encoding/json" - "fmt" - "log/slog" - "net/http" - "time" - - "bauer/internal/orchestrator" -) - -// APIRequest represents the API request for executing a workflow -type APIRequest struct { - // GitHub configuration - GitHubRepo string `json:"github_repo" binding:"required"` // "owner/repo" or HTTPS URL - GitHubToken string `json:"github_token" binding:"required"` // Personal access token - BranchPrefix string `json:"branch_prefix" default:"bauer"` // Branch naming prefix - - // Bauer configuration - DocID string `json:"doc_id" binding:"required"` // Google Doc ID - Credentials string `json:"credentials" binding:"required"` // Path to service account JSON - ChunkSize int `json:"chunk_size" default:"1"` // Number of chunks - PageRefresh bool `json:"page_refresh" default:"false"` // Page refresh mode - OutputDir string `json:"output_dir" default:"bauer-output"` // Output directory - Model string `json:"model" default:"gpt-5-mini-high"` // Copilot model - DryRun bool `json:"dry_run" default:"false"` // Dry run mode - - // Local repository path - LocalRepoPath string `json:"local_repo_path" default:"/tmp"` // Where to clone (optional) -} - -// APIResponse represents the API response from workflow execution -type APIResponse struct { - Status string `json:"status"` // "success", "partial", "failed" - Message string `json:"message"` - Workflow *WorkflowOutput `json:"workflow"` 
- Error string `json:"error,omitempty"` - Timestamp time.Time `json:"timestamp"` -} - -// ExecuteWorkflowHandler is an HTTP handler for executing the complete workflow -func ExecuteWorkflowHandler(orch orchestrator.Orchestrator) http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - logger := slog.Default() - - if r.Method != http.MethodPost { - writeError(w, http.StatusMethodNotAllowed, "method not allowed") - return - } - - // Parse request - var req APIRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { - logger.Error("failed to parse request", "error", err) - writeError(w, http.StatusBadRequest, fmt.Sprintf("invalid request: %v", err)) - return - } - - // Validate request - if req.GitHubRepo == "" { - writeError(w, http.StatusBadRequest, "github_repo is required") - return - } - if req.GitHubToken == "" { - writeError(w, http.StatusBadRequest, "github_token is required") - return - } - if req.DocID == "" { - writeError(w, http.StatusBadRequest, "doc_id is required") - return - } - if req.Credentials == "" { - writeError(w, http.StatusBadRequest, "credentials is required") - return - } - - // Set defaults - if req.BranchPrefix == "" { - req.BranchPrefix = "bauer" - } - if req.LocalRepoPath == "" { - req.LocalRepoPath = "/tmp" - } - if req.OutputDir == "" { - req.OutputDir = "bauer-output" - } - if req.Model == "" { - req.Model = "gpt-5-mini-high" - } - if req.ChunkSize == 0 { - req.ChunkSize = 1 - } - - // Create workflow input - input := WorkflowInput{ - GitHubRepo: req.GitHubRepo, - GitHubToken: req.GitHubToken, - BranchPrefix: req.BranchPrefix, - DocID: req.DocID, - Credentials: req.Credentials, - ChunkSize: req.ChunkSize, - PageRefresh: req.PageRefresh, - OutputDir: req.OutputDir, - Model: req.Model, - DryRun: req.DryRun, - LocalRepoPath: fmt.Sprintf("%s/%s-%d", req.LocalRepoPath, "bauer-workflow", time.Now().Unix()), - } - - logger.Info("workflow API request", - "github_repo", req.GitHubRepo, - "doc_id", req.DocID, - 
"dry_run", req.DryRun, - ) - - // Execute workflow - ctx := r.Context() - workflowOutput, err := ExecuteWorkflow(ctx, input, orch) - - // Build response - response := APIResponse{ - Timestamp: time.Now(), - } - - if workflowOutput != nil { - response.Status = workflowOutput.Status - response.Workflow = workflowOutput - - switch workflowOutput.Status { - case "success": - response.Message = fmt.Sprintf( - "Workflow completed successfully. PR: %s", - workflowOutput.FinalizationInfo.PullRequest.URL, - ) - case "partial": - response.Message = fmt.Sprintf( - "Workflow completed with errors. Branch: %s. Errors: %d", - workflowOutput.RepositoryInfo.BranchName, - len(workflowOutput.Errors), - ) - default: - response.Message = "Workflow failed" - if len(workflowOutput.Errors) > 0 { - response.Error = workflowOutput.Errors[0] - } - } - } - - if err != nil { - response.Status = "failed" - response.Message = "Workflow execution error" - response.Error = err.Error() - logger.Error("workflow execution error", "error", err) - } - - // Determine HTTP status code - statusCode := http.StatusOK - switch response.Status { - case "failed": - statusCode = http.StatusInternalServerError - case "partial": - statusCode = http.StatusAccepted - case "success": - statusCode = http.StatusCreated - } - - // Write response - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(statusCode) - json.NewEncoder(w).Encode(response) - - logger.Info("workflow API response", - "status", response.Status, - "http_status", statusCode, - "duration", workflowOutput.TotalDuration, - ) - } -} - -// Helper functions - -func writeError(w http.ResponseWriter, statusCode int, message string) { - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(statusCode) - json.NewEncoder(w).Encode(map[string]interface{}{ - "status": "error", - "error": message, - "timestamp": time.Now(), - }) -} diff --git a/internal/workflow/workflow.go b/internal/workflow/workflow.go index c0a283e..10e1962 100644 
--- a/internal/workflow/workflow.go +++ b/internal/workflow/workflow.go @@ -1,16 +1,15 @@ package workflow import ( + "bauer/internal/config" + "bauer/internal/github" + "bauer/internal/orchestrator" "context" "fmt" "log/slog" "os" "path/filepath" "time" - - "bauer/internal/config" - "bauer/internal/github" - "bauer/internal/orchestrator" ) // WorkflowInput represents the input for a complete workflow execution @@ -21,13 +20,14 @@ type WorkflowInput struct { BranchPrefix string // Bauer configuration - DocID string - Credentials string - ChunkSize int - PageRefresh bool - OutputDir string - Model string - DryRun bool + DocID string + Credentials string + ChunkSize int + PageRefresh bool + OutputDir string + ArtifactsDir string + Model string + DryRun bool // Local repository path LocalRepoPath string @@ -74,11 +74,16 @@ type WorkflowOutput struct { Warnings []string `json:"warnings"` } +// NewAgentFunc creates an orchestrator.Agent in the correct working directory. +// The workflow calls this AFTER chdir to the cloned repo, so the agent +// operates in the target workspace — not the Bauer process startup directory. +type NewAgentFunc func() (orchestrator.Agent, error) + // ExecuteWorkflow orchestrates the complete flow: // 1. GitHub Setup (clone, create branch) // 2. Bauer Processing (extract, chunk, apply changes) // 3. 
GitHub Finalization (commit, push, create PR) -func ExecuteWorkflow(ctx context.Context, input WorkflowInput, orch orchestrator.Orchestrator) (*WorkflowOutput, error) { +func ExecuteWorkflow(ctx context.Context, input WorkflowInput, orch orchestrator.Orchestrator, newAgent NewAgentFunc) (*WorkflowOutput, error) { output := &WorkflowOutput{ Status: "pending", StartTime: time.Now(), @@ -116,8 +121,7 @@ func ExecuteWorkflow(ctx context.Context, input WorkflowInput, orch orchestrator logger.Info("workflow success: GitHub setup successful") - // Convert credentials path to absolute - // Do this before changing directory so relative paths work + // Convert credentials path to absolute before chdir so relative paths work var credentialsPath string if input.Credentials != "" { absPath, err := filepath.Abs(input.Credentials) @@ -132,6 +136,23 @@ func ExecuteWorkflow(ctx context.Context, input WorkflowInput, orch orchestrator logger.Info("workflow: resolved credentials path", "path", credentialsPath) } + // Resolve output dir to absolute path BEFORE chdir. + // The orchestrator uses OutputDir to write prompt files. If left relative, + // chdir redirects them into the cloned target repo, causing prompt files + // to be staged and committed into the generated PR. + // + // Callers should already pass absolute paths, but we resolve here as a + // safety net. The artifacts manager must also be created with an absolute + // base dir (enforced by the caller). 
+ absOutputDir := input.OutputDir + if !filepath.IsAbs(absOutputDir) { + abs, err := filepath.Abs(absOutputDir) + if err != nil { + return nil, fmt.Errorf("resolve output dir: %w", err) + } + absOutputDir = abs + } + // Change to target repository directory // Save original directory to restore later originalDir, err := os.Getwd() @@ -153,21 +174,33 @@ func ExecuteWorkflow(ctx context.Context, input WorkflowInput, orch orchestrator logger.Info("workflow: changed to cloned repository", "path", input.LocalRepoPath) defer os.Chdir(originalDir) + // NOW create the Copilot client — after chdir, cwd is the target repo + if newAgent != nil { + agent, err := newAgent() + if err != nil { + return nil, fmt.Errorf("create agent: %w", err) + } + // Wire the agent into the orchestrator + if def, ok := orch.(*orchestrator.DefaultOrchestrator); ok { + def.SetAgent(agent) + } + } + // Bauer processing logger.Info("workflow: starting phase 2 - Bauer processing") bauerStartTime := time.Now() - // Create Bauer config with target repo (now current directory) + // Create Bauer config — use absolute paths so files land outside the cloned repo bauerCfg := &config.Config{ DocID: input.DocID, - CredentialsPath: credentialsPath, // Use absolute path - DryRun: input.DryRun, + CredentialsPath: credentialsPath, + DryRun: config.BoolPtr(input.DryRun), ChunkSize: input.ChunkSize, - PageRefresh: input.PageRefresh, - OutputDir: input.OutputDir, + PageRefresh: config.BoolPtr(input.PageRefresh), + OutputDir: absOutputDir, Model: input.Model, - TargetRepo: ".", // Current directory is the cloned repo + TargetRepo: ".", } logger.Info("workflow: Bauer target repository set at", "path", bauerCfg.TargetRepo) @@ -189,9 +222,9 @@ func ExecuteWorkflow(ctx context.Context, input WorkflowInput, orch orchestrator if len(bauerResult.Chunks) > 0 { output.BauerResult.ChunkCount = len(bauerResult.Chunks) } - if bauerResult.ExtractionResult != nil { + if bauerResult.Bundle != nil && bauerResult.Bundle.Document != 
nil { // Count total suggestions from extraction result - output.BauerResult.TotalSuggestions = 0 // TODO: adjust based on actual field + output.BauerResult.TotalSuggestions = len(bauerResult.Bundle.Document.GroupedSuggestions) } }