From 0ab18945175031036b892babced8e16fe33c0641 Mon Sep 17 00:00:00 2001
From: Albert Mavashev <amavashev@k2n.io>
Date: Sat, 16 May 2026 07:36:22 -0400
Subject: [PATCH 1/3] docs(rust): add config reference + async-openai
 integration; fix conversion friction
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Investigation of the cycles-client-rust 13:1 clone-to-install ratio (907
clones, 67 installs over 6 weeks). Surface quality is fine: crates.io
description is clean, docs.rs renders at 99.77%, README has badges +
cargo block above the quickstart, /examples has 5 files. The crate
itself is in good shape.

Two real gaps identified in the cycles-docs surface:

1. **Missing Rust client config reference.** Python, TypeScript, and
   Spring Boot all have `configuration/*-client-configuration-reference.md`
   docs. Rust did not. An evaluator hitting the docs to look up retry
   tuning, custom reqwest::Client wiring, the blocking variant, or env
   var prefix customization had nowhere to land.

2. **No real LLM-integration example.** The Rust quickstart and Rust
   integration guide both use `call_llm()` / `openai_call()` /
   `stream_llm_response()` as placeholders. An evaluator who copied the
   sample code had to invent the async-openai wiring themselves —
   including streaming token capture, OpenAI vs Cycles error mapping,
   and cap-to-max_tokens application. That's the friction point that
   most plausibly explains the ratio.

Changes:

- **NEW** `configuration/rust-client-configuration-reference.md` —
  CyclesConfig fields, env vars (default and custom prefix), builder
  API method reference, custom reqwest::Client guidance, blocking
  client variant behind feature flag, retry tuning examples. Verified
  against cycles-client-rust src/config.rs.

- **NEW** `how-to/integrating-cycles-with-async-openai.md` — concrete
  composition of `runcycles` with `async-openai`: basic chat completion
  with token extraction from `response.usage`, cap application to
  max_tokens, streaming via ReservationGuard with include_usage handling,
  OpenAI vs Cycles error separation, token-to-microcents helper, brief
  Anthropic variant via anthropic-sdk-rs. Gotchas section flags the
  common stumbles (missing usage, None content, key leakage in
  metadata, runtime mismatch).

- **MOD** `quickstart/getting-started-with-the-rust-client.md` —
  Configuration section now links to the new config reference; Next
  Steps re-ordered to surface both new docs prominently.

- **MOD** `how-to/integrating-cycles-with-rust.md` — callout near the
  top pointing readers who want a real LLM example to the new
  async-openai doc.

- **MOD** `.vitepress/config.ts` — added Rust client config to the
  sidebar Configuration section; added async-openai entry under the
  Rust how-to subsection.

- **MOD** `how-to/index.md` — added "OpenAI (Rust / async-openai)" to
  the LLM providers row (previously listed only Python and TypeScript
  for OpenAI specifically).

Glossary linker ran on both new docs; added 7 contextual links across
them.

Out of scope (separate repo, can't edit from here):

- The cycles-client-rust /examples directory has 5 files but no LLM
  integration example. Adding examples/async_openai_completion.rs and
  examples/axum_middleware.rs would close a parallel gap on the repo
  side. Worth filing on cycles-client-rust as a follow-up issue.

- A README quickstart that opens with the async-openai example rather
  than the abstract `call_llm()` would compound this; same parallel
  gap on the repo side.
---
 .vitepress/config.ts                          |   2 +
 .../rust-client-configuration-reference.md    | 271 +++++++++++++
 how-to/index.md                               |   2 +-
 .../integrating-cycles-with-async-openai.md   | 374 ++++++++++++++++++
 how-to/integrating-cycles-with-rust.md        |   2 +
 .../getting-started-with-the-rust-client.md   |   9 +-
 6 files changed, 657 insertions(+), 3 deletions(-)
 create mode 100644 configuration/rust-client-configuration-reference.md
 create mode 100644 how-to/integrating-cycles-with-async-openai.md

diff --git a/.vitepress/config.ts b/.vitepress/config.ts
index 409662ee..82d05db6 100644
--- a/.vitepress/config.ts
+++ b/.vitepress/config.ts
@@ -496,6 +496,7 @@ export default defineConfig({
               collapsed: true,
               items: [
                 { text: 'Rust Integration Guide', link: '/how-to/integrating-cycles-with-rust' },
+                { text: 'Integrate with async-openai', link: '/how-to/integrating-cycles-with-async-openai' },
                 { text: 'Error Handling in Rust', link: '/how-to/error-handling-patterns-in-rust' },
               ]
             },
@@ -537,6 +538,7 @@ export default defineConfig({
           items: [
             { text: 'Python Client Configuration', link: '/configuration/python-client-configuration-reference' },
             { text: 'TypeScript Client Configuration', link: '/configuration/typescript-client-configuration-reference' },
+            { text: 'Rust Client Configuration', link: '/configuration/rust-client-configuration-reference' },
             { text: 'Spring Client Configuration', link: '/configuration/client-configuration-reference-for-cycles-spring-boot-starter' },
             { text: 'Server Configuration', link: '/configuration/server-configuration-reference-for-cycles' },
             { text: 'SpEL Expression Reference', link: '/configuration/spel-expression-reference-for-cycles' },
diff --git a/configuration/rust-client-configuration-reference.md b/configuration/rust-client-configuration-reference.md
new file mode 100644
index 00000000..9523f3df
--- /dev/null
+++ b/configuration/rust-client-configuration-reference.md
@@ -0,0 +1,271 @@
+---
+title: "Rust Client Configuration Reference"
+description: "Complete reference for the runcycles Rust client: CyclesConfig fields, environment variables, builder API, retry tuning, blocking variant, custom HTTP client."
+head:
+  - - meta
+    - name: keywords
+      content: "rust client configuration, runcycles config, cycles rust env vars, rust client builder, rust async client config, cycles rust retry tuning, rust blocking client"
+---
+
+# Rust Client Configuration Reference
+
+Complete reference for all configuration options in the `runcycles` Rust client. Targets `runcycles >= 0.2.0`. The async client is the default; the blocking variant is available behind a feature flag.
+
+For the introductory walkthrough, see the [Rust Client Quickstart](/quickstart/getting-started-with-the-rust-client). For runtime error patterns, see [Error Handling in Rust](/how-to/error-handling-patterns-in-rust).
+
+## CyclesConfig
+
+The `CyclesConfig` struct holds all client configuration. It can be constructed via the builder API (recommended), via `CyclesConfig::from_env()`, or by populating the struct fields directly.
+
+### Required fields
+
+| Field | Type | Description |
+|---|---|---|
+| `base_url` | `String` | Base URL of the [Cycles server](/glossary#cycles-server) (e.g. `http://localhost:7878`) |
+| `api_key` | `String` | API key for authentication. [Tenant](/glossary#tenant)-scoped key starting with `cyc_live_` |
+
+### Subject defaults
+
+These fields set default Subject values applied to every request unless overridden at the call site. Override at the call site by passing an explicit `Subject` to the request builder.
+
+| Field | Type | Default | Description |
+|---|---|---|---|
+| `tenant` | `Option<String>` | `None` | Default tenant |
+| `workspace` | `Option<String>` | `None` | Default workspace |
+| `app` | `Option<String>` | `None` | Default application name |
+| `workflow` | `Option<String>` | `None` | Default workflow |
+| `agent` | `Option<String>` | `None` | Default agent |
+| `toolset` | `Option<String>` | `None` | Default toolset |
+
+### HTTP timeouts
+
+| Field | Type | Default | Description |
+|---|---|---|---|
+| `connect_timeout` | `Duration` | `2_000 ms` | TCP connection timeout |
+| `read_timeout` | `Duration` | `5_000 ms` | Read timeout for responses |
+
+`Duration` values are constructed with `std::time::Duration::from_millis(...)` or `from_secs(...)` in programmatic configuration. Environment variables are expressed in milliseconds (see below).
+
+### Retry configuration
+
+Controls the commit retry engine for transient failures. The same engine runs in the async client and the blocking variant.
+
+| Field | Type | Default | Description |
+|---|---|---|---|
+| `retry_enabled` | `bool` | `true` | Enable automatic commit retries |
+| `retry_max_attempts` | `u32` | `5` | Maximum number of retry attempts |
+| `retry_initial_delay` | `Duration` | `500 ms` | Delay before the first retry |
+| `retry_multiplier` | `f64` | `2.0` | Exponential backoff multiplier between retries |
+| `retry_max_delay` | `Duration` | `30_000 ms` | Maximum delay between retries (cap) |
+
+#### How retry works
+
+When a commit fails with a transport error or a 5xx response, the retry engine schedules a retry using exponential backoff:
+
+```
+Attempt 1: wait 500 ms
+Attempt 2: wait 1_000 ms
+Attempt 3: wait 2_000 ms
+Attempt 4: wait 4_000 ms
+Attempt 5: wait 8_000 ms (every delay is capped at retry_max_delay)
+```
+
+Non-retryable errors (4xx responses other than 429, validation failures, deserialization errors) are not retried. `BudgetExceeded` carries a server-suggested `retry_after` that callers can apply manually.
+
+## Programmatic configuration
+
+The builder API is the recommended way to construct a client:
+
+```rust
+use runcycles::{CyclesClient, CyclesConfig};
+use std::time::Duration;
+
+let client = CyclesClient::builder(
+    "cyc_live_...",
+    "http://localhost:7878",
+)
+.tenant("acme-corp")
+.workspace("production")
+.app("support-bot")
+.connect_timeout(Duration::from_millis(2_000))
+.read_timeout(Duration::from_millis(5_000))
+.retry_enabled(true)
+.retry_max_attempts(5)
+.retry_initial_delay(Duration::from_millis(500))
+.retry_multiplier(2.0)
+.retry_max_delay(Duration::from_secs(30))
+.build();
+```
+
+Or construct `CyclesConfig` directly and pass it to `CyclesClient::new`:
+
+```rust
+use runcycles::{CyclesClient, CyclesConfig};
+use std::time::Duration;
+
+let config = CyclesConfig {
+    base_url: "http://localhost:7878".into(),
+    api_key: "cyc_live_...".into(),
+    tenant: Some("acme-corp".into()),
+    connect_timeout: Duration::from_millis(2_000),
+    read_timeout: Duration::from_millis(5_000),
+    retry_enabled: true,
+    retry_max_attempts: 5,
+    retry_initial_delay: Duration::from_millis(500),
+    retry_multiplier: 2.0,
+    retry_max_delay: Duration::from_secs(30),
+    ..Default::default()
+};
+
+let client = CyclesClient::new(config);
+```
+
+## Environment variable configuration
+
+Use `CyclesConfig::from_env()` to load configuration from environment variables. The default prefix is `CYCLES_`:
+
+```rust
+use runcycles::CyclesConfig;
+
+let config = CyclesConfig::from_env().expect("missing required CYCLES_* env vars");
+```
+
+| Environment variable | Maps to | Type | Required |
+|---|---|---|---|
+| `CYCLES_BASE_URL` | `base_url` | string | Yes |
+| `CYCLES_API_KEY` | `api_key` | string | Yes |
+| `CYCLES_TENANT` | `tenant` | string | No |
+| `CYCLES_WORKSPACE` | `workspace` | string | No |
+| `CYCLES_APP` | `app` | string | No |
+| `CYCLES_WORKFLOW` | `workflow` | string | No |
+| `CYCLES_AGENT` | `agent` | string | No |
+| `CYCLES_TOOLSET` | `toolset` | string | No |
+| `CYCLES_CONNECT_TIMEOUT` | `connect_timeout` | milliseconds (integer) | No |
+| `CYCLES_READ_TIMEOUT` | `read_timeout` | milliseconds (integer) | No |
+| `CYCLES_RETRY_ENABLED` | `retry_enabled` | `true` / `false` | No |
+| `CYCLES_RETRY_MAX_ATTEMPTS` | `retry_max_attempts` | integer | No |
+| `CYCLES_RETRY_INITIAL_DELAY` | `retry_initial_delay` | milliseconds (integer) | No |
+| `CYCLES_RETRY_MULTIPLIER` | `retry_multiplier` | float | No |
+| `CYCLES_RETRY_MAX_DELAY` | `retry_max_delay` | milliseconds (integer) | No |
+
+::: tip Custom env var prefix
+Unlike most other Cycles clients, the Rust client can load its configuration from a custom environment-variable prefix. This is useful when a single process runs multiple Cycles clients against different servers:
+
+```rust
+let primary  = CyclesConfig::from_env_with_prefix("CYCLES_PRIMARY_")?;
+let staging  = CyclesConfig::from_env_with_prefix("CYCLES_STAGING_")?;
+```
+
+The default `from_env()` is equivalent to `from_env_with_prefix("CYCLES_")`.
+:::
+
+## Resolution order
+
+For each Subject field, the request builder resolves values in this priority:
+
+1. **Per-call value** — passed explicitly to the request builder (e.g. `Subject { tenant: Some("override".into()), .. }`)
+2. **Config default** — set on the `CyclesConfig` / builder
+
+If neither provides a value, the field is omitted from the request and the server applies its own defaults.
+
+## Custom `reqwest::Client`
+
+By default, the client creates its own `reqwest::Client` with the configured timeouts. Pass a custom one when you need shared connection pooling, custom middleware, TLS pinning, or proxy support:
+
+```rust
+use runcycles::CyclesClient;
+use reqwest::Client;
+use std::time::Duration;
+
+let http = Client::builder()
+    .pool_max_idle_per_host(20)
+    .timeout(Duration::from_secs(10))
+    .build()?;
+
+let client = CyclesClient::builder(
+    "cyc_live_...",
+    "http://localhost:7878",
+)
+.http_client(http)  // overrides connect_timeout / read_timeout from config
+.tenant("acme-corp")
+.build();
+```
+
+When a custom `reqwest::Client` is provided, the config's `connect_timeout` and `read_timeout` are ignored — set them on the `reqwest::Client` instead.
+
+## Blocking client variant
+
+For applications running in synchronous contexts (CLI tools, sync HTTP frameworks like `rouille`, embedded scripts), the crate ships a blocking variant behind a feature flag.
+
+```toml
+# Cargo.toml
+[dependencies]
+runcycles = { version = "0.2", features = ["blocking"] }
+```
+
+```rust
+use runcycles::{BlockingCyclesClient, CyclesConfig};
+
+let client = BlockingCyclesClient::new(CyclesConfig::from_env()?);
+let resp = client.get_balances(&BalanceParams {
+    tenant: Some("acme-corp".into()),
+    ..Default::default()
+})?;
+```
+
+The blocking client mirrors the async client's surface but uses `reqwest::blocking::Client` underneath. The reserve-commit lifecycle, retry engine, and error types are identical; only the await points are removed.
+
+::: warning Don't mix runtimes
+The blocking client must not be called from inside a Tokio runtime: it blocks the executor thread, and `reqwest::blocking` panics when it is used from within an async runtime. For most applications using `tokio::main`, the async client is correct. The blocking variant is for genuinely synchronous contexts.
+:::
+
+## Disabling retry
+
+```rust
+let client = CyclesClient::builder("cyc_live_...", "http://localhost:7878")
+    .retry_enabled(false)
+    .build();
+```
+
+## Aggressive retry for critical commits
+
+```rust
+use std::time::Duration;
+
+let client = CyclesClient::builder("cyc_live_...", "http://localhost:7878")
+    .retry_max_attempts(10)
+    .retry_initial_delay(Duration::from_millis(200))
+    .retry_multiplier(1.5)
+    .retry_max_delay(Duration::from_secs(60))
+    .build();
+```
+
+## CyclesClientBuilder method reference
+
+| Method | Sets | Notes |
+|---|---|---|
+| `new(api_key, base_url)` | required fields | The constructor; both args are `impl Into<String>` |
+| `.tenant(s)` | subject default | All subject methods accept `impl Into<String>` |
+| `.workspace(s)` | subject default | |
+| `.app(s)` | subject default | |
+| `.workflow(s)` | subject default | |
+| `.agent(s)` | subject default | |
+| `.toolset(s)` | subject default | |
+| `.connect_timeout(d)` | HTTP | Takes `std::time::Duration` |
+| `.read_timeout(d)` | HTTP | Takes `std::time::Duration` |
+| `.retry_enabled(b)` | retry | Toggle the retry engine |
+| `.retry_max_attempts(n)` | retry | |
+| `.retry_initial_delay(d)` | retry | Takes `std::time::Duration` |
+| `.retry_multiplier(f)` | retry | Takes `f64` |
+| `.retry_max_delay(d)` | retry | Takes `std::time::Duration` |
+| `.http_client(c)` | HTTP | Provide a custom `reqwest::Client`; overrides timeouts |
+| `.build()` | finalizes | Returns `CyclesClient` (async) |
+| `.build_blocking()` | finalizes | Returns `BlockingCyclesClient`; requires the `blocking` feature |
+
+## Next steps
+
+- [Rust Client Quickstart](/quickstart/getting-started-with-the-rust-client) — installation and first [reservation](/glossary#reservation)
+- [Error Handling in Rust](/how-to/error-handling-patterns-in-rust) — retry, recovery, and [graceful degradation](/glossary#graceful-degradation)
+- [Integrating Cycles with Rust](/how-to/integrating-cycles-with-rust) — multi-step flows, streaming, framework integration
+- [Server Configuration Reference](/configuration/server-configuration-reference-for-cycles) — server-side properties
+- [How Reserve-Commit Works](/protocol/how-reserve-commit-works-in-cycles) — the underlying lifecycle
diff --git a/how-to/index.md b/how-to/index.md
index a18a5d86..f8ecff1f 100644
--- a/how-to/index.md
+++ b/how-to/index.md
@@ -18,7 +18,7 @@ Recipes for common tasks with Cycles, organized by what you're trying to do.
 
 ### LLM providers
 
-[OpenAI (Python)](/how-to/integrating-cycles-with-openai) · [OpenAI (TypeScript)](/how-to/integrating-cycles-with-openai-typescript) · [Anthropic (Python)](/how-to/integrating-cycles-with-anthropic) · [Anthropic (TypeScript)](/how-to/integrating-cycles-with-anthropic-typescript) · [AWS Bedrock](/how-to/integrating-cycles-with-aws-bedrock) · [Google Gemini](/how-to/integrating-cycles-with-google-gemini) · [Groq](/how-to/integrating-cycles-with-groq) · [Ollama / local LLMs](/how-to/integrating-cycles-with-ollama)
+[OpenAI (Python)](/how-to/integrating-cycles-with-openai) · [OpenAI (TypeScript)](/how-to/integrating-cycles-with-openai-typescript) · [OpenAI (Rust / async-openai)](/how-to/integrating-cycles-with-async-openai) · [Anthropic (Python)](/how-to/integrating-cycles-with-anthropic) · [Anthropic (TypeScript)](/how-to/integrating-cycles-with-anthropic-typescript) · [AWS Bedrock](/how-to/integrating-cycles-with-aws-bedrock) · [Google Gemini](/how-to/integrating-cycles-with-google-gemini) · [Groq](/how-to/integrating-cycles-with-groq) · [Ollama / local LLMs](/how-to/integrating-cycles-with-ollama)
 
 ### Agent frameworks
 
diff --git a/how-to/integrating-cycles-with-async-openai.md b/how-to/integrating-cycles-with-async-openai.md
new file mode 100644
index 00000000..4d03d0ba
--- /dev/null
+++ b/how-to/integrating-cycles-with-async-openai.md
@@ -0,0 +1,374 @@
+---
+title: "Integrate Cycles with async-openai (Rust)"
+description: "Wrap async-openai chat completions with Cycles reserve-commit: budget gates before the request, real token usage on commit, streaming, error mapping."
+head:
+  - - meta
+    - name: keywords
+      content: "async-openai cycles, rust openai budget control, runcycles openai, rust llm reserve commit, async-openai streaming budget, openai rust cost tracking, anthropic-sdk-rs cycles"
+---
+
+# Integrate Cycles with async-openai (Rust)
+
+The [Rust quickstart](/quickstart/getting-started-with-the-rust-client) and [Rust integration guide](/how-to/integrating-cycles-with-rust) both use a `call_llm()` placeholder where a real OpenAI call should go. This page fills that gap: it shows how `runcycles` composes with [`async-openai`](https://crates.io/crates/async-openai) (the dominant Rust client for the OpenAI API) for chat completions, streaming, and token-accurate commits.
+
+The same pattern transfers to [`anthropic-sdk-rs`](https://crates.io/crates/anthropic-sdk) and other Rust LLM clients — only the type names change. See the [Anthropic variant](#anthropic-variant-anthropic-sdk-rs) section at the bottom.
+
+## What you get
+
+- `with_cycles()` wrapping a real OpenAI call, with `prompt_tokens + completion_tokens` flowing through to the commit
+- A `ReservationGuard` pattern for streaming chat completions where token counts are only known at the end of the stream
+- Error mapping that distinguishes OpenAI errors (network, rate limit, auth) from Cycles errors (budget exceeded, [reservation](/glossary#reservation) expired) so callers can act on each correctly
+- Token-to-USD conversion at commit time for spend-denominated budgets
+
+## Cargo.toml
+
+```toml
+[dependencies]
+runcycles = "0.2"
+async-openai = "0.30"          # check crates.io for the current version
+tokio = { version = "1", features = ["full"] }
+futures = "0.3"                # for stream consumption
+```
+
+`async-openai` major versions change occasionally. The shape of the API (chat completions, usage, streaming) has been stable for several minor releases; the type paths in this guide are accurate to the 0.30.x line at publication. If you pin a different version, the names below may need a small adjustment.
+
+## The basic pattern: with_cycles + chat completions
+
+```rust
+use async_openai::{
+    Client,
+    types::{CreateChatCompletionRequestArgs, ChatCompletionRequestUserMessageArgs, Role},
+};
+use runcycles::{
+    CyclesClient, with_cycles, WithCyclesConfig,
+    models::{Amount, Subject, CyclesMetrics},
+};
+
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    let cycles = CyclesClient::builder("cyc_live_...", "http://localhost:7878")
+        .tenant("acme-corp")
+        .build();
+    let openai = Client::new();
+
+    let prompt = "Summarize the runcycles crate in one sentence.";
+
+    let reply = with_cycles(
+        &cycles,
+        WithCyclesConfig::new(Amount::tokens(1_500))
+            .action("llm.completion", "gpt-4o-mini")
+            .subject(Subject {
+                tenant: Some("acme-corp".into()),
+                ..Default::default()
+            }),
+        |_ctx| async move {
+            let request = CreateChatCompletionRequestArgs::default()
+                .model("gpt-4o-mini")
+                .max_tokens(800u32)
+                .messages([ChatCompletionRequestUserMessageArgs::default()
+                    .content(prompt)
+                    .build()?
+                    .into()])
+                .build()?;
+
+            let response = openai.chat().create(request).await?;
+
+            let text = response
+                .choices
+                .first()
+                .and_then(|c| c.message.content.clone())
+                .unwrap_or_default();
+
+            // usage is `Option<CompletionUsage>`; treat missing as zero
+            let actual = response
+                .usage
+                .map(|u| u.total_tokens as i64)
+                .unwrap_or(0);
+
+            Ok((text, Amount::tokens(actual)))
+        },
+    )
+    .await?;
+
+    println!("{reply}");
+    Ok(())
+}
+```
+
+### What's happening
+
+| Step | What runs | What is recorded |
+|---|---|---|
+| Before the closure | Cycles reserves `1_500` [tokens](/glossary#tokens) against the configured `Subject` | Reservation created, decision evaluated |
+| Inside the closure | `openai.chat().create(request)` issues the actual API call | OpenAI bills your account for the real usage |
+| Return value | `(text, Amount::tokens(actual_total))` | The actual `total_tokens` becomes the commit amount |
+| After the closure | Cycles commits `actual` tokens, releases the unused reservation | Final spend recorded; the reservation lifecycle closes |
+
+If `openai.chat().create()` returns `Err`, the closure returns `Err` and the reservation is released — no commit, no false spend record.
+
+## Capping max_tokens from `ALLOW_WITH_CAPS`
+
+When Cycles returns `ALLOW_WITH_CAPS`, the `GuardContext` carries the server's cap suggestions. Apply them to the OpenAI request before issuing it:
+
+```rust
+let reply = with_cycles(
+    &cycles,
+    WithCyclesConfig::new(Amount::tokens(1_500))
+        .action("llm.completion", "gpt-4o-mini")
+        .subject(Subject { tenant: Some("acme-corp".into()), ..Default::default() }),
+    |ctx| async move {
+        // Default ceiling; override if Cycles capped lower
+        let mut max_tokens: u32 = 800;
+        if let Some(caps) = &ctx.caps {
+            if let Some(cap) = caps.max_tokens {
+                max_tokens = (cap as u32).min(max_tokens);
+            }
+        }
+
+        let request = CreateChatCompletionRequestArgs::default()
+            .model("gpt-4o-mini")
+            .max_tokens(max_tokens)
+            .messages([ChatCompletionRequestUserMessageArgs::default()
+                .content(prompt)
+                .build()?
+                .into()])
+            .build()?;
+
+        let response = openai.chat().create(request).await?;
+        let actual = response.usage.map(|u| u.total_tokens as i64).unwrap_or(0);
+        let text = response.choices.first().and_then(|c| c.message.content.clone()).unwrap_or_default();
+        Ok((text, Amount::tokens(actual)))
+    },
+).await?;
+```
+
+`caps.tool_allowlist` and `caps.tool_denylist` follow the same shape — if you wire OpenAI's function-calling tools, use those caps to filter your tool list before passing it to the request builder. See [Caps and the Three-Way Decision Model](/protocol/caps-and-the-three-way-decision-model-in-cycles) for the full cap surface.
+
+## Streaming: ReservationGuard with stream consumption
+
+Streaming chat completions return tokens one chunk at a time. The total token count is only known after the stream ends, which means `with_cycles()` (which expects the closure to return both the value and the actual cost in one go) is not the right primitive. Use a `ReservationGuard` instead:
+
+```rust
+use async_openai::{
+    Client,
+    types::{CreateChatCompletionRequestArgs, ChatCompletionRequestUserMessageArgs},
+};
+use futures::StreamExt;
+use runcycles::{
+    CyclesClient,
+    models::{
+        Amount, Subject, Action, ReservationCreateRequest, CommitRequest, CyclesMetrics,
+    },
+};
+
+let openai = Client::new();
+
+let guard = cycles.reserve(
+    ReservationCreateRequest::builder()
+        .subject(Subject { tenant: Some("acme-corp".into()), ..Default::default() })
+        .action(Action::new("llm.completion", "gpt-4o-mini"))
+        .estimate(Amount::tokens(2_000))
+        .ttl_ms(60_000_u64)
+        .build()
+).await?;
+
+// Apply caps before building the request
+let mut max_tokens: u32 = 1_500;
+if let Some(caps) = guard.caps() {
+    if let Some(cap) = caps.max_tokens {
+        max_tokens = (cap as u32).min(max_tokens);
+    }
+}
+
+let request = CreateChatCompletionRequestArgs::default()
+    .model("gpt-4o-mini")
+    .max_tokens(max_tokens)
+    .messages([ChatCompletionRequestUserMessageArgs::default()
+        .content(prompt)
+        .build()?
+        .into()])
+    .stream(true)
+    .build()?;
+
+let mut stream = openai.chat().create_stream(request).await?;
+
+let mut full_text = String::new();
+let mut final_usage_tokens: i64 = 0;
+
+while let Some(chunk_result) = stream.next().await {
+    let chunk = chunk_result?;
+    for choice in chunk.choices {
+        if let Some(content) = choice.delta.content {
+            full_text.push_str(&content);
+        }
+    }
+    // Some OpenAI-compatible providers stream a final usage chunk; the official
+    // OpenAI API streams usage only when `stream_options.include_usage = true`
+    // is set on the request. Capture it when present:
+    if let Some(usage) = chunk.usage {
+        final_usage_tokens = usage.total_tokens as i64;
+    }
+}
+
+// If usage wasn't streamed, fall back to a tokenizer estimate or a follow-up
+// non-streaming /v1/chat/completions call. For most production setups, set
+// `stream_options.include_usage = true` so the stream reports usage itself.
+if final_usage_tokens == 0 {
+    final_usage_tokens = estimate_tokens_with_tiktoken(&prompt, &full_text);
+}
+
+guard.commit(
+    CommitRequest::builder()
+        .actual(Amount::tokens(final_usage_tokens))
+        .metrics(CyclesMetrics {
+            tokens_output: Some(final_usage_tokens),
+            ..Default::default()
+        })
+        .build()
+).await?;
+```
+
+### Why the guard, not `with_cycles`
+
+`with_cycles()` awaits the closure once and expects it to resolve to a single `(value, actual_cost)` tuple. Streaming requires you to drive the stream to completion (which can take seconds), then commit the total. The guard exposes that lifecycle as two explicit steps — reserve before the stream begins, commit after it ends.
+
+If the stream errors midway (network failure, rate limit, content policy violation), drop the guard or call `guard.release(...).await?` — the reservation is returned to the pool. The guard's `Drop` implementation provides best-effort release on panic / early `?` return, but explicit release with a reason code is preferred for clean audit records.
+
+## Error handling: separating OpenAI errors from Cycles errors
+
+`async-openai` returns `OpenAIError`; Cycles returns `runcycles::Error`. Callers usually want to act on these differently:
+
+- **OpenAI errors** — rate-limit retries with backoff, model fallback (gpt-4o → gpt-4o-mini), prompt resubmission.
+- **Cycles errors** — [graceful degradation](/glossary#graceful-degradation) to a smaller model, deferred response, "budget exhausted" UX.
+
+A clean way to keep both inside `with_cycles()`:
+
+```rust
+use async_openai::error::OpenAIError;
+use runcycles::Error as CyclesError;
+
+#[derive(Debug)]
+enum CompletionError {
+    OpenAi(OpenAIError),
+    Cycles(CyclesError),
+}
+
+impl std::fmt::Display for CompletionError { /* impl */ }
+impl std::error::Error for CompletionError {}
+
+impl From<OpenAIError> for CompletionError {
+    fn from(e: OpenAIError) -> Self { CompletionError::OpenAi(e) }
+}
+impl From<CyclesError> for CompletionError {
+    fn from(e: CyclesError) -> Self { CompletionError::Cycles(e) }
+}
+
+let result: Result<(String, Amount), Box<dyn std::error::Error>> = with_cycles(
+    &cycles,
+    WithCyclesConfig::new(Amount::tokens(1_500))
+        .action("llm.completion", "gpt-4o-mini")
+        .subject(Subject { tenant: Some("acme-corp".into()), ..Default::default() }),
+    |_ctx| async move {
+        let response = openai.chat().create(request).await
+            .map_err(CompletionError::from)?;
+        let actual = response.usage.map(|u| u.total_tokens as i64).unwrap_or(0);
+        let text = response.choices.first().and_then(|c| c.message.content.clone()).unwrap_or_default();
+        Ok((text, Amount::tokens(actual)))
+    },
+).await;
+
+match result {
+    Ok((text, _)) => println!("{text}"),
+    Err(e) => {
+        // Downcast the boxed error to recover the typed variant
+        if let Some(CompletionError::OpenAi(oe)) = e.downcast_ref::<CompletionError>() {
+            // backoff / retry / fallback model
+        } else if let Some(CyclesError::BudgetExceeded { retry_after, .. }) = e.downcast_ref::<CyclesError>() {
+            // graceful degradation
+        }
+    }
+}
+```
+
+The Cycles error types and their convenience methods (`is_retryable`, `is_budget_exceeded`, `retry_after`) are covered in [Error Handling in Rust](/how-to/error-handling-patterns-in-rust).
+
+## Token-to-USD: when your budget is denominated in dollars, not tokens
+
+If the budget unit is `USD_MICROCENTS` rather than `TOKENS`, convert from the response usage at commit time:
+
+```rust
+fn tokens_to_microcents(prompt_tokens: u32, completion_tokens: u32, model: &str) -> u64 {
+    // Rates expressed as microcents per million tokens (1 cent = 1_000_000 microcents,
+    // so $1.00 per million tokens is 100_000_000). The numbers below are placeholders,
+    // not real prices; pin yours to the provider's pricing page — model rates change.
+    let (input_per_million_microcents, output_per_million_microcents) = match model {
+        "gpt-4o-mini" => (1_500_000, 6_000_000),   // illustrative
+        "gpt-4o"      => (2_500_000, 10_000_000),  // illustrative
+        _             => (1_500_000, 6_000_000),
+    };
+    let input  = (prompt_tokens as u64)     * input_per_million_microcents  / 1_000_000;
+    let output = (completion_tokens as u64) * output_per_million_microcents / 1_000_000;
+    input + output
+}
+
+// Inside the with_cycles closure:
+let usage = response.usage.unwrap_or_default();
+let microcents = tokens_to_microcents(usage.prompt_tokens, usage.completion_tokens, "gpt-4o-mini");
+Ok((text, Amount::usd_microcents(microcents as i64)))
+```
+
+Keeping the rate table in one helper makes provider rate changes a single-edit fix. For multi-provider deployments, hoist it to your shared `costs` module.
+
+For the canonical breakdown of provider rates and the cost-estimation patterns used elsewhere in the corpus, see [Cost Estimation Cheat Sheet](/how-to/cost-estimation-cheat-sheet).
+
+## Anthropic variant (`anthropic-sdk-rs`)
+
+The same composition works against Anthropic with one or two name changes:
+
+```rust
+use anthropic_sdk::{Client, MessagesRequest, Role};
+use runcycles::{with_cycles, WithCyclesConfig, models::{Amount, Subject}};
+
+let anthropic = Client::new();
+
+let reply = with_cycles(
+    &cycles,
+    WithCyclesConfig::new(Amount::tokens(1_500))
+        .action("llm.completion", "claude-3-5-sonnet")
+        .subject(Subject { tenant: Some("acme-corp".into()), ..Default::default() }),
+    |_ctx| async move {
+        let request = MessagesRequest::new("claude-3-5-sonnet-20241022")
+            .max_tokens(800)
+            .message(Role::User, prompt);
+
+        let response = anthropic.messages().create(request).await?;
+        let text = response.content_text();
+        let actual = (response.usage.input_tokens + response.usage.output_tokens) as i64;
+
+        Ok((text, Amount::tokens(actual)))
+    },
+).await?;
+```
+
+The Anthropic crates ecosystem is smaller than `async-openai`'s — exact crate names and APIs change. Pin to the crate version you're using and adapt the type paths above. The reserve-commit lifecycle is unchanged.
+
+## Common gotchas
+
+1. **Streaming without `include_usage` reports zero tokens.** OpenAI's official streaming endpoint emits usage only when `stream_options.include_usage = true` is set on the request. Without it, you'll commit zero tokens and the budget will not reflect actual spend. Set the option, or fall back to a tokenizer estimate.
+
+2. **`response.usage` is `Option`.** Some compatible servers (Ollama, vLLM, certain LiteLLM configs) don't return usage. Treat `None` as "estimate it locally" rather than "no spend."
+
+3. **`response.choices[0].message.content` can be `None`** when the model returns a tool-call or refusal. Handle the `None` case (commit zero or commit the prompt-token cost only) rather than unwrapping.
+
+4. **Don't include the OpenAI API key in the Cycles reservation metadata.** Cycles records actions, not credentials. If you're tagging the reservation with provider info, use the action name (`gpt-4o-mini`) — never the key.
+
+5. **Mismatched async runtimes.** `async-openai` runs on `tokio`; the blocking `runcycles` client must not be called from inside a Tokio runtime. Pick one — for most LLM workloads, the async client is correct.
+
+## Next steps
+
+- [Rust Client Quickstart](/quickstart/getting-started-with-the-rust-client) — the lifecycle this page composes against
+- [Integrating Cycles with Rust](/how-to/integrating-cycles-with-rust) — broader integration patterns (multi-step, framework middleware)
+- [Error Handling in Rust](/how-to/error-handling-patterns-in-rust) — retry, backoff, graceful degradation
+- [Rust Client Configuration Reference](/configuration/rust-client-configuration-reference) — full config surface
+- [Cost Estimation Cheat Sheet](/how-to/cost-estimation-cheat-sheet) — token-to-dollar mapping across providers
+- [How Reserve-Commit Works](/protocol/how-reserve-commit-works-in-cycles) — the underlying lifecycle
diff --git a/how-to/integrating-cycles-with-rust.md b/how-to/integrating-cycles-with-rust.md
index 14976cb9..4a7fd018 100644
--- a/how-to/integrating-cycles-with-rust.md
+++ b/how-to/integrating-cycles-with-rust.md
@@ -32,6 +32,8 @@ export CYCLES_TENANT="acme"
 
 > **Need an API key?** Create one via the Admin Server — see [Deploy the Full Stack](/quickstart/deploying-the-full-cycles-stack#step-3-create-an-api-key) or [API Key Management](/how-to/api-key-management-in-cycles).
 
+> **Looking for a real LLM example?** The `openai_call` / `stream_llm_response` placeholders below are intentionally generic. For the concrete `async-openai` wiring — including streaming token capture and error mapping — see [Integrate Cycles with async-openai (Rust)](/how-to/integrating-cycles-with-async-openai).
+
 ## Quick start
 
 ```rust
diff --git a/quickstart/getting-started-with-the-rust-client.md b/quickstart/getting-started-with-the-rust-client.md
index ffeef29b..585b8cca 100644
--- a/quickstart/getting-started-with-the-rust-client.md
+++ b/quickstart/getting-started-with-the-rust-client.md
@@ -118,6 +118,8 @@ let config = CyclesConfig::from_env().expect("missing CYCLES_ env vars");
 let client = CyclesClient::new(config);
 ```
 
+For the complete configuration surface — retry tuning, custom `reqwest::Client`, blocking variant, env var prefix customization — see the [Rust Client Configuration Reference](/configuration/rust-client-configuration-reference).
+
 ## Automatic lifecycle with `with_cycles()`
 
 The simplest way to add budget enforcement — wrap any async operation:
@@ -435,6 +437,9 @@ For each `with_cycles()` call or `ReservationGuard`:
 
 ## Next steps
 
-- [Error Handling Patterns](/how-to/error-handling-patterns-in-cycles-client-code) — general error handling patterns
+- [Integrate Cycles with async-openai (Rust)](/how-to/integrating-cycles-with-async-openai) — replaces the `call_llm()` placeholders above with a real OpenAI chat completion, including streaming
+- [Rust Client Configuration Reference](/configuration/rust-client-configuration-reference) — full config surface, retry tuning, custom `reqwest::Client`, blocking variant
+- [Error Handling in Rust](/how-to/error-handling-patterns-in-rust) — Rust-specific error patterns, retries, RAII safety, graceful degradation
+- [Integrating Cycles with Rust](/how-to/integrating-cycles-with-rust) — broader integration patterns (multi-step flows, framework middleware)
 - [API Reference](/api/) — interactive endpoint documentation
-- [How Reserve/Commit Works](/protocol/how-reserve-commit-works-in-cycles) — the core protocol lifecycle
+- [How Reserve-Commit Works](/protocol/how-reserve-commit-works-in-cycles) — the core protocol lifecycle

From 1fe86b0b8892a2aafd7e46bca5d36ce330dbfb85 Mon Sep 17 00:00:00 2001
From: Albert Mavashev <amavashev@k2n.io>
Date: Sat, 16 May 2026 11:16:25 -0400
Subject: [PATCH 2/3] docs(rust): apply codex round-1 review (REVISE-MAJOR
 fixes)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Codex flagged substantial code-accuracy errors that would have produced
non-compiling examples. Verified each fix against the live cycles-client-rust
source (src/config.rs, src/blocking.rs, src/lifecycle.rs) and async-openai
0.30.x docs.

Apply/skip tally: 12 applied, 0 pushed back.

Config reference fixes:
- Retry section: I described the retry engine as if it ships today.
  In runcycles 0.2.x, retry.rs is dead-code and ReservationGuard::commit
  does not retry. Reframed the section as "fields present on the struct
  for a future engine; no runtime effect in 0.2.x." Removed the
  "Disabling retry" and "Aggressive retry" examples that used builder
  methods (retry_initial_delay/multiplier/max_delay) that do not exist
  on CyclesClientBuilder.
- Programmatic CyclesConfig example: removed `..Default::default()`
  (CyclesConfig does not implement Default). Spell out all fields, or
  use the builder.
- Subject defaults: my "Resolution order" section claimed config-level
  subject defaults are auto-applied to request subjects. Verified
  against lifecycle.rs: with_cycles uses WithCyclesConfig.subject (or
  Subject::default()), NOT the CyclesConfig subject. Rewrote the
  section to clarify that subject fields are stored on the config but
  not auto-applied; recommended building one Subject and reusing it.
- Blocking client: I said it "mirrors the async client's surface."
  Verified against blocking.rs: only low-level protocol methods are
  exposed (create_reservation, commit_reservation, release_reservation,
  extend_reservation, decide, create_event, list_reservations,
  get_reservation, get_balances). No with_cycles, no ReservationGuard.
  Updated the section to list what is actually available.
- Blocking snippet: added missing `?` after `BlockingCyclesClient::new(...)`
  (it returns `Result<Self, Error>`) and imported `BalanceParams`.
- Builder method reference table: removed retry_initial_delay,
  retry_multiplier, retry_max_delay (not on builder); noted these are
  reachable only by constructing CyclesConfig directly.
- Removed "Unlike most clients in the corpus" internal-review tone.

async-openai doc fixes:
- "dominant Rust client" → "widely used".
- "only the type names change" → reframed as not-portable (the
  provider-side type paths differ in meaningful ways).
- Streaming code: I described needing
  `stream_options.include_usage = true` but never set it on the request.
  Added `.stream_options(ChatCompletionStreamOptions { include_usage: true })`
  to the request builder, imported the type.
- Error handling section: completely rewrote. with_cycles wraps closure
  errors as `Error::Validation(format!("guarded function failed: {e}"))`
  — the typed OpenAIError is stringified and lost. Switched the
  error-aware pattern to ReservationGuard so the typed OpenAIError
  reaches the caller. Added explicit guidance on when to use
  with_cycles vs ReservationGuard.
- Anthropic section: demoted from concrete code (using crate API names
  I had not verified) to a brief "Other Rust LLM clients" note that
  describes what to adapt without making specific API claims about
  anthropic-sdk-rust.
- Streaming usage setup moved INTO the code block (was only mentioned
  in the gotchas section).
- Glossary anchor `#tokens` → `#token` (the glossary heading is
  singular).

Other:
- how-to/integrating-cycles-with-rust.md:283: updated stale link from
  /quickstart/getting-started-with-the-rust-client#configuration to
  /configuration/rust-client-configuration-reference.

Sources verified against cycles-client-rust HEAD (src/config.rs,
src/blocking.rs, src/lifecycle.rs) and async-openai 0.30.1 docs.rs.
---
 .../rust-client-configuration-reference.md    | 119 +++++-------
 .../integrating-cycles-with-async-openai.md   | 182 ++++++++++--------
 how-to/integrating-cycles-with-rust.md        |   2 +-
 3 files changed, 158 insertions(+), 145 deletions(-)

diff --git a/configuration/rust-client-configuration-reference.md b/configuration/rust-client-configuration-reference.md
index 9523f3df..3ad3dede 100644
--- a/configuration/rust-client-configuration-reference.md
+++ b/configuration/rust-client-configuration-reference.md
@@ -48,36 +48,24 @@ These fields set default Subject values applied to every request unless overridd
 
 ### Retry configuration
 
-Controls the commit retry engine for transient failures. The same engine runs in the async client and the blocking variant.
+The retry-related fields are present on `CyclesConfig` as configuration surface for a future automatic-retry engine; **the engine is not wired in `runcycles` 0.2.x.** Commit failures surface to the caller as `Error::Transport` or `Error::Api { status: 5xx, .. }` and the caller decides whether to retry. The fields below are documented so they are stable when the engine ships; setting them in 0.2.x has no runtime effect.
 
 | Field | Type | Default | Description |
 |---|---|---|---|
-| `retry_enabled` | `bool` | `true` | Enable automatic commit retries |
-| `retry_max_attempts` | `u32` | `5` | Maximum number of retry attempts |
-| `retry_initial_delay` | `Duration` | `500 ms` | Delay before the first retry |
-| `retry_multiplier` | `f64` | `2.0` | Exponential backoff multiplier between retries |
-| `retry_max_delay` | `Duration` | `30_000 ms` | Maximum delay between retries (cap) |
+| `retry_enabled` | `bool` | `true` | Future: enable automatic commit retries |
+| `retry_max_attempts` | `u32` | `5` | Future: maximum number of retry attempts |
+| `retry_initial_delay` | `Duration` | `500 ms` | Future: delay before the first retry |
+| `retry_multiplier` | `f64` | `2.0` | Future: exponential backoff multiplier between retries |
+| `retry_max_delay` | `Duration` | `30_000 ms` | Future: maximum delay between retries (cap) |
 
-#### How retry works
-
-When a commit fails with a transport error or a 5xx response, the retry engine schedules a retry using exponential backoff:
-
-```
-Attempt 1: wait 500 ms
-Attempt 2: wait 1_000 ms
-Attempt 3: wait 2_000 ms
-Attempt 4: wait 4_000 ms
-Attempt 5: wait 8_000 ms (capped at retry_max_delay if smaller)
-```
-
-Non-retryable errors (4xx responses other than 429, validation failures, deserialization errors) are not retried. `BudgetExceeded` carries a server-suggested `retry_after` that callers can apply manually.
+For now, callers wrap commits in their own retry loop using the `Error::is_retryable()` / `retry_after()` convenience methods. See [Error Handling in Rust](/how-to/error-handling-patterns-in-rust) for the in-app pattern. `BudgetExceeded` carries a server-suggested `retry_after` regardless of whether the engine is wired.
 
 ## Programmatic configuration
 
-The builder API is the recommended way to construct a client:
+The builder API is the recommended way to construct a client. The builder exposes the fields that callers tune in practice — connection settings, subject defaults, and the `retry_enabled` / `retry_max_attempts` settings. The remaining retry-engine fields (initial delay, multiplier, max delay) are not yet on the builder; set them by constructing `CyclesConfig` directly (like all retry fields, they have no runtime effect in 0.2.x).
 
 ```rust
-use runcycles::{CyclesClient, CyclesConfig};
+use runcycles::CyclesClient;
 use std::time::Duration;
 
 let client = CyclesClient::builder(
@@ -91,13 +79,10 @@ let client = CyclesClient::builder(
 .read_timeout(Duration::from_millis(5_000))
 .retry_enabled(true)
 .retry_max_attempts(5)
-.retry_initial_delay(Duration::from_millis(500))
-.retry_multiplier(2.0)
-.retry_max_delay(Duration::from_secs(30))
 .build();
 ```
 
-Or construct `CyclesConfig` directly and pass it to `CyclesClient::new`:
+To set the retry-engine fields that aren't on the builder, construct `CyclesConfig` directly:
 
 ```rust
 use runcycles::{CyclesClient, CyclesConfig};
@@ -107,6 +92,11 @@ let config = CyclesConfig {
     base_url: "http://localhost:7878".into(),
     api_key: "cyc_live_...".into(),
     tenant: Some("acme-corp".into()),
+    workspace: None,
+    app: None,
+    workflow: None,
+    agent: None,
+    toolset: None,
     connect_timeout: Duration::from_millis(2_000),
     read_timeout: Duration::from_millis(5_000),
     retry_enabled: true,
@@ -114,12 +104,13 @@ let config = CyclesConfig {
     retry_initial_delay: Duration::from_millis(500),
     retry_multiplier: 2.0,
     retry_max_delay: Duration::from_secs(30),
-    ..Default::default()
 };
 
 let client = CyclesClient::new(config);
 ```
 
+`CyclesConfig` does not implement `Default`; populate every field explicitly when constructing the struct directly, or use the builder.
+
 ## Environment variable configuration
 
 Use `CyclesConfig::from_env()` to load configuration from environment variables. The default prefix is `CYCLES_`:
@@ -149,7 +140,7 @@ let config = CyclesConfig::from_env().expect("missing required CYCLES_* env vars
 | `CYCLES_RETRY_MAX_DELAY` | `retry_max_delay` | milliseconds (integer) | No |
 
 ::: tip Custom env var prefix
-Unlike most clients in the corpus, the Rust client supports loading from a custom prefix. Useful when a single process runs multiple Cycles instances against different servers:
+The Rust client supports loading from a custom prefix, which is useful when a single process holds connections to multiple Cycles instances:
 
 ```rust
 let primary  = CyclesConfig::from_env_with_prefix("CYCLES_PRIMARY_")?;
@@ -159,14 +150,28 @@ let staging  = CyclesConfig::from_env_with_prefix("CYCLES_STAGING_")?;
 The default `from_env()` is equivalent to `from_env_with_prefix("CYCLES_")`.
 :::
 
-## Resolution order
+## Subject defaults: what they do (and don't)
+
+The subject fields on `CyclesConfig` (`tenant`, `workspace`, `app`, `workflow`, `agent`, `toolset`) are stored on the config and accessible via `client.config()`, but the high-level helpers in `runcycles` 0.2.x **do not automatically apply them** to the per-request `Subject`. Each `with_cycles()` / `client.reserve()` / `client.create_reservation()` call uses the `Subject` you pass in explicitly (or `Subject::default()` if you pass none).
+
+If you want a single tenant applied to every request, build the subject once and reuse it:
 
-For each Subject field, the request builder resolves values in this priority:
+```rust
+use runcycles::{WithCyclesConfig, models::{Amount, Subject}};
+
+let subject = Subject {
+    tenant: Some("acme-corp".into()),
+    workspace: Some("production".into()),
+    ..Default::default()
+};
 
-1. **Per-call value** — passed explicitly to the request builder (e.g. `Subject { tenant: Some("override".into()), .. }`)
-2. **Config default** — set on the `CyclesConfig` / builder
+// Pass the same subject to every WithCyclesConfig / ReservationCreateRequest
+let cfg = WithCyclesConfig::new(Amount::tokens(1_000))
+    .action("llm.completion", "gpt-4o-mini")
+    .subject(subject.clone());
+```
 
-If neither provides a value, the field is omitted from the request and the server applies its own defaults.
+Future versions of the crate may wire the config's subject defaults into request subjects automatically; this reference will be updated when that lands.
 
 ## Custom `reqwest::Client`
 
@@ -204,63 +209,41 @@ runcycles = { version = "0.2", features = ["blocking"] }
 ```
 
 ```rust
-use runcycles::{BlockingCyclesClient, CyclesConfig};
+use runcycles::{BlockingCyclesClient, CyclesConfig, models::BalanceParams};
 
-let client = BlockingCyclesClient::new(CyclesConfig::from_env()?);
+let client = BlockingCyclesClient::new(CyclesConfig::from_env()?)?;
 let resp = client.get_balances(&BalanceParams {
     tenant: Some("acme-corp".into()),
     ..Default::default()
 })?;
 ```
 
-The blocking client mirrors the async client's surface but uses `reqwest::blocking::Client` underneath. The reserve-commit lifecycle, retry engine, and error types are identical; only the await points are removed.
+The blocking client exposes only the **low-level protocol methods** — `create_reservation`, `commit_reservation`, `release_reservation`, `extend_reservation`, `decide`, `create_event`, `list_reservations`, `get_reservation`, `get_balances`. The high-level `with_cycles()` helper and the `ReservationGuard` RAII pattern are **async-only** in 0.2.x; blocking callers compose the reserve / commit / release sequence themselves.
 
 ::: warning Don't mix runtimes
 The blocking client must not be called from inside a Tokio runtime (it will block the executor). For most applications using `tokio::main`, the async client is correct. The blocking variant is for genuinely synchronous contexts.
 :::
 
-## Disabling retry
-
-```rust
-let client = CyclesClient::builder("cyc_live_...", "http://localhost:7878")
-    .retry_enabled(false)
-    .build();
-```
-
-## Aggressive retry for critical commits
-
-```rust
-use std::time::Duration;
-
-let client = CyclesClient::builder("cyc_live_...", "http://localhost:7878")
-    .retry_max_attempts(10)
-    .retry_initial_delay(Duration::from_millis(200))
-    .retry_multiplier(1.5)
-    .retry_max_delay(Duration::from_secs(60))
-    .build();
-```
-
 ## CyclesClientBuilder method reference
 
 | Method | Sets | Notes |
 |---|---|---|
 | `new(api_key, base_url)` | required fields | The constructor; both args are `impl Into<String>` |
-| `.tenant(s)` | subject default | All subject methods accept `impl Into<String>` |
-| `.workspace(s)` | subject default | |
-| `.app(s)` | subject default | |
-| `.workflow(s)` | subject default | |
-| `.agent(s)` | subject default | |
-| `.toolset(s)` | subject default | |
+| `.tenant(s)` | config subject default | All subject methods accept `impl Into<String>`. Stored on the config but not auto-applied to request subjects — see [Subject defaults](#subject-defaults-what-they-do-and-don-t). |
+| `.workspace(s)` | config subject default | |
+| `.app(s)` | config subject default | |
+| `.workflow(s)` | config subject default | |
+| `.agent(s)` | config subject default | |
+| `.toolset(s)` | config subject default | |
 | `.connect_timeout(d)` | HTTP | Takes `std::time::Duration` |
 | `.read_timeout(d)` | HTTP | Takes `std::time::Duration` |
-| `.retry_enabled(b)` | retry | Toggle the retry engine |
-| `.retry_max_attempts(n)` | retry | |
-| `.retry_initial_delay(d)` | retry | Takes `std::time::Duration` |
-| `.retry_multiplier(f)` | retry | Takes `f64` |
-| `.retry_max_delay(d)` | retry | Takes `std::time::Duration` |
+| `.retry_enabled(b)` | retry-future | Sets the field for the future retry engine; no runtime effect in 0.2.x |
+| `.retry_max_attempts(n)` | retry-future | Sets the field for the future retry engine |
 | `.http_client(c)` | HTTP | Provide a custom `reqwest::Client`; overrides timeouts |
 | `.build()` | finalizes | Returns `CyclesClient` (async) |
-| `.build_blocking()` | finalizes | Returns `BlockingCyclesClient`; requires the `blocking` feature |
+| `.build_blocking()` | finalizes | Returns `Result<BlockingCyclesClient, Error>`; requires the `blocking` feature |
+
+The `retry_initial_delay`, `retry_multiplier`, and `retry_max_delay` fields are reachable only by constructing `CyclesConfig` directly (see [Programmatic configuration](#programmatic-configuration)).
 
 ## Next steps
 
diff --git a/how-to/integrating-cycles-with-async-openai.md b/how-to/integrating-cycles-with-async-openai.md
index 4d03d0ba..b35252d2 100644
--- a/how-to/integrating-cycles-with-async-openai.md
+++ b/how-to/integrating-cycles-with-async-openai.md
@@ -9,15 +9,15 @@ head:
 
 # Integrate Cycles with async-openai (Rust)
 
-The [Rust quickstart](/quickstart/getting-started-with-the-rust-client) and [Rust integration guide](/how-to/integrating-cycles-with-rust) both use a `call_llm()` placeholder where a real OpenAI call should go. This page fills that gap: it shows how `runcycles` composes with [`async-openai`](https://crates.io/crates/async-openai) (the dominant Rust client for the OpenAI API) for chat completions, streaming, and token-accurate commits.
+The [Rust quickstart](/quickstart/getting-started-with-the-rust-client) and [Rust integration guide](/how-to/integrating-cycles-with-rust) both use a `call_llm()` placeholder where a real OpenAI call should go. This page fills that gap: it shows how `runcycles` composes with [`async-openai`](https://crates.io/crates/async-openai) (a widely used Rust client for the OpenAI API) for chat completions, streaming, and token-accurate commits.
 
-The same pattern transfers to [`anthropic-sdk-rs`](https://crates.io/crates/anthropic-sdk) and other Rust LLM clients — only the type names change. See the [Anthropic variant](#anthropic-variant-anthropic-sdk-rs) section at the bottom.
+The same lifecycle composes against other Rust LLM clients (Anthropic, Bedrock, local LLMs via Ollama) — the reserve-commit shape doesn't change. See the brief [Other Rust LLM clients](#other-rust-llm-clients) note at the bottom.
 
 ## What you get
 
 - `with_cycles()` wrapping a real OpenAI call, with `prompt_tokens + completion_tokens` flowing through to the commit
 - A `ReservationGuard` pattern for streaming chat completions where token counts are only known at the end of the stream
-- Error mapping that distinguishes OpenAI errors (network, rate limit, auth) from Cycles errors (budget exceeded, [reservation](/glossary#reservation) expired) so callers can act on each correctly
+- Error-aware patterns using `ReservationGuard` that preserve typed `OpenAIError` for the caller (`with_cycles()` wraps closure errors as `Error::Validation` and loses the original type)
 - Token-to-USD conversion at commit time for spend-denominated budgets
 
 ## Cargo.toml
@@ -99,7 +99,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
 
 | Step | What runs | What is recorded |
 |---|---|---|
-| Before the closure | Cycles reserves `1_500` [tokens](/glossary#tokens) against the session subject | Reservation created, decision evaluated |
+| Before the closure | Cycles reserves `1_500` [tokens](/glossary#token) against the request subject | Reservation created, decision evaluated |
 | Inside the closure | `openai.chat().create(request)` issues the actual API call | OpenAI bills your account for the real usage |
 | Return value | `(text, Amount::tokens(actual_total))` | The actual `total_tokens` becomes the commit amount |
 | After the closure | Cycles commits `actual` tokens, releases the unused reservation | Final spend recorded; the reservation lifecycle closes |
@@ -146,12 +146,18 @@ let reply = with_cycles(
 
 ## Streaming: ReservationGuard with stream consumption
 
-Streaming chat completions return tokens one chunk at a time. The total token count is only known after the stream ends, which means `with_cycles()` (which expects the closure to return both the value and the actual cost in one go) is not the right primitive. Use a `ReservationGuard` instead:
+Streaming chat completions return tokens one chunk at a time. The total token count is only known after the stream ends, which means `with_cycles()` (which expects the closure to return both the value and the actual cost in one go) is not the right primitive. Use a `ReservationGuard` instead.
+
+OpenAI's streaming endpoint emits a final `usage` chunk only when `stream_options.include_usage` is set on the request. Set it explicitly:
 
 ```rust
 use async_openai::{
     Client,
-    types::{CreateChatCompletionRequestArgs, ChatCompletionRequestUserMessageArgs},
+    types::{
+        CreateChatCompletionRequestArgs,
+        ChatCompletionRequestUserMessageArgs,
+        ChatCompletionStreamOptions,
+    },
 };
 use futures::StreamExt;
 use runcycles::{
@@ -188,6 +194,8 @@ let request = CreateChatCompletionRequestArgs::default()
         .build()?
         .into()])
     .stream(true)
+    // Required for the stream to emit a final usage chunk.
+    .stream_options(ChatCompletionStreamOptions { include_usage: true })
     .build()?;
 
 let mut stream = openai.chat().create_stream(request).await?;
@@ -202,17 +210,14 @@ while let Some(chunk_result) = stream.next().await {
             full_text.push_str(&content);
         }
     }
-    // Some OpenAI-compatible providers stream a final usage chunk; the official
-    // OpenAI API streams usage only when `stream_options.include_usage = true`
-    // is set on the request. Capture it when present:
+    // The final chunk carries usage when include_usage was set.
     if let Some(usage) = chunk.usage {
         final_usage_tokens = usage.total_tokens as i64;
     }
 }
 
-// If usage wasn't streamed, fall back to a tokenizer estimate or a follow-up
-// non-streaming /v1/chat/completions call. For most production setups, set
-// `stream_options.include_usage = true` so the stream reports usage itself.
+// Defensive fallback: if usage didn't arrive (some OpenAI-compatible
+// providers don't honor include_usage), estimate locally.
 if final_usage_tokens == 0 {
     final_usage_tokens = estimate_tokens_with_tiktoken(&prompt, &full_text);
 }
@@ -232,64 +237,107 @@ guard.commit(
 
 `with_cycles()` evaluates the closure to a `(value, actual_cost)` tuple in one synchronous return. Streaming requires you to drive the stream to completion (which can take seconds), then commit the total. The guard exposes that lifecycle as two explicit steps — reserve before the stream begins, commit after it ends.
 
-If the stream errors midway (network failure, rate limit, content policy violation), drop the guard or call `guard.release(...).await?` — the reservation is returned to the pool. The guard's `Drop` implementation provides best-effort release on panic / early `?` return, but explicit release with a reason code is preferred for clean audit records.
+If the stream errors midway (network failure, rate limit, content policy violation), call `guard.release(...).await?` — the reservation is returned to the pool with a reason code. The guard's `Drop` implementation provides best-effort release on panic / early `?` return, but explicit release with a reason code is preferred for clean audit records.
 
-## Error handling: separating OpenAI errors from Cycles errors
+## Error handling: preserving the OpenAI error type
 
 `async-openai` returns `OpenAIError`; Cycles returns `runcycles::Error`. Callers usually want to act on these differently:
 
 - **OpenAI errors** — rate-limit retries with backoff, model fallback (gpt-4o → gpt-4o-mini), prompt resubmission.
 - **Cycles errors** — [graceful degradation](/glossary#graceful-degradation) to a smaller model, deferred response, "budget exhausted" UX.
 
-A clean way to keep both inside `with_cycles()`:
+`with_cycles()` is *not* the right primitive for error-aware flows. Its closure must return `Result<(T, Amount), Box<dyn std::error::Error + Send + Sync>>`, and any closure error is wrapped as `runcycles::Error::Validation(format!("guarded function failed: {e}"))`. The original typed error is stringified into the message and lost — the caller cannot recover it.
+
+For flows that need to act on the typed `OpenAIError`, use `ReservationGuard` and keep the error visible to the caller:
 
 ```rust
-use async_openai::error::OpenAIError;
-use runcycles::Error as CyclesError;
+use async_openai::{
+    Client,
+    error::OpenAIError,
+    types::{CreateChatCompletionRequestArgs, ChatCompletionRequestUserMessageArgs},
+};
+use runcycles::{
+    CyclesClient, Error as CyclesError,
+    models::{
+        Amount, Subject, Action, ReservationCreateRequest, CommitRequest, ReleaseRequest,
+    },
+};
 
-#[derive(Debug)]
+#[derive(Debug, thiserror::Error)]
 enum CompletionError {
-    OpenAi(OpenAIError),
-    Cycles(CyclesError),
+    #[error(transparent)] OpenAi(#[from] OpenAIError),
+    #[error(transparent)] Cycles(#[from] CyclesError),
 }
 
-impl std::fmt::Display for CompletionError { /* impl */ }
-impl std::error::Error for CompletionError {}
+async fn run_completion(
+    cycles: &CyclesClient,
+    openai: &Client<async_openai::config::OpenAIConfig>,
+    prompt: &str,
+) -> Result<String, CompletionError> {
+    let guard = cycles.reserve(
+        ReservationCreateRequest::builder()
+            .subject(Subject { tenant: Some("acme-corp".into()), ..Default::default() })
+            .action(Action::new("llm.completion", "gpt-4o-mini"))
+            .estimate(Amount::tokens(1_500))
+            .build()
+    ).await?;
+
+    let request = CreateChatCompletionRequestArgs::default()
+        .model("gpt-4o-mini")
+        .max_tokens(800u32)
+        .messages([ChatCompletionRequestUserMessageArgs::default()
+            .content(prompt)
+            .build()?
+            .into()])
+        .build()?;
+
+    let response = match openai.chat().create(request).await {
+        Ok(r) => r,
+        Err(e) => {
+            // Release the reservation with a reason; preserve the typed OpenAI error
+            let _ = guard.release(
+                ReleaseRequest::new(Some(format!("openai_error: {e}")))
+            ).await;
+            return Err(e.into()); // OpenAIError flows to the caller
+        }
+    };
+
+    let text = response.choices.first()
+        .and_then(|c| c.message.content.clone())
+        .unwrap_or_default();
+    let actual = response.usage.map(|u| u.total_tokens as i64).unwrap_or(0);
 
-impl From<OpenAIError> for CompletionError {
-    fn from(e: OpenAIError) -> Self { CompletionError::OpenAi(e) }
-}
-impl From<CyclesError> for CompletionError {
-    fn from(e: CyclesError) -> Self { CompletionError::Cycles(e) }
+    guard.commit(
+        CommitRequest::builder()
+            .actual(Amount::tokens(actual))
+            .build()
+    ).await?;
+
+    Ok(text)
 }
+```
 
-let result: Result<(String, Amount), Box<dyn std::error::Error>> = with_cycles(
-    &cycles,
-    WithCyclesConfig::new(Amount::tokens(1_500))
-        .action("llm.completion", "gpt-4o-mini")
-        .subject(Subject { tenant: Some("acme-corp".into()), ..Default::default() }),
-    |_ctx| async move {
-        let response = openai.chat().create(request).await
-            .map_err(CompletionError::from)?;
-        let actual = response.usage.map(|u| u.total_tokens as i64).unwrap_or(0);
-        let text = response.choices.first().and_then(|c| c.message.content.clone()).unwrap_or_default();
-        Ok((text, Amount::tokens(actual)))
-    },
-).await;
-
-match result {
-    Ok((text, _)) => println!("{text}"),
-    Err(e) => {
-        // Walk the error chain to recover the typed variant
-        if let Some(CompletionError::OpenAi(oe)) = e.downcast_ref::<CompletionError>() {
-            // backoff / retry / fallback model
-        } else if let Some(CyclesError::BudgetExceeded { retry_after, .. }) = e.downcast_ref::<CyclesError>() {
-            // graceful degradation
-        }
+At the call site, the typed branches are now available:
+
+```rust
+match run_completion(&cycles, &openai, prompt).await {
+    Ok(text) => println!("{text}"),
+    Err(CompletionError::OpenAi(_e)) => {
+        // backoff / retry / fallback model
+    }
+    Err(CompletionError::Cycles(CyclesError::BudgetExceeded { retry_after, .. })) => {
+        // graceful degradation — defer, downsize model, return cached response
+        let _ = retry_after;
+    }
+    Err(CompletionError::Cycles(other)) => {
+        // log and surface
+        eprintln!("cycles error: {other}");
     }
 }
 ```
 
+Use `with_cycles()` when the caller doesn't need to distinguish the underlying error type — for fire-and-forget background tasks, scripts, or higher-level orchestrators that uniformly retry on any failure. Switch to `ReservationGuard` whenever the caller needs to branch on the actual error.
+
 The Cycles error types and their convenience methods (`is_retryable`, `is_budget_exceeded`, `retry_after`) are covered in [Error Handling in Rust](/how-to/error-handling-patterns-in-rust).
 
 ## Token-to-USD: when your budget is denominated in dollars, not tokens
@@ -321,36 +369,18 @@ Keeping the rate table in one helper makes provider rate changes a single-edit f
 
 For the canonical breakdown of provider rates and the cost-estimation patterns used elsewhere in the corpus, see [Cost Estimation Cheat Sheet](/how-to/cost-estimation-cheat-sheet).
 
-## Anthropic variant (`anthropic-sdk-rs`)
-
-The same composition works against Anthropic with one or two name changes:
+## Other Rust LLM clients
 
-```rust
-use anthropic_sdk::{Client, MessagesRequest, Role};
-use runcycles::{with_cycles, WithCyclesConfig, models::{Amount, Subject}};
-
-let anthropic = Client::new();
+The reserve-commit shape is the same for any Rust LLM client. Four things need adapting when you move to a new provider:
 
-let reply = with_cycles(
-    &cycles,
-    WithCyclesConfig::new(Amount::tokens(1_500))
-        .action("llm.completion", "claude-3-5-sonnet")
-        .subject(Subject { tenant: Some("acme-corp".into()), ..Default::default() }),
-    |_ctx| async move {
-        let request = MessagesRequest::new("claude-3-5-sonnet-20241022")
-            .max_tokens(800)
-            .message(Role::User, prompt);
+1. **The request builder type** — `CreateChatCompletionRequestArgs` for async-openai, `MessageCreateBuilder` / `MessageCreateParams` for Anthropic via the `anthropic-sdk-rust` crate, the provider-specific equivalent elsewhere.
+2. **The call method** — `client.chat().create(req)` for async-openai; consult the provider crate's docs for the equivalent.
+3. **The response usage extraction** — `response.usage.map(|u| u.total_tokens as i64)` for async-openai. Anthropic reports `input_tokens` and `output_tokens` separately on its response usage object, so sum them to get the total; confirm the exact field names against the crate version you use.
+4. **The model name in the action label** — `.action("llm.completion", "claude-3-5-sonnet-20241022")` rather than `"gpt-4o-mini"`.
 
-        let response = anthropic.messages().create(request).await?;
-        let text = response.content_text();
-        let actual = (response.usage.input_tokens + response.usage.output_tokens) as i64;
-
-        Ok((text, Amount::tokens(actual)))
-    },
-).await?;
-```
+Pin to the specific crate version you're using and verify each of those four points against its current docs before copy-pasting. The Rust Anthropic ecosystem in particular has churn across crate names and major versions; the reserve-commit lifecycle is unchanged, but the provider-side type paths are not portable.
 
-The Anthropic crates ecosystem is smaller than `async-openai`'s — exact crate names and APIs change. Pin to the crate version you're using and adapt the type paths above. The reserve-commit lifecycle is unchanged.
+The [Error Handling in Rust](/how-to/error-handling-patterns-in-rust) patterns apply to all providers — the typed `OpenAIError` branch above becomes a typed `AnthropicError` (or equivalent) branch for the other crate.
 
 ## Common gotchas
 
diff --git a/how-to/integrating-cycles-with-rust.md b/how-to/integrating-cycles-with-rust.md
index 4a7fd018..69a736fd 100644
--- a/how-to/integrating-cycles-with-rust.md
+++ b/how-to/integrating-cycles-with-rust.md
@@ -280,7 +280,7 @@ let client = CyclesClient::builder("cyc_live_abc123", "http://localhost:7878")
     .build();
 ```
 
-See [Rust Client Configuration](/quickstart/getting-started-with-the-rust-client#configuration) for all options.
+See the [Rust Client Configuration Reference](/configuration/rust-client-configuration-reference) for the full surface.
 
 ## Blocking client
 

From fb88316ee1177ac285aed35f18fac82a1ebeb430 Mon Sep 17 00:00:00 2001
From: Albert Mavashev <amavashev@k2n.io>
Date: Sat, 16 May 2026 11:23:29 -0400
Subject: [PATCH 3/3] docs(rust): apply codex round-2 review
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Apply/skip tally: 5 applied, 0 pushed back.

Applied:
- Config reference line 29: "applied to every request unless overridden"
  contradicted the corrected section at :153-174 and lifecycle.rs. Replaced
  with explicit "stored on the config but not auto-applied to per-request
  subjects in 0.2.x" + pointer to the detailed section.
- Config reference retry framing: narrowed "Transport or Api { 5xx }" to
  "transient commit failures (network, 5xx, timeouts) surface as Transport
  or Api{..}" since 4xx can also surface; softened "stable when the engine
  ships" to "documented here because they are already public" (avoids any
  implied semver promise about the future engine).
- Config reference blocking method list: added
  `create_reservation_with_metadata` (it IS public on BlockingCyclesClient
  per src/blocking.rs); added `config()` accessor, noted it's not a
  protocol method.
- async-openai error-handling pattern: `guard.release(...)` takes
  `impl Into<String>`, not a `ReleaseRequest`. Replaced
  `guard.release(ReleaseRequest::new(Some(...)))` with
  `guard.release(format!(...))`. Removed the now-unused `ReleaseRequest`
  import.
- async-openai line 366: "elsewhere in the corpus" → "elsewhere in the
  docs" (internal-review tone).

Codex verified against cycles-client-rust at runcycles 0.2.4 plus
async-openai 0.30.1 docs.rs.
---
 configuration/rust-client-configuration-reference.md |  6 +++---
 how-to/integrating-cycles-with-async-openai.md       | 10 +++-------
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/configuration/rust-client-configuration-reference.md b/configuration/rust-client-configuration-reference.md
index 3ad3dede..bb0b4ba4 100644
--- a/configuration/rust-client-configuration-reference.md
+++ b/configuration/rust-client-configuration-reference.md
@@ -26,7 +26,7 @@ The `CyclesConfig` struct holds all client configuration. It can be constructed
 
 ### Subject defaults
 
-These fields set default Subject values applied to every request unless overridden at the call site. Override at the call site by passing an explicit `Subject` to the request builder.
+These fields hold subject values that are stored on the config and available via `client.config()`. **They are not auto-applied to per-request subjects in 0.2.x** — see [Subject defaults: what they do (and don't)](#subject-defaults-what-they-do-and-don-t) below for the actual behavior.
 
 | Field | Type | Default | Description |
 |---|---|---|---|
@@ -48,7 +48,7 @@ These fields set default Subject values applied to every request unless overridd
 
 ### Retry configuration
 
-The retry-related fields are present on `CyclesConfig` as configuration surface for a future automatic-retry engine; **the engine is not wired in `runcycles` 0.2.x.** Commit failures surface to the caller as `Error::Transport` or `Error::Api { status: 5xx, .. }` and the caller decides whether to retry. The fields below are documented so they are stable when the engine ships; setting them in 0.2.x has no runtime effect.
+The retry-related fields are present on `CyclesConfig` as configuration surface for a future automatic-retry engine; **the engine is not wired in `runcycles` 0.2.x.** Transient commit failures (network, 5xx, timeouts) surface to the caller as `Error::Transport` or `Error::Api { .. }` and the caller decides whether to retry. The fields below are documented here because they are already public on the struct; setting them in 0.2.x has no runtime effect, and any wiring behavior in a future release will be announced separately.
 
 | Field | Type | Default | Description |
 |---|---|---|---|
@@ -218,7 +218,7 @@ let resp = client.get_balances(&BalanceParams {
 })?;
 ```
 
-The blocking client exposes only the **low-level protocol methods** — `create_reservation`, `commit_reservation`, `release_reservation`, `extend_reservation`, `decide`, `create_event`, `list_reservations`, `get_reservation`, `get_balances`. The high-level `with_cycles()` helper and the `ReservationGuard` RAII pattern are **async-only** in 0.2.x; blocking callers compose the reserve / commit / release sequence themselves.
+The blocking client exposes the low-level protocol methods only — `create_reservation`, `create_reservation_with_metadata`, `commit_reservation`, `release_reservation`, `extend_reservation`, `decide`, `create_event`, `list_reservations`, `get_reservation`, `get_balances` — plus a `config()` accessor. The high-level `with_cycles()` helper and the `ReservationGuard` RAII pattern are **async-only** in 0.2.x; blocking callers compose the reserve / commit / release sequence themselves.
 
 ::: warning Don't mix runtimes
 The blocking client must not be called from inside a Tokio runtime (it will block the executor). For most applications using `tokio::main`, the async client is correct. The blocking variant is for genuinely synchronous contexts.
diff --git a/how-to/integrating-cycles-with-async-openai.md b/how-to/integrating-cycles-with-async-openai.md
index b35252d2..4d248c15 100644
--- a/how-to/integrating-cycles-with-async-openai.md
+++ b/how-to/integrating-cycles-with-async-openai.md
@@ -258,9 +258,7 @@ use async_openai::{
 };
 use runcycles::{
     CyclesClient, Error as CyclesError,
-    models::{
-        Amount, Subject, Action, ReservationCreateRequest, CommitRequest, ReleaseRequest,
-    },
+    models::{Amount, Subject, Action, ReservationCreateRequest, CommitRequest},
 };
 
 #[derive(Debug, thiserror::Error)]
@@ -295,9 +293,7 @@ async fn run_completion(
         Ok(r) => r,
         Err(e) => {
             // Release the reservation with a reason; preserve the typed OpenAI error
-            let _ = guard.release(
-                ReleaseRequest::new(Some(format!("openai_error: {e}")))
-            ).await;
+            let _ = guard.release(format!("openai_error: {e}")).await;
             return Err(e.into()); // OpenAIError flows to the caller
         }
     };
@@ -367,7 +363,7 @@ Ok((text, Amount::usd_microcents(microcents as i64)))
 
 Keeping the rate table in one helper makes provider rate changes a single-edit fix. For multi-provider deployments, hoist it to your shared `costs` module.
 
-For the canonical breakdown of provider rates and the cost-estimation patterns used elsewhere in the corpus, see [Cost Estimation Cheat Sheet](/how-to/cost-estimation-cheat-sheet).
+For the canonical breakdown of provider rates and the cost-estimation patterns used elsewhere in the docs, see [Cost Estimation Cheat Sheet](/how-to/cost-estimation-cheat-sheet).
 
 ## Other Rust LLM clients