Commit 3eb32ab

release: 0.6.1-alpha.1 (#321)
Automated Release PR

---

## 0.6.1-alpha.1 (2026-03-13)

Full Changelog: [v0.5.0-alpha.2...v0.6.1-alpha.1](v0.5.0-alpha.2...v0.6.1-alpha.1)

### ⚠ BREAKING CHANGES

* improve consistency of post-training API endpoints

### Features

* accept list content blocks in Responses API function_call_output ([f6f1fc3](f6f1fc3))
* Add prompt_cache_key parameter support ([6b45699](6b45699))
* add skip_model_availability to openai_mixin for remote models ([7ef952b](7ef952b))
* add support for 'frequency_penalty' param to Responses API ([56d39cc](56d39cc))
* add support for 'presence_penalty' param to Responses API ([4f57d15](4f57d15))
* add support for /responses background parameter ([4f8bf45](4f8bf45))
* Add top_logprobs parameter support ([2196986](2196986))
* add top_p parameter support to responses API ([23e3b9f](23e3b9f))
* Add truncation parameter support ([7501365](7501365))
* improve consistency of post-training API endpoints ([99057fd](99057fd))
* **inference:** bidirectional reasoning token passthrough for chat completions ([c314639](c314639))
* **vector_io:** Implement Contextual Retrieval for improved RAG search quality ([89ec5a7](89ec5a7))

### Bug Fixes

* align chat completion usage schema with OpenAI spec ([3974d5d](3974d5d))
* Enabled models list works ([#314](#314)) ([acd5e64](acd5e64))
* **inference:** use flat response message model for chat/completions ([e58e2e4](e58e2e4))
* **responses:** achieve full OpenResponses conformance — 6/6 tests passing ([631ab2c](631ab2c))
* **stainless:** handle [DONE] SSE terminator in streaming responses ([17f0029](17f0029))
* **vector_io:** align Protocol signatures with request models ([ea58fd8](ea58fd8))

### Chores

* **api:** minor updates ([17a2705](17a2705))
* **ci:** bump uv version ([f014d4c](f014d4c))
* **ci:** skip uploading artifacts on stainless-internal branches ([dbddad9](dbddad9))
* **docs:** add missing descriptions ([f1a093b](f1a093b))
* format all `api.md` files ([0e3e262](0e3e262))
* **internal:** add request options to SSE classes ([2ecc682](2ecc682))
* **internal:** bump dependencies ([612291e](612291e))
* **internal:** fix lint error on Python 3.14 ([a0f6975](a0f6975))
* **internal:** make `test_proxy_environment_variables` more resilient ([6bc2bb4](6bc2bb4))
* **internal:** make `test_proxy_environment_variables` more resilient to env ([44bbae1](44bbae1))
* **test:** do not count install time for mock server timeout ([185de33](185de33))
* update mock server docs ([92cb087](92cb087))
* update placeholder string ([406b9bb](406b9bb))

### Refactors

* **types:** use `extra_items` from PEP 728 ([629ca09](629ca09))

---

This pull request is managed by Stainless's [GitHub App](https://github.com/apps/stainless-app). The [semver version number](https://semver.org/#semantic-versioning-specification-semver) is based on included [commit messages](https://www.conventionalcommits.org/en/v1.0.0/). Alternatively, you can manually set the version number in the title of this pull request.

For a better experience, it is recommended to use either rebase-merge or squash-merge when merging this pull request.

🔗 Stainless [website](https://www.stainlessapi.com)
📚 Read the [docs](https://app.stainlessapi.com/docs)
🙋 [Reach out](mailto:support@stainlessapi.com) for help or questions

Co-authored-by: stainless-app[bot] <142633134+stainless-app[bot]@users.noreply.github.com>
1 parent 862e900 · commit 3eb32ab

76 files changed · 2587 additions & 1103 deletions


.github/workflows/ci.yml

Lines changed: 9 additions & 5 deletions
```diff
@@ -25,7 +25,7 @@ jobs:
       - name: Install uv
         uses: astral-sh/setup-uv@v5
         with:
-          version: '0.9.13'
+          version: '0.10.2'
 
       - name: Install dependencies
         run: uv sync --all-extras
@@ -47,7 +47,7 @@ jobs:
       - name: Install uv
         uses: astral-sh/setup-uv@v5
         with:
-          version: '0.9.13'
+          version: '0.10.2'
 
       - name: Install dependencies
         run: uv sync --all-extras
@@ -56,14 +56,18 @@ jobs:
         run: uv build
 
       - name: Get GitHub OIDC Token
-        if: github.repository == 'stainless-sdks/llama-stack-client-python'
+        if: |-
+          github.repository == 'stainless-sdks/llama-stack-client-python' &&
+          !startsWith(github.ref, 'refs/heads/stl/')
         id: github-oidc
         uses: actions/github-script@v8
         with:
           script: core.setOutput('github_token', await core.getIDToken());
 
       - name: Upload tarball
-        if: github.repository == 'stainless-sdks/llama-stack-client-python'
+        if: |-
+          github.repository == 'stainless-sdks/llama-stack-client-python' &&
+          !startsWith(github.ref, 'refs/heads/stl/')
         env:
           URL: https://pkg.stainless.com/s
           AUTH: ${{ steps.github-oidc.outputs.github_token }}
@@ -81,7 +85,7 @@ jobs:
       - name: Install uv
         uses: astral-sh/setup-uv@v5
         with:
-          version: '0.9.13'
+          version: '0.10.2'
 
       - name: Bootstrap
         run: ./scripts/bootstrap
```

.release-please-manifest.json

Lines changed: 1 addition & 1 deletion
```diff
@@ -1,3 +1,3 @@
 {
-  ".": "0.5.0-alpha.2"
+  ".": "0.6.1-alpha.1"
 }
```

.stats.yml

Lines changed: 3 additions & 3 deletions
```diff
@@ -1,4 +1,4 @@
 configured_endpoints: 108
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-958e990011d6b4c27513743a151ec4c80c3103650a80027380d15f1d6b108e32.yml
-openapi_spec_hash: 5b49d825dbc2a26726ca752914a65114
-config_hash: 19b84a0a93d566334ae134dafc71991f
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-1b387ba7b0e0d1aa931032ac2101e5a473b9fa42975e6575cf889feace342b80.yml
+openapi_spec_hash: a144868005520bd3f8f9dc3d8cac1c22
+config_hash: ef1f9b33e203c71cfc10d91890c1ed2d
```

CHANGELOG.md

Lines changed: 55 additions & 0 deletions
```diff
@@ -1,5 +1,60 @@
 # Changelog
 
+## 0.6.1-alpha.1 (2026-03-13)
+
+Full Changelog: [v0.5.0-alpha.2...v0.6.1-alpha.1](https://github.com/llamastack/llama-stack-client-python/compare/v0.5.0-alpha.2...v0.6.1-alpha.1)
+
+### ⚠ BREAKING CHANGES
+
+* improve consistency of post-training API endpoints
+
+### Features
+
+* accept list content blocks in Responses API function_call_output ([f6f1fc3](https://github.com/llamastack/llama-stack-client-python/commit/f6f1fc36008f4fdb7af19aa2aabfcd2482d4a1bc))
+* Add prompt_cache_key parameter support ([6b45699](https://github.com/llamastack/llama-stack-client-python/commit/6b45699185d934a5f8395c5cc3046f6c5aceb770))
+* add skip_model_availability to openai_mixin for remote models ([7ef952b](https://github.com/llamastack/llama-stack-client-python/commit/7ef952b78a5c1b8bd49509c9be7ba8781dfb7462))
+* add support for 'frequency_penalty' param to Responses API ([56d39cc](https://github.com/llamastack/llama-stack-client-python/commit/56d39cc9ff9d6f54e303fc377d605ae17bac9584))
+* add support for 'presence_penalty' param to Responses API ([4f57d15](https://github.com/llamastack/llama-stack-client-python/commit/4f57d159caba431676dced864f8f0871c3692f7b))
+* add support for /responses background parameter ([4f8bf45](https://github.com/llamastack/llama-stack-client-python/commit/4f8bf4526e529a74b9c53cac6df8e4beb2808d60))
+* Add top_logprobs parameter support ([2196986](https://github.com/llamastack/llama-stack-client-python/commit/21969867a82596e8be0aeeddbb6d8ccedf3e0f8b))
+* add top_p parameter support to responses API ([23e3b9f](https://github.com/llamastack/llama-stack-client-python/commit/23e3b9fcf7a23378c200604d0f57dc5a9e6a8527))
+* Add truncation parameter support ([7501365](https://github.com/llamastack/llama-stack-client-python/commit/7501365fe89795e87accfb6b1f2329da25d0efeb))
+* improve consistency of post-training API endpoints ([99057fd](https://github.com/llamastack/llama-stack-client-python/commit/99057fdc74bafdf54479674ba75b447cd4681cb6))
+* **inference:** bidirectional reasoning token passthrough for chat completions ([c314639](https://github.com/llamastack/llama-stack-client-python/commit/c314639b35a234ca340a08b5615a38ec838ab4f4))
+* **vector_io:** Implement Contextual Retrieval for improved RAG search quality ([89ec5a7](https://github.com/llamastack/llama-stack-client-python/commit/89ec5a7bf405e688bd404877e49ab1ee9b49bf7e))
+
+
+### Bug Fixes
+
+* align chat completion usage schema with OpenAI spec ([3974d5d](https://github.com/llamastack/llama-stack-client-python/commit/3974d5db8270e2548d0cdd54204c1603ca7a84a8))
+* Enabled models list works ([#314](https://github.com/llamastack/llama-stack-client-python/issues/314)) ([acd5e64](https://github.com/llamastack/llama-stack-client-python/commit/acd5e64a9e82083192a31f85f9c810291cabcadb))
+* **inference:** use flat response message model for chat/completions ([e58e2e4](https://github.com/llamastack/llama-stack-client-python/commit/e58e2e4dee9c9bbb72e4903e30f169991d10e545))
+* **responses:** achieve full OpenResponses conformance — 6/6 tests passing ([631ab2c](https://github.com/llamastack/llama-stack-client-python/commit/631ab2c19c7cd33ac81598a795ae8be93bdd5a4b))
+* **stainless:** handle [DONE] SSE terminator in streaming responses ([17f0029](https://github.com/llamastack/llama-stack-client-python/commit/17f0029a3bd6719c4f71ab7b14af8cac23f9e7f1))
+* **vector_io:** align Protocol signatures with request models ([ea58fd8](https://github.com/llamastack/llama-stack-client-python/commit/ea58fd88201ef59e580443688100cafe45f305c0))
+
+
+### Chores
+
+* **api:** minor updates ([17a2705](https://github.com/llamastack/llama-stack-client-python/commit/17a270528b503591de15f9e9fcbc378007b75eda))
+* **ci:** bump uv version ([f014d4c](https://github.com/llamastack/llama-stack-client-python/commit/f014d4ca0301a48078c4692cfa828016cb92c52e))
+* **ci:** skip uploading artifacts on stainless-internal branches ([dbddad9](https://github.com/llamastack/llama-stack-client-python/commit/dbddad9711a0ba0d2396a654e5b5220537acfc6b))
+* **docs:** add missing descriptions ([f1a093b](https://github.com/llamastack/llama-stack-client-python/commit/f1a093b71b5ae56f23143268ab68d851b6336ae9))
+* format all `api.md` files ([0e3e262](https://github.com/llamastack/llama-stack-client-python/commit/0e3e2626081ca9268297742990368c7ed6493b40))
+* **internal:** add request options to SSE classes ([2ecc682](https://github.com/llamastack/llama-stack-client-python/commit/2ecc682c1fccc86c643ad3da40e5134352745525))
+* **internal:** bump dependencies ([612291e](https://github.com/llamastack/llama-stack-client-python/commit/612291e2142b710cdd643af16bbe83e514f7a44e))
+* **internal:** fix lint error on Python 3.14 ([a0f6975](https://github.com/llamastack/llama-stack-client-python/commit/a0f69750827b016bb27a52bdd77fcbbacd311020))
+* **internal:** make `test_proxy_environment_variables` more resilient ([6bc2bb4](https://github.com/llamastack/llama-stack-client-python/commit/6bc2bb4e81b16d23e20090f45dbd8a53a63c158d))
+* **internal:** make `test_proxy_environment_variables` more resilient to env ([44bbae1](https://github.com/llamastack/llama-stack-client-python/commit/44bbae12bb8b4f72d1fb50db29bedd69f30340b7))
+* **test:** do not count install time for mock server timeout ([185de33](https://github.com/llamastack/llama-stack-client-python/commit/185de33c3b15256972df173610aa2d0d2fcb5f87))
+* update mock server docs ([92cb087](https://github.com/llamastack/llama-stack-client-python/commit/92cb087355ffa1fd50e3a35b8e888853784c9fe9))
+* update placeholder string ([406b9bb](https://github.com/llamastack/llama-stack-client-python/commit/406b9bbd327d9ce4c2423a553c15d4a7889025f9))
+
+
+### Refactors
+
+* **types:** use `extra_items` from PEP 728 ([629ca09](https://github.com/llamastack/llama-stack-client-python/commit/629ca09b3c8ca32dc95082900e41df21c9dd4855))
+
 ## 0.5.0-alpha.2 (2026-02-05)
 
 Full Changelog: [v0.5.0-alpha.1...v0.5.0-alpha.2](https://github.com/llamastack/llama-stack-client-python/compare/v0.5.0-alpha.1...v0.5.0-alpha.2)
```

CONTRIBUTING.md

Lines changed: 1 addition & 2 deletions
````diff
@@ -88,8 +88,7 @@ $ pip install ./path-to-wheel-file.whl
 Most tests require you to [set up a mock server](https://github.com/stoplightio/prism) against the OpenAPI spec to run the tests.
 
 ```sh
-# you will need npm installed
-$ npx prism mock path/to/your/openapi.yml
+$ ./scripts/mock
 ```
 
 ```sh
````

README.md

Lines changed: 44 additions & 0 deletions
````diff
@@ -128,6 +128,50 @@ async def main() -> None:
 asyncio.run(main())
 ```
 
+## Streaming responses
+
+We provide support for streaming responses using Server Side Events (SSE).
+
+```python
+from llama_stack_client import LlamaStackClient
+
+client = LlamaStackClient()
+
+stream = client.chat.completions.create(
+    messages=[
+        {
+            "content": "string",
+            "role": "user",
+        }
+    ],
+    model="model",
+    stream=True,
+)
+for completion in stream:
+    print(completion.id)
+```
+
+The async client uses the exact same interface.
+
+```python
+from llama_stack_client import AsyncLlamaStackClient
+
+client = AsyncLlamaStackClient()
+
+stream = await client.chat.completions.create(
+    messages=[
+        {
+            "content": "string",
+            "role": "user",
+        }
+    ],
+    model="model",
+    stream=True,
+)
+async for completion in stream:
+    print(completion.id)
+```
+
 ## Using types
 
 Nested request parameters are [TypedDicts](https://docs.python.org/3/library/typing.html#typing.TypedDict). Responses are [Pydantic models](https://docs.pydantic.dev) which also provide helper methods for things like:
````

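The streaming support added to the README above relates to one of this release's fixes: handling the `[DONE]` SSE terminator. As a rough illustration of why that sentinel needs special-casing, here is a self-contained sketch of an SSE `data:` line parser; it is independent of the SDK's actual implementation, and all names are hypothetical:

```python
import json
from typing import Iterator


def iter_sse_data(lines: Iterator[str]) -> Iterator[dict]:
    """Yield parsed JSON payloads from SSE `data:` lines, stopping at the
    `[DONE]` sentinel that OpenAI-style streaming endpoints emit last."""
    for line in lines:
        line = line.strip()
        if not line.startswith("data:"):
            continue  # skip blank keep-alives, comments, and event-name lines
        payload = line[len("data:"):].strip()
        if payload == "[DONE]":
            return  # terminator is not JSON and must not reach json.loads
        yield json.loads(payload)


raw = [
    'data: {"id": "chunk-1"}',
    "",
    'data: {"id": "chunk-2"}',
    "data: [DONE]",
]
ids = [event["id"] for event in iter_sse_data(iter(raw))]
# ids == ["chunk-1", "chunk-2"]
```

Without the sentinel check, the final `data: [DONE]` frame would raise a `json.JSONDecodeError` instead of ending the stream cleanly.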
api.md

Lines changed: 3 additions & 3 deletions
```diff
@@ -474,9 +474,9 @@ from llama_stack_client.types.alpha.post_training import (
 Methods:
 
 - <code title="get /v1alpha/post-training/jobs">client.alpha.post_training.job.<a href="./src/llama_stack_client/resources/alpha/post_training/job.py">list</a>() -> <a href="./src/llama_stack_client/types/alpha/post_training/job_list_response.py">JobListResponse</a></code>
-- <code title="get /v1alpha/post-training/job/artifacts">client.alpha.post_training.job.<a href="./src/llama_stack_client/resources/alpha/post_training/job.py">artifacts</a>() -> <a href="./src/llama_stack_client/types/alpha/post_training/job_artifacts_response.py">JobArtifactsResponse</a></code>
-- <code title="post /v1alpha/post-training/job/cancel">client.alpha.post_training.job.<a href="./src/llama_stack_client/resources/alpha/post_training/job.py">cancel</a>() -> None</code>
-- <code title="get /v1alpha/post-training/job/status">client.alpha.post_training.job.<a href="./src/llama_stack_client/resources/alpha/post_training/job.py">status</a>() -> <a href="./src/llama_stack_client/types/alpha/post_training/job_status_response.py">JobStatusResponse</a></code>
+- <code title="get /v1alpha/post-training/jobs/{job_uuid}/artifacts">client.alpha.post_training.job.<a href="./src/llama_stack_client/resources/alpha/post_training/job.py">artifacts</a>(job_uuid) -> <a href="./src/llama_stack_client/types/alpha/post_training/job_artifacts_response.py">JobArtifactsResponse</a></code>
+- <code title="post /v1alpha/post-training/jobs/{job_uuid}/cancel">client.alpha.post_training.job.<a href="./src/llama_stack_client/resources/alpha/post_training/job.py">cancel</a>(job_uuid) -> None</code>
+- <code title="get /v1alpha/post-training/jobs/{job_uuid}/status">client.alpha.post_training.job.<a href="./src/llama_stack_client/resources/alpha/post_training/job.py">status</a>(job_uuid) -> <a href="./src/llama_stack_client/types/alpha/post_training/job_status_response.py">JobStatusResponse</a></code>
 
 ## Benchmarks
```

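The breaking change above moves `job_uuid` from an implicit parameter into the URL path (`/v1alpha/post-training/jobs/{job_uuid}/…`). A minimal sketch of how such path templates are typically filled in, with percent-encoding so an unusual id cannot break the route; this helper is hypothetical and not the SDK's internals:

```python
from urllib.parse import quote


def post_training_job_path(action: str, job_uuid: str) -> str:
    """Build a new-style path such as /v1alpha/post-training/jobs/{job_uuid}/status.

    The id is percent-encoded with no safe characters, so a slash in the
    value becomes %2F instead of introducing an extra path segment.
    """
    return f"/v1alpha/post-training/jobs/{quote(job_uuid, safe='')}/{action}"


print(post_training_job_path("status", "job-123"))
# /v1alpha/post-training/jobs/job-123/status
```

Encoding the path segment is the design point: with the old flat routes (`/post-training/job/status`), the id traveled out-of-band, while path-parameter routes make it part of the resource identity, consistent with the `list` endpoint's plural `jobs` collection.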
pyproject.toml

Lines changed: 1 addition & 1 deletion
```diff
@@ -1,6 +1,6 @@
 [project]
 name = "llama_stack_client"
-version = "0.5.0-alpha.2"
+version = "0.6.1-alpha.1"
 description = "The official Python library for the llama-stack-client API"
 dynamic = ["readme"]
 license = "MIT"
```

requirements-dev.lock

Lines changed: 12 additions & 12 deletions
```diff
@@ -3,12 +3,12 @@
 -e .
 annotated-types==0.7.0
     # via pydantic
-anyio==4.12.0
+anyio==4.12.1
     # via
     #   httpx
     #   llama-stack-client
 black==26.1.0
-certifi==2025.11.12
+certifi==2026.1.4
     # via
     #   httpcore
     #   httpx
@@ -52,7 +52,7 @@ idna==3.11
     #   anyio
     #   httpx
     #   requests
-importlib-metadata==8.7.0
+importlib-metadata==8.7.1
 iniconfig==2.3.0
     # via pytest
 markdown-it-py==4.0.0
@@ -64,11 +64,11 @@ mypy-extensions==1.1.0
     # via
     #   black
     #   mypy
-nodeenv==1.9.1
+nodeenv==1.10.0
     # via
     #   pre-commit
     #   pyright
-numpy==2.4.1
+numpy==2.4.2
     # via pandas
 packaging==25.0
     # via
@@ -89,7 +89,7 @@ pluggy==1.6.0
 pre-commit==4.5.1
 prompt-toolkit==3.0.52
     # via llama-stack-client
-pyaml==25.7.0
+pyaml==26.2.1
     # via llama-stack-client
 pydantic==2.12.5
     # via llama-stack-client
@@ -100,15 +100,15 @@ pygments==2.19.2
     #   pytest
     #   rich
 pyright==1.1.399
-pytest==9.0.1
+pytest==9.0.2
     # via
     #   pytest-asyncio
     #   pytest-xdist
 pytest-asyncio==1.3.0
 pytest-xdist==3.8.0
 python-dateutil==2.9.0.post0
     # via pandas
-pytokens==0.4.0
+pytokens==0.4.1
     # via black
 pyyaml==6.0.3
     # via
@@ -119,7 +119,7 @@ requests==2.32.5
 respx==0.22.0
 rich==14.2.0
     # via llama-stack-client
-ruff==0.14.7
+ruff==0.14.13
 six==1.17.0
     # via python-dateutil
 sniffio==1.3.1
@@ -128,8 +128,8 @@ termcolor==3.3.0
     # via
     #   fire
     #   llama-stack-client
-time-machine==3.1.0
-tqdm==4.67.1
+time-machine==3.2.0
+tqdm==4.67.3
     # via llama-stack-client
 typing-extensions==4.15.0
     # via
@@ -149,7 +149,7 @@ urllib3==2.6.3
     # via requests
 virtualenv==20.36.1
     # via pre-commit
-wcwidth==0.3.1
+wcwidth==0.6.0
     # via prompt-toolkit
 zipp==3.23.0
     # via importlib-metadata
```

scripts/format

Lines changed: 1 addition & 1 deletion
```diff
@@ -11,4 +11,4 @@ uv run ruff check --fix .
 uv run ruff format
 
 echo "==> Formatting docs"
-uv run python scripts/utils/ruffen-docs.py README.md api.md
+uv run python scripts/utils/ruffen-docs.py README.md $(find . -type f -name api.md)
```

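With this change, `scripts/format` formats every `api.md` in the repository via command substitution rather than only the top-level file. A quick demonstration of what the `find` expression matches, using a scratch directory (all paths here are made up for the demo):

```shell
# Build a scratch tree with api.md files at two depths plus a decoy
tmp=$(mktemp -d)
mkdir -p "$tmp/docs/sub"
touch "$tmp/api.md" "$tmp/docs/sub/api.md" "$tmp/docs/other.md"

# -type f -name api.md matches regular files named exactly api.md, at any depth;
# $(...) in the script expands each matched path as a separate argument
matches=$(find "$tmp" -type f -name api.md | wc -l)
echo "found $matches api.md files"

rm -rf "$tmp"
```

One caveat of the unquoted `$(find …)` form: paths containing whitespace would be word-split, which is fine for a repo that controls its own file names.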
0 commit comments