From c6b84e853be657a330e6754894911bdaac07dc67 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
 <142633134+stainless-app[bot]@users.noreply.github.com>
Date: Fri, 8 May 2026 16:32:53 +0000
Subject: [PATCH 1/5] fix(client): add missing f-string prefix in file type
 error message

---
 src/sambanova/_files.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/sambanova/_files.py b/src/sambanova/_files.py
index 6071d1e..6877ecb 100644
--- a/src/sambanova/_files.py
+++ b/src/sambanova/_files.py
@@ -99,7 +99,7 @@ async def async_to_httpx_files(files: RequestFiles | None) -> HttpxRequestFiles
     elif is_sequence_t(files):
         files = [(key, await _async_transform_file(file)) for key, file in files]
     else:
-        raise TypeError("Unexpected file type input {type(files)}, expected mapping or sequence")
+        raise TypeError(f"Unexpected file type input {type(files)}, expected mapping or sequence")
 
     return files
 

From cc4aade45c484980fafacf2e315ca6f1564e0ace Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
 <142633134+stainless-app[bot]@users.noreply.github.com>
Date: Mon, 11 May 2026 17:44:21 +0000
Subject: [PATCH 2/5] feat(internal/types): support eagerly validating pydantic
 iterators

---
 src/sambanova/_models.py | 80 ++++++++++++++++++++++++++++++++++++++++
 tests/test_models.py     | 60 ++++++++++++++++++++++++++++--
 2 files changed, 137 insertions(+), 3 deletions(-)

diff --git a/src/sambanova/_models.py b/src/sambanova/_models.py
index 29070e0..8c5ab26 100644
--- a/src/sambanova/_models.py
+++ b/src/sambanova/_models.py
@@ -25,7 +25,9 @@
     ClassVar,
     Protocol,
     Required,
+    Annotated,
     ParamSpec,
+    TypeAlias,
     TypedDict,
     TypeGuard,
     final,
@@ -79,7 +81,15 @@
 from ._constants import RAW_RESPONSE_HEADER
 
 if TYPE_CHECKING:
+    from pydantic import GetCoreSchemaHandler, ValidatorFunctionWrapHandler
+    from pydantic_core import CoreSchema, core_schema
     from pydantic_core.core_schema import ModelField, ModelSchema, LiteralSchema, ModelFieldsSchema
+else:
+    try:
+        from pydantic_core import CoreSchema, core_schema
+    except ImportError:
+        CoreSchema = None
+        core_schema = None
 
 __all__ = ["BaseModel", "GenericModel"]
 
@@ -396,6 +406,76 @@ def model_dump_json(
             )
 
 
+class _EagerIterable(list[_T], Generic[_T]):
+    """
+    Accepts any Iterable[T] input (including generators), consumes it
+    eagerly, and validates all items upfront.
+
+    Validation preserves the original container type where possible
+    (e.g. a set[T] stays a set[T]).  Serialization (model_dump / JSON)
+    always emits a list — round-tripping through model_dump() will not
+    restore the original container type.
+    """
+
+    @classmethod
+    def __get_pydantic_core_schema__(
+        cls,
+        source_type: Any,
+        handler: GetCoreSchemaHandler,
+    ) -> CoreSchema:
+        (item_type,) = get_args(source_type) or (Any,)  # unparameterized source -> items validated as Any
+        item_schema: CoreSchema = handler.generate_schema(item_type)
+        list_of_items_schema: CoreSchema = core_schema.list_schema(item_schema)
+        # Wrap the list schema: _validate runs first and passes a concrete list on to it.
+        return core_schema.no_info_wrap_validator_function(
+            cls._validate,
+            list_of_items_schema,
+            serialization=core_schema.plain_serializer_function_ser_schema(
+                cls._serialize,
+                info_arg=False,
+            ),
+        )
+
+    @staticmethod
+    def _validate(v: Iterable[_T], handler: "ValidatorFunctionWrapHandler") -> Any:
+        """Consume `v` eagerly, validate its items, and rebuild the input's container type when possible."""
+        original_type: type[Any] = type(v)
+        # Normalize to list so list_schema can validate each item
+        if isinstance(v, list):
+            items: list[_T] = v
+        else:
+            try:
+                items = list(v)
+            except TypeError as e:
+                raise ValueError("Value is not iterable") from e  # ValueError is reported as a ValidationError
+
+        # Validate items against the inner schema
+        validated: list[_T] = handler(items)
+
+        # Reconstruct original container type
+        if original_type is list:
+            return validated
+        # str(list) produces the list's repr, not a string built from items,
+        # so skip reconstruction for str and its subclasses.
+        if issubclass(original_type, str):
+            return validated
+        try:
+            return original_type(validated)
+        except (TypeError, ValueError):
+            # If the type cannot be reconstructed, just return the validated list
+            return validated
+
+    @staticmethod
+    def _serialize(v: Iterable[_T]) -> list[_T]:
+        """Always serialize as a list so Pydantic's JSON encoder is happy."""
+        if isinstance(v, list):
+            return v
+        return list(v)
+
+
+EagerIterable: TypeAlias = Annotated[Iterable[_T], _EagerIterable]
+
+
 def _construct_field(value: object, field: FieldInfo, key: str) -> object:
     if value is None:
         return field_get_default(field)
diff --git a/tests/test_models.py b/tests/test_models.py
index d559b5c..eda8fb8 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -1,7 +1,8 @@
 import json
-from typing import TYPE_CHECKING, Any, Dict, List, Union, Optional, cast
+from typing import TYPE_CHECKING, Any, Dict, List, Union, Iterable, Optional, cast
 from datetime import datetime, timezone
-from typing_extensions import Literal, Annotated, TypeAliasType
+from collections import deque
+from typing_extensions import Literal, Annotated, TypedDict, TypeAliasType
 
 import pytest
 import pydantic
@@ -9,7 +10,7 @@
 
 from sambanova._utils import PropertyInfo
 from sambanova._compat import PYDANTIC_V1, parse_obj, model_dump, model_json
-from sambanova._models import DISCRIMINATOR_CACHE, BaseModel, construct_type
+from sambanova._models import DISCRIMINATOR_CACHE, BaseModel, EagerIterable, construct_type
 
 
 class BasicModel(BaseModel):
@@ -961,3 +962,56 @@ def __getattr__(self, attr: str) -> Item: ...
     assert model.a.prop == 1
     assert isinstance(model.a, Item)
     assert model.other == "foo"
+
+
+# NOTE: Workaround for Pydantic Iterable behavior.
+# Iterable fields are replaced with a ValidatorIterator and may be consumed
+# during serialization, which can cause subsequent dumps to return empty data.
+# See: https://github.com/pydantic/pydantic/issues/9541
+@pytest.mark.parametrize(
+    "data, expected_validated",
+    [
+        ([1, 2, 3], [1, 2, 3]),
+        ((1, 2, 3), (1, 2, 3)),
+        (set([1, 2, 3]), set([1, 2, 3])),
+        (iter([1, 2, 3]), [1, 2, 3]),
+        ([], []),
+        ((x for x in [1, 2, 3]), [1, 2, 3]),
+        (map(lambda x: x, [1, 2, 3]), [1, 2, 3]),
+        (frozenset([1, 2, 3]), frozenset([1, 2, 3])),
+        (deque([1, 2, 3]), deque([1, 2, 3])),
+    ],
+    ids=["list", "tuple", "set", "iterator", "empty", "generator", "map", "frozenset", "deque"],
+)
+@pytest.mark.skipif(PYDANTIC_V1, reason="this is only supported in pydantic v2")
+def test_iterable_construction(data: Iterable[int], expected_validated: Iterable[int]) -> None:
+    class TypeWithIterable(TypedDict):
+        items: EagerIterable[int]
+
+    class Model(BaseModel):
+        data: TypeWithIterable
+
+    m = Model.model_validate({"data": {"items": data}})
+    assert m.data["items"] == expected_validated
+
+    # Verify repeated dumps don't lose data (the original bug)
+    assert m.model_dump()["data"]["items"] == list(expected_validated)
+    assert m.model_dump()["data"]["items"] == list(expected_validated)
+
+
+@pytest.mark.skipif(PYDANTIC_V1, reason="this is only supported in pydantic v2")
+def test_iterable_construction_str_falls_back_to_list() -> None:
+    # str is iterable (over chars), but str(list_of_chars) produces the list's repr
+    # rather than reconstructing a string from items. We special-case str to fall
+    # back to list instead of attempting reconstruction.
+    class TypeWithIterable(TypedDict):
+        items: EagerIterable[str]
+
+    class Model(BaseModel):
+        data: TypeWithIterable
+
+    m = Model.model_validate({"data": {"items": "hello"}})
+
+    # falls back to list of chars rather than calling str(["h", "e", "l", "l", "o"])
+    assert m.data["items"] == ["h", "e", "l", "l", "o"]
+    assert m.model_dump()["data"]["items"] == ["h", "e", "l", "l", "o"]

From 4b1c00a59dc5d6bf8e04812dbcf008b1a775f46b Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
 <142633134+stainless-app[bot]@users.noreply.github.com>
Date: Tue, 12 May 2026 19:12:28 +0000
Subject: [PATCH 3/5] ci: pin GitHub Actions to commit SHAs

Pin all GitHub Actions referenced in generated workflows (both
first-party `actions/*` and third-party) to immutable commit SHAs.
Updating pinned actions is now a deliberate codegen-side bump rather
than implicit on every workflow run.
---
 .github/workflows/ci.yml             | 14 +++++++-------
 .github/workflows/publish-pypi.yml   |  4 ++--
 .github/workflows/release-doctor.yml |  2 +-
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 0477bdc..f3eb789 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -21,10 +21,10 @@ jobs:
     runs-on: ${{ github.repository == 'stainless-sdks/sambanova-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }}
     if: (github.event_name == 'push' || github.event.pull_request.head.repo.fork) && (github.event_name != 'push' || github.event.head_commit.message != 'codegen metadata')
     steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
 
       - name: Install uv
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5.4.2
         with:
           version: '0.10.2'
 
@@ -43,10 +43,10 @@ jobs:
       id-token: write
     runs-on: ${{ github.repository == 'stainless-sdks/sambanova-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }}
     steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
 
       - name: Install uv
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5.4.2
         with:
           version: '0.10.2'
 
@@ -61,7 +61,7 @@ jobs:
           github.repository == 'stainless-sdks/sambanova-python' &&
           !startsWith(github.ref, 'refs/heads/stl/')
         id: github-oidc
-        uses: actions/github-script@v8
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
         with:
           script: core.setOutput('github_token', await core.getIDToken());
 
@@ -81,10 +81,10 @@ jobs:
     runs-on: ${{ github.repository == 'stainless-sdks/sambanova-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }}
     if: github.event_name == 'push' || github.event.pull_request.head.repo.fork
     steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
 
       - name: Install uv
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5.4.2
         with:
           version: '0.10.2'
 
diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml
index 3f73e27..884b664 100644
--- a/.github/workflows/publish-pypi.yml
+++ b/.github/workflows/publish-pypi.yml
@@ -17,10 +17,10 @@ jobs:
       id-token: write
 
     steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
 
       - name: Install uv
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5.4.2
         with:
           version: '0.9.13'
 
diff --git a/.github/workflows/release-doctor.yml b/.github/workflows/release-doctor.yml
index 45a764d..1696088 100644
--- a/.github/workflows/release-doctor.yml
+++ b/.github/workflows/release-doctor.yml
@@ -12,7 +12,7 @@ jobs:
     if: github.repository == 'sambanova/sambanova-python' && (github.event_name == 'push' || github.event_name == 'workflow_dispatch' || startsWith(github.head_ref, 'release-please') || github.head_ref == 'next')
 
     steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
 
       - name: Check release environment
         run: |

From 0ddfd334decc86831f840cd8c0de2c3a73e378a2 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
 <142633134+stainless-app[bot]@users.noreply.github.com>
Date: Tue, 26 May 2026 21:54:09 +0000
Subject: [PATCH 4/5] feat(api): add anthropic compatible messages api support

---
 .stats.yml                                    |    8 +-
 api.md                                        |   19 +
 src/sambanova/_client.py                      |   79 +-
 src/sambanova/resources/__init__.py           |   14 +
 src/sambanova/resources/messages.py           | 1197 +++++++++++++++++
 src/sambanova/types/__init__.py               |    6 +
 src/sambanova/types/message.py                |  328 +++++
 .../types/message_count_tokens_params.py      |  979 ++++++++++++++
 .../types/message_count_tokens_response.py    |   12 +
 src/sambanova/types/message_create_params.py  | 1095 +++++++++++++++
 .../types/message_create_response.py          |  449 +++++++
 src/sambanova/types/message_stream_event.py   |  451 +++++++
 tests/api_resources/test_messages.py          |  595 ++++++++
 13 files changed, 5227 insertions(+), 5 deletions(-)
 create mode 100644 src/sambanova/resources/messages.py
 create mode 100644 src/sambanova/types/message.py
 create mode 100644 src/sambanova/types/message_count_tokens_params.py
 create mode 100644 src/sambanova/types/message_count_tokens_response.py
 create mode 100644 src/sambanova/types/message_create_params.py
 create mode 100644 src/sambanova/types/message_create_response.py
 create mode 100644 src/sambanova/types/message_stream_event.py
 create mode 100644 tests/api_resources/test_messages.py

diff --git a/.stats.yml b/.stats.yml
index 8386d48..29d23f7 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 8
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/sambanova/sambanova-5884297580e5423cf40bd59057ea55da0384fe34f431a80cac0eece6176c6057.yml
-openapi_spec_hash: 9306c1d75784a840a2973024fa94d22d
-config_hash: 315596f19f192be2b7bf343664a7eb90
+configured_endpoints: 10
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/sambanova/sambanova-f9a2632e2ea9632e8b40258f57bfa8b529b926d72fd8b1f9550848fbb880e0de.yml
+openapi_spec_hash: 8df9e2ad31769c26c590dacf3517bc36
+config_hash: d33fc68f92caf09c6b3b40675a111114
diff --git a/api.md b/api.md
index 2678e70..41de6f3 100644
--- a/api.md
+++ b/api.md
@@ -88,6 +88,25 @@ Methods:
 
 - <code title="post /responses">client.responses.<a href="./src/sambanova/resources/responses.py">create</a>(\*\*<a href="src/sambanova/types/response_create_params.py">params</a>) -> <a href="./src/sambanova/types/response_create_response.py">ResponseCreateResponse</a></code>
 
+# Messages
+
+Types:
+
+```python
+from sambanova.types import (
+    Message,
+    MessageCountTokensResponse,
+    MessageErrorResponse,
+    MessageStreamEvent,
+    MessageCreateResponse,
+)
+```
+
+Methods:
+
+- <code title="post /messages">client.messages.<a href="./src/sambanova/resources/messages.py">create</a>(\*\*<a href="src/sambanova/types/message_create_params.py">params</a>) -> <a href="./src/sambanova/types/message_create_response.py">MessageCreateResponse</a></code>
+- <code title="post /messages/count_tokens">client.messages.<a href="./src/sambanova/resources/messages.py">count_tokens</a>(\*\*<a href="src/sambanova/types/message_count_tokens_params.py">params</a>) -> <a href="./src/sambanova/types/message_count_tokens_response.py">MessageCountTokensResponse</a></code>
+
 # Models
 
 Types:
diff --git a/src/sambanova/_client.py b/src/sambanova/_client.py
index 5b044c0..08b45e4 100644
--- a/src/sambanova/_client.py
+++ b/src/sambanova/_client.py
@@ -35,8 +35,9 @@
 )
 
 if TYPE_CHECKING:
-    from .resources import chat, audio, models, responses, embeddings, completions
+    from .resources import chat, audio, models, messages, responses, embeddings, completions
     from .resources.models import ModelsResource, AsyncModelsResource
+    from .resources.messages import MessagesResource, AsyncMessagesResource
     from .resources.chat.chat import ChatResource, AsyncChatResource
     from .resources.responses import ResponsesResource, AsyncResponsesResource
     from .resources.embeddings import EmbeddingsResource, AsyncEmbeddingsResource
@@ -58,12 +59,14 @@
 class SambaNova(SyncAPIClient):
     # client options
     api_key: str
+    x_api_key: str | None
     integration_source: str | None
 
     def __init__(
         self,
         *,
         api_key: str | None = None,
+        x_api_key: str | None = None,
         integration_source: str | None = None,
         base_url: str | httpx.URL | None = None,
         timeout: float | Timeout | None | NotGiven = not_given,
@@ -88,6 +91,7 @@ def __init__(
 
         This automatically infers the following arguments from their corresponding environment variables if they are not provided:
         - `api_key` from `SAMBANOVA_API_KEY`
+        - `x_api_key` from `SAMBANOVA_API_KEY`
         - `integration_source` from `SAMBANOVA_INTEGRATION_SOURCE`
         """
         if api_key is None:
@@ -98,6 +102,10 @@ def __init__(
             )
         self.api_key = api_key
 
+        if x_api_key is None:
+            x_api_key = os.environ.get("SAMBANOVA_API_KEY")
+        self.x_api_key = x_api_key
+
         if integration_source is None:
             integration_source = os.environ.get("SAMBANOVA_INTEGRATION_SOURCE")
         self.integration_source = integration_source
@@ -159,6 +167,12 @@ def responses(self) -> ResponsesResource:
 
         return ResponsesResource(self)
 
+    @cached_property
+    def messages(self) -> MessagesResource:
+        from .resources.messages import MessagesResource
+
+        return MessagesResource(self)
+
     @cached_property
     def models(self) -> ModelsResource:
         from .resources.models import ModelsResource
@@ -181,9 +195,20 @@ def qs(self) -> Querystring:
     @property
     @override
     def auth_headers(self) -> dict[str, str]:
+        return {**self._api_key, **self._x_api_key}
+
+    @property
+    def _api_key(self) -> dict[str, str]:
         api_key = self.api_key
         return {"Authorization": f"Bearer {api_key}"}
 
+    @property
+    def _x_api_key(self) -> dict[str, str]:
+        x_api_key = self.x_api_key
+        if x_api_key is None:
+            return {}
+        return {"x-api-key": x_api_key}
+
     @property
     @override
     def default_headers(self) -> dict[str, str | Omit]:
@@ -198,6 +223,7 @@ def copy(
         self,
         *,
         api_key: str | None = None,
+        x_api_key: str | None = None,
         integration_source: str | None = None,
         base_url: str | httpx.URL | None = None,
         timeout: float | Timeout | None | NotGiven = not_given,
@@ -233,6 +259,7 @@ def copy(
         http_client = http_client or self._client
         return self.__class__(
             api_key=api_key or self.api_key,
+            x_api_key=x_api_key or self.x_api_key,
             integration_source=integration_source or self.integration_source,
             base_url=base_url or self.base_url,
             timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
@@ -284,12 +311,14 @@ def _make_status_error(
 class AsyncSambaNova(AsyncAPIClient):
     # client options
     api_key: str
+    x_api_key: str | None
     integration_source: str | None
 
     def __init__(
         self,
         *,
         api_key: str | None = None,
+        x_api_key: str | None = None,
         integration_source: str | None = None,
         base_url: str | httpx.URL | None = None,
         timeout: float | Timeout | None | NotGiven = not_given,
@@ -314,6 +343,7 @@ def __init__(
 
         This automatically infers the following arguments from their corresponding environment variables if they are not provided:
         - `api_key` from `SAMBANOVA_API_KEY`
+        - `x_api_key` from `SAMBANOVA_API_KEY`
         - `integration_source` from `SAMBANOVA_INTEGRATION_SOURCE`
         """
         if api_key is None:
@@ -324,6 +354,10 @@ def __init__(
             )
         self.api_key = api_key
 
+        if x_api_key is None:
+            x_api_key = os.environ.get("SAMBANOVA_API_KEY")
+        self.x_api_key = x_api_key
+
         if integration_source is None:
             integration_source = os.environ.get("SAMBANOVA_INTEGRATION_SOURCE")
         self.integration_source = integration_source
@@ -385,6 +419,12 @@ def responses(self) -> AsyncResponsesResource:
 
         return AsyncResponsesResource(self)
 
+    @cached_property
+    def messages(self) -> AsyncMessagesResource:
+        from .resources.messages import AsyncMessagesResource
+
+        return AsyncMessagesResource(self)
+
     @cached_property
     def models(self) -> AsyncModelsResource:
         from .resources.models import AsyncModelsResource
@@ -407,9 +447,20 @@ def qs(self) -> Querystring:
     @property
     @override
     def auth_headers(self) -> dict[str, str]:
+        return {**self._api_key, **self._x_api_key}
+
+    @property
+    def _api_key(self) -> dict[str, str]:
         api_key = self.api_key
         return {"Authorization": f"Bearer {api_key}"}
 
+    @property
+    def _x_api_key(self) -> dict[str, str]:
+        x_api_key = self.x_api_key
+        if x_api_key is None:
+            return {}
+        return {"x-api-key": x_api_key}
+
     @property
     @override
     def default_headers(self) -> dict[str, str | Omit]:
@@ -424,6 +475,7 @@ def copy(
         self,
         *,
         api_key: str | None = None,
+        x_api_key: str | None = None,
         integration_source: str | None = None,
         base_url: str | httpx.URL | None = None,
         timeout: float | Timeout | None | NotGiven = not_given,
@@ -459,6 +511,7 @@ def copy(
         http_client = http_client or self._client
         return self.__class__(
             api_key=api_key or self.api_key,
+            x_api_key=x_api_key or self.x_api_key,
             integration_source=integration_source or self.integration_source,
             base_url=base_url or self.base_url,
             timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
@@ -543,6 +596,12 @@ def responses(self) -> responses.ResponsesResourceWithRawResponse:
 
         return ResponsesResourceWithRawResponse(self._client.responses)
 
+    @cached_property
+    def messages(self) -> messages.MessagesResourceWithRawResponse:
+        from .resources.messages import MessagesResourceWithRawResponse
+
+        return MessagesResourceWithRawResponse(self._client.messages)
+
     @cached_property
     def models(self) -> models.ModelsResourceWithRawResponse:
         from .resources.models import ModelsResourceWithRawResponse
@@ -586,6 +645,12 @@ def responses(self) -> responses.AsyncResponsesResourceWithRawResponse:
 
         return AsyncResponsesResourceWithRawResponse(self._client.responses)
 
+    @cached_property
+    def messages(self) -> messages.AsyncMessagesResourceWithRawResponse:
+        from .resources.messages import AsyncMessagesResourceWithRawResponse
+
+        return AsyncMessagesResourceWithRawResponse(self._client.messages)
+
     @cached_property
     def models(self) -> models.AsyncModelsResourceWithRawResponse:
         from .resources.models import AsyncModelsResourceWithRawResponse
@@ -629,6 +694,12 @@ def responses(self) -> responses.ResponsesResourceWithStreamingResponse:
 
         return ResponsesResourceWithStreamingResponse(self._client.responses)
 
+    @cached_property
+    def messages(self) -> messages.MessagesResourceWithStreamingResponse:
+        from .resources.messages import MessagesResourceWithStreamingResponse
+
+        return MessagesResourceWithStreamingResponse(self._client.messages)
+
     @cached_property
     def models(self) -> models.ModelsResourceWithStreamingResponse:
         from .resources.models import ModelsResourceWithStreamingResponse
@@ -672,6 +743,12 @@ def responses(self) -> responses.AsyncResponsesResourceWithStreamingResponse:
 
         return AsyncResponsesResourceWithStreamingResponse(self._client.responses)
 
+    @cached_property
+    def messages(self) -> messages.AsyncMessagesResourceWithStreamingResponse:
+        from .resources.messages import AsyncMessagesResourceWithStreamingResponse
+
+        return AsyncMessagesResourceWithStreamingResponse(self._client.messages)
+
     @cached_property
     def models(self) -> models.AsyncModelsResourceWithStreamingResponse:
         from .resources.models import AsyncModelsResourceWithStreamingResponse
diff --git a/src/sambanova/resources/__init__.py b/src/sambanova/resources/__init__.py
index 03ab754..0cfdcd3 100644
--- a/src/sambanova/resources/__init__.py
+++ b/src/sambanova/resources/__init__.py
@@ -24,6 +24,14 @@
     ModelsResourceWithStreamingResponse,
     AsyncModelsResourceWithStreamingResponse,
 )
+from .messages import (
+    MessagesResource,
+    AsyncMessagesResource,
+    MessagesResourceWithRawResponse,
+    AsyncMessagesResourceWithRawResponse,
+    MessagesResourceWithStreamingResponse,
+    AsyncMessagesResourceWithStreamingResponse,
+)
 from .responses import (
     ResponsesResource,
     AsyncResponsesResource,
@@ -80,6 +88,12 @@
     "AsyncResponsesResourceWithRawResponse",
     "ResponsesResourceWithStreamingResponse",
     "AsyncResponsesResourceWithStreamingResponse",
+    "MessagesResource",
+    "AsyncMessagesResource",
+    "MessagesResourceWithRawResponse",
+    "AsyncMessagesResourceWithRawResponse",
+    "MessagesResourceWithStreamingResponse",
+    "AsyncMessagesResourceWithStreamingResponse",
     "ModelsResource",
     "AsyncModelsResource",
     "ModelsResourceWithRawResponse",
diff --git a/src/sambanova/resources/messages.py b/src/sambanova/resources/messages.py
new file mode 100644
index 0000000..dfbcf82
--- /dev/null
+++ b/src/sambanova/resources/messages.py
@@ -0,0 +1,1197 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Any, Union, Iterable, Optional, cast
+from typing_extensions import Literal, overload
+
+import httpx
+
+from ..types import message_create_params, message_count_tokens_params
+from .._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
+from .._utils import required_args, maybe_transform, strip_not_given, async_maybe_transform
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import (
+    to_raw_response_wrapper,
+    to_streamed_response_wrapper,
+    async_to_raw_response_wrapper,
+    async_to_streamed_response_wrapper,
+)
+from .._streaming import Stream, AsyncStream
+from .._base_client import make_request_options
+from ..types.message_stream_event import MessageStreamEvent
+from ..types.message_create_response import MessageCreateResponse
+from ..types.message_count_tokens_response import MessageCountTokensResponse
+
+__all__ = ["MessagesResource", "AsyncMessagesResource"]
+
+
+class MessagesResource(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> MessagesResourceWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/sambanova/sambanova-python#accessing-raw-response-data-eg-headers
+        """
+        return MessagesResourceWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> MessagesResourceWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/sambanova/sambanova-python#with_streaming_response
+        """
+        return MessagesResourceWithStreamingResponse(self)
+
+    @overload
+    def create(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[message_create_params.Message],
+        model: Union[
+            str,
+            Literal[
+                "Meta-Llama-3.3-70B-Instruct",
+                "Meta-Llama-3.2-1B-Instruct",
+                "Meta-Llama-3.2-3B-Instruct",
+                "Llama-3.2-11B-Vision-Instruct",
+                "Llama-3.2-90B-Vision-Instruct",
+                "Meta-Llama-3.1-8B-Instruct",
+                "Meta-Llama-3.1-70B-Instruct",
+                "Meta-Llama-3.1-405B-Instruct",
+                "Qwen2.5-Coder-32B-Instruct",
+                "Qwen2.5-72B-Instruct",
+                "QwQ-32B-Preview",
+                "Meta-Llama-Guard-3-8B",
+                "DeepSeek-R1",
+                "DeepSeek-R1-0528",
+                "DeepSeek-V3-0324",
+                "DeepSeek-V3.1",
+                "DeepSeek-V3.1-cb",
+                "DeepSeek-V3.1-Terminus",
+                "DeepSeek-V3.2",
+                "DeepSeek-R1-Distill-Llama-70B",
+                "Llama-4-Maverick-17B-128E-Instruct",
+                "Llama-4-Scout-17B-16E-Instruct",
+                "Qwen3-32B",
+                "Qwen3-235B",
+                "Llama-3.3-Swallow-70B-Instruct-v0.4",
+                "gpt-oss-120b",
+                "ALLaM-7B-Instruct-preview",
+                "MiniMax-M2.5",
+                "MiniMax-M2.7",
+                "gemma-3-12b-it",
+            ],
+        ],
+        container: Optional[str] | Omit = omit,
+        metadata: message_create_params.Metadata | Omit = omit,
+        service_tier: Optional[Literal["auto", "standard_only"]] | Omit = omit,
+        stop_sequences: Optional[SequenceNotStr[str]] | Omit = omit,
+        stream: Optional[Literal[False]] | Omit = omit,
+        system: Union[str, Iterable[message_create_params.SystemSystemTextBlockArray]] | Omit = omit,
+        temperature: Optional[float] | Omit = omit,
+        thinking: message_create_params.Thinking | Omit = omit,
+        tool_choice: Optional[message_create_params.ToolChoice] | Omit = omit,
+        tools: Optional[Iterable[message_create_params.Tool]] | Omit = omit,
+        top_k: Optional[int] | Omit = omit,
+        top_p: Optional[float] | Omit = omit,
+        anthropic_version: str | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> MessageCreateResponse:
+        """Anthropic Messages API compatible endpoint.
+
+        Generates a model response for the
+        supplied conversation. Authentication accepts either the bearer
+        `Authorization: Bearer <key>` header (SambaNova SDK default) or the `x-api-key`
+        header (Anthropic SDK default); the same API key is used in both cases. When
+        `stream: true` is set, the response is a sequence of Server-Sent Events whose
+        payloads conform to `MessageStreamEvent`; otherwise the response is a single
+        `Message` object.
+
+        Args:
+          max_tokens: Maximum number of tokens to generate. The combined input + output token count is
+              bounded by the model's context window.
+
+          messages: Conversation turns.
+
+          model: The model ID to use (e.g. gpt-oss-120b). See available
+              [models](https://docs.sambanova.ai/docs/en/models/sambacloud-models)
+
+          container: Existing code-execution container ID to reuse. **In v1**: silently dropped
+
+          metadata: Free-form metadata attached to the request. Currently only `user_id` is
+              recognized; additional fields are accepted but ignored.
+
+          service_tier: Service-tier preference. **In v1**: silently dropped
+
+          stop_sequences: Custom strings that, when generated, cause the model to stop.
+
+          stream: If true, the response is a sequence of Server-Sent Events whose payloads conform
+              to `MessageStreamEvent`.
+
+          system: System prompt for the conversation. Accepts either a single string (most common)
+              or an array of text blocks (used when individual segments need `cache_control`
+              markers). Multiple text blocks are joined with newlines and prepended to the
+              conversation as a `role: system` message.
+
+          temperature: Sampling temperature in `[0.0, 2.0]`. Higher values produce more random output,
+              lower values more deterministic. Adjust only one of `temperature`, `top_p`,
+              `top_k`.
+
+          thinking: Controls Anthropic-style extended thinking. **In v1**: only `type:"disabled"` is
+              silently accepted as a no-op; `type:"enabled"` and `type:"adaptive"` return a
+              400 `invalid_request_error` (`unsupported_parameter`).
+
+          tool_choice: How the model should choose from the provided tools.
+
+          tools: Tool definitions the model may call.
+
+          top_k: Top-k sampling. Considers only the K most likely tokens at each step. Set to 0
+              to disable.
+
+          top_p: Nucleus sampling. Considers tokens with cumulative probability mass up to
+              `top_p`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def create(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[message_create_params.Message],
+        model: Union[
+            str,
+            Literal[
+                "Meta-Llama-3.3-70B-Instruct",
+                "Meta-Llama-3.2-1B-Instruct",
+                "Meta-Llama-3.2-3B-Instruct",
+                "Llama-3.2-11B-Vision-Instruct",
+                "Llama-3.2-90B-Vision-Instruct",
+                "Meta-Llama-3.1-8B-Instruct",
+                "Meta-Llama-3.1-70B-Instruct",
+                "Meta-Llama-3.1-405B-Instruct",
+                "Qwen2.5-Coder-32B-Instruct",
+                "Qwen2.5-72B-Instruct",
+                "QwQ-32B-Preview",
+                "Meta-Llama-Guard-3-8B",
+                "DeepSeek-R1",
+                "DeepSeek-R1-0528",
+                "DeepSeek-V3-0324",
+                "DeepSeek-V3.1",
+                "DeepSeek-V3.1-cb",
+                "DeepSeek-V3.1-Terminus",
+                "DeepSeek-V3.2",
+                "DeepSeek-R1-Distill-Llama-70B",
+                "Llama-4-Maverick-17B-128E-Instruct",
+                "Llama-4-Scout-17B-16E-Instruct",
+                "Qwen3-32B",
+                "Qwen3-235B",
+                "Llama-3.3-Swallow-70B-Instruct-v0.4",
+                "gpt-oss-120b",
+                "ALLaM-7B-Instruct-preview",
+                "MiniMax-M2.5",
+                "MiniMax-M2.7",
+                "gemma-3-12b-it",
+            ],
+        ],
+        stream: Literal[True],
+        container: Optional[str] | Omit = omit,
+        metadata: message_create_params.Metadata | Omit = omit,
+        service_tier: Optional[Literal["auto", "standard_only"]] | Omit = omit,
+        stop_sequences: Optional[SequenceNotStr[str]] | Omit = omit,
+        system: Union[str, Iterable[message_create_params.SystemSystemTextBlockArray]] | Omit = omit,
+        temperature: Optional[float] | Omit = omit,
+        thinking: message_create_params.Thinking | Omit = omit,
+        tool_choice: Optional[message_create_params.ToolChoice] | Omit = omit,
+        tools: Optional[Iterable[message_create_params.Tool]] | Omit = omit,
+        top_k: Optional[int] | Omit = omit,
+        top_p: Optional[float] | Omit = omit,
+        anthropic_version: str | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> Stream[MessageStreamEvent]:
+        """Anthropic Messages API compatible endpoint.
+
+        Generates a model response for the
+        supplied conversation. Authentication accepts either the bearer
+        `Authorization: Bearer <key>` header (SambaNova SDK default) or the `x-api-key`
+        header (Anthropic SDK default); the same API key is used in both cases. When
+        `stream: true` is set, the response is a sequence of Server-Sent Events whose
+        payloads conform to `MessageStreamEvent`; otherwise the response is a single
+        `Message` object.
+
+        Args:
+          max_tokens: Maximum number of tokens to generate. The combined input + output token count is
+              bounded by the model's context window.
+
+          messages: Conversation turns.
+
+          model: The model ID to use (e.g. gpt-oss-120b). See available
+              [models](https://docs.sambanova.ai/docs/en/models/sambacloud-models)
+
+          stream: If true, the response is a sequence of Server-Sent Events whose payloads conform
+              to `MessageStreamEvent`.
+
+          container: Existing code-execution container ID to reuse. **In v1**: silently dropped
+
+          metadata: Free-form metadata attached to the request. Currently only `user_id` is
+              recognized; additional fields are accepted but ignored.
+
+          service_tier: Service-tier preference. **In v1**: silently dropped
+
+          stop_sequences: Custom strings that, when generated, cause the model to stop.
+
+          system: System prompt for the conversation. Accepts either a single string (most common)
+              or an array of text blocks (used when individual segments need `cache_control`
+              markers). Multiple text blocks are joined with newlines and prepended to the
+              conversation as a `role: system` message.
+
+          temperature: Sampling temperature in `[0.0, 2.0]`. Higher values produce more random output,
+              lower values more deterministic. Adjust only one of `temperature`, `top_p`,
+              `top_k`.
+
+          thinking: Controls Anthropic-style extended thinking. **In v1**: only `type:"disabled"` is
+              silently accepted as a no-op; `type:"enabled"` and `type:"adaptive"` return a
+              400 `invalid_request_error` (`unsupported_parameter`).
+
+          tool_choice: How the model should choose from the provided tools.
+
+          tools: Tool definitions the model may call.
+
+          top_k: Top-k sampling. Considers only the K most likely tokens at each step. Set to 0
+              to disable.
+
+          top_p: Nucleus sampling. Considers tokens with cumulative probability mass up to
+              `top_p`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def create(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[message_create_params.Message],
+        model: Union[
+            str,
+            Literal[
+                "Meta-Llama-3.3-70B-Instruct",
+                "Meta-Llama-3.2-1B-Instruct",
+                "Meta-Llama-3.2-3B-Instruct",
+                "Llama-3.2-11B-Vision-Instruct",
+                "Llama-3.2-90B-Vision-Instruct",
+                "Meta-Llama-3.1-8B-Instruct",
+                "Meta-Llama-3.1-70B-Instruct",
+                "Meta-Llama-3.1-405B-Instruct",
+                "Qwen2.5-Coder-32B-Instruct",
+                "Qwen2.5-72B-Instruct",
+                "QwQ-32B-Preview",
+                "Meta-Llama-Guard-3-8B",
+                "DeepSeek-R1",
+                "DeepSeek-R1-0528",
+                "DeepSeek-V3-0324",
+                "DeepSeek-V3.1",
+                "DeepSeek-V3.1-cb",
+                "DeepSeek-V3.1-Terminus",
+                "DeepSeek-V3.2",
+                "DeepSeek-R1-Distill-Llama-70B",
+                "Llama-4-Maverick-17B-128E-Instruct",
+                "Llama-4-Scout-17B-16E-Instruct",
+                "Qwen3-32B",
+                "Qwen3-235B",
+                "Llama-3.3-Swallow-70B-Instruct-v0.4",
+                "gpt-oss-120b",
+                "ALLaM-7B-Instruct-preview",
+                "MiniMax-M2.5",
+                "MiniMax-M2.7",
+                "gemma-3-12b-it",
+            ],
+        ],
+        stream: bool,
+        container: Optional[str] | Omit = omit,
+        metadata: message_create_params.Metadata | Omit = omit,
+        service_tier: Optional[Literal["auto", "standard_only"]] | Omit = omit,
+        stop_sequences: Optional[SequenceNotStr[str]] | Omit = omit,
+        system: Union[str, Iterable[message_create_params.SystemSystemTextBlockArray]] | Omit = omit,
+        temperature: Optional[float] | Omit = omit,
+        thinking: message_create_params.Thinking | Omit = omit,
+        tool_choice: Optional[message_create_params.ToolChoice] | Omit = omit,
+        tools: Optional[Iterable[message_create_params.Tool]] | Omit = omit,
+        top_k: Optional[int] | Omit = omit,
+        top_p: Optional[float] | Omit = omit,
+        anthropic_version: str | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> MessageCreateResponse | Stream[MessageStreamEvent]:
+        """Anthropic Messages API compatible endpoint.
+
+        Generates a model response for the
+        supplied conversation. Authentication accepts either the bearer
+        `Authorization: Bearer <key>` header (SambaNova SDK default) or the `x-api-key`
+        header (Anthropic SDK default); the same API key is used in both cases. When
+        `stream: true` is set, the response is a sequence of Server-Sent Events whose
+        payloads conform to `MessageStreamEvent`; otherwise the response is a single
+        `Message` object.
+
+        Args:
+          max_tokens: Maximum number of tokens to generate. The combined input + output token count is
+              bounded by the model's context window.
+
+          messages: Conversation turns.
+
+          model: The model ID to use (e.g. gpt-oss-120b). See available
+              [models](https://docs.sambanova.ai/docs/en/models/sambacloud-models)
+
+          stream: If true, the response is a sequence of Server-Sent Events whose payloads conform
+              to `MessageStreamEvent`.
+
+          container: Existing code-execution container ID to reuse. **In v1**: silently dropped
+
+          metadata: Free-form metadata attached to the request. Currently only `user_id` is
+              recognized; additional fields are accepted but ignored.
+
+          service_tier: Service-tier preference. **In v1**: silently dropped
+
+          stop_sequences: Custom strings that, when generated, cause the model to stop.
+
+          system: System prompt for the conversation. Accepts either a single string (most common)
+              or an array of text blocks (used when individual segments need `cache_control`
+              markers). Multiple text blocks are joined with newlines and prepended to the
+              conversation as a `role: system` message.
+
+          temperature: Sampling temperature in `[0.0, 2.0]`. Higher values produce more random output,
+              lower values more deterministic. Adjust only one of `temperature`, `top_p`,
+              `top_k`.
+
+          thinking: Controls Anthropic-style extended thinking. **In v1**: only `type:"disabled"` is
+              silently accepted as a no-op; `type:"enabled"` and `type:"adaptive"` return a
+              400 `invalid_request_error` (`unsupported_parameter`).
+
+          tool_choice: How the model should choose from the provided tools.
+
+          tools: Tool definitions the model may call.
+
+          top_k: Top-k sampling. Considers only the K most likely tokens at each step. Set to 0
+              to disable.
+
+          top_p: Nucleus sampling. Considers tokens with cumulative probability mass up to
+              `top_p`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["max_tokens", "messages", "model"], ["max_tokens", "messages", "model", "stream"])
+    def create(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[message_create_params.Message],
+        model: Union[
+            str,
+            Literal[
+                "Meta-Llama-3.3-70B-Instruct",
+                "Meta-Llama-3.2-1B-Instruct",
+                "Meta-Llama-3.2-3B-Instruct",
+                "Llama-3.2-11B-Vision-Instruct",
+                "Llama-3.2-90B-Vision-Instruct",
+                "Meta-Llama-3.1-8B-Instruct",
+                "Meta-Llama-3.1-70B-Instruct",
+                "Meta-Llama-3.1-405B-Instruct",
+                "Qwen2.5-Coder-32B-Instruct",
+                "Qwen2.5-72B-Instruct",
+                "QwQ-32B-Preview",
+                "Meta-Llama-Guard-3-8B",
+                "DeepSeek-R1",
+                "DeepSeek-R1-0528",
+                "DeepSeek-V3-0324",
+                "DeepSeek-V3.1",
+                "DeepSeek-V3.1-cb",
+                "DeepSeek-V3.1-Terminus",
+                "DeepSeek-V3.2",
+                "DeepSeek-R1-Distill-Llama-70B",
+                "Llama-4-Maverick-17B-128E-Instruct",
+                "Llama-4-Scout-17B-16E-Instruct",
+                "Qwen3-32B",
+                "Qwen3-235B",
+                "Llama-3.3-Swallow-70B-Instruct-v0.4",
+                "gpt-oss-120b",
+                "ALLaM-7B-Instruct-preview",
+                "MiniMax-M2.5",
+                "MiniMax-M2.7",
+                "gemma-3-12b-it",
+            ],
+        ],
+        container: Optional[str] | Omit = omit,
+        metadata: message_create_params.Metadata | Omit = omit,
+        service_tier: Optional[Literal["auto", "standard_only"]] | Omit = omit,
+        stop_sequences: Optional[SequenceNotStr[str]] | Omit = omit,
+        stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
+        system: Union[str, Iterable[message_create_params.SystemSystemTextBlockArray]] | Omit = omit,
+        temperature: Optional[float] | Omit = omit,
+        thinking: message_create_params.Thinking | Omit = omit,
+        tool_choice: Optional[message_create_params.ToolChoice] | Omit = omit,
+        tools: Optional[Iterable[message_create_params.Tool]] | Omit = omit,
+        top_k: Optional[int] | Omit = omit,
+        top_p: Optional[float] | Omit = omit,
+        anthropic_version: str | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> MessageCreateResponse | Stream[MessageStreamEvent]:
+        extra_headers = {**strip_not_given({"anthropic-version": anthropic_version}), **(extra_headers or {})}
+        return self._post(
+            "/messages",
+            body=maybe_transform(
+                {
+                    "max_tokens": max_tokens,
+                    "messages": messages,
+                    "model": model,
+                    "container": container,
+                    "metadata": metadata,
+                    "service_tier": service_tier,
+                    "stop_sequences": stop_sequences,
+                    "stream": stream,
+                    "system": system,
+                    "temperature": temperature,
+                    "thinking": thinking,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "top_k": top_k,
+                    "top_p": top_p,
+                },
+                message_create_params.MessageCreateParamsStreaming
+                if stream
+                else message_create_params.MessageCreateParamsNonStreaming,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=cast(Any, MessageCreateResponse),  # Union types cannot be passed in as arguments in the type system
+            stream=stream or False,
+            stream_cls=Stream[MessageStreamEvent],
+        )
+
+    def count_tokens(
+        self,
+        *,
+        messages: Iterable[message_count_tokens_params.Message],
+        model: str,
+        system: Union[str, Iterable[message_count_tokens_params.SystemSystemTextBlockArray]] | Omit = omit,
+        thinking: message_count_tokens_params.Thinking | Omit = omit,
+        tool_choice: Optional[message_count_tokens_params.ToolChoice] | Omit = omit,
+        tools: Optional[Iterable[message_count_tokens_params.Tool]] | Omit = omit,
+        anthropic_version: str | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> MessageCountTokensResponse:
+        """Anthropic `count_tokens` compatible endpoint.
+
+        Returns the number of input tokens
+        that would be consumed by a `POST /messages` call with the same prompt content
+        (system, messages, tools, tool_choice). Authentication accepts either the bearer
+        `Authorization: Bearer <key>` header (SambaNova SDK default) or the `x-api-key`
+        header (Anthropic SDK default); the same API key is used in both cases.
+
+        Args:
+          messages: Conversation turns.
+
+          model: Model identifier.
+
+          system: System prompt for the conversation. Accepts either a single string (most common)
+              or an array of text blocks (used when individual segments need `cache_control`
+              markers). Multiple text blocks are joined with newlines and prepended to the
+              conversation as a `role: system` message.
+
+          thinking: Controls Anthropic-style extended thinking. **In v1**: only `type:"disabled"` is
+              silently accepted as a no-op; `type:"enabled"` and `type:"adaptive"` return a
+              400 `invalid_request_error` (`unsupported_parameter`).
+
+          tool_choice: How the model should choose from the provided tools.
+
+          tools: Tool definitions the model may call.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {**strip_not_given({"anthropic-version": anthropic_version}), **(extra_headers or {})}
+        return self._post(
+            "/messages/count_tokens",
+            body=maybe_transform(
+                {
+                    "messages": messages,
+                    "model": model,
+                    "system": system,
+                    "thinking": thinking,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                },
+                message_count_tokens_params.MessageCountTokensParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=MessageCountTokensResponse,
+        )
+
+
+class AsyncMessagesResource(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncMessagesResourceWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/sambanova/sambanova-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncMessagesResourceWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncMessagesResourceWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/sambanova/sambanova-python#with_streaming_response
+        """
+        return AsyncMessagesResourceWithStreamingResponse(self)
+
+    @overload
+    async def create(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[message_create_params.Message],
+        model: Union[
+            str,
+            Literal[
+                "Meta-Llama-3.3-70B-Instruct",
+                "Meta-Llama-3.2-1B-Instruct",
+                "Meta-Llama-3.2-3B-Instruct",
+                "Llama-3.2-11B-Vision-Instruct",
+                "Llama-3.2-90B-Vision-Instruct",
+                "Meta-Llama-3.1-8B-Instruct",
+                "Meta-Llama-3.1-70B-Instruct",
+                "Meta-Llama-3.1-405B-Instruct",
+                "Qwen2.5-Coder-32B-Instruct",
+                "Qwen2.5-72B-Instruct",
+                "QwQ-32B-Preview",
+                "Meta-Llama-Guard-3-8B",
+                "DeepSeek-R1",
+                "DeepSeek-R1-0528",
+                "DeepSeek-V3-0324",
+                "DeepSeek-V3.1",
+                "DeepSeek-V3.1-cb",
+                "DeepSeek-V3.1-Terminus",
+                "DeepSeek-V3.2",
+                "DeepSeek-R1-Distill-Llama-70B",
+                "Llama-4-Maverick-17B-128E-Instruct",
+                "Llama-4-Scout-17B-16E-Instruct",
+                "Qwen3-32B",
+                "Qwen3-235B",
+                "Llama-3.3-Swallow-70B-Instruct-v0.4",
+                "gpt-oss-120b",
+                "ALLaM-7B-Instruct-preview",
+                "MiniMax-M2.5",
+                "MiniMax-M2.7",
+                "gemma-3-12b-it",
+            ],
+        ],
+        container: Optional[str] | Omit = omit,
+        metadata: message_create_params.Metadata | Omit = omit,
+        service_tier: Optional[Literal["auto", "standard_only"]] | Omit = omit,
+        stop_sequences: Optional[SequenceNotStr[str]] | Omit = omit,
+        stream: Optional[Literal[False]] | Omit = omit,
+        system: Union[str, Iterable[message_create_params.SystemSystemTextBlockArray]] | Omit = omit,
+        temperature: Optional[float] | Omit = omit,
+        thinking: message_create_params.Thinking | Omit = omit,
+        tool_choice: Optional[message_create_params.ToolChoice] | Omit = omit,
+        tools: Optional[Iterable[message_create_params.Tool]] | Omit = omit,
+        top_k: Optional[int] | Omit = omit,
+        top_p: Optional[float] | Omit = omit,
+        anthropic_version: str | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> MessageCreateResponse:
+        """Anthropic Messages API compatible endpoint.
+
+        Generates a model response for the
+        supplied conversation. Authentication accepts either the bearer
+        `Authorization: Bearer <key>` header (SambaNova SDK default) or the `x-api-key`
+        header (Anthropic SDK default); the same API key is used in both cases. When
+        `stream: true` is set, the response is a sequence of Server-Sent Events whose
+        payloads conform to `MessageStreamEvent`; otherwise the response is a single
+        `Message` object.
+
+        Args:
+          max_tokens: Maximum number of tokens to generate. The combined input + output token count is
+              bounded by the model's context window.
+
+          messages: Conversation turns.
+
+          model: The model ID to use (e.g. gpt-oss-120b). See available
+              [models](https://docs.sambanova.ai/docs/en/models/sambacloud-models)
+
+          container: Existing code-execution container ID to reuse. **In v1**: silently dropped
+
+          metadata: Free-form metadata attached to the request. Currently only `user_id` is recognized;
+              additional fields are accepted but ignored.
+
+          service_tier: Service-tier preference. **In v1**: silently dropped
+
+          stop_sequences: Custom strings that, when generated, cause the model to stop.
+
+          stream: If true, the response is a sequence of Server-Sent Events whose payloads conform
+              to `MessageStreamEvent`.
+
+          system: System prompt for the conversation. Accepts either a single string (most common)
+              or an array of text blocks (used when individual segments need `cache_control`
+              markers). Multiple text blocks are joined with newlines and prepended to the
+              conversation as a `role: system` message.
+
+          temperature: Sampling temperature in `[0.0, 2.0]`. Higher values produce more random output,
+              lower values more deterministic. Adjust only one of `temperature`, `top_p`,
+              `top_k`.
+
+          thinking: Controls Anthropic-style extended thinking. **In v1**: only `type:"disabled"` is
+              silently accepted as a no-op; `type:"enabled"` and `type:"adaptive"` return a
+              400 `invalid_request_error` (`unsupported_parameter`).
+
+          tool_choice: How the model should choose from the provided tools.
+
+          tools: Tool definitions the model may call.
+
+          top_k: Top-k sampling. Considers only the K most likely tokens at each step. Set to 0
+              to disable.
+
+          top_p: Nucleus sampling. Considers tokens with cumulative probability mass up to
+              `top_p`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[message_create_params.Message],
+        model: Union[
+            str,
+            Literal[
+                "Meta-Llama-3.3-70B-Instruct",
+                "Meta-Llama-3.2-1B-Instruct",
+                "Meta-Llama-3.2-3B-Instruct",
+                "Llama-3.2-11B-Vision-Instruct",
+                "Llama-3.2-90B-Vision-Instruct",
+                "Meta-Llama-3.1-8B-Instruct",
+                "Meta-Llama-3.1-70B-Instruct",
+                "Meta-Llama-3.1-405B-Instruct",
+                "Qwen2.5-Coder-32B-Instruct",
+                "Qwen2.5-72B-Instruct",
+                "QwQ-32B-Preview",
+                "Meta-Llama-Guard-3-8B",
+                "DeepSeek-R1",
+                "DeepSeek-R1-0528",
+                "DeepSeek-V3-0324",
+                "DeepSeek-V3.1",
+                "DeepSeek-V3.1-cb",
+                "DeepSeek-V3.1-Terminus",
+                "DeepSeek-V3.2",
+                "DeepSeek-R1-Distill-Llama-70B",
+                "Llama-4-Maverick-17B-128E-Instruct",
+                "Llama-4-Scout-17B-16E-Instruct",
+                "Qwen3-32B",
+                "Qwen3-235B",
+                "Llama-3.3-Swallow-70B-Instruct-v0.4",
+                "gpt-oss-120b",
+                "ALLaM-7B-Instruct-preview",
+                "MiniMax-M2.5",
+                "MiniMax-M2.7",
+                "gemma-3-12b-it",
+            ],
+        ],
+        stream: Literal[True],
+        container: Optional[str] | Omit = omit,
+        metadata: message_create_params.Metadata | Omit = omit,
+        service_tier: Optional[Literal["auto", "standard_only"]] | Omit = omit,
+        stop_sequences: Optional[SequenceNotStr[str]] | Omit = omit,
+        system: Union[str, Iterable[message_create_params.SystemSystemTextBlockArray]] | Omit = omit,
+        temperature: Optional[float] | Omit = omit,
+        thinking: message_create_params.Thinking | Omit = omit,
+        tool_choice: Optional[message_create_params.ToolChoice] | Omit = omit,
+        tools: Optional[Iterable[message_create_params.Tool]] | Omit = omit,
+        top_k: Optional[int] | Omit = omit,
+        top_p: Optional[float] | Omit = omit,
+        anthropic_version: str | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> AsyncStream[MessageStreamEvent]:
+        """Anthropic Messages API compatible endpoint.
+
+        Generates a model response for the
+        supplied conversation. Authentication accepts either the bearer
+        `Authorization: Bearer <key>` header (SambaNova SDK default) or the `x-api-key`
+        header (Anthropic SDK default); the same API key is used in both cases. When
+        `stream: true` is set, the response is a sequence of Server-Sent Events whose
+        payloads conform to `MessageStreamEvent`; otherwise the response is a single
+        `Message` object.
+
+        Args:
+          max_tokens: Maximum number of tokens to generate. The combined input + output token count is
+              bounded by the model's context window.
+
+          messages: Conversation turns.
+
+          model: The model ID to use (e.g. gpt-oss-120b). See available
+              [models](https://docs.sambanova.ai/docs/en/models/sambacloud-models)
+
+          stream: If true, the response is a sequence of Server-Sent Events whose payloads conform
+              to `MessageStreamEvent`.
+
+          container: Existing code-execution container ID to reuse. **In v1**: silently dropped
+
+          metadata: Free-form metadata attached to the request. Currently only `user_id` is recognized;
+              additional fields are accepted but ignored.
+
+          service_tier: Service-tier preference. **In v1**: silently dropped
+
+          stop_sequences: Custom strings that, when generated, cause the model to stop.
+
+          system: System prompt for the conversation. Accepts either a single string (most common)
+              or an array of text blocks (used when individual segments need `cache_control`
+              markers). Multiple text blocks are joined with newlines and prepended to the
+              conversation as a `role: system` message.
+
+          temperature: Sampling temperature in `[0.0, 2.0]`. Higher values produce more random output,
+              lower values more deterministic. Adjust only one of `temperature`, `top_p`,
+              `top_k`.
+
+          thinking: Controls Anthropic-style extended thinking. **In v1**: only `type:"disabled"` is
+              silently accepted as a no-op; `type:"enabled"` and `type:"adaptive"` return a
+              400 `invalid_request_error` (`unsupported_parameter`).
+
+          tool_choice: How the model should choose from the provided tools.
+
+          tools: Tool definitions the model may call.
+
+          top_k: Top-k sampling. Considers only the K most likely tokens at each step. Set to 0
+              to disable.
+
+          top_p: Nucleus sampling. Considers tokens with cumulative probability mass up to
+              `top_p`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[message_create_params.Message],
+        model: Union[
+            str,
+            Literal[
+                "Meta-Llama-3.3-70B-Instruct",
+                "Meta-Llama-3.2-1B-Instruct",
+                "Meta-Llama-3.2-3B-Instruct",
+                "Llama-3.2-11B-Vision-Instruct",
+                "Llama-3.2-90B-Vision-Instruct",
+                "Meta-Llama-3.1-8B-Instruct",
+                "Meta-Llama-3.1-70B-Instruct",
+                "Meta-Llama-3.1-405B-Instruct",
+                "Qwen2.5-Coder-32B-Instruct",
+                "Qwen2.5-72B-Instruct",
+                "QwQ-32B-Preview",
+                "Meta-Llama-Guard-3-8B",
+                "DeepSeek-R1",
+                "DeepSeek-R1-0528",
+                "DeepSeek-V3-0324",
+                "DeepSeek-V3.1",
+                "DeepSeek-V3.1-cb",
+                "DeepSeek-V3.1-Terminus",
+                "DeepSeek-V3.2",
+                "DeepSeek-R1-Distill-Llama-70B",
+                "Llama-4-Maverick-17B-128E-Instruct",
+                "Llama-4-Scout-17B-16E-Instruct",
+                "Qwen3-32B",
+                "Qwen3-235B",
+                "Llama-3.3-Swallow-70B-Instruct-v0.4",
+                "gpt-oss-120b",
+                "ALLaM-7B-Instruct-preview",
+                "MiniMax-M2.5",
+                "MiniMax-M2.7",
+                "gemma-3-12b-it",
+            ],
+        ],
+        stream: bool,
+        container: Optional[str] | Omit = omit,
+        metadata: message_create_params.Metadata | Omit = omit,
+        service_tier: Optional[Literal["auto", "standard_only"]] | Omit = omit,
+        stop_sequences: Optional[SequenceNotStr[str]] | Omit = omit,
+        system: Union[str, Iterable[message_create_params.SystemSystemTextBlockArray]] | Omit = omit,
+        temperature: Optional[float] | Omit = omit,
+        thinking: message_create_params.Thinking | Omit = omit,
+        tool_choice: Optional[message_create_params.ToolChoice] | Omit = omit,
+        tools: Optional[Iterable[message_create_params.Tool]] | Omit = omit,
+        top_k: Optional[int] | Omit = omit,
+        top_p: Optional[float] | Omit = omit,
+        anthropic_version: str | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> MessageCreateResponse | AsyncStream[MessageStreamEvent]:
+        """Anthropic Messages API compatible endpoint.
+
+        Generates a model response for the
+        supplied conversation. Authentication accepts either the bearer
+        `Authorization: Bearer <key>` header (SambaNova SDK default) or the `x-api-key`
+        header (Anthropic SDK default); the same API key is used in both cases. When
+        `stream: true` is set, the response is a sequence of Server-Sent Events whose
+        payloads conform to `MessageStreamEvent`; otherwise the response is a single
+        `Message` object.
+
+        Args:
+          max_tokens: Maximum number of tokens to generate. The combined input + output token count is
+              bounded by the model's context window.
+
+          messages: Conversation turns.
+
+          model: The model ID to use (e.g. gpt-oss-120b). See available
+              [models](https://docs.sambanova.ai/docs/en/models/sambacloud-models)
+
+          stream: If true, the response is a sequence of Server-Sent Events whose payloads conform
+              to `MessageStreamEvent`.
+
+          container: Existing code-execution container ID to reuse. **In v1**: silently dropped
+
+          metadata: Free-form metadata attached to the request. Currently only `user_id` is recognized;
+              additional fields are accepted but ignored.
+
+          service_tier: Service-tier preference. **In v1**: silently dropped
+
+          stop_sequences: Custom strings that, when generated, cause the model to stop.
+
+          system: System prompt for the conversation. Accepts either a single string (most common)
+              or an array of text blocks (used when individual segments need `cache_control`
+              markers). Multiple text blocks are joined with newlines and prepended to the
+              conversation as a `role: system` message.
+
+          temperature: Sampling temperature in `[0.0, 2.0]`. Higher values produce more random output,
+              lower values more deterministic. Adjust only one of `temperature`, `top_p`,
+              `top_k`.
+
+          thinking: Controls Anthropic-style extended thinking. **In v1**: only `type:"disabled"` is
+              silently accepted as a no-op; `type:"enabled"` and `type:"adaptive"` return a
+              400 `invalid_request_error` (`unsupported_parameter`).
+
+          tool_choice: How the model should choose from the provided tools.
+
+          tools: Tool definitions the model may call.
+
+          top_k: Top-k sampling. Considers only the K most likely tokens at each step. Set to 0
+              to disable.
+
+          top_p: Nucleus sampling. Considers tokens with cumulative probability mass up to
+              `top_p`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["max_tokens", "messages", "model"], ["max_tokens", "messages", "model", "stream"])
+    async def create(
+        self,
+        *,
+        max_tokens: int,
+        messages: Iterable[message_create_params.Message],
+        model: Union[
+            str,
+            Literal[
+                "Meta-Llama-3.3-70B-Instruct",
+                "Meta-Llama-3.2-1B-Instruct",
+                "Meta-Llama-3.2-3B-Instruct",
+                "Llama-3.2-11B-Vision-Instruct",
+                "Llama-3.2-90B-Vision-Instruct",
+                "Meta-Llama-3.1-8B-Instruct",
+                "Meta-Llama-3.1-70B-Instruct",
+                "Meta-Llama-3.1-405B-Instruct",
+                "Qwen2.5-Coder-32B-Instruct",
+                "Qwen2.5-72B-Instruct",
+                "QwQ-32B-Preview",
+                "Meta-Llama-Guard-3-8B",
+                "DeepSeek-R1",
+                "DeepSeek-R1-0528",
+                "DeepSeek-V3-0324",
+                "DeepSeek-V3.1",
+                "DeepSeek-V3.1-cb",
+                "DeepSeek-V3.1-Terminus",
+                "DeepSeek-V3.2",
+                "DeepSeek-R1-Distill-Llama-70B",
+                "Llama-4-Maverick-17B-128E-Instruct",
+                "Llama-4-Scout-17B-16E-Instruct",
+                "Qwen3-32B",
+                "Qwen3-235B",
+                "Llama-3.3-Swallow-70B-Instruct-v0.4",
+                "gpt-oss-120b",
+                "ALLaM-7B-Instruct-preview",
+                "MiniMax-M2.5",
+                "MiniMax-M2.7",
+                "gemma-3-12b-it",
+            ],
+        ],
+        container: Optional[str] | Omit = omit,
+        metadata: message_create_params.Metadata | Omit = omit,
+        service_tier: Optional[Literal["auto", "standard_only"]] | Omit = omit,
+        stop_sequences: Optional[SequenceNotStr[str]] | Omit = omit,
+        stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
+        system: Union[str, Iterable[message_create_params.SystemSystemTextBlockArray]] | Omit = omit,
+        temperature: Optional[float] | Omit = omit,
+        thinking: message_create_params.Thinking | Omit = omit,
+        tool_choice: Optional[message_create_params.ToolChoice] | Omit = omit,
+        tools: Optional[Iterable[message_create_params.Tool]] | Omit = omit,
+        top_k: Optional[int] | Omit = omit,
+        top_p: Optional[float] | Omit = omit,
+        anthropic_version: str | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> MessageCreateResponse | AsyncStream[MessageStreamEvent]:
+        """Runtime implementation backing the `create` overloads; parameter docs live on those overloads."""
+        extra_headers = {**strip_not_given({"anthropic-version": anthropic_version}), **(extra_headers or {})}
+        params = message_create_params
+        # A truthy `stream` selects the streaming params type; the same flag is forwarded to `_post` below.
+        params_type = params.MessageCreateParamsStreaming if stream else params.MessageCreateParamsNonStreaming
+        payload = {
+            "max_tokens": max_tokens,
+            "messages": messages,
+            "model": model,
+            "container": container,
+            "metadata": metadata,
+            "service_tier": service_tier,
+            "stop_sequences": stop_sequences,
+            "stream": stream,
+            "system": system,
+            "temperature": temperature,
+            "thinking": thinking,
+            "tool_choice": tool_choice,
+            "tools": tools,
+            "top_k": top_k,
+            "top_p": top_p,
+        }
+        return await self._post(
+            "/messages",
+            body=await async_maybe_transform(payload, params_type),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=cast(Any, MessageCreateResponse),  # Union types cannot be passed in as arguments in the type system
+            stream=stream or False,
+            stream_cls=AsyncStream[MessageStreamEvent],
+        )
+
+    async def count_tokens(
+        self,
+        *,
+        messages: Iterable[message_count_tokens_params.Message],
+        model: str,
+        system: Union[str, Iterable[message_count_tokens_params.SystemSystemTextBlockArray]] | Omit = omit,
+        thinking: message_count_tokens_params.Thinking | Omit = omit,
+        tool_choice: Optional[message_count_tokens_params.ToolChoice] | Omit = omit,
+        tools: Optional[Iterable[message_count_tokens_params.Tool]] | Omit = omit,
+        anthropic_version: str | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> MessageCountTokensResponse:
+        """Anthropic `count_tokens` compatible endpoint.
+
+        Returns the number of input tokens
+        that would be consumed by a `POST /messages` call with the same prompt content
+        (system, messages, tools, tool_choice). Authentication accepts either the bearer
+        `Authorization: Bearer <key>` header (SambaNova SDK default) or the `x-api-key`
+        header (Anthropic SDK default); the same API key is used in both cases.
+
+        Args:
+          messages: Conversation turns.
+
+          model: Model identifier.
+
+          system: System prompt for the conversation. Accepts either a single string (most common)
+              or an array of text blocks (used when individual segments need `cache_control`
+              markers). Multiple text blocks are joined with newlines and prepended to the
+              conversation as a `role: system` message.
+
+          thinking: Controls Anthropic-style extended thinking. **In v1**: only `type:"disabled"` is
+              silently accepted as a no-op; `type:"enabled"` and `type:"adaptive"` return a
+              400 `invalid_request_error` (`unsupported_parameter`).
+
+          tool_choice: How the model should choose from the provided tools.
+
+          tools: Tool definitions the model may call.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {**strip_not_given({"anthropic-version": anthropic_version}), **(extra_headers or {})}
+        # Collect the prompt fields, then transform them against MessageCountTokensParams before posting.
+        payload = {
+            "messages": messages,
+            "model": model,
+            "system": system,
+            "thinking": thinking,
+            "tool_choice": tool_choice,
+            "tools": tools,
+        }
+        body = await async_maybe_transform(payload, message_count_tokens_params.MessageCountTokensParams)
+        return await self._post(
+            "/messages/count_tokens",
+            body=body,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=MessageCountTokensResponse,
+        )
+
+
+class MessagesResourceWithRawResponse:
+    """Mirror of `MessagesResource` with each method passed through `to_raw_response_wrapper`."""
+
+    def __init__(self, messages: MessagesResource) -> None:
+        wrap = to_raw_response_wrapper
+
+        # The unwrapped resource stays reachable on `_messages`.
+        self._messages = messages
+        self.create = wrap(messages.create)
+        self.count_tokens = wrap(messages.count_tokens)
+
+
+class AsyncMessagesResourceWithRawResponse:
+    """Mirror of `AsyncMessagesResource` with each method passed through `async_to_raw_response_wrapper`."""
+
+    def __init__(self, messages: AsyncMessagesResource) -> None:
+        wrap = async_to_raw_response_wrapper
+
+        # The unwrapped resource stays reachable on `_messages`.
+        self._messages = messages
+        self.create = wrap(messages.create)
+        self.count_tokens = wrap(messages.count_tokens)
+
+
+class MessagesResourceWithStreamingResponse:
+    """Mirror of `MessagesResource` with each method passed through `to_streamed_response_wrapper`."""
+
+    def __init__(self, messages: MessagesResource) -> None:
+        wrap = to_streamed_response_wrapper
+
+        # The unwrapped resource stays reachable on `_messages`.
+        self._messages = messages
+        self.create = wrap(messages.create)
+        self.count_tokens = wrap(messages.count_tokens)
+
+
+class AsyncMessagesResourceWithStreamingResponse:
+    """Mirror of `AsyncMessagesResource` with each method passed through `async_to_streamed_response_wrapper`."""
+
+    def __init__(self, messages: AsyncMessagesResource) -> None:
+        wrap = async_to_streamed_response_wrapper
+
+        # The unwrapped resource stays reachable on `_messages`.
+        self._messages = messages
+        self.create = wrap(messages.create)
+        self.count_tokens = wrap(messages.count_tokens)
diff --git a/src/sambanova/types/__init__.py b/src/sambanova/types/__init__.py
index 7ff1451..bb5bf32 100644
--- a/src/sambanova/types/__init__.py
+++ b/src/sambanova/types/__init__.py
@@ -2,15 +2,21 @@
 
 from __future__ import annotations
 
+from .message import Message as Message
 from .model_response import ModelResponse as ModelResponse
 from .models_response import ModelsResponse as ModelsResponse
 from .response_response import ResponseResponse as ResponseResponse
 from .completion_response import CompletionResponse as CompletionResponse
 from .embeddings_response import EmbeddingsResponse as EmbeddingsResponse
+from .message_stream_event import MessageStreamEvent as MessageStreamEvent
+from .message_create_params import MessageCreateParams as MessageCreateParams
 from .response_stream_event import ResponseStreamEvent as ResponseStreamEvent
 from .response_create_params import ResponseCreateParams as ResponseCreateParams
 from .embedding_create_params import EmbeddingCreateParams as EmbeddingCreateParams
+from .message_create_response import MessageCreateResponse as MessageCreateResponse
 from .completion_create_params import CompletionCreateParams as CompletionCreateParams
 from .response_create_response import ResponseCreateResponse as ResponseCreateResponse
 from .completion_create_response import CompletionCreateResponse as CompletionCreateResponse
 from .completion_stream_response import CompletionStreamResponse as CompletionStreamResponse
+from .message_count_tokens_params import MessageCountTokensParams as MessageCountTokensParams
+from .message_count_tokens_response import MessageCountTokensResponse as MessageCountTokensResponse
diff --git a/src/sambanova/types/message.py b/src/sambanova/types/message.py
new file mode 100644
index 0000000..26719b1
--- /dev/null
+++ b/src/sambanova/types/message.py
@@ -0,0 +1,328 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from .._utils import PropertyInfo
+from .._models import BaseModel
+
+__all__ = [
+    "Message",
+    "Content",
+    "ContentMessageOutputTextBlock",
+    "ContentMessageOutputToolUseBlock",
+    "ContentMessageOutputThinkingBlock",
+    "ContentMessageOutputRedactedThinkingBlock",
+    "ContentMessageOutputServerToolUseBlock",
+    "ContentMessageOutputWebSearchToolResultBlock",
+    "ContentMessageOutputWebFetchToolResultBlock",
+    "ContentMessageOutputCodeExecutionToolResultBlock",
+    "ContentMessageOutputBashCodeExecutionToolResultBlock",
+    "ContentMessageOutputTextEditorCodeExecutionToolResultBlock",
+    "ContentMessageOutputToolSearchToolResultBlock",
+    "ContentMessageOutputContainerUploadBlock",
+    "Usage",
+    "Container",
+    "StopDetails",
+]
+
+
+class ContentMessageOutputTextBlock(BaseModel):
+    """Plain-text segment of the model's response."""
+
+    text: str
+
+    type: Literal["text"]  # discriminator tag for the `Content` union
+
+    citations: Optional[List[Dict[str, object]]] = None
+    """Not emitted in v1."""
+
+
+class ContentMessageOutputToolUseBlock(BaseModel):
+    """Tool call generated by the model."""
+
+    id: str
+    """Unique identifier for this tool call."""
+
+    input: Dict[str, object]
+    """Tool inputs as a JSON object."""
+
+    name: str
+    """Name of the tool being called."""
+
+    type: Literal["tool_use"]  # discriminator tag for the `Content` union
+
+    caller: Optional[Dict[str, object]] = None
+    """Anthropic routing metadata. Always `null` in SambaNova responses."""
+
+
+class ContentMessageOutputThinkingBlock(BaseModel):
+    """Extended-reasoning trace from the model. Emitted by reasoning models."""
+
+    thinking: str
+
+    type: Literal["thinking"]  # discriminator tag for the `Content` union
+
+    signature: Optional[str] = None
+
+
+class ContentMessageOutputRedactedThinkingBlock(BaseModel):
+    """
+    Anthropic compatibility only — SambaNova does not produce encrypted thinking output. Never emitted in responses.
+    """
+
+    data: str
+
+    type: Literal["redacted_thinking"]  # discriminator tag for the `Content` union
+
+
+class ContentMessageOutputServerToolUseBlock(BaseModel):
+    """Anthropic compatibility only — SambaNova does not run server-side tools.
+
+    Never emitted in responses; defined for Anthropic SDK type-parity.
+    """
+
+    id: str
+
+    input: Dict[str, object]
+
+    name: str
+
+    type: Literal["server_tool_use"]  # discriminator tag for the `Content` union
+
+
+class ContentMessageOutputWebSearchToolResultBlock(BaseModel):
+    """Anthropic compatibility only — SambaNova does not run server-side `web_search`.
+
+    Never emitted in responses.
+    """
+
+    content: List[Dict[str, object]]
+
+    tool_use_id: str
+
+    type: Literal["web_search_tool_result"]  # discriminator tag for the `Content` union
+
+
+class ContentMessageOutputWebFetchToolResultBlock(BaseModel):
+    """Anthropic compatibility only — SambaNova does not run server-side `web_fetch`.
+
+    Never emitted in responses.
+    """
+
+    content: Dict[str, object]
+
+    tool_use_id: str
+
+    type: Literal["web_fetch_tool_result"]  # discriminator tag for the `Content` union
+
+
+class ContentMessageOutputCodeExecutionToolResultBlock(BaseModel):
+    """
+    Anthropic compatibility only — SambaNova does not run server-side `code_execution`. Never emitted in responses.
+    """
+
+    content: Dict[str, object]
+
+    tool_use_id: str
+
+    type: Literal["code_execution_tool_result"]  # discriminator tag for the `Content` union
+
+
+class ContentMessageOutputBashCodeExecutionToolResultBlock(BaseModel):
+    """
+    Anthropic compatibility only — SambaNova does not run server-side bash code execution. Never emitted in responses.
+    """
+
+    content: Dict[str, object]
+
+    tool_use_id: str
+
+    type: Literal["bash_code_execution_tool_result"]  # discriminator tag for the `Content` union
+
+
+class ContentMessageOutputTextEditorCodeExecutionToolResultBlock(BaseModel):
+    """
+    Anthropic compatibility only — SambaNova does not run server-side text-editor code execution. Never emitted in responses.
+    """
+
+    content: Dict[str, object]
+
+    tool_use_id: str
+
+    type: Literal["text_editor_code_execution_tool_result"]  # discriminator tag for the `Content` union
+
+
+class ContentMessageOutputToolSearchToolResultBlock(BaseModel):
+    """Anthropic compatibility only — SambaNova does not run server-side `tool_search`.
+
+    Never emitted in responses.
+    """
+
+    content: Dict[str, object]
+
+    tool_use_id: str
+
+    type: Literal["tool_search_tool_result"]  # discriminator tag for the `Content` union
+
+
+class ContentMessageOutputContainerUploadBlock(BaseModel):
+    """
+    Anthropic compatibility only — SambaNova does not produce container_upload blocks (these come from Anthropic's server-side `code_execution` tool). Never emitted in responses.
+    """
+
+    file_id: str
+
+    type: Literal["container_upload"]  # discriminator tag for the `Content` union
+
+
+Content: TypeAlias = Annotated[
+    Union[
+        ContentMessageOutputTextBlock,
+        ContentMessageOutputToolUseBlock,
+        ContentMessageOutputThinkingBlock,
+        ContentMessageOutputRedactedThinkingBlock,
+        ContentMessageOutputServerToolUseBlock,
+        ContentMessageOutputWebSearchToolResultBlock,
+        ContentMessageOutputWebFetchToolResultBlock,
+        ContentMessageOutputCodeExecutionToolResultBlock,
+        ContentMessageOutputBashCodeExecutionToolResultBlock,
+        ContentMessageOutputTextEditorCodeExecutionToolResultBlock,
+        ContentMessageOutputToolSearchToolResultBlock,
+        ContentMessageOutputContainerUploadBlock,
+    ],
+    PropertyInfo(discriminator="type"),  # every member above declares a distinct Literal `type`
+]
+
+
+class Usage(BaseModel):
+    """Token accounting for the request."""
+
+    input_tokens: int
+    """Total tokens in the prompt (system + messages + tools)."""
+
+    output_tokens: int
+    """Total tokens generated by the model."""
+
+    cache_creation: Optional[Dict[str, object]] = None
+    """Anthropic SDK alias for cache write metrics.
+
+    Always `null` in SambaNova responses; use `cache_creation_input_tokens` instead.
+    """
+
+    cache_creation_input_tokens: Optional[int] = None
+    """Tokens written to prompt cache.
+
+    Absent in v1; emitted once prompt caching wiring lands (CP-2897).
+    """
+
+    cache_read_input_tokens: Optional[int] = None
+    """Tokens read from prompt cache.
+
+    Absent in v1; emitted once prompt caching wiring lands (CP-2897).
+    """
+
+    inference_geo: Optional[str] = None
+    """Geographic region that served the request.
+
+    Anthropic compatibility only - SambaNova does not expose geo routing, always
+    `null`.
+    """
+
+    server_tool_use: Optional[Dict[str, object]] = None
+    """Server-tool usage metrics (e.g. `web_search_requests`).
+
+    Anthropic compatibility only — SambaNova does not run server tools, so this
+    field is never emitted.
+    """
+
+    service_tier: Optional[str] = None
+    """Service tier that processed the request.
+
+    Anthropic compatibility only — SambaNova is single-tier and never emits this
+    field.
+    """
+
+
+class Container(BaseModel):
+    """Code-execution container reference.
+
+    Anthropic compatibility only — SambaNova does not run server-side code execution, so this field is never emitted on responses.
+    """
+
+    id: str  # NOTE(review): presumably the container's identifier — confirm against the API spec
+
+    expires_at: str
+    """ISO-8601 timestamp."""
+
+
+class StopDetails(BaseModel):
+    """Refusal stop details.
+
+    Anthropic compatibility only — `refusal` is never emitted as a stop_reason by SambaNova (content filtering is not exposed at the API layer).
+    """
+
+    type: Literal["refusal"]  # same literal as the `refusal` member of `Message.stop_reason`
+
+    category: Optional[Literal["cyber", "bio"]] = None
+
+
+class Message(BaseModel):
+    """Non-streaming response from `POST /messages`.
+
+    Wire-compatible with the official Anthropic Messages API.
+    """
+
+    id: str
+    """Unique identifier for this message."""
+
+    content: List[Content]  # each item is one member of the `Content` union, discriminated by its `type`
+
+    model: str
+    """Model that produced the response."""
+
+    role: Literal["assistant"]
+
+    stop_reason: Optional[
+        Literal[
+            "end_turn",
+            "max_tokens",
+            "tool_use",
+            "pause_turn",
+            "refusal",
+            "stop_sequence",
+            "model_context_window_exceeded",
+        ]
+    ] = None
+    """Reason the model stopped generating.
+
+    SambaNova emits `end_turn`, `max_tokens`, `tool_use`, and `stop_sequence`. The
+    remaining values are defined for Anthropic SDK type-parity but never returned:
+    `pause_turn` (server-tool loop limit, not produced); `refusal` (content filter,
+    not exposed); `model_context_window_exceeded` (folded to `max_tokens`).
+    """
+
+    type: Literal["message"]
+
+    usage: Usage
+    """Token accounting for the request."""
+
+    container: Optional[Container] = None
+    """Code-execution container reference.
+
+    Anthropic compatibility only — SambaNova does not run server-side code
+    execution, so this field is never emitted on responses.
+    """
+
+    stop_details: Optional[StopDetails] = None
+    """Refusal stop details.
+
+    Anthropic compatibility only — `refusal` is never emitted as a stop_reason by
+    SambaNova (content filtering is not exposed at the API layer).
+    """
+
+    stop_sequence: Optional[str] = None
+    """The matched stop sequence that triggered termination.
+
+    Present when `stop_reason` is `stop_sequence`; `null` otherwise.
+    """
diff --git a/src/sambanova/types/message_count_tokens_params.py b/src/sambanova/types/message_count_tokens_params.py
new file mode 100644
index 0000000..f8852c3
--- /dev/null
+++ b/src/sambanova/types/message_count_tokens_params.py
@@ -0,0 +1,979 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Iterable, Optional
+from typing_extensions import Literal, Required, Annotated, TypeAlias, TypedDict
+
+from .._types import SequenceNotStr
+from .._utils import PropertyInfo
+
+__all__ = [
+    "MessageCountTokensParams",
+    "Message",
+    "MessageContentContentBlockArray",
+    "MessageContentContentBlockArrayMessageInputTextBlock",
+    "MessageContentContentBlockArrayMessageInputTextBlockCacheControl",
+    "MessageContentContentBlockArrayMessageInputImageBlock",
+    "MessageContentContentBlockArrayMessageInputImageBlockSource",
+    "MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceBase64",
+    "MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceURL",
+    "MessageContentContentBlockArrayMessageInputImageBlockCacheControl",
+    "MessageContentContentBlockArrayMessageInputToolUseBlock",
+    "MessageContentContentBlockArrayMessageInputToolUseBlockCacheControl",
+    "MessageContentContentBlockArrayMessageInputToolResultBlock",
+    "MessageContentContentBlockArrayMessageInputToolResultBlockCacheControl",
+    "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArray",
+    "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlock",
+    "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlockCacheControl",
+    "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlock",
+    "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSource",
+    "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceBase64",
+    "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceURL",
+    "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockCacheControl",
+    "MessageContentContentBlockArrayMessageInputServerToolUseBlock",
+    "MessageContentContentBlockArrayMessageInputServerToolUseBlockCacheControl",
+    "MessageContentContentBlockArrayMessageInputSearchResultBlock",
+    "MessageContentContentBlockArrayMessageInputSearchResultBlockCacheControl",
+    "MessageContentContentBlockArrayMessageInputSearchResultBlockContent",
+    "MessageContentContentBlockArrayMessageInputSearchResultBlockContentCacheControl",
+    "MessageContentContentBlockArrayMessageInputWebSearchToolResultBlock",
+    "MessageContentContentBlockArrayMessageInputWebSearchToolResultBlockCacheControl",
+    "MessageContentContentBlockArrayMessageInputWebFetchToolResultBlock",
+    "MessageContentContentBlockArrayMessageInputWebFetchToolResultBlockCacheControl",
+    "MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlock",
+    "MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlockCacheControl",
+    "MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlock",
+    "MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlockCacheControl",
+    "MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlock",
+    "MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlockCacheControl",
+    "MessageContentContentBlockArrayMessageInputToolSearchToolResultBlock",
+    "MessageContentContentBlockArrayMessageInputToolSearchToolResultBlockCacheControl",
+    "MessageContentContentBlockArrayMessageInputThinkingBlock",
+    "MessageContentContentBlockArrayMessageInputRedactedThinkingBlock",
+    "MessageContentContentBlockArrayMessageInputContainerUploadBlock",
+    "MessageContentContentBlockArrayMessageInputContainerUploadBlockCacheControl",
+    "MessageContentContentBlockArrayMessageInputDocumentBlock",
+    "MessageContentContentBlockArrayMessageInputDocumentBlockCacheControl",
+    "SystemSystemTextBlockArray",
+    "SystemSystemTextBlockArrayCacheControl",
+    "Thinking",
+    "ThinkingMessageThinkingDisabled",
+    "ThinkingMessageThinkingEnabled",
+    "ThinkingMessageThinkingAdaptive",
+    "ToolChoice",
+    "ToolChoiceMessageToolChoiceAuto",
+    "ToolChoiceMessageToolChoiceAny",
+    "ToolChoiceMessageToolChoiceNone",
+    "ToolChoiceMessageToolChoiceTool",
+    "Tool",
+    "ToolCacheControl",
+]
+
+
+class MessageCountTokensParams(TypedDict, total=False):
+    messages: Required[Iterable[Message]]
+    """Conversation turns."""
+
+    model: Required[str]
+    """Model identifier."""
+
+    system: Union[str, Iterable[SystemSystemTextBlockArray]]
+    """System prompt for the conversation.
+
+    Accepts either a single string (most common) or an array of text blocks (used
+    when individual segments need `cache_control` markers). Multiple text blocks are
+    joined with newlines and prepended to the conversation as a `role: system`
+    message.
+    """
+
+    thinking: Thinking
+    """Controls Anthropic-style extended thinking.
+
+    **In v1**: only `type:"disabled"` is silently accepted as a no-op;
+    `type:"enabled"` and `type:"adaptive"` return a 400 `invalid_request_error`
+    (`unsupported_parameter`).
+    """
+
+    tool_choice: Optional[ToolChoice]
+    """How the model should choose from the provided tools."""
+
+    tools: Optional[Iterable[Tool]]
+    """Tool definitions the model may call."""
+
+    anthropic_version: Annotated[str, PropertyInfo(alias="anthropic-version")]  # aliased to the hyphenated name "anthropic-version"
+
+
+class MessageContentContentBlockArrayMessageInputTextBlockCacheControl(TypedDict, total=False):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1."""
+
+
+class MessageContentContentBlockArrayMessageInputTextBlock(TypedDict, total=False):
+    """Plain-text segment of a message."""
+
+    text: Required[str]
+
+    type: Required[Literal["text"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputTextBlockCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+    citations: Optional[Iterable[Dict[str, object]]]
+
+
+class MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceBase64(TypedDict, total=False):
+    """Inline image data encoded as base64."""
+
+    data: Required[str]
+    """Base64-encoded image bytes (no `data:` URI prefix)."""
+
+    media_type: Required[Literal["image/jpeg", "image/png", "image/gif", "image/webp"]]
+    """MIME type of the image bytes."""
+
+    type: Required[Literal["base64"]]  # tag value for this variant; the URL variant uses "url"
+
+
+class MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceURL(TypedDict, total=False):
+    """HTTPS URL pointing to an image.
+
+    **Returns 400 in v1** — URL fetching is blocked. Use `type:"base64"` instead.
+    """
+
+    type: Required[Literal["url"]]  # tag value for this variant; the inline variant uses "base64"
+
+    url: Required[str]
+
+
+MessageContentContentBlockArrayMessageInputImageBlockSource: TypeAlias = Union[  # image source: base64 or URL variant
+    MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceBase64,
+    MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceURL,
+]
+
+
+class MessageContentContentBlockArrayMessageInputImageBlockCacheControl(TypedDict, total=False):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1."""
+
+
+class MessageContentContentBlockArrayMessageInputImageBlock(TypedDict, total=False):
+    """Image content.
+
+    Only `source.type:"base64"` is supported in v1; URL sources return 400.
+    """
+
+    source: Required[MessageContentContentBlockArrayMessageInputImageBlockSource]
+    """Image source: inline base64 data, or an HTTPS URL (URL sources return 400 in v1)."""
+
+    type: Required[Literal["image"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputImageBlockCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+
+class MessageContentContentBlockArrayMessageInputToolUseBlockCacheControl(TypedDict, total=False):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1."""
+
+
+class MessageContentContentBlockArrayMessageInputToolUseBlock(TypedDict, total=False):
+    """A prior assistant turn that invoked a tool."""
+
+    id: Required[str]
+    """Unique identifier for the tool call (used to correlate `tool_result`)."""
+
+    input: Required[Dict[str, object]]
+    """Tool inputs as a JSON object."""
+
+    name: Required[str]
+    """Name of the tool being invoked."""
+
+    type: Required[Literal["tool_use"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputToolUseBlockCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+
+class MessageContentContentBlockArrayMessageInputToolResultBlockCacheControl(TypedDict, total=False):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1."""
+
+
+class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlockCacheControl(
+    TypedDict, total=False
+):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1."""
+
+
+class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlock(
+    TypedDict, total=False
+):
+    """Plain-text segment of a message."""
+
+    text: Required[str]
+
+    type: Required[Literal["text"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlockCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+    citations: Optional[Iterable[Dict[str, object]]]
+
+
+class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceBase64(
+    TypedDict, total=False
+):
+    """Inline image data encoded as base64."""
+
+    data: Required[str]
+    """Base64-encoded image bytes (no `data:` URI prefix)."""
+
+    media_type: Required[Literal["image/jpeg", "image/png", "image/gif", "image/webp"]]
+    """MIME type of the image bytes."""
+
+    type: Required[Literal["base64"]]  # tag value for this variant; the URL variant uses "url"
+
+
+class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceURL(
+    TypedDict, total=False
+):
+    """HTTPS URL pointing to an image.
+
+    **Returns 400 in v1** — URL fetching is blocked. Use `type:"base64"` instead.
+    """
+
+    type: Required[Literal["url"]]  # tag value for this variant; the inline variant uses "base64"
+
+    url: Required[str]
+
+
+MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSource: TypeAlias = Union[  # image source: base64 or URL variant
+    MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceBase64,
+    MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceURL,
+]
+
+
+class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockCacheControl(
+    TypedDict, total=False
+):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1."""
+
+
+class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlock(
+    TypedDict, total=False
+):
+    """Image content.
+
+    Only `source.type:"base64"` is supported in v1; URL sources return 400.
+    """
+
+    source: Required[
+        MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSource
+    ]
+    """Image source: inline base64 data, or an HTTPS URL (URL sources return 400 in v1)."""
+
+    type: Required[Literal["image"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+
+MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArray: TypeAlias = Union[  # one tool-result content item: text or image block
+    MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlock,
+    MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlock,
+]
+
+
+class MessageContentContentBlockArrayMessageInputToolResultBlock(TypedDict, total=False):
+    """Result of a prior tool call."""
+
+    tool_use_id: Required[str]
+    """ID of the `tool_use` block this result corresponds to."""
+
+    type: Required[Literal["tool_result"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputToolResultBlockCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+    content: Union[
+        str, Iterable[MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArray]
+    ]  # either a plain string or an iterable of text/image content blocks
+
+    is_error: Optional[bool]
+    """Silently dropped in v1."""
+
+
+class MessageContentContentBlockArrayMessageInputServerToolUseBlockCacheControl(TypedDict, total=False):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1."""
+
+
+class MessageContentContentBlockArrayMessageInputServerToolUseBlock(TypedDict, total=False):
+    """Anthropic compatibility only — SambaNova does not run server-side tools.
+
+    A prior assistant turn that invoked an Anthropic-hosted tool (web_search, code_execution, etc.). Accepted in conversation history (e.g. replaying an Anthropic-served session) but never originates from a SambaNova response. New `server_tool_use`-type tool definitions on outgoing requests are rejected with 400 `unsupported_tool_type`.
+    """
+
+    id: Required[str]
+
+    input: Required[Dict[str, object]]
+
+    name: Required[str]
+
+    type: Required[Literal["server_tool_use"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputServerToolUseBlockCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+
+class MessageContentContentBlockArrayMessageInputSearchResultBlockCacheControl(TypedDict, total=False):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1."""
+
+
+class MessageContentContentBlockArrayMessageInputSearchResultBlockContentCacheControl(TypedDict, total=False):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1."""
+
+
+class MessageContentContentBlockArrayMessageInputSearchResultBlockContent(TypedDict, total=False):
+    """Plain-text segment of a message."""
+
+    text: Required[str]
+
+    type: Required[Literal["text"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputSearchResultBlockContentCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+    citations: Optional[Iterable[Dict[str, object]]]
+
+
+class MessageContentContentBlockArrayMessageInputSearchResultBlock(TypedDict, total=False):
+    """Inline search result content.
+
+    In v1 the `title`, `source`, and `content[]` text are extracted into a text block; citations are dropped.
+    """
+
+    type: Required[Literal["search_result"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputSearchResultBlockCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+    citations: Optional[Dict[str, object]]
+
+    content: Iterable[MessageContentContentBlockArrayMessageInputSearchResultBlockContent]
+
+    source: str
+
+    title: str
+
+
+class MessageContentContentBlockArrayMessageInputWebSearchToolResultBlockCacheControl(TypedDict, total=False):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1."""
+
+
+class MessageContentContentBlockArrayMessageInputWebSearchToolResultBlock(TypedDict, total=False):
+    """Anthropic compatibility only — SambaNova does not run server-side `web_search`.
+
+    Echo of a prior Anthropic-served `web_search` tool call; accepted in conversation history but never originates from a SambaNova response. When present, only `title` (`url`) per result is extracted into a tool message.
+    """
+
+    content: Required[Iterable[Dict[str, object]]]
+
+    tool_use_id: Required[str]
+
+    type: Required[Literal["web_search_tool_result"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputWebSearchToolResultBlockCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+
+class MessageContentContentBlockArrayMessageInputWebFetchToolResultBlockCacheControl(TypedDict, total=False):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1."""
+
+
+class MessageContentContentBlockArrayMessageInputWebFetchToolResultBlock(TypedDict, total=False):
+    """Anthropic compatibility only — SambaNova does not run server-side `web_fetch`.
+
+    Echo of a prior Anthropic-served `web_fetch` tool call; accepted in conversation history but never originates from a SambaNova response. When present, only the text content is extracted.
+    """
+
+    content: Required[Dict[str, object]]
+
+    tool_use_id: Required[str]
+
+    type: Required[Literal["web_fetch_tool_result"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputWebFetchToolResultBlockCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+
+class MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlockCacheControl(TypedDict, total=False):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1."""
+
+
+class MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlock(TypedDict, total=False):
+    """
+    Anthropic compatibility only — SambaNova does not run server-side `code_execution`. Echo of a prior Anthropic-served `code_execution` tool call; accepted in conversation history but never originates from a SambaNova response. When present, only `stdout`, `stderr`, and `return_code` are extracted; image output is dropped.
+    """
+
+    content: Required[Dict[str, object]]
+
+    tool_use_id: Required[str]
+
+    type: Required[Literal["code_execution_tool_result"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlockCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+
+class MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlockCacheControl(TypedDict, total=False):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1."""
+
+
+class MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlock(TypedDict, total=False):
+    """
+    Anthropic compatibility only — SambaNova does not run server-side bash code execution. Echo of a prior Anthropic-served bash tool call; accepted in conversation history but never originates from a SambaNova response. Same lossy extraction as `code_execution_tool_result`.
+    """
+
+    content: Required[Dict[str, object]]
+
+    tool_use_id: Required[str]
+
+    type: Required[Literal["bash_code_execution_tool_result"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlockCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+
+class MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlockCacheControl(
+    TypedDict, total=False
+):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1."""
+
+
+class MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlock(TypedDict, total=False):
+    """
+    Anthropic compatibility only — SambaNova does not run server-side text-editor code execution. Echo of a prior Anthropic-served text-editor tool call; accepted in conversation history but never originates from a SambaNova response. When present, only file content is extracted; metadata (line count, file type) is dropped.
+    """
+
+    content: Required[Dict[str, object]]
+
+    tool_use_id: Required[str]
+
+    type: Required[Literal["text_editor_code_execution_tool_result"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlockCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+
+class MessageContentContentBlockArrayMessageInputToolSearchToolResultBlockCacheControl(TypedDict, total=False):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1."""
+
+
+class MessageContentContentBlockArrayMessageInputToolSearchToolResultBlock(TypedDict, total=False):
+    """Anthropic compatibility only — SambaNova does not run server-side `tool_search`.
+
+    Echo of a prior Anthropic-served `tool_search` tool call; accepted in conversation history but never originates from a SambaNova response. When present, an empty string is emitted to the tool message (no plain-text fields).
+    """
+
+    content: Required[Dict[str, object]]
+
+    tool_use_id: Required[str]
+
+    type: Required[Literal["tool_search_tool_result"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputToolSearchToolResultBlockCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+
+class MessageContentContentBlockArrayMessageInputThinkingBlock(TypedDict, total=False):
+    """Extended-reasoning trace from a prior assistant turn."""
+
+    signature: Required[str]  # NOTE(review): semantics not shown in this file — confirm against the API spec
+
+    thinking: Required[str]  # presumably the reasoning text itself — TODO confirm
+
+    type: Required[Literal["thinking"]]  # single-valued literal tag for this block kind
+
+
+class MessageContentContentBlockArrayMessageInputRedactedThinkingBlock(TypedDict, total=False):
+    """
+    Anthropic compatibility only — SambaNova does not produce encrypted thinking output. Echo of a prior Anthropic-served response where `thinking.display:"omitted"` was set. Accepted in conversation history but never originates from a SambaNova response. Silently dropped on input.
+    """
+
+    data: Required[str]  # presumably the opaque encrypted payload — TODO confirm against the API spec
+
+    type: Required[Literal["redacted_thinking"]]  # single-valued literal tag for this block kind
+
+
+class MessageContentContentBlockArrayMessageInputContainerUploadBlockCacheControl(TypedDict, total=False):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1."""
+
+
+class MessageContentContentBlockArrayMessageInputContainerUploadBlock(TypedDict, total=False):
+    """
+    Anthropic compatibility only — SambaNova does not produce container_upload blocks (these come from Anthropic's server-side `code_execution` tool). Accepted in conversation history but never originates from a SambaNova response. Silently dropped on input.
+    """
+
+    file_id: Required[str]
+
+    type: Required[Literal["container_upload"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputContainerUploadBlockCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+
+class MessageContentContentBlockArrayMessageInputDocumentBlockCacheControl(TypedDict, total=False):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputDocumentBlock(TypedDict, total=False):
+    """PDF or document content.
+
+    **Returns 400** — no document-extraction pipeline available.
+    """
+
+    source: Required[Dict[str, object]]
+
+    type: Required[Literal["document"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputDocumentBlockCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+    citations: Optional[Dict[str, object]]
+
+    context: Optional[str]
+
+    title: Optional[str]
+
+
+MessageContentContentBlockArray: TypeAlias = Union[
+    MessageContentContentBlockArrayMessageInputTextBlock,
+    MessageContentContentBlockArrayMessageInputImageBlock,
+    MessageContentContentBlockArrayMessageInputToolUseBlock,
+    MessageContentContentBlockArrayMessageInputToolResultBlock,
+    MessageContentContentBlockArrayMessageInputServerToolUseBlock,
+    MessageContentContentBlockArrayMessageInputSearchResultBlock,
+    MessageContentContentBlockArrayMessageInputWebSearchToolResultBlock,
+    MessageContentContentBlockArrayMessageInputWebFetchToolResultBlock,
+    MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlock,
+    MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlock,
+    MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlock,
+    MessageContentContentBlockArrayMessageInputToolSearchToolResultBlock,
+    MessageContentContentBlockArrayMessageInputThinkingBlock,
+    MessageContentContentBlockArrayMessageInputRedactedThinkingBlock,
+    MessageContentContentBlockArrayMessageInputContainerUploadBlock,
+    MessageContentContentBlockArrayMessageInputDocumentBlock,
+]
+
+
+class Message(TypedDict, total=False):
+    """A turn in the conversation."""
+
+    content: Required[Union[str, Iterable[MessageContentContentBlockArray]]]
+
+    role: Required[Literal["user", "assistant"]]
+    """Conversational role.
+
+    `user` for the human-side turn, `assistant` for prior model output.
+    """
+
+
+class SystemSystemTextBlockArrayCacheControl(TypedDict, total=False):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class SystemSystemTextBlockArray(TypedDict, total=False):
+    """A text segment within a structured `system` prompt array.
+
+    Multiple text blocks are concatenated (with newlines) and prepended to the conversation as a `role: system` message at the chat-completions layer.
+    """
+
+    text: Required[str]
+    """Plain-text content of the system prompt segment."""
+
+    type: Required[Literal["text"]]
+
+    cache_control: SystemSystemTextBlockArrayCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+    citations: Optional[Iterable[Dict[str, object]]]
+    """Optional citations. **In v1**: silently dropped"""
+
+
+class ThinkingMessageThinkingDisabled(TypedDict, total=False):
+    """Disables Anthropic-style extended thinking.
+
+    **In v1**: silently accepted as a no-op
+    """
+
+    type: Required[Literal["disabled"]]
+
+
+class ThinkingMessageThinkingEnabled(TypedDict, total=False):
+    """Enables Anthropic-style extended thinking with a fixed budget.
+
+    **In v1**: returns a 400 `invalid_request_error` (`unsupported_parameter`).
+    """
+
+    budget_tokens: Required[int]
+    """
+    Maximum tokens the model may spend on extended thinking before producing the
+    final answer.
+    """
+
+    type: Required[Literal["enabled"]]
+
+
+class ThinkingMessageThinkingAdaptive(TypedDict, total=False):
+    """Enables Anthropic-style adaptive extended thinking.
+
+    **In v1**: returns a 400 `invalid_request_error` (`unsupported_parameter`).
+    """
+
+    type: Required[Literal["adaptive"]]
+
+    budget_tokens: Optional[int]
+    """Optional upper bound on tokens spent on adaptive thinking.
+
+    When omitted, the backend chooses based on prompt complexity.
+    """
+
+
+Thinking: TypeAlias = Union[
+    ThinkingMessageThinkingDisabled, ThinkingMessageThinkingEnabled, ThinkingMessageThinkingAdaptive
+]
+
+
+class ToolChoiceMessageToolChoiceAuto(TypedDict, total=False):
+    """Let the model decide whether and which tool to use."""
+
+    type: Required[Literal["auto"]]
+
+    disable_parallel_tool_use: Optional[bool]
+    """Silently dropped."""
+
+
+class ToolChoiceMessageToolChoiceAny(TypedDict, total=False):
+    """Require the model to call one of the provided tools."""
+
+    type: Required[Literal["any"]]
+
+    disable_parallel_tool_use: Optional[bool]
+    """Silently dropped."""
+
+
+class ToolChoiceMessageToolChoiceNone(TypedDict, total=False):
+    """Forbid the model from calling any tool."""
+
+    type: Required[Literal["none"]]
+
+
+class ToolChoiceMessageToolChoiceTool(TypedDict, total=False):
+    """Force the model to call a specific tool by name."""
+
+    name: Required[str]
+    """Name of the required tool."""
+
+    type: Required[Literal["tool"]]
+
+    disable_parallel_tool_use: Optional[bool]
+    """Silently dropped."""
+
+
+ToolChoice: TypeAlias = Union[
+    ToolChoiceMessageToolChoiceAuto,
+    ToolChoiceMessageToolChoiceAny,
+    ToolChoiceMessageToolChoiceNone,
+    ToolChoiceMessageToolChoiceTool,
+]
+
+
+class ToolCacheControl(TypedDict, total=False):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class Tool(TypedDict, total=False):
+    """User-defined function tool definition.
+
+    Only custom function tools are supported (Anthropic's `type:"custom"` style or the absent-type Beta style). Anthropic-hosted server tools (`web_search`, `code_execution`, `bash`, `text_editor`, `memory`, `tool_search` variants) return 400 `unsupported_tool_type` if sent.
+    """
+
+    name: Required[str]
+    """Tool name. Must match `^[a-zA-Z0-9_-]+$`."""
+
+    allowed_callers: Optional[SequenceNotStr[str]]
+    """Silently dropped."""
+
+    cache_control: Optional[ToolCacheControl]
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+    defer_loading: Optional[bool]
+    """Silently dropped."""
+
+    description: Optional[str]
+    """Human-readable description of when the tool should be used."""
+
+    eager_input_streaming: Optional[bool]
+    """Silently dropped."""
+
+    input_examples: Optional[Iterable[Dict[str, object]]]
+    """Silently dropped."""
+
+    input_schema: Optional[Dict[str, object]]
+    """JSON Schema describing the tool's expected input.
+
+    Required by the Anthropic spec; accepted as optional by SambaNova.
+    """
+
+    strict: Optional[bool]
+    """Silently dropped."""
+
+    type: Optional[Literal["custom"]]
+    """Tool-type discriminator.
+
+    May be omitted (defaults to custom) or set to `custom`. Other values return 400
+    `unsupported_tool_type`.
+    """
diff --git a/src/sambanova/types/message_count_tokens_response.py b/src/sambanova/types/message_count_tokens_response.py
new file mode 100644
index 0000000..e3e8adf
--- /dev/null
+++ b/src/sambanova/types/message_count_tokens_response.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .._models import BaseModel
+
+__all__ = ["MessageCountTokensResponse"]
+
+
+class MessageCountTokensResponse(BaseModel):
+    """Token count for the supplied prompt."""
+
+    input_tokens: int
+    """Total tokens in the prompt (system + messages + tools)."""
diff --git a/src/sambanova/types/message_create_params.py b/src/sambanova/types/message_create_params.py
new file mode 100644
index 0000000..4da6620
--- /dev/null
+++ b/src/sambanova/types/message_create_params.py
@@ -0,0 +1,1095 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Iterable, Optional
+from typing_extensions import Literal, Required, Annotated, TypeAlias, TypedDict
+
+from .._types import SequenceNotStr
+from .._utils import PropertyInfo
+
+__all__ = [
+    "MessageCreateParamsBase",
+    "Message",
+    "MessageContentContentBlockArray",
+    "MessageContentContentBlockArrayMessageInputTextBlock",
+    "MessageContentContentBlockArrayMessageInputTextBlockCacheControl",
+    "MessageContentContentBlockArrayMessageInputImageBlock",
+    "MessageContentContentBlockArrayMessageInputImageBlockSource",
+    "MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceBase64",
+    "MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceURL",
+    "MessageContentContentBlockArrayMessageInputImageBlockCacheControl",
+    "MessageContentContentBlockArrayMessageInputToolUseBlock",
+    "MessageContentContentBlockArrayMessageInputToolUseBlockCacheControl",
+    "MessageContentContentBlockArrayMessageInputToolResultBlock",
+    "MessageContentContentBlockArrayMessageInputToolResultBlockCacheControl",
+    "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArray",
+    "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlock",
+    "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlockCacheControl",
+    "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlock",
+    "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSource",
+    "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceBase64",
+    "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceURL",
+    "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockCacheControl",
+    "MessageContentContentBlockArrayMessageInputServerToolUseBlock",
+    "MessageContentContentBlockArrayMessageInputServerToolUseBlockCacheControl",
+    "MessageContentContentBlockArrayMessageInputSearchResultBlock",
+    "MessageContentContentBlockArrayMessageInputSearchResultBlockCacheControl",
+    "MessageContentContentBlockArrayMessageInputSearchResultBlockContent",
+    "MessageContentContentBlockArrayMessageInputSearchResultBlockContentCacheControl",
+    "MessageContentContentBlockArrayMessageInputWebSearchToolResultBlock",
+    "MessageContentContentBlockArrayMessageInputWebSearchToolResultBlockCacheControl",
+    "MessageContentContentBlockArrayMessageInputWebFetchToolResultBlock",
+    "MessageContentContentBlockArrayMessageInputWebFetchToolResultBlockCacheControl",
+    "MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlock",
+    "MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlockCacheControl",
+    "MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlock",
+    "MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlockCacheControl",
+    "MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlock",
+    "MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlockCacheControl",
+    "MessageContentContentBlockArrayMessageInputToolSearchToolResultBlock",
+    "MessageContentContentBlockArrayMessageInputToolSearchToolResultBlockCacheControl",
+    "MessageContentContentBlockArrayMessageInputThinkingBlock",
+    "MessageContentContentBlockArrayMessageInputRedactedThinkingBlock",
+    "MessageContentContentBlockArrayMessageInputContainerUploadBlock",
+    "MessageContentContentBlockArrayMessageInputContainerUploadBlockCacheControl",
+    "MessageContentContentBlockArrayMessageInputDocumentBlock",
+    "MessageContentContentBlockArrayMessageInputDocumentBlockCacheControl",
+    "Metadata",
+    "SystemSystemTextBlockArray",
+    "SystemSystemTextBlockArrayCacheControl",
+    "Thinking",
+    "ThinkingMessageThinkingDisabled",
+    "ThinkingMessageThinkingEnabled",
+    "ThinkingMessageThinkingAdaptive",
+    "ToolChoice",
+    "ToolChoiceMessageToolChoiceAuto",
+    "ToolChoiceMessageToolChoiceAny",
+    "ToolChoiceMessageToolChoiceNone",
+    "ToolChoiceMessageToolChoiceTool",
+    "Tool",
+    "ToolCacheControl",
+    "MessageCreateParamsNonStreaming",
+    "MessageCreateParamsStreaming",
+]
+
+
+class MessageCreateParamsBase(TypedDict, total=False):
+    max_tokens: Required[int]
+    """Maximum number of tokens to generate.
+
+    The combined input + output token count is bounded by the model's context
+    window.
+    """
+
+    messages: Required[Iterable[Message]]
+    """Conversation turns."""
+
+    model: Required[
+        Union[
+            str,
+            Literal[
+                "Meta-Llama-3.3-70B-Instruct",
+                "Meta-Llama-3.2-1B-Instruct",
+                "Meta-Llama-3.2-3B-Instruct",
+                "Llama-3.2-11B-Vision-Instruct",
+                "Llama-3.2-90B-Vision-Instruct",
+                "Meta-Llama-3.1-8B-Instruct",
+                "Meta-Llama-3.1-70B-Instruct",
+                "Meta-Llama-3.1-405B-Instruct",
+                "Qwen2.5-Coder-32B-Instruct",
+                "Qwen2.5-72B-Instruct",
+                "QwQ-32B-Preview",
+                "Meta-Llama-Guard-3-8B",
+                "DeepSeek-R1",
+                "DeepSeek-R1-0528",
+                "DeepSeek-V3-0324",
+                "DeepSeek-V3.1",
+                "DeepSeek-V3.1-cb",
+                "DeepSeek-V3.1-Terminus",
+                "DeepSeek-V3.2",
+                "DeepSeek-R1-Distill-Llama-70B",
+                "Llama-4-Maverick-17B-128E-Instruct",
+                "Llama-4-Scout-17B-16E-Instruct",
+                "Qwen3-32B",
+                "Qwen3-235B",
+                "Llama-3.3-Swallow-70B-Instruct-v0.4",
+                "gpt-oss-120b",
+                "ALLaM-7B-Instruct-preview",
+                "MiniMax-M2.5",
+                "MiniMax-M2.7",
+                "gemma-3-12b-it",
+            ],
+        ]
+    ]
+    """The model ID to use (e.g. gpt-oss-120b).
+
+    See available
+    [models](https://docs.sambanova.ai/docs/en/models/sambacloud-models)
+    """
+
+    container: Optional[str]
+    """Existing code-execution container ID to reuse. **In v1**: silently dropped"""
+
+    metadata: Metadata
+    """Free-form metadata attached to the request.
+
+    Currently only `user_id` is recognized; additional fields are accepted but ignored.
+    """
+
+    service_tier: Optional[Literal["auto", "standard_only"]]
+    """Service-tier preference. **In v1**: silently dropped"""
+
+    stop_sequences: Optional[SequenceNotStr[str]]
+    """Custom strings that, when generated, cause the model to stop."""
+
+    system: Union[str, Iterable[SystemSystemTextBlockArray]]
+    """System prompt for the conversation.
+
+    Accepts either a single string (most common) or an array of text blocks (used
+    when individual segments need `cache_control` markers). Multiple text blocks are
+    joined with newlines and prepended to the conversation as a `role: system`
+    message.
+    """
+
+    temperature: Optional[float]
+    """Sampling temperature in `[0.0, 2.0]`.
+
+    Higher values produce more random output, lower values more deterministic.
+    Adjust only one of `temperature`, `top_p`, `top_k`.
+    """
+
+    thinking: Thinking
+    """Controls Anthropic-style extended thinking.
+
+    **In v1**: only `type:"disabled"` is silently accepted as a no-op;
+    `type:"enabled"` and `type:"adaptive"` return a 400 `invalid_request_error`
+    (`unsupported_parameter`).
+    """
+
+    tool_choice: Optional[ToolChoice]
+    """How the model should choose from the provided tools."""
+
+    tools: Optional[Iterable[Tool]]
+    """Tool definitions the model may call."""
+
+    top_k: Optional[int]
+    """Top-k sampling.
+
+    Considers only the K most likely tokens at each step. Set to 0 to disable.
+    """
+
+    top_p: Optional[float]
+    """Nucleus sampling.
+
+    Considers tokens with cumulative probability mass up to `top_p`.
+    """
+
+    anthropic_version: Annotated[str, PropertyInfo(alias="anthropic-version")]
+
+
+class MessageContentContentBlockArrayMessageInputTextBlockCacheControl(TypedDict, total=False):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputTextBlock(TypedDict, total=False):
+    """Plain-text segment of a message."""
+
+    text: Required[str]
+
+    type: Required[Literal["text"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputTextBlockCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+    citations: Optional[Iterable[Dict[str, object]]]
+
+
+class MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceBase64(TypedDict, total=False):
+    """Inline image data encoded as base64."""
+
+    data: Required[str]
+    """Base64-encoded image bytes (no `data:` URI prefix)."""
+
+    media_type: Required[Literal["image/jpeg", "image/png", "image/gif", "image/webp"]]
+    """MIME type of the image bytes."""
+
+    type: Required[Literal["base64"]]
+
+
+class MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceURL(TypedDict, total=False):
+    """HTTPS URL pointing to an image.
+
+    **Returns 400 in v1** — URL fetching is blocked. Use `type:"base64"` instead.
+    """
+
+    type: Required[Literal["url"]]
+
+    url: Required[str]
+
+
+MessageContentContentBlockArrayMessageInputImageBlockSource: TypeAlias = Union[
+    MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceBase64,
+    MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceURL,
+]
+
+
+class MessageContentContentBlockArrayMessageInputImageBlockCacheControl(TypedDict, total=False):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputImageBlock(TypedDict, total=False):
+    """Image content.
+
+    Only `source.type:"base64"` is supported in v1; URL sources return 400.
+    """
+
+    source: Required[MessageContentContentBlockArrayMessageInputImageBlockSource]
+    """Image source: inline base64 data or an HTTPS URL (URL sources return 400 in v1)."""
+
+    type: Required[Literal["image"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputImageBlockCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+
+class MessageContentContentBlockArrayMessageInputToolUseBlockCacheControl(TypedDict, total=False):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputToolUseBlock(TypedDict, total=False):
+    """A prior assistant turn that invoked a tool."""
+
+    id: Required[str]
+    """Unique identifier for the tool call (used to correlate `tool_result`)."""
+
+    input: Required[Dict[str, object]]
+    """Tool inputs as a JSON object."""
+
+    name: Required[str]
+    """Name of the tool being invoked."""
+
+    type: Required[Literal["tool_use"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputToolUseBlockCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+
+class MessageContentContentBlockArrayMessageInputToolResultBlockCacheControl(TypedDict, total=False):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlockCacheControl(
+    TypedDict, total=False
+):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlock(
+    TypedDict, total=False
+):
+    """Plain-text segment of a message."""
+
+    text: Required[str]
+
+    type: Required[Literal["text"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlockCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+    citations: Optional[Iterable[Dict[str, object]]]
+
+
+class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceBase64(
+    TypedDict, total=False
+):
+    """Inline image data encoded as base64."""
+
+    data: Required[str]
+    """Base64-encoded image bytes (no `data:` URI prefix)."""
+
+    media_type: Required[Literal["image/jpeg", "image/png", "image/gif", "image/webp"]]
+    """MIME type of the image bytes."""
+
+    type: Required[Literal["base64"]]
+
+
+class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceURL(
+    TypedDict, total=False
+):
+    """HTTPS URL pointing to an image.
+
+    **Returns 400 in v1** — URL fetching is blocked. Use `type:"base64"` instead.
+    """
+
+    type: Required[Literal["url"]]
+
+    url: Required[str]
+
+
+MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSource: TypeAlias = Union[
+    MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceBase64,
+    MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceURL,
+]
+
+
+class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockCacheControl(
+    TypedDict, total=False
+):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlock(
+    TypedDict, total=False
+):
+    """Image content.
+
+    Only `source.type:"base64"` is supported in v1; URL sources return 400.
+    """
+
+    source: Required[
+        MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSource
+    ]
+    """Image source: inline base64 data or an HTTPS URL (URL sources return 400 in v1)."""
+
+    type: Required[Literal["image"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+
+MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArray: TypeAlias = Union[
+    MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlock,
+    MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlock,
+]
+
+
+class MessageContentContentBlockArrayMessageInputToolResultBlock(TypedDict, total=False):
+    """Result of a prior tool call."""
+
+    tool_use_id: Required[str]
+    """ID of the `tool_use` block this result corresponds to."""
+
+    type: Required[Literal["tool_result"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputToolResultBlockCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+    content: Union[
+        str, Iterable[MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArray]
+    ]
+
+    is_error: Optional[bool]
+    """Silently dropped in v1."""
+
+
+class MessageContentContentBlockArrayMessageInputServerToolUseBlockCacheControl(TypedDict, total=False):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputServerToolUseBlock(TypedDict, total=False):
+    """Anthropic compatibility only — SambaNova does not run server-side tools.
+
+    A prior assistant turn that invoked an Anthropic-hosted tool (web_search, code_execution, etc.). Accepted in conversation history (e.g. replaying an Anthropic-served session) but never originates from a SambaNova response. New `server_tool_use`-type tool definitions on outgoing requests are rejected with 400 `unsupported_tool_type`.
+    """
+
+    id: Required[str]
+
+    input: Required[Dict[str, object]]
+
+    name: Required[str]
+
+    type: Required[Literal["server_tool_use"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputServerToolUseBlockCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+
+class MessageContentContentBlockArrayMessageInputSearchResultBlockCacheControl(TypedDict, total=False):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputSearchResultBlockContentCacheControl(TypedDict, total=False):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputSearchResultBlockContent(TypedDict, total=False):
+    """Plain-text segment of a message."""
+
+    text: Required[str]
+
+    type: Required[Literal["text"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputSearchResultBlockContentCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+    citations: Optional[Iterable[Dict[str, object]]]
+
+
+class MessageContentContentBlockArrayMessageInputSearchResultBlock(TypedDict, total=False):
+    """Inline search result content.
+
+    In v1 the `title`, `source`, and `content[]` text are extracted into a text block; citations are dropped.
+    """
+
+    type: Required[Literal["search_result"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputSearchResultBlockCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+    citations: Optional[Dict[str, object]]
+
+    content: Iterable[MessageContentContentBlockArrayMessageInputSearchResultBlockContent]
+
+    source: str
+
+    title: str
+
+
+class MessageContentContentBlockArrayMessageInputWebSearchToolResultBlockCacheControl(TypedDict, total=False):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputWebSearchToolResultBlock(TypedDict, total=False):
+    """Anthropic compatibility only — SambaNova does not run server-side `web_search`.
+
+    Echo of a prior Anthropic-served `web_search` tool call; accepted in conversation history but never originates from a SambaNova response. When present, only `title` (`url`) per result is extracted into a tool message.
+    """
+
+    content: Required[Iterable[Dict[str, object]]]
+
+    tool_use_id: Required[str]
+
+    type: Required[Literal["web_search_tool_result"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputWebSearchToolResultBlockCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+
+class MessageContentContentBlockArrayMessageInputWebFetchToolResultBlockCacheControl(TypedDict, total=False):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputWebFetchToolResultBlock(TypedDict, total=False):
+    """Anthropic compatibility only — SambaNova does not run server-side `web_fetch`.
+
+    Echo of a prior Anthropic-served `web_fetch` tool call; accepted in conversation history but never originates from a SambaNova response. When present, only the text content is extracted.
+    """
+
+    content: Required[Dict[str, object]]
+
+    tool_use_id: Required[str]
+
+    type: Required[Literal["web_fetch_tool_result"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputWebFetchToolResultBlockCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+
+class MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlockCacheControl(TypedDict, total=False):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlock(TypedDict, total=False):
+    """
+    Anthropic compatibility only — SambaNova does not run server-side `code_execution`. Echo of a prior Anthropic-served `code_execution` tool call; accepted in conversation history but never originates from a SambaNova response. When present, only `stdout`, `stderr`, and `return_code` are extracted; image output is dropped.
+    """
+
+    content: Required[Dict[str, object]]
+
+    tool_use_id: Required[str]
+
+    type: Required[Literal["code_execution_tool_result"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlockCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+
+class MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlockCacheControl(TypedDict, total=False):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlock(TypedDict, total=False):
+    """
+    Anthropic compatibility only — SambaNova does not run server-side bash code execution. Echo of a prior Anthropic-served bash tool call; accepted in conversation history but never originates from a SambaNova response. Same lossy extraction as `code_execution_tool_result`.
+    """
+
+    content: Required[Dict[str, object]]
+
+    tool_use_id: Required[str]
+
+    type: Required[Literal["bash_code_execution_tool_result"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlockCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+
+class MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlockCacheControl(
+    TypedDict, total=False
+):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlock(TypedDict, total=False):
+    """
+    Anthropic compatibility only — SambaNova does not run server-side text-editor code execution. Echo of a prior Anthropic-served text-editor tool call; accepted in conversation history but never originates from a SambaNova response. When present, only file content is extracted; metadata (line count, file type) is dropped.
+    """
+
+    content: Required[Dict[str, object]]
+
+    tool_use_id: Required[str]
+
+    type: Required[Literal["text_editor_code_execution_tool_result"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlockCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+
+class MessageContentContentBlockArrayMessageInputToolSearchToolResultBlockCacheControl(TypedDict, total=False):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputToolSearchToolResultBlock(TypedDict, total=False):
+    """Anthropic compatibility only — SambaNova does not run server-side `tool_search`.
+
+    Echo of a prior Anthropic-served `tool_search` tool call; accepted in conversation history but never originates from a SambaNova response. When present, an empty string is emitted to the tool message (no plain-text fields).
+    """
+
+    content: Required[Dict[str, object]]
+
+    tool_use_id: Required[str]
+
+    type: Required[Literal["tool_search_tool_result"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputToolSearchToolResultBlockCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+
+class MessageContentContentBlockArrayMessageInputThinkingBlock(TypedDict, total=False):
+    """Extended-reasoning trace from a prior assistant turn."""
+
+    signature: Required[str]
+
+    thinking: Required[str]
+
+    type: Required[Literal["thinking"]]
+
+
+class MessageContentContentBlockArrayMessageInputRedactedThinkingBlock(TypedDict, total=False):
+    """
+    Anthropic compatibility only — SambaNova does not produce encrypted thinking output. Echo of a prior Anthropic-served response where `thinking.display:"omitted"` was set. Accepted in conversation history but never originates from a SambaNova response. Silently dropped on input.
+    """
+
+    data: Required[str]
+
+    type: Required[Literal["redacted_thinking"]]
+
+
+class MessageContentContentBlockArrayMessageInputContainerUploadBlockCacheControl(TypedDict, total=False):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputContainerUploadBlock(TypedDict, total=False):
+    """
+    Anthropic compatibility only — SambaNova does not produce container_upload blocks (these come from Anthropic's server-side `code_execution` tool). Accepted in conversation history but never originates from a SambaNova response. Silently dropped on input.
+    """
+
+    file_id: Required[str]
+
+    type: Required[Literal["container_upload"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputContainerUploadBlockCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+
+class MessageContentContentBlockArrayMessageInputDocumentBlockCacheControl(TypedDict, total=False):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputDocumentBlock(TypedDict, total=False):
+    """PDF or document content.
+
+    **Returns 400** — no document-extraction pipeline available.
+    """
+
+    source: Required[Dict[str, object]]
+
+    type: Required[Literal["document"]]
+
+    cache_control: MessageContentContentBlockArrayMessageInputDocumentBlockCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+    citations: Optional[Dict[str, object]]
+
+    context: Optional[str]
+
+    title: Optional[str]
+
+
+MessageContentContentBlockArray: TypeAlias = Union[
+    MessageContentContentBlockArrayMessageInputTextBlock,
+    MessageContentContentBlockArrayMessageInputImageBlock,
+    MessageContentContentBlockArrayMessageInputToolUseBlock,
+    MessageContentContentBlockArrayMessageInputToolResultBlock,
+    MessageContentContentBlockArrayMessageInputServerToolUseBlock,
+    MessageContentContentBlockArrayMessageInputSearchResultBlock,
+    MessageContentContentBlockArrayMessageInputWebSearchToolResultBlock,
+    MessageContentContentBlockArrayMessageInputWebFetchToolResultBlock,
+    MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlock,
+    MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlock,
+    MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlock,
+    MessageContentContentBlockArrayMessageInputToolSearchToolResultBlock,
+    MessageContentContentBlockArrayMessageInputThinkingBlock,
+    MessageContentContentBlockArrayMessageInputRedactedThinkingBlock,
+    MessageContentContentBlockArrayMessageInputContainerUploadBlock,
+    MessageContentContentBlockArrayMessageInputDocumentBlock,
+]
+
+
+class Message(TypedDict, total=False):
+    """A turn in the conversation."""
+
+    content: Required[Union[str, Iterable[MessageContentContentBlockArray]]]
+
+    role: Required[Literal["user", "assistant"]]
+    """Conversational role.
+
+    `user` for the human-side turn, `assistant` for prior model output.
+    """
+
+
+class Metadata(TypedDict, total=False, extra_items=object):  # type: ignore[call-arg]
+    """Free-form metadata attached to the request.
+
+    Currently only `user_id` is used. Additional fields are accepted but ignored.
+    """
+
+    user_id: Optional[str]
+    """External identifier for the end-user making the request.
+
+    Mapped internally to the Chat Completions `user` field.
+    """
+
+
+class SystemSystemTextBlockArrayCacheControl(TypedDict, total=False):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class SystemSystemTextBlockArray(TypedDict, total=False):
+    """A text segment within a structured `system` prompt array.
+
+    Multiple text blocks are concatenated (with newlines) and prepended to the conversation as a `role: system` message at the chat-completions layer.
+    """
+
+    text: Required[str]
+    """Plain-text content of the system prompt segment."""
+
+    type: Required[Literal["text"]]
+
+    cache_control: SystemSystemTextBlockArrayCacheControl
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+    citations: Optional[Iterable[Dict[str, object]]]
+    """Optional citations. **In v1**: silently dropped"""
+
+
+class ThinkingMessageThinkingDisabled(TypedDict, total=False):
+    """Disables Anthropic-style extended thinking.
+
+    **In v1**: silently accepted as a no-op
+    """
+
+    type: Required[Literal["disabled"]]
+
+
+class ThinkingMessageThinkingEnabled(TypedDict, total=False):
+    """Enables Anthropic-style extended thinking with a fixed budget.
+
+    **In v1**: returns a 400 `invalid_request_error` (`unsupported_parameter`).
+    """
+
+    budget_tokens: Required[int]
+    """
+    Maximum tokens the model may spend on extended thinking before producing the
+    final answer.
+    """
+
+    type: Required[Literal["enabled"]]
+
+
+class ThinkingMessageThinkingAdaptive(TypedDict, total=False):
+    """Enables Anthropic-style adaptive extended thinking.
+
+    **In v1**: returns a 400 `invalid_request_error` (`unsupported_parameter`).
+    """
+
+    type: Required[Literal["adaptive"]]
+
+    budget_tokens: Optional[int]
+    """Optional upper bound on tokens spent on adaptive thinking.
+
+    When omitted, the backend chooses based on prompt complexity.
+    """
+
+
+Thinking: TypeAlias = Union[
+    ThinkingMessageThinkingDisabled, ThinkingMessageThinkingEnabled, ThinkingMessageThinkingAdaptive
+]
+
+
+class ToolChoiceMessageToolChoiceAuto(TypedDict, total=False):
+    """Let the model decide whether and which tool to use."""
+
+    type: Required[Literal["auto"]]
+
+    disable_parallel_tool_use: Optional[bool]
+    """Silently dropped."""
+
+
+class ToolChoiceMessageToolChoiceAny(TypedDict, total=False):
+    """Require the model to call one of the provided tools."""
+
+    type: Required[Literal["any"]]
+
+    disable_parallel_tool_use: Optional[bool]
+    """Silently dropped."""
+
+
+class ToolChoiceMessageToolChoiceNone(TypedDict, total=False):
+    """Forbid the model from calling any tool."""
+
+    type: Required[Literal["none"]]
+
+
+class ToolChoiceMessageToolChoiceTool(TypedDict, total=False):
+    """Force the model to call a specific tool by name."""
+
+    name: Required[str]
+    """Name of the required tool."""
+
+    type: Required[Literal["tool"]]
+
+    disable_parallel_tool_use: Optional[bool]
+    """Silently dropped."""
+
+
+ToolChoice: TypeAlias = Union[
+    ToolChoiceMessageToolChoiceAuto,
+    ToolChoiceMessageToolChoiceAny,
+    ToolChoiceMessageToolChoiceNone,
+    ToolChoiceMessageToolChoiceTool,
+]
+
+
+class ToolCacheControl(TypedDict, total=False):
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+    """
+
+    type: Required[Literal["ephemeral"]]
+    """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+    ttl: Optional[str]
+    """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class Tool(TypedDict, total=False):
+    """User-defined function tool definition.
+
+    Only custom function tools are supported (Anthropic's `type:"custom"` style or the absent-type Beta style). Anthropic-hosted server tools (`web_search`, `code_execution`, `bash`, `text_editor`, `memory`, `tool_search` variants) return 400 `unsupported_tool_type` if sent.
+    """
+
+    name: Required[str]
+    """Tool name. Must match `^[a-zA-Z0-9_-]+$`."""
+
+    allowed_callers: Optional[SequenceNotStr[str]]
+    """Silently dropped."""
+
+    cache_control: Optional[ToolCacheControl]
+    """
+    Marks the preceding content block (or system text block) as a prompt-cache
+    breakpoint. Marker positions are collected by the adapter; their wiring into the
+    router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+    value is ignored.
+    """
+
+    defer_loading: Optional[bool]
+    """Silently dropped."""
+
+    description: Optional[str]
+    """Human-readable description of when the tool should be used."""
+
+    eager_input_streaming: Optional[bool]
+    """Silently dropped."""
+
+    input_examples: Optional[Iterable[Dict[str, object]]]
+    """Silently dropped."""
+
+    input_schema: Optional[Dict[str, object]]
+    """JSON Schema describing the tool's expected input.
+
+    Required by the Anthropic spec; accepted as optional by SambaNova.
+    """
+
+    strict: Optional[bool]
+    """Silently dropped."""
+
+    type: Optional[Literal["custom"]]
+    """Tool-type discriminator.
+
+    May be omitted (defaults to custom) or set to `custom`. Other values return 400
+    `unsupported_tool_type`.
+    """
+
+
+class MessageCreateParamsNonStreaming(MessageCreateParamsBase, total=False):
+    stream: Optional[Literal[False]]
+    """
+    If true, the response is a sequence of Server-Sent Events whose payloads conform
+    to `MessageStreamEvent`.
+    """
+
+
+class MessageCreateParamsStreaming(MessageCreateParamsBase):
+    stream: Required[Literal[True]]
+    """
+    If true, the response is a sequence of Server-Sent Events whose payloads conform
+    to `MessageStreamEvent`.
+    """
+
+
+MessageCreateParams = Union[MessageCreateParamsNonStreaming, MessageCreateParamsStreaming]
diff --git a/src/sambanova/types/message_create_response.py b/src/sambanova/types/message_create_response.py
new file mode 100644
index 0000000..7f80d24
--- /dev/null
+++ b/src/sambanova/types/message_create_response.py
@@ -0,0 +1,449 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from .._utils import PropertyInfo
+from .message import Message
+from .._models import BaseModel
+
+__all__ = [
+    "MessageCreateResponse",
+    "MessageStartEvent",
+    "MessageContentBlockStartEvent",
+    "MessageContentBlockStartEventContentBlock",
+    "MessageContentBlockStartEventContentBlockMessageOutputTextBlock",
+    "MessageContentBlockStartEventContentBlockMessageOutputToolUseBlock",
+    "MessageContentBlockStartEventContentBlockMessageOutputThinkingBlock",
+    "MessageContentBlockStartEventContentBlockMessageOutputRedactedThinkingBlock",
+    "MessageContentBlockStartEventContentBlockMessageOutputServerToolUseBlock",
+    "MessageContentBlockStartEventContentBlockMessageOutputWebSearchToolResultBlock",
+    "MessageContentBlockStartEventContentBlockMessageOutputWebFetchToolResultBlock",
+    "MessageContentBlockStartEventContentBlockMessageOutputCodeExecutionToolResultBlock",
+    "MessageContentBlockStartEventContentBlockMessageOutputBashCodeExecutionToolResultBlock",
+    "MessageContentBlockStartEventContentBlockMessageOutputTextEditorCodeExecutionToolResultBlock",
+    "MessageContentBlockStartEventContentBlockMessageOutputToolSearchToolResultBlock",
+    "MessageContentBlockStartEventContentBlockMessageOutputContainerUploadBlock",
+    "MessageContentBlockDeltaEvent",
+    "MessageContentBlockDeltaEventDelta",
+    "MessageContentBlockDeltaEventDeltaMessageContentBlockTextDelta",
+    "MessageContentBlockDeltaEventDeltaMessageContentBlockInputJsonDelta",
+    "MessageContentBlockDeltaEventDeltaMessageContentBlockThinkingDelta",
+    "MessageContentBlockDeltaEventDeltaMessageContentBlockSignatureDelta",
+    "MessageContentBlockStopEvent",
+    "MessageDeltaEvent",
+    "MessageDeltaEventDelta",
+    "MessageDeltaEventDeltaStopDetails",
+    "MessageDeltaEventUsage",
+    "MessageStopEvent",
+    "MessagePingEvent",
+    "MessageStreamErrorEvent",
+    "MessageStreamErrorEventError",
+]
+
+
+class MessageStartEvent(BaseModel):
+    """First event of a stream.
+
+    Carries the initial Message envelope (empty `content[]`, `stop_reason: null`) and token usage from prompt processing.
+    """
+
+    message: Message
+    """Non-streaming response from `POST /messages`.
+
+    Wire-compatible with the official Anthropic Messages API.
+    """
+
+    type: Literal["message_start"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputTextBlock(BaseModel):
+    """Plain-text segment of the model's response."""
+
+    text: str
+
+    type: Literal["text"]
+
+    citations: Optional[List[Dict[str, object]]] = None
+    """Not emitted in v1."""
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputToolUseBlock(BaseModel):
+    """Tool call generated by the model."""
+
+    id: str
+    """Unique identifier for this tool call."""
+
+    input: Dict[str, object]
+    """Tool inputs as a JSON object."""
+
+    name: str
+    """Name of the tool being called."""
+
+    type: Literal["tool_use"]
+
+    caller: Optional[Dict[str, object]] = None
+    """Anthropic routing metadata. Always `null` in SambaNova responses."""
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputThinkingBlock(BaseModel):
+    """Extended-reasoning trace from the model. Emitted by reasoning models."""
+
+    thinking: str
+
+    type: Literal["thinking"]
+
+    signature: Optional[str] = None
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputRedactedThinkingBlock(BaseModel):
+    """
+    Anthropic compatibility only — SambaNova does not produce encrypted thinking output. Never emitted in responses.
+    """
+
+    data: str
+
+    type: Literal["redacted_thinking"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputServerToolUseBlock(BaseModel):
+    """Anthropic compatibility only — SambaNova does not run server-side tools.
+
+    Never emitted in responses; defined for Anthropic SDK type-parity.
+    """
+
+    id: str
+
+    input: Dict[str, object]
+
+    name: str
+
+    type: Literal["server_tool_use"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputWebSearchToolResultBlock(BaseModel):
+    """Anthropic compatibility only — SambaNova does not run server-side `web_search`.
+
+    Never emitted in responses.
+    """
+
+    content: List[Dict[str, object]]
+
+    tool_use_id: str
+
+    type: Literal["web_search_tool_result"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputWebFetchToolResultBlock(BaseModel):
+    """Anthropic compatibility only — SambaNova does not run server-side `web_fetch`.
+
+    Never emitted in responses.
+    """
+
+    content: Dict[str, object]
+
+    tool_use_id: str
+
+    type: Literal["web_fetch_tool_result"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputCodeExecutionToolResultBlock(BaseModel):
+    """
+    Anthropic compatibility only — SambaNova does not run server-side `code_execution`. Never emitted in responses.
+    """
+
+    content: Dict[str, object]
+
+    tool_use_id: str
+
+    type: Literal["code_execution_tool_result"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputBashCodeExecutionToolResultBlock(BaseModel):
+    """
+    Anthropic compatibility only — SambaNova does not run server-side bash code execution. Never emitted in responses.
+    """
+
+    content: Dict[str, object]
+
+    tool_use_id: str
+
+    type: Literal["bash_code_execution_tool_result"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputTextEditorCodeExecutionToolResultBlock(BaseModel):
+    """
+    Anthropic compatibility only — SambaNova does not run server-side text-editor code execution. Never emitted in responses.
+    """
+
+    content: Dict[str, object]
+
+    tool_use_id: str
+
+    type: Literal["text_editor_code_execution_tool_result"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputToolSearchToolResultBlock(BaseModel):
+    """Anthropic compatibility only — SambaNova does not run server-side `tool_search`.
+
+    Never emitted in responses.
+    """
+
+    content: Dict[str, object]
+
+    tool_use_id: str
+
+    type: Literal["tool_search_tool_result"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputContainerUploadBlock(BaseModel):
+    """
+    Anthropic compatibility only — SambaNova does not produce container_upload blocks (these come from Anthropic's server-side `code_execution` tool). Never emitted in responses.
+    """
+
+    file_id: str
+
+    type: Literal["container_upload"]
+
+
+MessageContentBlockStartEventContentBlock: TypeAlias = Annotated[
+    Union[
+        MessageContentBlockStartEventContentBlockMessageOutputTextBlock,
+        MessageContentBlockStartEventContentBlockMessageOutputToolUseBlock,
+        MessageContentBlockStartEventContentBlockMessageOutputThinkingBlock,
+        MessageContentBlockStartEventContentBlockMessageOutputRedactedThinkingBlock,
+        MessageContentBlockStartEventContentBlockMessageOutputServerToolUseBlock,
+        MessageContentBlockStartEventContentBlockMessageOutputWebSearchToolResultBlock,
+        MessageContentBlockStartEventContentBlockMessageOutputWebFetchToolResultBlock,
+        MessageContentBlockStartEventContentBlockMessageOutputCodeExecutionToolResultBlock,
+        MessageContentBlockStartEventContentBlockMessageOutputBashCodeExecutionToolResultBlock,
+        MessageContentBlockStartEventContentBlockMessageOutputTextEditorCodeExecutionToolResultBlock,
+        MessageContentBlockStartEventContentBlockMessageOutputToolSearchToolResultBlock,
+        MessageContentBlockStartEventContentBlockMessageOutputContainerUploadBlock,
+    ],
+    PropertyInfo(discriminator="type"),
+]
+
+
+class MessageContentBlockStartEvent(BaseModel):
+    """Opens a new content block. One per block in `content[]`."""
+
+    content_block: MessageContentBlockStartEventContentBlock
+    """Typed content block in the model's response."""
+
+    index: int
+    """Zero-based index of the block within `content[]`."""
+
+    type: Literal["content_block_start"]
+
+
+class MessageContentBlockDeltaEventDeltaMessageContentBlockTextDelta(BaseModel):
+    """Incremental text chunk for an open text content block."""
+
+    text: str
+
+    type: Literal["text_delta"]
+
+
+class MessageContentBlockDeltaEventDeltaMessageContentBlockInputJsonDelta(BaseModel):
+    """Incremental fragment of a tool_use block's `input` JSON.
+
+    Concatenate successive `partial_json` strings to reconstruct the full input object.
+    """
+
+    partial_json: str
+
+    type: Literal["input_json_delta"]
+
+
+class MessageContentBlockDeltaEventDeltaMessageContentBlockThinkingDelta(BaseModel):
+    """Incremental thinking chunk for an open thinking block.
+
+    Emitted by reasoning models.
+    """
+
+    thinking: str
+
+    type: Literal["thinking_delta"]
+
+
+class MessageContentBlockDeltaEventDeltaMessageContentBlockSignatureDelta(BaseModel):
+    """Signature for an open thinking block.
+
+    Emitted at the end of a thinking stream (paired with the closing `content_block_stop`); the `signature` value may be an empty string when the backend has no signed payload to attach.
+    """
+
+    signature: str
+
+    type: Literal["signature_delta"]
+
+
+MessageContentBlockDeltaEventDelta: TypeAlias = Annotated[
+    Union[
+        MessageContentBlockDeltaEventDeltaMessageContentBlockTextDelta,
+        MessageContentBlockDeltaEventDeltaMessageContentBlockInputJsonDelta,
+        MessageContentBlockDeltaEventDeltaMessageContentBlockThinkingDelta,
+        MessageContentBlockDeltaEventDeltaMessageContentBlockSignatureDelta,
+    ],
+    PropertyInfo(discriminator="type"),
+]
+
+
+class MessageContentBlockDeltaEvent(BaseModel):
+    """Incremental update to the currently open content block."""
+
+    delta: MessageContentBlockDeltaEventDelta
+    """Incremental update to an open content block."""
+
+    index: int
+    """Zero-based index of the block within `content[]`."""
+
+    type: Literal["content_block_delta"]
+
+
+class MessageContentBlockStopEvent(BaseModel):
+    """Closes the current content block."""
+
+    index: int
+    """Zero-based index of the block within `content[]`."""
+
+    type: Literal["content_block_stop"]
+
+
+class MessageDeltaEventDeltaStopDetails(BaseModel):
+    """Refusal stop details.
+
+    Anthropic compatibility only — `refusal` is never emitted as a stop_reason by SambaNova (content filtering is not exposed at the API layer).
+    """
+
+    type: Literal["refusal"]
+
+    category: Optional[Literal["cyber", "bio"]] = None
+
+
+class MessageDeltaEventDelta(BaseModel):
+    stop_reason: Literal[
+        "end_turn", "max_tokens", "tool_use", "pause_turn", "refusal", "stop_sequence", "model_context_window_exceeded"
+    ]
+    """Reason the model stopped generating.
+
+    SambaNova emits `end_turn`, `max_tokens`, `tool_use`, and `stop_sequence`. The
+    remaining values are defined for Anthropic SDK type-parity but never returned:
+    `pause_turn` (server-tool loop limit, not produced); `refusal` (content filter,
+    not exposed); `model_context_window_exceeded` (folded to `max_tokens`).
+    """
+
+    stop_details: Optional[MessageDeltaEventDeltaStopDetails] = None
+    """Refusal stop details.
+
+    Anthropic compatibility only — `refusal` is never emitted as a stop_reason by
+    SambaNova (content filtering is not exposed at the API layer).
+    """
+
+    stop_sequence: Optional[str] = None
+    """Custom stop sequence that triggered termination.
+
+    Field is emitted but value is always `null` in v1 (backend collapses
+    `StopSequenceHit` and `EndOfText` into the same finish_reason).
+    """
+
+
+class MessageDeltaEventUsage(BaseModel):
+    """
+    Final token accounting emitted in the closing `message_delta` event of a stream.
+    """
+
+    output_tokens: int
+    """Total tokens generated (final count)."""
+
+    cache_creation_input_tokens: Optional[int] = None
+    """Tokens written to prompt cache. Absent in v1."""
+
+    cache_read_input_tokens: Optional[int] = None
+    """Tokens read from prompt cache. Absent in v1."""
+
+    input_tokens: Optional[int] = None
+    """Total tokens in the prompt (echoed from `message_start`)."""
+
+    server_tool_use: Optional[Dict[str, object]] = None
+    """Server-tool usage metrics.
+
+    Anthropic compatibility only — SambaNova does not run server tools, so this
+    field is never emitted.
+    """
+
+
+class MessageDeltaEvent(BaseModel):
+    """Penultimate event of the stream.
+
+    Carries the final `stop_reason`, optional `stop_sequence`, and final usage counts.
+    """
+
+    delta: MessageDeltaEventDelta
+
+    type: Literal["message_delta"]
+
+    usage: MessageDeltaEventUsage
+    """
+    Final token accounting emitted in the closing `message_delta` event of a stream.
+    """
+
+
+class MessageStopEvent(BaseModel):
+    """Final event of the stream. No fields beyond `type`."""
+
+    type: Literal["message_stop"]
+
+
+class MessagePingEvent(BaseModel):
+    """Keepalive heartbeat. May appear at any point in the stream."""
+
+    type: Literal["ping"]
+
+
+class MessageStreamErrorEventError(BaseModel):
+    """Inner error object carried inside a `MessageErrorResponse`.
+
+    The `type` value follows Anthropic's published error taxonomy.
+    """
+
+    message: str
+    """Human-readable explanation of the error."""
+
+    type: Literal[
+        "invalid_request_error",
+        "authentication_error",
+        "permission_error",
+        "not_found_error",
+        "request_too_large",
+        "rate_limit_error",
+        "api_error",
+        "overloaded_error",
+        "not_implemented_error",
+    ]
+    """Error category. Values follow Anthropic's taxonomy."""
+
+
+class MessageStreamErrorEvent(BaseModel):
+    """Streamed error envelope. Terminates the stream."""
+
+    error: MessageStreamErrorEventError
+    """Inner error object carried inside a `MessageErrorResponse`.
+
+    The `type` value follows Anthropic's published error taxonomy.
+    """
+
+    type: Literal["error"]
+
+
+MessageCreateResponse: TypeAlias = Union[
+    Message,
+    MessageStartEvent,
+    MessageContentBlockStartEvent,
+    MessageContentBlockDeltaEvent,
+    MessageContentBlockStopEvent,
+    MessageDeltaEvent,
+    MessageStopEvent,
+    MessagePingEvent,
+    MessageStreamErrorEvent,
+]
diff --git a/src/sambanova/types/message_stream_event.py b/src/sambanova/types/message_stream_event.py
new file mode 100644
index 0000000..95645b5
--- /dev/null
+++ b/src/sambanova/types/message_stream_event.py
@@ -0,0 +1,451 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from .._utils import PropertyInfo
+from .message import Message
+from .._models import BaseModel
+
+__all__ = [
+    "MessageStreamEvent",
+    "MessageStartEvent",
+    "MessageContentBlockStartEvent",
+    "MessageContentBlockStartEventContentBlock",
+    "MessageContentBlockStartEventContentBlockMessageOutputTextBlock",
+    "MessageContentBlockStartEventContentBlockMessageOutputToolUseBlock",
+    "MessageContentBlockStartEventContentBlockMessageOutputThinkingBlock",
+    "MessageContentBlockStartEventContentBlockMessageOutputRedactedThinkingBlock",
+    "MessageContentBlockStartEventContentBlockMessageOutputServerToolUseBlock",
+    "MessageContentBlockStartEventContentBlockMessageOutputWebSearchToolResultBlock",
+    "MessageContentBlockStartEventContentBlockMessageOutputWebFetchToolResultBlock",
+    "MessageContentBlockStartEventContentBlockMessageOutputCodeExecutionToolResultBlock",
+    "MessageContentBlockStartEventContentBlockMessageOutputBashCodeExecutionToolResultBlock",
+    "MessageContentBlockStartEventContentBlockMessageOutputTextEditorCodeExecutionToolResultBlock",
+    "MessageContentBlockStartEventContentBlockMessageOutputToolSearchToolResultBlock",
+    "MessageContentBlockStartEventContentBlockMessageOutputContainerUploadBlock",
+    "MessageContentBlockDeltaEvent",
+    "MessageContentBlockDeltaEventDelta",
+    "MessageContentBlockDeltaEventDeltaMessageContentBlockTextDelta",
+    "MessageContentBlockDeltaEventDeltaMessageContentBlockInputJsonDelta",
+    "MessageContentBlockDeltaEventDeltaMessageContentBlockThinkingDelta",
+    "MessageContentBlockDeltaEventDeltaMessageContentBlockSignatureDelta",
+    "MessageContentBlockStopEvent",
+    "MessageDeltaEvent",
+    "MessageDeltaEventDelta",
+    "MessageDeltaEventDeltaStopDetails",
+    "MessageDeltaEventUsage",
+    "MessageStopEvent",
+    "MessagePingEvent",
+    "MessageStreamErrorEvent",
+    "MessageStreamErrorEventError",
+]
+
+
+class MessageStartEvent(BaseModel):
+    """First event of a stream.
+
+    Carries the initial Message envelope (empty `content[]`, `stop_reason: null`) and token usage from prompt processing.
+    """
+
+    message: Message
+    """Non-streaming response from `POST /messages`.
+
+    Wire-compatible with the official Anthropic Messages API.
+    """
+
+    type: Literal["message_start"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputTextBlock(BaseModel):
+    """Plain-text segment of the model's response."""
+
+    text: str
+
+    type: Literal["text"]
+
+    citations: Optional[List[Dict[str, object]]] = None
+    """Not emitted in v1."""
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputToolUseBlock(BaseModel):
+    """Tool call generated by the model."""
+
+    id: str
+    """Unique identifier for this tool call."""
+
+    input: Dict[str, object]
+    """Tool inputs as a JSON object."""
+
+    name: str
+    """Name of the tool being called."""
+
+    type: Literal["tool_use"]
+
+    caller: Optional[Dict[str, object]] = None
+    """Anthropic routing metadata. Always `null` in SambaNova responses."""
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputThinkingBlock(BaseModel):
+    """Extended-reasoning trace from the model. Emitted by reasoning models."""
+
+    thinking: str
+
+    type: Literal["thinking"]
+
+    signature: Optional[str] = None
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputRedactedThinkingBlock(BaseModel):
+    """
+    Anthropic compatibility only — SambaNova does not produce encrypted thinking output. Never emitted in responses.
+    """
+
+    data: str
+
+    type: Literal["redacted_thinking"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputServerToolUseBlock(BaseModel):
+    """Anthropic compatibility only — SambaNova does not run server-side tools.
+
+    Never emitted in responses; defined for Anthropic SDK type-parity.
+    """
+
+    id: str
+
+    input: Dict[str, object]
+
+    name: str
+
+    type: Literal["server_tool_use"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputWebSearchToolResultBlock(BaseModel):
+    """Anthropic compatibility only — SambaNova does not run server-side `web_search`.
+
+    Never emitted in responses.
+    """
+
+    content: List[Dict[str, object]]
+
+    tool_use_id: str
+
+    type: Literal["web_search_tool_result"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputWebFetchToolResultBlock(BaseModel):
+    """Anthropic compatibility only — SambaNova does not run server-side `web_fetch`.
+
+    Never emitted in responses.
+    """
+
+    content: Dict[str, object]
+
+    tool_use_id: str
+
+    type: Literal["web_fetch_tool_result"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputCodeExecutionToolResultBlock(BaseModel):
+    """
+    Anthropic compatibility only — SambaNova does not run server-side `code_execution`. Never emitted in responses.
+    """
+
+    content: Dict[str, object]
+
+    tool_use_id: str
+
+    type: Literal["code_execution_tool_result"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputBashCodeExecutionToolResultBlock(BaseModel):
+    """
+    Anthropic compatibility only — SambaNova does not run server-side bash code execution. Never emitted in responses.
+    """
+
+    content: Dict[str, object]
+
+    tool_use_id: str
+
+    type: Literal["bash_code_execution_tool_result"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputTextEditorCodeExecutionToolResultBlock(BaseModel):
+    """
+    Anthropic compatibility only — SambaNova does not run server-side text-editor code execution. Never emitted in responses.
+    """
+
+    content: Dict[str, object]
+
+    tool_use_id: str
+
+    type: Literal["text_editor_code_execution_tool_result"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputToolSearchToolResultBlock(BaseModel):
+    """Anthropic compatibility only — SambaNova does not run server-side `tool_search`.
+
+    Never emitted in responses.
+    """
+
+    content: Dict[str, object]
+
+    tool_use_id: str
+
+    type: Literal["tool_search_tool_result"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputContainerUploadBlock(BaseModel):
+    """
+    Anthropic compatibility only — SambaNova does not produce container_upload blocks (these come from Anthropic's server-side `code_execution` tool). Never emitted in responses.
+    """
+
+    file_id: str
+
+    type: Literal["container_upload"]
+
+
+MessageContentBlockStartEventContentBlock: TypeAlias = Annotated[
+    Union[
+        MessageContentBlockStartEventContentBlockMessageOutputTextBlock,
+        MessageContentBlockStartEventContentBlockMessageOutputToolUseBlock,
+        MessageContentBlockStartEventContentBlockMessageOutputThinkingBlock,
+        MessageContentBlockStartEventContentBlockMessageOutputRedactedThinkingBlock,
+        MessageContentBlockStartEventContentBlockMessageOutputServerToolUseBlock,
+        MessageContentBlockStartEventContentBlockMessageOutputWebSearchToolResultBlock,
+        MessageContentBlockStartEventContentBlockMessageOutputWebFetchToolResultBlock,
+        MessageContentBlockStartEventContentBlockMessageOutputCodeExecutionToolResultBlock,
+        MessageContentBlockStartEventContentBlockMessageOutputBashCodeExecutionToolResultBlock,
+        MessageContentBlockStartEventContentBlockMessageOutputTextEditorCodeExecutionToolResultBlock,
+        MessageContentBlockStartEventContentBlockMessageOutputToolSearchToolResultBlock,
+        MessageContentBlockStartEventContentBlockMessageOutputContainerUploadBlock,
+    ],
+    PropertyInfo(discriminator="type"),
+]
+
+
+class MessageContentBlockStartEvent(BaseModel):
+    """Opens a new content block. One per block in `content[]`."""
+
+    content_block: MessageContentBlockStartEventContentBlock
+    """Typed content block in the model's response."""
+
+    index: int
+    """Zero-based index of the block within `content[]`."""
+
+    type: Literal["content_block_start"]
+
+
+class MessageContentBlockDeltaEventDeltaMessageContentBlockTextDelta(BaseModel):
+    """Incremental text chunk for an open text content block."""
+
+    text: str
+
+    type: Literal["text_delta"]
+
+
+class MessageContentBlockDeltaEventDeltaMessageContentBlockInputJsonDelta(BaseModel):
+    """Incremental fragment of a tool_use block's `input` JSON.
+
+    Concatenate successive `partial_json` strings to reconstruct the full input object.
+    """
+
+    partial_json: str
+
+    type: Literal["input_json_delta"]
+
+
+class MessageContentBlockDeltaEventDeltaMessageContentBlockThinkingDelta(BaseModel):
+    """Incremental thinking chunk for an open thinking block.
+
+    Emitted by reasoning models.
+    """
+
+    thinking: str
+
+    type: Literal["thinking_delta"]
+
+
+class MessageContentBlockDeltaEventDeltaMessageContentBlockSignatureDelta(BaseModel):
+    """Signature for an open thinking block.
+
+    Emitted at the end of a thinking stream (paired with the closing `content_block_stop`); the `signature` value may be an empty string when the backend has no signed payload to attach.
+    """
+
+    signature: str
+
+    type: Literal["signature_delta"]
+
+
+MessageContentBlockDeltaEventDelta: TypeAlias = Annotated[
+    Union[
+        MessageContentBlockDeltaEventDeltaMessageContentBlockTextDelta,
+        MessageContentBlockDeltaEventDeltaMessageContentBlockInputJsonDelta,
+        MessageContentBlockDeltaEventDeltaMessageContentBlockThinkingDelta,
+        MessageContentBlockDeltaEventDeltaMessageContentBlockSignatureDelta,
+    ],
+    PropertyInfo(discriminator="type"),
+]
+
+
+class MessageContentBlockDeltaEvent(BaseModel):
+    """Incremental update to the currently open content block."""
+
+    delta: MessageContentBlockDeltaEventDelta
+    """Incremental update to an open content block."""
+
+    index: int
+    """Zero-based index of the block within `content[]`."""
+
+    type: Literal["content_block_delta"]
+
+
+class MessageContentBlockStopEvent(BaseModel):
+    """Closes the current content block."""
+
+    index: int
+    """Zero-based index of the block within `content[]`."""
+
+    type: Literal["content_block_stop"]
+
+
+class MessageDeltaEventDeltaStopDetails(BaseModel):
+    """Refusal stop details.
+
+    Anthropic compatibility only — `refusal` is never emitted as a stop_reason by SambaNova (content filtering is not exposed at the API layer).
+    """
+
+    type: Literal["refusal"]
+
+    category: Optional[Literal["cyber", "bio"]] = None
+
+
+class MessageDeltaEventDelta(BaseModel):
+    stop_reason: Literal[
+        "end_turn", "max_tokens", "tool_use", "pause_turn", "refusal", "stop_sequence", "model_context_window_exceeded"
+    ]
+    """Reason the model stopped generating.
+
+    SambaNova emits `end_turn`, `max_tokens`, `tool_use`, and `stop_sequence`. The
+    remaining values are defined for Anthropic SDK type-parity but never returned:
+    `pause_turn` (server-tool loop limit, not produced); `refusal` (content filter,
+    not exposed); `model_context_window_exceeded` (folded to `max_tokens`).
+    """
+
+    stop_details: Optional[MessageDeltaEventDeltaStopDetails] = None
+    """Refusal stop details.
+
+    Anthropic compatibility only — `refusal` is never emitted as a stop_reason by
+    SambaNova (content filtering is not exposed at the API layer).
+    """
+
+    stop_sequence: Optional[str] = None
+    """Custom stop sequence that triggered termination.
+
+    Field is emitted but value is always `null` in v1 (backend collapses
+    `StopSequenceHit` and `EndOfText` into the same finish_reason).
+    """
+
+
+class MessageDeltaEventUsage(BaseModel):
+    """
+    Final token accounting emitted in the closing `message_delta` event of a stream.
+    """
+
+    output_tokens: int
+    """Total tokens generated (final count)."""
+
+    cache_creation_input_tokens: Optional[int] = None
+    """Tokens written to prompt cache. Absent in v1."""
+
+    cache_read_input_tokens: Optional[int] = None
+    """Tokens read from prompt cache. Absent in v1."""
+
+    input_tokens: Optional[int] = None
+    """Total tokens in the prompt (echoed from `message_start`)."""
+
+    server_tool_use: Optional[Dict[str, object]] = None
+    """Server-tool usage metrics.
+
+    Anthropic compatibility only — SambaNova does not run server tools, so this
+    field is never emitted.
+    """
+
+
+class MessageDeltaEvent(BaseModel):
+    """Penultimate event of the stream.
+
+    Carries the final `stop_reason`, optional `stop_sequence`, and final usage counts.
+    """
+
+    delta: MessageDeltaEventDelta
+
+    type: Literal["message_delta"]
+
+    usage: MessageDeltaEventUsage
+    """
+    Final token accounting emitted in the closing `message_delta` event of a stream.
+    """
+
+
+class MessageStopEvent(BaseModel):
+    """Final event of the stream. No fields beyond `type`."""
+
+    type: Literal["message_stop"]
+
+
+class MessagePingEvent(BaseModel):
+    """Keepalive heartbeat. May appear at any point in the stream."""
+
+    type: Literal["ping"]
+
+
+class MessageStreamErrorEventError(BaseModel):
+    """Inner error object carried inside a `MessageErrorResponse`.
+
+    The `type` value follows Anthropic's published error taxonomy.
+    """
+
+    message: str
+    """Human-readable explanation of the error."""
+
+    type: Literal[
+        "invalid_request_error",
+        "authentication_error",
+        "permission_error",
+        "not_found_error",
+        "request_too_large",
+        "rate_limit_error",
+        "api_error",
+        "overloaded_error",
+        "not_implemented_error",
+    ]
+    """Error category. Values follow Anthropic's taxonomy."""
+
+
+class MessageStreamErrorEvent(BaseModel):
+    """Streamed error envelope. Terminates the stream."""
+
+    error: MessageStreamErrorEventError
+    """Inner error object carried inside a `MessageErrorResponse`.
+
+    The `type` value follows Anthropic's published error taxonomy.
+    """
+
+    type: Literal["error"]
+
+
+MessageStreamEvent: TypeAlias = Annotated[
+    Union[
+        MessageStartEvent,
+        MessageContentBlockStartEvent,
+        MessageContentBlockDeltaEvent,
+        MessageContentBlockStopEvent,
+        MessageDeltaEvent,
+        MessageStopEvent,
+        MessagePingEvent,
+        MessageStreamErrorEvent,
+    ],
+    PropertyInfo(discriminator="type"),
+]
diff --git a/tests/api_resources/test_messages.py b/tests/api_resources/test_messages.py
new file mode 100644
index 0000000..2f34086
--- /dev/null
+++ b/tests/api_resources/test_messages.py
@@ -0,0 +1,595 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from sambanova import SambaNova, AsyncSambaNova
+from tests.utils import assert_matches_type
+from sambanova.types import (
+    MessageCreateResponse,
+    MessageCountTokensResponse,
+)
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestMessages:
+    parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+    @parametrize
+    def test_method_create_overload_1(self, client: SambaNova) -> None:
+        message = client.messages.create(
+            max_tokens=1024,
+            messages=[
+                {
+                    "content": "Hello, Claude!",
+                    "role": "user",
+                }
+            ],
+            model="DeepSeek-V3.1",
+        )
+        assert_matches_type(MessageCreateResponse, message, path=["response"])
+
+    @parametrize
+    def test_method_create_with_all_params_overload_1(self, client: SambaNova) -> None:
+        message = client.messages.create(
+            max_tokens=1024,
+            messages=[
+                {
+                    "content": "Hello, Claude!",
+                    "role": "user",
+                }
+            ],
+            model="DeepSeek-V3.1",
+            container="container",
+            metadata={"user_id": "user_id"},
+            service_tier="auto",
+            stop_sequences=["string"],
+            stream=False,
+            system="string",
+            temperature=1,
+            thinking={"type": "disabled"},
+            tool_choice={
+                "type": "auto",
+                "disable_parallel_tool_use": True,
+            },
+            tools=[
+                {
+                    "name": "name",
+                    "allowed_callers": ["string"],
+                    "cache_control": {
+                        "type": "ephemeral",
+                        "ttl": "ttl",
+                    },
+                    "defer_loading": True,
+                    "description": "description",
+                    "eager_input_streaming": True,
+                    "input_examples": [{"foo": "bar"}],
+                    "input_schema": {"foo": "bar"},
+                    "strict": True,
+                    "type": "custom",
+                }
+            ],
+            top_k=0,
+            top_p=0,
+            anthropic_version="2023-06-01",
+        )
+        assert_matches_type(MessageCreateResponse, message, path=["response"])
+
+    @parametrize
+    def test_raw_response_create_overload_1(self, client: SambaNova) -> None:
+        response = client.messages.with_raw_response.create(
+            max_tokens=1024,
+            messages=[
+                {
+                    "content": "Hello, Claude!",
+                    "role": "user",
+                }
+            ],
+            model="DeepSeek-V3.1",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        message = response.parse()
+        assert_matches_type(MessageCreateResponse, message, path=["response"])
+
+    @parametrize
+    def test_streaming_response_create_overload_1(self, client: SambaNova) -> None:
+        with client.messages.with_streaming_response.create(
+            max_tokens=1024,
+            messages=[
+                {
+                    "content": "Hello, Claude!",
+                    "role": "user",
+                }
+            ],
+            model="DeepSeek-V3.1",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            message = response.parse()
+            assert_matches_type(MessageCreateResponse, message, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    def test_method_create_overload_2(self, client: SambaNova) -> None:
+        message_stream = client.messages.create(
+            max_tokens=1024,
+            messages=[
+                {
+                    "content": "Hello, Claude!",
+                    "role": "user",
+                }
+            ],
+            model="DeepSeek-V3.1",
+            stream=True,
+        )
+        message_stream.response.close()
+
+    @parametrize
+    def test_method_create_with_all_params_overload_2(self, client: SambaNova) -> None:
+        message_stream = client.messages.create(
+            max_tokens=1024,
+            messages=[
+                {
+                    "content": "Hello, Claude!",
+                    "role": "user",
+                }
+            ],
+            model="DeepSeek-V3.1",
+            stream=True,
+            container="container",
+            metadata={"user_id": "user_id"},
+            service_tier="auto",
+            stop_sequences=["string"],
+            system="string",
+            temperature=1,
+            thinking={"type": "disabled"},
+            tool_choice={
+                "type": "auto",
+                "disable_parallel_tool_use": True,
+            },
+            tools=[
+                {
+                    "name": "name",
+                    "allowed_callers": ["string"],
+                    "cache_control": {
+                        "type": "ephemeral",
+                        "ttl": "ttl",
+                    },
+                    "defer_loading": True,
+                    "description": "description",
+                    "eager_input_streaming": True,
+                    "input_examples": [{"foo": "bar"}],
+                    "input_schema": {"foo": "bar"},
+                    "strict": True,
+                    "type": "custom",
+                }
+            ],
+            top_k=0,
+            top_p=0,
+            anthropic_version="2023-06-01",
+        )
+        message_stream.response.close()
+
+    @parametrize
+    def test_raw_response_create_overload_2(self, client: SambaNova) -> None:
+        response = client.messages.with_raw_response.create(
+            max_tokens=1024,
+            messages=[
+                {
+                    "content": "Hello, Claude!",
+                    "role": "user",
+                }
+            ],
+            model="DeepSeek-V3.1",
+            stream=True,
+        )
+
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        stream = response.parse()
+        stream.close()
+
+    @parametrize
+    def test_streaming_response_create_overload_2(self, client: SambaNova) -> None:
+        with client.messages.with_streaming_response.create(
+            max_tokens=1024,
+            messages=[
+                {
+                    "content": "Hello, Claude!",
+                    "role": "user",
+                }
+            ],
+            model="DeepSeek-V3.1",
+            stream=True,
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            stream = response.parse()
+            stream.close()
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    def test_method_count_tokens(self, client: SambaNova) -> None:
+        message = client.messages.count_tokens(
+            messages=[
+                {
+                    "content": "Hello, Claude!",
+                    "role": "user",
+                }
+            ],
+            model="DeepSeek-V3.1",
+        )
+        assert_matches_type(MessageCountTokensResponse, message, path=["response"])
+
+    @parametrize
+    def test_method_count_tokens_with_all_params(self, client: SambaNova) -> None:
+        message = client.messages.count_tokens(
+            messages=[
+                {
+                    "content": "Hello, Claude!",
+                    "role": "user",
+                }
+            ],
+            model="DeepSeek-V3.1",
+            system="string",
+            thinking={"type": "disabled"},
+            tool_choice={
+                "type": "auto",
+                "disable_parallel_tool_use": True,
+            },
+            tools=[
+                {
+                    "name": "name",
+                    "allowed_callers": ["string"],
+                    "cache_control": {
+                        "type": "ephemeral",
+                        "ttl": "ttl",
+                    },
+                    "defer_loading": True,
+                    "description": "description",
+                    "eager_input_streaming": True,
+                    "input_examples": [{"foo": "bar"}],
+                    "input_schema": {"foo": "bar"},
+                    "strict": True,
+                    "type": "custom",
+                }
+            ],
+            anthropic_version="2023-06-01",
+        )
+        assert_matches_type(MessageCountTokensResponse, message, path=["response"])
+
+    @parametrize
+    def test_raw_response_count_tokens(self, client: SambaNova) -> None:
+        response = client.messages.with_raw_response.count_tokens(
+            messages=[
+                {
+                    "content": "Hello, Claude!",
+                    "role": "user",
+                }
+            ],
+            model="DeepSeek-V3.1",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        message = response.parse()
+        assert_matches_type(MessageCountTokensResponse, message, path=["response"])
+
+    @parametrize
+    def test_streaming_response_count_tokens(self, client: SambaNova) -> None:
+        with client.messages.with_streaming_response.count_tokens(
+            messages=[
+                {
+                    "content": "Hello, Claude!",
+                    "role": "user",
+                }
+            ],
+            model="DeepSeek-V3.1",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            message = response.parse()
+            assert_matches_type(MessageCountTokensResponse, message, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+
+class TestAsyncMessages:
+    parametrize = pytest.mark.parametrize(
+        "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
+    )
+
+    @parametrize
+    async def test_method_create_overload_1(self, async_client: AsyncSambaNova) -> None:
+        message = await async_client.messages.create(
+            max_tokens=1024,
+            messages=[
+                {
+                    "content": "Hello, Claude!",
+                    "role": "user",
+                }
+            ],
+            model="DeepSeek-V3.1",
+        )
+        assert_matches_type(MessageCreateResponse, message, path=["response"])
+
+    @parametrize
+    async def test_method_create_with_all_params_overload_1(self, async_client: AsyncSambaNova) -> None:
+        message = await async_client.messages.create(
+            max_tokens=1024,
+            messages=[
+                {
+                    "content": "Hello, Claude!",
+                    "role": "user",
+                }
+            ],
+            model="DeepSeek-V3.1",
+            container="container",
+            metadata={"user_id": "user_id"},
+            service_tier="auto",
+            stop_sequences=["string"],
+            stream=False,
+            system="string",
+            temperature=1,
+            thinking={"type": "disabled"},
+            tool_choice={
+                "type": "auto",
+                "disable_parallel_tool_use": True,
+            },
+            tools=[
+                {
+                    "name": "name",
+                    "allowed_callers": ["string"],
+                    "cache_control": {
+                        "type": "ephemeral",
+                        "ttl": "ttl",
+                    },
+                    "defer_loading": True,
+                    "description": "description",
+                    "eager_input_streaming": True,
+                    "input_examples": [{"foo": "bar"}],
+                    "input_schema": {"foo": "bar"},
+                    "strict": True,
+                    "type": "custom",
+                }
+            ],
+            top_k=0,
+            top_p=0,
+            anthropic_version="2023-06-01",
+        )
+        assert_matches_type(MessageCreateResponse, message, path=["response"])
+
+    @parametrize
+    async def test_raw_response_create_overload_1(self, async_client: AsyncSambaNova) -> None:
+        response = await async_client.messages.with_raw_response.create(
+            max_tokens=1024,
+            messages=[
+                {
+                    "content": "Hello, Claude!",
+                    "role": "user",
+                }
+            ],
+            model="DeepSeek-V3.1",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        message = await response.parse()
+        assert_matches_type(MessageCreateResponse, message, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_create_overload_1(self, async_client: AsyncSambaNova) -> None:
+        async with async_client.messages.with_streaming_response.create(
+            max_tokens=1024,
+            messages=[
+                {
+                    "content": "Hello, Claude!",
+                    "role": "user",
+                }
+            ],
+            model="DeepSeek-V3.1",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            message = await response.parse()
+            assert_matches_type(MessageCreateResponse, message, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    async def test_method_create_overload_2(self, async_client: AsyncSambaNova) -> None:
+        message_stream = await async_client.messages.create(
+            max_tokens=1024,
+            messages=[
+                {
+                    "content": "Hello, Claude!",
+                    "role": "user",
+                }
+            ],
+            model="DeepSeek-V3.1",
+            stream=True,
+        )
+        await message_stream.response.aclose()
+
+    @parametrize
+    async def test_method_create_with_all_params_overload_2(self, async_client: AsyncSambaNova) -> None:
+        message_stream = await async_client.messages.create(
+            max_tokens=1024,
+            messages=[
+                {
+                    "content": "Hello, Claude!",
+                    "role": "user",
+                }
+            ],
+            model="DeepSeek-V3.1",
+            stream=True,
+            container="container",
+            metadata={"user_id": "user_id"},
+            service_tier="auto",
+            stop_sequences=["string"],
+            system="string",
+            temperature=1,
+            thinking={"type": "disabled"},
+            tool_choice={
+                "type": "auto",
+                "disable_parallel_tool_use": True,
+            },
+            tools=[
+                {
+                    "name": "name",
+                    "allowed_callers": ["string"],
+                    "cache_control": {
+                        "type": "ephemeral",
+                        "ttl": "ttl",
+                    },
+                    "defer_loading": True,
+                    "description": "description",
+                    "eager_input_streaming": True,
+                    "input_examples": [{"foo": "bar"}],
+                    "input_schema": {"foo": "bar"},
+                    "strict": True,
+                    "type": "custom",
+                }
+            ],
+            top_k=0,
+            top_p=0,
+            anthropic_version="2023-06-01",
+        )
+        await message_stream.response.aclose()
+
+    @parametrize
+    async def test_raw_response_create_overload_2(self, async_client: AsyncSambaNova) -> None:
+        response = await async_client.messages.with_raw_response.create(
+            max_tokens=1024,
+            messages=[
+                {
+                    "content": "Hello, Claude!",
+                    "role": "user",
+                }
+            ],
+            model="DeepSeek-V3.1",
+            stream=True,
+        )
+
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        stream = await response.parse()
+        await stream.close()
+
+    @parametrize
+    async def test_streaming_response_create_overload_2(self, async_client: AsyncSambaNova) -> None:
+        async with async_client.messages.with_streaming_response.create(
+            max_tokens=1024,
+            messages=[
+                {
+                    "content": "Hello, Claude!",
+                    "role": "user",
+                }
+            ],
+            model="DeepSeek-V3.1",
+            stream=True,
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            stream = await response.parse()
+            await stream.close()
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    async def test_method_count_tokens(self, async_client: AsyncSambaNova) -> None:
+        message = await async_client.messages.count_tokens(
+            messages=[
+                {
+                    "content": "Hello, Claude!",
+                    "role": "user",
+                }
+            ],
+            model="DeepSeek-V3.1",
+        )
+        assert_matches_type(MessageCountTokensResponse, message, path=["response"])
+
+    @parametrize
+    async def test_method_count_tokens_with_all_params(self, async_client: AsyncSambaNova) -> None:
+        message = await async_client.messages.count_tokens(
+            messages=[
+                {
+                    "content": "Hello, Claude!",
+                    "role": "user",
+                }
+            ],
+            model="DeepSeek-V3.1",
+            system="string",
+            thinking={"type": "disabled"},
+            tool_choice={
+                "type": "auto",
+                "disable_parallel_tool_use": True,
+            },
+            tools=[
+                {
+                    "name": "name",
+                    "allowed_callers": ["string"],
+                    "cache_control": {
+                        "type": "ephemeral",
+                        "ttl": "ttl",
+                    },
+                    "defer_loading": True,
+                    "description": "description",
+                    "eager_input_streaming": True,
+                    "input_examples": [{"foo": "bar"}],
+                    "input_schema": {"foo": "bar"},
+                    "strict": True,
+                    "type": "custom",
+                }
+            ],
+            anthropic_version="2023-06-01",
+        )
+        assert_matches_type(MessageCountTokensResponse, message, path=["response"])
+
+    @parametrize
+    async def test_raw_response_count_tokens(self, async_client: AsyncSambaNova) -> None:
+        response = await async_client.messages.with_raw_response.count_tokens(
+            messages=[
+                {
+                    "content": "Hello, Claude!",
+                    "role": "user",
+                }
+            ],
+            model="DeepSeek-V3.1",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        message = await response.parse()
+        assert_matches_type(MessageCountTokensResponse, message, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_count_tokens(self, async_client: AsyncSambaNova) -> None:
+        async with async_client.messages.with_streaming_response.count_tokens(
+            messages=[
+                {
+                    "content": "Hello, Claude!",
+                    "role": "user",
+                }
+            ],
+            model="DeepSeek-V3.1",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            message = await response.parse()
+            assert_matches_type(MessageCountTokensResponse, message, path=["response"])
+
+        assert cast(Any, response.is_closed) is True

From 546fc59615a07e268930611150b2a91ec0073c74 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
 <142633134+stainless-app[bot]@users.noreply.github.com>
Date: Tue, 26 May 2026 21:54:31 +0000
Subject: [PATCH 5/5] release: 1.9.0

---
 .release-please-manifest.json |  2 +-
 CHANGELOG.md                  | 14 ++++++++++++++
 pyproject.toml                |  2 +-
 src/sambanova/_version.py     |  2 +-
 4 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 24d5689..4fcfdf7 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "1.8.2"
+  ".": "1.9.0"
 }
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9523365..3b6a1db 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,19 @@
 # Changelog
 
+## 1.9.0 (2026-05-26)
+
+Full Changelog: [v1.8.2...v1.9.0](https://github.com/sambanova/sambanova-python/compare/v1.8.2...v1.9.0)
+
+### Features
+
+* **api:** add anthropic compatible messages api support ([0ddfd33](https://github.com/sambanova/sambanova-python/commit/0ddfd334decc86831f840cd8c0de2c3a73e378a2))
+* **internal/types:** support eagerly validating pydantic iterators ([cc4aade](https://github.com/sambanova/sambanova-python/commit/cc4aade45c484980fafacf2e315ca6f1564e0ace))
+
+
+### Bug Fixes
+
+* **client:** add missing f-string prefix in file type error message ([c6b84e8](https://github.com/sambanova/sambanova-python/commit/c6b84e853be657a330e6754894911bdaac07dc67))
+
 ## 1.8.2 (2026-05-07)
 
 Full Changelog: [v1.8.1...v1.8.2](https://github.com/sambanova/sambanova-python/compare/v1.8.1...v1.8.2)
diff --git a/pyproject.toml b/pyproject.toml
index 28f88b6..23fbe75 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "sambanova"
-version = "1.8.2"
+version = "1.9.0"
 description = "The official Python library for the SambaNova API"
 dynamic = ["readme"]
 license = "Apache-2.0"
diff --git a/src/sambanova/_version.py b/src/sambanova/_version.py
index d3cc5ec..48fad55 100644
--- a/src/sambanova/_version.py
+++ b/src/sambanova/_version.py
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 __title__ = "sambanova"
-__version__ = "1.8.2"  # x-release-please-version
+__version__ = "1.9.0"  # x-release-please-version