From c6b84e853be657a330e6754894911bdaac07dc67 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Fri, 8 May 2026 16:32:53 +0000
Subject: [PATCH 1/5] fix(client): add missing f-string prefix in file type
error message
---
src/sambanova/_files.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/sambanova/_files.py b/src/sambanova/_files.py
index 6071d1e..6877ecb 100644
--- a/src/sambanova/_files.py
+++ b/src/sambanova/_files.py
@@ -99,7 +99,7 @@ async def async_to_httpx_files(files: RequestFiles | None) -> HttpxRequestFiles
elif is_sequence_t(files):
files = [(key, await _async_transform_file(file)) for key, file in files]
else:
- raise TypeError("Unexpected file type input {type(files)}, expected mapping or sequence")
+ raise TypeError(f"Unexpected file type input {type(files)}, expected mapping or sequence")
return files
From cc4aade45c484980fafacf2e315ca6f1564e0ace Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Mon, 11 May 2026 17:44:21 +0000
Subject: [PATCH 2/5] feat(internal/types): support eagerly validating pydantic
iterators
---
src/sambanova/_models.py | 80 ++++++++++++++++++++++++++++++++++++++++
tests/test_models.py | 60 ++++++++++++++++++++++++++++--
2 files changed, 137 insertions(+), 3 deletions(-)
diff --git a/src/sambanova/_models.py b/src/sambanova/_models.py
index 29070e0..8c5ab26 100644
--- a/src/sambanova/_models.py
+++ b/src/sambanova/_models.py
@@ -25,7 +25,9 @@
ClassVar,
Protocol,
Required,
+ Annotated,
ParamSpec,
+ TypeAlias,
TypedDict,
TypeGuard,
final,
@@ -79,7 +81,15 @@
from ._constants import RAW_RESPONSE_HEADER
if TYPE_CHECKING:
+ from pydantic import GetCoreSchemaHandler, ValidatorFunctionWrapHandler
+ from pydantic_core import CoreSchema, core_schema
from pydantic_core.core_schema import ModelField, ModelSchema, LiteralSchema, ModelFieldsSchema
+else:
+ try:
+ from pydantic_core import CoreSchema, core_schema
+ except ImportError:
+ CoreSchema = None
+ core_schema = None
__all__ = ["BaseModel", "GenericModel"]
@@ -396,6 +406,76 @@ def model_dump_json(
)
+class _EagerIterable(list[_T], Generic[_T]):
+ """
+ Accepts any Iterable[T] input (including generators), consumes it
+ eagerly, and validates all items upfront.
+
+ Validation preserves the original container type where possible
+ (e.g. a set[T] stays a set[T]). Serialization (model_dump / JSON)
+ always emits a list — round-tripping through model_dump() will not
+ restore the original container type.
+ """
+
+ @classmethod
+ def __get_pydantic_core_schema__(
+ cls,
+ source_type: Any,
+ handler: GetCoreSchemaHandler,
+ ) -> CoreSchema:
+ (item_type,) = get_args(source_type) or (Any,)
+ item_schema: CoreSchema = handler.generate_schema(item_type)
+ list_of_items_schema: CoreSchema = core_schema.list_schema(item_schema)
+
+ return core_schema.no_info_wrap_validator_function(
+ cls._validate,
+ list_of_items_schema,
+ serialization=core_schema.plain_serializer_function_ser_schema(
+ cls._serialize,
+ info_arg=False,
+ ),
+ )
+
+ @staticmethod
+ def _validate(v: Iterable[_T], handler: "ValidatorFunctionWrapHandler") -> Any:
+ original_type: type[Any] = type(v)
+
+ # Normalize to list so list_schema can validate each item
+ if isinstance(v, list):
+ items: list[_T] = v
+ else:
+ try:
+ items = list(v)
+ except TypeError as e:
+ raise TypeError("Value is not iterable") from e
+
+ # Validate items against the inner schema
+ validated: list[_T] = handler(items)
+
+ # Reconstruct original container type
+ if original_type is list:
+ return validated
+ # str(list) produces the list's repr, not a string built from items,
+ # so skip reconstruction for str and its subclasses.
+ if issubclass(original_type, str):
+ return validated
+ try:
+ return original_type(validated)
+ except (TypeError, ValueError):
+ # If the type cannot be reconstructed, just return the validated list
+ return validated
+
+ @staticmethod
+ def _serialize(v: Iterable[_T]) -> list[_T]:
+ """Always serialize as a list so Pydantic's JSON encoder is happy."""
+ if isinstance(v, list):
+ return v
+ return list(v)
+
+
+EagerIterable: TypeAlias = Annotated[Iterable[_T], _EagerIterable]
+
+
def _construct_field(value: object, field: FieldInfo, key: str) -> object:
if value is None:
return field_get_default(field)
diff --git a/tests/test_models.py b/tests/test_models.py
index d559b5c..eda8fb8 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -1,7 +1,8 @@
import json
-from typing import TYPE_CHECKING, Any, Dict, List, Union, Optional, cast
+from typing import TYPE_CHECKING, Any, Dict, List, Union, Iterable, Optional, cast
from datetime import datetime, timezone
-from typing_extensions import Literal, Annotated, TypeAliasType
+from collections import deque
+from typing_extensions import Literal, Annotated, TypedDict, TypeAliasType
import pytest
import pydantic
@@ -9,7 +10,7 @@
from sambanova._utils import PropertyInfo
from sambanova._compat import PYDANTIC_V1, parse_obj, model_dump, model_json
-from sambanova._models import DISCRIMINATOR_CACHE, BaseModel, construct_type
+from sambanova._models import DISCRIMINATOR_CACHE, BaseModel, EagerIterable, construct_type
class BasicModel(BaseModel):
@@ -961,3 +962,56 @@ def __getattr__(self, attr: str) -> Item: ...
assert model.a.prop == 1
assert isinstance(model.a, Item)
assert model.other == "foo"
+
+
+# NOTE: Workaround for Pydantic Iterable behavior.
+# Iterable fields are replaced with a ValidatorIterator and may be consumed
+# during serialization, which can cause subsequent dumps to return empty data.
+# See: https://github.com/pydantic/pydantic/issues/9541
+@pytest.mark.parametrize(
+ "data, expected_validated",
+ [
+ ([1, 2, 3], [1, 2, 3]),
+ ((1, 2, 3), (1, 2, 3)),
+ (set([1, 2, 3]), set([1, 2, 3])),
+ (iter([1, 2, 3]), [1, 2, 3]),
+ ([], []),
+ ((x for x in [1, 2, 3]), [1, 2, 3]),
+ (map(lambda x: x, [1, 2, 3]), [1, 2, 3]),
+ (frozenset([1, 2, 3]), frozenset([1, 2, 3])),
+ (deque([1, 2, 3]), deque([1, 2, 3])),
+ ],
+ ids=["list", "tuple", "set", "iterator", "empty", "generator", "map", "frozenset", "deque"],
+)
+@pytest.mark.skipif(PYDANTIC_V1, reason="this is only supported in pydantic v2")
+def test_iterable_construction(data: Iterable[int], expected_validated: Iterable[int]) -> None:
+ class TypeWithIterable(TypedDict):
+ items: EagerIterable[int]
+
+ class Model(BaseModel):
+ data: TypeWithIterable
+
+ m = Model.model_validate({"data": {"items": data}})
+ assert m.data["items"] == expected_validated
+
+ # Verify repeated dumps don't lose data (the original bug)
+ assert m.model_dump()["data"]["items"] == list(expected_validated)
+ assert m.model_dump()["data"]["items"] == list(expected_validated)
+
+
+@pytest.mark.skipif(PYDANTIC_V1, reason="this is only supported in pydantic v2")
+def test_iterable_construction_str_falls_back_to_list() -> None:
+ # str is iterable (over chars), but str(list_of_chars) produces the list's repr
+ # rather than reconstructing a string from items. We special-case str to fall
+ # back to list instead of attempting reconstruction.
+ class TypeWithIterable(TypedDict):
+ items: EagerIterable[str]
+
+ class Model(BaseModel):
+ data: TypeWithIterable
+
+ m = Model.model_validate({"data": {"items": "hello"}})
+
+ # falls back to list of chars rather than calling str(["h", "e", "l", "l", "o"])
+ assert m.data["items"] == ["h", "e", "l", "l", "o"]
+ assert m.model_dump()["data"]["items"] == ["h", "e", "l", "l", "o"]
From 4b1c00a59dc5d6bf8e04812dbcf008b1a775f46b Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Tue, 12 May 2026 19:12:28 +0000
Subject: [PATCH 3/5] ci: pin GitHub Actions to commit SHAs
Pin all GitHub Actions referenced in generated workflows (both
first-party `actions/*` and third-party) to immutable commit SHAs.
Updating pinned actions is now a deliberate codegen-side bump rather
than implicit on every workflow run.
---
.github/workflows/ci.yml | 14 +++++++-------
.github/workflows/publish-pypi.yml | 4 ++--
.github/workflows/release-doctor.yml | 2 +-
3 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 0477bdc..f3eb789 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -21,10 +21,10 @@ jobs:
runs-on: ${{ github.repository == 'stainless-sdks/sambanova-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }}
if: (github.event_name == 'push' || github.event.pull_request.head.repo.fork) && (github.event_name != 'push' || github.event.head_commit.message != 'codegen metadata')
steps:
- - uses: actions/checkout@v6
+ - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Install uv
- uses: astral-sh/setup-uv@v5
+ uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5.4.2
with:
version: '0.10.2'
@@ -43,10 +43,10 @@ jobs:
id-token: write
runs-on: ${{ github.repository == 'stainless-sdks/sambanova-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }}
steps:
- - uses: actions/checkout@v6
+ - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Install uv
- uses: astral-sh/setup-uv@v5
+ uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5.4.2
with:
version: '0.10.2'
@@ -61,7 +61,7 @@ jobs:
github.repository == 'stainless-sdks/sambanova-python' &&
!startsWith(github.ref, 'refs/heads/stl/')
id: github-oidc
- uses: actions/github-script@v8
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
with:
script: core.setOutput('github_token', await core.getIDToken());
@@ -81,10 +81,10 @@ jobs:
runs-on: ${{ github.repository == 'stainless-sdks/sambanova-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }}
if: github.event_name == 'push' || github.event.pull_request.head.repo.fork
steps:
- - uses: actions/checkout@v6
+ - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Install uv
- uses: astral-sh/setup-uv@v5
+ uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5.4.2
with:
version: '0.10.2'
diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml
index 3f73e27..884b664 100644
--- a/.github/workflows/publish-pypi.yml
+++ b/.github/workflows/publish-pypi.yml
@@ -17,10 +17,10 @@ jobs:
id-token: write
steps:
- - uses: actions/checkout@v6
+ - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Install uv
- uses: astral-sh/setup-uv@v5
+ uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5.4.2
with:
version: '0.9.13'
diff --git a/.github/workflows/release-doctor.yml b/.github/workflows/release-doctor.yml
index 45a764d..1696088 100644
--- a/.github/workflows/release-doctor.yml
+++ b/.github/workflows/release-doctor.yml
@@ -12,7 +12,7 @@ jobs:
if: github.repository == 'sambanova/sambanova-python' && (github.event_name == 'push' || github.event_name == 'workflow_dispatch' || startsWith(github.head_ref, 'release-please') || github.head_ref == 'next')
steps:
- - uses: actions/checkout@v6
+ - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Check release environment
run: |
From 0ddfd334decc86831f840cd8c0de2c3a73e378a2 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Tue, 26 May 2026 21:54:09 +0000
Subject: [PATCH 4/5] feat(api): add anthropic compatible messages api support
---
.stats.yml | 8 +-
api.md | 19 +
src/sambanova/_client.py | 79 +-
src/sambanova/resources/__init__.py | 14 +
src/sambanova/resources/messages.py | 1197 +++++++++++++++++
src/sambanova/types/__init__.py | 6 +
src/sambanova/types/message.py | 328 +++++
.../types/message_count_tokens_params.py | 979 ++++++++++++++
.../types/message_count_tokens_response.py | 12 +
src/sambanova/types/message_create_params.py | 1095 +++++++++++++++
.../types/message_create_response.py | 449 +++++++
src/sambanova/types/message_stream_event.py | 451 +++++++
tests/api_resources/test_messages.py | 595 ++++++++
13 files changed, 5227 insertions(+), 5 deletions(-)
create mode 100644 src/sambanova/resources/messages.py
create mode 100644 src/sambanova/types/message.py
create mode 100644 src/sambanova/types/message_count_tokens_params.py
create mode 100644 src/sambanova/types/message_count_tokens_response.py
create mode 100644 src/sambanova/types/message_create_params.py
create mode 100644 src/sambanova/types/message_create_response.py
create mode 100644 src/sambanova/types/message_stream_event.py
create mode 100644 tests/api_resources/test_messages.py
diff --git a/.stats.yml b/.stats.yml
index 8386d48..29d23f7 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 8
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/sambanova/sambanova-5884297580e5423cf40bd59057ea55da0384fe34f431a80cac0eece6176c6057.yml
-openapi_spec_hash: 9306c1d75784a840a2973024fa94d22d
-config_hash: 315596f19f192be2b7bf343664a7eb90
+configured_endpoints: 10
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/sambanova/sambanova-f9a2632e2ea9632e8b40258f57bfa8b529b926d72fd8b1f9550848fbb880e0de.yml
+openapi_spec_hash: 8df9e2ad31769c26c590dacf3517bc36
+config_hash: d33fc68f92caf09c6b3b40675a111114
diff --git a/api.md b/api.md
index 2678e70..41de6f3 100644
--- a/api.md
+++ b/api.md
@@ -88,6 +88,25 @@ Methods:
- client.responses.create(\*\*params) -> ResponseCreateResponse
+# Messages
+
+Types:
+
+```python
+from sambanova.types import (
+ Message,
+ MessageCountTokensResponse,
+ MessageErrorResponse,
+ MessageStreamEvent,
+ MessageCreateResponse,
+)
+```
+
+Methods:
+
+- client.messages.create(\*\*params) -> MessageCreateResponse
+- client.messages.count_tokens(\*\*params) -> MessageCountTokensResponse
+
# Models
Types:
diff --git a/src/sambanova/_client.py b/src/sambanova/_client.py
index 5b044c0..08b45e4 100644
--- a/src/sambanova/_client.py
+++ b/src/sambanova/_client.py
@@ -35,8 +35,9 @@
)
if TYPE_CHECKING:
- from .resources import chat, audio, models, responses, embeddings, completions
+ from .resources import chat, audio, models, messages, responses, embeddings, completions
from .resources.models import ModelsResource, AsyncModelsResource
+ from .resources.messages import MessagesResource, AsyncMessagesResource
from .resources.chat.chat import ChatResource, AsyncChatResource
from .resources.responses import ResponsesResource, AsyncResponsesResource
from .resources.embeddings import EmbeddingsResource, AsyncEmbeddingsResource
@@ -58,12 +59,14 @@
class SambaNova(SyncAPIClient):
# client options
api_key: str
+ x_api_key: str | None
integration_source: str | None
def __init__(
self,
*,
api_key: str | None = None,
+ x_api_key: str | None = None,
integration_source: str | None = None,
base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = not_given,
@@ -88,6 +91,7 @@ def __init__(
This automatically infers the following arguments from their corresponding environment variables if they are not provided:
- `api_key` from `SAMBANOVA_API_KEY`
+ - `x_api_key` from `SAMBANOVA_API_KEY`
- `integration_source` from `SAMBANOVA_INTEGRATION_SOURCE`
"""
if api_key is None:
@@ -98,6 +102,10 @@ def __init__(
)
self.api_key = api_key
+ if x_api_key is None:
+ x_api_key = os.environ.get("SAMBANOVA_API_KEY")
+ self.x_api_key = x_api_key
+
if integration_source is None:
integration_source = os.environ.get("SAMBANOVA_INTEGRATION_SOURCE")
self.integration_source = integration_source
@@ -159,6 +167,12 @@ def responses(self) -> ResponsesResource:
return ResponsesResource(self)
+ @cached_property
+ def messages(self) -> MessagesResource:
+ from .resources.messages import MessagesResource
+
+ return MessagesResource(self)
+
@cached_property
def models(self) -> ModelsResource:
from .resources.models import ModelsResource
@@ -181,9 +195,20 @@ def qs(self) -> Querystring:
@property
@override
def auth_headers(self) -> dict[str, str]:
+ return {**self._api_key, **self._x_api_key}
+
+ @property
+ def _api_key(self) -> dict[str, str]:
api_key = self.api_key
return {"Authorization": f"Bearer {api_key}"}
+ @property
+ def _x_api_key(self) -> dict[str, str]:
+ x_api_key = self.x_api_key
+ if x_api_key is None:
+ return {}
+ return {"x-api-key": x_api_key}
+
@property
@override
def default_headers(self) -> dict[str, str | Omit]:
@@ -198,6 +223,7 @@ def copy(
self,
*,
api_key: str | None = None,
+ x_api_key: str | None = None,
integration_source: str | None = None,
base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = not_given,
@@ -233,6 +259,7 @@ def copy(
http_client = http_client or self._client
return self.__class__(
api_key=api_key or self.api_key,
+ x_api_key=x_api_key or self.x_api_key,
integration_source=integration_source or self.integration_source,
base_url=base_url or self.base_url,
timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
@@ -284,12 +311,14 @@ def _make_status_error(
class AsyncSambaNova(AsyncAPIClient):
# client options
api_key: str
+ x_api_key: str | None
integration_source: str | None
def __init__(
self,
*,
api_key: str | None = None,
+ x_api_key: str | None = None,
integration_source: str | None = None,
base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = not_given,
@@ -314,6 +343,7 @@ def __init__(
This automatically infers the following arguments from their corresponding environment variables if they are not provided:
- `api_key` from `SAMBANOVA_API_KEY`
+ - `x_api_key` from `SAMBANOVA_API_KEY`
- `integration_source` from `SAMBANOVA_INTEGRATION_SOURCE`
"""
if api_key is None:
@@ -324,6 +354,10 @@ def __init__(
)
self.api_key = api_key
+ if x_api_key is None:
+ x_api_key = os.environ.get("SAMBANOVA_API_KEY")
+ self.x_api_key = x_api_key
+
if integration_source is None:
integration_source = os.environ.get("SAMBANOVA_INTEGRATION_SOURCE")
self.integration_source = integration_source
@@ -385,6 +419,12 @@ def responses(self) -> AsyncResponsesResource:
return AsyncResponsesResource(self)
+ @cached_property
+ def messages(self) -> AsyncMessagesResource:
+ from .resources.messages import AsyncMessagesResource
+
+ return AsyncMessagesResource(self)
+
@cached_property
def models(self) -> AsyncModelsResource:
from .resources.models import AsyncModelsResource
@@ -407,9 +447,20 @@ def qs(self) -> Querystring:
@property
@override
def auth_headers(self) -> dict[str, str]:
+ return {**self._api_key, **self._x_api_key}
+
+ @property
+ def _api_key(self) -> dict[str, str]:
api_key = self.api_key
return {"Authorization": f"Bearer {api_key}"}
+ @property
+ def _x_api_key(self) -> dict[str, str]:
+ x_api_key = self.x_api_key
+ if x_api_key is None:
+ return {}
+ return {"x-api-key": x_api_key}
+
@property
@override
def default_headers(self) -> dict[str, str | Omit]:
@@ -424,6 +475,7 @@ def copy(
self,
*,
api_key: str | None = None,
+ x_api_key: str | None = None,
integration_source: str | None = None,
base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = not_given,
@@ -459,6 +511,7 @@ def copy(
http_client = http_client or self._client
return self.__class__(
api_key=api_key or self.api_key,
+ x_api_key=x_api_key or self.x_api_key,
integration_source=integration_source or self.integration_source,
base_url=base_url or self.base_url,
timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
@@ -543,6 +596,12 @@ def responses(self) -> responses.ResponsesResourceWithRawResponse:
return ResponsesResourceWithRawResponse(self._client.responses)
+ @cached_property
+ def messages(self) -> messages.MessagesResourceWithRawResponse:
+ from .resources.messages import MessagesResourceWithRawResponse
+
+ return MessagesResourceWithRawResponse(self._client.messages)
+
@cached_property
def models(self) -> models.ModelsResourceWithRawResponse:
from .resources.models import ModelsResourceWithRawResponse
@@ -586,6 +645,12 @@ def responses(self) -> responses.AsyncResponsesResourceWithRawResponse:
return AsyncResponsesResourceWithRawResponse(self._client.responses)
+ @cached_property
+ def messages(self) -> messages.AsyncMessagesResourceWithRawResponse:
+ from .resources.messages import AsyncMessagesResourceWithRawResponse
+
+ return AsyncMessagesResourceWithRawResponse(self._client.messages)
+
@cached_property
def models(self) -> models.AsyncModelsResourceWithRawResponse:
from .resources.models import AsyncModelsResourceWithRawResponse
@@ -629,6 +694,12 @@ def responses(self) -> responses.ResponsesResourceWithStreamingResponse:
return ResponsesResourceWithStreamingResponse(self._client.responses)
+ @cached_property
+ def messages(self) -> messages.MessagesResourceWithStreamingResponse:
+ from .resources.messages import MessagesResourceWithStreamingResponse
+
+ return MessagesResourceWithStreamingResponse(self._client.messages)
+
@cached_property
def models(self) -> models.ModelsResourceWithStreamingResponse:
from .resources.models import ModelsResourceWithStreamingResponse
@@ -672,6 +743,12 @@ def responses(self) -> responses.AsyncResponsesResourceWithStreamingResponse:
return AsyncResponsesResourceWithStreamingResponse(self._client.responses)
+ @cached_property
+ def messages(self) -> messages.AsyncMessagesResourceWithStreamingResponse:
+ from .resources.messages import AsyncMessagesResourceWithStreamingResponse
+
+ return AsyncMessagesResourceWithStreamingResponse(self._client.messages)
+
@cached_property
def models(self) -> models.AsyncModelsResourceWithStreamingResponse:
from .resources.models import AsyncModelsResourceWithStreamingResponse
diff --git a/src/sambanova/resources/__init__.py b/src/sambanova/resources/__init__.py
index 03ab754..0cfdcd3 100644
--- a/src/sambanova/resources/__init__.py
+++ b/src/sambanova/resources/__init__.py
@@ -24,6 +24,14 @@
ModelsResourceWithStreamingResponse,
AsyncModelsResourceWithStreamingResponse,
)
+from .messages import (
+ MessagesResource,
+ AsyncMessagesResource,
+ MessagesResourceWithRawResponse,
+ AsyncMessagesResourceWithRawResponse,
+ MessagesResourceWithStreamingResponse,
+ AsyncMessagesResourceWithStreamingResponse,
+)
from .responses import (
ResponsesResource,
AsyncResponsesResource,
@@ -80,6 +88,12 @@
"AsyncResponsesResourceWithRawResponse",
"ResponsesResourceWithStreamingResponse",
"AsyncResponsesResourceWithStreamingResponse",
+ "MessagesResource",
+ "AsyncMessagesResource",
+ "MessagesResourceWithRawResponse",
+ "AsyncMessagesResourceWithRawResponse",
+ "MessagesResourceWithStreamingResponse",
+ "AsyncMessagesResourceWithStreamingResponse",
"ModelsResource",
"AsyncModelsResource",
"ModelsResourceWithRawResponse",
diff --git a/src/sambanova/resources/messages.py b/src/sambanova/resources/messages.py
new file mode 100644
index 0000000..dfbcf82
--- /dev/null
+++ b/src/sambanova/resources/messages.py
@@ -0,0 +1,1197 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Any, Union, Iterable, Optional, cast
+from typing_extensions import Literal, overload
+
+import httpx
+
+from ..types import message_create_params, message_count_tokens_params
+from .._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
+from .._utils import required_args, maybe_transform, strip_not_given, async_maybe_transform
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from .._streaming import Stream, AsyncStream
+from .._base_client import make_request_options
+from ..types.message_stream_event import MessageStreamEvent
+from ..types.message_create_response import MessageCreateResponse
+from ..types.message_count_tokens_response import MessageCountTokensResponse
+
+__all__ = ["MessagesResource", "AsyncMessagesResource"]
+
+
+class MessagesResource(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> MessagesResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/sambanova/sambanova-python#accessing-raw-response-data-eg-headers
+ """
+ return MessagesResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> MessagesResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/sambanova/sambanova-python#with_streaming_response
+ """
+ return MessagesResourceWithStreamingResponse(self)
+
+ @overload
+ def create(
+ self,
+ *,
+ max_tokens: int,
+ messages: Iterable[message_create_params.Message],
+ model: Union[
+ str,
+ Literal[
+ "Meta-Llama-3.3-70B-Instruct",
+ "Meta-Llama-3.2-1B-Instruct",
+ "Meta-Llama-3.2-3B-Instruct",
+ "Llama-3.2-11B-Vision-Instruct",
+ "Llama-3.2-90B-Vision-Instruct",
+ "Meta-Llama-3.1-8B-Instruct",
+ "Meta-Llama-3.1-70B-Instruct",
+ "Meta-Llama-3.1-405B-Instruct",
+ "Qwen2.5-Coder-32B-Instruct",
+ "Qwen2.5-72B-Instruct",
+ "QwQ-32B-Preview",
+ "Meta-Llama-Guard-3-8B",
+ "DeepSeek-R1",
+ "DeepSeek-R1-0528",
+ "DeepSeek-V3-0324",
+ "DeepSeek-V3.1",
+ "DeepSeek-V3.1-cb",
+ "DeepSeek-V3.1-Terminus",
+ "DeepSeek-V3.2",
+ "DeepSeek-R1-Distill-Llama-70B",
+ "Llama-4-Maverick-17B-128E-Instruct",
+ "Llama-4-Scout-17B-16E-Instruct",
+ "Qwen3-32B",
+ "Qwen3-235B",
+ "Llama-3.3-Swallow-70B-Instruct-v0.4",
+ "gpt-oss-120b",
+ "ALLaM-7B-Instruct-preview",
+ "MiniMax-M2.5",
+ "MiniMax-M2.7",
+ "gemma-3-12b-it",
+ ],
+ ],
+ container: Optional[str] | Omit = omit,
+ metadata: message_create_params.Metadata | Omit = omit,
+ service_tier: Optional[Literal["auto", "standard_only"]] | Omit = omit,
+ stop_sequences: Optional[SequenceNotStr[str]] | Omit = omit,
+ stream: Optional[Literal[False]] | Omit = omit,
+ system: Union[str, Iterable[message_create_params.SystemSystemTextBlockArray]] | Omit = omit,
+ temperature: Optional[float] | Omit = omit,
+ thinking: message_create_params.Thinking | Omit = omit,
+ tool_choice: Optional[message_create_params.ToolChoice] | Omit = omit,
+ tools: Optional[Iterable[message_create_params.Tool]] | Omit = omit,
+ top_k: Optional[int] | Omit = omit,
+ top_p: Optional[float] | Omit = omit,
+ anthropic_version: str | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> MessageCreateResponse:
+ """Anthropic Messages API compatible endpoint.
+
+ Generates a model response for the
+ supplied conversation. Authentication accepts either the bearer
+ `Authorization: Bearer <api_key>` header (SambaNova SDK default) or the `x-api-key`
+ header (Anthropic SDK default); the same API key is used in both cases. When
+ `stream: true` is set, the response is a sequence of Server-Sent Events whose
+ payloads conform to `MessageStreamEvent`; otherwise the response is a single
+ `Message` object.
+
+ Args:
+ max_tokens: Maximum number of tokens to generate. The combined input + output token count is
+ bounded by the model's context window.
+
+ messages: Conversation turns.
+
+ model: The model ID to use (e.g. gpt-oss-120b). See available
+ [models](https://docs.sambanova.ai/docs/en/models/sambacloud-models)
+
+ container: Existing code-execution container ID to reuse. **In v1**: silently dropped
+
+ metadata: Free-form metadata attached to the request. Currently only `user_id` is
+ supported; additional fields are accepted but ignored.
+
+ service_tier: Service-tier preference. **In v1**: silently dropped
+
+ stop_sequences: Custom strings that, when generated, cause the model to stop.
+
+ stream: If true, the response is a sequence of Server-Sent Events whose payloads conform
+ to `MessageStreamEvent`.
+
+ system: System prompt for the conversation. Accepts either a single string (most common)
+ or an array of text blocks (used when individual segments need `cache_control`
+ markers). Multiple text blocks are joined with newlines and prepended to the
+ conversation as a `role: system` message.
+
+ temperature: Sampling temperature in `[0.0, 2.0]`. Higher values produce more random output,
+ lower values more deterministic. Adjust only one of `temperature`, `top_p`,
+ `top_k`.
+
+ thinking: Controls Anthropic-style extended thinking. **In v1**: only `type:"disabled"` is
+ silently accepted as a no-op; `type:"enabled"` and `type:"adaptive"` return a
+ 400 `invalid_request_error` (`unsupported_parameter`).
+
+ tool_choice: How the model should choose from the provided tools.
+
+ tools: Tool definitions the model may call.
+
+ top_k: Top-k sampling. Considers only the K most likely tokens at each step. Set to 0
+ to disable.
+
+ top_p: Nucleus sampling. Considers tokens with cumulative probability mass up to
+ `top_p`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ def create(
+ self,
+ *,
+ max_tokens: int,
+ messages: Iterable[message_create_params.Message],
+ model: Union[
+ str,
+ Literal[
+ "Meta-Llama-3.3-70B-Instruct",
+ "Meta-Llama-3.2-1B-Instruct",
+ "Meta-Llama-3.2-3B-Instruct",
+ "Llama-3.2-11B-Vision-Instruct",
+ "Llama-3.2-90B-Vision-Instruct",
+ "Meta-Llama-3.1-8B-Instruct",
+ "Meta-Llama-3.1-70B-Instruct",
+ "Meta-Llama-3.1-405B-Instruct",
+ "Qwen2.5-Coder-32B-Instruct",
+ "Qwen2.5-72B-Instruct",
+ "QwQ-32B-Preview",
+ "Meta-Llama-Guard-3-8B",
+ "DeepSeek-R1",
+ "DeepSeek-R1-0528",
+ "DeepSeek-V3-0324",
+ "DeepSeek-V3.1",
+ "DeepSeek-V3.1-cb",
+ "DeepSeek-V3.1-Terminus",
+ "DeepSeek-V3.2",
+ "DeepSeek-R1-Distill-Llama-70B",
+ "Llama-4-Maverick-17B-128E-Instruct",
+ "Llama-4-Scout-17B-16E-Instruct",
+ "Qwen3-32B",
+ "Qwen3-235B",
+ "Llama-3.3-Swallow-70B-Instruct-v0.4",
+ "gpt-oss-120b",
+ "ALLaM-7B-Instruct-preview",
+ "MiniMax-M2.5",
+ "MiniMax-M2.7",
+ "gemma-3-12b-it",
+ ],
+ ],
+ stream: Literal[True],
+ container: Optional[str] | Omit = omit,
+ metadata: message_create_params.Metadata | Omit = omit,
+ service_tier: Optional[Literal["auto", "standard_only"]] | Omit = omit,
+ stop_sequences: Optional[SequenceNotStr[str]] | Omit = omit,
+ system: Union[str, Iterable[message_create_params.SystemSystemTextBlockArray]] | Omit = omit,
+ temperature: Optional[float] | Omit = omit,
+ thinking: message_create_params.Thinking | Omit = omit,
+ tool_choice: Optional[message_create_params.ToolChoice] | Omit = omit,
+ tools: Optional[Iterable[message_create_params.Tool]] | Omit = omit,
+ top_k: Optional[int] | Omit = omit,
+ top_p: Optional[float] | Omit = omit,
+ anthropic_version: str | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Stream[MessageStreamEvent]:
+ """Anthropic Messages API compatible endpoint.
+
+ Generates a model response for the
+ supplied conversation. Authentication accepts either the bearer
+ `Authorization: Bearer <api_key>` header (SambaNova SDK default) or the `x-api-key`
+ header (Anthropic SDK default); the same API key is used in both cases. When
+ `stream: true` is set, the response is a sequence of Server-Sent Events whose
+ payloads conform to `MessageStreamEvent`; otherwise the response is a single
+ `Message` object.
+
+ Args:
+ max_tokens: Maximum number of tokens to generate. The combined input + output token count is
+ bounded by the model's context window.
+
+ messages: Conversation turns.
+
+ model: The model ID to use (e.g. gpt-oss-120b). See available
+ [models](https://docs.sambanova.ai/docs/en/models/sambacloud-models)
+
+ stream: If true, the response is a sequence of Server-Sent Events whose payloads conform
+ to `MessageStreamEvent`.
+
+ container: Existing code-execution container ID to reuse. **In v1**: silently dropped
+
+ metadata: Free-form metadata attached to the request. Currently only `user_id` is
+ recognized; additional fields are accepted but ignored.
+
+ service_tier: Service-tier preference. **In v1**: silently dropped
+
+ stop_sequences: Custom strings that, when generated, cause the model to stop.
+
+ system: System prompt for the conversation. Accepts either a single string (most common)
+ or an array of text blocks (used when individual segments need `cache_control`
+ markers). Multiple text blocks are joined with newlines and prepended to the
+ conversation as a `role: system` message.
+
+ temperature: Sampling temperature in `[0.0, 2.0]`. Higher values produce more random output,
+ lower values more deterministic. Adjust only one of `temperature`, `top_p`,
+ `top_k`.
+
+ thinking: Controls Anthropic-style extended thinking. **In v1**: only `type:"disabled"` is
+ silently accepted as a no-op; `type:"enabled"` and `type:"adaptive"` return a
+ 400 `invalid_request_error` (`unsupported_parameter`).
+
+ tool_choice: How the model should choose from the provided tools.
+
+ tools: Tool definitions the model may call.
+
+ top_k: Top-k sampling. Considers only the K most likely tokens at each step. Set to 0
+ to disable.
+
+ top_p: Nucleus sampling. Considers tokens with cumulative probability mass up to
+ `top_p`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ def create(
+ self,
+ *,
+ max_tokens: int,
+ messages: Iterable[message_create_params.Message],
+ model: Union[
+ str,
+ Literal[
+ "Meta-Llama-3.3-70B-Instruct",
+ "Meta-Llama-3.2-1B-Instruct",
+ "Meta-Llama-3.2-3B-Instruct",
+ "Llama-3.2-11B-Vision-Instruct",
+ "Llama-3.2-90B-Vision-Instruct",
+ "Meta-Llama-3.1-8B-Instruct",
+ "Meta-Llama-3.1-70B-Instruct",
+ "Meta-Llama-3.1-405B-Instruct",
+ "Qwen2.5-Coder-32B-Instruct",
+ "Qwen2.5-72B-Instruct",
+ "QwQ-32B-Preview",
+ "Meta-Llama-Guard-3-8B",
+ "DeepSeek-R1",
+ "DeepSeek-R1-0528",
+ "DeepSeek-V3-0324",
+ "DeepSeek-V3.1",
+ "DeepSeek-V3.1-cb",
+ "DeepSeek-V3.1-Terminus",
+ "DeepSeek-V3.2",
+ "DeepSeek-R1-Distill-Llama-70B",
+ "Llama-4-Maverick-17B-128E-Instruct",
+ "Llama-4-Scout-17B-16E-Instruct",
+ "Qwen3-32B",
+ "Qwen3-235B",
+ "Llama-3.3-Swallow-70B-Instruct-v0.4",
+ "gpt-oss-120b",
+ "ALLaM-7B-Instruct-preview",
+ "MiniMax-M2.5",
+ "MiniMax-M2.7",
+ "gemma-3-12b-it",
+ ],
+ ],
+ stream: bool,
+ container: Optional[str] | Omit = omit,
+ metadata: message_create_params.Metadata | Omit = omit,
+ service_tier: Optional[Literal["auto", "standard_only"]] | Omit = omit,
+ stop_sequences: Optional[SequenceNotStr[str]] | Omit = omit,
+ system: Union[str, Iterable[message_create_params.SystemSystemTextBlockArray]] | Omit = omit,
+ temperature: Optional[float] | Omit = omit,
+ thinking: message_create_params.Thinking | Omit = omit,
+ tool_choice: Optional[message_create_params.ToolChoice] | Omit = omit,
+ tools: Optional[Iterable[message_create_params.Tool]] | Omit = omit,
+ top_k: Optional[int] | Omit = omit,
+ top_p: Optional[float] | Omit = omit,
+ anthropic_version: str | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> MessageCreateResponse | Stream[MessageStreamEvent]:
+ """Anthropic Messages API compatible endpoint.
+
+ Generates a model response for the
+ supplied conversation. Authentication accepts either the bearer
+ `Authorization: Bearer YOUR_API_KEY` header (SambaNova SDK default) or the `x-api-key`
+ header (Anthropic SDK default); the same API key is used in both cases. When
+ `stream: true` is set, the response is a sequence of Server-Sent Events whose
+ payloads conform to `MessageStreamEvent`; otherwise the response is a single
+ `Message` object.
+
+ Args:
+ max_tokens: Maximum number of tokens to generate. The combined input + output token count is
+ bounded by the model's context window.
+
+ messages: Conversation turns.
+
+ model: The model ID to use (e.g. gpt-oss-120b). See available
+ [models](https://docs.sambanova.ai/docs/en/models/sambacloud-models)
+
+ stream: If true, the response is a sequence of Server-Sent Events whose payloads conform
+ to `MessageStreamEvent`.
+
+ container: Existing code-execution container ID to reuse. **In v1**: silently dropped
+
+ metadata: Free-form metadata attached to the request. Currently only `user_id` is
+ recognized; additional fields are accepted but ignored.
+
+ service_tier: Service-tier preference. **In v1**: silently dropped
+
+ stop_sequences: Custom strings that, when generated, cause the model to stop.
+
+ system: System prompt for the conversation. Accepts either a single string (most common)
+ or an array of text blocks (used when individual segments need `cache_control`
+ markers). Multiple text blocks are joined with newlines and prepended to the
+ conversation as a `role: system` message.
+
+ temperature: Sampling temperature in `[0.0, 2.0]`. Higher values produce more random output,
+ lower values more deterministic. Adjust only one of `temperature`, `top_p`,
+ `top_k`.
+
+ thinking: Controls Anthropic-style extended thinking. **In v1**: only `type:"disabled"` is
+ silently accepted as a no-op; `type:"enabled"` and `type:"adaptive"` return a
+ 400 `invalid_request_error` (`unsupported_parameter`).
+
+ tool_choice: How the model should choose from the provided tools.
+
+ tools: Tool definitions the model may call.
+
+ top_k: Top-k sampling. Considers only the K most likely tokens at each step. Set to 0
+ to disable.
+
+ top_p: Nucleus sampling. Considers tokens with cumulative probability mass up to
+ `top_p`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @required_args(["max_tokens", "messages", "model"], ["max_tokens", "messages", "model", "stream"])
+ def create(
+ self,
+ *,
+ max_tokens: int,
+ messages: Iterable[message_create_params.Message],
+ model: Union[
+ str,
+ Literal[
+ "Meta-Llama-3.3-70B-Instruct",
+ "Meta-Llama-3.2-1B-Instruct",
+ "Meta-Llama-3.2-3B-Instruct",
+ "Llama-3.2-11B-Vision-Instruct",
+ "Llama-3.2-90B-Vision-Instruct",
+ "Meta-Llama-3.1-8B-Instruct",
+ "Meta-Llama-3.1-70B-Instruct",
+ "Meta-Llama-3.1-405B-Instruct",
+ "Qwen2.5-Coder-32B-Instruct",
+ "Qwen2.5-72B-Instruct",
+ "QwQ-32B-Preview",
+ "Meta-Llama-Guard-3-8B",
+ "DeepSeek-R1",
+ "DeepSeek-R1-0528",
+ "DeepSeek-V3-0324",
+ "DeepSeek-V3.1",
+ "DeepSeek-V3.1-cb",
+ "DeepSeek-V3.1-Terminus",
+ "DeepSeek-V3.2",
+ "DeepSeek-R1-Distill-Llama-70B",
+ "Llama-4-Maverick-17B-128E-Instruct",
+ "Llama-4-Scout-17B-16E-Instruct",
+ "Qwen3-32B",
+ "Qwen3-235B",
+ "Llama-3.3-Swallow-70B-Instruct-v0.4",
+ "gpt-oss-120b",
+ "ALLaM-7B-Instruct-preview",
+ "MiniMax-M2.5",
+ "MiniMax-M2.7",
+ "gemma-3-12b-it",
+ ],
+ ],
+ container: Optional[str] | Omit = omit,
+ metadata: message_create_params.Metadata | Omit = omit,
+ service_tier: Optional[Literal["auto", "standard_only"]] | Omit = omit,
+ stop_sequences: Optional[SequenceNotStr[str]] | Omit = omit,
+ stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
+ system: Union[str, Iterable[message_create_params.SystemSystemTextBlockArray]] | Omit = omit,
+ temperature: Optional[float] | Omit = omit,
+ thinking: message_create_params.Thinking | Omit = omit,
+ tool_choice: Optional[message_create_params.ToolChoice] | Omit = omit,
+ tools: Optional[Iterable[message_create_params.Tool]] | Omit = omit,
+ top_k: Optional[int] | Omit = omit,
+ top_p: Optional[float] | Omit = omit,
+ anthropic_version: str | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> MessageCreateResponse | Stream[MessageStreamEvent]:
+ extra_headers = {**strip_not_given({"anthropic-version": anthropic_version}), **(extra_headers or {})}
+ return self._post(
+ "/messages",
+ body=maybe_transform(
+ {
+ "max_tokens": max_tokens,
+ "messages": messages,
+ "model": model,
+ "container": container,
+ "metadata": metadata,
+ "service_tier": service_tier,
+ "stop_sequences": stop_sequences,
+ "stream": stream,
+ "system": system,
+ "temperature": temperature,
+ "thinking": thinking,
+ "tool_choice": tool_choice,
+ "tools": tools,
+ "top_k": top_k,
+ "top_p": top_p,
+ },
+ message_create_params.MessageCreateParamsStreaming
+ if stream
+ else message_create_params.MessageCreateParamsNonStreaming,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=cast(Any, MessageCreateResponse), # Union types cannot be passed in as arguments in the type system
+ stream=stream or False,
+ stream_cls=Stream[MessageStreamEvent],
+ )
+
+ def count_tokens(
+ self,
+ *,
+ messages: Iterable[message_count_tokens_params.Message],
+ model: str,
+ system: Union[str, Iterable[message_count_tokens_params.SystemSystemTextBlockArray]] | Omit = omit,
+ thinking: message_count_tokens_params.Thinking | Omit = omit,
+ tool_choice: Optional[message_count_tokens_params.ToolChoice] | Omit = omit,
+ tools: Optional[Iterable[message_count_tokens_params.Tool]] | Omit = omit,
+ anthropic_version: str | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> MessageCountTokensResponse:
+ """Anthropic `count_tokens` compatible endpoint.
+
+ Returns the number of input tokens
+ that would be consumed by a `POST /messages` call with the same prompt content
+ (system, messages, tools, tool_choice). Authentication accepts either the bearer
+ `Authorization: Bearer YOUR_API_KEY` header (SambaNova SDK default) or the `x-api-key`
+ header (Anthropic SDK default); the same API key is used in both cases.
+
+ Args:
+ messages: Conversation turns.
+
+ model: Model identifier.
+
+ system: System prompt for the conversation. Accepts either a single string (most common)
+ or an array of text blocks (used when individual segments need `cache_control`
+ markers). Multiple text blocks are joined with newlines and prepended to the
+ conversation as a `role: system` message.
+
+ thinking: Controls Anthropic-style extended thinking. **In v1**: only `type:"disabled"` is
+ silently accepted as a no-op; `type:"enabled"` and `type:"adaptive"` return a
+ 400 `invalid_request_error` (`unsupported_parameter`).
+
+ tool_choice: How the model should choose from the provided tools.
+
+ tools: Tool definitions the model may call.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ extra_headers = {**strip_not_given({"anthropic-version": anthropic_version}), **(extra_headers or {})}
+ return self._post(
+ "/messages/count_tokens",
+ body=maybe_transform(
+ {
+ "messages": messages,
+ "model": model,
+ "system": system,
+ "thinking": thinking,
+ "tool_choice": tool_choice,
+ "tools": tools,
+ },
+ message_count_tokens_params.MessageCountTokensParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=MessageCountTokensResponse,
+ )
+
+
+class AsyncMessagesResource(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncMessagesResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/sambanova/sambanova-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncMessagesResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncMessagesResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/sambanova/sambanova-python#with_streaming_response
+ """
+ return AsyncMessagesResourceWithStreamingResponse(self)
+
+ @overload
+ async def create(
+ self,
+ *,
+ max_tokens: int,
+ messages: Iterable[message_create_params.Message],
+ model: Union[
+ str,
+ Literal[
+ "Meta-Llama-3.3-70B-Instruct",
+ "Meta-Llama-3.2-1B-Instruct",
+ "Meta-Llama-3.2-3B-Instruct",
+ "Llama-3.2-11B-Vision-Instruct",
+ "Llama-3.2-90B-Vision-Instruct",
+ "Meta-Llama-3.1-8B-Instruct",
+ "Meta-Llama-3.1-70B-Instruct",
+ "Meta-Llama-3.1-405B-Instruct",
+ "Qwen2.5-Coder-32B-Instruct",
+ "Qwen2.5-72B-Instruct",
+ "QwQ-32B-Preview",
+ "Meta-Llama-Guard-3-8B",
+ "DeepSeek-R1",
+ "DeepSeek-R1-0528",
+ "DeepSeek-V3-0324",
+ "DeepSeek-V3.1",
+ "DeepSeek-V3.1-cb",
+ "DeepSeek-V3.1-Terminus",
+ "DeepSeek-V3.2",
+ "DeepSeek-R1-Distill-Llama-70B",
+ "Llama-4-Maverick-17B-128E-Instruct",
+ "Llama-4-Scout-17B-16E-Instruct",
+ "Qwen3-32B",
+ "Qwen3-235B",
+ "Llama-3.3-Swallow-70B-Instruct-v0.4",
+ "gpt-oss-120b",
+ "ALLaM-7B-Instruct-preview",
+ "MiniMax-M2.5",
+ "MiniMax-M2.7",
+ "gemma-3-12b-it",
+ ],
+ ],
+ container: Optional[str] | Omit = omit,
+ metadata: message_create_params.Metadata | Omit = omit,
+ service_tier: Optional[Literal["auto", "standard_only"]] | Omit = omit,
+ stop_sequences: Optional[SequenceNotStr[str]] | Omit = omit,
+ stream: Optional[Literal[False]] | Omit = omit,
+ system: Union[str, Iterable[message_create_params.SystemSystemTextBlockArray]] | Omit = omit,
+ temperature: Optional[float] | Omit = omit,
+ thinking: message_create_params.Thinking | Omit = omit,
+ tool_choice: Optional[message_create_params.ToolChoice] | Omit = omit,
+ tools: Optional[Iterable[message_create_params.Tool]] | Omit = omit,
+ top_k: Optional[int] | Omit = omit,
+ top_p: Optional[float] | Omit = omit,
+ anthropic_version: str | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> MessageCreateResponse:
+ """Anthropic Messages API compatible endpoint.
+
+ Generates a model response for the
+ supplied conversation. Authentication accepts either the bearer
+ `Authorization: Bearer YOUR_API_KEY` header (SambaNova SDK default) or the `x-api-key`
+ header (Anthropic SDK default); the same API key is used in both cases. When
+ `stream: true` is set, the response is a sequence of Server-Sent Events whose
+ payloads conform to `MessageStreamEvent`; otherwise the response is a single
+ `Message` object.
+
+ Args:
+ max_tokens: Maximum number of tokens to generate. The combined input + output token count is
+ bounded by the model's context window.
+
+ messages: Conversation turns.
+
+ model: The model ID to use (e.g. gpt-oss-120b). See available
+ [models](https://docs.sambanova.ai/docs/en/models/sambacloud-models)
+
+ container: Existing code-execution container ID to reuse. **In v1**: silently dropped
+
+ metadata: Free-form metadata attached to the request. Currently only `user_id` is
+ recognized; additional fields are accepted but ignored.
+
+ service_tier: Service-tier preference. **In v1**: silently dropped
+
+ stop_sequences: Custom strings that, when generated, cause the model to stop.
+
+ stream: If true, the response is a sequence of Server-Sent Events whose payloads conform
+ to `MessageStreamEvent`.
+
+ system: System prompt for the conversation. Accepts either a single string (most common)
+ or an array of text blocks (used when individual segments need `cache_control`
+ markers). Multiple text blocks are joined with newlines and prepended to the
+ conversation as a `role: system` message.
+
+ temperature: Sampling temperature in `[0.0, 2.0]`. Higher values produce more random output,
+ lower values more deterministic. Adjust only one of `temperature`, `top_p`,
+ `top_k`.
+
+ thinking: Controls Anthropic-style extended thinking. **In v1**: only `type:"disabled"` is
+ silently accepted as a no-op; `type:"enabled"` and `type:"adaptive"` return a
+ 400 `invalid_request_error` (`unsupported_parameter`).
+
+ tool_choice: How the model should choose from the provided tools.
+
+ tools: Tool definitions the model may call.
+
+ top_k: Top-k sampling. Considers only the K most likely tokens at each step. Set to 0
+ to disable.
+
+ top_p: Nucleus sampling. Considers tokens with cumulative probability mass up to
+ `top_p`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ async def create(
+ self,
+ *,
+ max_tokens: int,
+ messages: Iterable[message_create_params.Message],
+ model: Union[
+ str,
+ Literal[
+ "Meta-Llama-3.3-70B-Instruct",
+ "Meta-Llama-3.2-1B-Instruct",
+ "Meta-Llama-3.2-3B-Instruct",
+ "Llama-3.2-11B-Vision-Instruct",
+ "Llama-3.2-90B-Vision-Instruct",
+ "Meta-Llama-3.1-8B-Instruct",
+ "Meta-Llama-3.1-70B-Instruct",
+ "Meta-Llama-3.1-405B-Instruct",
+ "Qwen2.5-Coder-32B-Instruct",
+ "Qwen2.5-72B-Instruct",
+ "QwQ-32B-Preview",
+ "Meta-Llama-Guard-3-8B",
+ "DeepSeek-R1",
+ "DeepSeek-R1-0528",
+ "DeepSeek-V3-0324",
+ "DeepSeek-V3.1",
+ "DeepSeek-V3.1-cb",
+ "DeepSeek-V3.1-Terminus",
+ "DeepSeek-V3.2",
+ "DeepSeek-R1-Distill-Llama-70B",
+ "Llama-4-Maverick-17B-128E-Instruct",
+ "Llama-4-Scout-17B-16E-Instruct",
+ "Qwen3-32B",
+ "Qwen3-235B",
+ "Llama-3.3-Swallow-70B-Instruct-v0.4",
+ "gpt-oss-120b",
+ "ALLaM-7B-Instruct-preview",
+ "MiniMax-M2.5",
+ "MiniMax-M2.7",
+ "gemma-3-12b-it",
+ ],
+ ],
+ stream: Literal[True],
+ container: Optional[str] | Omit = omit,
+ metadata: message_create_params.Metadata | Omit = omit,
+ service_tier: Optional[Literal["auto", "standard_only"]] | Omit = omit,
+ stop_sequences: Optional[SequenceNotStr[str]] | Omit = omit,
+ system: Union[str, Iterable[message_create_params.SystemSystemTextBlockArray]] | Omit = omit,
+ temperature: Optional[float] | Omit = omit,
+ thinking: message_create_params.Thinking | Omit = omit,
+ tool_choice: Optional[message_create_params.ToolChoice] | Omit = omit,
+ tools: Optional[Iterable[message_create_params.Tool]] | Omit = omit,
+ top_k: Optional[int] | Omit = omit,
+ top_p: Optional[float] | Omit = omit,
+ anthropic_version: str | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> AsyncStream[MessageStreamEvent]:
+ """Anthropic Messages API compatible endpoint.
+
+ Generates a model response for the
+ supplied conversation. Authentication accepts either the bearer
+ `Authorization: Bearer YOUR_API_KEY` header (SambaNova SDK default) or the `x-api-key`
+ header (Anthropic SDK default); the same API key is used in both cases. When
+ `stream: true` is set, the response is a sequence of Server-Sent Events whose
+ payloads conform to `MessageStreamEvent`; otherwise the response is a single
+ `Message` object.
+
+ Args:
+ max_tokens: Maximum number of tokens to generate. The combined input + output token count is
+ bounded by the model's context window.
+
+ messages: Conversation turns.
+
+ model: The model ID to use (e.g. gpt-oss-120b). See available
+ [models](https://docs.sambanova.ai/docs/en/models/sambacloud-models)
+
+ stream: If true, the response is a sequence of Server-Sent Events whose payloads conform
+ to `MessageStreamEvent`.
+
+ container: Existing code-execution container ID to reuse. **In v1**: silently dropped
+
+ metadata: Free-form metadata attached to the request. Currently only `user_id` is
+ recognized; additional fields are accepted but ignored.
+
+ service_tier: Service-tier preference. **In v1**: silently dropped
+
+ stop_sequences: Custom strings that, when generated, cause the model to stop.
+
+ system: System prompt for the conversation. Accepts either a single string (most common)
+ or an array of text blocks (used when individual segments need `cache_control`
+ markers). Multiple text blocks are joined with newlines and prepended to the
+ conversation as a `role: system` message.
+
+ temperature: Sampling temperature in `[0.0, 2.0]`. Higher values produce more random output,
+ lower values more deterministic. Adjust only one of `temperature`, `top_p`,
+ `top_k`.
+
+ thinking: Controls Anthropic-style extended thinking. **In v1**: only `type:"disabled"` is
+ silently accepted as a no-op; `type:"enabled"` and `type:"adaptive"` return a
+ 400 `invalid_request_error` (`unsupported_parameter`).
+
+ tool_choice: How the model should choose from the provided tools.
+
+ tools: Tool definitions the model may call.
+
+ top_k: Top-k sampling. Considers only the K most likely tokens at each step. Set to 0
+ to disable.
+
+ top_p: Nucleus sampling. Considers tokens with cumulative probability mass up to
+ `top_p`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ async def create(
+ self,
+ *,
+ max_tokens: int,
+ messages: Iterable[message_create_params.Message],
+ model: Union[
+ str,
+ Literal[
+ "Meta-Llama-3.3-70B-Instruct",
+ "Meta-Llama-3.2-1B-Instruct",
+ "Meta-Llama-3.2-3B-Instruct",
+ "Llama-3.2-11B-Vision-Instruct",
+ "Llama-3.2-90B-Vision-Instruct",
+ "Meta-Llama-3.1-8B-Instruct",
+ "Meta-Llama-3.1-70B-Instruct",
+ "Meta-Llama-3.1-405B-Instruct",
+ "Qwen2.5-Coder-32B-Instruct",
+ "Qwen2.5-72B-Instruct",
+ "QwQ-32B-Preview",
+ "Meta-Llama-Guard-3-8B",
+ "DeepSeek-R1",
+ "DeepSeek-R1-0528",
+ "DeepSeek-V3-0324",
+ "DeepSeek-V3.1",
+ "DeepSeek-V3.1-cb",
+ "DeepSeek-V3.1-Terminus",
+ "DeepSeek-V3.2",
+ "DeepSeek-R1-Distill-Llama-70B",
+ "Llama-4-Maverick-17B-128E-Instruct",
+ "Llama-4-Scout-17B-16E-Instruct",
+ "Qwen3-32B",
+ "Qwen3-235B",
+ "Llama-3.3-Swallow-70B-Instruct-v0.4",
+ "gpt-oss-120b",
+ "ALLaM-7B-Instruct-preview",
+ "MiniMax-M2.5",
+ "MiniMax-M2.7",
+ "gemma-3-12b-it",
+ ],
+ ],
+ stream: bool,
+ container: Optional[str] | Omit = omit,
+ metadata: message_create_params.Metadata | Omit = omit,
+ service_tier: Optional[Literal["auto", "standard_only"]] | Omit = omit,
+ stop_sequences: Optional[SequenceNotStr[str]] | Omit = omit,
+ system: Union[str, Iterable[message_create_params.SystemSystemTextBlockArray]] | Omit = omit,
+ temperature: Optional[float] | Omit = omit,
+ thinking: message_create_params.Thinking | Omit = omit,
+ tool_choice: Optional[message_create_params.ToolChoice] | Omit = omit,
+ tools: Optional[Iterable[message_create_params.Tool]] | Omit = omit,
+ top_k: Optional[int] | Omit = omit,
+ top_p: Optional[float] | Omit = omit,
+ anthropic_version: str | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> MessageCreateResponse | AsyncStream[MessageStreamEvent]:
+ """Anthropic Messages API compatible endpoint.
+
+ Generates a model response for the
+ supplied conversation. Authentication accepts either the bearer
+ `Authorization: Bearer YOUR_API_KEY` header (SambaNova SDK default) or the `x-api-key`
+ header (Anthropic SDK default); the same API key is used in both cases. When
+ `stream: true` is set, the response is a sequence of Server-Sent Events whose
+ payloads conform to `MessageStreamEvent`; otherwise the response is a single
+ `Message` object.
+
+ Args:
+ max_tokens: Maximum number of tokens to generate. The combined input + output token count is
+ bounded by the model's context window.
+
+ messages: Conversation turns.
+
+ model: The model ID to use (e.g. gpt-oss-120b). See available
+ [models](https://docs.sambanova.ai/docs/en/models/sambacloud-models)
+
+ stream: If true, the response is a sequence of Server-Sent Events whose payloads conform
+ to `MessageStreamEvent`.
+
+ container: Existing code-execution container ID to reuse. **In v1**: silently dropped
+
+ metadata: Free-form metadata attached to the request. Currently only `user_id` is
+ recognized; additional fields are accepted but ignored.
+
+ service_tier: Service-tier preference. **In v1**: silently dropped
+
+ stop_sequences: Custom strings that, when generated, cause the model to stop.
+
+ system: System prompt for the conversation. Accepts either a single string (most common)
+ or an array of text blocks (used when individual segments need `cache_control`
+ markers). Multiple text blocks are joined with newlines and prepended to the
+ conversation as a `role: system` message.
+
+ temperature: Sampling temperature in `[0.0, 2.0]`. Higher values produce more random output,
+ lower values more deterministic. Adjust only one of `temperature`, `top_p`,
+ `top_k`.
+
+ thinking: Controls Anthropic-style extended thinking. **In v1**: only `type:"disabled"` is
+ silently accepted as a no-op; `type:"enabled"` and `type:"adaptive"` return a
+ 400 `invalid_request_error` (`unsupported_parameter`).
+
+ tool_choice: How the model should choose from the provided tools.
+
+ tools: Tool definitions the model may call.
+
+ top_k: Top-k sampling. Considers only the K most likely tokens at each step. Set to 0
+ to disable.
+
+ top_p: Nucleus sampling. Considers tokens with cumulative probability mass up to
+ `top_p`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @required_args(["max_tokens", "messages", "model"], ["max_tokens", "messages", "model", "stream"])
+ async def create(
+ self,
+ *,
+ max_tokens: int,
+ messages: Iterable[message_create_params.Message],
+ model: Union[
+ str,
+ Literal[
+ "Meta-Llama-3.3-70B-Instruct",
+ "Meta-Llama-3.2-1B-Instruct",
+ "Meta-Llama-3.2-3B-Instruct",
+ "Llama-3.2-11B-Vision-Instruct",
+ "Llama-3.2-90B-Vision-Instruct",
+ "Meta-Llama-3.1-8B-Instruct",
+ "Meta-Llama-3.1-70B-Instruct",
+ "Meta-Llama-3.1-405B-Instruct",
+ "Qwen2.5-Coder-32B-Instruct",
+ "Qwen2.5-72B-Instruct",
+ "QwQ-32B-Preview",
+ "Meta-Llama-Guard-3-8B",
+ "DeepSeek-R1",
+ "DeepSeek-R1-0528",
+ "DeepSeek-V3-0324",
+ "DeepSeek-V3.1",
+ "DeepSeek-V3.1-cb",
+ "DeepSeek-V3.1-Terminus",
+ "DeepSeek-V3.2",
+ "DeepSeek-R1-Distill-Llama-70B",
+ "Llama-4-Maverick-17B-128E-Instruct",
+ "Llama-4-Scout-17B-16E-Instruct",
+ "Qwen3-32B",
+ "Qwen3-235B",
+ "Llama-3.3-Swallow-70B-Instruct-v0.4",
+ "gpt-oss-120b",
+ "ALLaM-7B-Instruct-preview",
+ "MiniMax-M2.5",
+ "MiniMax-M2.7",
+ "gemma-3-12b-it",
+ ],
+ ],
+ container: Optional[str] | Omit = omit,
+ metadata: message_create_params.Metadata | Omit = omit,
+ service_tier: Optional[Literal["auto", "standard_only"]] | Omit = omit,
+ stop_sequences: Optional[SequenceNotStr[str]] | Omit = omit,
+ stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
+ system: Union[str, Iterable[message_create_params.SystemSystemTextBlockArray]] | Omit = omit,
+ temperature: Optional[float] | Omit = omit,
+ thinking: message_create_params.Thinking | Omit = omit,
+ tool_choice: Optional[message_create_params.ToolChoice] | Omit = omit,
+ tools: Optional[Iterable[message_create_params.Tool]] | Omit = omit,
+ top_k: Optional[int] | Omit = omit,
+ top_p: Optional[float] | Omit = omit,
+ anthropic_version: str | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> MessageCreateResponse | AsyncStream[MessageStreamEvent]:
+ extra_headers = {**strip_not_given({"anthropic-version": anthropic_version}), **(extra_headers or {})}
+ return await self._post(
+ "/messages",
+ body=await async_maybe_transform(
+ {
+ "max_tokens": max_tokens,
+ "messages": messages,
+ "model": model,
+ "container": container,
+ "metadata": metadata,
+ "service_tier": service_tier,
+ "stop_sequences": stop_sequences,
+ "stream": stream,
+ "system": system,
+ "temperature": temperature,
+ "thinking": thinking,
+ "tool_choice": tool_choice,
+ "tools": tools,
+ "top_k": top_k,
+ "top_p": top_p,
+ },
+ message_create_params.MessageCreateParamsStreaming
+ if stream
+ else message_create_params.MessageCreateParamsNonStreaming,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=cast(Any, MessageCreateResponse), # Union types cannot be passed in as arguments in the type system
+ stream=stream or False,
+ stream_cls=AsyncStream[MessageStreamEvent],
+ )
+
+ async def count_tokens(
+ self,
+ *,
+ messages: Iterable[message_count_tokens_params.Message],
+ model: str,
+ system: Union[str, Iterable[message_count_tokens_params.SystemSystemTextBlockArray]] | Omit = omit,
+ thinking: message_count_tokens_params.Thinking | Omit = omit,
+ tool_choice: Optional[message_count_tokens_params.ToolChoice] | Omit = omit,
+ tools: Optional[Iterable[message_count_tokens_params.Tool]] | Omit = omit,
+ anthropic_version: str | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> MessageCountTokensResponse:
+ """Anthropic `count_tokens` compatible endpoint.
+
+ Returns the number of input tokens
+ that would be consumed by a `POST /messages` call with the same prompt content
+ (system, messages, tools, tool_choice). Authentication accepts either the bearer
+ `Authorization: Bearer YOUR_API_KEY` header (SambaNova SDK default) or the `x-api-key`
+ header (Anthropic SDK default); the same API key is used in both cases.
+
+ Args:
+ messages: Conversation turns.
+
+ model: Model identifier.
+
+ system: System prompt for the conversation. Accepts either a single string (most common)
+ or an array of text blocks (used when individual segments need `cache_control`
+ markers). Multiple text blocks are joined with newlines and prepended to the
+ conversation as a `role: system` message.
+
+ thinking: Controls Anthropic-style extended thinking. **In v1**: only `type:"disabled"` is
+ silently accepted as a no-op; `type:"enabled"` and `type:"adaptive"` return a
+ 400 `invalid_request_error` (`unsupported_parameter`).
+
+ tool_choice: How the model should choose from the provided tools.
+
+ tools: Tool definitions the model may call.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ extra_headers = {**strip_not_given({"anthropic-version": anthropic_version}), **(extra_headers or {})}
+ return await self._post(
+ "/messages/count_tokens",
+ body=await async_maybe_transform(
+ {
+ "messages": messages,
+ "model": model,
+ "system": system,
+ "thinking": thinking,
+ "tool_choice": tool_choice,
+ "tools": tools,
+ },
+ message_count_tokens_params.MessageCountTokensParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=MessageCountTokensResponse,
+ )
+
+
+class MessagesResourceWithRawResponse:
+ def __init__(self, messages: MessagesResource) -> None:
+ self._messages = messages
+
+ self.create = to_raw_response_wrapper(
+ messages.create,
+ )
+ self.count_tokens = to_raw_response_wrapper(
+ messages.count_tokens,
+ )
+
+
+class AsyncMessagesResourceWithRawResponse:
+ def __init__(self, messages: AsyncMessagesResource) -> None:
+ self._messages = messages
+
+ self.create = async_to_raw_response_wrapper(
+ messages.create,
+ )
+ self.count_tokens = async_to_raw_response_wrapper(
+ messages.count_tokens,
+ )
+
+
+class MessagesResourceWithStreamingResponse:
+ def __init__(self, messages: MessagesResource) -> None:
+ self._messages = messages
+
+ self.create = to_streamed_response_wrapper(
+ messages.create,
+ )
+ self.count_tokens = to_streamed_response_wrapper(
+ messages.count_tokens,
+ )
+
+
+class AsyncMessagesResourceWithStreamingResponse:
+ def __init__(self, messages: AsyncMessagesResource) -> None:
+ self._messages = messages
+
+ self.create = async_to_streamed_response_wrapper(
+ messages.create,
+ )
+ self.count_tokens = async_to_streamed_response_wrapper(
+ messages.count_tokens,
+ )
diff --git a/src/sambanova/types/__init__.py b/src/sambanova/types/__init__.py
index 7ff1451..bb5bf32 100644
--- a/src/sambanova/types/__init__.py
+++ b/src/sambanova/types/__init__.py
@@ -2,15 +2,21 @@
from __future__ import annotations
+from .message import Message as Message
from .model_response import ModelResponse as ModelResponse
from .models_response import ModelsResponse as ModelsResponse
from .response_response import ResponseResponse as ResponseResponse
from .completion_response import CompletionResponse as CompletionResponse
from .embeddings_response import EmbeddingsResponse as EmbeddingsResponse
+from .message_stream_event import MessageStreamEvent as MessageStreamEvent
+from .message_create_params import MessageCreateParams as MessageCreateParams
from .response_stream_event import ResponseStreamEvent as ResponseStreamEvent
from .response_create_params import ResponseCreateParams as ResponseCreateParams
from .embedding_create_params import EmbeddingCreateParams as EmbeddingCreateParams
+from .message_create_response import MessageCreateResponse as MessageCreateResponse
from .completion_create_params import CompletionCreateParams as CompletionCreateParams
from .response_create_response import ResponseCreateResponse as ResponseCreateResponse
from .completion_create_response import CompletionCreateResponse as CompletionCreateResponse
from .completion_stream_response import CompletionStreamResponse as CompletionStreamResponse
+from .message_count_tokens_params import MessageCountTokensParams as MessageCountTokensParams
+from .message_count_tokens_response import MessageCountTokensResponse as MessageCountTokensResponse
diff --git a/src/sambanova/types/message.py b/src/sambanova/types/message.py
new file mode 100644
index 0000000..26719b1
--- /dev/null
+++ b/src/sambanova/types/message.py
@@ -0,0 +1,328 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from .._utils import PropertyInfo
+from .._models import BaseModel
+
+__all__ = [
+ "Message",
+ "Content",
+ "ContentMessageOutputTextBlock",
+ "ContentMessageOutputToolUseBlock",
+ "ContentMessageOutputThinkingBlock",
+ "ContentMessageOutputRedactedThinkingBlock",
+ "ContentMessageOutputServerToolUseBlock",
+ "ContentMessageOutputWebSearchToolResultBlock",
+ "ContentMessageOutputWebFetchToolResultBlock",
+ "ContentMessageOutputCodeExecutionToolResultBlock",
+ "ContentMessageOutputBashCodeExecutionToolResultBlock",
+ "ContentMessageOutputTextEditorCodeExecutionToolResultBlock",
+ "ContentMessageOutputToolSearchToolResultBlock",
+ "ContentMessageOutputContainerUploadBlock",
+ "Usage",
+ "Container",
+ "StopDetails",
+]
+
+
+class ContentMessageOutputTextBlock(BaseModel):
+ """Plain-text segment of the model's response."""
+
+ text: str
+
+ type: Literal["text"]  # literal tag; `Content` declares `type` as its discriminator
+
+ citations: Optional[List[Dict[str, object]]] = None  # list of untyped mappings; defaults to None
+ """Not emitted in v1."""
+
+
+class ContentMessageOutputToolUseBlock(BaseModel):
+ """Tool call generated by the model."""
+
+ id: str
+ """Unique identifier for this tool call."""
+
+ input: Dict[str, object]  # values are left untyped (`object`)
+ """Tool inputs as a JSON object."""
+
+ name: str
+ """Name of the tool being called."""
+
+ type: Literal["tool_use"]  # literal tag; `Content` declares `type` as its discriminator
+
+ caller: Optional[Dict[str, object]] = None
+ """Anthropic routing metadata. Always `null` in SambaNova responses."""
+
+
+class ContentMessageOutputThinkingBlock(BaseModel):
+ """Extended-reasoning trace from the model. Emitted by reasoning models."""
+
+ thinking: str
+
+ type: Literal["thinking"]  # literal tag; `Content` declares `type` as its discriminator
+
+ signature: Optional[str] = None  # optional; defaults to None
+
+
+class ContentMessageOutputRedactedThinkingBlock(BaseModel):
+ """
+ Anthropic compatibility only — SambaNova does not produce encrypted thinking output. Never emitted in responses.
+ """
+
+ data: str  # required string payload; its format is not described in this file
+
+ type: Literal["redacted_thinking"]  # literal tag; `Content` declares `type` as its discriminator
+
+
+class ContentMessageOutputServerToolUseBlock(BaseModel):
+ """Anthropic compatibility only — SambaNova does not run server-side tools.
+
+ Never emitted in responses; defined for Anthropic SDK type-parity.
+ """
+
+ id: str
+
+ input: Dict[str, object]  # values are left untyped (`object`)
+
+ name: str
+
+ type: Literal["server_tool_use"]  # literal tag; `Content` declares `type` as its discriminator
+
+
+class ContentMessageOutputWebSearchToolResultBlock(BaseModel):
+ """Anthropic compatibility only — SambaNova does not run server-side `web_search`.
+
+ Never emitted in responses.
+ """
+
+ content: List[Dict[str, object]]  # a list here; the sibling *ToolResultBlock models use a single mapping
+
+ tool_use_id: str
+
+ type: Literal["web_search_tool_result"]  # literal tag; `Content` declares `type` as its discriminator
+
+
+class ContentMessageOutputWebFetchToolResultBlock(BaseModel):
+ """Anthropic compatibility only — SambaNova does not run server-side `web_fetch`.
+
+ Never emitted in responses.
+ """
+
+ content: Dict[str, object]  # single untyped mapping
+
+ tool_use_id: str
+
+ type: Literal["web_fetch_tool_result"]  # literal tag; `Content` declares `type` as its discriminator
+
+
+class ContentMessageOutputCodeExecutionToolResultBlock(BaseModel):
+ """
+ Anthropic compatibility only — SambaNova does not run server-side `code_execution`. Never emitted in responses.
+ """
+
+ content: Dict[str, object]  # single untyped mapping
+
+ tool_use_id: str
+
+ type: Literal["code_execution_tool_result"]  # literal tag; `Content` declares `type` as its discriminator
+
+
+class ContentMessageOutputBashCodeExecutionToolResultBlock(BaseModel):
+ """
+ Anthropic compatibility only — SambaNova does not run server-side bash code execution. Never emitted in responses.
+ """
+
+ content: Dict[str, object]  # single untyped mapping
+
+ tool_use_id: str
+
+ type: Literal["bash_code_execution_tool_result"]  # literal tag; `Content` declares `type` as its discriminator
+
+
+class ContentMessageOutputTextEditorCodeExecutionToolResultBlock(BaseModel):
+ """
+ Anthropic compatibility only — SambaNova does not run server-side text-editor code execution. Never emitted in responses.
+ """
+
+ content: Dict[str, object]  # single untyped mapping
+
+ tool_use_id: str
+
+ type: Literal["text_editor_code_execution_tool_result"]  # literal tag; `Content` declares `type` as its discriminator
+
+
+class ContentMessageOutputToolSearchToolResultBlock(BaseModel):
+ """Anthropic compatibility only — SambaNova does not run server-side `tool_search`.
+
+ Never emitted in responses.
+ """
+
+ content: Dict[str, object]  # single untyped mapping
+
+ tool_use_id: str
+
+ type: Literal["tool_search_tool_result"]  # literal tag; `Content` declares `type` as its discriminator
+
+
+class ContentMessageOutputContainerUploadBlock(BaseModel):
+ """
+ Anthropic compatibility only — SambaNova does not produce container_upload blocks (these come from Anthropic's server-side `code_execution` tool). Never emitted in responses.
+ """
+
+ file_id: str  # required string; no further structure is declared here
+
+ type: Literal["container_upload"]  # literal tag; `Content` declares `type` as its discriminator
+
+
+Content: TypeAlias = Annotated[  # union of every response content-block model defined above
+ Union[
+ ContentMessageOutputTextBlock,
+ ContentMessageOutputToolUseBlock,
+ ContentMessageOutputThinkingBlock,
+ ContentMessageOutputRedactedThinkingBlock,
+ ContentMessageOutputServerToolUseBlock,
+ ContentMessageOutputWebSearchToolResultBlock,
+ ContentMessageOutputWebFetchToolResultBlock,
+ ContentMessageOutputCodeExecutionToolResultBlock,
+ ContentMessageOutputBashCodeExecutionToolResultBlock,
+ ContentMessageOutputTextEditorCodeExecutionToolResultBlock,
+ ContentMessageOutputToolSearchToolResultBlock,
+ ContentMessageOutputContainerUploadBlock,
+ ],
+ PropertyInfo(discriminator="type"),  # presumably lets the model layer pick the member by its `type` literal — confirm in _utils
+]
+
+
+class Usage(BaseModel):
+ """Token accounting for the request."""
+
+ input_tokens: int  # required: declared without a default
+ """Total tokens in the prompt (system + messages + tools)."""
+
+ output_tokens: int  # required: declared without a default
+ """Total tokens generated by the model."""
+
+ cache_creation: Optional[Dict[str, object]] = None  # untyped mapping; defaults to None
+ """Anthropic SDK alias for cache write metrics.
+
+ Always `null` in SambaNova responses; use `cache_creation_input_tokens` instead.
+ """
+
+ cache_creation_input_tokens: Optional[int] = None
+ """Tokens written to prompt cache.
+
+ Absent in v1; emitted once prompt caching wiring lands (CP-2897).
+ """
+
+ cache_read_input_tokens: Optional[int] = None
+ """Tokens read from prompt cache.
+
+ Absent in v1; emitted once prompt caching wiring lands (CP-2897).
+ """
+
+ inference_geo: Optional[str] = None
+ """Geographic region that served the request.
+
+ Anthropic compatibility only - SambaNova does not expose geo routing, always
+ `null`.
+ """
+
+ server_tool_use: Optional[Dict[str, object]] = None  # untyped mapping; defaults to None
+ """Server-tool usage metrics (e.g.
+
+ `web_search_requests`). Anthropic compatibility only — SambaNova does not run
+ server tools, so this field is never emitted.
+ """
+
+ service_tier: Optional[str] = None
+ """Service tier that processed the request.
+
+ Anthropic compatibility only — SambaNova is single-tier and never emits this
+ field.
+ """
+
+
+class Container(BaseModel):
+ """Code-execution container reference.
+
+ Anthropic compatibility only — SambaNova does not run server-side code execution, so this field is never emitted on responses.
+ """
+
+ id: str
+
+ expires_at: str  # typed as a plain string; not parsed into a datetime by this model
+ """ISO-8601 timestamp."""
+
+
+class StopDetails(BaseModel):
+ """Refusal stop details.
+
+ Anthropic compatibility only — `refusal` is never emitted as a stop_reason by SambaNova (content filtering is not exposed at the API layer).
+ """
+
+ type: Literal["refusal"]  # only one literal value is declared
+
+ category: Optional[Literal["cyber", "bio"]] = None  # optional; restricted to these two literals
+
+
+class Message(BaseModel):
+ """Non-streaming response from `POST /messages`.
+
+ Wire-compatible with the official Anthropic Messages API.
+ """
+
+ id: str
+ """Unique identifier for this message."""
+
+ content: List[Content]  # each item is one member of the `Content` union (discriminator: `type`)
+
+ model: str
+ """Model that produced the response."""
+
+ role: Literal["assistant"]  # only one literal value is declared
+
+ stop_reason: Optional[  # optional; defaults to None
+ Literal[
+ "end_turn",
+ "max_tokens",
+ "tool_use",
+ "pause_turn",
+ "refusal",
+ "stop_sequence",
+ "model_context_window_exceeded",
+ ]
+ ] = None
+ """Reason the model stopped generating.
+
+ SambaNova emits `end_turn`, `max_tokens`, `tool_use`, and `stop_sequence`. The
+ remaining values are defined for Anthropic SDK type-parity but never returned:
+ `pause_turn` (server-tool loop limit, not produced); `refusal` (content filter,
+ not exposed); `model_context_window_exceeded` (folded to `max_tokens`).
+ """
+
+ type: Literal["message"]  # only one literal value is declared
+
+ usage: Usage  # required: declared without a default
+ """Token accounting for the request."""
+
+ container: Optional[Container] = None
+ """Code-execution container reference.
+
+ Anthropic compatibility only — SambaNova does not run server-side code
+ execution, so this field is never emitted on responses.
+ """
+
+ stop_details: Optional[StopDetails] = None
+ """Refusal stop details.
+
+ Anthropic compatibility only — `refusal` is never emitted as a stop_reason by
+ SambaNova (content filtering is not exposed at the API layer).
+ """
+
+ stop_sequence: Optional[str] = None
+ """The matched stop sequence that triggered termination.
+
+ Present when `stop_reason` is `stop_sequence`; `null` otherwise.
+ """
diff --git a/src/sambanova/types/message_count_tokens_params.py b/src/sambanova/types/message_count_tokens_params.py
new file mode 100644
index 0000000..f8852c3
--- /dev/null
+++ b/src/sambanova/types/message_count_tokens_params.py
@@ -0,0 +1,979 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Iterable, Optional
+from typing_extensions import Literal, Required, Annotated, TypeAlias, TypedDict
+
+from .._types import SequenceNotStr
+from .._utils import PropertyInfo
+
+__all__ = [
+ "MessageCountTokensParams",
+ "Message",
+ "MessageContentContentBlockArray",
+ "MessageContentContentBlockArrayMessageInputTextBlock",
+ "MessageContentContentBlockArrayMessageInputTextBlockCacheControl",
+ "MessageContentContentBlockArrayMessageInputImageBlock",
+ "MessageContentContentBlockArrayMessageInputImageBlockSource",
+ "MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceBase64",
+ "MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceURL",
+ "MessageContentContentBlockArrayMessageInputImageBlockCacheControl",
+ "MessageContentContentBlockArrayMessageInputToolUseBlock",
+ "MessageContentContentBlockArrayMessageInputToolUseBlockCacheControl",
+ "MessageContentContentBlockArrayMessageInputToolResultBlock",
+ "MessageContentContentBlockArrayMessageInputToolResultBlockCacheControl",
+ "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArray",
+ "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlock",
+ "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlockCacheControl",
+ "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlock",
+ "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSource",
+ "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceBase64",
+ "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceURL",
+ "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockCacheControl",
+ "MessageContentContentBlockArrayMessageInputServerToolUseBlock",
+ "MessageContentContentBlockArrayMessageInputServerToolUseBlockCacheControl",
+ "MessageContentContentBlockArrayMessageInputSearchResultBlock",
+ "MessageContentContentBlockArrayMessageInputSearchResultBlockCacheControl",
+ "MessageContentContentBlockArrayMessageInputSearchResultBlockContent",
+ "MessageContentContentBlockArrayMessageInputSearchResultBlockContentCacheControl",
+ "MessageContentContentBlockArrayMessageInputWebSearchToolResultBlock",
+ "MessageContentContentBlockArrayMessageInputWebSearchToolResultBlockCacheControl",
+ "MessageContentContentBlockArrayMessageInputWebFetchToolResultBlock",
+ "MessageContentContentBlockArrayMessageInputWebFetchToolResultBlockCacheControl",
+ "MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlock",
+ "MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlockCacheControl",
+ "MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlock",
+ "MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlockCacheControl",
+ "MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlock",
+ "MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlockCacheControl",
+ "MessageContentContentBlockArrayMessageInputToolSearchToolResultBlock",
+ "MessageContentContentBlockArrayMessageInputToolSearchToolResultBlockCacheControl",
+ "MessageContentContentBlockArrayMessageInputThinkingBlock",
+ "MessageContentContentBlockArrayMessageInputRedactedThinkingBlock",
+ "MessageContentContentBlockArrayMessageInputContainerUploadBlock",
+ "MessageContentContentBlockArrayMessageInputContainerUploadBlockCacheControl",
+ "MessageContentContentBlockArrayMessageInputDocumentBlock",
+ "MessageContentContentBlockArrayMessageInputDocumentBlockCacheControl",
+ "SystemSystemTextBlockArray",
+ "SystemSystemTextBlockArrayCacheControl",
+ "Thinking",
+ "ThinkingMessageThinkingDisabled",
+ "ThinkingMessageThinkingEnabled",
+ "ThinkingMessageThinkingAdaptive",
+ "ToolChoice",
+ "ToolChoiceMessageToolChoiceAuto",
+ "ToolChoiceMessageToolChoiceAny",
+ "ToolChoiceMessageToolChoiceNone",
+ "ToolChoiceMessageToolChoiceTool",
+ "Tool",
+ "ToolCacheControl",
+]
+
+
+class MessageCountTokensParams(TypedDict, total=False):  # total=False: only Required[...] keys are mandatory
+ messages: Required[Iterable[Message]]
+ """Conversation turns."""
+
+ model: Required[str]
+ """Model identifier."""
+
+ system: Union[str, Iterable[SystemSystemTextBlockArray]]  # plain string or an iterable of text blocks
+ """System prompt for the conversation.
+
+ Accepts either a single string (most common) or an array of text blocks (used
+ when individual segments need `cache_control` markers). Multiple text blocks are
+ joined with newlines and prepended to the conversation as a `role: system`
+ message.
+ """
+
+ thinking: Thinking
+ """Controls Anthropic-style extended thinking.
+
+ **In v1**: only `type:"disabled"` is silently accepted as a no-op;
+ `type:"enabled"` and `type:"adaptive"` return a 400 `invalid_request_error`
+ (`unsupported_parameter`).
+ """
+
+ tool_choice: Optional[ToolChoice]
+ """How the model should choose from the provided tools."""
+
+ tools: Optional[Iterable[Tool]]
+ """Tool definitions the model may call."""
+
+ anthropic_version: Annotated[str, PropertyInfo(alias="anthropic-version")]  # declared with the hyphenated alias "anthropic-version"
+
+
+class MessageContentContentBlockArrayMessageInputTextBlockCacheControl(TypedDict, total=False):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; how they are wired into the router's longest-prefix matching is left unstated (NOTE(review): the upstream sentence is truncated here). **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]  # not Required[...]: the key may be omitted (total=False)
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputTextBlock(TypedDict, total=False):
+ """Plain-text segment of a message."""
+
+ text: Required[str]
+
+ type: Required[Literal["text"]]  # fixed literal tag for this block shape
+
+ cache_control: MessageContentContentBlockArrayMessageInputTextBlockCacheControl  # not Required[...]: may be omitted
+ """
+ Marks the preceding content block (or system text block) as a prompt- cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+ citations: Optional[Iterable[Dict[str, object]]]  # iterable of untyped mappings; may be omitted or None
+
+
+class MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceBase64(TypedDict, total=False):
+ """Inline image data encoded as base64."""
+
+ data: Required[str]
+ """Base64-encoded image bytes (no `data:` URI prefix)."""
+
+ media_type: Required[Literal["image/jpeg", "image/png", "image/gif", "image/webp"]]  # closed set of four MIME types
+ """MIME type of the image bytes."""
+
+ type: Required[Literal["base64"]]  # fixed literal tag for this source shape
+
+
+class MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceURL(TypedDict, total=False):
+ """HTTPS URL pointing to an image.
+
+ **Returns 400 in v1** — URL fetching is blocked. Use `type:"base64"` instead.
+ """
+
+ type: Required[Literal["url"]]  # fixed literal tag for this source shape
+
+ url: Required[str]  # typed as a plain string; no URL validation is declared here
+
+
+MessageContentContentBlockArrayMessageInputImageBlockSource: TypeAlias = Union[
+ MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceBase64,
+ MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceURL,
+]  # image source: either the base64 shape or the URL shape
+
+
+class MessageContentContentBlockArrayMessageInputImageBlockCacheControl(TypedDict, total=False):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; how they are wired into the router's longest-prefix matching is left unstated (NOTE(review): the upstream sentence is truncated here). **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]  # not Required[...]: the key may be omitted (total=False)
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputImageBlock(TypedDict, total=False):
+ """Image content.
+
+ Only `source.type:"base64"` is supported in v1; URL sources return 400.
+ """
+
+ source: Required[MessageContentContentBlockArrayMessageInputImageBlockSource]  # union of base64 and URL shapes; the string below describes only base64
+ """Inline image data encoded as base64."""
+
+ type: Required[Literal["image"]]  # fixed literal tag for this block shape
+
+ cache_control: MessageContentContentBlockArrayMessageInputImageBlockCacheControl  # not Required[...]: may be omitted
+ """
+ Marks the preceding content block (or system text block) as a prompt- cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+
+class MessageContentContentBlockArrayMessageInputToolUseBlockCacheControl(TypedDict, total=False):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; how they are wired into the router's longest-prefix matching is left unstated (NOTE(review): the upstream sentence is truncated here). **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]  # not Required[...]: the key may be omitted (total=False)
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputToolUseBlock(TypedDict, total=False):
+ """A prior assistant turn that invoked a tool."""
+
+ id: Required[str]
+ """Unique identifier for the tool call (used to correlate `tool_result`)."""
+
+ input: Required[Dict[str, object]]  # values are left untyped (`object`)
+ """Tool inputs as a JSON object."""
+
+ name: Required[str]
+ """Name of the tool being invoked."""
+
+ type: Required[Literal["tool_use"]]  # fixed literal tag for this block shape
+
+ cache_control: MessageContentContentBlockArrayMessageInputToolUseBlockCacheControl  # not Required[...]: may be omitted
+ """
+ Marks the preceding content block (or system text block) as a prompt- cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+
+class MessageContentContentBlockArrayMessageInputToolResultBlockCacheControl(TypedDict, total=False):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; how they are wired into the router's longest-prefix matching is left unstated (NOTE(review): the upstream sentence is truncated here). **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]  # not Required[...]: the key may be omitted (total=False)
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlockCacheControl(
+ TypedDict, total=False
+):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; how they are wired into the router's longest-prefix matching is left unstated (NOTE(review): the upstream sentence is truncated here). **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]  # not Required[...]: the key may be omitted (total=False)
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlock(
+ TypedDict, total=False  # total=False: only Required[...] keys are mandatory
+):
+ """Plain-text segment of a message."""
+
+ text: Required[str]
+
+ type: Required[Literal["text"]]  # fixed literal tag for this block shape
+
+ cache_control: MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlockCacheControl
+ """
+ Marks the preceding content block (or system text block) as a prompt- cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+ citations: Optional[Iterable[Dict[str, object]]]  # iterable of untyped mappings; may be omitted or None
+
+
+class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceBase64(
+ TypedDict, total=False  # total=False, though every key below is Required[...]
+):
+ """Inline image data encoded as base64."""
+
+ data: Required[str]
+ """Base64-encoded image bytes (no `data:` URI prefix)."""
+
+ media_type: Required[Literal["image/jpeg", "image/png", "image/gif", "image/webp"]]  # closed set of four MIME types
+ """MIME type of the image bytes."""
+
+ type: Required[Literal["base64"]]  # fixed literal tag for this source shape
+
+
+class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceURL(
+ TypedDict, total=False  # total=False, though every key below is Required[...]
+):
+ """HTTPS URL pointing to an image.
+
+ **Returns 400 in v1** — URL fetching is blocked. Use `type:"base64"` instead.
+ """
+
+ type: Required[Literal["url"]]  # fixed literal tag for this source shape
+
+ url: Required[str]  # typed as a plain string; no URL validation is declared here
+
+
+MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSource: TypeAlias = Union[
+ MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceBase64,
+ MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceURL,
+]  # image source inside a tool result: either the base64 shape or the URL shape
+
+
+class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockCacheControl(
+ TypedDict, total=False
+):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; how they are wired into the router's longest-prefix matching is left unstated (NOTE(review): the upstream sentence is truncated here). **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]  # not Required[...]: the key may be omitted (total=False)
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlock(
+ TypedDict, total=False  # total=False: only Required[...] keys are mandatory
+):
+ """Image content.
+
+ Only `source.type:"base64"` is supported in v1; URL sources return 400.
+ """
+
+ source: Required[  # union of base64 and URL shapes; the string below describes only base64
+ MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSource
+ ]
+ """Inline image data encoded as base64."""
+
+ type: Required[Literal["image"]]  # fixed literal tag for this block shape
+
+ cache_control: MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockCacheControl
+ """
+ Marks the preceding content block (or system text block) as a prompt- cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+
+MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArray: TypeAlias = Union[
+ MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlock,
+ MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlock,
+]  # one element of a tool result's content array: a text block or an image block
+
+
+class MessageContentContentBlockArrayMessageInputToolResultBlock(TypedDict, total=False):
+ """Result of a prior tool call."""
+
+ tool_use_id: Required[str]
+ """ID of the `tool_use` block this result corresponds to."""
+
+ type: Required[Literal["tool_result"]]  # fixed literal tag for this block shape
+
+ cache_control: MessageContentContentBlockArrayMessageInputToolResultBlockCacheControl  # not Required[...]: may be omitted
+ """
+ Marks the preceding content block (or system text block) as a prompt- cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+ content: Union[  # either a plain string or an iterable of text/image blocks; may be omitted
+ str, Iterable[MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArray]
+ ]
+
+ is_error: Optional[bool]
+ """Silently dropped in v1."""
+
+
+class MessageContentContentBlockArrayMessageInputServerToolUseBlockCacheControl(TypedDict, total=False):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; how they are wired into the router's longest-prefix matching is left unstated (NOTE(review): the upstream sentence is truncated here). **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]  # not Required[...]: the key may be omitted (total=False)
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputServerToolUseBlock(TypedDict, total=False):
+ """Anthropic compatibility only — SambaNova does not run server-side tools.
+
+ A prior assistant turn that invoked an Anthropic-hosted tool (web_search, code_execution, etc.). Accepted in conversation history (e.g. replaying an Anthropic-served session) but never originates from a SambaNova response. New `server_tool_use`-type tool definitions on outgoing requests are rejected with 400 `unsupported_tool_type`.
+ """
+
+ id: Required[str]
+
+ input: Required[Dict[str, object]]  # values are left untyped (`object`)
+
+ name: Required[str]
+
+ type: Required[Literal["server_tool_use"]]  # fixed literal tag for this block shape
+
+ cache_control: MessageContentContentBlockArrayMessageInputServerToolUseBlockCacheControl  # not Required[...]: may be omitted
+ """
+ Marks the preceding content block (or system text block) as a prompt- cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+
+class MessageContentContentBlockArrayMessageInputSearchResultBlockCacheControl(TypedDict, total=False):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; how they are wired into the router's longest-prefix matching is left unstated (NOTE(review): the upstream sentence is truncated here). **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]  # not Required[...]: the key may be omitted (total=False)
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputSearchResultBlockContentCacheControl(TypedDict, total=False):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; how they are wired into the router's longest-prefix matching is left unstated (NOTE(review): the upstream sentence is truncated here). **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]  # not Required[...]: the key may be omitted (total=False)
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputSearchResultBlockContent(TypedDict, total=False):
+ """Plain-text segment of a message."""
+
+ text: Required[str]
+
+ type: Required[Literal["text"]]  # fixed literal tag for this block shape
+
+ cache_control: MessageContentContentBlockArrayMessageInputSearchResultBlockContentCacheControl  # not Required[...]: may be omitted
+ """
+ Marks the preceding content block (or system text block) as a prompt- cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+ citations: Optional[Iterable[Dict[str, object]]]  # iterable of untyped mappings; may be omitted or None
+
+
+class MessageContentContentBlockArrayMessageInputSearchResultBlock(TypedDict, total=False):
+ """Inline search result content.
+
+ In v1 the `title`, `source`, and `content[]` text are extracted into a text block; citations are dropped.
+ """
+
+ type: Required[Literal["search_result"]]  # the only Required[...] key in this shape
+
+ cache_control: MessageContentContentBlockArrayMessageInputSearchResultBlockCacheControl
+ """
+ Marks the preceding content block (or system text block) as a prompt- cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+ citations: Optional[Dict[str, object]]  # a single mapping here, unlike the iterable used on text blocks
+
+ content: Iterable[MessageContentContentBlockArrayMessageInputSearchResultBlockContent]  # not Required[...]: may be omitted
+
+ source: str  # not Required[...]: may be omitted
+
+ title: str  # not Required[...]: may be omitted
+
+
+class MessageContentContentBlockArrayMessageInputWebSearchToolResultBlockCacheControl(TypedDict, total=False):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; how they are wired into the router's longest-prefix matching is left unstated (NOTE(review): the upstream sentence is truncated here). **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]  # not Required[...]: the key may be omitted (total=False)
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputWebSearchToolResultBlock(TypedDict, total=False):
+ """Anthropic compatibility only — SambaNova does not run server-side `web_search`.
+
+ Echo of a prior Anthropic-served `web_search` tool call; accepted in conversation history but never originates from a SambaNova response. When present, only `title` (`url`) per result is extracted into a tool message.
+ """
+
+ content: Required[Iterable[Dict[str, object]]]
+
+ tool_use_id: Required[str]
+
+ type: Required[Literal["web_search_tool_result"]]
+
+ cache_control: MessageContentContentBlockArrayMessageInputWebSearchToolResultBlockCacheControl
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+
+class MessageContentContentBlockArrayMessageInputWebFetchToolResultBlockCacheControl(TypedDict, total=False):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputWebFetchToolResultBlock(TypedDict, total=False):
+ """Anthropic compatibility only — SambaNova does not run server-side `web_fetch`.
+
+ Echo of a prior Anthropic-served `web_fetch` tool call; accepted in conversation history but never originates from a SambaNova response. When present, only the text content is extracted.
+ """
+
+ content: Required[Dict[str, object]]
+
+ tool_use_id: Required[str]
+
+ type: Required[Literal["web_fetch_tool_result"]]
+
+ cache_control: MessageContentContentBlockArrayMessageInputWebFetchToolResultBlockCacheControl
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+
+class MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlockCacheControl(TypedDict, total=False):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlock(TypedDict, total=False):
+ """
+ Anthropic compatibility only — SambaNova does not run server-side `code_execution`. Echo of a prior Anthropic-served `code_execution` tool call; accepted in conversation history but never originates from a SambaNova response. When present, only `stdout`, `stderr`, and `return_code` are extracted; image output is dropped.
+ """
+
+ content: Required[Dict[str, object]]
+
+ tool_use_id: Required[str]
+
+ type: Required[Literal["code_execution_tool_result"]]
+
+ cache_control: MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlockCacheControl
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+
+class MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlockCacheControl(TypedDict, total=False):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlock(TypedDict, total=False):
+ """
+ Anthropic compatibility only — SambaNova does not run server-side bash code execution. Echo of a prior Anthropic-served bash tool call; accepted in conversation history but never originates from a SambaNova response. Same lossy extraction as `code_execution_tool_result`.
+ """
+
+ content: Required[Dict[str, object]]
+
+ tool_use_id: Required[str]
+
+ type: Required[Literal["bash_code_execution_tool_result"]]
+
+ cache_control: MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlockCacheControl
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+
+class MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlockCacheControl(
+ TypedDict, total=False
+):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlock(TypedDict, total=False):
+ """
+ Anthropic compatibility only — SambaNova does not run server-side text-editor code execution. Echo of a prior Anthropic-served text-editor tool call; accepted in conversation history but never originates from a SambaNova response. When present, only file content is extracted; metadata (line count, file type) is dropped.
+ """
+
+ content: Required[Dict[str, object]]
+
+ tool_use_id: Required[str]
+
+ type: Required[Literal["text_editor_code_execution_tool_result"]]
+
+ cache_control: MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlockCacheControl
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+
+class MessageContentContentBlockArrayMessageInputToolSearchToolResultBlockCacheControl(TypedDict, total=False):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputToolSearchToolResultBlock(TypedDict, total=False):
+ """Anthropic compatibility only — SambaNova does not run server-side `tool_search`.
+
+ Echo of a prior Anthropic-served `tool_search` tool call; accepted in conversation history but never originates from a SambaNova response. When present, an empty string is emitted to the tool message (no plain-text fields).
+ """
+
+ content: Required[Dict[str, object]]
+
+ tool_use_id: Required[str]
+
+ type: Required[Literal["tool_search_tool_result"]]
+
+ cache_control: MessageContentContentBlockArrayMessageInputToolSearchToolResultBlockCacheControl
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+
+class MessageContentContentBlockArrayMessageInputThinkingBlock(TypedDict, total=False):
+ """Extended-reasoning trace from a prior assistant turn."""
+
+ signature: Required[str]
+
+ thinking: Required[str]
+
+ type: Required[Literal["thinking"]]
+
+
+class MessageContentContentBlockArrayMessageInputRedactedThinkingBlock(TypedDict, total=False):
+ """
+ Anthropic compatibility only — SambaNova does not produce encrypted thinking output. Echo of a prior Anthropic-served response where `thinking.display:"omitted"` was set. Accepted in conversation history but never originates from a SambaNova response. Silently dropped on input.
+ """
+
+ data: Required[str]
+
+ type: Required[Literal["redacted_thinking"]]
+
+
+class MessageContentContentBlockArrayMessageInputContainerUploadBlockCacheControl(TypedDict, total=False):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputContainerUploadBlock(TypedDict, total=False):
+ """
+ Anthropic compatibility only — SambaNova does not produce container_upload blocks (these come from Anthropic's server-side `code_execution` tool). Accepted in conversation history but never originates from a SambaNova response. Silently dropped on input.
+ """
+
+ file_id: Required[str]
+
+ type: Required[Literal["container_upload"]]
+
+ cache_control: MessageContentContentBlockArrayMessageInputContainerUploadBlockCacheControl
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+
+class MessageContentContentBlockArrayMessageInputDocumentBlockCacheControl(TypedDict, total=False):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputDocumentBlock(TypedDict, total=False):
+ """PDF or document content.
+
+ **Returns 400** — no document-extraction pipeline available.
+ """
+
+ source: Required[Dict[str, object]]
+
+ type: Required[Literal["document"]]
+
+ cache_control: MessageContentContentBlockArrayMessageInputDocumentBlockCacheControl
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+ citations: Optional[Dict[str, object]]
+
+ context: Optional[str]
+
+ title: Optional[str]
+
+
+MessageContentContentBlockArray: TypeAlias = Union[
+ MessageContentContentBlockArrayMessageInputTextBlock,
+ MessageContentContentBlockArrayMessageInputImageBlock,
+ MessageContentContentBlockArrayMessageInputToolUseBlock,
+ MessageContentContentBlockArrayMessageInputToolResultBlock,
+ MessageContentContentBlockArrayMessageInputServerToolUseBlock,
+ MessageContentContentBlockArrayMessageInputSearchResultBlock,
+ MessageContentContentBlockArrayMessageInputWebSearchToolResultBlock,
+ MessageContentContentBlockArrayMessageInputWebFetchToolResultBlock,
+ MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlock,
+ MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlock,
+ MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlock,
+ MessageContentContentBlockArrayMessageInputToolSearchToolResultBlock,
+ MessageContentContentBlockArrayMessageInputThinkingBlock,
+ MessageContentContentBlockArrayMessageInputRedactedThinkingBlock,
+ MessageContentContentBlockArrayMessageInputContainerUploadBlock,
+ MessageContentContentBlockArrayMessageInputDocumentBlock,
+]
+
+
+class Message(TypedDict, total=False):
+ """A turn in the conversation."""
+
+ content: Required[Union[str, Iterable[MessageContentContentBlockArray]]]
+
+ role: Required[Literal["user", "assistant"]]
+ """Conversational role.
+
+ `user` for the human-side turn, `assistant` for prior model output.
+ """
+
+
+class SystemSystemTextBlockArrayCacheControl(TypedDict, total=False):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class SystemSystemTextBlockArray(TypedDict, total=False):
+ """A text segment within a structured `system` prompt array.
+
+ Multiple text blocks are concatenated (with newlines) and prepended to the conversation as a `role: system` message at the chat-completions layer.
+ """
+
+ text: Required[str]
+ """Plain-text content of the system prompt segment."""
+
+ type: Required[Literal["text"]]
+
+ cache_control: SystemSystemTextBlockArrayCacheControl
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+ citations: Optional[Iterable[Dict[str, object]]]
+ """Optional citations. **In v1**: silently dropped"""
+
+
+class ThinkingMessageThinkingDisabled(TypedDict, total=False):
+ """Disables Anthropic-style extended thinking.
+
+ **In v1**: silently accepted as a no-op
+ """
+
+ type: Required[Literal["disabled"]]
+
+
+class ThinkingMessageThinkingEnabled(TypedDict, total=False):
+ """Enables Anthropic-style extended thinking with a fixed budget.
+
+ **In v1**: returns a 400 `invalid_request_error` (`unsupported_parameter`).
+ """
+
+ budget_tokens: Required[int]
+ """
+ Maximum tokens the model may spend on extended thinking before producing the
+ final answer.
+ """
+
+ type: Required[Literal["enabled"]]
+
+
+class ThinkingMessageThinkingAdaptive(TypedDict, total=False):
+ """Enables Anthropic-style adaptive extended thinking.
+
+ **In v1**: returns a 400 `invalid_request_error` (`unsupported_parameter`).
+ """
+
+ type: Required[Literal["adaptive"]]
+
+ budget_tokens: Optional[int]
+ """Optional upper bound on tokens spent on adaptive thinking.
+
+ When omitted, the backend chooses based on prompt complexity.
+ """
+
+
+Thinking: TypeAlias = Union[
+ ThinkingMessageThinkingDisabled, ThinkingMessageThinkingEnabled, ThinkingMessageThinkingAdaptive
+]
+
+
+class ToolChoiceMessageToolChoiceAuto(TypedDict, total=False):
+ """Let the model decide whether and which tool to use."""
+
+ type: Required[Literal["auto"]]
+
+ disable_parallel_tool_use: Optional[bool]
+ """Silently dropped."""
+
+
+class ToolChoiceMessageToolChoiceAny(TypedDict, total=False):
+ """Require the model to call one of the provided tools."""
+
+ type: Required[Literal["any"]]
+
+ disable_parallel_tool_use: Optional[bool]
+ """Silently dropped."""
+
+
+class ToolChoiceMessageToolChoiceNone(TypedDict, total=False):
+ """Forbid the model from calling any tool."""
+
+ type: Required[Literal["none"]]
+
+
+class ToolChoiceMessageToolChoiceTool(TypedDict, total=False):
+ """Force the model to call a specific tool by name."""
+
+ name: Required[str]
+ """Name of the required tool."""
+
+ type: Required[Literal["tool"]]
+
+ disable_parallel_tool_use: Optional[bool]
+ """Silently dropped."""
+
+
+ToolChoice: TypeAlias = Union[
+ ToolChoiceMessageToolChoiceAuto,
+ ToolChoiceMessageToolChoiceAny,
+ ToolChoiceMessageToolChoiceNone,
+ ToolChoiceMessageToolChoiceTool,
+]
+
+
+class ToolCacheControl(TypedDict, total=False):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class Tool(TypedDict, total=False):
+ """User-defined function tool definition.
+
+ Only custom function tools are supported (Anthropic's `type:"custom"` style or the absent-type Beta style). Anthropic-hosted server tools (`web_search`, `code_execution`, `bash`, `text_editor`, `memory`, `tool_search` variants) return 400 `unsupported_tool_type` if sent.
+ """
+
+ name: Required[str]
+ """Tool name. Must match `^[a-zA-Z0-9_-]+$`."""
+
+ allowed_callers: Optional[SequenceNotStr[str]]
+ """Silently dropped."""
+
+ cache_control: Optional[ToolCacheControl]
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+ defer_loading: Optional[bool]
+ """Silently dropped."""
+
+ description: Optional[str]
+ """Human-readable description of when the tool should be used."""
+
+ eager_input_streaming: Optional[bool]
+ """Silently dropped."""
+
+ input_examples: Optional[Iterable[Dict[str, object]]]
+ """Silently dropped."""
+
+ input_schema: Optional[Dict[str, object]]
+ """JSON Schema describing the tool's expected input.
+
+ Required by the Anthropic spec; accepted as optional by SambaNova.
+ """
+
+ strict: Optional[bool]
+ """Silently dropped."""
+
+ type: Optional[Literal["custom"]]
+ """Tool-type discriminator.
+
+ May be omitted (defaults to custom) or set to `custom`. Other values return 400
+ `unsupported_tool_type`.
+ """
diff --git a/src/sambanova/types/message_count_tokens_response.py b/src/sambanova/types/message_count_tokens_response.py
new file mode 100644
index 0000000..e3e8adf
--- /dev/null
+++ b/src/sambanova/types/message_count_tokens_response.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .._models import BaseModel
+
+__all__ = ["MessageCountTokensResponse"]
+
+
+class MessageCountTokensResponse(BaseModel):
+ """Token count for the supplied prompt."""
+
+ input_tokens: int
+ """Total tokens in the prompt (system + messages + tools)."""
diff --git a/src/sambanova/types/message_create_params.py b/src/sambanova/types/message_create_params.py
new file mode 100644
index 0000000..4da6620
--- /dev/null
+++ b/src/sambanova/types/message_create_params.py
@@ -0,0 +1,1095 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Iterable, Optional
+from typing_extensions import Literal, Required, Annotated, TypeAlias, TypedDict
+
+from .._types import SequenceNotStr
+from .._utils import PropertyInfo
+
+__all__ = [
+ "MessageCreateParamsBase",
+ "Message",
+ "MessageContentContentBlockArray",
+ "MessageContentContentBlockArrayMessageInputTextBlock",
+ "MessageContentContentBlockArrayMessageInputTextBlockCacheControl",
+ "MessageContentContentBlockArrayMessageInputImageBlock",
+ "MessageContentContentBlockArrayMessageInputImageBlockSource",
+ "MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceBase64",
+ "MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceURL",
+ "MessageContentContentBlockArrayMessageInputImageBlockCacheControl",
+ "MessageContentContentBlockArrayMessageInputToolUseBlock",
+ "MessageContentContentBlockArrayMessageInputToolUseBlockCacheControl",
+ "MessageContentContentBlockArrayMessageInputToolResultBlock",
+ "MessageContentContentBlockArrayMessageInputToolResultBlockCacheControl",
+ "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArray",
+ "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlock",
+ "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlockCacheControl",
+ "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlock",
+ "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSource",
+ "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceBase64",
+ "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceURL",
+ "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockCacheControl",
+ "MessageContentContentBlockArrayMessageInputServerToolUseBlock",
+ "MessageContentContentBlockArrayMessageInputServerToolUseBlockCacheControl",
+ "MessageContentContentBlockArrayMessageInputSearchResultBlock",
+ "MessageContentContentBlockArrayMessageInputSearchResultBlockCacheControl",
+ "MessageContentContentBlockArrayMessageInputSearchResultBlockContent",
+ "MessageContentContentBlockArrayMessageInputSearchResultBlockContentCacheControl",
+ "MessageContentContentBlockArrayMessageInputWebSearchToolResultBlock",
+ "MessageContentContentBlockArrayMessageInputWebSearchToolResultBlockCacheControl",
+ "MessageContentContentBlockArrayMessageInputWebFetchToolResultBlock",
+ "MessageContentContentBlockArrayMessageInputWebFetchToolResultBlockCacheControl",
+ "MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlock",
+ "MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlockCacheControl",
+ "MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlock",
+ "MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlockCacheControl",
+ "MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlock",
+ "MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlockCacheControl",
+ "MessageContentContentBlockArrayMessageInputToolSearchToolResultBlock",
+ "MessageContentContentBlockArrayMessageInputToolSearchToolResultBlockCacheControl",
+ "MessageContentContentBlockArrayMessageInputThinkingBlock",
+ "MessageContentContentBlockArrayMessageInputRedactedThinkingBlock",
+ "MessageContentContentBlockArrayMessageInputContainerUploadBlock",
+ "MessageContentContentBlockArrayMessageInputContainerUploadBlockCacheControl",
+ "MessageContentContentBlockArrayMessageInputDocumentBlock",
+ "MessageContentContentBlockArrayMessageInputDocumentBlockCacheControl",
+ "Metadata",
+ "SystemSystemTextBlockArray",
+ "SystemSystemTextBlockArrayCacheControl",
+ "Thinking",
+ "ThinkingMessageThinkingDisabled",
+ "ThinkingMessageThinkingEnabled",
+ "ThinkingMessageThinkingAdaptive",
+ "ToolChoice",
+ "ToolChoiceMessageToolChoiceAuto",
+ "ToolChoiceMessageToolChoiceAny",
+ "ToolChoiceMessageToolChoiceNone",
+ "ToolChoiceMessageToolChoiceTool",
+ "Tool",
+ "ToolCacheControl",
+ "MessageCreateParamsNonStreaming",
+ "MessageCreateParamsStreaming",
+]
+
+
+class MessageCreateParamsBase(TypedDict, total=False):
+ max_tokens: Required[int]
+ """Maximum number of tokens to generate.
+
+ The combined input + output token count is bounded by the model's context
+ window.
+ """
+
+ messages: Required[Iterable[Message]]
+ """Conversation turns."""
+
+ model: Required[
+ Union[
+ str,
+ Literal[
+ "Meta-Llama-3.3-70B-Instruct",
+ "Meta-Llama-3.2-1B-Instruct",
+ "Meta-Llama-3.2-3B-Instruct",
+ "Llama-3.2-11B-Vision-Instruct",
+ "Llama-3.2-90B-Vision-Instruct",
+ "Meta-Llama-3.1-8B-Instruct",
+ "Meta-Llama-3.1-70B-Instruct",
+ "Meta-Llama-3.1-405B-Instruct",
+ "Qwen2.5-Coder-32B-Instruct",
+ "Qwen2.5-72B-Instruct",
+ "QwQ-32B-Preview",
+ "Meta-Llama-Guard-3-8B",
+ "DeepSeek-R1",
+ "DeepSeek-R1-0528",
+ "DeepSeek-V3-0324",
+ "DeepSeek-V3.1",
+ "DeepSeek-V3.1-cb",
+ "DeepSeek-V3.1-Terminus",
+ "DeepSeek-V3.2",
+ "DeepSeek-R1-Distill-Llama-70B",
+ "Llama-4-Maverick-17B-128E-Instruct",
+ "Llama-4-Scout-17B-16E-Instruct",
+ "Qwen3-32B",
+ "Qwen3-235B",
+ "Llama-3.3-Swallow-70B-Instruct-v0.4",
+ "gpt-oss-120b",
+ "ALLaM-7B-Instruct-preview",
+ "MiniMax-M2.5",
+ "MiniMax-M2.7",
+ "gemma-3-12b-it",
+ ],
+ ]
+ ]
+ """The model ID to use, e.g. `gpt-oss-120b`.
+
+ See the available
+ [models](https://docs.sambanova.ai/docs/en/models/sambacloud-models).
+ """
+
+ container: Optional[str]
+ """Existing code-execution container ID to reuse. **In v1**: silently dropped"""
+
+ metadata: Metadata
+ """Free-form metadata attached to the request.
+
+ Currently only `user_id` is recognized; additional fields are accepted but ignored.
+ """
+
+ service_tier: Optional[Literal["auto", "standard_only"]]
+ """Service-tier preference. **In v1**: silently dropped"""
+
+ stop_sequences: Optional[SequenceNotStr[str]]
+ """Custom strings that, when generated, cause the model to stop."""
+
+ system: Union[str, Iterable[SystemSystemTextBlockArray]]
+ """System prompt for the conversation.
+
+ Accepts either a single string (most common) or an array of text blocks (used
+ when individual segments need `cache_control` markers). Multiple text blocks are
+ joined with newlines and prepended to the conversation as a `role: system`
+ message.
+ """
+
+ temperature: Optional[float]
+ """Sampling temperature in `[0.0, 2.0]`.
+
+ Higher values produce more random output, lower values more deterministic.
+ Adjust only one of `temperature`, `top_p`, `top_k`.
+ """
+
+ thinking: Thinking
+ """Controls Anthropic-style extended thinking.
+
+ **In v1**: only `type:"disabled"` is silently accepted as a no-op;
+ `type:"enabled"` and `type:"adaptive"` return a 400 `invalid_request_error`
+ (`unsupported_parameter`).
+ """
+
+ tool_choice: Optional[ToolChoice]
+ """How the model should choose from the provided tools."""
+
+ tools: Optional[Iterable[Tool]]
+ """Tool definitions the model may call."""
+
+ top_k: Optional[int]
+ """Top-k sampling.
+
+ Considers only the K most likely tokens at each step. Set to 0 to disable.
+ """
+
+ top_p: Optional[float]
+ """Nucleus sampling.
+
+ Considers tokens with cumulative probability mass up to `top_p`.
+ """
+
+ anthropic_version: Annotated[str, PropertyInfo(alias="anthropic-version")]
+
+
+class MessageContentContentBlockArrayMessageInputTextBlockCacheControl(TypedDict, total=False):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputTextBlock(TypedDict, total=False):
+ """Plain-text segment of a message."""
+
+ text: Required[str]
+
+ type: Required[Literal["text"]]
+
+ cache_control: MessageContentContentBlockArrayMessageInputTextBlockCacheControl
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+ citations: Optional[Iterable[Dict[str, object]]]
+
+
+class MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceBase64(TypedDict, total=False):
+ """Inline image data encoded as base64."""
+
+ data: Required[str]
+ """Base64-encoded image bytes (no `data:` URI prefix)."""
+
+ media_type: Required[Literal["image/jpeg", "image/png", "image/gif", "image/webp"]]
+ """MIME type of the image bytes."""
+
+ type: Required[Literal["base64"]]
+
+
+class MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceURL(TypedDict, total=False):
+ """HTTPS URL pointing to an image.
+
+ **Returns 400 in v1** — URL fetching is blocked. Use `type:"base64"` instead.
+ """
+
+ type: Required[Literal["url"]]
+
+ url: Required[str]
+
+
+MessageContentContentBlockArrayMessageInputImageBlockSource: TypeAlias = Union[
+ MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceBase64,
+ MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceURL,
+]
+
+
+class MessageContentContentBlockArrayMessageInputImageBlockCacheControl(TypedDict, total=False):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputImageBlock(TypedDict, total=False):
+ """Image content.
+
+ Only `source.type:"base64"` is supported in v1; URL sources return 400.
+ """
+
+ source: Required[MessageContentContentBlockArrayMessageInputImageBlockSource]
+ """Image source: inline base64 data or a URL (URL sources return 400 in v1)."""
+
+ type: Required[Literal["image"]]
+
+ cache_control: MessageContentContentBlockArrayMessageInputImageBlockCacheControl
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+
+class MessageContentContentBlockArrayMessageInputToolUseBlockCacheControl(TypedDict, total=False):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputToolUseBlock(TypedDict, total=False):
+ """A prior assistant turn that invoked a tool."""
+
+ id: Required[str]
+ """Unique identifier for the tool call (used to correlate `tool_result`)."""
+
+ input: Required[Dict[str, object]]
+ """Tool inputs as a JSON object."""
+
+ name: Required[str]
+ """Name of the tool being invoked."""
+
+ type: Required[Literal["tool_use"]]
+
+ cache_control: MessageContentContentBlockArrayMessageInputToolUseBlockCacheControl
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+
+class MessageContentContentBlockArrayMessageInputToolResultBlockCacheControl(TypedDict, total=False):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlockCacheControl(
+ TypedDict, total=False
+):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlock(
+ TypedDict, total=False
+):
+ """Plain-text segment of a message."""
+
+ text: Required[str]
+
+ type: Required[Literal["text"]]
+
+ cache_control: MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlockCacheControl
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+ citations: Optional[Iterable[Dict[str, object]]]
+
+
+class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceBase64(
+ TypedDict, total=False
+):
+ """Inline image data encoded as base64."""
+
+ data: Required[str]
+ """Base64-encoded image bytes (no `data:` URI prefix)."""
+
+ media_type: Required[Literal["image/jpeg", "image/png", "image/gif", "image/webp"]]
+ """MIME type of the image bytes."""
+
+ type: Required[Literal["base64"]]
+
+
+class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceURL(
+ TypedDict, total=False
+):
+ """HTTPS URL pointing to an image.
+
+ **Returns 400 in v1** — URL fetching is blocked. Use `type:"base64"` instead.
+ """
+
+ type: Required[Literal["url"]]
+
+ url: Required[str]
+
+
+MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSource: TypeAlias = Union[
+ MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceBase64,
+ MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceURL,
+]
+
+
+class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockCacheControl(
+ TypedDict, total=False
+):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlock(
+ TypedDict, total=False
+):
+ """Image content.
+
+ Only `source.type:"base64"` is supported in v1; URL sources return 400.
+ """
+
+ source: Required[
+ MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSource
+ ]
+ """Image source: inline base64 data or an HTTPS URL; URL sources return 400 in v1."""
+
+ type: Required[Literal["image"]]
+
+ cache_control: MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockCacheControl
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+
+MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArray: TypeAlias = Union[
+ MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlock,
+ MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlock,
+]
+
+
+class MessageContentContentBlockArrayMessageInputToolResultBlock(TypedDict, total=False):
+ """Result of a prior tool call."""
+
+ tool_use_id: Required[str]
+ """ID of the `tool_use` block this result corresponds to."""
+
+ type: Required[Literal["tool_result"]]
+
+ cache_control: MessageContentContentBlockArrayMessageInputToolResultBlockCacheControl
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+ content: Union[
+ str, Iterable[MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArray]
+ ]
+
+ is_error: Optional[bool]
+ """Silently dropped in v1."""
+
+
+class MessageContentContentBlockArrayMessageInputServerToolUseBlockCacheControl(TypedDict, total=False):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputServerToolUseBlock(TypedDict, total=False):
+ """Anthropic compatibility only — SambaNova does not run server-side tools.
+
+ A prior assistant turn that invoked an Anthropic-hosted tool (web_search, code_execution, etc.). Accepted in conversation history (e.g. replaying an Anthropic-served session) but never originates from a SambaNova response. New `server_tool_use`-type tool definitions on outgoing requests are rejected with 400 `unsupported_tool_type`.
+ """
+
+ id: Required[str]
+
+ input: Required[Dict[str, object]]
+
+ name: Required[str]
+
+ type: Required[Literal["server_tool_use"]]
+
+ cache_control: MessageContentContentBlockArrayMessageInputServerToolUseBlockCacheControl
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+
+class MessageContentContentBlockArrayMessageInputSearchResultBlockCacheControl(TypedDict, total=False):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputSearchResultBlockContentCacheControl(TypedDict, total=False):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputSearchResultBlockContent(TypedDict, total=False):
+ """Plain-text segment of a message."""
+
+ text: Required[str]
+
+ type: Required[Literal["text"]]
+
+ cache_control: MessageContentContentBlockArrayMessageInputSearchResultBlockContentCacheControl
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+ citations: Optional[Iterable[Dict[str, object]]]
+
+
+class MessageContentContentBlockArrayMessageInputSearchResultBlock(TypedDict, total=False):
+ """Inline search result content.
+
+ In v1 the `title`, `source`, and `content[]` text are extracted into a text block; citations are dropped.
+ """
+
+ type: Required[Literal["search_result"]]
+
+ cache_control: MessageContentContentBlockArrayMessageInputSearchResultBlockCacheControl
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+ citations: Optional[Dict[str, object]]
+
+ content: Iterable[MessageContentContentBlockArrayMessageInputSearchResultBlockContent]
+
+ source: str
+
+ title: str
+
+
+class MessageContentContentBlockArrayMessageInputWebSearchToolResultBlockCacheControl(TypedDict, total=False):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputWebSearchToolResultBlock(TypedDict, total=False):
+ """Anthropic compatibility only — SambaNova does not run server-side `web_search`.
+
+ Echo of a prior Anthropic-served `web_search` tool call; accepted in conversation history but never originates from a SambaNova response. When present, only `title` (`url`) per result is extracted into a tool message.
+ """
+
+ content: Required[Iterable[Dict[str, object]]]
+
+ tool_use_id: Required[str]
+
+ type: Required[Literal["web_search_tool_result"]]
+
+ cache_control: MessageContentContentBlockArrayMessageInputWebSearchToolResultBlockCacheControl
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+
+class MessageContentContentBlockArrayMessageInputWebFetchToolResultBlockCacheControl(TypedDict, total=False):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputWebFetchToolResultBlock(TypedDict, total=False):
+ """Anthropic compatibility only — SambaNova does not run server-side `web_fetch`.
+
+ Echo of a prior Anthropic-served `web_fetch` tool call; accepted in conversation history but never originates from a SambaNova response. When present, only the text content is extracted.
+ """
+
+ content: Required[Dict[str, object]]
+
+ tool_use_id: Required[str]
+
+ type: Required[Literal["web_fetch_tool_result"]]
+
+ cache_control: MessageContentContentBlockArrayMessageInputWebFetchToolResultBlockCacheControl
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+
+class MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlockCacheControl(TypedDict, total=False):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlock(TypedDict, total=False):
+ """
+ Anthropic compatibility only — SambaNova does not run server-side `code_execution`. Echo of a prior Anthropic-served `code_execution` tool call; accepted in conversation history but never originates from a SambaNova response. When present, only `stdout`, `stderr`, and `return_code` are extracted; image output is dropped.
+ """
+
+ content: Required[Dict[str, object]]
+
+ tool_use_id: Required[str]
+
+ type: Required[Literal["code_execution_tool_result"]]
+
+ cache_control: MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlockCacheControl
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+
+class MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlockCacheControl(TypedDict, total=False):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlock(TypedDict, total=False):
+ """
+ Anthropic compatibility only — SambaNova does not run server-side bash code execution. Echo of a prior Anthropic-served bash tool call; accepted in conversation history but never originates from a SambaNova response. Same lossy extraction as `code_execution_tool_result`.
+ """
+
+ content: Required[Dict[str, object]]
+
+ tool_use_id: Required[str]
+
+ type: Required[Literal["bash_code_execution_tool_result"]]
+
+ cache_control: MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlockCacheControl
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+
+class MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlockCacheControl(
+ TypedDict, total=False
+):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlock(TypedDict, total=False):
+ """
+ Anthropic compatibility only — SambaNova does not run server-side text-editor code execution. Echo of a prior Anthropic-served text-editor tool call; accepted in conversation history but never originates from a SambaNova response. When present, only file content is extracted; metadata (line count, file type) is dropped.
+ """
+
+ content: Required[Dict[str, object]]
+
+ tool_use_id: Required[str]
+
+ type: Required[Literal["text_editor_code_execution_tool_result"]]
+
+ cache_control: MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlockCacheControl
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+
+class MessageContentContentBlockArrayMessageInputToolSearchToolResultBlockCacheControl(TypedDict, total=False):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputToolSearchToolResultBlock(TypedDict, total=False):
+ """Anthropic compatibility only — SambaNova does not run server-side `tool_search`.
+
+ Echo of a prior Anthropic-served `tool_search` tool call; accepted in conversation history but never originates from a SambaNova response. When present, an empty string is emitted to the tool message (no plain-text fields).
+ """
+
+ content: Required[Dict[str, object]]
+
+ tool_use_id: Required[str]
+
+ type: Required[Literal["tool_search_tool_result"]]
+
+ cache_control: MessageContentContentBlockArrayMessageInputToolSearchToolResultBlockCacheControl
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+
+class MessageContentContentBlockArrayMessageInputThinkingBlock(TypedDict, total=False):
+ """Extended-reasoning trace from a prior assistant turn."""
+
+ signature: Required[str]
+
+ thinking: Required[str]
+
+ type: Required[Literal["thinking"]]
+
+
+class MessageContentContentBlockArrayMessageInputRedactedThinkingBlock(TypedDict, total=False):
+ """
+ Anthropic compatibility only — SambaNova does not produce encrypted thinking output. Echo of a prior Anthropic-served response where `thinking.display:"omitted"` was set. Accepted in conversation history but never originates from a SambaNova response. Silently dropped on input.
+ """
+
+ data: Required[str]
+
+ type: Required[Literal["redacted_thinking"]]
+
+
+class MessageContentContentBlockArrayMessageInputContainerUploadBlockCacheControl(TypedDict, total=False):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputContainerUploadBlock(TypedDict, total=False):
+ """
+ Anthropic compatibility only — SambaNova does not produce container_upload blocks (these come from Anthropic's server-side `code_execution` tool). Accepted in conversation history but never originates from a SambaNova response. Silently dropped on input.
+ """
+
+ file_id: Required[str]
+
+ type: Required[Literal["container_upload"]]
+
+ cache_control: MessageContentContentBlockArrayMessageInputContainerUploadBlockCacheControl
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+
+class MessageContentContentBlockArrayMessageInputDocumentBlockCacheControl(TypedDict, total=False):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class MessageContentContentBlockArrayMessageInputDocumentBlock(TypedDict, total=False):
+ """PDF or document content.
+
+ **Returns 400** — no document-extraction pipeline available.
+ """
+
+ source: Required[Dict[str, object]]
+
+ type: Required[Literal["document"]]
+
+ cache_control: MessageContentContentBlockArrayMessageInputDocumentBlockCacheControl
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+ citations: Optional[Dict[str, object]]
+
+ context: Optional[str]
+
+ title: Optional[str]
+
+
+MessageContentContentBlockArray: TypeAlias = Union[
+ MessageContentContentBlockArrayMessageInputTextBlock,
+ MessageContentContentBlockArrayMessageInputImageBlock,
+ MessageContentContentBlockArrayMessageInputToolUseBlock,
+ MessageContentContentBlockArrayMessageInputToolResultBlock,
+ MessageContentContentBlockArrayMessageInputServerToolUseBlock,
+ MessageContentContentBlockArrayMessageInputSearchResultBlock,
+ MessageContentContentBlockArrayMessageInputWebSearchToolResultBlock,
+ MessageContentContentBlockArrayMessageInputWebFetchToolResultBlock,
+ MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlock,
+ MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlock,
+ MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlock,
+ MessageContentContentBlockArrayMessageInputToolSearchToolResultBlock,
+ MessageContentContentBlockArrayMessageInputThinkingBlock,
+ MessageContentContentBlockArrayMessageInputRedactedThinkingBlock,
+ MessageContentContentBlockArrayMessageInputContainerUploadBlock,
+ MessageContentContentBlockArrayMessageInputDocumentBlock,
+]
+
+
+class Message(TypedDict, total=False):
+ """A turn in the conversation."""
+
+ content: Required[Union[str, Iterable[MessageContentContentBlockArray]]]
+
+ role: Required[Literal["user", "assistant"]]
+ """Conversational role.
+
+ `user` for the human-side turn, `assistant` for prior model output.
+ """
+
+
+class Metadata(TypedDict, total=False, extra_items=object): # type: ignore[call-arg]
+ """Free-form metadata attached to the request.
+
+ Currently only `user_id` is recognized. Additional fields are accepted but ignored.
+ """
+
+ user_id: Optional[str]
+ """External identifier for the end-user making the request.
+
+ Mapped internally to the Chat Completions `user` field.
+ """
+
+
+class SystemSystemTextBlockArrayCacheControl(TypedDict, total=False):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class SystemSystemTextBlockArray(TypedDict, total=False):
+ """A text segment within a structured `system` prompt array.
+
+ Multiple text blocks are concatenated (with newlines) and prepended to the conversation as a `role: system` message at the chat-completions layer.
+ """
+
+ text: Required[str]
+ """Plain-text content of the system prompt segment."""
+
+ type: Required[Literal["text"]]
+
+ cache_control: SystemSystemTextBlockArrayCacheControl
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+ citations: Optional[Iterable[Dict[str, object]]]
+ """Optional citations. **In v1**: silently dropped"""
+
+
+class ThinkingMessageThinkingDisabled(TypedDict, total=False):
+ """Disables Anthropic-style extended thinking.
+
+ **In v1**: silently accepted as a no-op
+ """
+
+ type: Required[Literal["disabled"]]
+
+
+class ThinkingMessageThinkingEnabled(TypedDict, total=False):
+ """Enables Anthropic-style extended thinking with a fixed budget.
+
+ **In v1**: returns a 400 `invalid_request_error` (`unsupported_parameter`).
+ """
+
+ budget_tokens: Required[int]
+ """
+ Maximum tokens the model may spend on extended thinking before producing the
+ final answer.
+ """
+
+ type: Required[Literal["enabled"]]
+
+
+class ThinkingMessageThinkingAdaptive(TypedDict, total=False):
+ """Enables Anthropic-style adaptive extended thinking.
+
+ **In v1**: returns a 400 `invalid_request_error` (`unsupported_parameter`).
+ """
+
+ type: Required[Literal["adaptive"]]
+
+ budget_tokens: Optional[int]
+ """Optional upper bound on tokens spent on adaptive thinking.
+
+ When omitted, the backend chooses based on prompt complexity.
+ """
+
+
+Thinking: TypeAlias = Union[
+ ThinkingMessageThinkingDisabled, ThinkingMessageThinkingEnabled, ThinkingMessageThinkingAdaptive
+]
+
+
+class ToolChoiceMessageToolChoiceAuto(TypedDict, total=False):
+ """Let the model decide whether and which tool to use."""
+
+ type: Required[Literal["auto"]]
+
+ disable_parallel_tool_use: Optional[bool]
+ """Silently dropped."""
+
+
+class ToolChoiceMessageToolChoiceAny(TypedDict, total=False):
+ """Require the model to call one of the provided tools."""
+
+ type: Required[Literal["any"]]
+
+ disable_parallel_tool_use: Optional[bool]
+ """Silently dropped."""
+
+
+class ToolChoiceMessageToolChoiceNone(TypedDict, total=False):
+ """Forbid the model from calling any tool."""
+
+ type: Required[Literal["none"]]
+
+
+class ToolChoiceMessageToolChoiceTool(TypedDict, total=False):
+ """Force the model to call a specific tool by name."""
+
+ name: Required[str]
+ """Name of the required tool."""
+
+ type: Required[Literal["tool"]]
+
+ disable_parallel_tool_use: Optional[bool]
+ """Silently dropped."""
+
+
+ToolChoice: TypeAlias = Union[
+ ToolChoiceMessageToolChoiceAuto,
+ ToolChoiceMessageToolChoiceAny,
+ ToolChoiceMessageToolChoiceNone,
+ ToolChoiceMessageToolChoiceTool,
+]
+
+
+class ToolCacheControl(TypedDict, total=False):
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored.
+ """
+
+ type: Required[Literal["ephemeral"]]
+ """Cache breakpoint type. Only `ephemeral` is supported by Anthropic."""
+
+ ttl: Optional[str]
+ """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1"""
+
+
+class Tool(TypedDict, total=False):
+ """User-defined function tool definition.
+
+ Only custom function tools are supported (Anthropic's `type:"custom"` style or the absent-type Beta style). Anthropic-hosted server tools (`web_search`, `code_execution`, `bash`, `text_editor`, `memory`, `tool_search` variants) return 400 `unsupported_tool_type` if sent.
+ """
+
+ name: Required[str]
+ """Tool name. Must match `^[a-zA-Z0-9_-]+$`."""
+
+ allowed_callers: Optional[SequenceNotStr[str]]
+ """Silently dropped."""
+
+ cache_control: Optional[ToolCacheControl]
+ """
+ Marks the preceding content block (or system text block) as a prompt-cache
+ breakpoint. Marker positions are collected by the adapter; their wiring into the
+ router's longest-prefix matching **In v1**: position is recorded; the `ttl`
+ value is ignored.
+ """
+
+ defer_loading: Optional[bool]
+ """Silently dropped."""
+
+ description: Optional[str]
+ """Human-readable description of when the tool should be used."""
+
+ eager_input_streaming: Optional[bool]
+ """Silently dropped."""
+
+ input_examples: Optional[Iterable[Dict[str, object]]]
+ """Silently dropped."""
+
+ input_schema: Optional[Dict[str, object]]
+ """JSON Schema describing the tool's expected input.
+
+ Required by the Anthropic spec; accepted as optional by SambaNova.
+ """
+
+ strict: Optional[bool]
+ """Silently dropped."""
+
+ type: Optional[Literal["custom"]]
+ """Tool-type discriminator.
+
+ May be omitted (defaults to custom) or set to `custom`. Other values return 400
+ `unsupported_tool_type`.
+ """
+
+
+class MessageCreateParamsNonStreaming(MessageCreateParamsBase, total=False):
+ stream: Optional[Literal[False]]
+ """
+ If true, the response is a sequence of Server-Sent Events whose payloads conform
+ to `MessageStreamEvent`.
+ """
+
+
+class MessageCreateParamsStreaming(MessageCreateParamsBase):
+ stream: Required[Literal[True]]
+ """
+ If true, the response is a sequence of Server-Sent Events whose payloads conform
+ to `MessageStreamEvent`.
+ """
+
+
+MessageCreateParams = Union[MessageCreateParamsNonStreaming, MessageCreateParamsStreaming]
diff --git a/src/sambanova/types/message_create_response.py b/src/sambanova/types/message_create_response.py
new file mode 100644
index 0000000..7f80d24
--- /dev/null
+++ b/src/sambanova/types/message_create_response.py
@@ -0,0 +1,449 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from .._utils import PropertyInfo
+from .message import Message
+from .._models import BaseModel
+
+__all__ = [
+ "MessageCreateResponse",
+ "MessageStartEvent",
+ "MessageContentBlockStartEvent",
+ "MessageContentBlockStartEventContentBlock",
+ "MessageContentBlockStartEventContentBlockMessageOutputTextBlock",
+ "MessageContentBlockStartEventContentBlockMessageOutputToolUseBlock",
+ "MessageContentBlockStartEventContentBlockMessageOutputThinkingBlock",
+ "MessageContentBlockStartEventContentBlockMessageOutputRedactedThinkingBlock",
+ "MessageContentBlockStartEventContentBlockMessageOutputServerToolUseBlock",
+ "MessageContentBlockStartEventContentBlockMessageOutputWebSearchToolResultBlock",
+ "MessageContentBlockStartEventContentBlockMessageOutputWebFetchToolResultBlock",
+ "MessageContentBlockStartEventContentBlockMessageOutputCodeExecutionToolResultBlock",
+ "MessageContentBlockStartEventContentBlockMessageOutputBashCodeExecutionToolResultBlock",
+ "MessageContentBlockStartEventContentBlockMessageOutputTextEditorCodeExecutionToolResultBlock",
+ "MessageContentBlockStartEventContentBlockMessageOutputToolSearchToolResultBlock",
+ "MessageContentBlockStartEventContentBlockMessageOutputContainerUploadBlock",
+ "MessageContentBlockDeltaEvent",
+ "MessageContentBlockDeltaEventDelta",
+ "MessageContentBlockDeltaEventDeltaMessageContentBlockTextDelta",
+ "MessageContentBlockDeltaEventDeltaMessageContentBlockInputJsonDelta",
+ "MessageContentBlockDeltaEventDeltaMessageContentBlockThinkingDelta",
+ "MessageContentBlockDeltaEventDeltaMessageContentBlockSignatureDelta",
+ "MessageContentBlockStopEvent",
+ "MessageDeltaEvent",
+ "MessageDeltaEventDelta",
+ "MessageDeltaEventDeltaStopDetails",
+ "MessageDeltaEventUsage",
+ "MessageStopEvent",
+ "MessagePingEvent",
+ "MessageStreamErrorEvent",
+ "MessageStreamErrorEventError",
+]
+
+
+class MessageStartEvent(BaseModel):
+ """First event of a stream.
+
+ Carries the initial Message envelope (empty `content[]`, `stop_reason: null`) and token usage from prompt processing.
+ """
+
+ message: Message
+ """Initial `Message` envelope, using the same schema as the non-streaming `POST /messages` response.
+
+ Wire-compatible with the official Anthropic Messages API.
+ """
+
+ type: Literal["message_start"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputTextBlock(BaseModel):
+ """Plain-text segment of the model's response."""
+
+ text: str
+
+ type: Literal["text"]
+
+ citations: Optional[List[Dict[str, object]]] = None
+ """Not emitted in v1."""
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputToolUseBlock(BaseModel):
+ """Tool call generated by the model."""
+
+ id: str
+ """Unique identifier for this tool call."""
+
+ input: Dict[str, object]
+ """Tool inputs as a JSON object."""
+
+ name: str
+ """Name of the tool being called."""
+
+ type: Literal["tool_use"]
+
+ caller: Optional[Dict[str, object]] = None
+ """Anthropic routing metadata. Always `null` in SambaNova responses."""
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputThinkingBlock(BaseModel):
+ """Extended-reasoning trace from the model. Emitted by reasoning models."""
+
+ thinking: str
+
+ type: Literal["thinking"]
+
+ signature: Optional[str] = None
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputRedactedThinkingBlock(BaseModel):
+ """
+ Anthropic compatibility only — SambaNova does not produce encrypted thinking output. Never emitted in responses.
+ """
+
+ data: str
+
+ type: Literal["redacted_thinking"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputServerToolUseBlock(BaseModel):
+ """Anthropic compatibility only — SambaNova does not run server-side tools.
+
+ Never emitted in responses; defined for Anthropic SDK type-parity.
+ """
+
+ id: str
+
+ input: Dict[str, object]
+
+ name: str
+
+ type: Literal["server_tool_use"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputWebSearchToolResultBlock(BaseModel):
+ """Anthropic compatibility only — SambaNova does not run server-side `web_search`.
+
+ Never emitted in responses.
+ """
+
+ content: List[Dict[str, object]]
+
+ tool_use_id: str
+
+ type: Literal["web_search_tool_result"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputWebFetchToolResultBlock(BaseModel):
+ """Anthropic compatibility only — SambaNova does not run server-side `web_fetch`.
+
+ Never emitted in responses.
+ """
+
+ content: Dict[str, object]
+
+ tool_use_id: str
+
+ type: Literal["web_fetch_tool_result"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputCodeExecutionToolResultBlock(BaseModel):
+ """
+ Anthropic compatibility only — SambaNova does not run server-side `code_execution`. Never emitted in responses.
+ """
+
+ content: Dict[str, object]
+
+ tool_use_id: str
+
+ type: Literal["code_execution_tool_result"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputBashCodeExecutionToolResultBlock(BaseModel):
+ """
+ Anthropic compatibility only — SambaNova does not run server-side bash code execution. Never emitted in responses.
+ """
+
+ content: Dict[str, object]
+
+ tool_use_id: str
+
+ type: Literal["bash_code_execution_tool_result"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputTextEditorCodeExecutionToolResultBlock(BaseModel):
+ """
+ Anthropic compatibility only — SambaNova does not run server-side text-editor code execution. Never emitted in responses.
+ """
+
+ content: Dict[str, object]
+
+ tool_use_id: str
+
+ type: Literal["text_editor_code_execution_tool_result"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputToolSearchToolResultBlock(BaseModel):
+ """Anthropic compatibility only — SambaNova does not run server-side `tool_search`.
+
+ Never emitted in responses.
+ """
+
+ content: Dict[str, object]
+
+ tool_use_id: str
+
+ type: Literal["tool_search_tool_result"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputContainerUploadBlock(BaseModel):
+ """
+ Anthropic compatibility only — SambaNova does not produce container_upload blocks (these come from Anthropic's server-side `code_execution` tool). Never emitted in responses.
+ """
+
+ file_id: str
+
+ type: Literal["container_upload"]
+
+
+MessageContentBlockStartEventContentBlock: TypeAlias = Annotated[
+ Union[
+ MessageContentBlockStartEventContentBlockMessageOutputTextBlock,
+ MessageContentBlockStartEventContentBlockMessageOutputToolUseBlock,
+ MessageContentBlockStartEventContentBlockMessageOutputThinkingBlock,
+ MessageContentBlockStartEventContentBlockMessageOutputRedactedThinkingBlock,
+ MessageContentBlockStartEventContentBlockMessageOutputServerToolUseBlock,
+ MessageContentBlockStartEventContentBlockMessageOutputWebSearchToolResultBlock,
+ MessageContentBlockStartEventContentBlockMessageOutputWebFetchToolResultBlock,
+ MessageContentBlockStartEventContentBlockMessageOutputCodeExecutionToolResultBlock,
+ MessageContentBlockStartEventContentBlockMessageOutputBashCodeExecutionToolResultBlock,
+ MessageContentBlockStartEventContentBlockMessageOutputTextEditorCodeExecutionToolResultBlock,
+ MessageContentBlockStartEventContentBlockMessageOutputToolSearchToolResultBlock,
+ MessageContentBlockStartEventContentBlockMessageOutputContainerUploadBlock,
+ ],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class MessageContentBlockStartEvent(BaseModel):
+ """Opens a new content block. One per block in `content[]`."""
+
+ content_block: MessageContentBlockStartEventContentBlock
+ """Typed content block in the model's response."""
+
+ index: int
+ """Zero-based index of the block within `content[]`."""
+
+ type: Literal["content_block_start"]
+
+
+class MessageContentBlockDeltaEventDeltaMessageContentBlockTextDelta(BaseModel):
+ """Incremental text chunk for an open text content block."""
+
+ text: str
+
+ type: Literal["text_delta"]
+
+
+class MessageContentBlockDeltaEventDeltaMessageContentBlockInputJsonDelta(BaseModel):
+ """Incremental fragment of a tool_use block's `input` JSON.
+
+ Concatenate successive `partial_json` strings to reconstruct the full input object.
+ """
+
+ partial_json: str
+
+ type: Literal["input_json_delta"]
+
+
+class MessageContentBlockDeltaEventDeltaMessageContentBlockThinkingDelta(BaseModel):
+ """Incremental thinking chunk for an open thinking block.
+
+ Emitted by reasoning models.
+ """
+
+ thinking: str
+
+ type: Literal["thinking_delta"]
+
+
+class MessageContentBlockDeltaEventDeltaMessageContentBlockSignatureDelta(BaseModel):
+ """Signature for an open thinking block.
+
+ Emitted at the end of a thinking stream (paired with the closing `content_block_stop`); the `signature` value may be an empty string when the backend has no signed payload to attach.
+ """
+
+ signature: str
+
+ type: Literal["signature_delta"]
+
+
+MessageContentBlockDeltaEventDelta: TypeAlias = Annotated[
+ Union[
+ MessageContentBlockDeltaEventDeltaMessageContentBlockTextDelta,
+ MessageContentBlockDeltaEventDeltaMessageContentBlockInputJsonDelta,
+ MessageContentBlockDeltaEventDeltaMessageContentBlockThinkingDelta,
+ MessageContentBlockDeltaEventDeltaMessageContentBlockSignatureDelta,
+ ],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class MessageContentBlockDeltaEvent(BaseModel):
+ """Incremental update to the currently open content block."""
+
+ delta: MessageContentBlockDeltaEventDelta
+ """Incremental update to an open content block."""
+
+ index: int
+ """Zero-based index of the block within `content[]`."""
+
+ type: Literal["content_block_delta"]
+
+
+class MessageContentBlockStopEvent(BaseModel):
+ """Closes the current content block."""
+
+ index: int
+ """Zero-based index of the block within `content[]`."""
+
+ type: Literal["content_block_stop"]
+
+
+class MessageDeltaEventDeltaStopDetails(BaseModel):
+ """Refusal stop details.
+
+ Anthropic compatibility only — `refusal` is never emitted as a stop_reason by SambaNova (content filtering is not exposed at the API layer).
+ """
+
+ type: Literal["refusal"]
+
+ category: Optional[Literal["cyber", "bio"]] = None
+
+
+class MessageDeltaEventDelta(BaseModel):
+ stop_reason: Literal[
+ "end_turn", "max_tokens", "tool_use", "pause_turn", "refusal", "stop_sequence", "model_context_window_exceeded"
+ ]
+ """Reason the model stopped generating.
+
+ SambaNova emits `end_turn`, `max_tokens`, `tool_use`, and `stop_sequence`. The
+ remaining values are defined for Anthropic SDK type-parity but never returned:
+ `pause_turn` (server-tool loop limit, not produced); `refusal` (content filter,
+ not exposed); `model_context_window_exceeded` (folded to `max_tokens`).
+ """
+
+ stop_details: Optional[MessageDeltaEventDeltaStopDetails] = None
+ """Refusal stop details.
+
+ Anthropic compatibility only — `refusal` is never emitted as a stop_reason by
+ SambaNova (content filtering is not exposed at the API layer).
+ """
+
+ stop_sequence: Optional[str] = None
+ """Custom stop sequence that triggered termination.
+
+ Field is emitted but value is always `null` in v1 (backend collapses
+ `StopSequenceHit` and `EndOfText` into the same finish_reason).
+ """
+
+
+class MessageDeltaEventUsage(BaseModel):
+ """
+ Final token accounting emitted in the closing `message_delta` event of a stream.
+ """
+
+ output_tokens: int
+ """Total tokens generated (final count)."""
+
+ cache_creation_input_tokens: Optional[int] = None
+ """Tokens written to prompt cache. Absent in v1."""
+
+ cache_read_input_tokens: Optional[int] = None
+ """Tokens read from prompt cache. Absent in v1."""
+
+ input_tokens: Optional[int] = None
+ """Total tokens in the prompt (echoed from `message_start`)."""
+
+ server_tool_use: Optional[Dict[str, object]] = None
+ """Server-tool usage metrics.
+
+ Anthropic compatibility only — SambaNova does not run server tools, so this
+ field is never emitted.
+ """
+
+
+class MessageDeltaEvent(BaseModel):
+ """Penultimate event of the stream.
+
+ Carries the final `stop_reason`, optional `stop_sequence`, and final usage counts.
+ """
+
+ delta: MessageDeltaEventDelta
+
+ type: Literal["message_delta"]
+
+ usage: MessageDeltaEventUsage
+ """
+ Final token accounting emitted in the closing `message_delta` event of a stream.
+ """
+
+
+class MessageStopEvent(BaseModel):
+ """Final event of the stream. No fields beyond `type`."""
+
+ type: Literal["message_stop"]
+
+
+class MessagePingEvent(BaseModel):
+ """Keepalive heartbeat. May appear at any point in the stream."""
+
+ type: Literal["ping"]
+
+
+class MessageStreamErrorEventError(BaseModel):
+ """Inner error object carried inside a `MessageErrorResponse`.
+
+ The `type` value follows Anthropic's published error taxonomy.
+ """
+
+ message: str
+ """Human-readable explanation of the error."""
+
+ type: Literal[
+ "invalid_request_error",
+ "authentication_error",
+ "permission_error",
+ "not_found_error",
+ "request_too_large",
+ "rate_limit_error",
+ "api_error",
+ "overloaded_error",
+ "not_implemented_error",
+ ]
+ """Error category. Values follow Anthropic's taxonomy."""
+
+
+class MessageStreamErrorEvent(BaseModel):
+ """Streamed error envelope. Terminates the stream."""
+
+ error: MessageStreamErrorEventError
+ """Inner error object carried inside a `MessageErrorResponse`.
+
+ The `type` value follows Anthropic's published error taxonomy.
+ """
+
+ type: Literal["error"]
+
+
+MessageCreateResponse: TypeAlias = Union[
+ Message,
+ MessageStartEvent,
+ MessageContentBlockStartEvent,
+ MessageContentBlockDeltaEvent,
+ MessageContentBlockStopEvent,
+ MessageDeltaEvent,
+ MessageStopEvent,
+ MessagePingEvent,
+ MessageStreamErrorEvent,
+]
diff --git a/src/sambanova/types/message_stream_event.py b/src/sambanova/types/message_stream_event.py
new file mode 100644
index 0000000..95645b5
--- /dev/null
+++ b/src/sambanova/types/message_stream_event.py
@@ -0,0 +1,451 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from .._utils import PropertyInfo
+from .message import Message
+from .._models import BaseModel
+
+__all__ = [
+ "MessageStreamEvent",
+ "MessageStartEvent",
+ "MessageContentBlockStartEvent",
+ "MessageContentBlockStartEventContentBlock",
+ "MessageContentBlockStartEventContentBlockMessageOutputTextBlock",
+ "MessageContentBlockStartEventContentBlockMessageOutputToolUseBlock",
+ "MessageContentBlockStartEventContentBlockMessageOutputThinkingBlock",
+ "MessageContentBlockStartEventContentBlockMessageOutputRedactedThinkingBlock",
+ "MessageContentBlockStartEventContentBlockMessageOutputServerToolUseBlock",
+ "MessageContentBlockStartEventContentBlockMessageOutputWebSearchToolResultBlock",
+ "MessageContentBlockStartEventContentBlockMessageOutputWebFetchToolResultBlock",
+ "MessageContentBlockStartEventContentBlockMessageOutputCodeExecutionToolResultBlock",
+ "MessageContentBlockStartEventContentBlockMessageOutputBashCodeExecutionToolResultBlock",
+ "MessageContentBlockStartEventContentBlockMessageOutputTextEditorCodeExecutionToolResultBlock",
+ "MessageContentBlockStartEventContentBlockMessageOutputToolSearchToolResultBlock",
+ "MessageContentBlockStartEventContentBlockMessageOutputContainerUploadBlock",
+ "MessageContentBlockDeltaEvent",
+ "MessageContentBlockDeltaEventDelta",
+ "MessageContentBlockDeltaEventDeltaMessageContentBlockTextDelta",
+ "MessageContentBlockDeltaEventDeltaMessageContentBlockInputJsonDelta",
+ "MessageContentBlockDeltaEventDeltaMessageContentBlockThinkingDelta",
+ "MessageContentBlockDeltaEventDeltaMessageContentBlockSignatureDelta",
+ "MessageContentBlockStopEvent",
+ "MessageDeltaEvent",
+ "MessageDeltaEventDelta",
+ "MessageDeltaEventDeltaStopDetails",
+ "MessageDeltaEventUsage",
+ "MessageStopEvent",
+ "MessagePingEvent",
+ "MessageStreamErrorEvent",
+ "MessageStreamErrorEventError",
+]
+
+
+class MessageStartEvent(BaseModel):
+ """First event of a stream.
+
+ Carries the initial Message envelope (empty `content[]`, `stop_reason: null`) and token usage from prompt processing.
+ """
+
+ message: Message
+ """Initial `Message` envelope, using the same schema as the non-streaming `POST /messages` response.
+
+ Wire-compatible with the official Anthropic Messages API.
+ """
+
+ type: Literal["message_start"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputTextBlock(BaseModel):
+ """Plain-text segment of the model's response."""
+
+ text: str
+
+ type: Literal["text"]
+
+ citations: Optional[List[Dict[str, object]]] = None
+ """Not emitted in v1."""
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputToolUseBlock(BaseModel):
+ """Tool call generated by the model."""
+
+ id: str
+ """Unique identifier for this tool call."""
+
+ input: Dict[str, object]
+ """Tool inputs as a JSON object."""
+
+ name: str
+ """Name of the tool being called."""
+
+ type: Literal["tool_use"]
+
+ caller: Optional[Dict[str, object]] = None
+ """Anthropic routing metadata. Always `null` in SambaNova responses."""
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputThinkingBlock(BaseModel):
+ """Extended-reasoning trace from the model. Emitted by reasoning models."""
+
+ thinking: str
+
+ type: Literal["thinking"]
+
+ signature: Optional[str] = None
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputRedactedThinkingBlock(BaseModel):
+ """
+ Anthropic compatibility only — SambaNova does not produce encrypted thinking output. Never emitted in responses.
+ """
+
+ data: str
+
+ type: Literal["redacted_thinking"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputServerToolUseBlock(BaseModel):
+ """Anthropic compatibility only — SambaNova does not run server-side tools.
+
+ Never emitted in responses; defined for Anthropic SDK type-parity.
+ """
+
+ id: str
+
+ input: Dict[str, object]
+
+ name: str
+
+ type: Literal["server_tool_use"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputWebSearchToolResultBlock(BaseModel):
+ """Anthropic compatibility only — SambaNova does not run server-side `web_search`.
+
+ Never emitted in responses.
+ """
+
+ content: List[Dict[str, object]]
+
+ tool_use_id: str
+
+ type: Literal["web_search_tool_result"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputWebFetchToolResultBlock(BaseModel):
+ """Anthropic compatibility only — SambaNova does not run server-side `web_fetch`.
+
+ Never emitted in responses.
+ """
+
+ content: Dict[str, object]
+
+ tool_use_id: str
+
+ type: Literal["web_fetch_tool_result"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputCodeExecutionToolResultBlock(BaseModel):
+ """
+ Anthropic compatibility only — SambaNova does not run server-side `code_execution`. Never emitted in responses.
+ """
+
+ content: Dict[str, object]
+
+ tool_use_id: str
+
+ type: Literal["code_execution_tool_result"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputBashCodeExecutionToolResultBlock(BaseModel):
+ """
+ Anthropic compatibility only — SambaNova does not run server-side bash code execution. Never emitted in responses.
+ """
+
+ content: Dict[str, object]
+
+ tool_use_id: str
+
+ type: Literal["bash_code_execution_tool_result"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputTextEditorCodeExecutionToolResultBlock(BaseModel):
+ """
+ Anthropic compatibility only — SambaNova does not run server-side text-editor code execution. Never emitted in responses.
+ """
+
+ content: Dict[str, object]
+
+ tool_use_id: str
+
+ type: Literal["text_editor_code_execution_tool_result"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputToolSearchToolResultBlock(BaseModel):
+ """Anthropic compatibility only — SambaNova does not run server-side `tool_search`.
+
+ Never emitted in responses.
+ """
+
+ content: Dict[str, object]
+
+ tool_use_id: str
+
+ type: Literal["tool_search_tool_result"]
+
+
+class MessageContentBlockStartEventContentBlockMessageOutputContainerUploadBlock(BaseModel):
+ """
+ Anthropic compatibility only — SambaNova does not produce container_upload blocks (these come from Anthropic's server-side `code_execution` tool). Never emitted in responses.
+ """
+
+ file_id: str
+
+ type: Literal["container_upload"]
+
+
+MessageContentBlockStartEventContentBlock: TypeAlias = Annotated[
+ Union[
+ MessageContentBlockStartEventContentBlockMessageOutputTextBlock,
+ MessageContentBlockStartEventContentBlockMessageOutputToolUseBlock,
+ MessageContentBlockStartEventContentBlockMessageOutputThinkingBlock,
+ MessageContentBlockStartEventContentBlockMessageOutputRedactedThinkingBlock,
+ MessageContentBlockStartEventContentBlockMessageOutputServerToolUseBlock,
+ MessageContentBlockStartEventContentBlockMessageOutputWebSearchToolResultBlock,
+ MessageContentBlockStartEventContentBlockMessageOutputWebFetchToolResultBlock,
+ MessageContentBlockStartEventContentBlockMessageOutputCodeExecutionToolResultBlock,
+ MessageContentBlockStartEventContentBlockMessageOutputBashCodeExecutionToolResultBlock,
+ MessageContentBlockStartEventContentBlockMessageOutputTextEditorCodeExecutionToolResultBlock,
+ MessageContentBlockStartEventContentBlockMessageOutputToolSearchToolResultBlock,
+ MessageContentBlockStartEventContentBlockMessageOutputContainerUploadBlock,
+ ],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class MessageContentBlockStartEvent(BaseModel):
+ """Opens a new content block. One per block in `content[]`."""
+
+ content_block: MessageContentBlockStartEventContentBlock
+ """Typed content block in the model's response."""
+
+ index: int
+ """Zero-based index of the block within `content[]`."""
+
+ type: Literal["content_block_start"]
+
+
+class MessageContentBlockDeltaEventDeltaMessageContentBlockTextDelta(BaseModel):
+ """Incremental text chunk for an open text content block."""
+
+ text: str
+
+ type: Literal["text_delta"]
+
+
+class MessageContentBlockDeltaEventDeltaMessageContentBlockInputJsonDelta(BaseModel):
+ """Incremental fragment of a tool_use block's `input` JSON.
+
+ Concatenate successive `partial_json` strings to reconstruct the full input object.
+ """
+
+ partial_json: str
+
+ type: Literal["input_json_delta"]
+
+
+class MessageContentBlockDeltaEventDeltaMessageContentBlockThinkingDelta(BaseModel):
+ """Incremental thinking chunk for an open thinking block.
+
+ Emitted by reasoning models.
+ """
+
+ thinking: str
+
+ type: Literal["thinking_delta"]
+
+
+class MessageContentBlockDeltaEventDeltaMessageContentBlockSignatureDelta(BaseModel):
+ """Signature for an open thinking block.
+
+ Emitted at the end of a thinking stream (paired with the closing `content_block_stop`); the `signature` value may be an empty string when the backend has no signed payload to attach.
+ """
+
+ signature: str
+
+ type: Literal["signature_delta"]
+
+
+MessageContentBlockDeltaEventDelta: TypeAlias = Annotated[
+ Union[
+ MessageContentBlockDeltaEventDeltaMessageContentBlockTextDelta,
+ MessageContentBlockDeltaEventDeltaMessageContentBlockInputJsonDelta,
+ MessageContentBlockDeltaEventDeltaMessageContentBlockThinkingDelta,
+ MessageContentBlockDeltaEventDeltaMessageContentBlockSignatureDelta,
+ ],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class MessageContentBlockDeltaEvent(BaseModel):
+ """Incremental update to the currently open content block."""
+
+ delta: MessageContentBlockDeltaEventDelta
+ """Incremental update to an open content block."""
+
+ index: int
+ """Zero-based index of the block within `content[]`."""
+
+ type: Literal["content_block_delta"]
+
+
+class MessageContentBlockStopEvent(BaseModel):
+ """Closes the current content block."""
+
+ index: int
+ """Zero-based index of the block within `content[]`."""
+
+ type: Literal["content_block_stop"]
+
+
+class MessageDeltaEventDeltaStopDetails(BaseModel):
+ """Refusal stop details.
+
+ Anthropic compatibility only — `refusal` is never emitted as a stop_reason by SambaNova (content filtering is not exposed at the API layer).
+ """
+
+ type: Literal["refusal"]
+
+ category: Optional[Literal["cyber", "bio"]] = None
+
+
+class MessageDeltaEventDelta(BaseModel):
+ stop_reason: Literal[
+ "end_turn", "max_tokens", "tool_use", "pause_turn", "refusal", "stop_sequence", "model_context_window_exceeded"
+ ]
+ """Reason the model stopped generating.
+
+ SambaNova emits `end_turn`, `max_tokens`, `tool_use`, and `stop_sequence`. The
+ remaining values are defined for Anthropic SDK type-parity but never returned:
+ `pause_turn` (server-tool loop limit, not produced); `refusal` (content filter,
+ not exposed); `model_context_window_exceeded` (folded to `max_tokens`).
+ """
+
+ stop_details: Optional[MessageDeltaEventDeltaStopDetails] = None
+ """Refusal stop details.
+
+ Anthropic compatibility only — `refusal` is never emitted as a stop_reason by
+ SambaNova (content filtering is not exposed at the API layer).
+ """
+
+ stop_sequence: Optional[str] = None
+ """Custom stop sequence that triggered termination.
+
+ Field is emitted but value is always `null` in v1 (backend collapses
+ `StopSequenceHit` and `EndOfText` into the same finish_reason).
+ """
+
+
+class MessageDeltaEventUsage(BaseModel):
+ """
+ Final token accounting emitted in the closing `message_delta` event of a stream.
+ """
+
+ output_tokens: int
+ """Total tokens generated (final count)."""
+
+ cache_creation_input_tokens: Optional[int] = None
+ """Tokens written to prompt cache. Absent in v1."""
+
+ cache_read_input_tokens: Optional[int] = None
+ """Tokens read from prompt cache. Absent in v1."""
+
+ input_tokens: Optional[int] = None
+ """Total tokens in the prompt (echoed from `message_start`)."""
+
+ server_tool_use: Optional[Dict[str, object]] = None
+ """Server-tool usage metrics.
+
+ Anthropic compatibility only — SambaNova does not run server tools, so this
+ field is never emitted.
+ """
+
+
+class MessageDeltaEvent(BaseModel):
+ """Penultimate event of the stream.
+
+ Carries the final `stop_reason`, optional `stop_sequence`, and final usage counts.
+ """
+
+ delta: MessageDeltaEventDelta
+
+ type: Literal["message_delta"]
+
+ usage: MessageDeltaEventUsage
+ """
+ Final token accounting emitted in the closing `message_delta` event of a stream.
+ """
+
+
+class MessageStopEvent(BaseModel):
+ """Final event of the stream. No fields beyond `type`."""
+
+ type: Literal["message_stop"]
+
+
+class MessagePingEvent(BaseModel):
+ """Keepalive heartbeat. May appear at any point in the stream."""
+
+ type: Literal["ping"]
+
+
+class MessageStreamErrorEventError(BaseModel):
+ """Inner error object carried inside a `MessageErrorResponse`.
+
+ The `type` value follows Anthropic's published error taxonomy.
+ """
+
+ message: str
+ """Human-readable explanation of the error."""
+
+ type: Literal[
+ "invalid_request_error",
+ "authentication_error",
+ "permission_error",
+ "not_found_error",
+ "request_too_large",
+ "rate_limit_error",
+ "api_error",
+ "overloaded_error",
+ "not_implemented_error",
+ ]
+ """Error category. Values follow Anthropic's taxonomy."""
+
+
+class MessageStreamErrorEvent(BaseModel):
+ """Streamed error envelope. Terminates the stream."""
+
+ error: MessageStreamErrorEventError
+ """Inner error object carried inside a `MessageErrorResponse`.
+
+ The `type` value follows Anthropic's published error taxonomy.
+ """
+
+ type: Literal["error"]
+
+
+MessageStreamEvent: TypeAlias = Annotated[
+ Union[
+ MessageStartEvent,
+ MessageContentBlockStartEvent,
+ MessageContentBlockDeltaEvent,
+ MessageContentBlockStopEvent,
+ MessageDeltaEvent,
+ MessageStopEvent,
+ MessagePingEvent,
+ MessageStreamErrorEvent,
+ ],
+ PropertyInfo(discriminator="type"),
+]
diff --git a/tests/api_resources/test_messages.py b/tests/api_resources/test_messages.py
new file mode 100644
index 0000000..2f34086
--- /dev/null
+++ b/tests/api_resources/test_messages.py
@@ -0,0 +1,595 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from sambanova import SambaNova, AsyncSambaNova
+from tests.utils import assert_matches_type
+from sambanova.types import (
+ MessageCreateResponse,
+ MessageCountTokensResponse,
+)
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestMessages:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_create_overload_1(self, client: SambaNova) -> None:
+ message = client.messages.create(
+ max_tokens=1024,
+ messages=[
+ {
+ "content": "Hello, Claude!",
+ "role": "user",
+ }
+ ],
+ model="DeepSeek-V3.1",
+ )
+ assert_matches_type(MessageCreateResponse, message, path=["response"])
+
+ @parametrize
+ def test_method_create_with_all_params_overload_1(self, client: SambaNova) -> None:
+ message = client.messages.create(
+ max_tokens=1024,
+ messages=[
+ {
+ "content": "Hello, Claude!",
+ "role": "user",
+ }
+ ],
+ model="DeepSeek-V3.1",
+ container="container",
+ metadata={"user_id": "user_id"},
+ service_tier="auto",
+ stop_sequences=["string"],
+ stream=False,
+ system="string",
+ temperature=1,
+ thinking={"type": "disabled"},
+ tool_choice={
+ "type": "auto",
+ "disable_parallel_tool_use": True,
+ },
+ tools=[
+ {
+ "name": "name",
+ "allowed_callers": ["string"],
+ "cache_control": {
+ "type": "ephemeral",
+ "ttl": "ttl",
+ },
+ "defer_loading": True,
+ "description": "description",
+ "eager_input_streaming": True,
+ "input_examples": [{"foo": "bar"}],
+ "input_schema": {"foo": "bar"},
+ "strict": True,
+ "type": "custom",
+ }
+ ],
+ top_k=0,
+ top_p=0,
+ anthropic_version="2023-06-01",
+ )
+ assert_matches_type(MessageCreateResponse, message, path=["response"])
+
+ @parametrize
+ def test_raw_response_create_overload_1(self, client: SambaNova) -> None:
+ response = client.messages.with_raw_response.create(
+ max_tokens=1024,
+ messages=[
+ {
+ "content": "Hello, Claude!",
+ "role": "user",
+ }
+ ],
+ model="DeepSeek-V3.1",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ message = response.parse()
+ assert_matches_type(MessageCreateResponse, message, path=["response"])
+
+ @parametrize
+ def test_streaming_response_create_overload_1(self, client: SambaNova) -> None:
+ with client.messages.with_streaming_response.create(
+ max_tokens=1024,
+ messages=[
+ {
+ "content": "Hello, Claude!",
+ "role": "user",
+ }
+ ],
+ model="DeepSeek-V3.1",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ message = response.parse()
+ assert_matches_type(MessageCreateResponse, message, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_method_create_overload_2(self, client: SambaNova) -> None:
+ message_stream = client.messages.create(
+ max_tokens=1024,
+ messages=[
+ {
+ "content": "Hello, Claude!",
+ "role": "user",
+ }
+ ],
+ model="DeepSeek-V3.1",
+ stream=True,
+ )
+ message_stream.response.close()
+
+ @parametrize
+ def test_method_create_with_all_params_overload_2(self, client: SambaNova) -> None:
+ message_stream = client.messages.create(
+ max_tokens=1024,
+ messages=[
+ {
+ "content": "Hello, Claude!",
+ "role": "user",
+ }
+ ],
+ model="DeepSeek-V3.1",
+ stream=True,
+ container="container",
+ metadata={"user_id": "user_id"},
+ service_tier="auto",
+ stop_sequences=["string"],
+ system="string",
+ temperature=1,
+ thinking={"type": "disabled"},
+ tool_choice={
+ "type": "auto",
+ "disable_parallel_tool_use": True,
+ },
+ tools=[
+ {
+ "name": "name",
+ "allowed_callers": ["string"],
+ "cache_control": {
+ "type": "ephemeral",
+ "ttl": "ttl",
+ },
+ "defer_loading": True,
+ "description": "description",
+ "eager_input_streaming": True,
+ "input_examples": [{"foo": "bar"}],
+ "input_schema": {"foo": "bar"},
+ "strict": True,
+ "type": "custom",
+ }
+ ],
+ top_k=0,
+ top_p=0,
+ anthropic_version="2023-06-01",
+ )
+ message_stream.response.close()
+
+ @parametrize
+ def test_raw_response_create_overload_2(self, client: SambaNova) -> None:
+ response = client.messages.with_raw_response.create(
+ max_tokens=1024,
+ messages=[
+ {
+ "content": "Hello, Claude!",
+ "role": "user",
+ }
+ ],
+ model="DeepSeek-V3.1",
+ stream=True,
+ )
+
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ stream = response.parse()
+ stream.close()
+
+ @parametrize
+ def test_streaming_response_create_overload_2(self, client: SambaNova) -> None:
+ with client.messages.with_streaming_response.create(
+ max_tokens=1024,
+ messages=[
+ {
+ "content": "Hello, Claude!",
+ "role": "user",
+ }
+ ],
+ model="DeepSeek-V3.1",
+ stream=True,
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ stream = response.parse()
+ stream.close()
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_method_count_tokens(self, client: SambaNova) -> None:
+ message = client.messages.count_tokens(
+ messages=[
+ {
+ "content": "Hello, Claude!",
+ "role": "user",
+ }
+ ],
+ model="DeepSeek-V3.1",
+ )
+ assert_matches_type(MessageCountTokensResponse, message, path=["response"])
+
+ @parametrize
+ def test_method_count_tokens_with_all_params(self, client: SambaNova) -> None:
+ message = client.messages.count_tokens(
+ messages=[
+ {
+ "content": "Hello, Claude!",
+ "role": "user",
+ }
+ ],
+ model="DeepSeek-V3.1",
+ system="string",
+ thinking={"type": "disabled"},
+ tool_choice={
+ "type": "auto",
+ "disable_parallel_tool_use": True,
+ },
+ tools=[
+ {
+ "name": "name",
+ "allowed_callers": ["string"],
+ "cache_control": {
+ "type": "ephemeral",
+ "ttl": "ttl",
+ },
+ "defer_loading": True,
+ "description": "description",
+ "eager_input_streaming": True,
+ "input_examples": [{"foo": "bar"}],
+ "input_schema": {"foo": "bar"},
+ "strict": True,
+ "type": "custom",
+ }
+ ],
+ anthropic_version="2023-06-01",
+ )
+ assert_matches_type(MessageCountTokensResponse, message, path=["response"])
+
+ @parametrize
+ def test_raw_response_count_tokens(self, client: SambaNova) -> None:
+ response = client.messages.with_raw_response.count_tokens(
+ messages=[
+ {
+ "content": "Hello, Claude!",
+ "role": "user",
+ }
+ ],
+ model="DeepSeek-V3.1",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ message = response.parse()
+ assert_matches_type(MessageCountTokensResponse, message, path=["response"])
+
+ @parametrize
+ def test_streaming_response_count_tokens(self, client: SambaNova) -> None:
+ with client.messages.with_streaming_response.count_tokens(
+ messages=[
+ {
+ "content": "Hello, Claude!",
+ "role": "user",
+ }
+ ],
+ model="DeepSeek-V3.1",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ message = response.parse()
+ assert_matches_type(MessageCountTokensResponse, message, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+
+class TestAsyncMessages:
+ parametrize = pytest.mark.parametrize(
+ "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
+ )
+
+ @parametrize
+ async def test_method_create_overload_1(self, async_client: AsyncSambaNova) -> None:
+ message = await async_client.messages.create(
+ max_tokens=1024,
+ messages=[
+ {
+ "content": "Hello, Claude!",
+ "role": "user",
+ }
+ ],
+ model="DeepSeek-V3.1",
+ )
+ assert_matches_type(MessageCreateResponse, message, path=["response"])
+
+ @parametrize
+ async def test_method_create_with_all_params_overload_1(self, async_client: AsyncSambaNova) -> None:
+ message = await async_client.messages.create(
+ max_tokens=1024,
+ messages=[
+ {
+ "content": "Hello, Claude!",
+ "role": "user",
+ }
+ ],
+ model="DeepSeek-V3.1",
+ container="container",
+ metadata={"user_id": "user_id"},
+ service_tier="auto",
+ stop_sequences=["string"],
+ stream=False,
+ system="string",
+ temperature=1,
+ thinking={"type": "disabled"},
+ tool_choice={
+ "type": "auto",
+ "disable_parallel_tool_use": True,
+ },
+ tools=[
+ {
+ "name": "name",
+ "allowed_callers": ["string"],
+ "cache_control": {
+ "type": "ephemeral",
+ "ttl": "ttl",
+ },
+ "defer_loading": True,
+ "description": "description",
+ "eager_input_streaming": True,
+ "input_examples": [{"foo": "bar"}],
+ "input_schema": {"foo": "bar"},
+ "strict": True,
+ "type": "custom",
+ }
+ ],
+ top_k=0,
+ top_p=0,
+ anthropic_version="2023-06-01",
+ )
+ assert_matches_type(MessageCreateResponse, message, path=["response"])
+
+ @parametrize
+ async def test_raw_response_create_overload_1(self, async_client: AsyncSambaNova) -> None:
+ response = await async_client.messages.with_raw_response.create(
+ max_tokens=1024,
+ messages=[
+ {
+ "content": "Hello, Claude!",
+ "role": "user",
+ }
+ ],
+ model="DeepSeek-V3.1",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ message = await response.parse()
+ assert_matches_type(MessageCreateResponse, message, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_create_overload_1(self, async_client: AsyncSambaNova) -> None:
+ async with async_client.messages.with_streaming_response.create(
+ max_tokens=1024,
+ messages=[
+ {
+ "content": "Hello, Claude!",
+ "role": "user",
+ }
+ ],
+ model="DeepSeek-V3.1",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ message = await response.parse()
+ assert_matches_type(MessageCreateResponse, message, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_method_create_overload_2(self, async_client: AsyncSambaNova) -> None:
+ message_stream = await async_client.messages.create(
+ max_tokens=1024,
+ messages=[
+ {
+ "content": "Hello, Claude!",
+ "role": "user",
+ }
+ ],
+ model="DeepSeek-V3.1",
+ stream=True,
+ )
+ await message_stream.response.aclose()
+
+ @parametrize
+ async def test_method_create_with_all_params_overload_2(self, async_client: AsyncSambaNova) -> None:
+ message_stream = await async_client.messages.create(
+ max_tokens=1024,
+ messages=[
+ {
+ "content": "Hello, Claude!",
+ "role": "user",
+ }
+ ],
+ model="DeepSeek-V3.1",
+ stream=True,
+ container="container",
+ metadata={"user_id": "user_id"},
+ service_tier="auto",
+ stop_sequences=["string"],
+ system="string",
+ temperature=1,
+ thinking={"type": "disabled"},
+ tool_choice={
+ "type": "auto",
+ "disable_parallel_tool_use": True,
+ },
+ tools=[
+ {
+ "name": "name",
+ "allowed_callers": ["string"],
+ "cache_control": {
+ "type": "ephemeral",
+ "ttl": "ttl",
+ },
+ "defer_loading": True,
+ "description": "description",
+ "eager_input_streaming": True,
+ "input_examples": [{"foo": "bar"}],
+ "input_schema": {"foo": "bar"},
+ "strict": True,
+ "type": "custom",
+ }
+ ],
+ top_k=0,
+ top_p=0,
+ anthropic_version="2023-06-01",
+ )
+ await message_stream.response.aclose()
+
+ @parametrize
+ async def test_raw_response_create_overload_2(self, async_client: AsyncSambaNova) -> None:
+ response = await async_client.messages.with_raw_response.create(
+ max_tokens=1024,
+ messages=[
+ {
+ "content": "Hello, Claude!",
+ "role": "user",
+ }
+ ],
+ model="DeepSeek-V3.1",
+ stream=True,
+ )
+
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ stream = await response.parse()
+ await stream.close()
+
+ @parametrize
+ async def test_streaming_response_create_overload_2(self, async_client: AsyncSambaNova) -> None:
+ async with async_client.messages.with_streaming_response.create(
+ max_tokens=1024,
+ messages=[
+ {
+ "content": "Hello, Claude!",
+ "role": "user",
+ }
+ ],
+ model="DeepSeek-V3.1",
+ stream=True,
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ stream = await response.parse()
+ await stream.close()
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_method_count_tokens(self, async_client: AsyncSambaNova) -> None:
+ message = await async_client.messages.count_tokens(
+ messages=[
+ {
+ "content": "Hello, Claude!",
+ "role": "user",
+ }
+ ],
+ model="DeepSeek-V3.1",
+ )
+ assert_matches_type(MessageCountTokensResponse, message, path=["response"])
+
+ @parametrize
+ async def test_method_count_tokens_with_all_params(self, async_client: AsyncSambaNova) -> None:
+ message = await async_client.messages.count_tokens(
+ messages=[
+ {
+ "content": "Hello, Claude!",
+ "role": "user",
+ }
+ ],
+ model="DeepSeek-V3.1",
+ system="string",
+ thinking={"type": "disabled"},
+ tool_choice={
+ "type": "auto",
+ "disable_parallel_tool_use": True,
+ },
+ tools=[
+ {
+ "name": "name",
+ "allowed_callers": ["string"],
+ "cache_control": {
+ "type": "ephemeral",
+ "ttl": "ttl",
+ },
+ "defer_loading": True,
+ "description": "description",
+ "eager_input_streaming": True,
+ "input_examples": [{"foo": "bar"}],
+ "input_schema": {"foo": "bar"},
+ "strict": True,
+ "type": "custom",
+ }
+ ],
+ anthropic_version="2023-06-01",
+ )
+ assert_matches_type(MessageCountTokensResponse, message, path=["response"])
+
+ @parametrize
+ async def test_raw_response_count_tokens(self, async_client: AsyncSambaNova) -> None:
+ response = await async_client.messages.with_raw_response.count_tokens(
+ messages=[
+ {
+ "content": "Hello, Claude!",
+ "role": "user",
+ }
+ ],
+ model="DeepSeek-V3.1",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ message = await response.parse()
+ assert_matches_type(MessageCountTokensResponse, message, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_count_tokens(self, async_client: AsyncSambaNova) -> None:
+ async with async_client.messages.with_streaming_response.count_tokens(
+ messages=[
+ {
+ "content": "Hello, Claude!",
+ "role": "user",
+ }
+ ],
+ model="DeepSeek-V3.1",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ message = await response.parse()
+ assert_matches_type(MessageCountTokensResponse, message, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
From 546fc59615a07e268930611150b2a91ec0073c74 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Tue, 26 May 2026 21:54:31 +0000
Subject: [PATCH 5/5] release: 1.9.0
---
.release-please-manifest.json | 2 +-
CHANGELOG.md | 14 ++++++++++++++
pyproject.toml | 2 +-
src/sambanova/_version.py | 2 +-
4 files changed, 17 insertions(+), 3 deletions(-)
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 24d5689..4fcfdf7 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "1.8.2"
+ ".": "1.9.0"
}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9523365..3b6a1db 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,19 @@
# Changelog
+## 1.9.0 (2026-05-26)
+
+Full Changelog: [v1.8.2...v1.9.0](https://github.com/sambanova/sambanova-python/compare/v1.8.2...v1.9.0)
+
+### Features
+
+* **api:** add Anthropic-compatible Messages API support ([0ddfd33](https://github.com/sambanova/sambanova-python/commit/0ddfd334decc86831f840cd8c0de2c3a73e378a2))
+* **internal/types:** support eagerly validating pydantic iterators ([cc4aade](https://github.com/sambanova/sambanova-python/commit/cc4aade45c484980fafacf2e315ca6f1564e0ace))
+
+
+### Bug Fixes
+
+* **client:** add missing f-string prefix in file type error message ([c6b84e8](https://github.com/sambanova/sambanova-python/commit/c6b84e853be657a330e6754894911bdaac07dc67))
+
## 1.8.2 (2026-05-07)
Full Changelog: [v1.8.1...v1.8.2](https://github.com/sambanova/sambanova-python/compare/v1.8.1...v1.8.2)
diff --git a/pyproject.toml b/pyproject.toml
index 28f88b6..23fbe75 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "sambanova"
-version = "1.8.2"
+version = "1.9.0"
description = "The official Python library for the SambaNova API"
dynamic = ["readme"]
license = "Apache-2.0"
diff --git a/src/sambanova/_version.py b/src/sambanova/_version.py
index d3cc5ec..48fad55 100644
--- a/src/sambanova/_version.py
+++ b/src/sambanova/_version.py
@@ -1,4 +1,4 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
__title__ = "sambanova"
-__version__ = "1.8.2" # x-release-please-version
+__version__ = "1.9.0" # x-release-please-version