From c6b84e853be657a330e6754894911bdaac07dc67 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Fri, 8 May 2026 16:32:53 +0000 Subject: [PATCH 1/5] fix(client): add missing f-string prefix in file type error message --- src/sambanova/_files.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sambanova/_files.py b/src/sambanova/_files.py index 6071d1e..6877ecb 100644 --- a/src/sambanova/_files.py +++ b/src/sambanova/_files.py @@ -99,7 +99,7 @@ async def async_to_httpx_files(files: RequestFiles | None) -> HttpxRequestFiles elif is_sequence_t(files): files = [(key, await _async_transform_file(file)) for key, file in files] else: - raise TypeError("Unexpected file type input {type(files)}, expected mapping or sequence") + raise TypeError(f"Unexpected file type input {type(files)}, expected mapping or sequence") return files From cc4aade45c484980fafacf2e315ca6f1564e0ace Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Mon, 11 May 2026 17:44:21 +0000 Subject: [PATCH 2/5] feat(internal/types): support eagerly validating pydantic iterators --- src/sambanova/_models.py | 80 ++++++++++++++++++++++++++++++++++++++++ tests/test_models.py | 60 ++++++++++++++++++++++++++++-- 2 files changed, 137 insertions(+), 3 deletions(-) diff --git a/src/sambanova/_models.py b/src/sambanova/_models.py index 29070e0..8c5ab26 100644 --- a/src/sambanova/_models.py +++ b/src/sambanova/_models.py @@ -25,7 +25,9 @@ ClassVar, Protocol, Required, + Annotated, ParamSpec, + TypeAlias, TypedDict, TypeGuard, final, @@ -79,7 +81,15 @@ from ._constants import RAW_RESPONSE_HEADER if TYPE_CHECKING: + from pydantic import GetCoreSchemaHandler, ValidatorFunctionWrapHandler + from pydantic_core import CoreSchema, core_schema from pydantic_core.core_schema import ModelField, ModelSchema, LiteralSchema, ModelFieldsSchema +else: + try: + from pydantic_core import 
CoreSchema, core_schema + except ImportError: + CoreSchema = None + core_schema = None __all__ = ["BaseModel", "GenericModel"] @@ -396,6 +406,76 @@ def model_dump_json( ) +class _EagerIterable(list[_T], Generic[_T]): + """ + Accepts any Iterable[T] input (including generators), consumes it + eagerly, and validates all items upfront. + + Validation preserves the original container type where possible + (e.g. a set[T] stays a set[T]). Serialization (model_dump / JSON) + always emits a list — round-tripping through model_dump() will not + restore the original container type. + """ + + @classmethod + def __get_pydantic_core_schema__( + cls, + source_type: Any, + handler: GetCoreSchemaHandler, + ) -> CoreSchema: + (item_type,) = get_args(source_type) or (Any,) + item_schema: CoreSchema = handler.generate_schema(item_type) + list_of_items_schema: CoreSchema = core_schema.list_schema(item_schema) + + return core_schema.no_info_wrap_validator_function( + cls._validate, + list_of_items_schema, + serialization=core_schema.plain_serializer_function_ser_schema( + cls._serialize, + info_arg=False, + ), + ) + + @staticmethod + def _validate(v: Iterable[_T], handler: "ValidatorFunctionWrapHandler") -> Any: + original_type: type[Any] = type(v) + + # Normalize to list so list_schema can validate each item + if isinstance(v, list): + items: list[_T] = v + else: + try: + items = list(v) + except TypeError as e: + raise TypeError("Value is not iterable") from e + + # Validate items against the inner schema + validated: list[_T] = handler(items) + + # Reconstruct original container type + if original_type is list: + return validated + # str(list) produces the list's repr, not a string built from items, + # so skip reconstruction for str and its subclasses. 
+ if issubclass(original_type, str): + return validated + try: + return original_type(validated) + except (TypeError, ValueError): + # If the type cannot be reconstructed, just return the validated list + return validated + + @staticmethod + def _serialize(v: Iterable[_T]) -> list[_T]: + """Always serialize as a list so Pydantic's JSON encoder is happy.""" + if isinstance(v, list): + return v + return list(v) + + +EagerIterable: TypeAlias = Annotated[Iterable[_T], _EagerIterable] + + def _construct_field(value: object, field: FieldInfo, key: str) -> object: if value is None: return field_get_default(field) diff --git a/tests/test_models.py b/tests/test_models.py index d559b5c..eda8fb8 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,7 +1,8 @@ import json -from typing import TYPE_CHECKING, Any, Dict, List, Union, Optional, cast +from typing import TYPE_CHECKING, Any, Dict, List, Union, Iterable, Optional, cast from datetime import datetime, timezone -from typing_extensions import Literal, Annotated, TypeAliasType +from collections import deque +from typing_extensions import Literal, Annotated, TypedDict, TypeAliasType import pytest import pydantic @@ -9,7 +10,7 @@ from sambanova._utils import PropertyInfo from sambanova._compat import PYDANTIC_V1, parse_obj, model_dump, model_json -from sambanova._models import DISCRIMINATOR_CACHE, BaseModel, construct_type +from sambanova._models import DISCRIMINATOR_CACHE, BaseModel, EagerIterable, construct_type class BasicModel(BaseModel): @@ -961,3 +962,56 @@ def __getattr__(self, attr: str) -> Item: ... assert model.a.prop == 1 assert isinstance(model.a, Item) assert model.other == "foo" + + +# NOTE: Workaround for Pydantic Iterable behavior. +# Iterable fields are replaced with a ValidatorIterator and may be consumed +# during serialization, which can cause subsequent dumps to return empty data. 
+# See: https://github.com/pydantic/pydantic/issues/9541 +@pytest.mark.parametrize( + "data, expected_validated", + [ + ([1, 2, 3], [1, 2, 3]), + ((1, 2, 3), (1, 2, 3)), + (set([1, 2, 3]), set([1, 2, 3])), + (iter([1, 2, 3]), [1, 2, 3]), + ([], []), + ((x for x in [1, 2, 3]), [1, 2, 3]), + (map(lambda x: x, [1, 2, 3]), [1, 2, 3]), + (frozenset([1, 2, 3]), frozenset([1, 2, 3])), + (deque([1, 2, 3]), deque([1, 2, 3])), + ], + ids=["list", "tuple", "set", "iterator", "empty", "generator", "map", "frozenset", "deque"], +) +@pytest.mark.skipif(PYDANTIC_V1, reason="this is only supported in pydantic v2") +def test_iterable_construction(data: Iterable[int], expected_validated: Iterable[int]) -> None: + class TypeWithIterable(TypedDict): + items: EagerIterable[int] + + class Model(BaseModel): + data: TypeWithIterable + + m = Model.model_validate({"data": {"items": data}}) + assert m.data["items"] == expected_validated + + # Verify repeated dumps don't lose data (the original bug) + assert m.model_dump()["data"]["items"] == list(expected_validated) + assert m.model_dump()["data"]["items"] == list(expected_validated) + + +@pytest.mark.skipif(PYDANTIC_V1, reason="this is only supported in pydantic v2") +def test_iterable_construction_str_falls_back_to_list() -> None: + # str is iterable (over chars), but str(list_of_chars) produces the list's repr + # rather than reconstructing a string from items. We special-case str to fall + # back to list instead of attempting reconstruction. 
+ class TypeWithIterable(TypedDict): + items: EagerIterable[str] + + class Model(BaseModel): + data: TypeWithIterable + + m = Model.model_validate({"data": {"items": "hello"}}) + + # falls back to list of chars rather than calling str(["h", "e", "l", "l", "o"]) + assert m.data["items"] == ["h", "e", "l", "l", "o"] + assert m.model_dump()["data"]["items"] == ["h", "e", "l", "l", "o"] From 4b1c00a59dc5d6bf8e04812dbcf008b1a775f46b Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Tue, 12 May 2026 19:12:28 +0000 Subject: [PATCH 3/5] ci: pin GitHub Actions to commit SHAs Pin all GitHub Actions referenced in generated workflows (both first-party `actions/*` and third-party) to immutable commit SHAs. Updating pinned actions is now a deliberate codegen-side bump rather than implicit on every workflow run. --- .github/workflows/ci.yml | 14 +++++++------- .github/workflows/publish-pypi.yml | 4 ++-- .github/workflows/release-doctor.yml | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0477bdc..f3eb789 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,10 +21,10 @@ jobs: runs-on: ${{ github.repository == 'stainless-sdks/sambanova-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }} if: (github.event_name == 'push' || github.event.pull_request.head.repo.fork) && (github.event_name != 'push' || github.event.head_commit.message != 'codegen metadata') steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Install uv - uses: astral-sh/setup-uv@v5 + uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5.4.2 with: version: '0.10.2' @@ -43,10 +43,10 @@ jobs: id-token: write runs-on: ${{ github.repository == 'stainless-sdks/sambanova-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }} steps: - - uses: actions/checkout@v6 + - uses: 
actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Install uv - uses: astral-sh/setup-uv@v5 + uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5.4.2 with: version: '0.10.2' @@ -61,7 +61,7 @@ jobs: github.repository == 'stainless-sdks/sambanova-python' && !startsWith(github.ref, 'refs/heads/stl/') id: github-oidc - uses: actions/github-script@v8 + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 with: script: core.setOutput('github_token', await core.getIDToken()); @@ -81,10 +81,10 @@ jobs: runs-on: ${{ github.repository == 'stainless-sdks/sambanova-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }} if: github.event_name == 'push' || github.event.pull_request.head.repo.fork steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Install uv - uses: astral-sh/setup-uv@v5 + uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5.4.2 with: version: '0.10.2' diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml index 3f73e27..884b664 100644 --- a/.github/workflows/publish-pypi.yml +++ b/.github/workflows/publish-pypi.yml @@ -17,10 +17,10 @@ jobs: id-token: write steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Install uv - uses: astral-sh/setup-uv@v5 + uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5.4.2 with: version: '0.9.13' diff --git a/.github/workflows/release-doctor.yml b/.github/workflows/release-doctor.yml index 45a764d..1696088 100644 --- a/.github/workflows/release-doctor.yml +++ b/.github/workflows/release-doctor.yml @@ -12,7 +12,7 @@ jobs: if: github.repository == 'sambanova/sambanova-python' && (github.event_name == 'push' || github.event_name == 'workflow_dispatch' || startsWith(github.head_ref, 'release-please') || github.head_ref == 'next') steps: - - uses: actions/checkout@v6 
+ - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Check release environment run: | From 0ddfd334decc86831f840cd8c0de2c3a73e378a2 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Tue, 26 May 2026 21:54:09 +0000 Subject: [PATCH 4/5] feat(api): add anthropic compatible messages api support --- .stats.yml | 8 +- api.md | 19 + src/sambanova/_client.py | 79 +- src/sambanova/resources/__init__.py | 14 + src/sambanova/resources/messages.py | 1197 +++++++++++++++++ src/sambanova/types/__init__.py | 6 + src/sambanova/types/message.py | 328 +++++ .../types/message_count_tokens_params.py | 979 ++++++++++++++ .../types/message_count_tokens_response.py | 12 + src/sambanova/types/message_create_params.py | 1095 +++++++++++++++ .../types/message_create_response.py | 449 +++++++ src/sambanova/types/message_stream_event.py | 451 +++++++ tests/api_resources/test_messages.py | 595 ++++++++ 13 files changed, 5227 insertions(+), 5 deletions(-) create mode 100644 src/sambanova/resources/messages.py create mode 100644 src/sambanova/types/message.py create mode 100644 src/sambanova/types/message_count_tokens_params.py create mode 100644 src/sambanova/types/message_count_tokens_response.py create mode 100644 src/sambanova/types/message_create_params.py create mode 100644 src/sambanova/types/message_create_response.py create mode 100644 src/sambanova/types/message_stream_event.py create mode 100644 tests/api_resources/test_messages.py diff --git a/.stats.yml b/.stats.yml index 8386d48..29d23f7 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ -configured_endpoints: 8 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/sambanova/sambanova-5884297580e5423cf40bd59057ea55da0384fe34f431a80cac0eece6176c6057.yml -openapi_spec_hash: 9306c1d75784a840a2973024fa94d22d -config_hash: 315596f19f192be2b7bf343664a7eb90 +configured_endpoints: 10 +openapi_spec_url: 
https://storage.googleapis.com/stainless-sdk-openapi-specs/sambanova/sambanova-f9a2632e2ea9632e8b40258f57bfa8b529b926d72fd8b1f9550848fbb880e0de.yml +openapi_spec_hash: 8df9e2ad31769c26c590dacf3517bc36 +config_hash: d33fc68f92caf09c6b3b40675a111114 diff --git a/api.md b/api.md index 2678e70..41de6f3 100644 --- a/api.md +++ b/api.md @@ -88,6 +88,25 @@ Methods: - client.responses.create(\*\*params) -> ResponseCreateResponse +# Messages + +Types: + +```python +from sambanova.types import ( + Message, + MessageCountTokensResponse, + MessageErrorResponse, + MessageStreamEvent, + MessageCreateResponse, +) +``` + +Methods: + +- client.messages.create(\*\*params) -> MessageCreateResponse +- client.messages.count_tokens(\*\*params) -> MessageCountTokensResponse + # Models Types: diff --git a/src/sambanova/_client.py b/src/sambanova/_client.py index 5b044c0..08b45e4 100644 --- a/src/sambanova/_client.py +++ b/src/sambanova/_client.py @@ -35,8 +35,9 @@ ) if TYPE_CHECKING: - from .resources import chat, audio, models, responses, embeddings, completions + from .resources import chat, audio, models, messages, responses, embeddings, completions from .resources.models import ModelsResource, AsyncModelsResource + from .resources.messages import MessagesResource, AsyncMessagesResource from .resources.chat.chat import ChatResource, AsyncChatResource from .resources.responses import ResponsesResource, AsyncResponsesResource from .resources.embeddings import EmbeddingsResource, AsyncEmbeddingsResource @@ -58,12 +59,14 @@ class SambaNova(SyncAPIClient): # client options api_key: str + x_api_key: str | None integration_source: str | None def __init__( self, *, api_key: str | None = None, + x_api_key: str | None = None, integration_source: str | None = None, base_url: str | httpx.URL | None = None, timeout: float | Timeout | None | NotGiven = not_given, @@ -88,6 +91,7 @@ def __init__( This automatically infers the following arguments from their corresponding environment variables if they 
are not provided: - `api_key` from `SAMBANOVA_API_KEY` + - `x_api_key` from `SAMBANOVA_API_KEY` - `integration_source` from `SAMBANOVA_INTEGRATION_SOURCE` """ if api_key is None: @@ -98,6 +102,10 @@ def __init__( ) self.api_key = api_key + if x_api_key is None: + x_api_key = os.environ.get("SAMBANOVA_API_KEY") + self.x_api_key = x_api_key + if integration_source is None: integration_source = os.environ.get("SAMBANOVA_INTEGRATION_SOURCE") self.integration_source = integration_source @@ -159,6 +167,12 @@ def responses(self) -> ResponsesResource: return ResponsesResource(self) + @cached_property + def messages(self) -> MessagesResource: + from .resources.messages import MessagesResource + + return MessagesResource(self) + @cached_property def models(self) -> ModelsResource: from .resources.models import ModelsResource @@ -181,9 +195,20 @@ def qs(self) -> Querystring: @property @override def auth_headers(self) -> dict[str, str]: + return {**self._api_key, **self._x_api_key} + + @property + def _api_key(self) -> dict[str, str]: api_key = self.api_key return {"Authorization": f"Bearer {api_key}"} + @property + def _x_api_key(self) -> dict[str, str]: + x_api_key = self.x_api_key + if x_api_key is None: + return {} + return {"x-api-key": x_api_key} + @property @override def default_headers(self) -> dict[str, str | Omit]: @@ -198,6 +223,7 @@ def copy( self, *, api_key: str | None = None, + x_api_key: str | None = None, integration_source: str | None = None, base_url: str | httpx.URL | None = None, timeout: float | Timeout | None | NotGiven = not_given, @@ -233,6 +259,7 @@ def copy( http_client = http_client or self._client return self.__class__( api_key=api_key or self.api_key, + x_api_key=x_api_key or self.x_api_key, integration_source=integration_source or self.integration_source, base_url=base_url or self.base_url, timeout=self.timeout if isinstance(timeout, NotGiven) else timeout, @@ -284,12 +311,14 @@ def _make_status_error( class AsyncSambaNova(AsyncAPIClient): # 
client options api_key: str + x_api_key: str | None integration_source: str | None def __init__( self, *, api_key: str | None = None, + x_api_key: str | None = None, integration_source: str | None = None, base_url: str | httpx.URL | None = None, timeout: float | Timeout | None | NotGiven = not_given, @@ -314,6 +343,7 @@ def __init__( This automatically infers the following arguments from their corresponding environment variables if they are not provided: - `api_key` from `SAMBANOVA_API_KEY` + - `x_api_key` from `SAMBANOVA_API_KEY` - `integration_source` from `SAMBANOVA_INTEGRATION_SOURCE` """ if api_key is None: @@ -324,6 +354,10 @@ def __init__( ) self.api_key = api_key + if x_api_key is None: + x_api_key = os.environ.get("SAMBANOVA_API_KEY") + self.x_api_key = x_api_key + if integration_source is None: integration_source = os.environ.get("SAMBANOVA_INTEGRATION_SOURCE") self.integration_source = integration_source @@ -385,6 +419,12 @@ def responses(self) -> AsyncResponsesResource: return AsyncResponsesResource(self) + @cached_property + def messages(self) -> AsyncMessagesResource: + from .resources.messages import AsyncMessagesResource + + return AsyncMessagesResource(self) + @cached_property def models(self) -> AsyncModelsResource: from .resources.models import AsyncModelsResource @@ -407,9 +447,20 @@ def qs(self) -> Querystring: @property @override def auth_headers(self) -> dict[str, str]: + return {**self._api_key, **self._x_api_key} + + @property + def _api_key(self) -> dict[str, str]: api_key = self.api_key return {"Authorization": f"Bearer {api_key}"} + @property + def _x_api_key(self) -> dict[str, str]: + x_api_key = self.x_api_key + if x_api_key is None: + return {} + return {"x-api-key": x_api_key} + @property @override def default_headers(self) -> dict[str, str | Omit]: @@ -424,6 +475,7 @@ def copy( self, *, api_key: str | None = None, + x_api_key: str | None = None, integration_source: str | None = None, base_url: str | httpx.URL | None = None, timeout: 
float | Timeout | None | NotGiven = not_given, @@ -459,6 +511,7 @@ def copy( http_client = http_client or self._client return self.__class__( api_key=api_key or self.api_key, + x_api_key=x_api_key or self.x_api_key, integration_source=integration_source or self.integration_source, base_url=base_url or self.base_url, timeout=self.timeout if isinstance(timeout, NotGiven) else timeout, @@ -543,6 +596,12 @@ def responses(self) -> responses.ResponsesResourceWithRawResponse: return ResponsesResourceWithRawResponse(self._client.responses) + @cached_property + def messages(self) -> messages.MessagesResourceWithRawResponse: + from .resources.messages import MessagesResourceWithRawResponse + + return MessagesResourceWithRawResponse(self._client.messages) + @cached_property def models(self) -> models.ModelsResourceWithRawResponse: from .resources.models import ModelsResourceWithRawResponse @@ -586,6 +645,12 @@ def responses(self) -> responses.AsyncResponsesResourceWithRawResponse: return AsyncResponsesResourceWithRawResponse(self._client.responses) + @cached_property + def messages(self) -> messages.AsyncMessagesResourceWithRawResponse: + from .resources.messages import AsyncMessagesResourceWithRawResponse + + return AsyncMessagesResourceWithRawResponse(self._client.messages) + @cached_property def models(self) -> models.AsyncModelsResourceWithRawResponse: from .resources.models import AsyncModelsResourceWithRawResponse @@ -629,6 +694,12 @@ def responses(self) -> responses.ResponsesResourceWithStreamingResponse: return ResponsesResourceWithStreamingResponse(self._client.responses) + @cached_property + def messages(self) -> messages.MessagesResourceWithStreamingResponse: + from .resources.messages import MessagesResourceWithStreamingResponse + + return MessagesResourceWithStreamingResponse(self._client.messages) + @cached_property def models(self) -> models.ModelsResourceWithStreamingResponse: from .resources.models import ModelsResourceWithStreamingResponse @@ -672,6 +743,12 
@@ def responses(self) -> responses.AsyncResponsesResourceWithStreamingResponse: return AsyncResponsesResourceWithStreamingResponse(self._client.responses) + @cached_property + def messages(self) -> messages.AsyncMessagesResourceWithStreamingResponse: + from .resources.messages import AsyncMessagesResourceWithStreamingResponse + + return AsyncMessagesResourceWithStreamingResponse(self._client.messages) + @cached_property def models(self) -> models.AsyncModelsResourceWithStreamingResponse: from .resources.models import AsyncModelsResourceWithStreamingResponse diff --git a/src/sambanova/resources/__init__.py b/src/sambanova/resources/__init__.py index 03ab754..0cfdcd3 100644 --- a/src/sambanova/resources/__init__.py +++ b/src/sambanova/resources/__init__.py @@ -24,6 +24,14 @@ ModelsResourceWithStreamingResponse, AsyncModelsResourceWithStreamingResponse, ) +from .messages import ( + MessagesResource, + AsyncMessagesResource, + MessagesResourceWithRawResponse, + AsyncMessagesResourceWithRawResponse, + MessagesResourceWithStreamingResponse, + AsyncMessagesResourceWithStreamingResponse, +) from .responses import ( ResponsesResource, AsyncResponsesResource, @@ -80,6 +88,12 @@ "AsyncResponsesResourceWithRawResponse", "ResponsesResourceWithStreamingResponse", "AsyncResponsesResourceWithStreamingResponse", + "MessagesResource", + "AsyncMessagesResource", + "MessagesResourceWithRawResponse", + "AsyncMessagesResourceWithRawResponse", + "MessagesResourceWithStreamingResponse", + "AsyncMessagesResourceWithStreamingResponse", "ModelsResource", "AsyncModelsResource", "ModelsResourceWithRawResponse", diff --git a/src/sambanova/resources/messages.py b/src/sambanova/resources/messages.py new file mode 100644 index 0000000..dfbcf82 --- /dev/null +++ b/src/sambanova/resources/messages.py @@ -0,0 +1,1197 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Any, Union, Iterable, Optional, cast +from typing_extensions import Literal, overload + +import httpx + +from ..types import message_create_params, message_count_tokens_params +from .._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given +from .._utils import required_args, maybe_transform, strip_not_given, async_maybe_transform +from .._compat import cached_property +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import ( + to_raw_response_wrapper, + to_streamed_response_wrapper, + async_to_raw_response_wrapper, + async_to_streamed_response_wrapper, +) +from .._streaming import Stream, AsyncStream +from .._base_client import make_request_options +from ..types.message_stream_event import MessageStreamEvent +from ..types.message_create_response import MessageCreateResponse +from ..types.message_count_tokens_response import MessageCountTokensResponse + +__all__ = ["MessagesResource", "AsyncMessagesResource"] + + +class MessagesResource(SyncAPIResource): + @cached_property + def with_raw_response(self) -> MessagesResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/sambanova/sambanova-python#accessing-raw-response-data-eg-headers + """ + return MessagesResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> MessagesResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/sambanova/sambanova-python#with_streaming_response + """ + return MessagesResourceWithStreamingResponse(self) + + @overload + def create( + self, + *, + max_tokens: int, + messages: Iterable[message_create_params.Message], + model: Union[ + str, + Literal[ + "Meta-Llama-3.3-70B-Instruct", + "Meta-Llama-3.2-1B-Instruct", + "Meta-Llama-3.2-3B-Instruct", + "Llama-3.2-11B-Vision-Instruct", + "Llama-3.2-90B-Vision-Instruct", + "Meta-Llama-3.1-8B-Instruct", + "Meta-Llama-3.1-70B-Instruct", + "Meta-Llama-3.1-405B-Instruct", + "Qwen2.5-Coder-32B-Instruct", + "Qwen2.5-72B-Instruct", + "QwQ-32B-Preview", + "Meta-Llama-Guard-3-8B", + "DeepSeek-R1", + "DeepSeek-R1-0528", + "DeepSeek-V3-0324", + "DeepSeek-V3.1", + "DeepSeek-V3.1-cb", + "DeepSeek-V3.1-Terminus", + "DeepSeek-V3.2", + "DeepSeek-R1-Distill-Llama-70B", + "Llama-4-Maverick-17B-128E-Instruct", + "Llama-4-Scout-17B-16E-Instruct", + "Qwen3-32B", + "Qwen3-235B", + "Llama-3.3-Swallow-70B-Instruct-v0.4", + "gpt-oss-120b", + "ALLaM-7B-Instruct-preview", + "MiniMax-M2.5", + "MiniMax-M2.7", + "gemma-3-12b-it", + ], + ], + container: Optional[str] | Omit = omit, + metadata: message_create_params.Metadata | Omit = omit, + service_tier: Optional[Literal["auto", "standard_only"]] | Omit = omit, + stop_sequences: Optional[SequenceNotStr[str]] | Omit = omit, + stream: Optional[Literal[False]] | Omit = omit, + system: Union[str, Iterable[message_create_params.SystemSystemTextBlockArray]] | Omit = omit, + temperature: Optional[float] | Omit = omit, + thinking: message_create_params.Thinking | Omit = omit, + tool_choice: Optional[message_create_params.ToolChoice] | Omit = omit, + tools: Optional[Iterable[message_create_params.Tool]] | Omit = omit, + top_k: Optional[int] | Omit = omit, + top_p: Optional[float] | Omit = omit, + anthropic_version: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> MessageCreateResponse: + """Anthropic Messages API compatible endpoint. + + Generates a model response for the + supplied conversation. Authentication accepts either the bearer + `Authorization: Bearer ` header (SambaNova SDK default) or the `x-api-key` + header (Anthropic SDK default); the same API key is used in both cases. When + `stream: true` is set, the response is a sequence of Server-Sent Events whose + payloads conform to `MessageStreamEvent`; otherwise the response is a single + `Message` object. + + Args: + max_tokens: Maximum number of tokens to generate. The combined input + output token count is + bounded by the model's context window. + + messages: Conversation turns. + + model: The model ID to use (e.g. gpt-oss-120b). See available + [models](https://docs.sambanova.ai/docs/en/models/sambacloud-models) + + container: Existing code-execution container ID to reuse. **In v1**: silently dropped + + metadata: Free-form metadata attached to the request. Currently only `user_id` Additional + fields are accepted but ignored. + + service_tier: Service-tier preference. **In v1**: silently dropped + + stop_sequences: Custom strings that, when generated, cause the model to stop. + + stream: If true, the response is a sequence of Server-Sent Events whose payloads conform + to `MessageStreamEvent`. + + system: System prompt for the conversation. Accepts either a single string (most common) + or an array of text blocks (used when individual segments need `cache_control` + markers). Multiple text blocks are joined with newlines and prepended to the + conversation as a `role: system` message. + + temperature: Sampling temperature in `[0.0, 2.0]`. 
Higher values produce more random output, + lower values more deterministic. Adjust only one of `temperature`, `top_p`, + `top_k`. + + thinking: Controls Anthropic-style extended thinking. **In v1**: only `type:"disabled"` is + silently accepted as a no-op; `type:"enabled"` and `type:"adaptive"` return a + 400 `invalid_request_error` (`unsupported_parameter`). + + tool_choice: How the model should choose from the provided tools. + + tools: Tool definitions the model may call. + + top_k: Top-k sampling. Considers only the K most likely tokens at each step. Set to 0 + to disable. + + top_p: Nucleus sampling. Considers tokens with cumulative probability mass up to + `top_p`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create( + self, + *, + max_tokens: int, + messages: Iterable[message_create_params.Message], + model: Union[ + str, + Literal[ + "Meta-Llama-3.3-70B-Instruct", + "Meta-Llama-3.2-1B-Instruct", + "Meta-Llama-3.2-3B-Instruct", + "Llama-3.2-11B-Vision-Instruct", + "Llama-3.2-90B-Vision-Instruct", + "Meta-Llama-3.1-8B-Instruct", + "Meta-Llama-3.1-70B-Instruct", + "Meta-Llama-3.1-405B-Instruct", + "Qwen2.5-Coder-32B-Instruct", + "Qwen2.5-72B-Instruct", + "QwQ-32B-Preview", + "Meta-Llama-Guard-3-8B", + "DeepSeek-R1", + "DeepSeek-R1-0528", + "DeepSeek-V3-0324", + "DeepSeek-V3.1", + "DeepSeek-V3.1-cb", + "DeepSeek-V3.1-Terminus", + "DeepSeek-V3.2", + "DeepSeek-R1-Distill-Llama-70B", + "Llama-4-Maverick-17B-128E-Instruct", + "Llama-4-Scout-17B-16E-Instruct", + "Qwen3-32B", + "Qwen3-235B", + "Llama-3.3-Swallow-70B-Instruct-v0.4", + "gpt-oss-120b", + "ALLaM-7B-Instruct-preview", + "MiniMax-M2.5", + "MiniMax-M2.7", + "gemma-3-12b-it", + ], + ], + stream: Literal[True], + container: Optional[str] | Omit = omit, + metadata: 
message_create_params.Metadata | Omit = omit, + service_tier: Optional[Literal["auto", "standard_only"]] | Omit = omit, + stop_sequences: Optional[SequenceNotStr[str]] | Omit = omit, + system: Union[str, Iterable[message_create_params.SystemSystemTextBlockArray]] | Omit = omit, + temperature: Optional[float] | Omit = omit, + thinking: message_create_params.Thinking | Omit = omit, + tool_choice: Optional[message_create_params.ToolChoice] | Omit = omit, + tools: Optional[Iterable[message_create_params.Tool]] | Omit = omit, + top_k: Optional[int] | Omit = omit, + top_p: Optional[float] | Omit = omit, + anthropic_version: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Stream[MessageStreamEvent]: + """Anthropic Messages API compatible endpoint. + + Generates a model response for the + supplied conversation. Authentication accepts either the bearer + `Authorization: Bearer ` header (SambaNova SDK default) or the `x-api-key` + header (Anthropic SDK default); the same API key is used in both cases. When + `stream: true` is set, the response is a sequence of Server-Sent Events whose + payloads conform to `MessageStreamEvent`; otherwise the response is a single + `Message` object. + + Args: + max_tokens: Maximum number of tokens to generate. The combined input + output token count is + bounded by the model's context window. + + messages: Conversation turns. + + model: The model ID to use (e.g. gpt-oss-120b). 
See available + [models](https://docs.sambanova.ai/docs/en/models/sambacloud-models) + + stream: If true, the response is a sequence of Server-Sent Events whose payloads conform + to `MessageStreamEvent`. + + container: Existing code-execution container ID to reuse. **In v1**: silently dropped + + metadata: Free-form metadata attached to the request. Currently only `user_id` is recognized. Additional + fields are accepted but ignored. + + service_tier: Service-tier preference. **In v1**: silently dropped + + stop_sequences: Custom strings that, when generated, cause the model to stop. + + system: System prompt for the conversation. Accepts either a single string (most common) + or an array of text blocks (used when individual segments need `cache_control` + markers). Multiple text blocks are joined with newlines and prepended to the + conversation as a `role: system` message. + + temperature: Sampling temperature in `[0.0, 2.0]`. Higher values produce more random output, + lower values more deterministic. Adjust only one of `temperature`, `top_p`, + `top_k`. + + thinking: Controls Anthropic-style extended thinking. **In v1**: only `type:"disabled"` is + silently accepted as a no-op; `type:"enabled"` and `type:"adaptive"` return a + 400 `invalid_request_error` (`unsupported_parameter`). + + tool_choice: How the model should choose from the provided tools. + + tools: Tool definitions the model may call. + + top_k: Top-k sampling. Considers only the K most likely tokens at each step. Set to 0 + to disable. + + top_p: Nucleus sampling. Considers tokens with cumulative probability mass up to + `top_p`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ...
+ + @overload + def create( + self, + *, + max_tokens: int, + messages: Iterable[message_create_params.Message], + model: Union[ + str, + Literal[ + "Meta-Llama-3.3-70B-Instruct", + "Meta-Llama-3.2-1B-Instruct", + "Meta-Llama-3.2-3B-Instruct", + "Llama-3.2-11B-Vision-Instruct", + "Llama-3.2-90B-Vision-Instruct", + "Meta-Llama-3.1-8B-Instruct", + "Meta-Llama-3.1-70B-Instruct", + "Meta-Llama-3.1-405B-Instruct", + "Qwen2.5-Coder-32B-Instruct", + "Qwen2.5-72B-Instruct", + "QwQ-32B-Preview", + "Meta-Llama-Guard-3-8B", + "DeepSeek-R1", + "DeepSeek-R1-0528", + "DeepSeek-V3-0324", + "DeepSeek-V3.1", + "DeepSeek-V3.1-cb", + "DeepSeek-V3.1-Terminus", + "DeepSeek-V3.2", + "DeepSeek-R1-Distill-Llama-70B", + "Llama-4-Maverick-17B-128E-Instruct", + "Llama-4-Scout-17B-16E-Instruct", + "Qwen3-32B", + "Qwen3-235B", + "Llama-3.3-Swallow-70B-Instruct-v0.4", + "gpt-oss-120b", + "ALLaM-7B-Instruct-preview", + "MiniMax-M2.5", + "MiniMax-M2.7", + "gemma-3-12b-it", + ], + ], + stream: bool, + container: Optional[str] | Omit = omit, + metadata: message_create_params.Metadata | Omit = omit, + service_tier: Optional[Literal["auto", "standard_only"]] | Omit = omit, + stop_sequences: Optional[SequenceNotStr[str]] | Omit = omit, + system: Union[str, Iterable[message_create_params.SystemSystemTextBlockArray]] | Omit = omit, + temperature: Optional[float] | Omit = omit, + thinking: message_create_params.Thinking | Omit = omit, + tool_choice: Optional[message_create_params.ToolChoice] | Omit = omit, + tools: Optional[Iterable[message_create_params.Tool]] | Omit = omit, + top_k: Optional[int] | Omit = omit, + top_p: Optional[float] | Omit = omit, + anthropic_version: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> MessageCreateResponse | Stream[MessageStreamEvent]: + """Anthropic Messages API compatible endpoint. + + Generates a model response for the + supplied conversation. Authentication accepts either the bearer + `Authorization: Bearer ` header (SambaNova SDK default) or the `x-api-key` + header (Anthropic SDK default); the same API key is used in both cases. When + `stream: true` is set, the response is a sequence of Server-Sent Events whose + payloads conform to `MessageStreamEvent`; otherwise the response is a single + `Message` object. + + Args: + max_tokens: Maximum number of tokens to generate. The combined input + output token count is + bounded by the model's context window. + + messages: Conversation turns. + + model: The model ID to use (e.g. gpt-oss-120b). See available + [models](https://docs.sambanova.ai/docs/en/models/sambacloud-models) + + stream: If true, the response is a sequence of Server-Sent Events whose payloads conform + to `MessageStreamEvent`. + + container: Existing code-execution container ID to reuse. **In v1**: silently dropped + + metadata: Free-form metadata attached to the request. Currently only `user_id` is recognized. Additional + fields are accepted but ignored. + + service_tier: Service-tier preference. **In v1**: silently dropped + + stop_sequences: Custom strings that, when generated, cause the model to stop. + + system: System prompt for the conversation. Accepts either a single string (most common) + or an array of text blocks (used when individual segments need `cache_control` + markers). Multiple text blocks are joined with newlines and prepended to the + conversation as a `role: system` message. + + temperature: Sampling temperature in `[0.0, 2.0]`. Higher values produce more random output, + lower values more deterministic.
Adjust only one of `temperature`, `top_p`, + `top_k`. + + thinking: Controls Anthropic-style extended thinking. **In v1**: only `type:"disabled"` is + silently accepted as a no-op; `type:"enabled"` and `type:"adaptive"` return a + 400 `invalid_request_error` (`unsupported_parameter`). + + tool_choice: How the model should choose from the provided tools. + + tools: Tool definitions the model may call. + + top_k: Top-k sampling. Considers only the K most likely tokens at each step. Set to 0 + to disable. + + top_p: Nucleus sampling. Considers tokens with cumulative probability mass up to + `top_p`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @required_args(["max_tokens", "messages", "model"], ["max_tokens", "messages", "model", "stream"]) + def create( + self, + *, + max_tokens: int, + messages: Iterable[message_create_params.Message], + model: Union[ + str, + Literal[ + "Meta-Llama-3.3-70B-Instruct", + "Meta-Llama-3.2-1B-Instruct", + "Meta-Llama-3.2-3B-Instruct", + "Llama-3.2-11B-Vision-Instruct", + "Llama-3.2-90B-Vision-Instruct", + "Meta-Llama-3.1-8B-Instruct", + "Meta-Llama-3.1-70B-Instruct", + "Meta-Llama-3.1-405B-Instruct", + "Qwen2.5-Coder-32B-Instruct", + "Qwen2.5-72B-Instruct", + "QwQ-32B-Preview", + "Meta-Llama-Guard-3-8B", + "DeepSeek-R1", + "DeepSeek-R1-0528", + "DeepSeek-V3-0324", + "DeepSeek-V3.1", + "DeepSeek-V3.1-cb", + "DeepSeek-V3.1-Terminus", + "DeepSeek-V3.2", + "DeepSeek-R1-Distill-Llama-70B", + "Llama-4-Maverick-17B-128E-Instruct", + "Llama-4-Scout-17B-16E-Instruct", + "Qwen3-32B", + "Qwen3-235B", + "Llama-3.3-Swallow-70B-Instruct-v0.4", + "gpt-oss-120b", + "ALLaM-7B-Instruct-preview", + "MiniMax-M2.5", + "MiniMax-M2.7", + "gemma-3-12b-it", + ], + ], + container: Optional[str] | Omit = omit, + metadata: 
message_create_params.Metadata | Omit = omit, + service_tier: Optional[Literal["auto", "standard_only"]] | Omit = omit, + stop_sequences: Optional[SequenceNotStr[str]] | Omit = omit, + stream: Optional[Literal[False]] | Literal[True] | Omit = omit, + system: Union[str, Iterable[message_create_params.SystemSystemTextBlockArray]] | Omit = omit, + temperature: Optional[float] | Omit = omit, + thinking: message_create_params.Thinking | Omit = omit, + tool_choice: Optional[message_create_params.ToolChoice] | Omit = omit, + tools: Optional[Iterable[message_create_params.Tool]] | Omit = omit, + top_k: Optional[int] | Omit = omit, + top_p: Optional[float] | Omit = omit, + anthropic_version: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> MessageCreateResponse | Stream[MessageStreamEvent]: + extra_headers = {**strip_not_given({"anthropic-version": anthropic_version}), **(extra_headers or {})} + return self._post( + "/messages", + body=maybe_transform( + { + "max_tokens": max_tokens, + "messages": messages, + "model": model, + "container": container, + "metadata": metadata, + "service_tier": service_tier, + "stop_sequences": stop_sequences, + "stream": stream, + "system": system, + "temperature": temperature, + "thinking": thinking, + "tool_choice": tool_choice, + "tools": tools, + "top_k": top_k, + "top_p": top_p, + }, + message_create_params.MessageCreateParamsStreaming + if stream + else message_create_params.MessageCreateParamsNonStreaming, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=cast(Any, 
MessageCreateResponse), # Union types cannot be passed in as arguments in the type system + stream=stream or False, + stream_cls=Stream[MessageStreamEvent], + ) + + def count_tokens( + self, + *, + messages: Iterable[message_count_tokens_params.Message], + model: str, + system: Union[str, Iterable[message_count_tokens_params.SystemSystemTextBlockArray]] | Omit = omit, + thinking: message_count_tokens_params.Thinking | Omit = omit, + tool_choice: Optional[message_count_tokens_params.ToolChoice] | Omit = omit, + tools: Optional[Iterable[message_count_tokens_params.Tool]] | Omit = omit, + anthropic_version: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> MessageCountTokensResponse: + """Anthropic `count_tokens` compatible endpoint. + + Returns the number of input tokens + that would be consumed by a `POST /messages` call with the same prompt content + (system, messages, tools, tool_choice). Authentication accepts either the bearer + `Authorization: Bearer ` header (SambaNova SDK default) or the `x-api-key` + header (Anthropic SDK default); the same API key is used in both cases. + + Args: + messages: Conversation turns. + + model: Model identifier. + + system: System prompt for the conversation. Accepts either a single string (most common) + or an array of text blocks (used when individual segments need `cache_control` + markers). Multiple text blocks are joined with newlines and prepended to the + conversation as a `role: system` message. + + thinking: Controls Anthropic-style extended thinking. 
**In v1**: only `type:"disabled"` is + silently accepted as a no-op; `type:"enabled"` and `type:"adaptive"` return a + 400 `invalid_request_error` (`unsupported_parameter`). + + tool_choice: How the model should choose from the provided tools. + + tools: Tool definitions the model may call. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {**strip_not_given({"anthropic-version": anthropic_version}), **(extra_headers or {})} + return self._post( + "/messages/count_tokens", + body=maybe_transform( + { + "messages": messages, + "model": model, + "system": system, + "thinking": thinking, + "tool_choice": tool_choice, + "tools": tools, + }, + message_count_tokens_params.MessageCountTokensParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=MessageCountTokensResponse, + ) + + +class AsyncMessagesResource(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncMessagesResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/sambanova/sambanova-python#accessing-raw-response-data-eg-headers + """ + return AsyncMessagesResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncMessagesResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/sambanova/sambanova-python#with_streaming_response + """ + return AsyncMessagesResourceWithStreamingResponse(self) + + @overload + async def create( + self, + *, + max_tokens: int, + messages: Iterable[message_create_params.Message], + model: Union[ + str, + Literal[ + "Meta-Llama-3.3-70B-Instruct", + "Meta-Llama-3.2-1B-Instruct", + "Meta-Llama-3.2-3B-Instruct", + "Llama-3.2-11B-Vision-Instruct", + "Llama-3.2-90B-Vision-Instruct", + "Meta-Llama-3.1-8B-Instruct", + "Meta-Llama-3.1-70B-Instruct", + "Meta-Llama-3.1-405B-Instruct", + "Qwen2.5-Coder-32B-Instruct", + "Qwen2.5-72B-Instruct", + "QwQ-32B-Preview", + "Meta-Llama-Guard-3-8B", + "DeepSeek-R1", + "DeepSeek-R1-0528", + "DeepSeek-V3-0324", + "DeepSeek-V3.1", + "DeepSeek-V3.1-cb", + "DeepSeek-V3.1-Terminus", + "DeepSeek-V3.2", + "DeepSeek-R1-Distill-Llama-70B", + "Llama-4-Maverick-17B-128E-Instruct", + "Llama-4-Scout-17B-16E-Instruct", + "Qwen3-32B", + "Qwen3-235B", + "Llama-3.3-Swallow-70B-Instruct-v0.4", + "gpt-oss-120b", + "ALLaM-7B-Instruct-preview", + "MiniMax-M2.5", + "MiniMax-M2.7", + "gemma-3-12b-it", + ], + ], + container: Optional[str] | Omit = omit, + metadata: message_create_params.Metadata | Omit = omit, + service_tier: Optional[Literal["auto", "standard_only"]] | Omit = omit, + stop_sequences: Optional[SequenceNotStr[str]] | Omit = omit, + stream: Optional[Literal[False]] | Omit = omit, + system: Union[str, Iterable[message_create_params.SystemSystemTextBlockArray]] | Omit = omit, + temperature: Optional[float] | Omit = omit, + thinking: message_create_params.Thinking | Omit = omit, + tool_choice: Optional[message_create_params.ToolChoice] | Omit = omit, + tools: Optional[Iterable[message_create_params.Tool]] | Omit = omit, + top_k: Optional[int] | Omit = omit, + top_p: Optional[float] | Omit = omit, + anthropic_version: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via 
kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> MessageCreateResponse: + """Anthropic Messages API compatible endpoint. + + Generates a model response for the + supplied conversation. Authentication accepts either the bearer + `Authorization: Bearer ` header (SambaNova SDK default) or the `x-api-key` + header (Anthropic SDK default); the same API key is used in both cases. When + `stream: true` is set, the response is a sequence of Server-Sent Events whose + payloads conform to `MessageStreamEvent`; otherwise the response is a single + `Message` object. + + Args: + max_tokens: Maximum number of tokens to generate. The combined input + output token count is + bounded by the model's context window. + + messages: Conversation turns. + + model: The model ID to use (e.g. gpt-oss-120b). See available + [models](https://docs.sambanova.ai/docs/en/models/sambacloud-models) + + container: Existing code-execution container ID to reuse. **In v1**: silently dropped + + metadata: Free-form metadata attached to the request. Currently only `user_id` is recognized. Additional + fields are accepted but ignored. + + service_tier: Service-tier preference. **In v1**: silently dropped + + stop_sequences: Custom strings that, when generated, cause the model to stop. + + stream: If true, the response is a sequence of Server-Sent Events whose payloads conform + to `MessageStreamEvent`. + + system: System prompt for the conversation. Accepts either a single string (most common) + or an array of text blocks (used when individual segments need `cache_control` + markers). Multiple text blocks are joined with newlines and prepended to the + conversation as a `role: system` message. + + temperature: Sampling temperature in `[0.0, 2.0]`.
Higher values produce more random output, + lower values more deterministic. Adjust only one of `temperature`, `top_p`, + `top_k`. + + thinking: Controls Anthropic-style extended thinking. **In v1**: only `type:"disabled"` is + silently accepted as a no-op; `type:"enabled"` and `type:"adaptive"` return a + 400 `invalid_request_error` (`unsupported_parameter`). + + tool_choice: How the model should choose from the provided tools. + + tools: Tool definitions the model may call. + + top_k: Top-k sampling. Considers only the K most likely tokens at each step. Set to 0 + to disable. + + top_p: Nucleus sampling. Considers tokens with cumulative probability mass up to + `top_p`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create( + self, + *, + max_tokens: int, + messages: Iterable[message_create_params.Message], + model: Union[ + str, + Literal[ + "Meta-Llama-3.3-70B-Instruct", + "Meta-Llama-3.2-1B-Instruct", + "Meta-Llama-3.2-3B-Instruct", + "Llama-3.2-11B-Vision-Instruct", + "Llama-3.2-90B-Vision-Instruct", + "Meta-Llama-3.1-8B-Instruct", + "Meta-Llama-3.1-70B-Instruct", + "Meta-Llama-3.1-405B-Instruct", + "Qwen2.5-Coder-32B-Instruct", + "Qwen2.5-72B-Instruct", + "QwQ-32B-Preview", + "Meta-Llama-Guard-3-8B", + "DeepSeek-R1", + "DeepSeek-R1-0528", + "DeepSeek-V3-0324", + "DeepSeek-V3.1", + "DeepSeek-V3.1-cb", + "DeepSeek-V3.1-Terminus", + "DeepSeek-V3.2", + "DeepSeek-R1-Distill-Llama-70B", + "Llama-4-Maverick-17B-128E-Instruct", + "Llama-4-Scout-17B-16E-Instruct", + "Qwen3-32B", + "Qwen3-235B", + "Llama-3.3-Swallow-70B-Instruct-v0.4", + "gpt-oss-120b", + "ALLaM-7B-Instruct-preview", + "MiniMax-M2.5", + "MiniMax-M2.7", + "gemma-3-12b-it", + ], + ], + stream: Literal[True], + container: Optional[str] | Omit = omit, + metadata: 
message_create_params.Metadata | Omit = omit, + service_tier: Optional[Literal["auto", "standard_only"]] | Omit = omit, + stop_sequences: Optional[SequenceNotStr[str]] | Omit = omit, + system: Union[str, Iterable[message_create_params.SystemSystemTextBlockArray]] | Omit = omit, + temperature: Optional[float] | Omit = omit, + thinking: message_create_params.Thinking | Omit = omit, + tool_choice: Optional[message_create_params.ToolChoice] | Omit = omit, + tools: Optional[Iterable[message_create_params.Tool]] | Omit = omit, + top_k: Optional[int] | Omit = omit, + top_p: Optional[float] | Omit = omit, + anthropic_version: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> AsyncStream[MessageStreamEvent]: + """Anthropic Messages API compatible endpoint. + + Generates a model response for the + supplied conversation. Authentication accepts either the bearer + `Authorization: Bearer ` header (SambaNova SDK default) or the `x-api-key` + header (Anthropic SDK default); the same API key is used in both cases. When + `stream: true` is set, the response is a sequence of Server-Sent Events whose + payloads conform to `MessageStreamEvent`; otherwise the response is a single + `Message` object. + + Args: + max_tokens: Maximum number of tokens to generate. The combined input + output token count is + bounded by the model's context window. + + messages: Conversation turns. + + model: The model ID to use (e.g. gpt-oss-120b). 
See available + [models](https://docs.sambanova.ai/docs/en/models/sambacloud-models) + + stream: If true, the response is a sequence of Server-Sent Events whose payloads conform + to `MessageStreamEvent`. + + container: Existing code-execution container ID to reuse. **In v1**: silently dropped + + metadata: Free-form metadata attached to the request. Currently only `user_id` is recognized. Additional + fields are accepted but ignored. + + service_tier: Service-tier preference. **In v1**: silently dropped + + stop_sequences: Custom strings that, when generated, cause the model to stop. + + system: System prompt for the conversation. Accepts either a single string (most common) + or an array of text blocks (used when individual segments need `cache_control` + markers). Multiple text blocks are joined with newlines and prepended to the + conversation as a `role: system` message. + + temperature: Sampling temperature in `[0.0, 2.0]`. Higher values produce more random output, + lower values more deterministic. Adjust only one of `temperature`, `top_p`, + `top_k`. + + thinking: Controls Anthropic-style extended thinking. **In v1**: only `type:"disabled"` is + silently accepted as a no-op; `type:"enabled"` and `type:"adaptive"` return a + 400 `invalid_request_error` (`unsupported_parameter`). + + tool_choice: How the model should choose from the provided tools. + + tools: Tool definitions the model may call. + + top_k: Top-k sampling. Considers only the K most likely tokens at each step. Set to 0 + to disable. + + top_p: Nucleus sampling. Considers tokens with cumulative probability mass up to + `top_p`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ...
+ + @overload + async def create( + self, + *, + max_tokens: int, + messages: Iterable[message_create_params.Message], + model: Union[ + str, + Literal[ + "Meta-Llama-3.3-70B-Instruct", + "Meta-Llama-3.2-1B-Instruct", + "Meta-Llama-3.2-3B-Instruct", + "Llama-3.2-11B-Vision-Instruct", + "Llama-3.2-90B-Vision-Instruct", + "Meta-Llama-3.1-8B-Instruct", + "Meta-Llama-3.1-70B-Instruct", + "Meta-Llama-3.1-405B-Instruct", + "Qwen2.5-Coder-32B-Instruct", + "Qwen2.5-72B-Instruct", + "QwQ-32B-Preview", + "Meta-Llama-Guard-3-8B", + "DeepSeek-R1", + "DeepSeek-R1-0528", + "DeepSeek-V3-0324", + "DeepSeek-V3.1", + "DeepSeek-V3.1-cb", + "DeepSeek-V3.1-Terminus", + "DeepSeek-V3.2", + "DeepSeek-R1-Distill-Llama-70B", + "Llama-4-Maverick-17B-128E-Instruct", + "Llama-4-Scout-17B-16E-Instruct", + "Qwen3-32B", + "Qwen3-235B", + "Llama-3.3-Swallow-70B-Instruct-v0.4", + "gpt-oss-120b", + "ALLaM-7B-Instruct-preview", + "MiniMax-M2.5", + "MiniMax-M2.7", + "gemma-3-12b-it", + ], + ], + stream: bool, + container: Optional[str] | Omit = omit, + metadata: message_create_params.Metadata | Omit = omit, + service_tier: Optional[Literal["auto", "standard_only"]] | Omit = omit, + stop_sequences: Optional[SequenceNotStr[str]] | Omit = omit, + system: Union[str, Iterable[message_create_params.SystemSystemTextBlockArray]] | Omit = omit, + temperature: Optional[float] | Omit = omit, + thinking: message_create_params.Thinking | Omit = omit, + tool_choice: Optional[message_create_params.ToolChoice] | Omit = omit, + tools: Optional[Iterable[message_create_params.Tool]] | Omit = omit, + top_k: Optional[int] | Omit = omit, + top_p: Optional[float] | Omit = omit, + anthropic_version: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> MessageCreateResponse | AsyncStream[MessageStreamEvent]: + """Anthropic Messages API compatible endpoint. + + Generates a model response for the + supplied conversation. Authentication accepts either the bearer + `Authorization: Bearer ` header (SambaNova SDK default) or the `x-api-key` + header (Anthropic SDK default); the same API key is used in both cases. When + `stream: true` is set, the response is a sequence of Server-Sent Events whose + payloads conform to `MessageStreamEvent`; otherwise the response is a single + `Message` object. + + Args: + max_tokens: Maximum number of tokens to generate. The combined input + output token count is + bounded by the model's context window. + + messages: Conversation turns. + + model: The model ID to use (e.g. gpt-oss-120b). See available + [models](https://docs.sambanova.ai/docs/en/models/sambacloud-models) + + stream: If true, the response is a sequence of Server-Sent Events whose payloads conform + to `MessageStreamEvent`. + + container: Existing code-execution container ID to reuse. **In v1**: silently dropped + + metadata: Free-form metadata attached to the request. Currently only `user_id` is recognized. Additional + fields are accepted but ignored. + + service_tier: Service-tier preference. **In v1**: silently dropped + + stop_sequences: Custom strings that, when generated, cause the model to stop. + + system: System prompt for the conversation. Accepts either a single string (most common) + or an array of text blocks (used when individual segments need `cache_control` + markers). Multiple text blocks are joined with newlines and prepended to the + conversation as a `role: system` message. + + temperature: Sampling temperature in `[0.0, 2.0]`. Higher values produce more random output, + lower values more deterministic.
Adjust only one of `temperature`, `top_p`, + `top_k`. + + thinking: Controls Anthropic-style extended thinking. **In v1**: only `type:"disabled"` is + silently accepted as a no-op; `type:"enabled"` and `type:"adaptive"` return a + 400 `invalid_request_error` (`unsupported_parameter`). + + tool_choice: How the model should choose from the provided tools. + + tools: Tool definitions the model may call. + + top_k: Top-k sampling. Considers only the K most likely tokens at each step. Set to 0 + to disable. + + top_p: Nucleus sampling. Considers tokens with cumulative probability mass up to + `top_p`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @required_args(["max_tokens", "messages", "model"], ["max_tokens", "messages", "model", "stream"]) + async def create( + self, + *, + max_tokens: int, + messages: Iterable[message_create_params.Message], + model: Union[ + str, + Literal[ + "Meta-Llama-3.3-70B-Instruct", + "Meta-Llama-3.2-1B-Instruct", + "Meta-Llama-3.2-3B-Instruct", + "Llama-3.2-11B-Vision-Instruct", + "Llama-3.2-90B-Vision-Instruct", + "Meta-Llama-3.1-8B-Instruct", + "Meta-Llama-3.1-70B-Instruct", + "Meta-Llama-3.1-405B-Instruct", + "Qwen2.5-Coder-32B-Instruct", + "Qwen2.5-72B-Instruct", + "QwQ-32B-Preview", + "Meta-Llama-Guard-3-8B", + "DeepSeek-R1", + "DeepSeek-R1-0528", + "DeepSeek-V3-0324", + "DeepSeek-V3.1", + "DeepSeek-V3.1-cb", + "DeepSeek-V3.1-Terminus", + "DeepSeek-V3.2", + "DeepSeek-R1-Distill-Llama-70B", + "Llama-4-Maverick-17B-128E-Instruct", + "Llama-4-Scout-17B-16E-Instruct", + "Qwen3-32B", + "Qwen3-235B", + "Llama-3.3-Swallow-70B-Instruct-v0.4", + "gpt-oss-120b", + "ALLaM-7B-Instruct-preview", + "MiniMax-M2.5", + "MiniMax-M2.7", + "gemma-3-12b-it", + ], + ], + container: Optional[str] | Omit = omit, + metadata: 
message_create_params.Metadata | Omit = omit, + service_tier: Optional[Literal["auto", "standard_only"]] | Omit = omit, + stop_sequences: Optional[SequenceNotStr[str]] | Omit = omit, + stream: Optional[Literal[False]] | Literal[True] | Omit = omit, + system: Union[str, Iterable[message_create_params.SystemSystemTextBlockArray]] | Omit = omit, + temperature: Optional[float] | Omit = omit, + thinking: message_create_params.Thinking | Omit = omit, + tool_choice: Optional[message_create_params.ToolChoice] | Omit = omit, + tools: Optional[Iterable[message_create_params.Tool]] | Omit = omit, + top_k: Optional[int] | Omit = omit, + top_p: Optional[float] | Omit = omit, + anthropic_version: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> MessageCreateResponse | AsyncStream[MessageStreamEvent]: + extra_headers = {**strip_not_given({"anthropic-version": anthropic_version}), **(extra_headers or {})} + return await self._post( + "/messages", + body=await async_maybe_transform( + { + "max_tokens": max_tokens, + "messages": messages, + "model": model, + "container": container, + "metadata": metadata, + "service_tier": service_tier, + "stop_sequences": stop_sequences, + "stream": stream, + "system": system, + "temperature": temperature, + "thinking": thinking, + "tool_choice": tool_choice, + "tools": tools, + "top_k": top_k, + "top_p": top_p, + }, + message_create_params.MessageCreateParamsStreaming + if stream + else message_create_params.MessageCreateParamsNonStreaming, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + 
cast_to=cast(Any, MessageCreateResponse), # Union types cannot be passed in as arguments in the type system + stream=stream or False, + stream_cls=AsyncStream[MessageStreamEvent], + ) + + async def count_tokens( + self, + *, + messages: Iterable[message_count_tokens_params.Message], + model: str, + system: Union[str, Iterable[message_count_tokens_params.SystemSystemTextBlockArray]] | Omit = omit, + thinking: message_count_tokens_params.Thinking | Omit = omit, + tool_choice: Optional[message_count_tokens_params.ToolChoice] | Omit = omit, + tools: Optional[Iterable[message_count_tokens_params.Tool]] | Omit = omit, + anthropic_version: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> MessageCountTokensResponse: + """Anthropic `count_tokens` compatible endpoint. + + Returns the number of input tokens + that would be consumed by a `POST /messages` call with the same prompt content + (system, messages, tools, tool_choice). Authentication accepts either the bearer + `Authorization: Bearer ` header (SambaNova SDK default) or the `x-api-key` + header (Anthropic SDK default); the same API key is used in both cases. + + Args: + messages: Conversation turns. + + model: Model identifier. + + system: System prompt for the conversation. Accepts either a single string (most common) + or an array of text blocks (used when individual segments need `cache_control` + markers). Multiple text blocks are joined with newlines and prepended to the + conversation as a `role: system` message. + + thinking: Controls Anthropic-style extended thinking. 
**In v1**: only `type:"disabled"` is + silently accepted as a no-op; `type:"enabled"` and `type:"adaptive"` return a + 400 `invalid_request_error` (`unsupported_parameter`). + + tool_choice: How the model should choose from the provided tools. + + tools: Tool definitions the model may call. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {**strip_not_given({"anthropic-version": anthropic_version}), **(extra_headers or {})} + return await self._post( + "/messages/count_tokens", + body=await async_maybe_transform( + { + "messages": messages, + "model": model, + "system": system, + "thinking": thinking, + "tool_choice": tool_choice, + "tools": tools, + }, + message_count_tokens_params.MessageCountTokensParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=MessageCountTokensResponse, + ) + + +class MessagesResourceWithRawResponse: + def __init__(self, messages: MessagesResource) -> None: + self._messages = messages + + self.create = to_raw_response_wrapper( + messages.create, + ) + self.count_tokens = to_raw_response_wrapper( + messages.count_tokens, + ) + + +class AsyncMessagesResourceWithRawResponse: + def __init__(self, messages: AsyncMessagesResource) -> None: + self._messages = messages + + self.create = async_to_raw_response_wrapper( + messages.create, + ) + self.count_tokens = async_to_raw_response_wrapper( + messages.count_tokens, + ) + + +class MessagesResourceWithStreamingResponse: + def __init__(self, messages: MessagesResource) -> None: + self._messages = messages + + self.create = to_streamed_response_wrapper( + messages.create, + ) + self.count_tokens = to_streamed_response_wrapper( + messages.count_tokens, + ) + + +class 
AsyncMessagesResourceWithStreamingResponse: + def __init__(self, messages: AsyncMessagesResource) -> None: + self._messages = messages + + self.create = async_to_streamed_response_wrapper( + messages.create, + ) + self.count_tokens = async_to_streamed_response_wrapper( + messages.count_tokens, + ) diff --git a/src/sambanova/types/__init__.py b/src/sambanova/types/__init__.py index 7ff1451..bb5bf32 100644 --- a/src/sambanova/types/__init__.py +++ b/src/sambanova/types/__init__.py @@ -2,15 +2,21 @@ from __future__ import annotations +from .message import Message as Message from .model_response import ModelResponse as ModelResponse from .models_response import ModelsResponse as ModelsResponse from .response_response import ResponseResponse as ResponseResponse from .completion_response import CompletionResponse as CompletionResponse from .embeddings_response import EmbeddingsResponse as EmbeddingsResponse +from .message_stream_event import MessageStreamEvent as MessageStreamEvent +from .message_create_params import MessageCreateParams as MessageCreateParams from .response_stream_event import ResponseStreamEvent as ResponseStreamEvent from .response_create_params import ResponseCreateParams as ResponseCreateParams from .embedding_create_params import EmbeddingCreateParams as EmbeddingCreateParams +from .message_create_response import MessageCreateResponse as MessageCreateResponse from .completion_create_params import CompletionCreateParams as CompletionCreateParams from .response_create_response import ResponseCreateResponse as ResponseCreateResponse from .completion_create_response import CompletionCreateResponse as CompletionCreateResponse from .completion_stream_response import CompletionStreamResponse as CompletionStreamResponse +from .message_count_tokens_params import MessageCountTokensParams as MessageCountTokensParams +from .message_count_tokens_response import MessageCountTokensResponse as MessageCountTokensResponse diff --git a/src/sambanova/types/message.py 
b/src/sambanova/types/message.py new file mode 100644 index 0000000..26719b1 --- /dev/null +++ b/src/sambanova/types/message.py @@ -0,0 +1,328 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from .._utils import PropertyInfo +from .._models import BaseModel + +__all__ = [ + "Message", + "Content", + "ContentMessageOutputTextBlock", + "ContentMessageOutputToolUseBlock", + "ContentMessageOutputThinkingBlock", + "ContentMessageOutputRedactedThinkingBlock", + "ContentMessageOutputServerToolUseBlock", + "ContentMessageOutputWebSearchToolResultBlock", + "ContentMessageOutputWebFetchToolResultBlock", + "ContentMessageOutputCodeExecutionToolResultBlock", + "ContentMessageOutputBashCodeExecutionToolResultBlock", + "ContentMessageOutputTextEditorCodeExecutionToolResultBlock", + "ContentMessageOutputToolSearchToolResultBlock", + "ContentMessageOutputContainerUploadBlock", + "Usage", + "Container", + "StopDetails", +] + + +class ContentMessageOutputTextBlock(BaseModel): + """Plain-text segment of the model's response.""" + + text: str + + type: Literal["text"] + + citations: Optional[List[Dict[str, object]]] = None + """Not emitted in v1.""" + + +class ContentMessageOutputToolUseBlock(BaseModel): + """Tool call generated by the model.""" + + id: str + """Unique identifier for this tool call.""" + + input: Dict[str, object] + """Tool inputs as a JSON object.""" + + name: str + """Name of the tool being called.""" + + type: Literal["tool_use"] + + caller: Optional[Dict[str, object]] = None + """Anthropic routing metadata. Always `null` in SambaNova responses.""" + + +class ContentMessageOutputThinkingBlock(BaseModel): + """Extended-reasoning trace from the model. 
Emitted by reasoning models.""" + + thinking: str + + type: Literal["thinking"] + + signature: Optional[str] = None + + +class ContentMessageOutputRedactedThinkingBlock(BaseModel): + """ + Anthropic compatibility only — SambaNova does not produce encrypted thinking output. Never emitted in responses. + """ + + data: str + + type: Literal["redacted_thinking"] + + +class ContentMessageOutputServerToolUseBlock(BaseModel): + """Anthropic compatibility only — SambaNova does not run server-side tools. + + Never emitted in responses; defined for Anthropic SDK type-parity. + """ + + id: str + + input: Dict[str, object] + + name: str + + type: Literal["server_tool_use"] + + +class ContentMessageOutputWebSearchToolResultBlock(BaseModel): + """Anthropic compatibility only — SambaNova does not run server-side `web_search`. + + Never emitted in responses. + """ + + content: List[Dict[str, object]] + + tool_use_id: str + + type: Literal["web_search_tool_result"] + + +class ContentMessageOutputWebFetchToolResultBlock(BaseModel): + """Anthropic compatibility only — SambaNova does not run server-side `web_fetch`. + + Never emitted in responses. + """ + + content: Dict[str, object] + + tool_use_id: str + + type: Literal["web_fetch_tool_result"] + + +class ContentMessageOutputCodeExecutionToolResultBlock(BaseModel): + """ + Anthropic compatibility only — SambaNova does not run server-side `code_execution`. Never emitted in responses. + """ + + content: Dict[str, object] + + tool_use_id: str + + type: Literal["code_execution_tool_result"] + + +class ContentMessageOutputBashCodeExecutionToolResultBlock(BaseModel): + """ + Anthropic compatibility only — SambaNova does not run server-side bash code execution. Never emitted in responses. 
+ """ + + content: Dict[str, object] + + tool_use_id: str + + type: Literal["bash_code_execution_tool_result"] + + +class ContentMessageOutputTextEditorCodeExecutionToolResultBlock(BaseModel): + """ + Anthropic compatibility only — SambaNova does not run server-side text-editor code execution. Never emitted in responses. + """ + + content: Dict[str, object] + + tool_use_id: str + + type: Literal["text_editor_code_execution_tool_result"] + + +class ContentMessageOutputToolSearchToolResultBlock(BaseModel): + """Anthropic compatibility only — SambaNova does not run server-side `tool_search`. + + Never emitted in responses. + """ + + content: Dict[str, object] + + tool_use_id: str + + type: Literal["tool_search_tool_result"] + + +class ContentMessageOutputContainerUploadBlock(BaseModel): + """ + Anthropic compatibility only — SambaNova does not produce container_upload blocks (these come from Anthropic's server-side `code_execution` tool). Never emitted in responses. + """ + + file_id: str + + type: Literal["container_upload"] + + +Content: TypeAlias = Annotated[ + Union[ + ContentMessageOutputTextBlock, + ContentMessageOutputToolUseBlock, + ContentMessageOutputThinkingBlock, + ContentMessageOutputRedactedThinkingBlock, + ContentMessageOutputServerToolUseBlock, + ContentMessageOutputWebSearchToolResultBlock, + ContentMessageOutputWebFetchToolResultBlock, + ContentMessageOutputCodeExecutionToolResultBlock, + ContentMessageOutputBashCodeExecutionToolResultBlock, + ContentMessageOutputTextEditorCodeExecutionToolResultBlock, + ContentMessageOutputToolSearchToolResultBlock, + ContentMessageOutputContainerUploadBlock, + ], + PropertyInfo(discriminator="type"), +] + + +class Usage(BaseModel): + """Token accounting for the request.""" + + input_tokens: int + """Total tokens in the prompt (system + messages + tools).""" + + output_tokens: int + """Total tokens generated by the model.""" + + cache_creation: Optional[Dict[str, object]] = None + """Anthropic SDK alias for cache 
write metrics. + + Always `null` in SambaNova responses; use `cache_creation_input_tokens` instead. + """ + + cache_creation_input_tokens: Optional[int] = None + """Tokens written to prompt cache. + + Absent in v1; emitted once prompt caching wiring lands (CP-2897). + """ + + cache_read_input_tokens: Optional[int] = None + """Tokens read from prompt cache. + + Absent in v1; emitted once prompt caching wiring lands (CP-2897). + """ + + inference_geo: Optional[str] = None + """Geographic region that served the request. + + Anthropic compatibility only - SambaNova does not expose geo routing, always + `null`. + """ + + server_tool_use: Optional[Dict[str, object]] = None + """Server-tool usage metrics (e.g. + + `web_search_requests`). Anthropic compatibility only — SambaNova does not run + server tools, so this field is never emitted. + """ + + service_tier: Optional[str] = None + """Service tier that processed the request. + + Anthropic compatibility only — SambaNova is single-tier and never emits this + field. + """ + + +class Container(BaseModel): + """Code-execution container reference. + + Anthropic compatibility only — SambaNova does not run server-side code execution, so this field is never emitted on responses. + """ + + id: str + + expires_at: str + """ISO-8601 timestamp.""" + + +class StopDetails(BaseModel): + """Refusal stop details. + + Anthropic compatibility only — `refusal` is never emitted as a stop_reason by SambaNova (content filtering is not exposed at the API layer). + """ + + type: Literal["refusal"] + + category: Optional[Literal["cyber", "bio"]] = None + + +class Message(BaseModel): + """Non-streaming response from `POST /messages`. + + Wire-compatible with the official Anthropic Messages API. 
+ """ + + id: str + """Unique identifier for this message.""" + + content: List[Content] + + model: str + """Model that produced the response.""" + + role: Literal["assistant"] + + stop_reason: Optional[ + Literal[ + "end_turn", + "max_tokens", + "tool_use", + "pause_turn", + "refusal", + "stop_sequence", + "model_context_window_exceeded", + ] + ] = None + """Reason the model stopped generating. + + SambaNova emits `end_turn`, `max_tokens`, `tool_use`, and `stop_sequence`. The + remaining values are defined for Anthropic SDK type-parity but never returned: + `pause_turn` (server-tool loop limit, not produced); `refusal` (content filter, + not exposed); `model_context_window_exceeded` (folded to `max_tokens`). + """ + + type: Literal["message"] + + usage: Usage + """Token accounting for the request.""" + + container: Optional[Container] = None + """Code-execution container reference. + + Anthropic compatibility only — SambaNova does not run server-side code + execution, so this field is never emitted on responses. + """ + + stop_details: Optional[StopDetails] = None + """Refusal stop details. + + Anthropic compatibility only — `refusal` is never emitted as a stop_reason by + SambaNova (content filtering is not exposed at the API layer). + """ + + stop_sequence: Optional[str] = None + """The matched stop sequence that triggered termination. + + Present when `stop_reason` is `stop_sequence`; `null` otherwise. + """ diff --git a/src/sambanova/types/message_count_tokens_params.py b/src/sambanova/types/message_count_tokens_params.py new file mode 100644 index 0000000..f8852c3 --- /dev/null +++ b/src/sambanova/types/message_count_tokens_params.py @@ -0,0 +1,979 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Dict, Union, Iterable, Optional +from typing_extensions import Literal, Required, Annotated, TypeAlias, TypedDict + +from .._types import SequenceNotStr +from .._utils import PropertyInfo + +__all__ = [ + "MessageCountTokensParams", + "Message", + "MessageContentContentBlockArray", + "MessageContentContentBlockArrayMessageInputTextBlock", + "MessageContentContentBlockArrayMessageInputTextBlockCacheControl", + "MessageContentContentBlockArrayMessageInputImageBlock", + "MessageContentContentBlockArrayMessageInputImageBlockSource", + "MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceBase64", + "MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceURL", + "MessageContentContentBlockArrayMessageInputImageBlockCacheControl", + "MessageContentContentBlockArrayMessageInputToolUseBlock", + "MessageContentContentBlockArrayMessageInputToolUseBlockCacheControl", + "MessageContentContentBlockArrayMessageInputToolResultBlock", + "MessageContentContentBlockArrayMessageInputToolResultBlockCacheControl", + "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArray", + "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlock", + "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlockCacheControl", + "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlock", + "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSource", + "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceBase64", + "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceURL", + 
"MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockCacheControl", + "MessageContentContentBlockArrayMessageInputServerToolUseBlock", + "MessageContentContentBlockArrayMessageInputServerToolUseBlockCacheControl", + "MessageContentContentBlockArrayMessageInputSearchResultBlock", + "MessageContentContentBlockArrayMessageInputSearchResultBlockCacheControl", + "MessageContentContentBlockArrayMessageInputSearchResultBlockContent", + "MessageContentContentBlockArrayMessageInputSearchResultBlockContentCacheControl", + "MessageContentContentBlockArrayMessageInputWebSearchToolResultBlock", + "MessageContentContentBlockArrayMessageInputWebSearchToolResultBlockCacheControl", + "MessageContentContentBlockArrayMessageInputWebFetchToolResultBlock", + "MessageContentContentBlockArrayMessageInputWebFetchToolResultBlockCacheControl", + "MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlock", + "MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlockCacheControl", + "MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlock", + "MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlockCacheControl", + "MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlock", + "MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlockCacheControl", + "MessageContentContentBlockArrayMessageInputToolSearchToolResultBlock", + "MessageContentContentBlockArrayMessageInputToolSearchToolResultBlockCacheControl", + "MessageContentContentBlockArrayMessageInputThinkingBlock", + "MessageContentContentBlockArrayMessageInputRedactedThinkingBlock", + "MessageContentContentBlockArrayMessageInputContainerUploadBlock", + "MessageContentContentBlockArrayMessageInputContainerUploadBlockCacheControl", + "MessageContentContentBlockArrayMessageInputDocumentBlock", + "MessageContentContentBlockArrayMessageInputDocumentBlockCacheControl", + 
"SystemSystemTextBlockArray", + "SystemSystemTextBlockArrayCacheControl", + "Thinking", + "ThinkingMessageThinkingDisabled", + "ThinkingMessageThinkingEnabled", + "ThinkingMessageThinkingAdaptive", + "ToolChoice", + "ToolChoiceMessageToolChoiceAuto", + "ToolChoiceMessageToolChoiceAny", + "ToolChoiceMessageToolChoiceNone", + "ToolChoiceMessageToolChoiceTool", + "Tool", + "ToolCacheControl", +] + + +class MessageCountTokensParams(TypedDict, total=False): + messages: Required[Iterable[Message]] + """Conversation turns.""" + + model: Required[str] + """Model identifier.""" + + system: Union[str, Iterable[SystemSystemTextBlockArray]] + """System prompt for the conversation. + + Accepts either a single string (most common) or an array of text blocks (used + when individual segments need `cache_control` markers). Multiple text blocks are + joined with newlines and prepended to the conversation as a `role: system` + message. + """ + + thinking: Thinking + """Controls Anthropic-style extended thinking. + + **In v1**: only `type:"disabled"` is silently accepted as a no-op; + `type:"enabled"` and `type:"adaptive"` return a 400 `invalid_request_error` + (`unsupported_parameter`). + """ + + tool_choice: Optional[ToolChoice] + """How the model should choose from the provided tools.""" + + tools: Optional[Iterable[Tool]] + """Tool definitions the model may call.""" + + anthropic_version: Annotated[str, PropertyInfo(alias="anthropic-version")] + + +class MessageContentContentBlockArrayMessageInputTextBlockCacheControl(TypedDict, total=False): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. 
`"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputTextBlock(TypedDict, total=False): + """Plain-text segment of a message.""" + + text: Required[str] + + type: Required[Literal["text"]] + + cache_control: MessageContentContentBlockArrayMessageInputTextBlockCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. + """ + + citations: Optional[Iterable[Dict[str, object]]] + + +class MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceBase64(TypedDict, total=False): + """Inline image data encoded as base64.""" + + data: Required[str] + """Base64-encoded image bytes (no `data:` URI prefix).""" + + media_type: Required[Literal["image/jpeg", "image/png", "image/gif", "image/webp"]] + """MIME type of the image bytes.""" + + type: Required[Literal["base64"]] + + +class MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceURL(TypedDict, total=False): + """HTTPS URL pointing to an image. + + **Returns 400 in v1** — URL fetching is blocked. Use `type:"base64"` instead. + """ + + type: Required[Literal["url"]] + + url: Required[str] + + +MessageContentContentBlockArrayMessageInputImageBlockSource: TypeAlias = Union[ + MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceBase64, + MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceURL, +] + + +class MessageContentContentBlockArrayMessageInputImageBlockCacheControl(TypedDict, total=False): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. 
+ """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputImageBlock(TypedDict, total=False): + """Image content. + + Only `source.type:"base64"` is supported in v1; URL sources return 400. + """ + + source: Required[MessageContentContentBlockArrayMessageInputImageBlockSource] + """Inline image data encoded as base64.""" + + type: Required[Literal["image"]] + + cache_control: MessageContentContentBlockArrayMessageInputImageBlockCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. + """ + + +class MessageContentContentBlockArrayMessageInputToolUseBlockCacheControl(TypedDict, total=False): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). 
**Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputToolUseBlock(TypedDict, total=False): + """A prior assistant turn that invoked a tool.""" + + id: Required[str] + """Unique identifier for the tool call (used to correlate `tool_result`).""" + + input: Required[Dict[str, object]] + """Tool inputs as a JSON object.""" + + name: Required[str] + """Name of the tool being invoked.""" + + type: Required[Literal["tool_use"]] + + cache_control: MessageContentContentBlockArrayMessageInputToolUseBlockCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. + """ + + +class MessageContentContentBlockArrayMessageInputToolResultBlockCacheControl(TypedDict, total=False): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlockCacheControl( + TypedDict, total=False +): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. 
Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlock( + TypedDict, total=False +): + """Plain-text segment of a message.""" + + text: Required[str] + + type: Required[Literal["text"]] + + cache_control: MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlockCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. + """ + + citations: Optional[Iterable[Dict[str, object]]] + + +class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceBase64( + TypedDict, total=False +): + """Inline image data encoded as base64.""" + + data: Required[str] + """Base64-encoded image bytes (no `data:` URI prefix).""" + + media_type: Required[Literal["image/jpeg", "image/png", "image/gif", "image/webp"]] + """MIME type of the image bytes.""" + + type: Required[Literal["base64"]] + + +class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceURL( + TypedDict, total=False +): + """HTTPS URL pointing to an image. + + **Returns 400 in v1** — URL fetching is blocked. Use `type:"base64"` instead. 
+ """ + + type: Required[Literal["url"]] + + url: Required[str] + + +MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSource: TypeAlias = Union[ + MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceBase64, + MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceURL, +] + + +class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockCacheControl( + TypedDict, total=False +): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlock( + TypedDict, total=False +): + """Image content. + + Only `source.type:"base64"` is supported in v1; URL sources return 400. + """ + + source: Required[ + MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSource + ] + """Inline image data encoded as base64.""" + + type: Required[Literal["image"]] + + cache_control: MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. 
Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. + """ + + +MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArray: TypeAlias = Union[ + MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlock, + MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlock, +] + + +class MessageContentContentBlockArrayMessageInputToolResultBlock(TypedDict, total=False): + """Result of a prior tool call.""" + + tool_use_id: Required[str] + """ID of the `tool_use` block this result corresponds to.""" + + type: Required[Literal["tool_result"]] + + cache_control: MessageContentContentBlockArrayMessageInputToolResultBlockCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. + """ + + content: Union[ + str, Iterable[MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArray] + ] + + is_error: Optional[bool] + """Silently dropped in v1.""" + + +class MessageContentContentBlockArrayMessageInputServerToolUseBlockCacheControl(TypedDict, total=False): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). 
**Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputServerToolUseBlock(TypedDict, total=False): + """Anthropic compatibility only — SambaNova does not run server-side tools. + + A prior assistant turn that invoked an Anthropic-hosted tool (web_search, code_execution, etc.). Accepted in conversation history (e.g. replaying an Anthropic-served session) but never originates from a SambaNova response. New `server_tool_use`-type tool definitions on outgoing requests are rejected with 400 `unsupported_tool_type`. + """ + + id: Required[str] + + input: Required[Dict[str, object]] + + name: Required[str] + + type: Required[Literal["server_tool_use"]] + + cache_control: MessageContentContentBlockArrayMessageInputServerToolUseBlockCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. + """ + + +class MessageContentContentBlockArrayMessageInputSearchResultBlockCacheControl(TypedDict, total=False): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputSearchResultBlockContentCacheControl(TypedDict, total=False): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. 
Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputSearchResultBlockContent(TypedDict, total=False): + """Plain-text segment of a message.""" + + text: Required[str] + + type: Required[Literal["text"]] + + cache_control: MessageContentContentBlockArrayMessageInputSearchResultBlockContentCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. + """ + + citations: Optional[Iterable[Dict[str, object]]] + + +class MessageContentContentBlockArrayMessageInputSearchResultBlock(TypedDict, total=False): + """Inline search result content. + + In v1 the `title`, `source`, and `content[]` text are extracted into a text block; citations are dropped. + """ + + type: Required[Literal["search_result"]] + + cache_control: MessageContentContentBlockArrayMessageInputSearchResultBlockCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. 
+ """ + + citations: Optional[Dict[str, object]] + + content: Iterable[MessageContentContentBlockArrayMessageInputSearchResultBlockContent] + + source: str + + title: str + + +class MessageContentContentBlockArrayMessageInputWebSearchToolResultBlockCacheControl(TypedDict, total=False): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputWebSearchToolResultBlock(TypedDict, total=False): + """Anthropic compatibility only — SambaNova does not run server-side `web_search`. + + Echo of a prior Anthropic-served `web_search` tool call; accepted in conversation history but never originates from a SambaNova response. When present, only `title` (`url`) per result is extracted into a tool message. + """ + + content: Required[Iterable[Dict[str, object]]] + + tool_use_id: Required[str] + + type: Required[Literal["web_search_tool_result"]] + + cache_control: MessageContentContentBlockArrayMessageInputWebSearchToolResultBlockCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. + """ + + +class MessageContentContentBlockArrayMessageInputWebFetchToolResultBlockCacheControl(TypedDict, total=False): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. 
Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputWebFetchToolResultBlock(TypedDict, total=False): + """Anthropic compatibility only — SambaNova does not run server-side `web_fetch`. + + Echo of a prior Anthropic-served `web_fetch` tool call; accepted in conversation history but never originates from a SambaNova response. When present, only the text content is extracted. + """ + + content: Required[Dict[str, object]] + + tool_use_id: Required[str] + + type: Required[Literal["web_fetch_tool_result"]] + + cache_control: MessageContentContentBlockArrayMessageInputWebFetchToolResultBlockCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. + """ + + +class MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlockCacheControl(TypedDict, total=False): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). 
**Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlock(TypedDict, total=False): + """ + Anthropic compatibility only — SambaNova does not run server-side `code_execution`. Echo of a prior Anthropic-served `code_execution` tool call; accepted in conversation history but never originates from a SambaNova response. When present, only `stdout`, `stderr`, and `return_code` are extracted; image output is dropped. + """ + + content: Required[Dict[str, object]] + + tool_use_id: Required[str] + + type: Required[Literal["code_execution_tool_result"]] + + cache_control: MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlockCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. + """ + + +class MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlockCacheControl(TypedDict, total=False): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlock(TypedDict, total=False): + """ + Anthropic compatibility only — SambaNova does not run server-side bash code execution. Echo of a prior Anthropic-served bash tool call; accepted in conversation history but never originates from a SambaNova response. Same lossy extraction as `code_execution_tool_result`. 
+ """ + + content: Required[Dict[str, object]] + + tool_use_id: Required[str] + + type: Required[Literal["bash_code_execution_tool_result"]] + + cache_control: MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlockCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. + """ + + +class MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlockCacheControl( + TypedDict, total=False +): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlock(TypedDict, total=False): + """ + Anthropic compatibility only — SambaNova does not run server-side text-editor code execution. Echo of a prior Anthropic-served text-editor tool call; accepted in conversation history but never originates from a SambaNova response. When present, only file content is extracted; metadata (line count, file type) is dropped. + """ + + content: Required[Dict[str, object]] + + tool_use_id: Required[str] + + type: Required[Literal["text_editor_code_execution_tool_result"]] + + cache_control: MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlockCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. 
Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. + """ + + +class MessageContentContentBlockArrayMessageInputToolSearchToolResultBlockCacheControl(TypedDict, total=False): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputToolSearchToolResultBlock(TypedDict, total=False): + """Anthropic compatibility only — SambaNova does not run server-side `tool_search`. + + Echo of a prior Anthropic-served `tool_search` tool call; accepted in conversation history but never originates from a SambaNova response. When present, an empty string is emitted to the tool message (no plain-text fields). + """ + + content: Required[Dict[str, object]] + + tool_use_id: Required[str] + + type: Required[Literal["tool_search_tool_result"]] + + cache_control: MessageContentContentBlockArrayMessageInputToolSearchToolResultBlockCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. 
+ """ + + +class MessageContentContentBlockArrayMessageInputThinkingBlock(TypedDict, total=False): + """Extended-reasoning trace from a prior assistant turn.""" + + signature: Required[str] + + thinking: Required[str] + + type: Required[Literal["thinking"]] + + +class MessageContentContentBlockArrayMessageInputRedactedThinkingBlock(TypedDict, total=False): + """ + Anthropic compatibility only — SambaNova does not produce encrypted thinking output. Echo of a prior Anthropic-served response where `thinking.display:"omitted"` was set. Accepted in conversation history but never originates from a SambaNova response. Silently dropped on input. + """ + + data: Required[str] + + type: Required[Literal["redacted_thinking"]] + + +class MessageContentContentBlockArrayMessageInputContainerUploadBlockCacheControl(TypedDict, total=False): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputContainerUploadBlock(TypedDict, total=False): + """ + Anthropic compatibility only — SambaNova does not produce container_upload blocks (these come from Anthropic's server-side `code_execution` tool). Accepted in conversation history but never originates from a SambaNova response. Silently dropped on input. + """ + + file_id: Required[str] + + type: Required[Literal["container_upload"]] + + cache_control: MessageContentContentBlockArrayMessageInputContainerUploadBlockCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. 
Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. + """ + + +class MessageContentContentBlockArrayMessageInputDocumentBlockCacheControl(TypedDict, total=False): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputDocumentBlock(TypedDict, total=False): + """PDF or document content. + + **Returns 400** — no document-extraction pipeline available. + """ + + source: Required[Dict[str, object]] + + type: Required[Literal["document"]] + + cache_control: MessageContentContentBlockArrayMessageInputDocumentBlockCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. 
+ """ + + citations: Optional[Dict[str, object]] + + context: Optional[str] + + title: Optional[str] + + +MessageContentContentBlockArray: TypeAlias = Union[ + MessageContentContentBlockArrayMessageInputTextBlock, + MessageContentContentBlockArrayMessageInputImageBlock, + MessageContentContentBlockArrayMessageInputToolUseBlock, + MessageContentContentBlockArrayMessageInputToolResultBlock, + MessageContentContentBlockArrayMessageInputServerToolUseBlock, + MessageContentContentBlockArrayMessageInputSearchResultBlock, + MessageContentContentBlockArrayMessageInputWebSearchToolResultBlock, + MessageContentContentBlockArrayMessageInputWebFetchToolResultBlock, + MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlock, + MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlock, + MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlock, + MessageContentContentBlockArrayMessageInputToolSearchToolResultBlock, + MessageContentContentBlockArrayMessageInputThinkingBlock, + MessageContentContentBlockArrayMessageInputRedactedThinkingBlock, + MessageContentContentBlockArrayMessageInputContainerUploadBlock, + MessageContentContentBlockArrayMessageInputDocumentBlock, +] + + +class Message(TypedDict, total=False): + """A turn in the conversation.""" + + content: Required[Union[str, Iterable[MessageContentContentBlockArray]]] + + role: Required[Literal["user", "assistant"]] + """Conversational role. + + `user` for the human-side turn, `assistant` for prior model output. + """ + + +class SystemSystemTextBlockArrayCacheControl(TypedDict, total=False): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. 
Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class SystemSystemTextBlockArray(TypedDict, total=False): + """A text segment within a structured `system` prompt array. + + Multiple text blocks are concatenated (with newlines) and prepended to the conversation as a `role: system` message at the chat-completions layer. + """ + + text: Required[str] + """Plain-text content of the system prompt segment.""" + + type: Required[Literal["text"]] + + cache_control: SystemSystemTextBlockArrayCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. + """ + + citations: Optional[Iterable[Dict[str, object]]] + """Optional citations. **In v1**: silently dropped""" + + +class ThinkingMessageThinkingDisabled(TypedDict, total=False): + """Disables Anthropic-style extended thinking. + + **In v1**: silently accepted as a no-op + """ + + type: Required[Literal["disabled"]] + + +class ThinkingMessageThinkingEnabled(TypedDict, total=False): + """Enables Anthropic-style extended thinking with a fixed budget. + + **In v1**: returns a 400 `invalid_request_error` (`unsupported_parameter`). + """ + + budget_tokens: Required[int] + """ + Maximum tokens the model may spend on extended thinking before producing the + final answer. + """ + + type: Required[Literal["enabled"]] + + +class ThinkingMessageThinkingAdaptive(TypedDict, total=False): + """Enables Anthropic-style adaptive extended thinking. + + **In v1**: returns a 400 `invalid_request_error` (`unsupported_parameter`). + """ + + type: Required[Literal["adaptive"]] + + budget_tokens: Optional[int] + """Optional upper bound on tokens spent on adaptive thinking. 
+ + When omitted, the backend chooses based on prompt complexity. + """ + + +Thinking: TypeAlias = Union[ + ThinkingMessageThinkingDisabled, ThinkingMessageThinkingEnabled, ThinkingMessageThinkingAdaptive +] + + +class ToolChoiceMessageToolChoiceAuto(TypedDict, total=False): + """Let the model decide whether and which tool to use.""" + + type: Required[Literal["auto"]] + + disable_parallel_tool_use: Optional[bool] + """Silently dropped.""" + + +class ToolChoiceMessageToolChoiceAny(TypedDict, total=False): + """Require the model to call one of the provided tools.""" + + type: Required[Literal["any"]] + + disable_parallel_tool_use: Optional[bool] + """Silently dropped.""" + + +class ToolChoiceMessageToolChoiceNone(TypedDict, total=False): + """Forbid the model from calling any tool.""" + + type: Required[Literal["none"]] + + +class ToolChoiceMessageToolChoiceTool(TypedDict, total=False): + """Force the model to call a specific tool by name.""" + + name: Required[str] + """Name of the required tool.""" + + type: Required[Literal["tool"]] + + disable_parallel_tool_use: Optional[bool] + """Silently dropped.""" + + +ToolChoice: TypeAlias = Union[ + ToolChoiceMessageToolChoiceAuto, + ToolChoiceMessageToolChoiceAny, + ToolChoiceMessageToolChoiceNone, + ToolChoiceMessageToolChoiceTool, +] + + +class ToolCacheControl(TypedDict, total=False): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class Tool(TypedDict, total=False): + """User-defined function tool definition. 
+ + Only custom function tools are supported (Anthropic's `type:"custom"` style or the absent-type Beta style). Anthropic-hosted server tools (`web_search`, `code_execution`, `bash`, `text_editor`, `memory`, `tool_search` variants) return 400 `unsupported_tool_type` if sent. + """ + + name: Required[str] + """Tool name. Must match `^[a-zA-Z0-9_-]+$`.""" + + allowed_callers: Optional[SequenceNotStr[str]] + """Silently dropped.""" + + cache_control: Optional[ToolCacheControl] + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. + """ + + defer_loading: Optional[bool] + """Silently dropped.""" + + description: Optional[str] + """Human-readable description of when the tool should be used.""" + + eager_input_streaming: Optional[bool] + """Silently dropped.""" + + input_examples: Optional[Iterable[Dict[str, object]]] + """Silently dropped.""" + + input_schema: Optional[Dict[str, object]] + """JSON Schema describing the tool's expected input. + + Required by the Anthropic spec; accepted as optional by SambaNova. + """ + + strict: Optional[bool] + """Silently dropped.""" + + type: Optional[Literal["custom"]] + """Tool-type discriminator. + + May be omitted (defaults to custom) or set to `custom`. Other values return 400 + `unsupported_tool_type`. + """ diff --git a/src/sambanova/types/message_count_tokens_response.py b/src/sambanova/types/message_count_tokens_response.py new file mode 100644 index 0000000..e3e8adf --- /dev/null +++ b/src/sambanova/types/message_count_tokens_response.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from .._models import BaseModel + +__all__ = ["MessageCountTokensResponse"] + + +class MessageCountTokensResponse(BaseModel): + """Token count for the supplied prompt.""" + + input_tokens: int + """Total tokens in the prompt (system + messages + tools).""" diff --git a/src/sambanova/types/message_create_params.py b/src/sambanova/types/message_create_params.py new file mode 100644 index 0000000..4da6620 --- /dev/null +++ b/src/sambanova/types/message_create_params.py @@ -0,0 +1,1095 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Union, Iterable, Optional +from typing_extensions import Literal, Required, Annotated, TypeAlias, TypedDict + +from .._types import SequenceNotStr +from .._utils import PropertyInfo + +__all__ = [ + "MessageCreateParamsBase", + "Message", + "MessageContentContentBlockArray", + "MessageContentContentBlockArrayMessageInputTextBlock", + "MessageContentContentBlockArrayMessageInputTextBlockCacheControl", + "MessageContentContentBlockArrayMessageInputImageBlock", + "MessageContentContentBlockArrayMessageInputImageBlockSource", + "MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceBase64", + "MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceURL", + "MessageContentContentBlockArrayMessageInputImageBlockCacheControl", + "MessageContentContentBlockArrayMessageInputToolUseBlock", + "MessageContentContentBlockArrayMessageInputToolUseBlockCacheControl", + "MessageContentContentBlockArrayMessageInputToolResultBlock", + "MessageContentContentBlockArrayMessageInputToolResultBlockCacheControl", + "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArray", + "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlock", + 
"MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlockCacheControl", + "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlock", + "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSource", + "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceBase64", + "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceURL", + "MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockCacheControl", + "MessageContentContentBlockArrayMessageInputServerToolUseBlock", + "MessageContentContentBlockArrayMessageInputServerToolUseBlockCacheControl", + "MessageContentContentBlockArrayMessageInputSearchResultBlock", + "MessageContentContentBlockArrayMessageInputSearchResultBlockCacheControl", + "MessageContentContentBlockArrayMessageInputSearchResultBlockContent", + "MessageContentContentBlockArrayMessageInputSearchResultBlockContentCacheControl", + "MessageContentContentBlockArrayMessageInputWebSearchToolResultBlock", + "MessageContentContentBlockArrayMessageInputWebSearchToolResultBlockCacheControl", + "MessageContentContentBlockArrayMessageInputWebFetchToolResultBlock", + "MessageContentContentBlockArrayMessageInputWebFetchToolResultBlockCacheControl", + "MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlock", + "MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlockCacheControl", + "MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlock", + "MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlockCacheControl", + "MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlock", + 
"MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlockCacheControl", + "MessageContentContentBlockArrayMessageInputToolSearchToolResultBlock", + "MessageContentContentBlockArrayMessageInputToolSearchToolResultBlockCacheControl", + "MessageContentContentBlockArrayMessageInputThinkingBlock", + "MessageContentContentBlockArrayMessageInputRedactedThinkingBlock", + "MessageContentContentBlockArrayMessageInputContainerUploadBlock", + "MessageContentContentBlockArrayMessageInputContainerUploadBlockCacheControl", + "MessageContentContentBlockArrayMessageInputDocumentBlock", + "MessageContentContentBlockArrayMessageInputDocumentBlockCacheControl", + "Metadata", + "SystemSystemTextBlockArray", + "SystemSystemTextBlockArrayCacheControl", + "Thinking", + "ThinkingMessageThinkingDisabled", + "ThinkingMessageThinkingEnabled", + "ThinkingMessageThinkingAdaptive", + "ToolChoice", + "ToolChoiceMessageToolChoiceAuto", + "ToolChoiceMessageToolChoiceAny", + "ToolChoiceMessageToolChoiceNone", + "ToolChoiceMessageToolChoiceTool", + "Tool", + "ToolCacheControl", + "MessageCreateParamsNonStreaming", + "MessageCreateParamsStreaming", +] + + +class MessageCreateParamsBase(TypedDict, total=False): + max_tokens: Required[int] + """Maximum number of tokens to generate. + + The combined input + output token count is bounded by the model's context + window. 
+ """ + + messages: Required[Iterable[Message]] + """Conversation turns.""" + + model: Required[ + Union[ + str, + Literal[ + "Meta-Llama-3.3-70B-Instruct", + "Meta-Llama-3.2-1B-Instruct", + "Meta-Llama-3.2-3B-Instruct", + "Llama-3.2-11B-Vision-Instruct", + "Llama-3.2-90B-Vision-Instruct", + "Meta-Llama-3.1-8B-Instruct", + "Meta-Llama-3.1-70B-Instruct", + "Meta-Llama-3.1-405B-Instruct", + "Qwen2.5-Coder-32B-Instruct", + "Qwen2.5-72B-Instruct", + "QwQ-32B-Preview", + "Meta-Llama-Guard-3-8B", + "DeepSeek-R1", + "DeepSeek-R1-0528", + "DeepSeek-V3-0324", + "DeepSeek-V3.1", + "DeepSeek-V3.1-cb", + "DeepSeek-V3.1-Terminus", + "DeepSeek-V3.2", + "DeepSeek-R1-Distill-Llama-70B", + "Llama-4-Maverick-17B-128E-Instruct", + "Llama-4-Scout-17B-16E-Instruct", + "Qwen3-32B", + "Qwen3-235B", + "Llama-3.3-Swallow-70B-Instruct-v0.4", + "gpt-oss-120b", + "ALLaM-7B-Instruct-preview", + "MiniMax-M2.5", + "MiniMax-M2.7", + "gemma-3-12b-it", + ], + ] + ] + """The model ID to use (e.g. gpt-oss-120b). + + See available + [models](https://docs.sambanova.ai/docs/en/models/sambacloud-models). + """ + + container: Optional[str] + """Existing code-execution container ID to reuse. **In v1**: silently dropped""" + + metadata: Metadata + """Free-form metadata attached to the request. + + Currently only `user_id` is recognized. Additional fields are accepted but ignored. + """ + + service_tier: Optional[Literal["auto", "standard_only"]] + """Service-tier preference. **In v1**: silently dropped""" + + stop_sequences: Optional[SequenceNotStr[str]] + """Custom strings that, when generated, cause the model to stop.""" + + system: Union[str, Iterable[SystemSystemTextBlockArray]] + """System prompt for the conversation. + + Accepts either a single string (most common) or an array of text blocks (used + when individual segments need `cache_control` markers). Multiple text blocks are + joined with newlines and prepended to the conversation as a `role: system` + message.
+ """ + + temperature: Optional[float] + """Sampling temperature in `[0.0, 2.0]`. + + Higher values produce more random output, lower values more deterministic. + Adjust only one of `temperature`, `top_p`, `top_k`. + """ + + thinking: Thinking + """Controls Anthropic-style extended thinking. + + **In v1**: only `type:"disabled"` is silently accepted as a no-op; + `type:"enabled"` and `type:"adaptive"` return a 400 `invalid_request_error` + (`unsupported_parameter`). + """ + + tool_choice: Optional[ToolChoice] + """How the model should choose from the provided tools.""" + + tools: Optional[Iterable[Tool]] + """Tool definitions the model may call.""" + + top_k: Optional[int] + """Top-k sampling. + + Considers only the K most likely tokens at each step. Set to 0 to disable. + """ + + top_p: Optional[float] + """Nucleus sampling. + + Considers tokens with cumulative probability mass up to `top_p`. + """ + + anthropic_version: Annotated[str, PropertyInfo(alias="anthropic-version")] + + +class MessageContentContentBlockArrayMessageInputTextBlockCacheControl(TypedDict, total=False): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputTextBlock(TypedDict, total=False): + """Plain-text segment of a message.""" + + text: Required[str] + + type: Required[Literal["text"]] + + cache_control: MessageContentContentBlockArrayMessageInputTextBlockCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. 
Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. + """ + + citations: Optional[Iterable[Dict[str, object]]] + + +class MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceBase64(TypedDict, total=False): + """Inline image data encoded as base64.""" + + data: Required[str] + """Base64-encoded image bytes (no `data:` URI prefix).""" + + media_type: Required[Literal["image/jpeg", "image/png", "image/gif", "image/webp"]] + """MIME type of the image bytes.""" + + type: Required[Literal["base64"]] + + +class MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceURL(TypedDict, total=False): + """HTTPS URL pointing to an image. + + **Returns 400 in v1** — URL fetching is blocked. Use `type:"base64"` instead. + """ + + type: Required[Literal["url"]] + + url: Required[str] + + +MessageContentContentBlockArrayMessageInputImageBlockSource: TypeAlias = Union[ + MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceBase64, + MessageContentContentBlockArrayMessageInputImageBlockSourceMessageInputImageSourceURL, +] + + +class MessageContentContentBlockArrayMessageInputImageBlockCacheControl(TypedDict, total=False): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputImageBlock(TypedDict, total=False): + """Image content. + + Only `source.type:"base64"` is supported in v1; URL sources return 400. 
+ """ + + source: Required[MessageContentContentBlockArrayMessageInputImageBlockSource] + """Inline image data encoded as base64.""" + + type: Required[Literal["image"]] + + cache_control: MessageContentContentBlockArrayMessageInputImageBlockCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. + """ + + +class MessageContentContentBlockArrayMessageInputToolUseBlockCacheControl(TypedDict, total=False): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputToolUseBlock(TypedDict, total=False): + """A prior assistant turn that invoked a tool.""" + + id: Required[str] + """Unique identifier for the tool call (used to correlate `tool_result`).""" + + input: Required[Dict[str, object]] + """Tool inputs as a JSON object.""" + + name: Required[str] + """Name of the tool being invoked.""" + + type: Required[Literal["tool_use"]] + + cache_control: MessageContentContentBlockArrayMessageInputToolUseBlockCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. 
+ """ + + +class MessageContentContentBlockArrayMessageInputToolResultBlockCacheControl(TypedDict, total=False): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlockCacheControl( + TypedDict, total=False +): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlock( + TypedDict, total=False +): + """Plain-text segment of a message.""" + + text: Required[str] + + type: Required[Literal["text"]] + + cache_control: MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlockCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. 
+ """ + + citations: Optional[Iterable[Dict[str, object]]] + + +class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceBase64( + TypedDict, total=False +): + """Inline image data encoded as base64.""" + + data: Required[str] + """Base64-encoded image bytes (no `data:` URI prefix).""" + + media_type: Required[Literal["image/jpeg", "image/png", "image/gif", "image/webp"]] + """MIME type of the image bytes.""" + + type: Required[Literal["base64"]] + + +class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceURL( + TypedDict, total=False +): + """HTTPS URL pointing to an image. + + **Returns 400 in v1** — URL fetching is blocked. Use `type:"base64"` instead. + """ + + type: Required[Literal["url"]] + + url: Required[str] + + +MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSource: TypeAlias = Union[ + MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceBase64, + MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSourceMessageInputImageSourceURL, +] + + +class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockCacheControl( + TypedDict, total=False +): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). 
**Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlock( + TypedDict, total=False +): + """Image content. + + Only `source.type:"base64"` is supported in v1; URL sources return 400. + """ + + source: Required[ + MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockSource + ] + """Inline image data encoded as base64.""" + + type: Required[Literal["image"]] + + cache_control: MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlockCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. + """ + + +MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArray: TypeAlias = Union[ + MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputTextBlock, + MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArrayMessageInputImageBlock, +] + + +class MessageContentContentBlockArrayMessageInputToolResultBlock(TypedDict, total=False): + """Result of a prior tool call.""" + + tool_use_id: Required[str] + """ID of the `tool_use` block this result corresponds to.""" + + type: Required[Literal["tool_result"]] + + cache_control: MessageContentContentBlockArrayMessageInputToolResultBlockCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. 
+ """ + + content: Union[ + str, Iterable[MessageContentContentBlockArrayMessageInputToolResultBlockContentToolResultContentArray] + ] + + is_error: Optional[bool] + """Silently dropped in v1.""" + + +class MessageContentContentBlockArrayMessageInputServerToolUseBlockCacheControl(TypedDict, total=False): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputServerToolUseBlock(TypedDict, total=False): + """Anthropic compatibility only — SambaNova does not run server-side tools. + + A prior assistant turn that invoked an Anthropic-hosted tool (web_search, code_execution, etc.). Accepted in conversation history (e.g. replaying an Anthropic-served session) but never originates from a SambaNova response. New `server_tool_use`-type tool definitions on outgoing requests are rejected with 400 `unsupported_tool_type`. + """ + + id: Required[str] + + input: Required[Dict[str, object]] + + name: Required[str] + + type: Required[Literal["server_tool_use"]] + + cache_control: MessageContentContentBlockArrayMessageInputServerToolUseBlockCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. 
+ """ + + +class MessageContentContentBlockArrayMessageInputSearchResultBlockCacheControl(TypedDict, total=False): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputSearchResultBlockContentCacheControl(TypedDict, total=False): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputSearchResultBlockContent(TypedDict, total=False): + """Plain-text segment of a message.""" + + text: Required[str] + + type: Required[Literal["text"]] + + cache_control: MessageContentContentBlockArrayMessageInputSearchResultBlockContentCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. + """ + + citations: Optional[Iterable[Dict[str, object]]] + + +class MessageContentContentBlockArrayMessageInputSearchResultBlock(TypedDict, total=False): + """Inline search result content. 
+ + In v1 the `title`, `source`, and `content[]` text are extracted into a text block; citations are dropped. + """ + + type: Required[Literal["search_result"]] + + cache_control: MessageContentContentBlockArrayMessageInputSearchResultBlockCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. + """ + + citations: Optional[Dict[str, object]] + + content: Iterable[MessageContentContentBlockArrayMessageInputSearchResultBlockContent] + + source: str + + title: str + + +class MessageContentContentBlockArrayMessageInputWebSearchToolResultBlockCacheControl(TypedDict, total=False): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputWebSearchToolResultBlock(TypedDict, total=False): + """Anthropic compatibility only — SambaNova does not run server-side `web_search`. + + Echo of a prior Anthropic-served `web_search` tool call; accepted in conversation history but never originates from a SambaNova response. When present, only `title` (`url`) per result is extracted into a tool message. 
+ """ + + content: Required[Iterable[Dict[str, object]]] + + tool_use_id: Required[str] + + type: Required[Literal["web_search_tool_result"]] + + cache_control: MessageContentContentBlockArrayMessageInputWebSearchToolResultBlockCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. + """ + + +class MessageContentContentBlockArrayMessageInputWebFetchToolResultBlockCacheControl(TypedDict, total=False): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputWebFetchToolResultBlock(TypedDict, total=False): + """Anthropic compatibility only — SambaNova does not run server-side `web_fetch`. + + Echo of a prior Anthropic-served `web_fetch` tool call; accepted in conversation history but never originates from a SambaNova response. When present, only the text content is extracted. + """ + + content: Required[Dict[str, object]] + + tool_use_id: Required[str] + + type: Required[Literal["web_fetch_tool_result"]] + + cache_control: MessageContentContentBlockArrayMessageInputWebFetchToolResultBlockCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. 
+ """ + + +class MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlockCacheControl(TypedDict, total=False): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlock(TypedDict, total=False): + """ + Anthropic compatibility only — SambaNova does not run server-side `code_execution`. Echo of a prior Anthropic-served `code_execution` tool call; accepted in conversation history but never originates from a SambaNova response. When present, only `stdout`, `stderr`, and `return_code` are extracted; image output is dropped. + """ + + content: Required[Dict[str, object]] + + tool_use_id: Required[str] + + type: Required[Literal["code_execution_tool_result"]] + + cache_control: MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlockCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. + """ + + +class MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlockCacheControl(TypedDict, total=False): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. 
+ """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlock(TypedDict, total=False): + """ + Anthropic compatibility only — SambaNova does not run server-side bash code execution. Echo of a prior Anthropic-served bash tool call; accepted in conversation history but never originates from a SambaNova response. Same lossy extraction as `code_execution_tool_result`. + """ + + content: Required[Dict[str, object]] + + tool_use_id: Required[str] + + type: Required[Literal["bash_code_execution_tool_result"]] + + cache_control: MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlockCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. + """ + + +class MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlockCacheControl( + TypedDict, total=False +): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlock(TypedDict, total=False): + """ + Anthropic compatibility only — SambaNova does not run server-side text-editor code execution. 
Echo of a prior Anthropic-served text-editor tool call; accepted in conversation history but never originates from a SambaNova response. When present, only file content is extracted; metadata (line count, file type) is dropped. + """ + + content: Required[Dict[str, object]] + + tool_use_id: Required[str] + + type: Required[Literal["text_editor_code_execution_tool_result"]] + + cache_control: MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlockCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. + """ + + +class MessageContentContentBlockArrayMessageInputToolSearchToolResultBlockCacheControl(TypedDict, total=False): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputToolSearchToolResultBlock(TypedDict, total=False): + """Anthropic compatibility only — SambaNova does not run server-side `tool_search`. + + Echo of a prior Anthropic-served `tool_search` tool call; accepted in conversation history but never originates from a SambaNova response. When present, an empty string is emitted to the tool message (no plain-text fields). 
+ """ + + content: Required[Dict[str, object]] + + tool_use_id: Required[str] + + type: Required[Literal["tool_search_tool_result"]] + + cache_control: MessageContentContentBlockArrayMessageInputToolSearchToolResultBlockCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. + """ + + +class MessageContentContentBlockArrayMessageInputThinkingBlock(TypedDict, total=False): + """Extended-reasoning trace from a prior assistant turn.""" + + signature: Required[str] + + thinking: Required[str] + + type: Required[Literal["thinking"]] + + +class MessageContentContentBlockArrayMessageInputRedactedThinkingBlock(TypedDict, total=False): + """ + Anthropic compatibility only — SambaNova does not produce encrypted thinking output. Echo of a prior Anthropic-served response where `thinking.display:"omitted"` was set. Accepted in conversation history but never originates from a SambaNova response. Silently dropped on input. + """ + + data: Required[str] + + type: Required[Literal["redacted_thinking"]] + + +class MessageContentContentBlockArrayMessageInputContainerUploadBlockCacheControl(TypedDict, total=False): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). 
**Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputContainerUploadBlock(TypedDict, total=False): + """ + Anthropic compatibility only — SambaNova does not produce container_upload blocks (these come from Anthropic's server-side `code_execution` tool). Accepted in conversation history but never originates from a SambaNova response. Silently dropped on input. + """ + + file_id: Required[str] + + type: Required[Literal["container_upload"]] + + cache_control: MessageContentContentBlockArrayMessageInputContainerUploadBlockCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. + """ + + +class MessageContentContentBlockArrayMessageInputDocumentBlockCacheControl(TypedDict, total=False): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class MessageContentContentBlockArrayMessageInputDocumentBlock(TypedDict, total=False): + """PDF or document content. + + **Returns 400** — no document-extraction pipeline available. + """ + + source: Required[Dict[str, object]] + + type: Required[Literal["document"]] + + cache_control: MessageContentContentBlockArrayMessageInputDocumentBlockCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. 
Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. + """ + + citations: Optional[Dict[str, object]] + + context: Optional[str] + + title: Optional[str] + + +MessageContentContentBlockArray: TypeAlias = Union[ + MessageContentContentBlockArrayMessageInputTextBlock, + MessageContentContentBlockArrayMessageInputImageBlock, + MessageContentContentBlockArrayMessageInputToolUseBlock, + MessageContentContentBlockArrayMessageInputToolResultBlock, + MessageContentContentBlockArrayMessageInputServerToolUseBlock, + MessageContentContentBlockArrayMessageInputSearchResultBlock, + MessageContentContentBlockArrayMessageInputWebSearchToolResultBlock, + MessageContentContentBlockArrayMessageInputWebFetchToolResultBlock, + MessageContentContentBlockArrayMessageInputCodeExecutionToolResultBlock, + MessageContentContentBlockArrayMessageInputBashCodeExecutionToolResultBlock, + MessageContentContentBlockArrayMessageInputTextEditorCodeExecutionToolResultBlock, + MessageContentContentBlockArrayMessageInputToolSearchToolResultBlock, + MessageContentContentBlockArrayMessageInputThinkingBlock, + MessageContentContentBlockArrayMessageInputRedactedThinkingBlock, + MessageContentContentBlockArrayMessageInputContainerUploadBlock, + MessageContentContentBlockArrayMessageInputDocumentBlock, +] + + +class Message(TypedDict, total=False): + """A turn in the conversation.""" + + content: Required[Union[str, Iterable[MessageContentContentBlockArray]]] + + role: Required[Literal["user", "assistant"]] + """Conversational role. + + `user` for the human-side turn, `assistant` for prior model output. + """ + + +class Metadata(TypedDict, total=False, extra_items=object): # type: ignore[call-arg] + """Free-form metadata attached to the request. + + Currently only `user_id` is recognized. Additional fields are accepted but ignored.
+ """ + + user_id: Optional[str] + """External identifier for the end-user making the request. + + Mapped internally to the Chat Completions `user` field. + """ + + +class SystemSystemTextBlockArrayCacheControl(TypedDict, total=False): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class SystemSystemTextBlockArray(TypedDict, total=False): + """A text segment within a structured `system` prompt array. + + Multiple text blocks are concatenated (with newlines) and prepended to the conversation as a `role: system` message at the chat-completions layer. + """ + + text: Required[str] + """Plain-text content of the system prompt segment.""" + + type: Required[Literal["text"]] + + cache_control: SystemSystemTextBlockArrayCacheControl + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. + """ + + citations: Optional[Iterable[Dict[str, object]]] + """Optional citations. **In v1**: silently dropped""" + + +class ThinkingMessageThinkingDisabled(TypedDict, total=False): + """Disables Anthropic-style extended thinking. + + **In v1**: silently accepted as a no-op + """ + + type: Required[Literal["disabled"]] + + +class ThinkingMessageThinkingEnabled(TypedDict, total=False): + """Enables Anthropic-style extended thinking with a fixed budget. + + **In v1**: returns a 400 `invalid_request_error` (`unsupported_parameter`). 
+ """ + + budget_tokens: Required[int] + """ + Maximum tokens the model may spend on extended thinking before producing the + final answer. + """ + + type: Required[Literal["enabled"]] + + +class ThinkingMessageThinkingAdaptive(TypedDict, total=False): + """Enables Anthropic-style adaptive extended thinking. + + **In v1**: returns a 400 `invalid_request_error` (`unsupported_parameter`). + """ + + type: Required[Literal["adaptive"]] + + budget_tokens: Optional[int] + """Optional upper bound on tokens spent on adaptive thinking. + + When omitted, the backend chooses based on prompt complexity. + """ + + +Thinking: TypeAlias = Union[ + ThinkingMessageThinkingDisabled, ThinkingMessageThinkingEnabled, ThinkingMessageThinkingAdaptive +] + + +class ToolChoiceMessageToolChoiceAuto(TypedDict, total=False): + """Let the model decide whether and which tool to use.""" + + type: Required[Literal["auto"]] + + disable_parallel_tool_use: Optional[bool] + """Silently dropped.""" + + +class ToolChoiceMessageToolChoiceAny(TypedDict, total=False): + """Require the model to call one of the provided tools.""" + + type: Required[Literal["any"]] + + disable_parallel_tool_use: Optional[bool] + """Silently dropped.""" + + +class ToolChoiceMessageToolChoiceNone(TypedDict, total=False): + """Forbid the model from calling any tool.""" + + type: Required[Literal["none"]] + + +class ToolChoiceMessageToolChoiceTool(TypedDict, total=False): + """Force the model to call a specific tool by name.""" + + name: Required[str] + """Name of the required tool.""" + + type: Required[Literal["tool"]] + + disable_parallel_tool_use: Optional[bool] + """Silently dropped.""" + + +ToolChoice: TypeAlias = Union[ + ToolChoiceMessageToolChoiceAuto, + ToolChoiceMessageToolChoiceAny, + ToolChoiceMessageToolChoiceNone, + ToolChoiceMessageToolChoiceTool, +] + + +class ToolCacheControl(TypedDict, total=False): + """ + Marks the preceding content block (or system text block) as a prompt- cache breakpoint. 
Marker positions are collected by the adapter; their wiring into the router's longest-prefix matching **In v1**: position is recorded; the `ttl` value is ignored. + """ + + type: Required[Literal["ephemeral"]] + """Cache breakpoint type. Only `ephemeral` is supported by Anthropic.""" + + ttl: Optional[str] + """Optional time-to-live hint (e.g. `"5m"`, `"1h"`). **Currently ignored** in v1""" + + +class Tool(TypedDict, total=False): + """User-defined function tool definition. + + Only custom function tools are supported (Anthropic's `type:"custom"` style or the absent-type Beta style). Anthropic-hosted server tools (`web_search`, `code_execution`, `bash`, `text_editor`, `memory`, `tool_search` variants) return 400 `unsupported_tool_type` if sent. + """ + + name: Required[str] + """Tool name. Must match `^[a-zA-Z0-9_-]+$`.""" + + allowed_callers: Optional[SequenceNotStr[str]] + """Silently dropped.""" + + cache_control: Optional[ToolCacheControl] + """ + Marks the preceding content block (or system text block) as a prompt- cache + breakpoint. Marker positions are collected by the adapter; their wiring into the + router's longest-prefix matching **In v1**: position is recorded; the `ttl` + value is ignored. + """ + + defer_loading: Optional[bool] + """Silently dropped.""" + + description: Optional[str] + """Human-readable description of when the tool should be used.""" + + eager_input_streaming: Optional[bool] + """Silently dropped.""" + + input_examples: Optional[Iterable[Dict[str, object]]] + """Silently dropped.""" + + input_schema: Optional[Dict[str, object]] + """JSON Schema describing the tool's expected input. + + Required by the Anthropic spec; accepted as optional by SambaNova. + """ + + strict: Optional[bool] + """Silently dropped.""" + + type: Optional[Literal["custom"]] + """Tool-type discriminator. + + May be omitted (defaults to custom) or set to `custom`. Other values return 400 + `unsupported_tool_type`. 
+ """ + + +class MessageCreateParamsNonStreaming(MessageCreateParamsBase, total=False): + stream: Optional[Literal[False]] + """ + If true, the response is a sequence of Server-Sent Events whose payloads conform + to `MessageStreamEvent`. + """ + + +class MessageCreateParamsStreaming(MessageCreateParamsBase): + stream: Required[Literal[True]] + """ + If true, the response is a sequence of Server-Sent Events whose payloads conform + to `MessageStreamEvent`. + """ + + +MessageCreateParams = Union[MessageCreateParamsNonStreaming, MessageCreateParamsStreaming] diff --git a/src/sambanova/types/message_create_response.py b/src/sambanova/types/message_create_response.py new file mode 100644 index 0000000..7f80d24 --- /dev/null +++ b/src/sambanova/types/message_create_response.py @@ -0,0 +1,449 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from .._utils import PropertyInfo +from .message import Message +from .._models import BaseModel + +__all__ = [ + "MessageCreateResponse", + "MessageStartEvent", + "MessageContentBlockStartEvent", + "MessageContentBlockStartEventContentBlock", + "MessageContentBlockStartEventContentBlockMessageOutputTextBlock", + "MessageContentBlockStartEventContentBlockMessageOutputToolUseBlock", + "MessageContentBlockStartEventContentBlockMessageOutputThinkingBlock", + "MessageContentBlockStartEventContentBlockMessageOutputRedactedThinkingBlock", + "MessageContentBlockStartEventContentBlockMessageOutputServerToolUseBlock", + "MessageContentBlockStartEventContentBlockMessageOutputWebSearchToolResultBlock", + "MessageContentBlockStartEventContentBlockMessageOutputWebFetchToolResultBlock", + "MessageContentBlockStartEventContentBlockMessageOutputCodeExecutionToolResultBlock", + "MessageContentBlockStartEventContentBlockMessageOutputBashCodeExecutionToolResultBlock", + 
"MessageContentBlockStartEventContentBlockMessageOutputTextEditorCodeExecutionToolResultBlock", + "MessageContentBlockStartEventContentBlockMessageOutputToolSearchToolResultBlock", + "MessageContentBlockStartEventContentBlockMessageOutputContainerUploadBlock", + "MessageContentBlockDeltaEvent", + "MessageContentBlockDeltaEventDelta", + "MessageContentBlockDeltaEventDeltaMessageContentBlockTextDelta", + "MessageContentBlockDeltaEventDeltaMessageContentBlockInputJsonDelta", + "MessageContentBlockDeltaEventDeltaMessageContentBlockThinkingDelta", + "MessageContentBlockDeltaEventDeltaMessageContentBlockSignatureDelta", + "MessageContentBlockStopEvent", + "MessageDeltaEvent", + "MessageDeltaEventDelta", + "MessageDeltaEventDeltaStopDetails", + "MessageDeltaEventUsage", + "MessageStopEvent", + "MessagePingEvent", + "MessageStreamErrorEvent", + "MessageStreamErrorEventError", +] + + +class MessageStartEvent(BaseModel): + """First event of a stream. + + Carries the initial Message envelope (empty `content[]`, `stop_reason: null`) and token usage from prompt processing. + """ + + message: Message + """Non-streaming response from `POST /messages`. + + Wire-compatible with the official Anthropic Messages API. + """ + + type: Literal["message_start"] + + +class MessageContentBlockStartEventContentBlockMessageOutputTextBlock(BaseModel): + """Plain-text segment of the model's response.""" + + text: str + + type: Literal["text"] + + citations: Optional[List[Dict[str, object]]] = None + """Not emitted in v1.""" + + +class MessageContentBlockStartEventContentBlockMessageOutputToolUseBlock(BaseModel): + """Tool call generated by the model.""" + + id: str + """Unique identifier for this tool call.""" + + input: Dict[str, object] + """Tool inputs as a JSON object.""" + + name: str + """Name of the tool being called.""" + + type: Literal["tool_use"] + + caller: Optional[Dict[str, object]] = None + """Anthropic routing metadata. 
Always `null` in SambaNova responses.""" + + +class MessageContentBlockStartEventContentBlockMessageOutputThinkingBlock(BaseModel): + """Extended-reasoning trace from the model. Emitted by reasoning models.""" + + thinking: str + + type: Literal["thinking"] + + signature: Optional[str] = None + + +class MessageContentBlockStartEventContentBlockMessageOutputRedactedThinkingBlock(BaseModel): + """ + Anthropic compatibility only — SambaNova does not produce encrypted thinking output. Never emitted in responses. + """ + + data: str + + type: Literal["redacted_thinking"] + + +class MessageContentBlockStartEventContentBlockMessageOutputServerToolUseBlock(BaseModel): + """Anthropic compatibility only — SambaNova does not run server-side tools. + + Never emitted in responses; defined for Anthropic SDK type-parity. + """ + + id: str + + input: Dict[str, object] + + name: str + + type: Literal["server_tool_use"] + + +class MessageContentBlockStartEventContentBlockMessageOutputWebSearchToolResultBlock(BaseModel): + """Anthropic compatibility only — SambaNova does not run server-side `web_search`. + + Never emitted in responses. + """ + + content: List[Dict[str, object]] + + tool_use_id: str + + type: Literal["web_search_tool_result"] + + +class MessageContentBlockStartEventContentBlockMessageOutputWebFetchToolResultBlock(BaseModel): + """Anthropic compatibility only — SambaNova does not run server-side `web_fetch`. + + Never emitted in responses. + """ + + content: Dict[str, object] + + tool_use_id: str + + type: Literal["web_fetch_tool_result"] + + +class MessageContentBlockStartEventContentBlockMessageOutputCodeExecutionToolResultBlock(BaseModel): + """ + Anthropic compatibility only — SambaNova does not run server-side `code_execution`. Never emitted in responses. 
+ """ + + content: Dict[str, object] + + tool_use_id: str + + type: Literal["code_execution_tool_result"] + + +class MessageContentBlockStartEventContentBlockMessageOutputBashCodeExecutionToolResultBlock(BaseModel): + """ + Anthropic compatibility only — SambaNova does not run server-side bash code execution. Never emitted in responses. + """ + + content: Dict[str, object] + + tool_use_id: str + + type: Literal["bash_code_execution_tool_result"] + + +class MessageContentBlockStartEventContentBlockMessageOutputTextEditorCodeExecutionToolResultBlock(BaseModel): + """ + Anthropic compatibility only — SambaNova does not run server-side text-editor code execution. Never emitted in responses. + """ + + content: Dict[str, object] + + tool_use_id: str + + type: Literal["text_editor_code_execution_tool_result"] + + +class MessageContentBlockStartEventContentBlockMessageOutputToolSearchToolResultBlock(BaseModel): + """Anthropic compatibility only — SambaNova does not run server-side `tool_search`. + + Never emitted in responses. + """ + + content: Dict[str, object] + + tool_use_id: str + + type: Literal["tool_search_tool_result"] + + +class MessageContentBlockStartEventContentBlockMessageOutputContainerUploadBlock(BaseModel): + """ + Anthropic compatibility only — SambaNova does not produce container_upload blocks (these come from Anthropic's server-side `code_execution` tool). Never emitted in responses. 
+ """ + + file_id: str + + type: Literal["container_upload"] + + +MessageContentBlockStartEventContentBlock: TypeAlias = Annotated[ + Union[ + MessageContentBlockStartEventContentBlockMessageOutputTextBlock, + MessageContentBlockStartEventContentBlockMessageOutputToolUseBlock, + MessageContentBlockStartEventContentBlockMessageOutputThinkingBlock, + MessageContentBlockStartEventContentBlockMessageOutputRedactedThinkingBlock, + MessageContentBlockStartEventContentBlockMessageOutputServerToolUseBlock, + MessageContentBlockStartEventContentBlockMessageOutputWebSearchToolResultBlock, + MessageContentBlockStartEventContentBlockMessageOutputWebFetchToolResultBlock, + MessageContentBlockStartEventContentBlockMessageOutputCodeExecutionToolResultBlock, + MessageContentBlockStartEventContentBlockMessageOutputBashCodeExecutionToolResultBlock, + MessageContentBlockStartEventContentBlockMessageOutputTextEditorCodeExecutionToolResultBlock, + MessageContentBlockStartEventContentBlockMessageOutputToolSearchToolResultBlock, + MessageContentBlockStartEventContentBlockMessageOutputContainerUploadBlock, + ], + PropertyInfo(discriminator="type"), +] + + +class MessageContentBlockStartEvent(BaseModel): + """Opens a new content block. One per block in `content[]`.""" + + content_block: MessageContentBlockStartEventContentBlock + """Typed content block in the model's response.""" + + index: int + """Zero-based index of the block within `content[]`.""" + + type: Literal["content_block_start"] + + +class MessageContentBlockDeltaEventDeltaMessageContentBlockTextDelta(BaseModel): + """Incremental text chunk for an open text content block.""" + + text: str + + type: Literal["text_delta"] + + +class MessageContentBlockDeltaEventDeltaMessageContentBlockInputJsonDelta(BaseModel): + """Incremental fragment of a tool_use block's `input` JSON. + + Concatenate successive `partial_json` strings to reconstruct the full input object. 
+ """ + + partial_json: str + + type: Literal["input_json_delta"] + + +class MessageContentBlockDeltaEventDeltaMessageContentBlockThinkingDelta(BaseModel): + """Incremental thinking chunk for an open thinking block. + + Emitted by reasoning models. + """ + + thinking: str + + type: Literal["thinking_delta"] + + +class MessageContentBlockDeltaEventDeltaMessageContentBlockSignatureDelta(BaseModel): + """Signature for an open thinking block. + + Emitted at the end of a thinking stream (paired with the closing `content_block_stop`); the `signature` value may be an empty string when the backend has no signed payload to attach. + """ + + signature: str + + type: Literal["signature_delta"] + + +MessageContentBlockDeltaEventDelta: TypeAlias = Annotated[ + Union[ + MessageContentBlockDeltaEventDeltaMessageContentBlockTextDelta, + MessageContentBlockDeltaEventDeltaMessageContentBlockInputJsonDelta, + MessageContentBlockDeltaEventDeltaMessageContentBlockThinkingDelta, + MessageContentBlockDeltaEventDeltaMessageContentBlockSignatureDelta, + ], + PropertyInfo(discriminator="type"), +] + + +class MessageContentBlockDeltaEvent(BaseModel): + """Incremental update to the currently open content block.""" + + delta: MessageContentBlockDeltaEventDelta + """Incremental update to an open content block.""" + + index: int + """Zero-based index of the block within `content[]`.""" + + type: Literal["content_block_delta"] + + +class MessageContentBlockStopEvent(BaseModel): + """Closes the current content block.""" + + index: int + """Zero-based index of the block within `content[]`.""" + + type: Literal["content_block_stop"] + + +class MessageDeltaEventDeltaStopDetails(BaseModel): + """Refusal stop details. + + Anthropic compatibility only — `refusal` is never emitted as a stop_reason by SambaNova (content filtering is not exposed at the API layer). 
+ """ + + type: Literal["refusal"] + + category: Optional[Literal["cyber", "bio"]] = None + + +class MessageDeltaEventDelta(BaseModel): + stop_reason: Literal[ + "end_turn", "max_tokens", "tool_use", "pause_turn", "refusal", "stop_sequence", "model_context_window_exceeded" + ] + """Reason the model stopped generating. + + SambaNova emits `end_turn`, `max_tokens`, `tool_use`, and `stop_sequence`. The + remaining values are defined for Anthropic SDK type-parity but never returned: + `pause_turn` (server-tool loop limit, not produced); `refusal` (content filter, + not exposed); `model_context_window_exceeded` (folded to `max_tokens`). + """ + + stop_details: Optional[MessageDeltaEventDeltaStopDetails] = None + """Refusal stop details. + + Anthropic compatibility only — `refusal` is never emitted as a stop_reason by + SambaNova (content filtering is not exposed at the API layer). + """ + + stop_sequence: Optional[str] = None + """Custom stop sequence that triggered termination. + + Field is emitted but value is always `null` in v1 (backend collapses + `StopSequenceHit` and `EndOfText` into the same finish_reason). + """ + + +class MessageDeltaEventUsage(BaseModel): + """ + Final token accounting emitted in the closing `message_delta` event of a stream. + """ + + output_tokens: int + """Total tokens generated (final count).""" + + cache_creation_input_tokens: Optional[int] = None + """Tokens written to prompt cache. Absent in v1.""" + + cache_read_input_tokens: Optional[int] = None + """Tokens read from prompt cache. Absent in v1.""" + + input_tokens: Optional[int] = None + """Total tokens in the prompt (echoed from `message_start`).""" + + server_tool_use: Optional[Dict[str, object]] = None + """Server-tool usage metrics. + + Anthropic compatibility only — SambaNova does not run server tools, so this + field is never emitted. + """ + + +class MessageDeltaEvent(BaseModel): + """Penultimate event of the stream.
+ + Carries the final `stop_reason`, optional `stop_sequence`, and final usage counts. + """ + + delta: MessageDeltaEventDelta + + type: Literal["message_delta"] + + usage: MessageDeltaEventUsage + """ + Final token accounting emitted in the closing `message_delta` event of a stream. + """ + + +class MessageStopEvent(BaseModel): + """Final event of the stream. No fields beyond `type`.""" + + type: Literal["message_stop"] + + +class MessagePingEvent(BaseModel): + """Keepalive heartbeat. May appear at any point in the stream.""" + + type: Literal["ping"] + + +class MessageStreamErrorEventError(BaseModel): + """Inner error object carried inside a `MessageErrorResponse`. + + The `type` value follows Anthropic's published error taxonomy. + """ + + message: str + """Human-readable explanation of the error.""" + + type: Literal[ + "invalid_request_error", + "authentication_error", + "permission_error", + "not_found_error", + "request_too_large", + "rate_limit_error", + "api_error", + "overloaded_error", + "not_implemented_error", + ] + """Error category. Values follow Anthropic's taxonomy.""" + + +class MessageStreamErrorEvent(BaseModel): + """Streamed error envelope. Terminates the stream.""" + + error: MessageStreamErrorEventError + """Inner error object carried inside a `MessageErrorResponse`. + + The `type` value follows Anthropic's published error taxonomy. + """ + + type: Literal["error"] + + +MessageCreateResponse: TypeAlias = Union[ + Message, + MessageStartEvent, + MessageContentBlockStartEvent, + MessageContentBlockDeltaEvent, + MessageContentBlockStopEvent, + MessageDeltaEvent, + MessageStopEvent, + MessagePingEvent, + MessageStreamErrorEvent, +] diff --git a/src/sambanova/types/message_stream_event.py b/src/sambanova/types/message_stream_event.py new file mode 100644 index 0000000..95645b5 --- /dev/null +++ b/src/sambanova/types/message_stream_event.py @@ -0,0 +1,451 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Dict, List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from .._utils import PropertyInfo +from .message import Message +from .._models import BaseModel + +__all__ = [ + "MessageStreamEvent", + "MessageStartEvent", + "MessageContentBlockStartEvent", + "MessageContentBlockStartEventContentBlock", + "MessageContentBlockStartEventContentBlockMessageOutputTextBlock", + "MessageContentBlockStartEventContentBlockMessageOutputToolUseBlock", + "MessageContentBlockStartEventContentBlockMessageOutputThinkingBlock", + "MessageContentBlockStartEventContentBlockMessageOutputRedactedThinkingBlock", + "MessageContentBlockStartEventContentBlockMessageOutputServerToolUseBlock", + "MessageContentBlockStartEventContentBlockMessageOutputWebSearchToolResultBlock", + "MessageContentBlockStartEventContentBlockMessageOutputWebFetchToolResultBlock", + "MessageContentBlockStartEventContentBlockMessageOutputCodeExecutionToolResultBlock", + "MessageContentBlockStartEventContentBlockMessageOutputBashCodeExecutionToolResultBlock", + "MessageContentBlockStartEventContentBlockMessageOutputTextEditorCodeExecutionToolResultBlock", + "MessageContentBlockStartEventContentBlockMessageOutputToolSearchToolResultBlock", + "MessageContentBlockStartEventContentBlockMessageOutputContainerUploadBlock", + "MessageContentBlockDeltaEvent", + "MessageContentBlockDeltaEventDelta", + "MessageContentBlockDeltaEventDeltaMessageContentBlockTextDelta", + "MessageContentBlockDeltaEventDeltaMessageContentBlockInputJsonDelta", + "MessageContentBlockDeltaEventDeltaMessageContentBlockThinkingDelta", + "MessageContentBlockDeltaEventDeltaMessageContentBlockSignatureDelta", + "MessageContentBlockStopEvent", + "MessageDeltaEvent", + "MessageDeltaEventDelta", + "MessageDeltaEventDeltaStopDetails", + "MessageDeltaEventUsage", + "MessageStopEvent", + "MessagePingEvent", + "MessageStreamErrorEvent", + "MessageStreamErrorEventError", +] + + +class 
MessageStartEvent(BaseModel): + """First event of a stream. + + Carries the initial Message envelope (empty `content[]`, `stop_reason: null`) and token usage from prompt processing. + """ + + message: Message + """Non-streaming response from `POST /messages`. + + Wire-compatible with the official Anthropic Messages API. + """ + + type: Literal["message_start"] + + +class MessageContentBlockStartEventContentBlockMessageOutputTextBlock(BaseModel): + """Plain-text segment of the model's response.""" + + text: str + + type: Literal["text"] + + citations: Optional[List[Dict[str, object]]] = None + """Not emitted in v1.""" + + +class MessageContentBlockStartEventContentBlockMessageOutputToolUseBlock(BaseModel): + """Tool call generated by the model.""" + + id: str + """Unique identifier for this tool call.""" + + input: Dict[str, object] + """Tool inputs as a JSON object.""" + + name: str + """Name of the tool being called.""" + + type: Literal["tool_use"] + + caller: Optional[Dict[str, object]] = None + """Anthropic routing metadata. Always `null` in SambaNova responses.""" + + +class MessageContentBlockStartEventContentBlockMessageOutputThinkingBlock(BaseModel): + """Extended-reasoning trace from the model. Emitted by reasoning models.""" + + thinking: str + + type: Literal["thinking"] + + signature: Optional[str] = None + + +class MessageContentBlockStartEventContentBlockMessageOutputRedactedThinkingBlock(BaseModel): + """ + Anthropic compatibility only — SambaNova does not produce encrypted thinking output. Never emitted in responses. + """ + + data: str + + type: Literal["redacted_thinking"] + + +class MessageContentBlockStartEventContentBlockMessageOutputServerToolUseBlock(BaseModel): + """Anthropic compatibility only — SambaNova does not run server-side tools. + + Never emitted in responses; defined for Anthropic SDK type-parity. 
+ """ + + id: str + + input: Dict[str, object] + + name: str + + type: Literal["server_tool_use"] + + +class MessageContentBlockStartEventContentBlockMessageOutputWebSearchToolResultBlock(BaseModel): + """Anthropic compatibility only — SambaNova does not run server-side `web_search`. + + Never emitted in responses. + """ + + content: List[Dict[str, object]] + + tool_use_id: str + + type: Literal["web_search_tool_result"] + + +class MessageContentBlockStartEventContentBlockMessageOutputWebFetchToolResultBlock(BaseModel): + """Anthropic compatibility only — SambaNova does not run server-side `web_fetch`. + + Never emitted in responses. + """ + + content: Dict[str, object] + + tool_use_id: str + + type: Literal["web_fetch_tool_result"] + + +class MessageContentBlockStartEventContentBlockMessageOutputCodeExecutionToolResultBlock(BaseModel): + """ + Anthropic compatibility only — SambaNova does not run server-side `code_execution`. Never emitted in responses. + """ + + content: Dict[str, object] + + tool_use_id: str + + type: Literal["code_execution_tool_result"] + + +class MessageContentBlockStartEventContentBlockMessageOutputBashCodeExecutionToolResultBlock(BaseModel): + """ + Anthropic compatibility only — SambaNova does not run server-side bash code execution. Never emitted in responses. + """ + + content: Dict[str, object] + + tool_use_id: str + + type: Literal["bash_code_execution_tool_result"] + + +class MessageContentBlockStartEventContentBlockMessageOutputTextEditorCodeExecutionToolResultBlock(BaseModel): + """ + Anthropic compatibility only — SambaNova does not run server-side text-editor code execution. Never emitted in responses. + """ + + content: Dict[str, object] + + tool_use_id: str + + type: Literal["text_editor_code_execution_tool_result"] + + +class MessageContentBlockStartEventContentBlockMessageOutputToolSearchToolResultBlock(BaseModel): + """Anthropic compatibility only — SambaNova does not run server-side `tool_search`. 
+ + Never emitted in responses. + """ + + content: Dict[str, object] + + tool_use_id: str + + type: Literal["tool_search_tool_result"] + + +class MessageContentBlockStartEventContentBlockMessageOutputContainerUploadBlock(BaseModel): + """ + Anthropic compatibility only — SambaNova does not produce container_upload blocks (these come from Anthropic's server-side `code_execution` tool). Never emitted in responses. + """ + + file_id: str + + type: Literal["container_upload"] + + +MessageContentBlockStartEventContentBlock: TypeAlias = Annotated[ + Union[ + MessageContentBlockStartEventContentBlockMessageOutputTextBlock, + MessageContentBlockStartEventContentBlockMessageOutputToolUseBlock, + MessageContentBlockStartEventContentBlockMessageOutputThinkingBlock, + MessageContentBlockStartEventContentBlockMessageOutputRedactedThinkingBlock, + MessageContentBlockStartEventContentBlockMessageOutputServerToolUseBlock, + MessageContentBlockStartEventContentBlockMessageOutputWebSearchToolResultBlock, + MessageContentBlockStartEventContentBlockMessageOutputWebFetchToolResultBlock, + MessageContentBlockStartEventContentBlockMessageOutputCodeExecutionToolResultBlock, + MessageContentBlockStartEventContentBlockMessageOutputBashCodeExecutionToolResultBlock, + MessageContentBlockStartEventContentBlockMessageOutputTextEditorCodeExecutionToolResultBlock, + MessageContentBlockStartEventContentBlockMessageOutputToolSearchToolResultBlock, + MessageContentBlockStartEventContentBlockMessageOutputContainerUploadBlock, + ], + PropertyInfo(discriminator="type"), +] + + +class MessageContentBlockStartEvent(BaseModel): + """Opens a new content block. 
One per block in `content[]`.""" + + content_block: MessageContentBlockStartEventContentBlock + """Typed content block in the model's response.""" + + index: int + """Zero-based index of the block within `content[]`.""" + + type: Literal["content_block_start"] + + +class MessageContentBlockDeltaEventDeltaMessageContentBlockTextDelta(BaseModel): + """Incremental text chunk for an open text content block.""" + + text: str + + type: Literal["text_delta"] + + +class MessageContentBlockDeltaEventDeltaMessageContentBlockInputJsonDelta(BaseModel): + """Incremental fragment of a tool_use block's `input` JSON. + + Concatenate successive `partial_json` strings to reconstruct the full input object. + """ + + partial_json: str + + type: Literal["input_json_delta"] + + +class MessageContentBlockDeltaEventDeltaMessageContentBlockThinkingDelta(BaseModel): + """Incremental thinking chunk for an open thinking block. + + Emitted by reasoning models. + """ + + thinking: str + + type: Literal["thinking_delta"] + + +class MessageContentBlockDeltaEventDeltaMessageContentBlockSignatureDelta(BaseModel): + """Signature for an open thinking block. + + Emitted at the end of a thinking stream (paired with the closing `content_block_stop`); the `signature` value may be an empty string when the backend has no signed payload to attach. 
+ """ + + signature: str + + type: Literal["signature_delta"] + + +MessageContentBlockDeltaEventDelta: TypeAlias = Annotated[ + Union[ + MessageContentBlockDeltaEventDeltaMessageContentBlockTextDelta, + MessageContentBlockDeltaEventDeltaMessageContentBlockInputJsonDelta, + MessageContentBlockDeltaEventDeltaMessageContentBlockThinkingDelta, + MessageContentBlockDeltaEventDeltaMessageContentBlockSignatureDelta, + ], + PropertyInfo(discriminator="type"), +] + + +class MessageContentBlockDeltaEvent(BaseModel): + """Incremental update to the currently open content block.""" + + delta: MessageContentBlockDeltaEventDelta + """Incremental update to an open content block.""" + + index: int + """Zero-based index of the block within `content[]`.""" + + type: Literal["content_block_delta"] + + +class MessageContentBlockStopEvent(BaseModel): + """Closes the current content block.""" + + index: int + """Zero-based index of the block within `content[]`.""" + + type: Literal["content_block_stop"] + + +class MessageDeltaEventDeltaStopDetails(BaseModel): + """Refusal stop details. + + Anthropic compatibility only — `refusal` is never emitted as a stop_reason by SambaNova (content filtering is not exposed at the API layer). + """ + + type: Literal["refusal"] + + category: Optional[Literal["cyber", "bio"]] = None + + +class MessageDeltaEventDelta(BaseModel): + stop_reason: Literal[ + "end_turn", "max_tokens", "tool_use", "pause_turn", "refusal", "stop_sequence", "model_context_window_exceeded" + ] + """Reason the model stopped generating. + + SambaNova emits `end_turn`, `max_tokens`, `tool_use`, and `stop_sequence`. The + remaining values are defined for Anthropic SDK type-parity but never returned: + `pause_turn` (server-tool loop limit, not produced); `refusal` (content filter, + not exposed); `model_context_window_exceeded` (folded to `max_tokens`). + """ + + stop_details: Optional[MessageDeltaEventDeltaStopDetails] = None + """Refusal stop details. 
+ + Anthropic compatibility only — `refusal` is never emitted as a stop_reason by + SambaNova (content filtering is not exposed at the API layer). + """ + + stop_sequence: Optional[str] = None + """Custom stop sequence that triggered termination. + + Field is emitted but value is always `null` in v1 (backend collapses + `StopSequenceHit` and `EndOfText` into the same finish_reason). + """ + + +class MessageDeltaEventUsage(BaseModel): + """ + Final token accounting emitted in the closing `message_delta` event of a stream. + """ + + output_tokens: int + """Total tokens generated (final count).""" + + cache_creation_input_tokens: Optional[int] = None + """Tokens written to prompt cache. Absent in v1.""" + + cache_read_input_tokens: Optional[int] = None + """Tokens read from prompt cache. Absent in v1.""" + + input_tokens: Optional[int] = None + """Total tokens in the prompt (echoed from `message_start`).""" + + server_tool_use: Optional[Dict[str, object]] = None + """Server-tool usage metrics. + + Anthropic compatibility only — SambaNova does not run server tools, so this + field is never emitted. + """ + + +class MessageDeltaEvent(BaseModel): + """Penultimate event of the stream. + + Carries the final `stop_reason`, optional `stop_sequence`, and final usage counts. + """ + + delta: MessageDeltaEventDelta + + type: Literal["message_delta"] + + usage: MessageDeltaEventUsage + """ + Final token accounting emitted in the closing `message_delta` event of a stream. + """ + + +class MessageStopEvent(BaseModel): + """Final event of the stream. No fields beyond `type`.""" + + type: Literal["message_stop"] + + +class MessagePingEvent(BaseModel): + """Keepalive heartbeat. May appear at any point in the stream.""" + + type: Literal["ping"] + + +class MessageStreamErrorEventError(BaseModel): + """Inner error object carried inside a `MessageErrorResponse`. + + The `type` value follows Anthropic's published error taxonomy.
+ """ + + message: str + """Human-readable explanation of the error.""" + + type: Literal[ + "invalid_request_error", + "authentication_error", + "permission_error", + "not_found_error", + "request_too_large", + "rate_limit_error", + "api_error", + "overloaded_error", + "not_implemented_error", + ] + """Error category. Values follow Anthropic's taxonomy.""" + + +class MessageStreamErrorEvent(BaseModel): + """Streamed error envelope. Terminates the stream.""" + + error: MessageStreamErrorEventError + """Inner error object carried inside a `MessageErrorResponse`. + + The `type` value follows Anthropic's published error taxonomy. + """ + + type: Literal["error"] + + +MessageStreamEvent: TypeAlias = Annotated[ + Union[ + MessageStartEvent, + MessageContentBlockStartEvent, + MessageContentBlockDeltaEvent, + MessageContentBlockStopEvent, + MessageDeltaEvent, + MessageStopEvent, + MessagePingEvent, + MessageStreamErrorEvent, + ], + PropertyInfo(discriminator="type"), +] diff --git a/tests/api_resources/test_messages.py b/tests/api_resources/test_messages.py new file mode 100644 index 0000000..2f34086 --- /dev/null +++ b/tests/api_resources/test_messages.py @@ -0,0 +1,595 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from sambanova import SambaNova, AsyncSambaNova +from tests.utils import assert_matches_type +from sambanova.types import ( + MessageCreateResponse, + MessageCountTokensResponse, +) + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestMessages: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create_overload_1(self, client: SambaNova) -> None: + message = client.messages.create( + max_tokens=1024, + messages=[ + { + "content": "Hello, Claude!", + "role": "user", + } + ], + model="DeepSeek-V3.1", + ) + assert_matches_type(MessageCreateResponse, message, path=["response"]) + + @parametrize + def test_method_create_with_all_params_overload_1(self, client: SambaNova) -> None: + message = client.messages.create( + max_tokens=1024, + messages=[ + { + "content": "Hello, Claude!", + "role": "user", + } + ], + model="DeepSeek-V3.1", + container="container", + metadata={"user_id": "user_id"}, + service_tier="auto", + stop_sequences=["string"], + stream=False, + system="string", + temperature=1, + thinking={"type": "disabled"}, + tool_choice={ + "type": "auto", + "disable_parallel_tool_use": True, + }, + tools=[ + { + "name": "name", + "allowed_callers": ["string"], + "cache_control": { + "type": "ephemeral", + "ttl": "ttl", + }, + "defer_loading": True, + "description": "description", + "eager_input_streaming": True, + "input_examples": [{"foo": "bar"}], + "input_schema": {"foo": "bar"}, + "strict": True, + "type": "custom", + } + ], + top_k=0, + top_p=0, + anthropic_version="2023-06-01", + ) + assert_matches_type(MessageCreateResponse, message, path=["response"]) + + @parametrize + def test_raw_response_create_overload_1(self, client: SambaNova) -> None: + response = client.messages.with_raw_response.create( + max_tokens=1024, + messages=[ + { + 
"content": "Hello, Claude!", + "role": "user", + } + ], + model="DeepSeek-V3.1", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + message = response.parse() + assert_matches_type(MessageCreateResponse, message, path=["response"]) + + @parametrize + def test_streaming_response_create_overload_1(self, client: SambaNova) -> None: + with client.messages.with_streaming_response.create( + max_tokens=1024, + messages=[ + { + "content": "Hello, Claude!", + "role": "user", + } + ], + model="DeepSeek-V3.1", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + message = response.parse() + assert_matches_type(MessageCreateResponse, message, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_create_overload_2(self, client: SambaNova) -> None: + message_stream = client.messages.create( + max_tokens=1024, + messages=[ + { + "content": "Hello, Claude!", + "role": "user", + } + ], + model="DeepSeek-V3.1", + stream=True, + ) + message_stream.response.close() + + @parametrize + def test_method_create_with_all_params_overload_2(self, client: SambaNova) -> None: + message_stream = client.messages.create( + max_tokens=1024, + messages=[ + { + "content": "Hello, Claude!", + "role": "user", + } + ], + model="DeepSeek-V3.1", + stream=True, + container="container", + metadata={"user_id": "user_id"}, + service_tier="auto", + stop_sequences=["string"], + system="string", + temperature=1, + thinking={"type": "disabled"}, + tool_choice={ + "type": "auto", + "disable_parallel_tool_use": True, + }, + tools=[ + { + "name": "name", + "allowed_callers": ["string"], + "cache_control": { + "type": "ephemeral", + "ttl": "ttl", + }, + "defer_loading": True, + "description": "description", + "eager_input_streaming": True, + "input_examples": [{"foo": "bar"}], + "input_schema": {"foo": "bar"}, + "strict": True, 
+ "type": "custom", + } + ], + top_k=0, + top_p=0, + anthropic_version="2023-06-01", + ) + message_stream.response.close() + + @parametrize + def test_raw_response_create_overload_2(self, client: SambaNova) -> None: + response = client.messages.with_raw_response.create( + max_tokens=1024, + messages=[ + { + "content": "Hello, Claude!", + "role": "user", + } + ], + model="DeepSeek-V3.1", + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + stream.close() + + @parametrize + def test_streaming_response_create_overload_2(self, client: SambaNova) -> None: + with client.messages.with_streaming_response.create( + max_tokens=1024, + messages=[ + { + "content": "Hello, Claude!", + "role": "user", + } + ], + model="DeepSeek-V3.1", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = response.parse() + stream.close() + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_count_tokens(self, client: SambaNova) -> None: + message = client.messages.count_tokens( + messages=[ + { + "content": "Hello, Claude!", + "role": "user", + } + ], + model="DeepSeek-V3.1", + ) + assert_matches_type(MessageCountTokensResponse, message, path=["response"]) + + @parametrize + def test_method_count_tokens_with_all_params(self, client: SambaNova) -> None: + message = client.messages.count_tokens( + messages=[ + { + "content": "Hello, Claude!", + "role": "user", + } + ], + model="DeepSeek-V3.1", + system="string", + thinking={"type": "disabled"}, + tool_choice={ + "type": "auto", + "disable_parallel_tool_use": True, + }, + tools=[ + { + "name": "name", + "allowed_callers": ["string"], + "cache_control": { + "type": "ephemeral", + "ttl": "ttl", + }, + "defer_loading": True, + "description": "description", + "eager_input_streaming": True, + "input_examples": [{"foo": "bar"}], + "input_schema": {"foo": 
"bar"}, + "strict": True, + "type": "custom", + } + ], + anthropic_version="2023-06-01", + ) + assert_matches_type(MessageCountTokensResponse, message, path=["response"]) + + @parametrize + def test_raw_response_count_tokens(self, client: SambaNova) -> None: + response = client.messages.with_raw_response.count_tokens( + messages=[ + { + "content": "Hello, Claude!", + "role": "user", + } + ], + model="DeepSeek-V3.1", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + message = response.parse() + assert_matches_type(MessageCountTokensResponse, message, path=["response"]) + + @parametrize + def test_streaming_response_count_tokens(self, client: SambaNova) -> None: + with client.messages.with_streaming_response.count_tokens( + messages=[ + { + "content": "Hello, Claude!", + "role": "user", + } + ], + model="DeepSeek-V3.1", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + message = response.parse() + assert_matches_type(MessageCountTokensResponse, message, path=["response"]) + + assert cast(Any, response.is_closed) is True + + +class TestAsyncMessages: + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) + + @parametrize + async def test_method_create_overload_1(self, async_client: AsyncSambaNova) -> None: + message = await async_client.messages.create( + max_tokens=1024, + messages=[ + { + "content": "Hello, Claude!", + "role": "user", + } + ], + model="DeepSeek-V3.1", + ) + assert_matches_type(MessageCreateResponse, message, path=["response"]) + + @parametrize + async def test_method_create_with_all_params_overload_1(self, async_client: AsyncSambaNova) -> None: + message = await async_client.messages.create( + max_tokens=1024, + messages=[ + { + "content": "Hello, Claude!", + "role": "user", + } + ], + 
model="DeepSeek-V3.1", + container="container", + metadata={"user_id": "user_id"}, + service_tier="auto", + stop_sequences=["string"], + stream=False, + system="string", + temperature=1, + thinking={"type": "disabled"}, + tool_choice={ + "type": "auto", + "disable_parallel_tool_use": True, + }, + tools=[ + { + "name": "name", + "allowed_callers": ["string"], + "cache_control": { + "type": "ephemeral", + "ttl": "ttl", + }, + "defer_loading": True, + "description": "description", + "eager_input_streaming": True, + "input_examples": [{"foo": "bar"}], + "input_schema": {"foo": "bar"}, + "strict": True, + "type": "custom", + } + ], + top_k=0, + top_p=0, + anthropic_version="2023-06-01", + ) + assert_matches_type(MessageCreateResponse, message, path=["response"]) + + @parametrize + async def test_raw_response_create_overload_1(self, async_client: AsyncSambaNova) -> None: + response = await async_client.messages.with_raw_response.create( + max_tokens=1024, + messages=[ + { + "content": "Hello, Claude!", + "role": "user", + } + ], + model="DeepSeek-V3.1", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + message = await response.parse() + assert_matches_type(MessageCreateResponse, message, path=["response"]) + + @parametrize + async def test_streaming_response_create_overload_1(self, async_client: AsyncSambaNova) -> None: + async with async_client.messages.with_streaming_response.create( + max_tokens=1024, + messages=[ + { + "content": "Hello, Claude!", + "role": "user", + } + ], + model="DeepSeek-V3.1", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + message = await response.parse() + assert_matches_type(MessageCreateResponse, message, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_create_overload_2(self, async_client: AsyncSambaNova) -> None: + message_stream = 
await async_client.messages.create( + max_tokens=1024, + messages=[ + { + "content": "Hello, Claude!", + "role": "user", + } + ], + model="DeepSeek-V3.1", + stream=True, + ) + await message_stream.response.aclose() + + @parametrize + async def test_method_create_with_all_params_overload_2(self, async_client: AsyncSambaNova) -> None: + message_stream = await async_client.messages.create( + max_tokens=1024, + messages=[ + { + "content": "Hello, Claude!", + "role": "user", + } + ], + model="DeepSeek-V3.1", + stream=True, + container="container", + metadata={"user_id": "user_id"}, + service_tier="auto", + stop_sequences=["string"], + system="string", + temperature=1, + thinking={"type": "disabled"}, + tool_choice={ + "type": "auto", + "disable_parallel_tool_use": True, + }, + tools=[ + { + "name": "name", + "allowed_callers": ["string"], + "cache_control": { + "type": "ephemeral", + "ttl": "ttl", + }, + "defer_loading": True, + "description": "description", + "eager_input_streaming": True, + "input_examples": [{"foo": "bar"}], + "input_schema": {"foo": "bar"}, + "strict": True, + "type": "custom", + } + ], + top_k=0, + top_p=0, + anthropic_version="2023-06-01", + ) + await message_stream.response.aclose() + + @parametrize + async def test_raw_response_create_overload_2(self, async_client: AsyncSambaNova) -> None: + response = await async_client.messages.with_raw_response.create( + max_tokens=1024, + messages=[ + { + "content": "Hello, Claude!", + "role": "user", + } + ], + model="DeepSeek-V3.1", + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = await response.parse() + await stream.close() + + @parametrize + async def test_streaming_response_create_overload_2(self, async_client: AsyncSambaNova) -> None: + async with async_client.messages.with_streaming_response.create( + max_tokens=1024, + messages=[ + { + "content": "Hello, Claude!", + "role": "user", + } + ], + model="DeepSeek-V3.1", + stream=True, + ) as 
response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = await response.parse() + await stream.close() + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_count_tokens(self, async_client: AsyncSambaNova) -> None: + message = await async_client.messages.count_tokens( + messages=[ + { + "content": "Hello, Claude!", + "role": "user", + } + ], + model="DeepSeek-V3.1", + ) + assert_matches_type(MessageCountTokensResponse, message, path=["response"]) + + @parametrize + async def test_method_count_tokens_with_all_params(self, async_client: AsyncSambaNova) -> None: + message = await async_client.messages.count_tokens( + messages=[ + { + "content": "Hello, Claude!", + "role": "user", + } + ], + model="DeepSeek-V3.1", + system="string", + thinking={"type": "disabled"}, + tool_choice={ + "type": "auto", + "disable_parallel_tool_use": True, + }, + tools=[ + { + "name": "name", + "allowed_callers": ["string"], + "cache_control": { + "type": "ephemeral", + "ttl": "ttl", + }, + "defer_loading": True, + "description": "description", + "eager_input_streaming": True, + "input_examples": [{"foo": "bar"}], + "input_schema": {"foo": "bar"}, + "strict": True, + "type": "custom", + } + ], + anthropic_version="2023-06-01", + ) + assert_matches_type(MessageCountTokensResponse, message, path=["response"]) + + @parametrize + async def test_raw_response_count_tokens(self, async_client: AsyncSambaNova) -> None: + response = await async_client.messages.with_raw_response.count_tokens( + messages=[ + { + "content": "Hello, Claude!", + "role": "user", + } + ], + model="DeepSeek-V3.1", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + message = await response.parse() + assert_matches_type(MessageCountTokensResponse, message, path=["response"]) + + @parametrize + async def test_streaming_response_count_tokens(self, 
async_client: AsyncSambaNova) -> None: + async with async_client.messages.with_streaming_response.count_tokens( + messages=[ + { + "content": "Hello, Claude!", + "role": "user", + } + ], + model="DeepSeek-V3.1", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + message = await response.parse() + assert_matches_type(MessageCountTokensResponse, message, path=["response"]) + + assert cast(Any, response.is_closed) is True From 546fc59615a07e268930611150b2a91ec0073c74 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Tue, 26 May 2026 21:54:31 +0000 Subject: [PATCH 5/5] release: 1.9.0 --- .release-please-manifest.json | 2 +- CHANGELOG.md | 14 ++++++++++++++ pyproject.toml | 2 +- src/sambanova/_version.py | 2 +- 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 24d5689..4fcfdf7 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "1.8.2" + ".": "1.9.0" } diff --git a/CHANGELOG.md b/CHANGELOG.md index 9523365..3b6a1db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,19 @@ # Changelog +## 1.9.0 (2026-05-26) + +Full Changelog: [v1.8.2...v1.9.0](https://github.com/sambanova/sambanova-python/compare/v1.8.2...v1.9.0) + +### Features + +* **api:** add anthropic compatible messages api support ([0ddfd33](https://github.com/sambanova/sambanova-python/commit/0ddfd334decc86831f840cd8c0de2c3a73e378a2)) +* **internal/types:** support eagerly validating pydantic iterators ([cc4aade](https://github.com/sambanova/sambanova-python/commit/cc4aade45c484980fafacf2e315ca6f1564e0ace)) + + +### Bug Fixes + +* **client:** add missing f-string prefix in file type error message ([c6b84e8](https://github.com/sambanova/sambanova-python/commit/c6b84e853be657a330e6754894911bdaac07dc67)) + ## 1.8.2 (2026-05-07) Full Changelog: 
[v1.8.1...v1.8.2](https://github.com/sambanova/sambanova-python/compare/v1.8.1...v1.8.2) diff --git a/pyproject.toml b/pyproject.toml index 28f88b6..23fbe75 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "sambanova" -version = "1.8.2" +version = "1.9.0" description = "The official Python library for the SambaNova API" dynamic = ["readme"] license = "Apache-2.0" diff --git a/src/sambanova/_version.py b/src/sambanova/_version.py index d3cc5ec..48fad55 100644 --- a/src/sambanova/_version.py +++ b/src/sambanova/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "sambanova" -__version__ = "1.8.2" # x-release-please-version +__version__ = "1.9.0" # x-release-please-version