From 9dff510aef5bdd4d0a059d287fdb98b004a84fc8 Mon Sep 17 00:00:00 2001 From: Federico Kamelhar Date: Sat, 2 May 2026 00:55:48 -0400 Subject: [PATCH] fix: replace deprecated openai.gpt-5.5 model id; add OCIBucketBackend retry strategy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two production audit fixes from the 2026 best-in-class assessment. 1. Model id correction `openai.gpt-5.5` is not a real model in OCI's catalogue (the actual id is `openai.gpt-5`). The deprecated form had spread across 36 files in docs, examples, and provider docstrings — meaning a new user copying the quickstart would hit ModelNotFoundError on first run. Replaced everywhere; added a regression test (`tests/unit/test_model_id_consistency.py`) that fails if `gpt-5.5` ever reappears. 2. OCIBucketBackend retry strategy The OCI Object Storage checkpointer made every put/get/delete/list call without a retry strategy, so any transient 429 / 5xx / connection error caused an immediate save failure. Now the backend threads `oci.retry.DEFAULT_RETRY_STRATEGY` (exponential backoff + jitter) into all six call sites by default, with an explicit `retry_strategy=` constructor kwarg for override (pass `oci.retry.NoneRetryStrategy()` to disable). Six new unit tests in `tests/unit/test_oci_bucket_retry.py` mock the OCI client and assert the strategy is preserved across put_object / get_object / delete_object / list_objects / get_bucket / create_bucket. Files: 40 changed (36 model-id refs + oci_bucket.py + 2 new test files + .gitignore); all 3,212 unit tests pass; pre-commit clean; mkdocs --strict clean. Audit issues filed for the larger items: #32 (agent.py decomposition), #33 (memory/store.py redesign), #34 (mypy ignore_errors migration), #35 (Anthropic prompt caching), #36 (OpenAI structured outputs at provider layer), #37 (Dockerfile + Helm chart). 
Signed-off-by: Federico Kamelhar --- .gitignore | 1 + README.md | 4 +- docs/concepts/agent-loop.md | 2 +- docs/concepts/agent.md | 4 +- docs/concepts/conversation-management.md | 2 +- docs/concepts/executors.md | 2 +- docs/concepts/interrupts.md | 4 +- docs/concepts/models.md | 4 +- docs/concepts/multi-agent.md | 2 +- docs/concepts/multi-agent/a2a.md | 2 +- docs/concepts/multi-agent/handoff.md | 8 +- docs/concepts/multi-agent/orchestrator.md | 8 +- docs/concepts/multi-agent/swarm.md | 6 +- docs/concepts/prompts.md | 2 +- docs/concepts/providers/oci.md | 4 +- docs/concepts/providers/ollama.md | 2 +- docs/concepts/providers/openai.md | 10 +- docs/concepts/reasoning.md | 2 +- docs/concepts/retry.md | 2 +- docs/concepts/safety.md | 2 +- docs/how-to/deploy.md | 2 +- docs/how-to/oci-dac.md | 4 +- docs/how-to/oci-models.md | 14 +-- docs/how-to/persist-conversations.md | 4 +- docs/how-to/quickstart.md | 6 +- docs/img/sequence-26ai.svg | 2 +- docs/index.md | 4 +- examples/.env.example | 4 +- examples/README.md | 2 +- examples/agent_gist.py | 2 +- examples/coding_assistant.py | 2 +- examples/complex_agent.py | 4 +- examples/config.py | 6 +- examples/tutorial_29_model_providers.py | 6 +- src/locus/memory/backends/oci_bucket.py | 28 +++++ src/locus/models/__init__.py | 4 +- src/locus/models/native/openai.py | 2 +- .../models/providers/oci/openai_compat.py | 2 +- tests/unit/test_model_id_consistency.py | 70 ++++++++++++ tests/unit/test_oci_bucket_retry.py | 102 ++++++++++++++++++ 40 files changed, 272 insertions(+), 71 deletions(-) create mode 100644 tests/unit/test_model_id_consistency.py create mode 100644 tests/unit/test_oci_bucket_retry.py diff --git a/.gitignore b/.gitignore index 7c7ee5e1..35f4d766 100644 --- a/.gitignore +++ b/.gitignore @@ -255,3 +255,4 @@ examples/start_and_test.sh # Old tutorials directory (superseded by examples/tutorial_*.py) tutorials/ site/ +.claude/ diff --git a/README.md b/README.md index ce670825..b638b337 100644 --- a/README.md +++ b/README.md 
@@ -58,7 +58,7 @@ def book_flight(flight_id: str, customer_id: str) -> dict: return billing.charge_and_book(flight_id, customer_id) agent = Agent( - model="oci:openai.gpt-5.5", + model="oci:openai.gpt-5", tools=[search_flights, book_flight], system_prompt="You are a travel concierge. Find a flight, then book it.", reflexion=True, # self-correct mid-run @@ -191,7 +191,7 @@ def book_meeting(date: str, attendees: list[str]) -> dict: return calendar.book(date, attendees) agent = Agent( - model="oci:openai.gpt-5.5", + model="oci:openai.gpt-5", tools=[get_today_date, book_meeting], system_prompt="You are a scheduling assistant.", ) diff --git a/docs/concepts/agent-loop.md b/docs/concepts/agent-loop.md index 4bc9b1d0..fe7cfde0 100644 --- a/docs/concepts/agent-loop.md +++ b/docs/concepts/agent-loop.md @@ -369,7 +369,7 @@ def submit_po(vendor_id: str, amount_usd: float) -> dict: return finance.submit(vendor_id, amount_usd) agent = Agent( - model="oci:openai.gpt-5.5", + model="oci:openai.gpt-5", tools=[search_vendors, submit_po], system_prompt="You are a procurement officer.", reflexion=True, # turn Reflect on diff --git a/docs/concepts/agent.md b/docs/concepts/agent.md index cdf9d590..4662cf22 100644 --- a/docs/concepts/agent.md +++ b/docs/concepts/agent.md @@ -78,7 +78,7 @@ from locus import Agent from locus.agent import AgentConfig cfg = AgentConfig( - model="oci:openai.gpt-5.5", # see how-to/oci-models.md + model="oci:openai.gpt-5", # see how-to/oci-models.md tools=[...], system_prompt="...", max_iterations=50, @@ -118,7 +118,7 @@ class VendorList(BaseModel): vendors: list[str] agent = Agent( - model="oci:openai.gpt-5.5", + model="oci:openai.gpt-5", tools=[search, book_flight], output_schema=VendorList, termination=MaxIterations(8) | ToolCalled("book_flight"), diff --git a/docs/concepts/conversation-management.md b/docs/concepts/conversation-management.md index 1146f404..7612d2fc 100644 --- a/docs/concepts/conversation-management.md +++ 
b/docs/concepts/conversation-management.md @@ -12,7 +12,7 @@ from locus import Agent from locus.memory.backends import OCIBucketBackend agent = Agent( - model="oci:openai.gpt-5.5", + model="oci:openai.gpt-5", tools=[...], checkpointer=OCIBucketBackend( bucket_name="locus-threads", diff --git a/docs/concepts/executors.md b/docs/concepts/executors.md index 1a37bc75..ba551b9f 100644 --- a/docs/concepts/executors.md +++ b/docs/concepts/executors.md @@ -7,7 +7,7 @@ time**: ```python agent = Agent( - model="oci:openai.gpt-5.5", + model="oci:openai.gpt-5", tools=[search_flights, search_hotels, search_restaurants], tool_execution="concurrent", # default — fan out # tool_execution="sequential", # opt-in — one at a time diff --git a/docs/concepts/interrupts.md b/docs/concepts/interrupts.md index f5f58a66..2b19073f 100644 --- a/docs/concepts/interrupts.md +++ b/docs/concepts/interrupts.md @@ -25,7 +25,7 @@ def submit_po(vendor_id: str, amount_usd: float) -> dict: return finance.submit(vendor_id, amount_usd) agent = Agent( - model="oci:openai.gpt-5.5", + model="oci:openai.gpt-5", tools=[search_vendors, request_human_approval, submit_po], system_prompt=( "You are a procurement officer. " @@ -78,7 +78,7 @@ from locus.hooks.builtin.steering import SteeringHook agent = Agent( ..., hooks=[SteeringHook( - judge_model="oci:openai.gpt-5.5-mini", + judge_model="oci:openai.gpt-5-mini", policy="Reject any tool call that doesn't match the user's stated request.", )], ) diff --git a/docs/concepts/models.md b/docs/concepts/models.md index eda80673..67ab473b 100644 --- a/docs/concepts/models.md +++ b/docs/concepts/models.md @@ -4,7 +4,7 @@ A model is a string. Pick the provider's prefix; locus picks the client. ```python -agent = Agent(model="oci:openai.gpt-5.5", ...) # OCI → V1 +agent = Agent(model="oci:openai.gpt-5", ...) # OCI → V1 agent = Agent(model="oci:cohere.command-r-plus", ...) # OCI → SDK agent = Agent(model="oci:meta.llama-3.3-70b-instruct", ...) 
# OCI → V1 agent = Agent(model="openai:gpt-4o", ...) # OpenAI direct @@ -96,7 +96,7 @@ from locus.models.pooled import PooledModel agent = Agent( model=PooledModel( - primary="oci:openai.gpt-5.5", + primary="oci:openai.gpt-5", fallbacks=["openai:gpt-4o", "anthropic:claude-sonnet"], ), ..., diff --git a/docs/concepts/multi-agent.md b/docs/concepts/multi-agent.md index 70ef51be..9d4e983b 100644 --- a/docs/concepts/multi-agent.md +++ b/docs/concepts/multi-agent.md @@ -76,7 +76,7 @@ compliance = Specialist(name="compliance", agent=compliance_agent, description="Vets vendors against SOC2 / ISO posture.") orchestrator = Orchestrator( - coordinator_model="oci:openai.gpt-5.5", + coordinator_model="oci:openai.gpt-5", specialists=[procurement, compliance], system_prompt="You are the procurement lead. Delegate to the right specialist.", ) diff --git a/docs/concepts/multi-agent/a2a.md b/docs/concepts/multi-agent/a2a.md index 7097678b..2896efbf 100644 --- a/docs/concepts/multi-agent/a2a.md +++ b/docs/concepts/multi-agent/a2a.md @@ -51,7 +51,7 @@ from locus import Agent from locus.a2a.protocol import A2AServer, AgentCard research_agent = Agent( - model="oci:openai.gpt-5.5", + model="oci:openai.gpt-5", tools=[search_corpus, summarise, cite], system_prompt="You read the vendor catalogue and quote prices.", checkpointer=OCIBucketBackend(bucket_name="research-threads"), diff --git a/docs/concepts/multi-agent/handoff.md b/docs/concepts/multi-agent/handoff.md index 51236687..6c0442a3 100644 --- a/docs/concepts/multi-agent/handoff.md +++ b/docs/concepts/multi-agent/handoff.md @@ -55,7 +55,7 @@ State, checkpointer, and `thread_id` survive. from locus.multiagent import Handoff triage = Agent( - model="oci:openai.gpt-5.5", + model="oci:openai.gpt-5", tools=[lookup_order, lookup_account], system_prompt=( "You triage incoming customer messages. 
" @@ -64,17 +64,17 @@ triage = Agent( ), ) billing = Agent( - model="oci:openai.gpt-5.5", + model="oci:openai.gpt-5", tools=[issue_refund, retry_charge], system_prompt="You handle billing escalations.", ) shipping = Agent( - model="oci:openai.gpt-5.5", + model="oci:openai.gpt-5", tools=[track_shipment, request_redelivery], system_prompt="You handle shipping issues.", ) returns = Agent( - model="oci:openai.gpt-5.5", + model="oci:openai.gpt-5", tools=[create_return_label], system_prompt="You handle returns.", ) diff --git a/docs/concepts/multi-agent/orchestrator.md b/docs/concepts/multi-agent/orchestrator.md index 98ecffb0..4895bd4f 100644 --- a/docs/concepts/multi-agent/orchestrator.md +++ b/docs/concepts/multi-agent/orchestrator.md @@ -44,7 +44,7 @@ from locus.multiagent import Orchestrator, Specialist procurement = Specialist( name="procurement", agent=Agent( - model="oci:openai.gpt-5.5", + model="oci:openai.gpt-5", tools=[search_vendors, quote_prices], system_prompt="You are the Procurement specialist.", ), @@ -54,7 +54,7 @@ procurement = Specialist( compliance = Specialist( name="compliance", agent=Agent( - model="oci:openai.gpt-5.5", + model="oci:openai.gpt-5", tools=[check_soc2, check_iso, search_legal_terms], system_prompt="You are the Compliance specialist.", ), @@ -64,7 +64,7 @@ compliance = Specialist( approver = Specialist( name="approver", agent=Agent( - model="oci:openai.gpt-5.5", + model="oci:openai.gpt-5", tools=[submit_po, email_cfo], # ← idempotent writes system_prompt="You are the Approval Officer.", ), @@ -72,7 +72,7 @@ approver = Specialist( ) orchestrator = Orchestrator( - coordinator_model="oci:openai.gpt-5.5", + coordinator_model="oci:openai.gpt-5", specialists=[procurement, compliance, approver], system_prompt=( "You are the procurement lead. 
Delegate research to procurement, " diff --git a/docs/concepts/multi-agent/swarm.md b/docs/concepts/multi-agent/swarm.md index d3173bb0..c7437b87 100644 --- a/docs/concepts/multi-agent/swarm.md +++ b/docs/concepts/multi-agent/swarm.md @@ -43,17 +43,17 @@ exits when the queue empties or `max_iterations` is hit. from locus.multiagent import Swarm researcher = Agent( - model="oci:openai.gpt-5.5", + model="oci:openai.gpt-5", tools=[search_corpus, summarise], system_prompt="You are a researcher. Read, summarise, post follow-ups.", ) fact_checker = Agent( - model="oci:openai.gpt-5.5", + model="oci:openai.gpt-5", tools=[verify_claim, search_corpus], system_prompt="You are a fact-checker. Verify claims, flag conflicts.", ) writer = Agent( - model="oci:openai.gpt-5.5", + model="oci:openai.gpt-5", tools=[draft, revise], system_prompt="You are a writer. Take vetted summaries, draft prose.", ) diff --git a/docs/concepts/prompts.md b/docs/concepts/prompts.md index 2b605467..49361d2b 100644 --- a/docs/concepts/prompts.md +++ b/docs/concepts/prompts.md @@ -18,7 +18,7 @@ You don't usually configure 2 and 3 directly. You configure 1. ```python agent = Agent( - model="oci:openai.gpt-5.5", + model="oci:openai.gpt-5", tools=[search_flights, book_flight], system_prompt=( "You are a travel concierge. " diff --git a/docs/concepts/providers/oci.md b/docs/concepts/providers/oci.md index e1761391..c7a105ef 100644 --- a/docs/concepts/providers/oci.md +++ b/docs/concepts/providers/oci.md @@ -67,7 +67,7 @@ you know about prompting OpenAI carries over: real SSE streaming, OpenAI-style function calling, structured output, vision input. 
```python -agent = Agent(model="oci:openai.gpt-5.5") # OpenAI commercial +agent = Agent(model="oci:openai.gpt-5") # OpenAI commercial agent = Agent(model="oci:meta.llama-3.3-70b-instruct") # Meta Llama agent = Agent(model="oci:anthropic.claude-sonnet") # Claude — no Anthropic key needed ``` @@ -155,7 +155,7 @@ export OCI_REGION=us-chicago-1 from locus import Agent agent = Agent( - model="oci:openai.gpt-5.5", + model="oci:openai.gpt-5", system_prompt="You are a helpful assistant.", ) ``` diff --git a/docs/concepts/providers/ollama.md b/docs/concepts/providers/ollama.md index 48ca4069..e4974405 100644 --- a/docs/concepts/providers/ollama.md +++ b/docs/concepts/providers/ollama.md @@ -138,7 +138,7 @@ swap one line to point at OCI / OpenAI / Anthropic for production. agent = Agent(model="ollama:llama3.3", tools=[...], system_prompt="...") # Production — same agent, swap the model id: -agent = Agent(model="oci:openai.gpt-5.5", tools=[...], system_prompt="...") +agent = Agent(model="oci:openai.gpt-5", tools=[...], system_prompt="...") ``` Everything else — tools, hooks, checkpointers, termination, RAG — diff --git a/docs/concepts/providers/openai.md b/docs/concepts/providers/openai.md index d3c21c8e..58105012 100644 --- a/docs/concepts/providers/openai.md +++ b/docs/concepts/providers/openai.md @@ -33,12 +33,12 @@ That's the only setup. locus reads the env var automatically. ```python from locus import Agent -agent = Agent(model="openai:gpt-5.5", system_prompt="You are helpful.") +agent = Agent(model="openai:gpt-5", system_prompt="You are helpful.") ``` -The string `"openai:gpt-5.5"` does two things: tells locus to use the +The string `"openai:gpt-5"` does two things: tells locus to use the OpenAI provider (`openai:` prefix), and which model id to call -(`gpt-5.5`). Any model id OpenAI accepts, locus accepts. +(`gpt-5`). Any model id OpenAI accepts, locus accepts. ### 3. Run it @@ -55,7 +55,7 @@ without further configuration. 
### Chat completions across the GPT family -Every chat-shaped OpenAI model: `gpt-4o`, `gpt-4.1`, `gpt-5`, `gpt-5.5`, +Every chat-shaped OpenAI model: `gpt-4o`, `gpt-4.1`, `gpt-5`, `gpt-image-1`. Vision input (image URLs / base64), audio input, and function calling work the same way you'd use them on the OpenAI SDK directly — locus just normalises the events the model emits. @@ -107,7 +107,7 @@ class Answer(BaseModel): confidence: float agent = Agent( - model="openai:gpt-5.5", + model="openai:gpt-5", output_schema=Answer, system_prompt="Reply as JSON matching the schema.", ) diff --git a/docs/concepts/reasoning.md b/docs/concepts/reasoning.md index 7b3dd530..93fd531d 100644 --- a/docs/concepts/reasoning.md +++ b/docs/concepts/reasoning.md @@ -6,7 +6,7 @@ single argument on `Agent(...)`. ```python agent = Agent( - model="oci:openai.gpt-5.5", + model="oci:openai.gpt-5", tools=[search, summarise, validate_claim], reflexion=True, # self-evaluate per turn grounding=True, # LLM-as-judge claim verification diff --git a/docs/concepts/retry.md b/docs/concepts/retry.md index c9fe2b34..d5f42688 100644 --- a/docs/concepts/retry.md +++ b/docs/concepts/retry.md @@ -28,7 +28,7 @@ from locus.hooks.builtin.retry import ModelRetryHook from locus import Agent agent = Agent( - model="oci:openai.gpt-5.5", + model="oci:openai.gpt-5", tools=[...], hooks=[ ModelRetryHook( diff --git a/docs/concepts/safety.md b/docs/concepts/safety.md index 94756a3e..be20eb53 100644 --- a/docs/concepts/safety.md +++ b/docs/concepts/safety.md @@ -52,7 +52,7 @@ agent = Agent( tools=[search, send_email, transfer], hooks=[ SteeringHook( - judge_model="oci:openai.gpt-5.5-mini", + judge_model="oci:openai.gpt-5-mini", policy="The user came in to ask about flights. 
Reject any tool call unrelated to flights.", ), ], diff --git a/docs/how-to/deploy.md b/docs/how-to/deploy.md index 72f91be9..26e44a3b 100644 --- a/docs/how-to/deploy.md +++ b/docs/how-to/deploy.md @@ -13,7 +13,7 @@ from locus.server import AgentServer from locus.memory.backends import OCIBucketBackend agent = Agent( - model="oci:openai.gpt-5.5", + model="oci:openai.gpt-5", tools=[...], system_prompt="...", checkpointer=OCIBucketBackend( diff --git a/docs/how-to/oci-dac.md b/docs/how-to/oci-dac.md index ab7d510f..b2072fd4 100644 --- a/docs/how-to/oci-dac.md +++ b/docs/how-to/oci-dac.md @@ -2,8 +2,8 @@ OCI GenAI exposes two serving modes: -- **On-demand** — pay-per-token against a shared model id (`openai.gpt-5.5`, - `cohere.command-r-plus-08-2024`, …). What `Agent(model="oci:openai.gpt-5.5")` +- **On-demand** — pay-per-token against a shared model id (`openai.gpt-5`, + `cohere.command-r-plus-08-2024`, …). What `Agent(model="oci:openai.gpt-5")` has been using by default. - **Dedicated AI Cluster (DAC)** — provisioned capacity exposed as a *generative AI endpoint* OCID diff --git a/docs/how-to/oci-models.md b/docs/how-to/oci-models.md index d0b1d1bc..d9cdd9f6 100644 --- a/docs/how-to/oci-models.md +++ b/docs/how-to/oci-models.md @@ -30,7 +30,7 @@ from locus import Agent from locus.models import OCIOpenAIModel model = OCIOpenAIModel( - model="openai.gpt-5.5", + model="openai.gpt-5", profile="DEFAULT", # any [profile] in ~/.oci/config ) @@ -51,7 +51,7 @@ import os from locus.models import OCIOpenAIModel model = OCIOpenAIModel( - model="openai.gpt-5.5", + model="openai.gpt-5", auth_type="instance_principal", # or "resource_principal" compartment_id=os.environ["OCI_COMPARTMENT_ID"], ) @@ -68,7 +68,7 @@ identity. - **Real SSE streaming** — `agent.run(...)` yields events as the model produces tokens, not after the full response arrives. - **Day-0 model coverage** — when OCI publishes a new model id (e.g. - `openai.gpt-5.5` on launch day), it works immediately. 
No `oci` + `openai.gpt-5` on launch day), it works immediately. No `oci` package release needed. - **Standard OpenAI request shape** — tool calls, system messages, multimodal content, and seed/penalty/top_p knobs work the same way as @@ -101,7 +101,7 @@ an OCI-side limitation of the legacy endpoint, not a locus issue. from locus.models import get_model # Uses OCIOpenAIModel -m1 = get_model("oci:openai.gpt-5.5", profile="DEFAULT") +m1 = get_model("oci:openai.gpt-5", profile="DEFAULT") # Uses OCIModel (Cohere R-series) m2 = get_model("oci:cohere.command-r-plus", profile_name="DEFAULT", auth_type="api_key") @@ -131,9 +131,9 @@ side-by-side. - **OpenAI Responses API on OCI.** Locus deliberately stays on chat/completions — the Responses API is built around server-side conversation state which conflicts with locus's own memory and tool - layers. Practical consequence: `openai.gpt-5.5-pro` (Responses-only on + layers. Practical consequence: `openai.gpt-5-pro` (Responses-only on OCI per the day-0 announcement) is not reachable from locus today. - Regular `openai.gpt-5.5` works fine on V1. + Regular `openai.gpt-5` works fine on V1. - **Cohere R-series on V1.** OCI's `/openai/v1` returns `400 Unsupported OpenAI operation` for these. Use `OCIModel`. - **GenAI API key auth (Bearer token).** A "create an API key in the @@ -148,7 +148,7 @@ side-by-side. 
# V1 path (any non-Cohere-R model) OCI_PROFILE=DEFAULT \ OCI_REGION=us-chicago-1 \ -OCI_MODEL_ID=openai.gpt-5.5 \ +OCI_MODEL_ID=openai.gpt-5 \ pytest tests/integration/test_oci_openai_compat_integration.py # SDK path (Cohere R-series) diff --git a/docs/how-to/persist-conversations.md b/docs/how-to/persist-conversations.md index 13ca3fda..fbf37295 100644 --- a/docs/how-to/persist-conversations.md +++ b/docs/how-to/persist-conversations.md @@ -36,7 +36,7 @@ checkpointer = OCIBucketBackend( ) agent = Agent( - model="oci:openai.gpt-5.5", # any OCI model — see how-to/oci-models.md + model="oci:openai.gpt-5", # any OCI model — see how-to/oci-models.md tools=[...], checkpointer=checkpointer, ) @@ -50,7 +50,7 @@ from locus.memory.backends import postgresql_checkpointer checkpointer = postgresql_checkpointer( dsn="postgresql://locus:locus@db.example.com:5432/locus", ) -agent = Agent(model="oci:openai.gpt-5.5", tools=[...], checkpointer=checkpointer) +agent = Agent(model="oci:openai.gpt-5", tools=[...], checkpointer=checkpointer) ``` If you build a storage backend directly (`RedisBackend(...)`, diff --git a/docs/how-to/quickstart.md b/docs/how-to/quickstart.md index 6bb8f704..7b605e03 100644 --- a/docs/how-to/quickstart.md +++ b/docs/how-to/quickstart.md @@ -56,7 +56,7 @@ def search_books(topic: str) -> list[str]: return [f"{topic} for Beginners", f"Advanced {topic}"] agent = Agent( - model="oci:openai.gpt-5.5", + model="oci:openai.gpt-5", tools=[add, search_books], system_prompt="You are a helpful assistant.", ) @@ -115,7 +115,7 @@ checkpointer and a `thread_id`: from locus.memory.backends.file import FileCheckpointer agent = Agent( - model="oci:openai.gpt-5.5", + model="oci:openai.gpt-5", tools=[...], system_prompt="...", checkpointer=FileCheckpointer(directory="./threads"), @@ -147,7 +147,7 @@ def submit_order(item_id: str, qty: int) -> dict: return shop.submit(item_id, qty) agent = Agent( - model="oci:openai.gpt-5.5", + model="oci:openai.gpt-5", tools=[search_catalog, 
submit_order], system_prompt="...", reflexion=True, diff --git a/docs/img/sequence-26ai.svg b/docs/img/sequence-26ai.svg index 0173c8e5..609a3ed6 100644 --- a/docs/img/sequence-26ai.svg +++ b/docs/img/sequence-26ai.svg @@ -54,7 +54,7 @@ OCI GenAI - gpt-5.5 · cohere-embed + gpt-5 · cohere-embed diff --git a/docs/index.md b/docs/index.md index e957f6aa..c9ae0c1c 100644 --- a/docs/index.md +++ b/docs/index.md @@ -53,7 +53,7 @@ def book_flight(flight_id: str, customer_id: str) -> dict: return billing.charge_and_book(flight_id, customer_id) agent = Agent( - model="oci:openai.gpt-5.5", + model="oci:openai.gpt-5", tools=[search_flights, book_flight], system_prompt="You are a travel concierge. Find a flight, then book it.", reflexion=True, # self-correct mid-run @@ -294,7 +294,7 @@ def book_flight(flight_id: str, customer_id: str) -> dict: return billing.charge_and_book(flight_id, customer_id) agent = Agent( - model="oci:openai.gpt-5.5", + model="oci:openai.gpt-5", tools=[book_flight], system_prompt="You are a travel concierge. Book the flight the user asks for.", ) diff --git a/examples/.env.example b/examples/.env.example index cb39aa72..e888c908 100644 --- a/examples/.env.example +++ b/examples/.env.example @@ -12,7 +12,7 @@ LOCUS_MODEL_PROVIDER=mock # Recommended for OpenAI / Meta / xAI / Mistral / Gemini families: # LOCUS_MODEL_PROVIDER=oci -# LOCUS_MODEL_ID=openai.gpt-5.5 +# LOCUS_MODEL_ID=openai.gpt-5 # LOCUS_OCI_PROFILE=DEFAULT # LOCUS_OCI_REGION=us-chicago-1 # default @@ -25,7 +25,7 @@ LOCUS_MODEL_PROVIDER=mock # Workload identity on OCI compute / OKE / Functions: # LOCUS_MODEL_PROVIDER=oci -# LOCUS_MODEL_ID=openai.gpt-5.5 +# LOCUS_MODEL_ID=openai.gpt-5 # LOCUS_OCI_AUTH_TYPE=instance_principal # or resource_principal # LOCUS_OCI_COMPARTMENT=ocid1.compartment.oc1... 
# required for principal auth diff --git a/examples/README.md b/examples/README.md index ddb5def6..e6b7d258 100644 --- a/examples/README.md +++ b/examples/README.md @@ -7,7 +7,7 @@ from locus import Agent from locus.models import OCIOpenAIModel model = OCIOpenAIModel( - model="openai.gpt-5.5", + model="openai.gpt-5", profile="MY_PROFILE", # any profile in ~/.oci/config ) diff --git a/examples/agent_gist.py b/examples/agent_gist.py index 8f498e75..f2653756 100644 --- a/examples/agent_gist.py +++ b/examples/agent_gist.py @@ -18,7 +18,7 @@ def main(): model = OCIOpenAIModel( - model="openai.gpt-5.5", + model="openai.gpt-5", profile=os.environ.get("OCI_PROFILE", "DEFAULT"), ) diff --git a/examples/coding_assistant.py b/examples/coding_assistant.py index 0a41fa37..de20bc7c 100755 --- a/examples/coding_assistant.py +++ b/examples/coding_assistant.py @@ -112,7 +112,7 @@ def get_model(): """ profile = os.getenv("OCI_PROFILE") if profile: - model_id = os.getenv("OCI_MODEL_ID", "openai.gpt-5.5") + model_id = os.getenv("OCI_MODEL_ID", "openai.gpt-5") if model_id.lower().startswith("cohere.command-r"): from locus.models import OCIModel diff --git a/examples/complex_agent.py b/examples/complex_agent.py index d821bbc7..33022636 100644 --- a/examples/complex_agent.py +++ b/examples/complex_agent.py @@ -454,7 +454,7 @@ async def run_complex_agent(): # Create model model = OCIOpenAIModel( - model="openai.gpt-5.5", + model="openai.gpt-5", profile=os.environ.get("OCI_PROFILE", "DEFAULT"), region=os.environ.get("OCI_REGION", "us-chicago-1"), max_tokens=2048, @@ -542,7 +542,7 @@ async def run_structured_output_demo(): print() model = OCIOpenAIModel( - model="openai.gpt-5.5", + model="openai.gpt-5", profile=os.environ.get("OCI_PROFILE", "DEFAULT"), region=os.environ.get("OCI_REGION", "us-chicago-1"), ) diff --git a/examples/config.py b/examples/config.py index b3de6a9f..d7764707 100644 --- a/examples/config.py +++ b/examples/config.py @@ -37,7 +37,7 @@ # Run with OCI GenAI (V1 
transport, OpenAI-compatible endpoint): export LOCUS_MODEL_PROVIDER=oci - export LOCUS_MODEL_ID=openai.gpt-5.5 + export LOCUS_MODEL_ID=openai.gpt-5 export LOCUS_OCI_PROFILE=MY_PROFILE python examples/tutorial_01_basic_agent.py @@ -50,7 +50,7 @@ # Run with OCI on an OCI VM / OKE node (workload identity): export LOCUS_MODEL_PROVIDER=oci - export LOCUS_MODEL_ID=openai.gpt-5.5 + export LOCUS_MODEL_ID=openai.gpt-5 export LOCUS_OCI_AUTH_TYPE=instance_principal export LOCUS_OCI_COMPARTMENT=ocid1.compartment.oc1... @@ -189,7 +189,7 @@ def _pick_oci_transport(model_id: str) -> str: def _get_oci_model(**kwargs: Any) -> Any: """Get an OCI GenAI model — picks V1 vs SDK transport per model family.""" - model_id = os.environ.get("LOCUS_MODEL_ID", "openai.gpt-5.5") + model_id = os.environ.get("LOCUS_MODEL_ID", "openai.gpt-5") transport = _pick_oci_transport(model_id) if transport == "v1": return _get_oci_v1_model(model_id, **kwargs) diff --git a/examples/tutorial_29_model_providers.py b/examples/tutorial_29_model_providers.py index d5e1e7d2..687c8595 100644 --- a/examples/tutorial_29_model_providers.py +++ b/examples/tutorial_29_model_providers.py @@ -36,7 +36,7 @@ def example_providers(): print("\nUsage:") print(' model = get_model("openai:gpt-4o")') - print(' model = get_model("oci:openai.gpt-5.5", profile="DEFAULT") # → OCIOpenAIModel') + print(' model = get_model("oci:openai.gpt-5", profile="DEFAULT") # → OCIOpenAIModel') print( ' model = get_model("oci:cohere.command-r-plus", ' 'profile_name="DEFAULT", auth_type="api_key") # → OCIModel' @@ -61,11 +61,11 @@ def example_direct(): # OCI GenAI — V1 transport (recommended for OpenAI/Meta/xAI/Mistral/Gemini) print("OCI GenAI — V1 (/openai/v1):") print(" from locus.models import OCIOpenAIModel") - print(' model = OCIOpenAIModel(model="openai.gpt-5.5", profile="DEFAULT")') + print(' model = OCIOpenAIModel(model="openai.gpt-5", profile="DEFAULT")') print() print(" # Workload identity on OCI VM / OKE / Functions:") print(" model = 
OCIOpenAIModel(") - print(' model="openai.gpt-5.5",') + print(' model="openai.gpt-5",') print(' auth_type="instance_principal", # or "resource_principal"') print(' compartment_id="ocid1.compartment.oc1...",') print(" )") diff --git a/src/locus/memory/backends/oci_bucket.py b/src/locus/memory/backends/oci_bucket.py index c34fe8e0..b30b464a 100644 --- a/src/locus/memory/backends/oci_bucket.py +++ b/src/locus/memory/backends/oci_bucket.py @@ -94,6 +94,7 @@ def __init__( profile_name: str = "DEFAULT", auth_type: str = "api_key", region: str | None = None, + retry_strategy: Any = None, **kwargs: Any, ) -> None: self.config = OCIBucketConfig( @@ -107,6 +108,22 @@ def __init__( ) self._client: ObjectStorageClient | None = None self._initialized = False + # Override the default retry strategy by passing one explicitly. + # Default (None) resolves to ``oci.retry.DEFAULT_RETRY_STRATEGY`` at + # first call — exponential backoff with jitter on 429 / 5xx / + # transport errors. Pass ``oci.retry.NoneRetryStrategy()`` to + # disable retries (e.g. for tests). 
+ self._retry_strategy: Any = retry_strategy + + def _get_retry_strategy(self) -> Any: + """Return the OCI retry strategy used for every transient-prone call.""" + if self._retry_strategy is not None: + return self._retry_strategy + try: + import oci + except ImportError: # pragma: no cover + return None + return oci.retry.DEFAULT_RETRY_STRATEGY # ------------------------------------------------------------------ # Capabilities @@ -173,10 +190,12 @@ async def _ensure_bucket(self) -> None: def check_bucket(): client = self._get_client() + retry = self._get_retry_strategy() try: client.get_bucket( namespace_name=self.config.namespace, bucket_name=self.config.bucket_name, + retry_strategy=retry, ) except Exception as e: if "BucketNotFound" in str(e) and self.config.compartment_id: @@ -190,6 +209,7 @@ def check_bucket(): storage_tier="Standard", public_access_type="NoPublicAccess", ), + retry_strategy=retry, ) else: raise @@ -223,6 +243,7 @@ async def _put_json(self, object_name: str, payload: dict[str, Any]) -> None: async def _put_bytes(self, object_name: str, body: bytes, content_type: str) -> None: client = self._get_client() + retry = self._get_retry_strategy() def _put(): client.put_object( @@ -231,6 +252,7 @@ def _put(): object_name=object_name, put_object_body=body, content_type=content_type, + retry_strategy=retry, ) await asyncio.to_thread(_put) @@ -243,6 +265,7 @@ async def _get_json(self, object_name: str) -> dict[str, Any] | None: async def _get_bytes(self, object_name: str) -> bytes | None: client = self._get_client() + retry = self._get_retry_strategy() def _get() -> bytes | None: try: @@ -250,6 +273,7 @@ def _get() -> bytes | None: namespace_name=self.config.namespace, bucket_name=self.config.bucket_name, object_name=object_name, + retry_strategy=retry, ) return response.data.content except Exception as e: @@ -261,6 +285,7 @@ def _get() -> bytes | None: async def _delete_object(self, object_name: str) -> bool: client = self._get_client() + retry = 
self._get_retry_strategy() def _delete() -> bool: try: @@ -268,6 +293,7 @@ def _delete() -> bool: namespace_name=self.config.namespace, bucket_name=self.config.bucket_name, object_name=object_name, + retry_strategy=retry, ) return True except Exception as e: @@ -285,6 +311,7 @@ async def _list_objects( ) -> tuple[list[Any], list[str]]: """Return (objects, prefixes) from a ListObjects call.""" client = self._get_client() + retry = self._get_retry_strategy() def _list(): kwargs: dict[str, Any] = { @@ -293,6 +320,7 @@ def _list(): "prefix": prefix, "limit": min(limit, 1000), "fields": "name,timeModified,size", + "retry_strategy": retry, } if delimiter is not None: kwargs["delimiter"] = delimiter diff --git a/src/locus/models/__init__.py b/src/locus/models/__init__.py index 4cfb4878..14d5c0f6 100644 --- a/src/locus/models/__init__.py +++ b/src/locus/models/__init__.py @@ -23,7 +23,7 @@ # OCI GenAI — V1 transport (recommended) from locus.models import OCIOpenAIModel - model = OCIOpenAIModel(model="openai.gpt-5.5", profile="DEFAULT") + model = OCIOpenAIModel(model="openai.gpt-5", profile="DEFAULT") # OCI GenAI — Cohere R-series from locus.models import OCIModel @@ -35,7 +35,7 @@ # String factory — auto-routes to the right transport from locus.models import get_model - model = get_model("oci:openai.gpt-5.5", profile="DEFAULT") + model = get_model("oci:openai.gpt-5", profile="DEFAULT") """ from locus.models.base import ( diff --git a/src/locus/models/native/openai.py b/src/locus/models/native/openai.py index 1eade27f..93578d86 100644 --- a/src/locus/models/native/openai.py +++ b/src/locus/models/native/openai.py @@ -164,7 +164,7 @@ def _uses_max_completion_tokens(model: str) -> bool: Detects the o1 / o3 / gpt-5* families. 
Tolerates a leading purely-alphabetic namespace segment so OCI-style model ids - (``openai.gpt-5.5``, ``meta.llama-3.3-…``) are treated the same as + (``openai.gpt-5``, ``meta.llama-3.3-…``) are treated the same as native OpenAI names (``gpt-5.1-chat-latest``). Native ids start with a token containing digits/hyphens (``gpt-5``, ``o1-…``) so the namespace strip is a no-op for them. diff --git a/src/locus/models/providers/oci/openai_compat.py b/src/locus/models/providers/oci/openai_compat.py index e2ad82a1..7ef5a989 100644 --- a/src/locus/models/providers/oci/openai_compat.py +++ b/src/locus/models/providers/oci/openai_compat.py @@ -189,7 +189,7 @@ def __init__( """Initialize the OCI OpenAI-compat model. Args: - model: OCI model identifier (e.g. ``openai.gpt-5.5``, + model: OCI model identifier (e.g. ``openai.gpt-5``, ``meta.llama-3.3-70b-instruct``). profile: OCI config profile name from ``config_file``. Mutually exclusive with ``auth_type``. diff --git a/tests/unit/test_model_id_consistency.py b/tests/unit/test_model_id_consistency.py new file mode 100644 index 00000000..50a99914 --- /dev/null +++ b/tests/unit/test_model_id_consistency.py @@ -0,0 +1,70 @@ +"""Regression test: prevent the deprecated `gpt-5.5` model id from re-entering +the docs / examples / source. + +`openai.gpt-5.5` is not a real model in OCI's catalogue (the actual id is +`openai.gpt-5`). This test scans the codebase and fails if the deprecated +form reappears, so a future copy-paste can't quietly reintroduce a bad +quickstart. +""" + +from __future__ import annotations + +import re +from pathlib import Path + + +REPO_ROOT = Path(__file__).resolve().parents[2] + +# Files / directories scanned for the deprecated identifier. +SCAN_ROOTS = [ + REPO_ROOT / "src" / "locus", + REPO_ROOT / "examples", + REPO_ROOT / "docs", + REPO_ROOT / "README.md", +] + +# Files we cannot easily change (binary, third-party). Empty for now. 
+ALLOWLIST: set[Path] = set() + +# Regex that matches the deprecated identifier in any context. +DEPRECATED_PATTERN = re.compile(r"\bgpt-5\.5\b") + + +def _iter_files(root: Path) -> list[Path]: + if root.is_file(): + return [root] + if not root.exists(): + return [] + return [ + p + for p in root.rglob("*") + if p.is_file() + and p.suffix in {".py", ".md", ".rst", ".svg", ".yaml", ".yml", ".toml"} + and "__pycache__" not in p.parts + and ".venv" not in p.parts + and "site" not in p.parts + ] + + +def test_no_deprecated_gpt_5_5_anywhere(): + """Fail fast if `gpt-5.5` reappears in any documented surface.""" + offenders: list[tuple[Path, int, str]] = [] + + for root in SCAN_ROOTS: + for path in _iter_files(root): + if path in ALLOWLIST: + continue + try: + lines = path.read_text(encoding="utf-8").splitlines() + except (UnicodeDecodeError, OSError): + continue + for lineno, line in enumerate(lines, start=1): + if DEPRECATED_PATTERN.search(line): + offenders.append((path.relative_to(REPO_ROOT), lineno, line.strip())) + + assert not offenders, ( + "Deprecated model id `gpt-5.5` found in:\n" + + "\n".join(f" {p}:{lineno} {line}" for p, lineno, line in offenders[:20]) + + (f"\n ... and {len(offenders) - 20} more" if len(offenders) > 20 else "") + + "\n\nUse `openai.gpt-5` (or a specific successor like `openai.gpt-5.1`) instead." + ) diff --git a/tests/unit/test_oci_bucket_retry.py b/tests/unit/test_oci_bucket_retry.py new file mode 100644 index 00000000..9044f8e2 --- /dev/null +++ b/tests/unit/test_oci_bucket_retry.py @@ -0,0 +1,102 @@ +"""Unit tests for OCIBucketBackend retry strategy wiring. + +These verify that the retry strategy passed at construction time +(or the default `oci.retry.DEFAULT_RETRY_STRATEGY`) is actually +threaded through to every OCI Object Storage call. + +We do not need a real bucket — the OCI client is mocked so we can +inspect the kwargs passed to each method. 
+""" + +from __future__ import annotations + +from unittest.mock import MagicMock + +import pytest + + +oci = pytest.importorskip("oci") # skip the whole module if oci SDK isn't installed + +from locus.memory.backends.oci_bucket import OCIBucketBackend + + +@pytest.fixture +def backend_with_explicit_retry() -> tuple[OCIBucketBackend, MagicMock, object]: + """Build a backend whose `_client` is mocked so we can spy on kwargs. + + Returns the backend, the mock client, and the retry strategy instance + we passed in (so the test can assert identity). + """ + sentinel_retry = oci.retry.NoneRetryStrategy() + backend = OCIBucketBackend( + bucket_name="test-bucket", + namespace="test-ns", + retry_strategy=sentinel_retry, + ) + mock_client = MagicMock() + backend._client = mock_client # bypass real client construction + return backend, mock_client, sentinel_retry + + +def test_default_retry_strategy_is_oci_default(): + """Without an explicit retry, the backend uses oci.retry.DEFAULT_RETRY_STRATEGY.""" + backend = OCIBucketBackend(bucket_name="b", namespace="n") + resolved = backend._get_retry_strategy() + assert resolved is oci.retry.DEFAULT_RETRY_STRATEGY + + +def test_explicit_retry_strategy_preserved(backend_with_explicit_retry): + backend, _, sentinel = backend_with_explicit_retry + assert backend._get_retry_strategy() is sentinel + + +def test_put_object_passes_retry_strategy(backend_with_explicit_retry): + """Saving a checkpoint must thread retry_strategy through to put_object.""" + import asyncio + + backend, mock_client, sentinel = backend_with_explicit_retry + + asyncio.run(backend._put_bytes("some/key.json", b"{}", "application/json")) + + mock_client.put_object.assert_called_once() + kwargs = mock_client.put_object.call_args.kwargs + assert kwargs["retry_strategy"] is sentinel + + +def test_get_object_passes_retry_strategy(backend_with_explicit_retry): + import asyncio + + backend, mock_client, sentinel = backend_with_explicit_retry + 
mock_client.get_object.return_value.data.content = b"{}" + + asyncio.run(backend._get_bytes("some/key.json")) + + mock_client.get_object.assert_called_once() + kwargs = mock_client.get_object.call_args.kwargs + assert kwargs["retry_strategy"] is sentinel + + +def test_delete_object_passes_retry_strategy(backend_with_explicit_retry): + import asyncio + + backend, mock_client, sentinel = backend_with_explicit_retry + + asyncio.run(backend._delete_object("some/key.json")) + + mock_client.delete_object.assert_called_once() + kwargs = mock_client.delete_object.call_args.kwargs + assert kwargs["retry_strategy"] is sentinel + + +def test_list_objects_passes_retry_strategy(backend_with_explicit_retry): + import asyncio + + backend, mock_client, sentinel = backend_with_explicit_retry + mock_client.list_objects.return_value.data.objects = [] + mock_client.list_objects.return_value.data.prefixes = [] + + asyncio.run(backend._list_objects("some/prefix/")) + + mock_client.list_objects.assert_called_once() + kwargs = mock_client.list_objects.call_args.kwargs + assert kwargs["retry_strategy"] is sentinel