diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index a4a7deae..74405042 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -22,6 +22,11 @@ "name": "gemini-to-nova-migration", "source": "./skills/gemini-to-nova-migration", "description": "Migrate Google Gemini 2.0/2.5/3.x Python code and prompts to Amazon Nova 2 Lite (boto3 Bedrock Runtime)." + }, + { + "name": "openai-to-nova-migration", + "source": "./skills/openai-to-nova-migration", + "description": "Migrate OpenAI GPT-4o/4.1/5.x Python code and prompts to Amazon Nova 2 Lite (boto3 Bedrock Runtime)." } ] } diff --git a/skills/README.md b/skills/README.md index 5ddc90c5..216eec31 100644 --- a/skills/README.md +++ b/skills/README.md @@ -9,4 +9,5 @@ Reusable [Agent Skills](https://agentskills.io/specification) for building with | [text-agent-to-strands-voice-agent](./text-agent-to-strands-voice-agent/) | Migrate a text-based agent to a real-time voice agent using Strands BidiAgent with Amazon Nova Sonic | | [nova-prompter](./nova-prompter/) | Write and optimize prompts for Amazon Nova 1 and Nova 2 Lite — Claude Code plugins (`/nova1-prompt`, `/nova2-prompt`) and matching Kiro powers, with multimodal coverage for Nova 2 | | [titan-nova-mme-migration](./titan-nova-mme-migration/) | Migrate Amazon Bedrock embedding code from Titan Text V2 / Titan Multimodal G1 to Amazon Nova Multimodal Embeddings — handles request schema, dimension mapping, `embeddingPurpose`, and client-side text+image fusion | -| [gemini-to-nova-migration](./gemini-to-nova-migration/) | Migrate Google Gemini 2.0/2.5/3.x Python code and prompts to Amazon Nova 2 Lite — converts SDK calls (`google-genai` / `google-generativeai` → `boto3` Bedrock `converse`), rewrites prompts to `##Section##` format, handles multimodal, tool calling, structured output, streaming, and reasoning mode | \ No newline at end of file +| [gemini-to-nova-migration](./gemini-to-nova-migration/) | Migrate Google 
Gemini 2.0/2.5/3.x Python code and prompts to Amazon Nova 2 Lite — converts SDK calls (`google-genai` / `google-generativeai` → `boto3` Bedrock `converse`), rewrites prompts to `##Section##` format, handles multimodal, tool calling, structured output, streaming, and reasoning mode | +| [openai-to-nova-migration](./openai-to-nova-migration/) | Migrate OpenAI GPT-4o/4.1/5.x Python code and prompts to Amazon Nova 2 Lite — converts SDK calls (`openai` → `boto3` Bedrock `converse`) across Chat Completions, Responses, and Assistants APIs, extracts system prompts, nests inference params, rewrites prompts to `##Section##` format, and handles multimodal, tool calling, structured output, streaming, and extended thinking | \ No newline at end of file diff --git a/skills/openai-to-nova-migration/.claude-plugin/plugin.json b/skills/openai-to-nova-migration/.claude-plugin/plugin.json new file mode 100644 index 00000000..2eae9dc6 --- /dev/null +++ b/skills/openai-to-nova-migration/.claude-plugin/plugin.json @@ -0,0 +1,22 @@ +{ + "author": { + "name": "Amazon Web Services" + }, + "description": "Migrate OpenAI (GPT-4o, GPT-4.1, GPT-5.x) Python code and prompts to Amazon Nova 2 Lite (boto3 Bedrock Runtime). 
Handles SDK conversion, request/response restructuring, prompt reformatting, tool calling, structured output, multimodal, and extended thinking migration.", + "homepage": "https://github.com/aws-samples/amazon-nova-samples", + "keywords": [ + "bedrock", + "nova", + "openai", + "gpt", + "migration", + "python", + "boto3", + "prompt-engineering", + "aws" + ], + "license": "Apache-2.0", + "name": "openai-to-nova-migration", + "repository": "https://github.com/aws-samples/amazon-nova-samples", + "version": "1.0.0" +} diff --git a/skills/openai-to-nova-migration/README.md b/skills/openai-to-nova-migration/README.md new file mode 100644 index 00000000..f3b0dbc9 --- /dev/null +++ b/skills/openai-to-nova-migration/README.md @@ -0,0 +1,127 @@ +# OpenAI → Nova 2 Lite Migration Skill + +An [Agent Skill](https://agentskills.io/specification) that migrates OpenAI Python code and prompts to [Amazon Nova 2 Lite](https://docs.aws.amazon.com/nova/latest/nova2-userguide/getting-started-nova-2.html) (`us.amazon.nova-2-lite-v1:0`) on Amazon Bedrock. + +The skill converts SDK calls (`openai` → `boto3` Bedrock Runtime `converse`) and rewrites prompts to follow Nova 2 Lite formatting and constraints. This is not a one-line model swap — authentication, message structure, parameter nesting, and error handling all change. Every migration delivers two things: **working migrated code** and an **explanation of every change**, including any features that can't be ported 1:1. 
+ +The skill supports **GPT-4o, GPT-4.1, GPT-5.x**, and legacy GPT-4 / GPT-3.5 as source models, across three API styles: + +| Source API style | Detected from | +|---|---| +| Chat Completions API | `client.chat.completions.create(...)` | +| Responses API | `client.responses.create(...)` — `instructions=` + `input=` | +| Assistants API | `client.beta.assistants.create(...)` / `client.beta.threads.*` | + +**Target:** `us.amazon.nova-2-lite-v1:0` + +## Skill structure + +``` +openai-to-nova-migration/ +├── SKILL.md # Skill instructions and migration workflow +├── README.md # This file +└── references/ + ├── feature-mapping.md # Complete OpenAI → Nova mapping tables + ├── code-examples.md # Before/after code patterns + └── chat-completions-patterns.md # Chat Completions, Responses API + Assistants API specifics +``` + +## What the skill handles + +| Area | Detail | +|---|---| +| SDK | `openai` → `boto3` bedrock-runtime `converse` / `converse_stream` | +| Auth | `OPENAI_API_KEY` (bearer token) → AWS IAM credentials | +| Request structure | `role: "system"` message → `system=[{"text": ...}]`; flat string content → typed content blocks | +| Inference params | Top-level `max_tokens`/`temperature`/`top_p` → nested `inferenceConfig` with camelCase | +| Prompt format | Free-form → `##Section Name##` delimiters with canonical Nova section names | +| Multimodal | Enforces system-prompt-is-persona-only rule, media-before-text ordering, URL/base64 → raw bytes | +| Function calling | `tools` (function) → `toolSpec`, `tool_choice` → `toolChoice` | +| Structured output | `response_format` → inline schema (simple) or tool-forcing (complex) | +| Reasoning | `reasoning_effort` (GPT-5.x) → `additionalModelRequestFields.reasoningConfig`, mapped to a Nova effort level | +| Built-in tools | Assistants code interpreter / file search → Nova `nova_code_interpreter` / Bedrock Knowledge Bases | +| Error handling | `RateLimitError` / `APIError` → `ThrottlingException` / `ValidationException` | 
+ +## API differences at a glance + +| | OpenAI | Nova 2 Lite | +|---|---|---| +| **SDK** | `openai` | `boto3` bedrock-runtime | +| **Call** | `client.chat.completions.create()` / `client.responses.create()` | `client.converse()` | +| **Auth** | `OPENAI_API_KEY` | AWS credentials (IAM role, profile, env vars) | +| **System prompt** | `{"role": "system", ...}` message | `system=[{"text": ...}]` (persona only for multimodal) | +| **Message content** | Flat string | Typed content blocks (`[{"text": ...}]`) | +| **Inference params** | Top-level `max_tokens`, `temperature`, `top_p` | Nested `inferenceConfig` (`maxTokens`, `temperature`, `topP`) | +| **Tools** | `tools=[{"type": "function", ...}]` | `toolConfig={"tools": [{"toolSpec": ...}]}` | +| **Tool choice** | `tool_choice` (`auto`/`required`/`none`) | `toolChoice` (`auto`/`any`/`tool`) | +| **Structured output** | `response_format` (JSON schema) | Inline prompt schema or tool-forcing | +| **Reasoning** | `reasoning_effort` (GPT-5.x) | `additionalModelRequestFields={"reasoningConfig": {...}}` | +| **Streaming** | `stream=True` | `converse_stream()` | +| **Stateful turns** | Assistants threads, `previous_response_id` | Pass full message history each call | +| **Images** | URL or base64 data URI | Raw binary bytes; media MUST precede text | +| **Errors** | `RateLimitError` / `APIError` | `ThrottlingException` / `ValidationException` | + +## Reasoning effort mapping + +When the source model supports reasoning effort (GPT-5.x, `o1`, `o3`) **and** it's enabled, the skill asks the user which Nova effort level to use: + +| Nova effort | Config | +|---|---| +| `low` | `additionalModelRequestFields={"reasoningConfig": {"type": "enabled", "maxReasoningEffort": "low"}}` | +| `medium` | `additionalModelRequestFields={"reasoningConfig": {"type": "enabled", "maxReasoningEffort": "medium"}}` | +| `high` | `additionalModelRequestFields={"reasoningConfig": {"type": "enabled", "maxReasoningEffort": "high"}}` — must omit 
`inferenceConfig` and set client `read_timeout=3600` | + +OpenAI and Nova effort scales are not numerically equivalent — start at Nova `low` and increase only if evaluation shows quality gaps. If the source model has no native reasoning (e.g. `gpt-4o`), or `reasoning_effort` isn't enabled, omit `additionalModelRequestFields` entirely (reasoning is disabled by default). + +## Installation + +### Claude Code + +This skill ships as the `openai-to-nova-migration` plugin via the `aws-samples-amazon-nova-samples` marketplace. + +Add the marketplace, then install the plugin: + +``` +/plugin marketplace add https://github.com/aws-samples/amazon-nova-samples +/plugin install openai-to-nova-migration@aws-samples-amazon-nova-samples +``` + +After install, invoke the skill on your OpenAI code with `/openai-to-nova`. + +## Prerequisites + +- An AWS account with Amazon Bedrock access +- `us.amazon.nova-2-lite-v1:0` enabled in your region +- Python 3.8+ with `boto3` (only needed to run the migrated code) +- `bedrock:InvokeTool` IAM permission if migrating to Nova's built-in web grounding or code interpreter + +AWS credentials and Bedrock access are only required when you actually run the migrated code — the migration itself runs inside the host tool. + +## Example prompts + +``` +Migrate this OpenAI code to Amazon Nova 2 Lite: +[paste your OpenAI code here] +``` + +``` +I have a GPT-5.2 function-calling app with reasoning_effort="high". +Convert it to Nova 2 Lite. +``` + +``` +Rewrite this GPT-4o multimodal prompt (image input) for Nova 2 Lite. +``` + +``` +Migrate this OpenAI Assistants API app (code interpreter) to Nova 2 Lite. 
+``` + +## Related resources + +- [Amazon Nova 2 User Guide](https://docs.aws.amazon.com/nova/latest/nova2-userguide/getting-started-nova-2.html) +- [Amazon Nova 2 Prompt Engineering Guide](https://docs.aws.amazon.com/nova/latest/nova2-userguide/prompt-engineering-guide.html) +- [Migrate from Amazon Nova 1 to Amazon Nova 2 on Amazon Bedrock](https://aws.amazon.com/blogs/machine-learning/migrate-from-amazon-nova-1-to-amazon-nova-2-on-amazon-bedrock/) +- [Amazon Bedrock `converse` API](https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_Converse.html) +- [Agent Skills open standard — Anthropic](https://www.anthropic.com/engineering/equipping-agents-for-the-real-world-with-agent-skills) +- [Amazon Nova Samples](https://github.com/aws-samples/amazon-nova-samples) diff --git a/skills/openai-to-nova-migration/SKILL.md b/skills/openai-to-nova-migration/SKILL.md new file mode 100644 index 00000000..2e988105 --- /dev/null +++ b/skills/openai-to-nova-migration/SKILL.md @@ -0,0 +1,334 @@ +--- +name: openai-to-nova +description: Migrate OpenAI GPT-4o/4.1/5.x Python code and prompts to Amazon Nova 2 Lite. Use when converting OpenAI Python API code (openai SDK — Chat Completions, Responses, or Assistants API) to Nova 2 Lite (boto3 Bedrock Runtime), rewriting OpenAI prompts for Nova format, or migrating function calling, structured output, multimodal, or reasoning features from OpenAI to Nova. +tags: [skill, migration, openai, gpt, nova, bedrock] +--- + +# OpenAI to Nova 2 Lite Migration + +## Overview + +Migrate Python application code and prompts from OpenAI (GPT-4o, GPT-4.1, GPT-5.x) to Amazon Nova 2 Lite. Transforms SDK calls (`openai` → `boto3` Bedrock Runtime `converse` API) and rewrites prompts to follow Nova 2 Lite formatting and constraints. + +This is not a one-line model swap. Authentication, message structure, parameter nesting, and error handling all change. 
Every migration delivers two things: **working migrated code** and an **explanation of every change**, including any features that cannot be ported 1:1. + +## Usage + +Use this skill when: +- Converting OpenAI Python code to call Nova 2 Lite via Bedrock +- Rewriting prompts originally written for OpenAI to Nova 2 Lite format +- Migrating function calling / tool use from OpenAI to Nova +- Adapting multimodal OpenAI code (images) to Nova 2 Lite (images, video) +- Converting OpenAI structured output (`response_format`) to Nova's approach +- Switching from OpenAI reasoning effort (GPT-5.x) to Nova extended thinking + +## Core Concepts + +### Key Differences + +1. **SDK (Python)**: `openai` → `boto3` Bedrock Runtime `converse` API +2. **Authentication**: API key (`OPENAI_API_KEY`) → AWS IAM credentials (role, profile, env vars) +3. **System prompt**: `role: "system"` message → dedicated `system=[{"text": ...}]` parameter +4. **Message content**: Flat string → typed content blocks (`[{"text": "..."}]`) +5. **Inference params**: Top-level `max_tokens`, `temperature`, `top_p` → nested in `inferenceConfig` with camelCase (`maxTokens`, `topP`) +6. **Prompt format**: Free-form / markdown → `##Section Name##` delimiters +7. **Structured output**: Native `response_format` (JSON schema) → tool-forcing or inline schema +8. **Reasoning**: `reasoning_effort` (GPT-5.x) → `additionalModelRequestFields.reasoningConfig` +9. 
**Multimodal images**: URL or base64 → raw binary bytes; media MUST precede text + +### What Cannot Be Migrated Directly + +- Assistants API threads / persistent state → Manage conversation history externally, pass full history each call +- Built-in code interpreter (Assistants) → Use Nova's `nova_code_interpreter` built-in tool +- Built-in file search / retrieval (Assistants) → Use Amazon Bedrock Knowledge Bases +- Image generation (DALL-E / `gpt-image`) → Use Amazon Nova Canvas (separate model) +- Text-to-speech / Whisper → Use Amazon Polly / Amazon Transcribe +- Realtime API (voice) → Use Amazon Nova Sonic +- Fine-tuned OpenAI models → Re-run customization on Nova via Amazon Bedrock + +## Migration Workflow + +You **MUST** follow these steps in order. After each step, confirm findings with the user before proceeding to the next. + +### Step 1: Analyze the OpenAI Code + +First, identify the OpenAI SDK and API style: +- [ ] Chat Completions API — `client.chat.completions.create(...)` (most common) +- [ ] Responses API — `client.responses.create(...)` (newer; `input=` + `instructions=`) +- [ ] Assistants API — `client.beta.assistants.create(...)` / `client.beta.threads.*` (stateful, built-in tools) + +Then identify which features are used: +- [ ] Basic text generation +- [ ] System prompt / instructions +- [ ] Multi-turn conversation (message history or threads) +- [ ] Function calling / tools +- [ ] Structured output (`response_format`, JSON mode) +- [ ] Multimodal (images) +- [ ] Streaming +- [ ] Reasoning effort (GPT-5.x `reasoning_effort`) +- [ ] Built-in tools (code interpreter, file search, web search) + +You **MUST** flag any features in the "cannot migrate" list above and inform the user of alternatives before proceeding. + +If the source model is a high-capability reasoning model (`gpt-5.2`, `o3`, `o1`) run at high `reasoning_effort`, you **MUST** ask the user whether they have evaluated Nova 2 Lite for their use case. 
Start every migration with Nova 2 Lite; Nova 2 Pro exists for workloads where Lite with high-effort reasoning still falls short, but the user should validate that need with benchmark data before targeting Pro. + +### Step 2: Classify the Use Case + +**Reasoning support by OpenAI model:** + +| OpenAI Model | Native Reasoning Effort | +|--------------|------------------------| +| `gpt-4o` / `gpt-4o-mini` | No — if CoT is used, it's via prompt text ("think step by step") | +| `gpt-4.1` / `gpt-4.1-mini` | No | +| `gpt-4` / `gpt-3.5-turbo` | No | +| `gpt-5` / `gpt-5.2` | Yes — `reasoning_effort` parameter | +| `o1` / `o3` | Yes — reasoning models | + +**Migration rules for reasoning:** +- If the source model does NOT support reasoning effort → omit `additionalModelRequestFields` entirely. If the prompt uses CoT ("think step by step"), keep that prompt text as-is. +- If the source model supports reasoning effort but it's NOT enabled → omit `additionalModelRequestFields` entirely. +- If reasoning IS enabled → ask the user which Nova reasoning effort level to use: + +| Nova Effort | Config | +|-------------|--------| +| `low` | `additionalModelRequestFields={"reasoningConfig": {"type": "enabled", "maxReasoningEffort": "low"}}` | +| `medium` | `additionalModelRequestFields={"reasoningConfig": {"type": "enabled", "maxReasoningEffort": "medium"}}` | +| `high` | `additionalModelRequestFields={"reasoningConfig": {"type": "enabled", "maxReasoningEffort": "high"}}` — **MUST omit `inferenceConfig`** (temperature, topP, maxTokens, topK not allowed) and set client `read_timeout=3600` | + +> Do NOT map OpenAI's effort level directly to Nova's by name. OpenAI `minimal`/`low`/`medium`/`high` and Nova `low`/`medium`/`high` are not numerically equivalent. Default to Nova `low` first, then increase only if evaluation shows quality gaps. Present the three options and let the user decide. 
+ +Determine the Nova 2 Lite use case type for correct inference config: + +**Text/Agentic:** +| Use Case | Temperature | Top P | Reasoning | +|----------|------------|-------|-----------| +| `general` | 0.7 | default | DISABLED | +| `tool_calling` | 0.7 | 0.9 | DISABLED | +| `tool_calling_reasoning` | 1 | 0.9 | ENABLED | +| `complex_reasoning` | 0.7 | default | ENABLED | + +**Multimodal:** +| Use Case | Temperature | Reasoning | +|----------|------------|-----------| +| OCR | 0.7 | DISABLED | +| Key information extraction | 0 | OPTIONAL | +| Object/UI detection | 0 | DISABLED | +| Video summary/caption | 0 | OPTIONAL | +| Video timestamps/classification | 0 | DISABLED | + +> **Precedence rule (high effort overrides the temperature/topP columns).** The temperature and Top P values above apply only when reasoning is disabled or set to `low`/`medium`. When `maxReasoningEffort` is `high`, you **MUST NOT** set `temperature`, `topP`, `topK`, or `maxTokens` — omit `inferenceConfig` entirely, or the request returns a validation error. So a `tool_calling_reasoning` workload at high effort drops the `temperature=1, topP=0.9` values; at low/medium effort it keeps them. (Per the Amazon Nova 2 User Guide: *"Temperature, topP and topK cannot be used with maxReasoningEffort set to high."*) Note also that high effort can produce more than 65K output tokens (observed up to 128K) — size your downstream handling accordingly. + +### Step 3: Migrate the Code + +You **MUST** read `references/feature-mapping.md` for the complete field mapping table. +You **SHOULD** read `references/code-examples.md` for before/after patterns. + +If the source code uses the Responses API (`client.responses.create`) or the Assistants API (`client.beta.assistants.*`), you **MUST** also read `references/chat-completions-patterns.md` for the specific parameter mappings, Responses API migration, and Assistants-to-built-in-tools handling. 
+ +**Ask the user which region-prefixed model ID to use:** + +| Model ID | Region | +|----------|--------| +| `us.amazon.nova-2-lite-v1:0` | US (us-east-1, us-west-2) | +| `eu.amazon.nova-2-lite-v1:0` | EU (eu-west-1, etc.) | +| `jp.amazon.nova-2-lite-v1:0` | Japan (ap-northeast-1) | +| `global.amazon.nova-2-lite-v1:0` | Cross-region inference | + +Default to `us.amazon.nova-2-lite-v1:0` if the user doesn't specify. + +**SDK transformation (Python):** +```python +# OpenAI +from openai import OpenAI +client = OpenAI(api_key="...") +response = client.chat.completions.create(model="gpt-4o-mini", messages=[...]) + +# Nova 2 Lite +import boto3 +client = boto3.client("bedrock-runtime") +response = client.converse(modelId="us.amazon.nova-2-lite-v1:0", messages=[...]) +``` + +**Extract the system prompt** from the `messages` array into a dedicated `system` parameter: +```python +# OpenAI — system is a message +messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Hello"}, +] + +# Nova — system is a separate parameter; content is a typed block +system = [{"text": "You are a helpful assistant."}] +messages = [{"role": "user", "content": [{"text": "Hello"}]}] +``` + +**Only include `additionalModelRequestFields` when reasoning is enabled:** +```python +# Reasoning DISABLED (default) — omit additionalModelRequestFields entirely +response = client.converse(modelId="us.amazon.nova-2-lite-v1:0", ...) + +# Reasoning ENABLED — include reasoningConfig +response = client.converse( + modelId="us.amazon.nova-2-lite-v1:0", + ..., + additionalModelRequestFields={ + "reasoningConfig": {"type": "enabled", "maxReasoningEffort": "medium"} + }, +) +``` + +### Step 4: Migrate the Prompt + +OpenAI models tolerate loosely phrased instructions. Nova 2 Lite delivers higher quality when the prompt clearly separates task, input, requirements, constraints, and output format. You **MUST** apply these transformations: + +1. 
**Add explicit structure** using `##Section Name##` delimiters (the section name is wrapped on both sides by `##`, not a markdown header). Convert any markdown headers or inline instructions into canonical Nova sections: + - `##Task Summary:##` — defines the task + - `##Context Information:##` — background + - `##Model Instructions:##` — behavioral rules + - `##Response style and format requirements:##` — output format + - `##Examples##` — few-shot examples + - `##Reference##` — RAG grounding content + +2. **For multimodal prompts:** + - Move ALL task instructions from the system prompt to the user prompt + - Keep the system prompt as persona + response style only + - Ensure media content precedes text in the content array + +3. **Add suppression guardrail** where appropriate: + ``` + DO NOT mention anything inside ##Model Instructions## or ##Examples## in the response. + ``` + +4. **For long context (>10K tokens):** + ``` + BEGIN INPUT DOCUMENTS + DOCUMENT 1 START + {content} + DOCUMENT 1 END + END INPUT DOCUMENTS + + BEGIN QUESTION + {query} + END QUESTION + + BEGIN INSTRUCTIONS + {instructions} + END INSTRUCTIONS + ``` + +> Example transformation — vague OpenAI prompt `"Summarize this document and highlight key risks"` becomes a structured Nova prompt with explicit `##Task Summary:##` and `##Response style and format requirements:##` sections specifying format, columns, tone, and scope. See `references/code-examples.md`. + +### Step 5: Migrate Structured Output + +If the OpenAI code uses structured output (JSON mode via `response_format={"type": "json_object"}`, or a schema via `response_format={"type": "json_schema", ...}`), apply this step. Otherwise skip to Step 6. + +- **Simple JSON (≤10 keys):** Use inline schema in the prompt + `temperature=0` +- **Complex JSON (>10 keys):** Use tool-forcing with the schema in `toolSpec.inputSchema.json` and `toolChoice={"tool": {"name": ...}}` + +See `references/code-examples.md` Example 3 for both patterns. 
+ +### Step 6: Migrate Tool Calling + +If the OpenAI code uses function calling / tools, apply this step. Otherwise skip to Step 7. + +Key differences: +- OpenAI `tools=[{"type": "function", "function": {...}}]` → Nova `toolConfig={"tools": [{"toolSpec": {...}}]}` +- OpenAI `function.parameters` (JSON Schema) → Nova `toolSpec.inputSchema.json` (JSON Schema — structurally compatible, re-wrap) +- OpenAI `tool_choice` → Nova `toolChoice`: `auto` → `auto`, `required` → `any`, named function → `tool`. Nova `toolChoice` has no `none` option — to disable tool use for a call, omit `toolConfig` instead +- OpenAI built-in tools (code interpreter, web search) → Nova built-in tools (`nova_code_interpreter`, `nova_grounding`) — see `references/chat-completions-patterns.md` +- Keep tool descriptions to 20-50 words; parameter descriptions to ~10 words +- Reference tools by name in the system prompt: `Use the 'tool_name' tool for X` + +> **Tool use + extended thinking is supported.** Nova 2 Lite allows `toolConfig` together with reasoning enabled — the model reasons about which tools to use and how to interpret their results (per the Amazon Nova 2 User Guide: *"Extended thinking works seamlessly with tool calling"*). When continuing a tool round-trip with reasoning enabled, append the assistant's full content blocks (including any `reasoningContent`) back into `messages`, then add the `toolResult` in a user-role message. + +### Step 7: Present the Result + +Output format: + +``` +## Migrated Code + +**Use case:** {type} +**Inference config:** temperature={T}, reasoning={enabled/disabled} +**Breaking changes:** {list any features that couldn't be migrated 1:1} + +### Code +{complete migrated code} + +### Prompt +**SYSTEM PROMPT:** +{system prompt — persona only for multimodal} + +**USER PROMPT:** +{user prompt with ##Section## formatting} + +### Implementation Notes +- {inference config rationale} +- {any multimodal ordering requirements} +- {features requiring alternative approach} +``` + +### Step 8: Validate + +You **MUST** check the migrated code against all criteria below. 
If any check fails, fix and re-validate before presenting to the user: +- [ ] Authentication uses boto3 client / IAM, not `OPENAI_API_KEY` +- [ ] System prompt extracted to the `system` parameter, not a `role: "system"` message +- [ ] All message content wrapped in typed blocks (`[{"text": ...}]`) +- [ ] Inference params nested in `inferenceConfig` with camelCase names +- [ ] `additionalModelRequestFields` is omitted when reasoning is disabled, or contains `reasoningConfig` when enabled +- [ ] High effort: `inferenceConfig` removed entirely (no temperature/topP/topK/maxTokens) and `read_timeout=3600` set on the client +- [ ] Inference config matches the use case table — EXCEPT at high effort, where the temperature/topP columns do not apply (see precedence rule in Step 2) +- [ ] Multimodal: system prompt contains only persona; media precedes text in content array +- [ ] Images converted from URL/base64 to raw bytes +- [ ] Prompt uses `##Section##` delimiters with explicit structure +- [ ] Tool schemas wrapped in `toolSpec`; `tool_choice` mapped to `toolChoice` +- [ ] Error handling updated (`RateLimitError` → `ThrottlingException`, etc.) 
+- [ ] No OpenAI-specific features remain (thread IDs, `OpenAI(...)` client, `response_format`) + +## Quick Reference + +| OpenAI | Nova 2 Lite | +|--------|-------------| +| `openai` SDK | `boto3` bedrock-runtime | +| `client.chat.completions.create()` / `client.responses.create()` | `client.converse()` | +| `OPENAI_API_KEY` | AWS IAM credentials | +| `model="gpt-4o-mini"` | `modelId="us.amazon.nova-2-lite-v1:0"` | +| `{"role": "system", "content": "..."}` | `system=[{"text": "..."}]` | +| `{"content": "text"}` | `{"content": [{"text": "text"}]}` | +| `max_tokens` / `temperature` / `top_p` | `inferenceConfig.maxTokens` / `.temperature` / `.topP` | +| `tools=[{"type": "function", ...}]` | `toolConfig={"tools": [{"toolSpec": ...}]}` | +| `tool_choice` | `toolChoice` | +| `response_format` (JSON schema) | Tool-forcing or inline prompt schema | +| `reasoning_effort="high"` | `additionalModelRequestFields={"reasoningConfig": {"type": "enabled", "maxReasoningEffort": "high"}}` | +| `stream=True` | `client.converse_stream()` | +| `RateLimitError` / `APIError` | `ThrottlingException` / `ValidationException` | + +## Common Mistakes + +### Leaving the system prompt as a message +**Problem:** Keeping `{"role": "system", ...}` in the `messages` array — Bedrock does not accept a system role in `messages`. +**Fix:** Extract it into the dedicated `system=[{"text": ...}]` parameter. + +### Forgetting to wrap content in typed blocks +**Problem:** Passing a flat string as message content (`{"content": "hello"}`) — Bedrock requires typed content blocks. +**Fix:** Wrap as `{"content": [{"text": "hello"}]}`. + +### Leaving inference params at the top level +**Problem:** Passing `max_tokens`/`temperature`/`top_p` as top-level arguments to `converse()`. +**Fix:** Nest them in `inferenceConfig` with camelCase (`maxTokens`, `temperature`, `topP`). 
+ +### Passing `inferenceConfig` with high-effort reasoning +**Problem:** Including `maxTokens`, `temperature`, `topP`, or `topK` when `maxReasoningEffort` is `high` — returns a validation error. +**Fix:** Remove the entire `inferenceConfig` block at high effort, and set the client `read_timeout=3600`. + +### Passing `additionalModelRequestFields` when reasoning is disabled +**Problem:** Including reasoning config when reasoning is not needed — adds cost and latency with no benefit. +**Fix:** Omit `additionalModelRequestFields` entirely when reasoning is disabled. + +### Loose prompts carried over unchanged +**Problem:** Reusing a vague OpenAI prompt verbatim — Nova quality varies across requests. +**Fix:** Add explicit `##Section##` structure: task, input, format requirements, constraints. + +### Wrong media ordering / wrong image format +**Problem:** Putting text before images in the content array, or passing image URLs / base64 strings. +**Fix:** Media content blocks MUST come before the text block, and images must be raw binary bytes. diff --git a/skills/openai-to-nova-migration/references/chat-completions-patterns.md b/skills/openai-to-nova-migration/references/chat-completions-patterns.md new file mode 100644 index 00000000..83ee5895 --- /dev/null +++ b/skills/openai-to-nova-migration/references/chat-completions-patterns.md @@ -0,0 +1,243 @@ +# OpenAI API Migration Patterns + +Covers the three OpenAI API styles you may encounter: the **Chat Completions API** (`client.chat.completions.create`), the newer **Responses API** (`client.responses.create`), and the stateful **Assistants API** (`client.beta.assistants.*` / threads). 
+ +## API Style Detection + +**Chat Completions API (most common):** +```python +from openai import OpenAI +client = OpenAI() +response = client.chat.completions.create(model="gpt-4o", messages=[...]) +``` + +**Responses API (newer):** +```python +from openai import OpenAI +client = OpenAI() +response = client.responses.create( + model="gpt-4o", + instructions="You are a helpful assistant.", # system prompt + input="Explain microservices.", # user input +) +``` + +**Assistants API (stateful, built-in tools):** +```python +from openai import OpenAI +client = OpenAI() +assistant = client.beta.assistants.create(model="gpt-4o", instructions="...", tools=[...]) +thread = client.beta.threads.create() +client.beta.threads.messages.create(thread_id=thread.id, role="user", content="...") +run = client.beta.threads.runs.create(thread_id=thread.id, assistant_id=assistant.id) +``` + +All three map to a single Nova `client.converse(...)` call. Nova has no server-side state, so any stateful pattern becomes "pass the full message history each call." 
+ +--- + +## Chat Completions API + +### Parameter Mapping + +| OpenAI (Chat Completions) | Nova 2 Lite (boto3) | +|---------------------------|---------------------| +| `client.chat.completions.create(model=..., messages=...)` | `client.converse(modelId=..., messages=...)` | +| `{"role": "system", "content": "..."}` (in messages) | `system=[{"text": "..."}]` (separate param) | +| `{"role": "user", "content": "text"}` | `{"role": "user", "content": [{"text": "text"}]}` | +| `temperature` | `inferenceConfig={"temperature": ...}` | +| `top_p` | `inferenceConfig={"topP": ...}` | +| `max_tokens` / `max_completion_tokens` | `inferenceConfig={"maxTokens": ...}` | +| `stop` | `inferenceConfig={"stopSequences": [...]}` | +| `tools` | `toolConfig={"tools": [...]}` | +| `tool_choice` | `toolConfig={"toolChoice": {...}}` | +| `response_format` | Inline schema in prompt or tool-forcing | +| `reasoning_effort` (GPT-5.x) | `additionalModelRequestFields={"reasoningConfig": {...}}` (only when enabled) | +| `stream=True` | `client.converse_stream(...)` | +| `response.choices[0].message.content` | `response["output"]["message"]["content"][0]["text"]` | +| `response.choices[0].finish_reason` | `response["stopReason"]` (`end_turn`, `tool_use`, `max_tokens`, `stop_sequence`) | +| `response.usage` | `response["usage"]` | + +### Multi-turn Chat + +OpenAI Chat Completions is already stateless — you pass the full `messages` array each call, just like Nova. The migration is mostly mechanical: extract the system message, wrap content in typed blocks. 
+ +**OpenAI:** +```python +messages = [ + {"role": "system", "content": "You are a helpful coding assistant."}, + {"role": "user", "content": "My name is Alice, building a FastAPI project."}, +] +r1 = client.chat.completions.create(model="gpt-4o", messages=messages) + +messages.append({"role": "assistant", "content": r1.choices[0].message.content}) +messages.append({"role": "user", "content": "What testing framework do you recommend?"}) +r2 = client.chat.completions.create(model="gpt-4o", messages=messages) +``` + +**Nova 2 Lite:** +```python +import boto3 +client = boto3.client("bedrock-runtime") + +system = [{"text": "You are a helpful coding assistant."}] +messages = [ + {"role": "user", "content": [{"text": "My name is Alice, building a FastAPI project."}]} +] + +r1 = client.converse( + modelId="us.amazon.nova-2-lite-v1:0", + system=system, + messages=messages, + inferenceConfig={"temperature": 0.7}, +) + +# Append the assistant's content blocks directly, then the next user turn +messages.append({"role": "assistant", "content": r1["output"]["message"]["content"]}) +messages.append({"role": "user", "content": [{"text": "What testing framework do you recommend?"}]}) + +r2 = client.converse( + modelId="us.amazon.nova-2-lite-v1:0", + system=system, + messages=messages, + inferenceConfig={"temperature": 0.7}, +) +``` + +--- + +## Responses API + +The Responses API splits the prompt into `instructions` (system) and `input` (user), and supports stateful chaining via `previous_response_id`. 
+ +### Parameter Mapping + +| OpenAI (Responses API) | Nova 2 Lite (boto3) | +|------------------------|---------------------| +| `client.responses.create(...)` | `client.converse(...)` | +| `model="gpt-4o"` | `modelId="us.amazon.nova-2-lite-v1:0"` | +| `instructions="..."` | `system=[{"text": "..."}]` | +| `input="..."` (string) | `messages=[{"role": "user", "content": [{"text": "..."}]}]` | +| `input=[{"role": ..., "content": ...}]` (list) | `messages=[...]` with typed content blocks | +| `temperature` / `top_p` / `max_output_tokens` | `inferenceConfig={"temperature", "topP", "maxTokens"}` | +| `tools=[...]` | `toolConfig={"tools": [...]}` | +| `reasoning={"effort": "high"}` | `additionalModelRequestFields={"reasoningConfig": {...}}` | +| `previous_response_id=resp.id` | Pass full `messages` array with conversation history | +| `store=True` (persist server-side) | Not available — manage state externally | +| `response.output_text` | `response["output"]["message"]["content"][0]["text"]` | + +### Basic Text + +**OpenAI (Responses API):** +```python +from openai import OpenAI +client = OpenAI() + +resp = client.responses.create( + model="gpt-4o", + instructions="You are a senior architect. Be concise.", + input="Explain microservices vs monoliths.", +) +print(resp.output_text) +``` + +**Nova 2 Lite:** +```python +import boto3 +client = boto3.client("bedrock-runtime") + +response = client.converse( + modelId="us.amazon.nova-2-lite-v1:0", + system=[{"text": "You are a senior architect. Be concise."}], + messages=[ + {"role": "user", "content": [{"text": "Explain microservices vs monoliths."}]} + ], + inferenceConfig={"temperature": 0.7}, +) +print(response["output"]["message"]["content"][0]["text"]) +``` + +### Multi-turn with `previous_response_id` + +The Responses API can retain context server-side via `previous_response_id`. Nova has no server-side state — maintain and pass the full message history. 
+ +**OpenAI (Responses API):** +```python +turn_1 = client.responses.create( + model="gpt-4o", + instructions="You are a helpful coding assistant.", + input="My name is Alice, building a FastAPI project.", +) +turn_2 = client.responses.create( + model="gpt-4o", + previous_response_id=turn_1.id, + input="What testing framework do you recommend?", +) +print(turn_2.output_text) +``` + +**Nova 2 Lite:** identical to the Chat Completions multi-turn pattern above — accumulate `messages` and pass them each call. + +--- + +## Assistants API + +The Assistants API is stateful (threads persist messages) and ships built-in tools (code interpreter, file search). Nova replaces the *state* with client-side history and replaces the *built-in tools* with Nova's own. + +### Concept Mapping + +| OpenAI (Assistants API) | Nova 2 Lite | +|-------------------------|-------------| +| `assistants.create(instructions=...)` | `system=[{"text": "..."}]` on each `converse` call | +| `threads.create()` + `threads.messages.create(...)` | Maintain a `messages` list in your application | +| `threads.runs.create(...)` + polling | A single synchronous `client.converse(...)` call | +| Prior thread state (messages persisted server-side) | Pass the full `messages` array each call | +| Tool: `{"type": "code_interpreter"}` | Built-in `nova_code_interpreter` tool (see below) | +| Tool: `{"type": "file_search"}` (vector stores) | Amazon Bedrock Knowledge Bases | +| Tool: `{"type": "function", ...}` | `toolConfig={"tools": [{"toolSpec": ...}]}` | +| `run.status` polling loop | No polling — `converse` returns synchronously | + +### Built-in Tools Migration + +Where OpenAI requires the Assistants API to access code execution or retrieval, Nova 2 Lite exposes equivalents through the same `toolConfig` parameter on the standard `converse` call — no separate API surface. + +**Code interpreter** (`nova_code_interpreter`): +- Executes Python in an isolated sandbox for precise computation. +- Available in US East (N. 
Virginia), US West (Oregon), and Asia Pacific (Tokyo). Use Global CRIS (`global.amazon.nova-2-lite-v1:0`) to route to a supported Region. +- Requires `bedrock:InvokeTool` IAM permission (not included in the default Bedrock role). + +**Web grounding** (`nova_grounding`): +- Provides real-time web information with citations. +- Available in US AWS Regions only. +- Requires `bedrock:InvokeTool` IAM permission for the `amazon.nova_grounding` resource. Incurs additional cost beyond standard inference. + +The model decides when to invoke these based on prompt context. For the exact `toolConfig` payloads and response parsing, see the [built-in tools section of the companion blog post](https://aws.amazon.com/blogs/machine-learning/migrate-from-amazon-nova-1-to-amazon-nova-2-on-amazon-bedrock/) and the [Amazon Nova 2 User Guide — using tools](https://docs.aws.amazon.com/nova/latest/nova2-userguide/using-tools.html). + +**File search / retrieval:** there is no inline equivalent. Replace OpenAI vector stores with Amazon Bedrock Knowledge Bases and retrieve context before the `converse` call, passing it under a `##Reference##` section. 
+ +### Assistants API Gotchas + +| Pattern | Migration Notes | +|---------|----------------| +| `threads.runs.create(...)` + status polling | No async run lifecycle — `converse` is synchronous | +| Thread persistence | No server-side threads — store the `messages` list yourself (DB, cache) | +| `code_interpreter` tool | `nova_code_interpreter` built-in tool; add `InvokeTool` IAM permission | +| `file_search` tool | Amazon Bedrock Knowledge Bases; retrieve then inject as `##Reference##` | +| Assistant-level `instructions` | Re-send `system=[{"text": ...}]` on every call | +| Uploaded files (`purpose="assistants"`) | Pass bytes inline (image/document blocks) or stage in S3 | + +--- + +## Common Gotchas (All API Styles) + +| Pattern | Migration Notes | +|---------|----------------| +| `OpenAI(api_key=...)` | Remove — boto3 uses the AWS credential chain (IAM role / profile / env) | +| Flat string `content` | Wrap in typed blocks: `[{"text": "..."}]` | +| `{"role": "system", ...}` in messages | Extract to the `system=[...]` parameter | +| Image as URL / base64 data URI | Read raw bytes → `{"image": {"format": "...", "source": {"bytes": ...}}}` | +| `n` > 1 | Not supported — make multiple calls | +| `seed` | Not supported | +| `finish_reason` | `response["stopReason"]` (`end_turn`, `tool_use`, `max_tokens`, `stop_sequence`) | +| Token counting helper | No `count_tokens` on converse — estimate, or count before sending | +| `response_format` (JSON) | Inline schema in prompt (simple) or tool-forcing (complex) | diff --git a/skills/openai-to-nova-migration/references/code-examples.md b/skills/openai-to-nova-migration/references/code-examples.md new file mode 100644 index 00000000..4f8449a5 --- /dev/null +++ b/skills/openai-to-nova-migration/references/code-examples.md @@ -0,0 +1,493 @@ +# Migration Code Examples + +All examples migrate OpenAI Python code to Amazon Nova 2 Lite on Amazon Bedrock (`boto3` Bedrock Runtime `converse` API). 
+ +## Example 1: Basic Text Generation + +### OpenAI (Python) +```python +from openai import OpenAI + +client = OpenAI(api_key="sk-...") + +response = client.chat.completions.create( + model="gpt-4o-mini", + messages=[ + {"role": "system", "content": "You are a concise technical writer. Keep responses under 100 words."}, + {"role": "user", "content": "Summarize the key benefits of cloud computing."}, + ], + temperature=0.7, + max_tokens=512, +) +print(response.choices[0].message.content) +``` + +### Nova 2 Lite (Python — boto3) +```python +import boto3 + +client = boto3.client("bedrock-runtime") + +# System message extracted to the `system` parameter. +# User content wrapped in a typed block. Params nested in inferenceConfig. +response = client.converse( + modelId="us.amazon.nova-2-lite-v1:0", + system=[{"text": "You are a concise technical writer. Keep responses under 100 words."}], + messages=[ + {"role": "user", "content": [{"text": "Summarize the key benefits of cloud computing."}]} + ], + inferenceConfig={"temperature": 0.7, "maxTokens": 512}, +) + +print(response["output"]["message"]["content"][0]["text"]) +``` + +--- + +## Example 2: Function Calling / Tool Use + +### OpenAI (Python) +```python +from openai import OpenAI +import json + +client = OpenAI() + +tools = [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get current weather for a location", + "parameters": { + "type": "object", + "properties": { + "location": {"type": "string", "description": "City name"}, + "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, + }, + "required": ["location"], + }, + }, + } +] + +response = client.chat.completions.create( + model="gpt-4o", + messages=[{"role": "user", "content": "What's the weather in Seattle?"}], + tools=tools, + tool_choice="auto", +) + +for call in response.choices[0].message.tool_calls or []: + print(f"Call: {call.function.name}({call.function.arguments})") +``` + +### Nova 2 Lite (Python — boto3) 
+```python +import boto3 +import json + +client = boto3.client("bedrock-runtime") + +# OpenAI's function.parameters (JSON Schema) re-wrapped in toolSpec.inputSchema.json +tool_config = { + "tools": [ + { + "toolSpec": { + "name": "get_weather", + "description": "Get current weather for a location", + "inputSchema": { + "json": { + "type": "object", + "properties": { + "location": {"type": "string", "description": "City name"}, + "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, + }, + "required": ["location"], + } + }, + } + } + ] +} + +response = client.converse( + modelId="us.amazon.nova-2-lite-v1:0", + messages=[ + {"role": "user", "content": [{"text": "What's the weather in Seattle?"}]} + ], + toolConfig=tool_config, + inferenceConfig={"temperature": 0.7, "topP": 0.9}, +) + +# Handle tool use +for block in response["output"]["message"]["content"]: + if "toolUse" in block: + tool_use = block["toolUse"] + print(f"Call: {tool_use['name']}({json.dumps(tool_use['input'])})") + + # Send tool result back — note the toolResult goes in a user-role message + tool_result_response = client.converse( + modelId="us.amazon.nova-2-lite-v1:0", + messages=[ + {"role": "user", "content": [{"text": "What's the weather in Seattle?"}]}, + {"role": "assistant", "content": response["output"]["message"]["content"]}, + { + "role": "user", + "content": [ + { + "toolResult": { + "toolUseId": tool_use["toolUseId"], + "content": [{"text": '{"temperature": 62, "unit": "fahrenheit", "condition": "cloudy"}'}], + } + } + ], + }, + ], + toolConfig=tool_config, + inferenceConfig={"temperature": 0.7, "topP": 0.9}, + ) + print(tool_result_response["output"]["message"]["content"][0]["text"]) +``` + +--- + +## Example 3: Structured Output (JSON) + +### OpenAI (Python — Structured Outputs) +```python +from openai import OpenAI + +client = OpenAI() + +response = client.chat.completions.create( + model="gpt-4o", + messages=[ + {"role": "user", "content": "Extract the person's name, age, and 
city from: 'John Smith, 34, lives in Portland'"} + ], + response_format={ + "type": "json_schema", + "json_schema": { + "name": "person", + "schema": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + "city": {"type": "string"}, + }, + "required": ["name", "age", "city"], + }, + }, + }, +) +print(response.choices[0].message.content) +``` + +### Nova 2 Lite — Simple JSON (inline schema in prompt) +````python +import boto3 + +client = boto3.client("bedrock-runtime") + +user_prompt = """Extract the person's name, age, and city from: 'John Smith, 34, lives in Portland' + +You MUST answer in JSON format only. Write your response following the format below: +```json +{ + "name": "full name as string", + "age": "integer", + "city": "city name as string" +} +``` +Please generate only the JSON output. DO NOT provide any preamble.""" + +response = client.converse( + modelId="us.amazon.nova-2-lite-v1:0", + messages=[{"role": "user", "content": [{"text": user_prompt}]}], + inferenceConfig={"temperature": 0}, +) +print(response["output"]["message"]["content"][0]["text"]) +```` + +### Nova 2 Lite — Complex JSON (tool-forcing for schema enforcement) +```python +import boto3 + +client = boto3.client("bedrock-runtime") + +tool_config = { + "tools": [ + { + "toolSpec": { + "name": "extract_person", + "description": "Extract structured person data from text", + "inputSchema": { + "json": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + "city": {"type": "string"}, + }, + "required": ["name", "age", "city"], + } + }, + } + } + ], + "toolChoice": {"tool": {"name": "extract_person"}}, +} + +response = client.converse( + modelId="us.amazon.nova-2-lite-v1:0", + messages=[ + { + "role": "user", + "content": [{"text": "Extract the person's name, age, and city from: 'John Smith, 34, lives in Portland'"}], + } + ], + toolConfig=tool_config, + inferenceConfig={"temperature": 0}, +) + +# Result 
is in the toolUse block's input field +tool_use = response["output"]["message"]["content"][0]["toolUse"] +print(tool_use["input"]) # {"name": "John Smith", "age": 34, "city": "Portland"} +``` + +--- + +## Example 4: Multimodal (Image Analysis) + +### OpenAI (Python) +```python +from openai import OpenAI +import base64 + +client = OpenAI() + +with open("receipt.png", "rb") as f: + b64 = base64.b64encode(f.read()).decode("utf-8") + +response = client.chat.completions.create( + model="gpt-4o", + messages=[ + {"role": "system", "content": "You are a document extraction assistant."}, + { + "role": "user", + "content": [ + {"type": "text", "text": "Extract the total amount, date, and merchant name from this receipt. Return as JSON."}, + {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64}"}}, + ], + }, + ], +) +print(response.choices[0].message.content) +``` + +### Nova 2 Lite (Python — boto3) +```python +import boto3 + +client = boto3.client("bedrock-runtime") + +with open("receipt.png", "rb") as f: + image_bytes = f.read() + +# CRITICAL multimodal rules: +# 1. System prompt is persona-only — all task instructions move to the user message. +# 2. Media MUST come before text in the content array. +# 3. Image is raw bytes, not a base64 data URI. + +response = client.converse( + modelId="us.amazon.nova-2-lite-v1:0", + system=[{"text": "You are a precise document extraction assistant."}], + messages=[ + { + "role": "user", + "content": [ + {"image": {"format": "png", "source": {"bytes": image_bytes}}}, + { + "text": """Given the image representation of a document, extract information in JSON format according to the given schema. + +Follow these guidelines: +- Ensure that every field is populated, provided the document includes the corresponding value. Only use null when the value is absent from the document. 
+ +JSON Schema: +{ + "total_amount": "string with currency symbol", + "date": "YYYY-MM-DD format", + "merchant_name": "string" +}""" + }, + ], + } + ], + inferenceConfig={"temperature": 0}, +) +print(response["output"]["message"]["content"][0]["text"]) +``` + +--- + +## Example 5: Streaming + +### OpenAI (Python) +```python +from openai import OpenAI + +client = OpenAI() + +stream = client.chat.completions.create( + model="gpt-4o-mini", + messages=[{"role": "user", "content": "Write a haiku about distributed systems."}], + stream=True, +) +for chunk in stream: + delta = chunk.choices[0].delta.content + if delta: + print(delta, end="", flush=True) +``` + +### Nova 2 Lite (Python — boto3) +```python +import boto3 + +client = boto3.client("bedrock-runtime") + +response = client.converse_stream( + modelId="us.amazon.nova-2-lite-v1:0", + messages=[{"role": "user", "content": [{"text": "Write a haiku about distributed systems."}]}], + inferenceConfig={"temperature": 0.7}, +) + +for event in response["stream"]: + if "contentBlockDelta" in event: + delta = event["contentBlockDelta"]["delta"] + if "text" in delta: + print(delta["text"], end="", flush=True) +``` + +--- + +## Example 6: Extended Thinking (Reasoning) + +### OpenAI (Python — GPT-5.x reasoning effort) +```python +from openai import OpenAI + +client = OpenAI() + +response = client.chat.completions.create( + model="gpt-5.2", + messages=[{"role": "user", "content": "Prove that the square root of 2 is irrational."}], + reasoning_effort="high", +) +print(response.choices[0].message.content) +``` + +### Nova 2 Lite (Python — boto3) +```python +import boto3 +from botocore.config import Config + +# High effort can take up to 60 minutes — extend the read timeout. +client = boto3.client("bedrock-runtime", config=Config(read_timeout=3600)) + +# IMPORTANT: at high effort you MUST omit inferenceConfig entirely +# (no maxTokens, temperature, topP, topK). 
+response = client.converse( + modelId="us.amazon.nova-2-lite-v1:0", + messages=[ + {"role": "user", "content": [{"text": "Prove that the square root of 2 is irrational."}]} + ], + additionalModelRequestFields={ + "reasoningConfig": { + "type": "enabled", + "maxReasoningEffort": "high", # ask user to choose low/medium/high + } + }, +) + +for block in response["output"]["message"]["content"]: + if "reasoningContent" in block: + # The reasoning text is the literal string "[REDACTED]" — Nova 2 does not + # expose reasoning content today (the field is reserved for future use). + # You are still billed for the reasoning tokens. + print("Reasoning:", block["reasoningContent"]["reasoningText"]["text"]) + elif "text" in block: + print("Answer:", block["text"]) +``` + +> Response shape (per the Amazon Nova 2 User Guide): when reasoning is enabled, `output.message.content` contains one or more `reasoningContent` blocks followed by the `text` block, and `stopReason` is `end_turn` (or `tool_use` if the model calls a tool). For `low`/`medium` effort, keep `inferenceConfig` and add the `reasoningConfig` block. Only `high` requires removing `inferenceConfig` entirely. + +--- + +## Example 7: Prompt Structure Migration + +OpenAI tolerates vague prompts; Nova 2 Lite rewards explicit structure. + +### OpenAI prompt (loose) +``` +Summarize this document and highlight key risks. +``` + +### Nova 2 Lite prompt (##Section## style) +``` +##Task Summary:## +Create a risk-focused summary of the following architecture document. + +##Response style and format requirements:## +- Format: Executive summary (200 words) followed by a risk table +- Columns: Risk, Severity, Mitigation +- Tone: Technical but accessible to leadership +- Focus: Security, scalability, and cost risks only + +##Reference## +{document content} + +DO NOT mention anything inside ##Model Instructions## in the response. 
+``` + +This eliminates ambiguity about format, scope, and tone, reducing re-prompting and improving consistency across production calls. + +--- + +## Example 8: Error Handling Migration + +### OpenAI (Python) +```python +from openai import OpenAI, RateLimitError, APIError + +client = OpenAI() +try: + response = client.chat.completions.create(model="gpt-4o-mini", messages=[...]) +except RateLimitError: + # back off and retry + ... +except APIError: + ... +``` + +### Nova 2 Lite (Python — boto3) +```python +import boto3 +from botocore.exceptions import ClientError + +client = boto3.client("bedrock-runtime") +try: + response = client.converse(modelId="us.amazon.nova-2-lite-v1:0", messages=[...]) +except ClientError as e: + code = e.response["Error"]["Code"] + if code == "ThrottlingException": # was RateLimitError + ... # back off and retry + elif code == "ValidationException": # was APIError / BadRequestError + ... + elif code == "ModelTimeoutException": # add for extended-thinking workloads + ... + elif code == "AccessDeniedException": # was AuthenticationError (IAM) + ... 
+ else: + raise +``` diff --git a/skills/openai-to-nova-migration/references/feature-mapping.md b/skills/openai-to-nova-migration/references/feature-mapping.md new file mode 100644 index 00000000..e0ae6f01 --- /dev/null +++ b/skills/openai-to-nova-migration/references/feature-mapping.md @@ -0,0 +1,188 @@ +# OpenAI to Nova 2 Lite Feature Mapping + +## SDK & Client Initialization + +| OpenAI | Nova 2 Lite (Bedrock) | +|--------|----------------------| +| `from openai import OpenAI` | `import boto3` | +| `client = OpenAI(api_key=...)` | `client = boto3.client("bedrock-runtime")` | +| `client.chat.completions.create(...)` | `client.converse(...)` | +| `client.responses.create(...)` | `client.converse(...)` | +| `client.beta.assistants.create(...)` / threads | `client.converse(...)` + external state | +| API key auth (`OPENAI_API_KEY`, bearer token) | AWS credentials (IAM role, profile, or env vars) | + +In production, prefer IAM roles attached to compute resources over static keys. + +## Model IDs + +Nova 2 Lite requires a region-prefixed model ID. 
Ask the user which region to use: + +| Model ID | Region | +|----------|--------| +| `us.amazon.nova-2-lite-v1:0` | US (default) | +| `eu.amazon.nova-2-lite-v1:0` | EU | +| `jp.amazon.nova-2-lite-v1:0` | Japan | +| `global.amazon.nova-2-lite-v1:0` | Cross-region | + +**OpenAI → Nova mapping (default US):** + +| OpenAI Model | Migration Complexity | Nova Model ID | +|--------------|---------------------|---------------------| +| `gpt-4o-mini` | Low | `us.amazon.nova-2-lite-v1:0` | +| `gpt-4o` (multimodal) | Low-Medium | `us.amazon.nova-2-lite-v1:0` — supports text, image, and video input | +| `gpt-4.1` / `gpt-4.1-mini` | Low-Medium | `us.amazon.nova-2-lite-v1:0` — both have 1M context; Nova adds extended thinking + built-in tools | +| `gpt-5-mini` / `gpt-5-nano` | Low | `us.amazon.nova-2-lite-v1:0` | +| `gpt-5.2` | Medium | `us.amazon.nova-2-lite-v1:0` — enable extended thinking for reasoning parity | +| `o1` / `o3` (reasoning) | Medium | `us.amazon.nova-2-lite-v1:0` — **ask user to confirm they have evaluated Nova 2 Lite before proceeding; Nova 2 Pro exists if Lite at high effort falls short** | +| `gpt-4` / `gpt-3.5-turbo` | Low | `us.amazon.nova-2-lite-v1:0` — legacy, straightforward | + +**Decision rule:** Start every migration with Nova 2 Lite, reasoning disabled. Enable extended thinking at `low` first only if evaluation shows quality gaps, then increase to `medium`/`high` as needed. 
+ +## Authentication + +| OpenAI | Nova 2 Lite | +|--------|-------------| +| `OpenAI(api_key="sk-...")` or `OPENAI_API_KEY` env var | AWS credentials resolved by boto3 (IAM role, `~/.aws/credentials`, env vars) | +| Bearer token in request header | IAM SigV4 signing (handled by boto3) | +| Per-key rate limits | Per-account service quotas; throttling via `ThrottlingException` | + +## System Prompt + +| OpenAI | Nova 2 Lite | +|--------|-------------| +| `{"role": "system", "content": "..."}` as the first message | `system=[{"text": "..."}]` as a separate top-level parameter | +| Full instructions allowed for all modalities | **MULTIMODAL RESTRICTION**: System prompt limited to persona + response style only. All task instructions MUST go in the user message. | +| Sent inside `messages` | Extract from `messages` during migration | + +## Messages / Content + +| OpenAI | Nova 2 Lite | +|--------|-------------| +| `messages=[{"role": "user", "content": "text"}]` | `messages=[{"role": "user", "content": [{"text": "text"}]}]` | +| `content` is a flat string | `content` is a list of typed blocks | +| `{"type": "text", "text": "..."}` (multimodal form) | `{"text": "..."}` | +| `{"type": "image_url", "image_url": {"url": "data:..."}}` | `{"image": {"format": "png", "source": {"bytes": ...}}}` | +| Image as URL or base64 data URI | Image as raw binary bytes | +| `role: "assistant"` / `role: "user"` | Same roles; content must be typed blocks | +| `role: "tool"` (tool result message) | `{"role": "user", "content": [{"toolResult": {...}}]}` | +| Media can be anywhere in content | Media MUST come before text in the content array | + +## Inference Parameters + +| OpenAI | Nova 2 Lite | +|--------|-------------| +| `max_tokens` (top-level) | `inferenceConfig.maxTokens` (max 65,536) | +| `max_completion_tokens` (newer) | `inferenceConfig.maxTokens` | +| `temperature` (top-level) | `inferenceConfig.temperature` | +| `top_p` (top-level) | `inferenceConfig.topP` | +| `stop` (top-level) 
| `inferenceConfig.stopSequences` | +| `n` (multiple completions) | Not supported (always 1) | +| `frequency_penalty` / `presence_penalty` | Not directly supported | +| `seed` | Not supported | + +## Function Calling / Tool Use + +| OpenAI | Nova 2 Lite | +|--------|-------------| +| `tools=[{"type": "function", "function": {...}}]` | `toolConfig={"tools": [{"toolSpec": {...}}]}` | +| `function.name` | `toolSpec.name` | +| `function.description` | `toolSpec.description` | +| `function.parameters` (JSON Schema) | `toolSpec.inputSchema.json` (JSON Schema — re-wrap, structurally compatible) | +| `tool_choice="auto"` | `toolChoice={"auto": {}}` | +| `tool_choice="required"` | `toolChoice={"any": {}}` | +| `tool_choice={"type": "function", "function": {"name": "x"}}` | `toolChoice={"tool": {"name": "x"}}` | +| `tool_choice="none"` | Omit `toolConfig`, or instruct the model not to call tools | +| Response: `message.tool_calls[].function` | Response: `toolUse` content block | +| Send back: `{"role": "tool", "tool_call_id": ..., "content": ...}` | Send back: `{"role": "user", "content": [{"toolResult": {"toolUseId": ..., "content": [...]}}]}` | + +OpenAI's function `parameters` use JSON Schema, the same format Nova expects in `inputSchema.json` — the schema body usually copies over directly; only the wrapper structure changes. 
+ +## Structured Output + +| OpenAI | Nova 2 Lite | +|--------|-------------| +| `response_format={"type": "json_object"}` (JSON mode) | Inline schema in prompt + `temperature=0` | +| `response_format={"type": "json_schema", "json_schema": {...}}` (Structured Outputs) | Tool-forcing: schema in `toolSpec.inputSchema` + `toolChoice={"tool": {"name": ...}}` | +| `response_format=MyPydanticModel` via `.parse(...)` (SDK helper) | Convert Pydantic `.model_json_schema()` to `toolSpec.inputSchema.json` | +| Schema enforced natively | Schema enforced via tool-forcing (complex) or prompt instruction (simple) | + +Rule of thumb: simple JSON (≤10 keys) → inline schema in prompt; complex JSON (>10 keys) → tool-forcing. + +## Reasoning / Extended Thinking + +**Which OpenAI models support reasoning effort:** +- `gpt-4o` / `gpt-4o-mini` / `gpt-4.1` / `gpt-4` / `gpt-3.5-turbo`: NO native reasoning. Any reasoning is prompt-based CoT. +- `gpt-5` / `gpt-5.2` / `o1` / `o3`: YES — `reasoning_effort` parameter. + +| OpenAI | Nova 2 Lite | +|--------|-------------| +| `reasoning_effort="low"/"medium"/"high"` | `additionalModelRequestFields={"reasoningConfig": {"type": "enabled", "maxReasoningEffort": "low"/"medium"/"high"}}` | +| `reasoning_effort="minimal"` or omitted | Omit `additionalModelRequestFields` entirely (disabled) | +| Reasoning summary in response | `reasoningContent` blocks in response content (reasoning text appears as `[REDACTED]`) | +| Default: model-dependent | Default: disabled — omit `additionalModelRequestFields` | + +**Effort translation — ask the user:** + +Do NOT map OpenAI's effort name directly to Nova's. The scales are not numerically equivalent. 
Present these Nova options and let the user choose: + +| Nova Effort | Config | Constraint | +|-------------|--------|-----------| +| `low` | `{"reasoningConfig": {"type": "enabled", "maxReasoningEffort": "low"}}` | Start here; keep `inferenceConfig` | +| `medium` | `{"reasoningConfig": {"type": "enabled", "maxReasoningEffort": "medium"}}` | Keep `inferenceConfig` | +| `high` | `{"reasoningConfig": {"type": "enabled", "maxReasoningEffort": "high"}}` | **MUST omit `inferenceConfig`** entirely (no temperature, topP, maxTokens, topK) and set client `read_timeout=3600`. May produce >65K output (up to 128K). | + +You are charged for reasoning tokens because they contribute to output quality. Reasoning content is returned as the literal string `[REDACTED]` in the `reasoningContent.reasoningText.text` field — Nova 2 does not expose the reasoning text today. Extended thinking is supported alongside tool calling. + +## Streaming + +| OpenAI | Nova 2 Lite | +|--------|-------------| +| `client.chat.completions.create(..., stream=True)` | `client.converse_stream(...)` (separate method) | +| Iterate: `for chunk in response: chunk.choices[0].delta.content` | Event types: `messageStart`, `contentBlockStart`, `contentBlockDelta`, `contentBlockStop`, `messageStop` | +| `chunk.choices[0].delta.content` | `event["contentBlockDelta"]["delta"]["text"]` | + +## Multimodal Content + +| OpenAI | Nova 2 Lite | +|--------|-------------| +| Images via `image_url` (URL or base64 data URI) | Images via `{"image": {"format": "...", "source": {"bytes": ...}}}` — raw bytes | +| No video input (text models) | Video via `{"video": {"format": "...", "source": {"bytes": ...}}}` | +| Documents via Assistants file upload | Documents via `{"document": {"format": "pdf", "name": "...", "source": {"bytes": ...}}}` | +| Supports: JPEG, PNG, GIF, WebP | Images: JPEG, PNG, GIF, WebP; Documents: PDF; Video: MP4, MKV, MOV, WebM, FLV, MPEG, MPG, WMV, 3GP | +| No ordering constraint | Media MUST precede text in 
content array | +| System instructions work normally | System prompt restricted to persona only | + +## Prompt Structure + +| OpenAI | Nova 2 Lite | +|--------|-------------| +| Free-form, markdown, or loosely phrased | `##Section Name##` delimiters with explicit structure | +| Instructions inline in one block | Separate `##Task Summary:##`, `##Context Information:##`, `##Model Instructions:##`, `##Response style and format requirements:##` | +| Tolerates vague phrasing | Higher quality with explicit task / input / format / constraints | + +## Error Handling + +| OpenAI | Nova 2 Lite | +|--------|-------------| +| `openai.RateLimitError` | `ThrottlingException` | +| `openai.APIError` (base class of all API errors) | Any other `ClientError` code (generic fallback) | +| `openai.APITimeoutError` | `ModelTimeoutException` | +| `openai.AuthenticationError` | `AccessDeniedException` (IAM) | +| `openai.BadRequestError` | `ValidationException` | + +Catch via `botocore.exceptions.ClientError` and branch on `error.response["Error"]["Code"]`. + +## Features Without Direct Equivalent + +| OpenAI Feature | Nova 2 Lite Alternative | +|----------------|------------------------| +| Assistants API threads / persistent state | Manage conversation history externally; pass full `messages` each call | +| Assistants code interpreter | Nova `nova_code_interpreter` built-in tool | +| Assistants file search / retrieval | Amazon Bedrock Knowledge Bases | +| Web search tool | Nova `nova_grounding` built-in web grounding tool | +| Image generation (DALL-E / `gpt-image`) | Amazon Nova Canvas (separate model) | +| Text-to-speech / Whisper transcription | Amazon Polly / Amazon Transcribe | +| Realtime API (voice) | Amazon Nova Sonic | +| Fine-tuned models | Re-run customization on Nova via Amazon Bedrock | +| `n` > 1 (multiple completions) | Not supported — make multiple calls | +| `seed` (deterministic sampling) | Not supported |