From 3ca63c671a9d96dc0ea16fccc08c5d0589cd0c7e Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Wed, 13 May 2026 14:24:03 +1000 Subject: [PATCH 01/48] DOC-3498: tinymceai on-prem documentation. --- -scripts/render-mermaid.sh | 48 + .../advanced-scenarios-fig-1.mmd | 4 + .../advanced-scenarios-fig-1.svg | 1 + .../advanced-scenarios-fig-2.mmd | 14 + .../advanced-scenarios-fig-2.svg | 1 + .../advanced-scenarios-fig-3.mmd | 8 + .../advanced-scenarios-fig-3.svg | 1 + .../complete-guide-fig-1.mmd | 42 + .../complete-guide-fig-1.svg | 1 + .../complete-guide-fig-2.mmd | 17 + .../complete-guide-fig-2.svg | 1 + .../complete-guide-fig-3.mmd | 18 + .../complete-guide-fig-3.svg | 1 + .../complete-guide-fig-4.mmd | 25 + .../complete-guide-fig-4.svg | 1 + .../complete-guide-fig-5.mmd | 29 + .../complete-guide-fig-5.svg | 1 + .../complete-guide-fig-6.mmd | 20 + .../complete-guide-fig-6.svg | 1 + .../complete-guide-fig-7.mmd | 4 + .../complete-guide-fig-7.svg | 1 + .../complete-guide-fig-8.mmd | 14 + .../complete-guide-fig-8.svg | 1 + .../complete-guide-fig-9.mmd | 20 + .../complete-guide-fig-9.svg | 1 + .../database-setup-fig-1.mmd | 12 + .../database-setup-fig-1.svg | 1 + .../eap-setup-guide-fig-1.svg | 1 + .../framework-integration-fig-1.mmd | 20 + .../framework-integration-fig-1.svg | 1 + .../jwt-authentication-fig-1.mmd | 29 + .../jwt-authentication-fig-1.svg | 1 + .../production-guide-fig-1.mmd | 30 + .../production-guide-fig-1.svg | 1 + .../providers-guide-fig-1.mmd | 25 + .../providers-guide-fig-1.svg | 1 + .../troubleshooting-fig-1.mmd | 20 + .../troubleshooting-fig-1.svg | 1 + modules/ROOT/nav.adoc | 13 +- .../pages/tinymceai-on-premises-advanced.adoc | 439 ++++++++ .../pages/tinymceai-on-premises-database.adoc | 668 ++++++++++++ .../tinymceai-on-premises-frameworks.adoc | 222 ++++ ...tinymceai-on-premises-getting-started.adoc | 428 ++++++++ .../ROOT/pages/tinymceai-on-premises-jwt.adoc | 911 ++++++++++++++++ .../tinymceai-on-premises-production.adoc | 582 
+++++++++++ .../tinymceai-on-premises-providers.adoc | 985 ++++++++++++++++++ .../tinymceai-on-premises-reference.adoc | 177 ++++ ...tinymceai-on-premises-troubleshooting.adoc | 310 ++++++ modules/ROOT/pages/tinymceai-on-premises.adoc | 145 +++ 49 files changed, 5297 insertions(+), 1 deletion(-) create mode 100755 -scripts/render-mermaid.sh create mode 100644 modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-1.mmd create mode 100644 modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-1.svg create mode 100644 modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-2.mmd create mode 100644 modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-2.svg create mode 100644 modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-3.mmd create mode 100644 modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-3.svg create mode 100644 modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.mmd create mode 100644 modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.svg create mode 100644 modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.mmd create mode 100644 modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.svg create mode 100644 modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-3.mmd create mode 100644 modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-3.svg create mode 100644 modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-4.mmd create mode 100644 modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-4.svg create mode 100644 modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-5.mmd create mode 100644 modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-5.svg create mode 100644 modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-6.mmd create mode 100644 modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-6.svg create mode 100644 
modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-7.mmd create mode 100644 modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-7.svg create mode 100644 modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-8.mmd create mode 100644 modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-8.svg create mode 100644 modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-9.mmd create mode 100644 modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-9.svg create mode 100644 modules/ROOT/images/tinymceai-on-premises/database-setup-fig-1.mmd create mode 100644 modules/ROOT/images/tinymceai-on-premises/database-setup-fig-1.svg create mode 100644 modules/ROOT/images/tinymceai-on-premises/eap-setup-guide-fig-1.svg create mode 100644 modules/ROOT/images/tinymceai-on-premises/framework-integration-fig-1.mmd create mode 100644 modules/ROOT/images/tinymceai-on-premises/framework-integration-fig-1.svg create mode 100644 modules/ROOT/images/tinymceai-on-premises/jwt-authentication-fig-1.mmd create mode 100644 modules/ROOT/images/tinymceai-on-premises/jwt-authentication-fig-1.svg create mode 100644 modules/ROOT/images/tinymceai-on-premises/production-guide-fig-1.mmd create mode 100644 modules/ROOT/images/tinymceai-on-premises/production-guide-fig-1.svg create mode 100644 modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.mmd create mode 100644 modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.svg create mode 100644 modules/ROOT/images/tinymceai-on-premises/troubleshooting-fig-1.mmd create mode 100644 modules/ROOT/images/tinymceai-on-premises/troubleshooting-fig-1.svg create mode 100644 modules/ROOT/pages/tinymceai-on-premises-advanced.adoc create mode 100644 modules/ROOT/pages/tinymceai-on-premises-database.adoc create mode 100644 modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc create mode 100644 modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc create mode 100644 
modules/ROOT/pages/tinymceai-on-premises-jwt.adoc create mode 100644 modules/ROOT/pages/tinymceai-on-premises-production.adoc create mode 100644 modules/ROOT/pages/tinymceai-on-premises-providers.adoc create mode 100644 modules/ROOT/pages/tinymceai-on-premises-reference.adoc create mode 100644 modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc create mode 100644 modules/ROOT/pages/tinymceai-on-premises.adoc
diff --git a/-scripts/render-mermaid.sh b/-scripts/render-mermaid.sh
new file mode 100755
index 0000000000..c568b6b529
--- /dev/null
+++ b/-scripts/render-mermaid.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+#
+# Re-renders all .mmd Mermaid sources to .svg in the on-premises images folder.
+#
+# Usage (from repo root):
+#   ./-scripts/render-mermaid.sh
+#
+# Requirements:
+#   Node.js (npx downloads @mermaid-js/mermaid-cli automatically)
+#
+set -euo pipefail
+
+DIAGRAM_DIR="modules/ROOT/images/tinymceai-on-premises"
+CONFIG_FILE=$(mktemp)
+# Register cleanup before writing so the temp file is removed on any exit.
+trap 'rm -f "$CONFIG_FILE"' EXIT
+
+cat > "$CONFIG_FILE" << 'JSON'
+{
+  "htmlLabels": false,
+  "flowchart": { "htmlLabels": false, "useMaxWidth": true },
+  "sequence": { "useMaxWidth": true },
+  "theme": "default"
+}
+JSON
+
+count=0
+for mmd in "$DIAGRAM_DIR"/*.mmd; do
+  [ -f "$mmd" ] || continue
+  svg="${mmd%.mmd}.svg"
+  name=$(basename "$mmd")
+  printf " Rendering %s\n" "$name"
+  npx -y @mermaid-js/mermaid-cli -i "$mmd" -o "$svg" \
+    -c "$CONFIG_FILE" --backgroundColor white 2>/dev/null
+
+  # Mermaid outputs width="100%" which has no intrinsic size in <img> tags.
+  # Replace it with the pixel width from the viewBox so browsers can compute
+  # the aspect ratio. "|| true": a missing viewBox must not abort (pipefail).
+  vb_width=$(grep -o 'viewBox="[^"]*"' "$svg" | head -1 | awk -F'[ "]' '{print $4}' || true)
+  if [ -n "$vb_width" ]; then
+    vb_int=$(printf "%.0f" "$vb_width")
+    perl -i -pe "s/width=\"100%\"/width=\"${vb_int}\"/" "$svg"
+  fi
+
+  count=$((count + 1))
+done
+
+printf "\nRendered %d diagrams in %s\n" "$count" "$DIAGRAM_DIR"
diff --git a/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-1.mmd b/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-1.mmd new file mode 100644 index 0000000000..6d69c87d3a --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-1.mmd @@ -0,0 +1,4 @@ +flowchart LR + Editor[TinyMCE editor] <-->|chat / quick actions| AI[AI Service] + AI -->|MCP tools/call| MCP[MCP Server
knowledge-hub] + MCP -->|read| KB[Confluence ·
Notion ·
GitBook ·
internal wiki] diff --git a/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-1.svg new file mode 100644 index 0000000000..2ab529c4b1 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-1.svg @@ -0,0 +1 @@ +chat / quick actionsMCP tools/callreadTinyMCE editorAI ServiceMCP Serverknowledge-hubConfluence ·Notion ·GitBook ·internal wiki \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-2.mmd b/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-2.mmd new file mode 100644 index 0000000000..b8eb0690a6 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-2.mmd @@ -0,0 +1,14 @@ +flowchart LR + subgraph Tenants[Your SaaS customers] + CA[Customer A users] + CB[Customer B users] + CC[Customer C users] + end + subgraph AISvc[Single AI service deployment] + EA[Environment A
access keys A
isolated conversations] + EB[Environment B
access keys B
isolated conversations] + EC[Environment C
access keys C
isolated conversations] + end + CA --> EA --> OpenAI[OpenAI] + CB --> EB --> Anthropic[Anthropic] + CC --> EC --> Azure[Azure OpenAI] diff --git a/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-2.svg b/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-2.svg new file mode 100644 index 0000000000..8687168cdd --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-2.svg @@ -0,0 +1 @@ +Single AI service deploymentYour SaaS customersCustomer A usersCustomer B usersCustomer C usersEnvironment Aaccess keys Aisolated conversationsEnvironment Baccess keys Bisolated conversationsEnvironment Caccess keys Cisolated conversationsOpenAIAnthropicAzure OpenAI \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-3.mmd b/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-3.mmd new file mode 100644 index 0000000000..b58745d342 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-3.mmd @@ -0,0 +1,8 @@ +flowchart LR + Lawyer[TinyMCE editor
used by lawyer] <--> AI[AI Service] + AI -->|tools/call| MCP1[MCP: contract-db] + AI -->|tools/call| MCP2[MCP: compliance-checker] + AI -->|tools/call| MCP3[MCP: precedent-search] + MCP1 --> ContractDB[(Contract clause
repository)] + MCP2 --> ComplianceRules[(Regulatory
rule sets)] + MCP3 --> PrecedentIdx[(Precedent
search index)] diff --git a/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-3.svg b/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-3.svg new file mode 100644 index 0000000000..97885c4cfb --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-3.svg @@ -0,0 +1 @@ +tools/calltools/calltools/callTinyMCE editorused by lawyerAI ServiceMCP: contract-dbMCP: compliance-checkerMCP: precedent-searchContract clauserepositoryRegulatoryrule setsPrecedentsearch index \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.mmd b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.mmd new file mode 100644 index 0000000000..15070a485a --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.mmd @@ -0,0 +1,42 @@ +flowchart TB + Browser["Browser
TinyMCE editor + tinymceai plugin"] + TokenEP["Your token endpoint
signs HS256 JWTs"] + Browser -->|"fetch JWT"| TokenEP + Browser -->|"HTTPS + Bearer JWT"| LB + + subgraph App["Application layer (stateless, N replicas)"] + LB["Reverse proxy / Load balancer
nginx · ALB · K8s Ingress
TLS termination · SSE pass-through"] + AI1["ai-service replica 1"] + AI2["ai-service replica 2"] + AIN["ai-service replica N"] + LB --> AI1 + LB --> AI2 + LB --> AIN + end + + subgraph Data["Shared data layer"] + DB[("SQL database
MySQL 8.0+ or
PostgreSQL 13+")] + Cache[("Redis 3.2.6+
single node or cluster")] + Storage[("File storage
database · filesystem ·
S3 · Azure Blob")] + end + + AI1 --> DB + AI1 --> Cache + AI1 --> Storage + AI2 --> DB + AI2 --> Cache + AI2 --> Storage + AIN --> DB + AIN --> Cache + AIN --> Storage + + AI1 -->|HTTPS| LLM["LLM provider
OpenAI · Anthropic · Google ·
Azure · Bedrock · Vertex · self-hosted"] + AI2 -->|HTTPS| LLM + AIN -->|HTTPS| LLM + + AI1 -.-> Obs["OpenTelemetry · Langfuse ·
log aggregator"] + AI2 -.-> Obs + AIN -.-> Obs + + AI1 -.->|tool calls| MCP["MCP servers
internal knowledge bases"] + AI2 -.-> MCP diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.svg new file mode 100644 index 0000000000..6ba18d328b --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.svg @@ -0,0 +1 @@ +Shared data layerApplication layer (stateless, N replicas)fetch JWTHTTPS + Bearer JWTHTTPSHTTPSHTTPStool callsBrowserTinyMCE editor + tinymceaipluginYour token endpointsigns HS256 JWTsReverse proxy / Loadbalancernginx · ALB · K8s IngressTLS termination · SSEpass-throughai-service replica 1ai-service replica 2ai-service replica NSQL databaseMySQL 8.0+ orPostgreSQL 13+Redis 3.2.6+single node or clusterFile storagedatabase · filesystem ·S3 · Azure BlobLLM providerOpenAI · Anthropic ·Google ·Azure · Bedrock · Vertex ·self-hostedOpenTelemetry · Langfuse ·log aggregatorMCP serversinternal knowledge bases \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.mmd b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.mmd new file mode 100644 index 0000000000..8fb6a35113 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.mmd @@ -0,0 +1,17 @@ +flowchart TD + Start([New deployment]) --> Q1{Evaluating or
going to production?} + Q1 -->|Evaluating locally| Compose[Docker Compose
all services on one host
Part 2 quick start] + Q1 -->|Production| Q2{Orchestrator?} + Q2 -->|Kubernetes| K8s[Kubernetes deployment
Section 33] + Q2 -->|AWS ECS / Fargate| ECS[ECS task definition
Section 34] + Q2 -->|Docker / Podman on VMs| VMs[Docker or Podman compose
Sections 9.1 / 9.2] + Q2 -->|Bare metal / no containers| Bare[Native install for
data layer; container
for AI service
Section 8.5] + Compose --> DB{Database?} + K8s --> DB + ECS --> DB + VMs --> DB + Bare --> DB + DB -->|Managed cloud DB| Managed[RDS · Cloud SQL ·
Azure Database] + DB -->|Self-managed| Self[Containers or native install] + Managed --> Done([Continue with Part 3]) + Self --> Done diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.svg b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.svg new file mode 100644 index 0000000000..dcf71a1ca6 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.svg @@ -0,0 +1 @@ +Evaluating locallyProductionKubernetesAWS ECS / FargateDocker / Podman on VMsBare metal / no containersManaged cloud DBSelf-managedNew deploymentEvaluating orgoing to production?Docker Composeall services on one hostPart 2 quick startOrchestrator?Kubernetes deploymentSection 33ECS task definitionSection 34Docker or Podman composeSections 9.1 / 9.2Native install fordata layer; containerfor AI serviceSection 8.5Database?RDS · Cloud SQL ·Azure DatabaseContainers or native installContinue with Part 3 \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-3.mmd b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-3.mmd new file mode 100644 index 0000000000..e5fa13ba4f --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-3.mmd @@ -0,0 +1,18 @@ +flowchart TB + Internet([Internet]) --> Ingress[Ingress controller
nginx-ingress · ALB controller
proxy-buffering off] + Ingress --> SvcAI[Service: ai-service] + SvcAI --> Pod1[Pod: ai-service replica 1] + SvcAI --> Pod2[Pod: ai-service replica 2] + SvcAI --> PodN[Pod: ai-service replica N] + Pod1 --> SvcDB[Service: database
or external RDS] + Pod2 --> SvcDB + PodN --> SvcDB + Pod1 --> SvcRedis[Service: redis
or external ElastiCache] + Pod2 --> SvcRedis + PodN --> SvcRedis + Pod1 --> S3[(S3 / Azure Blob)] + Pod2 --> S3 + PodN --> S3 + HPA[HorizontalPodAutoscaler] -. scales .-> Pod1 + HPA -. scales .-> Pod2 + HPA -. scales .-> PodN diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-3.svg b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-3.svg new file mode 100644 index 0000000000..a13caed517 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-3.svg @@ -0,0 +1 @@ +scalesscalesscalesInternetIngress controllernginx-ingress · ALBcontrollerproxy-buffering offService: ai-servicePod: ai-service replica 1Pod: ai-service replica 2Pod: ai-service replica NService: databaseor external RDSService: redisor external ElastiCacheS3 / Azure BlobHorizontalPodAutoscaler \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-4.mmd b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-4.mmd new file mode 100644 index 0000000000..ae5f01914b --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-4.mmd @@ -0,0 +1,25 @@ +flowchart LR + subgraph PR[PROVIDERS env var] + P1["my-openai-key
type: openai
apiKeys: [sk-...]"] + P2["my-bedrock
type: bedrock
credentials: {...}"] + P3["my-ollama
type: openai-compatible
baseUrl: .../v1"] + end + subgraph MD[MODELS env var] + M1["id: gpt-4.1
provider: my-openai-key
features: [...]"] + M2["id: us.anthropic.claude-sonnet-4-...
provider: my-bedrock
features: [...]"] + M3["id: qwen3:0.6b
provider: my-ollama
features: [...]"] + end + subgraph JWT[JWT auth.ai.permissions] + K1["ai:models:my-openai-key:gpt-4.1"] + K2["ai:models:my-bedrock:us.anthropic.claude-sonnet-4-..."] + K3["ai:models:my-ollama:qwen3:0.6b"] + end + M1 -.provider key.-> P1 + M2 -.provider key.-> P2 + M3 -.provider key.-> P3 + K1 -.gates access.-> M1 + K2 -.gates access.-> M2 + K3 -.gates access.-> M3 + M1 ==>|forwarded| LLM1[OpenAI API] + M2 ==>|forwarded| LLM2[AWS Bedrock] + M3 ==>|forwarded| LLM3[Local Ollama] diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-4.svg b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-4.svg new file mode 100644 index 0000000000..7a119ff245 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-4.svg @@ -0,0 +1 @@ +JWT auth.ai.permissionsMODELS env varPROVIDERS env varprovider keyprovider keyprovider keygates accessgates accessgates accessforwardedforwardedforwardedmy-openai-keytype: openaiapiKeys: [sk-...]my-bedrocktype: bedrockcredentials: {...}my-ollamatype: openai-compatiblebaseUrl: .../v1id: gpt-4.1provider: my-openai-keyfeatures: [...]id:us.anthropic.claude-sonnet-4-...provider: my-bedrockfeatures: [...]id: qwen3:0.6bprovider: my-ollamafeatures: [...]ai:models:my-openai-key:gpt-4.1ai:models:my-bedrock:us.anthropic.claude-sonnet-4-...ai:models:my-ollama:qwen3:0.6bOpenAI APIAWS BedrockLocal Ollama \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-5.mmd b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-5.mmd new file mode 100644 index 0000000000..87929497d5 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-5.mmd @@ -0,0 +1,29 @@ +sequenceDiagram + autonumber + actor User + participant Editor as TinyMCE editor
tinymceai plugin + participant App as Your backend
token endpoint + participant AI as AI service + participant LLM as LLM provider + + User->>Editor: Triggers an AI feature + Editor->>App: POST /api/ai-token
session cookie or Bearer + App->>App: Authenticate the user + Note over App: Sign HS256 JWT with API Secret
aud = environment ID
sub = user ID
auth.ai.permissions = [...] + App-->>Editor: { token: eyJ... } + Editor->>AI: POST /v1/conversations/id/messages
Authorization: Bearer eyJ... + AI->>AI: Verify HS256 signature
check aud, exp, permissions + + alt Token valid and permissions allow + AI->>LLM: Forward prompt + LLM-->>AI: Stream response chunks + AI-->>Editor: SSE text-delta events + else Signature does not match + AI-->>Editor: 401 invalid-jwt-signature + else aud not registered with AI runtime + AI-->>Editor: 401 invalid-jwt-payload + else Past expiry plus 60s leeway + AI-->>Editor: 401 invalid-jwt + else Permissions do not cover action + AI-->>Editor: 200 with allowed false + end diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-5.svg b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-5.svg new file mode 100644 index 0000000000..c800f9248a --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-5.svg @@ -0,0 +1 @@ +LLM providerAI serviceYour backendtoken endpointTinyMCE editortinymceai pluginLLM providerAI serviceYour backendtoken endpointTinyMCE editortinymceai pluginSign HS256 JWT with API Secretaud = environment IDsub = user IDauth.ai.permissions = [...]alt[Token valid and permissions allow][Signature does not match][aud not registered with AI runtime][Past expiry plus 60s leeway][Permissions do not cover action]UserTriggers an AI feature1POST /api/ai-tokensession cookie or Bearer2Authenticate the user3{ token: eyJ... 
}4POST /v1/conversations/id/messagesAuthorization: Bearer eyJ...5Verify HS256 signaturecheck aud, exp, permissions6Forward prompt7Stream response chunks8SSE text-delta events9401 invalid-jwt-signature10401 invalid-jwt-payload11401 invalid-jwt12200 with allowed false13User \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-6.mmd b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-6.mmd new file mode 100644 index 0000000000..452423aa61 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-6.mmd @@ -0,0 +1,20 @@ +sequenceDiagram + autonumber + actor User + participant Editor as TinyMCE + tinymceai plugin + participant Provider as tinymceai_token_provider
your function + participant App as Your backend
token endpoint + participant AI as AI service + + Note over Editor: tinymce.init runs once
plugin registers toolbar buttons + User->>Editor: Click AI button or open chat + Editor->>Provider: invoke + Provider->>App: fetch /api/ai-token
credentials include + App-->>Provider: { token eyJ... } + Provider-->>Editor: { token } + Editor->>AI: HTTPS request
Authorization Bearer eyJ... + AI-->>Editor: SSE stream + loop For each chunk + Editor->>Editor: Render streaming text + end + Note over Editor,Provider: Plugin re-invokes the provider
before token expiry
do not cache the JWT yourself diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-6.svg b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-6.svg new file mode 100644 index 0000000000..f7f9c365fa --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-6.svg @@ -0,0 +1 @@ +AI serviceYour backendtoken endpointtinymceai_token_provideryour functionTinyMCE + tinymceai pluginAI serviceYour backendtoken endpointtinymceai_token_provideryour functionTinyMCE + tinymceai plugintinymce.init runs onceplugin registers toolbar buttonsloop[For each chunk]Plugin re-invokes the providerbefore token expirydo not cache the JWT yourselfUserClick AI button or open chat1invoke2fetch /api/ai-tokencredentials include3{ token eyJ... }4{ token }5HTTPS requestAuthorization Bearer eyJ...6SSE stream7Render streaming text8User \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-7.mmd b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-7.mmd new file mode 100644 index 0000000000..a3059f2048 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-7.mmd @@ -0,0 +1,4 @@ +flowchart LR + Editor[TinyMCE editor] <-->|"chat / quick actions"| AI[AI Service] + AI -->|"MCP tools/call"| MCP[MCP Server
knowledge-hub] + MCP -->|"read"| KB[(Confluence ·
Notion ·
GitBook ·
internal wiki)] diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-7.svg b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-7.svg new file mode 100644 index 0000000000..3f0cb68ec5 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-7.svg @@ -0,0 +1 @@ +chat / quick actionsMCP tools/callreadTinyMCE editorAI ServiceMCP Serverknowledge-hubConfluence ·Notion ·GitBook ·internal wiki \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-8.mmd b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-8.mmd new file mode 100644 index 0000000000..b8eb0690a6 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-8.mmd @@ -0,0 +1,14 @@ +flowchart LR + subgraph Tenants[Your SaaS customers] + CA[Customer A users] + CB[Customer B users] + CC[Customer C users] + end + subgraph AISvc[Single AI service deployment] + EA[Environment A
access keys A
isolated conversations] + EB[Environment B
access keys B
isolated conversations] + EC[Environment C
access keys C
isolated conversations] + end + CA --> EA --> OpenAI[OpenAI] + CB --> EB --> Anthropic[Anthropic] + CC --> EC --> Azure[Azure OpenAI] diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-8.svg b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-8.svg new file mode 100644 index 0000000000..8687168cdd --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-8.svg @@ -0,0 +1 @@ +Single AI service deploymentYour SaaS customersCustomer A usersCustomer B usersCustomer C usersEnvironment Aaccess keys Aisolated conversationsEnvironment Baccess keys Bisolated conversationsEnvironment Caccess keys Cisolated conversationsOpenAIAnthropicAzure OpenAI \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-9.mmd b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-9.mmd new file mode 100644 index 0000000000..5714d740be --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-9.mmd @@ -0,0 +1,20 @@ +flowchart TD + Start([Something is wrong]) --> Q1{Container is
running?
docker ps shows it} + Q1 -->|No - exited or wont pull| S1[Container startup failures] + Q1 -->|Yes| Q2{curl /health
returns 200?} + Q2 -->|No - times out or 5xx| S1 + Q2 -->|Yes| Q3{API call returns
auth error?} + Q3 -->|Yes - 401 allowed false
invalid-jwt-...| S2[API and JWT authentication] + Q3 -->|No| Q4{SSE stream
carries event error
from LLM?} + Q4 -->|Yes| S3[LLM provider errors] + Q4 -->|No| Q5{Editor side
broken?
no toolbar token 401
hanging stream} + Q5 -->|Yes| S4[Editor and front-end] + Q5 -->|No| Q6{Slow timing out
or failing under load?} + Q6 -->|Yes| S5[Performance and capacity] + Q6 -->|No| S6[Diagnostic recipes] + S1 --> Recipe([If none fit
see Diagnostic recipes
then escalate]) + S2 --> Recipe + S3 --> Recipe + S4 --> Recipe + S5 --> Recipe + S6 --> Recipe diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-9.svg b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-9.svg new file mode 100644 index 0000000000..f611b021be --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-9.svg @@ -0,0 +1 @@ +No - exited or wont pullYesNo - times out or 5xxYesYes - 401 allowed falseinvalid-jwt-...NoYesNoYesNoYesNoSomething is wrongContainer isrunning?docker ps shows itContainer startup failurescurl /healthreturns 200?API call returnsauth error?API and JWT authenticationSSE streamcarries event errorfrom LLM?LLM provider errorsEditor sidebroken?no toolbar token 401hanging streamEditor and front-endSlow timing outor failing under load?Performance and capacityDiagnostic recipesIf none fitsee Diagnostic recipesthen escalate \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/database-setup-fig-1.mmd b/modules/ROOT/images/tinymceai-on-premises/database-setup-fig-1.mmd new file mode 100644 index 0000000000..4e914e0bd2 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/database-setup-fig-1.mmd @@ -0,0 +1,12 @@ +flowchart TD + Start([Where will MySQL/Postgres and Redis run?]) --> Q1{Evaluating or
deploying to prod?} + Q1 -->|Evaluating locally| Compose[Docker Compose
image: mysql:8.0 or postgres:16
+ redis:7] + Q1 -->|Deploying| Q2{Cloud or self-managed?} + Q2 -->|Cloud / managed services| Managed[AWS RDS · Cloud SQL ·
Azure Database
+ ElastiCache · Memorystore ·
Azure Cache for Redis] + Q2 -->|Self-managed| Q3{Container runtime
available?} + Q3 -->|Docker or Podman| Containers[Containers on the same
network or pod as ai-service] + Q3 -->|None - bare metal or VM| Native[Native install
brew · apt · yum · dnf
service runs on host] + Compose --> Verify([Verify: nc -zv host port
then start ai-service]) + Managed --> Verify + Containers --> Verify + Native --> Verify diff --git a/modules/ROOT/images/tinymceai-on-premises/database-setup-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/database-setup-fig-1.svg new file mode 100644 index 0000000000..d0ae05837f --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/database-setup-fig-1.svg @@ -0,0 +1 @@ +Evaluating locallyDeployingCloud / managed servicesSelf-managedDocker or PodmanNone - bare metal or VMWhere will MySQL/Postgresand Redis run?Evaluating ordeploying to prod?Docker Composeimage: mysql:8.0 orpostgres:16+ redis:7Cloud or self-managed?AWS RDS · Cloud SQL ·Azure Database+ ElastiCache ·Memorystore ·Azure Cache for RedisContainer runtimeavailable?Containers on the samenetwork or pod asai-serviceNative installbrew · apt · yum · dnfservice runs on hostVerify: nc -zv host portthen start ai-service \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/eap-setup-guide-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/eap-setup-guide-fig-1.svg new file mode 100644 index 0000000000..1f7fc0d0fc --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/eap-setup-guide-fig-1.svg @@ -0,0 +1 @@ +

Data Layer

Application Layer

obtain JWT

HTTPS + JWT

HTTPS

optional

Client
(TinyMCE Editor / REST API)

JWT Endpoint
(customer-provided)

Load Balancer
(optional)

TinyMCE AI Service
(1+ container instances)

SQL Database
MySQL 8.0+ · Postgres 13+

Redis 3.2.6+

File Storage
S3 · Azure Blob · filesystem · DB

LLM Provider
(OpenAI · Anthropic · Google
Azure · Bedrock · Vertex AI · self-hosted)

OpenTelemetry / Langfuse

\ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/framework-integration-fig-1.mmd b/modules/ROOT/images/tinymceai-on-premises/framework-integration-fig-1.mmd new file mode 100644 index 0000000000..9d1c8bc3e8 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/framework-integration-fig-1.mmd @@ -0,0 +1,20 @@ +sequenceDiagram + autonumber + actor User + participant Editor as TinyMCE + tinymceai plugin + participant Provider as tinymceai_token_provider
(your function) + participant App as Your backend
(token endpoint) + participant AI as AI service + + Note over Editor: tinymce.init() runs once
plugin registers toolbar buttons + User->>Editor: Click AI button or open chat + Editor->>Provider: invoke() + Provider->>App: fetch('/api/ai-token', { credentials: 'include' }) + App-->>Provider: { token: "eyJ..." } + Provider-->>Editor: { token } + Editor->>AI: HTTPS request
Authorization: Bearer eyJ... + AI-->>Editor: SSE stream + loop For each chunk + Editor->>Editor: Render streaming text + end + Note over Editor,Provider: Plugin re-invokes the provider
before token expiry — do not
cache the JWT yourself diff --git a/modules/ROOT/images/tinymceai-on-premises/framework-integration-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/framework-integration-fig-1.svg new file mode 100644 index 0000000000..a8fbe09718 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/framework-integration-fig-1.svg @@ -0,0 +1 @@ +AI serviceYour backend(token endpoint)tinymceai_token_provider(your function)TinyMCE + tinymceai pluginAI serviceYour backend(token endpoint)tinymceai_token_provider(your function)TinyMCE + tinymceai plugintinymce.init() runs onceplugin registers toolbar buttonsloop[For each chunk]Plugin re-invokes the providerbefore token expiry — do notcache the JWT yourselfUserClick AI button or open chat1invoke()2fetch('/api/ai-token', { credentials: 'include' })3{ token: "eyJ..." }4{ token }5HTTPS requestAuthorization: Bearer eyJ...6SSE stream7Render streaming text8User \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/jwt-authentication-fig-1.mmd b/modules/ROOT/images/tinymceai-on-premises/jwt-authentication-fig-1.mmd new file mode 100644 index 0000000000..587af33a91 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/jwt-authentication-fig-1.mmd @@ -0,0 +1,29 @@ +sequenceDiagram + autonumber + actor User + participant Editor as TinyMCE editor
(tinymceai plugin) + participant App as Your application backend
(token endpoint) + participant AI as AI service + participant LLM as LLM provider + + User->>Editor: Triggers an AI feature + Editor->>App: POST /api/ai-token
session cookie or Bearer + App->>App: Authenticate the user + Note over App: Sign HS256 JWT with API Secret
aud = environment ID
sub = user ID
auth.ai.permissions = [...] + App-->>Editor: { "token": "eyJ..." } + Editor->>AI: POST /v1/conversations/{id}/messages
Authorization: Bearer eyJ... + AI->>AI: Verify HS256 signature
check aud, exp, permissions + + alt Token valid and permissions allow the action + AI->>LLM: Forward prompt + LLM-->>AI: Stream response chunks + AI-->>Editor: SSE: text-delta events + else Signature does not match + AI-->>Editor: 401 invalid-jwt-signature + else aud is not registered with AI runtime + AI-->>Editor: 401 invalid-jwt-payload + else Past expiry plus 60s leeway + AI-->>Editor: 401 invalid-jwt + else Permissions do not cover the action + AI-->>Editor: 200 with allowed:false + end diff --git a/modules/ROOT/images/tinymceai-on-premises/jwt-authentication-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/jwt-authentication-fig-1.svg new file mode 100644 index 0000000000..8cd0598928 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/jwt-authentication-fig-1.svg @@ -0,0 +1 @@ +LLM providerAI serviceYour application backend(token endpoint)TinyMCE editor(tinymceai plugin)LLM providerAI serviceYour application backend(token endpoint)TinyMCE editor(tinymceai plugin)Sign HS256 JWT with API Secretaud = environment IDsub = user IDauth.ai.permissions = [...]alt[Token valid and permissions allow the action][Signature does not match][aud is not registered with AI runtime][Past expiry plus 60s leeway][Permissions do not cover the action]UserTriggers an AI feature1POST /api/ai-tokensession cookie or Bearer2Authenticate the user3{ "token": "eyJ..." 
}4POST /v1/conversations/{id}/messagesAuthorization: Bearer eyJ...5Verify HS256 signaturecheck aud, exp, permissions6Forward prompt7Stream response chunks8SSE: text-delta events9401 invalid-jwt-signature10401 invalid-jwt-payload11401 invalid-jwt12200 with allowed:false13User \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/production-guide-fig-1.mmd b/modules/ROOT/images/tinymceai-on-premises/production-guide-fig-1.mmd new file mode 100644 index 0000000000..8f5031c7b0 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/production-guide-fig-1.mmd @@ -0,0 +1,30 @@ +flowchart TB + Browser([TinyMCE in browser]) + Browser -->|HTTPS + JWT| LB[Reverse Proxy / Load Balancer
nginx · ALB · Ingress
TLS termination
proxy_buffering off] + LB -->|HTTP :8000| AI1[ai-service replica 1] + LB -->|HTTP :8000| AI2[ai-service replica 2] + LB -->|HTTP :8000| AIN[ai-service replica N] + + subgraph DataLayer["Shared data layer"] + DB[("MySQL 8.0+ /
Postgres 13+
Multi-AZ in prod")] + Cache[("Redis 7
cluster or managed")] + Storage[("S3 · Azure Blob ·
filesystem · DB")] + end + + AI1 --> DB + AI1 --> Cache + AI1 --> Storage + AI2 --> DB + AI2 --> Cache + AI2 --> Storage + AIN --> DB + AIN --> Cache + AIN --> Storage + + AI1 --> LLM[LLM Provider
OpenAI · Anthropic · Google ·
Azure · Bedrock · Vertex · self-hosted] + AI2 --> LLM + AIN --> LLM + + AI1 -.->|optional| Obs[OpenTelemetry · Langfuse ·
log aggregator] + AI2 -.-> Obs + AIN -.-> Obs diff --git a/modules/ROOT/images/tinymceai-on-premises/production-guide-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/production-guide-fig-1.svg new file mode 100644 index 0000000000..c3fb67a075 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/production-guide-fig-1.svg @@ -0,0 +1 @@ +Shared data layerHTTPS + JWTHTTP :8000HTTP :8000HTTP :8000optionalTinyMCE in browserReverse Proxy / LoadBalancernginx · ALB · IngressTLS terminationproxy_buffering offai-service replica 1ai-service replica 2ai-service replica NMySQL 8.0+ /Postgres 13+Multi-AZ in prodRedis 7cluster or managedS3 · Azure Blob ·filesystem · DBLLM ProviderOpenAI · Anthropic ·Google ·Azure · Bedrock · Vertex ·self-hostedOpenTelemetry · Langfuse ·log aggregator \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.mmd b/modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.mmd new file mode 100644 index 0000000000..2086e9cf30 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.mmd @@ -0,0 +1,25 @@ +flowchart LR + subgraph PR[PROVIDERS env var · JSON object] + P1["my-openai-key
type: openai
apiKeys: [sk-...]"] + P2["my-bedrock
type: bedrock
credentials: {...}"] + P3["my-ollama
type: openai-compatible
baseUrl: .../v1"] + end + subgraph MD[MODELS env var · JSON array] + M1["id: gpt-4.1
provider: my-openai-key
features: [...]"] + M2["id: us.anthropic.claude-sonnet-4-...
provider: my-bedrock
features: [...]"] + M3["id: qwen3:0.6b
provider: my-ollama
features: [...]"] + end + subgraph JWT[JWT auth.ai.permissions] + K1["ai:models:my-openai-key:gpt-4.1"] + K2["ai:models:my-bedrock:us.anthropic.claude-sonnet-4-..."] + K3["ai:models:my-ollama:qwen3:0.6b"] + end + M1 -.references provider key.-> P1 + M2 -.references provider key.-> P2 + M3 -.references provider key.-> P3 + K1 -.gates per-user access.-> M1 + K2 -.gates per-user access.-> M2 + K3 -.gates per-user access.-> M3 + M1 ==>|forwarded to upstream| LLM1[OpenAI API] + M2 ==>|forwarded to upstream| LLM2[AWS Bedrock] + M3 ==>|forwarded to upstream| LLM3[Local Ollama] diff --git a/modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.svg new file mode 100644 index 0000000000..a5f55d50a3 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.svg @@ -0,0 +1 @@ +JWT auth.ai.permissionsMODELS env var · JSON arrayPROVIDERS env var · JSON objectreferences provider keyreferences provider keyreferences provider keygates per-user accessgates per-user accessgates per-user accessforwarded to upstreamforwarded to upstreamforwarded to upstreammy-openai-keytype: openaiapiKeys: [sk-...]my-bedrocktype: bedrockcredentials: {...}my-ollamatype: openai-compatiblebaseUrl: .../v1id: gpt-4.1provider: my-openai-keyfeatures: [...]id:us.anthropic.claude-sonnet-4-...provider: my-bedrockfeatures: [...]id: qwen3:0.6bprovider: my-ollamafeatures: [...]ai:models:my-openai-key:gpt-4.1ai:models:my-bedrock:us.anthropic.claude-sonnet-4-...ai:models:my-ollama:qwen3:0.6bOpenAI APIAWS BedrockLocal Ollama \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/troubleshooting-fig-1.mmd b/modules/ROOT/images/tinymceai-on-premises/troubleshooting-fig-1.mmd new file mode 100644 index 0000000000..e866425fd9 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/troubleshooting-fig-1.mmd @@ -0,0 +1,20 @@ +flowchart TD + Start([Something is wrong]) --> Q1{Container is
running?
docker ps shows it} + Q1 -->|No - exited or won't pull| S1[Container startup failures] + Q1 -->|Yes| Q2{curl /health
returns 200?} + Q2 -->|No - times out or 5xx| S1 + Q2 -->|Yes| Q3{API call returns
auth error?} + Q3 -->|Yes - 401, allowed:false,
invalid-jwt-*| S2[API and JWT authentication] + Q3 -->|No| Q4{SSE stream
carries event: error
from LLM?} + Q4 -->|Yes| S3[LLM provider errors] + Q4 -->|No| Q5{Editor side
broken?
no toolbar, token 401,
hanging stream} + Q5 -->|Yes| S4[Editor and front-end] + Q5 -->|No| Q6{Slow, timing out,
or failing under load?} + Q6 -->|Yes| S5[Performance and capacity] + Q6 -->|No| S6[Production and scaling] + S1 --> Recipe([If none fit:
see Diagnostic recipes
then escalate to
support@tiny.cloud]) + S2 --> Recipe + S3 --> Recipe + S4 --> Recipe + S5 --> Recipe + S6 --> Recipe diff --git a/modules/ROOT/images/tinymceai-on-premises/troubleshooting-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/troubleshooting-fig-1.svg new file mode 100644 index 0000000000..a8651978ca --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/troubleshooting-fig-1.svg @@ -0,0 +1 @@ +No - exited or won't pullYesNo - times out or 5xxYesYes - 401, allowed:false,invalid-jwt-*NoYesNoYesNoYesNoSomething is wrongContainer isrunning?docker ps shows itContainer startup failurescurl /healthreturns 200?API call returnsauth error?API and JWT authenticationSSE streamcarries event: errorfrom LLM?LLM provider errorsEditor sidebroken?no toolbar, token 401,hanging streamEditor and front-endSlow, timing out,or failing under load?Performance and capacityProduction and scalingIf none fit:see Diagnostic recipesthen escalate tosupport@tiny.cloud \ No newline at end of file diff --git a/modules/ROOT/nav.adoc b/modules/ROOT/nav.adoc index f9327d1f6f..82f443cab3 100644 --- a/modules/ROOT/nav.adoc +++ b/modules/ROOT/nav.adoc @@ -264,11 +264,22 @@ ****** xref:tinymceai-api-overview.adoc[API Overview] ****** xref:tinymceai-api-quick-start.adoc[API Quick Start] ****** xref:tinymceai-streaming.adoc[Streaming] -**** xref:tinymceai-jwt-authentication-intro.adoc[JWT Authentication] +**** xref:tinymceai-jwt-authentication-intro.adoc[JWT authentication] ***** xref:tinymceai-permissions.adoc[Permissions] ***** Guides ****** xref:tinymceai-with-jwt-authentication-nodejs.adoc[JWT authentication (Node.js)] ****** xref:tinymceai-with-jwt-authentication-php.adoc[JWT authentication (PHP)] +**** On-premises deployment +***** xref:tinymceai-on-premises.adoc[Overview] +***** xref:tinymceai-on-premises-getting-started.adoc[Getting started] +***** xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage] +***** xref:tinymceai-on-premises-providers.adoc[LLM providers] +***** 
xref:tinymceai-on-premises-jwt.adoc[JWT authentication] +***** xref:tinymceai-on-premises-frameworks.adoc[Framework integration] +***** xref:tinymceai-on-premises-production.adoc[Production deployment] +***** xref:tinymceai-on-premises-advanced.adoc[Advanced scenarios] +***** xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting] +***** xref:tinymceai-on-premises-reference.adoc[Reference] **** xref:ai.adoc[AI Assistant (legacy)] ***** xref:ai-openai.adoc[OpenAI ChatGPT integration guide] ***** xref:ai-azure.adoc[Azure AI integration guide] diff --git a/modules/ROOT/pages/tinymceai-on-premises-advanced.adoc b/modules/ROOT/pages/tinymceai-on-premises-advanced.adoc new file mode 100644 index 0000000000..425273c777 --- /dev/null +++ b/modules/ROOT/pages/tinymceai-on-premises-advanced.adoc @@ -0,0 +1,439 @@ += TinyMCE AI on-premises: advanced scenarios +:navtitle: Advanced scenarios +:description: Advanced scenarios for TinyMCE AI on-premises service +:keywords: AI, on-premises, multi-tenant, MCP, regulated + +[.lead] +This guide covers enterprise scenarios for the AI on-premises service through concrete examples. Each scenario builds on the xref:tinymceai-on-premises-getting-started.adoc[Getting started guide]; teams may implement any scenario on its own. + +== Internal knowledge base through MCP + +*Use case:* Engineers writing documentation can query internal wikis, API specs, and runbooks directly from the editor, without switching context. + +=== Architecture + +[.text-center] +image::tinymceai-on-premises/advanced-scenarios-fig-1.svg[MCP integration: TinyMCE rich text editor communicates with AI service which calls MCP knowledge-hub server,width=100%] + +=== Configuration reference + +The Model Context Protocol (MCP) allows the AI service to call external tools (internal wikis, API specs, runbooks, contract databases, compliance checkers) from inside conversations. The AI service connects over *Streamable HTTP transport*. 
+ +[cols="1,3",options="header"] +|=== +|Option |Description +|`url` |HTTP endpoint of the MCP server (Streamable HTTP transport). +|`headers` |Auth headers sent with every request. Single shared token per server. See <<single-shared-token-limitation>>. +|`tools.disabled` |Array of tool names to block from LLM access. +|`options.callToolTimeout` |Per-tool-call timeout in seconds (default 60). +|=== + +NOTE: MCP tools are available in AI *conversations* only, not in reviews or quick actions. + +TIP: On Linux Docker, add `extra_hosts: ["host.docker.internal:host-gateway"]` to the AI service to reach MCP servers running on the host. + +[[single-shared-token-limitation]] +=== Single-shared-token limitation + +The `headers` field is fixed at deploy time. Every MCP tool call shares the same token; there is no per-user MCP authentication path yet. If the MCP server needs per-user context, encode it in the conversation prompt or in a header that maps user identity at the MCP server side (for example, using a token the MCP server itself swaps for a per-user identity). + +=== Implementation + +. *Create an MCP server that exposes the knowledge base:* ++ +.Knowledge-base MCP server (Express) +[%collapsible] +==== +[source,javascript] +---- +// knowledge-mcp-server.js +const express = require('express'); +const app = express(); +app.use(express.json()); + +const KNOWLEDGE_BASE = { + 'api-guidelines': 'All REST APIs must use JSON, include pagination through Link headers, and return 4xx for client errors with a machine-readable error code.', + 'deployment-process': 'Deployments require: 1) PR approval, 2) passing CI, 3) staging verification, 4) production canary (5% traffic for 30min), 5) full rollout.', + 'security-policy': 'All user data must be encrypted at rest (AES-256) and in transit (TLS 1.3). 
PII requires additional field-level encryption.', +}; + +app.post('/mcp', (req, res) => { + const { method, id, params } = req.body; + + if (method === 'initialize') { + return res.json({ + jsonrpc: '2.0', id, + result: { + protocolVersion: '2024-11-05', + capabilities: { tools: {} }, + serverInfo: { name: 'knowledge-hub', version: '1.0.0' } + } + }); + } + + if (method === 'tools/list') { + return res.json({ + jsonrpc: '2.0', id, + result: { + tools: [{ + name: 'search_knowledge_base', + description: 'Search the company knowledge base for policies, guidelines, and procedures', + inputSchema: { + type: 'object', + properties: { + query: { type: 'string', description: 'Search query' } + }, + required: ['query'] + } + }, { + name: 'get_api_spec', + description: 'Get the OpenAPI spec for an internal service', + inputSchema: { + type: 'object', + properties: { + service: { type: 'string', description: 'Service name (for example user-service, billing-api)' } + }, + required: ['service'] + } + }] + } + }); + } + + if (method === 'tools/call') { + const { name, arguments: args } = params; + if (name === 'search_knowledge_base') { + const query = (args?.query || '').toLowerCase(); + const results = Object.entries(KNOWLEDGE_BASE) + .filter(([key]) => key.includes(query) || query.includes(key.split('-')[0])) + .map(([key, value]) => `##${key}\n${value}`) + .join('\n\n'); + return res.json({ + jsonrpc: '2.0', id, + result: { content: [{ type: 'text', text: results || 'No results found.' }] } + }); + } + return res.json({ + jsonrpc: '2.0', id, + result: { content: [{ type: 'text', text: 'Spec not found for: ' + args?.service }] } + }); + } + + res.json({ jsonrpc: '2.0', id, error: { code: -32601, message: 'Unknown method' } }); +}); + +app.listen(3001, () => console.log('Knowledge MCP server on http://localhost:3001/mcp')); +---- +==== + +. 
*Configure the AI service:* ++ +[source,bash] +---- +-e MCP_SERVERS='{ + "knowledge-hub": { + "url": "http://host.docker.internal:3001/mcp", + "options": { "callToolTimeout": 30 } + } +}' +---- + +. *Sample AI chat message:* ++ +____ +"What are the API guidelines for error handling?" +____ ++ +The assistant calls the `search_knowledge_base` tool, retrieves the relevant policy, and responds with sourced information without leaving the rich text editor. + + + +== Multi-tenant SaaS platform + +*Use case:* A SaaS platform provides AI writing features to customers. Each customer gets isolated conversations, separate LLM budgets, and per-tenant configuration. + +=== Architecture + +[.text-center] +image::tinymceai-on-premises/advanced-scenarios-fig-2.svg[alt="Multi-tenant SaaS architecture with per-customer environments access keys and conversation isolation",width=100%] + +Each environment provides: + +* Its own access keys (independent rotation) +* Isolated conversation history (queries are partitioned by `sub` within an environment) +* Separate billing and usage tracking (through Langfuse or a custom metrics pipeline) + +=== Implementation + +. *Create one environment per customer through the Management Panel:* ++ +* Customer A -> Environment `env-customer-a` +* Customer B -> Environment `env-customer-b` +* Customer C -> Environment `env-customer-c` + +. 
*Token server generates JWTs with the correct environment:* ++ +.Multi-tenant JWT generation +[%collapsible] +==== +[source,javascript] +---- +app.post('/api/ai-token', requireAuth, (req, res) => { + const customer = getCustomerConfig(req.user.organizationId); + + const token = jwt.sign({ + aud: customer.envId, + sub: req.user.id, + user: { name: req.user.name, email: req.user.email }, + auth: { + ai: { + permissions: getPermissionsForPlan(customer.plan) + } + } + }, customer.apiSecret, { algorithm: 'HS256', expiresIn: '1h' }); + + res.json({ token }); +}); + +function getPermissionsForPlan(plan) { + switch (plan) { + case 'enterprise': + return ['ai:conversations:*', 'ai:models:agent', 'ai:actions:system:*', 'ai:reviews:system:*']; + case 'pro': + return ['ai:conversations:*', 'ai:actions:system:*']; + case 'basic': + return ['ai:actions:system:*']; + } +} + +function envFor(tenantId) { + return { + envId: process.env[`AI_ENV_ID_${tenantId}`], + apiSecret: process.env[`AI_API_SECRET_${tenantId}`] + }; +} +---- +==== + +. *Result:* Full data isolation between customers, with feature gating based on subscription tier. + +CAUTION: Conversation history is isolated by the `sub` claim in the JWT. Reusing a single `sub` value for multiple users within one environment causes those users to share conversation history. Always use a stable, unique-per-user identifier (such as an internal user ID) as the `sub` value. + + + +== Custom models with guardrails + +*Use case:* A regulated industry (healthcare, finance, legal) needs AI writing assistance but must use approved models with content filtering. + +=== Implementation + +. *Use a self-hosted model with an OpenAI-compatible API (such as vLLM or Ollama):* ++ +[source,bash] +---- +-e PROVIDERS='{ + "approved-llm": { + "type": "openai-compatible", + "baseUrl": "http://host.docker.internal:8080/v1", + "headers": {"Authorization": "Bearer internal-key"} + } +}' +---- + +. 
*Restrict to specific models only:* ++ +[source,bash] +---- +-e MODELS='[{ + "id": "llama-3.1-70b-medical", + "name": "Medical Assistant (Llama 3.1 70B)", + "description": "Fine-tuned for medical documentation. HIPAA-compliant.", + "provider": "approved-llm", + "recommended": true, + "features": ["conversations", "reviews", "actions"] +}, { + "id": "llama-3.1-8b-general", + "name": "General Writing (Llama 3.1 8B)", + "description": "Fast general-purpose model for drafting and editing.", + "provider": "approved-llm", + "features": ["actions"] +}]' +---- + +. *Result:* The configuration exposes only approved, audited models. Content does not leave the network. Combine with Langfuse for a full audit trail. + + + +== AI-powered document pipeline + +*Use case:* Legal team drafts contracts. AI assists with clause generation, compliance checking, and precedent search, powered by internal legal databases. + +=== Architecture + +[.text-center] +image::tinymceai-on-premises/advanced-scenarios-fig-3.svg[Regulated industry scenario: legal editor connects to AI service with contract-db and compliance MCP servers,width=100%] + +=== Configuration + +.Document pipeline MCP server configuration +[%collapsible] +==== +[source,bash] +---- +-e MCP_SERVERS='{ + "contract-db": { + "url": "http://host.docker.internal:3001/mcp", + "options": {"callToolTimeout": 30} + }, + "compliance-checker": { + "url": "http://host.docker.internal:3002/mcp", + "options": {"callToolTimeout": 60} + }, + "precedent-search": { + "url": "http://host.docker.internal:3003/mcp", + "tools": {"disabled": ["delete_precedent"]}, + "options": {"callToolTimeout": 120} + } +}' +---- +==== + +*Example prompts:* + +* "Draft a non-compete clause for California employees" +* "Check this contract section for GDPR compliance issues" +* "Find precedent for limitation of liability in SaaS agreements" + +Internal databases supply the data for these prompts. 
Aside from the LLM request itself, no content goes to external services. + + + +== Web-augmented research assistant + +*Use case:* Content team writing blog posts and marketing copy can pull live data from the web and internal sources. + +=== Configuration + +[source,bash] +---- +-e WEBRESOURCES_ENABLED='true' \ +-e WEBRESOURCES_ENDPOINT='http://host.docker.internal:4000/scrape' \ +-e WEBRESOURCES_REQUEST_TIMEOUT='10000' \ +-e WEBSEARCH_ENABLED='true' \ +-e WEBSEARCH_ENDPOINT='http://host.docker.internal:4001/search' \ +-e WEBSEARCH_REQUEST_TIMEOUT='10000' \ +-e WEBSEARCH_HEADERS='{"Authorization":"Bearer search-api-key"}' +---- + +=== Web scraping endpoint contract + +[cols="1,2",options="header"] +|=== +|Direction |Payload +|Request |JSON object with a `url` field (page to fetch). +|Response |JSON object with `type` (`text/html` or `text/markdown`) and `data` (body content). +|=== + +.Request body +[source,json] +---- +{ "url": "https://example.com/article" } +---- + +.Response body +[source,json] +---- +{ "type": "text/html", "data": "

Example page body

" } +---- + +==== Scraper example (Playwright) + +[source,javascript] +---- +// scraper-service.js +const { chromium } = require('playwright'); +const express = require('express'); +const app = express(); +app.use(express.json()); + +app.post('/scrape', async (req, res) => { + const browser = await chromium.launch(); + const page = await browser.newPage(); + await page.goto(req.body.url, { waitUntil: 'networkidle' }); + const content = await page.content(); + await browser.close(); + res.json({ type: 'text/html', data: content }); +}); + +app.listen(4000); +---- + +=== Web search endpoint contract + +[cols="1,2",options="header"] +|=== +|Direction |Payload +|Request |JSON object with a `query` field (search string). +|Response |JSON object with a `results` array; each item includes `url`, `text`, `title`, and optional `author`, `publishedAt`, and `favicon`. +|=== + +.Request body +[source,json] +---- +{ "query": "search string" } +---- + +.Response body +[source,json] +---- +{ + "results": [ + { + "url": "https://example.com/article", + "text": "Content snippet", + "title": "Article Title", + "author": "Author", + "publishedAt": "2026-04-30T10:00:00Z", + "favicon": "https://example.com/favicon.ico" + } + ] +} +---- + +==== Search example (SerpAPI) + +[source,javascript] +---- +// search-service.js +const express = require('express'); +const app = express(); +app.use(express.json()); + +app.post('/search', async (req, res) => { + const response = await fetch( + `https://serpapi.com/search.json?q=${encodeURIComponent(req.body.query)}&api_key=${process.env.SERP_API_KEY}` + ); + const data = await response.json(); + const results = (data.organic_results || []).slice(0, 5).map(r => ({ + url: r.link, + title: r.title, + text: r.snippet + })); + res.json({ results }); +}); + +app.listen(4001); +---- + +NOTE: A model must include `capabilities.webSearch: true` in its `MODELS` entry to expose the web search toggle. 
+ +*Example prompts:* + +* "Research the latest trends in AI governance and write a summary" +* "Read this URL and rewrite the key points for the target audience: pass:[https://…]" + + + +For production deployment guidance including Kubernetes manifests, scaling, security hardening, rate limiting, and observability, see xref:tinymceai-on-premises-production.adoc[Production deployment]. + +For common errors and debugging steps, see xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting]. diff --git a/modules/ROOT/pages/tinymceai-on-premises-database.adoc b/modules/ROOT/pages/tinymceai-on-premises-database.adoc new file mode 100644 index 0000000000..8b61052ddb --- /dev/null +++ b/modules/ROOT/pages/tinymceai-on-premises-database.adoc @@ -0,0 +1,668 @@ += Database, Redis, and infrastructure setup +:navtitle: Database, Redis, and storage +:description: Database, Redis, and file storage setup for the TinyMCE AI On-Premises service +:keywords: AI, on-premises, database, MySQL, PostgreSQL, Redis, Docker, Podman, file storage, S3, Azure Blob + +This page covers the data layer: the SQL database, Redis, and file storage. +For container runtimes, reverse proxies, TLS, Kubernetes, and ECS deployment, see the xref:tinymceai-on-premises-production.adoc[Production deployment guide]. + +== Supported versions + +[cols="1,1,1,2",options="header"] +|=== +|Component |Minimum |Recommended |Notes + +|MySQL +|8.0 +|8.0.x (latest patch) +|Pin to `mysql:8.0`. See <<mysql-version-pinning>>. + +|PostgreSQL +|13 +|16 +| + +|Redis +|3.2.6 +|7.x +|Redis Cluster and TLS supported through `REDIS_CLUSTER_NODES` and `REDIS_TLS_ENABLE`. +|=== + +The AI service supports both MySQL and PostgreSQL equally. Pick whichever the operations team already runs. 
+ +== Choosing a setup path + +[.text-center] +image::tinymceai-on-premises/database-setup-fig-1.svg[Database setup decision tree: local Docker Compose vs managed cloud database for evaluation and production,width=100%] + +All paths produce the same end state: a running database the AI service can connect to. + +[cols="1,1,1",options="header"] +|=== +|Path |MySQL |PostgreSQL + +|Docker / Podman |Yes |Yes +|Docker Compose |Yes |Yes +|Native (macOS / Linux) |Yes |Yes +|Managed cloud (RDS, Cloud SQL, Azure) |Yes |Yes +|=== + + + +[[postgresql-schema-prerequisite]] +== PostgreSQL schema prerequisite + +The AI service expects a schema named `cs-on-premises` (with a hyphen). If that schema does not exist, the container crashes on first boot with: + +.... +error: schema "cs-on-premises" does not exist +.... + +Apply one of the following fixes *before* starting the AI service for the first time. + +=== Option A: pre-create the schema + +The double-quotes are mandatory because the schema name contains a hyphen. + +[source,sql] +---- +CREATE SCHEMA "cs-on-premises"; +---- + +Verify with `\dn` in psql. `cs-on-premises` should appear in the list. + +=== Option B: use the default `public` schema + +Set the `DATABASE_SCHEMA` environment variable on the AI service container: + +.... +DATABASE_SCHEMA=public +.... + +This bypasses the hyphenated schema entirely. + +NOTE: MySQL does not have this issue. The database itself is the namespace, set through `DATABASE_DATABASE`. + + + +[[mysql-version-pinning]] +== MySQL version pinning + +Do *not* use `mysql:8`. That tag now floats to MySQL 8.4, which removes the `default-authentication-plugin=mysql_native_password` startup flag the AI service relies on. The container crashloops with: + +.... +[ERROR] [MY-000067] [Server] unknown variable 'default-authentication-plugin=mysql_native_password'. +[ERROR] [MY-010119] [Server] Aborting +.... + +Pin to `mysql:8.0` in every manifest: `docker run`, Docker Compose, Kubernetes, Helm, ECS. 
+ +TIP: The same principle applies to PostgreSQL. Pin `postgres:16` rather than `postgres:latest`. + + + +[[database-user-privileges]] +== Database user privileges + +On first boot the AI service runs schema migrations and creates roughly 32 tables across the following namespaces: `ai_assistant_*`, `environments__*`, `security__*`, `insights__*`, `blob_storage__*`, and `cs_migrations*`. + +The database user needs enough privilege to create, alter, and operate on these tables. + +=== MySQL + +[source,sql] +---- +CREATE USER 'ai_service'@'%' IDENTIFIED BY 'STRONG_PASSWORD'; +GRANT SELECT, INSERT, UPDATE, DELETE, + ALTER, CREATE, DROP, INDEX, + TRIGGER, LOCK TABLES, REFERENCES + ON ai_service.* TO 'ai_service'@'%'; +FLUSH PRIVILEGES; +---- + +.Development shortcut +[%collapsible] +==== +[source,sql] +---- +GRANT ALL PRIVILEGES ON ai_service.* TO 'ai_service'@'%'; +---- +==== + +[NOTE] +-- +Some builds report false-positive "Not enough permissions to access database" errors even with `ALL PRIVILEGES`. If this occurs, grant the privileges globally rather than per-database, or use the MySQL `root` user for development. +-- + +=== PostgreSQL + +[source,sql] +---- +CREATE USER ai_service WITH PASSWORD 'STRONG_PASSWORD'; +CREATE DATABASE ai_service OWNER ai_service; +\c ai_service +CREATE SCHEMA "cs-on-premises" AUTHORIZATION ai_service; +GRANT CREATE, USAGE ON SCHEMA "cs-on-premises" TO ai_service; +GRANT ALL ON ALL TABLES IN SCHEMA "cs-on-premises" TO ai_service; +GRANT ALL ON ALL SEQUENCES IN SCHEMA "cs-on-premises" TO ai_service; +ALTER DEFAULT PRIVILEGES IN SCHEMA "cs-on-premises" + GRANT ALL ON TABLES TO ai_service; +ALTER DEFAULT PRIVILEGES IN SCHEMA "cs-on-premises" + GRANT ALL ON SEQUENCES TO ai_service; +---- + +.Development shortcut +[%collapsible] +==== +[source,sql] +---- +GRANT ALL ON SCHEMA "cs-on-premises" TO ai_service; +---- +==== + +If `DATABASE_SCHEMA=public` was chosen, substitute `public` for `"cs-on-premises"` in each grant statement. 
+ + + +== Database setup + +=== Docker Compose (recommended for evaluation) + +.MySQL compose file +[%collapsible] +==== +[source,yaml] +---- +services: + mysql: + image: mysql:8.0 + environment: + MYSQL_ROOT_PASSWORD: ROOT_PASSWORD + MYSQL_DATABASE: ai_service + MYSQL_USER: ai_service + MYSQL_PASSWORD: STRONG_PASSWORD + ports: + - "3306:3306" + volumes: + - mysql_data:/var/lib/mysql + healthcheck: + test: ["CMD", "mysqladmin", "ping", "-h", "localhost"] + interval: 10s + timeout: 5s + retries: 5 + + redis: + image: redis:7 + ports: + - "6379:6379" + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 10s + timeout: 5s + retries: 5 + +volumes: + mysql_data: +---- +==== + +.PostgreSQL compose file +[%collapsible] +==== +[source,yaml] +---- +services: + postgres: + image: postgres:16 + environment: + POSTGRES_DB: ai_service + POSTGRES_USER: ai_service + POSTGRES_PASSWORD: STRONG_PASSWORD + ports: + - "5432:5432" + volumes: + - pg_data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ai_service -d ai_service"] + interval: 10s + timeout: 5s + retries: 5 + + redis: + image: redis:7 + ports: + - "6379:6379" + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 10s + timeout: 5s + retries: 5 + +volumes: + pg_data: +---- +==== + +After `docker compose up -d`, create the PostgreSQL schema (if not using `DATABASE_SCHEMA=public`): + +[source,bash] +---- +docker compose exec postgres psql -U ai_service -d ai_service \ + -c 'CREATE SCHEMA "cs-on-premises";' +---- + +=== Docker single container + +.MySQL +[%collapsible] +==== +[source,bash] +---- +docker run -d \ + --name ai-mysql \ + -e MYSQL_ROOT_PASSWORD=ROOT_PASSWORD \ + -e MYSQL_DATABASE=ai_service \ + -e MYSQL_USER=ai_service \ + -e MYSQL_PASSWORD=STRONG_PASSWORD \ + -p 3306:3306 \ + -v ai_mysql_data:/var/lib/mysql \ + mysql:8.0 +---- +==== + +.PostgreSQL +[%collapsible] +==== +[source,bash] +---- +docker run -d \ + --name ai-postgres \ + -e POSTGRES_DB=ai_service \ + -e 
POSTGRES_USER=ai_service \ + -e POSTGRES_PASSWORD=STRONG_PASSWORD \ + -p 5432:5432 \ + -v ai_pg_data:/var/lib/postgresql/data \ + postgres:16 +---- + +Then create the schema: + +[source,bash] +---- +docker exec -i ai-postgres psql -U ai_service -d ai_service \ + -c 'CREATE SCHEMA "cs-on-premises";' +---- +==== + +TIP: For Podman, substitute `podman` for `docker` throughout. On rootless Podman, use named volumes rather than bind-mounted host paths to avoid SELinux and UID mapping issues. + +=== Native install (macOS) + +.MySQL and PostgreSQL on macOS +[%collapsible] +==== +*MySQL:* + +[source,bash] +---- +brew install mysql +brew services start mysql +mysql_secure_installation +mysql -u root -p <<'SQL' +CREATE DATABASE ai_service; +CREATE USER 'ai_service'@'%' IDENTIFIED BY 'STRONG_PASSWORD'; +GRANT SELECT, INSERT, UPDATE, DELETE, ALTER, CREATE, DROP, + INDEX, TRIGGER, LOCK TABLES, REFERENCES + ON ai_service.* TO 'ai_service'@'%'; +FLUSH PRIVILEGES; +SQL +---- + +*PostgreSQL:* + +[source,bash] +---- +brew install postgresql@16 +brew services start postgresql@16 +createuser -P ai_service +createdb -O ai_service ai_service +psql -d ai_service -c 'CREATE SCHEMA "cs-on-premises" AUTHORIZATION ai_service;' +---- + +Verify all services are running: + +[source,bash] +---- +brew services list +---- +==== + +=== Native install (Linux) + +.MySQL and PostgreSQL on Debian/Ubuntu +[%collapsible] +==== +*MySQL:* + +[source,bash] +---- +sudo apt update +sudo apt install -y mysql-server +sudo systemctl enable --now mysql +sudo mysql_secure_installation +sudo mysql <<'SQL' +CREATE DATABASE ai_service; +CREATE USER 'ai_service'@'%' IDENTIFIED BY 'STRONG_PASSWORD'; +GRANT SELECT, INSERT, UPDATE, DELETE, ALTER, CREATE, DROP, + INDEX, TRIGGER, LOCK TABLES, REFERENCES + ON ai_service.* TO 'ai_service'@'%'; +FLUSH PRIVILEGES; +SQL +---- + +To allow remote connections, edit `/etc/mysql/mysql.conf.d/mysqld.cnf`, set `bind-address = 0.0.0.0`, and restart with `sudo systemctl restart mysql`. 
+ +*PostgreSQL:* + +[source,bash] +---- +sudo apt update +sudo apt install -y postgresql postgresql-contrib +sudo systemctl enable --now postgresql +sudo -u postgres psql <<'SQL' +CREATE USER ai_service WITH PASSWORD 'STRONG_PASSWORD'; +CREATE DATABASE ai_service OWNER ai_service; +SQL +sudo -u postgres psql -d ai_service \ + -c 'CREATE SCHEMA "cs-on-premises" AUTHORIZATION ai_service;' +---- + +To allow remote connections, edit `/etc/postgresql/16/main/postgresql.conf` (`listen_addresses = '*'`) and add to `/etc/postgresql/16/main/pg_hba.conf`: + +.... +host ai_service ai_service 0.0.0.0/0 scram-sha-256 +.... + +Restart with `sudo systemctl restart postgresql`. +==== + +=== Managed cloud + +The AI service handles schema migration automatically. The pre-steps are: + +. Provision the database instance (RDS, Cloud SQL, or Azure Database). +. Create the database (`ai_service`). +. Create a dedicated user with the privileges documented in <<database-user-privileges>>. +. *PostgreSQL only:* create the `cs-on-premises` schema or set `DATABASE_SCHEMA=public`. +. Open the security group or firewall for the AI service on port `3306` (MySQL) or `5432` (PostgreSQL). + +[cols="1,1,1,1",options="header"] +|=== +|Provider |MySQL |PostgreSQL |Redis + +|AWS |RDS for MySQL |RDS for PostgreSQL |ElastiCache for Redis +|GCP |Cloud SQL (MySQL) |Cloud SQL (PostgreSQL) |Memorystore for Redis +|Azure |Azure Database for MySQL |Azure Database for PostgreSQL |Azure Cache for Redis +|=== + +For production, enable Multi-AZ (or the equivalent zonal redundancy) and automated backups. + +[[host-docker-internal]] +=== Connecting to a host-local database from Docker + +When the AI service runs in Docker but the database or Redis runs natively on the host, the container must resolve the host's IP address. + +*Docker Desktop (macOS, Windows)* and *Podman 4{plus}* inject `host.docker.internal` automatically. + +*Native Linux Docker* does not. 
Add `host-gateway` explicitly: + +[source,yaml] +---- +services: + ai-service: + image: registry.containers.tiny.cloud/ai-service:latest + extra_hosts: + - "host.docker.internal:host-gateway" + environment: + DATABASE_HOST: host.docker.internal + REDIS_HOST: host.docker.internal +---- + +Or with `docker run`: + +[source,bash] +---- +docker run --add-host=host.docker.internal:host-gateway ... +---- + + + +== Redis + +Every AI service instance must reach Redis. Redis holds session coordination, SSE delivery, and rate-limiting state. A temporary Redis outage degrades streaming but does not destroy persistent data. + +=== Setup + +Redis is typically included in the Docker Compose file alongside the database (see the compose examples above). For standalone setup: + +[source,bash] +---- +docker run -d --name ai-redis -p 6379:6379 -v ai_redis_data:/data redis:7 +---- + +.macOS / Linux native install +[%collapsible] +==== +*macOS:* + +[source,bash] +---- +brew install redis +brew services start redis +---- + +*Linux (Debian/Ubuntu):* + +[source,bash] +---- +sudo apt install -y redis-server +sudo systemctl enable --now redis-server +---- +==== + +=== Connection variables + +[cols="1,1,2",options="header"] +|=== +|Variable |Required |Description + +|`REDIS_HOST` |Yes |Hostname +|`REDIS_PORT` |No |Default `6379` +|`REDIS_PASSWORD` |No |Password +|`REDIS_USER` |No |Username (Redis 6{plus} ACL) +|`REDIS_DB` |No |Database number (default `1`) +|`REDIS_IP_FAMILY` |No |Set to `6` for IPv6 +|=== + +=== TLS + +[cols="1,2",options="header"] +|=== +|Variable |Description + +|`REDIS_TLS_ENABLE` |`true` to enable TLS +|`REDIS_TLS_CA` |Path to CA certificate +|`REDIS_TLS_KEY` |Path to client key +|`REDIS_TLS_CERT` |Path to client certificate +|=== + +=== Cluster + +[cols="1,2",options="header"] +|=== +|Variable |Description + +|`REDIS_CLUSTER_NODES` |Comma-separated `host:port[:password]` list +|`REDIS_IP_FAMILY` |Set to `6` for IPv6 domains +|=== + +.Cluster examples +[%collapsible] 
+==== +[source,bash] +---- +# Standard cluster +REDIS_CLUSTER_NODES="redis1.example.com:7000,redis2.example.com:7001,redis3.example.com:7002" + +# Cluster with per-node passwords +REDIS_CLUSTER_NODES="redis1.example.com:7000:pass1,redis2.example.com:7001:pass2" + +# IPv6 cluster +REDIS_IP_FAMILY=6 +REDIS_CLUSTER_NODES="[::1]:7000,[::1]:7001,[::1]:7002" +---- +==== + +IMPORTANT: In production, always set `REDIS_PASSWORD` or use a managed Redis instance with authentication enabled. + + + +== File storage + +Separate from the SQL database, the AI service persists user file uploads (attachments, images). The storage back end is selected by the `STORAGE_DRIVER` environment variable. + +[cols="1,2,2",options="header"] +|=== +|Driver |When to use |Notes + +|`database` +|Demos and smallest deployments +|Stores files as SQL blobs. Hard cap around 4 GB total. No extra configuration required. + +|`filesystem` +|Single-instance with a persistent volume +|Requires a writable mounted volume. See <<filesystem-storage>>. + +|`s3` +|Production on AWS, or S3-compatible (MinIO, Wasabi) +|Use a same-region bucket. + +|`azure` +|Production on Azure +|Azure Blob Storage. +|=== + +=== S3 + +[source,bash] +---- +STORAGE_DRIVER=s3 +STORAGE_REGION=us-east-1 +STORAGE_ACCESS_KEY_ID=ACCESS_KEY +STORAGE_SECRET_ACCESS_KEY=SECRET_KEY +STORAGE_BUCKET=BUCKET_NAME +STORAGE_ENDPOINT=https://custom-s3-endpoint # optional, for S3-compatible +---- + +NOTE: The correct variable names are `STORAGE_BUCKET` and `STORAGE_REGION`, not `STORAGE_S3_BUCKET` or `STORAGE_S3_REGION`. + +=== Azure Blob + +[source,bash] +---- +STORAGE_DRIVER=azure +STORAGE_ACCOUNT_NAME=ACCOUNT_NAME +STORAGE_ACCOUNT_KEY=ACCOUNT_KEY +STORAGE_CONTAINER=CONTAINER_NAME +STORAGE_ENDPOINT=https://custom-endpoint # optional +---- + +[[filesystem-storage]] +=== Filesystem + +[source,bash] +---- +STORAGE_DRIVER=filesystem +STORAGE_LOCATION=/tmp/ai-storage +---- + +IMPORTANT: The container runs as a non-root user and cannot write under `/var`. 
Mount a writable volume and point `STORAGE_LOCATION` at the mount point: `-v ./ai-storage:/tmp/ai-storage`. + +=== Database + +[source,bash] +---- +STORAGE_DRIVER=database +---- + +Files are stored in the SQL database as blobs, capped at roughly 4 GB total. This is the simplest option for initial evaluation. + + + +== Verification + +=== MySQL + +[source,bash] +---- +mysql --host=DB_HOST --user=ai_service --password=STRONG_PASSWORD \ + ai_service --port=3306 -e "SELECT 1" +---- + +Expected: a table with `1` in a single column. + +=== PostgreSQL + +[source,bash] +---- +psql -h DB_HOST -U ai_service -d ai_service -c "SELECT 1" +---- + +Expected: `?column?` returning `1`. + +=== Redis + +[source,bash] +---- +redis-cli -h REDIS_HOST ping +---- + +Expected: `PONG`. + +=== AI service migration + +After starting the AI service, confirm it has connected and run migrations: + +[source,bash] +---- +docker logs ai-service 2>&1 | grep -i 'migrat\|schema\|database' +---- + +Expected output (paraphrased): + +.... +Connecting to database (driver=postgres host=...) +Running migrations on schema "cs-on-premises" +Migrations complete: 32 tables ready +Server is listening on port 8000. +.... + +If `schema "cs-on-premises" does not exist` appears, return to <<postgresql-schema-prerequisite>>. If `unknown variable 'default-authentication-plugin'` appears, return to <<mysql-version-pinning>>. + +To list the tables created by migration: + +.PostgreSQL +[source,sql] +---- +SELECT table_name FROM information_schema.tables + WHERE table_schema = 'cs-on-premises' + ORDER BY table_name; +---- + +.MySQL +[source,sql] +---- +SHOW TABLES IN ai_service; +---- + +Tables prefixed `ai_assistant_`, `environments__`, `security__`, `insights__`, `blob_storage__`, and `cs_migrations` should appear. 
diff --git a/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc b/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc new file mode 100644 index 0000000000..0bb1be316d --- /dev/null +++ b/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc @@ -0,0 +1,222 @@ += TinyMCE AI on-premises: editor-side integration +:navtitle: Framework integration +:description: Connecting the TinyMCE editor to the on-premises AI service from React, Vue, Angular, Svelte, or vanilla JavaScript. +:keywords: AI, on-premises, React, Vue, Angular, Svelte, token provider + + +This page covers the *editor-side* configuration that connects TinyMCE to the on-premises AI service. It assumes: + +* The AI service is already running. See xref:tinymceai-on-premises-getting-started.adoc[Getting started] for setup instructions. +* A token endpoint exists that signs JWTs for the AI service. See xref:tinymceai-on-premises-jwt.adoc[JWT authentication] for back-end implementations. +* The TinyMCE API key has the AI feature enabled. Retrieve or upgrade a key at https://www.tiny.cloud/my-account/integrate/. + +For general framework setup (installing wrappers, component structure, SSR patterns), see the existing integration guides: + +* xref:react-cloud.adoc[React] +* xref:vue-cloud.adoc[Vue.js] +* xref:angular-cloud.adoc[Angular] +* xref:svelte-cloud.adoc[Svelte] + +The on-premises AI integration adds the options documented below to the standard TinyMCE `init` configuration. + + + +== Required editor options + +[cols="1,3",options="header"] +|=== +|Option |Description + +|`plugins` +|Must include `tinymceai`. + +|`toolbar` +|Include one or more of `tinymceai-chat`, `tinymceai-review`, `tinymceai-quickactions`. + +|`tinymceai_service_url` +|The origin of the AI service (no trailing slash, no path), for example `\https://ai.yourcompany.com`. + +|`tinymceai_token_provider` +|A function returning `Promise<{ token: string }>`. See <<token-provider>> below. 
+|=== + + + +== Minimal example + +The following vanilla JavaScript example contains every on-premises-specific option. The same `init` options apply identically inside the React, Vue, Angular, and Svelte wrapper components. + +[source,html] +---- + + + + + + + + + + +---- + +Replace `/path/to/tinymce/` with the location of the self-hosted TinyMCE assets. See xref:installation.adoc[Self-hosted installation] for download and setup instructions. + + + +[[token-provider]] +== `tinymceai_token_provider` + +A function that returns a `Promise` resolving to an object with a `token` property containing the JWT string. + +.Expected return shape +[source,javascript] +---- +{ token: 'eyJhbGciOiJIUzI1NiIs...' } +---- + +.Example provider +[source,javascript] +---- +tinymceai_token_provider: () => { + return fetch('/api/ai-token', { method: 'POST' }) + .then((r) => r.json()) + .then((data) => ({ token: data.token })); +} +---- + +[cols="1,3",options="header"] +|=== +|Behavior |Detail + +|Automatic refresh +|The plugin calls the provider on initialization and again when the cached token nears expiry (60-second safety margin). Do not cache the JWT inside the provider. + +|Error handling +|If the function rejects or the endpoint returns a non-OK response, the plugin surfaces an error in the editor UI. + +|Token lifetime +|Tokens should be short-lived (5-15 minutes recommended). See xref:tinymceai-on-premises-jwt.adoc[JWT authentication] for signing key, payload structure, and lifetime guidance. +|=== + + + +== Authenticating the token request + +The `tinymceai_token_provider` fetches a JWT from the application back end. How that back end authenticates the browser request depends on the application architecture. 
+ +=== Session cookie + +If the page and the token endpoint share an origin (or a parent domain), the browser sends session cookies automatically: + +[source,javascript] +---- +fetch('/api/ai-token', { method: 'POST', credentials: 'include' }) +---- + +For cross-origin token endpoints, the back end must respond with `Access-Control-Allow-Origin: <page-origin>` (not `*`) and `Access-Control-Allow-Credentials: true`, and the session cookie must be set with `SameSite=None; Secure`. + +=== Bearer header + +If the application already holds a session JWT (injected at render time, or from an auth library), forward it as a header: + +[source,javascript] +---- +fetch('/api/ai-token', { + method: 'POST', + headers: { 'Authorization': `Bearer ${sessionJwt}` } +}) +---- + +This pattern avoids cookies entirely and works well for cross-origin setups. + + + +== Cross-origin requests to the AI service + +When `tinymceai_service_url` points to a different origin from the page (the common production case), the AI service must return CORS headers permitting the editor origin. The service reads the `ALLOWED_ORIGINS` environment variable for this. + +To verify CORS from a terminal: + +[source,bash] +---- +curl -i -X OPTIONS https://ai.yourcompany.com/v1/conversations \ + -H 'Origin: https://app.yourcompany.com' \ + -H 'Access-Control-Request-Method: POST' \ + -H 'Access-Control-Request-Headers: authorization,content-type' +---- + +The response should include `Access-Control-Allow-Origin: \https://app.yourcompany.com`. If it shows `*` or no CORS header, update `ALLOWED_ORIGINS` on the AI service container and restart. + + + +== Content Security Policy + +If the application sets a `Content-Security-Policy` header, allow the AI service origin in `connect-src`: + +.... +Content-Security-Policy: + connect-src 'self' https://ai.yourcompany.com; + script-src 'self'; +.... + +If using the Tiny CDN instead of self-hosted assets, also add `\https://cdn.tiny.cloud` to `script-src`. 
+ + + + +== Common integration errors + +[cols="1,1,2",options="header"] +|=== +|Symptom |Likely cause |Fix + +|Editor loads but no AI buttons appear +|`plugins` does not include `tinymceai`, or TinyMCE is version 7.x or earlier +|Set `plugins: 'tinymceai'` and confirm the script URL uses `/tinymce/8/`. Verify the API key has the AI feature enabled. + +|`POST /api/ai-token` returns 401 +|The token endpoint rejects the fetch +|Confirm the fetch sends the session cookie (`credentials: 'include'`) or `Authorization` header that the back end expects. + +|AI responses hang then time out +|Reverse proxy is buffering SSE +|Disable proxy buffering. See xref:tinymceai-on-premises-production.adoc[Production deployment]. + +|Browser console shows a CORS error on `/v1/conversations` +|`ALLOWED_ORIGINS` does not include the editor origin +|Update `ALLOWED_ORIGINS` and restart the AI service. + +|`tinymceai_token_provider` called in a tight loop +|Token endpoint returns invalid JSON or non-200 +|Validate: `curl -X POST http://localhost:3000/api/ai-token` should return `pass:c[{"token":"eyJ..."}]` with HTTP 200. +|=== + +For other issues, see xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting]. 
+ + + +== See also + +* xref:tinymceai-on-premises-getting-started.adoc[Getting started] +* xref:tinymceai-on-premises-jwt.adoc[JWT authentication] +* xref:tinymceai-on-premises-providers.adoc[LLM providers] +* xref:tinymceai-on-premises-production.adoc[Production deployment] +* xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting] diff --git a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc new file mode 100644 index 0000000000..84d6af7f96 --- /dev/null +++ b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc @@ -0,0 +1,428 @@ += Getting started with TinyMCE AI on-premises +:navtitle: Getting started +:description: Five-minute Docker Compose quick start for the TinyMCE AI on-premises service +:keywords: AI, on-premises, getting started, docker, quick start + +This section produces a fully working setup (AI service, database, Redis, token server, and a browser editor) in roughly five minutes on any machine with Docker. This quick start validates the stack components before designing a production deployment. Production engineers should still review this section to understand the conceptual flow before continuing to xref:tinymceai-on-premises-production.adoc[the Production Deployment Guide]. + +== Five-minute demo with Docker Compose + +=== Create the project folder + +[source,bash] +---- +mkdir tinymce-ai-onpremise && cd tinymce-ai-onpremise +---- + +=== Authenticate with the container registry + +The service image lives at `registry.containers.tiny.cloud/ai-service`. + +For Docker: + +[source,bash] +---- +docker login -u 'TINY_REGISTRY_USERNAME' https://registry.containers.tiny.cloud +# Docker prompts for the password; this avoids leaking it in shell history. 
+---- + +For Podman: + +[source,bash] +---- +podman login -u 'TINY_REGISTRY_USERNAME' registry.containers.tiny.cloud +---- + +Replace `TINY_REGISTRY_USERNAME` with the username supplied by the Tiny account representative. If credentials have not been received, contact `support@tiny.cloud`. + +=== Pull the AI service image + +[source,bash] +---- +docker pull registry.containers.tiny.cloud/ai-service:latest +---- + +For Podman, substitute `podman pull`. For production, pin a specific version tag (for example `:5.1.0`) rather than `:latest`. + +=== Create `docker-compose.yml` + +Create the file with exactly the contents below. Indentation is two spaces, never tabs. + +[source,yaml] +---- +services: + mysql: + image: mysql:8.0 + environment: + MYSQL_ROOT_PASSWORD: ${DB_PASSWORD:-changeme} + MYSQL_DATABASE: ai_service + ports: + - "3306:3306" + volumes: + - mysql_data:/var/lib/mysql + healthcheck: + test: ["CMD", "mysqladmin", "ping", "-h", "localhost"] + interval: 10s + timeout: 5s + retries: 5 + + redis: + image: redis:7 + ports: + - "6379:6379" + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 10s + timeout: 5s + retries: 5 + +volumes: + mysql_data: +---- + +TIP: Pin `mysql:8.0`, not `mysql:8`. The `:8` tag points to MySQL 8.4, which is incompatible with the AI service. See xref:tinymceai-on-premises-database.adoc#mysql-version-pinning[MySQL version pinning] for details. + +PostgreSQL is equally supported. See xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage] for an equivalent compose file. Review the xref:tinymceai-on-premises-database.adoc#postgresql-schema-prerequisite[PostgreSQL schema prerequisite] before switching. 
+ +[NOTE] +-- +If the AI service needs to reach the host machine (for example a self-hosted Ollama running on the host) and it is defined as a Compose service, add an `extra_hosts` entry to its `ai-service` block; when launching it with `docker run` as shown later in this guide, pass `--add-host=host.docker.internal:host-gateway` instead: + +[source,yaml] +---- +extra_hosts: + - "host.docker.internal:host-gateway" +---- + +Docker Desktop (macOS, Windows) and Podman 4{plus} auto-inject this alias. Native Linux Docker does not. +-- + +=== Create the `.env` file + +[source,bash] +---- +# --- Required: provided by Tiny --- +LICENSE_KEY=PASTE_SUPPLIED_LICENSE_KEY_HERE +TINYMCE_API_KEY=PASTE_TINYMCE_API_KEY_HERE + +# --- Required: strong secret used to log into the Management Panel --- +MANAGEMENT_SECRET=REPLACE_WITH_STRONG_SECRET + +# --- Required: database password (must match docker-compose.yml) --- +DB_PASSWORD=changeme + +# --- Required: at least one LLM provider key --- +OPENAI_API_KEY=sk-proj-PASTE_OPENAI_KEY_HERE +# ANTHROPIC_API_KEY=sk-ant-PASTE_ANTHROPIC_KEY_HERE +# GOOGLE_API_KEY=AIza-PASTE_GOOGLE_KEY_HERE + +# --- Filled in after creating an environment (leave blank for now) --- +AI_ENV_ID= +AI_API_SECRET= +---- + +IMPORTANT: `LICENSE_KEY` and `TINYMCE_API_KEY` are different credentials. `LICENSE_KEY` is the long string from the account representative. `TINYMCE_API_KEY` is the short string from the tiny.cloud dashboard. + +=== Start MySQL and Redis + +[source,bash] +---- +docker compose up -d +---- + +Wait ~15 seconds for MySQL to initialize, then verify: + +[source,bash] +---- +docker compose ps +---- + +Both containers should report `healthy` in the STATUS column. If MySQL still shows `starting`, wait another 10 seconds and re-run. 
+ +=== Launch the AI service + +Run from the same folder as the `.env` file: + +.Full launch script +[%collapsible] +==== +[source,bash] +---- +set -a && source .env && set +a + +PROVIDERS='{' +if [ -n "$OPENAI_API_KEY" ]; then + PROVIDERS+='"openai":{"type":"openai","apiKeys":["'"$OPENAI_API_KEY"'"]}' +fi +if [ -n "$ANTHROPIC_API_KEY" ]; then + [ "$PROVIDERS" != '{' ] && PROVIDERS+=',' + PROVIDERS+='"anthropic":{"type":"anthropic","apiKeys":["'"$ANTHROPIC_API_KEY"'"]}' +fi +if [ -n "$GOOGLE_API_KEY" ]; then + [ "$PROVIDERS" != '{' ] && PROVIDERS+=',' + PROVIDERS+='"google":{"type":"google","apiKeys":["'"$GOOGLE_API_KEY"'"]}' +fi +PROVIDERS+='}' + +# Resolve the compose network name (varies across Docker versions and folder names) +NETWORK=$(docker network ls --format '{{.Name}}' | grep "^$(basename "$PWD" | tr '[:upper:]' '[:lower:]')_default$" | head -1) +if [ -z "$NETWORK" ]; then + NETWORK="$(basename "$PWD" | tr '[:upper:]' '[:lower:]')_default" +fi + +docker run --init -d -p 8000:8000 \ + --network "$NETWORK" \ + --name ai-service \ + -e LICENSE_KEY="$LICENSE_KEY" \ + -e ENVIRONMENTS_MANAGEMENT_SECRET_KEY="$MANAGEMENT_SECRET" \ + -e DATABASE_DRIVER='mysql' \ + -e DATABASE_HOST='mysql' \ + -e DATABASE_USER='root' \ + -e DATABASE_PASSWORD="$DB_PASSWORD" \ + -e DATABASE_DATABASE='ai_service' \ + -e REDIS_HOST='redis' \ + -e PROVIDERS="$PROVIDERS" \ + -e STORAGE_DRIVER='database' \ + -e ENABLE_METRIC_LOGS='true' \ + registry.containers.tiny.cloud/ai-service:latest +---- +==== + +For Podman, replace `docker run` with `podman run` and use a Podman pod instead of a compose network. See xref:tinymceai-on-premises-production.adoc[Production deployment] for Podman-specific guidance. + +For native databases (the database runs on the host or in a managed service rather than in Docker), drop the `--network` flag and set `DATABASE_HOST=host.docker.internal` (Docker Desktop and Podman 4{plus}). 
On native Linux Docker, additionally pass `--add-host=host.docker.internal:host-gateway`. + +Wait five seconds, then verify: + +[source,bash] +---- +curl http://localhost:8000/health +---- + +Expected response: + +[source,json] +---- +{"serviceName":"on-premises-http","uptime":5.123} +---- + +[WARNING] +-- +If the container exits immediately, run `docker logs ai-service`. The most common causes are documented in the xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting] guide. The top three are: malformed `LICENSE_KEY` (line breaks from word wrap), missing PostgreSQL schema, and JSON syntax error in `PROVIDERS`. +-- + +=== Create an environment and access key + +The AI service isolates users into Environments. Each environment has its own access keys. + +[arabic] +. Open the Management Panel: *http://localhost:8000/panel/* +. Sign in using the `MANAGEMENT_SECRET` from `.env`. +. Click *Create Environment* and give it a name (for example "Development"). +. Note the *Environment ID* displayed (a short identifier like `viOu8BnjJHb0HGK091p`). +. Inside the environment, click *Create Access Key*. +. *Copy the API Secret immediately*. The Management Panel shows it only once. + +Update `.env` with the new values: + +[source,bash] +---- +AI_ENV_ID=PASTE_ENVIRONMENT_ID_HERE +AI_API_SECRET=PASTE_API_SECRET_HERE +---- + +[IMPORTANT] +-- +Always create environments through the Management Panel UI. Environments created through the raw Management API are not fully registered and cause `invalid-jwt-payload` or `Environment not found` errors. See the xref:tinymceai-on-premises-jwt.adoc[JWT authentication] guide for details on environment and access key management. +-- + +=== Create the token server + +The token server signs JWTs for the editor. The Node.js example below is for the demo only; the xref:tinymceai-on-premises-jwt.adoc[JWT authentication] guide contains production-ready endpoints in 8 languages (Node, Django, Flask, Laravel, Rails, .NET, Go, Spring Boot). 
+ +Create `package.json`: + +[source,json] +---- +{ + "name": "tinymce-ai-onpremise-demo", + "private": true, + "scripts": { + "start": "node token-server.js" + }, + "dependencies": { + "dotenv": "^16.0.0", + "express": "^4.18.0", + "jsonwebtoken": "^9.0.0" + } +} +---- + +Create `token-server.js`: + +.Full token-server.js listing +[%collapsible] +==== +[source,javascript] +---- +require('dotenv').config(); +const express = require('express'); +const jwt = require('jsonwebtoken'); + +const PORT = process.env.PORT || 3000; +const AI_ENV_ID = process.env.AI_ENV_ID; +const AI_API_SECRET = process.env.AI_API_SECRET; +const AI_SERVICE_URL = process.env.AI_SERVICE_URL || 'http://localhost:8000'; +const TINYMCE_API_KEY = process.env.TINYMCE_API_KEY || 'no-api-key'; + +if (!AI_ENV_ID || !AI_API_SECRET) { + console.error('ERROR: AI_ENV_ID and AI_API_SECRET must be set in .env'); + console.error('Create an environment first: visit http://localhost:8000/panel/'); + process.exit(1); +} + +const app = express(); +app.use(express.json()); + +app.post('/api/ai-token', (req, res) => { + const token = jwt.sign({ + aud: AI_ENV_ID, + sub: 'demo-user-001', + user: { name: 'Demo User', email: 'demo@example.com' }, + auth: { + ai: { + permissions: [ + 'ai:conversations:*', + 'ai:models:agent', + 'ai:actions:system:*', + 'ai:reviews:system:*' + ] + } + } + }, AI_API_SECRET, { algorithm: 'HS256', expiresIn: '1h' }); + + res.json({ token }); +}); + +app.get('/', (req, res) => { + res.send(` + + + TinyMCE AI on-premises Demo + + + + +

TinyMCE AI on-premises Demo

+

Select text and use the AI toolbar, or open the AI chat sidebar.

+ + + +`); +}); + +app.listen(PORT, () => { + console.log('Editor: http://localhost:' + PORT); + console.log('Token API: http://localhost:' + PORT + '/api/ai-token'); + console.log('AI Service: ' + AI_SERVICE_URL); +}); +---- +==== + +=== Install and run + +[source,bash] +---- +npm install +npm start +---- + +=== Open the demo + +Open *http://localhost:3000* in a browser. The editor loads with the AI toolbar. Select text and try the AI features. Responses stream in real time from the chosen LLM provider, processed entirely within the local infrastructure. + +The TinyMCE AI on-premises service is now running. + +== Verifying the installation + +After completing the quick start, exercise the pipeline end-to-end from the command line. + +[source,bash] +---- +# 1. Health check +curl http://localhost:8000/health +---- + +Expected: + +[source,json] +---- +{"serviceName":"on-premises-http","uptime":12.345} +---- + +[source,bash] +---- +# 2. Generate a token +curl -s -X POST http://localhost:3000/api/ai-token | python3 -m json.tool +---- + +Expected: + +[source,json] +---- +{ + "token": "eyJhbGciOiJIUzI1NiIs..." +} +---- + +[source,bash] +---- +# 3. 
Create a conversation and send a message +TOKEN=$(curl -s -X POST http://localhost:3000/api/ai-token | python3 -c "import sys,json;print(json.load(sys.stdin)['token'])") + +curl -s -X POST http://localhost:8000/v1/conversations \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"id":"verify-1","title":"Verification"}' + +curl -s -N -X POST http://localhost:8000/v1/conversations/verify-1/messages \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"prompt":"Say hello in five words.","model":"agent-1"}' +---- + +The message endpoint returns a Server-Sent Events stream: + +[source,text] +---- +event: message-metadata +data: {"messageId":"abc123"} + +event: text-delta +data: {"textDelta":"Hello "} + +event: text-delta +data: {"textDelta":"there, "} + +event: text-delta +data: {"textDelta":"friend!"} + +event: done +data: {} +---- + +If the stream emits `event: error`, inspect the `data` payload. Provider errors (invalid API key, IAM denial, model unavailable) ride inside the SSE response. The HTTP status stays 200. See the xref:tinymceai-on-premises-troubleshooting.adoc[LLM provider errors] section in the Troubleshooting guide for details. + +A successful round-trip confirms: container health, database connectivity, Redis connectivity, JWT signing, JWT verification, permissions checking, environment registration, LLM provider authentication, and SSE streaming. If problems persist after these checks, focus on the editor configuration next. 
diff --git a/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc b/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc new file mode 100644 index 0000000000..909fa9401e --- /dev/null +++ b/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc @@ -0,0 +1,911 @@ += JWT authentication for the on-premises AI service +:navtitle: JWT authentication +:description: JWT authentication for the TinyMCE AI on-premises service using HS256 symmetric signing +:keywords: AI, on-premises, JWT, authentication, HS256 + +The on-premises AI service uses *HS256* (HMAC-SHA256, symmetric shared secret) for JWT authentication. This is different from the Tiny Cloud AI service, which uses RS256. + +[WARNING] +-- +Do not follow the xref:tinymceai-jwt-authentication-intro.adoc[Cloud JWT guide] for on-premises deployments. The on-premises verifier silently rejects RS256-signed tokens with `invalid-jwt-signature` and no indication that the algorithm is wrong. +-- + + + +== End-to-end flow + +[.text-center] +image::tinymceai-on-premises/jwt-authentication-fig-1.svg[alt="JWT token exchange sequence between user application back end and AI service with error branches",width=100%] + +The shared secret (API Secret) is held only by the application back end and the AI service that issued it. The editor only ever sees signed tokens and never has direct access to the secret. + + + +== Signing model + +[cols=",",options="header",] +|=== +|Property |Value +|Algorithm |`HS256` (HMAC-SHA256) +|Key type |Symmetric shared secret +|Key source |*API Secret* generated for an access key inside an environment through the Management Panel +|Header format |`Authorization: Bearer <token>` +|=== + +Pin implementations to `HS256`. + +=== The API Secret + +The API Secret is generated when creating an access key inside an environment, in the Management Panel under *Environments → <environment name> → Access keys → New access key*. + +* It is shown *once* on the creation screen. 
Copy it immediately into a secret manager such as Vault, AWS Secrets Manager, Doppler, or a local `.env` file. +* If the secret is lost, rotate: create a new access key, deploy the new secret, then revoke the old key. +* This is *not* the `ENVIRONMENTS_MANAGEMENT_SECRET_KEY` environment variable; that one is used for Management Panel logins, not user-facing AI tokens. Mixing them up produces `invalid-jwt-signature`. + +=== API Secret compared with `ENVIRONMENTS_MANAGEMENT_SECRET_KEY` + +[cols=",,",options="header",] +|=== +|Credential |Purpose |Used by +|*API Secret* |Signs user-facing JWTs presented to the AI runtime endpoints. Created per access key inside an environment. |The application token endpoint. Never appears in any management call. +|*`ENVIRONMENTS_MANAGEMENT_SECRET_KEY`* |Signs Management Panel logins. Set as an environment variable on the AI service container. |The Management Panel UI. +|=== + +These two credentials are unrelated. Using one in place of the other produces `invalid-jwt-signature`. + + + +== Required claims + +Every token MUST contain the following claims. + +[cols=",,",options="header",] +|=== +|Claim |Type |Description +|`aud` |string |The Environment ID, copied from the Management Panel. UUID-shaped. Type *must be string*, not array; the verifier rejects array-shaped `aud` (the default in some JWT libraries) with `invalid-jwt-payload`. +|`iat` |number |Issued-at, seconds since epoch (UTC). +|`exp` |number |Expiry, seconds since epoch (UTC). Recommend `iat {plus} 3600` for demos, `iat {plus} 900` for production. The server applies 60 seconds of clock-skew leeway; tokens up to 60 seconds past `exp` still verify. +|`sub` |string |Unique, stable user identifier. Conversation history is isolated per-`sub`; do not reuse one `sub` across users or conversations will leak between them. +|`auth.ai.permissions` |`string[]` |Array of feature permission strings. See the permissions reference below. 
Wildcards (`*`) are accepted only in the documented positions; the bare string `"*"` is rejected. +|=== + +== Optional claims + +[cols=",,",options="header",] +|=== +|Claim |Type |Description +|`user.name` |string |Display name shown in the conversation history UI. +|`user.email` |string |Email shown in the conversation history UI. Not used for authentication. +|=== + +The verifier ignores additional unknown claims. Standard JWT claims (`iss`, `nbf`, `jti`) cause no harm when included; the verifier does not validate them, but they pass through. + + + +== Permissions reference + +This is the canonical permission list for the AI service. + +=== Conversation and global features + +[cols=",",options="header",] +|=== +|Permission |Grants +|`ai:conversations:*` |All conversation operations: create, list, send message, delete +|`ai:conversations:create` |Create new conversations +|`ai:conversations:read` |List and read existing conversations +|`ai:conversations:delete` |Delete conversations +|`ai:models:agent` |Access the built-in agent model (model ID `agent-1`) +|`ai:models:<provider>:<model>` |Access a specific custom model configured through the `MODELS` env var +|`ai:actions:system:*` |All built-in quick actions (rewrite, summarize, expand, translate, change tone, and related operations) +|`ai:reviews:system:*` |All built-in review features (correctness, clarity, readability, tone, and related checks) +|=== + +=== Model permission syntax + +`ai:models:<provider>:<model>` selects a specific custom model. The parser is *not* a greedy colon-split; it understands that `<model>` may itself contain colons and dots. + +Examples: + +.... +ai:models:openai:gpt-5-mini +ai:models:openai:gpt-4o +ai:models:anthropic:claude-sonnet-4-5 +ai:models:bedrock:us.anthropic.claude-sonnet-4-20250514-v1:0 +ai:models:vertex:gemini-2.5-pro +ai:models:azure:my-gpt5-deployment +.... + +For Azure, `<model>` is the *deployment name* configured in the Azure portal, not the underlying OpenAI model name.
+ +For Bedrock models with an inference profile prefix (`us.`, `eu.`, `apac.`) and embedded version colons (`v1:0`), include them verbatim; the parser handles them. + +=== What not to put in `auth.ai.permissions` + +[cols=",",options="header",] +|=== +|Do not use |Reason +|`ai:admin` |Appears in the *cloud* JWT doc. The on-premises service rejects this with `allowed: false` on every endpoint. There is no admin scope in on-premises deployments; admin actions go through the Management Panel. +|`"*"` (the bare string) |Rejected. The verifier requires structured permission strings. +|`useAllFeatures: true` |The on-premises service requires the explicit `auth.ai.permissions` array. +|A single string instead of an array |Rejected. `auth.ai.permissions` must be ``string[]``. +|=== + +=== Full-access set + +For demos and admin-tier users, this is the standard grant: + +[source,json] +---- +[ + "ai:conversations:*", + "ai:models:agent", + "ai:actions:system:*", + "ai:reviews:system:*" +] +---- + +When adding custom models through the `MODELS` environment variable, append one `ai:models:<provider>:<model>` entry for each custom model to expose in the selector. + + + +== Example payload + +A complete, decoded payload for a logged-in user with full access to a single OpenAI model: + +[source,json] +---- +{ + "aud": "5f1a2b3c-1234-5678-9abc-def012345678", + "iat": 1746950400, + "exp": 1746954000, + "sub": "user_8f3c9a12", + "user": { + "name": "Priya Patel", + "email": "priya.patel@example.com" + }, + "auth": { + "ai": { + "permissions": [ + "ai:conversations:*", + "ai:models:agent", + "ai:models:openai:gpt-5-mini", + "ai:actions:system:*", + "ai:reviews:system:*" + ] + } + } +} +---- + +Signed with HS256 using the API Secret, then sent as: + +.... +Authorization: Bearer eyJhbGciOiJIUzI1NiIs... +.... + +=== Clock-skew leeway + +The service allows up to 60 seconds of clock skew on the `exp` claim. Keep the token server and the AI service synchronized with NTP.
+ + + +== Production token endpoint examples + +Each example reads `AI_ENV_ID` and `AI_API_SECRET` from environment variables, authenticates the user through the framework's session/auth layer, signs an HS256 token, and returns `{"token": "..."}` as JSON. The endpoint runs in the application back end; the AI service never sees the API Secret directly. + +[cols="1,1",options="header"] +|=== +|Language |Framework / Library + +|Node.js |Express + `jsonwebtoken` +|Python |Django + `PyJWT`, Flask + `PyJWT` +|PHP |Laravel + `firebase/php-jwt` +|Ruby |Rails + `jwt` +|C# |.NET + `System.IdentityModel.Tokens.Jwt` +|Go |`golang-jwt/jwt/v5` +|Java |Spring Boot + `jjwt` +|=== + +.Node.js (Express + jsonwebtoken) +[%collapsible] +==== +[source,bash] +---- +npm install express jsonwebtoken +---- + +[source,javascript] +---- +const express = require('express'); +const jwt = require('jsonwebtoken'); + +const app = express(); +const ENV_ID = process.env.AI_ENV_ID; +const API_SECRET = process.env.AI_API_SECRET; + +app.post('/api/ai-token', requireLogin, (req, res) => { + const user = req.user; + const now = Math.floor(Date.now() / 1000); + + const payload = { + aud: ENV_ID, + iat: now, + exp: now + 3600, + sub: String(user.id), + user: { + name: user.displayName, + email: user.email, + }, + auth: { + ai: { + permissions: [ + 'ai:conversations:*', + 'ai:models:agent', + 'ai:actions:system:*', + 'ai:reviews:system:*', + ], + }, + }, + }; + + const token = jwt.sign(payload, API_SECRET, { algorithm: 'HS256' }); + res.json({ token }); +}); + +function requireLogin(req, res, next) { + if (!req.user) return res.status(401).json({ error: 'unauthenticated' }); + next(); +} + +app.listen(3000); +---- +==== + +.Python (Django + PyJWT) +[%collapsible] +==== +[source,bash] +---- +pip install PyJWT +---- + +[source,python] +---- +import os +import time +import jwt +from django.http import JsonResponse +from django.views.decorators.http import require_POST +from django.contrib.auth.decorators 
import login_required + +ENV_ID = os.environ["AI_ENV_ID"] +API_SECRET = os.environ["AI_API_SECRET"] + + +@require_POST +@login_required +def ai_token(request): + user = request.user + now = int(time.time()) + + payload = { + "aud": ENV_ID, + "iat": now, + "exp": now + 3600, + "sub": str(user.pk), + "user": { + "name": user.get_full_name() or user.username, + "email": user.email, + }, + "auth": { + "ai": { + "permissions": [ + "ai:conversations:*", + "ai:models:agent", + "ai:actions:system:*", + "ai:reviews:system:*", + ], + }, + }, + } + + token = jwt.encode(payload, API_SECRET, algorithm="HS256") + return JsonResponse({"token": token}) +---- + +Register the view in `urls.py`: + +[source,python] +---- +from django.urls import path +from . import views + +urlpatterns = [ + path("api/ai-token", views.ai_token, name="ai-token"), +] +---- +==== + +.Python (Flask + PyJWT) +[%collapsible] +==== +[source,bash] +---- +pip install Flask PyJWT +---- + +[source,python] +---- +import os +import time +import jwt +from flask import Flask, jsonify, abort, session + +app = Flask(__name__) +ENV_ID = os.environ["AI_ENV_ID"] +API_SECRET = os.environ["AI_API_SECRET"] + + +@app.post("/api/ai-token") +def ai_token(): + user = session.get("user") + if not user: + abort(401) + + now = int(time.time()) + payload = { + "aud": ENV_ID, + "iat": now, + "exp": now + 3600, + "sub": str(user["id"]), + "user": { + "name": user["name"], + "email": user["email"], + }, + "auth": { + "ai": { + "permissions": [ + "ai:conversations:*", + "ai:models:agent", + "ai:actions:system:*", + "ai:reviews:system:*", + ], + }, + }, + } + + token = jwt.encode(payload, API_SECRET, algorithm="HS256") + return jsonify({"token": token}) +---- +==== + +.PHP (Laravel {plus} firebase/php-jwt) +[%collapsible] +==== +[source,bash] +---- +composer require firebase/php-jwt +---- + +[source,php] +---- +<?php + +namespace App\Http\Controllers; + +use Firebase\JWT\JWT; +use Illuminate\Support\Facades\Auth; + +class AiTokenController extends Controller +{ + public function issue() + { + $user = Auth::user(); + $envId = env('AI_ENV_ID'); + $apiSecret = env('AI_API_SECRET'); + $now = time(); + + $payload = [ + 'aud' => $envId, + 'iat' => $now, + 'exp' => $now + 3600, + 'sub' => (string) $user->id, + 'user' => [ + 'name' => $user->name,
'email' => $user->email, + ], + 'auth' => [ + 'ai' => [ + 'permissions' => [ + 'ai:conversations:*', + 'ai:models:agent', + 'ai:actions:system:*', + 'ai:reviews:system:*', + ], + ], + ], + ]; + + $token = JWT::encode($payload, $apiSecret, 'HS256'); + return response()->json(['token' => $token]); + } +} +---- + +Route (`routes/web.php` or `routes/api.php`): + +[source,php] +---- +use App\Http\Controllers\AiTokenController; + +Route::post('/api/ai-token', [AiTokenController::class, 'issue']) + ->middleware('auth'); +---- +==== + +.Ruby (Rails {plus} jwt) +[%collapsible] +==== +[source,ruby] +---- +# Gemfile +gem 'jwt' +---- + +[source,ruby] +---- +class AiTokensController < ApplicationController + before_action :authenticate_user! + + def create + env_id = ENV.fetch('AI_ENV_ID') + api_secret = ENV.fetch('AI_API_SECRET') + + now = Time.now.to_i + payload = { + aud: env_id, + iat: now, + exp: now + 3600, + sub: current_user.id.to_s, + user: { + name: current_user.name, + email: current_user.email + }, + auth: { + ai: { + permissions: [ + 'ai:conversations:*', + 'ai:models:agent', + 'ai:actions:system:*', + 'ai:reviews:system:*' + ] + } + } + } + + token = JWT.encode(payload, api_secret, 'HS256') + render json: { token: token } + end +end +---- + +Route (`config/routes.rb`): + +[source,ruby] +---- +post '/api/ai-token', to: 'ai_tokens#create' +---- +==== + +.C# (.NET {plus} System.IdentityModel.Tokens.Jwt) +[%collapsible] +==== +[source,bash] +---- +dotnet add package System.IdentityModel.Tokens.Jwt +---- + +[source,c#] +---- +using System; +using System.IdentityModel.Tokens.Jwt; +using System.Security.Claims; +using System.Text; +using System.Text.Json; +using Microsoft.AspNetCore.Authorization; +using Microsoft.AspNetCore.Mvc; +using Microsoft.IdentityModel.Tokens; + +[ApiController] +[Route("api/ai-token")] +[Authorize] +public class AiTokenController : ControllerBase +{ + [HttpPost] + public IActionResult Issue() + { + var envId = 
Environment.GetEnvironmentVariable("AI_ENV_ID")!; + var apiSecret = Environment.GetEnvironmentVariable("AI_API_SECRET")!; + + var userId = User.FindFirst(ClaimTypes.NameIdentifier)!.Value; + var userName = User.FindFirst(ClaimTypes.Name)?.Value ?? ""; + var userEmail = User.FindFirst(ClaimTypes.Email)?.Value ?? ""; + + var now = DateTimeOffset.UtcNow.ToUnixTimeSeconds(); + + var payload = new JwtPayload + { + { "aud", envId }, + { "iat", now }, + { "exp", now + 3600 }, + { "sub", userId }, + { "user", new { name = userName, email = userEmail } }, + { "auth", new { + ai = new { + permissions = new[] { + "ai:conversations:*", + "ai:models:agent", + "ai:actions:system:*", + "ai:reviews:system:*" + } + } + }} + }; + + var key = new SymmetricSecurityKey(Encoding.UTF8.GetBytes(apiSecret)); + var creds = new SigningCredentials(key, SecurityAlgorithms.HmacSha256); + var header = new JwtHeader(creds); + + var jwt = new JwtSecurityToken(header, payload); + var token = new JwtSecurityTokenHandler().WriteToken(jwt); + + return Ok(new { token }); + } +} +---- +==== + +.Go (golang-jwt/jwt/v5) +[%collapsible] +==== +[source,bash] +---- +go get github.com/golang-jwt/jwt/v5 +---- + +[source,go] +---- +package main + +import ( + "encoding/json" + "net/http" + "os" + "time" + + "github.com/golang-jwt/jwt/v5" +) + +type tokenResponse struct { + Token string `json:"token"` +} + +func aiTokenHandler(w http.ResponseWriter, r *http.Request) { + user, ok := userFromSession(r) + if !ok { + http.Error(w, "unauthenticated", http.StatusUnauthorized) + return + } + + envID := os.Getenv("AI_ENV_ID") + apiSecret := os.Getenv("AI_API_SECRET") + + now := time.Now().Unix() + claims := jwt.MapClaims{ + "aud": envID, + "iat": now, + "exp": now + 3600, + "sub": user.ID, + "user": map[string]string{ + "name": user.Name, + "email": user.Email, + }, + "auth": map[string]any{ + "ai": map[string]any{ + "permissions": []string{ + "ai:conversations:*", + "ai:models:agent", + "ai:actions:system:*", +
"ai:reviews:system:*", + }, + }, + }, + } + + token := jwt.NewWithClaims(jwt.SigningMethodHS256, claims) + signed, err := token.SignedString([]byte(apiSecret)) + if err != nil { + http.Error(w, "sign failed", http.StatusInternalServerError) + return + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(tokenResponse{Token: signed}) +} + +func main() { + http.HandleFunc("/api/ai-token", aiTokenHandler) + http.ListenAndServe(":3000", nil) +} +---- +==== + +.Java (Spring Boot {plus} jjwt) +[%collapsible] +==== +[source,xml] +---- +<!-- pom.xml --> +<dependency> + <groupId>io.jsonwebtoken</groupId> + <artifactId>jjwt-api</artifactId> + <version>0.12.6</version> +</dependency> +<dependency> + <groupId>io.jsonwebtoken</groupId> + <artifactId>jjwt-impl</artifactId> + <version>0.12.6</version> + <scope>runtime</scope> +</dependency> +<dependency> + <groupId>io.jsonwebtoken</groupId> + <artifactId>jjwt-jackson</artifactId> + <version>0.12.6</version> + <scope>runtime</scope> +</dependency> +---- + +[source,java] +---- +package com.example.ai; + +import io.jsonwebtoken.Jwts; +import io.jsonwebtoken.security.Keys; +import org.springframework.security.core.annotation.AuthenticationPrincipal; +import org.springframework.security.core.userdetails.UserDetails; +import org.springframework.web.bind.annotation.*; + +import javax.crypto.SecretKey; +import java.nio.charset.StandardCharsets; +import java.time.Instant; +import java.util.List; +import java.util.Map; + +@RestController +@RequestMapping("/api/ai-token") +public class AiTokenController { + + private final String envId = System.getenv("AI_ENV_ID"); + private final String apiSecret = System.getenv("AI_API_SECRET"); + + @PostMapping + public Map<String, String> issue(@AuthenticationPrincipal UserDetails user) { + SecretKey key = Keys.hmacShaKeyFor(apiSecret.getBytes(StandardCharsets.UTF_8)); + Instant now = Instant.now(); + + String token = Jwts.builder() + .audience().single(envId) + .issuedAt(java.util.Date.from(now)) + .expiration(java.util.Date.from(now.plusSeconds(3600))) + .subject(user.getUsername()) + .claim("user", Map.of( + "name", user.getUsername(), + "email", "" + )) + .claim("auth", Map.of( + "ai", Map.of( + "permissions", List.of( + "ai:conversations:*", + "ai:models:agent", +
"ai:actions:system:*", + "ai:reviews:system:*" + ) + ) + )) + .signWith(key, Jwts.SIG.HS256) + .compact(); + + return Map.of("token", token); + } +} +---- +==== + + + +== Editor-side token provider + +Configure the TinyMCE editor to fetch a token from the application endpoint. The plugin calls the provider on demand and re-fetches when the token nears expiry. + +[source,javascript] +---- +tinymce.init({ + selector: 'textarea', + plugins: 'tinymceai', + toolbar: 'undo redo | bold italic | tinymceai-chat tinymceai-review tinymceai-quickactions', + + tinymceai_service_url: 'https://ai.example.com', + + tinymceai_token_provider: () => + fetch('/api/ai-token', { method: 'POST', credentials: 'include' }) + .then(r => r.json()) + .then(d => ({ token: d.token })), +}); +---- + +IMPORTANT: Do not cache the JWT in application code. The plugin calls the provider on initialization and again as the token nears expiry; it manages refresh internally. + +The provider must return a Promise that resolves to `pass:c[{ token: '<jwt>' }]`. Returning the raw string fails silently. If the provider rejects or returns a non-OK response, the plugin surfaces an error in the editor UI. + +TIP: Set `credentials: 'include'` on the fetch when the token endpoint relies on session cookies. Without it, the browser does not send cookies on cross-origin requests. When the token endpoint is on the same origin as the editor, `credentials: 'include'` is harmless but unnecessary. + +For cross-origin setups, configure the back end server to respond with `Access-Control-Allow-Origin: <origin>` (not `*`) and `Access-Control-Allow-Credentials: true`. Set the session cookie with `SameSite=None; Secure`. + +For framework-specific (React, Vue, Angular) integration, see xref:tinymceai-on-premises-frameworks.adoc[Framework integration]. + + + +== Permission gating patterns + +A common deployment shape: one AI service serving multiple subscription tiers. The token endpoint derives the permission set from role, plan, or tenant.
+ +=== Tiered permissions (basic / pro / enterprise) + +[source,javascript] +---- +function permissionsFor(user) { + const base = [ + 'ai:conversations:*', + 'ai:actions:system:*', + ]; + + switch (user.plan) { + case 'basic': + return [ + ...base, + 'ai:models:openai:gpt-5-mini', + ]; + + case 'pro': + return [ + ...base, + 'ai:reviews:system:*', + 'ai:models:agent', + 'ai:models:openai:gpt-5-mini', + 'ai:models:openai:gpt-4o', + ]; + + case 'enterprise': + return [ + ...base, + 'ai:reviews:system:*', + 'ai:models:agent', + 'ai:models:openai:gpt-5-mini', + 'ai:models:openai:gpt-4o', + 'ai:models:anthropic:claude-sonnet-4-5', + 'ai:models:bedrock:us.anthropic.claude-sonnet-4-20250514-v1:0', + ]; + + default: + return base; + } +} +---- + +=== Read-only viewers + +For deployments that should expose history without allowing new conversations: + +[source,javascript] +---- +[ + 'ai:conversations:read', +] +---- + +=== Multi-tenant: separate environments + +If tenants must be *fully isolated* (separate conversation history, separate access keys, separate audit logs), give each tenant its own Environment in the Management Panel, mint tokens with the tenant-specific `aud` and `AI_API_SECRET`, and route in the token endpoint: + +[source,javascript] +---- +function envFor(tenantId) { + return { + envId: process.env[`AI_ENV_ID_${tenantId}`], + apiSecret: process.env[`AI_API_SECRET_${tenantId}`], + }; +} +---- + + + +== Verification and troubleshooting + +=== Decode a token without verifying + +`jwt.io` accepts pasted tokens and shows the header and payload. Alternatively: + +[source,bash] +---- +python3 -c "import jwt; print(jwt.decode('<token>', options={'verify_signature': False}))" +---- + +[source,bash] +---- +node -e "console.log(JSON.parse(Buffer.from(process.argv[1].split('.')[1],'base64url')))" '<token>' +---- + +When debugging, start here. Most "auth failures" reflect wrong claim values rather than signing problems.
+ +=== Common failure modes + +[cols=",,",options="header",] +|=== +|Symptom |Cause |Fix +|`invalid-jwt-signature` |API Secret mismatch |Verify `AI_API_SECRET` matches the value displayed at access-key creation. If lost, create a new access key and rotate. +|`invalid-jwt-signature` (after copying cloud guide) |Token signed with RS256 |Switch to HS256 with the API Secret. See top-of-page warning. +|`invalid-jwt-payload` |`aud` does not match a real Environment ID |Confirm the Environment ID from the Management Panel matches `aud` exactly. +|`invalid-jwt-payload` (env "exists") |Environment created through raw management API rather than the Management Panel UI |Recreate through the panel. See the Environment creation section below. +|`invalid-jwt` (not `jwt-expired`) |Token is past `exp` by more than 60 seconds |Request a new token. The server allows 60-second clock-skew leeway; anything beyond is rejected with `invalid-jwt`. +|`Environment not found` |Environment is in `environments__environment` / `security__environment` but not in `ai_assistant_environments` |Recreate through Management Panel UI. +|`allowed: false` on every endpoint |Wrong shape for `auth.ai.permissions` |Must be ``string[]``. Not a single string. Not `useAllFeatures`. Not `ai:admin`. +|`allowed: false` on specific endpoints only |Missing the specific permission |Decode token, check the `auth.ai.permissions` array against the table above. +|Token silently rejected, no decoded error |RS256 signature |Re-sign with HS256. +|`aud` claim type mismatch |`aud` issued as array instead of string |Some JWT libraries default to array `aud`. Force string. +|Editor shows "Failed to authenticate" |Token endpoint returned non-JSON, returned `token` as nested object, or CORS blocked the request |Open browser devtools → Network → inspect the response from `/api/ai-token`. 
+|=== + +=== Sanity-check a token manually + +[source,bash] +---- +TOKEN=$(curl -s -X POST http://localhost:3001/api/ai-token | jq -r .token) + +curl -i https://ai.example.com/v1/conversations \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{}' +---- + +A `201 Created` confirms the full chain works: secret, claims, permissions, environment registration. + + + +== Token lifetime guidance + +[cols=",",options="header",] +|=== +|Scenario |Recommended `exp - iat` +|Local development |1 hour (`3600`) +|Demos |1 hour +|Production |5–15 minutes (`300`–`900`) +|High-security / regulated |5 minutes, plus short-lived sessions on the auth layer +|=== + +Short-lived tokens limit exposure if a token leaks through a browser extension, log capture, or error report. The editor re-requests a token as needed through `tinymceai_token_provider`, so long-lived tokens provide no practical benefit. + + + + +== See also + +* xref:tinymceai-on-premises-getting-started.adoc[Getting started] -- end-to-end deployment, including a demo token server +* xref:tinymceai-on-premises-providers.adoc[LLM providers] -- configuring custom models through `MODELS` and the `ai:models:<provider>:<model>` permission syntax +* xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting] -- full troubleshooting catalog beyond JWT +* xref:tinymceai-on-premises-frameworks.adoc[Framework integration] -- editor-side integration patterns for React, Vue, and Angular, including `tinymceai_token_provider` wrappers diff --git a/modules/ROOT/pages/tinymceai-on-premises-production.adoc b/modules/ROOT/pages/tinymceai-on-premises-production.adoc new file mode 100644 index 0000000000..5b685f3f49 --- /dev/null +++ b/modules/ROOT/pages/tinymceai-on-premises-production.adoc @@ -0,0 +1,582 @@ += TinyMCE AI on-premises: Production deployment guide +:navtitle: Production deployment +:description: Production deployment guide for the TinyMCE AI on-premises service +:keywords: AI, on-premises, production,
Kubernetes, ECS, scaling + + + + +== Architecture overview + +[.text-center] +image::tinymceai-on-premises/production-guide-fig-1.svg[alt="Production deployment topology with reverse proxy AI service replicas database and Redis behind TLS",width=100%] + +The AI service is stateless, persists all state to MySQL/PostgreSQL and Redis, and scales horizontally behind a load balancer. + + + +== TLS / HTTPS + +The AI service does not terminate TLS. Place a reverse proxy in front. + +=== Nginx example + +[source,nginx] +---- +server { + listen 443 ssl; + server_name ai.example.com; + + ssl_certificate /etc/ssl/certs/ai.example.com.pem; + ssl_certificate_key /etc/ssl/private/ai.example.com.key; + + location / { + proxy_pass http://ai-service:8000; + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + # SSE streaming support + proxy_buffering off; + proxy_cache off; + proxy_read_timeout 300s; + } +} +---- + +[IMPORTANT] +-- +SSE streaming requires `proxy_buffering off`. Without it, AI responses appear to hang until the entire response is generated. +-- + +=== AWS ALB + +* Target group: HTTP on port 8000 +* Health check path: `/health` +* Idle timeout: 300 seconds (for long AI responses) +* Stickiness: not required (service is stateless) + + + +== Horizontal scaling + +The AI service is stateless. All persistent state lives in the SQL database, Redis, and the file-storage back end. Any number of replicas can run behind a load balancer. All replicas must share identical environment variable configuration. 
+ +=== Scaling considerations + +[cols=",",options="header",] +|=== +|Component |Scaling approach +|AI service |Add more containers (stateless) +|MySQL / PostgreSQL |Read replicas or managed DB (RDS, Cloud SQL, Azure Database) +|Redis |Redis Cluster or Sentinel; managed Redis (ElastiCache, Memorystore, Azure Cache) +|File storage |S3 / Azure Blob recommended for production. The `database` storage driver is intended for development only. +|=== + +[IMPORTANT] +-- +When deploying for the first time or upgrading to a new version, start a single instance and wait for it to become healthy before scaling up. Subsequent scale events do not require this precaution. +-- + + + + +== Kubernetes deployment + +=== Namespace and image pull secret + +[source,bash] +---- +kubectl create namespace tinymce-ai + +kubectl create secret docker-registry tiny-registry \ + --namespace tinymce-ai \ + --docker-server=registry.containers.tiny.cloud \ + --docker-username=TINY_REGISTRY_USERNAME \ + --docker-password='TINY_REGISTRY_ACCESS_TOKEN' +---- + +=== Application secrets + +[source,yaml] +---- +apiVersion: v1 +kind: Secret +metadata: + name: ai-service-secrets + namespace: tinymce-ai +type: Opaque +stringData: + license-key: "EXAMPLE_LICENSE_KEY" + management-secret: "EXAMPLE_MANAGEMENT_SECRET" + db-password: "EXAMPLE_DB_PASSWORD" + redis-password: "EXAMPLE_REDIS_PASSWORD" + providers: | + { + "openai": { + "type": "openai", + "apiKeys": ["sk-proj-EXAMPLE_KEY"] + } + } +---- + +In production, use Sealed Secrets, External Secrets Operator, or HashiCorp Vault rather than committing raw secret manifests. 
+ +=== Deployment + +.Full Kubernetes Deployment manifest +[%collapsible] +==== +[source,yaml] +---- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ai-service + namespace: tinymce-ai +spec: + replicas: 2 + selector: + matchLabels: + app: ai-service + template: + metadata: + labels: + app: ai-service + spec: + imagePullSecrets: + - name: tiny-registry + containers: + - name: ai-service + image: registry.containers.tiny.cloud/ai-service:latest + ports: + - containerPort: 8000 + env: + - name: LICENSE_KEY + valueFrom: + secretKeyRef: + name: ai-service-secrets + key: license-key + - name: ENVIRONMENTS_MANAGEMENT_SECRET_KEY + valueFrom: + secretKeyRef: + name: ai-service-secrets + key: management-secret + - name: DATABASE_DRIVER + value: "mysql" + - name: DATABASE_HOST + value: "mysql.tinymce-ai.svc.cluster.local" + - name: DATABASE_USER + value: "ai_service" + - name: DATABASE_PASSWORD + valueFrom: + secretKeyRef: + name: ai-service-secrets + key: db-password + - name: DATABASE_DATABASE + value: "ai_service" + - name: REDIS_HOST + value: "redis.tinymce-ai.svc.cluster.local" + - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: + name: ai-service-secrets + key: redis-password + - name: PROVIDERS + valueFrom: + secretKeyRef: + name: ai-service-secrets + key: providers + - name: STORAGE_DRIVER + value: "s3" + - name: STORAGE_REGION + value: "us-east-1" + - name: STORAGE_BUCKET + value: "example-ai-storage-bucket" + - name: ENABLE_METRIC_LOGS + value: "true" + readinessProbe: + httpGet: + path: /health + port: 8000 + initialDelaySeconds: 10 + periodSeconds: 5 + livenessProbe: + httpGet: + path: /health + port: 8000 + initialDelaySeconds: 30 + periodSeconds: 10 + resources: + requests: + memory: "512Mi" + cpu: "500m" + limits: + memory: "2Gi" + cpu: "2000m" +---- +==== + +=== Service + +[source,yaml] +---- +apiVersion: v1 +kind: Service +metadata: + name: ai-service + namespace: tinymce-ai +spec: + selector: + app: ai-service + ports: + - port: 8000 + 
targetPort: 8000 +---- + +=== Ingress + +[source,yaml] +---- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: ai-service + namespace: tinymce-ai + annotations: + nginx.ingress.kubernetes.io/proxy-buffering: "off" + nginx.ingress.kubernetes.io/proxy-read-timeout: "300" + nginx.ingress.kubernetes.io/proxy-send-timeout: "300" +spec: + tls: + - hosts: + - ai.example.com + secretName: ai-tls-cert + rules: + - host: ai.example.com + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: ai-service + port: + number: 8000 +---- + +=== Horizontal pod autoscaler + +[source,yaml] +---- +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: ai-service-hpa + namespace: tinymce-ai +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: ai-service + minReplicas: 3 + maxReplicas: 20 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 70 +---- + + + + +== AWS ECS / Fargate + +=== Task definition + +.Full ECS Fargate task definition +[%collapsible] +==== +[source,json] +---- +{ + "family": "ai-service", + "networkMode": "awsvpc", + "requiresCompatibilities": ["FARGATE"], + "cpu": "1024", + "memory": "2048", + "containerDefinitions": [ + { + "name": "ai-service", + "image": "registry.containers.tiny.cloud/ai-service:latest", + "portMappings": [{ "containerPort": 8000 }], + "healthCheck": { + "command": ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"], + "interval": 30, + "timeout": 5, + "retries": 3 + }, + "secrets": [ + { "name": "LICENSE_KEY", "valueFrom": "arn:aws:secretsmanager:us-east-1:111122223333:secret:ai-license" }, + { "name": "ENVIRONMENTS_MANAGEMENT_SECRET_KEY", "valueFrom": "arn:aws:secretsmanager:us-east-1:111122223333:secret:ai-mgmt-secret" }, + { "name": "DATABASE_PASSWORD", "valueFrom": "arn:aws:secretsmanager:us-east-1:111122223333:secret:ai-db" }, + { "name": "PROVIDERS", "valueFrom": 
"arn:aws:secretsmanager:us-east-1:111122223333:secret:ai-providers" } + ], + "environment": [ + { "name": "DATABASE_DRIVER", "value": "mysql" }, + { "name": "DATABASE_HOST", "value": "example-rds-endpoint.region.rds.amazonaws.com" }, + { "name": "DATABASE_USER", "value": "ai_service" }, + { "name": "DATABASE_DATABASE", "value": "ai_service" }, + { "name": "REDIS_HOST", "value": "example-elasticache-endpoint.region.cache.amazonaws.com" }, + { "name": "STORAGE_DRIVER", "value": "s3" }, + { "name": "STORAGE_BUCKET", "value": "example-ai-storage-bucket" }, + { "name": "STORAGE_REGION", "value": "us-east-1" } + ] + } + ] +} +---- +==== + +=== Infrastructure recommendations + +[cols=",",options="header",] +|=== +|Service |AWS recommendation +|Database |RDS for MySQL 8.0 (Multi-AZ for HA) +|Redis |ElastiCache for Redis 7 (cluster mode) +|Storage |Same-region S3 bucket +|Load balancer |ALB with `/health` target health check, 300 s idle timeout +|Secrets |AWS Secrets Manager +|Registry pull credentials |Secrets Manager {plus} ECR pull-through cache, or a private repository mirroring `registry.containers.tiny.cloud` +|=== + + + +== Security hardening + +[cols=",",options="header",] +|=== +|Practice |Implementation +|Network isolation |Place the AI service in a private subnet; expose only through a load balancer. Restrict database and Redis to the AI service security group. +|Block panel from the public internet |Restrict `/panel/` to an admin VPN or IP allowlist. The panel manages secrets and access keys. +|TLS everywhere |Terminate TLS 1.3 at the reverse proxy. Use internal mTLS between the AI service and the data layer where supported. +|Secrets management |Use Vault, AWS Secrets Manager, Azure Key Vault, or GCP Secret Manager. Never store secrets directly in orchestration manifests or commit them to source control. +|Database encryption at rest |Turn on encryption at rest in the cloud provider console. RDS, Cloud SQL, and Azure Database enable this by default. 
+|Redis authentication |Always set `REDIS_PASSWORD` (or use a managed Redis instance with authentication enabled). +|Container security |Run as non-root, use a read-only filesystem where possible, and drop unnecessary Linux capabilities. +|Image scanning |Scan `registry.containers.tiny.cloud/ai-service` with Trivy, Snyk, or the registry's built-in scanner. +|Least-privilege JWTs |Grant only the permissions each user role requires. Avoid full-access tokens in production. +|API secret rotation |Periodically create a new access key, add the new key to the configuration, then revoke the old key. The token endpoint reads the secret at request time. +|Audit logging |Enable `ENABLE_METRIC_LOGS=true` and ship logs to a SIEM. +|LLM API key rotation |Add the new key to the `PROVIDERS` array, restart the service, then revoke the old key after confirming the new one works. +|=== + +== Rate limiting + +The AI service has no built-in rate limiting. Place rate-limit rules in front of the service to prevent a runaway client from consuming LLM provider quota or overloading the database. + +=== nginx + +[source,nginx] +---- +limit_req_zone $http_authorization zone=ai_jwt:10m rate=10r/s; + +server { + location /v1/ { + limit_req zone=ai_jwt burst=20 nodelay; + proxy_pass http://ai-service:8000; + proxy_buffering off; + proxy_read_timeout 300s; + } +} +---- + +=== AWS ALB / WAF + +ALB does not rate limit natively. Use AWS WAF with a rate-based rule keyed on the `Authorization` header. + +=== Cloudflare + +Use Cloudflare Rate Limiting with a custom rule keyed on the `Authorization` header for the AI service hostname. + +For per-tenant rate limiting, key on the `aud` claim by parsing it in the reverse proxy, or gate token issuance per tenant per minute at the token endpoint. + + +== Observability + +=== Health monitoring + +Poll `/health` on each instance to confirm it is running. A healthy instance responds with HTTP 200. 
+ +[source,bash] +---- +curl -f http://ai-service:8000/health +---- + +=== Structured metric logs + +Set the `ENABLE_METRIC_LOGS` environment variable to enable request-level JSON logs to stdout: + +[source,bash] +---- +-e ENABLE_METRIC_LOGS='true' +---- + +When enabled, the service writes a structured JSON entry for each request. Key fields include the request duration, HTTP status code, and outcome status. These entries are suitable for ingestion into any log aggregator that supports JSON parsing. + +=== OpenTelemetry + +[source,bash] +---- +-e LLM_TELEMETRY_ENABLED='true' \ +-e OTEL_EXPORTER_OTLP_TRACES_ENDPOINT='http://otel-collector:4318/v1/traces' \ +-e OTEL_TRACES_SAMPLER_ARG='1.0' \ +-e OTEL_DEBUG='true' +---- + +[cols="1,1,1,3",options="header"] +|=== +|Variable |Required |Default |Description +|`LLM_TELEMETRY_ENABLED` |Yes |`false` |Primary telemetry switch +|`OTEL_EXPORTER_OTLP_TRACES_ENDPOINT` |Yes |- |OTLP endpoint URL +|`OTEL_TRACES_SAMPLER_ARG` |No |`1.0` |Sampling rate (0.0 to 1.0) +|`OTEL_DEBUG` |No |- |Verbose OTLP diagnostic logging +|=== + +Compatible with Jaeger, Grafana Tempo, Datadog, New Relic, Honeycomb, and any OTLP-compatible back end. + +=== Langfuse + +Langfuse provides AI-specific observability: token usage, latency per LLM call, prompt quality scores, and cost tracking. + +[source,bash] +---- +-e LANGFUSE_PUBLIC_KEY='pk-lf-...' \ +-e LANGFUSE_SECRET_KEY='sk-lf-...' \ +-e LANGFUSE_BASE_URL='https://cloud.langfuse.com' \ +-e LANGFUSE_DEBUG='true' +---- + +[cols="1,1,1,3",options="header"] +|=== +|Variable |Required |Default |Description +|`LANGFUSE_PUBLIC_KEY` |Yes (if used) |- |Langfuse public key +|`LANGFUSE_SECRET_KEY` |Yes (if used) |- |Langfuse secret key +|`LANGFUSE_BASE_URL` |No |`https://cloud.langfuse.com` |Self-hosted Langfuse URL +|`LANGFUSE_DEBUG` |No |- |Verbose Langfuse logging +|=== + +Langfuse also requires `LLM_TELEMETRY_ENABLED=true` and a valid `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT`. 
+ +OpenTelemetry and Langfuse can run at the same time. The service emits to both without conflict. + +=== Distributed logging + +For production multi-instance deployments, ship container logs to a central aggregator. + +[cols="1,3",options="header"] +|=== +|Platform |Log driver / approach +|AWS |CloudWatch Logs through the `awslogs` driver, or Fluent Bit DaemonSet on EKS +|GCP |Cloud Logging (automatic on GKE), or Fluent Bit +|Azure |Azure Monitor (automatic on Azure Container Apps and AKS) +|Self-hosted (ELK) |Fluent Bit or Filebeat to Elasticsearch {plus} Kibana +|Self-hosted (Loki) |Fluent Bit or Promtail to Grafana Loki +|Fluentd |Use the Docker fluentd log driver +|=== + +.Fluentd log driver example +[source,bash] +---- +docker run ... \ + --log-driver=fluentd \ + --log-opt fluentd-address=localhost:24224 \ + --log-opt tag=ai-service \ + ... +---- + +The metric logs produced by the `ENABLE_METRIC_LOGS` option are already structured JSON and parse cleanly in any aggregator. + +=== Recommended monitoring + +The following checks help catch common issues early: + +* **Health endpoint** -- poll `/health` on each instance; alert if any instance returns a non-200 response for more than 60 seconds. +* **Error rate** -- monitor the HTTP 5xx rate in the metric logs or traces; a sustained increase may indicate an LLM provider outage or a misconfigured environment. +* **Latency** -- track request duration; a sudden increase typically points to LLM provider throttling or network issues. +* **Container restarts** -- alert on repeated container restarts, which may indicate a missing environment variable or a database connectivity problem. + +For troubleshooting specific error patterns, see xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting]. + + + +== Backup and recovery + +=== Database + +The database contains environments, access keys, conversations, messages, and file metadata. 
Back up the database using standard production practices: + +* *MySQL:* `mysqldump` or managed snapshots (RDS automated backups). +* *PostgreSQL:* `pg_dump` or managed snapshots. + +Enable point-in-time recovery. + +=== File storage + +[cols=",",options="header",] +|=== +|Back end |Backup approach +|`database` |The SQL database stores file blobs; database backups include them. +|`filesystem` |Back up the mounted volume. +|`s3` |Enable versioning on the bucket for point-in-time recovery. +|`azure` |Enable Blob versioning. +|=== + +=== Redis + +Redis holds ephemeral state. Losing Redis data does not affect persistent data. No backup is required. + + + +== Upgrade process + +. Pull the new image: ++ +[source,bash] +---- +docker pull registry.containers.tiny.cloud/ai-service:NEW_VERSION +---- +. For rolling deploys across version boundaries: start *one* instance at the new version and wait for it to become healthy before rolling the rest. +. For Kubernetes: update the image tag in the Deployment. The default `RollingUpdate` strategy handles zero-downtime upgrades, provided the first new pod becomes Ready before the rollout continues. +. Verify `/health` on every replica before declaring the upgrade complete. + +Review the release notes for the target version and take a database backup before upgrading. + +License keys are per-deployment, not per-replica. One key covers any number of replicas of a single deployment. 
+ + + +== Sizing guide + +[cols=",,,,",options="header",] +|=== +|Users |AI service replicas |Database |Redis |Notes +|1 to 50 |1 |db.t3.small (or 2 vCPU / 4 GB self-managed) |cache.t3.micro |Development and small teams +|50 to 500 |2 |db.r6g.large |cache.r6g.large |Small production +|500 to 5,000 |3 to 5 |db.r6g.xlarge (Multi-AZ) |cache.r6g.xlarge (cluster) |Medium production +|5,000{plus} |5{plus} (HPA) |db.r6g.2xlarge{plus} |cache.r6g.2xlarge{plus} |Large production; contact Tiny for guidance +|=== + +Starting point for self-managed deployments: + +* AI service instance: 2 vCPU / 4 GB RAM +* Database instance: 2 vCPU / 8 GB RAM +* Redis instance: 1 vCPU / 2 GB RAM + +Scale based on user count, average prompt size, and concurrent streaming connections. The LLM provider's rate limits are usually the binding constraint long before the AI service or database becomes one. diff --git a/modules/ROOT/pages/tinymceai-on-premises-providers.adoc b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc new file mode 100644 index 0000000000..f7ababd98a --- /dev/null +++ b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc @@ -0,0 +1,985 @@ += LLM provider configuration +:navtitle: LLM providers +:description: LLM provider configuration for TinyMCE AI on-premises service +:keywords: AI, on-premises, LLM, OpenAI, Anthropic, Google, Gemini, Azure, Bedrock, Vertex, Ollama, vLLM, openai-compatible + + + + +The `PROVIDERS` environment variable tells the AI service how to reach the upstream LLM. The `MODELS` environment variable tells the service which models are exposed to clients and which features each model supports. This page is the definitive reference for both: every supported `type`, every required field, and every known issue encountered in production. + +Start with the xref:tinymceai-on-premises-getting-started.adoc[Getting Started guide] if the AI service container is not yet running. The following sections assume a running `ai-service` container. 
+ +== Provider and model architecture + +The AI service uses two related environment variables, alongside per-user JWT permissions: + +[cols=",,",options="header",] +|=== +|Variable |Type |What it does +|`PROVIDERS` |JSON object |Map of provider IDs to provider configurations. Each entry says how to authenticate with one upstream LLM API. +|`MODELS` |JSON array |List of models exposed to clients. Each model points at a `PROVIDERS` entry and declares which features it can serve. +|JWT `auth.ai.permissions` |string array |Per-user authorization list. Includes `ai:models:<provider>:<model>` entries to gate access to individual models. +|=== + +The `PROVIDERS` keys are arbitrary identifiers (for example `"openai"`, `"my-bedrock"`, `"team-azure"`). Each value object has a `type` field that picks the implementation: + +[cols=",",options="header",] +|=== +|`type` |Provider +|`openai` |OpenAI (api.openai.com) +|`anthropic` |Anthropic (api.anthropic.com) +|`google` |Google AI Studio / Gemini direct +|`azure` |Azure OpenAI Service +|`bedrock` |AWS Bedrock +|`vertex` |Google Cloud Vertex AI +|`openai-compatible` |Any OpenAI-compatible HTTP API (Ollama, vLLM, LM Studio, llama.cpp, LocalAI, OpenRouter, Together, Groq, Fireworks, and similar) +|=== + +The *key* (not the `type`) is what gets referenced from: + +* ``MODELS[].provider`` -- to wire a model to a provider +* JWT `auth.ai.permissions` -- for per-provider or per-model access control (see xref:tinymceai-on-premises-jwt.adoc[JWT authentication]) + +[source,json] +---- +{ + "my-openai-key": { "type": "openai", "apiKeys": ["sk-proj-..."] }, + "my-bedrock": { "type": "bedrock", "region": "us-east-1", "credentials": { "accessKeyId": "...", "secretAccessKey": "..."
} }, + "my-ollama": { "type": "openai-compatible", "baseUrl": "http://host.docker.internal:11434/v1" } +} +---- + +=== How the pieces fit together + +[.text-center] +image::tinymceai-on-premises/providers-guide-fig-1.svg[alt="PROVIDERS JSON structure mapping provider keys to OpenAI Anthropic Azure Bedrock Vertex and self-hosted endpoints",width=100%] + +The diagram reflects three stacked layers: *how to authenticate* with each upstream (`PROVIDERS`), *what to expose* to clients (`MODELS`), and *who can use which model* (JWT permissions). The same provider key string ties all three together. + + + +[[models-required]] +== Defining the model list + +The on-premises service ships with a built-in default model list that covers *only* OpenAI, Anthropic, and Google direct. For every other provider type (Azure, Bedrock, Vertex, openai-compatible), define `MODELS` explicitly; otherwise nothing usable is exposed. + +When only `PROVIDERS` is configured and `MODELS` is omitted, a `GET /v1/models/1` call returns only the built-in `agent-1` placeholder with `allowed: false`: + +[source,json] +---- +{ + "models": [ + { "id": "agent-1", "allowed": false, "features": [] } + ] +} +---- + +Clients (the TinyMCE editor included) will then fall back to the disabled agent and every AI request will fail with no useful error in the UI. 
The model list endpoint also only accepts the literal version `1`: + +[source,bash] +---- +# Works +curl http://localhost:8000/v1/models/1 + +# All 500 +curl http://localhost:8000/v1/models/v1 +curl http://localhost:8000/v1/models/v2 +curl http://localhost:8000/v1/models/latest +---- + +*Minimum `MODELS` entry* to make a model usable: + +[source,json] +---- +[ + { + "id": "gpt-4o-mini", + "provider": "my-openai-key", + "description": "OpenAI GPT-4o mini", + "features": ["conversations", "reviews", "actions"] + } +] +---- + +The `features` array must include at least one of `conversations`, `reviews`, or `actions` for the model to be selectable for that feature. A model with no overlapping features is invisible to that part of the UI. + +A full field reference for `MODELS` is at the end of this page. + + + +== OpenAI + +API key from https://platform.openai.com/api-keys[platform.openai.com]. With OpenAI alone, `MODELS` can be omitted; the built-in catalog covers common models. + +.Configuration details +[%collapsible] +==== +*JSON shape:* + +[source,json] +---- +{ + "openai": { + "type": "openai", + "apiKeys": ["sk-proj-YOUR_KEY_HERE"] + } +} +---- + +*Fields:* + +[cols=",,",options="header",] +|=== +|Field |Required |Notes +|`type` |Yes |Literal `"openai"` +|`apiKeys` |Yes |Array of one or more API keys. Multiple keys allow zero-downtime rotation. +|`baseUrl` |No |Override only if proxying through a private OpenAI gateway. 
+|=== + +*Full `docker run` example:* + +[source,bash] +---- +docker run --init -d -p 8000:8000 \ + --name ai-service \ + -e LICENSE_KEY="$LICENSE_KEY" \ + -e ENVIRONMENTS_MANAGEMENT_SECRET_KEY="$MANAGEMENT_SECRET" \ + -e DATABASE_DRIVER='mysql' \ + -e DATABASE_HOST='mysql' \ + -e DATABASE_USER='root' \ + -e DATABASE_PASSWORD="$DB_PASSWORD" \ + -e DATABASE_DATABASE='ai_service' \ + -e REDIS_HOST='redis' \ + -e STORAGE_DRIVER='database' \ + -e PROVIDERS='{"openai":{"type":"openai","apiKeys":["sk-proj-YOUR_KEY_HERE"]}}' \ + registry.containers.tiny.cloud/ai-service:latest +---- + +*Verify:* + +[source,bash] +---- +curl -s -H "Authorization: Bearer sk-proj-YOUR_KEY_HERE" \ + https://api.openai.com/v1/models | head -20 +---- + +When that `curl` call succeeds from the host, the same key in `PROVIDERS` works inside the container. + +IMPORTANT: *Project-scoped keys* (`sk-proj-...`) only work for models the project has been granted access to. A 404 on `gpt-4o` usually means the key's project is restricted. *Org-level keys* require the org header, which the service does not send; use a project key instead. +==== + + + +== Anthropic + +API key from https://console.anthropic.com/[console.anthropic.com]. Same shape as OpenAI. + +.Configuration details +[%collapsible] +==== +*JSON shape:* + +[source,json] +---- +{ + "anthropic": { + "type": "anthropic", + "apiKeys": ["sk-ant-YOUR_KEY_HERE"] + } +} +---- + +*Fields:* + +[cols=",,",options="header",] +|=== +|Field |Required |Notes +|`type` |Yes |Literal `"anthropic"` +|`apiKeys` |Yes |Array. Rotation behaves the same as OpenAI. +|=== + +*Reasoning models:* + +Claude 4.x models (Sonnet 4, Opus 4) support extended thinking. 
To surface the reasoning toggle in the TinyMCE UI, add `capabilities.reasoning: true` to the model entry in `MODELS`: + +[source,json] +---- +{ + "id": "claude-sonnet-4-5", + "provider": "anthropic", + "description": "Anthropic Claude Sonnet 4.5 with extended thinking", + "capabilities": { "reasoning": true }, + "features": ["conversations", "reviews", "actions"] +} +---- + +*Minimal example:* + +[source,bash] +---- +-e PROVIDERS='{"anthropic":{"type":"anthropic","apiKeys":["sk-ant-YOUR_KEY_HERE"]}}' +---- + +*Verify:* + +[source,bash] +---- +curl -s https://api.anthropic.com/v1/models \ + -H "x-api-key: sk-ant-YOUR_KEY_HERE" \ + -H "anthropic-version: 2023-06-01" +---- +==== + + + +== Google (Gemini direct) + +Direct Gemini access through https://aistudio.google.com/app/apikey[Google AI Studio]. Distinct from Vertex AI; different keys, different endpoints, different billing path. + +.Configuration details +[%collapsible] +==== +*JSON shape:* + +[source,json] +---- +{ + "google": { + "type": "google", + "apiKeys": ["AIza-YOUR_KEY_HERE"] + } +} +---- + +*Fields:* + +[cols=",,",options="header",] +|=== +|Field |Required |Notes +|`type` |Yes |Literal `"google"` +|`apiKeys` |Yes |Array of `AIza...` keys from AI Studio. +|=== + +*Minimal example:* + +[source,bash] +---- +-e PROVIDERS='{"google":{"type":"google","apiKeys":["AIza-YOUR_KEY_HERE"]}}' +---- + +*Verify:* + +[source,bash] +---- +curl -s "https://generativelanguage.googleapis.com/v1beta/models?key=AIza-YOUR_KEY_HERE" +---- + +IMPORTANT: AI Studio `AIza` keys do *not* work against Vertex endpoints. For Vertex, see the <<google-vertex-ai>> section. + +TIP: Free-tier keys are heavily rate limited and return 429 responses under modest load. Move to a paid tier before load testing or production traffic. +==== + + + +== Azure OpenAI + +Azure-hosted OpenAI models. Requires an Azure subscription, an Azure OpenAI resource, and at least one deployment. The ``MODELS[].id`` must match the Azure deployment name exactly.
+ +.Configuration details +[%collapsible] +==== +*Prerequisites in the Azure portal:* + +[arabic] +. Create an Azure OpenAI resource. Note the *resource name*; this is the subdomain prefix in `https://YOUR_RESOURCE_NAME.openai.azure.com`. +. Apply for model access if required by the region. +. In Azure AI Studio, create a *deployment* for each model to expose. The deployment name is arbitrary (for example `prod-gpt4o`, `cheap-mini`). +. Copy one of the two API keys from *Keys and Endpoint* in the resource overview. + +*JSON shape:* + +[source,json] +---- +{ + "azure": { + "type": "azure", + "resourceName": "YOUR_RESOURCE_NAME", + "apiKeys": ["YOUR_AZURE_KEY_HERE"], + "apiVersion": "2024-10-21" + } +} +---- + +*Fields:* + +[cols=",,",options="header",] +|=== +|Field |Required |Notes +|`type` |Yes |Literal `"azure"` +|`resourceName` |Yes |The `*.openai.azure.com` prefix only, not the full URL. +|`apiKeys` |Yes |Array. Azure issues two keys per resource for zero-downtime key rotation. +|`apiVersion` |Yes |Always set explicitly. Refer to https://learn.microsoft.com/azure/ai-services/openai/reference[Microsoft's API version matrix] for current values. +|=== + +IMPORTANT: The ``MODELS[].id`` value must match the Azure *deployment name* exactly. A mismatch produces a `DeploymentNotFound` error. Use human-readable deployment names because the ID also appears in JWT permission strings and the editor model picker.
+ +*Two-deployment example:* + +[source,bash] +---- +-e PROVIDERS='{ + "azure": { + "type": "azure", + "resourceName": "tinymce-ai", + "apiKeys": ["YOUR_AZURE_KEY_HERE"], + "apiVersion": "2024-10-21" + } +}' + +-e MODELS='[ + { + "id": "prod-gpt4o", + "provider": "azure", + "name": "GPT-4o (production)", + "description": "Azure deployment of GPT-4o for production traffic", + "recommended": true, + "features": ["conversations", "reviews", "actions"] + }, + { + "id": "cheap-mini", + "provider": "azure", + "name": "GPT-4o mini (low cost)", + "description": "Azure deployment of GPT-4o mini for cheap actions", + "features": ["reviews", "actions"] + } +]' +---- + +*Verify:* + +[source,bash] +---- +curl -s -H "api-key: YOUR_AZURE_KEY_HERE" \ + "https://YOUR_RESOURCE_NAME.openai.azure.com/openai/deployments?api-version=2024-10-21" +---- + +Expect a JSON list that includes the deployment IDs from `MODELS`. + +IMPORTANT: *Wrong region quota:* each Azure region has independent quotas. Bursty workloads may throttle at modest QPS; split traffic across resources in different regions if needed. *Content filter false positives:* Azure's safety filter is stricter than OpenAI direct. Reviews on certain technical content can return `content_filter` errors. Configure custom content filter policies in Azure AI Studio. +==== + + + +== AWS Bedrock + +Amazon's hosted-model marketplace (Anthropic, Meta, Mistral, Cohere, Amazon Titan). Credentials must be inlined; the AWS SDK default credential chain is not used. + +.Configuration details +[%collapsible] +==== +IMPORTANT: The AI service does *not* use the AWS SDK default credential chain. `AWS_PROFILE`, `~/.aws/credentials`, IRSA, EC2 instance profiles, ECS task roles, and web identity tokens are all ignored. Inline the credentials in the `PROVIDERS` JSON. 
+ +*JSON shape:* + +[source,json] +---- +{ + "bedrock": { + "type": "bedrock", + "region": "us-east-1", + "credentials": { + "accessKeyId": "AKIA_YOUR_KEY_HERE", + "secretAccessKey": "YOUR_SECRET_HERE", + "sessionToken": "OPTIONAL_FOR_STS" + } + } +} +---- + +The `sessionToken` field is optional but required for STS-issued short-lived credentials. Plan a rotation procedure when using temporary credentials. + +*Prerequisites checklist:* + +[arabic] +. *Enable model access.* Bedrock console -> *Model access* -> Manage model access. Each model must be approved per-region. +. *Subscribe through AWS Marketplace* for non-Amazon models. Anthropic Claude on Bedrock requires a one-time Marketplace subscription. +. *Create an IAM user or role* with the permissions below. +. *Pick a region that has the model.* Not every model is in every region. Check Bedrock console > Model catalog before assuming. + +*Required IAM permissions:* + +[source,json] +---- +{ + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "BedrockInvoke", + "Effect": "Allow", + "Action": [ + "bedrock:InvokeModel", + "bedrock:InvokeModelWithResponseStream", + "bedrock:Converse", + "bedrock:ConverseStream" + ], + "Resource": "*" + } + ] +} +---- + +The service uses both `InvokeModel` and `Converse` paths depending on the model family, so include both. + +*Claude 4.x cross-region inference profiles:* + +Claude 4.x on Bedrock uses *cross-region inference profiles* rather than per-region model IDs. The ``MODELS[].id`` must be the inference profile ID, prefixed with the regional group: + +[cols=",",options="header",] +|=== +|Region group |Example profile ID +|US |`us.anthropic.claude-sonnet-4-5-20250929-v1:0` +|EU |`eu.anthropic.claude-sonnet-4-5-20250929-v1:0` +|APAC |`apac.anthropic.claude-sonnet-4-5-20250929-v1:0` +|=== + +Using the bare model ID (`anthropic.claude-sonnet-4-5-...`) returns `ValidationException`. 
+ +*Minimal example:* + +[source,bash] +---- +-e PROVIDERS='{ + "bedrock": { + "type": "bedrock", + "region": "us-east-1", + "credentials": { + "accessKeyId": "AKIA_YOUR_KEY_HERE", + "secretAccessKey": "YOUR_SECRET_HERE" + } + } +}' + +-e MODELS='[ + { + "id": "us.anthropic.claude-sonnet-4-5-20250929-v1:0", + "provider": "bedrock", + "name": "Claude Sonnet 4.5 (Bedrock)", + "description": "Anthropic Claude Sonnet 4.5 through AWS Bedrock cross-region inference", + "recommended": true, + "capabilities": { "reasoning": true }, + "features": ["conversations", "reviews", "actions"] + } +]' +---- + +*Verify before running the AI service:* + +[source,bash] +---- +aws bedrock list-foundation-models --region us-east-1 \ + --query 'modelSummaries[?contains(modelId,`claude`)].[modelId,modelLifecycle.status]' \ + --output table +---- + +[source,bash] +---- +aws bedrock-runtime converse \ + --region us-east-1 \ + --model-id us.anthropic.claude-sonnet-4-5-20250929-v1:0 \ + --messages '[{"role":"user","content":[{"text":"Say hello in five words."}]}]' +---- + +If either fails, the AI service fails with the same root cause; fix IAM and model access before continuing. + +IMPORTANT: *`AccessDeniedException`* almost always means missing model access in the Bedrock console. *`ThrottlingException`* on the first call indicates low default quotas; request an increase through Service Quotas. *`ValidationException`* means a mismatch between regional model IDs and cross-region inference profiles. *`INVALID_PAYMENT_INSTRUMENT`* indicates a missing AWS Marketplace subscription. +==== + + + +[[google-vertex-ai]] +== Google Vertex AI + +Google's enterprise model surface. Project-scoped, IAM-driven, GCP-billed. Credentials must be inlined (same limitation as Bedrock). + +.Configuration details +[%collapsible] +==== +IMPORTANT: The Vertex adapter ignores ADC, `GOOGLE_APPLICATION_CREDENTIALS`, GKE Workload Identity, and Compute Engine metadata server credentials. 
Inline either a service-account key or an account-bound API key in the `PROVIDERS` JSON. + +*JSON shape (service account):* + +[source,json] +---- +{ + "vertex": { + "type": "vertex", + "project": "YOUR_GCP_PROJECT_ID", + "location": "us-central1", + "credentials": { + "clientEmail": "ai-service@YOUR_GCP_PROJECT_ID.iam.gserviceaccount.com", + "privateKey": "-----BEGIN PRIVATE KEY-----\nMIIE...\n-----END PRIVATE KEY-----\n" + } + } +} +---- + +*JSON shape (account-bound API key):* + +[source,json] +---- +{ + "vertex": { + "type": "vertex", + "project": "YOUR_GCP_PROJECT_ID", + "location": "us-central1", + "apiKeys": ["YOUR_VERTEX_API_KEY"] + } +} +---- + +*Fields:* + +[cols=",,",options="header",] +|=== +|Field |Required |Notes +|`type` |Yes |Literal `"vertex"` +|`project` |Yes |GCP project ID, not the project number. +|`location` |Yes |Region, for example `us-central1`, `europe-west4`. Must support the model family. +|`credentials.clientEmail` |If using SA |The `client_email` field from the SA JSON key. +|`credentials.privateKey` |If using SA |The `private_key` field from the SA JSON key. See the newline escaping note below. +|`apiKeys` |If using account-bound key |Array of one or more Vertex-bound API keys. +|=== + +*`private_key` newline escaping:* + +When copying the `private_key` from a GCP service account JSON key into a `.env` file and then expanding with `set -a && source .env`, the shell collapses the `\n` sequences and the key becomes invalid (Vertex returns `invalid_grant`). 
A reliable approach is to build the entire `PROVIDERS` value using a JSON serializer: + +[source,bash] +---- +python3 - <<'EOF' > providers.json +import json +sa = json.load(open("/path/to/service-account.json")) +providers = { + "vertex": { + "type": "vertex", + "project": sa["project_id"], + "location": "us-central1", + "credentials": { + "clientEmail": sa["client_email"], + "privateKey": sa["private_key"] + } + } +} +print(json.dumps(providers)) +EOF + +export PROVIDERS="$(cat providers.json)" +---- + +*Account-bound API keys still require a service account:* + +The Vertex "account-bound API key" is bound to a Google Cloud project AND a service account inside that project. A service account must exist and have `roles/aiplatform.user` granted; the API key only avoids distributing the private key. + +*GCP org policies that block setup:* + +[cols=",",options="header",] +|=== +|Org policy |Blocks +|`iam.disableServiceAccountCreation` |Creating the service account. Symptom: `403 Permission iam.serviceAccounts.create denied`. +|`iam.disableServiceAccountKeyCreation` |Downloading a JSON key. Symptom: *Create key* greyed out, or `FAILED_PRECONDITION`. +|`iam.allow.disabled` |Account-bound API key creation. Symptom: `API_KEY_INVALID`. +|=== + +When a GCP org enforces any of these, the security or cloud team must grant a project-level exception or pre-provision the credentials. 
+ +*Minimal example:* + +[source,bash] +---- +-e PROVIDERS='{ + "vertex": { + "type": "vertex", + "project": "tinymce-ai", + "location": "us-central1", + "credentials": { + "clientEmail": "ai-service@tinymce-ai.iam.gserviceaccount.com", + "privateKey": "-----BEGIN PRIVATE KEY-----\nMIIEv...\n-----END PRIVATE KEY-----\n" + } + } +}' + +-e MODELS='[ + { + "id": "gemini-2.0-flash-001", + "provider": "vertex", + "name": "Gemini 2.0 Flash (Vertex)", + "description": "Google Gemini 2.0 Flash through Vertex AI", + "features": ["conversations", "reviews", "actions"] + } +]' +---- + +The service account needs `roles/aiplatform.user` (or a custom role with `aiplatform.endpoints.predict` and `aiplatform.endpoints.streamGenerateContent`). + +*Verify:* + +[source,bash] +---- +gcloud auth activate-service-account \ + --key-file=/path/to/service-account.json + +gcloud ai models list \ + --region=us-central1 \ + --project=YOUR_GCP_PROJECT_ID +---- + +IMPORTANT: *Region mismatch:* set `location` to a region that hosts the model. *`SERVICE_DISABLED`* means the Vertex AI API is not enabled; run `gcloud services enable aiplatform.googleapis.com`. *Quota:* new projects default to 5 QPS; request an increase before production traffic. +==== + + + +== OpenAI-compatible (Ollama, vLLM, LM Studio, and similar) + +For any HTTP API that implements the OpenAI Chat Completions interface, including self-hosted runtimes and commercial aggregators (OpenRouter, Together, Groq, Fireworks). The `baseUrl` *must* include the `/v1` suffix. 
+ +.Configuration details +[%collapsible] +==== +*JSON shape:* + +[source,json] +---- +{ + "local-llm": { + "type": "openai-compatible", + "baseUrl": "http://host.docker.internal:11434/v1", + "apiKeys": ["optional-bearer-token"], + "headers": { "X-Custom-Header": "value" } + } +} +---- + +*Fields:* + +[cols=",,",options="header",] +|=== +|Field |Required |Notes +|`type` |Yes |Literal `"openai-compatible"` +|`baseUrl` |Yes |*Must include the `/v1` suffix.* Without it, every request fails with a misleading "Not Found" SSE error. +|`apiKeys` |No |Sent as `Authorization: Bearer <key>`. Most local runtimes ignore it. +|`headers` |No |Additional headers such as auth tokens or tenant IDs. +|=== + +NOTE: File uploads through this adapter are limited to `image/*` MIME types. PDFs and Office documents are not forwarded. To work with non-image files, route through an OpenAI, Anthropic, or Bedrock provider instead. + +*Ollama-specific setup:* + +Ollama listens on `127.0.0.1:11434` by default, which is unreachable from inside a Docker container. Bind to all interfaces: + +[source,bash] +---- +OLLAMA_HOST=0.0.0.0:11434 ollama serve +---- + +On Linux, add the host gateway so `host.docker.internal` resolves: + +[source,yaml] +---- +services: + ai-service: + image: registry.containers.tiny.cloud/ai-service:latest + extra_hosts: + - "host.docker.internal:host-gateway" +---- + +If Ollama returns "does not support tools", the model was built from a raw GGUF without a chat template. Use `ollama pull` for a Library model that includes a proper Modelfile, or author a custom one. + +The reasoning toggle (`capabilities.reasoning: true`) is cosmetic for Ollama-backed models; the openai-compatible adapter does not translate it to the native Ollama API. + +*Timeout:* + +Large self-hosted models on consumer hardware can exceed the default 180-second timeout.
Override with: + +[source,bash] +---- +-e LLM_TIMEOUT_MS='600000' +---- + +*Example -- Ollama:* + +[source,bash] +---- +-e PROVIDERS='{ + "ollama": { + "type": "openai-compatible", + "baseUrl": "http://host.docker.internal:11434/v1" + } +}' + +-e MODELS='[ + { + "id": "qwen3:0.6b", + "provider": "ollama", + "name": "Qwen3 0.6B (local)", + "description": "Local Ollama model for fast actions", + "features": ["conversations", "reviews", "actions"] + } +]' + +-e LLM_TIMEOUT_MS='600000' +---- + +*Example -- vLLM:* + +[source,bash] +---- +-e PROVIDERS='{ + "vllm": { + "type": "openai-compatible", + "baseUrl": "http://vllm-host.internal:8001/v1", + "apiKeys": ["YOUR_VLLM_TOKEN"] + } +}' + +-e MODELS='[ + { + "id": "meta-llama/Llama-3.1-8B-Instruct", + "provider": "vllm", + "name": "Llama 3.1 8B (vLLM)", + "description": "Self-hosted Llama 3.1 8B served through vLLM", + "features": ["conversations", "reviews", "actions"] + } +]' +---- + +*Example -- LM Studio:* + +[source,bash] +---- +-e PROVIDERS='{ + "lmstudio": { + "type": "openai-compatible", + "baseUrl": "http://host.docker.internal:1234/v1" + } +}' + +-e MODELS='[ + { + "id": "lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF", + "provider": "lmstudio", + "name": "Llama 3.1 8B (LM Studio)", + "description": "Local LM Studio runtime", + "features": ["conversations", "actions"] + } +]' +---- + +*Verify:* + +[source,bash] +---- +curl -s http://host.docker.internal:11434/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "qwen3:0.6b", + "messages": [{"role":"user","content":"Say hello in five words."}], + "stream": false + }' +---- + +When the `curl` call returns a chat completion, the AI service can use the same endpoint. +==== + + + +== Multiple providers coexistence + +`PROVIDERS` is a single JSON object that may contain any number of entries. Each entry is independent; the service maintains a separate client pool per provider. 
There is no limit beyond JSON-in-env-var size constraints (many shells allow 64 KB or more in a single variable). + +A `MODELS` array routes individual models to specific providers using the `provider` field, which must match a `PROVIDERS` key *exactly* (case-sensitive). + +=== Example: three providers, mixed routing + +.Click to expand: three-provider `PROVIDERS` and `MODELS` example +[%collapsible] +==== +[source,bash] +---- +-e PROVIDERS='{ + "openai": { "type": "openai", "apiKeys": ["sk-proj-YOUR_KEY_HERE"] }, + "bedrock-us": { + "type": "bedrock", + "region": "us-east-1", + "credentials": { + "accessKeyId": "AKIA_YOUR_KEY_HERE", + "secretAccessKey": "YOUR_SECRET_HERE" + } + }, + "ollama": { "type": "openai-compatible", "baseUrl": "http://host.docker.internal:11434/v1" } +}' + +-e MODELS='[ + { + "id": "gpt-4o", + "provider": "openai", + "name": "GPT-4o", + "description": "OpenAI flagship model for conversations", + "recommended": true, + "features": ["conversations"] + }, + { + "id": "us.anthropic.claude-sonnet-4-5-20250929-v1:0", + "provider": "bedrock-us", + "name": "Claude Sonnet 4.5 (Bedrock)", + "description": "Claude on Bedrock for reviews", + "capabilities": { "reasoning": true }, + "features": ["reviews"] + }, + { + "id": "qwen3:0.6b", + "provider": "ollama", + "name": "Qwen3 0.6B (local)", + "description": "Local model for cheap quick actions", + "features": ["actions"] + } +]' +---- +==== + +This wires conversations to OpenAI, reviews to Bedrock-hosted Claude, and quick actions to a local Ollama model. The TinyMCE editor will pick the appropriate provider for each feature based on which models declare which `features`. + +A `MODELS` entry with a `provider` value that does not exist in `PROVIDERS` is silently skipped; that model will not appear in `/v1/models/1`. When a model is missing from the model selector in the rich text editor, check the spelling of its `provider` field.
+ + + +== `MODELS` field reference + +=== Top-level fields + +[cols=",,,,",options="header",] +|=== +|Field |Required |Type |Default |Notes +|`id` |Yes |string |- |Model identifier sent to the upstream provider. For Azure, this must match the deployment name. For Bedrock, this must match the model ID or inference profile. +|`provider` |Yes |string |- |Must match a key in `PROVIDERS` exactly. +|`description` |Yes |string |- |Shown in model picker tooltips. +|`name` |No |string |value of `id` |Display name in the model picker. +|`recommended` |No |boolean |`false` |If `true`, marks the model as recommended in the picker. Only one model should be flagged per environment. +|`capabilities.webSearch` |No |boolean |`false` |Whether to allow the web search toggle for this model. Requires `WEBSEARCH_ENABLED`. +|`capabilities.reasoning` |No |boolean |`false` |Whether to expose a reasoning/extended-thinking toggle. Supported providers include Anthropic, Bedrock-Claude, and OpenAI o-series. Cosmetic for Ollama (see openai-compatible section). +|`contextLimits` |No |object |see below |Per-model context constraints. +|`features` |Yes |`string[]` |- |Which features the model is eligible for. Must contain at least one usable feature. +|=== + +=== `contextLimits` defaults + +[cols=",,",options="header",] +|=== +|Field |Default |Unit +|`maxContextLength` |`256000` |characters +|`maxFiles` |`100` |count +|`maxFileSize` |`5242880` (Anthropic) / `7340032` (all others) |bytes +|`maxTotalFileSize` |`31457280` |bytes +|`maxTotalPdfFilePages` |`100` |pages +|=== + +Override `contextLimits` only when necessary, for example when a model has a smaller real context window than the default 256K character budget, or when a deployment policy restricts file size. 
+ +=== Available `features` strings + +The full set of feature strings recognized by the service: + +[source,text] +---- +conversations +conversations.titleGeneration +reviews +reviews.correctness +reviews.clarity +reviews.readability +reviews.make-longer +reviews.make-shorter +reviews.make-tone-casual +reviews.make-tone-direct +reviews.make-tone-friendly +reviews.make-tone-confident +reviews.make-tone-professional +reviews.translate +actions +actions.make-longer +actions.make-shorter +actions.continue +actions.make-tone-casual +actions.make-tone-direct +actions.make-tone-friendly +actions.make-tone-confident +actions.make-tone-professional +actions.translate +actions.fix-grammar +actions.improve-writing +---- + +The three umbrella values `conversations`, `reviews`, and `actions` enable the entire family. Use a specific sub-feature only when restricting a model to a subset; for example, a low-cost model that handles only `actions.fix-grammar`. + +A model with no `features` entry, or with only sub-features the editor does not request, will be hidden from the picker. + + + +== Rotating API keys without downtime + +The `apiKeys` field on every provider type that has one is an *array*. The service treats all entries as valid for incoming requests, which allows rotating keys with zero downtime: + +[arabic] +. Append the new key to the array: ++ +[source,json] +---- +"apiKeys": ["sk-proj-OLD_KEY", "sk-proj-NEW_KEY"] +---- +. Restart the AI service container so it picks up the new `PROVIDERS` value. +. Verify the new key works (run a chat completion through the service). +. Revoke the old key in the provider console. +. Remove the old key from `apiKeys`: ++ +[source,json] +---- +"apiKeys": ["sk-proj-NEW_KEY"] +---- +. Restart the AI service container again. + +The same procedure works for `anthropic`, `google`, `azure`, and `openai-compatible` provider types. 
For Bedrock and Vertex, swap the `credentials` object atomically; there is no array of credential objects, so plan a short maintenance window or run two AI service replicas behind a load balancer for a no-downtime swap. + + + +== Quick troubleshooting index + +[cols=",,",options="header",] +|=== +|Symptom |Most likely cause |Section +|Editor shows "model unavailable" / `agent-1 allowed:false` |`MODELS` not set or every entry skipped |<<models-required>> +|`GET /v1/models/v1` returns 500 |Wrong compatibility version |<<models-required>> +|Bedrock returns `NoValidApiKeysFoundError` |Relying on the AWS default credential chain |Bedrock +|Bedrock returns `AccessDeniedException` |Model access not enabled in console |Bedrock prerequisites +|Bedrock returns `ValidationException` on Claude 4 |Bare model ID used instead of cross-region inference profile |Bedrock +|Bedrock returns `INVALID_PAYMENT_INSTRUMENT` |Missing AWS Marketplace subscription for the model family |Bedrock prerequisites +|Vertex returns `invalid_grant` |`private_key` newline escaping mangled |Vertex +|Vertex returns `SERVICE_DISABLED` |aiplatform.googleapis.com not enabled |Vertex +|Azure returns `DeploymentNotFound` |``MODELS[].id`` not equal to deployment name |Azure +|Azure SDK errors about missing API version |`apiVersion` not set |Azure +|Ollama: "Not Found" in SSE |`baseUrl` missing `/v1` |OpenAI-compatible +|Ollama: "does not support tools" |Bare-GGUF Modelfile without chat template |OpenAI-compatible +|Ollama: reasoning toggle has no effect |Not supported through the openai-compatible adapter |OpenAI-compatible +|Self-hosted model times out at 180s |Default `LLM_TIMEOUT_MS` |OpenAI-compatible +|AI request hangs forever in browser |Reverse proxy buffering SSE |See xref:tinymceai-on-premises-production.adoc[Production deployment] for the reverse proxy and TLS section +|=== + +More general troubleshooting (database, JWT, storage, networking) lives in xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting].
+ + + +== See also + +* xref:tinymceai-on-premises-getting-started.adoc[Getting started] -- initial container bring-up and demo +* xref:tinymceai-on-premises-jwt.adoc[JWT authentication] -- per-model and per-provider JWT permissions +* xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage] -- MySQL/Postgres configuration for the AI service +* xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting] -- general troubleshooting beyond provider configuration diff --git a/modules/ROOT/pages/tinymceai-on-premises-reference.adoc b/modules/ROOT/pages/tinymceai-on-premises-reference.adoc new file mode 100644 index 0000000000..9cfd897e98 --- /dev/null +++ b/modules/ROOT/pages/tinymceai-on-premises-reference.adoc @@ -0,0 +1,177 @@ += TinyMCE AI on-premises reference +:navtitle: Reference +:description: Environment variable, API endpoint, SSE, and error code reference for the TinyMCE AI on-premises service +:keywords: AI, on-premises, reference, API, environment variables, error codes + +== Environment variable reference + +Alphabetized. Required-ness is marked relative to a minimum working deployment. + +[cols=",,,",options="header",] +|=== +|Variable |Required |Default |Description +|`ALLOWED_ORIGINS` |Recommended |- |Comma-separated list of CORS-allowed editor origins. Required for cross-origin editor deployments. +|`DATABASE_DATABASE` |Yes |- |Database name (`ai_service` is the convention). +|`DATABASE_DRIVER` |Yes |- |`mysql` or `postgres`. +|`DATABASE_HOST` |Yes |- |Database hostname or IP. +|`DATABASE_PASSWORD` |Yes |- |Database password. +|`DATABASE_PORT` |No |3306 (MySQL) / 5432 (PostgreSQL) |Database port. +|`DATABASE_SCHEMA` |PostgreSQL only |`cs-on-premises` |PostgreSQL schema name. Pre-create or set to `public`. +|`DATABASE_SSL_CA` |No |- |Path to CA cert for database TLS. +|`DATABASE_SSL_CERT` |No |- |Path to client cert. +|`DATABASE_SSL_KEY` |No |- |Path to client key. +|`DATABASE_USER` |Yes |- |Database user. 
+|`ENABLE_METRIC_LOGS` |No |`false` |Emit JSON request logs to stdout. +|`ENVIRONMENTS_MANAGEMENT_SECRET_KEY` |Yes |- |Management Panel login secret. *Not* used to sign user JWTs. +|`LANGFUSE_BASE_URL` |No |`https://cloud.langfuse.com` |Self-hosted Langfuse URL. +|`LANGFUSE_DEBUG` |No |- |Verbose Langfuse logging. +|`LANGFUSE_PUBLIC_KEY` |If using Langfuse |- |Langfuse public key. +|`LANGFUSE_SECRET_KEY` |If using Langfuse |- |Langfuse secret key. +|`LICENSE_KEY` |Yes |- |AI service license key (long string from Tiny). +|`LLM_TELEMETRY_ENABLED` |No |`false` |Primary OpenTelemetry switch. +|`LLM_TIMEOUT_MS` |No |180000 |Per-request LLM timeout in ms. Raise for large self-hosted models. +|`MCP_SERVERS` |No |- |JSON object; MCP server configuration. See xref:tinymceai-on-premises-advanced.adoc[Advanced scenarios]. +|`MODELS` |Sometimes |- |JSON array; required for Azure / Bedrock / Vertex / openai-compatible. See xref:tinymceai-on-premises-providers.adoc[LLM providers]. +|`OTEL_DEBUG` |No |- |Verbose OTLP diagnostic logging. +|`OTEL_EXPORTER_OTLP_TRACES_ENDPOINT` |If using OTEL |- |OTLP traces endpoint URL. +|`OTEL_TRACES_SAMPLER_ARG` |No |`1.0` |OTLP sampling rate (0.0 to 1.0). +|`PROVIDERS` |Yes |- |JSON object; LLM provider configuration. See xref:tinymceai-on-premises-providers.adoc[LLM providers]. +|`REDIS_CLUSTER_NODES` |No |- |Comma-separated `host:port[:password]` for Redis Cluster mode. +|`REDIS_DB` |No |1 |Redis database number. +|`REDIS_HOST` |Yes |- |Redis hostname. +|`REDIS_IP_FAMILY` |No |- |Set to `6` for IPv6. +|`REDIS_PASSWORD` |No |- |Redis password. +|`REDIS_PORT` |No |6379 |Redis port. +|`REDIS_TLS_CA` |No |- |Path to CA cert for Redis TLS. +|`REDIS_TLS_CERT` |No |- |Path to Redis client cert. +|`REDIS_TLS_ENABLE` |No |`false` |Enable Redis TLS. +|`REDIS_TLS_KEY` |No |- |Path to Redis client key. +|`REDIS_USER` |No |- |Redis username (ACL). +|`STORAGE_ACCESS_KEY_ID` |If using S3 |- |S3 access key. 
+|`STORAGE_ACCOUNT_KEY` |If using Azure Blob |- |Azure storage account key. +|`STORAGE_ACCOUNT_NAME` |If using Azure Blob |- |Azure storage account name. +|`STORAGE_BUCKET` |If using S3 |- |S3 bucket name. +|`STORAGE_CONTAINER` |If using Azure Blob |- |Azure container name. +|`STORAGE_DRIVER` |Yes |- |`database`, `filesystem`, `s3`, or `azure`. +|`STORAGE_ENDPOINT` |No |- |Custom endpoint (S3-compatible or Azure-compatible). +|`STORAGE_LOCATION` |If using filesystem |- |Mount point for filesystem storage. Must be writable by the container user. +|`STORAGE_REGION` |If using S3 |- |S3 region. +|`STORAGE_SECRET_ACCESS_KEY` |If using S3 |- |S3 secret access key. +|`WEBRESOURCES_ENABLED` |No |`false` |Enable web scraping endpoint forwarding. +|`WEBRESOURCES_ENDPOINT` |If web resources enabled |- |Scraper URL. +|`WEBRESOURCES_REQUEST_TIMEOUT` |No |- |Scraper request timeout in ms. +|`WEBSEARCH_ENABLED` |No |`false` |Enable web search forwarding. +|`WEBSEARCH_ENDPOINT` |If web search enabled |- |Search URL. +|`WEBSEARCH_HEADERS` |No |- |JSON object; extra headers sent to the search endpoint. +|`WEBSEARCH_REQUEST_TIMEOUT` |No |- |Search request timeout in ms. +|=== + +== API endpoint reference + +[cols=",,,",options="header",] +|=== +|Method |Path |Auth |Description +|GET |`/health` |None |Liveness probe. Returns `{"serviceName":"on-premises-http","uptime":<number>}`. Not metric-logged. +|GET |`/docs/` |None |ReDoc-rendered API documentation. +|GET |`/v1/api/doc.json` |None |OpenAPI 3 JSON spec. +|GET |`/panel/` |Management secret |Management Panel UI. Sign in with `ENVIRONMENTS_MANAGEMENT_SECRET_KEY`. +|GET |`/v1/models/1` |JWT |List available models for the current token. The compatibility version literal `1` is the only accepted value; `v1`, `v2`, `latest` all return 500. +|POST |`/v1/conversations` |JWT |Create a conversation. Body *must* include client-supplied `id`. +|GET |`/v1/conversations` |JWT |List conversations for the current `sub`.
+|GET |`/v1/conversations/\{id}` |JWT |Read one conversation. +|POST |`/v1/conversations/\{id}/messages` |JWT |Send a message. Returns SSE stream. +|DELETE |`/v1/conversations/\{id}` |JWT |Delete a conversation. +|POST |`/v1/actions/\{actionId}` |JWT |Run a quick action. Body shape: `{"content":[{"type":"text","content":"..."}]}` (no `modelId`). +|POST |`/v1/reviews/\{reviewId}` |JWT |Run a review. +|=== + +NOTE: Environment management (create, read, update, delete) is handled through the Management Panel UI at `/panel/`. + +== Server-Sent Events reference + +The message endpoint returns `Content-Type: text/event-stream`. Events use named types: + +[cols=",,",options="header",] +|=== +|Event |Payload shape |Meaning +|`message-metadata` |`{"messageId":"..."}` |Sent once at the start of each message. +|`text-delta` |`{"textDelta":"..."}` |Incremental text fragment. The editor concatenates these. +|`tool-call` |`{"toolName":"...","arguments":{...}}` |Emitted when the model invokes an MCP tool. +|`tool-result` |`{"toolName":"...","result":{...}}` |Emitted when an MCP tool returns. +|`error` |`{"message":"...","cause":{...}}` |Provider error. HTTP status remains 200; the error is in-stream. +|`done` |`{}` |Sent once at the end of the stream. +|=== + +Healthy stream example: + +[source,text] +---- +event: message-metadata +data: {"messageId":"abc123"} + +event: text-delta +data: {"textDelta":"Hello "} + +event: text-delta +data: {"textDelta":"there!"} + +event: done +data: {} +---- + +Error stream example: + +[source,text] +---- +event: message-metadata +data: {"messageId":"abc123"} + +event: error +data: {"message":"Incorrect API key provided","cause":{"providerStatusCode":401}} +---- + +Browser client parsing notes: + +* Each event is two lines: `event: <type>` and `data: <json>`, separated from the next event by a blank line. +* `data` is always valid JSON. +* Unknown `event` types carry informational payloads and can be ignored for forward compatibility.
+* `text-delta` is the only event that contributes to the visible response body. + +== Error code reference + +Error codes returned in HTTP 4xx responses and inside SSE `event: error` payloads. + +[cols=",,,",options="header",] +|=== +|Code |Origin |Likely cause |Fix +|`invalid-jwt-signature` |JWT verifier |Wrong API Secret, or used `ENVIRONMENTS_MANAGEMENT_SECRET_KEY`, or signed with RS256 |Re-sign with HS256 using the correct API Secret +|`invalid-jwt-payload` |JWT verifier |`aud` does not match a real Environment ID, or the environment was created through the raw API rather than the Panel UI |Re-copy env ID from `/panel/`, or recreate the env through the Panel UI +|`invalid-jwt` |JWT verifier |Token >60 s past `exp` |Issue tokens with a reasonable lifetime and refresh them before expiry +|`Environment not found` |AI runtime |Same as `invalid-jwt-payload` second sub-cause |Recreate env through Panel UI +|`missing-permissions` |Permission checker |`auth.ai.permissions` array does not cover the requested action |Add the missing permission string +|`invalid-request-data` |Input validator |Field validation failed (most commonly the 100,000 char prompt cap) |Fix the request body.
See error message +|`environment-not-found` |AI runtime |Same as `Environment not found` |Recreate through Panel UI +|`conversation in use` |Conversation runtime |Stream-abort left stale state |Start a new conversation +|`conversation does not exist` |Conversation runtime |Follow-up to `conversation in use` |Start a new conversation +|`NoValidApiKeysFoundError` |Bedrock / Vertex adapter |Inline credentials missing |Inline `credentials` in `PROVIDERS` +|`AccessDeniedException` |Bedrock |Missing model access or IAM permissions |Enable Bedrock model access; attach the IAM policy from xref:tinymceai-on-premises-providers.adoc[LLM providers] +|`INVALID_PAYMENT_INSTRUMENT` |Bedrock |Anthropic on Bedrock without Marketplace subscription |Subscribe through AWS Marketplace +|`ValidationException` |Bedrock |Wrong model ID format (regional instead of cross-region) |Use the inference profile ID for Claude 4.x +|`DeploymentNotFound` |Azure |``MODELS[].id`` does not match Azure deployment name |Set ``MODELS[].id`` to the exact deployment name +|`invalid_grant` |Vertex |Mangled `private_key` newlines |Build `PROVIDERS` from `json.dumps()` of the SA key +|`SERVICE_DISABLED` |Vertex |`aiplatform.googleapis.com` not enabled |`gcloud services enable aiplatform.googleapis.com` +|`API_KEY_INVALID` |Vertex |Account-bound API key blocked by org policy |Grant policy exception +|`Incorrect API key provided` |OpenAI / Anthropic / Google |Bad API key |Update `PROVIDERS` and `--force-recreate` +|`Wrong license key.` |AI service startup |Truncated or whitespace-padded license key |Re-paste as a single line +|=== + +== Known limits + +[cols="1,1,3",options="header"] +|=== +|Limit |Value |Notes +|Maximum prompt length |100,000 characters |Hard limit enforced by the service. Summarize or shorten source content before it exceeds this threshold. +|File support (OpenAI-compatible providers) |Images only (`image/*`) |PDFs, text, and Office files are not forwarded to OpenAI-compatible providers. 
Use a non-OpenAI-compatible provider for non-image file attachments. +|MCP tool availability |Conversations only |MCP tools are not available in reviews or quick actions. +|MCP authentication |Single shared token per server |The `headers` field in `MCP_SERVERS` is fixed at deploy time. Per-user authentication is not supported. +|=== diff --git a/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc b/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc new file mode 100644 index 0000000000..6a8a8f1969 --- /dev/null +++ b/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc @@ -0,0 +1,310 @@ += Troubleshooting +:navtitle: Troubleshooting +:description: Troubleshooting guide for the TinyMCE AI on-premises service +:keywords: AI, on-premises, troubleshooting, errors, debugging + +Match the symptom to the fix below. If the symptom does not fit any section, escalate to `support@tiny.cloud` with the output of `docker logs ai-service --tail 200` and a redacted copy of the `PROVIDERS` value. + +== Quick triage + +[.text-center] +image::tinymceai-on-premises/troubleshooting-fig-1.svg[alt="Troubleshooting triage decision tree covering container health JWT and LLM connectivity failures",width=100%] + +[cols="1,1",options="header"] +|=== +|Symptom area |Go to +|Container will not start or exits during boot |<<container-startup-failures>> +|Container is running, `/health` returns OK, but API calls fail |<<api-and-jwt-authentication>> +|Conversation starts, but the SSE stream carries an `event: error` |<<llm-provider-errors>> +|Editor renders, but AI toolbar is missing, token fetch fails, or responses hang |<<editor-and-front-end>> +|Responses are slow or time out |<<performance>> +|Scaling, upgrades, or deployment questions |xref:tinymceai-on-premises-production.adoc[Production deployment] +|=== + + +[[container-startup-failures]] +== Container startup failures + +Run `docker logs ai-service` first. All entries below assume the log output is available.
+ +[cols="2,2,3",options="header"] +|=== +|Error / symptom |Cause |Fix + +|`Wrong license key.` +|Key was truncated, contains a line break, or has surrounding whitespace +|Paste the key as a single unbroken line. Verify the first and last eight characters against the original. + +|`EACCES: permission denied, mkdir '/var/storage'` +|`STORAGE_LOCATION` points to a path the container user cannot write +|Switch to `STORAGE_DRIVER=database`, or mount a writable volume and point `STORAGE_LOCATION` at it (for example `/tmp/ai-storage`). + +|`Not enough permissions to access database.` +|MySQL user lacks required privileges +|Grant the privileges listed in the error. See xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage] for the GRANT statement. + +|`schema "cs-on-premises" does not exist` +|Postgres schema not pre-created +|Run `CREATE SCHEMA "cs-on-premises";` (double quotes required), or set `DATABASE_SCHEMA=public`. See xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage]. + +|`[MY-000067] unknown variable 'default-authentication-plugin'` +|`mysql:8` tag now points to MySQL 8.4, which removed that variable +|Pin `mysql:8.0` in the compose file and run `docker compose up -d --force-recreate mysql`. + +|Container exits with no useful log +|Missing required env var, or malformed JSON in `PROVIDERS` / `MODELS` +|Run `docker inspect ai-service {vbar} jq '.[0].Config.Env'` and compare against the xref:tinymceai-on-premises-reference.adoc[environment variable reference]. Validate JSON with `echo "$PROVIDERS" {vbar} jq .` + +|`/health` times out despite successful boot +|Port mapping missing +|Add `-p 8000:8000` to `docker run`, or `ports: ["8000:8000"]` in compose. Confirm with `docker port ai-service`. + +|`getaddrinfo EAI_AGAIN mysql` or `getaddrinfo ENOTFOUND redis` +|AI service is on a different Docker network from the data layer +|Use `docker compose` (shared network), or set `DATABASE_HOST=host.docker.internal`. 
On Linux, add `extra_hosts: ["host.docker.internal:host-gateway"]` to the AI service. +|=== + + +[[api-and-jwt-authentication]] +== API and JWT authentication + +These assume the container is running and `/health` returns OK. + +[cols="2,3,3",options="header"] +|=== +|Error / symptom |Cause |Fix + +|`invalid-jwt-signature` +|Token signed with the wrong key. Most commonly, signed with `ENVIRONMENTS_MANAGEMENT_SECRET_KEY` instead of the per-access-key *API Secret* +|Re-copy the API Secret from the Management Panel at `/panel/` and re-sign the token. + +|`invalid-jwt-payload` +|`aud` claim does not match a known Environment ID, or `aud` is an array instead of a string +|Copy the Environment ID from `/panel/`. Ensure `aud` is a string, not an array. Recreate environments through the Panel UI only. + +|`invalid-jwt` (expired) +|Token is past its `exp` claim +|Issue tokens with a reasonable lifetime (for example `exp = now {plus} 3600`) and refresh before expiry. Synchronize clocks with NTP. + +|`Environment not found` +|Environment was not created through the Management Panel UI +|Delete and recreate the environment through `/panel/`. Update `AI_ENV_ID` in `.env`. + +|JWT silently rejected +|Token signed with RS256 instead of HS256 +|Re-sign with `algorithm: 'HS256'` and the API Secret. See xref:tinymceai-on-premises-jwt.adoc[JWT authentication]. + +|`allowed: false` on every endpoint +|`auth.ai.permissions` is a string, shorthand, or wrong shape +|Use the explicit array form. See the <<correct-permissions-shape,correct permissions shape>> below. + +|`409 conversation in use` then `404 conversation does not exist` +|Stream abort left temporary state blocking the conversation +|Start a new conversation or reload the page. Custom UIs should create a fresh conversation after cancel.
+|=== + +[[correct-permissions-shape]] +.Correct permissions shape +[%collapsible] +==== +[source,json] +---- +{ + "auth": { + "ai": { + "permissions": [ + "ai:conversations:*", + "ai:models:agent", + "ai:actions:system:*", + "ai:reviews:system:*" + ] + } + } +} +---- + +Common mistakes that produce `allowed: false`: `"permissions": "ai:admin"` (string shorthand), `"permissions": "*"`, `"useAllFeatures": true`, or a single permission as a string instead of an array. See xref:tinymceai-on-premises-jwt.adoc[JWT authentication] for the full permission catalog. +==== + + +[[llm-provider-errors]] +== LLM provider errors + +These appear as `event: error` inside the SSE stream. The HTTP response is still 200. + +=== Cloud providers (OpenAI, Anthropic, Google) + +[cols="2,3",options="header"] +|=== +|Error |Fix +|`Incorrect API key provided` |Update the key in `PROVIDERS`, then `docker compose up -d --force-recreate ai-service`. +|=== + +=== AWS Bedrock + +[cols="2,3",options="header"] +|=== +|Error |Fix +|`NoValidApiKeysFoundError` |Inline `accessKeyId` and `secretAccessKey` inside `credentials` in `PROVIDERS`. The AWS SDK default credential chain is not used. See xref:tinymceai-on-premises-providers.adoc[LLM providers]. +|`AccessDeniedException` |Enable model access in *Bedrock console -> Model access*. Attach an IAM policy with `bedrock:InvokeModel`, `bedrock:Converse`, and `bedrock:ConverseStream`. +|`INVALID_PAYMENT_INSTRUMENT` |Complete the AWS Marketplace subscription for Anthropic in *Bedrock console -> Model access -> Anthropic*. +|`ValidationException` (model invocation not supported) |Use the region-prefixed inference profile ID (for example `us.anthropic.claude-sonnet-4-...`). See xref:tinymceai-on-premises-providers.adoc[LLM providers]. +|=== + +=== Google Vertex AI + +[cols="2,3",options="header"] +|=== +|Error |Fix +|`NoValidApiKeysFoundError` |Inline `clientEmail` and `privateKey` inside `credentials` in `PROVIDERS`. Google ADC is not used. 
See xref:tinymceai-on-premises-providers.adoc[LLM providers]. +|Auth errors with a valid service account |`private_key` newlines were mangled during copy-paste. Build `PROVIDERS` with a script (`json.dumps()` on the SA JSON file) rather than hand-editing. +|`SERVICE_DISABLED` |Run `gcloud services enable aiplatform.googleapis.com --project=<PROJECT_ID>`. +|Blocked by GCP org policy |Check `iam.disableServiceAccountCreation`, `iam.disableServiceAccountKeyCreation`, and account-bound API key policies. Exempt the AI service project from all three. +|=== + +=== Azure OpenAI + +[cols="2,3",options="header"] +|=== +|Error |Fix +|Model not found / `DeploymentNotFound` |`MODELS[].id` must match the Azure deployment name exactly. +|API errors with no provider message |Set `apiVersion` explicitly. See https://learn.microsoft.com/azure/ai-services/openai/reference[Microsoft's API version matrix]. +|=== + +=== OpenAI-compatible (Ollama, vLLM, LM Studio) + +[cols="2,3",options="header"] +|=== +|Error |Fix +|"Not Found" in SSE error |`baseUrl` is missing the `/v1` suffix. Ollama default: `\http://host.docker.internal:11434/v1`. +|`ECONNREFUSED` on Linux |Start Ollama with `OLLAMA_HOST=0.0.0.0:11434 ollama serve`. Add `extra_hosts: ["host.docker.internal:host-gateway"]` to the AI service compose entry. +|"does not support tools" |Use an official model (`ollama pull qwen3:0.6b`) rather than a bare GGUF. Custom models need a Modelfile with `TEMPLATE` and tool support. See xref:tinymceai-on-premises-providers.adoc[LLM providers]. +|`createMessage` hangs ~180s then times out |Model is too slow for the default timeout. Set `LLM_TIMEOUT_MS` higher, use a lighter quantization, or use a smaller model. +|=== + + +[[editor-and-front-end]] +== Editor and front end + +Confirm `/health` is OK and a direct `curl` to `/v1/conversations` works before investigating the editor.
+ +[cols="2,3",options="header"] +|=== +|Symptom |Fix + +|No AI buttons in the toolbar +|Ensure TinyMCE 8{plus} is loaded, `plugins: 'tinymceai'` is set, and the toolbar string includes `tinymceai`. Verify the API key has the AI feature enabled. + +|Token fetch returns 401 +|The token endpoint's own authentication middleware is rejecting the request. Check session cookies, CORS credentials, and bearer tokens in the browser network tab. + +|Token returned but rejected by the AI service +|See <<api-and-jwt-authentication>> above: wrong secret, wrong `aud`, wrong algorithm (RS256 instead of HS256), or wrong permissions shape. + +|AI responses hang in the browser +|The reverse proxy is buffering the SSE stream. Set `proxy_buffering off;` and `proxy_cache off;` in nginx (or the equivalent for the load balancer). + +|CORS error on `/v1/conversations` +|Add the editor's origin (scheme {plus} host {plus} port) to the `ALLOWED_ORIGINS` environment variable. + +|Editor renders then disappears (Next.js / Nuxt / SvelteKit) +|TinyMCE references `window` at load time. Load the editor client-only: `dynamic(() => import('./Editor'), { ssr: false })` in Next.js, `<ClientOnly>` in Nuxt, `onMount` in SvelteKit. See xref:tinymceai-on-premises-frameworks.adoc[Framework integration]. + +|`tinymceai_token_provider` called in a tight loop +|Token endpoint is returning an invalid JWT or non-JSON response. Test with `curl -X POST http://localhost:3000/api/ai-token` and verify the response is `pass:c[{"token":"eyJ..."}]`. +|=== + + +[[performance]] +== Performance + +[cols="2,3",options="header"] +|=== +|Symptom |Fix +|Self-hosted model is slow through the AI service compared with raw `curl` |Co-locate the inference server with the AI service. Use a smaller or more quantized model. Disable telemetry during development (`LLM_TELEMETRY_ENABLED=false`). +|Containers OOM or MySQL takes 60{plus} seconds to start (Colima) |Default Colima VM is too small. Run `colima stop && colima start --cpu 4 --memory 8 --disk 100`.
+|=== + + +== Diagnostic recipes + +.Expand for copy-ready diagnostic commands +[%collapsible] +==== +*Tail logs:* + +[source,bash] +---- +docker logs ai-service --tail 200 -f +---- + +*Liveness check:* + +[source,bash] +---- +curl -fsS http://localhost:8000/health +---- + +*Decode a JWT (inspect payload without verifying):* + +[source,bash] +---- +python3 -c "import jwt,sys,json; print(json.dumps(jwt.decode(sys.argv[1], options={'verify_signature': False}), indent=2))" +---- + +*Recreate after an env change:* + +[source,bash] +---- +docker compose up -d --force-recreate ai-service +---- + +*Inspect effective environment:* + +[source,bash] +---- +docker inspect ai-service | jq '.[0].Config.Env' +---- + +*Validate PROVIDERS JSON:* + +[source,bash] +---- +echo "$PROVIDERS" | jq . +---- + +*Test data layer connectivity from inside the container:* + +[source,bash] +---- +docker compose exec ai-service /bin/sh -c "nc -zv mysql 3306" +docker compose exec ai-service /bin/sh -c "nc -zv redis 6379" +---- + +*End-to-end smoke test (token mint through streamed response):* + +[source,bash] +---- +TOKEN=$(curl -s -X POST http://localhost:3000/api/ai-token | jq -r '.token') + +curl -s -X POST http://localhost:8000/v1/conversations \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"id":"smoke-1","title":"Smoke test"}' + +curl -N -X POST http://localhost:8000/v1/conversations/smoke-1/messages \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"prompt":"Say hi in five words.","model":"agent-1"}' +---- +==== + + +== Related pages + +* xref:tinymceai-on-premises-getting-started.adoc[Getting started] +* xref:tinymceai-on-premises-jwt.adoc[JWT authentication] +* xref:tinymceai-on-premises-providers.adoc[LLM providers] +* xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage] +* xref:tinymceai-on-premises-frameworks.adoc[Framework integration] +* xref:tinymceai-on-premises-production.adoc[Production 
deployment] diff --git a/modules/ROOT/pages/tinymceai-on-premises.adoc b/modules/ROOT/pages/tinymceai-on-premises.adoc new file mode 100644 index 0000000000..3698a06238 --- /dev/null +++ b/modules/ROOT/pages/tinymceai-on-premises.adoc @@ -0,0 +1,145 @@ += TinyMCE AI on-premises +:navtitle: Overview +:description: Self-hosted AI writing assistance for TinyMCE; architecture, prerequisites, and topic guide +:keywords: AI, on-premises, self-hosted, deployment, overview + +The TinyMCE AI on-premises service is a self-hosted back end that powers AI writing assistance inside the TinyMCE rich text editor. It runs entirely within the host infrastructure. Editor content, conversation history, file attachments, and user data never leave the network. + +The service ships as a single OCI container image (`registry.containers.tiny.cloud/ai-service`). It exposes a REST API, a Management Panel, Server-Sent Events streaming, and an OpenAPI spec. + +== Architecture + +[.text-center] +image::tinymceai-on-premises/complete-guide-fig-1.svg[alt="Service architecture showing browser with TinyMCE token endpoint AI service database Redis and LLM providers",width=100%] + +Data flow for a single AI request: + +[arabic] +. The browser loads TinyMCE with the `tinymceai` plugin. +. The user triggers an AI feature (chat, review, quick action). +. The plugin calls the token endpoint, which signs an HS256 JWT with the API Secret. +. The plugin sends the JWT and prompt to the AI service. +. The AI service verifies the token, checks per-feature permissions, and forwards the prompt to the configured LLM. +. The AI service streams the LLM response back to the browser through Server-Sent Events. + +The shared secret (API Secret) never leaves the back end. It is held only by the token endpoint and the AI service; the browser and editor only ever see signed tokens. + +== Prerequisites + +[cols="1,3",options="header"] +|=== +|Requirement |Details + +|Container runtime +|Docker 20.10{plus}, Podman 4{plus}, or any OCI-compatible runtime.
Kubernetes, AWS ECS, or Azure Container Apps are also supported. + +|SQL database +|MySQL 8.0 or PostgreSQL 13{plus} (16 recommended). + +|Redis +|3.2.6{plus} (7.x recommended). Single node, Sentinel, or Cluster mode. + +|LLM access +|At least one provider. Multiple providers can coexist. + +|TinyMCE 8.0{plus} +|The `tinymceai` plugin is a premium plugin. + +|License key and registry credentials +|Provided by a Tiny account representative. + +|Token endpoint +|A back end that signs HS256 JWTs. + +|Reverse proxy +|The AI service does not terminate TLS. Use nginx, HAProxy, or a cloud load balancer. +|=== + +== Choosing a setup path + +[.text-center] +image::tinymceai-on-premises/complete-guide-fig-2.svg[Setup path decision tree,width=100%] + +Each path carries the same level of documentation. After identifying which path fits the operational model, complete its topic guides in the order listed. + +== Topic guides + +For a first-time deployment, progress through the guides in order. Each topic guide also stands alone when only one area applies. + +[cols="1,3",options="header"] +|=== +|Guide |Scope + +|xref:tinymceai-on-premises-getting-started.adoc[Getting started] +|Five-minute Docker Compose quick start. Stand up the AI service, database, Redis, token server, and a browser editor. + +|xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage] +|MySQL and PostgreSQL setup, Redis configuration, container runtimes (Docker, Podman, Kubernetes, ECS), and reverse proxy with TLS. + +|xref:tinymceai-on-premises-providers.adoc[LLM providers] +|OpenAI, Anthropic, Google Gemini, Azure OpenAI, AWS Bedrock, Google Vertex AI, and self-hosted endpoints (Ollama, vLLM, LM Studio). Custom model catalog and API key rotation. + +|xref:tinymceai-on-premises-jwt.adoc[JWT authentication] +|HS256 signing model, required and optional claims, permissions reference, and token endpoint examples in 8 languages. 
+ +|xref:tinymceai-on-premises-frameworks.adoc[Framework integration] +|Editor-side configuration: plugin options, token provider, authentication patterns, CORS, and deployment checklists. + +|xref:tinymceai-on-premises-production.adoc[Production deployment] +|Kubernetes manifests, AWS ECS task definitions, horizontal scaling, sizing, security hardening, rate limiting, observability, backup and recovery, and upgrades. + +|xref:tinymceai-on-premises-advanced.adoc[Advanced scenarios] +|MCP server integration, web scraping and search, multi-tenant patterns, custom models with guardrails, and AI-powered document pipelines. + +|xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting] +|Quick triage, container startup failures, JWT errors, LLM provider errors, editor issues, performance, and diagnostic recipes. + +|xref:tinymceai-on-premises-reference.adoc[Reference] +|Environment variable reference, API endpoint reference, Server-Sent Events reference, and error code reference. +|=== + +== Support + +* *Technical support:* https://support.tiny.cloud[Submit a support request] (available to customers with an active commercial license). +* *Account and licensing:* https://www.tiny.cloud/contact/[Contact Tiny]. + +When submitting a support request, include: + +Container logs:: ++ +[source,console] +---- +docker logs ai-service --tail 200 +---- + +Effective environment:: +Redact secrets before submitting. ++ +[source,console] +---- +docker inspect ai-service | jq '.[0].Config.Env' +---- + +Health check:: ++ +[source,console] +---- +curl -fsS http://localhost:8000/health +---- ++ +Expected response: ++ +[source,json] +---- +{"serviceName":"on-premises-http","uptime":1234} +---- + +Decoded JWT payload:: +Strip the signature and decode with a JWT library. 
+ +Image version:: ++ +[source,console] +---- +docker inspect ai-service | jq '.[0].Config.Image' +---- From 8bb31625c11fa7389095e66c924814af7939c8a1 Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Wed, 13 May 2026 14:55:27 +1000 Subject: [PATCH 02/48] DOC-3498: Address content gaps identified in source audit Add missing customer-facing content identified by comparing the original internal documentation against the current on-premises AsciiDoc pages: capabilities matrix on the overview page, Podman production runbook, performance characteristics table, expanded known limits reference, MySQL 8.4 caveat, Ollama systemd and Modelfile examples, and getting-started teardown and config update guidance. --- .../pages/tinymceai-on-premises-database.adoc | 2 +- ...tinymceai-on-premises-getting-started.adoc | 39 ++++++++++++ .../tinymceai-on-premises-production.adoc | 62 +++++++++++++++++++ .../tinymceai-on-premises-providers.adoc | 34 +++++++++- .../tinymceai-on-premises-reference.adoc | 13 +++- modules/ROOT/pages/tinymceai-on-premises.adoc | 43 +++++++++++++ 6 files changed, 190 insertions(+), 3 deletions(-) diff --git a/modules/ROOT/pages/tinymceai-on-premises-database.adoc b/modules/ROOT/pages/tinymceai-on-premises-database.adoc index 8b61052ddb..32bb045509 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-database.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-database.adoc @@ -95,7 +95,7 @@ Do *not* use `mysql:8`. That tag now floats to MySQL 8.4, which removes the `def [ERROR] [MY-010119] [Server] Aborting .... -Pin to `mysql:8.0` in every manifest: `docker run`, Docker Compose, Kubernetes, Helm, ECS. +Pin to `mysql:8.0` in every manifest: `docker run`, Docker Compose, Kubernetes, Helm, ECS. Running MySQL 8.4 with workarounds (removing the flag and switching to `caching_sha2_password`) is not a supported configuration. TIP: The same principle applies to PostgreSQL. Pin `postgres:16` rather than `postgres:latest`. 
diff --git a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc index 84d6af7f96..10b121979c 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc @@ -426,3 +426,42 @@ data: {} If the stream emits `event: error`, inspect the `data` payload. Provider errors (invalid API key, IAM denial, model unavailable) ride inside the SSE response. The HTTP status stays 200. See the xref:tinymceai-on-premises-troubleshooting.adoc[LLM provider errors] section in the Troubleshooting guide for details. A successful round-trip confirms: container health, database connectivity, Redis connectivity, JWT signing, JWT verification, permissions checking, environment registration, LLM provider authentication, and SSE streaming. If problems persist after these checks, focus on the editor configuration next. + +== Updating configuration + +IMPORTANT: `docker compose restart` after `.env` changes silently keeps the old environment values. The restart preserves the container and does not re-read `.env`. Always use `docker compose up -d --force-recreate` instead. + +[source,bash] +---- +docker compose up -d --force-recreate +# Or recreate only the AI service: +docker compose up -d --force-recreate ai-service +---- + +For Kubernetes, update the Secret and trigger a rollout restart: + +[source,bash] +---- +kubectl rollout restart deployment/ai-service -n tinymce-ai +---- + +== Stopping and cleaning up + +[source,bash] +---- +# Stop the AI service (standalone Docker) +docker stop ai-service && docker rm ai-service + +# Stop the Docker Compose stack +docker compose down + +# Remove all data including volumes (destructive) +docker compose down -v +---- + +For Kubernetes, scale the deployment to zero or delete it. Persistent volumes for the database are retained unless explicitly deleted. 
+ +[source,bash] +---- +kubectl delete deployment ai-service -n tinymce-ai +---- diff --git a/modules/ROOT/pages/tinymceai-on-premises-production.adoc b/modules/ROOT/pages/tinymceai-on-premises-production.adoc index 5b685f3f49..63438fe85a 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-production.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-production.adoc @@ -83,6 +83,41 @@ When deploying for the first time or upgrading to a new version, start a single +== Podman deployment + +The AI service works with Podman as an alternative to Docker. In Podman, containers within a pod share a network namespace, so use `127.0.0.1` instead of container names for hostnames. + +[source,bash] +---- +podman login -u 'TINY_REGISTRY_USERNAME' registry.containers.tiny.cloud + +podman pull registry.containers.tiny.cloud/ai-service:latest + +podman pod create --name ai-pod -p 8000:8000 -p 3306:3306 -p 6379:6379 + +podman run -d --pod ai-pod --name mysql \ + -e MYSQL_ROOT_PASSWORD=ROOT_PASSWORD \ + -e MYSQL_DATABASE=ai_service \ + mysql:8.0 + +podman run -d --pod ai-pod --name redis redis:7 + +podman run --init -d --pod ai-pod --name ai-service \ + -e LICENSE_KEY='T8LK:...' \ + -e ENVIRONMENTS_MANAGEMENT_SECRET_KEY='MANAGEMENT_SECRET' \ + -e DATABASE_DRIVER='mysql' \ + -e DATABASE_HOST='127.0.0.1' \ + -e DATABASE_USER='root' \ + -e DATABASE_PASSWORD='ROOT_PASSWORD' \ + -e DATABASE_DATABASE='ai_service' \ + -e REDIS_HOST='127.0.0.1' \ + -e PROVIDERS='{"openai":{"type":"openai","apiKeys":["sk-proj-..."]}}' \ + -e STORAGE_DRIVER='database' \ + registry.containers.tiny.cloud/ai-service:latest +---- + +IMPORTANT: Pin to `mysql:8.0`. The `mysql:8` tag floats to MySQL 8.4, which removes the `default-authentication-plugin` flag and causes a crash loop. See xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage] for details. + == Kubernetes deployment === Namespace and image pull secret @@ -562,6 +597,33 @@ License keys are per-deployment, not per-replica. 
One key covers any number of r +== Performance characteristics + +[cols="1,1",options="header"] +|=== +|Metric |Typical value + +|Cold start +|Approximately 3 seconds + +|Health check response +|Less than 10 ms + +|Token validation +|Less than 5 ms + +|Time to first token (LLM) +|200 ms to 2 s (depends on provider and model) + +|Memory per instance +|256 to 512 MB + +|Concurrent connections +|1,000{plus} per instance +|=== + +These values are approximate and vary with hardware, provider latency, and prompt complexity. The LLM provider's rate limits are typically the binding constraint before the AI service becomes one. + == Sizing guide [cols=",,,,",options="header",] diff --git a/modules/ROOT/pages/tinymceai-on-premises-providers.adoc b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc index f7ababd98a..425066ed95 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-providers.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc @@ -319,7 +319,7 @@ Azure-hosted OpenAI models. Requires an Azure subscription, an Azure OpenAI reso |`type` |Yes |Literal `"azure"` |`resourceName` |Yes |The `*.openai.azure.com` prefix only, not the full URL. |`apiKeys` |Yes |Array. Azure issues two keys per resource for zero-downtime key rotation. -|`apiVersion` |Yes |Always set explicitly. Refer to https://learn.microsoft.com/azure/ai-services/openai/reference[Microsoft's API version matrix] for current values. +|`apiVersion` |Yes |Always set explicitly. Omitting it produces a confusing SDK error about a missing query string parameter. Refer to https://learn.microsoft.com/azure/ai-services/openai/reference[Microsoft's API version matrix] for current stable values. |=== IMPORTANT: The ``MODELS[].id`` value must match the Azure *deployment name* exactly. A mismatch produces a `DeploymentNotFound` error. Use human-readable deployment names because the ID also appears in JWT permission strings and the editor model picker. 
@@ -682,6 +682,17 @@ Ollama listens on `127.0.0.1:11434` by default, which is unreachable from inside OLLAMA_HOST=0.0.0.0:11434 ollama serve ---- +On Linux with systemd, create an override file instead: + +[source,ini] +---- +# /etc/systemd/system/ollama.service.d/override.conf +[Service] +Environment="OLLAMA_HOST=0.0.0.0:11434" +---- + +Then reload and restart: `sudo systemctl daemon-reload && sudo systemctl restart ollama`. + On Linux, add the host gateway so `host.docker.internal` resolves: [source,yaml] @@ -695,6 +706,27 @@ services: If Ollama returns "does not support tools", the model was built from a raw GGUF without a chat template. Use `ollama pull` for a Library model that includes a proper Modelfile, or author a custom one. +.Custom Modelfile example +[%collapsible] +==== +[source] +---- +FROM /path/to/your-model.gguf + +TEMPLATE """{{ if .System }}<|im_start|>system +{{ .System }}<|im_end|> +{{ end }}{{ range .Messages }}<|im_start|>{{ .Role }} +{{ .Content }}<|im_end|> +{{ end }}<|im_start|>assistant +""" + +PARAMETER stop "<|im_end|>" +PARAMETER stop "<|im_start|>" +---- + +The exact template depends on the base model. Check the model card for the recommended chat template. Verify tool support with `ollama show <model>` before connecting to the AI service. +==== + The reasoning toggle (`capabilities.reasoning: true`) is cosmetic for Ollama-backed models; the openai-compatible adapter does not translate it to the native Ollama API. 
*Timeout:* diff --git a/modules/ROOT/pages/tinymceai-on-premises-reference.adoc b/modules/ROOT/pages/tinymceai-on-premises-reference.adoc index 9cfd897e98..17ecc864ec 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-reference.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-reference.adoc @@ -170,8 +170,19 @@ Error codes returned in HTTP 4xx responses and inside SSE `event: error` payload [cols="1,1,3",options="header"] |=== |Limit |Value |Notes -|Maximum prompt length |100,000 characters |Hard limit enforced by the service. Summarize or shorten source content before it exceeds this threshold. + +|Maximum prompt length |100,000 characters |Hard limit enforced by the service. Requests exceeding this return `invalid-request-data`. Summarize or shorten source content before it exceeds this threshold. +|Conversation create |Client-supplied `id` required |The plugin auto-generates `tiny-`. Raw API callers must supply a unique `id` in the create body. +|Stream-abort recovery |Stop button leaves stale state |The next message returns `409 conversation in use` then `404 conversation does not exist`. Recovery: start a new conversation or reload. +|Built-in rate limiting |None |Front the service with nginx `limit_req` or ALB rate-limit rules. See xref:tinymceai-on-premises-production.adoc#rate-limiting[Rate limiting]. |File support (OpenAI-compatible providers) |Images only (`image/*`) |PDFs, text, and Office files are not forwarded to OpenAI-compatible providers. Use a non-OpenAI-compatible provider for non-image file attachments. |MCP tool availability |Conversations only |MCP tools are not available in reviews or quick actions. |MCP authentication |Single shared token per server |The `headers` field in `MCP_SERVERS` is fixed at deploy time. Per-user authentication is not supported. +|PostgreSQL default schema |`cs-on-premises` (with hyphen) |Pre-create with `CREATE SCHEMA "cs-on-premises";` or set `DATABASE_SCHEMA=public`. 
+|`/v1/models/\{compatibilityVersion}` |Only accepts `1` |Values such as `v1`, `v2`, or `latest` return 500. +|Environment creation through raw API |Not supported |Always create environments through the Management Panel UI. +|Bedrock credentials |Inline only |The SDK default credential chain (IRSA, instance roles, `AWS_PROFILE`) is not used. +|Vertex credentials |Inline only |Application Default Credentials, `GOOGLE_APPLICATION_CREDENTIALS`, and the metadata server are not used. +|Azure `MODELS[].id` |Must equal deployment name |There is no separate `deploymentName` field. The ID is the deployment name. +|OpenAI-compatible `baseUrl` |Must include `/v1` suffix |Omitting it produces a "Not Found" SSE error. |=== diff --git a/modules/ROOT/pages/tinymceai-on-premises.adoc b/modules/ROOT/pages/tinymceai-on-premises.adoc index 3698a06238..6909153808 100644 --- a/modules/ROOT/pages/tinymceai-on-premises.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises.adoc @@ -24,6 +24,49 @@ Data flow for a single AI request: The shared secret (API Secret) never leaves the back end; the editor and the AI service only ever see signed tokens. +== Capabilities + +[cols="1,2",options="header"] +|=== +|Capability |Details + +|Conversational AI assistant +|Multi-turn chat sidebar. Conversation history is isolated per user through the JWT `sub` claim. + +|Document review +|Correctness, clarity, readability, tone, and translation. + +|Quick actions +|Rewrite, summarize, expand, change tone, fix grammar, translate, continue, and improve writing. + +|LLM provider flexibility +|OpenAI, Anthropic, Google Gemini, Azure OpenAI, AWS Bedrock, Google Vertex AI, or any self-hosted OpenAI-compatible endpoint. Multiple providers can coexist. + +|MCP integration +|Connect internal tools, databases, and knowledge bases through Model Context Protocol over Streamable HTTP transport. + +|Web scraping and web search +|Pluggable endpoints for fetching web pages and running searches. 
+ +|Multi-tenant environments +|Isolated conversation history and per-tenant access keys through Environments. + +|Per-user, per-feature permissions +|Fine-grained control through the `auth.ai.permissions` JWT claim. + +|Streaming responses +|Server-Sent Events from the LLM back to the browser. + +|File attachments +|Database, filesystem, Amazon S3, or Azure Blob Storage. + +|Observability +|Structured request logs, OpenTelemetry, and Langfuse. All three run as independent simultaneous pipelines. + +|Horizontal scaling +|The service is stateless. Share identical environment configuration across replicas. +|=== + == Prerequisites [cols="1,3",options="header"] From deae589ded377732ca3280cd658d5330d82fa05b Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Wed, 13 May 2026 15:08:21 +1000 Subject: [PATCH 03/48] DOC-3498: Expand acronyms on first prose use across on-premises pages Expand 18 acronyms (OCI, JWT, LLM, SSE, TLS, CORS, MCP, NTP, HPA, OTLP, IRSA, ADC, SSR, CSP, SIEM, PII, HA, mTLS) on first prose occurrence per page for readers unfamiliar with the terms. 
--- .../pages/tinymceai-on-premises-advanced.adoc | 4 ++-- .../pages/tinymceai-on-premises-database.adoc | 4 ++-- .../tinymceai-on-premises-frameworks.adoc | 12 ++++++------ .../tinymceai-on-premises-getting-started.adoc | 6 +++--- .../ROOT/pages/tinymceai-on-premises-jwt.adoc | 8 ++++---- .../tinymceai-on-premises-production.adoc | 18 +++++++++--------- .../pages/tinymceai-on-premises-providers.adoc | 10 +++++----- .../pages/tinymceai-on-premises-reference.adoc | 16 ++++++++-------- .../tinymceai-on-premises-troubleshooting.adoc | 12 ++++++------ modules/ROOT/pages/tinymceai-on-premises.adoc | 12 ++++++------ 10 files changed, 51 insertions(+), 51 deletions(-) diff --git a/modules/ROOT/pages/tinymceai-on-premises-advanced.adoc b/modules/ROOT/pages/tinymceai-on-premises-advanced.adoc index 425273c777..e0a9aee863 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-advanced.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-advanced.adoc @@ -150,7 +150,7 @@ The assistant calls the `search_knowledge_base` tool, retrieves the relevant pol == Multi-tenant SaaS platform -*Use case:* A SaaS platform provides AI writing features to customers. Each customer gets isolated conversations, separate LLM budgets, and per-tenant configuration. +*Use case:* A SaaS platform provides AI writing features to customers. Each customer gets isolated conversations, separate large language model (LLM) budgets, and per-tenant configuration. === Architecture @@ -171,7 +171,7 @@ Each environment provides: * Customer B -> Environment `env-customer-b` * Customer C -> Environment `env-customer-c` -. *Token server generates JWTs with the correct environment:* +. 
*Token server generates JSON Web Tokens (JWTs) with the correct environment:* + .Multi-tenant JWT generation [%collapsible] diff --git a/modules/ROOT/pages/tinymceai-on-premises-database.adoc b/modules/ROOT/pages/tinymceai-on-premises-database.adoc index 32bb045509..215dd1759e 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-database.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-database.adoc @@ -4,7 +4,7 @@ :keywords: AI, on-premises, database, MySQL, PostgreSQL, Redis, Docker, Podman, file storage, S3, Azure Blob This page covers the data layer: the SQL database, Redis, and file storage. -For container runtimes, reverse proxies, TLS, Kubernetes, and ECS deployment, see the xref:tinymceai-on-premises-production.adoc[Production deployment guide]. +For container runtimes, reverse proxies, Transport Layer Security (TLS), Kubernetes, and ECS deployment, see the xref:tinymceai-on-premises-production.adoc[Production deployment guide]. == Supported versions @@ -439,7 +439,7 @@ docker run --add-host=host.docker.internal:host-gateway ... == Redis -Every AI service instance must reach Redis. Redis holds session coordination, SSE delivery, and rate-limiting state. A temporary Redis outage degrades streaming but does not destroy persistent data. +Every AI service instance must reach Redis. Redis holds session coordination, Server-Sent Events (SSE) delivery, and rate-limiting state. A temporary Redis outage degrades streaming but does not destroy persistent data. === Setup diff --git a/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc b/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc index 0bb1be316d..6990af476b 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc @@ -7,10 +7,10 @@ This page covers the *editor-side* configuration that connects TinyMCE to the on-premises AI service. It assumes: * The AI service is already running. 
See xref:tinymceai-on-premises-getting-started.adoc[Getting started] for setup instructions. -* A token endpoint exists that signs JWTs for the AI service. See xref:tinymceai-on-premises-jwt.adoc[JWT authentication] for back-end implementations. +* A token endpoint exists that signs JSON Web Tokens (JWTs) for the AI service. See xref:tinymceai-on-premises-jwt.adoc[JWT authentication] for back-end implementations. * The TinyMCE API key has the AI feature enabled. Retrieve or upgrade a key at https://www.tiny.cloud/my-account/integrate/. -For general framework setup (installing wrappers, component structure, SSR patterns), see the existing integration guides: +For general framework setup (installing wrappers, component structure, server-side rendering (SSR) patterns), see the existing integration guides: * xref:react-cloud.adoc[React] * xref:vue-cloud.adoc[Vue.js] @@ -151,7 +151,7 @@ This pattern avoids cookies entirely and works well for cross-origin setups. == Cross-origin requests to the AI service -When `tinymceai_service_url` points to a different origin from the page (the common production case), the AI service must return CORS headers permitting the editor origin. The service reads the `ALLOWED_ORIGINS` environment variable for this. +When `tinymceai_service_url` points to a different origin from the page (the common production case), the AI service must return Cross-Origin Resource Sharing (CORS) headers permitting the editor origin. The service reads the `ALLOWED_ORIGINS` environment variable for this. 
To verify CORS from a terminal: @@ -167,7 +167,7 @@ The response should include `Access-Control-Allow-Origin: \https://app.yourcompa -== Content Security Policy +== Content Security Policy (CSP) If the application sets a `Content-Security-Policy` header, allow the AI service origin in `connect-src`: @@ -197,7 +197,7 @@ If using the Tiny CDN instead of self-hosted assets, also add `\https://cdn.tiny |Confirm the fetch sends the session cookie (`credentials: 'include'`) or `Authorization` header that the back end expects. |AI responses hang then time out -|Reverse proxy is buffering SSE +|Reverse proxy is buffering Server-Sent Events (SSE) |Disable proxy buffering. See xref:tinymceai-on-premises-production.adoc[Production deployment]. |Browser console shows a CORS error on `/v1/conversations` @@ -217,6 +217,6 @@ For other issues, see xref:tinymceai-on-premises-troubleshooting.adoc[Troublesho * xref:tinymceai-on-premises-getting-started.adoc[Getting started] * xref:tinymceai-on-premises-jwt.adoc[JWT authentication] -* xref:tinymceai-on-premises-providers.adoc[LLM providers] +* xref:tinymceai-on-premises-providers.adoc[Large language model (LLM) providers] * xref:tinymceai-on-premises-production.adoc[Production deployment] * xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting] diff --git a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc index 10b121979c..487554c0a4 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc @@ -238,7 +238,7 @@ Always create environments through the Management Panel UI. Environments created === Create the token server -The token server signs JWTs for the editor. 
The Node.js example below is for the demo only; the xref:tinymceai-on-premises-jwt.adoc[JWT authentication] guide contains production-ready endpoints in 8 languages (Node, Django, Flask, Laravel, Rails, .NET, Go, Spring Boot). +The token server signs JSON Web Tokens (JWTs) for the editor. The Node.js example below is for the demo only; the xref:tinymceai-on-premises-jwt.adoc[JWT authentication] guide contains production-ready endpoints in 8 languages (Node, Django, Flask, Laravel, Rails, .NET, Go, Spring Boot). Create `package.json`: @@ -351,7 +351,7 @@ npm start === Open the demo -Open *http://localhost:3000* in a browser. The editor loads with the AI toolbar. Select text and try the AI features. Responses stream in real time from the chosen LLM provider, processed entirely within the local infrastructure. +Open *http://localhost:3000* in a browser. The editor loads with the AI toolbar. Select text and try the AI features. Responses stream in real time from the chosen large language model (LLM) provider, processed entirely within the local infrastructure. The TinyMCE AI on-premises service is now running. @@ -423,7 +423,7 @@ event: done data: {} ---- -If the stream emits `event: error`, inspect the `data` payload. Provider errors (invalid API key, IAM denial, model unavailable) ride inside the SSE response. The HTTP status stays 200. See the xref:tinymceai-on-premises-troubleshooting.adoc[LLM provider errors] section in the Troubleshooting guide for details. +If the stream emits `event: error`, inspect the `data` payload. Provider errors (invalid API key, IAM denial, model unavailable) ride inside the Server-Sent Events (SSE) response. The HTTP status stays 200. See the xref:tinymceai-on-premises-troubleshooting.adoc[LLM provider errors] section in the Troubleshooting guide for details. 
A successful round-trip confirms: container health, database connectivity, Redis connectivity, JWT signing, JWT verification, permissions checking, environment registration, LLM provider authentication, and SSE streaming. If problems persist after these checks, focus on the editor configuration next. diff --git a/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc b/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc index 909fa9401e..7c88f42535 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc @@ -3,7 +3,7 @@ :description: JWT authentication for the TinyMCE AI on-premises service using HS256 symmetric signing :keywords: AI, on-premises, JWT, authentication, HS256 -The on-premises AI service uses *HS256* (HMAC-SHA256, symmetric shared secret) for JWT authentication. This is different from the Tiny Cloud AI service, which uses RS256. +The on-premises AI service uses *HS256* (HMAC-SHA256, symmetric shared secret) for JSON Web Token (JWT) authentication. This is different from the Tiny Cloud AI service, which uses RS256. [WARNING] -- @@ -186,7 +186,7 @@ Authorization: Bearer eyJhbGciOiJIUzI1NiIs... === Clock-skew leeway -The service allows up to 60 seconds of clock skew on the `exp` claim. Keep the token server and the AI service synchronized with NTP. +The service allows up to 60 seconds of clock skew on the `exp` claim. Keep the token server and the AI service synchronized with Network Time Protocol (NTP). @@ -868,7 +868,7 @@ When debugging, start here. Most "auth failures" reflect wrong claim values rath |`allowed: false` on specific endpoints only |Missing the specific permission |Decode token, check the `auth.ai.permissions` array against the table above. |Token silently rejected, no decoded error |RS256 signature |Re-sign with HS256. |`aud` claim type mismatch |`aud` issued as array instead of string |Some JWT libraries default to array `aud`. Force string. 
-|Editor shows "Failed to authenticate" |Token endpoint returned non-JSON, returned `token` as nested object, or CORS blocked the request |Open browser devtools → Network → inspect the response from `/api/ai-token`. +|Editor shows "Failed to authenticate" |Token endpoint returned non-JSON, returned `token` as nested object, or Cross-Origin Resource Sharing (CORS) blocked the request |Open browser devtools → Network → inspect the response from `/api/ai-token`. |=== === Sanity-check a token manually @@ -906,6 +906,6 @@ Short-lived tokens limit exposure if a token leaks through a browser extension, == See also * xref:tinymceai-on-premises-getting-started.adoc[Getting started] -- end-to-end deployment, including a demo token server -* xref:tinymceai-on-premises-providers.adoc[LLM providers] -- configuring custom models through `MODELS` and the `ai:models::` permission syntax +* xref:tinymceai-on-premises-providers.adoc[large language model (LLM) providers] -- configuring custom models through `MODELS` and the `ai:models::` permission syntax * xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting] -- full troubleshooting catalog beyond JWT * xref:tinymceai-on-premises-frameworks.adoc[Framework integration] -- editor-side integration patterns for React, Vue, and Angular, including `tinymceai_token_provider` wrappers diff --git a/modules/ROOT/pages/tinymceai-on-premises-production.adoc b/modules/ROOT/pages/tinymceai-on-premises-production.adoc index 63438fe85a..a3806dd277 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-production.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-production.adoc @@ -17,7 +17,7 @@ The AI service is stateless, persists all state to MySQL/PostgreSQL and Redis, a == TLS / HTTPS -The AI service does not terminate TLS. Place a reverse proxy in front. +The AI service does not terminate Transport Layer Security (TLS). Place a reverse proxy in front. 
=== Nginx example @@ -48,7 +48,7 @@ server { [IMPORTANT] -- -SSE streaming requires `proxy_buffering off`. Without it, AI responses appear to hang until the entire response is generated. +Server-Sent Events (SSE) streaming requires `proxy_buffering off`. Without it, AI responses appear to hang until the entire response is generated. -- === AWS ALB @@ -383,7 +383,7 @@ spec: [cols=",",options="header",] |=== |Service |AWS recommendation -|Database |RDS for MySQL 8.0 (Multi-AZ for HA) +|Database |RDS for MySQL 8.0 (Multi-AZ for high availability (HA)) |Redis |ElastiCache for Redis 7 (cluster mode) |Storage |Same-region S3 bucket |Load balancer |ALB with `/health` target health check, 300 s idle timeout @@ -400,16 +400,16 @@ spec: |Practice |Implementation |Network isolation |Place the AI service in a private subnet; expose only through a load balancer. Restrict database and Redis to the AI service security group. |Block panel from the public internet |Restrict `/panel/` to an admin VPN or IP allowlist. The panel manages secrets and access keys. -|TLS everywhere |Terminate TLS 1.3 at the reverse proxy. Use internal mTLS between the AI service and the data layer where supported. +|TLS everywhere |Terminate TLS 1.3 at the reverse proxy. Use internal mutual TLS (mTLS) between the AI service and the data layer where supported. |Secrets management |Use Vault, AWS Secrets Manager, Azure Key Vault, or GCP Secret Manager. Never store secrets directly in orchestration manifests or commit them to source control. |Database encryption at rest |Turn on encryption at rest in the cloud provider console. RDS, Cloud SQL, and Azure Database enable this by default. |Redis authentication |Always set `REDIS_PASSWORD` (or use a managed Redis instance with authentication enabled). |Container security |Run as non-root, use a read-only filesystem where possible, and drop unnecessary Linux capabilities. 
|Image scanning |Scan `registry.containers.tiny.cloud/ai-service` with Trivy, Snyk, or the registry's built-in scanner. -|Least-privilege JWTs |Grant only the permissions each user role requires. Avoid full-access tokens in production. +|Least-privilege JSON Web Tokens (JWTs) |Grant only the permissions each user role requires. Avoid full-access tokens in production. |API secret rotation |Periodically create a new access key, add the new key to the configuration, then revoke the old key. The token endpoint reads the secret at request time. -|Audit logging |Enable `ENABLE_METRIC_LOGS=true` and ship logs to a SIEM. -|LLM API key rotation |Add the new key to the `PROVIDERS` array, restart the service, then revoke the old key after confirming the new one works. +|Audit logging |Enable `ENABLE_METRIC_LOGS=true` and ship logs to a Security Information and Event Management (SIEM) system. +|Large language model (LLM) API key rotation |Add the new key to the `PROVIDERS` array, restart the service, then revoke the old key after confirming the new one works. |=== == Rate limiting @@ -479,7 +479,7 @@ When enabled, the service writes a structured JSON entry for each request. Key f
Key f |=== |Variable |Required |Default |Description |`LLM_TELEMETRY_ENABLED` |Yes |`false` |Primary telemetry switch -|`OTEL_EXPORTER_OTLP_TRACES_ENDPOINT` |Yes |- |OTLP endpoint URL +|`OTEL_EXPORTER_OTLP_TRACES_ENDPOINT` |Yes |- |OpenTelemetry Protocol (OTLP) endpoint URL |`OTEL_TRACES_SAMPLER_ARG` |No |`1.0` |Sampling rate (0.0 to 1.0) |`OTEL_DEBUG` |No |- |Verbose OTLP diagnostic logging |=== @@ -632,7 +632,7 @@ These values are approximate and vary with hardware, provider latency, and promp |1 to 50 |1 |db.t3.small (or 2 vCPU / 4 GB self-managed) |cache.t3.micro |Development and small teams |50 to 500 |2 |db.r6g.large |cache.r6g.large |Small production |500 to 5,000 |3 to 5 |db.r6g.xlarge (Multi-AZ) |cache.r6g.xlarge (cluster) |Medium production -|5,000{plus} |5{plus} (HPA) |db.r6g.2xlarge{plus} |cache.r6g.2xlarge{plus} |Large production; contact Tiny for guidance +|5,000{plus} |5{plus} (Horizontal Pod Autoscaler (HPA)) |db.r6g.2xlarge{plus} |cache.r6g.2xlarge{plus} |Large production; contact Tiny for guidance |=== Starting point for self-managed deployments: diff --git a/modules/ROOT/pages/tinymceai-on-premises-providers.adoc b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc index 425066ed95..6676c3b7e0 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-providers.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc @@ -6,7 +6,7 @@ -The `PROVIDERS` environment variable tells the AI service how to reach the upstream LLM. The `MODELS` environment variable tells the service which models are exposed to clients and which features each model supports. This page is the definitive reference for both: every supported `type`, every required field, and every known issue encountered in production. +The `PROVIDERS` environment variable tells the AI service how to reach the upstream large language model (LLM). The `MODELS` environment variable tells the service which models are exposed to clients and which features each model supports. 
This page is the definitive reference for both: every supported `type`, every required field, and every known issue encountered in production. Start with the xref:tinymceai-on-premises-getting-started.adoc[Getting Started guide] if the AI service container is not yet running. The following sections assume a running `ai-service` container. @@ -19,7 +19,7 @@ The AI service uses two related environment variables: |Variable |Type |What it does |`PROVIDERS` |JSON object |Map of provider IDs to provider configurations. Each entry says how to authenticate with one upstream LLM API. |`MODELS` |JSON array |List of models exposed to clients. Each model points at a `PROVIDERS` entry and declares which features it can serve. -|JWT `auth.ai.permissions` |string array |Per-user authorization list. Includes `ai:models::` entries to gate access to individual models. +|JSON Web Token (JWT) `auth.ai.permissions` |string array |Per-user authorization list. Includes `ai:models::` entries to gate access to individual models. |=== The `PROVIDERS` keys are arbitrary identifiers (for example `"openai"`, `"my-bedrock"`, `"team-azure"`). Each value object has a `type` field that picks the implementation: @@ -378,7 +378,7 @@ Amazon's hosted-model marketplace (Anthropic, Meta, Mistral, Cohere, Amazon Tita .Configuration details [%collapsible] ==== -IMPORTANT: The AI service does *not* use the AWS SDK default credential chain. `AWS_PROFILE`, `~/.aws/credentials`, IRSA, EC2 instance profiles, ECS task roles, and web identity tokens are all ignored. Inline the credentials in the `PROVIDERS` JSON. +IMPORTANT: The AI service does *not* use the AWS SDK default credential chain. `AWS_PROFILE`, `~/.aws/credentials`, IAM Roles for Service Accounts (IRSA), EC2 instance profiles, ECS task roles, and web identity tokens are all ignored. Inline the credentials in the `PROVIDERS` JSON. *JSON shape:* @@ -505,7 +505,7 @@ Google's enterprise model surface. Project-scoped, IAM-driven, GCP-billed. 
Crede .Configuration details [%collapsible] ==== -IMPORTANT: The Vertex adapter ignores ADC, `GOOGLE_APPLICATION_CREDENTIALS`, GKE Workload Identity, and Compute Engine metadata server credentials. Inline either a service-account key or an account-bound API key in the `PROVIDERS` JSON. +IMPORTANT: The Vertex adapter ignores Application Default Credentials (ADC), `GOOGLE_APPLICATION_CREDENTIALS`, GKE Workload Identity, and Compute Engine metadata server credentials. Inline either a service-account key or an account-bound API key in the `PROVIDERS` JSON. *JSON shape (service account):* @@ -666,7 +666,7 @@ For any HTTP API that implements the OpenAI Chat Completions interface, includin |=== |Field |Required |Notes |`type` |Yes |Literal `"openai-compatible"` -|`baseUrl` |Yes |*Must include the `/v1` suffix.* Without it, every request fails with a misleading "Not Found" SSE error. +|`baseUrl` |Yes |*Must include the `/v1` suffix.* Without it, every request fails with a misleading "Not Found" Server-Sent Events (SSE) error. |`apiKeys` |No |Sent as `Authorization: Bearer `. Most local runtimes ignore it. |`headers` |No |Additional headers such as auth tokens or tenant IDs. |=== diff --git a/modules/ROOT/pages/tinymceai-on-premises-reference.adoc b/modules/ROOT/pages/tinymceai-on-premises-reference.adoc index 17ecc864ec..7470783f81 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-reference.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-reference.adoc @@ -10,29 +10,29 @@ Alphabetized. Required-ness is marked relative to a minimum working deployment. [cols=",,,",options="header",] |=== |Variable |Required |Default |Description -|`ALLOWED_ORIGINS` |Recommended |- |Comma-separated list of CORS-allowed editor origins. Required for cross-origin editor deployments. +|`ALLOWED_ORIGINS` |Recommended |- |Comma-separated list of Cross-Origin Resource Sharing (CORS)-allowed editor origins. Required for cross-origin editor deployments. 
|`DATABASE_DATABASE` |Yes |- |Database name (`ai_service` is the convention). |`DATABASE_DRIVER` |Yes |- |`mysql` or `postgres`. |`DATABASE_HOST` |Yes |- |Database hostname or IP. |`DATABASE_PASSWORD` |Yes |- |Database password. |`DATABASE_PORT` |No |3306 (MySQL) / 5432 (PostgreSQL) |Database port. |`DATABASE_SCHEMA` |PostgreSQL only |`cs-on-premises` |PostgreSQL schema name. Pre-create or set to `public`. -|`DATABASE_SSL_CA` |No |- |Path to CA cert for database TLS. +|`DATABASE_SSL_CA` |No |- |Path to CA cert for database Transport Layer Security (TLS). |`DATABASE_SSL_CERT` |No |- |Path to client cert. |`DATABASE_SSL_KEY` |No |- |Path to client key. |`DATABASE_USER` |Yes |- |Database user. |`ENABLE_METRIC_LOGS` |No |`false` |Emit JSON request logs to stdout. -|`ENVIRONMENTS_MANAGEMENT_SECRET_KEY` |Yes |- |Management Panel login secret. *Not* used to sign user JWTs. +|`ENVIRONMENTS_MANAGEMENT_SECRET_KEY` |Yes |- |Management Panel login secret. *Not* used to sign user JSON Web Tokens (JWTs). |`LANGFUSE_BASE_URL` |No |`https://cloud.langfuse.com` |Self-hosted Langfuse URL. |`LANGFUSE_DEBUG` |No |- |Verbose Langfuse logging. |`LANGFUSE_PUBLIC_KEY` |If using Langfuse |- |Langfuse public key. |`LANGFUSE_SECRET_KEY` |If using Langfuse |- |Langfuse secret key. |`LICENSE_KEY` |Yes |- |AI service license key (long string from Tiny). |`LLM_TELEMETRY_ENABLED` |No |`false` |Primary OpenTelemetry switch. -|`LLM_TIMEOUT_MS` |No |180000 |Per-request LLM timeout in ms. Raise for large self-hosted models. -|`MCP_SERVERS` |No |- |JSON object; MCP server configuration. See xref:tinymceai-on-premises-advanced.adoc[Advanced scenarios]. +|`LLM_TIMEOUT_MS` |No |180000 |Per-request large language model (LLM) timeout in ms. Raise for large self-hosted models. +|`MCP_SERVERS` |No |- |JSON object; Model Context Protocol (MCP) server configuration. See xref:tinymceai-on-premises-advanced.adoc[Advanced scenarios]. 
|`MODELS` |Sometimes |- |JSON array; required for Azure / Bedrock / Vertex / openai-compatible. See xref:tinymceai-on-premises-providers.adoc[LLM providers]. -|`OTEL_DEBUG` |No |- |Verbose OTLP diagnostic logging. +|`OTEL_DEBUG` |No |- |Verbose OpenTelemetry Protocol (OTLP) diagnostic logging. |`OTEL_EXPORTER_OTLP_TRACES_ENDPOINT` |If using OTEL |- |OTLP traces endpoint URL. |`OTEL_TRACES_SAMPLER_ARG` |No |`1.0` |OTLP sampling rate (0.0 to 1.0). |`PROVIDERS` |Yes |- |JSON object; LLM provider configuration. See xref:tinymceai-on-premises-providers.adoc[LLM providers]. @@ -79,7 +79,7 @@ Alphabetized. Required-ness is marked relative to a minimum working deployment. |POST |`/v1/conversations` |JWT |Create a conversation. Body *must* include client-supplied `id`. |GET |`/v1/conversations` |JWT |List conversations for the current `sub`. |GET |`/v1/conversations/\{id}` |JWT |Read one conversation. -|POST |`/v1/conversations/\{id}/messages` |JWT |Send a message. Returns SSE stream. +|POST |`/v1/conversations/\{id}/messages` |JWT |Send a message. Returns Server-Sent Events (SSE) stream. |DELETE |`/v1/conversations/\{id}` |JWT |Delete a conversation. |POST |`/v1/actions/\{actionId}` |JWT |Run a quick action. Body shape: `{"content":[{"type":"text","content":"..."}]}` (no `modelId`). |POST |`/v1/reviews/\{reviewId}` |JWT |Run a review. @@ -181,7 +181,7 @@ Error codes returned in HTTP 4xx responses and inside SSE `event: error` payload |PostgreSQL default schema |`cs-on-premises` (with hyphen) |Pre-create with `CREATE SCHEMA "cs-on-premises";` or set `DATABASE_SCHEMA=public`. |`/v1/models/\{compatibilityVersion}` |Only accepts `1` |Values such as `v1`, `v2`, or `latest` return 500. |Environment creation through raw API |Not supported |Always create environments through the Management Panel UI. -|Bedrock credentials |Inline only |The SDK default credential chain (IRSA, instance roles, `AWS_PROFILE`) is not used. 
+|Bedrock credentials |Inline only |The SDK default credential chain (IAM Roles for Service Accounts (IRSA), instance roles, `AWS_PROFILE`) is not used. |Vertex credentials |Inline only |Application Default Credentials, `GOOGLE_APPLICATION_CREDENTIALS`, and the metadata server are not used. |Azure `MODELS[].id` |Must equal deployment name |There is no separate `deploymentName` field. The ID is the deployment name. |OpenAI-compatible `baseUrl` |Must include `/v1` suffix |Omitting it produces a "Not Found" SSE error. diff --git a/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc b/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc index 6a8a8f1969..f18f327db5 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc @@ -15,7 +15,7 @@ image::tinymceai-on-premises/troubleshooting-fig-1.svg[alt="Troubleshooting tria |Symptom area |Go to |Container will not start or exits during boot |<> |Container is running, `/health` returns OK, but API calls fail |<> -|Conversation starts, but the SSE stream carries an `event: error` |<> +|Conversation starts, but the Server-Sent Events (SSE) stream carries an `event: error` |<> |Editor renders, but AI toolbar is missing, token fetch fails, or responses hang |<> |Responses are slow or time out |<> |Scaling, upgrades, or deployment questions |xref:tinymceai-on-premises-production.adoc[Production deployment] @@ -66,7 +66,7 @@ Run `docker logs ai-service` first. All entries below assume the log output is a [[api-and-jwt-authentication]] -== API and JWT authentication +== API and JSON Web Token (JWT) authentication These assume the container is running and `/health` returns OK. @@ -84,7 +84,7 @@ These assume the container is running and `/health` returns OK. |`invalid-jwt` (expired) |Token is past its `exp` claim -|Issue tokens with a reasonable lifetime (for example `exp = now {plus} 3600`) and refresh before expiry. 
Synchronize clocks with NTP. +|Issue tokens with a reasonable lifetime (for example `exp = now {plus} 3600`) and refresh before expiry. Synchronize clocks with Network Time Protocol (NTP). |`Environment not found` |Environment was not created through the Management Panel UI @@ -128,7 +128,7 @@ Common mistakes that produce `allowed: false`: `"permissions": "ai:admin"` (stri [[llm-provider-errors]] -== LLM provider errors +== Large language model (LLM) provider errors These appear as `event: error` inside the SSE stream. The HTTP response is still 200. @@ -156,7 +156,7 @@ These appear as `event: error` inside the SSE stream. The HTTP response is still [cols="2,3",options="header"] |=== |Error |Fix -|`NoValidApiKeysFoundError` |Inline `clientEmail` and `privateKey` inside `credentials` in `PROVIDERS`. Google ADC is not used. See xref:tinymceai-on-premises-providers.adoc[LLM providers]. +|`NoValidApiKeysFoundError` |Inline `clientEmail` and `privateKey` inside `credentials` in `PROVIDERS`. Google Application Default Credentials (ADC) is not used. See xref:tinymceai-on-premises-providers.adoc[LLM providers]. |Auth errors with a valid service account |`private_key` newlines were mangled during copy-paste. Build `PROVIDERS` with a script (`json.dumps()` on the SA JSON file) rather than hand-editing. |`SERVICE_DISABLED` |Run `gcloud services enable aiplatform.googleapis.com --project=<project-id>`. |Blocked by GCP org policy |Check `iam.disableServiceAccountCreation`, `iam.disableServiceAccountKeyCreation`, and account-bound API key policies. Exempt the AI service project from all three. @@ -196,7 +196,7 @@ Confirm `/health` is OK and a direct `curl` to `/v1/conversations` works before |Ensure TinyMCE 8{plus} is loaded, `plugins: 'tinymceai'` is set, and the toolbar string includes `tinymceai`. Verify the API key has the AI feature enabled. |Token fetch returns 401 -|The token endpoint's own authentication middleware is rejecting the request.
Check session cookies, CORS credentials, and bearer tokens in the browser network tab. +|The token endpoint's own authentication middleware is rejecting the request. Check session cookies, Cross-Origin Resource Sharing (CORS) credentials, and bearer tokens in the browser network tab. |Token returned but rejected by the AI service |See <<api-and-jwt-authentication>> above: wrong secret, wrong `aud`, wrong algorithm (RS256 instead of HS256), or wrong permissions shape. diff --git a/modules/ROOT/pages/tinymceai-on-premises.adoc b/modules/ROOT/pages/tinymceai-on-premises.adoc index 6909153808..ebfae3fdff 100644 --- a/modules/ROOT/pages/tinymceai-on-premises.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises.adoc @@ -5,7 +5,7 @@ The TinyMCE AI on-premises service is a self-hosted back end that powers AI writing assistance inside the TinyMCE rich text editor. It runs entirely within the host infrastructure. Editor content, conversation history, file attachments, and user data never leave the network. -The service ships as a single OCI container image (`registry.containers.tiny.cloud/ai-service`). It exposes a REST API, a Management Panel, Server-Sent Events streaming, and an OpenAPI spec. +The service ships as a single Open Container Initiative (OCI) container image (`registry.containers.tiny.cloud/ai-service`). It exposes a REST API, a Management Panel, Server-Sent Events streaming, and an OpenAPI spec. == Architecture @@ -17,9 +17,9 @@ Data flow for a single AI request: [arabic] . The browser loads TinyMCE with the `tinymceai` plugin. . The user triggers an AI feature (chat, review, quick action). -. The plugin calls the token endpoint, which signs an HS256 JWT with the API Secret. +. The plugin calls the token endpoint, which signs an HS256 JSON Web Token (JWT) with the API Secret. . The plugin sends the JWT and prompt to the AI service. -. The AI service verifies the token, checks per-feature permissions, and forwards the prompt to the configured LLM. +.
The AI service verifies the token, checks per-feature permissions, and forwards the prompt to the configured large language model (LLM). . The LLM streams its response back to the browser through Server-Sent Events. The shared secret (API Secret) never leaves the back end; the editor and the AI service only ever see signed tokens. @@ -42,7 +42,7 @@ The shared secret (API Secret) never leaves the back end; the editor and the AI |LLM provider flexibility |OpenAI, Anthropic, Google Gemini, Azure OpenAI, AWS Bedrock, Google Vertex AI, or any self-hosted OpenAI-compatible endpoint. Multiple providers can coexist. -|MCP integration +|Model Context Protocol (MCP) integration |Connect internal tools, databases, and knowledge bases through Model Context Protocol over Streamable HTTP transport. |Web scraping and web search @@ -95,7 +95,7 @@ The shared secret (API Secret) never leaves the back end; the editor and the AI |A back end that signs HS256 JWTs. |Reverse proxy -|The AI service does not terminate TLS. Use nginx, HAProxy, or a cloud load balancer. +|The AI service does not terminate Transport Layer Security (TLS). Use nginx, HAProxy, or a cloud load balancer. |=== == Choosing a setup path @@ -126,7 +126,7 @@ For a first-time deployment, progress through the guides in order. Each topic gu |HS256 signing model, required and optional claims, permissions reference, and token endpoint examples in 8 languages. |xref:tinymceai-on-premises-frameworks.adoc[Framework integration] -|Editor-side configuration: plugin options, token provider, authentication patterns, CORS, and deployment checklists. +|Editor-side configuration: plugin options, token provider, authentication patterns, Cross-Origin Resource Sharing (CORS), and deployment checklists. |xref:tinymceai-on-premises-production.adoc[Production deployment] |Kubernetes manifests, AWS ECS task definitions, horizontal scaling, sizing, security hardening, rate limiting, observability, backup and recovery, and upgrades. 
From 3bb50699ec2e485a2f6fa149b8de422145a5cda5 Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Thu, 14 May 2026 20:47:06 +1000 Subject: [PATCH 04/48] DOC-3498: Clean up architecture overview diagram Reduce edge clutter by connecting a single representative replica to downstream services and grouping the data layer into a subgraph. Fix SVG width to use a fixed pixel value consistent with other diagrams in the set. --- .../complete-guide-fig-1.mmd | 36 ++++++------------- .../complete-guide-fig-1.svg | 2 +- 2 files changed, 12 insertions(+), 26 deletions(-) diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.mmd b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.mmd index 15070a485a..0a6555af04 100644 --- a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.mmd +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.mmd @@ -6,37 +6,23 @@ flowchart TB subgraph App["Application layer (stateless, N replicas)"] LB["Reverse proxy / Load balancer
nginx · ALB · K8s Ingress
TLS termination · SSE pass-through"] - AI1["ai-service replica 1"] - AI2["ai-service replica 2"] AIN["ai-service replica N"] - LB --> AI1 - LB --> AI2 + AI2["ai-service replica 2"] + AI1["ai-service replica 1"] LB --> AIN + LB --> AI2 + LB --> AI1 end subgraph Data["Shared data layer"] - DB[("SQL database
MySQL 8.0+ or
PostgreSQL 13+")] - Cache[("Redis 3.2.6+
single node or cluster")] - Storage[("File storage
database · filesystem ·
S3 · Azure Blob")] + DB[("SQL database
MySQL 8.0+ / PostgreSQL 13+")] + Cache[("Redis 3.2.6+")] + Storage[("File storage
S3 · Azure Blob · filesystem")] end - AI1 --> DB - AI1 --> Cache - AI1 --> Storage - AI2 --> DB - AI2 --> Cache - AI2 --> Storage - AIN --> DB - AIN --> Cache - AIN --> Storage - - AI1 -->|HTTPS| LLM["LLM provider
OpenAI · Anthropic · Google ·
Azure · Bedrock · Vertex · self-hosted"] - AI2 -->|HTTPS| LLM - AIN -->|HTTPS| LLM + AI1 --> Data - AI1 -.-> Obs["OpenTelemetry · Langfuse ·
log aggregator"] - AI2 -.-> Obs - AIN -.-> Obs + AI1 -->|"HTTPS"| LLM["LLM provider
OpenAI · Anthropic · Google ·
Azure · Bedrock · Vertex ·
self-hosted"] - AI1 -.->|tool calls| MCP["MCP servers
internal knowledge bases"] - AI2 -.-> MCP + AI1 -.->|"telemetry"| Obs["OpenTelemetry · Langfuse"] + AI1 -.->|"tool calls"| MCP["MCP servers"] diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.svg index 6ba18d328b..4828889949 100644 --- a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.svg +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.svg @@ -1 +1 @@ -Shared data layerApplication layer (stateless, N replicas)fetch JWTHTTPS + Bearer JWTHTTPSHTTPSHTTPStool callsBrowserTinyMCE editor + tinymceaipluginYour token endpointsigns HS256 JWTsReverse proxy / Loadbalancernginx · ALB · K8s IngressTLS termination · SSEpass-throughai-service replica 1ai-service replica 2ai-service replica NSQL databaseMySQL 8.0+ orPostgreSQL 13+Redis 3.2.6+single node or clusterFile storagedatabase · filesystem ·S3 · Azure BlobLLM providerOpenAI · Anthropic ·Google ·Azure · Bedrock · Vertex ·self-hostedOpenTelemetry · Langfuse ·log aggregatorMCP serversinternal knowledge bases \ No newline at end of file +

Application layer (stateless, N replicas)

fetch JWT

HTTPS + Bearer JWT

HTTPS

telemetry

tool calls

Shared data layer

SQL database
MySQL 8.0+ / PostgreSQL 13+

Redis 3.2.6+

File storage
S3 · Azure Blob · filesystem

Browser
TinyMCE editor + tinymceai plugin

Your token endpoint
signs HS256 JWTs

Reverse proxy / Load balancer
nginx · ALB · K8s Ingress
TLS termination · SSE pass-through

ai-service replica N

ai-service replica 2

ai-service replica 1

LLM provider
OpenAI · Anthropic · Google ·
Azure · Bedrock · Vertex ·
self-hosted

OpenTelemetry · Langfuse

MCP servers

\ No newline at end of file From 68ed54f5351f1728219619e3b62c00c566d53b7a Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Mon, 18 May 2026 13:40:58 +1000 Subject: [PATCH 05/48] DOC-3498: Address PR review feedback from metricjs - Generalize overview page for standalone API use, not just TinyMCE - Swap complex architecture diagram for simplified overview diagram - Move detailed enterprise topology to production page (collapsible) - Fix capabilities table: chat, document review, file attachments, scaling - Redis: mark Sentinel as not supported - Remove TinyMCE 8.0+ from prerequisites (not required for API-only) - Reverse proxy changed from required to recommended - Fix decision tree cross-references (Section 33 -> guide names) - Replace troubleshooting flowchart with ordered triage list - Rename "Framework integration" to "TinyMCE integration" across all refs - Fix API key reference for on-prem (license key or API key) - Reorder support section docker commands for logical flow - Make MCP diagram arrows bidirectional - LLM providers: clarify native vs OpenAI-compatible providers - Re-render all mermaid diagrams --- .../advanced-scenarios-fig-1.mmd | 4 +- .../advanced-scenarios-fig-1.svg | 2 +- .../complete-guide-fig-1.svg | 2 +- .../complete-guide-fig-2.mmd | 12 ++--- .../complete-guide-fig-2.svg | 2 +- .../complete-guide-fig-3.svg | 2 +- .../complete-guide-fig-9.svg | 2 +- .../database-setup-fig-1.svg | 2 +- .../tinymceai-on-premises/overview-fig-1.mmd | 7 +++ .../tinymceai-on-premises/overview-fig-1.svg | 1 + .../production-guide-fig-1.svg | 2 +- .../troubleshooting-fig-1.svg | 2 +- modules/ROOT/nav.adoc | 2 +- .../tinymceai-on-premises-frameworks.adoc | 4 +- .../ROOT/pages/tinymceai-on-premises-jwt.adoc | 4 +- .../tinymceai-on-premises-production.adoc | 7 +++ ...tinymceai-on-premises-troubleshooting.adoc | 27 +++++----- modules/ROOT/pages/tinymceai-on-premises.adoc | 53 +++++++++---------- 18 files changed, 74 insertions(+), 63 deletions(-) create mode 
100644 modules/ROOT/images/tinymceai-on-premises/overview-fig-1.mmd create mode 100644 modules/ROOT/images/tinymceai-on-premises/overview-fig-1.svg diff --git a/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-1.mmd b/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-1.mmd index 6d69c87d3a..842da64633 100644 --- a/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-1.mmd +++ b/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-1.mmd @@ -1,4 +1,4 @@ flowchart LR Editor[TinyMCE editor] <-->|chat / quick actions| AI[AI Service] - AI -->|MCP tools/call| MCP[MCP Server
knowledge-hub] - MCP -->|read| KB[Confluence ·
Notion ·
GitBook ·
internal wiki] + AI <-->|MCP tools/call| MCP[MCP Server
knowledge-hub] + MCP <-->|read| KB[Confluence ·
Notion ·
GitBook ·
internal wiki] diff --git a/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-1.svg index 2ab529c4b1..a6ed13be3a 100644 --- a/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-1.svg +++ b/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-1.svg @@ -1 +1 @@ -chat / quick actionsMCP tools/callreadTinyMCE editorAI ServiceMCP Serverknowledge-hubConfluence ·Notion ·GitBook ·internal wiki \ No newline at end of file +chat / quick actionsMCP tools/callreadTinyMCE editorAI ServiceMCP Serverknowledge-hubConfluence ·Notion ·GitBook ·internal wiki \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.svg index 4828889949..97aeabe7ca 100644 --- a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.svg +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.svg @@ -1 +1 @@ -

Application layer (stateless, N replicas)

fetch JWT

HTTPS + Bearer JWT

HTTPS

telemetry

tool calls

Shared data layer

SQL database
MySQL 8.0+ / PostgreSQL 13+

Redis 3.2.6+

File storage
S3 · Azure Blob · filesystem

Browser
TinyMCE editor + tinymceai plugin

Your token endpoint
signs HS256 JWTs

Reverse proxy / Load balancer
nginx · ALB · K8s Ingress
TLS termination · SSE pass-through

ai-service replica N

ai-service replica 2

ai-service replica 1

LLM provider
OpenAI · Anthropic · Google ·
Azure · Bedrock · Vertex ·
self-hosted

OpenTelemetry · Langfuse

MCP servers

\ No newline at end of file +Application layer (stateless, N replicas)fetch JWTHTTPS + Bearer JWTHTTPStelemetrytool callsShared data layerSQL databaseMySQL 8.0+ / PostgreSQL13+Redis 3.2.6+File storageS3 · Azure Blob · filesystemBrowserTinyMCE editor + tinymceaipluginYour token endpointsigns HS256 JWTsReverse proxy / Loadbalancernginx · ALB · K8s IngressTLS termination · SSEpass-throughai-service replica Nai-service replica 2ai-service replica 1LLM providerOpenAI · Anthropic ·Google ·Azure · Bedrock · Vertex ·self-hostedOpenTelemetry · LangfuseMCP servers \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.mmd b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.mmd index 8fb6a35113..91496059f6 100644 --- a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.mmd +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.mmd @@ -1,11 +1,11 @@ flowchart TD Start([New deployment]) --> Q1{Evaluating or
going to production?} - Q1 -->|Evaluating locally| Compose[Docker Compose
all services on one host
Part 2 quick start] + Q1 -->|Evaluating locally| Compose[Docker Compose
all services on one host
Getting started guide] Q1 -->|Production| Q2{Orchestrator?} - Q2 -->|Kubernetes| K8s[Kubernetes deployment
Section 33] - Q2 -->|AWS ECS / Fargate| ECS[ECS task definition
Section 34] - Q2 -->|Docker / Podman on VMs| VMs[Docker or Podman compose
Sections 9.1 / 9.2] - Q2 -->|Bare metal / no containers| Bare[Native install for
data layer; container
for AI service
Section 8.5] + Q2 -->|Kubernetes| K8s[Kubernetes deployment
Production guide] + Q2 -->|AWS ECS / Fargate| ECS[ECS task definition
Production guide] + Q2 -->|Docker / Podman on VMs| VMs[Docker or Podman compose
Database guide] + Q2 -->|Bare metal / no containers| Bare[Native install for
data layer; container
for AI service
Database guide] Compose --> DB{Database?} K8s --> DB ECS --> DB @@ -13,5 +13,5 @@ flowchart TD Bare --> DB DB -->|Managed cloud DB| Managed[RDS · Cloud SQL ·
Azure Database] DB -->|Self-managed| Self[Containers or native install] - Managed --> Done([Continue with Part 3]) + Managed --> Done([Continue with
LLM providers guide]) Self --> Done diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.svg b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.svg index dcf71a1ca6..2a4f22aa83 100644 --- a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.svg +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.svg @@ -1 +1 @@ -Evaluating locallyProductionKubernetesAWS ECS / FargateDocker / Podman on VMsBare metal / no containersManaged cloud DBSelf-managedNew deploymentEvaluating orgoing to production?Docker Composeall services on one hostPart 2 quick startOrchestrator?Kubernetes deploymentSection 33ECS task definitionSection 34Docker or Podman composeSections 9.1 / 9.2Native install fordata layer; containerfor AI serviceSection 8.5Database?RDS · Cloud SQL ·Azure DatabaseContainers or native installContinue with Part 3 \ No newline at end of file +Evaluating locallyProductionKubernetesAWS ECS / FargateDocker / Podman on VMsBare metal / no containersManaged cloud DBSelf-managedNew deploymentEvaluating orgoing to production?Docker Composeall services on one hostGetting started guideOrchestrator?Kubernetes deploymentProduction guideECS task definitionProduction guideDocker or Podman composeDatabase guideNative install fordata layer; containerfor AI serviceDatabase guideDatabase?RDS · Cloud SQL ·Azure DatabaseContainers or native installContinue withLLM providers guide \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-3.svg b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-3.svg index a13caed517..47f6946e61 100644 --- a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-3.svg +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-3.svg @@ -1 +1 @@ -scalesscalesscalesInternetIngress controllernginx-ingress · ALBcontrollerproxy-buffering offService: ai-servicePod: ai-service replica 1Pod: ai-service replica 2Pod: ai-service replica 
NService: databaseor external RDSService: redisor external ElastiCacheS3 / Azure BlobHorizontalPodAutoscaler \ No newline at end of file +scalesscalesscalesInternetIngress controllernginx-ingress · ALBcontrollerproxy-buffering offService: ai-servicePod: ai-service replica 1Pod: ai-service replica 2Pod: ai-service replica NService: databaseor external RDSService: redisor external ElastiCacheS3 / Azure BlobHorizontalPodAutoscaler \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-9.svg b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-9.svg index f611b021be..93e7f3ea46 100644 --- a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-9.svg +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-9.svg @@ -1 +1 @@ -No - exited or wont pullYesNo - times out or 5xxYesYes - 401 allowed falseinvalid-jwt-...NoYesNoYesNoYesNoSomething is wrongContainer isrunning?docker ps shows itContainer startup failurescurl /healthreturns 200?API call returnsauth error?API and JWT authenticationSSE streamcarries event errorfrom LLM?LLM provider errorsEditor sidebroken?no toolbar token 401hanging streamEditor and front-endSlow timing outor failing under load?Performance and capacityDiagnostic recipesIf none fitsee Diagnostic recipesthen escalate \ No newline at end of file +No - exited or wont pullYesNo - times out or 5xxYesYes - 401 allowed falseinvalid-jwt-...NoYesNoYesNoYesNoSomething is wrongContainer isrunning?docker ps shows itContainer startup failurescurl /healthreturns 200?API call returnsauth error?API and JWT authenticationSSE streamcarries event errorfrom LLM?LLM provider errorsEditor sidebroken?no toolbar token 401hanging streamEditor and front-endSlow timing outor failing under load?Performance and capacityDiagnostic recipesIf none fitsee Diagnostic recipesthen escalate \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/database-setup-fig-1.svg 
b/modules/ROOT/images/tinymceai-on-premises/database-setup-fig-1.svg index d0ae05837f..5055886d48 100644 --- a/modules/ROOT/images/tinymceai-on-premises/database-setup-fig-1.svg +++ b/modules/ROOT/images/tinymceai-on-premises/database-setup-fig-1.svg @@ -1 +1 @@ -Evaluating locallyDeployingCloud / managed servicesSelf-managedDocker or PodmanNone - bare metal or VMWhere will MySQL/Postgresand Redis run?Evaluating ordeploying to prod?Docker Composeimage: mysql:8.0 orpostgres:16+ redis:7Cloud or self-managed?AWS RDS · Cloud SQL ·Azure Database+ ElastiCache ·Memorystore ·Azure Cache for RedisContainer runtimeavailable?Containers on the samenetwork or pod asai-serviceNative installbrew · apt · yum · dnfservice runs on hostVerify: nc -zv host portthen start ai-service \ No newline at end of file +Evaluating locallyDeployingCloud / managed servicesSelf-managedDocker or PodmanNone - bare metal or VMWhere will MySQL/Postgresand Redis run?Evaluating ordeploying to prod?Docker Composeimage: mysql:8.0 orpostgres:16+ redis:7Cloud or self-managed?AWS RDS · Cloud SQL ·Azure Database+ ElastiCache ·Memorystore ·Azure Cache for RedisContainer runtimeavailable?Containers on the samenetwork or pod asai-serviceNative installbrew · apt · yum · dnfservice runs on hostVerify: nc -zv host portthen start ai-service \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/overview-fig-1.mmd b/modules/ROOT/images/tinymceai-on-premises/overview-fig-1.mmd new file mode 100644 index 0000000000..5253cc358c --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/overview-fig-1.mmd @@ -0,0 +1,7 @@ +flowchart LR + Client["Client application"] -->|"1. fetch JWT"| Token["Token endpoint"] + Client -->|"2. prompt + JWT"| AI["AI service
(container)"] + AI -->|"3. forward prompt"| LLM["LLM provider"] + LLM -->|"4. stream response"| AI + AI -->|"4. SSE stream"| Client + AI --- DB[("Database
+ Redis")] diff --git a/modules/ROOT/images/tinymceai-on-premises/overview-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/overview-fig-1.svg new file mode 100644 index 0000000000..b02724c3b0 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/overview-fig-1.svg @@ -0,0 +1 @@ +1. fetch JWT2. prompt + JWT3. forward prompt4. stream response4. SSE streamClient applicationToken endpointAI service(container)LLM providerDatabase+ Redis \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/production-guide-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/production-guide-fig-1.svg index c3fb67a075..beb42f95a2 100644 --- a/modules/ROOT/images/tinymceai-on-premises/production-guide-fig-1.svg +++ b/modules/ROOT/images/tinymceai-on-premises/production-guide-fig-1.svg @@ -1 +1 @@ -Shared data layerHTTPS + JWTHTTP :8000HTTP :8000HTTP :8000optionalTinyMCE in browserReverse Proxy / LoadBalancernginx · ALB · IngressTLS terminationproxy_buffering offai-service replica 1ai-service replica 2ai-service replica NMySQL 8.0+ /Postgres 13+Multi-AZ in prodRedis 7cluster or managedS3 · Azure Blob ·filesystem · DBLLM ProviderOpenAI · Anthropic ·Google ·Azure · Bedrock · Vertex ·self-hostedOpenTelemetry · Langfuse ·log aggregator \ No newline at end of file +Shared data layerHTTPS + JWTHTTP :8000HTTP :8000HTTP :8000optionalTinyMCE in browserReverse Proxy / LoadBalancernginx · ALB · IngressTLS terminationproxy_buffering offai-service replica 1ai-service replica 2ai-service replica NMySQL 8.0+ /Postgres 13+Multi-AZ in prodRedis 7cluster or managedS3 · Azure Blob ·filesystem · DBLLM ProviderOpenAI · Anthropic ·Google ·Azure · Bedrock · Vertex ·self-hostedOpenTelemetry · Langfuse ·log aggregator \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/troubleshooting-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/troubleshooting-fig-1.svg index a8651978ca..9c5fb14e3d 100644 --- 
a/modules/ROOT/images/tinymceai-on-premises/troubleshooting-fig-1.svg +++ b/modules/ROOT/images/tinymceai-on-premises/troubleshooting-fig-1.svg @@ -1 +1 @@ -No - exited or won't pullYesNo - times out or 5xxYesYes - 401, allowed:false,invalid-jwt-*NoYesNoYesNoYesNoSomething is wrongContainer isrunning?docker ps shows itContainer startup failurescurl /healthreturns 200?API call returnsauth error?API and JWT authenticationSSE streamcarries event: errorfrom LLM?LLM provider errorsEditor sidebroken?no toolbar, token 401,hanging streamEditor and front-endSlow, timing out,or failing under load?Performance and capacityProduction and scalingIf none fit:see Diagnostic recipesthen escalate tosupport@tiny.cloud \ No newline at end of file +No - exited or won't pullYesNo - times out or 5xxYesYes - 401, allowed:false,invalid-jwt-*NoYesNoYesNoYesNoSomething is wrongContainer isrunning?docker ps shows itContainer startup failurescurl /healthreturns 200?API call returnsauth error?API and JWT authenticationSSE streamcarries event: errorfrom LLM?LLM provider errorsEditor sidebroken?no toolbar, token 401,hanging streamEditor and front-endSlow, timing out,or failing under load?Performance and capacityProduction and scalingIf none fit:see Diagnostic recipesthen escalate tosupport@tiny.cloud \ No newline at end of file diff --git a/modules/ROOT/nav.adoc b/modules/ROOT/nav.adoc index 82f443cab3..4991a7e036 100644 --- a/modules/ROOT/nav.adoc +++ b/modules/ROOT/nav.adoc @@ -275,7 +275,7 @@ ***** xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage] ***** xref:tinymceai-on-premises-providers.adoc[LLM providers] ***** xref:tinymceai-on-premises-jwt.adoc[JWT authentication] -***** xref:tinymceai-on-premises-frameworks.adoc[Framework integration] +***** xref:tinymceai-on-premises-frameworks.adoc[TinyMCE integration] ***** xref:tinymceai-on-premises-production.adoc[Production deployment] ***** xref:tinymceai-on-premises-advanced.adoc[Advanced scenarios] ***** 
xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting] diff --git a/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc b/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc index 6990af476b..1511d02dea 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc @@ -1,5 +1,5 @@ = TinyMCE AI on-premises: editor-side integration -:navtitle: Framework integration +:navtitle: TinyMCE integration :description: Connecting the TinyMCE editor to the on-premises AI service from React, Vue, Angular, Svelte, or vanilla JavaScript. :keywords: AI, on-premises, React, Vue, Angular, Svelte, token provider @@ -8,7 +8,7 @@ This page covers the *editor-side* configuration that connects TinyMCE to the on * The AI service is already running. See xref:tinymceai-on-premises-getting-started.adoc[Getting started] for setup instructions. * A token endpoint exists that signs JSON Web Tokens (JWTs) for the AI service. See xref:tinymceai-on-premises-jwt.adoc[JWT authentication] for back-end implementations. -* The TinyMCE API key has the AI feature enabled. Retrieve or upgrade a key at https://www.tiny.cloud/my-account/integrate/. +* A valid TinyMCE license key or API key with the AI feature enabled. On-premises deployments typically use a license key provided by a Tiny account representative. 
For general framework setup (installing wrappers, component structure, server-side rendering (SSR) patterns), see the existing integration guides: diff --git a/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc b/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc index 7c88f42535..7ff72cf294 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc @@ -757,7 +757,7 @@ TIP: Set `credentials: 'include'` on the fetch when the token endpoint relies on For cross-origin setups, configure the back end server to respond with `Access-Control-Allow-Origin: <origin>` (not `*`) and `Access-Control-Allow-Credentials: true`. Set the session cookie with `SameSite=None; Secure`. -For framework-specific (React, Vue, Angular) integration, see xref:tinymceai-on-premises-frameworks.adoc[Framework integration]. +For framework-specific (React, Vue, Angular) integration, see xref:tinymceai-on-premises-frameworks.adoc[TinyMCE integration]. @@ -908,4 +908,4 @@ Short-lived tokens limit exposure if a token leaks through a browser extension, * xref:tinymceai-on-premises-getting-started.adoc[Getting started] -- end-to-end deployment, including a demo token server * xref:tinymceai-on-premises-providers.adoc[Large language model (LLM) providers] -- configuring custom models through `MODELS` and the `ai:models::` permission syntax * xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting] -- full troubleshooting catalog beyond JWT -* xref:tinymceai-on-premises-frameworks.adoc[Framework integration] -- editor-side integration patterns for React, Vue, and Angular, including `tinymceai_token_provider` wrappers +* xref:tinymceai-on-premises-frameworks.adoc[TinyMCE integration] -- editor-side integration patterns for React, Vue, and Angular, including `tinymceai_token_provider` wrappers diff --git a/modules/ROOT/pages/tinymceai-on-premises-production.adoc b/modules/ROOT/pages/tinymceai-on-premises-production.adoc index a3806dd277..9c0cce0780
100644 --- a/modules/ROOT/pages/tinymceai-on-premises-production.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-production.adoc @@ -13,6 +13,13 @@ image::tinymceai-on-premises/production-guide-fig-1.svg[alt="Production deployme The AI service is stateless, persists all state to MySQL/PostgreSQL and Redis, and scales horizontally behind a load balancer. +.Detailed enterprise deployment topology +[%collapsible] +==== +[.text-center] +image::tinymceai-on-premises/complete-guide-fig-1.svg[alt="Enterprise architecture showing browser with TinyMCE token endpoint multiple AI service replicas database Redis LLM providers and observability",width=100%] +==== + == TLS / HTTPS diff --git a/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc b/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc index f18f327db5..cd1ce6ffc3 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc @@ -7,19 +7,17 @@ Match the symptom to the fix below. If the symptom does not fit any section, esc == Quick triage -[.text-center] -image::tinymceai-on-premises/troubleshooting-fig-1.svg[alt="Troubleshooting triage decision tree covering container health JWT and LLM connectivity failures",width=100%] +Work through this list to identify the symptom area: -[cols="1,1",options="header"] -|=== -|Symptom area |Go to -|Container will not start or exits during boot |<<container-startup-failures>> -|Container is running, `/health` returns OK, but API calls fail |<<api-and-jwt-authentication>> -|Conversation starts, but the Server-Sent Events (SSE) stream carries an `event: error` |<<llm-provider-errors>> -|Editor renders, but AI toolbar is missing, token fetch fails, or responses hang |<<editor-and-front-end>> -|Responses are slow or time out |<<performance-and-capacity>> -|Scaling, upgrades, or deployment questions |xref:tinymceai-on-premises-production.adoc[Production deployment] -|=== +. *Is the container running?* Run `docker ps` to check. If the container has exited or will not pull, see <<container-startup-failures>>. +.
*Does `/health` return 200?* Run `curl -fsS \http://localhost:8000/health`. If it times out or returns 5xx, see <<container-startup-failures>>. +. *Does the API return an auth error?* If the response is 401, `allowed:false`, or `invalid-jwt-*`, see <<api-and-jwt-authentication>>. +. *Does the SSE stream carry an `event: error` from the LLM?* If yes, see <<llm-provider-errors>>. +. *Is the editor side broken?* Missing toolbar, token 401, or hanging stream? See <<editor-and-front-end>>. +. *Slow, timing out, or failing under load?* See <<performance-and-capacity>>. +. *Scaling, upgrades, or deployment questions?* See xref:tinymceai-on-premises-production.adoc[Production deployment]. + +If none of the above match, see <<diagnostic-recipes>> and then escalate to `support@tiny.cloud`. [[container-startup-failures]] @@ -208,7 +206,7 @@ Confirm `/health` is OK and a direct `curl` to `/v1/conversations` works before |Add the editor's origin (scheme {plus} host {plus} port) to the `ALLOWED_ORIGINS` environment variable. |Editor renders then disappears (Next.js / Nuxt / SvelteKit) -|TinyMCE references `window` at load time. Load the editor client-only: `dynamic(() => import('./Editor'), { ssr: false })` in Next.js, `<ClientOnly>` in Nuxt, `onMount` in SvelteKit. See xref:tinymceai-on-premises-frameworks.adoc[Framework integration]. +|TinyMCE references `window` at load time. Load the editor client-only: `dynamic(() => import('./Editor'), { ssr: false })` in Next.js, `<ClientOnly>` in Nuxt, `onMount` in SvelteKit. See xref:tinymceai-on-premises-frameworks.adoc[TinyMCE integration]. |`tinymceai_token_provider` called in a tight loop |Token endpoint is returning an invalid JWT or non-JSON response. Test with `curl -X POST http://localhost:3000/api/ai-token` and verify the response is `pass:c[{"token":"eyJ..."}]`.
@@ -226,6 +224,7 @@ Confirm `/health` is OK and a direct `curl` to `/v1/conversations` works before |=== +[[diagnostic-recipes]] == Diagnostic recipes .Expand for copy-ready diagnostic commands @@ -306,5 +305,5 @@ curl -N -X POST http://localhost:8000/v1/conversations/smoke-1/messages \ * xref:tinymceai-on-premises-jwt.adoc[JWT authentication] * xref:tinymceai-on-premises-providers.adoc[LLM providers] * xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage] -* xref:tinymceai-on-premises-frameworks.adoc[Framework integration] +* xref:tinymceai-on-premises-frameworks.adoc[TinyMCE integration] * xref:tinymceai-on-premises-production.adoc[Production deployment] diff --git a/modules/ROOT/pages/tinymceai-on-premises.adoc b/modules/ROOT/pages/tinymceai-on-premises.adoc index ebfae3fdff..3deb6a7330 100644 --- a/modules/ROOT/pages/tinymceai-on-premises.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises.adoc @@ -3,24 +3,24 @@ :description: Self-hosted AI writing assistance for TinyMCE; architecture, prerequisites, and topic guide :keywords: AI, on-premises, self-hosted, deployment, overview -The TinyMCE AI on-premises service is a self-hosted back end that powers AI writing assistance inside the TinyMCE rich text editor. It runs entirely within the host infrastructure. Editor content, conversation history, file attachments, and user data never leave the network. +The TinyMCE AI on-premises service is a self-hosted back end that powers AI writing assistance. It can be used with the TinyMCE rich text editor, particularly the xref:tinymceai.adoc[TinyMCE AI plugin], or as a standalone service. It runs entirely within the host infrastructure. Document content, conversation history, file attachments, and user data never leave the network. The service ships as a single Open Container Initiative (OCI) container image (`registry.containers.tiny.cloud/ai-service`). It exposes a REST API, a Management Panel, Server-Sent Events streaming, and an OpenAPI spec. 
== Architecture [.text-center] -image::tinymceai-on-premises/complete-guide-fig-1.svg[alt="Service architecture showing browser with TinyMCE token endpoint AI service database Redis and LLM providers",width=100%] +image::tinymceai-on-premises/overview-fig-1.svg[alt="High-level architecture showing client token endpoint AI service LLM provider and data layer",width=100%] Data flow for a single AI request: [arabic] -. The browser loads TinyMCE with the `tinymceai` plugin. -. The user triggers an AI feature (chat, review, quick action). -. The plugin calls the token endpoint, which signs an HS256 JSON Web Token (JWT) with the API Secret. -. The plugin sends the JWT and prompt to the AI service. +. The client application requests a signed token from the token endpoint. +. The client sends the JWT and prompt to the AI service over HTTPS. . The AI service verifies the token, checks per-feature permissions, and forwards the prompt to the configured large language model (LLM). -. The LLM streams its response back to the browser through Server-Sent Events. +. The LLM streams its response back to the client through Server-Sent Events (SSE). + +When used with TinyMCE, the `tinymceai` plugin handles steps 1, 2, and 4 automatically through the `tinymceai_token_provider` callback. The shared secret (API Secret) never leaves the back end; the editor and the AI service only ever see signed tokens. @@ -31,10 +31,10 @@ The shared secret (API Secret) never leaves the back end; the editor and the AI |Capability |Details |Conversational AI assistant -|Multi-turn chat sidebar. Conversation history is isolated per user through the JWT `sub` claim. +|Multi-turn AI chat with support for document and file context. Conversation history is isolated per user through the JWT `sub` claim. |Document review -|Correctness, clarity, readability, tone, and translation. +|Review a document for correctness, clarity, readability, tone, and more, or translate to another language. 
|Quick actions |Rewrite, summarize, expand, change tone, fix grammar, translate, continue, and improve writing. @@ -58,13 +58,13 @@ The shared secret (API Secret) never leaves the back end; the editor and the AI |Server-Sent Events from the LLM back to the browser. |File attachments -|Database, filesystem, Amazon S3, or Azure Blob Storage. +|Use additional files as context for AI conversations. Storage options include database, filesystem, Amazon S3, or Azure Blob Storage. |Observability |Structured request logs, OpenTelemetry, and Langfuse. All three run as independent simultaneous pipelines. |Horizontal scaling -|The service is stateless. Share identical environment configuration across replicas. +|The service is stateless; add replicas behind a load balancer without shared local state. |=== == Prerequisites @@ -80,22 +80,19 @@ The shared secret (API Secret) never leaves the back end; the editor and the AI |MySQL 8.0 or PostgreSQL 13{plus} (16 recommended). |Redis -|3.2.6{plus} (7.x recommended). Single node, Sentinel, or Cluster mode. +|3.2.6{plus} (7.x recommended). Single node or Cluster mode supported. Sentinel not supported. |LLM access |At least one provider. Multiple providers can coexist. -|TinyMCE 8.0{plus} -|The `tinymceai` plugin is a premium plugin. - |License key and registry credentials |Provided by a Tiny account representative. |Token endpoint |A back end that signs HS256 JWTs. -|Reverse proxy -|The AI service does not terminate Transport Layer Security (TLS). Use nginx, HAProxy, or a cloud load balancer. +|Reverse proxy (recommended) +|The AI service does not terminate Transport Layer Security (TLS). A reverse proxy such as nginx, HAProxy, or a cloud load balancer is recommended for TLS termination in production. |=== == Choosing a setup path @@ -117,15 +114,15 @@ For a first-time deployment, progress through the guides in order. Each topic gu |Five-minute Docker Compose quick start. 
Stand up the AI service, database, Redis, token server, and a browser editor. |xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage] -|MySQL and PostgreSQL setup, Redis configuration, container runtimes (Docker, Podman, Kubernetes, ECS), and reverse proxy with TLS. +|Data layer setup: MySQL and PostgreSQL setup, Redis configuration, container runtimes (Docker, Podman, Kubernetes, ECS), and reverse proxy with TLS. |xref:tinymceai-on-premises-providers.adoc[LLM providers] -|OpenAI, Anthropic, Google Gemini, Azure OpenAI, AWS Bedrock, Google Vertex AI, and self-hosted endpoints (Ollama, vLLM, LM Studio). Custom model catalog and API key rotation. +|Connect to OpenAI, Anthropic, Google Gemini, Azure OpenAI, AWS Bedrock, Google Vertex AI, or any OpenAI-compatible endpoint (Ollama, vLLM, LM Studio). Custom model catalog and API key rotation. |xref:tinymceai-on-premises-jwt.adoc[JWT authentication] |HS256 signing model, required and optional claims, permissions reference, and token endpoint examples in 8 languages. -|xref:tinymceai-on-premises-frameworks.adoc[Framework integration] +|xref:tinymceai-on-premises-frameworks.adoc[TinyMCE integration] |Editor-side configuration: plugin options, token provider, authentication patterns, Cross-Origin Resource Sharing (CORS), and deployment checklists. |xref:tinymceai-on-premises-production.adoc[Production deployment] @@ -155,14 +152,6 @@ Container logs:: docker logs ai-service --tail 200 ---- -Effective environment:: -Redact secrets before submitting. -+ -[source,console] ----- -docker inspect ai-service | jq '.[0].Config.Env' ----- - Health check:: + [source,console] @@ -180,6 +169,14 @@ Expected response: Decoded JWT payload:: Strip the signature and decode with a JWT library. +Environment variables:: +Redact secrets before submitting. 
++ +[source,console] +---- +docker inspect ai-service | jq '.[0].Config.Env' +---- + Image version:: + [source,console] From 260ed25c8b2cc426cc5f8c483f6303ad287f6002 Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Mon, 18 May 2026 14:35:19 +1000 Subject: [PATCH 06/48] DOC-3498: Update Docker image name to ai-service-tiny Registry URL confirmed as registry.containers.tiny.cloud/ai-service-tiny. --- .../ROOT/pages/tinymceai-on-premises-database.adoc | 2 +- .../pages/tinymceai-on-premises-getting-started.adoc | 6 +++--- .../ROOT/pages/tinymceai-on-premises-production.adoc | 12 ++++++------ .../ROOT/pages/tinymceai-on-premises-providers.adoc | 4 ++-- modules/ROOT/pages/tinymceai-on-premises.adoc | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/modules/ROOT/pages/tinymceai-on-premises-database.adoc b/modules/ROOT/pages/tinymceai-on-premises-database.adoc index 215dd1759e..2018e23069 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-database.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-database.adoc @@ -420,7 +420,7 @@ When the AI service runs in Docker but the database or Redis runs natively on th ---- services: ai-service: - image: registry.containers.tiny.cloud/ai-service:latest + image: registry.containers.tiny.cloud/ai-service-tiny:latest extra_hosts: - "host.docker.internal:host-gateway" environment: diff --git a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc index 487554c0a4..ced7424e3e 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc @@ -16,7 +16,7 @@ mkdir tinymce-ai-onpremise && cd tinymce-ai-onpremise === Authenticate with the container registry -The service image lives at `registry.containers.tiny.cloud/ai-service`. +The service image lives at `registry.containers.tiny.cloud/ai-service-tiny`. 
For Docker: @@ -39,7 +39,7 @@ Replace `TINY_REGISTRY_USERNAME` with the username supplied by the Tiny account [source,bash] ---- -docker pull registry.containers.tiny.cloud/ai-service:latest +docker pull registry.containers.tiny.cloud/ai-service-tiny:latest ---- For Podman, substitute `podman pull`. For production, pin a specific version tag (for example `:5.1.0`) rather than `:latest`. @@ -184,7 +184,7 @@ docker run --init -d -p 8000:8000 \ -e PROVIDERS="$PROVIDERS" \ -e STORAGE_DRIVER='database' \ -e ENABLE_METRIC_LOGS='true' \ - registry.containers.tiny.cloud/ai-service:latest + registry.containers.tiny.cloud/ai-service-tiny:latest ---- ==== diff --git a/modules/ROOT/pages/tinymceai-on-premises-production.adoc b/modules/ROOT/pages/tinymceai-on-premises-production.adoc index 9c0cce0780..d01abe31e4 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-production.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-production.adoc @@ -98,7 +98,7 @@ The AI service works with Podman as an alternative to Docker. In Podman, contain ---- podman login -u 'TINY_REGISTRY_USERNAME' registry.containers.tiny.cloud -podman pull registry.containers.tiny.cloud/ai-service:latest +podman pull registry.containers.tiny.cloud/ai-service-tiny:latest podman pod create --name ai-pod -p 8000:8000 -p 3306:3306 -p 6379:6379 @@ -120,7 +120,7 @@ podman run --init -d --pod ai-pod --name ai-service \ -e REDIS_HOST='127.0.0.1' \ -e PROVIDERS='{"openai":{"type":"openai","apiKeys":["sk-proj-..."]}}' \ -e STORAGE_DRIVER='database' \ - registry.containers.tiny.cloud/ai-service:latest + registry.containers.tiny.cloud/ai-service-tiny:latest ---- IMPORTANT: Pin to `mysql:8.0`. The `mysql:8` tag floats to MySQL 8.4, which removes the `default-authentication-plugin` flag and causes a crash loop. See xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage] for details. 
@@ -192,7 +192,7 @@ spec: - name: tiny-registry containers: - name: ai-service - image: registry.containers.tiny.cloud/ai-service:latest + image: registry.containers.tiny.cloud/ai-service-tiny:latest ports: - containerPort: 8000 env: @@ -355,7 +355,7 @@ spec: "containerDefinitions": [ { "name": "ai-service", - "image": "registry.containers.tiny.cloud/ai-service:latest", + "image": "registry.containers.tiny.cloud/ai-service-tiny:latest", "portMappings": [{ "containerPort": 8000 }], "healthCheck": { "command": ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"], @@ -412,7 +412,7 @@ spec: |Database encryption at rest |Turn on encryption at rest in the cloud provider console. RDS, Cloud SQL, and Azure Database enable this by default. |Redis authentication |Always set `REDIS_PASSWORD` (or use a managed Redis instance with authentication enabled). |Container security |Run as non-root, use a read-only filesystem where possible, and drop unnecessary Linux capabilities. -|Image scanning |Scan `registry.containers.tiny.cloud/ai-service` with Trivy, Snyk, or the registry's built-in scanner. +|Image scanning |Scan `registry.containers.tiny.cloud/ai-service-tiny` with Trivy, Snyk, or the registry's built-in scanner. |Least-privilege JSON Web Tokens (JWTs) |Grant only the permissions each user role requires. Avoid full-access tokens in production. |API secret rotation |Periodically create a new access key, add the new key to the configuration, then revoke the old key. The token endpoint reads the secret at request time. |Audit logging |Enable `ENABLE_METRIC_LOGS=true` and ship logs to a Security Information and Event Management (SIEM). @@ -592,7 +592,7 @@ Redis holds ephemeral state. Losing Redis data does not affect persistent data. + [source,bash] ---- -docker pull registry.containers.tiny.cloud/ai-service:NEW_VERSION +docker pull registry.containers.tiny.cloud/ai-service-tiny:NEW_VERSION ---- . 
For rolling deploys across version boundaries: start *one* instance at the new version and wait for it to become healthy before rolling the rest. . For Kubernetes: update the image tag in the Deployment. The default `RollingUpdate` strategy handles zero-downtime upgrades, provided the first new pod becomes Ready before the rollout continues. diff --git a/modules/ROOT/pages/tinymceai-on-premises-providers.adoc b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc index 6676c3b7e0..119f19d9d6 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-providers.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc @@ -153,7 +153,7 @@ docker run --init -d -p 8000:8000 \ -e REDIS_HOST='redis' \ -e STORAGE_DRIVER='database' \ -e PROVIDERS='{"openai":{"type":"openai","apiKeys":["sk-proj-YOUR_KEY_HERE"]}}' \ - registry.containers.tiny.cloud/ai-service:latest + registry.containers.tiny.cloud/ai-service-tiny:latest ---- *Verify:* @@ -699,7 +699,7 @@ On Linux, add the host gateway so `host.docker.internal` resolves: ---- services: ai-service: - image: registry.containers.tiny.cloud/ai-service:latest + image: registry.containers.tiny.cloud/ai-service-tiny:latest extra_hosts: - "host.docker.internal:host-gateway" ---- diff --git a/modules/ROOT/pages/tinymceai-on-premises.adoc b/modules/ROOT/pages/tinymceai-on-premises.adoc index 3deb6a7330..df6fe5bd11 100644 --- a/modules/ROOT/pages/tinymceai-on-premises.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises.adoc @@ -5,7 +5,7 @@ The TinyMCE AI on-premises service is a self-hosted back end that powers AI writing assistance. It can be used with the TinyMCE rich text editor, particularly the xref:tinymceai.adoc[TinyMCE AI plugin], or as a standalone service. It runs entirely within the host infrastructure. Document content, conversation history, file attachments, and user data never leave the network. 
-The service ships as a single Open Container Initiative (OCI) container image (`registry.containers.tiny.cloud/ai-service`). It exposes a REST API, a Management Panel, Server-Sent Events streaming, and an OpenAPI spec. +The service ships as a single Open Container Initiative (OCI) container image (`registry.containers.tiny.cloud/ai-service-tiny`). It exposes a REST API, a Management Panel, Server-Sent Events streaming, and an OpenAPI spec. == Architecture From 2e68692cee32cfd94353b7a50c23d91b4f8bb391 Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Tue, 19 May 2026 13:36:04 +1000 Subject: [PATCH 07/48] DOC-3498: Address review feedback from tiny-ben-tran and ArvinJ-H - Soften privacy claim to clarify LLM provider data handling - Reword data flow steps (JWT, prompt phrasing) - Clarify setup path section and topic guide introduction - Remove orphan diagrams (troubleshooting-fig-1, complete-guide-fig-9) - Remove "Must include" from plugins table, fix troubleshooting wording - Replace MySQL 8.4 references with "the latest MySQL" across all pages - Add provenance NOTE to performance characteristics - Consolidate production page diagrams (promote complete-guide-fig-1) - Improve overview and providers diagram layouts (LR, spacing) - Move provider examples out of collapsible block for visibility --- .../complete-guide-fig-9.mmd | 20 -------- .../complete-guide-fig-9.svg | 1 - .../tinymceai-on-premises/overview-fig-1.mmd | 11 +++-- .../tinymceai-on-premises/overview-fig-1.svg | 2 +- .../production-guide-fig-1.mmd | 30 ------------ .../production-guide-fig-1.svg | 1 - .../providers-guide-fig-1.mmd | 47 ++++++++++--------- .../providers-guide-fig-1.svg | 2 +- .../troubleshooting-fig-1.mmd | 20 -------- .../troubleshooting-fig-1.svg | 1 - .../pages/tinymceai-on-premises-database.adoc | 4 +- .../tinymceai-on-premises-frameworks.adoc | 4 +- ...tinymceai-on-premises-getting-started.adoc | 2 +- .../tinymceai-on-premises-production.adoc | 15 ++---- 
.../tinymceai-on-premises-providers.adoc | 12 ++--- ...tinymceai-on-premises-troubleshooting.adoc | 2 +- modules/ROOT/pages/tinymceai-on-premises.adoc | 12 ++--- 17 files changed, 57 insertions(+), 129 deletions(-) delete mode 100644 modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-9.mmd delete mode 100644 modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-9.svg delete mode 100644 modules/ROOT/images/tinymceai-on-premises/production-guide-fig-1.mmd delete mode 100644 modules/ROOT/images/tinymceai-on-premises/production-guide-fig-1.svg delete mode 100644 modules/ROOT/images/tinymceai-on-premises/troubleshooting-fig-1.mmd delete mode 100644 modules/ROOT/images/tinymceai-on-premises/troubleshooting-fig-1.svg diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-9.mmd b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-9.mmd deleted file mode 100644 index 5714d740be..0000000000 --- a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-9.mmd +++ /dev/null @@ -1,20 +0,0 @@ -flowchart TD - Start([Something is wrong]) --> Q1{Container is
running?
docker ps shows it} - Q1 -->|No - exited or wont pull| S1[Container startup failures] - Q1 -->|Yes| Q2{curl /health
returns 200?} - Q2 -->|No - times out or 5xx| S1 - Q2 -->|Yes| Q3{API call returns
auth error?} - Q3 -->|Yes - 401 allowed false
invalid-jwt-...| S2[API and JWT authentication] - Q3 -->|No| Q4{SSE stream
carries event error
from LLM?} - Q4 -->|Yes| S3[LLM provider errors] - Q4 -->|No| Q5{Editor side
broken?
no toolbar token 401
hanging stream} - Q5 -->|Yes| S4[Editor and front-end] - Q5 -->|No| Q6{Slow timing out
or failing under load?} - Q6 -->|Yes| S5[Performance and capacity] - Q6 -->|No| S6[Diagnostic recipes] - S1 --> Recipe([If none fit
see Diagnostic recipes
then escalate]) - S2 --> Recipe - S3 --> Recipe - S4 --> Recipe - S5 --> Recipe - S6 --> Recipe diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-9.svg b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-9.svg deleted file mode 100644 index 93e7f3ea46..0000000000 --- a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-9.svg +++ /dev/null @@ -1 +0,0 @@ -No - exited or wont pullYesNo - times out or 5xxYesYes - 401 allowed falseinvalid-jwt-...NoYesNoYesNoYesNoSomething is wrongContainer isrunning?docker ps shows itContainer startup failurescurl /healthreturns 200?API call returnsauth error?API and JWT authenticationSSE streamcarries event errorfrom LLM?LLM provider errorsEditor sidebroken?no toolbar token 401hanging streamEditor and front-endSlow timing outor failing under load?Performance and capacityDiagnostic recipesIf none fitsee Diagnostic recipesthen escalate \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/overview-fig-1.mmd b/modules/ROOT/images/tinymceai-on-premises/overview-fig-1.mmd index 5253cc358c..92152bd3f2 100644 --- a/modules/ROOT/images/tinymceai-on-premises/overview-fig-1.mmd +++ b/modules/ROOT/images/tinymceai-on-premises/overview-fig-1.mmd @@ -1,7 +1,10 @@ flowchart LR - Client["Client application"] -->|"1. fetch JWT"| Token["Token endpoint"] + Client["Client<br/>application"] + + Client -->|"1. fetch JWT"| Token["Token<br/>endpoint"] Client -->|"2. prompt + JWT"| AI["AI service<br/>(container)"] - AI -->|"3. forward prompt"| LLM["LLM provider"] - LLM -->|"4. stream response"| AI - AI -->|"4. SSE stream"| Client + AI -->|"3. forward prompt"| LLM["LLM<br/>provider"] AI --- DB[("Database<br/>+ Redis")] + + LLM -.->|"4. stream response"| AI + AI -.->|"5. 
SSE stream"| Client diff --git a/modules/ROOT/images/tinymceai-on-premises/overview-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/overview-fig-1.svg index b02724c3b0..707b6d7e1a 100644 --- a/modules/ROOT/images/tinymceai-on-premises/overview-fig-1.svg +++ b/modules/ROOT/images/tinymceai-on-premises/overview-fig-1.svg @@ -1 +1 @@ -1. fetch JWT2. prompt + JWT3. forward prompt4. stream response4. SSE streamClient applicationToken endpointAI service(container)LLM providerDatabase+ Redis \ No newline at end of file +1. fetch JWT2. prompt + JWT3. forward prompt4. stream response5. SSE streamClientapplicationTokenendpointAI service(container)LLMproviderDatabase+ Redis \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/production-guide-fig-1.mmd b/modules/ROOT/images/tinymceai-on-premises/production-guide-fig-1.mmd deleted file mode 100644 index 8f5031c7b0..0000000000 --- a/modules/ROOT/images/tinymceai-on-premises/production-guide-fig-1.mmd +++ /dev/null @@ -1,30 +0,0 @@ -flowchart TB - Browser([TinyMCE in browser]) - Browser -->|HTTPS + JWT| LB[Reverse Proxy / Load Balancer
nginx · ALB · Ingress
TLS termination
proxy_buffering off] - LB -->|HTTP :8000| AI1[ai-service replica 1] - LB -->|HTTP :8000| AI2[ai-service replica 2] - LB -->|HTTP :8000| AIN[ai-service replica N] - - subgraph DataLayer["Shared data layer"] - DB[("MySQL 8.0+ /
Postgres 13+
Multi-AZ in prod")] - Cache[("Redis 7
cluster or managed")] - Storage[("S3 · Azure Blob ·
filesystem · DB")] - end - - AI1 --> DB - AI1 --> Cache - AI1 --> Storage - AI2 --> DB - AI2 --> Cache - AI2 --> Storage - AIN --> DB - AIN --> Cache - AIN --> Storage - - AI1 --> LLM[LLM Provider
OpenAI · Anthropic · Google ·
Azure · Bedrock · Vertex · self-hosted] - AI2 --> LLM - AIN --> LLM - - AI1 -.->|optional| Obs[OpenTelemetry · Langfuse ·
log aggregator] - AI2 -.-> Obs - AIN -.-> Obs diff --git a/modules/ROOT/images/tinymceai-on-premises/production-guide-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/production-guide-fig-1.svg deleted file mode 100644 index beb42f95a2..0000000000 --- a/modules/ROOT/images/tinymceai-on-premises/production-guide-fig-1.svg +++ /dev/null @@ -1 +0,0 @@ -Shared data layerHTTPS + JWTHTTP :8000HTTP :8000HTTP :8000optionalTinyMCE in browserReverse Proxy / LoadBalancernginx · ALB · IngressTLS terminationproxy_buffering offai-service replica 1ai-service replica 2ai-service replica NMySQL 8.0+ /Postgres 13+Multi-AZ in prodRedis 7cluster or managedS3 · Azure Blob ·filesystem · DBLLM ProviderOpenAI · Anthropic ·Google ·Azure · Bedrock · Vertex ·self-hostedOpenTelemetry · Langfuse ·log aggregator \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.mmd b/modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.mmd index 2086e9cf30..9586bf0667 100644 --- a/modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.mmd +++ b/modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.mmd @@ -1,25 +1,30 @@ flowchart LR - subgraph PR[PROVIDERS env var · JSON object] - P1["my-openai-key
type: openai
apiKeys: [sk-...]"] - P2["my-bedrock
type: bedrock
credentials: {...}"] - P3["my-ollama
type: openai-compatible
baseUrl: .../v1"] + subgraph JWT["JWT auth.ai.permissions"] + K1["ai:models:my-openai-key:
gpt-4.1"] + K2["ai:models:my-bedrock:
us.anthropic.claude-sonnet-4-..."] + K3["ai:models:my-ollama:
qwen3:0.6b"] end - subgraph MD[MODELS env var · JSON array] - M1["id: gpt-4.1
provider: my-openai-key
features: [...]"] - M2["id: us.anthropic.claude-sonnet-4-...
provider: my-bedrock
features: [...]"] - M3["id: qwen3:0.6b
provider: my-ollama
features: [...]"] + + subgraph MD["MODELS env var · JSON array"] + M1["gpt-4.1
provider: my-openai-key"] + M2["us.anthropic.claude-sonnet-4-...
provider: my-bedrock"] + M3["qwen3:0.6b
provider: my-ollama"] end - subgraph JWT[JWT auth.ai.permissions] - K1["ai:models:my-openai-key:gpt-4.1"] - K2["ai:models:my-bedrock:us.anthropic.claude-sonnet-4-..."] - K3["ai:models:my-ollama:qwen3:0.6b"] + + subgraph PR["PROVIDERS env var · JSON object"] + P1["my-openai-key
type: openai"] + P2["my-bedrock
type: bedrock"] + P3["my-ollama
type: openai-compatible"] end - M1 -.references provider key.-> P1 - M2 -.references provider key.-> P2 - M3 -.references provider key.-> P3 - K1 -.gates per-user access.-> M1 - K2 -.gates per-user access.-> M2 - K3 -.gates per-user access.-> M3 - M1 ==>|forwarded to upstream| LLM1[OpenAI API] - M2 ==>|forwarded to upstream| LLM2[AWS Bedrock] - M3 ==>|forwarded to upstream| LLM3[Local Ollama] + + K1 -->|"gates access"| M1 + K2 -->|"gates access"| M2 + K3 -->|"gates access"| M3 + + M1 -->|"provider key"| P1 + M2 -->|"provider key"| P2 + M3 -->|"provider key"| P3 + + P1 ==> LLM1["OpenAI API"] + P2 ==> LLM2["AWS Bedrock"] + P3 ==> LLM3["Local Ollama"] diff --git a/modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.svg index a5f55d50a3..d00943b640 100644 --- a/modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.svg +++ b/modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.svg @@ -1 +1 @@ -JWT auth.ai.permissionsMODELS env var · JSON arrayPROVIDERS env var · JSON objectreferences provider keyreferences provider keyreferences provider keygates per-user accessgates per-user accessgates per-user accessforwarded to upstreamforwarded to upstreamforwarded to upstreammy-openai-keytype: openaiapiKeys: [sk-...]my-bedrocktype: bedrockcredentials: {...}my-ollamatype: openai-compatiblebaseUrl: .../v1id: gpt-4.1provider: my-openai-keyfeatures: [...]id:us.anthropic.claude-sonnet-4-...provider: my-bedrockfeatures: [...]id: qwen3:0.6bprovider: my-ollamafeatures: [...]ai:models:my-openai-key:gpt-4.1ai:models:my-bedrock:us.anthropic.claude-sonnet-4-...ai:models:my-ollama:qwen3:0.6bOpenAI APIAWS BedrockLocal Ollama \ No newline at end of file +PROVIDERS env var · JSON objectMODELS env var · JSON arrayJWT auth.ai.permissionsgates accessgates accessgates accessprovider keyprovider keyprovider 
keyai:models:my-openai-key:gpt-4.1ai:models:my-bedrock:us.anthropic.claude-sonnet-4-...ai:models:my-ollama:qwen3:0.6bgpt-4.1provider: my-openai-keyus.anthropic.claude-sonnet-4-...provider: my-bedrockqwen3:0.6bprovider: my-ollamamy-openai-keytype: openaimy-bedrocktype: bedrockmy-ollamatype: openai-compatibleOpenAI APIAWS BedrockLocal Ollama \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/troubleshooting-fig-1.mmd b/modules/ROOT/images/tinymceai-on-premises/troubleshooting-fig-1.mmd deleted file mode 100644 index e866425fd9..0000000000 --- a/modules/ROOT/images/tinymceai-on-premises/troubleshooting-fig-1.mmd +++ /dev/null @@ -1,20 +0,0 @@ -flowchart TD - Start([Something is wrong]) --> Q1{Container is
running?
docker ps shows it} - Q1 -->|No - exited or won't pull| S1[Container startup failures] - Q1 -->|Yes| Q2{curl /health
returns 200?} - Q2 -->|No - times out or 5xx| S1 - Q2 -->|Yes| Q3{API call returns
auth error?} - Q3 -->|Yes - 401, allowed:false,
invalid-jwt-*| S2[API and JWT authentication] - Q3 -->|No| Q4{SSE stream
carries event: error
from LLM?} - Q4 -->|Yes| S3[LLM provider errors] - Q4 -->|No| Q5{Editor side
broken?
no toolbar, token 401,
hanging stream} - Q5 -->|Yes| S4[Editor and front-end] - Q5 -->|No| Q6{Slow, timing out,
or failing under load?} - Q6 -->|Yes| S5[Performance and capacity] - Q6 -->|No| S6[Production and scaling] - S1 --> Recipe([If none fit:
see Diagnostic recipes
then escalate to
support@tiny.cloud]) - S2 --> Recipe - S3 --> Recipe - S4 --> Recipe - S5 --> Recipe - S6 --> Recipe diff --git a/modules/ROOT/images/tinymceai-on-premises/troubleshooting-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/troubleshooting-fig-1.svg deleted file mode 100644 index 9c5fb14e3d..0000000000 --- a/modules/ROOT/images/tinymceai-on-premises/troubleshooting-fig-1.svg +++ /dev/null @@ -1 +0,0 @@ -No - exited or won't pullYesNo - times out or 5xxYesYes - 401, allowed:false,invalid-jwt-*NoYesNoYesNoYesNoSomething is wrongContainer isrunning?docker ps shows itContainer startup failurescurl /healthreturns 200?API call returnsauth error?API and JWT authenticationSSE streamcarries event: errorfrom LLM?LLM provider errorsEditor sidebroken?no toolbar, token 401,hanging streamEditor and front-endSlow, timing out,or failing under load?Performance and capacityProduction and scalingIf none fit:see Diagnostic recipesthen escalate tosupport@tiny.cloud \ No newline at end of file diff --git a/modules/ROOT/pages/tinymceai-on-premises-database.adoc b/modules/ROOT/pages/tinymceai-on-premises-database.adoc index 2018e23069..60a77f2df2 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-database.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-database.adoc @@ -88,14 +88,14 @@ NOTE: MySQL does not have this issue. The database itself is the namespace, set [[mysql-version-pinning]] == MySQL version pinning -Do *not* use `mysql:8`. That tag now floats to MySQL 8.4, which removes the `default-authentication-plugin=mysql_native_password` startup flag the AI service relies on. The container crashloops with: +Do *not* use `mysql:8`. That tag now floats to the latest MySQL, which removes the `default-authentication-plugin=mysql_native_password` startup flag the AI service relies on. The container crashloops with: .... [ERROR] [MY-000067] [Server] unknown variable 'default-authentication-plugin=mysql_native_password'. [ERROR] [MY-010119] [Server] Aborting .... 
-Pin to `mysql:8.0` in every manifest: `docker run`, Docker Compose, Kubernetes, Helm, ECS. Running MySQL 8.4 with workarounds (removing the flag and switching to `caching_sha2_password`) is not a supported configuration. +Pin to `mysql:8.0` in every manifest: `docker run`, Docker Compose, Kubernetes, Helm, ECS. Running newer MySQL versions with workarounds (removing the flag and switching to `caching_sha2_password`) is not a supported configuration. TIP: The same principle applies to PostgreSQL. Pin `postgres:16` rather than `postgres:latest`. diff --git a/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc b/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc index 1511d02dea..3f0899be75 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc @@ -28,7 +28,7 @@ The on-premises AI integration adds the options documented below to the standard |Option |Description |`plugins` -|Must include `tinymceai`. +|`tinymceai`. |`toolbar` |Include one or more of `tinymceai-chat`, `tinymceai-review`, `tinymceai-quickactions`. @@ -190,7 +190,7 @@ If using the Tiny CDN instead of self-hosted assets, also add `\https://cdn.tiny |Editor loads but no AI buttons appear |`plugins` does not include `tinymceai`, or TinyMCE is version 7.x or earlier -|Set `plugins: 'tinymceai'` and confirm the script URL uses `/tinymce/8/`. Verify the API key has the AI feature enabled. +|Add `tinymceai` to the `plugins` list and confirm the script URL uses `/tinymce/8/`. Verify the API key has the AI feature enabled. 
|`POST /api/ai-token` returns 401 |The token endpoint rejects the fetch diff --git a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc index ced7424e3e..b790b7c3b9 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc @@ -80,7 +80,7 @@ volumes: mysql_data: ---- -TIP: Pin `mysql:8.0`, not `mysql:8`. The `:8` tag points to MySQL 8.4, which is incompatible with the AI service. See xref:tinymceai-on-premises-database.adoc#mysql-version-pinning[MySQL version pinning] for details. +TIP: Pin `mysql:8.0`, not `mysql:8`. The `:8` tag points to the latest MySQL, which is incompatible with the AI service. See xref:tinymceai-on-premises-database.adoc#mysql-version-pinning[MySQL version pinning] for details. PostgreSQL is equally supported. See xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage] for an equivalent compose file. Review the xref:tinymceai-on-premises-database.adoc#postgresql-schema-prerequisite[PostgreSQL schema prerequisite] before switching. 
diff --git a/modules/ROOT/pages/tinymceai-on-premises-production.adoc b/modules/ROOT/pages/tinymceai-on-premises-production.adoc index d01abe31e4..ecf62d0370 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-production.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-production.adoc @@ -9,17 +9,10 @@ == Architecture overview [.text-center] -image::tinymceai-on-premises/production-guide-fig-1.svg[alt="Production deployment topology with reverse proxy AI service replicas database and Redis behind TLS",width=100%] +image::tinymceai-on-premises/complete-guide-fig-1.svg[alt="Enterprise architecture showing browser with TinyMCE token endpoint AI service replicas database Redis LLM providers and observability",width=100%] The AI service is stateless, persists all state to MySQL/PostgreSQL and Redis, and scales horizontally behind a load balancer. -.Detailed enterprise deployment topology -[%collapsible] -==== -[.text-center] -image::tinymceai-on-premises/complete-guide-fig-1.svg[alt="Enterprise architecture showing browser with TinyMCE token endpoint multiple AI service replicas database Redis LLM providers and observability",width=100%] -==== - == TLS / HTTPS @@ -123,7 +116,7 @@ podman run --init -d --pod ai-pod --name ai-service \ registry.containers.tiny.cloud/ai-service-tiny:latest ---- -IMPORTANT: Pin to `mysql:8.0`. The `mysql:8` tag floats to MySQL 8.4, which removes the `default-authentication-plugin` flag and causes a crash loop. See xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage] for details. +IMPORTANT: Pin to `mysql:8.0`. The `mysql:8` tag floats to the latest MySQL, which removes the `default-authentication-plugin` flag and causes a crash loop. See xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage] for details. == Kubernetes deployment @@ -606,6 +599,8 @@ License keys are per-deployment, not per-replica. 
One key covers any number of r == Performance characteristics +NOTE: The following values are approximate baselines observed during internal testing. Actual performance depends on hardware, network conditions, LLM provider latency, and prompt complexity. + [cols="1,1",options="header"] |=== |Metric |Typical value @@ -629,7 +624,7 @@ License keys are per-deployment, not per-replica. One key covers any number of r |1,000{plus} per instance |=== -These values are approximate and vary with hardware, provider latency, and prompt complexity. The LLM provider's rate limits are typically the binding constraint before the AI service becomes one. +The LLM provider's rate limits are typically the binding constraint before the AI service becomes one. == Sizing guide diff --git a/modules/ROOT/pages/tinymceai-on-premises-providers.adoc b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc index 119f19d9d6..4960833625 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-providers.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc @@ -725,11 +725,10 @@ PARAMETER stop "<|im_start|>" ---- The exact template depends on the base model. Check the model card for the recommended chat template. Verify tool support with `ollama show <model>` before connecting to the AI service. -==== The reasoning toggle (`capabilities.reasoning: true`) is cosmetic for Ollama-backed models; the openai-compatible adapter does not translate it to the native Ollama API. -*Timeout:* +=== Timeout Large self-hosted models on consumer hardware can exceed the default 180-second timeout.
Override with: @@ -738,7 +737,7 @@ Large self-hosted models on consumer hardware can exceed the default 180-second -e LLM_TIMEOUT_MS='600000' ---- -*Example -- Ollama:* +=== Example -- Ollama [source,bash] ---- @@ -762,7 +761,7 @@ Large self-hosted models on consumer hardware can exceed the default 180-second -e LLM_TIMEOUT_MS='600000' ---- -*Example -- vLLM:* +=== Example -- vLLM [source,bash] ---- @@ -785,7 +784,7 @@ Large self-hosted models on consumer hardware can exceed the default 180-second ]' ---- -*Example -- LM Studio:* +=== Example -- LM Studio [source,bash] ---- @@ -807,7 +806,7 @@ Large self-hosted models on consumer hardware can exceed the default 180-second ]' ---- -*Verify:* +=== Verify [source,bash] ---- @@ -821,7 +820,6 @@ curl -s http://host.docker.internal:11434/v1/chat/completions \ ---- When the `curl` call returns a chat completion, the AI service can use the same endpoint. -==== diff --git a/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc b/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc index cd1ce6ffc3..78f405aab0 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc @@ -46,7 +46,7 @@ Run `docker logs ai-service` first. All entries below assume the log output is a |Run `CREATE SCHEMA "cs-on-premises";` (double quotes required), or set `DATABASE_SCHEMA=public`. See xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage]. |`[MY-000067] unknown variable 'default-authentication-plugin'` -|`mysql:8` tag now points to MySQL 8.4, which removed that variable +|`mysql:8` tag now points to the latest MySQL, which removed that variable |Pin `mysql:8.0` in the compose file and run `docker compose up -d --force-recreate mysql`. 
|Container exits with no useful log diff --git a/modules/ROOT/pages/tinymceai-on-premises.adoc b/modules/ROOT/pages/tinymceai-on-premises.adoc index df6fe5bd11..a5db854d49 100644 --- a/modules/ROOT/pages/tinymceai-on-premises.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises.adoc @@ -3,7 +3,7 @@ :description: Self-hosted AI writing assistance for TinyMCE; architecture, prerequisites, and topic guide :keywords: AI, on-premises, self-hosted, deployment, overview -The TinyMCE AI on-premises service is a self-hosted back end that powers AI writing assistance. It can be used with the TinyMCE rich text editor, particularly the xref:tinymceai.adoc[TinyMCE AI plugin], or as a standalone service. It runs entirely within the host infrastructure. Document content, conversation history, file attachments, and user data never leave the network. +The TinyMCE AI on-premises service is a self-hosted back end that powers AI writing assistance. It can be used with the TinyMCE rich text editor, particularly the xref:tinymceai.adoc[TinyMCE AI plugin], or as a standalone service. It runs entirely within the host infrastructure. Document content, conversation history, file attachments, and user data stay within the host network and are not stored by Tiny. Data sent to a configured LLM provider is subject to that provider's data handling policies. The service ships as a single Open Container Initiative (OCI) container image (`registry.containers.tiny.cloud/ai-service-tiny`). It exposes a REST API, a Management Panel, Server-Sent Events streaming, and an OpenAPI spec. @@ -15,12 +15,12 @@ image::tinymceai-on-premises/overview-fig-1.svg[alt="High-level architecture sho Data flow for a single AI request: [arabic] -. The client application requests a signed token from the token endpoint. -. The client sends the JWT and prompt to the AI service over HTTPS. +. The client application requests a JWT from the token endpoint. +. The client sends a prompt with the JWT to the AI service over HTTPS. 
. The AI service verifies the token, checks per-feature permissions, and forwards the prompt to the configured large language model (LLM). . The LLM streams its response back to the client through Server-Sent Events (SSE). -When used with TinyMCE, the `tinymceai` plugin handles steps 1, 2, and 4 automatically through the `tinymceai_token_provider` callback. +When the TinyMCE `tinymceai` plugin is used, it handles steps 1, 2, and 4 automatically through the `tinymceai_token_provider` callback. The shared secret (API Secret) never leaves the back end; the editor and the AI service only ever see signed tokens. @@ -100,11 +100,11 @@ The shared secret (API Secret) never leaves the back end; the editor and the AI [.text-center] image::tinymceai-on-premises/complete-guide-fig-2.svg[Setup path decision tree,width=100%] -Each path carries the same level of documentation. After identifying which path fits the operational model, complete its topic guides in the order listed. +All setup paths lead to the same set of topic guides listed below. The decision tree helps identify which guides to prioritize based on the deployment target. == Topic guides -For a first-time deployment, progress through the guides in order. Each topic guide also stands alone when only one area applies. +For a first-time deployment, progress through the guides in order. Each guide can also be used independently as a reference for a specific topic.
[cols="1,3",options="header"] |=== From 84caab23fcde7984726faec39258d8739da15daa Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Tue, 19 May 2026 13:38:45 +1000 Subject: [PATCH 08/48] DOC-3498: Remove internal testing reference from performance note --- modules/ROOT/pages/tinymceai-on-premises-production.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ROOT/pages/tinymceai-on-premises-production.adoc b/modules/ROOT/pages/tinymceai-on-premises-production.adoc index ecf62d0370..4368d4735d 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-production.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-production.adoc @@ -599,7 +599,7 @@ License keys are per-deployment, not per-replica. One key covers any number of r == Performance characteristics -NOTE: The following values are approximate baselines observed during internal testing. Actual performance depends on hardware, network conditions, LLM provider latency, and prompt complexity. +NOTE: The following values are approximate baselines. Actual performance depends on hardware, network conditions, LLM provider latency, and prompt complexity. [cols="1,1",options="header"] |=== From 21c8452239e2dbb61c240d0f88cfa0b69dce2d3d Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Tue, 19 May 2026 14:21:46 +1000 Subject: [PATCH 09/48] DOC-3498: Remove unverified performance and sizing sections Remove Performance characteristics and Sizing guide from the production page until engineering provides verified data. 
--- .../tinymceai-on-premises-production.adoc | 49 ------------------- modules/ROOT/pages/tinymceai-on-premises.adoc | 2 +- 2 files changed, 1 insertion(+), 50 deletions(-) diff --git a/modules/ROOT/pages/tinymceai-on-premises-production.adoc b/modules/ROOT/pages/tinymceai-on-premises-production.adoc index 4368d4735d..984c25cbf8 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-production.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-production.adoc @@ -595,52 +595,3 @@ Review the release notes for the target version and take a database backup befor License keys are per-deployment, not per-replica. One key covers any number of replicas of a single deployment. - - -== Performance characteristics - -NOTE: The following values are approximate baselines. Actual performance depends on hardware, network conditions, LLM provider latency, and prompt complexity. - -[cols="1,1",options="header"] -|=== -|Metric |Typical value - -|Cold start -|Approximately 3 seconds - -|Health check response -|Less than 10 ms - -|Token validation -|Less than 5 ms - -|Time to first token (LLM) -|200 ms to 2 s (depends on provider and model) - -|Memory per instance -|256 to 512 MB - -|Concurrent connections -|1,000{plus} per instance -|=== - -The LLM provider's rate limits are typically the binding constraint before the AI service becomes one. 
- -== Sizing guide - -[cols=",,,,",options="header",] -|=== -|Users |AI service replicas |Database |Redis |Notes -|1 to 50 |1 |db.t3.small (or 2 vCPU / 4 GB self-managed) |cache.t3.micro |Development and small teams -|50 to 500 |2 |db.r6g.large |cache.r6g.large |Small production -|500 to 5,000 |3 to 5 |db.r6g.xlarge (Multi-AZ) |cache.r6g.xlarge (cluster) |Medium production -|5,000{plus} |5{plus} (Horizontal Pod Autoscaler (HPA)) |db.r6g.2xlarge{plus} |cache.r6g.2xlarge{plus} |Large production; contact Tiny for guidance -|=== - -Starting point for self-managed deployments: - -* AI service instance: 2 vCPU / 4 GB RAM -* Database instance: 2 vCPU / 8 GB RAM -* Redis instance: 1 vCPU / 2 GB RAM - -Scale based on user count, average prompt size, and concurrent streaming connections. The LLM provider's rate limits are usually the binding constraint long before the AI service or database becomes one. diff --git a/modules/ROOT/pages/tinymceai-on-premises.adoc b/modules/ROOT/pages/tinymceai-on-premises.adoc index a5db854d49..1507c3ca04 100644 --- a/modules/ROOT/pages/tinymceai-on-premises.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises.adoc @@ -126,7 +126,7 @@ For a first-time deployment, progress through the guides in order. Each guide ca |Editor-side configuration: plugin options, token provider, authentication patterns, Cross-Origin Resource Sharing (CORS), and deployment checklists. |xref:tinymceai-on-premises-production.adoc[Production deployment] -|Kubernetes manifests, AWS ECS task definitions, horizontal scaling, sizing, security hardening, rate limiting, observability, backup and recovery, and upgrades. +|Kubernetes manifests, AWS ECS task definitions, horizontal scaling, security hardening, rate limiting, observability, backup and recovery, and upgrades. |xref:tinymceai-on-premises-advanced.adoc[Advanced scenarios] |MCP server integration, web scraping and search, multi-tenant patterns, custom models with guardrails, and AI-powered document pipelines. 
From cf8b90f4005d69cc76a51b08fcbc1bdfff5de570 Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Tue, 19 May 2026 14:52:04 +1000 Subject: [PATCH 10/48] DOC-3498: Remove redundant [arabic] list style attributes --- modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc | 1 - modules/ROOT/pages/tinymceai-on-premises-providers.adoc | 3 --- modules/ROOT/pages/tinymceai-on-premises.adoc | 1 - 3 files changed, 5 deletions(-) diff --git a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc index b790b7c3b9..a9461d5a7f 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc @@ -215,7 +215,6 @@ If the container exits immediately, run `docker logs ai-service`. The most commo The AI service isolates users into Environments. Each environment has its own access keys. -[arabic] . Open the Management Panel: *http://localhost:8000/panel/* . Sign in using the `MANAGEMENT_SECRET` from `.env`. . Click *Create Environment* and give it a name (for example "Development"). diff --git a/modules/ROOT/pages/tinymceai-on-premises-providers.adoc b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc index 4960833625..97b5538653 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-providers.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc @@ -291,7 +291,6 @@ Azure-hosted OpenAI models. Requires an Azure subscription, an Azure OpenAI reso ==== *Prerequisites in the Azure portal:* -[arabic] . Create an Azure OpenAI resource. Note the *resource name*; this is the subdomain prefix in `https://<resource-name>.openai.azure.com`. . Apply for model access if required by the region. . In Azure AI Studio, create a *deployment* for each model to expose. The deployment name is arbitrary (for example `prod-gpt4o`, `cheap-mini`).
@@ -401,7 +400,6 @@ The `sessionToken` field is optional but required for STS-issued short-lived cre *Prerequisites checklist:* -[arabic] . *Enable model access.* Bedrock console -> *Model access* -> Manage model access. Each model must be approved per-region. . *Subscribe through AWS Marketplace* for non-Amazon models. Anthropic Claude on Bedrock requires a one-time Marketplace subscription. . *Create an IAM user or role* with the permissions below. @@ -959,7 +957,6 @@ A model with no `features` entry, or with only sub-features the editor does not The `apiKeys` field on every provider type that has one is an *array*. The service treats all entries as valid for incoming requests, which allows rotating keys with zero downtime: -[arabic] . Append the new key to the array: + [source,json] diff --git a/modules/ROOT/pages/tinymceai-on-premises.adoc b/modules/ROOT/pages/tinymceai-on-premises.adoc index 1507c3ca04..023ece9242 100644 --- a/modules/ROOT/pages/tinymceai-on-premises.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises.adoc @@ -14,7 +14,6 @@ image::tinymceai-on-premises/overview-fig-1.svg[alt="High-level architecture sho Data flow for a single AI request: -[arabic] . The client application requests a JWT from the token endpoint. . The client sends a prompt with the JWT to the AI service over HTTPS. . The AI service verifies the token, checks per-feature permissions, and forwards the prompt to the configured large language model (LLM). 
From 42fb21f5dd2d8957991d616e07e6742047810daa Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Tue, 19 May 2026 21:57:05 +1000 Subject: [PATCH 11/48] DOC-3498: Address evaluation findings from on-prem setup testing Fix Redis Sentinel contradiction, add terminationGracePeriodSeconds and PDB to K8s manifest, add S3 credentials and topology spread, bootstrap step after Service manifest, HPA I/O-bound caveat, managed database TLS section, Docker network resolution for Compose v2, MODELS requirement clarification, and assorted cross-links and callouts identified during the independent evaluation audit. --- .../pages/tinymceai-on-premises-database.adoc | 40 ++++++++++ ...tinymceai-on-premises-getting-started.adoc | 22 ++++-- .../tinymceai-on-premises-production.adoc | 74 ++++++++++++++++++- .../tinymceai-on-premises-providers.adoc | 8 +- .../tinymceai-on-premises-reference.adoc | 4 +- ...tinymceai-on-premises-troubleshooting.adoc | 1 + modules/ROOT/pages/tinymceai-on-premises.adoc | 2 +- 7 files changed, 134 insertions(+), 17 deletions(-) diff --git a/modules/ROOT/pages/tinymceai-on-premises-database.adoc b/modules/ROOT/pages/tinymceai-on-premises-database.adoc index 60a77f2df2..90e7d5c00b 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-database.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-database.adoc @@ -407,6 +407,37 @@ The AI service handles schema migration automatically. The pre-steps are: For production, enable Multi-AZ (or the equivalent zonal redundancy) and automated backups. +=== Managed database TLS + +Managed PostgreSQL services default to requiring TLS connections: + +* *AWS RDS:* `rds.force_ssl=1` (default for new instances) +* *Azure Database for PostgreSQL Flexible Server:* `require_secure_transport=ON` (default) +* *Google Cloud SQL:* TLS required unless explicitly disabled + +Without TLS configuration, the AI service connection fails with a generic error (commonly reported as a "permissions" issue). 
Configure `DATABASE_SSL_CA` with the provider's CA certificate bundle: + +[source,bash] +---- +# AWS RDS +DATABASE_SSL_CA=/certs/rds-combined-ca-bundle.pem + +# Azure Database for PostgreSQL +DATABASE_SSL_CA=/certs/DigiCertGlobalRootG2.crt.pem + +# Google Cloud SQL (when not using Cloud SQL Auth Proxy) +DATABASE_SSL_CA=/certs/server-ca.pem +---- + +Mount the certificate file into the container and reference the path in `DATABASE_SSL_CA`. Download the CA bundle from the cloud provider documentation. + +NOTE: `DATABASE_SSL_CERT` and `DATABASE_SSL_KEY` are required only for mutual TLS (mTLS). Most managed database services require only the CA certificate for server verification. + +[WARNING] +-- +If the managed database requires TLS and `DATABASE_SSL_CA` is not set, the AI service logs a connection error that does not mention TLS. Verify the database's TLS setting first when troubleshooting connection failures on managed services. +-- + [[host-docker-internal]] === Connecting to a host-local database from Docker @@ -564,6 +595,15 @@ STORAGE_BUCKET=BUCKET_NAME STORAGE_ENDPOINT=https://custom-s3-endpoint # optional, for S3-compatible ---- +The S3 access key requires the following minimum IAM permissions on the target bucket: + +* `s3:GetObject` +* `s3:PutObject` +* `s3:DeleteObject` +* `s3:ListBucket` + +For production, enable bucket versioning and server-side encryption (SSE-S3 or SSE-KMS). + NOTE: The correct variable names are `STORAGE_BUCKET` and `STORAGE_REGION`, not `STORAGE_S3_BUCKET` or `STORAGE_S3_REGION`. 
=== Azure Blob diff --git a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc index a9461d5a7f..e2c6b2e2f7 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc @@ -116,7 +116,7 @@ OPENAI_API_KEY=sk-proj-PASTE_OPENAI_KEY_HERE # ANTHROPIC_API_KEY=sk-ant-PASTE_ANTHROPIC_KEY_HERE # GOOGLE_API_KEY=AIza-PASTE_GOOGLE_KEY_HERE -# --- Filled in after creating an environment (leave blank for now) --- +# --- Filled in after creating an environment (Step 7). Used by the token server, not the AI service. --- AI_ENV_ID= AI_API_SECRET= ---- @@ -141,6 +141,8 @@ Both containers should report `healthy` in the STATUS column. If MySQL still sho === Launch the AI service +The AI service runs as a standalone container outside of the Docker Compose stack. This separation allows upgrading or reconfiguring the AI service without restarting the database and Redis. + Run from the same folder as the `.env` file: .Full launch script @@ -165,9 +167,10 @@ fi PROVIDERS+='}' # Resolve the compose network name (varies across Docker versions and folder names) -NETWORK=$(docker network ls --format '{{.Name}}' | grep "^$(basename "$PWD" | tr '[:upper:]' '[:lower:]')_default$" | head -1) +NETWORK=$(docker network ls --format '{{.Name}}' | grep -E "^$(basename "$PWD" | tr '[:upper:]' '[:lower:]')[_-]default$" | head -1) if [ -z "$NETWORK" ]; then - NETWORK="$(basename "$PWD" | tr '[:upper:]' '[:lower:]')_default" + echo "ERROR: Could not find the Docker Compose network. Run 'docker network ls' and pass the network name with --network=." + exit 1 fi docker run --init -d -p 8000:8000 \ @@ -188,7 +191,7 @@ docker run --init -d -p 8000:8000 \ ---- ==== -For Podman, replace `docker run` with `podman run` and use a Podman pod instead of a compose network. 
See xref:tinymceai-on-premises-production.adoc[Production deployment] for Podman-specific guidance. +For Podman, replace `docker run` with `podman run` and use a Podman pod instead of a compose network. See xref:tinymceai-on-premises-production.adoc[Production deployment] for Podman-specific guidance. See xref:tinymceai-on-premises-production.adoc#_podman_deployment[Podman deployment] for a full example. For native databases (the database runs on the host or in a managed service rather than in Docker), drop the `--network` flag and set `DATABASE_HOST=host.docker.internal` (Docker Desktop and Podman 4{plus}). On native Linux Docker, additionally pass `--add-host=host.docker.internal:host-gateway`. @@ -396,6 +399,8 @@ curl -s -X POST http://localhost:8000/v1/conversations \ -H "Content-Type: application/json" \ -d '{"id":"verify-1","title":"Verification"}' +NOTE: The command below uses the built-in `agent-1` model. If `MODELS` has been explicitly configured, replace `agent-1` with the `id` of one of the configured models. See xref:tinymceai-on-premises-providers.adoc#models-required[Defining the model list]. + curl -s -N -X POST http://localhost:8000/v1/conversations/verify-1/messages \ -H "Authorization: Bearer $TOKEN" \ -H "Content-Type: application/json" \ @@ -428,13 +433,16 @@ A successful round-trip confirms: container health, database connectivity, Redis == Updating configuration -IMPORTANT: `docker compose restart` after `.env` changes silently keeps the old environment values. The restart preserves the container and does not re-read `.env`. Always use `docker compose up -d --force-recreate` instead. +IMPORTANT: After changing `.env` values, containers must be recreated to pick up new environment variables. A simple restart (`docker restart` or `docker compose restart`) preserves the old values. 
[source,bash] ---- +# Recreate the data layer (MySQL, Redis): docker compose up -d --force-recreate -# Or recreate only the AI service: -docker compose up -d --force-recreate ai-service + +# Recreate the standalone AI service: +docker stop ai-service && docker rm ai-service +# Then re-run the launch script from Step 5. ---- For Kubernetes, update the Secret and trigger a rollout restart: diff --git a/modules/ROOT/pages/tinymceai-on-premises-production.adoc b/modules/ROOT/pages/tinymceai-on-premises-production.adoc index 984c25cbf8..77e2b4a2b0 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-production.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-production.adoc @@ -62,7 +62,7 @@ Server-Sent Events (SSE) streaming requires `proxy_buffering off`. Without it, A == Horizontal scaling -The AI service is stateless. All persistent state lives in the SQL database, Redis, and the file-storage back end. Any number of replicas can run behind a load balancer. All replicas must share identical environment variable configuration. +The AI service is stateless. All persistent state lives in the SQL database, Redis, and the file-storage back end. Any number of replicas can run behind a load balancer. All replicas must share identical environment variable configuration. On first boot or after an image upgrade, start a single replica and wait for it to become healthy before scaling up (see the upgrade procedure later on this page). === Scaling considerations @@ -71,7 +71,7 @@ The AI service is stateless. All persistent state lives in the SQL database, Red |Component |Scaling approach |AI service |Add more containers (stateless) |MySQL / PostgreSQL |Read replicas or managed DB (RDS, Cloud SQL, Azure Database) -|Redis |Redis Cluster or Sentinel; managed Redis (ElastiCache, Memorystore, Azure Cache) +|Redis |Redis Cluster or managed Redis with built-in replication (ElastiCache, Memorystore, Azure Cache). Redis Sentinel is not supported. |File storage |S3 / Azure Blob recommended for production.
The `database` storage driver is intended for development only. |=== @@ -181,6 +181,14 @@ spec: labels: app: ai-service spec: + terminationGracePeriodSeconds: 120 + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: topology.kubernetes.io/zone + whenUnsatisfiable: DoNotSchedule + labelSelector: + matchLabels: + app: ai-service imagePullSecrets: - name: tiny-registry containers: @@ -230,6 +238,16 @@ spec: value: "us-east-1" - name: STORAGE_BUCKET value: "example-ai-storage-bucket" + - name: STORAGE_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: ai-service-secrets + key: storage-access-key + - name: STORAGE_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: ai-service-secrets + key: storage-secret-key - name: ENABLE_METRIC_LOGS value: "true" readinessProbe: @@ -254,6 +272,8 @@ spec: ---- ==== +TIP: For PostgreSQL, change `DATABASE_DRIVER` to `"postgres"`, update `DATABASE_HOST` to the PostgreSQL endpoint, and ensure the `cs-on-premises` schema exists or set `DATABASE_SCHEMA=public`. See xref:tinymceai-on-premises-database.adoc#postgresql-schema-prerequisite[PostgreSQL schema prerequisite]. + === Service [source,yaml] @@ -271,6 +291,22 @@ spec: targetPort: 8000 ---- +=== Bootstrap the environment + +After the first pod reaches Ready status, create an environment and access key through the Management Panel: + +. Access the Management Panel at `\https://<ai-service-host>/panel/`. +. Sign in using the `ENVIRONMENTS_MANAGEMENT_SECRET_KEY`. +. Create an environment and note the Environment ID. +. Create an access key and copy the API Secret immediately (shown only once). + +These values are required by the token endpoint. See xref:tinymceai-on-premises-getting-started.adoc#_create_an_environment_and_access_key[Getting started — Create an environment and access key] for details. + +[IMPORTANT] +-- +Always create environments through the Management Panel UI. Environments created through the raw management API are not fully registered and cause `invalid-jwt-payload` errors.
+-- + === Ingress [source,yaml] @@ -327,6 +363,24 @@ spec: averageUtilization: 70 ---- +NOTE: The AI service is I/O-bound (waiting on upstream LLM responses). CPU-based autoscaling is a safe starting point but may not trigger under high concurrency if CPU remains low. For production, consider supplementing with custom metrics (concurrent SSE streams, request queue depth) through KEDA or the Prometheus Adapter. + +=== Pod disruption budget + +[source,yaml] +---- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: ai-service-pdb + namespace: tinymce-ai +spec: + minAvailable: 1 + selector: + matchLabels: + app: ai-service +---- + @@ -378,13 +432,15 @@ spec: ---- ==== +NOTE: The AI service does not use ECS task role credentials for S3 access. Add `STORAGE_ACCESS_KEY_ID` and `STORAGE_SECRET_ACCESS_KEY` as secrets entries from AWS Secrets Manager. + === Infrastructure recommendations [cols=",",options="header",] |=== |Service |AWS recommendation |Database |RDS for MySQL 8.0 (Multi-AZ for high availability (HA)) -|Redis |ElastiCache for Redis 7 (cluster mode) +|Redis |ElastiCache for Redis 7 (cluster-mode-disabled with Multi-AZ replication, or cluster-mode-enabled with `REDIS_CLUSTER_NODES`) |Storage |Same-region S3 bucket |Load balancer |ALB with `/health` target health check, 300 s idle timeout |Secrets |AWS Secrets Manager @@ -412,6 +468,8 @@ spec: |Large language model (LLM) API key rotation |Add the new key to the `PROVIDERS` array, restart the service, then revoke the old key after confirming the new one works. |=== +NOTE: Azure-specific (AKS, Azure Database, Azure Cache) and GCP-specific (GKE, Cloud SQL, Memorystore) deployment guidance is planned. The secrets management, encryption, and observability recommendations above apply across all cloud providers. + == Rate limiting The AI service has no built-in rate limiting. 
Place rate-limit rules in front of the service to prevent a runaway client from consuming LLM provider quota or overloading the database. @@ -465,6 +523,14 @@ Set the `ENABLE_METRIC_LOGS` environment variable to enable request-level JSON l When enabled, the service writes a structured JSON entry for each request. Key fields include the request duration, HTTP status code, and outcome status. These entries are suitable for ingestion into any log aggregator that supports JSON parsing. +.Example metric log entry +[source,json] +---- +{"timestamp":"2026-05-19T10:30:00.123Z","method":"POST","path":"/v1/conversations/abc123/messages","statusCode":200,"durationMs":3421} +---- + +TIP: Inspect the first few entries with `docker logs ai-service --tail 5 | jq .` to discover all available fields for the current service version. + === OpenTelemetry [source,bash] @@ -588,7 +654,7 @@ Redis holds ephemeral state. Losing Redis data does not affect persistent data. docker pull registry.containers.tiny.cloud/ai-service-tiny:NEW_VERSION ---- . For rolling deploys across version boundaries: start *one* instance at the new version and wait for it to become healthy before rolling the rest. -. For Kubernetes: update the image tag in the Deployment. The default `RollingUpdate` strategy handles zero-downtime upgrades, provided the first new pod becomes Ready before the rollout continues. +. For Kubernetes: update the image tag in the Deployment. Set `strategy.rollingUpdate.maxSurge: 1` and `maxUnavailable: 0` to ensure at least one old pod remains available during migrations. The default `RollingUpdate` strategy handles zero-downtime upgrades, provided the first new pod becomes Ready before the rollout continues. . Verify `/health` on every replica before declaring the upgrade complete. Review the release notes for the target version and take a database backup before upgrading. 
diff --git a/modules/ROOT/pages/tinymceai-on-premises-providers.adoc b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc index 97b5538653..ab317d3267 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-providers.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc @@ -62,7 +62,9 @@ The diagram reflects three stacked layers: *how to authenticate* with each upstr [[models-required]] == Defining the model list -The on-premises service ships with a built-in default model list that covers *only* OpenAI, Anthropic, and Google direct. For every other provider type (Azure, Bedrock, Vertex, openai-compatible), define `MODELS` explicitly; otherwise nothing usable is exposed. +`MODELS` is required for Azure, Bedrock, Vertex, and openai-compatible providers. Without it, these providers expose nothing usable to clients. + +For OpenAI, Anthropic, and Google direct, the service has built-in routing knowledge that allows requests to reach those providers without an explicit `MODELS` definition. However, the `/v1/models/1` endpoint still returns only a disabled `agent-1` placeholder, and the editor model picker will not display real model names. For a production deployment, always define `MODELS` explicitly regardless of provider type. When only `PROVIDERS` is configured and `MODELS` is omitted, a `GET /v1/models/1` call returns only the built-in `agent-1` placeholder with `allowed: false`: @@ -110,7 +112,7 @@ A full field reference for `MODELS` is at the end of this page. == OpenAI -API key from https://platform.openai.com/api-keys[platform.openai.com]. With OpenAI alone, `MODELS` can be omitted; the built-in catalog covers common models. +API key from https://platform.openai.com/api-keys[platform.openai.com]. The built-in routing handles OpenAI models without an explicit `MODELS` definition, but defining `MODELS` is recommended for production (see <<models-required>>).
.Configuration details [%collapsible] @@ -877,7 +879,7 @@ A `MODELS` array routes individual models to specific providers using the `provi This wires conversations to OpenAI, reviews to Bedrock-hosted Claude, and quick actions to a local Ollama model. The TinyMCE editor will pick the appropriate provider for each feature based on which models declare which `features`. -A `MODELS` entry with a `provider` value that does not exist in `PROVIDERS` is silently skipped; that model will not appear in `/v1/models/1`. When a model is missing from the model selector in the rich text editor, check the spelling of its `provider` field. +A `MODELS` entry with a `provider` value that does not exist in `PROVIDERS` is silently skipped; that model will not appear in `/v1/models/1`. When a model is missing from the model selector in the rich text editor, check the spelling of its `provider` field against the keys in `PROVIDERS` (case-sensitive). See xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting] for additional debugging steps. diff --git a/modules/ROOT/pages/tinymceai-on-premises-reference.adoc b/modules/ROOT/pages/tinymceai-on-premises-reference.adoc index 7470783f81..c22e49abe3 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-reference.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-reference.adoc @@ -31,7 +31,7 @@ Alphabetized. Required-ness is marked relative to a minimum working deployment. |`LLM_TELEMETRY_ENABLED` |No |`false` |Primary OpenTelemetry switch. |`LLM_TIMEOUT_MS` |No |180000 |Per-request large language model (LLM) timeout in ms. Raise for large self-hosted models. |`MCP_SERVERS` |No |- |JSON object; Model Context Protocol (MCP) server configuration. See xref:tinymceai-on-premises-advanced.adoc[Advanced scenarios]. -|`MODELS` |Sometimes |- |JSON array; required for Azure / Bedrock / Vertex / openai-compatible. See xref:tinymceai-on-premises-providers.adoc[LLM providers]. 
+|`MODELS` |Yes (Azure, Bedrock, Vertex, openai-compatible); recommended for all providers |- |JSON array defining exposed models. Required for Azure, Bedrock, Vertex, and openai-compatible providers. Recommended for OpenAI, Anthropic, and Google direct. See xref:tinymceai-on-premises-providers.adoc[LLM providers]. |`OTEL_DEBUG` |No |- |Verbose OpenTelemetry Protocol (OTLP) diagnostic logging. |`OTEL_EXPORTER_OTLP_TRACES_ENDPOINT` |If using OTEL |- |OTLP traces endpoint URL. |`OTEL_TRACES_SAMPLER_ARG` |No |`1.0` |OTLP sampling rate (0.0 to 1.0). @@ -74,7 +74,7 @@ Alphabetized. Required-ness is marked relative to a minimum working deployment. |GET |`/health` |None |Liveness probe. Returns `{"serviceName":"on-premises-http","uptime":<number>}`. Not metric-logged. |GET |`/docs/` |None |ReDoc-rendered API documentation. |GET |`/v1/api/doc.json` |None |OpenAPI 3 JSON spec. -|GET |`/panel/` |Management secret |Management Panel UI. Sign in with `ENVIRONMENTS_MANAGEMENT_SECRET_KEY`. +|GET |`/panel/` |Management secret (login form) |Management Panel UI. Sign in with `ENVIRONMENTS_MANAGEMENT_SECRET_KEY` through the browser login form. |GET |`/v1/models/1` |JWT |List available models for the current token. The compatibility version literal `1` is the only accepted value; `v1`, `v2`, `latest` all return 500. |POST |`/v1/conversations` |JWT |Create a conversation. Body *must* include client-supplied `id`. |GET |`/v1/conversations` |JWT |List conversations for the current `sub`.
diff --git a/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc b/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc index 78f405aab0..7ec14c2cc6 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc @@ -291,6 +291,7 @@ curl -s -X POST http://localhost:8000/v1/conversations \ -H "Content-Type: application/json" \ -d '{"id":"smoke-1","title":"Smoke test"}' +# Replace 'agent-1' with a model ID from MODELS if explicitly configured curl -N -X POST http://localhost:8000/v1/conversations/smoke-1/messages \ -H "Authorization: Bearer $TOKEN" \ -H "Content-Type: application/json" \ diff --git a/modules/ROOT/pages/tinymceai-on-premises.adoc b/modules/ROOT/pages/tinymceai-on-premises.adoc index 023ece9242..9ed451f130 100644 --- a/modules/ROOT/pages/tinymceai-on-premises.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises.adoc @@ -113,7 +113,7 @@ For a first-time deployment, progress through the guides in order. Each guide ca |Five-minute Docker Compose quick start. Stand up the AI service, database, Redis, token server, and a browser editor. |xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage] -|Data layer setup: MySQL and PostgreSQL setup, Redis configuration, container runtimes (Docker, Podman, Kubernetes, ECS), and reverse proxy with TLS. +|Data layer setup: MySQL and PostgreSQL configuration, Redis connectivity, file storage options (S3, Azure Blob, filesystem, database), and host-local database connectivity. |xref:tinymceai-on-premises-providers.adoc[LLM providers] |Connect to OpenAI, Anthropic, Google Gemini, Azure OpenAI, AWS Bedrock, Google Vertex AI, or any OpenAI-compatible endpoint (Ollama, vLLM, LM Studio). Custom model catalog and API key rotation. 
From ad5b4edb04a84677b265e1516ca718f00cda6087 Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Wed, 20 May 2026 21:34:11 +1000 Subject: [PATCH 12/48] DOC-3498: Apply CockroachDB-benchmark review pass and address PR feedback - Add Credentials table and OpenAPI capability to Overview - Expand CORS section with format, wildcards, preflight, common mistakes - Add production readiness checklist and prerequisite statement - Document agent-1 default model behavior on Providers page - Add MODELS and secrets to ECS task definition example - Document IAM/IRSA limitation across all deployment targets - Trim rate limiting, distributed logging, PDB, topology to one-liners - Remove marketing sections from Advanced (guardrails, document pipeline) - Label install commands in JWT examples for clarity - Address reviewer feedback on Getting Started clarity and formatting --- .../pages/tinymceai-on-premises-advanced.adoc | 87 ------------- .../pages/tinymceai-on-premises-database.adoc | 9 +- .../tinymceai-on-premises-frameworks.adoc | 59 ++++++++- ...tinymceai-on-premises-getting-started.adoc | 39 ++++-- .../ROOT/pages/tinymceai-on-premises-jwt.adoc | 14 ++- .../tinymceai-on-premises-production.adoc | 118 +++++------------- .../tinymceai-on-premises-providers.adoc | 16 +++ .../tinymceai-on-premises-reference.adoc | 2 +- ...tinymceai-on-premises-troubleshooting.adoc | 6 +- modules/ROOT/pages/tinymceai-on-premises.adoc | 36 +++++- 10 files changed, 185 insertions(+), 201 deletions(-) diff --git a/modules/ROOT/pages/tinymceai-on-premises-advanced.adoc b/modules/ROOT/pages/tinymceai-on-premises-advanced.adoc index e0a9aee863..433fc05366 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-advanced.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-advanced.adoc @@ -221,93 +221,6 @@ CAUTION: Conversation history is isolated by the `sub` claim in the JWT. 
Reusing -== Custom models with guardrails - -*Use case:* A regulated industry (healthcare, finance, legal) needs AI writing assistance but must use approved models with content filtering. - -=== Implementation - -. *Use a self-hosted model with an OpenAI-compatible API (such as vLLM or Ollama):* -+ -[source,bash] ----- --e PROVIDERS='{ - "approved-llm": { - "type": "openai-compatible", - "baseUrl": "http://host.docker.internal:8080/v1", - "headers": {"Authorization": "Bearer internal-key"} - } -}' ----- - -. *Restrict to specific models only:* -+ -[source,bash] ----- --e MODELS='[{ - "id": "llama-3.1-70b-medical", - "name": "Medical Assistant (Llama 3.1 70B)", - "description": "Fine-tuned for medical documentation. HIPAA-compliant.", - "provider": "approved-llm", - "recommended": true, - "features": ["conversations", "reviews", "actions"] -}, { - "id": "llama-3.1-8b-general", - "name": "General Writing (Llama 3.1 8B)", - "description": "Fast general-purpose model for drafting and editing.", - "provider": "approved-llm", - "features": ["actions"] -}]' ----- - -. *Result:* The configuration exposes only approved, audited models. Content does not leave the network. Combine with Langfuse for a full audit trail. - - - -== AI-powered document pipeline - -*Use case:* Legal team drafts contracts. AI assists with clause generation, compliance checking, and precedent search, powered by internal legal databases. 
- -=== Architecture - -[.text-center] -image::tinymceai-on-premises/advanced-scenarios-fig-3.svg[Regulated industry scenario: legal editor connects to AI service with contract-db and compliance MCP servers,width=100%] - -=== Configuration - -.Document pipeline MCP server configuration -[%collapsible] -==== -[source,bash] ----- --e MCP_SERVERS='{ - "contract-db": { - "url": "http://host.docker.internal:3001/mcp", - "options": {"callToolTimeout": 30} - }, - "compliance-checker": { - "url": "http://host.docker.internal:3002/mcp", - "options": {"callToolTimeout": 60} - }, - "precedent-search": { - "url": "http://host.docker.internal:3003/mcp", - "tools": {"disabled": ["delete_precedent"]}, - "options": {"callToolTimeout": 120} - } -}' ----- -==== - -*Example prompts:* - -* "Draft a non-compete clause for California employees" -* "Check this contract section for GDPR compliance issues" -* "Find precedent for limitation of liability in SaaS agreements" - -Internal databases supply the data for these prompts. Aside from the LLM request itself, no content goes to external services. - - - == Web-augmented research assistant *Use case:* Content team writing blog posts and marketing copy can pull live data from the web and internal sources. diff --git a/modules/ROOT/pages/tinymceai-on-premises-database.adoc b/modules/ROOT/pages/tinymceai-on-premises-database.adoc index 90e7d5c00b..8bba9f5009 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-database.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-database.adoc @@ -595,14 +595,7 @@ STORAGE_BUCKET=BUCKET_NAME STORAGE_ENDPOINT=https://custom-s3-endpoint # optional, for S3-compatible ---- -The S3 access key requires the following minimum IAM permissions on the target bucket: - -* `s3:GetObject` -* `s3:PutObject` -* `s3:DeleteObject` -* `s3:ListBucket` - -For production, enable bucket versioning and server-side encryption (SSE-S3 or SSE-KMS). 
+The S3 access key requires read, write, delete, and list permissions on the target bucket. For production, enable bucket versioning and server-side encryption (SSE-S3 or SSE-KMS). NOTE: The correct variable names are `STORAGE_BUCKET` and `STORAGE_REGION`, not `STORAGE_S3_BUCKET` or `STORAGE_S3_REGION`. diff --git a/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc b/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc index 3f0899be75..56383fc3e7 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc @@ -19,6 +19,13 @@ For general framework setup (installing wrappers, component structure, server-si The on-premises AI integration adds the options documented below to the standard TinyMCE `init` configuration. +== Token flow + +[.text-center] +image::tinymceai-on-premises/framework-integration-fig-1.svg[alt="Sequence diagram showing editor calling token provider which fetches a JWT from the application backend then passes it to the AI service",width=100%] + +The plugin calls the token provider on initialization and again before the cached token expires. The application back end authenticates the browser request through its own session layer, signs an HS256 JWT with the API Secret, and returns it. The plugin then sends the JWT to the AI service with every request. + == Required editor options @@ -151,9 +158,38 @@ This pattern avoids cookies entirely and works well for cross-origin setups. == Cross-origin requests to the AI service -When `tinymceai_service_url` points to a different origin from the page (the common production case), the AI service must return Cross-Origin Resource Sharing (CORS) headers permitting the editor origin. The service reads the `ALLOWED_ORIGINS` environment variable for this. +In production the editor page and the AI service almost always live on different origins (`\https://app.yourcompany.com` vs `\https://ai.yourcompany.com`). 
The AI service must respond with CORS headers that permit the editor origin; otherwise the browser blocks every request. + +=== Configuring `ALLOWED_ORIGINS` -To verify CORS from a terminal: +Set the `ALLOWED_ORIGINS` environment variable on the AI service container to a comma-separated list of permitted editor origins (scheme + host + port): + +[source,bash] +---- +-e ALLOWED_ORIGINS='https://app.yourcompany.com,https://staging.yourcompany.com' +---- + +[cols="1,3",options="header"] +|=== +|Behavior |Detail + +|Format +|Comma-separated origins. Each entry must include the scheme (`https://`). Do not include paths or trailing slashes. + +|Default when unset +|The service rejects cross-origin requests (no `Access-Control-Allow-Origin` header). Set this variable for any deployment where the editor is on a different origin. + +|Wildcard +|`*` is accepted but not recommended for production. It allows any origin to call the AI service endpoints. + +|Preflight (OPTIONS) +|The service handles `OPTIONS` preflight requests internally and responds with the appropriate `Access-Control-Allow-Methods` and `Access-Control-Allow-Headers`. No reverse proxy configuration is required for OPTIONS. + +|Credentials +|The service responds with `Access-Control-Allow-Credentials: true` when the requesting origin matches an entry in `ALLOWED_ORIGINS`. +|=== + +=== Verifying CORS [source,bash] ---- @@ -165,6 +201,25 @@ curl -i -X OPTIONS https://ai.yourcompany.com/v1/conversations \ The response should include `Access-Control-Allow-Origin: \https://app.yourcompany.com`. If it shows `*` or no CORS header, update `ALLOWED_ORIGINS` on the AI service container and restart. +=== Common CORS mistakes + +[cols="1,2",options="header"] +|=== +|Mistake |Fix + +|Trailing slash in origin (`\https://app.example.com/`) +|Remove the trailing slash. + +|Missing port for non-standard ports (`\https://app.example.com:3000`) +|Include the port in `ALLOWED_ORIGINS`. 
+ +|`ALLOWED_ORIGINS` not set at all +|All cross-origin requests fail silently. Add the editor origin. + +|Reverse proxy stripping `Origin` header +|Ensure the proxy passes the `Origin` header to the AI service. +|=== + == Content Security Policy (CSP) diff --git a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc index e2c6b2e2f7..21b5b6514f 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc @@ -3,7 +3,15 @@ :description: Five-minute Docker Compose quick start for TinyMCE AI on-premises service :keywords: AI, on-premises, getting started, docker, quick start -This section produces a fully working setup (AI service, database, Redis, token server, and a browser editor) in roughly five minutes on any machine with Docker. This quick start validates the stack components before designing a production deployment. Production engineers should still review this section to understand the conceptual flow before continuing to xref:tinymceai-on-premises-production.adoc[the Production Deployment Guide]. +This guide sets up a fully working local stack in roughly five minutes on any machine with Docker: + +* **MySQL 8.0** — conversation history and metadata +* **Redis** — caching and session state +* **TinyMCE AI service** — the on-premises AI back end +* **A minimal token server** (Node.js) — signs JWTs for the editor +* **A browser page with TinyMCE** — validates the end-to-end flow + +The quick start is designed to validate the stack components before moving to a production deployment. Production engineers should still review this section to understand the conceptual flow before continuing to xref:tinymceai-on-premises-production.adoc[the Production Deployment Guide]. 
== Five-minute demo with Docker Compose @@ -42,9 +50,11 @@ Replace `TINY_REGISTRY_USERNAME` with the username supplied by the Tiny account docker pull registry.containers.tiny.cloud/ai-service-tiny:latest ---- -For Podman, substitute `podman pull`. For production, pin a specific version tag (for example `:5.1.0`) rather than `:latest`. +For Podman, substitute `podman pull`. For production, pin a specific version tag (for example `:5.1.0`) rather than `:latest` to ensure repeatable deployments and avoid unexpected breaking changes. -=== Create `docker-compose.yml` +=== Create `docker-compose.yml` (data layer) + +This compose file starts the data layer services (MySQL and Redis) that the AI service depends on. The AI service itself is started separately in the next step, which allows upgrading or reconfiguring it independently. Create the file with exactly the contents below. Indentation is two spaces, never tabs. @@ -80,7 +90,7 @@ volumes: mysql_data: ---- -TIP: Pin `mysql:8.0`, not `mysql:8`. The `:8` tag points to the latest MySQL, which is incompatible with the AI service. See xref:tinymceai-on-premises-database.adoc#mysql-version-pinning[MySQL version pinning] for details. +TIP: Pin `mysql:8.0`, not `mysql:8`. The `:8` tag resolves to the latest MySQL minor version, which may use authentication plugins or SQL modes incompatible with the AI service schema migrations. See xref:tinymceai-on-premises-database.adoc#mysql-version-pinning[MySQL version pinning] for details. PostgreSQL is equally supported. See xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage] for an equivalent compose file. Review the xref:tinymceai-on-premises-database.adoc#postgresql-schema-prerequisite[PostgreSQL schema prerequisite] before switching. @@ -102,26 +112,29 @@ Docker Desktop (macOS, Windows) and Podman 4{plus} auto-inject this alias. 
Nativ [source,bash] ---- # --- Required: provided by Tiny --- -LICENSE_KEY=PASTE_SUPPLIED_LICENSE_KEY_HERE -TINYMCE_API_KEY=PASTE_TINYMCE_API_KEY_HERE +LICENSE_KEY= + +# --- Optional: only required when loading TinyMCE from cdn.tiny.cloud --- +# Omit for self-hosted editor bundles. +TINYMCE_API_KEY= # --- Required: strong secret used to log into the Management Panel --- -MANAGEMENT_SECRET=REPLACE_WITH_STRONG_SECRET +MANAGEMENT_SECRET= # --- Required: database password (must match docker-compose.yml) --- -DB_PASSWORD=changeme +DB_PASSWORD= # --- Required: at least one LLM provider key --- -OPENAI_API_KEY=sk-proj-PASTE_OPENAI_KEY_HERE -# ANTHROPIC_API_KEY=sk-ant-PASTE_ANTHROPIC_KEY_HERE -# GOOGLE_API_KEY=AIza-PASTE_GOOGLE_KEY_HERE +OPENAI_API_KEY= +# ANTHROPIC_API_KEY= +# GOOGLE_API_KEY= # --- Filled in after creating an environment (Step 7). Used by the token server, not the AI service. --- AI_ENV_ID= AI_API_SECRET= ---- -IMPORTANT: `LICENSE_KEY` and `TINYMCE_API_KEY` are different credentials. `LICENSE_KEY` is the long string from the account representative. `TINYMCE_API_KEY` is the short string from the tiny.cloud dashboard. +IMPORTANT: `LICENSE_KEY` and `TINYMCE_API_KEY` are different credentials. `LICENSE_KEY` is the long string from the account representative that activates the AI service. `TINYMCE_API_KEY` is the short string from the tiny.cloud dashboard used to load TinyMCE from the CDN — it is not required for self-hosted editor bundles. See the xref:tinymceai-on-premises.adoc#_credentials[Credentials] section on the Overview page. === Start MySQL and Redis @@ -137,7 +150,7 @@ Wait ~15 seconds for MySQL to initialize, then verify: docker compose ps ---- -Both containers should report `healthy` in the STATUS column. If MySQL still shows `starting`, wait another 10 seconds and re-run. +Both data layer containers (MySQL and Redis) should report `healthy` in the STATUS column. If MySQL still shows `starting`, wait another 10 seconds and re-run. 
=== Launch the AI service diff --git a/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc b/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc index 7ff72cf294..888d5231a7 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc @@ -210,11 +210,13 @@ Each example reads `AI_ENV_ID` and `AI_API_SECRET` from environment variables, a .Node.js (Express + jsonwebtoken) [%collapsible] ==== +.Install dependencies [source,bash] ---- npm install express jsonwebtoken ---- +.Token endpoint [source,javascript] ---- const express = require('express'); @@ -265,11 +267,13 @@ app.listen(3000); .Python (Django + PyJWT) [%collapsible] ==== +.Install dependencies [source,bash] ---- pip install PyJWT ---- +.Token endpoint [source,python] ---- import os @@ -330,11 +334,13 @@ urlpatterns = [ .Python (Flask + PyJWT) [%collapsible] ==== +.Install dependencies [source,bash] ---- pip install Flask PyJWT ---- +.Token endpoint [source,python] ---- import os @@ -383,11 +389,13 @@ def ai_token(): .PHP (Laravel {plus} firebase/php-jwt) [%collapsible] ==== +.Install dependencies [source,bash] ---- composer require firebase/php-jwt ---- +.Token endpoint [source,php] ---- :` permissions to expose each model to users. See xref:tinymceai-on-premises-jwt.adoc[JWT authentication] for permission syntax. + +=== When `MODELS` is required + `MODELS` is required for Azure, Bedrock, Vertex, and openai-compatible providers. Without it, these providers expose nothing usable to clients. For OpenAI, Anthropic, and Google direct, the service has built-in routing knowledge that allows requests to reach those providers without an explicit `MODELS` definition. However, the `/v1/models/1` endpoint still returns only a disabled `agent-1` placeholder, and the editor model picker will not display real model names. For a production deployment, always define `MODELS` explicitly regardless of provider type. 
diff --git a/modules/ROOT/pages/tinymceai-on-premises-reference.adoc b/modules/ROOT/pages/tinymceai-on-premises-reference.adoc index c22e49abe3..4584bf87ba 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-reference.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-reference.adoc @@ -10,7 +10,7 @@ Alphabetized. Required-ness is marked relative to a minimum working deployment. [cols=",,,",options="header",] |=== |Variable |Required |Default |Description -|`ALLOWED_ORIGINS` |Recommended |- |Comma-separated list of Cross-Origin Resource Sharing (CORS)-allowed editor origins. Required for cross-origin editor deployments. +|`ALLOWED_ORIGINS` |Recommended |- |Comma-separated list of CORS-allowed editor origins. Required for cross-origin editor deployments. See xref:tinymceai-on-premises-frameworks.adoc#_cross_origin_requests_to_the_ai_service[Cross-origin requests] for format, wildcards, and verification. |`DATABASE_DATABASE` |Yes |- |Database name (`ai_service` is the convention). |`DATABASE_DRIVER` |Yes |- |`mysql` or `postgres`. |`DATABASE_HOST` |Yes |- |Database hostname or IP. diff --git a/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc b/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc index 7ec14c2cc6..46e2bd0f49 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc @@ -276,10 +276,12 @@ echo "$PROVIDERS" | jq . [source,bash] ---- -docker compose exec ai-service /bin/sh -c "nc -zv mysql 3306" -docker compose exec ai-service /bin/sh -c "nc -zv redis 6379" +docker exec ai-service /bin/sh -c "nc -zv mysql 3306" +docker exec ai-service /bin/sh -c "nc -zv redis 6379" ---- +Replace `mysql` and `redis` with the actual hostnames configured in `DATABASE_HOST` and `REDIS_HOST`. If the AI service was started with `docker compose` rather than standalone `docker run`, use `docker compose exec ai-service` instead. 
+ *End-to-end smoke test (token mint through streamed response):* [source,bash] diff --git a/modules/ROOT/pages/tinymceai-on-premises.adoc b/modules/ROOT/pages/tinymceai-on-premises.adoc index 9ed451f130..371946309e 100644 --- a/modules/ROOT/pages/tinymceai-on-premises.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises.adoc @@ -17,9 +17,12 @@ Data flow for a single AI request: . The client application requests a JWT from the token endpoint. . The client sends a prompt with the JWT to the AI service over HTTPS. . The AI service verifies the token, checks per-feature permissions, and forwards the prompt to the configured large language model (LLM). -. The LLM streams its response back to the client through Server-Sent Events (SSE). +. The LLM streams its response back to the AI service. +. The AI service relays the response to the client through Server-Sent Events (SSE). -When used with TinyMCE `tinymceai`, the plugin handles steps 1, 2, and 4 automatically through the `tinymceai_token_provider` callback. +When used with TinyMCE `tinymceai`, the plugin handles steps 1, 2, and 5 automatically through the `tinymceai_token_provider` callback. + +IMPORTANT: The browser connects directly to the AI service — requests do not pass through the application back end. The AI service must be network-reachable from the end-user browser, which means it must have a public URL (or be accessible through a VPN/internal network when deployed on an intranet). Configure xref:tinymceai-on-premises-frameworks.adoc#_cross_origin_requests_to_the_ai_service[CORS] and xref:tinymceai-on-premises-production.adoc#_tls_https[TLS] on the AI service accordingly. The shared secret (API Secret) never leaves the back end; the editor and the AI service only ever see signed tokens. @@ -64,7 +67,36 @@ The shared secret (API Secret) never leaves the back end; the editor and the AI |Horizontal scaling |The service is stateless; add replicas behind a load balancer without shared local state. 
+ +|OpenAPI specification +|Published at `/v1/api/doc.json` with interactive documentation at `/docs/`. Auto-generate clients in any language. +|=== + +== Credentials + +Three credentials are involved in an on-premises deployment. They are distinct and serve different purposes. + +[cols="1,1,2,1",options="header"] |=== +|Credential |Where it lives |What it does |Required? + +|`LICENSE_KEY` +|AI service container (environment variable) +|Activates the AI service. A long string provided by the Tiny account representative. +|Yes — the service refuses to start without it. + +|`TINYMCE_API_KEY` +|Editor page (CDN script URL) or build configuration +|Authenticates against `cdn.tiny.cloud` when loading TinyMCE from the CDN. This is the short string from the tiny.cloud dashboard. +|Only when loading TinyMCE from the CDN. Omit for self-hosted editor bundles. + +|`license_key` (init option) +|`tinymce.init({ license_key: 'T8LK:...' })` +|Activates premium TinyMCE features when using a self-hosted editor bundle (not the CDN). +|Only for self-hosted editor deployments. Provided by the Tiny account representative. +|=== + +NOTE: `LICENSE_KEY` (the AI service license) and `TINYMCE_API_KEY` / `license_key` (the editor license) are different credentials from different sources. Do not interchange them. 
== Prerequisites From 9781ec53f9a0a72e367c2b33c9ac295dd7994ffb Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Wed, 20 May 2026 21:56:15 +1000 Subject: [PATCH 13/48] DOC-3498: Style fixes, terminology consistency, and external links - Fix "On-Premise" to "on-premises" in page titles - Update Advanced description to match trimmed content - Add external links: K8s Secrets, Ingress, HPA, KEDA, OTLP, nginx - Replace jargon ("upstream", "definitive") with neutral phrasing - Normalize xref capitalization to sentence case - Replace "ensure" with direct imperatives per style guide --- .../ROOT/pages/tinymceai-on-premises-frameworks.adoc | 4 ++-- .../pages/tinymceai-on-premises-getting-started.adoc | 6 +++--- .../ROOT/pages/tinymceai-on-premises-production.adoc | 10 +++++----- .../ROOT/pages/tinymceai-on-premises-providers.adoc | 8 ++++---- .../pages/tinymceai-on-premises-troubleshooting.adoc | 2 +- modules/ROOT/pages/tinymceai-on-premises.adoc | 4 ++-- 6 files changed, 17 insertions(+), 17 deletions(-) diff --git a/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc b/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc index 56383fc3e7..6814afb107 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc @@ -158,7 +158,7 @@ This pattern avoids cookies entirely and works well for cross-origin setups. == Cross-origin requests to the AI service -In production the editor page and the AI service almost always live on different origins (`\https://app.yourcompany.com` vs `\https://ai.yourcompany.com`). The AI service must respond with CORS headers that permit the editor origin; otherwise the browser blocks every request. +In production the editor page and the AI service typically run on different origins (`\https://app.yourcompany.com` and `\https://ai.yourcompany.com`). 
The AI service must respond with CORS headers that permit the editor origin; otherwise the browser blocks every request. === Configuring `ALLOWED_ORIGINS` @@ -217,7 +217,7 @@ The response should include `Access-Control-Allow-Origin: \https://app.yourcompa |All cross-origin requests fail silently. Add the editor origin. |Reverse proxy stripping `Origin` header -|Ensure the proxy passes the `Origin` header to the AI service. +|Configure the proxy to pass the `Origin` header to the AI service. |=== diff --git a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc index 21b5b6514f..22babce7af 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc @@ -1,4 +1,4 @@ -= Getting started with TinyMCE AI On-Premise += Getting started with TinyMCE AI on-premises :navtitle: Getting started :description: Five-minute Docker Compose quick start for TinyMCE AI on-premises service :keywords: AI, on-premises, getting started, docker, quick start @@ -11,7 +11,7 @@ This guide sets up a fully working local stack in roughly five minutes on any ma * **A minimal token server** (Node.js) — signs JWTs for the editor * **A browser page with TinyMCE** — validates the end-to-end flow -The quick start is designed to validate the stack components before moving to a production deployment. Production engineers should still review this section to understand the conceptual flow before continuing to xref:tinymceai-on-premises-production.adoc[the Production Deployment Guide]. +The quick start is designed to validate the stack components before moving to a production deployment. Production engineers can review this section to understand the conceptual flow before continuing to xref:tinymceai-on-premises-production.adoc[Production deployment]. 
== Five-minute demo with Docker Compose @@ -50,7 +50,7 @@ Replace `TINY_REGISTRY_USERNAME` with the username supplied by the Tiny account docker pull registry.containers.tiny.cloud/ai-service-tiny:latest ---- -For Podman, substitute `podman pull`. For production, pin a specific version tag (for example `:5.1.0`) rather than `:latest` to ensure repeatable deployments and avoid unexpected breaking changes. +For Podman, substitute `podman pull`. For production, pin a specific version tag (for example `:5.1.0`) rather than `:latest` for repeatable deployments and to avoid unexpected breaking changes. === Create `docker-compose.yml` (data layer) diff --git a/modules/ROOT/pages/tinymceai-on-premises-production.adoc b/modules/ROOT/pages/tinymceai-on-premises-production.adoc index 3c4fa5d44e..74bdace67c 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-production.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-production.adoc @@ -59,7 +59,7 @@ server { [IMPORTANT] -- -Server-Sent Events (SSE) streaming requires `proxy_buffering off`. Without it, AI responses appear to hang until the entire response is generated. +Server-Sent Events (SSE) streaming requires https://nginx.org/en/docs/http/ngx_http_proxy_module.html#proxy_buffering[`proxy_buffering off`]. Without it, AI responses appear to hang until the entire response is generated. -- === AWS ALB @@ -168,7 +168,7 @@ stringData: } ---- -In production, use Sealed Secrets, External Secrets Operator, or HashiCorp Vault rather than committing raw secret manifests. +In production, use https://sealed-secrets.netlify.app/[Sealed Secrets], https://external-secrets.io/[External Secrets Operator], or https://www.vaultproject.io/[HashiCorp Vault] rather than committing raw secret manifests. For the Kubernetes Secret resource itself, see the https://kubernetes.io/docs/concepts/configuration/secret/[Kubernetes Secrets documentation]. === Deployment @@ -316,7 +316,7 @@ These values are required by the token endpoint. 
See xref:tinymceai-on-premises- Always create environments through the Management Panel UI. Environments created through the raw management API are not fully registered and cause `invalid-jwt-payload` errors. -- -=== Ingress +=== https://kubernetes.io/docs/concepts/services-networking/ingress/[Ingress] [source,yaml] ---- @@ -372,7 +372,7 @@ spec: averageUtilization: 70 ---- -NOTE: The AI service is I/O-bound (waiting on upstream LLM responses). CPU-based autoscaling is a safe starting point but may not trigger under high concurrency if CPU remains low. For production, consider supplementing with custom metrics (concurrent SSE streams, request queue depth) through KEDA or the Prometheus Adapter. +NOTE: The AI service is I/O-bound (waiting on LLM provider responses). CPU-based autoscaling is a safe starting point but may not trigger under high concurrency if CPU remains low. For production, consider supplementing with custom metrics (concurrent SSE streams, request queue depth) through https://keda.sh/[KEDA] or the https://github.com/kubernetes-sigs/prometheus-adapter[Prometheus Adapter]. For HPA configuration, see the https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/[Kubernetes HPA documentation]. == AWS ECS / Fargate @@ -521,7 +521,7 @@ TIP: Inspect the first few entries with `docker logs ai-service --tail 5 | jq .` |`OTEL_DEBUG` |No |- |Verbose OTLP diagnostic logging |=== -Compatible with Jaeger, Grafana Tempo, Datadog, New Relic, Honeycomb, and any OTLP-compatible back end. +Compatible with Jaeger, Grafana Tempo, Datadog, New Relic, Honeycomb, and any https://opentelemetry.io/docs/specs/otlp/[OTLP-compatible] back end. 
=== Langfuse diff --git a/modules/ROOT/pages/tinymceai-on-premises-providers.adoc b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc index 36cc4b93f2..2a98376a16 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-providers.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc @@ -6,9 +6,9 @@ -The `PROVIDERS` environment variable tells the AI service how to reach the upstream large language model (LLM). The `MODELS` environment variable tells the service which models are exposed to clients and which features each model supports. This page is the definitive reference for both: every supported `type`, every required field, and every known issue encountered in production. +The `PROVIDERS` environment variable tells the AI service how to reach the configured large language model (LLM) provider. The `MODELS` environment variable tells the service which models are exposed to clients and which features each model supports. This page documents every supported `type`, every required field, and every known production issue for both variables. -Start with the xref:tinymceai-on-premises-getting-started.adoc[Getting Started guide] if the AI service container is not yet running. The following sections assume a running `ai-service` container. +Start with the xref:tinymceai-on-premises-getting-started.adoc[Getting started guide] if the AI service container is not yet running. The following sections assume a running `ai-service` container. == Provider and model architecture @@ -17,7 +17,7 @@ The AI service uses two related environment variables: [cols=",,",options="header",] |=== |Variable |Type |What it does -|`PROVIDERS` |JSON object |Map of provider IDs to provider configurations. Each entry says how to authenticate with one upstream LLM API. +|`PROVIDERS` |JSON object |Map of provider IDs to provider configurations. Each entry says how to authenticate with one LLM provider API. |`MODELS` |JSON array |List of models exposed to clients. 
Each model points at a `PROVIDERS` entry and declares which features it can serve. |JSON Web Token (JWT) `auth.ai.permissions` |string array |Per-user authorization list. Includes `ai:models::` entries to gate access to individual models. |=== @@ -72,7 +72,7 @@ However, the default `agent-1` model has important limitations: * The TinyMCE editor model picker does not display real model names — it shows only the generic `agent-1` entry. * Azure, Bedrock, Vertex, and openai-compatible providers do not work at all without `MODELS`. -For production deployments, always set `MODELS` explicitly regardless of provider type. This ensures the editor model picker displays meaningful names, the correct provider handles each request, and JWT permissions can reference specific model IDs. +For production deployments, always set `MODELS` explicitly regardless of provider type. Setting `MODELS` explicitly makes the editor model picker display meaningful names, routes each request to the correct provider, and lets JWT permissions reference specific model IDs. The `ai:models:agent` permission in the JWT grants access to the built-in `agent-1` model. When adding custom models through `MODELS`, also add `ai:models::` permissions to expose each model to users. See xref:tinymceai-on-premises-jwt.adoc[JWT authentication] for permission syntax. diff --git a/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc b/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc index 46e2bd0f49..f66e6d2aa1 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc @@ -78,7 +78,7 @@ These assume the container is running and `/health` returns OK. |`invalid-jwt-payload` |`aud` claim does not match a known Environment ID, or `aud` is an array instead of a string -|Copy the Environment ID from `/panel/`. Ensure `aud` is a string, not an array. Recreate environments through the Panel UI only. 
+|Copy the Environment ID from `/panel/`. Set `aud` to a string, not an array. Recreate environments through the Panel UI only. |`invalid-jwt` (expired) |Token is past its `exp` claim diff --git a/modules/ROOT/pages/tinymceai-on-premises.adoc b/modules/ROOT/pages/tinymceai-on-premises.adoc index 371946309e..58349dd8c2 100644 --- a/modules/ROOT/pages/tinymceai-on-premises.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises.adoc @@ -1,4 +1,4 @@ -= TinyMCE AI On-Premise += TinyMCE AI on-premises :navtitle: Overview :description: Self-hosted AI writing assistance for TinyMCE; architecture, prerequisites, and topic guide :keywords: AI, on-premises, self-hosted, deployment, overview @@ -160,7 +160,7 @@ For a first-time deployment, progress through the guides in order. Each guide ca |Kubernetes manifests, AWS ECS task definitions, horizontal scaling, security hardening, rate limiting, observability, backup and recovery, and upgrades. |xref:tinymceai-on-premises-advanced.adoc[Advanced scenarios] -|MCP server integration, web scraping and search, multi-tenant patterns, custom models with guardrails, and AI-powered document pipelines. +|MCP server integration, web scraping and search, and multi-tenant patterns. |xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting] |Quick triage, container startup failures, JWT errors, LLM provider errors, editor issues, performance, and diagnostic recipes. From 532e30f76b8a13fb365c8cce94dabf1cce167e7b Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Wed, 20 May 2026 22:44:14 +1000 Subject: [PATCH 14/48] Restructure Advanced scenarios page into focused child pages Move MCP and web scraping/search content to a dedicated child page (tinymceai-on-premises-mcp.adoc) under LLM providers. Move multi-tenant deployment content into the JWT authentication page. Delete the catch-all Advanced scenarios page and update all cross-references and nav accordingly. 
--- modules/ROOT/nav.adoc | 2 +- .../ROOT/pages/tinymceai-on-premises-jwt.adoc | 87 +++++++-- ...ed.adoc => tinymceai-on-premises-mcp.adoc} | 173 +++++------------- .../tinymceai-on-premises-providers.adoc | 1 + .../tinymceai-on-premises-reference.adoc | 6 +- modules/ROOT/pages/tinymceai-on-premises.adoc | 5 +- 6 files changed, 124 insertions(+), 150 deletions(-) rename modules/ROOT/pages/{tinymceai-on-premises-advanced.adoc => tinymceai-on-premises-mcp.adoc} (53%) diff --git a/modules/ROOT/nav.adoc b/modules/ROOT/nav.adoc index 4991a7e036..b59b4d2d61 100644 --- a/modules/ROOT/nav.adoc +++ b/modules/ROOT/nav.adoc @@ -274,10 +274,10 @@ ***** xref:tinymceai-on-premises-getting-started.adoc[Getting started] ***** xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage] ***** xref:tinymceai-on-premises-providers.adoc[LLM providers] +****** xref:tinymceai-on-premises-mcp.adoc[MCP and web integrations] ***** xref:tinymceai-on-premises-jwt.adoc[JWT authentication] ***** xref:tinymceai-on-premises-frameworks.adoc[TinyMCE integration] ***** xref:tinymceai-on-premises-production.adoc[Production deployment] -***** xref:tinymceai-on-premises-advanced.adoc[Advanced scenarios] ***** xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting] ***** xref:tinymceai-on-premises-reference.adoc[Reference] **** xref:ai.adoc[AI Assistant (legacy)] diff --git a/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc b/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc index 888d5231a7..ee513bde41 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc @@ -1,7 +1,7 @@ = JWT authentication for the on-premises AI service :navtitle: JWT authentication :description: JWT authentication for the TinyMCE AI on-premises service using HS256 symmetric signing -:keywords: AI, on-premises, JWT, authentication, HS256 +:keywords: AI, on-premises, JWT, authentication, HS256, multi-tenant The on-premises AI service uses *HS256* 
(HMAC-SHA256, symmetric shared secret) for JSON Web Token (JWT) authentication. This is different from the Tiny Cloud AI service, which uses RS256. @@ -833,17 +833,7 @@ For deployments that should expose history without allowing new conversations: === Multi-tenant: separate environments -If tenants must be *fully isolated* (separate conversation history, separate access keys, separate audit logs), give each tenant its own Environment in the Management Panel, mint tokens with the tenant-specific `aud` and `AI_API_SECRET`, and route in the token endpoint: - -[source,javascript] ----- -function envFor(tenantId) { - return { - envId: process.env[`AI_ENV_ID_${tenantId}`], - apiSecret: process.env[`AI_API_SECRET_${tenantId}`], - }; -} ----- +If tenants require full isolation (separate conversation history, separate access keys, separate audit logs), assign each tenant its own Environment in the Management Panel and mint tokens with tenant-specific `aud` and API Secret values. See <<multi-tenant-deployment>> for implementation details. @@ -914,6 +904,79 @@ Short-lived tokens limit exposure if a token leaks through a browser extension, +[[multi-tenant-deployment]] +== Multi-tenant deployment + +For SaaS platforms that serve multiple customers from a single AI service instance, Environments provide full tenant isolation. + +=== Architecture + +[.text-center] +image::tinymceai-on-premises/advanced-scenarios-fig-2.svg[alt="Multi-tenant SaaS architecture with per-customer environments access keys and conversation isolation",width=100%] + +Each Environment provides: + +* Independent access keys with separate rotation schedules +* Isolated conversation history (queries are partitioned by `sub` within an environment) +* Separate usage tracking (through Langfuse or a custom metrics pipeline keyed by environment) + +=== Implementation + +. 
*Create one Environment per tenant through the Management Panel:* ++ +* Customer A -> Environment `env-customer-a` +* Customer B -> Environment `env-customer-b` +* Customer C -> Environment `env-customer-c` + +. *Route token generation to the correct Environment:* ++ +.Multi-tenant token endpoint +[%collapsible] +==== +[source,javascript] +---- +app.post('/api/ai-token', requireAuth, (req, res) => { + const customer = getCustomerConfig(req.user.organizationId); + + const token = jwt.sign({ + aud: customer.envId, + sub: req.user.id, + user: { name: req.user.name, email: req.user.email }, + auth: { + ai: { + permissions: getPermissionsForPlan(customer.plan) + } + } + }, customer.apiSecret, { algorithm: 'HS256', expiresIn: '1h' }); + + res.json({ token }); +}); + +function getPermissionsForPlan(plan) { + switch (plan) { + case 'enterprise': + return ['ai:conversations:*', 'ai:models:agent', 'ai:actions:system:*', 'ai:reviews:system:*']; + case 'pro': + return ['ai:conversations:*', 'ai:actions:system:*']; + case 'basic': + return ['ai:actions:system:*']; + } +} + +function getCustomerConfig(tenantId) { + return { + envId: process.env[`AI_ENV_ID_${tenantId}`], + apiSecret: process.env[`AI_API_SECRET_${tenantId}`] + }; +} +---- +==== + +. *Result:* Full data isolation between tenants, with feature gating derived from subscription tier. + +CAUTION: Conversation history is isolated by the `sub` claim within each Environment. Reusing a single `sub` value for multiple users within one Environment causes those users to share conversation history. Always use a stable, unique-per-user identifier (such as an internal user ID) as the `sub` value. 
+ + == See also diff --git a/modules/ROOT/pages/tinymceai-on-premises-advanced.adoc b/modules/ROOT/pages/tinymceai-on-premises-mcp.adoc similarity index 53% rename from modules/ROOT/pages/tinymceai-on-premises-advanced.adoc rename to modules/ROOT/pages/tinymceai-on-premises-mcp.adoc index 433fc05366..bc9f5f015a 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-advanced.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-mcp.adoc @@ -1,46 +1,54 @@ -= TinyMCE AI on-premises: advanced scenarios -:navtitle: Advanced scenarios -:description: Advanced scenarios for TinyMCE AI on-premises service -:keywords: AI, on-premises, multi-tenant, MCP, regulated += MCP and web integrations +:navtitle: MCP and web integrations +:description: Model Context Protocol (MCP) server integration and web scraping/search configuration for TinyMCE AI on-premises +:keywords: AI, on-premises, MCP, Model Context Protocol, web search, web scraping -[.lead] -This guide covers enterprise scenarios for the AI on-premises service through concrete examples. Each scenario builds on the xref:tinymceai-on-premises-getting-started.adoc[Getting started guide]; teams may implement any scenario on its own. +The AI service extends model capabilities through two integration points: the https://modelcontextprotocol.io/[Model Context Protocol] (MCP) for tool calling, and pluggable web endpoints for page fetching and search. Both features operate within AI conversations only. -== Internal knowledge base through MCP -*Use case:* Engineers writing documentation can query internal wikis, API specs, and runbooks directly from the editor, without switching context. -=== Architecture +[[mcp-integration]] +== Model Context Protocol (MCP) + +MCP allows the AI service to call external tools — internal wikis, API specifications, runbooks, contract databases, and compliance checkers — during conversations. The service connects to MCP servers over *Streamable HTTP transport*. 
[.text-center] -image::tinymceai-on-premises/advanced-scenarios-fig-1.svg[MCP integration: TinyMCE rich text editor communicates with AI service which calls MCP knowledge-hub server,width=100%] +image::tinymceai-on-premises/advanced-scenarios-fig-1.svg[MCP integration: TinyMCE rich text editor communicates with AI service which calls MCP server,width=100%] + +NOTE: MCP tools are available in AI *conversations* only. Reviews and quick actions do not invoke MCP tools. -=== Configuration reference +=== Configuration -The Model Context Protocol (MCP) allows the AI service to call external tools (internal wikis, API specs, runbooks, contract databases, compliance checkers) from inside conversations. The AI service connects over *Streamable HTTP transport*. +Set the `MCP_SERVERS` environment variable to a JSON object. Each key is a server identifier; each value describes the connection: + +[source,bash] +---- +-e MCP_SERVERS='{ + "knowledge-hub": { + "url": "http://host.docker.internal:3001/mcp", + "options": { "callToolTimeout": 30 } + } +}' +---- [cols="1,3",options="header"] |=== -|Option |Description +|Field |Description |`url` |HTTP endpoint of the MCP server (Streamable HTTP transport). -|`headers` |Auth headers sent with every request. Single shared token per server. See <<single-shared-token-limitation>>. -|`tools.disabled` |Array of tool names to block from LLM access. +|`headers` |Authentication headers sent with every request. Single shared token per server. See <<mcp-shared-token>>. +|`tools.disabled` |Array of tool names to exclude from LLM access. |`options.callToolTimeout` |Per-tool-call timeout in seconds (default 60). |=== -NOTE: MCP tools are available in AI *conversations* only, not in reviews or quick actions. - -TIP: On Linux Docker, add `extra_hosts: ["host.docker.internal:host-gateway"]` to the AI service to reach MCP servers running on the host. 
+TIP: On Linux Docker, add `extra_hosts: ["host.docker.internal:host-gateway"]` to the AI service compose entry to reach MCP servers running on the host machine. -[[single-shared-token-limitation]] -=== Single-shared-token limitation +[[mcp-shared-token]] +=== Shared-token authentication limitation -The `headers` field is fixed at deploy time. Every MCP tool call shares the same token; there is no per-user MCP authentication path yet. If the MCP server needs per-user context, encode it in the conversation prompt or in a header that maps user identity at the MCP server side (for example, using a token the MCP server itself swaps for a per-user identity). +The `headers` field is fixed at deploy time. Every MCP tool call uses the same credentials; there is no per-user MCP authentication path. If the MCP server requires per-user context, encode identity in the conversation prompt or in a header that the MCP server resolves to a per-user identity on its own side. -=== Implementation +=== MCP server example -. *Create an MCP server that exposes the knowledge base:* -+ .Knowledge-base MCP server (Express) [%collapsible] ==== @@ -126,104 +134,12 @@ app.listen(3001, () => console.log('Knowledge MCP server on http://localhost:300 ---- ==== -. *Configure the AI service:* -+ -[source,bash] ----- --e MCP_SERVERS='{ - "knowledge-hub": { - "url": "http://host.docker.internal:3001/mcp", - "options": { "callToolTimeout": 30 } - } -}' ----- - -. *Sample AI chat message:* -+ -____ -"What are the API guidelines for error handling?" -____ -+ -The assistant calls the `search_knowledge_base` tool, retrieves the relevant policy, and responds with sourced information without leaving the rich text editor. - - - -== Multi-tenant SaaS platform - -*Use case:* A SaaS platform provides AI writing features to customers. Each customer gets isolated conversations, separate large language model (LLM) budgets, and per-tenant configuration. 
- -=== Architecture - -[.text-center] -image::tinymceai-on-premises/advanced-scenarios-fig-2.svg[alt="Multi-tenant SaaS architecture with per-customer environments access keys and conversation isolation",width=100%] - -Each environment provides: - -* Its own access keys (independent rotation) -* Isolated conversation history (queries are partitioned by `sub` within an environment) -* Separate billing and usage tracking (through Langfuse or a custom metrics pipeline) - -=== Implementation - -. *Create one environment per customer through the Management Panel:* -+ -* Customer A -> Environment `env-customer-a` -* Customer B -> Environment `env-customer-b` -* Customer C -> Environment `env-customer-c` - -. *Token server generates JSON Web Tokens (JWTs) with the correct environment:* -+ -.Multi-tenant JWT generation -[%collapsible] -==== -[source,javascript] ----- -app.post('/api/ai-token', requireAuth, (req, res) => { - const customer = getCustomerConfig(req.user.organizationId); - - const token = jwt.sign({ - aud: customer.envId, - sub: req.user.id, - user: { name: req.user.name, email: req.user.email }, - auth: { - ai: { - permissions: getPermissionsForPlan(customer.plan) - } - } - }, customer.apiSecret, { algorithm: 'HS256', expiresIn: '1h' }); - - res.json({ token }); -}); - -function getPermissionsForPlan(plan) { - switch (plan) { - case 'enterprise': - return ['ai:conversations:*', 'ai:models:agent', 'ai:actions:system:*', 'ai:reviews:system:*']; - case 'pro': - return ['ai:conversations:*', 'ai:actions:system:*']; - case 'basic': - return ['ai:actions:system:*']; - } -} - -function envFor(tenantId) { - return { - envId: process.env[`AI_ENV_ID_${tenantId}`], - apiSecret: process.env[`AI_API_SECRET_${tenantId}`] - }; -} ----- -==== - -. *Result:* Full data isolation between customers, with feature gating based on subscription tier. - -CAUTION: Conversation history is isolated by the `sub` claim in the JWT. 
Reusing a single `sub` value for multiple users within one environment causes those users to share conversation history. Always use a stable, unique-per-user identifier (such as an internal user ID) as the `sub` value. +[[web-scraping-and-search]] +== Web scraping and web search -== Web-augmented research assistant - -*Use case:* Content team writing blog posts and marketing copy can pull live data from the web and internal sources. +The AI service can forward web page fetches and search queries to external endpoints, enabling AI conversations to reference live web content. === Configuration @@ -238,6 +154,8 @@ CAUTION: Conversation history is isolated by the `sub` claim in the JWT. Reusing -e WEBSEARCH_HEADERS='{"Authorization":"Bearer search-api-key"}' ---- +NOTE: A model must include `capabilities.webSearch: true` in its xref:tinymceai-on-premises-providers.adoc#models-required[`MODELS` entry] for the web search toggle to appear in the editor. + === Web scraping endpoint contract [cols="1,2",options="header"] @@ -259,7 +177,7 @@ CAUTION: Conversation history is isolated by the `sub` claim in the JWT. Reusing { "type": "text/html", "data": "

Example page body

" } ---- -==== Scraper example (Playwright) +==== Scraper implementation example (Playwright) [source,javascript] ---- @@ -313,7 +231,7 @@ app.listen(4000); } ---- -==== Search example (SerpAPI) +==== Search implementation example (SerpAPI) [source,javascript] ---- @@ -338,15 +256,10 @@ app.post('/search', async (req, res) => { app.listen(4001); ---- -NOTE: A model must include `capabilities.webSearch: true` in its `MODELS` entry to expose the web search toggle. - -*Example prompts:* - -* "Research the latest trends in AI governance and write a summary" -* "Read this URL and rewrite the key points for the target audience: pass:[https://…]" - -For production deployment guidance including Kubernetes manifests, scaling, security hardening, rate limiting, and observability, see xref:tinymceai-on-premises-production.adoc[Production deployment]. +== See also -For common errors and debugging steps, see xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting]. +* xref:tinymceai-on-premises-providers.adoc[LLM providers] -- provider configuration and the `MODELS` catalog +* xref:tinymceai-on-premises-reference.adoc[Reference] -- full environment variable reference including `MCP_SERVERS`, `WEBRESOURCES_*`, and `WEBSEARCH_*` +* xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting] -- general troubleshooting diff --git a/modules/ROOT/pages/tinymceai-on-premises-providers.adoc b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc index 2a98376a16..694574bd5d 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-providers.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc @@ -1024,6 +1024,7 @@ More general troubleshooting (database, JWT, storage, networking) lives in xref: == See also +* xref:tinymceai-on-premises-mcp.adoc[MCP and web integrations] -- Model Context Protocol tool calling and web scraping/search endpoints * xref:tinymceai-on-premises-getting-started.adoc[Getting started] -- initial container bring-up and demo * 
xref:tinymceai-on-premises-jwt.adoc[JWT authentication] -- per-model and per-provider JWT permissions * xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage] -- MySQL/Postgres configuration for the AI service diff --git a/modules/ROOT/pages/tinymceai-on-premises-reference.adoc b/modules/ROOT/pages/tinymceai-on-premises-reference.adoc index 4584bf87ba..caf2e26d76 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-reference.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-reference.adoc @@ -30,7 +30,7 @@ Alphabetized. Required-ness is marked relative to a minimum working deployment. |`LICENSE_KEY` |Yes |- |AI service license key (long string from Tiny). |`LLM_TELEMETRY_ENABLED` |No |`false` |Primary OpenTelemetry switch. |`LLM_TIMEOUT_MS` |No |180000 |Per-request large language model (LLM) timeout in ms. Raise for large self-hosted models. -|`MCP_SERVERS` |No |- |JSON object; Model Context Protocol (MCP) server configuration. See xref:tinymceai-on-premises-advanced.adoc[Advanced scenarios]. +|`MCP_SERVERS` |No |- |JSON object; Model Context Protocol (MCP) server configuration. See xref:tinymceai-on-premises-mcp.adoc#mcp-integration[MCP integration]. |`MODELS` |Yes (Azure, Bedrock, Vertex, openai-compatible); recommended for all providers |- |JSON array defining exposed models. Required for Azure, Bedrock, Vertex, and openai-compatible providers. Recommended for OpenAI, Anthropic, and Google direct. See xref:tinymceai-on-premises-providers.adoc[LLM providers]. |`OTEL_DEBUG` |No |- |Verbose OpenTelemetry Protocol (OTLP) diagnostic logging. |`OTEL_EXPORTER_OTLP_TRACES_ENDPOINT` |If using OTEL |- |OTLP traces endpoint URL. @@ -57,10 +57,10 @@ Alphabetized. Required-ness is marked relative to a minimum working deployment. |`STORAGE_LOCATION` |If using filesystem |- |Mount point for filesystem storage. Must be writable by the container user. |`STORAGE_REGION` |If using S3 |- |S3 region. 
|`STORAGE_SECRET_ACCESS_KEY` |If using S3 |- |S3 secret access key. -|`WEBRESOURCES_ENABLED` |No |`false` |Enable web scraping endpoint forwarding. +|`WEBRESOURCES_ENABLED` |No |`false` |Enable web scraping endpoint forwarding. See xref:tinymceai-on-premises-mcp.adoc#web-scraping-and-search[Web scraping and web search]. |`WEBRESOURCES_ENDPOINT` |If web resources enabled |- |Scraper URL. |`WEBRESOURCES_REQUEST_TIMEOUT` |No |- |Scraper request timeout in ms. -|`WEBSEARCH_ENABLED` |No |`false` |Enable web search forwarding. +|`WEBSEARCH_ENABLED` |No |`false` |Enable web search forwarding. See xref:tinymceai-on-premises-mcp.adoc#web-scraping-and-search[Web scraping and web search]. |`WEBSEARCH_ENDPOINT` |If web search enabled |- |Search URL. |`WEBSEARCH_HEADERS` |No |- |JSON object; extra headers sent to the search endpoint. |`WEBSEARCH_REQUEST_TIMEOUT` |No |- |Search request timeout in ms. diff --git a/modules/ROOT/pages/tinymceai-on-premises.adoc b/modules/ROOT/pages/tinymceai-on-premises.adoc index 58349dd8c2..57b8e3f33d 100644 --- a/modules/ROOT/pages/tinymceai-on-premises.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises.adoc @@ -151,7 +151,7 @@ For a first-time deployment, progress through the guides in order. Each guide ca |Connect to OpenAI, Anthropic, Google Gemini, Azure OpenAI, AWS Bedrock, Google Vertex AI, or any OpenAI-compatible endpoint (Ollama, vLLM, LM Studio). Custom model catalog and API key rotation. |xref:tinymceai-on-premises-jwt.adoc[JWT authentication] -|HS256 signing model, required and optional claims, permissions reference, and token endpoint examples in 8 languages. +|HS256 signing model, required and optional claims, permissions reference, token endpoint examples in 8 languages, and multi-tenant deployment patterns. |xref:tinymceai-on-premises-frameworks.adoc[TinyMCE integration] |Editor-side configuration: plugin options, token provider, authentication patterns, Cross-Origin Resource Sharing (CORS), and deployment checklists. 
@@ -159,9 +159,6 @@ For a first-time deployment, progress through the guides in order. Each guide ca |xref:tinymceai-on-premises-production.adoc[Production deployment] |Kubernetes manifests, AWS ECS task definitions, horizontal scaling, security hardening, rate limiting, observability, backup and recovery, and upgrades. -|xref:tinymceai-on-premises-advanced.adoc[Advanced scenarios] -|MCP server integration, web scraping and search, and multi-tenant patterns. - |xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting] |Quick triage, container startup failures, JWT errors, LLM provider errors, editor issues, performance, and diagnostic recipes. From 0e15d605ea088fe8845d48fc37ee1e84f20b6f72 Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Wed, 20 May 2026 23:18:31 +1000 Subject: [PATCH 15/48] Fix audit findings across all on-premises documentation - Getting Started: add ALLOWED_ORIGINS, fix CORS blocker, mark TINYMCE_API_KEY required for CDN demo, fix NOTE inside bash block, un-collapse launch script, add prerequisites section - Production: add missing storage secret keys to K8s Secret, add ALLOWED_ORIGINS to K8s and ECS, align HPA minReplicas, add ECS startPeriod, label Podman as eval-only storage - JWT: fix aud description, fix sanity-check port, coerce sub to String - Frameworks: add React, Vue, Angular minimal examples - Database: add AI service connection env vars section - MCP: label Express example as illustrative - Overview: add MCP to topic guides table --- .../pages/tinymceai-on-premises-database.adoc | 67 ++++++++++++++ .../tinymceai-on-premises-frameworks.adoc | 91 +++++++++++++++++++ ...tinymceai-on-premises-getting-started.adoc | 33 ++++--- .../ROOT/pages/tinymceai-on-premises-jwt.adoc | 6 +- .../ROOT/pages/tinymceai-on-premises-mcp.adoc | 4 +- .../tinymceai-on-premises-production.adoc | 12 ++- modules/ROOT/pages/tinymceai-on-premises.adoc | 3 + 7 files changed, 193 insertions(+), 23 deletions(-) diff --git 
a/modules/ROOT/pages/tinymceai-on-premises-database.adoc b/modules/ROOT/pages/tinymceai-on-premises-database.adoc index 8bba9f5009..087cfbb14c 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-database.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-database.adoc @@ -632,6 +632,73 @@ Files are stored in the SQL database as blobs, capped at roughly 4 GB total. Thi +== Connecting the AI service + +After provisioning the database and Redis, pass the connection details to the AI service container through environment variables. + +=== MySQL connection + +[source,bash] +---- +-e DATABASE_DRIVER='mysql' \ +-e DATABASE_HOST='mysql' \ +-e DATABASE_PORT='3306' \ +-e DATABASE_USER='ai_service' \ +-e DATABASE_PASSWORD='STRONG_PASSWORD' \ +-e DATABASE_DATABASE='ai_service' \ +-e REDIS_HOST='redis' \ +-e REDIS_PORT='6379' +---- + +=== PostgreSQL connection + +[source,bash] +---- +-e DATABASE_DRIVER='postgres' \ +-e DATABASE_HOST='postgres' \ +-e DATABASE_PORT='5432' \ +-e DATABASE_USER='ai_service' \ +-e DATABASE_PASSWORD='STRONG_PASSWORD' \ +-e DATABASE_DATABASE='ai_service' \ +-e DATABASE_SCHEMA='cs-on-premises' \ +-e REDIS_HOST='redis' \ +-e REDIS_PORT='6379' +---- + +Set `DATABASE_SCHEMA` to `public` if the `cs-on-premises` schema was not created. See <<postgresql-schema-prerequisite>>. + +=== Managed cloud with TLS + +When connecting to managed database services (Amazon RDS, Azure Database, Cloud SQL), add the TLS certificate path: + +[source,bash] +---- +-e DATABASE_SSL_CA='/certs/rds-combined-ca-bundle.pem' +---- + +Mount the certificate into the container with `-v /local/certs:/certs:ro` in `docker run` or a volume mount in the Kubernetes pod spec. 
+ +For Redis with authentication (ElastiCache, Azure Cache, Memorystore): + +[source,bash] +---- +-e REDIS_PASSWORD='REDIS_AUTH_TOKEN' \ +-e REDIS_TLS='true' +---- + +For Redis Cluster mode, use `REDIS_CLUSTER_NODES` instead of `REDIS_HOST`: + +[source,bash] +---- +-e REDIS_CLUSTER_NODES='node1.cache.amazonaws.com:6379,node2.cache.amazonaws.com:6379' +---- + +When `REDIS_CLUSTER_NODES` is set, `REDIS_HOST` is ignored. + +For a complete `docker run` command including all env vars, see the xref:tinymceai-on-premises-getting-started.adoc[Getting started] launch script or the xref:tinymceai-on-premises-production.adoc[Production deployment] manifests. + + + == Verification === MySQL diff --git a/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc b/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc index 6814afb107..08aad9d3ea 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc @@ -125,6 +125,97 @@ tinymceai_token_provider: () => { +== Framework-specific examples + +The examples below show the minimum configuration needed to connect the TinyMCE AI plugin to the on-premises service. Each uses the framework wrapper's `init` prop to pass the same options documented above. 
+ +=== React + +[source,jsx] +---- +import { Editor } from '@tinymce/tinymce-react'; + +function MyEditor() { + return ( + + fetch('/api/ai-token', { method: 'POST', credentials: 'include' }) + .then((r) => r.json()) + .then((data) => ({ token: data.token })) + }} + /> + ); +} +---- + +=== Vue.js + +[source,html] +---- + + + +---- + +=== Angular + +[source,html] +---- + + +---- + +[source,typescript] +---- +// app.component.ts +import { Component } from '@angular/core'; + +@Component({ selector: 'app-root', templateUrl: './app.component.html' }) +export class AppComponent { + editorInit = { + plugins: 'tinymceai', + toolbar: 'undo redo | bold italic | tinymceai-chat tinymceai-review tinymceai-quickactions', + height: 500, + tinymceai_service_url: 'https://ai.yourcompany.com', + tinymceai_token_provider: () => + fetch('/api/ai-token', { method: 'POST', credentials: 'include' }) + .then((r: Response) => r.json()) + .then((data: { token: string }) => ({ token: data.token })) + }; +} +---- + +For general framework setup (installing wrappers, SSR configuration, bundler setup), refer to the framework integration guides linked at the top of this page. Replace `T8LK:...` with the license key from the Tiny account representative. + + + == Authenticating the token request The `tinymceai_token_provider` fetches a JWT from the application back end. How that back end authenticates the browser request depends on the application architecture. diff --git a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc index 22babce7af..78918088ca 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc @@ -13,6 +13,13 @@ This guide sets up a fully working local stack in roughly five minutes on any ma The quick start is designed to validate the stack components before moving to a production deployment. 
Production engineers can review this section to understand the conceptual flow before continuing to xref:tinymceai-on-premises-production.adoc[Production deployment]. +=== Prerequisites + +* Docker 20.10{plus} (or Podman 4{plus}) +* Node.js 18{plus} and npm (for the demo token server) +* A TinyMCE license key and container registry credentials (from the Tiny account representative) +* At least one LLM provider API key (OpenAI, Anthropic, or Google) + == Five-minute demo with Docker Compose === Create the project folder @@ -96,15 +103,7 @@ PostgreSQL is equally supported. See xref:tinymceai-on-premises-database.adoc[Da [NOTE] -- -If any service in the stack needs to reach the host machine (for example a self-hosted Ollama running on the host), add an `extra_hosts` entry to the `ai-service` block in the compose file above: - -[source,yaml] ----- -extra_hosts: - - "host.docker.internal:host-gateway" ----- - -Docker Desktop (macOS, Windows) and Podman 4{plus} auto-inject this alias. Native Linux Docker does not. +If the AI service needs to reach the host machine (for example a self-hosted Ollama running on the host), add `--add-host=host.docker.internal:host-gateway` to the `docker run` command in the Launch section below. Docker Desktop (macOS, Windows) and Podman 4{plus} auto-inject this alias; native Linux Docker does not. -- === Create the `.env` file @@ -114,8 +113,8 @@ Docker Desktop (macOS, Windows) and Podman 4{plus} auto-inject this alias. Nativ # --- Required: provided by Tiny --- LICENSE_KEY= -# --- Optional: only required when loading TinyMCE from cdn.tiny.cloud --- -# Omit for self-hosted editor bundles. +# --- Required for this demo (loads TinyMCE from cdn.tiny.cloud) --- +# Omit only when using a self-hosted editor bundle with license_key. 
TINYMCE_API_KEY= # --- Required: strong secret used to log into the Management Panel --- @@ -129,7 +128,7 @@ OPENAI_API_KEY= # ANTHROPIC_API_KEY= # GOOGLE_API_KEY= -# --- Filled in after creating an environment (Step 7). Used by the token server, not the AI service. --- +# --- Filled in after creating an environment (see "Create an environment and access key" below). Used by the token server, not the AI service. --- AI_ENV_ID= AI_API_SECRET= ---- @@ -158,9 +157,6 @@ The AI service runs as a standalone container outside of the Docker Compose stac Run from the same folder as the `.env` file: -.Full launch script -[%collapsible] -==== [source,bash] ---- set -a && source .env && set +a @@ -199,10 +195,10 @@ docker run --init -d -p 8000:8000 \ -e REDIS_HOST='redis' \ -e PROVIDERS="$PROVIDERS" \ -e STORAGE_DRIVER='database' \ + -e ALLOWED_ORIGINS='http://localhost:3000' \ -e ENABLE_METRIC_LOGS='true' \ registry.containers.tiny.cloud/ai-service-tiny:latest ---- -==== For Podman, replace `docker run` with `podman run` and use a Podman pod instead of a compose network. See xref:tinymceai-on-premises-production.adoc[Production deployment] for Podman-specific guidance. See xref:tinymceai-on-premises-production.adoc#_podman_deployment[Podman deployment] for a full example. @@ -411,9 +407,12 @@ curl -s -X POST http://localhost:8000/v1/conversations \ -H "Authorization: Bearer $TOKEN" \ -H "Content-Type: application/json" \ -d '{"id":"verify-1","title":"Verification"}' +---- NOTE: The command below uses the built-in `agent-1` model. If `MODELS` has been explicitly configured, replace `agent-1` with the `id` of one of the configured models. See xref:tinymceai-on-premises-providers.adoc#models-required[Defining the model list]. 
+[source,bash] +---- curl -s -N -X POST http://localhost:8000/v1/conversations/verify-1/messages \ -H "Authorization: Bearer $TOKEN" \ -H "Content-Type: application/json" \ @@ -455,7 +454,7 @@ docker compose up -d --force-recreate # Recreate the standalone AI service: docker stop ai-service && docker rm ai-service -# Then re-run the launch script from Step 5. +# Then re-run the launch script from "Launch the AI service" above. ---- For Kubernetes, update the Secret and trigger a rollout restart: diff --git a/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc b/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc index ee513bde41..d1b91820a3 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc @@ -62,7 +62,7 @@ Every token MUST contain the following claims. [cols=",,",options="header",] |=== |Claim |Type |Description -|`aud` |string |The Environment ID, copied from the Management Panel. UUID-shaped. Type *must be string*, not array; the verifier rejects array-shaped `aud` (the default in some JWT libraries) with `invalid-jwt-payload`. +|`aud` |string |The Environment ID, copied from the Management Panel (for example `viOu8BnjJHb0HGK091p`). Type *must be string*, not array; the verifier rejects array-shaped `aud` (the default in some JWT libraries) with `invalid-jwt-payload`. |`iat` |number |Issued-at, seconds since epoch (UTC). |`exp` |number |Expiry, seconds since epoch (UTC). Recommend `iat {plus} 3600` for demos, `iat {plus} 900` for production. The server applies 60 seconds of clock-skew leeway; tokens up to 60 seconds past `exp` still verify. |`sub` |string |Unique, stable user identifier. Conversation history is isolated per-`sub`; do not reuse one `sub` across users or conversations will leak between them. @@ -877,7 +877,7 @@ When debugging, start here. 
Most "auth failures" reflect wrong claim values rath [source,bash] ---- -TOKEN=$(curl -s -X POST http://localhost:3001/api/ai-token | jq -r .token) +TOKEN=$(curl -s -X POST http://localhost:3000/api/ai-token | jq -r .token) curl -i https://ai.example.com/v1/conversations \ -H "Authorization: Bearer $TOKEN" \ @@ -940,7 +940,7 @@ app.post('/api/ai-token', requireAuth, (req, res) => { const token = jwt.sign({ aud: customer.envId, - sub: req.user.id, + sub: String(req.user.id), user: { name: req.user.name, email: req.user.email }, auth: { ai: { diff --git a/modules/ROOT/pages/tinymceai-on-premises-mcp.adoc b/modules/ROOT/pages/tinymceai-on-premises-mcp.adoc index bc9f5f015a..59b4c018c6 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-mcp.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-mcp.adoc @@ -49,7 +49,9 @@ The `headers` field is fixed at deploy time. Every MCP tool call uses the same c === MCP server example -.Knowledge-base MCP server (Express) +The following is an illustrative example showing the JSON-RPC message flow. Production MCP servers must implement the full https://modelcontextprotocol.io/specification/2025-11-25/basic/transports#streamable-http[Streamable HTTP transport specification]. + +.Knowledge-base MCP server (illustrative) [%collapsible] ==== [source,javascript] diff --git a/modules/ROOT/pages/tinymceai-on-premises-production.adoc b/modules/ROOT/pages/tinymceai-on-premises-production.adoc index 74bdace67c..e299b64dce 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-production.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-production.adoc @@ -96,6 +96,8 @@ When deploying for the first time or upgrading to a new version, start a single == Podman deployment +NOTE: This example uses `STORAGE_DRIVER='database'` for simplicity. For production workloads, use S3 or Azure Blob storage. See xref:tinymceai-on-premises-database.adoc#_file_storage[File storage] for options. + The AI service works with Podman as an alternative to Docker. 
In Podman, containers within a pod share a network namespace, so use `127.0.0.1` instead of container names for hostnames. [source,bash] @@ -159,6 +161,8 @@ stringData: management-secret: "EXAMPLE_MANAGEMENT_SECRET" db-password: "EXAMPLE_DB_PASSWORD" redis-password: "EXAMPLE_REDIS_PASSWORD" + storage-access-key: "EXAMPLE_S3_ACCESS_KEY_ID" + storage-secret-key: "EXAMPLE_S3_SECRET_ACCESS_KEY" providers: | { "openai": { @@ -255,6 +259,8 @@ spec: secretKeyRef: name: ai-service-secrets key: storage-secret-key + - name: ALLOWED_ORIGINS + value: "https://app.example.com" - name: ENABLE_METRIC_LOGS value: "true" readinessProbe: @@ -361,7 +367,7 @@ spec: apiVersion: apps/v1 kind: Deployment name: ai-service - minReplicas: 3 + minReplicas: 2 maxReplicas: 20 metrics: - type: Resource @@ -398,7 +404,8 @@ NOTE: The AI service is I/O-bound (waiting on LLM provider responses). CPU-based "command": ["CMD-SHELL", "wget -q --spider http://localhost:8000/health || exit 1"], "interval": 30, "timeout": 5, - "retries": 3 + "retries": 3, + "startPeriod": 60 }, "secrets": [ { "name": "LICENSE_KEY", "valueFrom": "arn:aws:secretsmanager:us-east-1:111122223333:secret:ai-license" }, @@ -416,6 +423,7 @@ NOTE: The AI service is I/O-bound (waiting on LLM provider responses). CPU-based { "name": "STORAGE_DRIVER", "value": "s3" }, { "name": "STORAGE_BUCKET", "value": "example-ai-storage-bucket" }, { "name": "STORAGE_REGION", "value": "us-east-1" }, + { "name": "ALLOWED_ORIGINS", "value": "https://app.example.com" }, { "name": "ENABLE_METRIC_LOGS", "value": "true" } ] } diff --git a/modules/ROOT/pages/tinymceai-on-premises.adoc b/modules/ROOT/pages/tinymceai-on-premises.adoc index 57b8e3f33d..9fc4b9188b 100644 --- a/modules/ROOT/pages/tinymceai-on-premises.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises.adoc @@ -150,6 +150,9 @@ For a first-time deployment, progress through the guides in order. 
Each guide ca |xref:tinymceai-on-premises-providers.adoc[LLM providers] |Connect to OpenAI, Anthropic, Google Gemini, Azure OpenAI, AWS Bedrock, Google Vertex AI, or any OpenAI-compatible endpoint (Ollama, vLLM, LM Studio). Custom model catalog and API key rotation. +|xref:tinymceai-on-premises-mcp.adoc[MCP and web integrations] +|Model Context Protocol (MCP) tool integration, web scraping endpoints, and web search endpoints. + |xref:tinymceai-on-premises-jwt.adoc[JWT authentication] |HS256 signing model, required and optional claims, permissions reference, token endpoint examples in 8 languages, and multi-tenant deployment patterns. From e006b46913bdccd86fe1db0f0ae54dc04c9168d8 Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Thu, 21 May 2026 19:05:53 +1000 Subject: [PATCH 16/48] Add architecture diagram, page intros, and address PR feedback - Add deployment architecture diagram (overview-fig-2.svg) to Overview - Add "where this fits" introductory context to Database, Providers, JWT, and Frameworks pages linking to overall deployment flow - Un-nest MCP page to same nav level as other on-prem pages - Add Step 1/2/3 subheadings to Getting Started verification section - Remove raw Management API reference from Getting Started - Restructure Database version pinning into neutral parent section - Fix "two layers" to "three layers" matching the diagram - Address metricjs PR feedback: hyphens, TLS note, Redis wording, schema note ordering, conditions-first, section explanations --- .../tinymceai-on-premises/overview-fig-2.mmd | 23 +++++++++++ .../tinymceai-on-premises/overview-fig-2.svg | 1 + modules/ROOT/nav.adoc | 2 +- .../pages/tinymceai-on-premises-database.adoc | 39 ++++++++++++------- .../tinymceai-on-premises-frameworks.adoc | 10 +++-- ...tinymceai-on-premises-getting-started.adoc | 17 ++++++-- .../ROOT/pages/tinymceai-on-premises-jwt.adoc | 4 ++ .../tinymceai-on-premises-providers.adoc | 10 ++++- modules/ROOT/pages/tinymceai-on-premises.adoc | 13 
++++++- 9 files changed, 93 insertions(+), 26 deletions(-) create mode 100644 modules/ROOT/images/tinymceai-on-premises/overview-fig-2.mmd create mode 100644 modules/ROOT/images/tinymceai-on-premises/overview-fig-2.svg diff --git a/modules/ROOT/images/tinymceai-on-premises/overview-fig-2.mmd b/modules/ROOT/images/tinymceai-on-premises/overview-fig-2.mmd new file mode 100644 index 0000000000..8c17859110 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/overview-fig-2.mmd @@ -0,0 +1,23 @@ +%%{init: {'theme': 'base', 'themeVariables': { 'primaryColor': '#ECECFF', 'primaryBorderColor': '#9370DB', 'lineColor': '#333333', 'edgeLabelBackground': '#e8e8e8', 'fontSize': '16px' }, 'flowchart': { 'nodeSpacing': 40, 'rankSpacing': 80 }}}%% +flowchart LR + subgraph Client["Client layer"] + Token["Token endpoint\n(back end)"] + App["TinyMCE editor\n(browser)"] + end + + subgraph Service["Application layer"] + AI["AI service\n(container)"] + end + + subgraph Data["Data layer"] + DB[("Database\n+ Redis\n+ Storage")] + end + + LLM["LLM provider"] + + Token -->|"1. signed JWT"| App + App -->|"2. prompt + JWT"| AI + AI -->|"5. SSE stream"| App + AI -->|"3. forward"| LLM + LLM -->|"4. stream"| AI + AI --- DB diff --git a/modules/ROOT/images/tinymceai-on-premises/overview-fig-2.svg b/modules/ROOT/images/tinymceai-on-premises/overview-fig-2.svg new file mode 100644 index 0000000000..61141945bc --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/overview-fig-2.svg @@ -0,0 +1 @@ +

Data layer

Application layer

Client layer

1. signed JWT

2. prompt + JWT

5. SSE stream

3. forward

4. stream

Token endpoint
(back end)

TinyMCE editor
(browser)

AI service
(container)

Database
+ Redis
+ Storage

LLM provider

\ No newline at end of file diff --git a/modules/ROOT/nav.adoc b/modules/ROOT/nav.adoc index b59b4d2d61..027d7a6970 100644 --- a/modules/ROOT/nav.adoc +++ b/modules/ROOT/nav.adoc @@ -274,7 +274,7 @@ ***** xref:tinymceai-on-premises-getting-started.adoc[Getting started] ***** xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage] ***** xref:tinymceai-on-premises-providers.adoc[LLM providers] -****** xref:tinymceai-on-premises-mcp.adoc[MCP and web integrations] +***** xref:tinymceai-on-premises-mcp.adoc[MCP and web integrations] ***** xref:tinymceai-on-premises-jwt.adoc[JWT authentication] ***** xref:tinymceai-on-premises-frameworks.adoc[TinyMCE integration] ***** xref:tinymceai-on-premises-production.adoc[Production deployment] diff --git a/modules/ROOT/pages/tinymceai-on-premises-database.adoc b/modules/ROOT/pages/tinymceai-on-premises-database.adoc index 087cfbb14c..1d1b23f2d4 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-database.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-database.adoc @@ -3,8 +3,13 @@ :description: Database, Redis, and file storage setup for the TinyMCE AI On-Premises service :keywords: AI, on-premises, database, MySQL, PostgreSQL, Redis, Docker, Podman, file storage, S3, Azure Blob -This page covers the data layer: the SQL database, Redis, and file storage. -For container runtimes, reverse proxies, Transport Layer Security (TLS), Kubernetes, and ECS deployment, see the xref:tinymceai-on-premises-production.adoc[Production deployment guide]. +This page covers the *data layer*: the SQL database, Redis, and file storage. These components must be running and accessible before the AI service container can start — the service connects to them on boot and will not proceed without them. + +* *SQL database* — stores persistent data such as configurations, conversations, files, and documents. +* *Redis* — handles temporary data and inter-instance communication for scaling. 
When multiple application instances are running, Redis ensures that data is shared correctly across all of them. +* *File storage* — stores uploaded files and documents. + +Configure the data layer first, then proceed to xref:tinymceai-on-premises-providers.adoc[LLM providers] and xref:tinymceai-on-premises-jwt.adoc[JWT authentication]. For container runtimes, reverse proxies, Transport Layer Security (TLS), Kubernetes, and ECS deployment, see the xref:tinymceai-on-premises-production.adoc[Production deployment guide]. == Supported versions @@ -52,7 +57,7 @@ All paths produce the same end state: a running database the AI service can conn [[postgresql-schema-prerequisite]] == PostgreSQL schema prerequisite -The AI service expects a schema named `cs-on-premises` (with a hyphen). If that schema does not exist, the container crashes on first boot with: +The AI service expects a schema named `cs-on-premises` (with hyphens). If that schema does not exist, the container crashes on first boot with: .... error: schema "cs-on-premises" does not exist @@ -79,14 +84,16 @@ Set the `DATABASE_SCHEMA` environment variable on the AI service container: DATABASE_SCHEMA=public .... -This bypasses the hyphenated schema entirely. +This bypasses the hyphenated schema entirely. MySQL deployments do not require this step — the database name (`DATABASE_DATABASE`) serves as the namespace. + -NOTE: MySQL does not have this issue. The database itself is the namespace, set through `DATABASE_DATABASE`. +== Version pinning +TIP: Pin specific major versions for all data layer images (`mysql:8.0`, `postgres:16`, `redis:7`). Floating tags like `:latest` or `:8` can introduce breaking changes during routine image pulls. [[mysql-version-pinning]] -== MySQL version pinning +=== MySQL Do *not* use `mysql:8`. That tag now floats to the latest MySQL, which removes the `default-authentication-plugin=mysql_native_password` startup flag the AI service relies on. 
The container crashloops with: @@ -97,8 +104,6 @@ Do *not* use `mysql:8`. That tag now floats to the latest MySQL, which removes t Pin to `mysql:8.0` in every manifest: `docker run`, Docker Compose, Kubernetes, Helm, ECS. Running newer MySQL versions with workarounds (removing the flag and switching to `caching_sha2_password`) is not a supported configuration. -TIP: The same principle applies to PostgreSQL. Pin `postgres:16` rather than `postgres:latest`. - [[database-user-privileges]] @@ -131,11 +136,13 @@ GRANT ALL PRIVILEGES ON ai_service.* TO 'ai_service'@'%'; [NOTE] -- -Some builds report false-positive "Not enough permissions to access database" errors even with `ALL PRIVILEGES`. If this occurs, grant the privileges globally rather than per-database, or use the MySQL `root` user for development. +Some versions of the AI service image report false-positive "Not enough permissions to access database" errors even with `ALL PRIVILEGES`. If this occurs, grant the privileges globally rather than per-database, or use the MySQL `root` user for development. -- === PostgreSQL +If `DATABASE_SCHEMA=public` was chosen (see <<postgresql-schema-prerequisite>>), substitute `public` for `"cs-on-premises"` in each statement below. + [source,sql] ---- CREATE USER ai_service WITH PASSWORD 'STRONG_PASSWORD'; @@ -160,12 +167,12 @@ GRANT ALL ON SCHEMA "cs-on-premises" TO ai_service; ---- ==== -If `DATABASE_SCHEMA=public` was chosen, substitute `public` for `"cs-on-premises"` in each grant statement. - == Database setup +The sections below provide ready-to-use configuration for each database engine. Use the Docker Compose files for local evaluation; for production, provision managed database services (Amazon RDS, Azure Database, Cloud SQL) and pass the connection details as environment variables (see <<_connecting_the_ai_service>>). 
+ === Docker Compose (recommended for evaluation) .MySQL compose file @@ -243,7 +250,7 @@ volumes: ---- ==== -After `docker compose up -d`, create the PostgreSQL schema (if not using `DATABASE_SCHEMA=public`): +If using PostgreSQL and not using `DATABASE_SCHEMA=public`, after `docker compose up -d`, create the schema: [source,bash] ---- @@ -251,7 +258,9 @@ docker compose exec postgres psql -U ai_service -d ai_service \ -c 'CREATE SCHEMA "cs-on-premises";' ---- -=== Docker single container +=== Docker single container (without Compose) + +Use these `docker run` commands when Docker Compose is not available or when integrating into existing orchestration scripts. .MySQL [%collapsible] @@ -431,7 +440,7 @@ DATABASE_SSL_CA=/certs/server-ca.pem Mount the certificate file into the container and reference the path in `DATABASE_SSL_CA`. Download the CA bundle from the cloud provider documentation. -NOTE: `DATABASE_SSL_CERT` and `DATABASE_SSL_KEY` are required only for mutual TLS (mTLS). Most managed database services require only the CA certificate for server verification. +NOTE: Most managed database services require only the CA certificate (`DATABASE_SSL_CA`) for server verification. `DATABASE_SSL_CERT` and `DATABASE_SSL_KEY` are additionally required only for mutual TLS (mTLS). [WARNING] -- @@ -474,7 +483,7 @@ Every AI service instance must reach Redis. Redis holds session coordination, Se === Setup -Redis is typically included in the Docker Compose file alongside the database (see the compose examples above). For standalone setup: +When using Docker Compose files, Redis is typically included alongside the database (see the compose examples above). 
For standalone setup: [source,bash] ---- diff --git a/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc b/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc index 08aad9d3ea..dc96af33bd 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc @@ -4,11 +4,13 @@ :keywords: AI, on-premises, React, Vue, Angular, Svelte, token provider -This page covers the *editor-side* configuration that connects TinyMCE to the on-premises AI service. It assumes: +This page covers the *editor-side* configuration — the final step that connects the browser-based TinyMCE editor to the running AI service. At this point in the deployment process, the xref:tinymceai-on-premises-database.adoc[data layer], xref:tinymceai-on-premises-providers.adoc[LLM providers], and xref:tinymceai-on-premises-jwt.adoc[JWT token endpoint] are already operational. This page wires the editor to that infrastructure. -* The AI service is already running. See xref:tinymceai-on-premises-getting-started.adoc[Getting started] for setup instructions. -* A token endpoint exists that signs JSON Web Tokens (JWTs) for the AI service. See xref:tinymceai-on-premises-jwt.adoc[JWT authentication] for back-end implementations. -* A valid TinyMCE license key or API key with the AI feature enabled. On-premises deployments typically use a license key provided by a Tiny account representative. +It assumes: + +* The AI service is already running and reachable from the browser (see xref:tinymceai-on-premises-getting-started.adoc[Getting started]). +* A token endpoint exists that signs HS256 JWTs (see xref:tinymceai-on-premises-jwt.adoc[JWT authentication]). +* A valid TinyMCE license key or API key with the AI feature enabled. 
For general framework setup (installing wrappers, component structure, server-side rendering (SSR) patterns), see the existing integration guides: diff --git a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc index 78918088ca..78fc4a4132 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc @@ -244,7 +244,7 @@ AI_API_SECRET=PASTE_API_SECRET_HERE [IMPORTANT] -- -Always create environments through the Management Panel UI. Environments created through the raw Management API are not fully registered and cause `invalid-jwt-payload` or `Environment not found` errors. See the xref:tinymceai-on-premises-jwt.adoc[JWT authentication] guide for details on environment and access key management. +Always create environments through the Management Panel UI. See the xref:tinymceai-on-premises-jwt.adoc[JWT authentication] guide for details on environment and access key management. -- === Create the token server @@ -370,9 +370,12 @@ The TinyMCE AI on-premises service is now running. After completing the quick start, exercise the pipeline end-to-end from the command line. +=== Step 1: Health check + +Confirms the AI service container is running and connected to the database and Redis. + [source,bash] ---- -# 1. Health check curl http://localhost:8000/health ---- @@ -383,9 +386,12 @@ Expected: {"serviceName":"on-premises-http","uptime":12.345} ---- +=== Step 2: Generate a token + +Confirms the token server can sign a valid JWT using the API Secret and Environment ID. + [source,bash] ---- -# 2. Generate a token curl -s -X POST http://localhost:3000/api/ai-token | python3 -m json.tool ---- @@ -398,9 +404,12 @@ Expected: } ---- +=== Step 3: Create a conversation and send a message + +Confirms the full chain: JWT verification, permissions, environment registration, LLM provider authentication, and SSE streaming. 
+ [source,bash] ---- -# 3. Create a conversation and send a message TOKEN=$(curl -s -X POST http://localhost:3000/api/ai-token | python3 -c "import sys,json;print(json.load(sys.stdin)['token'])") curl -s -X POST http://localhost:8000/v1/conversations \ diff --git a/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc b/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc index d1b91820a3..c8945c9306 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc @@ -3,6 +3,10 @@ :description: JWT authentication for the TinyMCE AI on-premises service using HS256 symmetric signing :keywords: AI, on-premises, JWT, authentication, HS256, multi-tenant +This page covers *authentication between the application back end and the AI service*. Every request from the editor to the AI service carries a signed JWT — this is how the service identifies users, enforces permissions, and isolates conversations. The token endpoint runs in the application back end; the editor calls it automatically through the `tinymceai_token_provider` callback configured in xref:tinymceai-on-premises-frameworks.adoc[TinyMCE integration]. + +Before configuring JWT, complete the xref:tinymceai-on-premises-database.adoc[data layer] and xref:tinymceai-on-premises-providers.adoc[LLM provider] setup. After JWT, proceed to xref:tinymceai-on-premises-frameworks.adoc[TinyMCE integration] to wire the editor to the token endpoint. + The on-premises AI service uses *HS256* (HMAC-SHA256, symmetric shared secret) for JSON Web Token (JWT) authentication. This is different from the Tiny Cloud AI service, which uses RS256. 
[WARNING] diff --git a/modules/ROOT/pages/tinymceai-on-premises-providers.adoc b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc index 694574bd5d..e78d69b53a 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-providers.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc @@ -6,7 +6,15 @@ -The `PROVIDERS` environment variable tells the AI service how to reach the configured large language model (LLM) provider. The `MODELS` environment variable tells the service which models are exposed to clients and which features each model supports. This page documents every supported `type`, every required field, and every known production issue for both variables. +This page configures *how the AI service reaches LLM providers* and *which models are exposed to clients*. It sits between the xref:tinymceai-on-premises-database.adoc[data layer setup] (which must be complete first) and xref:tinymceai-on-premises-jwt.adoc[JWT authentication] (which gates per-user access to the models configured here). The environment variables documented below are set on the AI service container — in a `docker run` command, a Kubernetes Deployment manifest, or an ECS task definition. + +Two environment variables and one JWT claim control provider and model configuration: + +* `PROVIDERS` — tells the AI service how to authenticate with each LLM provider API. +* `MODELS` — tells the service which models to expose to clients and which features each model supports. +* `auth.ai.permissions` (JWT claim) — gates per-user access to individual models. + +This page documents every supported provider `type`, every required field, and every known production issue for both variables. Start with the xref:tinymceai-on-premises-getting-started.adoc[Getting started guide] if the AI service container is not yet running. The following sections assume a running `ai-service` container. 
diff --git a/modules/ROOT/pages/tinymceai-on-premises.adoc b/modules/ROOT/pages/tinymceai-on-premises.adoc index 9fc4b9188b..0f6845068d 100644 --- a/modules/ROOT/pages/tinymceai-on-premises.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises.adoc @@ -9,8 +9,19 @@ The service ships as a single Open Container Initiative (OCI) container image (` == Architecture +The infrastructure consists of three layers: + +* The *client layer* runs in the browser (TinyMCE editor with the `tinymceai` plugin) and in the application back end (token endpoint that signs JWTs). +* The *application layer* runs the AI service container and communicates with LLM providers to process AI requests. It may consist of one or more instances behind a load balancer (round-robin recommended). Each instance runs the same stateless container image. +* The *data layer* consists of a SQL database, a Redis instance, and file storage: +** *SQL database* — stores persistent data: configurations, conversations, files, and documents. +** *Redis* — handles temporary data and inter-instance communication for scaling. When multiple instances are running, Redis ensures data is shared correctly across all of them. +** *File storage* — stores uploaded files and documents (S3, Azure Blob, filesystem, or the database itself). 
+ [.text-center] -image::tinymceai-on-premises/overview-fig-1.svg[alt="High-level architecture showing client token endpoint AI service LLM provider and data layer",width=100%] +image::tinymceai-on-premises/overview-fig-2.svg[alt="Deployment architecture showing data layer, application layer, client layer, and LLM provider connections",width=100%] + +=== Data flow Data flow for a single AI request: From 08c74b812323e2ba3912884bc13a17e9e1e72809 Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Thu, 21 May 2026 21:48:38 +1000 Subject: [PATCH 17/48] Improve providers diagram readability Add theme config with 14px font and wider node spacing, shorten truncated model names, use uniform arrow weight throughout, and fix SVG width to 1200px. --- .../providers-guide-fig-1.mmd | 25 ++++++++++--------- .../providers-guide-fig-1.svg | 2 +- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.mmd b/modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.mmd index 9586bf0667..6ce3388dfd 100644 --- a/modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.mmd +++ b/modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.mmd @@ -1,20 +1,21 @@ +%%{init: {'theme': 'base', 'themeVariables': { 'primaryColor': '#ECECFF', 'primaryBorderColor': '#9370DB', 'lineColor': '#333333', 'edgeLabelBackground': '#e8e8e8', 'fontSize': '14px' }, 'flowchart': { 'nodeSpacing': 30, 'rankSpacing': 60, 'wrappingWidth': 200 }}}%% flowchart LR subgraph JWT["JWT auth.ai.permissions"] - K1["ai:models:my-openai-key:
gpt-4.1"] - K2["ai:models:my-bedrock:
us.anthropic.claude-sonnet-4-..."] - K3["ai:models:my-ollama:
qwen3:0.6b"] + K1["ai:models:my-openai-key:\ngpt-4.1"] + K2["ai:models:my-bedrock:\nclaude-sonnet-4"] + K3["ai:models:my-ollama:\nqwen3:0.6b"] end subgraph MD["MODELS env var · JSON array"] - M1["gpt-4.1
provider: my-openai-key"] - M2["us.anthropic.claude-sonnet-4-...
provider: my-bedrock"] - M3["qwen3:0.6b
provider: my-ollama"] + M1["gpt-4.1\nprovider: my-openai-key"] + M2["claude-sonnet-4\nprovider: my-bedrock"] + M3["qwen3:0.6b\nprovider: my-ollama"] end subgraph PR["PROVIDERS env var · JSON object"] - P1["my-openai-key
type: openai"] - P2["my-bedrock
type: bedrock"] - P3["my-ollama
type: openai-compatible"] + P1["my-openai-key\ntype: openai"] + P2["my-bedrock\ntype: bedrock"] + P3["my-ollama\ntype: openai-compatible"] end K1 -->|"gates access"| M1 @@ -25,6 +26,6 @@ flowchart LR M2 -->|"provider key"| P2 M3 -->|"provider key"| P3 - P1 ==> LLM1["OpenAI API"] - P2 ==> LLM2["AWS Bedrock"] - P3 ==> LLM3["Local Ollama"] + P1 --> LLM1["OpenAI API"] + P2 --> LLM2["AWS Bedrock"] + P3 --> LLM3["Local Ollama"] diff --git a/modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.svg index d00943b640..05e6209293 100644 --- a/modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.svg +++ b/modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.svg @@ -1 +1 @@ -PROVIDERS env var · JSON objectMODELS env var · JSON arrayJWT auth.ai.permissionsgates accessgates accessgates accessprovider keyprovider keyprovider keyai:models:my-openai-key:gpt-4.1ai:models:my-bedrock:us.anthropic.claude-sonnet-4-...ai:models:my-ollama:qwen3:0.6bgpt-4.1provider: my-openai-keyus.anthropic.claude-sonnet-4-...provider: my-bedrockqwen3:0.6bprovider: my-ollamamy-openai-keytype: openaimy-bedrocktype: bedrockmy-ollamatype: openai-compatibleOpenAI APIAWS BedrockLocal Ollama \ No newline at end of file +

PROVIDERS env var · JSON object

MODELS env var · JSON array

JWT auth.ai.permissions

gates access

gates access

gates access

provider key

provider key

provider key

ai:models:my-openai-key:
gpt-4.1

ai:models:my-bedrock:
claude-sonnet-4

ai:models:my-ollama:
qwen3:0.6b

gpt-4.1
provider: my-openai-key

claude-sonnet-4
provider: my-bedrock

qwen3:0.6b
provider: my-ollama

my-openai-key
type: openai

my-bedrock
type: bedrock

my-ollama
type: openai-compatible

OpenAI API

AWS Bedrock

Local Ollama

\ No newline at end of file From bbe55ce00bb1258c63a57c819885a21048c06ba1 Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Thu, 21 May 2026 21:54:20 +1000 Subject: [PATCH 18/48] Restructure OpenAI-compatible provider section for clarity Un-collapse fields table and Ollama networking into visible sections, remove duplicated LLM_TIMEOUT_MS, consolidate vLLM and LM Studio into a comparison table, fix Verify to hit the AI service endpoint, and add a "when to use" introductory sentence. --- .../tinymceai-on-premises-providers.adoc | 90 ++++++++++--------- 1 file changed, 47 insertions(+), 43 deletions(-) diff --git a/modules/ROOT/pages/tinymceai-on-premises-providers.adoc b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc index e78d69b53a..c8c344f760 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-providers.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc @@ -665,12 +665,11 @@ IMPORTANT: *Region mismatch:* set `location` to a region that hosts the model. * == OpenAI-compatible (Ollama, vLLM, LM Studio, and similar) -For any HTTP API that implements the OpenAI Chat Completions interface, including self-hosted runtimes and commercial aggregators (OpenRouter, Together, Groq, Fireworks). The `baseUrl` *must* include the `/v1` suffix. +Use this provider type for any runtime or aggregator that is not listed as a native provider above but exposes an OpenAI-compatible `/v1/chat/completions` endpoint — including Ollama, vLLM, LM Studio, OpenRouter, Together, Groq, and Fireworks. -.Configuration details -[%collapsible] -==== -*JSON shape:* +The `baseUrl` *must* include the `/v1` suffix. 
+ +=== Configuration [source,json] ---- @@ -684,9 +683,7 @@ For any HTTP API that implements the OpenAI Chat Completions interface, includin } ---- -*Fields:* - -[cols=",,",options="header",] +[cols="1,1,3",options="header"] |=== |Field |Required |Notes |`type` |Yes |Literal `"openai-compatible"` @@ -697,7 +694,7 @@ For any HTTP API that implements the OpenAI Chat Completions interface, includin NOTE: File uploads through this adapter are limited to `image/*` MIME types. PDFs and Office documents are not forwarded. To work with non-image files, route through an OpenAI, Anthropic, or Bedrock provider instead. -*Ollama-specific setup:* +=== Ollama networking Ollama listens on `127.0.0.1:11434` by default, which is unreachable from inside a Docker container. Bind to all interfaces: @@ -749,6 +746,7 @@ PARAMETER stop "<|im_start|>" ---- The exact template depends on the base model. Check the model card for the recommended chat template. Verify tool support with `ollama show <model>` before connecting to the AI service. +==== The reasoning toggle (`capabilities.reasoning: true`) is cosmetic for Ollama-backed models; the openai-compatible adapter does not translate it to the native Ollama API. @@ -761,7 +759,7 @@ Large self-hosted models on consumer hardware can exceed the default 180-second -e LLM_TIMEOUT_MS='600000' ---- -=== Example -- Ollama +=== Example — Ollama [source,bash] ---- @@ -781,61 +779,67 @@ Large self-hosted models on consumer hardware can exceed the default 180-second "features": ["conversations", "reviews", "actions"] } ]' - --e LLM_TIMEOUT_MS='600000' ---- -=== Example -- vLLM +=== Example — vLLM and LM Studio -[source,bash] +The configuration pattern is identical to Ollama — only the `baseUrl`, provider key, and model ID differ.
+ +[cols="1,2,2",options="header"] +|=== +|Runtime |`PROVIDERS` entry |`MODELS` entry + +|vLLM +a|[source,json] ---- --e PROVIDERS='{ +{ "vllm": { "type": "openai-compatible", "baseUrl": "http://vllm-host.internal:8001/v1", "apiKeys": ["YOUR_VLLM_TOKEN"] } -}' - --e MODELS='[ - { - "id": "meta-llama/Llama-3.1-8B-Instruct", - "provider": "vllm", - "name": "Llama 3.1 8B (vLLM)", - "description": "Self-hosted Llama 3.1 8B served through vLLM", - "features": ["conversations", "reviews", "actions"] - } -]' +} +---- +a|[source,json] +---- +{ + "id": "meta-llama/Llama-3.1-8B-Instruct", + "provider": "vllm", + "name": "Llama 3.1 8B (vLLM)", + "features": ["conversations", "reviews", "actions"] +} ---- -=== Example -- LM Studio - -[source,bash] +|LM Studio +a|[source,json] ---- --e PROVIDERS='{ +{ "lmstudio": { "type": "openai-compatible", "baseUrl": "http://host.docker.internal:1234/v1" } -}' - --e MODELS='[ - { - "id": "lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF", - "provider": "lmstudio", - "name": "Llama 3.1 8B (LM Studio)", - "description": "Local LM Studio runtime", - "features": ["conversations", "actions"] - } -]' +} +---- +a|[source,json] ---- +{ + "id": "lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF", + "provider": "lmstudio", + "name": "Llama 3.1 8B (LM Studio)", + "features": ["conversations", "actions"] +} +---- +|=== === Verify +After starting the AI service with an openai-compatible provider configured, confirm it can reach the runtime: + [source,bash] ---- -curl -s http://host.docker.internal:11434/v1/chat/completions \ +curl -s http://localhost:8000/v1/chat/completions \ -H "Content-Type: application/json" \ + -H "Authorization: Bearer <TOKEN>" \ -d '{ "model": "qwen3:0.6b", "messages": [{"role":"user","content":"Say hello in five words."}], @@ -843,7 +847,7 @@ curl -s http://host.docker.internal:11434/v1/chat/completions \ }' ---- -When the `curl` call returns a chat completion, the AI service can use the same endpoint.
+A successful response confirms the AI service can forward requests to the local runtime. From d663c6142d197207aad9a175698eb93f9b3cc341 Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Thu, 21 May 2026 21:55:22 +1000 Subject: [PATCH 19/48] Increase overview architecture diagram width to 1200px --- modules/ROOT/images/tinymceai-on-premises/overview-fig-2.svg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ROOT/images/tinymceai-on-premises/overview-fig-2.svg b/modules/ROOT/images/tinymceai-on-premises/overview-fig-2.svg index 61141945bc..0b15ad24f8 100644 --- a/modules/ROOT/images/tinymceai-on-premises/overview-fig-2.svg +++ b/modules/ROOT/images/tinymceai-on-premises/overview-fig-2.svg @@ -1 +1 @@ -

Data layer

Application layer

Client layer

1. signed JWT

2. prompt + JWT

5. SSE stream

3. forward

4. stream

Token endpoint
(back end)

TinyMCE editor
(browser)

AI service
(container)

Database
+ Redis
+ Storage

LLM provider

\ No newline at end of file +

Data layer

Application layer

Client layer

1. signed JWT

2. prompt + JWT

5. SSE stream

3. forward

4. stream

Token endpoint
(back end)

TinyMCE editor
(browser)

AI service
(container)

Database
+ Redis
+ Storage

LLM provider

\ No newline at end of file From ccfe41ee1773d2382edccf61b3cfd438e6a4fbe9 Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Thu, 21 May 2026 22:15:40 +1000 Subject: [PATCH 20/48] Normalize definition-style lists to colon delimiter Replace em dashes and double hyphens with colons in label:description list patterns across all on-premises pages. Normalize bold formatting to single-asterisk emphasis for consistency. --- .../pages/tinymceai-on-premises-database.adoc | 6 +++--- .../tinymceai-on-premises-getting-started.adoc | 10 +++++----- .../ROOT/pages/tinymceai-on-premises-jwt.adoc | 8 ++++---- .../ROOT/pages/tinymceai-on-premises-mcp.adoc | 6 +++--- .../pages/tinymceai-on-premises-production.adoc | 16 ++++++++-------- .../pages/tinymceai-on-premises-providers.adoc | 16 ++++++++-------- modules/ROOT/pages/tinymceai-on-premises.adoc | 6 +++--- 7 files changed, 34 insertions(+), 34 deletions(-) diff --git a/modules/ROOT/pages/tinymceai-on-premises-database.adoc b/modules/ROOT/pages/tinymceai-on-premises-database.adoc index 1d1b23f2d4..3168af7e9a 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-database.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-database.adoc @@ -5,9 +5,9 @@ This page covers the *data layer*: the SQL database, Redis, and file storage. These components must be running and accessible before the AI service container can start — the service connects to them on boot and will not proceed without them. -* *SQL database* — stores persistent data such as configurations, conversations, files, and documents. -* *Redis* — handles temporary data and inter-instance communication for scaling. When multiple application instances are running, Redis ensures that data is shared correctly across all of them. -* *File storage* — stores uploaded files and documents. +* *SQL database*: stores persistent data such as configurations, conversations, files, and documents. +* *Redis*: handles temporary data and inter-instance communication for scaling. 
When multiple application instances are running, Redis ensures that data is shared correctly across all of them. +* *File storage*: stores uploaded files and documents. Configure the data layer first, then proceed to xref:tinymceai-on-premises-providers.adoc[LLM providers] and xref:tinymceai-on-premises-jwt.adoc[JWT authentication]. For container runtimes, reverse proxies, Transport Layer Security (TLS), Kubernetes, and ECS deployment, see the xref:tinymceai-on-premises-production.adoc[Production deployment guide]. diff --git a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc index 78fc4a4132..0707fdd387 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc @@ -5,11 +5,11 @@ This guide sets up a fully working local stack in roughly five minutes on any machine with Docker: -* **MySQL 8.0** — conversation history and metadata -* **Redis** — caching and session state -* **TinyMCE AI service** — the on-premises AI back end -* **A minimal token server** (Node.js) — signs JWTs for the editor -* **A browser page with TinyMCE** — validates the end-to-end flow +* *MySQL 8.0*: conversation history and metadata +* *Redis*: caching and session state +* *TinyMCE AI service*: the on-premises AI back end +* *A minimal token server* (Node.js): signs JWTs for the editor +* *A browser page with TinyMCE*: validates the end-to-end flow The quick start is designed to validate the stack components before moving to a production deployment. Production engineers can review this section to understand the conceptual flow before continuing to xref:tinymceai-on-premises-production.adoc[Production deployment]. 
diff --git a/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc b/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc index c8945c9306..48ab8390cb 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc @@ -984,7 +984,7 @@ CAUTION: Conversation history is isolated by the `sub` claim within each Environ == See also -* xref:tinymceai-on-premises-getting-started.adoc[Getting started] -- end-to-end deployment, including a demo token server -* xref:tinymceai-on-premises-providers.adoc[large language model (LLM) providers] -- configuring custom models through `MODELS` and the `ai:models:<provider>:<model>` permission syntax -* xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting] -- full troubleshooting catalog beyond JWT -* xref:tinymceai-on-premises-frameworks.adoc[TinyMCE integration] -- editor-side integration patterns for React, Vue, and Angular, including `tinymceai_token_provider` wrappers +* xref:tinymceai-on-premises-getting-started.adoc[Getting started]: end-to-end deployment, including a demo token server +* xref:tinymceai-on-premises-providers.adoc[LLM providers]: configuring custom models through `MODELS` and the `ai:models:<provider>:<model>` permission syntax +* xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting]: full troubleshooting catalog beyond JWT +* xref:tinymceai-on-premises-frameworks.adoc[TinyMCE integration]: editor-side integration patterns for React, Vue, and Angular, including `tinymceai_token_provider` wrappers diff --git a/modules/ROOT/pages/tinymceai-on-premises-mcp.adoc b/modules/ROOT/pages/tinymceai-on-premises-mcp.adoc index 59b4c018c6..f83b1df144 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-mcp.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-mcp.adoc @@ -262,6 +262,6 @@ app.listen(4001); == See also -* xref:tinymceai-on-premises-providers.adoc[LLM providers] -- provider configuration and the `MODELS` catalog -* xref:tinymceai-on-premises-reference.adoc[Reference] -- full
environment variable reference including `MCP_SERVERS`, `WEBRESOURCES_*`, and `WEBSEARCH_*` -* xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting] -- general troubleshooting +* xref:tinymceai-on-premises-providers.adoc[LLM providers]: provider configuration and the `MODELS` catalog +* xref:tinymceai-on-premises-reference.adoc[Reference]: full environment variable reference including `MCP_SERVERS`, `WEBRESOURCES_*`, and `WEBSEARCH_*` +* xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting]: general troubleshooting diff --git a/modules/ROOT/pages/tinymceai-on-premises-production.adoc b/modules/ROOT/pages/tinymceai-on-premises-production.adoc index e299b64dce..f599790ebb 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-production.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-production.adoc @@ -9,10 +9,10 @@ This guide assumes a running Kubernetes cluster, ECS cluster, or Docker/Podman h Before deploying to production, confirm each item: -. xref:tinymceai-on-premises-database.adoc[Database and Redis] — provisioned, accessible from the AI service, schema created (PostgreSQL). -. xref:tinymceai-on-premises-providers.adoc[LLM providers] — `PROVIDERS` configured and verified; `MODELS` defined for the target provider(s). -. xref:tinymceai-on-premises-jwt.adoc[JWT authentication] — token endpoint deployed, signing with HS256 and the correct API Secret. -. xref:tinymceai-on-premises-frameworks.adoc[TinyMCE integration] — `tinymceai_service_url` and `tinymceai_token_provider` configured; `ALLOWED_ORIGINS` set on the AI service. +. xref:tinymceai-on-premises-database.adoc[Database and Redis]: provisioned, accessible from the AI service, schema created (PostgreSQL). +. xref:tinymceai-on-premises-providers.adoc[LLM providers]: `PROVIDERS` configured and verified; `MODELS` defined for the target provider(s). +. xref:tinymceai-on-premises-jwt.adoc[JWT authentication]: token endpoint deployed, signing with HS256 and the correct API Secret. +. 
xref:tinymceai-on-premises-frameworks.adoc[TinyMCE integration]: `tinymceai_service_url` and `tinymceai_token_provider` configured; `ALLOWED_ORIGINS` set on the AI service. . Container image pulled and registry credentials stored as a secret. . Reverse proxy with TLS termination and `proxy_buffering off` for SSE. . Environment and access key created through the Management Panel. @@ -564,10 +564,10 @@ For production multi-instance deployments, ship container logs to the existing l The following checks help catch common issues early: -* **Health endpoint** -- poll `/health` on each instance; alert if any instance returns a non-200 response for more than 60 seconds. -* **Error rate** -- monitor the HTTP 5xx rate in the metric logs or traces; a sustained increase may indicate an LLM provider outage or a misconfigured environment. -* **Latency** -- track request duration; a sudden increase typically points to LLM provider throttling or network issues. -* **Container restarts** -- alert on repeated container restarts, which may indicate a missing environment variable or a database connectivity problem. +* *Health endpoint*: poll `/health` on each instance; alert if any instance returns a non-200 response for more than 60 seconds. +* *Error rate*: monitor the HTTP 5xx rate in the metric logs or traces; a sustained increase may indicate an LLM provider outage or a misconfigured environment. +* *Latency*: track request duration; a sudden increase typically points to LLM provider throttling or network issues. +* *Container restarts*: alert on repeated container restarts, which may indicate a missing environment variable or a database connectivity problem. For troubleshooting specific error patterns, see xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting]. 
diff --git a/modules/ROOT/pages/tinymceai-on-premises-providers.adoc b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc index c8c344f760..9c0034fc81 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-providers.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc @@ -10,9 +10,9 @@ This page configures *how the AI service reaches LLM providers* and *which model Two environment variables and one JWT claim control provider and model configuration: -* `PROVIDERS` — tells the AI service how to authenticate with each LLM provider API. -* `MODELS` — tells the service which models to expose to clients and which features each model supports. -* `auth.ai.permissions` (JWT claim) — gates per-user access to individual models. +* `PROVIDERS`: tells the AI service how to authenticate with each LLM provider API. +* `MODELS`: tells the service which models to expose to clients and which features each model supports. +* `auth.ai.permissions` (JWT claim): gates per-user access to individual models. This page documents every supported provider `type`, every required field, and every known production issue for both variables. 
@@ -1036,8 +1036,8 @@ More general troubleshooting (database, JWT, storage, networking) lives in xref: == See also -* xref:tinymceai-on-premises-mcp.adoc[MCP and web integrations] -- Model Context Protocol tool calling and web scraping/search endpoints -* xref:tinymceai-on-premises-getting-started.adoc[Getting started] -- initial container bring-up and demo -* xref:tinymceai-on-premises-jwt.adoc[JWT authentication] -- per-model and per-provider JWT permissions -* xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage] -- MySQL/Postgres configuration for the AI service -* xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting] -- general troubleshooting beyond provider configuration +* xref:tinymceai-on-premises-mcp.adoc[MCP and web integrations]: Model Context Protocol tool calling and web scraping/search endpoints +* xref:tinymceai-on-premises-getting-started.adoc[Getting started]: initial container bring-up and demo +* xref:tinymceai-on-premises-jwt.adoc[JWT authentication]: per-model and per-provider JWT permissions +* xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage]: MySQL/Postgres configuration for the AI service +* xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting]: general troubleshooting beyond provider configuration diff --git a/modules/ROOT/pages/tinymceai-on-premises.adoc b/modules/ROOT/pages/tinymceai-on-premises.adoc index 0f6845068d..af974d85cd 100644 --- a/modules/ROOT/pages/tinymceai-on-premises.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises.adoc @@ -14,9 +14,9 @@ The infrastructure consists of three layers: * The *client layer* runs in the browser (TinyMCE editor with the `tinymceai` plugin) and in the application back end (token endpoint that signs JWTs). * The *application layer* runs the AI service container and communicates with LLM providers to process AI requests. It may consist of one or more instances behind a load balancer (round-robin recommended). 
Each instance runs the same stateless container image. * The *data layer* consists of a SQL database, a Redis instance, and file storage: -** *SQL database* — stores persistent data: configurations, conversations, files, and documents. -** *Redis* — handles temporary data and inter-instance communication for scaling. When multiple instances are running, Redis ensures data is shared correctly across all of them. -** *File storage* — stores uploaded files and documents (S3, Azure Blob, filesystem, or the database itself). +** *SQL database*: stores persistent data: configurations, conversations, files, and documents. +** *Redis*: handles temporary data and inter-instance communication for scaling. When multiple instances are running, Redis ensures data is shared correctly across all of them. +** *File storage*: stores uploaded files and documents (S3, Azure Blob, filesystem, or the database itself). [.text-center] image::tinymceai-on-premises/overview-fig-2.svg[alt="Deployment architecture showing data layer, application layer, client layer, and LLM provider connections",width=100%] From 1214abfd53c22095cf91ac994ebbf9c6149e2a41 Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Fri, 22 May 2026 13:20:22 +1000 Subject: [PATCH 21/48] Improve on-premises docs structure, flow, and placeholder consistency - Reorder nav: MCP moved after TinyMCE Integration, marked optional - Un-collapse token server, K8s manifest, and MySQL compose file - Add "Next steps" section to Getting Started bridging to production - Replace dynamic launch script with static docker run + TIP - Add expected boot log after docker run - Promote prerequisites to "Before you begin" with verification commands - Add complete docker run reference to Reference page - Add numbered steps (1-5) to Production K8s section - Standardize placeholders to format across all pages - Condense agent-1 explanation on Providers page - Remove raw management API reference from Production page --- 
modules/ROOT/nav.adoc | 2 +- .../pages/tinymceai-on-premises-database.adoc | 47 ++++------- ...tinymceai-on-premises-getting-started.adoc | 83 +++++++++++-------- .../tinymceai-on-premises-production.adoc | 45 +++++----- .../tinymceai-on-premises-providers.adoc | 25 +++--- .../tinymceai-on-premises-reference.adoc | 32 +++++++ modules/ROOT/pages/tinymceai-on-premises.adoc | 10 +-- 7 files changed, 136 insertions(+), 108 deletions(-) diff --git a/modules/ROOT/nav.adoc b/modules/ROOT/nav.adoc index 027d7a6970..0d1c09cff8 100644 --- a/modules/ROOT/nav.adoc +++ b/modules/ROOT/nav.adoc @@ -274,10 +274,10 @@ ***** xref:tinymceai-on-premises-getting-started.adoc[Getting started] ***** xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage] ***** xref:tinymceai-on-premises-providers.adoc[LLM providers] -***** xref:tinymceai-on-premises-mcp.adoc[MCP and web integrations] ***** xref:tinymceai-on-premises-jwt.adoc[JWT authentication] ***** xref:tinymceai-on-premises-frameworks.adoc[TinyMCE integration] ***** xref:tinymceai-on-premises-production.adoc[Production deployment] +***** xref:tinymceai-on-premises-mcp.adoc[MCP and web integrations (optional)] ***** xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting] ***** xref:tinymceai-on-premises-reference.adoc[Reference] **** xref:ai.adoc[AI Assistant (legacy)] diff --git a/modules/ROOT/pages/tinymceai-on-premises-database.adoc b/modules/ROOT/pages/tinymceai-on-premises-database.adoc index 3168af7e9a..77e5eefc85 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-database.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-database.adoc @@ -37,21 +37,11 @@ The AI service supports both MySQL and PostgreSQL equally. Pick whichever the op == Choosing a setup path +Use Docker Compose for evaluation, managed cloud services (Amazon RDS, Cloud SQL, Azure Database) for production. All paths produce the same end state: a running database the AI service can connect to. 
Both MySQL and PostgreSQL are supported in every configuration. + [.text-center] image::tinymceai-on-premises/database-setup-fig-1.svg[Database setup decision tree: local Docker Compose vs managed cloud database for evaluation and production,width=100%] -All paths produce the same end state: a running database the AI service can connect to. - -[cols="1,1,1",options="header"] -|=== -|Path |MySQL |PostgreSQL - -|Docker / Podman |Yes |Yes -|Docker Compose |Yes |Yes -|Native (macOS / Linux) |Yes |Yes -|Managed cloud (RDS, Cloud SQL, Azure) |Yes |Yes -|=== - [[postgresql-schema-prerequisite]] @@ -117,7 +107,7 @@ The database user needs enough privilege to create, alter, and operate on these [source,sql] ---- -CREATE USER 'ai_service'@'%' IDENTIFIED BY 'STRONG_PASSWORD'; +CREATE USER 'ai_service'@'%' IDENTIFIED BY ''; GRANT SELECT, INSERT, UPDATE, DELETE, ALTER, CREATE, DROP, INDEX, TRIGGER, LOCK TABLES, REFERENCES @@ -145,7 +135,7 @@ If `DATABASE_SCHEMA=public` was chosen (see <>), [source,sql] ---- -CREATE USER ai_service WITH PASSWORD 'STRONG_PASSWORD'; +CREATE USER ai_service WITH PASSWORD ''; CREATE DATABASE ai_service OWNER ai_service; \c ai_service CREATE SCHEMA "cs-on-premises" AUTHORIZATION ai_service; @@ -175,19 +165,17 @@ The sections below provide ready-to-use configuration for each database engine. 
=== Docker Compose (recommended for evaluation) -.MySQL compose file -[%collapsible] -==== [source,yaml] +.MySQL compose file ---- services: mysql: image: mysql:8.0 environment: - MYSQL_ROOT_PASSWORD: ROOT_PASSWORD + MYSQL_ROOT_PASSWORD: MYSQL_DATABASE: ai_service MYSQL_USER: ai_service - MYSQL_PASSWORD: STRONG_PASSWORD + MYSQL_PASSWORD: ports: - "3306:3306" volumes: @@ -211,7 +199,6 @@ services: volumes: mysql_data: ---- -==== .PostgreSQL compose file [%collapsible] @@ -224,7 +211,7 @@ services: environment: POSTGRES_DB: ai_service POSTGRES_USER: ai_service - POSTGRES_PASSWORD: STRONG_PASSWORD + POSTGRES_PASSWORD: ports: - "5432:5432" volumes: @@ -269,10 +256,10 @@ Use these `docker run` commands when Docker Compose is not available or when int ---- docker run -d \ --name ai-mysql \ - -e MYSQL_ROOT_PASSWORD=ROOT_PASSWORD \ + -e MYSQL_ROOT_PASSWORD= \ -e MYSQL_DATABASE=ai_service \ -e MYSQL_USER=ai_service \ - -e MYSQL_PASSWORD=STRONG_PASSWORD \ + -e MYSQL_PASSWORD= \ -p 3306:3306 \ -v ai_mysql_data:/var/lib/mysql \ mysql:8.0 @@ -288,7 +275,7 @@ docker run -d \ --name ai-postgres \ -e POSTGRES_DB=ai_service \ -e POSTGRES_USER=ai_service \ - -e POSTGRES_PASSWORD=STRONG_PASSWORD \ + -e POSTGRES_PASSWORD= \ -p 5432:5432 \ -v ai_pg_data:/var/lib/postgresql/data \ postgres:16 @@ -319,7 +306,7 @@ brew services start mysql mysql_secure_installation mysql -u root -p <<'SQL' CREATE DATABASE ai_service; -CREATE USER 'ai_service'@'%' IDENTIFIED BY 'STRONG_PASSWORD'; +CREATE USER 'ai_service'@'%' IDENTIFIED BY ''; GRANT SELECT, INSERT, UPDATE, DELETE, ALTER, CREATE, DROP, INDEX, TRIGGER, LOCK TABLES, REFERENCES ON ai_service.* TO 'ai_service'@'%'; @@ -361,7 +348,7 @@ sudo systemctl enable --now mysql sudo mysql_secure_installation sudo mysql <<'SQL' CREATE DATABASE ai_service; -CREATE USER 'ai_service'@'%' IDENTIFIED BY 'STRONG_PASSWORD'; +CREATE USER 'ai_service'@'%' IDENTIFIED BY ''; GRANT SELECT, INSERT, UPDATE, DELETE, ALTER, CREATE, DROP, INDEX, TRIGGER, LOCK TABLES, 
REFERENCES ON ai_service.* TO 'ai_service'@'%'; @@ -379,7 +366,7 @@ sudo apt update sudo apt install -y postgresql postgresql-contrib sudo systemctl enable --now postgresql sudo -u postgres psql <<'SQL' -CREATE USER ai_service WITH PASSWORD 'STRONG_PASSWORD'; +CREATE USER ai_service WITH PASSWORD ''; CREATE DATABASE ai_service OWNER ai_service; SQL sudo -u postgres psql -d ai_service \ @@ -653,7 +640,7 @@ After provisioning the database and Redis, pass the connection details to the AI -e DATABASE_HOST='mysql' \ -e DATABASE_PORT='3306' \ -e DATABASE_USER='ai_service' \ --e DATABASE_PASSWORD='STRONG_PASSWORD' \ +-e DATABASE_PASSWORD='' \ -e DATABASE_DATABASE='ai_service' \ -e REDIS_HOST='redis' \ -e REDIS_PORT='6379' @@ -667,7 +654,7 @@ After provisioning the database and Redis, pass the connection details to the AI -e DATABASE_HOST='postgres' \ -e DATABASE_PORT='5432' \ -e DATABASE_USER='ai_service' \ --e DATABASE_PASSWORD='STRONG_PASSWORD' \ +-e DATABASE_PASSWORD='' \ -e DATABASE_DATABASE='ai_service' \ -e DATABASE_SCHEMA='cs-on-premises' \ -e REDIS_HOST='redis' \ @@ -714,7 +701,7 @@ For a complete `docker run` command including all env vars, see the xref:tinymce [source,bash] ---- -mysql --host=DB_HOST --user=ai_service --password=STRONG_PASSWORD \ +mysql --host=DB_HOST --user=ai_service --password= \ ai_service --port=3306 -e "SELECT 1" ---- diff --git a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc index 0707fdd387..33f0b4bb23 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc @@ -13,10 +13,19 @@ This guide sets up a fully working local stack in roughly five minutes on any ma The quick start is designed to validate the stack components before moving to a production deployment. 
Production engineers can review this section to understand the conceptual flow before continuing to xref:tinymceai-on-premises-production.adoc[Production deployment]. -=== Prerequisites +== Before you begin + +Verify the following are installed and accessible: + +[source,bash] +---- +docker --version # 20.10+ required (or podman --version for Podman 4+) +node --version # 18+ required +npm --version +---- + +Additionally, have the following credentials ready: -* Docker 20.10{plus} (or Podman 4{plus}) -* Node.js 18{plus} and npm (for the demo token server) * A TinyMCE license key and container registry credentials (from the Tiny account representative) * At least one LLM provider API key (OpenAI, Anthropic, or Google) @@ -37,7 +46,7 @@ For Docker: [source,bash] ---- -docker login -u 'TINY_REGISTRY_USERNAME' https://registry.containers.tiny.cloud +docker login -u '<TINY_REGISTRY_USERNAME>' https://registry.containers.tiny.cloud # Docker prompts for the password; this avoids leaking it in shell history. ---- @@ -45,10 +54,10 @@ For Podman: [source,bash] ---- -podman login -u 'TINY_REGISTRY_USERNAME' registry.containers.tiny.cloud +podman login -u '<TINY_REGISTRY_USERNAME>' registry.containers.tiny.cloud ---- -Replace `TINY_REGISTRY_USERNAME` with the username supplied by the Tiny account representative. If credentials have not been received, contact `support@tiny.cloud`. +Replace `<TINY_REGISTRY_USERNAME>` with the username supplied by the Tiny account representative. If credentials have not been received, contact `support@tiny.cloud`. === Pull the AI service image @@ -155,35 +164,21 @@ Both data layer containers (MySQL and Redis) should report `healthy` in the STAT The AI service runs as a standalone container outside of the Docker Compose stack. This separation allows upgrading or reconfiguring the AI service without restarting the database and Redis.
-Run from the same folder as the `.env` file: +First, find the Docker Compose network name (Docker creates it from the folder name): [source,bash] ---- -set -a && source .env && set +a +docker network ls --format '{{.Name}}' | grep default +---- -PROVIDERS='{' -if [ -n "$OPENAI_API_KEY" ]; then - PROVIDERS+='"openai":{"type":"openai","apiKeys":["'"$OPENAI_API_KEY"'"]}' -fi -if [ -n "$ANTHROPIC_API_KEY" ]; then - [ "$PROVIDERS" != '{' ] && PROVIDERS+=',' - PROVIDERS+='"anthropic":{"type":"anthropic","apiKeys":["'"$ANTHROPIC_API_KEY"'"]}' -fi -if [ -n "$GOOGLE_API_KEY" ]; then - [ "$PROVIDERS" != '{' ] && PROVIDERS+=',' - PROVIDERS+='"google":{"type":"google","apiKeys":["'"$GOOGLE_API_KEY"'"]}' -fi -PROVIDERS+='}' - -# Resolve the compose network name (varies across Docker versions and folder names) -NETWORK=$(docker network ls --format '{{.Name}}' | grep -E "^$(basename "$PWD" | tr '[:upper:]' '[:lower:]')[_-]default$" | head -1) -if [ -z "$NETWORK" ]; then - echo "ERROR: Could not find the Docker Compose network. Run 'docker network ls' and pass the network name with --network=." - exit 1 -fi +Use the matching network name in `--network` below. Then run from the same folder as the `.env` file: + +[source,bash] +---- +set -a && source .env && set +a docker run --init -d -p 8000:8000 \ - --network "$NETWORK" \ + --network _default \ --name ai-service \ -e LICENSE_KEY="$LICENSE_KEY" \ -e ENVIRONMENTS_MANAGEMENT_SECRET_KEY="$MANAGEMENT_SECRET" \ @@ -193,13 +188,15 @@ docker run --init -d -p 8000:8000 \ -e DATABASE_PASSWORD="$DB_PASSWORD" \ -e DATABASE_DATABASE='ai_service' \ -e REDIS_HOST='redis' \ - -e PROVIDERS="$PROVIDERS" \ + -e PROVIDERS='{"openai":{"type":"openai","apiKeys":["'"$OPENAI_API_KEY"'"]}}' \ -e STORAGE_DRIVER='database' \ -e ALLOWED_ORIGINS='http://localhost:3000' \ -e ENABLE_METRIC_LOGS='true' \ registry.containers.tiny.cloud/ai-service-tiny:latest ---- +TIP: The network name is typically `_default` (e.g., `tinymce-ai-onpremise_default`). 
Run `docker network ls` to confirm. For multiple LLM providers, extend the `PROVIDERS` JSON: `{"openai":{...},"anthropic":{...}}`. + For Podman, replace `docker run` with `podman run` and use a Podman pod instead of a compose network. See xref:tinymceai-on-premises-production.adoc[Production deployment] for Podman-specific guidance. See xref:tinymceai-on-premises-production.adoc#_podman_deployment[Podman deployment] for a full example. For native databases (the database runs on the host or in a managed service rather than in Docker), drop the `--network` flag and set `DATABASE_HOST=host.docker.internal` (Docker Desktop and Podman 4{plus}). On native Linux Docker, additionally pass `--add-host=host.docker.internal:host-gateway`. @@ -218,6 +215,17 @@ Expected response: {"serviceName":"on-premises-http","uptime":5.123} ---- +.Successful boot log (`docker logs ai-service`) +[source,text] +---- +Connecting to database (driver=mysql host=mysql) +Running migrations... +Migrations complete: 32 tables ready +Connecting to Redis (host=redis:6379) +Redis connected +Server is listening on port 8000. +---- + [WARNING] -- If the container exits immediately, run `docker logs ai-service`. The most common causes are documented in the xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting] guide. The top three are: malformed `LICENSE_KEY` (line breaks from word wrap), missing PostgreSQL schema, and JSON syntax error in `PROVIDERS`. @@ -271,10 +279,8 @@ Create `package.json`: Create `token-server.js`: -.Full token-server.js listing -[%collapsible] -==== [source,javascript] +.token-server.js ---- require('dotenv').config(); const express = require('express'); @@ -350,7 +356,6 @@ app.listen(PORT, () => { console.log('AI Service: ' + AI_SERVICE_URL); }); ---- -==== === Install and run @@ -493,3 +498,13 @@ For Kubernetes, scale the deployment to zero or delete it. 
Persistent volumes fo ---- kubectl delete deployment ai-service -n tinymce-ai ---- + +== Next steps + +The quick start validates the stack end-to-end on a single machine. To deploy for production, work through each guide in order: + +. xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage]: provision managed databases, configure TLS, and set up production-grade file storage. +. xref:tinymceai-on-premises-providers.adoc[LLM providers]: configure explicit model catalogs and multi-provider routing. +. xref:tinymceai-on-premises-jwt.adoc[JWT authentication]: build the production token endpoint with proper permissions and multi-tenant isolation. +. xref:tinymceai-on-premises-frameworks.adoc[TinyMCE integration]: wire the editor to the production AI service with CORS and CSP. +. xref:tinymceai-on-premises-production.adoc[Production deployment]: deploy to Kubernetes or ECS with TLS, scaling, and observability. diff --git a/modules/ROOT/pages/tinymceai-on-premises-production.adoc b/modules/ROOT/pages/tinymceai-on-premises-production.adoc index f599790ebb..c42db02149 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-production.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-production.adoc @@ -102,14 +102,14 @@ The AI service works with Podman as an alternative to Docker. In Podman, contain [source,bash] ---- -podman login -u 'TINY_REGISTRY_USERNAME' registry.containers.tiny.cloud +podman login -u '' registry.containers.tiny.cloud podman pull registry.containers.tiny.cloud/ai-service-tiny:latest podman pod create --name ai-pod -p 8000:8000 -p 3306:3306 -p 6379:6379 podman run -d --pod ai-pod --name mysql \ - -e MYSQL_ROOT_PASSWORD=ROOT_PASSWORD \ + -e MYSQL_ROOT_PASSWORD= \ -e MYSQL_DATABASE=ai_service \ mysql:8.0 @@ -117,11 +117,11 @@ podman run -d --pod ai-pod --name redis redis:7 podman run --init -d --pod ai-pod --name ai-service \ -e LICENSE_KEY='T8LK:...' 
\ - -e ENVIRONMENTS_MANAGEMENT_SECRET_KEY='MANAGEMENT_SECRET' \ + -e ENVIRONMENTS_MANAGEMENT_SECRET_KEY='' \ -e DATABASE_DRIVER='mysql' \ -e DATABASE_HOST='127.0.0.1' \ -e DATABASE_USER='root' \ - -e DATABASE_PASSWORD='ROOT_PASSWORD' \ + -e DATABASE_PASSWORD='' \ -e DATABASE_DATABASE='ai_service' \ -e REDIS_HOST='127.0.0.1' \ -e PROVIDERS='{"openai":{"type":"openai","apiKeys":["sk-proj-..."]}}' \ @@ -133,7 +133,9 @@ IMPORTANT: Pin to `mysql:8.0`. The `mysql:8` tag floats to the latest MySQL, whi == Kubernetes deployment -=== Namespace and image pull secret +Deploy the AI service to Kubernetes in five steps: create a namespace, store secrets, apply the Deployment, expose as a Service, and verify. + +=== Step 1: Namespace and image pull secret [source,bash] ---- @@ -142,11 +144,11 @@ kubectl create namespace tinymce-ai kubectl create secret docker-registry tiny-registry \ --namespace tinymce-ai \ --docker-server=registry.containers.tiny.cloud \ - --docker-username=TINY_REGISTRY_USERNAME \ - --docker-password='TINY_REGISTRY_ACCESS_TOKEN' + --docker-username= \ + --docker-password='' ---- -=== Application secrets +=== Step 2: Application secrets [source,yaml] ---- @@ -157,29 +159,27 @@ metadata: namespace: tinymce-ai type: Opaque stringData: - license-key: "EXAMPLE_LICENSE_KEY" - management-secret: "EXAMPLE_MANAGEMENT_SECRET" - db-password: "EXAMPLE_DB_PASSWORD" - redis-password: "EXAMPLE_REDIS_PASSWORD" - storage-access-key: "EXAMPLE_S3_ACCESS_KEY_ID" - storage-secret-key: "EXAMPLE_S3_SECRET_ACCESS_KEY" + license-key: "" + management-secret: "" + db-password: "" + redis-password: "" + storage-access-key: "" + storage-secret-key: "" providers: | { "openai": { "type": "openai", - "apiKeys": ["sk-proj-EXAMPLE_KEY"] + "apiKeys": [""] } } ---- In production, use https://sealed-secrets.netlify.app/[Sealed Secrets], https://external-secrets.io/[External Secrets Operator], or https://www.vaultproject.io/[HashiCorp Vault] rather than committing raw secret manifests. 
For the Kubernetes Secret resource itself, see the https://kubernetes.io/docs/concepts/configuration/secret/[Kubernetes Secrets documentation]. -=== Deployment +=== Step 3: Deployment -.Full Kubernetes Deployment manifest -[%collapsible] -==== [source,yaml] +.Kubernetes Deployment manifest ---- apiVersion: apps/v1 kind: Deployment @@ -283,13 +283,12 @@ spec: memory: "2Gi" cpu: "2000m" ---- -==== TIP: For PostgreSQL, change `DATABASE_DRIVER` to `"postgres"`, update `DATABASE_HOST` to the PostgreSQL endpoint, and add `DATABASE_SCHEMA` set to `"public"` (or ensure the `cs-on-premises` schema exists). See xref:tinymceai-on-premises-database.adoc#postgresql-schema-prerequisite[PostgreSQL schema prerequisite]. NOTE: `terminationGracePeriodSeconds` is set to 300 to match the maximum SSE stream duration. On SIGTERM, the service finishes in-flight SSE streams before shutting down. Set this value equal to or greater than the longest expected AI response time. For multi-zone clusters, add `topologySpreadConstraints` to the pod spec to spread replicas across availability zones. Add a `PodDisruptionBudget` (`minAvailable: 1` or a percentage at scale) to prevent all replicas being evicted simultaneously during node maintenance. These resource values are evaluation defaults; adjust for production workload. -=== Service +=== Step 4: Service [source,yaml] ---- @@ -306,7 +305,7 @@ spec: targetPort: 8000 ---- -=== Bootstrap the environment +=== Step 5: Bootstrap the environment After the first pod reaches Ready status, create an environment and access key through the Management Panel: @@ -319,7 +318,7 @@ These values are required by the token endpoint. See xref:tinymceai-on-premises- [IMPORTANT] -- -Always create environments through the Management Panel UI. Environments created through the raw management API are not fully registered and cause `invalid-jwt-payload` errors. +Always create environments through the Management Panel UI. 
-- === https://kubernetes.io/docs/concepts/services-networking/ingress/[Ingress] diff --git a/modules/ROOT/pages/tinymceai-on-premises-providers.adoc b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc index 9c0034fc81..a606547c55 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-providers.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc @@ -72,25 +72,20 @@ The diagram reflects three stacked layers: *how to authenticate* with each upstr === Default model behavior (`agent-1`) -When the AI service starts without an explicit `MODELS` environment variable, it exposes a single built-in placeholder model called `agent-1`. This model exists so that OpenAI, Anthropic, and Google direct providers can route requests without explicit configuration — the service has built-in routing knowledge for those three providers. +Without an explicit `MODELS` variable, the service exposes a single built-in placeholder called `agent-1`. This works only for OpenAI, Anthropic, and Google direct providers (the service has built-in routing for those three). Azure, Bedrock, Vertex, and openai-compatible providers do not work without `MODELS`. -However, the default `agent-1` model has important limitations: +IMPORTANT: Always set `MODELS` explicitly for production. Without it, the editor model picker shows only a generic `agent-1` entry, JWT permissions cannot target specific models, and non-native providers are non-functional. +.Details: `agent-1` behavior and limitations +[%collapsible] +==== * The `/v1/models/1` endpoint reports `agent-1` with `allowed: false` unless the JWT includes the `ai:models:agent` permission. -* The TinyMCE editor model picker does not display real model names — it shows only the generic `agent-1` entry. -* Azure, Bedrock, Vertex, and openai-compatible providers do not work at all without `MODELS`. - -For production deployments, always set `MODELS` explicitly regardless of provider type. 
Setting `MODELS` explicitly makes the editor model picker display meaningful names, the correct provider handles each request, and JWT permissions can reference specific model IDs. - -The `ai:models:agent` permission in the JWT grants access to the built-in `agent-1` model. When adding custom models through `MODELS`, also add `ai:models::` permissions to expose each model to users. See xref:tinymceai-on-premises-jwt.adoc[JWT authentication] for permission syntax. - -=== When `MODELS` is required - -`MODELS` is required for Azure, Bedrock, Vertex, and openai-compatible providers. Without it, these providers expose nothing usable to clients. - -For OpenAI, Anthropic, and Google direct, the service has built-in routing knowledge that allows requests to reach those providers without an explicit `MODELS` definition. However, the `/v1/models/1` endpoint still returns only a disabled `agent-1` placeholder, and the editor model picker will not display real model names. For a production deployment, always define `MODELS` explicitly regardless of provider type. +* The TinyMCE editor model picker does not display real model names. +* Setting `MODELS` explicitly makes the editor display meaningful names, routes to the correct provider, and enables per-model JWT permissions (`ai:models::`). +* See xref:tinymceai-on-premises-jwt.adoc[JWT authentication] for permission syntax. +==== -When only `PROVIDERS` is configured and `MODELS` is omitted, a `GET /v1/models/1` call returns only the built-in `agent-1` placeholder with `allowed: false`: +When `MODELS` is omitted, a `GET /v1/models/1` call returns: [source,json] ---- diff --git a/modules/ROOT/pages/tinymceai-on-premises-reference.adoc b/modules/ROOT/pages/tinymceai-on-premises-reference.adoc index caf2e26d76..3956b4bdb8 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-reference.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-reference.adoc @@ -66,6 +66,38 @@ Alphabetized. 
Required-ness is marked relative to a minimum working deployment. |`WEBSEARCH_REQUEST_TIMEOUT` |No |- |Search request timeout in ms. |=== +== Complete `docker run` reference + +A single command with all required environment variables for a minimal production deployment (MySQL, OpenAI, S3 storage): + +[source,bash] +---- +docker run --init -d -p 8000:8000 \ + --name ai-service \ + -e LICENSE_KEY='' \ + -e ENVIRONMENTS_MANAGEMENT_SECRET_KEY='' \ + -e DATABASE_DRIVER='mysql' \ + -e DATABASE_HOST='' \ + -e DATABASE_PORT='3306' \ + -e DATABASE_USER='' \ + -e DATABASE_PASSWORD='' \ + -e DATABASE_DATABASE='ai_service' \ + -e REDIS_HOST='' \ + -e REDIS_PORT='6379' \ + -e PROVIDERS='{"openai":{"type":"openai","apiKeys":[""]}}' \ + -e MODELS='[{"id":"gpt-4.1","provider":"openai","name":"GPT-4.1","features":["conversations","reviews","actions"]}]' \ + -e STORAGE_DRIVER='s3' \ + -e STORAGE_BUCKET='' \ + -e STORAGE_REGION='' \ + -e STORAGE_ACCESS_KEY_ID='' \ + -e STORAGE_SECRET_ACCESS_KEY='' \ + -e ALLOWED_ORIGINS='https://' \ + -e ENABLE_METRIC_LOGS='true' \ + registry.containers.tiny.cloud/ai-service-tiny: +---- + +For PostgreSQL, change `DATABASE_DRIVER` to `'postgres'` and add `-e DATABASE_SCHEMA='cs-on-premises'` (or `'public'`). For Redis authentication, add `-e REDIS_PASSWORD=''`. For Redis TLS, add `-e REDIS_TLS_ENABLE='true'`. + == API endpoint reference [cols=",,,",options="header",] diff --git a/modules/ROOT/pages/tinymceai-on-premises.adoc b/modules/ROOT/pages/tinymceai-on-premises.adoc index af974d85cd..2cc116112b 100644 --- a/modules/ROOT/pages/tinymceai-on-premises.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises.adoc @@ -139,11 +139,11 @@ NOTE: `LICENSE_KEY` (the AI service license) and `TINYMCE_API_KEY` / `license_ke == Choosing a setup path +For evaluation, start with the xref:tinymceai-on-premises-getting-started.adoc[Getting started] guide (Docker Compose on a single machine). 
For production, work through each topic guide in order — the same components apply regardless of deployment target (Kubernetes, ECS, or standalone Docker). + [.text-center] image::tinymceai-on-premises/complete-guide-fig-2.svg[Setup path decision tree,width=100%] -All setup paths lead to the same set of topic guides listed below. The decision tree helps identify which guides to prioritize based on the deployment target. - == Topic guides For a first-time deployment, progress through the guides in order. Each guide can also be used independently as a reference for a specific topic. @@ -161,9 +161,6 @@ For a first-time deployment, progress through the guides in order. Each guide ca |xref:tinymceai-on-premises-providers.adoc[LLM providers] |Connect to OpenAI, Anthropic, Google Gemini, Azure OpenAI, AWS Bedrock, Google Vertex AI, or any OpenAI-compatible endpoint (Ollama, vLLM, LM Studio). Custom model catalog and API key rotation. -|xref:tinymceai-on-premises-mcp.adoc[MCP and web integrations] -|Model Context Protocol (MCP) tool integration, web scraping endpoints, and web search endpoints. - |xref:tinymceai-on-premises-jwt.adoc[JWT authentication] |HS256 signing model, required and optional claims, permissions reference, token endpoint examples in 8 languages, and multi-tenant deployment patterns. @@ -173,6 +170,9 @@ For a first-time deployment, progress through the guides in order. Each guide ca |xref:tinymceai-on-premises-production.adoc[Production deployment] |Kubernetes manifests, AWS ECS task definitions, horizontal scaling, security hardening, rate limiting, observability, backup and recovery, and upgrades. +|xref:tinymceai-on-premises-mcp.adoc[MCP and web integrations (optional)] +|Model Context Protocol (MCP) tool integration, web scraping endpoints, and web search endpoints. Not required for initial setup. 
+ |xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting] |Quick triage, container startup failures, JWT errors, LLM provider errors, editor issues, performance, and diagnostic recipes. From 139cd39158e53c78af18337529f6cc7443e80beb Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Mon, 25 May 2026 09:22:08 +1000 Subject: [PATCH 22/48] Address CK-DOCS-ASKS-HYBRID findings and Tim's diagram feedback Diagrams: - overview-fig-2: add LB/proxy, HTTPS/HTTP labels, fix token flow direction, split data layer into labeled nodes, fix label positioning - complete-guide-fig-1: reduce to single replica (+N), add HTTPS labels, add read/write labels to data layer connections - complete-guide-fig-2: remove bare-metal option, rename to "Container orchestrator" - database-setup-fig-1: remove bare-metal, clarify native DB path Critical fixes (from CK-DOCS-ASKS-HYBRID): - G6: Fix WEBSEARCH_HEADERS from broken JSON to colon-CSV format - G1: Add ai:conversations:webSearch to JWT permissions table High-severity: - D2: Add Postgres TLS known-issue warning (managed PG + missing SSL) - B4: Add Bedrock API-key billing trap note - E1: Document I/O-bound workload shape for HPA guidance - E2: Add HA primitives section (PDB, anti-affinity, topology spread) - E3: Add Azure (AKS) and GCP (GKE) cluster bring-up pointers Medium/Low: - D3: MySQL role-inherited grants note for Cloud SQL - E7: License key shared across replicas statement - G7: host.docker.internal loopback-bind warning - Network requirements: license.container.tiny.cloud firewall whitelist - MCP page: web search prominence TIP --- .../complete-guide-fig-1.mmd | 16 +++--- .../complete-guide-fig-1.svg | 2 +- .../complete-guide-fig-2.mmd | 4 +- .../complete-guide-fig-2.svg | 2 +- .../database-setup-fig-1.mmd | 2 +- .../database-setup-fig-1.svg | 2 +- .../tinymceai-on-premises/overview-fig-2.mmd | 19 +++++-- .../tinymceai-on-premises/overview-fig-2.svg | 2 +- .../pages/tinymceai-on-premises-database.adoc | 7 ++- 
.../ROOT/pages/tinymceai-on-premises-jwt.adoc | 3 +- .../ROOT/pages/tinymceai-on-premises-mcp.adoc | 10 +++- .../tinymceai-on-premises-production.adoc | 55 ++++++++++++++++++- .../tinymceai-on-premises-providers.adoc | 2 + .../tinymceai-on-premises-reference.adoc | 2 +- 14 files changed, 99 insertions(+), 29 deletions(-) diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.mmd b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.mmd index 0a6555af04..8642c5e37d 100644 --- a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.mmd +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.mmd @@ -1,17 +1,13 @@ flowchart TB Browser["Browser
TinyMCE editor + tinymceai plugin"] TokenEP["Your token endpoint
signs HS256 JWTs"] - Browser -->|"fetch JWT"| TokenEP + Browser -->|"HTTPS"| TokenEP Browser -->|"HTTPS + Bearer JWT"| LB - subgraph App["Application layer (stateless, N replicas)"] + subgraph App["Application layer (stateless, +N replicas)"] LB["Reverse proxy / Load balancer
nginx · ALB · K8s Ingress
TLS termination · SSE pass-through"] - AIN["ai-service replica N"] - AI2["ai-service replica 2"] - AI1["ai-service replica 1"] - LB --> AIN - LB --> AI2 - LB --> AI1 + AI1["ai-service"] + LB -->|"HTTP"| AI1 end subgraph Data["Shared data layer"] @@ -20,7 +16,9 @@ flowchart TB Storage[("File storage
S3 · Azure Blob · filesystem")] end - AI1 --> Data + AI1 <-->|"read/write"| DB + AI1 <-->|"read/write"| Cache + AI1 <-->|"read/write"| Storage AI1 -->|"HTTPS"| LLM["LLM provider
OpenAI · Anthropic · Google ·
Azure · Bedrock · Vertex ·
self-hosted"] diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.svg index 97aeabe7ca..a0b8e21e11 100644 --- a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.svg +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.svg @@ -1 +1 @@ -Application layer (stateless, N replicas)fetch JWTHTTPS + Bearer JWTHTTPStelemetrytool callsShared data layerSQL databaseMySQL 8.0+ / PostgreSQL13+Redis 3.2.6+File storageS3 · Azure Blob · filesystemBrowserTinyMCE editor + tinymceaipluginYour token endpointsigns HS256 JWTsReverse proxy / Loadbalancernginx · ALB · K8s IngressTLS termination · SSEpass-throughai-service replica Nai-service replica 2ai-service replica 1LLM providerOpenAI · Anthropic ·Google ·Azure · Bedrock · Vertex ·self-hostedOpenTelemetry · LangfuseMCP servers \ No newline at end of file +

Shared data layer

Application layer (stateless, +N replicas)

HTTPS

HTTPS + Bearer JWT

HTTP

read/write

read/write

read/write

HTTPS

telemetry

tool calls

Browser
TinyMCE editor + tinymceai plugin

Your token endpoint
signs HS256 JWTs

Reverse proxy / Load balancer
nginx · ALB · K8s Ingress
TLS termination · SSE pass-through

ai-service

SQL database
MySQL 8.0+ / PostgreSQL 13+

Redis 3.2.6+

File storage
S3 · Azure Blob · filesystem

LLM provider
OpenAI · Anthropic · Google ·
Azure · Bedrock · Vertex ·
self-hosted

OpenTelemetry · Langfuse

MCP servers

\ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.mmd b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.mmd index 91496059f6..d960e063ee 100644 --- a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.mmd +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.mmd @@ -1,16 +1,14 @@ flowchart TD Start([New deployment]) --> Q1{Evaluating or
going to production?} Q1 -->|Evaluating locally| Compose[Docker Compose
all services on one host
Getting started guide] - Q1 -->|Production| Q2{Orchestrator?} + Q1 -->|Production| Q2{Container orchestrator?} Q2 -->|Kubernetes| K8s[Kubernetes deployment
Production guide] Q2 -->|AWS ECS / Fargate| ECS[ECS task definition
Production guide] Q2 -->|Docker / Podman on VMs| VMs[Docker or Podman compose
Database guide] - Q2 -->|Bare metal / no containers| Bare[Native install for
data layer; container
for AI service
Database guide] Compose --> DB{Database?} K8s --> DB ECS --> DB VMs --> DB - Bare --> DB DB -->|Managed cloud DB| Managed[RDS · Cloud SQL ·
Azure Database] DB -->|Self-managed| Self[Containers or native install] Managed --> Done([Continue with
LLM providers guide]) diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.svg b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.svg index 2a4f22aa83..f675af7f46 100644 --- a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.svg +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.svg @@ -1 +1 @@ -Evaluating locallyProductionKubernetesAWS ECS / FargateDocker / Podman on VMsBare metal / no containersManaged cloud DBSelf-managedNew deploymentEvaluating orgoing to production?Docker Composeall services on one hostGetting started guideOrchestrator?Kubernetes deploymentProduction guideECS task definitionProduction guideDocker or Podman composeDatabase guideNative install fordata layer; containerfor AI serviceDatabase guideDatabase?RDS · Cloud SQL ·Azure DatabaseContainers or native installContinue withLLM providers guide \ No newline at end of file +

Evaluating locally

Production

Kubernetes

AWS ECS / Fargate

Docker / Podman on VMs

Managed cloud DB

Self-managed

New deployment

Evaluating or
going to production?

Docker Compose
all services on one host
Getting started guide

Container orchestrator?

Kubernetes deployment
Production guide

ECS task definition
Production guide

Docker or Podman compose
Database guide

Database?

RDS · Cloud SQL ·
Azure Database

Containers or native install

Continue with
LLM providers guide

\ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/database-setup-fig-1.mmd b/modules/ROOT/images/tinymceai-on-premises/database-setup-fig-1.mmd index 4e914e0bd2..752bc8a6aa 100644 --- a/modules/ROOT/images/tinymceai-on-premises/database-setup-fig-1.mmd +++ b/modules/ROOT/images/tinymceai-on-premises/database-setup-fig-1.mmd @@ -5,7 +5,7 @@ flowchart TD Q2 -->|Cloud / managed services| Managed[AWS RDS · Cloud SQL ·
Azure Database
+ ElastiCache · Memorystore ·
Azure Cache for Redis] Q2 -->|Self-managed| Q3{Container runtime
available?} Q3 -->|Docker or Podman| Containers[Containers on the same
network or pod as ai-service] - Q3 -->|None - bare metal or VM| Native[Native install
brew · apt · yum · dnf
service runs on host] + Q3 -->|Native on host| Native[Native install
brew · apt · yum · dnf
AI service connects via
host.docker.internal] Compose --> Verify([Verify: nc -zv host port
then start ai-service]) Managed --> Verify Containers --> Verify diff --git a/modules/ROOT/images/tinymceai-on-premises/database-setup-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/database-setup-fig-1.svg index 5055886d48..8deafa3b8d 100644 --- a/modules/ROOT/images/tinymceai-on-premises/database-setup-fig-1.svg +++ b/modules/ROOT/images/tinymceai-on-premises/database-setup-fig-1.svg @@ -1 +1 @@ -Evaluating locallyDeployingCloud / managed servicesSelf-managedDocker or PodmanNone - bare metal or VMWhere will MySQL/Postgresand Redis run?Evaluating ordeploying to prod?Docker Composeimage: mysql:8.0 orpostgres:16+ redis:7Cloud or self-managed?AWS RDS · Cloud SQL ·Azure Database+ ElastiCache ·Memorystore ·Azure Cache for RedisContainer runtimeavailable?Containers on the samenetwork or pod asai-serviceNative installbrew · apt · yum · dnfservice runs on hostVerify: nc -zv host portthen start ai-service \ No newline at end of file +

Evaluating locally

Deploying

Cloud / managed services

Self-managed

Docker or Podman

Native on host

Where will MySQL/Postgres and Redis run?

Evaluating or
deploying to prod?

Docker Compose
image: mysql:8.0 or postgres:16
+ redis:7

Cloud or self-managed?

AWS RDS · Cloud SQL ·
Azure Database
+ ElastiCache · Memorystore ·
Azure Cache for Redis

Container runtime
available?

Containers on the same
network or pod as ai-service

Native install
brew · apt · yum · dnf
AI service connects via
host.docker.internal

Verify: nc -zv host port
then start ai-service

\ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/overview-fig-2.mmd b/modules/ROOT/images/tinymceai-on-premises/overview-fig-2.mmd index 8c17859110..eafb0efc04 100644 --- a/modules/ROOT/images/tinymceai-on-premises/overview-fig-2.mmd +++ b/modules/ROOT/images/tinymceai-on-premises/overview-fig-2.mmd @@ -6,18 +6,25 @@ flowchart LR end subgraph Service["Application layer"] + LB["Load balancer /\nreverse proxy\n(TLS termination)"] AI["AI service\n(container)"] end subgraph Data["Data layer"] - DB[("Database\n+ Redis\n+ Storage")] + DB[("SQL database\n(conversations, configs)")] + Redis[("Redis\n(cache, coordination)")] + Storage[("File storage\n(uploads, documents)")] end LLM["LLM provider"] - Token -->|"1. signed JWT"| App - App -->|"2. prompt + JWT"| AI - AI -->|"5. SSE stream"| App - AI -->|"3. forward"| LLM + App -->|"1. request JWT"| Token + Token -->|"JWT"| App + App -->|"2. HTTPS"| LB + LB -->|"HTTP"| AI + AI -->|"3. HTTPS"| LLM LLM -->|"4. stream"| AI - AI --- DB + AI -->|"5. SSE response"| App + AI <-->|"read/write"| DB + AI <-->|"read/write"| Redis + AI <-->|"read/write"| Storage diff --git a/modules/ROOT/images/tinymceai-on-premises/overview-fig-2.svg b/modules/ROOT/images/tinymceai-on-premises/overview-fig-2.svg index 0b15ad24f8..c02a9150a2 100644 --- a/modules/ROOT/images/tinymceai-on-premises/overview-fig-2.svg +++ b/modules/ROOT/images/tinymceai-on-premises/overview-fig-2.svg @@ -1 +1 @@ -

Data layer

Application layer

Client layer

1. signed JWT

2. prompt + JWT

5. SSE stream

3. forward

4. stream

Token endpoint
(back end)

TinyMCE editor
(browser)

AI service
(container)

Database
+ Redis
+ Storage

LLM provider

\ No newline at end of file +

Data layer

Application layer

Client layer

1. request JWT

JWT

2. HTTPS

HTTP

3. HTTPS

4. stream

5. SSE response

read/write

read/write

read/write

Token endpoint
(back end)

TinyMCE editor
(browser)

Load balancer /
reverse proxy
(TLS termination)

AI service
(container)

SQL database
(conversations, configs)

Redis
(cache, coordination)

File storage
(uploads, documents)

LLM provider

\ No newline at end of file diff --git a/modules/ROOT/pages/tinymceai-on-premises-database.adoc b/modules/ROOT/pages/tinymceai-on-premises-database.adoc index 77e5eefc85..c7c72cecbc 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-database.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-database.adoc @@ -127,6 +127,8 @@ GRANT ALL PRIVILEGES ON ai_service.* TO 'ai_service'@'%'; [NOTE] -- Some versions of the AI service image report false-positive "Not enough permissions to access database" errors even with `ALL PRIVILEGES`. If this occurs, grant the privileges globally rather than per-database, or use the MySQL `root` user for development. + +On Cloud SQL MySQL, grant privileges to the service user **directly** — not via a role (e.g. `cloudsqlsuperuser`). The startup grant check runs `SHOW GRANTS FOR user` and does not resolve role-inherited grants. -- === PostgreSQL @@ -157,7 +159,10 @@ GRANT ALL ON SCHEMA "cs-on-premises" TO ai_service; ---- ==== - +[WARNING] +-- +*Managed PostgreSQL TLS issue:* Amazon RDS, Cloud SQL, and Azure Database for PostgreSQL default to requiring TLS (`rds.force_ssl=1` / `require_secure_transport=ON`). When TLS is enforced and the AI service has not been configured with `DATABASE_SSL_CA`, the connection is rejected — but the error message surfaces as a generic "permissions" error, not a TLS error. If the service fails to start with a permissions-related database error after grants have been verified, check whether TLS is the underlying cause. +-- == Database setup diff --git a/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc b/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc index 48ab8390cb..2abe27acd6 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc @@ -95,10 +95,11 @@ This is the canonical permission list for the AI service. 
[cols=",",options="header",] |=== |Permission |Grants -|`ai:conversations:*` |All conversation operations: create, list, send message, delete +|`ai:conversations:*` |All conversation operations: create, list, send message, delete, and web search |`ai:conversations:create` |Create new conversations |`ai:conversations:read` |List and read existing conversations |`ai:conversations:delete` |Delete conversations +|`ai:conversations:webSearch` |Enable the web search toggle in conversations. Without this permission, `GET /v1/models/1` reports `capabilities.webSearch.allowed: false` even when `WEBSEARCH_ENABLED=true` and `capabilities.webSearch: true` is set on the model. |`ai:models:agent` |Access the built-in agent model (model ID `agent-1`) |`ai:models:<provider>:<model-id>` |Access a specific custom model configured through the `MODELS` env var |`ai:actions:system:*` |All built-in quick actions (rewrite, summarize, expand, translate, change tone, and related operations) diff --git a/modules/ROOT/pages/tinymceai-on-premises-mcp.adoc b/modules/ROOT/pages/tinymceai-on-premises-mcp.adoc index f83b1df144..8aee959f95 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-mcp.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-mcp.adoc @@ -5,6 +5,8 @@ The AI service extends model capabilities through two integration points: the https://modelcontextprotocol.io/[Model Context Protocol] (MCP) for tool calling, and pluggable web endpoints for page fetching and search. Both features operate within AI conversations only. +TIP: Web search and scraping allow the AI to reference live internet content during conversations. For most deployments, enabling at least web search significantly improves response quality. These features are configured entirely through environment variables on the AI service container — no additional infrastructure is required beyond a search endpoint. + [[mcp-integration]] @@ -42,6 +44,8 @@ Set the `MCP_SERVERS` environment variable to a JSON object. 
Each key is a serve TIP: On Linux Docker, add `extra_hosts: ["host.docker.internal:host-gateway"]` to the AI service compose entry to reach MCP servers running on the host machine. +WARNING: `host-gateway` resolves to the Docker bridge IP, not `127.0.0.1`. Host services bound to `127.0.0.1` only (common in Python MCP SDK examples and CLI-style servers) are unreachable from inside the container (`ECONNREFUSED`). Bind MCP servers to `0.0.0.0` or run them as sibling containers on the same Docker network. + [[mcp-shared-token]] === Shared-token authentication limitation @@ -153,10 +157,12 @@ The AI service can forward web page fetches and search queries to external endpo -e WEBSEARCH_ENABLED='true' \ -e WEBSEARCH_ENDPOINT='http://host.docker.internal:4001/search' \ -e WEBSEARCH_REQUEST_TIMEOUT='10000' \ --e WEBSEARCH_HEADERS='{"Authorization":"Bearer search-api-key"}' +-e WEBSEARCH_HEADERS='Authorization: Bearer search-api-key, X-Source: tinymce' ---- -NOTE: A model must include `capabilities.webSearch: true` in its xref:tinymceai-on-premises-providers.adoc#models-required[`MODELS` entry] for the web search toggle to appear in the editor. +IMPORTANT: `WEBSEARCH_HEADERS` uses **colon-CSV format** (`Header-Name: value, Another: value`), not JSON. Passing a JSON object produces `ERR_INVALID_HTTP_TOKEN` and silently prevents all outbound search requests. + +NOTE: A model must include `capabilities.webSearch: true` in its xref:tinymceai-on-premises-providers.adoc#models-required[`MODELS` entry] for the web search toggle to appear in the editor. Additionally, the JWT must include the `ai:conversations:webSearch` permission — see xref:tinymceai-on-premises-jwt.adoc#permissions-reference[JWT permissions]. 
=== Web scraping endpoint contract diff --git a/modules/ROOT/pages/tinymceai-on-premises-production.adoc b/modules/ROOT/pages/tinymceai-on-premises-production.adoc index c42db02149..b451e92bcd 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-production.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-production.adoc @@ -286,7 +286,19 @@ spec: TIP: For PostgreSQL, change `DATABASE_DRIVER` to `"postgres"`, update `DATABASE_HOST` to the PostgreSQL endpoint, and add `DATABASE_SCHEMA` set to `"public"` (or ensure the `cs-on-premises` schema exists). See xref:tinymceai-on-premises-database.adoc#postgresql-schema-prerequisite[PostgreSQL schema prerequisite]. -NOTE: `terminationGracePeriodSeconds` is set to 300 to match the maximum SSE stream duration. On SIGTERM, the service finishes in-flight SSE streams before shutting down. Set this value equal to or greater than the longest expected AI response time. For multi-zone clusters, add `topologySpreadConstraints` to the pod spec to spread replicas across availability zones. Add a `PodDisruptionBudget` (`minAvailable: 1` or a percentage at scale) to prevent all replicas being evicted simultaneously during node maintenance. These resource values are evaluation defaults; adjust for production workload. +NOTE: `terminationGracePeriodSeconds` is set to 300 to match the maximum SSE stream duration. On SIGTERM, the service finishes in-flight SSE streams before shutting down. Set this value equal to or greater than the longest expected AI response time. These resource values are evaluation defaults; adjust for production workload. + +IMPORTANT: The workload is *network-I/O bound* (waiting on LLM provider streams), not CPU-bound. CPU-based HPA scaling is a safe starting point but may not trigger under high concurrency. For production, consider supplementing with custom metrics (concurrent SSE streams, request queue depth) through KEDA or Prometheus Adapter. 
+=== High-availability primitives + +Any multi-replica deployment should include the following Kubernetes primitives — configure them in line with the platform team's standards: + +* *`PodDisruptionBudget`*: prevents all replicas from being evicted simultaneously during node maintenance (`minAvailable: 1` or a percentage at scale). +* *`podAntiAffinity`*: distributes replicas across nodes to avoid single-node failures. +* *`topologySpreadConstraints`*: spreads replicas across availability zones for zone-level resilience. + +The canonical Deployment example above does not include these — they are site-specific. Without them, the scheduler may bin-pack all replicas onto a single node in a single availability zone. === Step 4: Service @@ -451,7 +463,48 @@ The AI service does not use platform-native credential chains. AWS IRSA, EC2 ins |Registry pull credentials |Secrets Manager {plus} ECR pull-through cache, or a private repository mirroring `registry.containers.tiny.cloud` |=== +=== Azure (AKS) + +Deploy on Azure Kubernetes Service with Azure Database for PostgreSQL Flexible Server, Azure Cache for Redis, and Azure Blob Storage. Service-shape considerations: + +* `STORAGE_DRIVER=azure` is the recommended storage backend on Azure. +* PostgreSQL Flexible Server defaults to enforcing TLS — see the xref:tinymceai-on-premises-database.adoc#_postgresql[known TLS error surface]. +* The AI service does not use Workload Identity or managed identity for LLM credentials — inline Azure OpenAI credentials in `PROVIDERS` (see xref:tinymceai-on-premises-providers.adoc#_azure_openai[Azure OpenAI]). + +For AKS cluster setup, node pools, and networking, refer to the https://learn.microsoft.com/en-us/azure/aks/[Azure Kubernetes Service documentation]. + +=== GCP (GKE) + +Deploy on Google Kubernetes Engine (Standard mode — not Autopilot) with Cloud SQL for PostgreSQL, Memorystore for Redis, and GCS for file storage. Service-shape considerations: + +* No native GCS driver exists. 
Use GCS via S3-interop (`STORAGE_DRIVER=s3` with HMAC credentials). The bucket must have `uniform_bucket_level_access` disabled (the SDK sends `x-amz-acl` headers). +* The AI service does not use GKE Workload Identity or Application Default Credentials — inline Vertex SA credentials in `PROVIDERS` (see xref:tinymceai-on-premises-providers.adoc#_google_vertex_ai[Google Vertex AI]). +* Cloud SQL for PostgreSQL defaults to enforcing TLS — see the xref:tinymceai-on-premises-database.adoc#_postgresql[known TLS error surface]. + +For GKE cluster creation and node pool configuration, refer to the https://cloud.google.com/kubernetes-engine/docs[Google Kubernetes Engine documentation]. + +== Network requirements + +The AI service requires outbound HTTPS access to the following endpoints. Configure firewall rules and security groups to allow this traffic: + +[cols="1,2,2",options="header"] +|=== +|Endpoint |Purpose |Required + +|LLM provider APIs (e.g. `api.openai.com`, `bedrock-runtime.*.amazonaws.com`) +|Forward AI requests to the configured providers +|Yes + +|`license.container.tiny.cloud` +|License key validation (phone-home check on startup) +|Yes — service will not start if this is blocked + +|`registry.containers.tiny.cloud` +|Pull container image updates +|Yes (at deploy time) +|=== +TIP: If the deployment environment blocks all outbound traffic by default, add these to the egress allowlist before starting the service. The license check occurs on every container start. == Security hardening diff --git a/modules/ROOT/pages/tinymceai-on-premises-providers.adoc b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc index a606547c55..780bf15624 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-providers.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc @@ -419,6 +419,8 @@ IMPORTANT: The AI service does *not* use the AWS SDK default credential chain. ` The `sessionToken` field is optional but required for STS-issued short-lived credentials. 
Plan a rotation procedure when using temporary credentials. +NOTE: Bedrock console-issued API keys have a separate billing entitlement check that can return `INVALID_PAYMENT_INSTRUMENT` on accounts where the IAM-credential path works fine. Prefer the `credentials` block (IAM user with `accessKeyId` / `secretAccessKey`) for production. + *Prerequisites checklist:* . *Enable model access.* Bedrock console -> *Model access* -> Manage model access. Each model must be approved per-region. diff --git a/modules/ROOT/pages/tinymceai-on-premises-reference.adoc b/modules/ROOT/pages/tinymceai-on-premises-reference.adoc index 3956b4bdb8..d4d1529469 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-reference.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-reference.adoc @@ -27,7 +27,7 @@ Alphabetized. Required-ness is marked relative to a minimum working deployment. |`LANGFUSE_DEBUG` |No |- |Verbose Langfuse logging. |`LANGFUSE_PUBLIC_KEY` |If using Langfuse |- |Langfuse public key. |`LANGFUSE_SECRET_KEY` |If using Langfuse |- |Langfuse secret key. -|`LICENSE_KEY` |Yes |- |AI service license key (long string from Tiny). +|`LICENSE_KEY` |Yes |- |AI service license key (long string from Tiny). The same key is shared across all replicas of a deployment — no per-pod licensing is required. |`LLM_TELEMETRY_ENABLED` |No |`false` |Primary OpenTelemetry switch. |`LLM_TIMEOUT_MS` |No |180000 |Per-request large language model (LLM) timeout in ms. Raise for large self-hosted models. |`MCP_SERVERS` |No |- |JSON object; Model Context Protocol (MCP) server configuration. See xref:tinymceai-on-premises-mcp.adoc#mcp-integration[MCP integration]. 
From 29e0a1cfca69f541156a17009ccdba0c6a29609a Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Mon, 25 May 2026 12:08:24 +1000 Subject: [PATCH 23/48] Address PR review comments from Ben, Shiridi, and Benjamin MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Delete unused duplicate diagram (complete-guide-fig-8) - Frameworks: simplify intro text (remove SSR patterns) - Overview: simplify step 3, IMPORTANT→NOTE, fix secret wording - Overview: align LICENSE_KEY note with Ben's suggestion - Getting Started: apply Shiridi's panel UI wording fixes - Getting Started: fix dotenv override issue (Benjamin's report) - Getting Started: align TIP folder name example with mkdir step --- .../tinymceai-on-premises/complete-guide-fig-8.mmd | 14 -------------- .../tinymceai-on-premises/complete-guide-fig-8.svg | 1 - .../pages/tinymceai-on-premises-frameworks.adoc | 2 +- .../tinymceai-on-premises-getting-started.adoc | 8 ++++---- modules/ROOT/pages/tinymceai-on-premises.adoc | 8 ++++---- 5 files changed, 9 insertions(+), 24 deletions(-) delete mode 100644 modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-8.mmd delete mode 100644 modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-8.svg diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-8.mmd b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-8.mmd deleted file mode 100644 index b8eb0690a6..0000000000 --- a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-8.mmd +++ /dev/null @@ -1,14 +0,0 @@ -flowchart LR - subgraph Tenants[Your SaaS customers] - CA[Customer A users] - CB[Customer B users] - CC[Customer C users] - end - subgraph AISvc[Single AI service deployment] - EA[Environment A
access keys A
isolated conversations] - EB[Environment B
access keys B
isolated conversations] - EC[Environment C
access keys C
isolated conversations] - end - CA --> EA --> OpenAI[OpenAI] - CB --> EB --> Anthropic[Anthropic] - CC --> EC --> Azure[Azure OpenAI] diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-8.svg b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-8.svg deleted file mode 100644 index 8687168cdd..0000000000 --- a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-8.svg +++ /dev/null @@ -1 +0,0 @@ -Single AI service deploymentYour SaaS customersCustomer A usersCustomer B usersCustomer C usersEnvironment Aaccess keys Aisolated conversationsEnvironment Baccess keys Bisolated conversationsEnvironment Caccess keys Cisolated conversationsOpenAIAnthropicAzure OpenAI \ No newline at end of file diff --git a/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc b/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc index dc96af33bd..0c9a6b2d3d 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc @@ -12,7 +12,7 @@ It assumes: * A token endpoint exists that signs HS256 JWTs (see xref:tinymceai-on-premises-jwt.adoc[JWT authentication]). * A valid TinyMCE license key or API key with the AI feature enabled. 
-For general framework setup (installing wrappers, component structure, server-side rendering (SSR) patterns), see the existing integration guides: +For general framework setup (installing wrappers, component structure), see the existing integration guides: * xref:react-cloud.adoc[React] * xref:vue-cloud.adoc[Vue.js] diff --git a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc index 33f0b4bb23..04f2e4aa14 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc @@ -195,7 +195,7 @@ docker run --init -d -p 8000:8000 \ registry.containers.tiny.cloud/ai-service-tiny:latest ---- -TIP: The network name is typically `<folder-name>_default` (e.g., `tinymce-ai-onpremise_default`). Run `docker network ls` to confirm. For multiple LLM providers, extend the `PROVIDERS` JSON: `{"openai":{...},"anthropic":{...}}`. +TIP: The network name is typically `<folder-name>_default` (e.g., `tinymce-ai-onpremise_default` for the folder created above). Run `docker network ls` to confirm the exact name — Docker Compose versions format it differently. For multiple LLM providers, extend the `PROVIDERS` JSON: `{"openai":{...},"anthropic":{...}}`. For Podman, replace `docker run` with `podman run` and use a Podman pod instead of a compose network. See xref:tinymceai-on-premises-production.adoc[Production deployment] for Podman-specific guidance. See xref:tinymceai-on-premises-production.adoc#_podman_deployment[Podman deployment] for a full example. @@ -239,8 +239,8 @@ The AI service isolates users into Environments. Each environment has its own ac . Sign in using the `MANAGEMENT_SECRET` from `.env`. . Click *Create Environment* and give it a name (for example "Development"). . Note the *Environment ID* displayed (a short identifier like `viOu8BnjJHb0HGK091p`). -. Inside the environment, click *Create Access Key*. -. *Copy the API Secret immediately*. 
The Management Panel shows it only once. +. Inside the environment, click *Create a new access key*. +. *Copy the Environment ID and Access Key*. The Management Panel shows the API Secret only once. Update `.env` with the new values: @@ -282,7 +282,7 @@ Create `token-server.js`: [source,javascript] .token-server.js ---- -require('dotenv').config(); +require('dotenv').config({ override: true }); const express = require('express'); const jwt = require('jsonwebtoken'); diff --git a/modules/ROOT/pages/tinymceai-on-premises.adoc b/modules/ROOT/pages/tinymceai-on-premises.adoc index 2cc116112b..f95d24f1e1 100644 --- a/modules/ROOT/pages/tinymceai-on-premises.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises.adoc @@ -27,15 +27,15 @@ Data flow for a single AI request: . The client application requests a JWT from the token endpoint. . The client sends a prompt with the JWT to the AI service over HTTPS. -. The AI service verifies the token, checks per-feature permissions, and forwards the prompt to the configured large language model (LLM). +. The AI service verifies the token and forwards the prompt to the configured large language model (LLM). . The LLM streams its response back to the AI service. . The AI service relays the response to the client through Server-Sent Events (SSE). When used with TinyMCE `tinymceai`, the plugin handles steps 1, 2, and 5 automatically through the `tinymceai_token_provider` callback. -IMPORTANT: The browser connects directly to the AI service — requests do not pass through the application back end. The AI service must be network-reachable from the end-user browser, which means it must have a public URL (or be accessible through a VPN/internal network when deployed on an intranet). Configure xref:tinymceai-on-premises-frameworks.adoc#_cross_origin_requests_to_the_ai_service[CORS] and xref:tinymceai-on-premises-production.adoc#_tls_https[TLS] on the AI service accordingly. 
+NOTE: The browser connects directly to the AI service — requests do not pass through the application back end. The AI service must be network-reachable from the end-user browser, which means it must have a public URL (or be accessible through a VPN/internal network when deployed on an intranet). Configure xref:tinymceai-on-premises-frameworks.adoc#_cross_origin_requests_to_the_ai_service[CORS] and xref:tinymceai-on-premises-production.adoc#_tls_https[TLS] on the AI service accordingly. -The shared secret (API Secret) never leaves the back end; the editor and the AI service only ever see signed tokens. +The shared secret (API Secret) exists only in the application back end (token endpoint) and the AI service container. The browser and the editor never see it — they only handle signed tokens. == Capabilities @@ -107,7 +107,7 @@ Three credentials are involved in an on-premises deployment. They are distinct a |Only for self-hosted editor deployments. Provided by the Tiny account representative. |=== -NOTE: `LICENSE_KEY` (the AI service license) and `TINYMCE_API_KEY` / `license_key` (the editor license) are different credentials from different sources. Do not interchange them. +NOTE: `LICENSE_KEY` (the AI service license) and `TINYMCE_API_KEY` / `license_key` (the editor license) are different credentials from different sources. They are not interchangeable. 
== Prerequisites From c6502700cfd56e339c9c9d85e7daf322b38b0f19 Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Mon, 25 May 2026 15:27:44 +1000 Subject: [PATCH 24/48] Restructure MCP page: separate web search/scraping, reduce admonitions, fix style - Split web search and web scraping into independent sections with clear enabling steps, trigger mechanisms, and endpoint contracts - Reduce admonitions from 9 to 2 (WARNING + IMPORTANT only) - Remove all em-dashes, fix US English, terminology, and Latin terms - Add Prerequisites section, SSE events section, K8s deployment section - Document per-message capabilities:{webSearch:{}} requirement - Document POST /v1/conversations/{id}/web-resources trigger endpoint - Add tool namespacing and Docker networking subsections - Replace illustrative MCP example with official SDK-based example - Add stdio transport limitation note - Remove forward-looking OAuth statement (per team decision) - Add production readiness checklist cross-reference - Replace old diagram with new MCP-specific architecture SVG --- .../advanced-scenarios-fig-1.mmd | 4 - .../advanced-scenarios-fig-1.svg | 1 - .../mcp-web-integrations-architecture.mmd | 23 + .../mcp-web-integrations-architecture.svg | 1 + .../ROOT/pages/tinymceai-on-premises-mcp.adoc | 403 ++++++++++++------ .../tinymceai-on-premises-production.adoc | 1 + 6 files changed, 309 insertions(+), 124 deletions(-) delete mode 100644 modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-1.mmd delete mode 100644 modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-1.svg create mode 100644 modules/ROOT/images/tinymceai-on-premises/mcp-web-integrations-architecture.mmd create mode 100644 modules/ROOT/images/tinymceai-on-premises/mcp-web-integrations-architecture.svg diff --git a/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-1.mmd b/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-1.mmd deleted file mode 100644 index 
842da64633..0000000000 --- a/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-1.mmd +++ /dev/null @@ -1,4 +0,0 @@ -flowchart LR - Editor[TinyMCE editor] <-->|chat / quick actions| AI[AI Service] - AI <-->|MCP tools/call| MCP[MCP Server
knowledge-hub] - MCP <-->|read| KB[Confluence ·
Notion ·
GitBook ·
internal wiki] diff --git a/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-1.svg deleted file mode 100644 index a6ed13be3a..0000000000 --- a/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-1.svg +++ /dev/null @@ -1 +0,0 @@ -chat / quick actionsMCP tools/callreadTinyMCE editorAI ServiceMCP Serverknowledge-hubConfluence ·Notion ·GitBook ·internal wiki \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/mcp-web-integrations-architecture.mmd b/modules/ROOT/images/tinymceai-on-premises/mcp-web-integrations-architecture.mmd new file mode 100644 index 0000000000..1c7569e5bc --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/mcp-web-integrations-architecture.mmd @@ -0,0 +1,23 @@ +flowchart LR + Editor["TinyMCE editor"] <-->|"conversations (SSE)\nHTTP / HTTPS"| AI["AI Service\n(on-premises)"] + + AI <-->|"inference\nHTTPS"| LLM["LLM Provider\nOpenAI · Anthropic · Google"] + + AI <-->|"MCP tools/call\nHTTP"| MCP["MCP Server(s)\nStreamable HTTP"] + MCP <-->|"read"| KB["Confluence ·\nNotion · GitBook ·\ninternal wiki"] + + AI <-->|"search query\nHTTP"| WS["Web Search\nWEBSEARCH_ENDPOINT"] + WS <-->|"HTTPS"| SearchAPI["Search API\nSerpAPI · Brave · etc."] + + AI <-->|"scrape request\nHTTP"| WR["Web Scrape\nWEBRESOURCES_ENDPOINT"] + WR <-->|"HTTP/S"| Pages["Web pages"] + + style AI fill:#E3F2FD,stroke:#1976D2,stroke-width:2px + style Editor fill:#ECECFF,stroke:#9370DB + style MCP fill:#ECECFF,stroke:#9370DB + style WS fill:#ECECFF,stroke:#9370DB + style WR fill:#ECECFF,stroke:#9370DB + style LLM fill:#F1F8E9,stroke:#8BC34A + style KB fill:#F1F8E9,stroke:#8BC34A + style SearchAPI fill:#F1F8E9,stroke:#8BC34A + style Pages fill:#F1F8E9,stroke:#8BC34A diff --git a/modules/ROOT/images/tinymceai-on-premises/mcp-web-integrations-architecture.svg b/modules/ROOT/images/tinymceai-on-premises/mcp-web-integrations-architecture.svg new file 
mode 100644 index 0000000000..04bb6dc202 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/mcp-web-integrations-architecture.svg @@ -0,0 +1 @@ +

conversations (SSE)
HTTP / HTTPS

inference
HTTPS

MCP tools/call
HTTP

read

search query
HTTP

HTTPS

scrape request
HTTP

HTTP/S

TinyMCE editor

AI Service
(on-premises)

LLM Provider
OpenAI · Anthropic · Google

MCP Server(s)
Streamable HTTP

Confluence ·
Notion · GitBook ·
internal wiki

Web Search
WEBSEARCH_ENDPOINT

Search API
SerpAPI · Brave · etc.

Web Scrape
WEBRESOURCES_ENDPOINT

Web pages

\ No newline at end of file diff --git a/modules/ROOT/pages/tinymceai-on-premises-mcp.adoc b/modules/ROOT/pages/tinymceai-on-premises-mcp.adoc index 8aee959f95..0acc0ce0c2 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-mcp.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-mcp.adoc @@ -1,23 +1,33 @@ = MCP and web integrations -:navtitle: MCP and web integrations +:navtitle: MCP and web integrations (optional) :description: Model Context Protocol (MCP) server integration and web scraping/search configuration for TinyMCE AI on-premises :keywords: AI, on-premises, MCP, Model Context Protocol, web search, web scraping -The AI service extends model capabilities through two integration points: the https://modelcontextprotocol.io/[Model Context Protocol] (MCP) for tool calling, and pluggable web endpoints for page fetching and search. Both features operate within AI conversations only. +The AI service extends model capabilities through two integration points: the https://modelcontextprotocol.io/[Model Context Protocol] (MCP) for tool calling, and pluggable web endpoints for page fetching and search. Both features operate within AI conversations only. Web search and scraping allow the AI to reference live internet content during conversations, and for most deployments, enabling at least web search improves response quality. -TIP: Web search and scraping allow the AI to reference live internet content during conversations. For most deployments, enabling at least web search significantly improves response quality. These features are configured entirely through environment variables on the AI service container — no additional infrastructure is required beyond a search endpoint. +These features are configured entirely through environment variables on the AI service container. No additional infrastructure is required beyond a search endpoint. 
+ +[.text-center] +image::tinymceai-on-premises/mcp-web-integrations-architecture.svg[MCP and web integrations architecture: TinyMCE editor connects to AI Service through SSE conversations. AI Service connects to LLM provider for inference and to MCP servers and web search and scrape endpoints for tool calling and live web content.,width=100%] + +== Prerequisites + +This page assumes the following are already configured: + +* A running AI service container with database and Redis (see xref:tinymceai-on-premises-database.adoc[Database and Redis]). +* At least one LLM provider configured in `PROVIDERS` and `MODELS` (see xref:tinymceai-on-premises-providers.adoc[LLM providers]). +* A JWT token endpoint deployed and signing tokens with the correct API Secret (see xref:tinymceai-on-premises-jwt.adoc[JWT authentication]). [[mcp-integration]] == Model Context Protocol (MCP) -MCP allows the AI service to call external tools — internal wikis, API specifications, runbooks, contract databases, and compliance checkers — during conversations. The service connects to MCP servers over *Streamable HTTP transport*. +MCP allows the AI service to call external tools (internal wikis, API specifications, runbooks, contract databases, and compliance checkers) during conversations. The service connects to MCP servers over *Streamable HTTP transport*. -[.text-center] -image::tinymceai-on-premises/advanced-scenarios-fig-1.svg[MCP integration: TinyMCE rich text editor communicates with AI service which calls MCP server,width=100%] +Only Streamable HTTP transport is supported. Stdio-based MCP servers cannot connect directly to the AI service. Wrap them with the SDK's HTTP transport adapter (for example, `StreamableHTTPServerTransport` in the TypeScript SDK or `StreamableHTTPServer` in the Python SDK) before use. -NOTE: MCP tools are available in AI *conversations* only. Reviews and quick actions do not invoke MCP tools. +MCP tools are available in AI *conversations* only. 
Reviews and quick actions do not invoke MCP tools. === Configuration @@ -42,28 +52,65 @@ Set the `MCP_SERVERS` environment variable to a JSON object. Each key is a serve |`options.callToolTimeout` |Per-tool-call timeout in seconds (default 60). |=== -TIP: On Linux Docker, add `extra_hosts: ["host.docker.internal:host-gateway"]` to the AI service compose entry to reach MCP servers running on the host machine. +`MCP_SERVERS` supports multiple server entries. Add additional keys to the same JSON object: + +[source,bash] +---- +-e MCP_SERVERS='{ + "knowledge-hub": { + "url": "http://host.docker.internal:3001/mcp" + }, + "compliance-checker": { + "url": "http://host.docker.internal:3002/mcp" + } +}' +---- + +Each server's tools are namespaced independently (for example, `knowledge-hub-search_docs` and `compliance-checker-run_check`). + +[[mcp-docker-networking]] +=== Docker networking + +On Linux Docker, add `extra_hosts: ["host.docker.internal:host-gateway"]` to the AI service compose entry to reach MCP servers running on the host machine. WARNING: `host-gateway` resolves to the Docker bridge IP, not `127.0.0.1`. Host services bound to `127.0.0.1` only (common in Python MCP SDK examples and CLI-style servers) are unreachable from inside the container (`ECONNREFUSED`). Bind MCP servers to `0.0.0.0` or run them as sibling containers on the same Docker network. [[mcp-shared-token]] -=== Shared-token authentication limitation +=== Authentication The `headers` field is fixed at deploy time. Every MCP tool call uses the same credentials; there is no per-user MCP authentication path. If the MCP server requires per-user context, encode identity in the conversation prompt or in a header that the MCP server resolves to a per-user identity on its own side. +The AI service does not perform OAuth flows. MCP servers requiring OAuth must be fronted by a proxy that handles token exchange, or tokens must be rotated externally (requires a container restart to update `MCP_SERVERS`). 
+ +[[mcp-tool-namespacing]] +=== Tool namespacing + +MCP tools are exposed to the LLM with a namespaced identifier: `<server-name>-<tool-name>` (for example, `knowledge-hub-search_knowledge_base`). The `tools.disabled` array accepts the *bare tool name* only (for example, `"search_knowledge_base"`, not the namespaced form). + === MCP server example -The following is an illustrative example showing the JSON-RPC message flow. Production MCP servers must implement the full https://modelcontextprotocol.io/specification/2025-11-25/basic/transports#streamable-http[Streamable HTTP transport specification]. +The following example uses the official https://github.com/modelcontextprotocol/typescript-sdk[MCP TypeScript SDK] (`@modelcontextprotocol/sdk`) with `StreamableHTTPServerTransport`, which handles session management, SSE framing, and the full https://modelcontextprotocol.io/specification/2025-11-25/basic/transports#streamable-http[Streamable HTTP transport] specification. -.Knowledge-base MCP server (illustrative) +.Knowledge-base MCP server [%collapsible] ==== +Install dependencies: + +[source,bash] +---- +npm install @modelcontextprotocol/sdk express zod +---- + +Create `knowledge-mcp-server.js` (requires `"type": "module"` in `package.json`): + [source,javascript] ---- // knowledge-mcp-server.js -const express = require('express'); -const app = express(); -app.use(express.json()); +import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; +import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js'; +import express from 'express'; +import { randomUUID } from 'node:crypto'; +import { z } from 'zod'; const KNOWLEDGE_BASE = { 'api-guidelines': 'All REST APIs must use JSON, include pagination through Link headers, and return 4xx for client errors with a machine-readable error code.', @@ -71,89 +118,82 @@ const KNOWLEDGE_BASE = { 'security-policy': 'All user data must be encrypted at rest (AES-256) and in transit (TLS 1.3). 
PII requires additional field-level encryption.', }; -app.post('/mcp', (req, res) => { - const { method, id, params } = req.body; - - if (method === 'initialize') { - return res.json({ - jsonrpc: '2.0', id, - result: { - protocolVersion: '2024-11-05', - capabilities: { tools: {} }, - serverInfo: { name: 'knowledge-hub', version: '1.0.0' } - } - }); - } +const server = new McpServer({ + name: 'knowledge-hub', + version: '1.0.0' +}); - if (method === 'tools/list') { - return res.json({ - jsonrpc: '2.0', id, - result: { - tools: [{ - name: 'search_knowledge_base', - description: 'Search the company knowledge base for policies, guidelines, and procedures', - inputSchema: { - type: 'object', - properties: { - query: { type: 'string', description: 'Search query' } - }, - required: ['query'] - } - }, { - name: 'get_api_spec', - description: 'Get the OpenAPI spec for an internal service', - inputSchema: { - type: 'object', - properties: { - service: { type: 'string', description: 'Service name (for example user-service, billing-api)' } - }, - required: ['service'] - } - }] - } - }); +server.tool( + 'search_knowledge_base', + 'Search the company knowledge base for policies, guidelines, and procedures', + { query: z.string().describe('Search query') }, + async ({ query }) => { + const q = query.toLowerCase(); + const results = Object.entries(KNOWLEDGE_BASE) + .filter(([key]) => key.includes(q) || q.includes(key.split('-')[0])) + .map(([key, value]) => `## ${key}\n${value}`) + .join('\n\n'); + return { + content: [{ type: 'text', text: results || 'No results found.' 
}] + }; } - - if (method === 'tools/call') { - const { name, arguments: args } = params; - if (name === 'search_knowledge_base') { - const query = (args?.query || '').toLowerCase(); - const results = Object.entries(KNOWLEDGE_BASE) - .filter(([key]) => key.includes(query) || query.includes(key.split('-')[0])) - .map(([key, value]) => `##${key}\n${value}`) - .join('\n\n'); - return res.json({ - jsonrpc: '2.0', id, - result: { content: [{ type: 'text', text: results || 'No results found.' }] } - }); - } - return res.json({ - jsonrpc: '2.0', id, - result: { content: [{ type: 'text', text: 'Spec not found for: ' + args?.service }] } - }); +); + +server.tool( + 'get_api_spec', + 'Get the OpenAPI spec for an internal service', + { service: z.string().describe('Service name (for example user-service, billing-api)') }, + async ({ service }) => { + return { + content: [{ type: 'text', text: `Spec not found for: ${service}` }] + }; } +); - res.json({ jsonrpc: '2.0', id, error: { code: -32601, message: 'Unknown method' } }); +const transport = new StreamableHTTPServerTransport({ + sessionIdGenerator: () => randomUUID() }); -app.listen(3001, () => console.log('Knowledge MCP server on http://localhost:3001/mcp')); +await server.connect(transport); + +const app = express(); +app.use(express.json()); + +app.all('/mcp', (req, res) => transport.handleRequest(req, res, req.body)); + +app.listen(3001, () => console.log('Knowledge MCP server on http://0.0.0.0:3001/mcp')); ---- ==== +The example server binds to `0.0.0.0` so it is reachable from inside Docker containers. When the MCP server runs on the host machine, the AI service connects to it at `http://host.docker.internal:3001/mcp` (see <<mcp-docker-networking>>). -[[web-scraping-and-search]] -== Web scraping and web search -The AI service can forward web page fetches and search queries to external endpoints, enabling AI conversations to reference live web content. 
+[[web-search]] +== Web search -=== Configuration +The AI service can forward search queries to an external endpoint, enabling AI conversations to reference live web content. + +=== Enabling web search + +Web search requires three components configured together: + +[cols="1,3",options="header"] +|=== +|Component |Configuration + +|Environment variables +|Set `WEBSEARCH_ENABLED='true'` and `WEBSEARCH_ENDPOINT` on the AI service container. Optionally set `WEBSEARCH_REQUEST_TIMEOUT` (default 10000ms) and `WEBSEARCH_HEADERS` for authenticated search backends. + +|MODELS entry +|The model's `MODELS` entry must include `capabilities.webSearch: true` (boolean) for the web search toggle to appear in the editor. See xref:tinymceai-on-premises-providers.adoc#models-required[MODELS configuration]. + +|JWT permission +|The JWT must include `ai:conversations:webSearch` in the `auth.ai.permissions` array. See xref:tinymceai-on-premises-jwt.adoc#permissions-reference[JWT permissions]. Without this permission, the toggle remains grayed out even when the environment variables and MODELS entry are correct. +|=== [source,bash] ---- --e WEBRESOURCES_ENABLED='true' \ --e WEBRESOURCES_ENDPOINT='http://host.docker.internal:4000/scrape' \ --e WEBRESOURCES_REQUEST_TIMEOUT='10000' \ -e WEBSEARCH_ENABLED='true' \ -e WEBSEARCH_ENDPOINT='http://host.docker.internal:4001/search' \ -e WEBSEARCH_REQUEST_TIMEOUT='10000' \ @@ -162,52 +202,27 @@ The AI service can forward web page fetches and search queries to external endpo IMPORTANT: `WEBSEARCH_HEADERS` uses **colon-CSV format** (`Header-Name: value, Another: value`), not JSON. Passing a JSON object produces `ERR_INVALID_HTTP_TOKEN` and silently prevents all outbound search requests. -NOTE: A model must include `capabilities.webSearch: true` in its xref:tinymceai-on-premises-providers.adoc#models-required[`MODELS` entry] for the web search toggle to appear in the editor. 
Additionally, the JWT must include the `ai:conversations:webSearch` permission — see xref:tinymceai-on-premises-jwt.adoc#permissions-reference[JWT permissions]. - -=== Web scraping endpoint contract +[[websearch-per-message]] +=== Per-message activation (custom integrations) -[cols="1,2",options="header"] -|=== -|Direction |Payload -|Request |JSON object with a `url` field (page to fetch). -|Response |JSON object with `type` (`text/html` or `text/markdown`) and `data` (body content). -|=== +The TinyMCE editor sends the web search activation flag when the user toggles web search on. Custom integrations that call the AI service API directly must include the following in each message request body where web search should be active: -.Request body [source,json] ---- -{ "url": "https://example.com/article" } ----- - -.Response body -[source,json] ----- -{ "type": "text/html", "data": "

Example page body

" } +{ + "capabilities": { "webSearch": {} } +} ---- -==== Scraper implementation example (Playwright) - -[source,javascript] ----- -// scraper-service.js -const { chromium } = require('playwright'); -const express = require('express'); -const app = express(); -app.use(express.json()); +The value *must* be an empty object (`{}`). This is the documented API contract (see https://ckeditor.com/docs/cs/latest/guides/ckeditor-ai/models.html#capability-configuration[CKEditor AI Models: Capability Configuration]). The service rejects other shapes: -app.post('/scrape', async (req, res) => { - const browser = await chromium.launch(); - const page = await browser.newPage(); - await page.goto(req.body.url, { waitUntil: 'networkidle' }); - const content = await page.content(); - await browser.close(); - res.json({ type: 'text/html', data: content }); -}); +* `"webSearch": true` : rejected (schema expects an object, not a boolean). +* `"webSearch": {"enabled": true}` : rejected (unrecognized key). +* Omitted entirely: web search is not surfaced to the model, even when all server-side configuration is correct. -app.listen(4000); ----- +Without this field, the environment variables, JWT permission, and MODELS configuration are insufficient. The model never receives the web search tool. -=== Web search endpoint contract +=== Endpoint contract [cols="1,2",options="header"] |=== @@ -239,7 +254,7 @@ app.listen(4000); } ---- -==== Search implementation example (SerpAPI) +=== Implementation example (SerpAPI) [source,javascript] ---- @@ -266,6 +281,156 @@ app.listen(4001); +[[web-scraping]] +== Web scraping + +The AI service can forward web page fetches to an external endpoint, enabling AI conversations to reference specific pages. + +=== Enabling web scraping + +Set `WEBRESOURCES_ENABLED='true'` and `WEBRESOURCES_ENDPOINT` on the AI service container. Optionally set `WEBRESOURCES_REQUEST_TIMEOUT` (default 10000ms). 
+ +[source,bash] +---- +-e WEBRESOURCES_ENABLED='true' \ +-e WEBRESOURCES_ENDPOINT='http://host.docker.internal:4000/scrape' \ +-e WEBRESOURCES_REQUEST_TIMEOUT='10000' +---- + +[[web-scraping-trigger]] +=== Trigger mechanism + +The AI service calls the scrape endpoint (`WEBRESOURCES_ENDPOINT`) when it receives a request on: + +[source] +---- +POST /v1/conversations/{id}/web-resources +Content-Type: application/json + +{ "url": "https://example.com/page-to-fetch" } +---- + +The TinyMCE editor sends this request when a user pastes or references a URL in conversation. Custom integrations must call this endpoint explicitly to trigger a page fetch. + +The response is stored against the conversation. The `type` field in the scrape response must be `text/html` or `text/markdown`. Other MIME types (for example, `application/pdf`) are rejected with a `422 web-resource-download-error`. + +=== Endpoint contract + +[cols="1,2",options="header"] +|=== +|Direction |Payload +|Request |JSON object with a `url` field (page to fetch). +|Response |JSON object with `type` (`text/html` or `text/markdown`) and `data` (body content). +|=== + +.Request body +[source,json] +---- +{ "url": "https://example.com/article" } +---- + +.Response body +[source,json] +---- +{ "type": "text/html", "data": "

Example page body

" } +---- + +=== Implementation example (Playwright) + +[source,javascript] +---- +// scraper-service.js +const { chromium } = require('playwright'); +const express = require('express'); +const app = express(); +app.use(express.json()); + +app.post('/scrape', async (req, res) => { + const browser = await chromium.launch(); + const page = await browser.newPage(); + await page.goto(req.body.url, { waitUntil: 'networkidle' }); + const content = await page.content(); + await browser.close(); + res.json({ type: 'text/html', data: content }); +}); + +app.listen(4000); +---- + + + +[[sse-events]] +== SSE events + +Custom streaming UI integrations can use the following Server-Sent Events (SSE) to render tool call progress and search results. The AI service emits these events during conversations: + +[cols="1,3",options="header"] +|=== +|Event name |Description +|`mcp-tool-result` |Emitted when an MCP tool call completes. Contains the tool result. There is no pre-call event; clients receive no signal until the tool call finishes. +|`web-search` |Emitted when a web search returns results (or fails without emitting an error event; see <<web-search-troubleshooting>>). +|`source` |Emitted with source citations from the model response. +|=== + + + +[[mcp-kubernetes]] +== Kubernetes deployment + +In Kubernetes, the `extra_hosts: host-gateway` Docker pattern does not apply. Deploy MCP servers and web integration endpoints as sibling Deployments with ClusterIP Services in the same namespace as the AI service. 
Reference them using cluster DNS: + +[source,bash] +---- +-e MCP_SERVERS='{ + "knowledge-hub": { + "url": "http://mcp-knowledge.tinymce-ai.svc.cluster.local:3001/mcp", + "options": { "callToolTimeout": 30 } + } +}' +-e WEBRESOURCES_ENDPOINT='http://web-scrape.tinymce-ai.svc.cluster.local:4000/scrape' +-e WEBSEARCH_ENDPOINT='http://web-search.tinymce-ai.svc.cluster.local:4001/search' +---- + +Store `MCP_SERVERS` and `WEBSEARCH_HEADERS` in a Kubernetes Secret and mount them as environment variables in the AI service Deployment. This avoids exposing credentials in the Deployment manifest and allows rotation without editing the manifest. Restart the AI service pods after rotating a value, because environment variables are read only at container start. + +For production clusters, the MCP server Deployments should have dedicated resource requests, liveness probes, and replica counts independent of the AI service. See xref:tinymceai-on-premises-production.adoc[Production deployment] for general Kubernetes patterns. + + + +[[web-search-troubleshooting]] +== Troubleshooting + +[cols="1,2",options="header"] +|=== +|Symptom |Resolution + +|Web search or web scraping features do not appear at all +|`WEBSEARCH_ENABLED` and `WEBRESOURCES_ENABLED` both default to `false`. Set them to `true` in the AI service environment and provide the corresponding `_ENDPOINT` values. Without these flags enabled, the AI service disables all web integration features regardless of JWT permissions or MODELS configuration. + +|Web search toggle is grayed out in the editor +|The JWT is missing the `ai:conversations:webSearch` permission. Add it to the `auth.ai.permissions` array in the token (or use the wildcard `ai:conversations:*`). See xref:tinymceai-on-premises-jwt.adoc#permissions-reference[JWT permissions]. + +|Web search is toggled on but the model never uses it +|The per-message request body is missing `"capabilities": {"webSearch": {}}`. The TinyMCE editor sends this when the toggle is active. Custom integrations must include it in each request body. See <<websearch-per-message>>. 
+ +|`ERR_INVALID_HTTP_TOKEN` in AI service logs related to web search +|`WEBSEARCH_HEADERS` is set as a JSON object. The service expects colon-CSV format: `Header-Name: value, Another: value`. + +|`ECONNREFUSED` when connecting to MCP server +|The MCP server is bound to `127.0.0.1`. Bind to `0.0.0.0` instead, or run the MCP server as a sibling container on the same Docker network (or Kubernetes namespace). + +|`[WARN] MCP client health check ping failed, evicting` every ~60 seconds +|Non-functional. The AI service re-handshakes with the MCP server after each eviction. Tool calls continue to work. This log volume is expected in steady state and does not indicate a problem. + +|Web search returns "no results" but the search back end is running +|Check AI service container logs for `[WARN] Web search request failed`. When the `WEBSEARCH_ENDPOINT` returns a 5xx error, the service logs a warning but emits an empty `event: web-search` to the client with no error payload. The model proceeds as if zero results were returned. + +|`422 web-resource-download-error` from the web-resources endpoint +|This single error code covers multiple failure modes: the scrape endpoint returned 5xx, returned non-JSON, returned an empty `data` field, or returned an unsupported `type` (anything other than `text/html` or `text/markdown`). Check AI service container logs for the underlying cause. +|=== + + + == See also * xref:tinymceai-on-premises-providers.adoc[LLM providers]: provider configuration and the `MODELS` catalog diff --git a/modules/ROOT/pages/tinymceai-on-premises-production.adoc b/modules/ROOT/pages/tinymceai-on-premises-production.adoc index b451e92bcd..efe2e53a6c 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-production.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-production.adoc @@ -16,6 +16,7 @@ Before deploying to production, confirm each item: . Container image pulled and registry credentials stored as a secret. . 
Reverse proxy with TLS termination and `proxy_buffering off` for SSE. . Environment and access key created through the Management Panel. +. (Optional) xref:tinymceai-on-premises-mcp.adoc#mcp-kubernetes[MCP and web integrations]: MCP servers and web endpoints deployed as sibling Services if using tool calling or web search. == Architecture overview From 87be074baa081784bdb4dea982d10cc712afdc13 Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Mon, 25 May 2026 15:30:18 +1000 Subject: [PATCH 25/48] Fix WEBSEARCH_HEADERS description in reference, add web search note to getting-started - Reference page: correct WEBSEARCH_HEADERS from "JSON object" to colon-CSV format with cross-reference to MCP page - Getting started: add NOTE about enabling web search with link to full MCP/web configuration --- modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc | 2 ++ modules/ROOT/pages/tinymceai-on-premises-reference.adoc | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc index 04f2e4aa14..9cdb2472ce 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc @@ -197,6 +197,8 @@ docker run --init -d -p 8000:8000 \ TIP: The network name is typically `_default` (e.g., `tinymce-ai-onpremise_default` for the folder created above). Run `docker network ls` to confirm the exact name — Docker Compose versions format it differently. For multiple LLM providers, extend the `PROVIDERS` JSON: `{"openai":{...},"anthropic":{...}}`. +NOTE: The launch command above starts the AI service with basic conversation support. To enable *web search* in conversations, add `WEBSEARCH_ENABLED='true'` and `WEBSEARCH_ENDPOINT` (pointing to a search backend) to the `docker run` command. 
See xref:tinymceai-on-premises-mcp.adoc#web-search[Web search] for the full configuration, endpoint contracts, and a SerpAPI example. + For Podman, replace `docker run` with `podman run` and use a Podman pod instead of a compose network. See xref:tinymceai-on-premises-production.adoc[Production deployment] for Podman-specific guidance. See xref:tinymceai-on-premises-production.adoc#_podman_deployment[Podman deployment] for a full example. For native databases (the database runs on the host or in a managed service rather than in Docker), drop the `--network` flag and set `DATABASE_HOST=host.docker.internal` (Docker Desktop and Podman 4{plus}). On native Linux Docker, additionally pass `--add-host=host.docker.internal:host-gateway`. diff --git a/modules/ROOT/pages/tinymceai-on-premises-reference.adoc b/modules/ROOT/pages/tinymceai-on-premises-reference.adoc index d4d1529469..28bced4e7a 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-reference.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-reference.adoc @@ -62,7 +62,7 @@ Alphabetized. Required-ness is marked relative to a minimum working deployment. |`WEBRESOURCES_REQUEST_TIMEOUT` |No |- |Scraper request timeout in ms. |`WEBSEARCH_ENABLED` |No |`false` |Enable web search forwarding. See xref:tinymceai-on-premises-mcp.adoc#web-scraping-and-search[Web scraping and web search]. |`WEBSEARCH_ENDPOINT` |If web search enabled |- |Search URL. -|`WEBSEARCH_HEADERS` |No |- |JSON object; extra headers sent to the search endpoint. +|`WEBSEARCH_HEADERS` |No |- |Colon-CSV format (`Header-Name: value, Another: value`). Extra headers sent to the search endpoint. Do not use JSON — see xref:tinymceai-on-premises-mcp.adoc#web-search[Web search]. |`WEBSEARCH_REQUEST_TIMEOUT` |No |- |Search request timeout in ms. 
|=== From 4a01875ed571b734063e238817be1ed16c981938 Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Mon, 25 May 2026 16:04:30 +1000 Subject: [PATCH 26/48] Address PR #4142 review comments - Fix wildcard (*) rendering bug in JWT permissions table - Clarify ENVIRONMENTS_MANAGEMENT_SECRET_KEY description - Simplify filesystem storage IMPORTANT admonition - Un-collapse PostgreSQL compose file for consistency - Promote MySQL version pinning to WARNING admonition - Align Redis description with overview diagram label --- modules/ROOT/pages/tinymceai-on-premises-database.adoc | 9 +++------ modules/ROOT/pages/tinymceai-on-premises-jwt.adoc | 4 ++-- modules/ROOT/pages/tinymceai-on-premises.adoc | 2 +- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/modules/ROOT/pages/tinymceai-on-premises-database.adoc b/modules/ROOT/pages/tinymceai-on-premises-database.adoc index c7c72cecbc..2d99eda59a 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-database.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-database.adoc @@ -6,7 +6,7 @@ This page covers the *data layer*: the SQL database, Redis, and file storage. These components must be running and accessible before the AI service container can start — the service connects to them on boot and will not proceed without them. * *SQL database*: stores persistent data such as configurations, conversations, files, and documents. -* *Redis*: handles temporary data and inter-instance communication for scaling. When multiple application instances are running, Redis ensures that data is shared correctly across all of them. +* *Redis*: caching and coordination (SSE delivery, rate limits, pub/sub). Enables the AI service to remain stateless. * *File storage*: stores uploaded files and documents. Configure the data layer first, then proceed to xref:tinymceai-on-premises-providers.adoc[LLM providers] and xref:tinymceai-on-premises-jwt.adoc[JWT authentication]. 
For container runtimes, reverse proxies, Transport Layer Security (TLS), Kubernetes, and ECS deployment, see the xref:tinymceai-on-premises-production.adoc[Production deployment guide]. @@ -85,7 +85,7 @@ TIP: Pin specific major versions for all data layer images (`mysql:8.0`, `postgr [[mysql-version-pinning]] === MySQL -Do *not* use `mysql:8`. That tag now floats to the latest MySQL, which removes the `default-authentication-plugin=mysql_native_password` startup flag the AI service relies on. The container crashloops with: +WARNING: Do *not* use `mysql:8`. That tag now floats to the latest MySQL, which removes the `default-authentication-plugin=mysql_native_password` startup flag the AI service relies on. The container crashloops with: .... [ERROR] [MY-000067] [Server] unknown variable 'default-authentication-plugin=mysql_native_password'. @@ -206,8 +206,6 @@ volumes: ---- .PostgreSQL compose file -[%collapsible] -==== [source,yaml] ---- services: @@ -240,7 +238,6 @@ services: volumes: pg_data: ---- -==== If using PostgreSQL and not using `DATABASE_SCHEMA=public`, after `docker compose up -d`, create the schema: @@ -620,7 +617,7 @@ STORAGE_DRIVER=filesystem STORAGE_LOCATION=/tmp/ai-storage ---- -IMPORTANT: The container runs as a non-root user and cannot write under `/var`. Mount a writable volume and point `STORAGE_LOCATION` at the mount point: `-v ./ai-storage:/tmp/ai-storage`. +IMPORTANT: The AI service container runs as a non-root user. Mount a writable volume and point `STORAGE_LOCATION` at the mount path (for example, `-v ./ai-storage:/tmp/ai-storage`). 
=== Database diff --git a/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc b/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc index 2abe27acd6..7acaf40ddb 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc @@ -52,7 +52,7 @@ The API Secret is generated when creating an access key inside an environment, i |=== |Credential |Purpose |Used by |*API Secret* |Signs user-facing JWTs presented to the AI runtime endpoints. Created per access key inside an environment. |The application token endpoint. Never appears in any management call. -|*`ENVIRONMENTS_MANAGEMENT_SECRET_KEY`* |Signs Management Panel logins. Set as an environment variable on the AI service container. |The Management Panel UI. +|*`ENVIRONMENTS_MANAGEMENT_SECRET_KEY`* |Password to log in to the Management Panel. Set as an environment variable on the AI service container. |The Management Panel UI. |=== These two credentials are unrelated. Using one in place of the other produces `invalid-jwt-signature`. @@ -70,7 +70,7 @@ Every token MUST contain the following claims. |`iat` |number |Issued-at, seconds since epoch (UTC). |`exp` |number |Expiry, seconds since epoch (UTC). Recommend `iat {plus} 3600` for demos, `iat {plus} 900` for production. The server applies 60 seconds of clock-skew leeway; tokens up to 60 seconds past `exp` still verify. |`sub` |string |Unique, stable user identifier. Conversation history is isolated per-`sub`; do not reuse one `sub` across users or conversations will leak between them. -|`auth.ai.permissions` |`string[]` |Array of feature permission strings. See the permissions reference below. Wildcards (`*`) are accepted only in the documented positions; the bare string `"*"` is rejected. +|`auth.ai.permissions` |`string[]` |Array of feature permission strings. See the permissions reference below. Wildcards (pass:[*]) are accepted only in the documented positions; the bare string `"*"` is rejected. 
|=== == Optional claims diff --git a/modules/ROOT/pages/tinymceai-on-premises.adoc b/modules/ROOT/pages/tinymceai-on-premises.adoc index f95d24f1e1..553b65d778 100644 --- a/modules/ROOT/pages/tinymceai-on-premises.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises.adoc @@ -15,7 +15,7 @@ The infrastructure consists of three layers: * The *application layer* runs the AI service container and communicates with LLM providers to process AI requests. It may consist of one or more instances behind a load balancer (round-robin recommended). Each instance runs the same stateless container image. * The *data layer* consists of a SQL database, a Redis instance, and file storage: ** *SQL database*: stores persistent data: configurations, conversations, files, and documents. -** *Redis*: handles temporary data and inter-instance communication for scaling. When multiple instances are running, Redis ensures data is shared correctly across all of them. +** *Redis*: caching and coordination (SSE delivery, rate limits, pub/sub). Enables the AI service to remain stateless. ** *File storage*: stores uploaded files and documents (S3, Azure Blob, filesystem, or the database itself). [.text-center] From a665e32790344140bd3566f618ecb88e78df8fa4 Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Mon, 25 May 2026 16:07:35 +1000 Subject: [PATCH 27/48] Fix wildcard escape to preserve monospace formatting Use `+*+` instead of pass:[*] so the asterisk renders in monospace as intended. --- modules/ROOT/pages/tinymceai-on-premises-jwt.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc b/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc index 7acaf40ddb..a62701969d 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc @@ -70,7 +70,7 @@ Every token MUST contain the following claims. |`iat` |number |Issued-at, seconds since epoch (UTC). 
|`exp` |number |Expiry, seconds since epoch (UTC). Recommend `iat {plus} 3600` for demos, `iat {plus} 900` for production. The server applies 60 seconds of clock-skew leeway; tokens up to 60 seconds past `exp` still verify. |`sub` |string |Unique, stable user identifier. Conversation history is isolated per-`sub`; do not reuse one `sub` across users or conversations will leak between them. -|`auth.ai.permissions` |`string[]` |Array of feature permission strings. See the permissions reference below. Wildcards (pass:[*]) are accepted only in the documented positions; the bare string `"*"` is rejected. +|`auth.ai.permissions` |`string[]` |Array of feature permission strings. See the permissions reference below. Wildcards (`+*+`) are accepted only in the documented positions; the bare string `"*"` is rejected. |=== == Optional claims From 4a65164dd7388452fe97e9d86bef82beb2a514f4 Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Mon, 25 May 2026 16:30:19 +1000 Subject: [PATCH 28/48] Update diagrams per Tim's feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - JWT sequence: label step 2 as tinymceai_token_provider callback rather than inventing a specific endpoint - Overview flowchart: move token endpoint into Application layer, route SSE response back through load balancer, simplify LB↔AI to bidirectional HTTP/SSE edge --- .../tinymceai-on-premises/jwt-authentication-fig-1.mmd | 2 +- .../tinymceai-on-premises/jwt-authentication-fig-1.svg | 2 +- .../ROOT/images/tinymceai-on-premises/overview-fig-2.mmd | 8 ++++---- .../ROOT/images/tinymceai-on-premises/overview-fig-2.svg | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/ROOT/images/tinymceai-on-premises/jwt-authentication-fig-1.mmd b/modules/ROOT/images/tinymceai-on-premises/jwt-authentication-fig-1.mmd index 587af33a91..45423c4c7f 100644 --- a/modules/ROOT/images/tinymceai-on-premises/jwt-authentication-fig-1.mmd +++ 
b/modules/ROOT/images/tinymceai-on-premises/jwt-authentication-fig-1.mmd @@ -7,7 +7,7 @@ sequenceDiagram participant LLM as LLM provider User->>Editor: Triggers an AI feature - Editor->>App: POST /api/ai-token
<br/>session cookie or Bearer + Editor->>App: tinymceai_token_provider callback<br/>(e.g. POST /api/ai-token) App->>App: Authenticate the user Note over App: Sign HS256 JWT with API Secret<br/>aud = environment ID<br/>sub = user ID<br/>
auth.ai.permissions = [...] App-->>Editor: { "token": "eyJ..." } diff --git a/modules/ROOT/images/tinymceai-on-premises/jwt-authentication-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/jwt-authentication-fig-1.svg index 8cd0598928..f9d9f82201 100644 --- a/modules/ROOT/images/tinymceai-on-premises/jwt-authentication-fig-1.svg +++ b/modules/ROOT/images/tinymceai-on-premises/jwt-authentication-fig-1.svg @@ -1 +1 @@ -LLM providerAI serviceYour application backend(token endpoint)TinyMCE editor(tinymceai plugin)LLM providerAI serviceYour application backend(token endpoint)TinyMCE editor(tinymceai plugin)Sign HS256 JWT with API Secretaud = environment IDsub = user IDauth.ai.permissions = [...]alt[Token valid and permissions allow the action][Signature does not match][aud is not registered with AI runtime][Past expiry plus 60s leeway][Permissions do not cover the action]UserTriggers an AI feature1POST /api/ai-tokensession cookie or Bearer2Authenticate the user3{ "token": "eyJ..." }4POST /v1/conversations/{id}/messagesAuthorization: Bearer eyJ...5Verify HS256 signaturecheck aud, exp, permissions6Forward prompt7Stream response chunks8SSE: text-delta events9401 invalid-jwt-signature10401 invalid-jwt-payload11401 invalid-jwt12200 with allowed:false13User \ No newline at end of file +LLM providerAI serviceYour application backend(token endpoint)TinyMCE editor(tinymceai plugin)LLM providerAI serviceYour application backend(token endpoint)TinyMCE editor(tinymceai plugin)Sign HS256 JWT with API Secretaud = environment IDsub = user IDauth.ai.permissions = [...]alt[Token valid and permissions allow the action][Signature does not match][aud is not registered with AI runtime][Past expiry plus 60s leeway][Permissions do not cover the action]UserTriggers an AI feature1tinymceai_token_provider callback(e.g. POST /api/ai-token)2Authenticate the user3{ "token": "eyJ..." 
}4POST /v1/conversations/{id}/messagesAuthorization: Bearer eyJ...5Verify HS256 signaturecheck aud, exp, permissions6Forward prompt7Stream response chunks8SSE: text-delta events9401 invalid-jwt-signature10401 invalid-jwt-payload11401 invalid-jwt12200 with allowed:false13User \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/overview-fig-2.mmd b/modules/ROOT/images/tinymceai-on-premises/overview-fig-2.mmd index eafb0efc04..468a7115e8 100644 --- a/modules/ROOT/images/tinymceai-on-premises/overview-fig-2.mmd +++ b/modules/ROOT/images/tinymceai-on-premises/overview-fig-2.mmd @@ -1,11 +1,11 @@ %%{init: {'theme': 'base', 'themeVariables': { 'primaryColor': '#ECECFF', 'primaryBorderColor': '#9370DB', 'lineColor': '#333333', 'edgeLabelBackground': '#e8e8e8', 'fontSize': '16px' }, 'flowchart': { 'nodeSpacing': 40, 'rankSpacing': 80 }}}%% flowchart LR - subgraph Client["Client layer"] - Token["Token endpoint\n(back end)"] + subgraph Browser["Browser"] App["TinyMCE editor\n(browser)"] end subgraph Service["Application layer"] + Token["Token endpoint\n(your server)"] LB["Load balancer /\nreverse proxy\n(TLS termination)"] AI["AI service\n(container)"] end @@ -21,10 +21,10 @@ flowchart LR App -->|"1. request JWT"| Token Token -->|"JWT"| App App -->|"2. HTTPS"| LB - LB -->|"HTTP"| AI + LB <-->|"HTTP / SSE"| AI AI -->|"3. HTTPS"| LLM LLM -->|"4. stream"| AI - AI -->|"5. SSE response"| App + LB -->|"5. SSE"| App AI <-->|"read/write"| DB AI <-->|"read/write"| Redis AI <-->|"read/write"| Storage diff --git a/modules/ROOT/images/tinymceai-on-premises/overview-fig-2.svg b/modules/ROOT/images/tinymceai-on-premises/overview-fig-2.svg index c02a9150a2..f53b81026e 100644 --- a/modules/ROOT/images/tinymceai-on-premises/overview-fig-2.svg +++ b/modules/ROOT/images/tinymceai-on-premises/overview-fig-2.svg @@ -1 +1 @@ -

Data layer

Application layer

Client layer

1. request JWT

JWT

2. HTTPS

HTTP

3. HTTPS

4. stream

5. SSE response

read/write

read/write

read/write

Token endpoint
(back end)

TinyMCE editor
(browser)

Load balancer /
reverse proxy
(TLS termination)

AI service
(container)

SQL database
(conversations, configs)

Redis
(cache, coordination)

File storage
(uploads, documents)

LLM provider

\ No newline at end of file +

Data layer

Application layer

Browser

1. request JWT

JWT

2. HTTPS

HTTP / SSE

3. HTTPS

4. stream

5. SSE

read/write

read/write

read/write

TinyMCE editor
(browser)

Token endpoint
(your server)

Load balancer /
reverse proxy
(TLS termination)

AI service
(container)

SQL database
(conversations, configs)

Redis
(cache, coordination)

File storage
(uploads, documents)

LLM provider

\ No newline at end of file From 0811e63e454d495b8ca031c0a7fa457cbc6eceb8 Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Mon, 25 May 2026 16:31:46 +1000 Subject: [PATCH 29/48] Align overview prose with updated diagram layout Token endpoint is now in the Application layer (not client layer), matching the revised overview-fig-2 diagram structure. --- modules/ROOT/pages/tinymceai-on-premises.adoc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/ROOT/pages/tinymceai-on-premises.adoc b/modules/ROOT/pages/tinymceai-on-premises.adoc index 553b65d778..7207c3a0e5 100644 --- a/modules/ROOT/pages/tinymceai-on-premises.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises.adoc @@ -11,15 +11,15 @@ The service ships as a single Open Container Initiative (OCI) container image (` The infrastructure consists of three layers: -* The *client layer* runs in the browser (TinyMCE editor with the `tinymceai` plugin) and in the application back end (token endpoint that signs JWTs). -* The *application layer* runs the AI service container and communicates with LLM providers to process AI requests. It may consist of one or more instances behind a load balancer (round-robin recommended). Each instance runs the same stateless container image. +* The *browser* runs the TinyMCE editor with the `tinymceai` plugin. +* The *application layer* runs the token endpoint (which signs JWTs), the AI service container, and a load balancer or reverse proxy. It may consist of one or more AI service instances behind the load balancer (round-robin recommended). Each instance runs the same stateless container image. * The *data layer* consists of a SQL database, a Redis instance, and file storage: ** *SQL database*: stores persistent data: configurations, conversations, files, and documents. ** *Redis*: caching and coordination (SSE delivery, rate limits, pub/sub). Enables the AI service to remain stateless. 
** *File storage*: stores uploaded files and documents (S3, Azure Blob, filesystem, or the database itself). [.text-center] -image::tinymceai-on-premises/overview-fig-2.svg[alt="Deployment architecture showing data layer, application layer, client layer, and LLM provider connections",width=100%] +image::tinymceai-on-premises/overview-fig-2.svg[alt="Deployment architecture showing browser, application layer, data layer, and LLM provider connections",width=100%] === Data flow From c70ca7a2157cc952f261bab055702a4c2544ce7a Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Mon, 25 May 2026 16:38:27 +1000 Subject: [PATCH 30/48] Apply Ben's suggestions: shorten SQL description, simplify setup intro - Overview: SQL database bullet shortened to match actual storage scope - Database: simplify setup path intro sentence per Ben's suggestion --- modules/ROOT/pages/tinymceai-on-premises-database.adoc | 2 +- modules/ROOT/pages/tinymceai-on-premises.adoc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/ROOT/pages/tinymceai-on-premises-database.adoc b/modules/ROOT/pages/tinymceai-on-premises-database.adoc index 2d99eda59a..a1c59fd795 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-database.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-database.adoc @@ -37,7 +37,7 @@ The AI service supports both MySQL and PostgreSQL equally. Pick whichever the op == Choosing a setup path -Use Docker Compose for evaluation, managed cloud services (Amazon RDS, Cloud SQL, Azure Database) for production. All paths produce the same end state: a running database the AI service can connect to. Both MySQL and PostgreSQL are supported in every configuration. +Use Docker Compose for evaluation or managed cloud services (Amazon RDS, Cloud SQL, Azure Database) for production. 
[.text-center] image::tinymceai-on-premises/database-setup-fig-1.svg[Database setup decision tree: local Docker Compose vs managed cloud database for evaluation and production,width=100%] diff --git a/modules/ROOT/pages/tinymceai-on-premises.adoc b/modules/ROOT/pages/tinymceai-on-premises.adoc index 7207c3a0e5..43c3a2d4b4 100644 --- a/modules/ROOT/pages/tinymceai-on-premises.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises.adoc @@ -14,7 +14,7 @@ The infrastructure consists of three layers: * The *browser* runs the TinyMCE editor with the `tinymceai` plugin. * The *application layer* runs the token endpoint (which signs JWTs), the AI service container, and a load balancer or reverse proxy. It may consist of one or more AI service instances behind the load balancer (round-robin recommended). Each instance runs the same stateless container image. * The *data layer* consists of a SQL database, a Redis instance, and file storage: -** *SQL database*: stores persistent data: configurations, conversations, files, and documents. +** *SQL database*: stores persistent data such as configurations and conversations. ** *Redis*: caching and coordination (SSE delivery, rate limits, pub/sub). Enables the AI service to remain stateless. ** *File storage*: stores uploaded files and documents (S3, Azure Blob, filesystem, or the database itself). From 2f82631b3f473c6eb416489930c67eaa38565cff Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Mon, 25 May 2026 21:08:00 +1000 Subject: [PATCH 31/48] Re-render SVGs with intrinsic pixel widths Run the render script to replace width="100%" with actual pixel widths from viewBox. Fixes diagrams rendering too small in img tags. 
--- .../ROOT/images/tinymceai-on-premises/complete-guide-fig-1.svg | 2 +- .../ROOT/images/tinymceai-on-premises/complete-guide-fig-2.svg | 2 +- .../ROOT/images/tinymceai-on-premises/complete-guide-fig-3.svg | 2 +- .../ROOT/images/tinymceai-on-premises/database-setup-fig-1.svg | 2 +- .../images/tinymceai-on-premises/jwt-authentication-fig-1.svg | 2 +- .../tinymceai-on-premises/mcp-web-integrations-architecture.svg | 2 +- modules/ROOT/images/tinymceai-on-premises/overview-fig-1.svg | 2 +- modules/ROOT/images/tinymceai-on-premises/overview-fig-2.svg | 2 +- .../ROOT/images/tinymceai-on-premises/providers-guide-fig-1.svg | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.svg index a0b8e21e11..8a9a4ca34a 100644 --- a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.svg +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.svg @@ -1 +1 @@ -

Shared data layer

Application layer (stateless, +N replicas)

HTTPS

HTTPS + Bearer JWT

HTTP

read/write

read/write

read/write

HTTPS

telemetry

tool calls

Browser
TinyMCE editor + tinymceai plugin

Your token endpoint
signs HS256 JWTs

Reverse proxy / Load balancer
nginx · ALB · K8s Ingress
TLS termination · SSE pass-through

ai-service

SQL database
MySQL 8.0+ / PostgreSQL 13+

Redis 3.2.6+

File storage
S3 · Azure Blob · filesystem

LLM provider
OpenAI · Anthropic · Google ·
Azure · Bedrock · Vertex ·
self-hosted

OpenTelemetry · Langfuse

MCP servers

\ No newline at end of file +Shared data layerApplication layer (stateless, +N replicas)HTTPSHTTPS + Bearer JWTHTTPread/writeread/writeread/writeHTTPStelemetrytool callsBrowserTinyMCE editor + tinymceaipluginYour token endpointsigns HS256 JWTsReverse proxy / Loadbalancernginx · ALB · K8s IngressTLS termination · SSEpass-throughai-serviceSQL databaseMySQL 8.0+ / PostgreSQL13+Redis 3.2.6+File storageS3 · Azure Blob · filesystemLLM providerOpenAI · Anthropic ·Google ·Azure · Bedrock · Vertex ·self-hostedOpenTelemetry · LangfuseMCP servers \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.svg b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.svg index f675af7f46..ee941af931 100644 --- a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.svg +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.svg @@ -1 +1 @@ -

Evaluating locally

Production

Kubernetes

AWS ECS / Fargate

Docker / Podman on VMs

Managed cloud DB

Self-managed

New deployment

Evaluating or
going to production?

Docker Compose
all services on one host
Getting started guide

Container orchestrator?

Kubernetes deployment
Production guide

ECS task definition
Production guide

Docker or Podman compose
Database guide

Database?

RDS · Cloud SQL ·
Azure Database

Containers or native install

Continue with
LLM providers guide

\ No newline at end of file +Evaluating locallyProductionKubernetesAWS ECS / FargateDocker / Podman on VMsManaged cloud DBSelf-managedNew deploymentEvaluating orgoing to production?Docker Composeall services on one hostGetting started guideContainer orchestrator?Kubernetes deploymentProduction guideECS task definitionProduction guideDocker or Podman composeDatabase guideDatabase?RDS · Cloud SQL ·Azure DatabaseContainers or native installContinue withLLM providers guide \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-3.svg b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-3.svg index 47f6946e61..7462a1f91f 100644 --- a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-3.svg +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-3.svg @@ -1 +1 @@ -scalesscalesscalesInternetIngress controllernginx-ingress · ALBcontrollerproxy-buffering offService: ai-servicePod: ai-service replica 1Pod: ai-service replica 2Pod: ai-service replica NService: databaseor external RDSService: redisor external ElastiCacheS3 / Azure BlobHorizontalPodAutoscaler \ No newline at end of file +scalesscalesscalesInternetIngress controllernginx-ingress · ALBcontrollerproxy-buffering offService: ai-servicePod: ai-service replica 1Pod: ai-service replica 2Pod: ai-service replica NService: databaseor external RDSService: redisor external ElastiCacheS3 / Azure BlobHorizontalPodAutoscaler \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/database-setup-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/database-setup-fig-1.svg index 8deafa3b8d..f7abff0aa7 100644 --- a/modules/ROOT/images/tinymceai-on-premises/database-setup-fig-1.svg +++ b/modules/ROOT/images/tinymceai-on-premises/database-setup-fig-1.svg @@ -1 +1 @@ -

Evaluating locally

Deploying

Cloud / managed services

Self-managed

Docker or Podman

Native on host

Where will MySQL/Postgres and Redis run?

Evaluating or
deploying to prod?

Docker Compose
image: mysql:8.0 or postgres:16
+ redis:7

Cloud or self-managed?

AWS RDS · Cloud SQL ·
Azure Database
+ ElastiCache · Memorystore ·
Azure Cache for Redis

Container runtime
available?

Containers on the same
network or pod as ai-service

Native install
brew · apt · yum · dnf
AI service connects via
host.docker.internal

Verify: nc -zv host port
then start ai-service

\ No newline at end of file +Evaluating locallyDeployingCloud / managed servicesSelf-managedDocker or PodmanNative on hostWhere will MySQL/Postgresand Redis run?Evaluating ordeploying to prod?Docker Composeimage: mysql:8.0 orpostgres:16+ redis:7Cloud or self-managed?AWS RDS · Cloud SQL ·Azure Database+ ElastiCache ·Memorystore ·Azure Cache for RedisContainer runtimeavailable?Containers on the samenetwork or pod asai-serviceNative installbrew · apt · yum · dnfAI service connects viahost.docker.internalVerify: nc -zv host portthen start ai-service \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/jwt-authentication-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/jwt-authentication-fig-1.svg index f9d9f82201..f7bf476ece 100644 --- a/modules/ROOT/images/tinymceai-on-premises/jwt-authentication-fig-1.svg +++ b/modules/ROOT/images/tinymceai-on-premises/jwt-authentication-fig-1.svg @@ -1 +1 @@ -LLM providerAI serviceYour application backend(token endpoint)TinyMCE editor(tinymceai plugin)LLM providerAI serviceYour application backend(token endpoint)TinyMCE editor(tinymceai plugin)Sign HS256 JWT with API Secretaud = environment IDsub = user IDauth.ai.permissions = [...]alt[Token valid and permissions allow the action][Signature does not match][aud is not registered with AI runtime][Past expiry plus 60s leeway][Permissions do not cover the action]UserTriggers an AI feature1tinymceai_token_provider callback(e.g. POST /api/ai-token)2Authenticate the user3{ "token": "eyJ..." 
}4POST /v1/conversations/{id}/messagesAuthorization: Bearer eyJ...5Verify HS256 signaturecheck aud, exp, permissions6Forward prompt7Stream response chunks8SSE: text-delta events9401 invalid-jwt-signature10401 invalid-jwt-payload11401 invalid-jwt12200 with allowed:false13User \ No newline at end of file +LLM providerAI serviceYour application backend(token endpoint)TinyMCE editor(tinymceai plugin)LLM providerAI serviceYour application backend(token endpoint)TinyMCE editor(tinymceai plugin)Sign HS256 JWT with API Secretaud = environment IDsub = user IDauth.ai.permissions = [...]alt[Token valid and permissions allow the action][Signature does not match][aud is not registered with AI runtime][Past expiry plus 60s leeway][Permissions do not cover the action]UserTriggers an AI feature1tinymceai_token_provider callback(e.g. POST /api/ai-token)2Authenticate the user3{ "token": "eyJ..." }4POST /v1/conversations/{id}/messagesAuthorization: Bearer eyJ...5Verify HS256 signaturecheck aud, exp, permissions6Forward prompt7Stream response chunks8SSE: text-delta events9401 invalid-jwt-signature10401 invalid-jwt-payload11401 invalid-jwt12200 with allowed:false13User \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/mcp-web-integrations-architecture.svg b/modules/ROOT/images/tinymceai-on-premises/mcp-web-integrations-architecture.svg index 04bb6dc202..dd0b79ebe3 100644 --- a/modules/ROOT/images/tinymceai-on-premises/mcp-web-integrations-architecture.svg +++ b/modules/ROOT/images/tinymceai-on-premises/mcp-web-integrations-architecture.svg @@ -1 +1 @@ -

conversations (SSE)
HTTP / HTTPS

inference
HTTPS

MCP tools/call
HTTP

read

search query
HTTP

HTTPS

scrape request
HTTP

HTTP/S

TinyMCE editor

AI Service
(on-premises)

LLM Provider
OpenAI · Anthropic · Google

MCP Server(s)
Streamable HTTP

Confluence ·
Notion · GitBook ·
internal wiki

Web Search
WEBSEARCH_ENDPOINT

Search API
SerpAPI · Brave · etc.

Web Scrape
WEBRESOURCES_ENDPOINT

Web pages

\ No newline at end of file +conversations (SSE)HTTP / HTTPSinferenceHTTPSMCP tools/callHTTPreadsearch queryHTTPHTTPSscrape requestHTTPHTTP/STinyMCE editorAI Service(on-premises)LLM ProviderOpenAI · Anthropic ·GoogleMCP Server(s)Streamable HTTPConfluence ·Notion · GitBook ·internal wikiWeb SearchWEBSEARCH_ENDPOINTSearch APISerpAPI · Brave · etc.Web ScrapeWEBRESOURCES_ENDPOINTWeb pages \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/overview-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/overview-fig-1.svg index 707b6d7e1a..94453220a9 100644 --- a/modules/ROOT/images/tinymceai-on-premises/overview-fig-1.svg +++ b/modules/ROOT/images/tinymceai-on-premises/overview-fig-1.svg @@ -1 +1 @@ -1. fetch JWT2. prompt + JWT3. forward prompt4. stream response5. SSE streamClientapplicationTokenendpointAI service(container)LLMproviderDatabase+ Redis \ No newline at end of file +1. fetch JWT2. prompt + JWT3. forward prompt4. stream response5. SSE streamClientapplicationTokenendpointAI service(container)LLMproviderDatabase+ Redis \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/overview-fig-2.svg b/modules/ROOT/images/tinymceai-on-premises/overview-fig-2.svg index f53b81026e..19093ba3d5 100644 --- a/modules/ROOT/images/tinymceai-on-premises/overview-fig-2.svg +++ b/modules/ROOT/images/tinymceai-on-premises/overview-fig-2.svg @@ -1 +1 @@ -

Data layer

Application layer

Browser

1. request JWT

JWT

2. HTTPS

HTTP / SSE

3. HTTPS

4. stream

5. SSE

read/write

read/write

read/write

TinyMCE editor
(browser)

Token endpoint
(your server)

Load balancer /
reverse proxy
(TLS termination)

AI service
(container)

SQL database
(conversations, configs)

Redis
(cache, coordination)

File storage
(uploads, documents)

LLM provider

\ No newline at end of file +Data layerApplication layerBrowser1. request JWTJWT2. HTTPSHTTP / SSE3. HTTPS4. stream5. SSEread/writeread/writeread/writeTinyMCE editor(browser)Token endpoint(your server)Load balancer /reverse proxy(TLS termination)AI service(container)SQL database(conversations, configs)Redis(cache, coordination)File storage(uploads, documents)LLM provider \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.svg index 05e6209293..bd3f9aceda 100644 --- a/modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.svg +++ b/modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.svg @@ -1 +1 @@ -

PROVIDERS env var · JSON object

MODELS env var · JSON array

JWT auth.ai.permissions

gates access

gates access

gates access

provider key

provider key

provider key

ai:models:my-openai-key:
gpt-4.1

ai:models:my-bedrock:
claude-sonnet-4

ai:models:my-ollama:
qwen3:0.6b

gpt-4.1
provider: my-openai-key

claude-sonnet-4
provider: my-bedrock

qwen3:0.6b
provider: my-ollama

my-openai-key
type: openai

my-bedrock
type: bedrock

my-ollama
type: openai-compatible

OpenAI API

AWS Bedrock

Local Ollama

\ No newline at end of file +PROVIDERS env var · JSON objectMODELS env var · JSON arrayJWT auth.ai.permissionsgates accessgates accessgates accessprovider keyprovider keyprovider keyai:models:my-openai-key:gpt-4.1ai:models:my-bedrock:claude-sonnet-4ai:models:my-ollama:qwen3:0.6bgpt-4.1provider: my-openai-keyclaude-sonnet-4provider: my-bedrockqwen3:0.6bprovider: my-ollamamy-openai-keytype: openaimy-bedrocktype: bedrockmy-ollamatype: openai-compatibleOpenAI APIAWS BedrockLocal Ollama \ No newline at end of file From e342ad4dbce960aed08e478f8f4a0beb582374a5 Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Mon, 25 May 2026 21:30:38 +1000 Subject: [PATCH 32/48] Move permissions example inline into JWT troubleshooting table Embed the correct-permissions-shape JSON directly in the table cell (as the last row) instead of an orphaned collapsible block between sections. Uses a| for AsciiDoc block content in the cell. --- .../tinymceai-on-premises-troubleshooting.adoc | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc b/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc index f66e6d2aa1..f3b300258c 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc @@ -92,19 +92,14 @@ These assume the container is running and `/health` returns OK. |Token signed with RS256 instead of HS256 |Re-sign with `algorithm: 'HS256'` and the API Secret. See xref:tinymceai-on-premises-jwt.adoc[JWT authentication]. -|`allowed: false` on every endpoint -|`auth.ai.permissions` is a string, shorthand, or wrong shape -|Use the explicit array form. See the <> below. - |`409 conversation in use` then `404 conversation does not exist` |Stream abort left temporary state blocking the conversation |Start a new conversation or reload the page. Custom UIs should create a fresh conversation after cancel. 
-|=== -[[correct-permissions-shape]] -.Correct permissions shape -[%collapsible] -==== +|`allowed: false` on every endpoint +|`auth.ai.permissions` is a string, shorthand, or wrong shape +a|Use the explicit array form: + [source,json] ---- { @@ -121,8 +116,8 @@ These assume the container is running and `/health` returns OK. } ---- -Common mistakes that produce `allowed: false`: `"permissions": "ai:admin"` (string shorthand), `"permissions": "*"`, `"useAllFeatures": true`, or a single permission as a string instead of an array. See xref:tinymceai-on-premises-jwt.adoc[JWT authentication] for the full permission catalog. -==== +Common mistakes: `"permissions": "ai:admin"` (string shorthand), `"permissions": "*"`, `"useAllFeatures": true`, or a single permission as a string instead of an array. See xref:tinymceai-on-premises-jwt.adoc[JWT authentication] for the full permission catalog. +|=== [[llm-provider-errors]] From 336cfda759ff065bb242fb047afe8fdc6019ce6b Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Tue, 26 May 2026 14:02:59 +1000 Subject: [PATCH 33/48] Normalize remaining PASTE_X_HERE placeholders to format --- .../ROOT/pages/tinymceai-on-premises-getting-started.adoc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc index 9cdb2472ce..33825f8f50 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc @@ -138,8 +138,8 @@ OPENAI_API_KEY= # GOOGLE_API_KEY= # --- Filled in after creating an environment (see "Create an environment and access key" below). Used by the token server, not the AI service. --- -AI_ENV_ID= -AI_API_SECRET= +AI_ENV_ID= +AI_API_SECRET= ---- IMPORTANT: `LICENSE_KEY` and `TINYMCE_API_KEY` are different credentials. 
`LICENSE_KEY` is the long string from the account representative that activates the AI service. `TINYMCE_API_KEY` is the short string from the tiny.cloud dashboard used to load TinyMCE from the CDN — it is not required for self-hosted editor bundles. See the xref:tinymceai-on-premises.adoc#_credentials[Credentials] section on the Overview page. @@ -248,8 +248,8 @@ Update `.env` with the new values: [source,bash] ---- -AI_ENV_ID=PASTE_ENVIRONMENT_ID_HERE -AI_API_SECRET=PASTE_API_SECRET_HERE +AI_ENV_ID= +AI_API_SECRET= ---- [IMPORTANT] From b09637e102eb6393e4965861271d5fbf9f7944d5 Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Tue, 26 May 2026 19:27:35 +1000 Subject: [PATCH 34/48] Add web-resources endpoint to API reference table --- modules/ROOT/pages/tinymceai-on-premises-reference.adoc | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/ROOT/pages/tinymceai-on-premises-reference.adoc b/modules/ROOT/pages/tinymceai-on-premises-reference.adoc index 28bced4e7a..b83eaee4c1 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-reference.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-reference.adoc @@ -112,6 +112,7 @@ For PostgreSQL, change `DATABASE_DRIVER` to `'postgres'` and add `-e DATABASE_SC |GET |`/v1/conversations` |JWT |List conversations for the current `sub`. |GET |`/v1/conversations/\{id}` |JWT |Read one conversation. |POST |`/v1/conversations/\{id}/messages` |JWT |Send a message. Returns Server-Sent Events (SSE) stream. +|POST |`/v1/conversations/\{id}/web-resources` |JWT |Fetch a web page through the configured scrape endpoint. Body: `{"url":"https://..."}`. Returns 201 on success, 422 on scrape failure. Requires `WEBRESOURCES_ENABLED='true'`. See xref:tinymceai-on-premises-mcp.adoc#web-scraping[Web scraping]. |DELETE |`/v1/conversations/\{id}` |JWT |Delete a conversation. |POST |`/v1/actions/\{actionId}` |JWT |Run a quick action. Body shape: `{"content":[{"type":"text","content":"..."}]}` (no `modelId`). 
|POST |`/v1/reviews/\{reviewId}` |JWT |Run a review. From a1eca3bae72a30f9d05d06d1081e2d19dccc774e Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Tue, 26 May 2026 19:36:19 +1000 Subject: [PATCH 35/48] Add reviews to LM Studio features example LM Studio natively supports tool calling for the Llama 3.1 8B Instruct GGUF model shown in the example. The omission of reviews was overly conservative and inconsistent with the vLLM and Ollama examples that use the same model family. --- modules/ROOT/pages/tinymceai-on-premises-providers.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ROOT/pages/tinymceai-on-premises-providers.adoc b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc index 780bf15624..26ccc78103 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-providers.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc @@ -823,7 +823,7 @@ a|[source,json] "id": "lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF", "provider": "lmstudio", "name": "Llama 3.1 8B (LM Studio)", - "features": ["conversations", "actions"] + "features": ["conversations", "reviews", "actions"] } ---- |=== From 74fba3977313ffd3dad44288522a38d4b94055e9 Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Tue, 26 May 2026 19:59:00 +1000 Subject: [PATCH 36/48] Use {pluginname} and {productname} attributes across on-premises pages Replace hardcoded TinyMCE AI and TinyMCE references with AsciiDoc attributes in prose. Add :pluginname:, :description_short:, and enrich :keywords: on all 10 on-premises pages. 
--- .../pages/tinymceai-on-premises-database.adoc | 6 ++++-- .../pages/tinymceai-on-premises-frameworks.adoc | 16 +++++++++------- .../tinymceai-on-premises-getting-started.adoc | 14 ++++++++------ .../ROOT/pages/tinymceai-on-premises-jwt.adoc | 6 ++++-- .../ROOT/pages/tinymceai-on-premises-mcp.adoc | 10 ++++++---- .../pages/tinymceai-on-premises-production.adoc | 4 +++- .../pages/tinymceai-on-premises-providers.adoc | 10 ++++++---- .../pages/tinymceai-on-premises-reference.adoc | 4 +++- .../tinymceai-on-premises-troubleshooting.adoc | 8 +++++--- modules/ROOT/pages/tinymceai-on-premises.adoc | 16 +++++++++------- 10 files changed, 57 insertions(+), 37 deletions(-) diff --git a/modules/ROOT/pages/tinymceai-on-premises-database.adoc b/modules/ROOT/pages/tinymceai-on-premises-database.adoc index a1c59fd795..146b55b1a5 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-database.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-database.adoc @@ -1,7 +1,9 @@ = Database, Redis, and infrastructure setup :navtitle: Database, Redis, and storage -:description: Database, Redis, and file storage setup for the TinyMCE AI On-Premises service -:keywords: AI, on-premises, database, MySQL, PostgreSQL, Redis, Docker, Podman, file storage, S3, Azure Blob +:description: Database, Redis, and file storage setup for the TinyMCE AI on-premises service +:description_short: Database, Redis, and file storage setup. +:keywords: AI, on-premises, database, MySQL, PostgreSQL, Redis, Docker, Podman, file storage, S3, Azure Blob, infrastructure +:pluginname: TinyMCE AI This page covers the *data layer*: the SQL database, Redis, and file storage. These components must be running and accessible before the AI service container can start — the service connects to them on boot and will not proceed without them. 
diff --git a/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc b/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc index 0c9a6b2d3d..119a5a0a9c 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc @@ -1,16 +1,18 @@ = TinyMCE AI on-premises: editor-side integration :navtitle: TinyMCE integration :description: Connecting the TinyMCE editor to the on-premises AI service from React, Vue, Angular, Svelte, or vanilla JavaScript. -:keywords: AI, on-premises, React, Vue, Angular, Svelte, token provider +:description_short: Editor-side AI service integration. +:keywords: AI, on-premises, React, Vue, Angular, Svelte, token provider, CORS, integration, JavaScript +:pluginname: TinyMCE AI -This page covers the *editor-side* configuration — the final step that connects the browser-based TinyMCE editor to the running AI service. At this point in the deployment process, the xref:tinymceai-on-premises-database.adoc[data layer], xref:tinymceai-on-premises-providers.adoc[LLM providers], and xref:tinymceai-on-premises-jwt.adoc[JWT token endpoint] are already operational. This page wires the editor to that infrastructure. +This page covers the *editor-side* configuration — the final step that connects the browser-based {productname} editor to the running AI service. At this point in the deployment process, the xref:tinymceai-on-premises-database.adoc[data layer], xref:tinymceai-on-premises-providers.adoc[LLM providers], and xref:tinymceai-on-premises-jwt.adoc[JWT token endpoint] are already operational. This page wires the editor to that infrastructure. It assumes: * The AI service is already running and reachable from the browser (see xref:tinymceai-on-premises-getting-started.adoc[Getting started]). * A token endpoint exists that signs HS256 JWTs (see xref:tinymceai-on-premises-jwt.adoc[JWT authentication]). -* A valid TinyMCE license key or API key with the AI feature enabled. 
+* A valid {productname} license key or API key with the AI feature enabled. For general framework setup (installing wrappers, component structure), see the existing integration guides: @@ -19,7 +21,7 @@ For general framework setup (installing wrappers, component structure), see the * xref:angular-cloud.adoc[Angular] * xref:svelte-cloud.adoc[Svelte] -The on-premises AI integration adds the options documented below to the standard TinyMCE `init` configuration. +The on-premises AI integration adds the options documented below to the standard {productname} `init` configuration. == Token flow @@ -86,7 +88,7 @@ The following vanilla JavaScript example contains every on-premises-specific opt ---- -Replace `/path/to/tinymce/` with the location of the self-hosted TinyMCE assets. See xref:installation.adoc[Self-hosted installation] for download and setup instructions. +Replace `/path/to/tinymce/` with the location of the self-hosted {productname} assets. See xref:installation.adoc[Self-hosted installation] for download and setup instructions. @@ -129,7 +131,7 @@ tinymceai_token_provider: () => { == Framework-specific examples -The examples below show the minimum configuration needed to connect the TinyMCE AI plugin to the on-premises service. Each uses the framework wrapper's `init` prop to pass the same options documented above. +The examples below show the minimum configuration needed to connect the {pluginname} plugin to the on-premises service. Each uses the framework wrapper's `init` prop to pass the same options documented above. === React @@ -337,7 +339,7 @@ If using the Tiny CDN instead of self-hosted assets, also add `\https://cdn.tiny |Symptom |Likely cause |Fix |Editor loads but no AI buttons appear -|`plugins` does not include `tinymceai`, or TinyMCE is version 7.x or earlier +|`plugins` does not include `tinymceai`, or {productname} is version 7.x or earlier |Add `tinymceai` to the `plugins` list and confirm the script URL uses `/tinymce/8/`. 
Verify the API key has the AI feature enabled. |`POST /api/ai-token` returns 401 diff --git a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc index 33825f8f50..cc2cfbc06d 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc @@ -1,15 +1,17 @@ = Getting started with TinyMCE AI on-premises :navtitle: Getting started :description: Five-minute Docker Compose quick start for TinyMCE AI on-premises service -:keywords: AI, on-premises, getting started, docker, quick start +:description_short: Five-minute Docker Compose quick start. +:keywords: AI, on-premises, getting started, Docker, quick start, Docker Compose, installation, setup +:pluginname: TinyMCE AI This guide sets up a fully working local stack in roughly five minutes on any machine with Docker: * *MySQL 8.0*: conversation history and metadata * *Redis*: caching and session state -* *TinyMCE AI service*: the on-premises AI back end +* *{pluginname} service*: the on-premises AI back end * *A minimal token server* (Node.js): signs JWTs for the editor -* *A browser page with TinyMCE*: validates the end-to-end flow +* *A browser page with {productname}*: validates the end-to-end flow The quick start is designed to validate the stack components before moving to a production deployment. Production engineers can review this section to understand the conceptual flow before continuing to xref:tinymceai-on-premises-production.adoc[Production deployment]. 
@@ -26,7 +28,7 @@ npm --version Additionally, have the following credentials ready: -* A TinyMCE license key and container registry credentials (from the Tiny account representative) +* A {productname} license key and container registry credentials (from the Tiny account representative) * At least one LLM provider API key (OpenAI, Anthropic, or Google) == Five-minute demo with Docker Compose @@ -142,7 +144,7 @@ AI_ENV_ID= AI_API_SECRET= ---- -IMPORTANT: `LICENSE_KEY` and `TINYMCE_API_KEY` are different credentials. `LICENSE_KEY` is the long string from the account representative that activates the AI service. `TINYMCE_API_KEY` is the short string from the tiny.cloud dashboard used to load TinyMCE from the CDN — it is not required for self-hosted editor bundles. See the xref:tinymceai-on-premises.adoc#_credentials[Credentials] section on the Overview page. +IMPORTANT: `LICENSE_KEY` and `TINYMCE_API_KEY` are different credentials. `LICENSE_KEY` is the long string from the account representative that activates the AI service. `TINYMCE_API_KEY` is the short string from the tiny.cloud dashboard used to load {productname} from the CDN — it is not required for self-hosted editor bundles. See the xref:tinymceai-on-premises.adoc#_credentials[Credentials] section on the Overview page. === Start MySQL and Redis @@ -371,7 +373,7 @@ npm start Open *http://localhost:3000* in a browser. The editor loads with the AI toolbar. Select text and try the AI features. Responses stream in real time from the chosen large language model (LLM) provider, processed entirely within the local infrastructure. -The TinyMCE AI on-premises service is now running. +The {pluginname} on-premises service is now running. 
== Verifying the installation diff --git a/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc b/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc index a62701969d..81b47af373 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc @@ -1,7 +1,9 @@ = JWT authentication for the on-premises AI service :navtitle: JWT authentication :description: JWT authentication for the TinyMCE AI on-premises service using HS256 symmetric signing -:keywords: AI, on-premises, JWT, authentication, HS256, multi-tenant +:description_short: HS256 JWT authentication and permissions. +:keywords: AI, on-premises, JWT, authentication, HS256, multi-tenant, token, permissions, security +:pluginname: TinyMCE AI This page covers *authentication between the application back end and the AI service*. Every request from the editor to the AI service carries a signed JWT — this is how the service identifies users, enforces permissions, and isolates conversations. The token endpoint runs in the application back end; the editor calls it automatically through the `tinymceai_token_provider` callback configured in xref:tinymceai-on-premises-frameworks.adoc[TinyMCE integration]. @@ -748,7 +750,7 @@ public class AiTokenController { == Editor-side token provider -Configure the TinyMCE editor to fetch a token from the application endpoint. The plugin calls the provider on demand and re-fetches when the token nears expiry. +Configure the {productname} editor to fetch a token from the application endpoint. The plugin calls the provider on demand and re-fetches when the token nears expiry. 
[source,javascript] ---- diff --git a/modules/ROOT/pages/tinymceai-on-premises-mcp.adoc b/modules/ROOT/pages/tinymceai-on-premises-mcp.adoc index 0acc0ce0c2..effee549a6 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-mcp.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-mcp.adoc @@ -1,7 +1,9 @@ = MCP and web integrations :navtitle: MCP and web integrations (optional) :description: Model Context Protocol (MCP) server integration and web scraping/search configuration for TinyMCE AI on-premises -:keywords: AI, on-premises, MCP, Model Context Protocol, web search, web scraping +:description_short: MCP, web search, and web scraping setup. +:keywords: AI, on-premises, MCP, Model Context Protocol, web search, web scraping, tool calling, Streamable HTTP +:pluginname: TinyMCE AI The AI service extends model capabilities through two integration points: the https://modelcontextprotocol.io/[Model Context Protocol] (MCP) for tool calling, and pluggable web endpoints for page fetching and search. Both features operate within AI conversations only. Web search and scraping allow the AI to reference live internet content during conversations, and for most deployments, enabling at least web search improves response quality. @@ -205,7 +207,7 @@ IMPORTANT: `WEBSEARCH_HEADERS` uses **colon-CSV format** (`Header-Name: value, A [[websearch-per-message]] === Per-message activation (custom integrations) -The TinyMCE editor sends the web search activation flag when the user toggles web search on. Custom integrations that call the AI service API directly must include the following in each message request body where web search should be active: +The {productname} editor sends the web search activation flag when the user toggles web search on. 
Custom integrations that call the AI service API directly must include the following in each message request body where web search should be active: [source,json] ---- @@ -310,7 +312,7 @@ Content-Type: application/json { "url": "https://example.com/page-to-fetch" } ---- -The TinyMCE editor sends this request when a user pastes or references a URL in conversation. Custom integrations must call this endpoint explicitly to trigger a page fetch. +The {productname} editor sends this request when a user pastes or references a URL in conversation. Custom integrations must call this endpoint explicitly to trigger a page fetch. The response is stored against the conversation. The `type` field in the scrape response must be `text/html` or `text/markdown`. Other MIME types (for example, `application/pdf`) are rejected with a `422 web-resource-download-error`. @@ -411,7 +413,7 @@ For production clusters, the MCP server Deployments should have dedicated resour |The JWT is missing the `ai:conversations:webSearch` permission. Add it to the `auth.ai.permissions` array in the token (or use the wildcard `ai:conversations:*`). See xref:tinymceai-on-premises-jwt.adoc#permissions-reference[JWT permissions]. |Web search is toggled on but the model never uses it -|The per-message request body is missing `"capabilities": {"webSearch": {}}`. The TinyMCE editor sends this when the toggle is active. Custom integrations must include it in each request body. See <<websearch-per-message>>. +|The per-message request body is missing `"capabilities": {"webSearch": {}}`. The {productname} editor sends this when the toggle is active. Custom integrations must include it in each request body. See <<websearch-per-message>>. |`ERR_INVALID_HTTP_TOKEN` in AI service logs related to web search |`WEBSEARCH_HEADERS` is set as a JSON object. The service expects colon-CSV format: `Header-Name: value, Another: value`.
diff --git a/modules/ROOT/pages/tinymceai-on-premises-production.adoc b/modules/ROOT/pages/tinymceai-on-premises-production.adoc index efe2e53a6c..d6e4a0d51e 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-production.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-production.adoc @@ -1,7 +1,9 @@ = TinyMCE AI on-premises: Production deployment guide :navtitle: Production deployment :description: Production deployment guide for the TinyMCE AI on-premises service -:keywords: AI, on-premises, production, Kubernetes, ECS, scaling +:description_short: Production deployment and scaling guide. +:keywords: AI, on-premises, production, Kubernetes, ECS, scaling, TLS, security, observability, Docker, Podman +:pluginname: TinyMCE AI This guide assumes a running Kubernetes cluster, ECS cluster, or Docker/Podman host with the relevant CLI tools (`kubectl`, `aws`, `docker`) configured. For cluster setup, refer to the platform documentation. diff --git a/modules/ROOT/pages/tinymceai-on-premises-providers.adoc b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc index 26ccc78103..2506ca7ca0 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-providers.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc @@ -1,7 +1,9 @@ = LLM provider configuration :navtitle: LLM providers :description: LLM provider configuration for TinyMCE AI on-premises service +:description_short: LLM provider configuration and model catalog. :keywords: AI, on-premises, LLM, OpenAI, Anthropic, Google, Gemini, Azure, Bedrock, Vertex, Ollama, vLLM, openai-compatible +:pluginname: TinyMCE AI @@ -80,7 +82,7 @@ IMPORTANT: Always set `MODELS` explicitly for production. Without it, the editor [%collapsible] ==== * The `/v1/models/1` endpoint reports `agent-1` with `allowed: false` unless the JWT includes the `ai:models:agent` permission. -* The TinyMCE editor model picker does not display real model names. +* The {productname} editor model picker does not display real model names. 
* Setting `MODELS` explicitly makes the editor display meaningful names, routes to the correct provider, and enables per-model JWT permissions (`ai:models:<provider>:<model>`). * See xref:tinymceai-on-premises-jwt.adoc[JWT authentication] for permission syntax. ==== @@ -96,7 +98,7 @@ When `MODELS` is omitted, a `GET /v1/models/1` call returns: } ---- -Clients (the TinyMCE editor included) will then fall back to the disabled agent and every AI request will fail with no useful error in the UI. The model list endpoint also only accepts the literal version `1`: +Clients (the {productname} editor included) will then fall back to the disabled agent and every AI request will fail with no useful error in the UI. The model list endpoint also only accepts the literal version `1`: [source,bash] ---- @@ -222,7 +224,7 @@ API key from https://console.anthropic.com/[console.anthropic.com]. Same shape a *Reasoning models:* -Claude 4.x models (Sonnet 4, Opus 4) support extended thinking. To surface the reasoning toggle in the TinyMCE UI, add `capabilities.reasoning: true` to the model entry in `MODELS`: +Claude 4.x models (Sonnet 4, Opus 4) support extended thinking. To surface the reasoning toggle in the {productname} UI, add `capabilities.reasoning: true` to the model entry in `MODELS`: [source,json] ---- @@ -902,7 +904,7 @@ A `MODELS` array routes individual models to specific providers using the `provi ---- ==== -This wires conversations to OpenAI, reviews to Bedrock-hosted Claude, and quick actions to a local Ollama model. The TinyMCE editor will pick the appropriate provider for each feature based on which models declare which `features`. +This wires conversations to OpenAI, reviews to Bedrock-hosted Claude, and quick actions to a local Ollama model. The {productname} editor will pick the appropriate provider for each feature based on which models declare which `features`.
A `MODELS` entry with a `provider` value that does not exist in `PROVIDERS` is silently skipped; that model will not appear in `/v1/models/1`. When a model is missing from the model selector in the rich text editor, check the spelling of its `provider` field against the keys in `PROVIDERS` (case-sensitive). See xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting] for additional debugging steps. diff --git a/modules/ROOT/pages/tinymceai-on-premises-reference.adoc b/modules/ROOT/pages/tinymceai-on-premises-reference.adoc index b83eaee4c1..ca36c37ab3 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-reference.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-reference.adoc @@ -1,7 +1,9 @@ = TinyMCE AI on-premises reference :navtitle: Reference :description: Environment variable, API endpoint, SSE, and error code reference for the TinyMCE AI on-premises service -:keywords: AI, on-premises, reference, API, environment variables, error codes +:description_short: Environment variables, API, SSE, and error codes. +:keywords: AI, on-premises, reference, API, environment variables, error codes, SSE, Server-Sent Events, docker run +:pluginname: TinyMCE AI == Environment variable reference diff --git a/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc b/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc index f3b300258c..903fa6b65a 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc @@ -1,7 +1,9 @@ = Troubleshooting :navtitle: Troubleshooting :description: Troubleshooting guide for the TinyMCE AI on-premises service -:keywords: AI, on-premises, troubleshooting, errors, debugging +:description_short: Troubleshooting and diagnostic recipes. +:keywords: AI, on-premises, troubleshooting, errors, debugging, diagnostics, JWT, container, LLM +:pluginname: TinyMCE AI Match the symptom to the fix below. 
If the symptom does not fit any section, escalate to `support@tiny.cloud` with the output of `docker logs ai-service --tail 200` and a redacted copy of the `PROVIDERS` value. @@ -186,7 +188,7 @@ Confirm `/health` is OK and a direct `curl` to `/v1/conversations` works before |Symptom |Fix |No AI buttons in the toolbar -|Ensure TinyMCE 8{plus} is loaded, `plugins: 'tinymceai'` is set, and the toolbar string includes `tinymceai`. Verify the API key has the AI feature enabled. +|Ensure {productname} 8{plus} is loaded, `plugins: 'tinymceai'` is set, and the toolbar string includes `tinymceai`. Verify the API key has the AI feature enabled. |Token fetch returns 401 |The token endpoint's own authentication middleware is rejecting the request. Check session cookies, Cross-Origin Resource Sharing (CORS) credentials, and bearer tokens in the browser network tab. @@ -201,7 +203,7 @@ Confirm `/health` is OK and a direct `curl` to `/v1/conversations` works before |Add the editor's origin (scheme {plus} host {plus} port) to the `ALLOWED_ORIGINS` environment variable. |Editor renders then disappears (Next.js / Nuxt / SvelteKit) -|TinyMCE references `window` at load time. Load the editor client-only: `dynamic(() => import('./Editor'), { ssr: false })` in Next.js, `` in Nuxt, `onMount` in SvelteKit. See xref:tinymceai-on-premises-frameworks.adoc[TinyMCE integration]. +|{productname} references `window` at load time. Load the editor client-only: `dynamic(() => import('./Editor'), { ssr: false })` in Next.js, `` in Nuxt, `onMount` in SvelteKit. See xref:tinymceai-on-premises-frameworks.adoc[TinyMCE integration]. |`tinymceai_token_provider` called in a tight loop |Token endpoint is returning an invalid JWT or non-JSON response. Test with `curl -X POST http://localhost:3000/api/ai-token` and verify the response is `pass:c[{"token":"eyJ..."}]`. 
diff --git a/modules/ROOT/pages/tinymceai-on-premises.adoc b/modules/ROOT/pages/tinymceai-on-premises.adoc index 43c3a2d4b4..8b3e05309a 100644 --- a/modules/ROOT/pages/tinymceai-on-premises.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises.adoc @@ -1,9 +1,11 @@ = TinyMCE AI on-premises :navtitle: Overview :description: Self-hosted AI writing assistance for TinyMCE; architecture, prerequisites, and topic guide -:keywords: AI, on-premises, self-hosted, deployment, overview +:description_short: Self-hosted AI writing assistance for TinyMCE. +:keywords: AI, on-premises, self-hosted, deployment, overview, architecture, prerequisites, container, LLM +:pluginname: TinyMCE AI -The TinyMCE AI on-premises service is a self-hosted back end that powers AI writing assistance. It can be used with the TinyMCE rich text editor, particularly the xref:tinymceai.adoc[TinyMCE AI plugin], or as a standalone service. It runs entirely within the host infrastructure. Document content, conversation history, file attachments, and user data stay within the host network and are not stored by Tiny. Data sent to a configured LLM provider is subject to that provider's data handling policies. +The {pluginname} on-premises service is a self-hosted back end that powers AI writing assistance. It can be used with the {productname} rich text editor, particularly the xref:tinymceai.adoc[TinyMCE AI plugin], or as a standalone service. It runs entirely within the host infrastructure. Document content, conversation history, file attachments, and user data stay within the host network and are not stored by Tiny. Data sent to a configured LLM provider is subject to that provider's data handling policies. The service ships as a single Open Container Initiative (OCI) container image (`registry.containers.tiny.cloud/ai-service-tiny`). It exposes a REST API, a Management Panel, Server-Sent Events streaming, and an OpenAPI spec. 
@@ -11,7 +13,7 @@ The service ships as a single Open Container Initiative (OCI) container image (` The infrastructure consists of three layers: -* The *browser* runs the TinyMCE editor with the `tinymceai` plugin. +* The *browser* runs the {productname} editor with the `tinymceai` plugin. * The *application layer* runs the token endpoint (which signs JWTs), the AI service container, and a load balancer or reverse proxy. It may consist of one or more AI service instances behind the load balancer (round-robin recommended). Each instance runs the same stateless container image. * The *data layer* consists of a SQL database, a Redis instance, and file storage: ** *SQL database*: stores persistent data such as configurations and conversations. @@ -31,7 +33,7 @@ Data flow for a single AI request: . The LLM streams its response back to the AI service. . The AI service relays the response to the client through Server-Sent Events (SSE). -When used with TinyMCE `tinymceai`, the plugin handles steps 1, 2, and 5 automatically through the `tinymceai_token_provider` callback. +When used with {productname} `tinymceai`, the plugin handles steps 1, 2, and 5 automatically through the `tinymceai_token_provider` callback. NOTE: The browser connects directly to the AI service — requests do not pass through the application back end. The AI service must be network-reachable from the end-user browser, which means it must have a public URL (or be accessible through a VPN/internal network when deployed on an intranet). Configure xref:tinymceai-on-premises-frameworks.adoc#_cross_origin_requests_to_the_ai_service[CORS] and xref:tinymceai-on-premises-production.adoc#_tls_https[TLS] on the AI service accordingly. @@ -98,12 +100,12 @@ Three credentials are involved in an on-premises deployment. They are distinct a |`TINYMCE_API_KEY` |Editor page (CDN script URL) or build configuration -|Authenticates against `cdn.tiny.cloud` when loading TinyMCE from the CDN. 
This is the short string from the tiny.cloud dashboard. -|Only when loading TinyMCE from the CDN. Omit for self-hosted editor bundles. +|Authenticates against `cdn.tiny.cloud` when loading {productname} from the CDN. This is the short string from the tiny.cloud dashboard. +|Only when loading {productname} from the CDN. Omit for self-hosted editor bundles. |`license_key` (init option) |`tinymce.init({ license_key: 'T8LK:...' })` -|Activates premium TinyMCE features when using a self-hosted editor bundle (not the CDN). +|Activates premium {productname} features when using a self-hosted editor bundle (not the CDN). |Only for self-hosted editor deployments. Provided by the Tiny account representative. |=== From 89098882abb2625e8f499aeb970245d37d02ec22 Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Wed, 27 May 2026 11:16:33 +1000 Subject: [PATCH 37/48] Replace CDN with NPM self-hosted editor on getting started page MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix project folder name: tinymce-ai-onpremise → tinymceai-onpremise - Replace CDN script tag with locally served tinymce and tinymce-premium - Rename LICENSE_KEY to AI_LICENCE_KEY in .env and docker run - Remove TINYMCE_API_KEY (not needed for self-hosted editor) - Add license_key init option with inline guidance - Add tinymce and tinymce-premium (^8.4.0) to package.json --- ...tinymceai-on-premises-getting-started.adoc | 34 ++++++++++--------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc index cc2cfbc06d..5dd5c069a7 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc @@ -37,7 +37,7 @@ Additionally, have the following credentials ready: [source,bash] ---- -mkdir tinymce-ai-onpremise && cd tinymce-ai-onpremise +mkdir 
tinymceai-onpremise && cd tinymceai-onpremise ---- === Authenticate with the container registry @@ -121,12 +121,8 @@ If the AI service needs to reach the host machine (for example a self-hosted Oll [source,bash] ---- -# --- Required: provided by Tiny --- -LICENSE_KEY= - -# --- Required for this demo (loads TinyMCE from cdn.tiny.cloud) --- -# Omit only when using a self-hosted editor bundle with license_key. -TINYMCE_API_KEY= +# --- Required: AI service license key provided by Tiny --- +AI_LICENCE_KEY= # --- Required: strong secret used to log into the Management Panel --- MANAGEMENT_SECRET= @@ -144,7 +140,7 @@ AI_ENV_ID= AI_API_SECRET= ---- -IMPORTANT: `LICENSE_KEY` and `TINYMCE_API_KEY` are different credentials. `LICENSE_KEY` is the long string from the account representative that activates the AI service. `TINYMCE_API_KEY` is the short string from the tiny.cloud dashboard used to load {productname} from the CDN — it is not required for self-hosted editor bundles. See the xref:tinymceai-on-premises.adoc#_credentials[Credentials] section on the Overview page. +IMPORTANT: `AI_LICENCE_KEY` is the long string from the account representative that activates the AI service container. It is not the same as the `license_key` init option used by the self-hosted editor. See the xref:tinymceai-on-premises.adoc#_credentials[Credentials] section on the Overview page. === Start MySQL and Redis @@ -182,7 +178,7 @@ set -a && source .env && set +a docker run --init -d -p 8000:8000 \ --network _default \ --name ai-service \ - -e LICENSE_KEY="$LICENSE_KEY" \ + -e LICENSE_KEY="$AI_LICENCE_KEY" \ -e ENVIRONMENTS_MANAGEMENT_SECRET_KEY="$MANAGEMENT_SECRET" \ -e DATABASE_DRIVER='mysql' \ -e DATABASE_HOST='mysql' \ @@ -197,7 +193,7 @@ docker run --init -d -p 8000:8000 \ registry.containers.tiny.cloud/ai-service-tiny:latest ---- -TIP: The network name is typically `_default` (e.g., `tinymce-ai-onpremise_default` for the folder created above). 
Run `docker network ls` to confirm the exact name — Docker Compose versions format it differently. For multiple LLM providers, extend the `PROVIDERS` JSON: `{"openai":{...},"anthropic":{...}}`. +TIP: The network name is typically `_default` (e.g., `tinymceai-onpremise_default` for the folder created above). Run `docker network ls` to confirm the exact name — Docker Compose versions format it differently. For multiple LLM providers, extend the `PROVIDERS` JSON: `{"openai":{...},"anthropic":{...}}`. NOTE: The launch command above starts the AI service with basic conversation support. To enable *web search* in conversations, add `WEBSEARCH_ENABLED='true'` and `WEBSEARCH_ENDPOINT` (pointing to a search backend) to the `docker run` command. See xref:tinymceai-on-premises-mcp.adoc#web-scraping-and-search[Web scraping and web search] for the full configuration, endpoint contracts, and a SerpAPI example. @@ -232,7 +228,7 @@ Server is listening on port 8000. [WARNING] -- -If the container exits immediately, run `docker logs ai-service`. The most common causes are documented in the xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting] guide. The top three are: malformed `LICENSE_KEY` (line breaks from word wrap), missing PostgreSQL schema, and JSON syntax error in `PROVIDERS`. +If the container exits immediately, run `docker logs ai-service`. The most common causes are documented in the xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting] guide. The top three are: malformed `AI_LICENCE_KEY` (line breaks from word wrap), missing PostgreSQL schema, and JSON syntax error in `PROVIDERS`. 
-- === Create an environment and access key @@ -268,7 +264,7 @@ Create `package.json`: [source,json] ---- { - "name": "tinymce-ai-onpremise-demo", + "name": "tinymceai-onpremise-demo", "private": true, "scripts": { "start": "node token-server.js" @@ -276,7 +272,9 @@ Create `package.json`: "dependencies": { "dotenv": "^16.0.0", "express": "^4.18.0", - "jsonwebtoken": "^9.0.0" + "jsonwebtoken": "^9.0.0", + "tinymce": "^8.4.0", + "tinymce-premium": "^8.4.0" } } ---- @@ -289,12 +287,12 @@ Create `token-server.js`: require('dotenv').config({ override: true }); const express = require('express'); const jwt = require('jsonwebtoken'); +const path = require('path'); const PORT = process.env.PORT || 3000; const AI_ENV_ID = process.env.AI_ENV_ID; const AI_API_SECRET = process.env.AI_API_SECRET; const AI_SERVICE_URL = process.env.AI_SERVICE_URL || 'http://localhost:8000'; -const TINYMCE_API_KEY = process.env.TINYMCE_API_KEY || 'no-api-key'; if (!AI_ENV_ID || !AI_API_SECRET) { console.error('ERROR: AI_ENV_ID and AI_API_SECRET must be set in .env'); @@ -305,6 +303,9 @@ if (!AI_ENV_ID || !AI_API_SECRET) { const app = express(); app.use(express.json()); +app.use('/tinymce', express.static(path.join(__dirname, 'node_modules', 'tinymce'))); +app.use('/tinymce/plugins', express.static(path.join(__dirname, 'node_modules', 'tinymce-premium', 'plugins'))); + app.post('/api/ai-token', (req, res) => { const token = jwt.sign({ aud: AI_ENV_ID, @@ -330,8 +331,8 @@ app.get('/', (req, res) => { TinyMCE AI on-premises Demo - - + +

TinyMCE AI on-premises Demo

@@ -340,6 +341,7 @@ app.get('/', (req, res) => { + +

TinyMCE AI on-premises Demo

@@ -341,7 +340,6 @@ app.get('/', (req, res) => { +

TinyMCE AI on-premises Demo

@@ -344,7 +347,8 @@ app.get('/', (req, res) => { +---- + +Replace `my-server` with the key from `MCP_SERVERS`. In production, add error handling, a loading indicator, and validation for missing `code` or `state` parameters. + +In the example above, the callback page calls `/api/mcp/oauth/{serverName}/complete` on the integrating application's own server rather than directly on the AI service. This is the proxy pattern described below. + +[[mcp-oauth-proxy]] +==== Browser integration and proxy endpoints + +The <<mcp-oauth-endpoints,REST endpoints>> on the AI service require JWT authentication and may not be directly callable from the browser due to CORS restrictions. In browser-based integrations, proxy the OAuth calls through the application's own backend. + +The backend generates a JWT (the same token used for conversations), attaches it to the request, and forwards the call to the AI service. This keeps the JWT generation server-side and avoids CORS configuration on the AI service. + +A typical integration requires four proxy routes: + +* `GET /api/mcp/oauth/status` -> `GET {AI_SERVICE_URL}/v1/mcp/oauth/status` +* `POST /api/mcp/oauth/{serverName}/initialize` -> `POST {AI_SERVICE_URL}/v1/mcp/oauth/{serverName}/initialize` +* `POST /api/mcp/oauth/{serverName}/complete` -> `POST {AI_SERVICE_URL}/v1/mcp/oauth/{serverName}/complete` +* `DELETE /api/mcp/oauth/{serverName}` -> `DELETE {AI_SERVICE_URL}/v1/mcp/oauth/{serverName}` + [[mcp-oauth-endpoints]] ==== REST endpoints @@ -285,8 +338,6 @@ app.listen(3001, () => console.log('Knowledge MCP server on http://0.0.0.0:3001/ The example server binds to `0.0.0.0` so it is reachable from inside Docker containers. When the MCP server runs on the host machine, the AI service connects to it at `http://host.docker.internal:3001/mcp` (see <>). 
- - [[web-search]] == Web search @@ -551,6 +602,10 @@ For production clusters, the MCP server Deployments should have dedicated resour |MCP tools not appearing for a user after OAuth is configured |The user has not completed the authorization flow. OAuth connections are per user -- each user must authorize independently through `initialize` and `complete`. Check the connection status with `GET /v1/mcp/oauth/status`. + +[[mcp-troubleshooting]] +|`initializeMcpOAuth` hangs for 60+ seconds with no response +|The MCP server `url` in `MCP_SERVERS` does not return OAuth resource discovery metadata (https://datatracker.ietf.org/doc/html/rfc9728[RFC 9728]). The AI service sends an unauthenticated request to the URL and expects a `401` response with a `WWW-Authenticate` header containing a `resource_metadata` link. If the header is missing, the service hangs indefinitely. Verify the correct endpoint with: `curl -sI -X POST \https://mcp.example.com/endpoint -H "Content-Type: application/json"` and check for `resource_metadata` in the `WWW-Authenticate` header. Some MCP servers expose separate URLs for authenticated and OAuth-protected access. See <>. |=== diff --git a/modules/ROOT/pages/tinymceai-on-premises-production.adoc b/modules/ROOT/pages/tinymceai-on-premises-production.adoc index 15d02d2734..f4f815bd2f 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-production.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-production.adoc @@ -498,7 +498,7 @@ The AI service requires outbound HTTPS access to the following endpoints. 
Config |Forward AI requests to the configured providers |Yes -|`license.container.tiny.cloud` +|`license.containers.tiny.cloud` |License key validation (phone-home check on startup) |Yes — service will not start if this is blocked From 74aa678f13c739698b28dceda429c7dc7c3b0702 Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Thu, 28 May 2026 11:54:43 +1000 Subject: [PATCH 45/48] Address open PR review comments across on-prem docs Getting-started: add bold labels and diagnostic guidance to each verification step for readability. Database: fix double underscore in table namespace list, add PostgreSQL skip note. JWT: rename heading to Editor-side configuration. MCP: reorder config table (oauth after headers), remove redundant OAuth cross-ref, mention MCP_OAUTH_CALLBACK_URL in config intro, change "changes" to "steps". Reference: clarify MCP auth wording, add OAuth endpoints to API table. --- .../pages/tinymceai-on-premises-database.adoc | 4 +++- ...tinymceai-on-premises-getting-started.adoc | 22 +++++++++++-------- .../ROOT/pages/tinymceai-on-premises-jwt.adoc | 2 +- .../ROOT/pages/tinymceai-on-premises-mcp.adoc | 8 +++---- .../tinymceai-on-premises-reference.adoc | 8 ++++++- 5 files changed, 27 insertions(+), 17 deletions(-) diff --git a/modules/ROOT/pages/tinymceai-on-premises-database.adoc b/modules/ROOT/pages/tinymceai-on-premises-database.adoc index 146b55b1a5..5b01e47d71 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-database.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-database.adoc @@ -49,6 +49,8 @@ image::tinymceai-on-premises/database-setup-fig-1.svg[Database setup decision tr [[postgresql-schema-prerequisite]] == PostgreSQL schema prerequisite +NOTE: This section applies to PostgreSQL deployments only. MySQL deployments can skip to <>. + The AI service expects a schema named `cs-on-premises` (with hyphens). If that schema does not exist, the container crashes on first boot with: .... 
@@ -101,7 +103,7 @@ Pin to `mysql:8.0` in every manifest: `docker run`, Docker Compose, Kubernetes, [[database-user-privileges]] == Database user privileges -On first boot the AI service runs schema migrations and creates roughly 32 tables across the following namespaces: `ai_assistant_*`, `environments__*`, `security__*`, `insights__*`, `blob_storage__*`, and `cs_migrations*`. +On first boot the AI service runs schema migrations and creates roughly 32 tables across the following namespaces: `ai_assistant_*`, `environments_*`, `security_*`, `insights_*`, `blob_storage_*`, and `cs_migrations*`. The database user needs enough privilege to create, alter, and operate on these tables. diff --git a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc index 88c73a7cfd..19267f3395 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc @@ -393,34 +393,36 @@ The {pluginname} on-premises service is now running. == Verifying the installation -After completing the quick start, exercise the pipeline end-to-end from the command line. +After completing the quick start, run each check below from the command line to exercise the pipeline end-to-end. === Step 1: Health check -Confirms the AI service container is running and connected to the database and Redis. +*What this checks:* the AI service container is running and connected to the database and Redis. [source,bash] ---- curl http://localhost:8000/health ---- -Expected: +*Expected response:* [source,json] ---- {"serviceName":"on-premises-http","uptime":12.345} ---- +A JSON response with `serviceName` and `uptime` confirms the container is healthy. If the request fails or times out, check `docker logs ai-service` for startup errors. + === Step 2: Generate a token -Confirms the token server can sign a valid JWT using the API Secret and Environment ID. 
+*What this checks:* the token server can sign a valid JWT using the API Secret and Environment ID. [source,bash] ---- curl -s -X POST http://localhost:3000/api/ai-token | python3 -m json.tool ---- -Expected: +*Expected response:* [source,json] ---- @@ -429,9 +431,11 @@ Expected: } ---- +A JSON response containing a `token` field confirms the token server is running and can sign JWTs. If the server returns an error, verify that `AI_ENV_ID` and `AI_API_SECRET` are set in `.env`. + === Step 3: Create a conversation and send a message -Confirms the full chain: JWT verification, permissions, environment registration, LLM provider authentication, and SSE streaming. +*What this checks:* the full chain — JWT verification, permissions, environment registration, LLM provider authentication, and SSE streaming. [source,bash] ---- @@ -453,7 +457,7 @@ curl -s -N -X POST http://localhost:8000/v1/conversations/verify-1/messages \ -d '{"prompt":"Say hello in five words.","model":"agent-1"}' ---- -The message endpoint returns a Server-Sent Events stream: +*Expected response:* a Server-Sent Events stream: [source,text] ---- @@ -473,9 +477,9 @@ event: done data: {} ---- -If the stream emits `event: error`, inspect the `data` payload. Provider errors (invalid API key, IAM denial, model unavailable) ride inside the Server-Sent Events (SSE) response. The HTTP status stays 200. See the xref:tinymceai-on-premises-troubleshooting.adoc[LLM provider errors] section in the Troubleshooting guide for details. +A stream of `text-delta` events followed by `done` confirms the entire pipeline is working: container health, database connectivity, Redis connectivity, JWT signing and verification, permissions, environment registration, LLM provider authentication, and SSE streaming. -A successful round-trip confirms: container health, database connectivity, Redis connectivity, JWT signing, JWT verification, permissions checking, environment registration, LLM provider authentication, and SSE streaming. 
If problems persist after these checks, focus on the editor configuration next. +If the stream emits `event: error`, inspect the `data` payload. Provider errors (invalid API key, IAM denial, model unavailable) ride inside the Server-Sent Events (SSE) response. The HTTP status stays 200. See the xref:tinymceai-on-premises-troubleshooting.adoc[LLM provider errors] section in the Troubleshooting guide for details. == Updating configuration diff --git a/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc b/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc index 81b47af373..19c1a5123d 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc @@ -748,7 +748,7 @@ public class AiTokenController { -== Editor-side token provider +== Editor-side configuration Configure the {productname} editor to fetch a token from the application endpoint. The plugin calls the provider on demand and re-fetches when the token nears expiry. diff --git a/modules/ROOT/pages/tinymceai-on-premises-mcp.adoc b/modules/ROOT/pages/tinymceai-on-premises-mcp.adoc index 2d6ca94fd0..3684cc1c9b 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-mcp.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-mcp.adoc @@ -33,7 +33,7 @@ MCP tools are available in AI *conversations* only. Reviews and quick actions do === Configuration -Set the `MCP_SERVERS` environment variable to a JSON object. Each key is a server identifier; each value describes the connection: +Set the `MCP_SERVERS` environment variable to a JSON object. Each key is a server identifier; each value describes the connection. For OAuth-enabled servers, also set `MCP_OAUTH_CALLBACK_URL` (see <>). [source,bash] ---- @@ -50,9 +50,9 @@ Set the `MCP_SERVERS` environment variable to a JSON object. Each key is a serve |Field |Description |`url` |HTTP endpoint of the MCP server (Streamable HTTP transport). 
For <> servers, this must be the endpoint that supports https://datatracker.ietf.org/doc/html/rfc9728[OAuth protected resource discovery] (RFC 9728). See <>. |`headers` |JSON object of HTTP headers sent with every request (for example, `{"Authorization": "Bearer token"}`). See <>. +|`oauth` |OAuth 2.0 configuration block. Enables per-user authorization for MCP servers that require it. See <>. |`tools.disabled` |Array of tool names to exclude from LLM access. |`options.callToolTimeout` |Per-tool-call timeout in seconds (default 60). -|`oauth` |OAuth 2.0 configuration block. Enables per-user authorization for MCP servers that require it. See <>. |=== `MCP_SERVERS` supports multiple server entries. Add additional keys to the same JSON object: @@ -98,8 +98,6 @@ The `headers` field sends a fixed set of HTTP headers with every request to the If the MCP server requires per-user context without OAuth, encode identity in the conversation prompt or in a header that the MCP server resolves to a per-user identity on its own side. -For MCP servers that require per-user authorization, use <> instead. - [[mcp-oauth]] === OAuth 2.0 authentication @@ -118,7 +116,7 @@ TIP: To verify the correct endpoint, send an unauthenticated request and inspect ==== Configuration -Enabling OAuth for an MCP server requires two configuration changes: +Enabling OAuth for an MCP server requires two steps: . Set `MCP_OAUTH_CALLBACK_URL` to the URL of the OAuth callback page hosted by the integrating web application. The AI service uses this URL as the OAuth `redirect_uri` parameter for all OAuth-enabled MCP servers (unless overridden per server with `oauth.callbackUrl`). . Add an `oauth` block inside the target server entry in `MCP_SERVERS`. 
diff --git a/modules/ROOT/pages/tinymceai-on-premises-reference.adoc b/modules/ROOT/pages/tinymceai-on-premises-reference.adoc index 027a064073..a9eeb9bb12 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-reference.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-reference.adoc @@ -119,8 +119,14 @@ For PostgreSQL, change `DATABASE_DRIVER` to `'postgres'` and add `-e DATABASE_SC |DELETE |`/v1/conversations/\{id}` |JWT |Delete a conversation. |POST |`/v1/actions/\{actionId}` |JWT |Run a quick action. Body shape: `{"content":[{"type":"text","content":"..."}]}` (no `modelId`). |POST |`/v1/reviews/\{reviewId}` |JWT |Run a review. +|GET |`/v1/mcp/oauth/status` |JWT |Connection status for all OAuth-enabled MCP servers. See xref:tinymceai-on-premises-mcp.adoc#mcp-oauth-endpoints[OAuth REST endpoints]. +|POST |`/v1/mcp/oauth/\{serverName}/initialize` |JWT |Start the OAuth authorization flow. +|POST |`/v1/mcp/oauth/\{serverName}/complete` |JWT |Complete the OAuth flow with the authorization code. +|DELETE |`/v1/mcp/oauth/\{serverName}` |JWT |Revoke the OAuth connection for the calling user. |=== +NOTE: The OAuth endpoints are available only when at least one MCP server has an `oauth` block configured. The interactive API documentation at `/docs/` may not include these endpoints; they are documented in the xref:tinymceai-on-premises-mcp.adoc#mcp-oauth[MCP OAuth section]. + NOTE: Environment management (create, read, update, delete) is handled through the Management Panel UI at `/panel/`. == Server-Sent Events reference @@ -213,7 +219,7 @@ Error codes returned in HTTP 4xx responses and inside SSE `event: error` payload |Built-in rate limiting |None |Front the service with nginx `limit_req` or ALB rate-limit rules. See xref:tinymceai-on-premises-production.adoc#rate-limiting[Rate limiting]. |File support (OpenAI-compatible providers) |Images only (`image/*`) |PDFs, text, and Office files are not forwarded to OpenAI-compatible providers. 
Use a non-OpenAI-compatible provider for non-image file attachments. |MCP tool availability |Conversations only |MCP tools are not available in reviews or quick actions. -|MCP authentication |Static token or OAuth 2.0 per server |Static tokens use the `headers` field (fixed at deploy time). Per-user authentication is supported through OAuth 2.0 (Authorization Code with PKCE). See xref:tinymceai-on-premises-mcp.adoc#mcp-oauth[OAuth 2.0 authentication]. +|MCP authentication |Static token or OAuth 2.0, configured independently for each server |Static tokens use the `headers` field (shared across all users, fixed at deploy time). Per-user authentication is supported through OAuth 2.0 (Authorization Code with PKCE). See xref:tinymceai-on-premises-mcp.adoc#mcp-oauth[OAuth 2.0 authentication]. |PostgreSQL default schema |`cs-on-premises` (with hyphen) |Pre-create with `CREATE SCHEMA "cs-on-premises";` or set `DATABASE_SCHEMA=public`. |`/v1/models/\{compatibilityVersion}` |Only accepts `1` |Values such as `v1`, `v2`, or `latest` return 500. |Environment creation through raw API |Not supported |Always create environments through the Management Panel UI. From 0180a162c0110472f9d95deaf4fedb258f3d672f Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Thu, 28 May 2026 12:04:25 +1000 Subject: [PATCH 46/48] Replace five-minute claim with Quick start in getting-started heading The original time estimate was unrealistic for first-run setups that include image pulls and Management Panel configuration. 
--- .../ROOT/pages/tinymceai-on-premises-getting-started.adoc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc index 19267f3395..f9368bd6b6 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc @@ -1,11 +1,11 @@ = Getting started with TinyMCE AI on-premises :navtitle: Getting started -:description: Five-minute Docker Compose quick start for TinyMCE AI on-premises service -:description_short: Five-minute Docker Compose quick start. +:description: Docker Compose quick start for TinyMCE AI on-premises service +:description_short: Docker Compose quick start. :keywords: AI, on-premises, getting started, Docker, quick start, Docker Compose, installation, setup :pluginname: TinyMCE AI -This guide sets up a fully working local stack in roughly five minutes on any machine with Docker: +This guide sets up a fully working local stack on any machine with Docker: * *MySQL 8.0*: conversation history and metadata * *Redis*: caching and session state @@ -31,7 +31,7 @@ Additionally, have the following credentials ready: * A {productname} license key and container registry credentials (from the Tiny account representative) * At least one LLM provider API key (OpenAI, Anthropic, or Google) -== Five-minute demo with Docker Compose +== Quick start with Docker Compose === Create the project folder From f5b6b11534343016ba1ec21b66ba11ac60b21c6a Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Thu, 28 May 2026 12:13:33 +1000 Subject: [PATCH 47/48] Fix readability issue for IMPORTANT admon. 
--- .../ROOT/pages/tinymceai-on-premises-getting-started.adoc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc index f9368bd6b6..882a1b4f5e 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc @@ -140,7 +140,12 @@ AI_ENV_ID='paste-environment-id-here' AI_API_SECRET='paste-api-secret-here' ---- -IMPORTANT: `AI_LICENCE_KEY` activates the AI service container. The `license_key` init option in `tinymce.init()` activates premium {productname} features in the self-hosted editor. These are different credentials from different sources — they are not interchangeable. See the xref:tinymceai-on-premises.adoc#_credentials[Credentials] section on the Overview page. +[IMPORTANT] +==== + * `AI_LICENCE_KEY` activates the AI service container. + * The `license_key` init option in `tinymce.init()` activates premium {productname} features in the self-hosted editor. + * These are different credentials from different sources — they are not interchangeable. See the xref:tinymceai-on-premises.adoc#_credentials[Credentials] section on the Overview page. +==== === Start MySQL and Redis From 3a0757a4fdb2d28ebcd776d0795a53f6b13a9017 Mon Sep 17 00:00:00 2001 From: Karl Kemister-Sheppard Date: Thu, 28 May 2026 13:20:44 +1000 Subject: [PATCH 48/48] Refine getting-started: clean up .env placeholders, remove licensekeymanager, expand port tip Remove angle brackets from remaining .env placeholders, drop licensekeymanager from plugins list, and convert the port-conflict tip to a block with a code snippet for easier copy-paste. 
--- .../tinymceai-on-premises-getting-started.adoc | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc index 882a1b4f5e..e2160ef8aa 100644 --- a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc +++ b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc @@ -254,14 +254,14 @@ The AI service isolates users into Environments. Each environment has its own ac . Click *Create Environment* and give it a name (for example "Development"). . Note the *Environment ID* displayed (a short identifier like `viOu8BnjJHb0HGK091p`). . Inside the environment, click *Create a new access key*. -. *Copy the Environment ID and Access Key*. The Management Panel shows the API Secret only once. +. *Copy the Environment ID and Access Key*. Update `.env` with the new values: [source,bash] ---- -AI_ENV_ID='' -AI_API_SECRET='' +AI_ENV_ID='paste-environment-id-here' +AI_API_SECRET='paste-api-secret-here' ---- [IMPORTANT] @@ -353,7 +353,7 @@ app.get('/', (req, res) => { tinymce.init({ selector: '#editor', license_key: 'your-license-key', - plugins: 'tinymceai licensekeymanager', + plugins: 'tinymceai', toolbar: 'undo redo | blocks | bold italic | tinymceai-chat tinymceai-review tinymceai-quickactions', height: 500, tinymceai_service_url: '${AI_SERVICE_URL}', @@ -388,7 +388,15 @@ The {pluginname} plugin must be present in the {productname} plugins directory b npm start ---- -TIP: If port 3000 is already in use from a previous run, stop the existing process first: `lsof -ti :3000 | xargs kill`. +[TIP] +==== +If port 3000 is already in use from a previous run, stop the existing process first: + +[source,bash] +---- +lsof -ti :3000 | xargs kill +---- +==== === Open the demo