diff --git a/.env.example b/.env.example index f392a5929..79b6d4517 100644 --- a/.env.example +++ b/.env.example @@ -19,6 +19,18 @@ DB_PASSWORD=change-me-strong-user-password # ⚠️ MySQL root 账号密码,仅用于容器内初始化。请改成与 DB_PASSWORD 不同的强密码。 DB_ROOT_PASSWORD=change-me-strong-root-password +# -------------------- PostgreSQL(可选,替代 MySQL)-------------------- +# 仅在用 PostgreSQL 部署时需要: +# docker compose -f docker-compose.yml -f docker-compose.pg.yml up -d +# 该叠加文件把 mateclaw-server 切到 postgres profile,MySQL 容器不启动。 +# 注意:即便不启动 MySQL,docker compose 仍会在解析阶段要求上面的 +# DB_PASSWORD / DB_ROOT_PASSWORD 有值(任意占位即可,MySQL 容器不会运行)。 +# 应用真正使用的库凭据来自下面这三项。 +PGSQL_DB_NAME=mateclaw +PGSQL_DB_USERNAME=postgres +# ⚠️ 必填且请改成强密码。docker-compose.pg.yml 会通过 ${PGSQL_DB_PASSWORD:?} 强制要求。 +PGSQL_DB_PASSWORD=change-me-strong-pg-password + # ==================== 安全(强烈建议覆盖) ==================== # JWT 签名密钥。若留空,服务器会用内置默认值并在启动日志里 WARN。 diff --git a/README.md b/README.md index 7693c2941..242b1e5e7 100644 --- a/README.md +++ b/README.md @@ -104,7 +104,7 @@ You hire coworkers, not chat boxes. Each one has a **Role**, a **Goal**, a **Bac Text-to-speech · Speech-to-text · Image · Music · Video · 3D. First-class, not add-ons. **Sidecar routing** (1.3.0+) means a text-only main model + an image attachment no longer dead-ends — a configured vision model describes the image, and the main model answers. **Image edit** lands too: refer to an earlier conversation attachment by `msg::` and ask the model to recolor or restyle it. Four **document-generation tools** (`DocxRenderTool` / `XlsxRenderTool` / `PptxRenderTool` / `PdfRenderTool`) render Markdown straight to Office files inside the JVM — no subprocess, no Office install. ### Enterprise-ready -RBAC + JWT. **Personal Access Tokens** for headless scripts and CI. **HMAC-SHA-256 outbound webhook signing**. **Distributed Cron lock** so multi-instance deployments don't double-fire. Full audit trail. Flyway-managed schema that auto-heals on upgrade. One JAR to ship. 
MySQL in production, H2 for dev — nothing to change in your code. +RBAC + JWT. **Personal Access Tokens** for headless scripts and CI. **HMAC-SHA-256 outbound webhook signing**. **Distributed Cron lock** so multi-instance deployments don't double-fire. Full audit trail. Flyway-managed schema that auto-heals on upgrade. One JAR to ship. MySQL, PostgreSQL, or KingbaseES in production, H2 for dev — nothing to change in your code. --- @@ -203,7 +203,7 @@ Desktop binaries ship via [GitHub Releases](https://github.com/mateaix/mateclaw/ | Digital Employee Runtime | StateGraph · ReAct + Plan-Execute · Role / Goal / Backstory · LESSONS self-evolution | | Orchestration | Workflow (7 step modes · Pebble DSL) · Triggers (6 pattern types · event governance) · Wiki Transformations (1.3.0+) | | Capability Extension | SKILL.md packages · MCP (stdio / SSE / HTTP · per-agent binding) · ACP bridge (Claude Code / Codex) | -| Database | H2 (dev) · MySQL 8.0+ (prod) | +| Database | H2 (dev) · MySQL 8.0+ / PostgreSQL 14+ / KingbaseES 8+ (prod) | | Auth | Spring Security + JWT | | Frontend | Vue 3 · TypeScript · Vite · Element Plus · TailwindCSS 4 | | Desktop | Electron · electron-updater · JRE 21 (bundled) | diff --git a/docker-compose.pg.yml b/docker-compose.pg.yml new file mode 100644 index 000000000..79bdb9214 --- /dev/null +++ b/docker-compose.pg.yml @@ -0,0 +1,70 @@ +# PostgreSQL override for docker-compose.yml. +# +# Usage: +# cp .env.example .env # set PGSQL_DB_PASSWORD (and the rest) +# docker compose -f docker-compose.yml -f docker-compose.pg.yml up -d +# +# The base docker-compose.yml runs the app on MySQL. This override adds a +# PostgreSQL service, re-points mateclaw-server at it, and keeps MySQL from +# starting — so the stack runs on PostgreSQL instead. +# +# Schema: the mateclaw schema is created by Flyway on startup +# (application-postgres.yml -> spring.flyway.init-sqls: CREATE SCHEMA IF NOT +# EXISTS mateclaw), so the postgres container needs no init script. 
+ +services: + # Gate MySQL behind a profile so a plain `up` with this override does NOT + # start the MySQL container. Start it explicitly with `--profile mysql` only + # if you want both DBs side by side. + # + # Note: docker compose still interpolates the base mysql service's ${VAR:?} + # at config-load time even though the service won't start, so .env must still + # carry DB_ROOT_PASSWORD / DB_PASSWORD (any non-empty placeholder is fine for + # a PG-only deployment — the mysql container never runs). The PostgreSQL + # credentials the app actually uses come from PGSQL_DB_* below. + mysql: + profiles: ["mysql"] + + # PostgreSQL service (not present in the base compose file). + postgres: + image: postgres:16-alpine + container_name: mateclaw-postgres + restart: unless-stopped + environment: + POSTGRES_DB: ${PGSQL_DB_NAME:-mateclaw} + POSTGRES_USER: ${PGSQL_DB_USERNAME:-postgres} + POSTGRES_PASSWORD: ${PGSQL_DB_PASSWORD:?PGSQL_DB_PASSWORD is required in .env} + TZ: Asia/Shanghai + ports: + - "5432:5432" + volumes: + - postgres_data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${PGSQL_DB_USERNAME:-postgres} -d ${PGSQL_DB_NAME:-mateclaw}"] + interval: 10s + timeout: 5s + retries: 5 + + mateclaw-server: + depends_on: + # Drop the inherited mysql dependency (compose merges depends_on maps, so + # without !reset the base's `mysql: service_healthy` would survive and + # block startup since mysql no longer starts). Requires Compose v2.20+. + mysql: !reset null + postgres: + condition: service_healthy + searxng: + condition: service_healthy + environment: + SPRING_PROFILES_ACTIVE: postgres + DB_HOST: postgres + DB_PORT: 5432 + # application-postgres.yml reads DB_NAME / DB_USERNAME / DB_PASSWORD. + # Align them with the postgres service's PGSQL_DB_* values so a single set + # of .env values drives both the server and the container. 
+ DB_NAME: ${PGSQL_DB_NAME:-mateclaw} + DB_USERNAME: ${PGSQL_DB_USERNAME:-postgres} + DB_PASSWORD: ${PGSQL_DB_PASSWORD:?PGSQL_DB_PASSWORD is required in .env} + +volumes: + postgres_data: diff --git a/mateclaw-server/pom.xml b/mateclaw-server/pom.xml index e04bb74fd..92965bd5a 100644 --- a/mateclaw-server/pom.xml +++ b/mateclaw-server/pom.xml @@ -382,6 +382,22 @@ test + + + org.testcontainers + junit-jupiter + test + + + org.testcontainers + postgresql + test + + .', 'builtin', 'pptxRenderTool', '🎞️', TRUE, TRUE, NOW(), NOW(), 0) +ON CONFLICT (id) DO UPDATE SET name=EXCLUDED.name, display_name=EXCLUDED.display_name, description=EXCLUDED.description, bean_name=EXCLUDED.bean_name, icon=EXCLUDED.icon, update_time=EXCLUDED.update_time; + +INSERT INTO mate_tool (id, name, display_name, description, tool_type, bean_name, icon, enabled, builtin, create_time, update_time, deleted) +VALUES (1000000022, 'PdfRenderTool', 'PDF Render', 'Render Markdown into a final-form .pdf and return a one-time download link. Two backends (LibreOffice subprocess preferred, OpenPDF + Flying Saucer fallback); supports YAML frontmatter for cover / page header / page footer.', 'builtin', 'pdfRenderTool', '📄', TRUE, TRUE, NOW(), NOW(), 0) +ON CONFLICT (id) DO UPDATE SET name=EXCLUDED.name, display_name=EXCLUDED.display_name, description=EXCLUDED.description, bean_name=EXCLUDED.bean_name, icon=EXCLUDED.icon, update_time=EXCLUDED.update_time; diff --git a/mateclaw-server/src/main/resources/db/migration/postgresql/V95__wiki_raw_material_cancel.sql b/mateclaw-server/src/main/resources/db/migration/postgresql/V95__wiki_raw_material_cancel.sql new file mode 100644 index 000000000..4e2a1854c --- /dev/null +++ b/mateclaw-server/src/main/resources/db/migration/postgresql/V95__wiki_raw_material_cancel.sql @@ -0,0 +1,15 @@ +-- V95: cancellation flag for in-progress wiki raw material processing. +-- Lets the user request a stop on a long-running PDF analysis (e.g. 
when +-- the embedding model has run out of credits) without having to delete +-- the raw material. The processing pipeline checks the flag at its +-- existing abort checkpoints and bails out with a 'cancelled' status. +-- Idempotent via an information_schema guard (PostgreSQL also supports ADD COLUMN IF NOT EXISTS). +DO $$ +BEGIN + IF NOT EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_name = 'mate_wiki_raw_material' AND column_name = 'cancel_requested' + ) THEN + ALTER TABLE mate_wiki_raw_material ADD COLUMN cancel_requested BOOLEAN NOT NULL DEFAULT FALSE; + END IF; +END $$; diff --git a/mateclaw-server/src/main/resources/db/migration/postgresql/V96__workflow_foundations.sql b/mateclaw-server/src/main/resources/db/migration/postgresql/V96__workflow_foundations.sql new file mode 100644 index 000000000..165c757a2 --- /dev/null +++ b/mateclaw-server/src/main/resources/db/migration/postgresql/V96__workflow_foundations.sql @@ -0,0 +1,154 @@ +-- V96: Foundational schema for the workflow runtime. +-- Eight tables establish workflow identity (workflow + immutable revisions), +-- run state (run + per-step rows + durable pause rows for await_approval), +-- payload URI storage with inline / filesystem fallback, and trigger +-- definitions paired with a dedup-window table for envelope-based event +-- governance. CREATE TABLE IF NOT EXISTS is itself idempotent on PostgreSQL. + +-- 1. Stable workflow identity + draft (1:1 with workflow row).
+CREATE TABLE IF NOT EXISTS mate_workflow ( + id BIGINT NOT NULL PRIMARY KEY, + workspace_id BIGINT NOT NULL, + name VARCHAR(128) NOT NULL, + description VARCHAR(1024), + enabled BOOLEAN NOT NULL DEFAULT TRUE, + draft_json JSONB, + draft_schema_version VARCHAR(8), + draft_updated_by BIGINT, + draft_updated_at TIMESTAMP(3), + latest_revision_id BIGINT, + created_by BIGINT, + create_time TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + update_time TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP , + deleted INT NOT NULL DEFAULT 0 +); +CREATE UNIQUE INDEX IF NOT EXISTS uk_workflow_workspace_name ON mate_workflow (workspace_id, name, deleted); + +-- 2. Immutable published revisions; integer revision is monotonic per workflow. +CREATE TABLE IF NOT EXISTS mate_workflow_revision ( + id BIGINT NOT NULL PRIMARY KEY, + workflow_id BIGINT NOT NULL, + revision INT NOT NULL, + graph_json JSONB NOT NULL DEFAULT '{}'::jsonb, + schema_version VARCHAR(8) NOT NULL, + published_note VARCHAR(512), + published_by BIGINT, + create_time TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP +); +CREATE UNIQUE INDEX IF NOT EXISTS uk_workflow_revision ON mate_workflow_revision (workflow_id, revision); + +-- 3. Workflow run instance; payload bodies live behind URIs in mate_workflow_payload. +CREATE TABLE IF NOT EXISTS mate_workflow_run ( + id BIGINT NOT NULL PRIMARY KEY, + workflow_id BIGINT NOT NULL, + revision_id BIGINT NOT NULL, + workspace_id BIGINT NOT NULL, + state VARCHAR(16) NOT NULL, + triggered_by VARCHAR(32), + triggered_meta TEXT, + initial_input_ref VARCHAR(256), + final_output_ref VARCHAR(256), + error_message VARCHAR(2048), + started_at TIMESTAMP(3), + completed_at TIMESTAMP(3), + create_time TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + deleted INT NOT NULL DEFAULT 0 +); +CREATE INDEX IF NOT EXISTS idx_workflow_run_started ON mate_workflow_run (workflow_id, started_at); + +-- 4. Per-step run row; iteration_index reserved for fan_out (and future loop). 
+CREATE TABLE IF NOT EXISTS mate_workflow_run_step ( + id BIGINT NOT NULL PRIMARY KEY, + run_id BIGINT NOT NULL, + step_index INT NOT NULL, + iteration_index INT, + step_name VARCHAR(128), + agent_id BIGINT, + state VARCHAR(16), + input_ref VARCHAR(256), + output_ref VARCHAR(256), + output_summary VARCHAR(512), + output_content_type VARCHAR(64), + error_message VARCHAR(2048), + duration_ms BIGINT, + token_input INT, + token_output INT, + started_at TIMESTAMP(3), + completed_at TIMESTAMP(3) +); +CREATE INDEX IF NOT EXISTS idx_workflow_run_step ON mate_workflow_run_step (run_id, step_index, iteration_index); + +-- 5. Durable pause rows so await_approval can resume across restarts. +CREATE TABLE IF NOT EXISTS mate_workflow_run_pause ( + id BIGINT NOT NULL PRIMARY KEY, + run_id BIGINT NOT NULL, + step_id BIGINT NOT NULL, + pause_kind VARCHAR(32) NOT NULL, + pause_token VARCHAR(128) NOT NULL, + external_approval_id BIGINT, + paused_at TIMESTAMP(3) NOT NULL, + resume_deadline TIMESTAMP(3), + resume_payload_ref VARCHAR(256), + resumed_at TIMESTAMP(3), + resume_outcome VARCHAR(32) +); +CREATE UNIQUE INDEX IF NOT EXISTS uk_workflow_pause_run_step ON mate_workflow_run_pause (run_id, step_id); +CREATE UNIQUE INDEX IF NOT EXISTS uk_workflow_pause_token ON mate_workflow_run_pause (pause_token); +CREATE INDEX IF NOT EXISTS idx_workflow_pause_external_approval ON mate_workflow_run_pause (external_approval_id); +CREATE INDEX IF NOT EXISTS idx_workflow_pause_open_deadline ON mate_workflow_run_pause (resumed_at, resume_deadline); + +-- 6. Payload URI storage. Inline BYTEA for < 256KB; storage_kind=fs/s3/oss +-- carries the external object key in storage_ref. 
+CREATE TABLE IF NOT EXISTS mate_workflow_payload ( + id BIGINT NOT NULL PRIMARY KEY, + payload_uri VARCHAR(256) NOT NULL, + workspace_id BIGINT NOT NULL, + content_bytes BYTEA, + storage_kind VARCHAR(16) NOT NULL, + storage_ref VARCHAR(512), + content_type VARCHAR(64), + sha256 CHAR(64), + size_bytes BIGINT, + created_at TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP +); +CREATE UNIQUE INDEX IF NOT EXISTS uk_workflow_payload_uri ON mate_workflow_payload (payload_uri); +CREATE INDEX IF NOT EXISTS idx_workflow_payload_workspace_created ON mate_workflow_payload (workspace_id, created_at); + +-- 7. Trigger definitions. pattern_version is a lamport counter that fire +-- callbacks compare against on every fire to detect that another instance +-- has updated the cron expression and self-cancel the local schedule. +CREATE TABLE IF NOT EXISTS mate_trigger ( + id BIGINT NOT NULL PRIMARY KEY, + workspace_id BIGINT NOT NULL, + name VARCHAR(128), + pattern_type VARCHAR(32) NOT NULL, + pattern_json JSONB NOT NULL DEFAULT '{}'::jsonb, + target_type VARCHAR(16) NOT NULL, + target_id BIGINT NOT NULL, + payload_template TEXT, + rate_limit_per_min INT NOT NULL DEFAULT 60, + dedup_window_secs INT NOT NULL DEFAULT 60, + bot_self_filter BOOLEAN NOT NULL DEFAULT TRUE, + enabled BOOLEAN NOT NULL DEFAULT TRUE, + fire_count BIGINT NOT NULL DEFAULT 0, + max_fires BIGINT NOT NULL DEFAULT 0, + last_fired_at TIMESTAMP(3), + pattern_version BIGINT NOT NULL DEFAULT 1, + create_time TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + update_time TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP , + deleted INT NOT NULL DEFAULT 0 +); +CREATE INDEX IF NOT EXISTS idx_trigger_workspace_enabled ON mate_trigger (workspace_id, enabled, deleted); +CREATE INDEX IF NOT EXISTS idx_trigger_target ON mate_trigger (target_type, target_id); + +-- 8. Event dedup window. dedup_key is envelope.eventId, falling back to +-- sourceHash when the upstream channel did not provide a stable id. 
+CREATE TABLE IF NOT EXISTS mate_trigger_event ( + id BIGINT NOT NULL PRIMARY KEY, + trigger_id BIGINT NOT NULL, + dedup_key VARCHAR(128) NOT NULL, + received_at TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + expires_at TIMESTAMP(3) NOT NULL +); +CREATE UNIQUE INDEX IF NOT EXISTS uk_trigger_dedup ON mate_trigger_event (trigger_id, dedup_key); +CREATE INDEX IF NOT EXISTS idx_trigger_event_expires ON mate_trigger_event (expires_at); diff --git a/mateclaw-server/src/main/resources/db/migration/postgresql/V97__workflow_purge_tombstones.sql b/mateclaw-server/src/main/resources/db/migration/postgresql/V97__workflow_purge_tombstones.sql new file mode 100644 index 000000000..8b3a9125e --- /dev/null +++ b/mateclaw-server/src/main/resources/db/migration/postgresql/V97__workflow_purge_tombstones.sql @@ -0,0 +1,9 @@ +-- See the matching H2 file for context. The workflow / trigger entities +-- moved off @TableLogic to align with the project's hard-delete convention; +-- this migration drops any tombstones the old soft-delete path persisted so +-- list endpoints don't expose them after the annotation-driven filter is +-- removed. + +DELETE FROM mate_workflow WHERE deleted <> 0; +DELETE FROM mate_workflow_run WHERE deleted <> 0; +DELETE FROM mate_trigger WHERE deleted <> 0; diff --git a/mateclaw-server/src/main/resources/db/migration/postgresql/V98__trigger_last_error.sql b/mateclaw-server/src/main/resources/db/migration/postgresql/V98__trigger_last_error.sql new file mode 100644 index 000000000..dd98b00d2 --- /dev/null +++ b/mateclaw-server/src/main/resources/db/migration/postgresql/V98__trigger_last_error.sql @@ -0,0 +1,5 @@ +-- See the H2 file for context. KingbaseES (PostgreSQL) supports +-- ADD COLUMN IF NOT EXISTS natively. 
+ +ALTER TABLE mate_trigger ADD COLUMN IF NOT EXISTS last_error VARCHAR(2048); +ALTER TABLE mate_trigger ADD COLUMN IF NOT EXISTS last_dispatched_at TIMESTAMP NULL; diff --git a/mateclaw-server/src/main/resources/db/migration/postgresql/V99__dashscope_compat_provider.sql b/mateclaw-server/src/main/resources/db/migration/postgresql/V99__dashscope_compat_provider.sql new file mode 100644 index 000000000..be99a04e2 --- /dev/null +++ b/mateclaw-server/src/main/resources/db/migration/postgresql/V99__dashscope_compat_provider.sql @@ -0,0 +1,39 @@ +-- V99: register a DashScope OpenAI-compatible provider entry alongside the +-- existing native dashscope provider, plus the dot-versioned Qwen families +-- (qwen3.5-*, qwen3.6-*) that only ship on compatible-mode/v1. +-- +-- See the H2 copy for full background. This PostgreSQL copy uses INSERT ... ON +-- CONFLICT ... DO UPDATE; the api_key column is intentionally omitted from the +-- update list so existing deployments that have already configured a key keep +-- it (this only matters if a future migration re-applies a similar block; +-- Flyway runs each version once today).
+ +-- -- Provider -------------------------------------------------------------- +INSERT INTO mate_model_provider (provider_id, name, api_key_prefix, chat_model, api_key, base_url, generate_kwargs, is_custom, is_local, support_model_discovery, support_connection_check, freeze_url, require_api_key, create_time, update_time) +VALUES ('dashscope-compat', 'DashScope (兼容模式)', 'sk-', 'OpenAIChatModel', '', 'https://dashscope.aliyuncs.com/compatible-mode/v1', '{}', FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, NOW(), NOW()) +ON CONFLICT (provider_id) DO UPDATE SET name = EXCLUDED.name, + api_key_prefix = EXCLUDED.api_key_prefix, + chat_model = EXCLUDED.chat_model, + base_url = EXCLUDED.base_url, + generate_kwargs = EXCLUDED.generate_kwargs, + support_model_discovery = EXCLUDED.support_model_discovery, + support_connection_check = EXCLUDED.support_connection_check, + freeze_url = EXCLUDED.freeze_url, + require_api_key = EXCLUDED.require_api_key, + update_time = EXCLUDED.update_time; + +-- -- Model catalog --------------------------------------------------------- +-- Only seed the variants that are publicly callable on compatible-mode. The +-- -max / -vl-max variants exist in the marketplace but return 404 for general +-- accounts; users with whitelist access can add them via Settings → Models. 
+INSERT INTO mate_model_config (id, name, provider, model_name, description, temperature, max_tokens, top_p, builtin, enabled, is_default, create_time, update_time, deleted) +VALUES + (1000000601, 'Qwen3.6 Plus', 'dashscope-compat', 'qwen3.6-plus', '通义千问 3.6 Plus 旗舰,平衡推理与速度(兼容模式专属)', 0.7, 4096, 0.8, TRUE, TRUE, FALSE, NOW(), NOW(), 0), + (1000000603, 'Qwen3.5 Plus', 'dashscope-compat', 'qwen3.5-plus', '通义千问 3.5 Plus(兼容模式专属)', 0.7, 4096, 0.8, TRUE, TRUE, FALSE, NOW(), NOW(), 0), + (1000000605, 'Qwen3 VL Plus', 'dashscope-compat', 'qwen3-vl-plus', '通义千问 3 视觉理解 Plus,支持图像、视频输入(兼容模式专属)', 0.7, 4096, 0.8, TRUE, TRUE, FALSE, NOW(), NOW(), 0) +ON CONFLICT (id) DO UPDATE SET name = EXCLUDED.name, + model_name = EXCLUDED.model_name, + description = EXCLUDED.description, + builtin = EXCLUDED.builtin, + enabled = EXCLUDED.enabled, + update_time = EXCLUDED.update_time; diff --git a/mateclaw-server/src/main/resources/db/migration/postgresql/V9__usage_cache_tokens.sql b/mateclaw-server/src/main/resources/db/migration/postgresql/V9__usage_cache_tokens.sql new file mode 100644 index 000000000..ae616828c --- /dev/null +++ b/mateclaw-server/src/main/resources/db/migration/postgresql/V9__usage_cache_tokens.sql @@ -0,0 +1,24 @@ +-- V9: Track Anthropic prompt cache token usage +-- RFC-014 Change 4: per-call cache_creation_input_tokens / cache_read_input_tokens +-- accumulated daily so the dashboard can show cache hit rate and cost savings. +-- (was originally numbered V8 but collided with V8__wiki_raw_progress.sql; renumbered to V9.) +-- Idempotent via an information_schema guard (PostgreSQL also supports ADD COLUMN IF NOT EXISTS).
+DO $$ +BEGIN + IF NOT EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_name = 'mate_usage_daily' AND column_name = 'cache_read_tokens' + ) THEN + ALTER TABLE mate_usage_daily ADD COLUMN cache_read_tokens BIGINT DEFAULT 0; + END IF; +END $$; + +DO $$ +BEGIN + IF NOT EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_name = 'mate_usage_daily' AND column_name = 'cache_write_tokens' + ) THEN + ALTER TABLE mate_usage_daily ADD COLUMN cache_write_tokens BIGINT DEFAULT 0; + END IF; +END $$; diff --git a/mateclaw-server/src/main/resources/docs/en/config.md b/mateclaw-server/src/main/resources/docs/en/config.md index 067d622cc..8944049df 100644 --- a/mateclaw-server/src/main/resources/docs/en/config.md +++ b/mateclaw-server/src/main/resources/docs/en/config.md @@ -14,8 +14,10 @@ Deep-dive topics have their own pages — Tool Guard rules in [Security & Approv |---------|----------|--------------| | `default` | H2 file at `./data/mateclaw` | No action needed | | `mysql` | MySQL 8.0+ | `spring.profiles.active=mysql` or `SPRING_PROFILES_ACTIVE=mysql` | +| `postgres` | PostgreSQL 14+ | `spring.profiles.active=postgres` or `SPRING_PROFILES_ACTIVE=postgres` | +| `kingbase` | KingbaseES 8+ | `spring.profiles.active=kingbase` (needs `-Pkingbase` build) | -Docker deployments activate `mysql` automatically. Desktop builds use `default`. +Docker deployments activate `mysql` by default; for PostgreSQL layer in `docker-compose.pg.yml` (see [PostgreSQL deployment](./database-postgresql)). Desktop builds use `default`. --- @@ -57,6 +59,27 @@ spring: driver-class-name: com.mysql.cj.jdbc.Driver ``` +### Database — PostgreSQL (production) + +```yaml +spring: + profiles: + active: postgres + datasource: + # stringtype=unspecified is REQUIRED: JSON columns are JSONB on PG; without + # it a String write fails with "column is of type jsonb but expression is of + # type character varying". 
+ url: jdbc:postgresql://localhost:5432/mateclaw?currentSchema=mateclaw&stringtype=unspecified + username: ${DB_USERNAME:postgres} + password: ${DB_PASSWORD} + driver-class-name: org.postgresql.Driver +``` + +PostgreSQL uses its own migration tree `db/migration/postgresql` (JSON columns +as JSONB). For the full guide — JSONB design, backups, upgrade path from the +kingbase tree — see [PostgreSQL deployment](./database-postgresql). KingbaseES +shares PG's dialect; use the `kingbase` profile with a `-Pkingbase` build. + ### AI model — managed in the UI, not YAML ::: tip @@ -261,9 +284,12 @@ MateClaw uses **Flyway** for schema migrations: 1. `db/migration/h2/V*__*.sql` — H2-dialect migration scripts 2. `db/migration/mysql/V*__*.sql` — MySQL-dialect migration scripts -3. After migrations, seed data is loaded from `db/data-*.sql` — idempotent +3. `db/migration/kingbase/V*__*.sql` — KingbaseES (PostgreSQL-family) dialect +4. `db/migration/postgresql/V*__*.sql` — PostgreSQL dialect (forked from kingbase; JSON columns as JSONB) +5. After migrations, seed data is loaded from `db/data-*.sql` — idempotent -Flyway auto-selects the correct dialect path based on the active Spring profile. Every startup runs a `repair` before `migrate`, self-healing checksum drift and partially-failed migrations (especially important for desktop users upgrading offline). +Flyway auto-selects the correct dialect path based on the active Spring profile. +New migrations must be added to **all four** trees with the same version number. Every startup runs a `repair` before `migrate`, self-healing checksum drift and partially-failed migrations (especially important for desktop users upgrading offline). 
### Table conventions @@ -294,6 +320,19 @@ export MYSQL_ROOT_PASSWORD=your-password mvn spring-boot:run ``` +### Switching to PostgreSQL + +```bash +export SPRING_PROFILES_ACTIVE=postgres +export DB_HOST=localhost DB_PORT=5432 DB_NAME=mateclaw +export DB_USERNAME=postgres DB_PASSWORD=your-password +mvn spring-boot:run +``` + +Flyway creates the `mateclaw` schema automatically. See [PostgreSQL +deployment](./database-postgresql) for the JSONB notes and the +`stringtype=unspecified` requirement. + --- ## Runtime settings (`mate_system_setting`) diff --git a/mateclaw-server/src/main/resources/docs/en/database-postgresql.md b/mateclaw-server/src/main/resources/docs/en/database-postgresql.md new file mode 100644 index 000000000..4ce53dc84 --- /dev/null +++ b/mateclaw-server/src/main/resources/docs/en/database-postgresql.md @@ -0,0 +1,134 @@ +# PostgreSQL deployment + +MateClaw supports PostgreSQL as a **first-class** target: its own migration +tree, JSONB columns, and real-database integration tests. This page covers +running production on PostgreSQL and how it differs from MySQL. + +> Supported: **PostgreSQL 14+**. KingbaseES is PostgreSQL-family — use the +> `kingbase` profile; same dialect but JSON columns stay `TEXT`. + +## Quick start (Docker) + +```bash +cp .env.example .env +# Edit .env: set at least PGSQL_DB_PASSWORD (a strong password). +# Note: docker compose still interpolates the base mysql service's required +# vars at config time, so DB_PASSWORD / DB_ROOT_PASSWORD must have any +# non-empty placeholder value (the mysql container never starts). + +docker compose -f docker-compose.yml -f docker-compose.pg.yml up -d +``` + +The override `docker-compose.pg.yml` does three things: switches +`mateclaw-server` to the `postgres` profile, points it at the `postgres` +container, and gates the MySQL container behind a `mysql` profile so it never +starts. 
+ +## Manual deployment (non-Docker) + +Activate the `postgres` profile and supply the connection parameters: + +```bash +java -jar mateclaw-server.jar \ + --spring.profiles.active=postgres \ + --DB_HOST=your-pg-host --DB_PORT=5432 \ + --DB_NAME=mateclaw --DB_USERNAME=postgres --DB_PASSWORD=... +``` + +Key connection-string params (see `application-postgres.yml`): + +``` +jdbc:postgresql://HOST:PORT/DB?currentSchema=mateclaw&stringtype=unspecified +``` + +- **`currentSchema=mateclaw`** — all tables live in the `mateclaw` schema (not + `public`). Flyway runs `CREATE SCHEMA IF NOT EXISTS mateclaw` on startup, and + HikariCP runs `SET search_path TO mateclaw` per connection as a backstop. +- **`stringtype=unspecified`** — **required**. See the JSONB notes below. + +## JSONB design + +The PostgreSQL migration tree (`db/migration/postgresql`) is forked from the +KingbaseES tree and upgrades ~40 high-frequency JSON columns from `TEXT` to +**`JSONB`** (`config_json` / `headers_json` / `settings_json` / +`delivery_config` / …). Payoff: **the database validates JSON well-formedness at +write time** — malformed JSON is rejected. + +### Why `stringtype=unspecified` is required + +MyBatis / JDBC bind JSON column values as `java.lang.String` via `setString`, +which PostgreSQL types as `varchar`. Writing a `varchar` into a `jsonb` column +fails: + +``` +ERROR: column "xxx" is of type jsonb but expression is of type character varying +``` + +`stringtype=unspecified` makes the driver send String params as `unknown`, so +PostgreSQL coerces them into jsonb (and validates). This covers both write +paths: `JacksonTypeHandler` (`CronJobEntity.deliveryConfig`) and plain String +JSON columns. 
+ +### Columns deliberately kept TEXT + +| Column | Table | Reason | +|---|---|---| +| `params_schema` | `mate_tool` | Arbitrary JSON-Schema text | +| `output_schema` | `mate_wiki_transformation` | Same | +| `metadata` | `mate_message` | Frequently-truncated half-structured blob | + +### JSON queries and indexes + +No code queries JSON fields today, so **no GIN index is created**. When you need +to filter by a JSON key, add one: + +```sql +CREATE INDEX idx_xxx_gin ON mateclaw.your_table USING GIN (your_col jsonb_path_ops); +``` + +JSONB columns are queryable with `->`/`->>`/`@>`, e.g. +`SELECT delivery_config ->> 'targetId' FROM mate_cron_job`. + +## Differences from MySQL + +| Aspect | MySQL | PostgreSQL | +|---|---|---| +| Migration tree | `db/migration/mysql` | `db/migration/postgresql` | +| upsert | `ON DUPLICATE KEY UPDATE` | `ON CONFLICT ... DO UPDATE` | +| Primary key | Snowflake (app-side; same) | Snowflake (same) | +| Logical-delete flag | `INT` | `SMALLINT` (maps to Java `Integer`) | +| JSON storage | `JSON` / `TEXT` | `JSONB` | +| schema | database == schema | dedicated `mateclaw` schema | + +## Backups + +```bash +# Back up the mateclaw schema only +pg_dump -h HOST -U postgres -n mateclaw -Fc mateclaw > mateclaw.dump + +# Restore +pg_restore -h HOST -U postgres -d mateclaw --clean mateclaw.dump +``` + +## Upgrading from the "parasitic kingbase tree" + +Earlier versions ran PostgreSQL on the `db/migration/kingbase` tree. PostgreSQL +now has its own `db/migration/postgresql` tree. Because the two trees are +byte-identical at fork time and Flyway keys on version+checksum (not path), +**switching locations is transparent for existing PG deployments** — nothing +re-runs, nothing conflicts. The JSONB upgrade only affects column types on a +**freshly created** database; an existing TEXT-based database is not auto-ALTERed +(deploy a new database, or migrate the column types manually, to gain JSONB). 
+ +## Verification + +The PG migration tree is guarded by Testcontainers integration tests (needs +Docker locally): + +```bash +mvn -Dtest='PostgresMigrationSmokeTest,CronJobDeliveryConfigPgTest' test +``` + +Coverage: every migration applies on a real PG server, JSONB columns have the +right physical type, the JacksonTypeHandler round-trips, and malformed JSON is +rejected. On machines without Docker these tests are skipped (not failed). diff --git a/mateclaw-server/src/main/resources/docs/en/docker-deploy.md b/mateclaw-server/src/main/resources/docs/en/docker-deploy.md index d70703a22..0835ce6c5 100644 --- a/mateclaw-server/src/main/resources/docs/en/docker-deploy.md +++ b/mateclaw-server/src/main/resources/docs/en/docker-deploy.md @@ -29,6 +29,18 @@ This page covers **requirements, steps, verification, and common gotchas**. For | `searxng` | Built from `./docker/searxng/` | Keyless search fallback | `8088` | | `mateclaw-server` | Built from `mateclaw-server/Dockerfile` | Spring Boot backend + embedded browser | `18080` | +### Using PostgreSQL instead of MySQL + +The base compose file has no `postgres` service; the `docker-compose.pg.yml` +override adds one. Layer the override in to switch the backend to PostgreSQL; +the MySQL container won't start: + +```bash +docker compose -f docker-compose.yml -f docker-compose.pg.yml up -d +``` + +See [PostgreSQL deployment](./database-postgresql) for details.
+ --- ## SearXNG search service diff --git a/mateclaw-server/src/main/resources/docs/zh/config.md b/mateclaw-server/src/main/resources/docs/zh/config.md index e7a43d185..1eeccc2c4 100644 --- a/mateclaw-server/src/main/resources/docs/zh/config.md +++ b/mateclaw-server/src/main/resources/docs/zh/config.md @@ -14,8 +14,10 @@ |---------|--------|----------| | `default` | H2 文件 `./data/mateclaw` | 不用做什么 | | `mysql` | MySQL 8.0+ | `spring.profiles.active=mysql` 或环境变量 | +| `postgres` | PostgreSQL 14+ | `spring.profiles.active=postgres` 或环境变量 | +| `kingbase` | KingbaseES 8+(人大金仓) | `spring.profiles.active=kingbase`(需 `-Pkingbase` 构建) | -Docker 部署自动激活 `mysql`。桌面版用 `default`。 +Docker 部署默认激活 `mysql`;用 PostgreSQL 叠加 `docker-compose.pg.yml`(见 [PostgreSQL 部署](./database-postgresql))。桌面版用 `default`。 --- @@ -57,6 +59,23 @@ spring: driver-class-name: com.mysql.cj.jdbc.Driver ``` +### 数据库 —— PostgreSQL(生产) + +```yaml +spring: + profiles: + active: postgres + datasource: + # stringtype=unspecified 必填:JSON 列在 PG 上是 JSONB,没有它 String 写入会报 + # "column is of type jsonb but expression is of type character varying" + url: jdbc:postgresql://localhost:5432/mateclaw?currentSchema=mateclaw&stringtype=unspecified + username: ${DB_USERNAME:postgres} + password: ${DB_PASSWORD} + driver-class-name: org.postgresql.Driver +``` + +PostgreSQL 用独立迁移树 `db/migration/postgresql`(JSON 列为 JSONB)。完整说明、JSONB 设计、备份、从 kingbase 树升级路径见 [PostgreSQL 部署](./database-postgresql)。KingbaseES 与 PG 同源,用 `kingbase` profile + `-Pkingbase` 构建。 + ### AI 模型 —— 在 UI 里管,不在 YAML 里 ::: tip @@ -261,9 +280,11 @@ MateClaw 用 **Flyway** 管理 schema 迁移: 1. `db/migration/h2/V*__*.sql`——H2 方言的迁移脚本 2. `db/migration/mysql/V*__*.sql`——MySQL 方言的迁移脚本 -3. 迁移完成后加载种子数据(`db/data-*.sql`),幂等执行 +3. `db/migration/kingbase/V*__*.sql`——KingbaseES(PG 同源)方言 +4. `db/migration/postgresql/V*__*.sql`——PostgreSQL 方言(fork 自 kingbase;JSON 列为 JSONB) +5. 
迁移完成后加载种子数据(`db/data-*.sql`),幂等执行 -启动时 Flyway 根据 active profile 自动选择正确的方言路径。每次启动时先做一次 `repair`,再 `migrate`——checksum 变更和部分失败的迁移自动修复(对桌面端离线升级用户尤其重要)。 +启动时 Flyway 根据 active profile 自动选择正确的方言路径。新增迁移必须**四套同步**、版本号一致。每次启动时先做一次 `repair`,再 `migrate`——checksum 变更和部分失败的迁移自动修复(对桌面端离线升级用户尤其重要)。 ### 表约定 diff --git a/mateclaw-server/src/main/resources/docs/zh/database-postgresql.md b/mateclaw-server/src/main/resources/docs/zh/database-postgresql.md new file mode 100644 index 000000000..66b9fac3a --- /dev/null +++ b/mateclaw-server/src/main/resources/docs/zh/database-postgresql.md @@ -0,0 +1,107 @@ +# PostgreSQL 部署 + +MateClaw 把 PostgreSQL 作为**一等公民**支持:独立迁移树、JSONB 列、真实数据库集成测试。本页讲怎么用 PostgreSQL 跑生产,以及它和 MySQL 的差异。 + +> 支持版本:**PostgreSQL 14+**。KingbaseES(人大金仓)与 PG 同源,用 `kingbase` profile,方言一致但 JSON 列保持 `TEXT`。 + +## 快速开始(Docker) + +```bash +cp .env.example .env +# 编辑 .env:至少设置 PGSQL_DB_PASSWORD(强密码) +# 注意:即便不用 MySQL,docker compose 解析阶段仍要求 DB_PASSWORD / DB_ROOT_PASSWORD +# 有值(任意占位即可,MySQL 容器不会启动) + +docker compose -f docker-compose.yml -f docker-compose.pg.yml up -d +``` + +叠加文件 `docker-compose.pg.yml` 做三件事:把 `mateclaw-server` 切到 `postgres` profile、指向 `postgres` 容器、并把 MySQL 容器挡在 `mysql` profile 后面不启动。 + +## 手动部署(非 Docker) + +激活 `postgres` profile,配好连接参数: + +```bash +java -jar mateclaw-server.jar \ + --spring.profiles.active=postgres \ + --DB_HOST=your-pg-host --DB_PORT=5432 \ + --DB_NAME=mateclaw --DB_USERNAME=postgres --DB_PASSWORD=... 
+``` + +连接串关键参数(见 `application-postgres.yml`): + +``` +jdbc:postgresql://HOST:PORT/DB?currentSchema=mateclaw&stringtype=unspecified +``` + +- **`currentSchema=mateclaw`** —— MateClaw 所有表建在 `mateclaw` schema(不是 `public`)。Flyway 启动时自动 `CREATE SCHEMA IF NOT EXISTS mateclaw`,HikariCP 每个连接再 `SET search_path TO mateclaw` 兜底。 +- **`stringtype=unspecified`** —— **必填**。见下方 JSONB 说明。 + +## JSONB 设计 + +PostgreSQL 迁移树(`db/migration/postgresql`)是从 KingbaseES 树 fork 出来的,把约 40 个高频 JSON 列从 `TEXT` 升级为 **`JSONB`**(`config_json` / `headers_json` / `settings_json` / `delivery_config` / …)。收益:**写入时数据库强制校验 JSON 合法性**,非法 JSON 直接被拒。 + +### 为什么必须 `stringtype=unspecified` + +MyBatis / JDBC 把 JSON 列的值按 `java.lang.String` 用 `setString` 绑定,PG 默认把它当 `varchar`。往 `jsonb` 列写 `varchar` 会报: + +``` +ERROR: column "xxx" is of type jsonb but expression is of type character varying +``` + +`stringtype=unspecified` 让驱动把 String 参数标为 `unknown`,PG 自动 cast 成 jsonb 并校验。这覆盖了两条写入路径:`JacksonTypeHandler`(`CronJobEntity.deliveryConfig`)和普通 String JSON 列。 + +### 哪些列没转 JSONB + +刻意保持 `TEXT`: + +| 列 | 表 | 原因 | +|---|---|---| +| `params_schema` | `mate_tool` | 任意 JSON Schema 文本 | +| `output_schema` | `mate_wiki_transformation` | 同上 | +| `metadata` | `mate_message` | 高频截断的半结构化 blob | + +### JSON 查询与索引 + +目前代码不按 JSON 字段查询,所以**没有建 GIN 索引**。未来需要按 JSON 键过滤时,再加: + +```sql +CREATE INDEX idx_xxx_gin ON mateclaw.your_table USING GIN (your_col jsonb_path_ops); +``` + +JSONB 列可直接用 `->`/`->>`/`@>` 查询,例如 `SELECT delivery_config ->> 'targetId' FROM mate_cron_job`。 + +## 与 MySQL 的差异 + +| 方面 | MySQL | PostgreSQL | +|---|---|---| +| 迁移树 | `db/migration/mysql` | `db/migration/postgresql` | +| upsert | `ON DUPLICATE KEY UPDATE` | `ON CONFLICT ... 
DO UPDATE` | +| 主键 | 雪花(应用层生成,两者相同) | 雪花(相同) | +| 逻辑删除标记 | `INT` | `SMALLINT`(对应 Java `Integer`) | +| JSON 存储 | `JSON` / `TEXT` | `JSONB` | +| schema | 库即 schema | 独立 `mateclaw` schema | + +## 备份 + +```bash +# 备份(只导 mateclaw schema) +pg_dump -h HOST -U postgres -n mateclaw -Fc mateclaw > mateclaw.dump + +# 恢复 +pg_restore -h HOST -U postgres -d mateclaw --clean mateclaw.dump +``` + +## 从「寄生 kingbase 树」升级 + +早期版本的 PostgreSQL 复用 `db/migration/kingbase` 迁移树。现在 PG 有独立的 `db/migration/postgresql` 树。由于两棵树 fork 时字节一致、Flyway 按 version+checksum(而非路径)判定,**已有 PG 部署切换 location 是透明的**——不会重跑、不会冲突。JSONB 升级只影响**新建库**的列类型;已用 TEXT 跑起来的老库不会被自动 ALTER(如需享受 JSONB,请在新库部署或手动迁移列类型)。 + +## 验证 + +PG 迁移树由 Testcontainers 集成测试守护(需本机有 Docker): + +```bash +mvn -Dtest='PostgresMigrationSmokeTest,CronJobDeliveryConfigPgTest' test +``` + +覆盖:全部迁移在真实 PG 上跑通、JSONB 列物理类型正确、JacksonTypeHandler 读写往返、非法 JSON 被拒。无 Docker 的机器上这些测试自动跳过(不失败)。 diff --git a/mateclaw-server/src/main/resources/docs/zh/docker-deploy.md b/mateclaw-server/src/main/resources/docs/zh/docker-deploy.md index 8ed18af99..732f4cbac 100644 --- a/mateclaw-server/src/main/resources/docs/zh/docker-deploy.md +++ b/mateclaw-server/src/main/resources/docs/zh/docker-deploy.md @@ -29,6 +29,16 @@ | `searxng` | 本地构建 `./docker/searxng/` | 无 API Key 搜索兜底 | `8088` | | `mateclaw-server` | 本地构建 `mateclaw-server/Dockerfile` | Spring Boot 后端 + 内置浏览器 | `18080` | +### 用 PostgreSQL 替代 MySQL + +`compose` 文件已内置 `postgres` 服务(默认不被 `mateclaw-server` 使用)。叠加 `docker-compose.pg.yml` 即可把后端切到 PostgreSQL,MySQL 容器不启动: + +```bash +docker compose -f docker-compose.yml -f docker-compose.pg.yml up -d +``` + +详见 [PostgreSQL 部署](./database-postgresql)。 + --- ## SearXNG 搜索服务 diff --git a/mateclaw-server/src/test/java/vip/mate/skill/service/SkillFileServiceTest.java b/mateclaw-server/src/test/java/vip/mate/skill/service/SkillFileServiceTest.java index d9c0389d2..3681e57e4 100644 --- a/mateclaw-server/src/test/java/vip/mate/skill/service/SkillFileServiceTest.java +++ 
b/mateclaw-server/src/test/java/vip/mate/skill/service/SkillFileServiceTest.java @@ -102,6 +102,29 @@ void unchangedRowSkipped() { verify(mapper, never()).deleteById(anyLong()); } + @Test + @DisplayName("NUL bytes are stripped before persist; size/hash track the stored content") + void nulStrippedBeforeInsert() { + when(mapper.selectList(any())).thenReturn(new ArrayList<>()); + + String clean = "print('hi')\nprint('bye')"; + String dirty = "print('hi')\u0000\nprint('bye')"; + + var result = service.applyBundleFiles(42L, Map.of("scripts/run.py", dirty), false); + + assertEquals(1, result.rowsWritten()); + ArgumentCaptor captor = ArgumentCaptor.forClass(SkillFileEntity.class); + verify(mapper).insert((SkillFileEntity) captor.capture()); + SkillFileEntity inserted = captor.getValue(); + // The NUL is gone but the rest of the content survives verbatim. + assertEquals(clean, inserted.getContent()); + assertFalse(inserted.getContent().contains("\u0000")); + // Size and hash are computed on the stripped content (post-strip), + // so the syncer's idempotent diff stays consistent with what's on disk. 
+ assertEquals(clean.getBytes(java.nio.charset.StandardCharsets.UTF_8).length, inserted.getContentSize()); + assertEquals(SkillFileService.sha256Hex(clean), inserted.getSha256()); + } + private static final AtomicLong IDS = new AtomicLong(1); private static SkillFileEntity newRow(Long id, String path, String content) { diff --git a/mateclaw-server/src/test/java/vip/mate/support/CronJobDeliveryConfigPgTest.java b/mateclaw-server/src/test/java/vip/mate/support/CronJobDeliveryConfigPgTest.java new file mode 100644 index 000000000..e4173ea41 --- /dev/null +++ b/mateclaw-server/src/test/java/vip/mate/support/CronJobDeliveryConfigPgTest.java @@ -0,0 +1,94 @@ +package vip.mate.support; + +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.jdbc.core.JdbcTemplate; +import vip.mate.cron.model.CronJobEntity; +import vip.mate.cron.model.DeliveryConfig; +import vip.mate.cron.repository.CronJobMapper; + +import java.time.LocalDateTime; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** + * Exercises the JSONB write/read path end-to-end on a real PostgreSQL server, + * specifically through MyBatis-Plus's {@link + * com.baomidou.mybatisplus.extension.handlers.JacksonTypeHandler} on + * {@code CronJobEntity.deliveryConfig} (the only typeHandler-mapped JSON column + * in the codebase). + * + *
<p>
The handler binds the serialized JSON via {@code setString}, which would + * fail against a {@code jsonb} column ("column is of type jsonb but expression + * is of type character varying") without {@code stringtype=unspecified} on the + * JDBC URL. This test is the guard that the production URL setting actually + * makes that path work. + */ +@DisplayName("CronJob.deliveryConfig round-trips through a JSONB column") +class CronJobDeliveryConfigPgTest extends PostgresE2EBaseTest { + + @Autowired + private CronJobMapper cronJobMapper; + + @Autowired + private JdbcTemplate jdbcTemplate; + + @Test + @DisplayName("delivery_config is JSONB and JacksonTypeHandler round-trips") + void deliveryConfigRoundTripsThroughJsonb() { + long id = 990001L; + CronJobEntity job = new CronJobEntity(); + job.setId(id); + job.setWorkspaceId(1L); + job.setName("pg-jsonb-roundtrip"); + job.setCronExpression("0 9 * * *"); + job.setTimezone("Asia/Shanghai"); + job.setAgentId(1L); + job.setTaskType("reminder"); + job.setEnabled(true); + job.setDeleted(0); + job.setCreateTime(LocalDateTime.now()); + job.setUpdateTime(LocalDateTime.now()); + // 3-arg legacy constructor: targetId / threadId / accountId. + job.setDeliveryConfig(new DeliveryConfig("u-123", "thread-7", null)); + + assertThat(cronJobMapper.insert(job)).isEqualTo(1); + + // The column is physically JSONB. + String dataType = jdbcTemplate.queryForObject( + "SELECT data_type FROM information_schema.columns " + + "WHERE table_schema = 'mateclaw' AND table_name = 'mate_cron_job' " + + "AND column_name = 'delivery_config'", + String.class); + assertThat(dataType).isEqualTo("jsonb"); + + // JacksonTypeHandler deserializes it back to the record. 
+ CronJobEntity back = cronJobMapper.selectById(id); + assertThat(back).isNotNull(); + assertThat(back.getDeliveryConfig()).isNotNull(); + assertThat(back.getDeliveryConfig().targetId()).isEqualTo("u-123"); + assertThat(back.getDeliveryConfig().threadId()).isEqualTo("thread-7"); + + // JSONB is queryable by key on the server side (proves it's real JSON, + // not an opaque string blob). + String targetViaSql = jdbcTemplate.queryForObject( + "SELECT delivery_config ->> 'targetId' FROM mateclaw.mate_cron_job WHERE id = ?", + String.class, id); + assertThat(targetViaSql).isEqualTo("u-123"); + } + + @Test + @DisplayName("a JSONB column rejects malformed JSON") + void jsonbRejectsInvalidJson() { + // Direct insert of a non-JSON literal into a jsonb column must fail — + // this is the data-integrity payoff of the TEXT->JSONB upgrade. + assertThatThrownBy(() -> jdbcTemplate.update( + "INSERT INTO mateclaw.mate_channel " + + "(id, name, channel_type, config_json, enabled, workspace_id, create_time, update_time) " + + "VALUES (?, 'bad', 'feishu', 'not-valid-json', FALSE, 1, NOW(), NOW())", + 990002L)) + .isInstanceOf(Exception.class); + } +} diff --git a/mateclaw-server/src/test/java/vip/mate/support/PostgresE2EBaseTest.java b/mateclaw-server/src/test/java/vip/mate/support/PostgresE2EBaseTest.java new file mode 100644 index 000000000..5bd47ceae --- /dev/null +++ b/mateclaw-server/src/test/java/vip/mate/support/PostgresE2EBaseTest.java @@ -0,0 +1,94 @@ +package vip.mate.support; + +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.annotation.DirtiesContext; +import org.springframework.test.context.DynamicPropertyRegistry; +import org.springframework.test.context.DynamicPropertySource; +import org.testcontainers.containers.PostgreSQLContainer; +import org.testcontainers.junit.jupiter.Container; +import org.testcontainers.junit.jupiter.Testcontainers; +import vip.mate.MateClawApplication; + +/** + * Abstract base for PostgreSQL 
end-to-end tests that run against a real + * PostgreSQL server (Testcontainers), not the in-memory H2 the rest of the + * suite uses. + * + * <p>
Why this exists: the {@code db/migration/postgresql} tree carries + * PostgreSQL-only column types (JSONB) and a JSONB-native rewrite of V53 that + * H2 can never exercise. Running it on a real server is the only way to prove + * the 150+ migrations actually apply and that the JSONB columns behave as + * intended. See {@code docs/database-postgresql.md}. + * + * <p>
Subclasses get: + * <ul> + *   <li>A full Spring Boot context whose datasource + Flyway point at a + * throwaway {@code postgres:16-alpine} container, with the + * {@code postgresql} migration tree, {@code postgre_sql} MyBatis dialect, + * and {@code stringtype=unspecified} (so String-bound JSON values coerce + * into JSONB) — mirroring {@code application-postgres.yml}.</li> + *   <li>The {@code mateclaw} schema pre-created via an init script, matching + * the production {@code currentSchema=mateclaw} convention.</li> + *   <li>{@link DirtiesContext} after each class so a fresh context (and a fresh + * container) is used per test class.</li> + * </ul> + * + * <p>
{@link Testcontainers#disabledWithoutDocker() disabledWithoutDocker} is + * set so this test (and subclasses) are skipped, not failed, on + * machines / CI runners without a Docker daemon. A normal {@code mvn test} + * therefore stays green everywhere; the PostgreSQL coverage kicks in wherever + * Docker is available. + * + * <p>
Subclasses author the actual {@code @Test} methods; this class has none so + * JUnit's abstract-class discovery skips it. + */ +@Testcontainers(disabledWithoutDocker = true) +@SpringBootTest( + classes = MateClawApplication.class, + webEnvironment = SpringBootTest.WebEnvironment.NONE, + properties = { + "spring.flyway.enabled=true", + "spring.flyway.locations=classpath:db/migration/postgresql", + "mybatis-plus.global-config.db-config.db-type=postgre_sql", + // Keep background schedulers quiet during the test context. + "mateclaw.feature-flag.refresh-ms=999999" + } +) +@DirtiesContext(classMode = DirtiesContext.ClassMode.AFTER_CLASS) +public abstract class PostgresE2EBaseTest { + + /** + * Shared across subclasses in the same JVM run. The init script creates the + * {@code mateclaw} schema before Flyway runs (PostgreSQL won't auto-create a + * non-public schema), matching the production {@code currentSchema} setup. + */ + @Container + protected static final PostgreSQLContainer POSTGRES = + new PostgreSQLContainer<>("postgres:16-alpine") + .withDatabaseName("mateclaw") + .withUsername("test") + .withPassword("test") + .withInitScript("db/test/init-mateclaw-schema.sql") + // currentSchema pins the mateclaw schema (matching prod); + // stringtype=unspecified lets the driver coerce String-bound + // JSON values into JSONB. Added via withUrlParam so getJdbcUrl() + // returns a correctly-formed query string (no double '?'). 
+ .withUrlParam("currentSchema", "mateclaw") + .withUrlParam("stringtype", "unspecified"); + + @DynamicPropertySource + static void datasourceProps(DynamicPropertyRegistry registry) { + registry.add("spring.datasource.url", POSTGRES::getJdbcUrl); + registry.add("spring.datasource.username", POSTGRES::getUsername); + registry.add("spring.datasource.password", POSTGRES::getPassword); + registry.add("spring.datasource.driver-class-name", () -> "org.postgresql.Driver"); + // HikariCP belt-and-suspenders: force search_path on every new connection + // so nothing lands in the public schema even if currentSchema is dropped. + registry.add("spring.datasource.hikari.connection-init-sql", () -> "SET search_path TO mateclaw"); + // Flyway shares the same already-parameterized URL so it creates the + // schema objects in mateclaw too (consistent with the bootstrap runner). + registry.add("spring.flyway.url", POSTGRES::getJdbcUrl); + registry.add("spring.flyway.user", POSTGRES::getUsername); + registry.add("spring.flyway.password", POSTGRES::getPassword); + } +} diff --git a/mateclaw-server/src/test/java/vip/mate/support/PostgresMigrationSmokeTest.java b/mateclaw-server/src/test/java/vip/mate/support/PostgresMigrationSmokeTest.java new file mode 100644 index 000000000..5266bf51f --- /dev/null +++ b/mateclaw-server/src/test/java/vip/mate/support/PostgresMigrationSmokeTest.java @@ -0,0 +1,78 @@ +package vip.mate.support; + +import org.flywaydb.core.Flyway; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.jdbc.core.JdbcTemplate; + +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Proves the {@code db/migration/postgresql} tree applies cleanly on a real + * PostgreSQL server, and that the TEXT→JSONB upgrade produced actual {@code + * jsonb} columns (not text) while the intentionally-excluded columns stayed + * {@code 
text}. + * + * <p>
This is the automated form of the manual verification done when the tree + * was forked: it guards against a future migration introducing dialect-specific + * SQL that only H2/MySQL/Kingbase accept. + */ +@DisplayName("PostgreSQL migration tree applies on a real server") +class PostgresMigrationSmokeTest extends PostgresE2EBaseTest { + + @Autowired + private Flyway flyway; + + @Autowired + private JdbcTemplate jdbcTemplate; + + @Test + @DisplayName("all migrations applied successfully (no failed history rows)") + void allMigrationsApplyCleanly() { + // Spring Boot already ran Flyway on context startup; assert the outcome. + assertThat(flyway.info().applied().length) + .as("applied migration count") + .isGreaterThanOrEqualTo(150); + + Integer failed = jdbcTemplate.queryForObject( + "SELECT COUNT(*) FROM mateclaw.flyway_schema_history WHERE success = FALSE", + Integer.class); + assertThat(failed).as("failed migrations").isZero(); + } + + @Test + @DisplayName("upgraded JSON columns are physically JSONB; excluded ones stay TEXT") + void jsonColumnsHaveExpectedPhysicalTypes() { + // A representative slice of the whitelist across several tables. + List jsonbExpected = List.of( + new String[]{"mate_model_provider", "generate_kwargs"}, + new String[]{"mate_channel", "config_json"}, + new String[]{"mate_skill", "config_json"}, + new String[]{"mate_mcp_server", "headers_json"}, + new String[]{"mate_cron_job", "delivery_config"}, + new String[]{"mate_workflow_revision", "graph_json"}, + new String[]{"mate_wiki_pipeline_definition", "steps_json"}, + new String[]{"mate_tool_guard_config", "guarded_tools_json"} + ); + for (String[] tc : jsonbExpected) { + assertThat(columnType(tc[0], tc[1])) + .as("%s.%s should be jsonb", tc[0], tc[1]) + .isEqualTo("jsonb"); + } + + // C-list: deliberately kept TEXT (arbitrary JSON-schema text / + // frequently-truncated half-structured blobs). 
+ assertThat(columnType("mate_tool", "params_schema")).isEqualTo("text"); + assertThat(columnType("mate_message", "metadata")).isEqualTo("text"); + } + + private String columnType(String table, String column) { + return jdbcTemplate.queryForObject( + "SELECT data_type FROM information_schema.columns " + + "WHERE table_schema = 'mateclaw' AND table_name = ? AND column_name = ?", + String.class, table, column); + } +} diff --git a/mateclaw-server/src/test/java/vip/mate/support/SkillFileNulContentPgTest.java b/mateclaw-server/src/test/java/vip/mate/support/SkillFileNulContentPgTest.java new file mode 100644 index 000000000..4c99c2b0b --- /dev/null +++ b/mateclaw-server/src/test/java/vip/mate/support/SkillFileNulContentPgTest.java @@ -0,0 +1,58 @@ +package vip.mate.support; + +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import vip.mate.skill.model.SkillFileEntity; +import vip.mate.skill.service.SkillFileService; + +import java.util.List; +import java.util.Map; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Guards the PostgreSQL text contract for skill bundle files against the NUL + * byte ({@code }). + * + *
<p>
PostgreSQL rejects {@code 0x00} in a {@code text} column ("invalid byte + * sequence for encoding UTF8: 0x00"), whereas MySQL ({@code utf8mb4 TEXT}) and + * H2 ({@code CLOB}) silently accept it — so this failure mode is invisible on + * the default dev database and only a real PostgreSQL server can prove the fix. + * {@code SkillFileService.applyBundleFiles} is the single write chokepoint and + * strips NUL before persist; without that strip the insert below would throw on + * PostgreSQL. + */ +@DisplayName("Skill file content with a NUL byte persists on PostgreSQL") +class SkillFileNulContentPgTest extends PostgresE2EBaseTest { + + @Autowired + private SkillFileService skillFileService; + + @Test + @DisplayName("applyBundleFiles strips NUL so the TEXT insert succeeds and reads back clean") + void nulIsStrippedBeforePersist() { + long skillId = 990101L; + // An otherwise-text file carrying a stray NUL — the case that slips past + // a prefix-only binary check and dies on PostgreSQL's TEXT column. + String dirty = "print('hello')\u0000\nprint('world')"; + + SkillFileService.ApplyResult result = skillFileService.applyBundleFiles( + skillId, + Map.of("scripts/run.py", dirty), + false); + + assertThat(result.rowsWritten()).isEqualTo(1); + + List rows = skillFileService.listBySkillId(skillId); + assertThat(rows).hasSize(1); + SkillFileEntity row = rows.get(0); + assertThat(row.getFilePath()).isEqualTo("scripts/run.py"); + // The persisted content is the NUL-free form; the rest is preserved. + assertThat(row.getContent()).isEqualTo("print('hello')\nprint('world')"); + assertThat(row.getContent()).doesNotContain("\u0000"); + // content_size reflects the stored (stripped) content, not the original. 
+ assertThat(row.getContentSize()) + .isEqualTo("print('hello')\nprint('world')".getBytes(java.nio.charset.StandardCharsets.UTF_8).length); + } +} diff --git a/mateclaw-server/src/test/resources/db/test/init-mateclaw-schema.sql b/mateclaw-server/src/test/resources/db/test/init-mateclaw-schema.sql new file mode 100644 index 000000000..4ee35401f --- /dev/null +++ b/mateclaw-server/src/test/resources/db/test/init-mateclaw-schema.sql @@ -0,0 +1,4 @@ +-- Pre-create the mateclaw schema before Flyway runs. PostgreSQL does not +-- auto-create a non-public schema, and the test datasource connects with +-- currentSchema=mateclaw (matching production application-postgres.yml). +CREATE SCHEMA IF NOT EXISTS mateclaw;