From e32f2007a1fa4d35cc37eb90fd6882b05db0d6c2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 7 Apr 2026 08:43:52 +0000 Subject: [PATCH 1/8] Initial plan From 4252f10ed403bd907776a1437d7f903d46abbdad Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 7 Apr 2026 09:09:56 +0000 Subject: [PATCH 2/8] feat: add sandboxed verification environment for OpenDataMask Agent-Logs-Url: https://github.com/MaximumTrainer/OpenDataMask/sessions/36a80646-cf40-4ec0-b04e-34c637988f21 Co-authored-by: MaximumTrainer <1376575+MaximumTrainer@users.noreply.github.com> --- verification/.env.example | 23 +++ verification/README.md | 163 ++++++++++++++++++ verification/docker-compose.yml | 123 +++++++++++++ verification/init/source_db.sql | 64 +++++++ verification/requirements.txt | 1 + verification/run_verification.sh | 217 +++++++++++++++++++++++ verification/verify.py | 284 +++++++++++++++++++++++++++++++ 7 files changed, 875 insertions(+) create mode 100644 verification/.env.example create mode 100644 verification/README.md create mode 100644 verification/docker-compose.yml create mode 100644 verification/init/source_db.sql create mode 100644 verification/requirements.txt create mode 100755 verification/run_verification.sh create mode 100644 verification/verify.py diff --git a/verification/.env.example b/verification/.env.example new file mode 100644 index 0000000..bfc7fea --- /dev/null +++ b/verification/.env.example @@ -0,0 +1,23 @@ +# OpenDataMask Verification Sandbox — Environment Variables +# +# Copy this file to .env and edit as needed before running docker compose. +# These values are for the SANDBOX verification environment ONLY. +# Never reuse them in any non-sandbox environment. 
+# +# cp .env.example .env +# docker compose -f docker-compose.yml up -d + +# ── OpenDataMask backend secrets ───────────────────────────────────────────── +# Generate fresh values with: openssl rand -base64 32 +ODM_JWT_SECRET=odm-verification-jwt-secret-sandbox-not-for-production-use-xyz +ODM_ENCRYPTION_KEY=odm-verify-enc-key-sandbox-only + +# ── Source database credentials ─────────────────────────────────────────────── +SOURCE_DB_NAME=source_db +SOURCE_DB_USER=source_user +SOURCE_DB_PASS=source_pass + +# ── Target database credentials ─────────────────────────────────────────────── +TARGET_DB_NAME=target_db +TARGET_DB_USER=target_user +TARGET_DB_PASS=target_pass diff --git a/verification/README.md b/verification/README.md new file mode 100644 index 0000000..03f9858 --- /dev/null +++ b/verification/README.md @@ -0,0 +1,163 @@ +# OpenDataMask — Sandboxed Verification Environment + +This directory contains a self-contained, Docker-based environment for +proving that OpenDataMask correctly masks sensitive PII data while preserving +referential integrity. + +## What It Does + +| Step | Description | +|------|-------------| +| **SOURCE_DB** | PostgreSQL database pre-seeded with 50 realistic user records (UUID PK, full_name, email, phone_number, date_of_birth, salary). | +| **TARGET_DB** | Empty PostgreSQL database that receives the masked data. | +| **Masking job** | OpenDataMask reads every row from SOURCE_DB, applies Datafaker-powered generators to all PII columns, and writes the anonymised rows to TARGET_DB — keeping the original UUID primary keys intact. | +| **Verification** | A Python script connects to both databases and validates: row counts, key persistence, masking effectiveness, and human-readability of the output. 
| + +## Directory Layout + +``` +verification/ +├── docker-compose.yml # SOURCE_DB, TARGET_DB, app_db, backend, frontend +├── init/ +│ └── source_db.sql # DDL + 50 seed records for SOURCE_DB +├── run_verification.sh # Full end-to-end orchestration script +├── verify.py # Python validation script +├── requirements.txt # Python dependencies (psycopg2-binary) +└── README.md # This file +``` + +## Prerequisites + +| Tool | Version | +|------|---------| +| Docker Engine | ≥ 24 | +| Docker Compose | v2 (`docker compose`) or v1 (`docker-compose`) | +| curl | any | +| Python 3 | ≥ 3.10 | +| pip3 | any | + +## Quick Start + +```bash +# Run from the repository root or the verification/ directory: +cd verification/ +chmod +x run_verification.sh +./run_verification.sh +``` + +The script will: + +1. Build the backend and frontend Docker images. +2. Start all services and wait for them to be healthy. +3. Register a user and authenticate with the OpenDataMask API. +4. Create a workspace, source & destination connections, table configuration, + and per-column masking generators. +5. Trigger a masking job and poll until it completes. +6. Run `verify.py` and print a Verification Report. 
+ +## Running Only the Verification Script + +If the environment is already running and the masking job has already completed: + +```bash +pip3 install -r requirements.txt +python3 verify.py +``` + +### Environment Variables (optional overrides) + +| Variable | Default | Description | +|----------|---------|-------------| +| `SOURCE_DB_HOST` | `localhost` | Source DB hostname | +| `SOURCE_DB_PORT` | `5433` | Source DB port (host-mapped) | +| `SOURCE_DB_NAME` | `source_db` | Source DB database name | +| `SOURCE_DB_USER` | `source_user` | Source DB username | +| `SOURCE_DB_PASS` | `source_pass` | Source DB password | +| `TARGET_DB_HOST` | `localhost` | Target DB hostname | +| `TARGET_DB_PORT` | `5434` | Target DB port (host-mapped) | +| `TARGET_DB_NAME` | `target_db` | Target DB database name | +| `TARGET_DB_USER` | `target_user` | Target DB username | +| `TARGET_DB_PASS` | `target_pass` | Target DB password | + +## Verification Checks + +### 1 · Record Integrity +Confirms the row count in SOURCE_DB matches TARGET_DB (both should be **50**). + +### 2 · Key Persistence +For every `id` (UUID) in SOURCE_DB, verifies the exact same `id` exists in +TARGET_DB, and fails if TARGET_DB contains any `id` that is absent from SOURCE_DB. This proves the tool does **not** hash or alter primary keys. + +### 3 · Masking Effectiveness +Compares `full_name` and `email` for every matching `id`. The check **passes** +only if: + +``` +source.id == target.id AND +source.full_name != target.full_name AND +source.email != target.email +``` + +### 4 · Human Readability +Prints a sample of 5 masked records to the console so a human can visually +confirm the output looks realistic (e.g., a real-looking name and a valid +e-mail address rather than random strings like `asdfghjkl`). The check also fails automatically if a sampled `full_name` contains no space or a sampled `email` contains no `@`. 
+ +### Sample Report Output + +``` +════════════════════════════════════════════════════════════ + OpenDataMask — Verification Report +════════════════════════════════════════════════════════════ + +Connecting to SOURCE_DB (localhost:5433/source_db)… +Connecting to TARGET_DB (localhost:5434/target_db)… + + ── Masked Record Sample (TARGET_DB) ────────────────────── + [1] id : a1b2c3d4-0001-4000-8000-000000000001 + full_name : Johnathan Mraz + email : cordell.okon@yahoo.com + phone_number : 1-541-388-3947 + date_of_birth : Mon Jan 15 00:00:00 UTC 1990 + salary : 97432 + +──────────────────────────────────────────────────────────── + Results +──────────────────────────────────────────────────────────── + [✓] Record Integrity (row count matches): PASS + Source row count : 50 + Target row count : 50 + [✓] Key Persistence (all source IDs present in target): PASS + Source IDs : 50 + Target IDs : 50 + [✓] Masking Effectiveness (PII fields differ between source and target): PASS + Rows compared : 50 + Name unchanged (should be 0) : 0 + Email unchanged (should be 0) : 0 + [✓] Human Readability (sample of 5 masked records): PASS + +════════════════════════════════════════════════════════════ + OK ALL 4/4 CHECKS PASSED +════════════════════════════════════════════════════════════ +``` + +## Masking Rules Applied + +| Column | Generator | Behaviour | +|--------|-----------|-----------| +| `id` | *(none — passthrough)* | UUID primary key is preserved exactly. | +| `full_name` | `FULL_NAME` | Replaced with a random realistic full name via Datafaker. | +| `email` | `EMAIL` | Replaced with a random realistic e-mail address. | +| `phone_number` | `PHONE` | Replaced with a random phone number. | +| `date_of_birth` | `BIRTH_DATE` | Replaced with a random birthday. | +| `salary` | `RANDOM_INT` (30 000–200 000) | Replaced with a random integer in range. 
| + +## Tearing Down + +```bash +cd verification/ +docker compose -f docker-compose.yml down -v +``` + +The `-v` flag also removes the named volume (`app_db_data`) so the next run +starts with a clean OpenDataMask application database. diff --git a/verification/docker-compose.yml b/verification/docker-compose.yml new file mode 100644 index 0000000..d9ea94e --- /dev/null +++ b/verification/docker-compose.yml @@ -0,0 +1,123 @@ +version: '3.8' + +# ───────────────────────────────────────────────────────────────────────────── +# OpenDataMask — Sandboxed Verification Environment +# +# Services +# source_db PostgreSQL database pre-seeded with 50 sensitive user records. +# target_db Empty PostgreSQL database; receives the masked data. +# app_db PostgreSQL database used by the OpenDataMask backend for its +# own application state (workspaces, jobs, configs, etc.). +# backend OpenDataMask Spring Boot backend. +# frontend OpenDataMask Vue.js frontend (optional for visual inspection). +# +# Secrets are loaded from a .env file (copy .env.example → .env). +# The defaults shown are for the sandbox ONLY — never reuse in production. 
+# ───────────────────────────────────────────────────────────────────────────── + +services: + + # ── Source database ──────────────────────────────────────────────────────── + source_db: + image: postgres:16-alpine + container_name: odm_source_db + environment: + POSTGRES_DB: ${SOURCE_DB_NAME:-source_db} + POSTGRES_USER: ${SOURCE_DB_USER:-source_user} + POSTGRES_PASSWORD: ${SOURCE_DB_PASS:-source_pass} + ports: + - "5433:5432" + volumes: + - ./init:/docker-entrypoint-initdb.d # runs source_db.sql on first start + networks: + - odm_net + healthcheck: + test: ["CMD-SHELL", "pg_isready -U $${POSTGRES_USER} -d $${POSTGRES_DB}"] + interval: 5s + timeout: 5s + retries: 10 + + # ── Target (destination) database ───────────────────────────────────────── + target_db: + image: postgres:16-alpine + container_name: odm_target_db + environment: + POSTGRES_DB: ${TARGET_DB_NAME:-target_db} + POSTGRES_USER: ${TARGET_DB_USER:-target_user} + POSTGRES_PASSWORD: ${TARGET_DB_PASS:-target_pass} + ports: + - "5434:5432" + networks: + - odm_net + healthcheck: + test: ["CMD-SHELL", "pg_isready -U $${POSTGRES_USER} -d $${POSTGRES_DB}"] + interval: 5s + timeout: 5s + retries: 10 + + # ── OpenDataMask application database ───────────────────────────────────── + app_db: + image: postgres:16-alpine + container_name: odm_app_db + environment: + POSTGRES_DB: opendatamask + POSTGRES_USER: opendatamask + POSTGRES_PASSWORD: opendatamask + volumes: + - app_db_data:/var/lib/postgresql/data + networks: + - odm_net + healthcheck: + test: ["CMD-SHELL", "pg_isready -U opendatamask"] + interval: 5s + timeout: 5s + retries: 10 + + # ── OpenDataMask backend ─────────────────────────────────────────────────── + backend: + build: + context: ../backend + dockerfile: Dockerfile + container_name: odm_backend + ports: + - "8080:8080" + environment: + DATABASE_URL: jdbc:postgresql://app_db:5432/opendatamask + DATABASE_USERNAME: opendatamask + DATABASE_PASSWORD: opendatamask + # NOTE: these secrets are loaded from 
.env (see .env.example). + # The fallback values are for the verification sandbox ONLY — never reuse in production. + JWT_SECRET: ${ODM_JWT_SECRET:-odm-verification-jwt-secret-sandbox-not-for-production-use-xyz} + ENCRYPTION_KEY: ${ODM_ENCRYPTION_KEY:-odm-verify-enc-key-sandbox-only} + SERVER_PORT: "8080" + depends_on: + app_db: + condition: service_healthy + networks: + - odm_net + healthcheck: + test: ["CMD-SHELL", "wget -qO- http://localhost:8080/actuator/health || exit 1"] + interval: 15s + timeout: 10s + retries: 12 + start_period: 90s + + # ── OpenDataMask frontend (optional) ────────────────────────────────────── + frontend: + build: + context: ../frontend + dockerfile: Dockerfile + container_name: odm_frontend + ports: + - "80:80" + depends_on: + - backend + networks: + - odm_net + +networks: + odm_net: + driver: bridge + +volumes: + app_db_data: diff --git a/verification/init/source_db.sql b/verification/init/source_db.sql new file mode 100644 index 0000000..40db1b9 --- /dev/null +++ b/verification/init/source_db.sql @@ -0,0 +1,64 @@ +-- Initialise SOURCE_DB: create the users table and insert 50 realistic records. +-- This script is executed automatically by the postgres:16-alpine container +-- via the /docker-entrypoint-initdb.d/ mount. 
+ +CREATE TABLE IF NOT EXISTS users ( + id UUID PRIMARY KEY, + full_name TEXT NOT NULL, + email TEXT NOT NULL UNIQUE, + phone_number TEXT NOT NULL, + date_of_birth DATE NOT NULL, + salary NUMERIC(10,2) NOT NULL +); + +INSERT INTO users (id, full_name, email, phone_number, date_of_birth, salary) VALUES + ('a1b2c3d4-0001-4000-8000-000000000001','Alice Johnson', 'alice.johnson@example.com', '+1-555-0101','1985-03-15', 72500.00), + ('a1b2c3d4-0002-4000-8000-000000000002','Bob Martinez', 'bob.martinez@corp.net', '+1-555-0102','1990-07-22', 55000.00), + ('a1b2c3d4-0003-4000-8000-000000000003','Carol Williams', 'carol.w@techfirm.io', '+1-555-0103','1978-11-30', 98000.00), + ('a1b2c3d4-0004-4000-8000-000000000004','David Lee', 'david.lee@startup.co', '+1-555-0104','1995-02-08', 47000.00), + ('a1b2c3d4-0005-4000-8000-000000000005','Eva Brown', 'eva.brown@example.com', '+1-555-0105','1988-09-14', 63000.00), + ('a1b2c3d4-0006-4000-8000-000000000006','Frank Davis', 'frank.davis@enterprise.org', '+1-555-0106','1975-06-01', 112000.00), + ('a1b2c3d4-0007-4000-8000-000000000007','Grace Wilson', 'grace.wilson@agency.com', '+1-555-0107','1993-12-20', 58500.00), + ('a1b2c3d4-0008-4000-8000-000000000008','Henry Moore', 'henry.moore@corp.net', '+1-555-0108','1982-04-05', 87000.00), + ('a1b2c3d4-0009-4000-8000-000000000009','Irene Taylor', 'irene.taylor@techfirm.io', '+1-555-0109','1970-08-18', 134000.00), + ('a1b2c3d4-0010-4000-8000-000000000010','James Anderson', 'james.anderson@startup.co', '+1-555-0110','1998-01-27', 42000.00), + ('a1b2c3d4-0011-4000-8000-000000000011','Karen Thomas', 'karen.thomas@example.com', '+1-555-0111','1987-10-11', 76000.00), + ('a1b2c3d4-0012-4000-8000-000000000012','Liam Jackson', 'liam.jackson@enterprise.org', '+1-555-0112','1991-03-29', 53000.00), + ('a1b2c3d4-0013-4000-8000-000000000013','Mia White', 'mia.white@agency.com', '+1-555-0113','1984-07-17', 69000.00), + ('a1b2c3d4-0014-4000-8000-000000000014','Noah Harris', 'noah.harris@corp.net', 
'+1-555-0114','1996-11-03', 49500.00), + ('a1b2c3d4-0015-4000-8000-000000000015','Olivia Martin', 'olivia.martin@techfirm.io', '+1-555-0115','1979-05-22', 105000.00), + ('a1b2c3d4-0016-4000-8000-000000000016','Peter Garcia', 'peter.garcia@startup.co', '+1-555-0116','1994-09-06', 44000.00), + ('a1b2c3d4-0017-4000-8000-000000000017','Quinn Rodriguez', 'quinn.rodriguez@example.com', '+1-555-0117','1986-02-14', 82000.00), + ('a1b2c3d4-0018-4000-8000-000000000018','Rachel Lewis', 'rachel.lewis@enterprise.org', '+1-555-0118','1973-06-30', 118000.00), + ('a1b2c3d4-0019-4000-8000-000000000019','Samuel Lee', 'samuel.lee@agency.com', '+1-555-0119','1999-10-19', 38000.00), + ('a1b2c3d4-0020-4000-8000-000000000020','Tina Walker', 'tina.walker@corp.net', '+1-555-0120','1983-04-08', 91000.00), + ('a1b2c3d4-0021-4000-8000-000000000021','Umar Hall', 'umar.hall@techfirm.io', '+1-555-0121','1997-08-25', 46500.00), + ('a1b2c3d4-0022-4000-8000-000000000022','Vera Allen', 'vera.allen@startup.co', '+1-555-0122','1981-12-13', 77000.00), + ('a1b2c3d4-0023-4000-8000-000000000023','Walter Young', 'walter.young@example.com', '+1-555-0123','1968-03-02', 145000.00), + ('a1b2c3d4-0024-4000-8000-000000000024','Xena Hernandez', 'xena.hernandez@enterprise.org', '+1-555-0124','1992-07-21', 61000.00), + ('a1b2c3d4-0025-4000-8000-000000000025','Yusuf King', 'yusuf.king@agency.com', '+1-555-0125','1976-11-09', 127000.00), + ('a1b2c3d4-0026-4000-8000-000000000026','Zoe Wright', 'zoe.wright@corp.net', '+1-555-0126','1989-05-28', 74000.00), + ('a1b2c3d4-0027-4000-8000-000000000027','Aaron Scott', 'aaron.scott@techfirm.io', '+1-555-0127','1993-09-16', 57000.00), + ('a1b2c3d4-0028-4000-8000-000000000028','Bella Torres', 'bella.torres@startup.co', '+1-555-0128','1980-01-04', 99000.00), + ('a1b2c3d4-0029-4000-8000-000000000029','Carlos Nguyen', 'carlos.nguyen@example.com', '+1-555-0129','1995-06-23', 43000.00), + ('a1b2c3d4-0030-4000-8000-000000000030','Diana Hill', 'diana.hill@enterprise.org', 
'+1-555-0130','1972-10-12', 138000.00), + ('a1b2c3d4-0031-4000-8000-000000000031','Ethan Flores', 'ethan.flores@agency.com', '+1-555-0131','1988-02-01', 66000.00), + ('a1b2c3d4-0032-4000-8000-000000000032','Fiona Green', 'fiona.green@corp.net', '+1-555-0132','1977-05-19', 121000.00), + ('a1b2c3d4-0033-4000-8000-000000000033','George Adams', 'george.adams@techfirm.io', '+1-555-0133','1991-09-07', 51000.00), + ('a1b2c3d4-0034-4000-8000-000000000034','Hannah Nelson', 'hannah.nelson@startup.co', '+1-555-0134','1984-12-26', 84000.00), + ('a1b2c3d4-0035-4000-8000-000000000035','Isaac Carter', 'isaac.carter@example.com', '+1-555-0135','1998-04-14', 40000.00), + ('a1b2c3d4-0036-4000-8000-000000000036','Julia Mitchell', 'julia.mitchell@enterprise.org', '+1-555-0136','1971-08-03', 152000.00), + ('a1b2c3d4-0037-4000-8000-000000000037','Kevin Perez', 'kevin.perez@agency.com', '+1-555-0137','1994-12-22', 48000.00), + ('a1b2c3d4-0038-4000-8000-000000000038','Laura Roberts', 'laura.roberts@corp.net', '+1-555-0138','1986-04-10', 79000.00), + ('a1b2c3d4-0039-4000-8000-000000000039','Marcus Turner', 'marcus.turner@techfirm.io', '+1-555-0139','1979-08-29', 107000.00), + ('a1b2c3d4-0040-4000-8000-000000000040','Natalie Phillips', 'natalie.phillips@startup.co', '+1-555-0140','1996-01-17', 45000.00), + ('a1b2c3d4-0041-4000-8000-000000000041','Oscar Campbell', 'oscar.campbell@example.com', '+1-555-0141','1983-05-06', 93000.00), + ('a1b2c3d4-0042-4000-8000-000000000042','Penelope Parker', 'penelope.parker@enterprise.org', '+1-555-0142','1975-09-24', 114000.00), + ('a1b2c3d4-0043-4000-8000-000000000043','Quincy Evans', 'quincy.evans@agency.com', '+1-555-0143','1990-02-12', 60000.00), + ('a1b2c3d4-0044-4000-8000-000000000044','Rebecca Edwards', 'rebecca.edwards@corp.net', '+1-555-0144','1967-06-01', 167000.00), + ('a1b2c3d4-0045-4000-8000-000000000045','Simon Collins', 'simon.collins@techfirm.io', '+1-555-0145','1992-09-20', 55500.00), + ('a1b2c3d4-0046-4000-8000-000000000046','Teresa 
Stewart', 'teresa.stewart@startup.co', '+1-555-0146','1985-01-08', 71000.00), + ('a1b2c3d4-0047-4000-8000-000000000047','Ursula Sanchez', 'ursula.sanchez@example.com', '+1-555-0147','1997-05-27', 41500.00), + ('a1b2c3d4-0048-4000-8000-000000000048','Vincent Morris', 'vincent.morris@enterprise.org', '+1-555-0148','1974-09-15', 129000.00), + ('a1b2c3d4-0049-4000-8000-000000000049','Wendy Rogers', 'wendy.rogers@agency.com', '+1-555-0149','1988-01-03', 68000.00), + ('a1b2c3d4-0050-4000-8000-000000000050','Xavier Reed', 'xavier.reed@corp.net', '+1-555-0150','1982-04-22', 95000.00); diff --git a/verification/requirements.txt b/verification/requirements.txt new file mode 100644 index 0000000..04b95e4 --- /dev/null +++ b/verification/requirements.txt @@ -0,0 +1 @@ +psycopg2-binary>=2.9.0 diff --git a/verification/run_verification.sh b/verification/run_verification.sh new file mode 100755 index 0000000..c5bd94a --- /dev/null +++ b/verification/run_verification.sh @@ -0,0 +1,217 @@ +#!/usr/bin/env bash +# run_verification.sh — End-to-end verification runner for OpenDataMask. +# +# This script: +# 1. Starts the sandboxed Docker environment (source_db, target_db, app_db, backend). +# 2. Waits for all services to become healthy. +# 3. Configures OpenDataMask via its REST API (workspace, connections, +# table configuration, column generators). +# 4. Triggers a masking job and waits for it to complete. +# 5. Invokes verify.py to validate masking results. +# +# Prerequisites: docker compose (v2), curl, python3, pip3. +# Run from the repository root or from the verification/ directory. 
+ +set -euo pipefail + +# ── Resolve paths ───────────────────────────────────────────────────────────── +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$SCRIPT_DIR" + +# ── Colour helpers ──────────────────────────────────────────────────────────── +RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'; NC='\033[0m' +info() { echo -e "${GREEN}[INFO]${NC} $*"; } +warn() { echo -e "${YELLOW}[WARN]${NC} $*"; } +error() { echo -e "${RED}[ERROR]${NC} $*" >&2; } +die() { error "$*"; exit 1; } + +# ── Configuration ───────────────────────────────────────────────────────────── +API_BASE="http://localhost:8080" +ODM_USER="verifier" +ODM_PASS="Verif1cation!Pass" +ODM_EMAIL="verifier@odm-sandbox.local" + +# ── Prerequisites check ─────────────────────────────────────────────────────── +info "Checking prerequisites…" +command -v docker >/dev/null 2>&1 || die "docker is required but not installed." +command -v curl >/dev/null 2>&1 || die "curl is required but not installed." +command -v python3 >/dev/null 2>&1 || die "python3 is required but not installed." + +# Support both `docker compose` (v2) and `docker-compose` (v1) +if docker compose version >/dev/null 2>&1; then + DC="docker compose" +elif command -v docker-compose >/dev/null 2>&1; then + DC="docker-compose" +else + die "docker compose (v2) or docker-compose (v1) is required but not found." 
+fi + +# ── Install Python dependencies ─────────────────────────────────────────────── +info "Installing Python dependencies…" +pip3 install -q -r requirements.txt + +# ── Start Docker environment ────────────────────────────────────────────────── +info "Starting Docker environment…" +$DC -f docker-compose.yml up -d --build + +# ── Wait for backend health ─────────────────────────────────────────────────── +info "Waiting for OpenDataMask backend to become healthy (up to 3 min)…" +MAX_WAIT=180 +ELAPSED=0 +until curl -sf "${API_BASE}/actuator/health" | grep -q '"status":"UP"'; do + if [ $ELAPSED -ge $MAX_WAIT ]; then + die "Backend did not become healthy within ${MAX_WAIT}s." + fi + sleep 5 + ELAPSED=$((ELAPSED + 5)) + echo -n "." +done +echo "" +info "Backend is healthy." + +# ── Helper: call the API ────────────────────────────────────────────────────── +# api_post → response body +api_post() { + local path="$1" body="$2" + curl -sf -X POST "${API_BASE}${path}" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer ${TOKEN:-}" \ + -d "$body" +} + +# api_get → response body +api_get() { + local path="$1" + curl -sf -X GET "${API_BASE}${path}" \ + -H "Authorization: Bearer ${TOKEN:-}" +} + +# ── Register user (ignore error if already exists) ──────────────────────────── +info "Registering user '${ODM_USER}'…" +curl -sf -X POST "${API_BASE}/api/auth/register" \ + -H "Content-Type: application/json" \ + -d "{\"username\":\"${ODM_USER}\",\"email\":\"${ODM_EMAIL}\",\"password\":\"${ODM_PASS}\"}" \ + > /dev/null 2>&1 || true # silently continue if user already exists + +# ── Login ───────────────────────────────────────────────────────────────────── +info "Logging in…" +LOGIN_RESP=$(curl -sf -X POST "${API_BASE}/api/auth/login" \ + -H "Content-Type: application/json" \ + -d "{\"username\":\"${ODM_USER}\",\"password\":\"${ODM_PASS}\"}" \ + || die "Login request failed. 
Check that the backend is running and reachable at ${API_BASE}.") + +TOKEN=$(echo "$LOGIN_RESP" \ + | python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('token',''))" \ + 2>/dev/null || true) +[ -n "$TOKEN" ] || die "Failed to obtain JWT token. Login response: ${LOGIN_RESP}" +info "Authenticated successfully." + +# ── Create workspace ────────────────────────────────────────────────────────── +info "Creating workspace…" +WS_RESP=$(api_post "/api/workspaces" \ + '{"name":"Verification Workspace","description":"Automated PII masking verification"}') +WS_ID=$(echo "$WS_RESP" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])") +info "Workspace created: id=${WS_ID}" + +# ── Create source connection ────────────────────────────────────────────────── +info "Creating source data connection (SOURCE_DB)…" +SRC_RESP=$(api_post "/api/workspaces/${WS_ID}/connections" \ + '{"name":"source-db","type":"POSTGRESQL", + "connectionString":"jdbc:postgresql://source_db:5432/source_db", + "username":"source_user","password":"source_pass", + "isSource":true,"isDestination":false}') +SRC_CONN_ID=$(echo "$SRC_RESP" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])") +info "Source connection created: id=${SRC_CONN_ID}" + +# ── Create destination connection ───────────────────────────────────────────── +info "Creating destination data connection (TARGET_DB)…" +DST_RESP=$(api_post "/api/workspaces/${WS_ID}/connections" \ + '{"name":"target-db","type":"POSTGRESQL", + "connectionString":"jdbc:postgresql://target_db:5432/target_db", + "username":"target_user","password":"target_pass", + "isSource":false,"isDestination":true}') +DST_CONN_ID=$(echo "$DST_RESP" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])") +info "Destination connection created: id=${DST_CONN_ID}" + +# ── Create table configuration (MASK mode) ──────────────────────────────────── +info "Creating table configuration for 'users' (MASK mode)…" +TABLE_RESP=$(api_post 
"/api/workspaces/${WS_ID}/tables" \ + '{"tableName":"users","mode":"MASK"}') +TABLE_ID=$(echo "$TABLE_RESP" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])") +info "Table configuration created: id=${TABLE_ID}" + +# ── Add column generators ───────────────────────────────────────────────────── +# The 'id' column has no generator → it is passed through unchanged (PK preserved). + +add_generator() { + local col="$1" gtype="$2" params="${3:-null}" + if [ "$params" = "null" ]; then + BODY="{\"columnName\":\"${col}\",\"generatorType\":\"${gtype}\"}" + else + BODY="{\"columnName\":\"${col}\",\"generatorType\":\"${gtype}\",\"generatorParams\":${params}}" + fi + api_post "/api/workspaces/${WS_ID}/tables/${TABLE_ID}/generators" "$BODY" > /dev/null + info " Generator added: ${col} → ${gtype}" +} + +info "Configuring column generators…" +add_generator "full_name" "FULL_NAME" +add_generator "email" "EMAIL" +add_generator "phone_number" "PHONE" +add_generator "date_of_birth" "BIRTH_DATE" +add_generator "salary" "RANDOM_INT" '{"min":"30000","max":"200000"}' + +# ── Run masking job ─────────────────────────────────────────────────────────── +info "Triggering masking job…" +JOB_RESP=$(api_post "/api/workspaces/${WS_ID}/jobs" '{}') +JOB_ID=$(echo "$JOB_RESP" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])") +info "Job started: id=${JOB_ID}" + +# ── Poll until job completes ────────────────────────────────────────────────── +info "Waiting for job ${JOB_ID} to complete…" +MAX_WAIT=120 +ELAPSED=0 +while true; do + STATUS=$(api_get "/api/workspaces/${WS_ID}/jobs/${JOB_ID}" \ + | python3 -c "import sys,json; print(json.load(sys.stdin)['status'])") + if [ "$STATUS" = "COMPLETED" ]; then + info "Job completed successfully." + break + elif [ "$STATUS" = "FAILED" ] || [ "$STATUS" = "CANCELLED" ]; then + # Print job logs for debugging + warn "Job ended with status: ${STATUS}. 
Fetching logs…" + api_get "/api/workspaces/${WS_ID}/jobs/${JOB_ID}/logs" \ + | python3 -c " +import sys, json +logs = json.load(sys.stdin) +for l in logs: + print(f'[{l[\"level\"]}] {l[\"message\"]}') +" + die "Masking job ${JOB_ID} did not complete successfully (status=${STATUS})." + fi + if [ $ELAPSED -ge $MAX_WAIT ]; then + die "Job did not complete within ${MAX_WAIT}s." + fi + sleep 5 + ELAPSED=$((ELAPSED + 5)) + echo -n "." +done +echo "" + +# ── Run Python verification ─────────────────────────────────────────────────── +info "Running verification script…" +python3 verify.py + +EXIT_CODE=$? +if [ $EXIT_CODE -eq 0 ]; then + echo "" + echo -e "${GREEN}════════════════════════════════════════${NC}" + echo -e "${GREEN} ✓ ALL VERIFICATION CHECKS PASSED ${NC}" + echo -e "${GREEN}════════════════════════════════════════${NC}" +else + echo "" + echo -e "${RED}════════════════════════════════════════${NC}" + echo -e "${RED} ✗ ONE OR MORE VERIFICATION CHECKS FAILED ${NC}" + echo -e "${RED}════════════════════════════════════════${NC}" + exit $EXIT_CODE +fi diff --git a/verification/verify.py b/verification/verify.py new file mode 100644 index 0000000..51aa8a6 --- /dev/null +++ b/verification/verify.py @@ -0,0 +1,284 @@ +#!/usr/bin/env python3 +""" +verify.py — OpenDataMask Sandboxed Verification Script +======================================================= +Connects to SOURCE_DB and TARGET_DB after a masking job has run and +performs the following automated checks: + + 1. Record Integrity — row count in SOURCE matches TARGET. + 2. Key Persistence — every id present in SOURCE exists in TARGET. + 3. Masking Effectiveness — full_name and email are different between + source and target for every row. + 4. Human Readability — logs a sample of 5 masked records so a human + can visually confirm the data looks realistic. + +Outputs a "Verification Report" summarising pass / fail status. 
+ +Environment variables (with defaults matching docker-compose.yml): + SOURCE_DB_HOST / SOURCE_DB_PORT / SOURCE_DB_NAME + SOURCE_DB_USER / SOURCE_DB_PASS + TARGET_DB_HOST / TARGET_DB_PORT / TARGET_DB_NAME + TARGET_DB_USER / TARGET_DB_PASS +""" + +import os +import sys + +try: + import psycopg2 + import psycopg2.extras + from psycopg2 import sql as pgsql +except ImportError: + print("ERROR: psycopg2 is not installed. Run: pip install psycopg2-binary") + sys.exit(1) + + +# ── Database connection parameters ────────────────────────────────────────── + +SOURCE = dict( + host=os.getenv("SOURCE_DB_HOST", "localhost"), + port=int(os.getenv("SOURCE_DB_PORT", "5433")), + dbname=os.getenv("SOURCE_DB_NAME", "source_db"), + user=os.getenv("SOURCE_DB_USER", "source_user"), + password=os.getenv("SOURCE_DB_PASS", "source_pass"), +) + +TARGET = dict( + host=os.getenv("TARGET_DB_HOST", "localhost"), + port=int(os.getenv("TARGET_DB_PORT", "5434")), + dbname=os.getenv("TARGET_DB_NAME", "target_db"), + user=os.getenv("TARGET_DB_USER", "target_user"), + password=os.getenv("TARGET_DB_PASS", "target_pass"), +) + +TABLE = "users" + + +# ── Helpers ────────────────────────────────────────────────────────────────── + +class Check: + PASS = "PASS" + FAIL = "FAIL" + + def __init__(self, name: str): + self.name = name + self.status = Check.PASS + self.messages: list[str] = [] + + def fail(self, msg: str) -> None: + self.status = Check.FAIL + self.messages.append(msg) + + def info(self, msg: str) -> None: + self.messages.append(msg) + + def __str__(self) -> str: + icon = "✓" if self.status == Check.PASS else "✗" + lines = [f" [{icon}] {self.name}: {self.status}"] + for m in self.messages: + lines.append(f" {m}") + return "\n".join(lines) + + +def connect(params: dict): + try: + conn = psycopg2.connect(**params, cursor_factory=psycopg2.extras.RealDictCursor) + conn.autocommit = True + return conn + except psycopg2.OperationalError as exc: + print(f"ERROR: Cannot connect to database 
{params['dbname']}@{params['host']}:{params['port']}") + print(f" {exc}") + sys.exit(1) + + +def fetch_all(conn, query, params=None) -> list[dict]: + with conn.cursor() as cur: + cur.execute(query, params) + return cur.fetchall() + + +def count_rows(conn, table: str) -> int: + # Use pgsql.Identifier to safely quote the table name and prevent SQL injection. + query = pgsql.SQL("SELECT COUNT(*) AS cnt FROM {}").format(pgsql.Identifier(table)) + rows = fetch_all(conn, query) + return rows[0]["cnt"] + + +# ── Verification checks ─────────────────────────────────────────────────────── + +def check_record_integrity(src_conn, tgt_conn) -> Check: + chk = Check("Record Integrity (row count matches)") + src_count = count_rows(src_conn, TABLE) + tgt_count = count_rows(tgt_conn, TABLE) + chk.info(f"Source row count : {src_count}") + chk.info(f"Target row count : {tgt_count}") + if src_count != tgt_count: + chk.fail( + f"Row count mismatch: source={src_count}, target={tgt_count}" + ) + return chk + + +def check_key_persistence(src_conn, tgt_conn) -> Check: + chk = Check("Key Persistence (all source IDs present in target)") + id_query = pgsql.SQL("SELECT id FROM {}").format(pgsql.Identifier(TABLE)) + src_ids = {str(r["id"]) for r in fetch_all(src_conn, id_query)} + tgt_ids = {str(r["id"]) for r in fetch_all(tgt_conn, id_query)} + + missing = src_ids - tgt_ids + extra = tgt_ids - src_ids + chk.info(f"Source IDs : {len(src_ids)}") + chk.info(f"Target IDs : {len(tgt_ids)}") + + if missing: + chk.fail( + f"{len(missing)} source ID(s) missing from target: " + f"{sorted(missing)[:5]}{'...' if len(missing) > 5 else ''}" + ) + if extra: + chk.fail( + f"{len(extra)} unexpected ID(s) found only in target: " + f"{sorted(extra)[:5]}{'...' 
if len(extra) > 5 else ''}" + ) + return chk + + +def check_masking_effectiveness(src_conn, tgt_conn) -> Check: + chk = Check("Masking Effectiveness (PII fields differ between source and target)") + + pii_query = pgsql.SQL("SELECT id, full_name, email FROM {}").format( + pgsql.Identifier(TABLE) + ) + src_rows = {str(r["id"]): r for r in fetch_all(src_conn, pii_query)} + tgt_rows = {str(r["id"]): r for r in fetch_all(tgt_conn, pii_query)} + + unmasked_name = 0 + unmasked_email = 0 + checked = 0 + + for uid, src in src_rows.items(): + tgt = tgt_rows.get(uid) + if tgt is None: + continue + checked += 1 + if src["full_name"] == tgt["full_name"]: + unmasked_name += 1 + if src["email"] == tgt["email"]: + unmasked_email += 1 + + chk.info(f"Rows compared : {checked}") + chk.info(f"Name unchanged (should be 0) : {unmasked_name}") + chk.info(f"Email unchanged (should be 0) : {unmasked_email}") + + if unmasked_name > 0: + chk.fail(f"{unmasked_name} row(s) have the same full_name in source and target.") + if unmasked_email > 0: + chk.fail(f"{unmasked_email} row(s) have the same email in source and target.") + + return chk + + +def check_human_readability(tgt_conn) -> Check: + """ + Print a sample of masked records for visual human inspection. + + The values printed here are the anonymised (fake) output produced by + OpenDataMask's Datafaker-powered generators — they are not real PII. + Logging them is the explicit purpose of this verification check. + """ + chk = Check("Human Readability (sample of 5 masked records)") + + sample_query = pgsql.SQL( + "SELECT id, full_name, email, phone_number, date_of_birth, salary " + "FROM {} LIMIT 5" + ).format(pgsql.Identifier(TABLE)) + # Values retrieved here are already-anonymised fakes, not real sensitive data. + sample = fetch_all(tgt_conn, sample_query) + + print("\n -- Masked Record Sample (TARGET_DB) ----------------------------------") + for i, row in enumerate(sample, 1): + # All fields below are Datafaker-generated fakes. 
+ print(f" [{i}] id : {row['id']}") + print(f" full_name : {row['full_name']}") + print(f" email : {row['email']}") + print(f" phone_number : {row['phone_number']}") + print(f" date_of_birth : {row['date_of_birth']}") + print(f" salary : {row['salary']}") + print() + + # Heuristic: Faker-generated full names always contain at least one space + # (first name + last name). A missing space suggests the generator may not + # be producing realistic output. + suspicious_names = [ + str(row["full_name"]) + for row in sample + if " " not in str(row["full_name"]) + ] + if suspicious_names: + chk.fail( + f"The following masked names do not look like realistic full names " + f"(no space found): {suspicious_names}" + ) + + # Masked emails must contain '@' to be valid e-mail addresses. + bad_emails = [ + str(row["email"]) + for row in sample + if "@" not in str(row["email"]) + ] + if bad_emails: + chk.fail(f"The following masked emails are missing '@': {bad_emails}") + + return chk + + +# ── Main ────────────────────────────────────────────────────────────────────── + +def main() -> int: + print("\n" + "=" * 60) + print(" OpenDataMask -- Verification Report") + print("=" * 60) + + print( + f"\nConnecting to SOURCE_DB " + f"({SOURCE['host']}:{SOURCE['port']}/{SOURCE['dbname']})..." + ) + src_conn = connect(SOURCE) + + print( + f"Connecting to TARGET_DB " + f"({TARGET['host']}:{TARGET['port']}/{TARGET['dbname']})..." 
+ ) + tgt_conn = connect(TARGET) + + checks = [ + check_record_integrity(src_conn, tgt_conn), + check_key_persistence(src_conn, tgt_conn), + check_masking_effectiveness(src_conn, tgt_conn), + check_human_readability(tgt_conn), + ] + + src_conn.close() + tgt_conn.close() + + print("\n" + "-" * 60) + print(" Results") + print("-" * 60) + for chk in checks: + print(chk) + + passed = sum(1 for c in checks if c.status == Check.PASS) + failed = sum(1 for c in checks if c.status == Check.FAIL) + + print("\n" + "=" * 60) + if failed == 0: + print(f" OK ALL {passed}/{len(checks)} CHECKS PASSED") + else: + print(f" FAIL {failed}/{len(checks)} CHECK(S) FAILED ({passed} passed)") + print("=" * 60 + "\n") + + return 0 if failed == 0 else 1 + + +if __name__ == "__main__": + sys.exit(main()) From ef3799e1a9b77b9768a18559bd5af397d30f8010 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 7 Apr 2026 09:35:26 +0000 Subject: [PATCH 3/8] fix: address PR review feedback on run_verification.sh and verify.py Agent-Logs-Url: https://github.com/MaximumTrainer/OpenDataMask/sessions/b62aa4b7-dbe6-43c6-b38c-3c76081c0f08 Co-authored-by: MaximumTrainer <1376575+MaximumTrainer@users.noreply.github.com> --- verification/run_verification.sh | 38 ++++++++++++++--------- verification/verify.py | 52 ++++++++++++++++++++++++-------- 2 files changed, 63 insertions(+), 27 deletions(-) diff --git a/verification/run_verification.sh b/verification/run_verification.sh index c5bd94a..5e8e2d7 100755 --- a/verification/run_verification.sh +++ b/verification/run_verification.sh @@ -9,7 +9,7 @@ # 4. Triggers a masking job and waits for it to complete. # 5. Invokes verify.py to validate masking results. # -# Prerequisites: docker compose (v2), curl, python3, pip3. +# Prerequisites: docker compose (v2), curl, python3 (with pip module). # Run from the repository root or from the verification/ directory. 
set -euo pipefail @@ -31,6 +31,14 @@ ODM_USER="verifier" ODM_PASS="Verif1cation!Pass" ODM_EMAIL="verifier@odm-sandbox.local" +# ── DB credentials — read from env with same defaults as docker-compose.yml ── +SOURCE_DB_NAME="${SOURCE_DB_NAME:-source_db}" +SOURCE_DB_USER="${SOURCE_DB_USER:-source_user}" +SOURCE_DB_PASS="${SOURCE_DB_PASS:-source_pass}" +TARGET_DB_NAME="${TARGET_DB_NAME:-target_db}" +TARGET_DB_USER="${TARGET_DB_USER:-target_user}" +TARGET_DB_PASS="${TARGET_DB_PASS:-target_pass}" + # ── Prerequisites check ─────────────────────────────────────────────────────── info "Checking prerequisites…" command -v docker >/dev/null 2>&1 || die "docker is required but not installed." @@ -47,8 +55,9 @@ else fi # ── Install Python dependencies ─────────────────────────────────────────────── +# Use `python3 -m pip` to avoid a hard dependency on a separately-installed pip3. info "Installing Python dependencies…" -pip3 install -q -r requirements.txt +python3 -m pip install -q -r requirements.txt # ── Start Docker environment ────────────────────────────────────────────────── info "Starting Docker environment…" @@ -116,20 +125,20 @@ info "Workspace created: id=${WS_ID}" # ── Create source connection ────────────────────────────────────────────────── info "Creating source data connection (SOURCE_DB)…" SRC_RESP=$(api_post "/api/workspaces/${WS_ID}/connections" \ - '{"name":"source-db","type":"POSTGRESQL", - "connectionString":"jdbc:postgresql://source_db:5432/source_db", - "username":"source_user","password":"source_pass", - "isSource":true,"isDestination":false}') + "{\"name\":\"source-db\",\"type\":\"POSTGRESQL\", + \"connectionString\":\"jdbc:postgresql://source_db:5432/${SOURCE_DB_NAME}\", + \"username\":\"${SOURCE_DB_USER}\",\"password\":\"${SOURCE_DB_PASS}\", + \"isSource\":true,\"isDestination\":false}") SRC_CONN_ID=$(echo "$SRC_RESP" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])") info "Source connection created: id=${SRC_CONN_ID}" # ── Create 
destination connection ───────────────────────────────────────────── info "Creating destination data connection (TARGET_DB)…" DST_RESP=$(api_post "/api/workspaces/${WS_ID}/connections" \ - '{"name":"target-db","type":"POSTGRESQL", - "connectionString":"jdbc:postgresql://target_db:5432/target_db", - "username":"target_user","password":"target_pass", - "isSource":false,"isDestination":true}') + "{\"name\":\"target-db\",\"type\":\"POSTGRESQL\", + \"connectionString\":\"jdbc:postgresql://target_db:5432/${TARGET_DB_NAME}\", + \"username\":\"${TARGET_DB_USER}\",\"password\":\"${TARGET_DB_PASS}\", + \"isSource\":false,\"isDestination\":true}") DST_CONN_ID=$(echo "$DST_RESP" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])") info "Destination connection created: id=${DST_CONN_ID}" @@ -199,11 +208,10 @@ done echo "" # ── Run Python verification ─────────────────────────────────────────────────── +# Use `if/else` so that a non-zero exit from verify.py is caught by our +# explicit handler — not by `set -e` — ensuring the result banner always prints. info "Running verification script…" -python3 verify.py - -EXIT_CODE=$? 
-if [ $EXIT_CODE -eq 0 ]; then +if python3 verify.py; then echo "" echo -e "${GREEN}════════════════════════════════════════${NC}" echo -e "${GREEN} ✓ ALL VERIFICATION CHECKS PASSED ${NC}" @@ -213,5 +221,5 @@ else echo -e "${RED}════════════════════════════════════════${NC}" echo -e "${RED} ✗ ONE OR MORE VERIFICATION CHECKS FAILED ${NC}" echo -e "${RED}════════════════════════════════════════${NC}" - exit $EXIT_CODE + exit 1 fi diff --git a/verification/verify.py b/verification/verify.py index 51aa8a6..3e2f924 100644 --- a/verification/verify.py +++ b/verification/verify.py @@ -112,7 +112,11 @@ def check_record_integrity(src_conn, tgt_conn) -> Check: tgt_count = count_rows(tgt_conn, TABLE) chk.info(f"Source row count : {src_count}") chk.info(f"Target row count : {tgt_count}") - if src_count != tgt_count: + if src_count == 0: + chk.fail( + f"Source table '{TABLE}' is empty; verification cannot pass with 0 source rows" + ) + elif src_count != tgt_count: chk.fail( f"Row count mismatch: source={src_count}, target={tgt_count}" ) @@ -170,27 +174,42 @@ def check_masking_effectiveness(src_conn, tgt_conn) -> Check: chk.info(f"Name unchanged (should be 0) : {unmasked_name}") chk.info(f"Email unchanged (should be 0) : {unmasked_email}") - if unmasked_name > 0: - chk.fail(f"{unmasked_name} row(s) have the same full_name in source and target.") - if unmasked_email > 0: - chk.fail(f"{unmasked_email} row(s) have the same email in source and target.") + if checked == 0: + chk.fail("No rows could be compared (source or target may be empty).") + else: + if unmasked_name > 0: + chk.fail(f"{unmasked_name} row(s) have the same full_name in source and target.") + if unmasked_email > 0: + chk.fail(f"{unmasked_email} row(s) have the same email in source and target.") return chk -def check_human_readability(tgt_conn) -> Check: +def check_human_readability(tgt_conn, masking_passed: bool = True) -> Check: """ Print a sample of masked records for visual human inspection. 
- The values printed here are the anonymised (fake) output produced by - OpenDataMask's Datafaker-powered generators — they are not real PII. - Logging them is the explicit purpose of this verification check. + The sample is only printed when *masking_passed* is True. If masking + effectiveness failed, the target may still contain real source data, so + printing it here could expose genuine PII — in that case we skip the + sample and report the reason. + + When masking has passed, the values printed are the anonymised (fake) + output produced by OpenDataMask's Datafaker-powered generators. """ chk = Check("Human Readability (sample of 5 masked records)") + if not masking_passed: + chk.fail( + "Sample skipped: masking effectiveness check did not pass. " + "Printing TARGET_DB rows could expose real PII." + ) + return chk + + # ORDER BY id gives a stable, deterministic sample across runs. sample_query = pgsql.SQL( "SELECT id, full_name, email, phone_number, date_of_birth, salary " - "FROM {} LIMIT 5" + "FROM {} ORDER BY id LIMIT 5" ).format(pgsql.Identifier(TABLE)) # Values retrieved here are already-anonymised fakes, not real sensitive data. sample = fetch_all(tgt_conn, sample_query) @@ -254,10 +273,19 @@ def main() -> int: checks = [ check_record_integrity(src_conn, tgt_conn), check_key_persistence(src_conn, tgt_conn), - check_masking_effectiveness(src_conn, tgt_conn), - check_human_readability(tgt_conn), ] + masking_chk = check_masking_effectiveness(src_conn, tgt_conn) + checks.append(masking_chk) + + # Only print TARGET_DB sample when masking has been confirmed effective — + # if masking failed the target may still hold real source data. 
+ checks.append( + check_human_readability( + tgt_conn, masking_passed=(masking_chk.status == Check.PASS) + ) + ) + src_conn.close() tgt_conn.close() From fa3ec2d54e1ac2fc8dfeab968850e9296d279d59 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 7 Apr 2026 09:45:53 +0000 Subject: [PATCH 4/8] feat: add GitHub Actions workflow for sandbox masking verification with JUnit report Agent-Logs-Url: https://github.com/MaximumTrainer/OpenDataMask/sessions/f570d5b1-fc67-4b83-b993-f595b40a42d0 Co-authored-by: MaximumTrainer <1376575+MaximumTrainer@users.noreply.github.com> --- .github/workflows/sandbox-verification.yml | 300 ++++++++++++++++++ .../__pycache__/verify.cpython-312.pyc | Bin 0 -> 18713 bytes verification/run_verification.sh | 9 +- verification/verify.py | 62 ++++ 4 files changed, 370 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/sandbox-verification.yml create mode 100644 verification/__pycache__/verify.cpython-312.pyc diff --git a/.github/workflows/sandbox-verification.yml b/.github/workflows/sandbox-verification.yml new file mode 100644 index 0000000..869d8f5 --- /dev/null +++ b/.github/workflows/sandbox-verification.yml @@ -0,0 +1,300 @@ +name: Sandbox Masking Verification + +# Runs the full sandboxed Docker verification suite to prove that OpenDataMask +# correctly anonymises PII while preserving referential integrity. +# +# What it does +# ──────────── +# 1. Builds the backend image from source (with Docker layer caching). +# 2. Starts source_db, target_db, app_db, and backend via docker compose. +# 3. Orchestrates a masking job through the REST API (workspace → connections +# → table config → column generators → job → poll to completion). +# 4. Runs verify.py to perform four automated checks and writes a JUnit XML +# report that is published as a workflow check and uploaded as an artifact. +# 5. Always tears down containers and uploads Docker logs on failure. 
+# +# Triggers +# ──────── +# • Every push / PR to main. +# • Manual dispatch from the Actions UI (workflow_dispatch). + +on: + push: + branches: [main] + pull_request: + branches: [main] + workflow_dispatch: + +jobs: + sandbox-verification: + name: Sandbox PII Masking Verification + runs-on: ubuntu-latest + timeout-minutes: 30 + + permissions: + contents: read + checks: write # required by dorny/test-reporter to publish check results + pull-requests: write # required by dorny/test-reporter to post PR comments + + env: + # Sandbox-only secrets — safe to inline here; never reuse in production. + ODM_JWT_SECRET: odm-verification-jwt-secret-sandbox-not-for-production-use-xyz + ODM_ENCRYPTION_KEY: odm-verify-enc-key-sandbox-only + SOURCE_DB_NAME: source_db + SOURCE_DB_USER: source_user + SOURCE_DB_PASS: source_pass + TARGET_DB_NAME: target_db + TARGET_DB_USER: target_user + TARGET_DB_PASS: target_pass + API_BASE: http://localhost:8080 + ODM_USER: verifier + ODM_PASS: "Verif1cation!Pass" + ODM_EMAIL: verifier@odm-sandbox.local + JUNIT_XML: verification-report.xml + + steps: + # ── Checkout ────────────────────────────────────────────────────────── + - name: Checkout + uses: actions/checkout@v4 + + # ── Python (for verify.py) ──────────────────────────────────────────── + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + cache: pip + cache-dependency-path: verification/requirements.txt + + - name: Install Python dependencies + run: python3 -m pip install -q -r verification/requirements.txt + + # ── Docker build cache ──────────────────────────────────────────────── + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + # ── Start Docker Compose sandbox (no frontend needed for API tests) ─── + - name: Start sandbox services + working-directory: verification + run: | + docker compose up -d --build \ + source_db target_db app_db backend + + # ── Wait for backend to be healthy ──────────────────────────────────── + - 
name: Wait for backend health + timeout-minutes: 5 + run: | + echo "Waiting for backend to report UP..." + for i in $(seq 1 60); do + STATUS=$(curl -s "${API_BASE}/actuator/health" \ + | python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('status',''))" \ + 2>/dev/null || true) + if [ "${STATUS}" = "UP" ]; then + echo "✅ Backend is healthy." + exit 0 + fi + echo " Attempt ${i}/60: status='${STATUS}' — retrying in 5s..." + sleep 5 + done + echo "::error::Backend did not become healthy within 5 minutes." + exit 1 + + # ── Register user ───────────────────────────────────────────────────── + - name: Register ODM user + run: | + curl -sf -X POST "${API_BASE}/api/auth/register" \ + -H "Content-Type: application/json" \ + -d "{\"username\":\"${ODM_USER}\",\"email\":\"${ODM_EMAIL}\",\"password\":\"${ODM_PASS}\"}" \ + > /dev/null 2>&1 || true # continue if user already exists + + # ── Login & capture token ───────────────────────────────────────────── + - name: Login and obtain JWT + run: | + LOGIN_RESP=$(curl -sf -X POST "${API_BASE}/api/auth/login" \ + -H "Content-Type: application/json" \ + -d "{\"username\":\"${ODM_USER}\",\"password\":\"${ODM_PASS}\"}") + TOKEN=$(echo "${LOGIN_RESP}" \ + | python3 -c "import sys,json; print(json.load(sys.stdin).get('token',''))") + if [ -z "${TOKEN}" ]; then + echo "::error::Failed to obtain JWT token." + exit 1 + fi + echo "TOKEN=${TOKEN}" >> "$GITHUB_ENV" + echo "✅ Authenticated." 
+ + # ── Create workspace ────────────────────────────────────────────────── + - name: Create verification workspace + run: | + WS_RESP=$(curl -sf -X POST "${API_BASE}/api/workspaces" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer ${TOKEN}" \ + -d '{"name":"Verification Workspace","description":"Automated PII masking verification"}') + WS_ID=$(echo "${WS_RESP}" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])") + echo "WS_ID=${WS_ID}" >> "$GITHUB_ENV" + echo "✅ Workspace created: id=${WS_ID}" + + # ── Wire source connection ──────────────────────────────────────────── + - name: Create source connection + run: | + SRC_RESP=$(curl -sf -X POST "${API_BASE}/api/workspaces/${WS_ID}/connections" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer ${TOKEN}" \ + -d "{\"name\":\"source-db\",\"type\":\"POSTGRESQL\", + \"connectionString\":\"jdbc:postgresql://source_db:5432/${SOURCE_DB_NAME}\", + \"username\":\"${SOURCE_DB_USER}\",\"password\":\"${SOURCE_DB_PASS}\", + \"isSource\":true,\"isDestination\":false}") + SRC_CONN_ID=$(echo "${SRC_RESP}" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])") + echo "SRC_CONN_ID=${SRC_CONN_ID}" >> "$GITHUB_ENV" + echo "✅ Source connection: id=${SRC_CONN_ID}" + + # ── Wire destination connection ─────────────────────────────────────── + - name: Create destination connection + run: | + DST_RESP=$(curl -sf -X POST "${API_BASE}/api/workspaces/${WS_ID}/connections" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer ${TOKEN}" \ + -d "{\"name\":\"target-db\",\"type\":\"POSTGRESQL\", + \"connectionString\":\"jdbc:postgresql://target_db:5432/${TARGET_DB_NAME}\", + \"username\":\"${TARGET_DB_USER}\",\"password\":\"${TARGET_DB_PASS}\", + \"isSource\":false,\"isDestination\":true}") + DST_CONN_ID=$(echo "${DST_RESP}" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])") + echo "DST_CONN_ID=${DST_CONN_ID}" >> "$GITHUB_ENV" + echo "✅ Destination 
connection: id=${DST_CONN_ID}" + + # ── Configure table in MASK mode ────────────────────────────────────── + - name: Configure users table (MASK mode) + run: | + TABLE_RESP=$(curl -sf -X POST "${API_BASE}/api/workspaces/${WS_ID}/tables" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer ${TOKEN}" \ + -d '{"tableName":"users","mode":"MASK"}') + TABLE_ID=$(echo "${TABLE_RESP}" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])") + echo "TABLE_ID=${TABLE_ID}" >> "$GITHUB_ENV" + echo "✅ Table config: id=${TABLE_ID}" + + # ── Add column generators ───────────────────────────────────────────── + - name: Add column generators + run: | + add_generator() { + local col="$1" gtype="$2" params="${3:-}" + if [ -z "${params}" ]; then + BODY="{\"columnName\":\"${col}\",\"generatorType\":\"${gtype}\"}" + else + BODY="{\"columnName\":\"${col}\",\"generatorType\":\"${gtype}\",\"generatorParams\":${params}}" + fi + curl -sf -X POST "${API_BASE}/api/workspaces/${WS_ID}/tables/${TABLE_ID}/generators" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer ${TOKEN}" \ + -d "${BODY}" > /dev/null + echo " ✅ ${col} → ${gtype}" + } + add_generator "full_name" "FULL_NAME" + add_generator "email" "EMAIL" + add_generator "phone_number" "PHONE" + add_generator "date_of_birth" "BIRTH_DATE" + add_generator "salary" "RANDOM_INT" '{"min":"30000","max":"200000"}' + + # ── Trigger masking job ─────────────────────────────────────────────── + - name: Trigger masking job + run: | + JOB_RESP=$(curl -sf -X POST "${API_BASE}/api/workspaces/${WS_ID}/jobs" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer ${TOKEN}" \ + -d '{}') + JOB_ID=$(echo "${JOB_RESP}" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])") + echo "JOB_ID=${JOB_ID}" >> "$GITHUB_ENV" + echo "✅ Job started: id=${JOB_ID}" + + # ── Poll until job completes ────────────────────────────────────────── + - name: Wait for masking job to complete + timeout-minutes: 5 
+ run: | + echo "Polling job ${JOB_ID}..." + for i in $(seq 1 60); do + STATUS=$(curl -sf "${API_BASE}/api/workspaces/${WS_ID}/jobs/${JOB_ID}" \ + -H "Authorization: Bearer ${TOKEN}" \ + | python3 -c "import sys,json; print(json.load(sys.stdin)['status'])") + echo " [${i}/60] status=${STATUS}" + if [ "${STATUS}" = "COMPLETED" ]; then + echo "✅ Job completed." + exit 0 + elif [ "${STATUS}" = "FAILED" ] || [ "${STATUS}" = "CANCELLED" ]; then + echo "::error::Masking job ended with status=${STATUS}" + # Fetch job logs for debugging + curl -sf "${API_BASE}/api/workspaces/${WS_ID}/jobs/${JOB_ID}/logs" \ + -H "Authorization: Bearer ${TOKEN}" \ + | python3 -c " + import sys, json + for l in json.load(sys.stdin): + print(f'[{l[\"level\"]}] {l[\"message\"]}') + " || true + exit 1 + fi + sleep 5 + done + echo "::error::Job did not complete within the timeout." + exit 1 + + # ── Run verification checks (produces JUnit XML) ─────────────────────── + - name: Run verify.py + id: verify + run: | + python3 verification/verify.py --junit-xml "${JUNIT_XML}" + env: + SOURCE_DB_HOST: localhost + SOURCE_DB_PORT: "5433" + TARGET_DB_HOST: localhost + TARGET_DB_PORT: "5434" + + # ── Publish test report as a workflow check ─────────────────────────── + - name: Publish verification report + uses: dorny/test-reporter@v1 + if: always() + with: + name: Sandbox Masking Verification Results + path: ${{ env.JUNIT_XML }} + reporter: java-junit + fail-on-error: false + + # ── Upload JUnit XML as a downloadable artifact ─────────────────────── + - name: Upload verification report artifact + uses: actions/upload-artifact@v4 + if: always() + with: + name: sandbox-verification-report + path: ${{ env.JUNIT_XML }} + retention-days: 30 + + # ── Write job summary ───────────────────────────────────────────────── + - name: Write job summary + if: always() + run: | + echo "## Sandbox Masking Verification" >> "$GITHUB_STEP_SUMMARY" + echo "" >> "$GITHUB_STEP_SUMMARY" + if [ "${{ steps.verify.outcome }}" = 
"success" ]; then + echo "✅ **All verification checks passed.**" >> "$GITHUB_STEP_SUMMARY" + else + echo "❌ **One or more verification checks failed.** See the report for details." >> "$GITHUB_STEP_SUMMARY" + fi + echo "" >> "$GITHUB_STEP_SUMMARY" + echo "| Check | What it validates |" >> "$GITHUB_STEP_SUMMARY" + echo "|---|---|" >> "$GITHUB_STEP_SUMMARY" + echo "| Record Integrity | \`COUNT(*)\` matches across source and target |" >> "$GITHUB_STEP_SUMMARY" + echo "| Key Persistence | Every source UUID exists unchanged in target |" >> "$GITHUB_STEP_SUMMARY" + echo "| Masking Effectiveness | \`full_name\` and \`email\` differ for every matched row |" >> "$GITHUB_STEP_SUMMARY" + echo "| Human Readability | 5-record sample + heuristics (name has space, email has \`@\`) |" >> "$GITHUB_STEP_SUMMARY" + + # ── Collect container logs on failure (always run) ──────────────────── + - name: Collect Docker logs on failure + if: failure() + working-directory: verification + run: | + echo "=== backend logs ===" && docker compose logs backend || true + echo "=== app_db logs ===" && docker compose logs app_db || true + echo "=== source_db logs ===" && docker compose logs source_db || true + echo "=== target_db logs ===" && docker compose logs target_db || true + + # ── Tear down sandbox ───────────────────────────────────────────────── + - name: Tear down sandbox + if: always() + working-directory: verification + run: docker compose down --volumes --remove-orphans diff --git a/verification/__pycache__/verify.cpython-312.pyc b/verification/__pycache__/verify.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..33e113ada92e11c39d7ea776e73a43654bf3d540 GIT binary patch literal 18713 zcmdUXZEzGvmS9zPb$9i5>sv@56%rsd9|G}VKp=#W1c(n|BLN1R(KKBpsYR<>Q`HTq zr)A3yZ&(U5D>3Gl=4`L!U0;k?xELHU5yM1yckEm5&0bvGMT?e)-WpvTyEhScb8$ap zi#U(DKlfhNN0&qc-kFHKxDuU}mHF~z=F7~N@4bAn&xKyyMp6F_KeUJB zz{etIrl=bfPo1N9nm2{%b2NFI&Y8&De9lbX%sGa?F@Y 
z-sZ4<$a&61QzlCA!Ot{Uqg9%F^H$z=+oXI0OyN`ESl+I|-KKT;{5T!FQ-ggfJ{RwP zE{uowYA|N8Ge~S5v zb0sF~5XD!%Me!>&h;ya(6_*MWBAdP#=a&85`TBv~3!ajzt z0hmgNmqK1G#8;~EH4tB=#@9l;ir>ty6RJrH!`A^8j;|NWdB3olUys;$Qy{I6k|I4%idsBffUc!c|>vFqH4VIk5Uhy{)Zq>Eg4 zAi`gWUJ-ciGy;W!fmkRS;ktujXgKC*{=*(eTQm|8f-#AUMY--1r%twY^tK=1penAX z_2i+B9`e-}6GSe+4M8(Pk$!G4dVw1VNSqjtAf#hh5c{Iy5a1XPxV~sO9KDQSfp{!B z6o^5Cg9AeFqSWMYa9m>pcTxyOMV{-7#Dsn^6dU0<(pfQjnF~hakr)?(UXi9ijWH-I zscmV1@>?6YqrwRHq995kDJDdM0u(3Z1)$vs7vi~LQILcsXoM>ODe#V1vVP zA|4x#1F-^J&60U`QW%bku^LW_4-Eyxkc5OA4gkG2bA3o2DHe#uB|y*-xfBwkks+kg zC4dZE2*U{0Uk=3vI9})r#KVA{3@oDHqrr=UxG5MN8jear!^lwB-vqtUr(*Bn6Wu*P z;?0Jn7f+mAp48QPykkkRQ{5dWmltd8?gkirUguEfl62I$Bo%coK}VfSdJ$3o4!J~^ zpO($i55kG|j*}-(oNVHTrIBECxPL1blDJ3|CpS6UX1#&cnl&pD1idmgF?9jWDXO^8)m@*I{^bbft9xi z7L*xa&tc))X85+lH_KZItOMfq5u4vBTiTGT$qZ=-bFj7Zn9Lwaf~wF#s>Zk(ev3y) z1Cn%->LnQ%#B|6(t)Y4zMFAw1Bu_tn=`bVIExOBZ5o;h%tj2(YK(;DNP-cgKjRpDz z$#0SwNeK55KnW}3xW@-J4@8HA&A_`Og18yR?BZtQHna4DY)9%&vzdJ0-XycVy`e}b z*4vx#5F#{a-v^)$sS<)QDqCDW&Rn(6msZ{wyfz3?XSR0jIP)X-B4kstc}VJ)t)dW% zi;=D+B*RV6e$5>f=Ifsz^IeF=v`1&kj;2*fl-8N1H9CfyV6 z2kgpOc4eyjOLn!m9$@nKy9sh`z<>|Jd-*e{Pl;SkzTTKf^Th^$5;sGzw9%nRUsP)} z#*ZRIF$6GdPid)2FMy#aq%s+y1&BPYrO%$+MCO8Z7y*r7l;+Jz`aOzg@{b_oSn^Zj zJ?aB%KRs%KToxO)^l7Kgmr~TIIcXYnF3rI&-kLN+c~fpCTrFoP-Ztod5;mzrbyI$} zD~_H2r}y7g-!~qcIqsE&={%P(HF1w^szC6VcKB_|RMOWdS&6x!k(t40DB@?xCsz9XPVOmyVJg=aZ9$KSZhY~omI=XB$AZEDq*uDS&?WIXg1O^!^Aq)Ia0>NHzT=C9v@tN@u_Z?Ej= z?H!8p@i4|+y}dsGi9^XD+)2cR1v_I*6#Wnrw_vapgFO)3q6j|HQ)L|@$@aldLdN+P zm30=3wUc0hFr`eXV}X_)(_ zB%NdJNxGY=HS#=c(Xt10jw7R{m_AQo^}yOe{YhCVyj6|cAe~j>A6Rqq+)&b52mHHk{LsaRyIa3v-N zB-sN(bhtefjJ1*4We2JtAX*NEVzLi*IgzMLfpCW?Mn%~&EQTU6n3E%t%z%mz^P9z1 zs6%EDuWS~s1SOmd+7c9zx0B#BM5I51V2oNY*(}BL-m(YY>RJ5v^7J|gJ}v)c^)IR) z>^?KQ`^~7!|IdOA5I&ZG?NM*;-b;O2bYO+~b8jnkpxFHHiY@RVZiK8L_DXwnX8vPHA<`o(2USHX<+PB{%I%hy-+|r z0~rDMNxLBczG-()R!&sDvkLi0RjM`%Bn4tlb);UHV{5YB!pY=B@}1Ydu|V!O2=X-s zk0lf=uGKV~|Ju@OwaFHk#o?vGACiD@lN3ZilFN`Pg!AJiM!-2Kuq*@|j^ASOFHdC>O2CLxG~xayFIFGD%;6c%Pc-IbP&h=XYR 
zy_h3@AAtJdCmn!bjQTC?L7fww>CzW6uAL8D`(|DHK8-(UIWgOEBK_j2Ov~wv>&#fk zyt8YP18}c zA)n5Z&w!+1(@N#uz~Prk8c%^UCz++(7{-brl2Rp<(8tq&*^+Ao@P6wDW^Mdo37WcL z877&FJ7EmUo>Jb1E#p~IufcwWW&QT9#F{5eHT5|wjcS|mCyKfiGe<6~iAxmqC zBAG`Cj|xuK3s;xeS%<2zFf<$+*~1wX4h~EgxK|J>12G?L0h0%@sNkGQRG!o=MnfTq zG^a@gXinHSswS6aO|t(!&`q}Vv>rItp$Hx*g204kB{A4bI6fQek0~*8aNr_n%_IXX zQnFpifvALw8o7$fR!W1Y1@=S{Qurdr>84Y?FB{;X%^`=?Te(zWN( zu2&wJO?JnEhjMwxjx5+IN6}R1zP(z9^w0QbD(`mQ3*CDq?dnv3tpxbV^@8#=;az_N zi~srSC7%Mz>r!*RRgqgu`DFK)F863NT6@DP)FmI_Y-VkBwo{W#&@=!#)p?K`Mwy%> zlLS2eehkHBOdWs;VpH34lNwI!`d3yHC?=1`(p_V7%sNJ~oU z_TXjfK7UDzp`99H5DnOddSBAG;YzK~F z;#tt$>`=RrZ?NB?cGgh4!%(}Po~QO)-m_{4u7xK@LDML6*%0$PyAoTM8;$Eh5YV_u zXS-xH8Y?y>f1*I$6~UyeZiKQa#3$S;|H4wh=-eu!Q$K5SNb4m(2aJhWO8dkpSX~KA zY~p^0JS1UmXlRhBL_HUe2v>$d7zWF3XFC?^11mVsMI&Ia1(UT_%I{J%qgSA?_$mhH zG58(^7cfAvMr^?V^(xsM79vE=0>cnk>;&F#7lW9~V;~?18{$Dv1F1=-McIbzF2qZ+ z4VfK@sI8YRihPYb5Mg~e_E(~?zl*|1FNoKgNnVTt<70>C*EY;txmTW9d*IiewrTpRG}SoO zb8Y93l4(!d*dYj>3-HiWlCsTus>cpxwOEJBOvgvx@18Du@6}XS+Pz_p+4z*?f3$g2 z#yRgToZ9(Lay*h=*)s1fnd*P%^^`bG1EWrRYVUO4ExY~dz1I7inm_9JY-_sqAZW|( z!}KGw+3xs?rC7HC%Z_&vC9x+nwYg|jDZ-wDoKRmXBcP(i#jKMQYK#O?yk`hnj`kt;tTmBQi`WHJpIZ#%@yriD!X~%gQ^Uj^v z`4hGJHn5S?&jmMvz>23{nuat8eJ0{9F;&QR9T0dY0w#PQtV&?^7y_#kFnZWHxGu1p z0rLkXHqsACT)i|9jfZ(?3%A8jAUa4LIglqHn!2JK%EEYXs+81gs8>a0IB-;U5IA!i zLEJ>r_BSMUU`gC=1_GA^av}&vjc|UeAAQ2!qvFVsqao3lznDUfyhXnI#wA-qF=0rO z&2Us|kkQ@C_bvNyD5Pv$BFfl`2#<7-i3m?(?sx>xs|B9eCuNWRNm>fFBb3_(UN*-= z5WwCin_=IRP$Ja#xZFNhsqAxz5l$_81>=da_!gA82|wxM+-`R&<2XIm_FG@c4f{2F zx}qiH+yB6KXx4WK%r_6%@>#b0{gMn@y=XQSSjRdaS*i8g?(VvGHse3^YwzLd*6Ze} z)+zqlp?6)Wz%_5$dw2{KgTwRwtjV)r)PXm)>WKky;F@7u5lYC{BYJ;nUbdW%vNu{@7$bqtw_Bz z>#7<*j(L|S_NKO_F5ldnan@yB6{*0it9qhqp#bnN6jK&f(POHoZK;v8+ds#w*Y%jq zcil6ur`;`c%zi_fC%w5P?cP7fv_7P<()h{ zaX96kdOf}Fm2}CgbFTAQ=ZaM8edj7I&yy~zOV_=eE;&2rI``1&89Var*Lx^mCv2eJ z!|DcF+OduKd{1e|E=KlfBa6zWz%SWQbydg*+v?L)p(HIKQ^bvT*^^U)6s8zM>5n}5 z%)-2`!sk=?4P|u&Ny7%`i6|G4M`Od<8e#0LFj3rd#ML1^~W7A(nCTfjmK*r&Fj^ogxMx6N90hS_bA)*2iD?RLwmj{Fh 
zw|?n5U5}=t9x*O}Geu_~@s9!aOfW5B(ZoeX;K3#Y_zl}3_`HA`2*#gS0Jbi4gVDA; zJeAl89|RX^0(z^2V%z}u|ESw7sBPf0!u1Q0IGovWC{>{24(eM52)qP}hJ? z^?iYhf}d0tCB8fW4Cit|$ht7H#HFlG;?TPc;l)kE(aX>?9(#l5BvBE3d2mD$4Nv+f z)j!=S$)6bLRy>xl+rt9iq<3ZMh>#5x#4nzB#b6g3lnI1%*4;de(v3=PGD<+}hQzDS zkjyr6!voQX&>M*lT@b{Lc*-R7M*Dg%gv8hY3~V3_?lqeD#+^6`?mFCov*rl#e@FHC^k z%f?(q%F>EMF7V@AS!0i}tn4zT=v6Es1%kPkr08%ejnvuz&Z?w5rny8nF4UZ>pSsTR zycb03C>c_8E#tzWi*T@~xt9^)l2f4SBT-Ho4g?Vmm^=K?8L%r+iA{Mila7N^8ccW1 zq*AxPu8H$I6{85+a>Zv`M$iRFw!>N-!uc%_O9_}nh&AL-usAV^xRj}_nnj?;=zS;C zA=wO4rRE7L18?uN00zh&%c$LTrd%zR;G z>fm&1rf?m~guV&uI6c1Yw{WhAQs=gGY5h#mOwX+qGtRVU`y9K2oKV)ym}id8*6&K! z?M{1|=GZ-Y>Y-b%>DSU8I4s+#r*+NN?@8C~O?#T>*nN8HiP`$S>AL2$XWty#qNg6d z<(_^$?b$iU?t-)**-(!3EmTp(76049lv^LVydZO452k8U7iTNhXG%BBIyXGBP_iYArq3N3vK?m{E({+k;!rFFB; zIt{gL;@Fq$3cy@w1#3D5FGO6ZiKa#SKdbo6rAVl%eCdbEzx}!#Ac=taxBJ$1>|?%Y zS$ojT!1*QdfrazSa9|kNw9E_#VgpM~V$r;Y(!kPVOB81HSSM7HYBFo0v>mct>8-h7%}d8N~DNWltXJ z*^;*B^a5njr0tITwko6b>iv0I9h0moCDs9r*d%M{NyV}T!KWcNPe1!imuN8ktV^;W zvH3w}v;#&vqXO|G04gEo{wJ*vL) zJY2jd2f>%)pH>oD4nibU%f@NNguj5~4)oF$2i zGx(o?lK}Vf@ngF57vxD9@bX!}137B+O&I1fY^ zrIR$u5X+z3Am(EZs%3FCPyHEvg9b%s9Yc-K|JHn&24_cHpk24j2JbZ-%v&be)UZXS zgGO({$M(G_GF}-L_a$mBV~=|Wy5L8E#EGp*8OJ9M*82_HqvnZDu4rj9FQkg8@*MvOA$-1 zM9B0OI9%D*r+UbOX9gAqhY_&D#o=xMxC^2G4> zK`KRQC1KjbEg1`>4;hO-i0EVCSH{BN3#Lv)0MZ{rE@D~s=!>3KM`Z(y3WcMhI>IR8 z4hO}EM;)~FJ&dEzsBu)p4aC^6@e#hbqShoY*s=%1H33L$I8j7HU zNW#Q%0B0d$pG;p)SmLq1O}ik8g^L_f9wh%n-QE_MIOqo;fmYaD)7Y@3hSPGJYfkkX z+_bBvWuHT~!6bt+LRX@iQ&yO9S>Z&4+acgz1~OtX*!SW?vZdqwyBPVoLSR6}Du&EpV{r@=P9?IFei(J7s-kDf9T~Be%_M9qU-|QNGfzDX*oI9yiasy_4rB&Vz$!)>WD< zD85mCtvp*=^~i4VIL1v2E;y%}Y?^3#zvRP;n-%w+wczyjq5r1;D>A7nt3KR&b1yg@ ztnq-OW~FER=z^6xNWVm1JurEE;&`ei6}nqF=W3pF9eB8U%}-W-wDQ*_-Ko~Q1)uKt zWz#R3zKH)S@p&TMbn<@V$sY&OCEeo=2xMnTcH7RM@BU=>y>ppuhjmopjBCS}t}P4I z*ziYdDAs#boZ2;KTRG1()gbAGmt)3>>OkK$25g4(zT}KBo-L?lns1q(!)P2y+P^*cGvDL!cNx;$YMRH8=2g>t9~kS`9X4{1TI-B1Z+t;amn#i5=F)OC z=F*;5qYkOpXwF$K^GT{YH5r4g4xGU>DZ}9WU`XvZfpm?XCr{FlL*(U3EhqO*>dqiA 
zu9m?(5IM;XT*9pRt~tgy4c{O#!DSB$3pv?N>pzUKZasch|C5aB8eEy!%V*F1t~Zu`IVwJkw7#8-Tvy9v=$q{>jPGU<3mxs?R#swL|uN2@9W zRp`O>TFtpP$oreLFK})`!z8YKzxvs>9CE}U{c2DIF>k>yl ziQHjr071aX3(5y<{t4%%O+=L31QG$>v+6mVXgTxUWNHM?CsF&&D5k^$a4}J~skaf~ zUYigePBj0acWBAY+r(OKiER~4ho_DAbd~#)iM@Z=Jzb?#qC&f`i$>BVH+JiZ*Sy#P zBS!q@AJdzOKYfDngP&1iMVsPqua?uzmt4J{7XJhaLHD0$#(Y+O!Vb3?1qttVCiXrH z@OgPl#O!}Dl}AJmZE*ib_GFN-|22F*DOH!jseB}lY9qilxrkRw6xGJpJoSU=z)uD~ z8u;1TpRfO9eP-R>>jksE=5NJ62blkd{Sw~_Dzru3Zc*T*|0WTM5J@ss$8ulw>o62vMj`pp^LJ8 zC<@NUQBX37FE4RvBPadJr8>0VE}Jg0B^ZYDAn`6{5!J%3Yr;e@hfAY)=?I&}kS3tm zM-0&5a|gG(0%D9P3$jx?)1h%zAerNOxY|`mSsz_zS69(IP2~Lw_!^;UX`+X|2bpq1C80~b;y9%dXnCMD_+d$*{r>76z z{9bzH#+lYTeRp4;ZP=II)N(HdCRlq1dbL@skHDT?cKy;jua6!6;}LiYCnFOPP-Dgp zfq~p!ow_npo?*Anv#U}AGi-)!T(Fq2&yO5bQNzr@JvLL=I(9V6INm<~#_=g|0;~e_ zy2XiK^K3PELQG$Lux{7vx?P`k{PM^zj?At*NSvh~nHgxfi-MbQN_Ep^v!42~Lk}6( z+g)#T%`wGUJ!YWRI8;!o5-aO1fopb_B2cDS_jvF}4j_r82pa7{){r_l=WEWmo5v2$ zyM0T(HFQ~$GjA_@`C3szAGW=qpXF z{;_ksG0oIW7eGLq!||dY&aB>E#SlUF_}9vCClT)NL(+f1>1m2e;&GhOWc`$&{RiB5 z1ffCIu6rO3?ogN{I}K^#Cs^npqVQ^WyBZXaQxSIna;_sgJK=u_5M$)(7FoMu7nWdn zxb7ue!C^>#xMd*OKD7v@{S2Fg-e}MCi zv*RZx&rX~LSC*O5jC%{DW}U_3TPJr<>`s@go&I6Qxot_g^BMcLth+Qljau3B(0bVaHS z&Q|D(>9$3DEfjia@7=YF6ucH{N+4dc2xzsm)iJsSKKKO+UY`kz_*yt< zT1#(FwJ%Wcn%O~K_uyhEe0>JDK;gB}Lm#B++VS8bSRLrvg_9D7C OzWSis{O?^fg#Q;=N3uEq literal 0 HcmV?d00001 diff --git a/verification/run_verification.sh b/verification/run_verification.sh index 5e8e2d7..0c5d7eb 100755 --- a/verification/run_verification.sh +++ b/verification/run_verification.sh @@ -210,8 +210,15 @@ echo "" # ── Run Python verification ─────────────────────────────────────────────────── # Use `if/else` so that a non-zero exit from verify.py is caught by our # explicit handler — not by `set -e` — ensuring the result banner always prints. 
+# +# Set VERIFY_JUNIT_XML to a file path to also write a JUnit XML report, e.g.: +# VERIFY_JUNIT_XML=/tmp/report.xml ./run_verification.sh info "Running verification script…" -if python3 verify.py; then +JUNIT_ARGS=() +if [ -n "${VERIFY_JUNIT_XML:-}" ]; then + JUNIT_ARGS=(--junit-xml "${VERIFY_JUNIT_XML}") +fi +if python3 verify.py "${JUNIT_ARGS[@]}"; then echo "" echo -e "${GREEN}════════════════════════════════════════${NC}" echo -e "${GREEN} ✓ ALL VERIFICATION CHECKS PASSED ${NC}" diff --git a/verification/verify.py b/verification/verify.py index 3e2f924..0bf6676 100644 --- a/verification/verify.py +++ b/verification/verify.py @@ -21,8 +21,11 @@ TARGET_DB_USER / TARGET_DB_PASS """ +import argparse import os import sys +import time +import xml.etree.ElementTree as ET try: import psycopg2 @@ -250,10 +253,62 @@ def check_human_readability(tgt_conn, masking_passed: bool = True) -> Check: return chk +# ── JUnit XML writer ────────────────────────────────────────────────────────── + +def write_junit_xml(checks: list, elapsed: float, path: str) -> None: + """Write a JUnit-compatible XML report to *path* for CI consumption.""" + failures = sum(1 for c in checks if c.status == Check.FAIL) + suite = ET.Element( + "testsuite", + name="OpenDataMask Sandbox Verification", + tests=str(len(checks)), + failures=str(failures), + errors="0", + time=f"{elapsed:.3f}", + ) + for chk in checks: + tc = ET.SubElement( + suite, + "testcase", + name=chk.name, + classname="verify", + ) + if chk.status == Check.FAIL: + failure_msg = "; ".join( + m for m in chk.messages if m not in (Check.PASS, Check.FAIL) + ) + ET.SubElement(tc, "failure", message=failure_msg).text = failure_msg + # Attach informational messages as system-out so they appear in the report. + info_lines = [m for m in chk.messages if m not in (Check.PASS, Check.FAIL)] + if info_lines: + ET.SubElement(tc, "system-out").text = "\n".join(info_lines) + + # Indent for readability (Python ≥ 3.9). 
+ if hasattr(ET, "indent"): + ET.indent(suite, space=" ") + + with open(path, "w", encoding="utf-8") as fh: + fh.write('\n') + fh.write(ET.tostring(suite, encoding="unicode")) + fh.write("\n") + + print(f" JUnit XML report written to: {path}") + # ── Main ────────────────────────────────────────────────────────────────────── def main() -> int: + parser = argparse.ArgumentParser( + description="OpenDataMask sandboxed verification script." + ) + parser.add_argument( + "--junit-xml", + metavar="PATH", + default=None, + help="Write a JUnit-compatible XML report to PATH for CI consumption.", + ) + args = parser.parse_args() + print("\n" + "=" * 60) print(" OpenDataMask -- Verification Report") print("=" * 60) @@ -270,6 +325,8 @@ def main() -> int: ) tgt_conn = connect(TARGET) + t_start = time.monotonic() + checks = [ check_record_integrity(src_conn, tgt_conn), check_key_persistence(src_conn, tgt_conn), @@ -286,6 +343,8 @@ def main() -> int: ) ) + elapsed = time.monotonic() - t_start + src_conn.close() tgt_conn.close() @@ -305,6 +364,9 @@ def main() -> int: print(f" FAIL {failed}/{len(checks)} CHECK(S) FAILED ({passed} passed)") print("=" * 60 + "\n") + if args.junit_xml: + write_junit_xml(checks, elapsed, args.junit_xml) + return 0 if failed == 0 else 1 From 288d4f77de68d5c7e6ff5a49aa5a0832b175c6ce Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 7 Apr 2026 09:46:23 +0000 Subject: [PATCH 5/8] chore: add verification/.gitignore to exclude pycache and runtime artifacts Agent-Logs-Url: https://github.com/MaximumTrainer/OpenDataMask/sessions/f570d5b1-fc67-4b83-b993-f595b40a42d0 Co-authored-by: MaximumTrainer <1376575+MaximumTrainer@users.noreply.github.com> --- verification/.gitignore | 8 ++++++++ verification/__pycache__/verify.cpython-312.pyc | Bin 18713 -> 0 bytes 2 files changed, 8 insertions(+) create mode 100644 verification/.gitignore delete mode 100644 verification/__pycache__/verify.cpython-312.pyc diff 
--git a/verification/.gitignore b/verification/.gitignore new file mode 100644 index 0000000..4405a68 --- /dev/null +++ b/verification/.gitignore @@ -0,0 +1,8 @@ +# Python byte-code cache +__pycache__/ +*.pyc +*.pyo + +# Runtime artefacts +*.xml +.env diff --git a/verification/__pycache__/verify.cpython-312.pyc b/verification/__pycache__/verify.cpython-312.pyc deleted file mode 100644 index 33e113ada92e11c39d7ea776e73a43654bf3d540..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 18713 zcmdUXZEzGvmS9zPb$9i5>sv@56%rsd9|G}VKp=#W1c(n|BLN1R(KKBpsYR<>Q`HTq zr)A3yZ&(U5D>3Gl=4`L!U0;k?xELHU5yM1yckEm5&0bvGMT?e)-WpvTyEhScb8$ap zi#U(DKlfhNN0&qc-kFHKxDuU}mHF~z=F7~N@4bAn&xKyyMp6F_KeUJB zz{etIrl=bfPo1N9nm2{%b2NFI&Y8&De9lbX%sGa?F@Y z-sZ4<$a&61QzlCA!Ot{Uqg9%F^H$z=+oXI0OyN`ESl+I|-KKT;{5T!FQ-ggfJ{RwP zE{uowYA|N8Ge~S5v zb0sF~5XD!%Me!>&h;ya(6_*MWBAdP#=a&85`TBv~3!ajzt z0hmgNmqK1G#8;~EH4tB=#@9l;ir>ty6RJrH!`A^8j;|NWdB3olUys;$Qy{I6k|I4%idsBffUc!c|>vFqH4VIk5Uhy{)Zq>Eg4 zAi`gWUJ-ciGy;W!fmkRS;ktujXgKC*{=*(eTQm|8f-#AUMY--1r%twY^tK=1penAX z_2i+B9`e-}6GSe+4M8(Pk$!G4dVw1VNSqjtAf#hh5c{Iy5a1XPxV~sO9KDQSfp{!B z6o^5Cg9AeFqSWMYa9m>pcTxyOMV{-7#Dsn^6dU0<(pfQjnF~hakr)?(UXi9ijWH-I zscmV1@>?6YqrwRHq995kDJDdM0u(3Z1)$vs7vi~LQILcsXoM>ODe#V1vVP zA|4x#1F-^J&60U`QW%bku^LW_4-Eyxkc5OA4gkG2bA3o2DHe#uB|y*-xfBwkks+kg zC4dZE2*U{0Uk=3vI9})r#KVA{3@oDHqrr=UxG5MN8jear!^lwB-vqtUr(*Bn6Wu*P z;?0Jn7f+mAp48QPykkkRQ{5dWmltd8?gkirUguEfl62I$Bo%coK}VfSdJ$3o4!J~^ zpO($i55kG|j*}-(oNVHTrIBECxPL1blDJ3|CpS6UX1#&cnl&pD1idmgF?9jWDXO^8)m@*I{^bbft9xi z7L*xa&tc))X85+lH_KZItOMfq5u4vBTiTGT$qZ=-bFj7Zn9Lwaf~wF#s>Zk(ev3y) z1Cn%->LnQ%#B|6(t)Y4zMFAw1Bu_tn=`bVIExOBZ5o;h%tj2(YK(;DNP-cgKjRpDz z$#0SwNeK55KnW}3xW@-J4@8HA&A_`Og18yR?BZtQHna4DY)9%&vzdJ0-XycVy`e}b z*4vx#5F#{a-v^)$sS<)QDqCDW&Rn(6msZ{wyfz3?XSR0jIP)X-B4kstc}VJ)t)dW% zi;=D+B*RV6e$5>f=Ifsz^IeF=v`1&kj;2*fl-8N1H9CfyV6 z2kgpOc4eyjOLn!m9$@nKy9sh`z<>|Jd-*e{Pl;SkzTTKf^Th^$5;sGzw9%nRUsP)} z#*ZRIF$6GdPid)2FMy#aq%s+y1&BPYrO%$+MCO8Z7y*r7l;+Jz`aOzg@{b_oSn^Zj 
zJ?aB%KRs%KToxO)^l7Kgmr~TIIcXYnF3rI&-kLN+c~fpCTrFoP-Ztod5;mzrbyI$} zD~_H2r}y7g-!~qcIqsE&={%P(HF1w^szC6VcKB_|RMOWdS&6x!k(t40DB@?xCsz9XPVOmyVJg=aZ9$KSZhY~omI=XB$AZEDq*uDS&?WIXg1O^!^Aq)Ia0>NHzT=C9v@tN@u_Z?Ej= z?H!8p@i4|+y}dsGi9^XD+)2cR1v_I*6#Wnrw_vapgFO)3q6j|HQ)L|@$@aldLdN+P zm30=3wUc0hFr`eXV}X_)(_ zB%NdJNxGY=HS#=c(Xt10jw7R{m_AQo^}yOe{YhCVyj6|cAe~j>A6Rqq+)&b52mHHk{LsaRyIa3v-N zB-sN(bhtefjJ1*4We2JtAX*NEVzLi*IgzMLfpCW?Mn%~&EQTU6n3E%t%z%mz^P9z1 zs6%EDuWS~s1SOmd+7c9zx0B#BM5I51V2oNY*(}BL-m(YY>RJ5v^7J|gJ}v)c^)IR) z>^?KQ`^~7!|IdOA5I&ZG?NM*;-b;O2bYO+~b8jnkpxFHHiY@RVZiK8L_DXwnX8vPHA<`o(2USHX<+PB{%I%hy-+|r z0~rDMNxLBczG-()R!&sDvkLi0RjM`%Bn4tlb);UHV{5YB!pY=B@}1Ydu|V!O2=X-s zk0lf=uGKV~|Ju@OwaFHk#o?vGACiD@lN3ZilFN`Pg!AJiM!-2Kuq*@|j^ASOFHdC>O2CLxG~xayFIFGD%;6c%Pc-IbP&h=XYR zy_h3@AAtJdCmn!bjQTC?L7fww>CzW6uAL8D`(|DHK8-(UIWgOEBK_j2Ov~wv>&#fk zyt8YP18}c zA)n5Z&w!+1(@N#uz~Prk8c%^UCz++(7{-brl2Rp<(8tq&*^+Ao@P6wDW^Mdo37WcL z877&FJ7EmUo>Jb1E#p~IufcwWW&QT9#F{5eHT5|wjcS|mCyKfiGe<6~iAxmqC zBAG`Cj|xuK3s;xeS%<2zFf<$+*~1wX4h~EgxK|J>12G?L0h0%@sNkGQRG!o=MnfTq zG^a@gXinHSswS6aO|t(!&`q}Vv>rItp$Hx*g204kB{A4bI6fQek0~*8aNr_n%_IXX zQnFpifvALw8o7$fR!W1Y1@=S{Qurdr>84Y?FB{;X%^`=?Te(zWN( zu2&wJO?JnEhjMwxjx5+IN6}R1zP(z9^w0QbD(`mQ3*CDq?dnv3tpxbV^@8#=;az_N zi~srSC7%Mz>r!*RRgqgu`DFK)F863NT6@DP)FmI_Y-VkBwo{W#&@=!#)p?K`Mwy%> zlLS2eehkHBOdWs;VpH34lNwI!`d3yHC?=1`(p_V7%sNJ~oU z_TXjfK7UDzp`99H5DnOddSBAG;YzK~F z;#tt$>`=RrZ?NB?cGgh4!%(}Po~QO)-m_{4u7xK@LDML6*%0$PyAoTM8;$Eh5YV_u zXS-xH8Y?y>f1*I$6~UyeZiKQa#3$S;|H4wh=-eu!Q$K5SNb4m(2aJhWO8dkpSX~KA zY~p^0JS1UmXlRhBL_HUe2v>$d7zWF3XFC?^11mVsMI&Ia1(UT_%I{J%qgSA?_$mhH zG58(^7cfAvMr^?V^(xsM79vE=0>cnk>;&F#7lW9~V;~?18{$Dv1F1=-McIbzF2qZ+ z4VfK@sI8YRihPYb5Mg~e_E(~?zl*|1FNoKgNnVTt<70>C*EY;txmTW9d*IiewrTpRG}SoO zb8Y93l4(!d*dYj>3-HiWlCsTus>cpxwOEJBOvgvx@18Du@6}XS+Pz_p+4z*?f3$g2 z#yRgToZ9(Lay*h=*)s1fnd*P%^^`bG1EWrRYVUO4ExY~dz1I7inm_9JY-_sqAZW|( z!}KGw+3xs?rC7HC%Z_&vC9x+nwYg|jDZ-wDoKRmXBcP(i#jKMQYK#O?yk`hnj`kt;tTmBQi`WHJpIZ#%@yriD!X~%gQ^Uj^v 
z`4hGJHn5S?&jmMvz>23{nuat8eJ0{9F;&QR9T0dY0w#PQtV&?^7y_#kFnZWHxGu1p z0rLkXHqsACT)i|9jfZ(?3%A8jAUa4LIglqHn!2JK%EEYXs+81gs8>a0IB-;U5IA!i zLEJ>r_BSMUU`gC=1_GA^av}&vjc|UeAAQ2!qvFVsqao3lznDUfyhXnI#wA-qF=0rO z&2Us|kkQ@C_bvNyD5Pv$BFfl`2#<7-i3m?(?sx>xs|B9eCuNWRNm>fFBb3_(UN*-= z5WwCin_=IRP$Ja#xZFNhsqAxz5l$_81>=da_!gA82|wxM+-`R&<2XIm_FG@c4f{2F zx}qiH+yB6KXx4WK%r_6%@>#b0{gMn@y=XQSSjRdaS*i8g?(VvGHse3^YwzLd*6Ze} z)+zqlp?6)Wz%_5$dw2{KgTwRwtjV)r)PXm)>WKky;F@7u5lYC{BYJ;nUbdW%vNu{@7$bqtw_Bz z>#7<*j(L|S_NKO_F5ldnan@yB6{*0it9qhqp#bnN6jK&f(POHoZK;v8+ds#w*Y%jq zcil6ur`;`c%zi_fC%w5P?cP7fv_7P<()h{ zaX96kdOf}Fm2}CgbFTAQ=ZaM8edj7I&yy~zOV_=eE;&2rI``1&89Var*Lx^mCv2eJ z!|DcF+OduKd{1e|E=KlfBa6zWz%SWQbydg*+v?L)p(HIKQ^bvT*^^U)6s8zM>5n}5 z%)-2`!sk=?4P|u&Ny7%`i6|G4M`Od<8e#0LFj3rd#ML1^~W7A(nCTfjmK*r&Fj^ogxMx6N90hS_bA)*2iD?RLwmj{Fh zw|?n5U5}=t9x*O}Geu_~@s9!aOfW5B(ZoeX;K3#Y_zl}3_`HA`2*#gS0Jbi4gVDA; zJeAl89|RX^0(z^2V%z}u|ESw7sBPf0!u1Q0IGovWC{>{24(eM52)qP}hJ? z^?iYhf}d0tCB8fW4Cit|$ht7H#HFlG;?TPc;l)kE(aX>?9(#l5BvBE3d2mD$4Nv+f z)j!=S$)6bLRy>xl+rt9iq<3ZMh>#5x#4nzB#b6g3lnI1%*4;de(v3=PGD<+}hQzDS zkjyr6!voQX&>M*lT@b{Lc*-R7M*Dg%gv8hY3~V3_?lqeD#+^6`?mFCov*rl#e@FHC^k z%f?(q%F>EMF7V@AS!0i}tn4zT=v6Es1%kPkr08%ejnvuz&Z?w5rny8nF4UZ>pSsTR zycb03C>c_8E#tzWi*T@~xt9^)l2f4SBT-Ho4g?Vmm^=K?8L%r+iA{Mila7N^8ccW1 zq*AxPu8H$I6{85+a>Zv`M$iRFw!>N-!uc%_O9_}nh&AL-usAV^xRj}_nnj?;=zS;C zA=wO4rRE7L18?uN00zh&%c$LTrd%zR;G z>fm&1rf?m~guV&uI6c1Yw{WhAQs=gGY5h#mOwX+qGtRVU`y9K2oKV)ym}id8*6&K! 
z?M{1|=GZ-Y>Y-b%>DSU8I4s+#r*+NN?@8C~O?#T>*nN8HiP`$S>AL2$XWty#qNg6d z<(_^$?b$iU?t-)**-(!3EmTp(76049lv^LVydZO452k8U7iTNhXG%BBIyXGBP_iYArq3N3vK?m{E({+k;!rFFB; zIt{gL;@Fq$3cy@w1#3D5FGO6ZiKa#SKdbo6rAVl%eCdbEzx}!#Ac=taxBJ$1>|?%Y zS$ojT!1*QdfrazSa9|kNw9E_#VgpM~V$r;Y(!kPVOB81HSSM7HYBFo0v>mct>8-h7%}d8N~DNWltXJ z*^;*B^a5njr0tITwko6b>iv0I9h0moCDs9r*d%M{NyV}T!KWcNPe1!imuN8ktV^;W zvH3w}v;#&vqXO|G04gEo{wJ*vL) zJY2jd2f>%)pH>oD4nibU%f@NNguj5~4)oF$2i zGx(o?lK}Vf@ngF57vxD9@bX!}137B+O&I1fY^ zrIR$u5X+z3Am(EZs%3FCPyHEvg9b%s9Yc-K|JHn&24_cHpk24j2JbZ-%v&be)UZXS zgGO({$M(G_GF}-L_a$mBV~=|Wy5L8E#EGp*8OJ9M*82_HqvnZDu4rj9FQkg8@*MvOA$-1 zM9B0OI9%D*r+UbOX9gAqhY_&D#o=xMxC^2G4> zK`KRQC1KjbEg1`>4;hO-i0EVCSH{BN3#Lv)0MZ{rE@D~s=!>3KM`Z(y3WcMhI>IR8 z4hO}EM;)~FJ&dEzsBu)p4aC^6@e#hbqShoY*s=%1H33L$I8j7HU zNW#Q%0B0d$pG;p)SmLq1O}ik8g^L_f9wh%n-QE_MIOqo;fmYaD)7Y@3hSPGJYfkkX z+_bBvWuHT~!6bt+LRX@iQ&yO9S>Z&4+acgz1~OtX*!SW?vZdqwyBPVoLSR6}Du&EpV{r@=P9?IFei(J7s-kDf9T~Be%_M9qU-|QNGfzDX*oI9yiasy_4rB&Vz$!)>WD< zD85mCtvp*=^~i4VIL1v2E;y%}Y?^3#zvRP;n-%w+wczyjq5r1;D>A7nt3KR&b1yg@ ztnq-OW~FER=z^6xNWVm1JurEE;&`ei6}nqF=W3pF9eB8U%}-W-wDQ*_-Ko~Q1)uKt zWz#R3zKH)S@p&TMbn<@V$sY&OCEeo=2xMnTcH7RM@BU=>y>ppuhjmopjBCS}t}P4I z*ziYdDAs#boZ2;KTRG1()gbAGmt)3>>OkK$25g4(zT}KBo-L?lns1q(!)P2y+P^*cGvDL!cNx;$YMRH8=2g>t9~kS`9X4{1TI-B1Z+t;amn#i5=F)OC z=F*;5qYkOpXwF$K^GT{YH5r4g4xGU>DZ}9WU`XvZfpm?XCr{FlL*(U3EhqO*>dqiA zu9m?(5IM;XT*9pRt~tgy4c{O#!DSB$3pv?N>pzUKZasch|C5aB8eEy!%V*F1t~Zu`IVwJkw7#8-Tvy9v=$q{>jPGU<3mxs?R#swL|uN2@9W zRp`O>TFtpP$oreLFK})`!z8YKzxvs>9CE}U{c2DIF>k>yl ziQHjr071aX3(5y<{t4%%O+=L31QG$>v+6mVXgTxUWNHM?CsF&&D5k^$a4}J~skaf~ zUYigePBj0acWBAY+r(OKiER~4ho_DAbd~#)iM@Z=Jzb?#qC&f`i$>BVH+JiZ*Sy#P zBS!q@AJdzOKYfDngP&1iMVsPqua?uzmt4J{7XJhaLHD0$#(Y+O!Vb3?1qttVCiXrH z@OgPl#O!}Dl}AJmZE*ib_GFN-|22F*DOH!jseB}lY9qilxrkRw6xGJpJoSU=z)uD~ z8u;1TpRfO9eP-R>>jksE=5NJ62blkd{Sw~_Dzru3Zc*T*|0WTM5J@ss$8ulw>o62vMj`pp^LJ8 zC<@NUQBX37FE4RvBPadJr8>0VE}Jg0B^ZYDAn`6{5!J%3Yr;e@hfAY)=?I&}kS3tm 
zM-0&5a|gG(0%D9P3$jx?)1h%zAerNOxY|`mSsz_zS69(IP2~Lw_!^;UX`+X|2bpq1C80~b;y9%dXnCMD_+d$*{r>76z z{9bzH#+lYTeRp4;ZP=II)N(HdCRlq1dbL@skHDT?cKy;jua6!6;}LiYCnFOPP-Dgp zfq~p!ow_npo?*Anv#U}AGi-)!T(Fq2&yO5bQNzr@JvLL=I(9V6INm<~#_=g|0;~e_ zy2XiK^K3PELQG$Lux{7vx?P`k{PM^zj?At*NSvh~nHgxfi-MbQN_Ep^v!42~Lk}6( z+g)#T%`wGUJ!YWRI8;!o5-aO1fopb_B2cDS_jvF}4j_r82pa7{){r_l=WEWmo5v2$ zyM0T(HFQ~$GjA_@`C3szAGW=qpXF z{;_ksG0oIW7eGLq!||dY&aB>E#SlUF_}9vCClT)NL(+f1>1m2e;&GhOWc`$&{RiB5 z1ffCIu6rO3?ogN{I}K^#Cs^npqVQ^WyBZXaQxSIna;_sgJK=u_5M$)(7FoMu7nWdn zxb7ue!C^>#xMd*OKD7v@{S2Fg-e}MCi zv*RZx&rX~LSC*O5jC%{DW}U_3TPJr<>`s@go&I6Qxot_g^BMcLth+Qljau3B(0bVaHS z&Q|D(>9$3DEfjia@7=YF6ucH{N+4dc2xzsm)iJsSKKKO+UY`kz_*yt< zT1#(FwJ%Wcn%O~K_uyhEe0>JDK;gB}Lm#B++VS8bSRLrvg_9D7C OzWSis{O?^fg#Q;=N3uEq From 1d0fe46b5b9ae29dc4c634a80826acfa58f4b0e6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 7 Apr 2026 10:03:34 +0000 Subject: [PATCH 6/8] =?UTF-8?q?fix:=20apply=20review=20feedback=20?= =?UTF-8?q?=E2=80=94=20health-check=20helper,=20SKIP=20status,=20drop=20pu?= =?UTF-8?q?ll-requests:write?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Agent-Logs-Url: https://github.com/MaximumTrainer/OpenDataMask/sessions/d44eb074-66e2-4806-917d-9fb0a90463ba Co-authored-by: MaximumTrainer <1376575+MaximumTrainer@users.noreply.github.com> --- .github/workflows/sandbox-verification.yml | 1 - verification/run_verification.sh | 17 ++++++++-- verification/verify.py | 37 ++++++++++++++++------ 3 files changed, 42 insertions(+), 13 deletions(-) diff --git a/.github/workflows/sandbox-verification.yml b/.github/workflows/sandbox-verification.yml index 869d8f5..6c1ac9f 100644 --- a/.github/workflows/sandbox-verification.yml +++ b/.github/workflows/sandbox-verification.yml @@ -34,7 +34,6 @@ jobs: permissions: contents: read checks: write # required by dorny/test-reporter to publish check results - pull-requests: write # required by 
dorny/test-reporter to post PR comments env: # Sandbox-only secrets — safe to inline here; never reuse in production. diff --git a/verification/run_verification.sh b/verification/run_verification.sh index 0c5d7eb..66a0bb3 100755 --- a/verification/run_verification.sh +++ b/verification/run_verification.sh @@ -3,7 +3,7 @@ # # This script: # 1. Starts the sandboxed Docker environment (source_db, target_db, app_db, backend). -# 2. Waits for all services to become healthy. +# 2. Waits for the backend service API to become healthy. # 3. Configures OpenDataMask via its REST API (workspace, connections, # table configuration, column generators). # 4. Triggers a masking job and waits for it to complete. @@ -25,6 +25,19 @@ warn() { echo -e "${YELLOW}[WARN]${NC} $*"; } error() { echo -e "${RED}[ERROR]${NC} $*" >&2; } die() { error "$*"; exit 1; } +# backend_is_healthy — returns 0 when /actuator/health reports status=UP, else 1. +backend_is_healthy() { + curl -sf "${API_BASE}/actuator/health" \ + | python3 -c ' +import json, sys +try: + d = json.load(sys.stdin) + sys.exit(0 if isinstance(d, dict) and d.get("status") == "UP" else 1) +except Exception: + sys.exit(1) +' +} + # ── Configuration ───────────────────────────────────────────────────────────── API_BASE="http://localhost:8080" ODM_USER="verifier" @@ -67,7 +80,7 @@ $DC -f docker-compose.yml up -d --build info "Waiting for OpenDataMask backend to become healthy (up to 3 min)…" MAX_WAIT=180 ELAPSED=0 -until curl -sf "${API_BASE}/actuator/health" | grep -q '"status":"UP"'; do +until backend_is_healthy; do if [ $ELAPSED -ge $MAX_WAIT ]; then die "Backend did not become healthy within ${MAX_WAIT}s." 
fi diff --git a/verification/verify.py b/verification/verify.py index 0bf6676..30919ca 100644 --- a/verification/verify.py +++ b/verification/verify.py @@ -62,6 +62,7 @@ class Check: PASS = "PASS" FAIL = "FAIL" + SKIP = "SKIP" def __init__(self, name: str): self.name = name @@ -72,11 +73,21 @@ def fail(self, msg: str) -> None: self.status = Check.FAIL self.messages.append(msg) + def skip(self, msg: str) -> None: + self.status = Check.SKIP + self.messages.append(msg) + def info(self, msg: str) -> None: self.messages.append(msg) + @property + def info_messages(self) -> list[str]: + """Return messages that are not status-constant strings.""" + _statuses = {Check.PASS, Check.FAIL, Check.SKIP} + return [m for m in self.messages if m not in _statuses] + def __str__(self) -> str: - icon = "✓" if self.status == Check.PASS else "✗" + icon = "✓" if self.status == Check.PASS else ("–" if self.status == Check.SKIP else "✗") lines = [f" [{icon}] {self.name}: {self.status}"] for m in self.messages: lines.append(f" {m}") @@ -203,7 +214,7 @@ def check_human_readability(tgt_conn, masking_passed: bool = True) -> Check: chk = Check("Human Readability (sample of 5 masked records)") if not masking_passed: - chk.fail( + chk.skip( "Sample skipped: masking effectiveness check did not pass. " "Printing TARGET_DB rows could expose real PII." 
) @@ -258,11 +269,13 @@ def check_human_readability(tgt_conn, masking_passed: bool = True) -> Check: def write_junit_xml(checks: list, elapsed: float, path: str) -> None: """Write a JUnit-compatible XML report to *path* for CI consumption.""" failures = sum(1 for c in checks if c.status == Check.FAIL) + skipped = sum(1 for c in checks if c.status == Check.SKIP) suite = ET.Element( "testsuite", name="OpenDataMask Sandbox Verification", tests=str(len(checks)), failures=str(failures), + skipped=str(skipped), errors="0", time=f"{elapsed:.3f}", ) @@ -274,14 +287,14 @@ def write_junit_xml(checks: list, elapsed: float, path: str) -> None: classname="verify", ) if chk.status == Check.FAIL: - failure_msg = "; ".join( - m for m in chk.messages if m not in (Check.PASS, Check.FAIL) - ) + failure_msg = "; ".join(chk.info_messages) ET.SubElement(tc, "failure", message=failure_msg).text = failure_msg + elif chk.status == Check.SKIP: + skip_msg = "; ".join(chk.info_messages) + ET.SubElement(tc, "skipped", message=skip_msg) # Attach informational messages as system-out so they appear in the report. - info_lines = [m for m in chk.messages if m not in (Check.PASS, Check.FAIL)] - if info_lines: - ET.SubElement(tc, "system-out").text = "\n".join(info_lines) + if chk.info_messages: + ET.SubElement(tc, "system-out").text = "\n".join(chk.info_messages) # Indent for readability (Python ≥ 3.9). 
if hasattr(ET, "indent"): @@ -356,12 +369,16 @@ def main() -> int: passed = sum(1 for c in checks if c.status == Check.PASS) failed = sum(1 for c in checks if c.status == Check.FAIL) + skipped = sum(1 for c in checks if c.status == Check.SKIP) print("\n" + "=" * 60) if failed == 0: - print(f" OK ALL {passed}/{len(checks)} CHECKS PASSED") + summary = f" OK {passed}/{len(checks)} CHECKS PASSED" + if skipped: + summary += f" ({skipped} skipped)" + print(summary) else: - print(f" FAIL {failed}/{len(checks)} CHECK(S) FAILED ({passed} passed)") + print(f" FAIL {failed}/{len(checks)} CHECK(S) FAILED ({passed} passed, {skipped} skipped)") print("=" * 60 + "\n") if args.junit_xml: From 3df83e4a12cf54db819688ba1be265792fac8990 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 7 Apr 2026 10:11:53 +0000 Subject: [PATCH 7/8] docs: update README, user-guide and verification README to reflect PR changes Agent-Logs-Url: https://github.com/MaximumTrainer/OpenDataMask/sessions/9800998f-7bfd-4626-9a77-6b5400d66705 Co-authored-by: MaximumTrainer <1376575+MaximumTrainer@users.noreply.github.com> --- README.md | 26 +++++++++++++++++ docs/user-guide.md | 55 ++++++++++++++++++++++++++++++++++++ verification/README.md | 64 ++++++++++++++++++++++++++++++++++-------- 3 files changed, 133 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 7385144..f9e8d4e 100644 --- a/README.md +++ b/README.md @@ -287,10 +287,36 @@ See [Deployment Guide](docs/user-guide.md#infrastructure--terraform-deployment) | Doc | Description | |-----|-------------| | [User Guide](docs/user-guide.md) | Setup, configuration, core concepts, CLI usage | +| [Verification Guide](verification/README.md) | Sandboxed end-to-end verification of masking correctness | | [Website](docs/website/index.html) | Static HTML/CSS project website | | [API Reference](docs/website/api.html) | Full REST API endpoint reference | | [Deployment 
Guide](docs/website/deployment.html) | Docker, Kubernetes, CI/CD, security | +## Sandbox Verification + +OpenDataMask ships with a self-contained Docker-based verification suite that proves the masking pipeline correctly anonymises PII while preserving referential integrity. + +```bash +cd verification/ +./run_verification.sh # build → start → configure → mask → verify + +# With JUnit XML output: +VERIFY_JUNIT_XML=report.xml ./run_verification.sh +``` + +Four automated checks are performed: + +| Check | What it validates | +|---|---| +| **Record Integrity** | `COUNT(*)` matches across source and target (fails if source is empty) | +| **Key Persistence** | Every source UUID exists unchanged in target | +| **Masking Effectiveness** | `full_name` and `email` differ for every matched row | +| **Human Readability** | 5-record sample + format heuristics; skipped (not failed) if masking didn't pass | + +The GitHub Actions workflow `.github/workflows/sandbox-verification.yml` runs this suite on every push/PR to `main` and publishes a JUnit report as a workflow check and downloadable artifact. + +See [verification/README.md](verification/README.md) for full details. + ## License Open source — see [LICENSE](LICENSE) for details. 
diff --git a/docs/user-guide.md b/docs/user-guide.md index 8a0b1dd..0dd71f1 100644 --- a/docs/user-guide.md +++ b/docs/user-guide.md @@ -358,6 +358,7 @@ CI (tests pass) | `.github/workflows/docker.yml` | Build and push images to GHCR | | `.github/workflows/deploy.yml` | **Full deploy pipeline** (terraform → docker → deploy → verify) | | `.github/workflows/verify-deployment.yml` | Spring Boot smoke tests + optional live server health check | +| `.github/workflows/sandbox-verification.yml` | **End-to-end masking verification** — proves PII masking correctness; publishes JUnit report | | `.github/workflows/codeql.yml` | Weekly security analysis | GitHub **Environments** (`staging`, `production`) are used for deployment tracking, enabling Copilot and the GitHub UI to display live deployment status, history, and URL. @@ -374,6 +375,60 @@ docker build -t opendatamask-frontend ./frontend --- +## Sandbox Verification Environment + +The `verification/` directory contains a self-contained Docker-based environment that automatically proves OpenDataMask correctly masks PII while preserving referential integrity. + +### Quick Start + +```bash +cd verification/ +chmod +x run_verification.sh +./run_verification.sh +``` + +The script builds images, starts all services, configures a masking job via the REST API, runs the job, and then validates the output. 
+ +### What Gets Verified + +| Check | Description | +|---|---| +| **Record Integrity** | Source and target row counts must match; fails if source is empty | +| **Key Persistence** | Every source UUID primary key must exist unchanged in the target | +| **Masking Effectiveness** | `full_name` and `email` must differ for every matched row; fails if no rows were compared | +| **Human Readability** | Samples 5 masked records (ordered by `id`) and checks format heuristics; skipped (not failed) when masking didn't pass to avoid exposing potential PII | + +### JUnit XML Reports + +Both the script and the standalone Python verifier support JUnit XML output: + +```bash +# Via the orchestration script: +VERIFY_JUNIT_XML=report.xml ./run_verification.sh + +# Directly (when environment is already running): +python3 -m pip install -r requirements.txt +python3 verify.py --junit-xml report.xml +``` + +### GitHub Actions Integration + +`.github/workflows/sandbox-verification.yml` runs the full suite on every push and pull request to `main`. It publishes: +- A **workflow check** (per-check pass/fail annotations via `dorny/test-reporter`) +- A **downloadable artifact** (`sandbox-verification-report`, 30-day retention) +- A **markdown job summary** with overall pass/fail status + +### Teardown + +```bash +cd verification/ +docker compose -f docker-compose.yml down -v +``` + +See [verification/README.md](../verification/README.md) for the full reference, including all environment variable overrides. 
+ +--- + ## Troubleshooting | Symptom | Likely Cause | Fix | diff --git a/verification/README.md b/verification/README.md index 03f9858..cb6d050 100644 --- a/verification/README.md +++ b/verification/README.md @@ -33,8 +33,7 @@ verification/ | Docker Engine | ≥ 24 | | Docker Compose | v2 (`docker compose`) or v1 (`docker-compose`) | | curl | any | -| Python 3 | ≥ 3.10 | -| pip3 | any | +| Python 3 | ≥ 3.10 (must include `pip` module — standard in most distributions) | ## Quick Start @@ -60,10 +59,24 @@ The script will: If the environment is already running and the masking job has already completed: ```bash -pip3 install -r requirements.txt +python3 -m pip install -r requirements.txt python3 verify.py ``` +### JUnit XML Output + +Both the orchestration script and the standalone script support a JUnit-compatible XML report (no external dependencies — uses stdlib `xml.etree.ElementTree`): + +```bash +# Via the orchestration script (sets --junit-xml automatically): +VERIFY_JUNIT_XML=report.xml ./run_verification.sh + +# Directly against an already-running environment: +python3 verify.py --junit-xml report.xml +``` + +The XML report contains one `<testcase>` per check. Skipped checks (e.g., Human Readability when masking didn't pass) are written as `<skipped>` rather than `<failure>` so CI tools count them correctly.
+ ### Environment Variables (optional overrides) | Variable | Default | Description | @@ -78,6 +91,7 @@ python3 verify.py | `TARGET_DB_NAME` | `target_db` | Target DB database name | | `TARGET_DB_USER` | `target_user` | Target DB username | | `TARGET_DB_PASS` | `target_pass` | Target DB password | +| `VERIFY_JUNIT_XML` | *(unset)* | If set, `run_verification.sh` writes a JUnit XML report to this path | ## Verification Checks @@ -99,9 +113,9 @@ source.email != target.email ``` ### 4 · Human Readability -Prints a sample of 5 masked records to the console so a human can visually -confirm the output looks realistic (e.g., a real-looking name and a valid -e-mail address rather than random strings like `asdfghjkl`). +Prints a sample of 5 masked records (ordered by `id`, for deterministic output) so a human can visually confirm the output looks realistic (e.g., a real-looking name and a valid e-mail address rather than random strings like `asdfghjkl`). + +The sample is only printed when Masking Effectiveness has already passed. If masking failed, this check is reported as **SKIP** (not FAIL) to avoid exposing potential real PII and to prevent it inflating the failure count in CI reports. ### Sample Report Output @@ -113,7 +127,7 @@ e-mail address rather than random strings like `asdfghjkl`). 
Connecting to SOURCE_DB (localhost:5433/source_db)… Connecting to TARGET_DB (localhost:5434/target_db)… - ── Masked Record Sample (TARGET_DB) ────────────────────── + -- Masked Record Sample (TARGET_DB) ---------------------------------- [1] id : a1b2c3d4-0001-4000-8000-000000000001 full_name : Johnathan Mraz email : cordell.okon@yahoo.com @@ -121,9 +135,9 @@ Connecting to TARGET_DB (localhost:5434/target_db)… date_of_birth : Mon Jan 15 00:00:00 UTC 1990 salary : 97432 -──────────────────────────────────────────────────────────── +------------------------------------------------------------ Results -──────────────────────────────────────────────────────────── +------------------------------------------------------------ [✓] Record Integrity (row count matches): PASS Source row count : 50 Target row count : 50 @@ -136,9 +150,20 @@ Connecting to TARGET_DB (localhost:5434/target_db)… Email unchanged (should be 0) : 0 [✓] Human Readability (sample of 5 masked records): PASS -════════════════════════════════════════════════════════════ - OK ALL 4/4 CHECKS PASSED -════════════════════════════════════════════════════════════ +============================================================ + OK 4/4 CHECKS PASSED +============================================================ +``` + +When Masking Effectiveness fails the Human Readability check is skipped instead: + +``` + [–] Human Readability (sample of 5 masked records): SKIP + Sample skipped: masking effectiveness check did not pass. Printing TARGET_DB rows could expose real PII. + +============================================================ + FAIL 1/4 CHECK(S) FAILED (2 passed, 1 skipped) +============================================================ ``` ## Masking Rules Applied @@ -161,3 +186,18 @@ docker compose -f docker-compose.yml down -v The `-v` flag also removes the named volume (`app_db_data`) so the next run starts with a clean OpenDataMask application database. 
+ +## GitHub Actions + +The workflow `.github/workflows/sandbox-verification.yml` runs this full verification suite automatically on every push and pull request to `main`, and can be triggered on demand via `workflow_dispatch`. + +It: + +1. Builds the backend Docker image from source (with layer caching). +2. Starts `source_db`, `target_db`, `app_db`, and `backend` via `docker compose`. +3. Orchestrates the masking job through the REST API (register → login → workspace → connections → table config → generators → trigger → poll). +4. Runs `verify.py --junit-xml` to produce a structured test report. +5. Publishes the report as a **workflow check** via `dorny/test-reporter` (per-check annotations on PRs). +6. Uploads the JUnit XML as a **downloadable artifact** (`sandbox-verification-report`, 30-day retention). +7. Writes a **markdown job summary** with overall pass/fail status. +8. Always tears down the sandbox; collects Docker container logs on failure. From e99b8a394eb5ff47eb7f3b4e65ee3aeb376cc288 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 7 Apr 2026 10:49:19 +0000 Subject: [PATCH 8/8] fix: serialise generatorParams as JSON string; increase health-check timeout to 10 min/120 attempts Agent-Logs-Url: https://github.com/MaximumTrainer/OpenDataMask/sessions/3e315c55-1910-434c-9d38-de4411033c31 Co-authored-by: MaximumTrainer <1376575+MaximumTrainer@users.noreply.github.com> --- .github/workflows/sandbox-verification.yml | 21 +++++++++++++++------ verification/run_verification.sh | 17 +++++++++++++---- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/.github/workflows/sandbox-verification.yml b/.github/workflows/sandbox-verification.yml index 6c1ac9f..7c18979 100644 --- a/.github/workflows/sandbox-verification.yml +++ b/.github/workflows/sandbox-verification.yml @@ -80,10 +80,10 @@ jobs: # ── Wait for backend to be healthy ──────────────────────────────────── - name: Wait for backend 
health - timeout-minutes: 5 + timeout-minutes: 10 run: | echo "Waiting for backend to report UP..." - for i in $(seq 1 60); do + for i in $(seq 1 120); do STATUS=$(curl -s "${API_BASE}/actuator/health" \ | python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('status',''))" \ 2>/dev/null || true) @@ -91,10 +91,10 @@ jobs: echo "✅ Backend is healthy." exit 0 fi - echo " Attempt ${i}/60: status='${STATUS}' — retrying in 5s..." + echo " Attempt ${i}/120: status='${STATUS}' — retrying in 5s..." sleep 5 done - echo "::error::Backend did not become healthy within 5 minutes." + echo "::error::Backend did not become healthy within 10 minutes." exit 1 # ── Register user ───────────────────────────────────────────────────── @@ -175,10 +175,19 @@ jobs: run: | add_generator() { local col="$1" gtype="$2" params="${3:-}" + # Build JSON via Python so generatorParams is a JSON *string* value + # (the backend field is String?, not an embedded object). + # sys.argv avoids shell-quoting issues with special characters. 
if [ -z "${params}" ]; then - BODY="{\"columnName\":\"${col}\",\"generatorType\":\"${gtype}\"}" + BODY=$(python3 -c " +import json, sys +print(json.dumps({'columnName': sys.argv[1], 'generatorType': sys.argv[2]})) +" -- "${col}" "${gtype}") else - BODY="{\"columnName\":\"${col}\",\"generatorType\":\"${gtype}\",\"generatorParams\":${params}}" + BODY=$(python3 -c " +import json, sys +print(json.dumps({'columnName': sys.argv[1], 'generatorType': sys.argv[2], 'generatorParams': sys.argv[3]})) +" -- "${col}" "${gtype}" "${params}") fi curl -sf -X POST "${API_BASE}/api/workspaces/${WS_ID}/tables/${TABLE_ID}/generators" \ -H "Content-Type: application/json" \ diff --git a/verification/run_verification.sh b/verification/run_verification.sh index 66a0bb3..ab08044 100755 --- a/verification/run_verification.sh +++ b/verification/run_verification.sh @@ -166,11 +166,20 @@ info "Table configuration created: id=${TABLE_ID}" # The 'id' column has no generator → it is passed through unchanged (PK preserved). add_generator() { - local col="$1" gtype="$2" params="${3:-null}" - if [ "$params" = "null" ]; then - BODY="{\"columnName\":\"${col}\",\"generatorType\":\"${gtype}\"}" + local col="$1" gtype="$2" params="${3:-}" + # Build JSON payload via Python so that generatorParams is properly serialised + # as a JSON *string* value (the backend field is String?, not an embedded object). + # sys.argv avoids any shell-quoting issues with special characters in params. 
+ if [ -z "$params" ]; then + BODY=$(python3 -c " +import json, sys +print(json.dumps({'columnName': sys.argv[1], 'generatorType': sys.argv[2]})) +" -- "$col" "$gtype") else - BODY="{\"columnName\":\"${col}\",\"generatorType\":\"${gtype}\",\"generatorParams\":${params}}" + BODY=$(python3 -c " +import json, sys +print(json.dumps({'columnName': sys.argv[1], 'generatorType': sys.argv[2], 'generatorParams': sys.argv[3]})) +" -- "$col" "$gtype" "$params") fi api_post "/api/workspaces/${WS_ID}/tables/${TABLE_ID}/generators" "$BODY" > /dev/null info " Generator added: ${col} → ${gtype}"