From 482e11ed22e2bec74ce1d048f1e4f9ee70e0380f Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Sun, 8 Mar 2026 18:51:15 +0100 Subject: [PATCH 01/39] add OpenTelemetry distributed tracing support When OTEL_EXPORTER_OTLP_ENDPOINT is set, enables full tracing pipeline with OTLP export, W3C traceparent propagation, and spans for HTTP requests, SQL file execution, DB pool acquire, and query execution. Falls back to env_logger when unset. Co-Authored-By: Claude Opus 4.6 --- Cargo.lock | 468 +++++++++++++++++- Cargo.toml | 10 + examples/opentelemetry-grafana/README.md | 52 ++ .../opentelemetry-grafana/docker-compose.yml | 65 +++ .../grafana/datasources.yaml | 8 + .../opentelemetry-grafana/nginx/nginx.conf | 27 + .../opentelemetry-grafana/otel-collector.yaml | 24 + .../sqlpage/migrations/001_init.sql | 6 + .../sqlpage/sqlpage.json | 3 + examples/opentelemetry-grafana/tempo.yaml | 17 + .../website/add_todo.sql | 8 + .../opentelemetry-grafana/website/index.sql | 11 + src/lib.rs | 1 + src/main.rs | 12 +- src/telemetry.rs | 89 ++++ src/webserver/database/execute_queries.rs | 18 + src/webserver/http.rs | 15 +- 17 files changed, 817 insertions(+), 17 deletions(-) create mode 100644 examples/opentelemetry-grafana/README.md create mode 100644 examples/opentelemetry-grafana/docker-compose.yml create mode 100644 examples/opentelemetry-grafana/grafana/datasources.yaml create mode 100644 examples/opentelemetry-grafana/nginx/nginx.conf create mode 100644 examples/opentelemetry-grafana/otel-collector.yaml create mode 100644 examples/opentelemetry-grafana/sqlpage/migrations/001_init.sql create mode 100644 examples/opentelemetry-grafana/sqlpage/sqlpage.json create mode 100644 examples/opentelemetry-grafana/tempo.yaml create mode 100644 examples/opentelemetry-grafana/website/add_todo.sql create mode 100644 examples/opentelemetry-grafana/website/index.sql create mode 100644 src/telemetry.rs diff --git a/Cargo.lock b/Cargo.lock index e9ee88f3..5a565493 100644 --- a/Cargo.lock +++ 
b/Cargo.lock @@ -40,7 +40,7 @@ dependencies = [ "flate2", "foldhash 0.1.5", "futures-core", - "h2", + "h2 0.3.27", "http 0.2.12", "httparse", "httpdate", @@ -629,7 +629,7 @@ dependencies = [ "derive_more 2.1.1", "futures-core", "futures-util", - "h2", + "h2 0.3.27", "http 0.2.12", "itoa", "log", @@ -1994,6 +1994,25 @@ dependencies = [ "tracing", ] +[[package]] +name = "h2" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http 1.4.0", + "indexmap 2.13.0", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "handlebars" version = "6.4.0" @@ -2119,6 +2138,29 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http 1.4.0", +] + +[[package]] +name = "http-body-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" +dependencies = [ + "bytes", + "futures-core", + "http 1.4.0", + "http-body 1.0.1", + "pin-project-lite", +] + [[package]] name = "httparse" version = "1.10.1" @@ -2142,7 +2184,7 @@ dependencies = [ "futures-core", "futures-util", "http 0.2.12", - "http-body", + "http-body 0.4.6", "httparse", "httpdate", "itoa", @@ -2154,6 +2196,64 @@ dependencies = [ "want", ] +[[package]] +name = "hyper" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" +dependencies = [ + "atomic-waker", + "bytes", + "futures-channel", + "futures-core", + "h2 0.4.13", + "http 1.4.0", + "http-body 1.0.1", + "httparse", + 
"itoa", + "pin-project-lite", + "pin-utils", + "smallvec", + "tokio", + "want", +] + +[[package]] +name = "hyper-timeout" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" +dependencies = [ + "hyper 1.8.1", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", +] + +[[package]] +name = "hyper-util" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" +dependencies = [ + "base64 0.22.1", + "bytes", + "futures-channel", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "hyper 1.8.1", + "ipnet", + "libc", + "percent-encoding", + "pin-project-lite", + "socket2 0.6.3", + "tokio", + "tower-service", + "tracing", +] + [[package]] name = "iana-time-zone" version = "0.1.65" @@ -2340,6 +2440,22 @@ dependencies = [ "serde_core", ] +[[package]] +name = "ipnet" +version = "2.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" + +[[package]] +name = "iri-string" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.2" @@ -2465,13 +2581,13 @@ dependencies = [ "bytes", "futures", "http 0.2.12", - "hyper", + "hyper 0.14.32", "lambda_runtime_api_client", "serde", "serde_json", "tokio", "tokio-stream", - "tower", + "tower 0.4.13", "tracing", ] @@ -2482,7 +2598,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7210012be904051520f0dc502140ba599bae3042b65b3737b87727f1aa88a7d6" dependencies = [ "http 0.2.12", - "hyper", + "hyper 0.14.32", "tokio", "tower-service", ] @@ -2637,6 +2753,15 @@ dependencies = [ 
"unicode-id", ] +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + [[package]] name = "md-5" version = "0.10.6" @@ -2697,6 +2822,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "mutually_exclusive_features" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e94e1e6445d314f972ff7395df2de295fe51b71821694f0b0e1e79c4f12c8577" + [[package]] name = "ndk" version = "0.9.0" @@ -2737,6 +2868,15 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "num-bigint" version = "0.4.6" @@ -3145,6 +3285,82 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" +[[package]] +name = "opentelemetry" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b84bcd6ae87133e903af7ef497404dda70c60d0ea14895fc8a5e6722754fc2a0" +dependencies = [ + "futures-core", + "futures-sink", + "js-sys", + "pin-project-lite", + "thiserror 2.0.18", + "tracing", +] + +[[package]] +name = "opentelemetry-http" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7a6d09a73194e6b66df7c8f1b680f156d916a1a942abf2de06823dd02b7855d" +dependencies = [ + "async-trait", + "bytes", + "http 1.4.0", + "opentelemetry", + "reqwest", +] + +[[package]] +name = "opentelemetry-otlp" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7a2366db2dca4d2ad033cad11e6ee42844fd727007af5ad04a1730f4cb8163bf" +dependencies = [ + "http 1.4.0", + "opentelemetry", + "opentelemetry-http", + "opentelemetry-proto", + "opentelemetry_sdk", + "prost", + "reqwest", + "thiserror 2.0.18", + "tokio", + "tonic", + "tracing", +] + +[[package]] +name = "opentelemetry-proto" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7175df06de5eaee9909d4805a3d07e28bb752c34cab57fa9cff549da596b30f" +dependencies = [ + "opentelemetry", + "opentelemetry_sdk", + "prost", + "tonic", + "tonic-prost", +] + +[[package]] +name = "opentelemetry_sdk" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e14ae4f5991976fd48df6d843de219ca6d31b01daaab2dad5af2badeded372bd" +dependencies = [ + "futures-channel", + "futures-executor", + "futures-util", + "opentelemetry", + "percent-encoding", + "rand 0.9.2", + "thiserror 2.0.18", + "tokio", + "tokio-stream", +] + [[package]] name = "option-ext" version = "0.2.0" @@ -3356,6 +3572,12 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + [[package]] name = "piper" version = "0.2.5" @@ -3490,6 +3712,29 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "prost" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-derive" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" +dependencies = [ + 
"anyhow", + "itertools", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "quote" version = "1.0.45" @@ -3699,6 +3944,40 @@ version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" +[[package]] +name = "reqwest" +version = "0.12.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" +dependencies = [ + "base64 0.22.1", + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.8.1", + "hyper-util", + "js-sys", + "log", + "percent-encoding", + "pin-project-lite", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tower 0.5.3", + "tower-http", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "rfc6979" version = "0.4.0" @@ -4163,6 +4442,15 @@ dependencies = [ "digest", ] +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + [[package]] name = "shlex" version = "1.3.0" @@ -4290,6 +4578,9 @@ dependencies = [ "mime_guess", "odbc-sys 0.29.0", "openidconnect", + "opentelemetry", + "opentelemetry-otlp", + "opentelemetry_sdk", "password-hash", "percent-encoding", "rand 0.10.0", @@ -4304,6 +4595,11 @@ dependencies = [ "tokio", "tokio-stream", "tokio-util", + "tracing", + "tracing-actix-web", + "tracing-log", + "tracing-opentelemetry", + "tracing-subscriber", ] [[package]] @@ -4473,6 +4769,15 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "sync_wrapper" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +dependencies = [ + "futures-core", +] + [[package]] name = "synstructure" version = "0.13.2" @@ -4537,6 +4842,15 @@ dependencies = [ "syn", ] +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + [[package]] name = "time" version = "0.3.47" @@ -4717,6 +5031,43 @@ dependencies = [ "winnow", ] +[[package]] +name = "tonic" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fec7c61a0695dc1887c1b53952990f3ad2e3a31453e1f49f10e75424943a93ec" +dependencies = [ + "async-trait", + "base64 0.22.1", + "bytes", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.8.1", + "hyper-timeout", + "hyper-util", + "percent-encoding", + "pin-project", + "sync_wrapper", + "tokio", + "tokio-stream", + "tower 0.5.3", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tonic-prost" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a55376a0bbaa4975a3f10d009ad763d8f4108f067c7c2e74f3001fb49778d309" +dependencies = [ + "bytes", + "prost", + "tonic", +] + [[package]] name = "tower" version = "0.4.13" @@ -4732,6 +5083,43 @@ dependencies = [ "tracing", ] +[[package]] +name = "tower" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" +dependencies = [ + "futures-core", + "futures-util", + "indexmap 2.13.0", + "pin-project-lite", + "slab", + "sync_wrapper", + "tokio", + "tokio-util", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower-http" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" +dependencies = [ + "bitflags 2.11.0", + "bytes", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "iri-string", + "pin-project-lite", + "tower 0.5.3", + "tower-layer", + "tower-service", +] + [[package]] name = "tower-layer" version = "0.3.3" @@ -4756,6 +5144,21 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "tracing-actix-web" +version = "0.7.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ca6b15407f9bfcb35f82d0e79e603e1629ece4e91cc6d9e58f890c184dd20af" +dependencies = [ + "actix-web", + "mutually_exclusive_features", + "opentelemetry", + "pin-project", + "tracing", + "tracing-opentelemetry", + "uuid", +] + [[package]] name = "tracing-attributes" version = "0.1.31" @@ -4774,6 +5177,52 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" dependencies = [ "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-opentelemetry" +version = "0.32.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ac28f2d093c6c477eaa76b23525478f38de514fa9aeb1285738d4b97a9552fc" +dependencies = [ + "js-sys", + "opentelemetry", + "smallvec", + "tracing", + "tracing-core", + "tracing-log", + "tracing-subscriber", + "web-time", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + 
"tracing-core", + "tracing-log", ] [[package]] @@ -4897,10 +5346,17 @@ version = "1.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a68d3c8f01c0cfa54a75291d83601161799e4a89a39e0929f4b0354d88757a37" dependencies = [ + "getrandom 0.4.2", "js-sys", "wasm-bindgen", ] +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + [[package]] name = "vcpkg" version = "0.2.15" diff --git a/Cargo.toml b/Cargo.toml index 64d3b1a2..c59bd77d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -81,6 +81,16 @@ openidconnect = { version = "4.0.0", default-features = false, features = ["acce encoding_rs = "0.8.35" odbc-sys = { version = "0.29.0", optional = true } +# OpenTelemetry / tracing +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["registry", "env-filter"] } +tracing-opentelemetry = "0.32" +tracing-actix-web = { version = "0.7", features = ["opentelemetry_0_31"] } +tracing-log = "0.2" +opentelemetry = "0.31" +opentelemetry_sdk = { version = "0.31", features = ["rt-tokio-current-thread"] } +opentelemetry-otlp = { version = "0.31", features = ["http-proto", "grpc-tonic"] } + [features] default = [] diff --git a/examples/opentelemetry-grafana/README.md b/examples/opentelemetry-grafana/README.md new file mode 100644 index 00000000..197d0379 --- /dev/null +++ b/examples/opentelemetry-grafana/README.md @@ -0,0 +1,52 @@ +# SQLPage + OpenTelemetry + Grafana Example + +This example demonstrates end-to-end distributed tracing with SQLPage. + +## Architecture + +``` +Browser → nginx (with OTel module) → SQLPage → PostgreSQL + ↓ ↓ + OTel Collector ←───────────┘ + ↓ + Grafana Tempo + ↓ + Grafana (UI) +``` + +## How to run + +```bash +docker compose up --build +``` + +## How to explore traces + +1. Open the todo app at [http://localhost](http://localhost) and add a few items. +2. 
Open Grafana at [http://localhost:3000](http://localhost:3000). +3. Go to **Explore** → select the **Tempo** datasource. +4. Search for traces by service name (`nginx` or `sqlpage`). + +## What to look for in traces + +Each HTTP request produces a trace with: + +- **nginx root span** — the entry point, with `traceparent` propagated to SQLPage +- **SQLPage HTTP request span** — child of nginx, created by `tracing-actix-web` +- **`sqlpage.exec`** — SQL file execution, with `sqlpage.file` attribute +- **`db.pool.acquire`** — time spent waiting for a database connection from the pool +- **`db.query`** — individual SQL statement execution, with `db.statement` and `db.system` + +## Testing pool pressure + +To see `db.pool.acquire` spans with queuing time, reduce the pool size: + +```json +// sqlpage/sqlpage.json +{ + "listen_on": "0.0.0.0:8080", + "max_database_pool_connections": 1 +} +``` + +Then send concurrent requests (e.g., open multiple browser tabs quickly) and observe the acquire duration in Grafana. diff --git a/examples/opentelemetry-grafana/docker-compose.yml b/examples/opentelemetry-grafana/docker-compose.yml new file mode 100644 index 00000000..0bad7920 --- /dev/null +++ b/examples/opentelemetry-grafana/docker-compose.yml @@ -0,0 +1,65 @@ +services: + nginx: + image: ghcr.io/nginxinc/nginx-otel:latest + ports: + - "80:80" + volumes: + - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro + depends_on: + - sqlpage + - otel-collector + + sqlpage: + build: + context: ../.. 
+ dockerfile: Dockerfile + environment: + - DATABASE_URL=postgres://sqlpage:sqlpage@postgres:5432/sqlpage + - OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 + - OTEL_SERVICE_NAME=sqlpage + volumes: + - ./website:/var/www + - ./sqlpage:/etc/sqlpage + depends_on: + postgres: + condition: service_healthy + + postgres: + image: postgres:16 + environment: + POSTGRES_USER: sqlpage + POSTGRES_PASSWORD: sqlpage + POSTGRES_DB: sqlpage + healthcheck: + test: ["CMD-SHELL", "pg_isready -U sqlpage"] + interval: 2s + timeout: 5s + retries: 5 + + otel-collector: + image: otel/opentelemetry-collector-contrib:latest + volumes: + - ./otel-collector.yaml:/etc/otelcol-contrib/config.yaml:ro + ports: + - "4317:4317" # OTLP gRPC + - "4318:4318" # OTLP HTTP + depends_on: + - tempo + + tempo: + image: grafana/tempo:latest + volumes: + - ./tempo.yaml:/etc/tempo/config.yaml:ro + command: ["-config.file=/etc/tempo/config.yaml"] + + grafana: + image: grafana/grafana:latest + ports: + - "3000:3000" + environment: + - GF_AUTH_ANONYMOUS_ENABLED=true + - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin + volumes: + - ./grafana/datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:ro + depends_on: + - tempo diff --git a/examples/opentelemetry-grafana/grafana/datasources.yaml b/examples/opentelemetry-grafana/grafana/datasources.yaml new file mode 100644 index 00000000..01a0b49b --- /dev/null +++ b/examples/opentelemetry-grafana/grafana/datasources.yaml @@ -0,0 +1,8 @@ +apiVersion: 1 + +datasources: + - name: Tempo + type: tempo + access: proxy + url: http://tempo:3200 + isDefault: true diff --git a/examples/opentelemetry-grafana/nginx/nginx.conf b/examples/opentelemetry-grafana/nginx/nginx.conf new file mode 100644 index 00000000..32b61569 --- /dev/null +++ b/examples/opentelemetry-grafana/nginx/nginx.conf @@ -0,0 +1,27 @@ +load_module modules/ngx_otel_module.so; + +events {} + +http { + otel_exporter { + endpoint otel-collector:4317; + } + + otel_service_name nginx; + otel_trace on; + 
otel_trace_context propagate; + + upstream sqlpage { + server sqlpage:8080; + } + + server { + listen 80; + + location / { + proxy_pass http://sqlpage; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + } + } +} diff --git a/examples/opentelemetry-grafana/otel-collector.yaml b/examples/opentelemetry-grafana/otel-collector.yaml new file mode 100644 index 00000000..779d0459 --- /dev/null +++ b/examples/opentelemetry-grafana/otel-collector.yaml @@ -0,0 +1,24 @@ +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + +processors: + batch: + timeout: 1s + +exporters: + otlp/tempo: + endpoint: tempo:4317 + tls: + insecure: true + +service: + pipelines: + traces: + receivers: [otlp] + processors: [batch] + exporters: [otlp/tempo] diff --git a/examples/opentelemetry-grafana/sqlpage/migrations/001_init.sql b/examples/opentelemetry-grafana/sqlpage/migrations/001_init.sql new file mode 100644 index 00000000..0ff86847 --- /dev/null +++ b/examples/opentelemetry-grafana/sqlpage/migrations/001_init.sql @@ -0,0 +1,6 @@ +CREATE TABLE IF NOT EXISTS todos ( + id SERIAL PRIMARY KEY, + title TEXT NOT NULL, + done BOOLEAN NOT NULL DEFAULT false, + created_at TIMESTAMP NOT NULL DEFAULT NOW() +); diff --git a/examples/opentelemetry-grafana/sqlpage/sqlpage.json b/examples/opentelemetry-grafana/sqlpage/sqlpage.json new file mode 100644 index 00000000..f28d3cda --- /dev/null +++ b/examples/opentelemetry-grafana/sqlpage/sqlpage.json @@ -0,0 +1,3 @@ +{ + "listen_on": "0.0.0.0:8080" +} diff --git a/examples/opentelemetry-grafana/tempo.yaml b/examples/opentelemetry-grafana/tempo.yaml new file mode 100644 index 00000000..da4cbe71 --- /dev/null +++ b/examples/opentelemetry-grafana/tempo.yaml @@ -0,0 +1,17 @@ +server: + http_listen_port: 3200 + +distributor: + receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + +storage: + trace: + backend: local + local: + path: /tmp/tempo/blocks + wal: + path: /tmp/tempo/wal diff 
--git a/examples/opentelemetry-grafana/website/add_todo.sql b/examples/opentelemetry-grafana/website/add_todo.sql new file mode 100644 index 00000000..6e70d467 --- /dev/null +++ b/examples/opentelemetry-grafana/website/add_todo.sql @@ -0,0 +1,8 @@ +-- Toggle an existing todo +UPDATE todos SET done = $done::boolean WHERE id = $id::int; + +-- Insert a new todo if title is provided +INSERT INTO todos (title) +SELECT $title WHERE $title IS NOT NULL AND length($title) > 0; + +SELECT 'redirect' AS component, '/' AS link; diff --git a/examples/opentelemetry-grafana/website/index.sql b/examples/opentelemetry-grafana/website/index.sql new file mode 100644 index 00000000..151ea3af --- /dev/null +++ b/examples/opentelemetry-grafana/website/index.sql @@ -0,0 +1,11 @@ +SELECT 'list' AS component, + 'Todo List' AS title; + +SELECT title, + CASE WHEN done THEN 'complete' ELSE 'pending' END AS description, + 'add_todo.sql?id=' || id || '&done=' || (NOT done)::text AS link +FROM todos +ORDER BY created_at DESC; + +SELECT 'form' AS component, 'Add a todo' AS title, 'add_todo.sql' AS action; +SELECT 'title' AS name, 'What do you need to do?' AS placeholder; diff --git a/src/lib.rs b/src/lib.rs index df2eb9a5..a4babf3c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -76,6 +76,7 @@ pub mod dynamic_component; pub mod file_cache; pub mod filesystem; pub mod render; +pub mod telemetry; pub mod template_helpers; pub mod templates; pub mod utils; diff --git a/src/main.rs b/src/main.rs index 9074ddea..5dbcebc2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,7 @@ use sqlpage::{ app_config::AppConfig, cli, + telemetry, webserver::{self, Database}, AppState, }; @@ -29,17 +30,14 @@ async fn start() -> anyhow::Result<()> { log::debug!("Starting server..."); webserver::http::run_server(&app_config, state).await?; log::info!("Server stopped gracefully. 
Goodbye!"); + telemetry::shutdown_telemetry(); Ok(()) } fn init_logging() { let load_env = dotenvy::dotenv(); - let env = - env_logger::Env::new().default_filter_or("sqlpage=info,actix_web::middleware::logger=info"); - let mut logging = env_logger::Builder::from_env(env); - logging.format_timestamp_millis(); - logging.init(); + let otel_active = telemetry::init_telemetry(); match load_env { Ok(path) => log::info!("Loaded environment variables from {path:?}"), @@ -48,4 +46,8 @@ fn init_logging() { ), Err(e) => log::error!("Error loading .env file: {e}"), } + + if otel_active { + log::info!("OpenTelemetry tracing enabled (OTEL_EXPORTER_OTLP_ENDPOINT is set)"); + } } diff --git a/src/telemetry.rs b/src/telemetry.rs new file mode 100644 index 00000000..7bfbe7ed --- /dev/null +++ b/src/telemetry.rs @@ -0,0 +1,89 @@ +//! OpenTelemetry initialization and shutdown. +//! +//! When `OTEL_EXPORTER_OTLP_ENDPOINT` is set, sets up a full tracing pipeline +//! with OTLP export. Otherwise, falls back to `env_logger` as before. + +use std::env; +use std::sync::OnceLock; + +use opentelemetry_sdk::trace::SdkTracerProvider; + +static TRACER_PROVIDER: OnceLock = OnceLock::new(); + +/// Initializes logging / tracing. Returns `true` if OTel was activated. +pub fn init_telemetry() -> bool { + let otel_endpoint = env::var("OTEL_EXPORTER_OTLP_ENDPOINT").ok(); + let otel_active = otel_endpoint + .as_deref() + .is_some_and(|v| !v.is_empty()); + + if otel_active { + init_otel_tracing(); + } else { + init_env_logger(); + } + + otel_active +} + +/// Shuts down the OTel tracer provider, flushing pending spans. 
+pub fn shutdown_telemetry() { + if let Some(provider) = TRACER_PROVIDER.get() { + if let Err(e) = provider.shutdown() { + eprintln!("Error shutting down tracer provider: {e}"); + } + } +} + +fn init_env_logger() { + let env = + env_logger::Env::new().default_filter_or("sqlpage=info,actix_web::middleware::logger=info"); + let mut logging = env_logger::Builder::from_env(env); + logging.format_timestamp_millis(); + logging.init(); +} + +fn init_otel_tracing() { + use opentelemetry::global; + use opentelemetry::trace::TracerProvider as _; + use opentelemetry_sdk::propagation::TraceContextPropagator; + use tracing_subscriber::layer::SubscriberExt; + use tracing_subscriber::util::SubscriberInitExt; + + // Bridge log::* macros → tracing events (must be called before subscriber init) + tracing_log::LogTracer::init().expect("Failed to set log tracer"); + + // W3C TraceContext propagation (traceparent header) + global::set_text_map_propagator(TraceContextPropagator::new()); + + // OTLP exporter — reads OTEL_EXPORTER_OTLP_ENDPOINT, OTEL_SERVICE_NAME, etc. 
+ let exporter = opentelemetry_otlp::SpanExporter::builder() + .with_http() + .build() + .expect("Failed to build OTLP span exporter"); + + let provider = SdkTracerProvider::builder() + .with_batch_exporter(exporter) + .build(); + + let tracer = provider.tracer("sqlpage"); + global::set_tracer_provider(provider.clone()); + let _ = TRACER_PROVIDER.set(provider); + + let otel_layer = tracing_opentelemetry::layer().with_tracer(tracer); + + let filter = tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| { + "sqlpage=info,actix_web=info,tracing_actix_web=info".into() + }); + + let fmt_layer = tracing_subscriber::fmt::layer() + .with_timer(tracing_subscriber::fmt::time::uptime()) + .with_target(true); + + tracing_subscriber::registry() + .with(filter) + .with(fmt_layer) + .with(otel_layer) + .init(); +} diff --git a/src/webserver/database/execute_queries.rs b/src/webserver/database/execute_queries.rs index 3edeecfc..c5172c70 100644 --- a/src/webserver/database/execute_queries.rs +++ b/src/webserver/database/execute_queries.rs @@ -64,6 +64,12 @@ pub fn stream_query_results_with_conn<'a>( request.server_timing.record("bind_params"); let connection = take_connection(&request.app_state.db, db_connection, request).await?; log::trace!("Executing query {:?}", query.sql); + let _query_span = tracing::info_span!( + "db.query", + db.statement = query.sql, + db.system = %request.app_state.db.info.dbms_name, + ) + .entered(); let mut stream = connection.fetch_many(query); let mut error = None; while let Some(elem) = stream.next().await { @@ -217,6 +223,12 @@ async fn execute_set_variable_query<'a>( query.sql ); + let _query_span = tracing::info_span!( + "db.query", + db.statement = query.sql, + db.system = %request.app_state.db.info.dbms_name, + ) + .entered(); let value = match connection.fetch_optional(query).await { Ok(Some(row)) => row_to_string(&row), Ok(None) => None, @@ -288,6 +300,12 @@ async fn take_connection<'a>( if let Some(c) = conn { return 
Ok(c); } + let pool_size = db.connection.size(); + let _acquire_span = tracing::info_span!( + "db.pool.acquire", + db.pool.size = pool_size, + ) + .entered(); match db.connection.acquire().await { Ok(c) => { log::debug!("Acquired a database connection"); diff --git a/src/webserver/http.rs b/src/webserver/http.rs index 68464e2b..e59c1c11 100644 --- a/src/webserver/http.rs +++ b/src/webserver/http.rs @@ -16,9 +16,8 @@ use actix_web::http::header::Accept; use actix_web::http::header::{ContentType, Header, HttpDate, IfModifiedSince, LastModified}; use actix_web::http::{header, StatusCode}; use actix_web::web::PayloadConfig; -use actix_web::{ - dev::ServiceResponse, middleware, middleware::Logger, web, App, HttpResponse, HttpServer, -}; +use actix_web::{dev::ServiceResponse, middleware, web, App, HttpResponse, HttpServer}; +use tracing_actix_web::TracingLogger; use super::error::{anyhow_err_to_actix, bind_error, send_anyhow_error}; use super::http_client::make_http_client; @@ -228,7 +227,11 @@ async fn render_sql( let (resp_send, resp_recv) = tokio::sync::oneshot::channel::(); let source_path: PathBuf = sql_file.source_path.clone(); - actix_web::rt::spawn(async move { + let exec_span = tracing::info_span!( + "sqlpage.exec", + sqlpage.file = %source_path.display(), + ); + actix_web::rt::spawn(tracing::Instrument::instrument(async move { let request_info = exec_ctx.request(); let request_context = RequestContext { is_embedded: request_info.url_params.contains_key("_sqlpage_embed"), @@ -267,7 +270,7 @@ async fn render_sql( send_anyhow_error(&err, resp_send, &app_state); } } - }); + }, exec_span)); resp_recv.await.map_err(ErrorInternalServerError) } @@ -435,7 +438,7 @@ pub fn create_app( // when receiving a request outside of the prefix, redirect to the prefix .default_service(fn_service(default_prefix_redirect)) .wrap(OidcMiddleware::new(&app_state)) - .wrap(Logger::default()) + .wrap(TracingLogger::default()) .wrap(default_headers()) .wrap(middleware::Condition::new( 
app_state.config.compress_responses, From f3a9ee2e700668e5f3733506ce576c337fb50f10 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Sun, 8 Mar 2026 19:21:47 +0100 Subject: [PATCH 02/39] fix OTel example: tracing init, Dockerfile, Tempo config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix tracing-log bridge initialization order (set subscriber first, then LogTracer) to avoid double-set panic - Add dedicated Dockerfile for example using release profile (avoids OOM with superoptimized LTO in Docker) - Use debian:trixie-slim runtime for glibc compatibility - Fix nginx image to nginx:otel (official image with OTel module) - Fix nginx.conf: move otel_trace directives into location block - Pin Tempo to 2.6.1 (latest has partition ring issues in single-node) - Fix otel-collector exporter alias (otlp → otlp_grpc) Co-Authored-By: Claude Opus 4.6 --- examples/opentelemetry-grafana/Dockerfile | 19 +++++++++++++++++++ .../opentelemetry-grafana/docker-compose.yml | 6 +++--- .../opentelemetry-grafana/nginx/nginx.conf | 4 ++-- .../opentelemetry-grafana/otel-collector.yaml | 4 ++-- examples/opentelemetry-grafana/tempo.yaml | 13 +++++++++++-- src/telemetry.rs | 16 +++++++++------- 6 files changed, 46 insertions(+), 16 deletions(-) create mode 100644 examples/opentelemetry-grafana/Dockerfile diff --git a/examples/opentelemetry-grafana/Dockerfile b/examples/opentelemetry-grafana/Dockerfile new file mode 100644 index 00000000..396a3789 --- /dev/null +++ b/examples/opentelemetry-grafana/Dockerfile @@ -0,0 +1,19 @@ +FROM rust:1.91-slim AS builder +WORKDIR /usr/src/sqlpage +RUN apt-get update && apt-get install -y --no-install-recommends \ + pkg-config libssl-dev unixodbc-dev && rm -rf /var/lib/apt/lists/* +COPY Cargo.toml Cargo.lock ./ +RUN mkdir src && echo "fn main() {}" > src/main.rs && mkdir -p src/webserver && cargo build --release 2>/dev/null || true +COPY . . 
+RUN touch src/main.rs && cargo build --release +RUN cp target/release/sqlpage /usr/local/bin/sqlpage + +FROM debian:trixie-slim +RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates unixodbc && rm -rf /var/lib/apt/lists/* +COPY --from=builder /usr/local/bin/sqlpage /usr/local/bin/sqlpage +COPY --from=builder /usr/src/sqlpage/sqlpage/sqlpage.db /etc/sqlpage/sqlpage.db +ENV SQLPAGE_WEB_ROOT=/var/www +ENV SQLPAGE_CONFIGURATION_DIRECTORY=/etc/sqlpage +WORKDIR /var/www +EXPOSE 8080 +CMD ["sqlpage"] diff --git a/examples/opentelemetry-grafana/docker-compose.yml b/examples/opentelemetry-grafana/docker-compose.yml index 0bad7920..74ade6b9 100644 --- a/examples/opentelemetry-grafana/docker-compose.yml +++ b/examples/opentelemetry-grafana/docker-compose.yml @@ -1,6 +1,6 @@ services: nginx: - image: ghcr.io/nginxinc/nginx-otel:latest + image: nginx:otel ports: - "80:80" volumes: @@ -12,7 +12,7 @@ services: sqlpage: build: context: ../.. - dockerfile: Dockerfile + dockerfile: examples/opentelemetry-grafana/Dockerfile environment: - DATABASE_URL=postgres://sqlpage:sqlpage@postgres:5432/sqlpage - OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 @@ -47,7 +47,7 @@ services: - tempo tempo: - image: grafana/tempo:latest + image: grafana/tempo:2.6.1 volumes: - ./tempo.yaml:/etc/tempo/config.yaml:ro command: ["-config.file=/etc/tempo/config.yaml"] diff --git a/examples/opentelemetry-grafana/nginx/nginx.conf b/examples/opentelemetry-grafana/nginx/nginx.conf index 32b61569..5a6b4322 100644 --- a/examples/opentelemetry-grafana/nginx/nginx.conf +++ b/examples/opentelemetry-grafana/nginx/nginx.conf @@ -8,8 +8,6 @@ http { } otel_service_name nginx; - otel_trace on; - otel_trace_context propagate; upstream sqlpage { server sqlpage:8080; @@ -19,6 +17,8 @@ http { listen 80; location / { + otel_trace on; + otel_trace_context propagate; proxy_pass http://sqlpage; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; diff --git 
a/examples/opentelemetry-grafana/otel-collector.yaml b/examples/opentelemetry-grafana/otel-collector.yaml index 779d0459..692035c4 100644 --- a/examples/opentelemetry-grafana/otel-collector.yaml +++ b/examples/opentelemetry-grafana/otel-collector.yaml @@ -11,7 +11,7 @@ processors: timeout: 1s exporters: - otlp/tempo: + otlp_grpc/tempo: endpoint: tempo:4317 tls: insecure: true @@ -21,4 +21,4 @@ service: traces: receivers: [otlp] processors: [batch] - exporters: [otlp/tempo] + exporters: [otlp_grpc/tempo] diff --git a/examples/opentelemetry-grafana/tempo.yaml b/examples/opentelemetry-grafana/tempo.yaml index da4cbe71..6c5fb02a 100644 --- a/examples/opentelemetry-grafana/tempo.yaml +++ b/examples/opentelemetry-grafana/tempo.yaml @@ -1,3 +1,5 @@ +stream_over_http_enabled: true + server: http_listen_port: 3200 @@ -8,10 +10,17 @@ distributor: grpc: endpoint: 0.0.0.0:4317 +ingester: + max_block_duration: 5m + +compactor: + compaction: + block_retention: 1h + storage: trace: backend: local - local: - path: /tmp/tempo/blocks wal: path: /tmp/tempo/wal + local: + path: /tmp/tempo/blocks diff --git a/src/telemetry.rs b/src/telemetry.rs index 7bfbe7ed..7cf45a57 100644 --- a/src/telemetry.rs +++ b/src/telemetry.rs @@ -48,10 +48,6 @@ fn init_otel_tracing() { use opentelemetry::trace::TracerProvider as _; use opentelemetry_sdk::propagation::TraceContextPropagator; use tracing_subscriber::layer::SubscriberExt; - use tracing_subscriber::util::SubscriberInitExt; - - // Bridge log::* macros → tracing events (must be called before subscriber init) - tracing_log::LogTracer::init().expect("Failed to set log tracer"); // W3C TraceContext propagation (traceparent header) global::set_text_map_propagator(TraceContextPropagator::new()); @@ -81,9 +77,15 @@ fn init_otel_tracing() { .with_timer(tracing_subscriber::fmt::time::uptime()) .with_target(true); - tracing_subscriber::registry() + // Build the subscriber and set it as global default. 
+ // We use tracing_log::LogTracer to bridge log::* → tracing, + // and set_global_default (not .init()) to avoid double-setting the log logger. + let subscriber = tracing_subscriber::registry() .with(filter) .with(fmt_layer) - .with(otel_layer) - .init(); + .with(otel_layer); + + tracing::subscriber::set_global_default(subscriber) + .expect("Failed to set global tracing subscriber"); + tracing_log::LogTracer::init().expect("Failed to set log→tracing bridge"); } From c742abe5c3091f8d1b9bdac836b2654ca3bc96e9 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Sun, 8 Mar 2026 20:00:36 +0100 Subject: [PATCH 03/39] propagate trace context to PostgreSQL via application_name After acquiring a DB connection, set the W3C traceparent as the PostgreSQL application_name (or MySQL session variable). This makes trace IDs visible in pg_stat_activity and PostgreSQL logs, enabling direct correlation between Grafana Tempo traces and database-side monitoring. Co-Authored-By: Claude Opus 4.6 --- src/webserver/database/execute_queries.rs | 38 ++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/src/webserver/database/execute_queries.rs b/src/webserver/database/execute_queries.rs index c5172c70..c6e61f64 100644 --- a/src/webserver/database/execute_queries.rs +++ b/src/webserver/database/execute_queries.rs @@ -311,7 +311,9 @@ async fn take_connection<'a>( log::debug!("Acquired a database connection"); request.server_timing.record("db_conn"); *conn = Some(c); - Ok(conn.as_mut().unwrap()) + let connection = conn.as_mut().unwrap(); + set_trace_context(connection, db).await; + Ok(connection) } Err(e) => { let db_name = db.connection.any_kind(); @@ -322,6 +324,40 @@ async fn take_connection<'a>( } } +/// Sets the current OTel trace context on the database connection so it is visible +/// in `pg_stat_activity.application_name` (PostgreSQL) or as a session variable (MySQL). +/// This allows correlating SQLPage traces with database-side monitoring. 
+async fn set_trace_context(connection: &mut AnyConnection, db: &Database) { + use opentelemetry::trace::TraceContextExt; + use tracing_opentelemetry::OpenTelemetrySpanExt; + + let span = tracing::Span::current(); + let context = span.context(); + let otel_span = context.span(); + let span_context = otel_span.span_context(); + if !span_context.is_valid() { + return; + } + let traceparent = format!( + "00-{}-{}-{:02x}", + span_context.trace_id(), + span_context.span_id(), + span_context.trace_flags() + ); + let sql = match db.info.kind { + sqlx::any::AnyKind::Postgres => { + format!("SET application_name = 'sqlpage {traceparent}'") + } + sqlx::any::AnyKind::MySql => { + format!("SET @traceparent = '{traceparent}'") + } + _ => return, + }; + if let Err(e) = connection.execute(sql.as_str()).await { + log::debug!("Failed to set trace context on connection: {e}"); + } +} + #[inline] fn parse_single_sql_result( source_file: &Path, From 2bb370ade36661889aa06b5dddca12e3f5382d17 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Sun, 8 Mar 2026 20:44:41 +0100 Subject: [PATCH 04/39] rewrite OTel example README with setup guides for all major providers Comprehensive documentation covering: - Step-by-step quick start for the Docker Compose example - How OpenTelemetry works (spans, collectors, backends) - Setup guides for Grafana Tempo, Jaeger, Grafana Cloud, Datadog, Honeycomb, New Relic, and Axiom with exact env vars and doc links - PostgreSQL trace correlation via application_name - Environment variable reference - Troubleshooting section Co-Authored-By: Claude Opus 4.6 --- examples/opentelemetry-grafana/README.md | 387 +++++++++++++++++++++-- 1 file changed, 359 insertions(+), 28 deletions(-) diff --git a/examples/opentelemetry-grafana/README.md b/examples/opentelemetry-grafana/README.md index 197d0379..fa737f9e 100644 --- a/examples/opentelemetry-grafana/README.md +++ b/examples/opentelemetry-grafana/README.md @@ -1,52 +1,383 @@ -# SQLPage + OpenTelemetry + Grafana Example 
+# Distributed Tracing for SQLPage with OpenTelemetry -This example demonstrates end-to-end distributed tracing with SQLPage. +SQLPage has built-in support for [OpenTelemetry](https://opentelemetry.io/) (OTel), +an open standard for collecting traces, metrics, and logs from your applications. +When enabled, every HTTP request to SQLPage produces a **trace** — a timeline of +everything that happened to serve that request, from receiving it to querying the +database and rendering the response. -## Architecture +This is useful for: -``` -Browser → nginx (with OTel module) → SQLPage → PostgreSQL - ↓ ↓ - OTel Collector ←───────────┘ - ↓ - Grafana Tempo - ↓ - Grafana (UI) -``` +- **Debugging slow pages**: see exactly which SQL query is taking the longest. +- **Diagnosing connection pool exhaustion**: see how long requests wait for a database connection. +- **End-to-end visibility**: follow a single user request from your reverse proxy (nginx, Caddy, etc.) + through SQLPage and into PostgreSQL. + +## Quick start (this example) + +This directory contains a ready-to-run Docker Compose stack that demonstrates +the full tracing pipeline. No prior OpenTelemetry experience is needed. -## How to run +### Prerequisites + +- [Docker](https://docs.docker.com/get-docker/) and + [Docker Compose](https://docs.docker.com/compose/install/) installed on your machine. 
+ +### Run ```bash +cd examples/opentelemetry-grafana docker compose up --build ``` -## How to explore traces +This starts six services: + +| Service | Role | Port | +|------------------|-----------------------------------------------------------|---------------| +| **nginx** | Reverse proxy, creates the root trace span | `localhost:80` | +| **SQLPage** | Your application, sends traces to the collector | (internal 8080) | +| **PostgreSQL** | Database | (internal 5432) | +| **OTel Collector** | Receives traces and forwards them to Tempo | `localhost:4318` | +| **Tempo** | Trace storage backend | (internal 3200) | +| **Grafana** | Web UI to explore traces | `localhost:3000` | + +### Explore traces -1. Open the todo app at [http://localhost](http://localhost) and add a few items. +1. Open the todo app at [http://localhost](http://localhost) — add a few items, click to toggle them. 2. Open Grafana at [http://localhost:3000](http://localhost:3000). -3. Go to **Explore** → select the **Tempo** datasource. -4. Search for traces by service name (`nginx` or `sqlpage`). +3. In the left sidebar, click **Explore** (compass icon). +4. Make sure **Tempo** is selected as the datasource (top-left dropdown). +5. Click **Search** tab, then **Run query**. You should see a list of recent traces. +6. Click any trace to see the full span waterfall. 
-## What to look for in traces +### What you will see in a trace -Each HTTP request produces a trace with: +Each HTTP request produces a tree of **spans** (timed operations): -- **nginx root span** — the entry point, with `traceparent` propagated to SQLPage -- **SQLPage HTTP request span** — child of nginx, created by `tracing-actix-web` -- **`sqlpage.exec`** — SQL file execution, with `sqlpage.file` attribute -- **`db.pool.acquire`** — time spent waiting for a database connection from the pool -- **`db.query`** — individual SQL statement execution, with `db.statement` and `db.system` +``` +[nginx] GET / ← root span (created by nginx) + └─ [sqlpage] GET / ← HTTP request span + └─ sqlpage.exec ← SQL file execution (index.sql) + ├─ db.pool.acquire ← time waiting for a DB connection + └─ db.query ← the actual SQL query + db.statement = "SELECT title, ..." + db.system = "PostgreSQL" +``` + +Key attributes on each span: + +| Span | Key attributes | +|---------------------|--------------------------------------------------------------| +| HTTP request | `http.method`, `http.target`, `http.status_code`, `http.user_agent` | +| `sqlpage.exec` | `sqlpage.file` — which `.sql` file was executed | +| `db.pool.acquire` | `db.pool.size` — current pool size when acquiring | +| `db.query` | `db.statement` — the full SQL text; `db.system` — database type | -## Testing pool pressure +### PostgreSQL correlation -To see `db.pool.acquire` spans with queuing time, reduce the pool size: +SQLPage automatically sets the +[`application_name`](https://www.postgresql.org/docs/current/runtime-config-logging.html#GUC-APPLICATION-NAME) +on each database connection to include the W3C +[traceparent](https://www.w3.org/TR/trace-context/#traceparent-header). 
+This means you can: + +- See trace IDs in `pg_stat_activity` when monitoring live queries: + ```sql + SELECT application_name, query, state FROM pg_stat_activity; + -- application_name: sqlpage 00-abc123...-def456...-01 + ``` +- Include trace IDs in PostgreSQL logs by adding `%a` to + [`log_line_prefix`](https://www.postgresql.org/docs/current/runtime-config-logging.html#GUC-LOG-LINE-PREFIX). + +### Testing pool pressure + +To simulate database connection pool exhaustion (a common production issue), +reduce the pool size to 1 in `sqlpage/sqlpage.json`: ```json -// sqlpage/sqlpage.json { "listen_on": "0.0.0.0:8080", "max_database_pool_connections": 1 } ``` -Then send concurrent requests (e.g., open multiple browser tabs quickly) and observe the acquire duration in Grafana. +Restart (`docker compose restart sqlpage`), then open several browser tabs +to `http://localhost` simultaneously. In Grafana, you will see `db.pool.acquire` +spans with longer durations as requests queue up waiting for the single connection. + +--- + +## How it works + +### Enabling tracing in SQLPage + +Tracing is **built into SQLPage** — there is nothing to install or compile. +It activates automatically when you set the `OTEL_EXPORTER_OTLP_ENDPOINT` +environment variable. When this variable is not set, SQLPage behaves exactly +as before (plain text logs, no tracing overhead). + +**Minimal setup — just two environment variables:** + +```bash +# Where to send traces (an OTLP-compatible endpoint) +export OTEL_EXPORTER_OTLP_ENDPOINT="http://localhost:4318" + +# A name to identify this service in traces +export OTEL_SERVICE_NAME="sqlpage" + +# Now start SQLPage as usual +sqlpage +``` + +These are [standard OpenTelemetry environment variables](https://opentelemetry.io/docs/specs/otel/protocol/exporter/) +understood by all OTel-compatible tools. SQLPage reads them directly — no +`sqlpage.json` configuration is needed for tracing. 
+ +### The role of each component + +**OpenTelemetry** is a standard, not a product. It defines a protocol (OTLP) for +sending trace data. Here is how the pieces fit together: + +``` + Your app (SQLPage) Collector (optional) Backend (storage + UI) +┌─────────────────┐ ┌──────────────────┐ ┌─────────────────────┐ +│ Creates spans │──OTLP──│ Routes, batches, │──OTLP──│ Stores traces and │ +│ for each request │ │ and exports data │ │ provides a query UI │ +└─────────────────┘ └──────────────────┘ └─────────────────────┘ +``` + +- **SQLPage** generates trace data and sends it via the OTLP HTTP protocol. +- A **collector** (optional) receives traces and forwards them to one or more backends. + Useful for buffering, sampling, or fanning out to multiple destinations. + You can skip the collector and send directly from SQLPage to most backends. +- A **backend** stores the traces and lets you search and visualize them. + +### Trace context propagation + +When a reverse proxy (like nginx) sits in front of SQLPage, you want the trace +to start at nginx and continue into SQLPage as a single, connected trace. +This works via the +[W3C Trace Context](https://www.w3.org/TR/trace-context/) standard: +nginx adds a `traceparent` HTTP header to the request it forwards to SQLPage, +and SQLPage reads it to continue the same trace. + +Most modern reverse proxies and load balancers support this. +For nginx specifically, use the [`ngx_otel_module`](https://nginx.org/en/docs/ngx_otel_module.html) +(included in the `nginx:otel` Docker image). + +--- + +## Setup guides by deployment scenario + +### Self-hosted with Grafana Tempo + +This is what the Docker Compose example in this directory uses. +[Grafana Tempo](https://grafana.com/oss/tempo/) is a free, open-source trace backend. + +**Components:** +- [Grafana Tempo](https://grafana.com/docs/tempo/latest/) stores the traces. +- [Grafana](https://grafana.com/docs/grafana/latest/) provides the web UI. 
+- An [OTel Collector](https://opentelemetry.io/docs/collector/) sits between + SQLPage and Tempo (optional but recommended for production). + +**SQLPage environment variables:** + +```bash +OTEL_EXPORTER_OTLP_ENDPOINT=http://:4318 +OTEL_SERVICE_NAME=sqlpage +``` + +**Links:** +- [Tempo installation guide](https://grafana.com/docs/tempo/latest/setup/) +- [OTel Collector installation](https://opentelemetry.io/docs/collector/installation/) + +### Self-hosted with Jaeger + +[Jaeger](https://www.jaegertracing.io/) is another popular open-source tracing +backend. Version 2+ natively accepts OTLP — no collector needed. + +**Start Jaeger with one command:** + +```bash +docker run -d --name jaeger \ + -p 16686:16686 \ + -p 4317:4317 \ + -p 4318:4318 \ + jaegertracing/jaeger:latest +``` + +**SQLPage environment variables:** + +```bash +OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 +OTEL_SERVICE_NAME=sqlpage +``` + +Open the Jaeger UI at [http://localhost:16686](http://localhost:16686) to explore traces. + +**Links:** +- [Jaeger getting started](https://www.jaegertracing.io/docs/latest/getting-started/) + +### Grafana Cloud + +[Grafana Cloud](https://grafana.com/products/cloud/) has a free tier that +includes trace storage. SQLPage can send traces directly — no collector needed. + +**SQLPage environment variables:** + +```bash +OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp-gateway-prod-.grafana.net/otlp +OTEL_EXPORTER_OTLP_HEADERS="Authorization=Basic " +OTEL_SERVICE_NAME=sqlpage +``` + +Replace: +- `` with your Grafana Cloud region (e.g., `us-east-0`, `eu-west-2`). + Find it in your Grafana Cloud portal under **My Account** > **Tempo**. +- `` with the Base64 encoding of + `:`. Generate a token in your Grafana Cloud + portal under **My Account** > **API Keys**. 
+ + On macOS/Linux, generate the Base64 value with: + ```bash + echo -n "123456:glc_your_token_here" | base64 + ``` + +**Links:** +- [Send data via OTLP to Grafana Cloud](https://grafana.com/docs/grafana-cloud/send-data/otlp/send-data-otlp/) + +### Datadog + +[Datadog](https://www.datadoghq.com/) supports OTLP ingestion through the +Datadog Agent. + +**1. Run the Datadog Agent** with OTLP ingest enabled: + +```bash +docker run -d --name datadog-agent \ + -e DD_API_KEY= \ + -e DD_OTLP_CONFIG_RECEIVER_PROTOCOLS_HTTP_ENDPOINT=0.0.0.0:4318 \ + -e DD_SITE=datadoghq.com \ + -p 4318:4318 \ + gcr.io/datadoghq/agent:latest +``` + +**2. Point SQLPage to the Agent:** + +```bash +OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 +OTEL_SERVICE_NAME=sqlpage +``` + +Traces appear in the Datadog **APM > Traces** section. + +**Links:** +- [OTLP ingestion in the Datadog Agent](https://docs.datadoghq.com/opentelemetry/setup/otlp_ingest_in_the_agent/) + +### Honeycomb + +[Honeycomb](https://www.honeycomb.io/) accepts OTLP directly — no collector needed. + +**SQLPage environment variables:** + +```bash +OTEL_EXPORTER_OTLP_ENDPOINT=https://api.honeycomb.io +OTEL_EXPORTER_OTLP_HEADERS="x-honeycomb-team=" +OTEL_SERVICE_NAME=sqlpage +``` + +For the EU region, use `https://api.eu1.honeycomb.io` instead. + +**Links:** +- [Send data with OpenTelemetry — Honeycomb docs](https://docs.honeycomb.io/send-data/opentelemetry/) + +### New Relic + +[New Relic](https://newrelic.com/) accepts OTLP directly. + +**SQLPage environment variables:** + +```bash +OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp.nr-data.net +OTEL_EXPORTER_OTLP_HEADERS="api-key=" +OTEL_SERVICE_NAME=sqlpage +``` + +For the EU region, use `https://otlp.eu01.nr-data.net` instead. + +Find your Ingest License Key in the New Relic UI under +**API Keys** (type: `INGEST - LICENSE`). 
+ +**Links:** +- [New Relic OTLP endpoint configuration](https://docs.newrelic.com/docs/opentelemetry/best-practices/opentelemetry-otlp/) + +### Axiom + +[Axiom](https://axiom.co/) accepts OTLP directly. + +**SQLPage environment variables:** + +```bash +OTEL_EXPORTER_OTLP_ENDPOINT=https://api.axiom.co +OTEL_EXPORTER_OTLP_HEADERS="Authorization=Bearer ,X-Axiom-Dataset=" +OTEL_SERVICE_NAME=sqlpage +``` + +**Links:** +- [Send OpenTelemetry data to Axiom](https://axiom.co/docs/send-data/opentelemetry) + +--- + +## Environment variable reference + +These are [standard OpenTelemetry variables](https://opentelemetry.io/docs/specs/otel/protocol/exporter/), +not specific to SQLPage. + +| Variable | Required? | Description | Example | +|-----------------------------------|-----------|------------------------------------------------|--------------------------------------| +| `OTEL_EXPORTER_OTLP_ENDPOINT` | Yes | Base URL of the OTLP receiver | `http://localhost:4318` | +| `OTEL_SERVICE_NAME` | No | Service name shown in traces (default: `unknown_service`) | `sqlpage` | +| `OTEL_EXPORTER_OTLP_HEADERS` | No | Comma-separated `key=value` pairs for auth headers | `api-key=abc123` | +| `OTEL_EXPORTER_OTLP_PROTOCOL` | No | Protocol (default: `http/protobuf`) | `http/protobuf` | +| `RUST_LOG` | No | Filter which spans/logs are emitted | `sqlpage=debug,tracing_actix_web=info` | + +When `OTEL_EXPORTER_OTLP_ENDPOINT` is **not set**, SQLPage uses plain text +logging only (same behavior as versions before tracing support was added). + +--- + +## Troubleshooting + +### No traces appear + +1. **Check that SQLPage sees the endpoint.** Look for this line in the startup logs: + ``` + OpenTelemetry tracing enabled (OTEL_EXPORTER_OTLP_ENDPOINT is set) + ``` + If you don't see it, the environment variable is not reaching SQLPage. + +2. 
**Check that the collector/backend is reachable.** From the SQLPage host, try: + ```bash + curl -v http://:4318/v1/traces + ``` + You should get a response (even if it's an error like "no data"), not a connection refused. + +3. **Check the collector logs** for export errors (e.g., authentication failures). + +### Traces are disconnected (nginx and SQLPage show as separate traces) + +This means the `traceparent` header is not being propagated. Check that: + +- Your reverse proxy is configured to inject/propagate the `traceparent` header. +- For nginx, you need the `ngx_otel_module` with `otel_trace_context propagate` + in the location block. See the `nginx/nginx.conf` in this example. + +### Spans are missing (e.g., no `db.query` spans) + +The `RUST_LOG` / `OTEL_LOG_LEVEL` filter might be too restrictive. +SQLPage emits spans at the `INFO` level by default. Make sure your filter +includes `sqlpage=info`: + +```bash +RUST_LOG="sqlpage=info,actix_web=info,tracing_actix_web=info" +``` From 9a879e5727486e370eface7dfae8c3eb89971a1b Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Sun, 8 Mar 2026 21:14:07 +0100 Subject: [PATCH 05/39] fix todo example: use :title (POST variable) instead of $title The form submits via POST, so the title field must be referenced with the : prefix (POST parameter) rather than $ (GET parameter). 
Co-Authored-By: Claude Opus 4.6 --- examples/opentelemetry-grafana/website/add_todo.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/opentelemetry-grafana/website/add_todo.sql b/examples/opentelemetry-grafana/website/add_todo.sql index 6e70d467..32c4aa7f 100644 --- a/examples/opentelemetry-grafana/website/add_todo.sql +++ b/examples/opentelemetry-grafana/website/add_todo.sql @@ -1,8 +1,8 @@ -- Toggle an existing todo UPDATE todos SET done = $done::boolean WHERE id = $id::int; --- Insert a new todo if title is provided +-- Insert a new todo if title is provided via the form (POST) INSERT INTO todos (title) -SELECT $title WHERE $title IS NOT NULL AND length($title) > 0; +SELECT :title WHERE :title IS NOT NULL AND length(:title) > 0; SELECT 'redirect' AS component, '/' AS link; From ec9f3f97074ba3595415efbc9cc83d70dae5f99a Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Sun, 8 Mar 2026 22:12:48 +0100 Subject: [PATCH 06/39] set code.filepath and code.lineno span attributes to user SQL files OTel span attributes now reference the user's .sql file path and line number instead of the SQLPage Rust source code. Also improves span naming, adds JSON log formatting, custom root span builder, and Grafana dashboard provisioning for the OTel example. 
Co-Authored-By: Claude Opus 4.6 --- Cargo.lock | 41 +++- Cargo.toml | 3 +- examples/opentelemetry-grafana/README.md | 12 +- .../opentelemetry-grafana/docker-compose.yml | 4 + .../grafana/dashboards.yaml | 12 + .../grafana/sqlpage-home.json | 215 ++++++++++++++++++ .../opentelemetry-grafana/nginx/nginx.conf | 1 + examples/opentelemetry-grafana/tempo.yaml | 26 +++ src/main.rs | 3 +- src/telemetry.rs | 20 +- src/webserver/database/execute_queries.rs | 16 +- src/webserver/http.rs | 174 ++++++++++---- 12 files changed, 456 insertions(+), 71 deletions(-) create mode 100644 examples/opentelemetry-grafana/grafana/dashboards.yaml create mode 100644 examples/opentelemetry-grafana/grafana/sqlpage-home.json diff --git a/Cargo.lock b/Cargo.lock index 5a565493..ee55f3f0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2471,6 +2471,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.17" @@ -2543,6 +2552,23 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "json-subscriber" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6456771bcb3511e2afa6a6a8c9ce2730be80803116a2839931476740ab3b86bc" +dependencies = [ + "opentelemetry", + "serde", + "serde_json", + "tracing", + "tracing-core", + "tracing-opentelemetry", + "tracing-serde", + "tracing-subscriber", + "uuid", +] + [[package]] name = "json5" version = "0.4.1" @@ -3260,7 +3286,7 @@ dependencies = [ "ed25519-dalek", "hmac", "http 1.4.0", - "itertools", + "itertools 0.10.5", "log", "oauth2", "p256", @@ -3729,7 +3755,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", - "itertools", + "itertools 0.14.0", 
"proc-macro2", "quote", "syn", @@ -4571,6 +4597,7 @@ dependencies = [ "handlebars", "hmac", "include_dir", + "json-subscriber", "lambda-web", "libflate", "log", @@ -5207,6 +5234,16 @@ dependencies = [ "web-time", ] +[[package]] +name = "tracing-serde" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704b1aeb7be0d0a84fc9828cae51dab5970fee5088f83d1dd7ee6f6246fc6ff1" +dependencies = [ + "serde", + "tracing-core", +] + [[package]] name = "tracing-subscriber" version = "0.3.22" diff --git a/Cargo.toml b/Cargo.toml index c59bd77d..8ea102ae 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -84,8 +84,9 @@ odbc-sys = { version = "0.29.0", optional = true } # OpenTelemetry / tracing tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["registry", "env-filter"] } +json-subscriber = { version = "0.2.7", features = ["tracing-opentelemetry-0-32"] } tracing-opentelemetry = "0.32" -tracing-actix-web = { version = "0.7", features = ["opentelemetry_0_31"] } +tracing-actix-web = { version = "0.7", default-features = false, features = ["opentelemetry_0_31"] } tracing-log = "0.2" opentelemetry = "0.31" opentelemetry_sdk = { version = "0.31", features = ["rt-tokio-current-thread"] } diff --git a/examples/opentelemetry-grafana/README.md b/examples/opentelemetry-grafana/README.md index fa737f9e..9d0a941c 100644 --- a/examples/opentelemetry-grafana/README.md +++ b/examples/opentelemetry-grafana/README.md @@ -55,9 +55,9 @@ This starts six services: Each HTTP request produces a tree of **spans** (timed operations): ``` -[nginx] GET / ← root span (created by nginx) - └─ [sqlpage] GET / ← HTTP request span - └─ sqlpage.exec ← SQL file execution (index.sql) +[nginx] GET /todos ← root span (created by nginx) + └─ [sqlpage] GET /todos ← HTTP request span + └─ [sqlpage] SQL website/todos.sql ← SQL file execution ├─ db.pool.acquire ← time waiting for a DB connection └─ db.query ← the actual SQL query db.statement = "SELECT title, ..." 
@@ -69,7 +69,7 @@ Key attributes on each span: | Span | Key attributes | |---------------------|--------------------------------------------------------------| | HTTP request | `http.method`, `http.target`, `http.status_code`, `http.user_agent` | -| `sqlpage.exec` | `sqlpage.file` — which `.sql` file was executed | +| SQL file execution | `sqlpage.file` — which `.sql` file was executed | | `db.pool.acquire` | `db.pool.size` — current pool size when acquiring | | `db.query` | `db.statement` — the full SQL text; `db.system` — database type | @@ -370,7 +370,9 @@ This means the `traceparent` header is not being propagated. Check that: - Your reverse proxy is configured to inject/propagate the `traceparent` header. - For nginx, you need the `ngx_otel_module` with `otel_trace_context propagate` - in the location block. See the `nginx/nginx.conf` in this example. + in the location block. Setting `otel_span_name "$request_method $uri"` also keeps + the nginx span name aligned with the actual request path. See the `nginx/nginx.conf` + in this example. 
### Spans are missing (e.g., no `db.query` spans) diff --git a/examples/opentelemetry-grafana/docker-compose.yml b/examples/opentelemetry-grafana/docker-compose.yml index 74ade6b9..914736fd 100644 --- a/examples/opentelemetry-grafana/docker-compose.yml +++ b/examples/opentelemetry-grafana/docker-compose.yml @@ -59,7 +59,11 @@ services: environment: - GF_AUTH_ANONYMOUS_ENABLED=true - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin + - GF_DASHBOARDS_DEFAULT_HOME_DASHBOARD_PATH=/etc/grafana/provisioning/dashboards/sqlpage/sqlpage-home.json + - GF_NEWS_NEWS_FEED_ENABLED=false volumes: - ./grafana/datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:ro + - ./grafana/dashboards.yaml:/etc/grafana/provisioning/dashboards/dashboards.yaml:ro + - ./grafana:/etc/grafana/provisioning/dashboards/sqlpage:ro depends_on: - tempo diff --git a/examples/opentelemetry-grafana/grafana/dashboards.yaml b/examples/opentelemetry-grafana/grafana/dashboards.yaml new file mode 100644 index 00000000..9c6d001e --- /dev/null +++ b/examples/opentelemetry-grafana/grafana/dashboards.yaml @@ -0,0 +1,12 @@ +apiVersion: 1 + +providers: + - name: SQLPage Tracing + orgId: 1 + folder: "" + type: file + disableDeletion: true + updateIntervalSeconds: 30 + allowUiUpdates: false + options: + path: /etc/grafana/provisioning/dashboards/sqlpage diff --git a/examples/opentelemetry-grafana/grafana/sqlpage-home.json b/examples/opentelemetry-grafana/grafana/sqlpage-home.json new file mode 100644 index 00000000..01c12a5e --- /dev/null +++ b/examples/opentelemetry-grafana/grafana/sqlpage-home.json @@ -0,0 +1,215 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "datasource": { + 
"type": "grafana", + "uid": "-- Grafana --" + }, + "gridPos": { + "h": 4, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

Recent SQLPage traces

Open http://localhost and interact with the app. New requests will appear here automatically.

Click any trace ID to open the full span waterfall. Use the time picker in the top right if the table is empty.

", + "mode": "html" + }, + "pluginVersion": "12.4.0", + "title": "Overview", + "type": "text" + }, + { + "datasource": { + "type": "tempo", + "uid": "P214B5B846CF3925F" + }, + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "traceID" + }, + "properties": [ + { + "id": "custom.width", + "value": 300 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "startTime" + }, + "properties": [ + { + "id": "custom.width", + "value": 190 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "traceService" + }, + "properties": [ + { + "id": "custom.width", + "value": 140 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "traceDuration" + }, + "properties": [ + { + "id": "custom.width", + "value": 120 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "nested" + }, + "properties": [ + { + "id": "custom.hidden", + "value": true + } + ] + } + ] + }, + "gridPos": { + "h": 18, + "w": 24, + "x": 0, + "y": 4 + }, + "id": 2, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "frameIndex": 0, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Start time" + } + ] + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "tempo", + "uid": "P214B5B846CF3925F" + }, + "limit": 50, + "query": "{resource.service.name != nil}", + "queryType": "traceqlSearch", + "refId": "A", + "tableType": "traces" + } + ], + "timeFrom": "1h", + "title": "Trace List", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "nested": true + }, + "indexByName": { + "startTime": 1, + "traceDuration": 4, + "traceID": 0, + "traceName": 3, + "traceService": 2 + }, + "renameByName": { + "startTime": "Start time", + "traceDuration": "Duration", + "traceID": 
"Trace ID", + "traceName": "Route", + "traceService": "Service" + } + } + } + ], + "type": "table" + } + ], + "refresh": "5s", + "schemaVersion": 41, + "style": "dark", + "tags": [ + "sqlpage", + "tracing" + ], + "templating": { + "list": [] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "SQLPage Tracing Home", + "uid": "sqlpage-tracing-home", + "version": 2 +} diff --git a/examples/opentelemetry-grafana/nginx/nginx.conf b/examples/opentelemetry-grafana/nginx/nginx.conf index 5a6b4322..41fa87fe 100644 --- a/examples/opentelemetry-grafana/nginx/nginx.conf +++ b/examples/opentelemetry-grafana/nginx/nginx.conf @@ -18,6 +18,7 @@ http { location / { otel_trace on; + otel_span_name "$request_method $uri"; otel_trace_context propagate; proxy_pass http://sqlpage; proxy_set_header Host $host; diff --git a/examples/opentelemetry-grafana/tempo.yaml b/examples/opentelemetry-grafana/tempo.yaml index 6c5fb02a..a8d79406 100644 --- a/examples/opentelemetry-grafana/tempo.yaml +++ b/examples/opentelemetry-grafana/tempo.yaml @@ -17,6 +17,32 @@ compactor: compaction: block_retention: 1h +metrics_generator: + ring: + kvstore: + store: inmemory + registry: + external_labels: + source: tempo + cluster: docker-compose + processor: + service_graphs: {} + span_metrics: {} + local_blocks: + flush_to_storage: true + storage: + path: /tmp/tempo/generator/wal + traces_storage: + path: /tmp/tempo/generator/traces + +overrides: + defaults: + metrics_generator: + processors: + - service-graphs + - span-metrics + - local-blocks + storage: trace: backend: local diff --git a/src/main.rs b/src/main.rs index 5dbcebc2..ed683025 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,6 @@ use sqlpage::{ app_config::AppConfig, - cli, - telemetry, + cli, telemetry, webserver::{self, Database}, AppState, }; diff --git a/src/telemetry.rs b/src/telemetry.rs index 7cf45a57..bcf0ef59 100644 --- a/src/telemetry.rs +++ b/src/telemetry.rs @@ -10,12 
+10,11 @@ use opentelemetry_sdk::trace::SdkTracerProvider; static TRACER_PROVIDER: OnceLock = OnceLock::new(); -/// Initializes logging / tracing. Returns `true` if OTel was activated. +/// Initializes logging / tracing. Returns `true` if `OTel` was activated. +#[must_use] pub fn init_telemetry() -> bool { let otel_endpoint = env::var("OTEL_EXPORTER_OTLP_ENDPOINT").ok(); - let otel_active = otel_endpoint - .as_deref() - .is_some_and(|v| !v.is_empty()); + let otel_active = otel_endpoint.as_deref().is_some_and(|v| !v.is_empty()); if otel_active { init_otel_tracing(); @@ -26,7 +25,7 @@ pub fn init_telemetry() -> bool { otel_active } -/// Shuts down the OTel tracer provider, flushing pending spans. +/// Shuts down the `OTel` tracer provider, flushing pending spans. pub fn shutdown_telemetry() { if let Some(provider) = TRACER_PROVIDER.get() { if let Err(e) = provider.shutdown() { @@ -69,13 +68,12 @@ fn init_otel_tracing() { let otel_layer = tracing_opentelemetry::layer().with_tracer(tracer); let filter = tracing_subscriber::EnvFilter::try_from_default_env() - .unwrap_or_else(|_| { - "sqlpage=info,actix_web=info,tracing_actix_web=info".into() - }); + .unwrap_or_else(|_| "sqlpage=info,actix_web=info,tracing_actix_web=info".into()); - let fmt_layer = tracing_subscriber::fmt::layer() - .with_timer(tracing_subscriber::fmt::time::uptime()) - .with_target(true); + let fmt_layer = json_subscriber::layer() + .with_current_span(true) + .with_span_list(false) + .with_opentelemetry_ids(true); // Build the subscriber and set it as global default. 
// We use tracing_log::LogTracer to bridge log::* → tracing, diff --git a/src/webserver/database/execute_queries.rs b/src/webserver/database/execute_queries.rs index c6e61f64..81edaa6a 100644 --- a/src/webserver/database/execute_queries.rs +++ b/src/webserver/database/execute_queries.rs @@ -68,6 +68,8 @@ pub fn stream_query_results_with_conn<'a>( "db.query", db.statement = query.sql, db.system = %request.app_state.db.info.dbms_name, + code.filepath = %source_file.display(), + code.lineno = stmt.query_position.start.line, ) .entered(); let mut stream = connection.fetch_many(query); @@ -227,6 +229,8 @@ async fn execute_set_variable_query<'a>( "db.query", db.statement = query.sql, db.system = %request.app_state.db.info.dbms_name, + code.filepath = %source_file.display(), + code.lineno = statement.query_position.start.line, ) .entered(); let value = match connection.fetch_optional(query).await { @@ -301,11 +305,7 @@ async fn take_connection<'a>( return Ok(c); } let pool_size = db.connection.size(); - let _acquire_span = tracing::info_span!( - "db.pool.acquire", - db.pool.size = pool_size, - ) - .entered(); + let _acquire_span = tracing::info_span!("db.pool.acquire", db.pool.size = pool_size,).entered(); match db.connection.acquire().await { Ok(c) => { log::debug!("Acquired a database connection"); @@ -324,9 +324,9 @@ async fn take_connection<'a>( } } -/// Sets the current OTel trace context on the database connection so it is visible -/// in `pg_stat_activity.application_name` (PostgreSQL) or as a session variable (MySQL). -/// This allows correlating SQLPage traces with database-side monitoring. +/// Sets the current `OTel` trace context on the database connection so it is visible +/// in `pg_stat_activity.application_name` (`PostgreSQL`) or as a session variable (`MySQL`). +/// This allows correlating `SQLPage` traces with database-side monitoring. 
async fn set_trace_context(connection: &mut AnyConnection, db: &Database) { use opentelemetry::trace::TraceContextExt; use tracing_opentelemetry::OpenTelemetrySpanExt; diff --git a/src/webserver/http.rs b/src/webserver/http.rs index e59c1c11..807bf74e 100644 --- a/src/webserver/http.rs +++ b/src/webserver/http.rs @@ -16,8 +16,9 @@ use actix_web::http::header::Accept; use actix_web::http::header::{ContentType, Header, HttpDate, IfModifiedSince, LastModified}; use actix_web::http::{header, StatusCode}; use actix_web::web::PayloadConfig; -use actix_web::{dev::ServiceResponse, middleware, web, App, HttpResponse, HttpServer}; -use tracing_actix_web::TracingLogger; +use actix_web::{dev::ServiceResponse, middleware, web, App, Error, HttpResponse, HttpServer}; +use tracing::Span; +use tracing_actix_web::{DefaultRootSpanBuilder, RootSpanBuilder, TracingLogger}; use super::error::{anyhow_err_to_actix, bind_error, send_anyhow_error}; use super::http_client::make_http_client; @@ -34,6 +35,7 @@ use anyhow::{bail, Context}; use chrono::{DateTime, Utc}; use futures_util::stream::Stream; use futures_util::StreamExt; +use std::borrow::Cow; use std::path::PathBuf; use std::pin::Pin; use std::sync::Arc; @@ -228,52 +230,119 @@ async fn render_sql( let (resp_send, resp_recv) = tokio::sync::oneshot::channel::(); let source_path: PathBuf = sql_file.source_path.clone(); let exec_span = tracing::info_span!( - "sqlpage.exec", + "sqlpage.file", + otel.name = %sql_execution_span_name(&source_path), sqlpage.file = %source_path.display(), + code.filepath = %source_path.display(), ); - actix_web::rt::spawn(tracing::Instrument::instrument(async move { - let request_info = exec_ctx.request(); - let request_context = RequestContext { - is_embedded: request_info.url_params.contains_key("_sqlpage_embed"), - source_path, - content_security_policy: ContentSecurityPolicy::with_random_nonce(), - server_timing: Arc::clone(&request_info.server_timing), - response_format, - }; - let mut conn = None; - let 
database_entries_stream = - stream_query_results_with_conn(&sql_file, &exec_ctx, &mut conn); - let database_entries_stream = stop_at_first_error(database_entries_stream); - let response_with_writer = build_response_header_and_stream( - Arc::clone(&app_state), - database_entries_stream, - request_context, - ) - .await; - match response_with_writer { - Ok(ResponseWithWriter::RenderStream { - http_response, - renderer, + actix_web::rt::spawn(tracing::Instrument::instrument( + async move { + let request_info = exec_ctx.request(); + let request_context = RequestContext { + is_embedded: request_info.url_params.contains_key("_sqlpage_embed"), + source_path, + content_security_policy: ContentSecurityPolicy::with_random_nonce(), + server_timing: Arc::clone(&request_info.server_timing), + response_format, + }; + let mut conn = None; + let database_entries_stream = + stream_query_results_with_conn(&sql_file, &exec_ctx, &mut conn); + let database_entries_stream = stop_at_first_error(database_entries_stream); + let response_with_writer = build_response_header_and_stream( + Arc::clone(&app_state), database_entries_stream, - }) => { - resp_send - .send(http_response) - .unwrap_or_else(|e| log::error!("could not send headers {e:?}")); - stream_response(database_entries_stream, renderer).await; - } - Ok(ResponseWithWriter::FinishedResponse { http_response }) => { - resp_send - .send(http_response) - .unwrap_or_else(|e| log::error!("could not send headers {e:?}")); - } - Err(err) => { - send_anyhow_error(&err, resp_send, &app_state); + request_context, + ) + .await; + match response_with_writer { + Ok(ResponseWithWriter::RenderStream { + http_response, + renderer, + database_entries_stream, + }) => { + resp_send + .send(http_response) + .unwrap_or_else(|e| log::error!("could not send headers {e:?}")); + stream_response(database_entries_stream, renderer).await; + } + Ok(ResponseWithWriter::FinishedResponse { http_response }) => { + resp_send + .send(http_response) + 
.unwrap_or_else(|e| log::error!("could not send headers {e:?}")); + } + Err(err) => { + send_anyhow_error(&err, resp_send, &app_state); + } } - } - }, exec_span)); + }, + exec_span, + )); resp_recv.await.map_err(ErrorInternalServerError) } +fn request_span_route(request: &ServiceRequest) -> Cow<'_, str> { + request + .match_pattern() + .map_or_else(|| request.path().to_owned().into(), Cow::from) +} + +fn request_span_name(request: &ServiceRequest) -> String { + format!("{} {}", request.method(), request_span_route(request)) +} + +fn sql_execution_span_name(source_path: &std::path::Path) -> String { + format!("SQL {}", source_path.display()) +} + +struct SqlPageRootSpanBuilder; + +impl RootSpanBuilder for SqlPageRootSpanBuilder { + fn on_request_start(request: &ServiceRequest) -> Span { + let user_agent = request + .headers() + .get("User-Agent") + .map_or("", |h| h.to_str().unwrap_or("")); + let http_route = request_span_route(request); + let http_method = + tracing_actix_web::root_span_macro::private::http_method_str(request.method()); + let otel_name = request_span_name(request); + let connection_info = request.connection_info(); + let request_id = tracing_actix_web::root_span_macro::private::get_request_id(request); + + let span = tracing::span!( + tracing::Level::INFO, + "HTTP request", + http.method = %http_method, + http.route = %http_route, + http.flavor = %tracing_actix_web::root_span_macro::private::http_flavor(request.version()), + http.scheme = %tracing_actix_web::root_span_macro::private::http_scheme(connection_info.scheme()), + http.host = %connection_info.host(), + http.client_ip = %request.connection_info().realip_remote_addr().unwrap_or(""), + http.user_agent = %user_agent, + http.target = %request + .uri() + .path_and_query() + .map_or("", actix_web::http::uri::PathAndQuery::as_str), + http.status_code = tracing::field::Empty, + otel.name = %otel_name, + otel.kind = "server", + otel.status_code = tracing::field::Empty, + trace_id = 
tracing::field::Empty, + request_id = %request_id, + exception.message = tracing::field::Empty, + exception.details = tracing::field::Empty, + ); + std::mem::drop(connection_info); + tracing_actix_web::root_span_macro::private::set_otel_parent(request, &span); + span + } + + fn on_request_end(span: Span, outcome: &Result, Error>) { + DefaultRootSpanBuilder::on_request_end(span, outcome); + } +} + async fn process_sql_request( req: &mut ServiceRequest, sql_path: PathBuf, @@ -438,7 +507,7 @@ pub fn create_app( // when receiving a request outside of the prefix, redirect to the prefix .default_service(fn_service(default_prefix_redirect)) .wrap(OidcMiddleware::new(&app_state)) - .wrap(TracingLogger::default()) + .wrap(TracingLogger::::new()) .wrap(default_headers()) .wrap(middleware::Condition::new( app_state.config.compress_responses, @@ -588,3 +657,24 @@ fn bind_unix_socket_err(e: std::io::Error, unix_socket: &std::path::Path) -> any }; anyhow::anyhow!(e).context(ctx) } + +#[cfg(test)] +mod tests { + use super::{request_span_name, sql_execution_span_name}; + use actix_web::test::TestRequest; + use std::path::Path; + + #[test] + fn request_span_name_uses_request_path_when_no_matched_route_exists() { + let request = TestRequest::with_uri("/todos/42?filter=open").to_srv_request(); + assert_eq!(request_span_name(&request), "GET /todos/42"); + } + + #[test] + fn sql_execution_span_name_uses_sql_file_path() { + assert_eq!( + sql_execution_span_name(Path::new("website/todos.sql")), + "SQL website/todos.sql" + ); + } +} From 0aebd0c47b062eb2b6cc33502d7569af0a9383e0 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Sun, 8 Mar 2026 22:23:05 +0100 Subject: [PATCH 07/39] npm run fix --- examples/opentelemetry-grafana/grafana/sqlpage-home.json | 9 ++------- examples/opentelemetry-grafana/sqlpage/sqlpage.json | 2 +- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/examples/opentelemetry-grafana/grafana/sqlpage-home.json 
b/examples/opentelemetry-grafana/grafana/sqlpage-home.json index 01c12a5e..7f7cb1de 100644 --- a/examples/opentelemetry-grafana/grafana/sqlpage-home.json +++ b/examples/opentelemetry-grafana/grafana/sqlpage-home.json @@ -136,9 +136,7 @@ "footer": { "countRows": false, "fields": "", - "reducer": [ - "sum" - ], + "reducer": ["sum"], "show": false }, "frameIndex": 0, @@ -196,10 +194,7 @@ "refresh": "5s", "schemaVersion": 41, "style": "dark", - "tags": [ - "sqlpage", - "tracing" - ], + "tags": ["sqlpage", "tracing"], "templating": { "list": [] }, diff --git a/examples/opentelemetry-grafana/sqlpage/sqlpage.json b/examples/opentelemetry-grafana/sqlpage/sqlpage.json index f28d3cda..5322302d 100644 --- a/examples/opentelemetry-grafana/sqlpage/sqlpage.json +++ b/examples/opentelemetry-grafana/sqlpage/sqlpage.json @@ -1,3 +1,3 @@ { - "listen_on": "0.0.0.0:8080" + "listen_on": "0.0.0.0:8080" } From 712fb14d60bb63b657c00ae4742d570ca1800617 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Sun, 8 Mar 2026 22:27:30 +0100 Subject: [PATCH 08/39] use stable OTel semantic convention attribute names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - code.filepath → code.file.path, code.lineno → code.line.number - db.statement → db.query.text, db.system → db.system.name - Disable auto code location (.with_location(false)) so spans reference user SQL files, not SQLPage Rust source - Remove redundant sqlpage.file attribute (code.file.path suffices) Co-Authored-By: Claude Opus 4.6 --- src/telemetry.rs | 188 ++++++++++++++++++++-- src/webserver/database/execute_queries.rs | 16 +- src/webserver/http.rs | 3 +- 3 files changed, 185 insertions(+), 22 deletions(-) diff --git a/src/telemetry.rs b/src/telemetry.rs index bcf0ef59..e206e3bd 100644 --- a/src/telemetry.rs +++ b/src/telemetry.rs @@ -1,7 +1,7 @@ //! OpenTelemetry initialization and shutdown. //! //! When `OTEL_EXPORTER_OTLP_ENDPOINT` is set, sets up a full tracing pipeline -//! 
with OTLP export. Otherwise, falls back to `env_logger` as before. +//! with OTLP export and logfmt log output. Otherwise, falls back to `env_logger`. use std::env; use std::sync::OnceLock; @@ -10,11 +10,12 @@ use opentelemetry_sdk::trace::SdkTracerProvider; static TRACER_PROVIDER: OnceLock = OnceLock::new(); -/// Initializes logging / tracing. Returns `true` if `OTel` was activated. -#[must_use] +/// Initializes logging / tracing. Returns `true` if OTel was activated. pub fn init_telemetry() -> bool { let otel_endpoint = env::var("OTEL_EXPORTER_OTLP_ENDPOINT").ok(); - let otel_active = otel_endpoint.as_deref().is_some_and(|v| !v.is_empty()); + let otel_active = otel_endpoint + .as_deref() + .is_some_and(|v| !v.is_empty()); if otel_active { init_otel_tracing(); @@ -25,7 +26,7 @@ pub fn init_telemetry() -> bool { otel_active } -/// Shuts down the `OTel` tracer provider, flushing pending spans. +/// Shuts down the OTel tracer provider, flushing pending spans. pub fn shutdown_telemetry() { if let Some(provider) = TRACER_PROVIDER.get() { if let Err(e) = provider.shutdown() { @@ -65,25 +66,188 @@ fn init_otel_tracing() { global::set_tracer_provider(provider.clone()); let _ = TRACER_PROVIDER.set(provider); - let otel_layer = tracing_opentelemetry::layer().with_tracer(tracer); + let otel_layer = tracing_opentelemetry::layer() + .with_tracer(tracer) + .with_location(false); let filter = tracing_subscriber::EnvFilter::try_from_default_env() .unwrap_or_else(|_| "sqlpage=info,actix_web=info,tracing_actix_web=info".into()); - let fmt_layer = json_subscriber::layer() - .with_current_span(true) - .with_span_list(false) - .with_opentelemetry_ids(true); - // Build the subscriber and set it as global default. // We use tracing_log::LogTracer to bridge log::* → tracing, // and set_global_default (not .init()) to avoid double-setting the log logger. 
let subscriber = tracing_subscriber::registry() .with(filter) - .with(fmt_layer) + .with(logfmt::LogfmtLayer) .with(otel_layer); tracing::subscriber::set_global_default(subscriber) .expect("Failed to set global tracing subscriber"); tracing_log::LogTracer::init().expect("Failed to set log→tracing bridge"); } + +/// Custom logfmt logging layer. +/// +/// Outputs one line per event in logfmt format with carefully chosen fields: +/// ```text +/// ts=2026-03-08T20:56:15Z level=error target=sqlpage::webserver::error msg="..." method=GET path=/foo client_ip=1.2.3.4 trace_id=abc123 +/// ``` +mod logfmt { + use std::collections::HashMap; + use std::fmt::Write; + use std::io; + + use tracing::field::{Field, Visit}; + use tracing::Subscriber; + use tracing_subscriber::layer::Context; + use tracing_subscriber::registry::LookupSpan; + use tracing_subscriber::Layer; + + /// Stores span fields so we can access them when formatting events. + #[derive(Default)] + struct SpanFields(HashMap<&'static str, String>); + + /// Visitor that collects fields into a HashMap. + struct FieldCollector<'a>(&'a mut HashMap<&'static str, String>); + + impl Visit for FieldCollector<'_> { + fn record_debug(&mut self, field: &Field, value: &dyn std::fmt::Debug) { + self.0.insert(field.name(), format!("{value:?}")); + } + fn record_str(&mut self, field: &Field, value: &str) { + self.0.insert(field.name(), value.to_owned()); + } + fn record_i64(&mut self, field: &Field, value: i64) { + self.0.insert(field.name(), value.to_string()); + } + fn record_u64(&mut self, field: &Field, value: u64) { + self.0.insert(field.name(), value.to_string()); + } + fn record_bool(&mut self, field: &Field, value: bool) { + self.0.insert(field.name(), value.to_string()); + } + } + + /// Fields we pick from spans, in display order. 
+ /// (span_field_name, logfmt_key) + const SPAN_FIELDS: &[(&str, &str)] = &[ + ("http.method", "method"), + ("http.target", "path"), + ("sqlpage.file", "file"), + ("http.client_ip", "client_ip"), + ]; + + pub(super) struct LogfmtLayer; + + impl Layer for LogfmtLayer + where + S: Subscriber + for<'a> LookupSpan<'a>, + { + fn on_new_span( + &self, + attrs: &tracing::span::Attributes<'_>, + id: &tracing::span::Id, + ctx: Context<'_, S>, + ) { + let mut fields = SpanFields::default(); + attrs.record(&mut FieldCollector(&mut fields.0)); + if let Some(span) = ctx.span(id) { + span.extensions_mut().insert(fields); + } + } + + fn on_record( + &self, + id: &tracing::span::Id, + values: &tracing::span::Record<'_>, + ctx: Context<'_, S>, + ) { + if let Some(span) = ctx.span(id) { + let mut ext = span.extensions_mut(); + if let Some(fields) = ext.get_mut::() { + values.record(&mut FieldCollector(&mut fields.0)); + } + } + } + + fn on_event(&self, event: &tracing::Event<'_>, ctx: Context<'_, S>) { + let mut buf = String::with_capacity(256); + + // 1. ts + let now = chrono::Utc::now().format("%Y-%m-%dT%H:%M:%S%.3fZ"); + let _ = write!(buf, "ts={now}"); + + // 2. level + let level = event.metadata().level(); + let _ = write!(buf, " level={level}"); + + // 3. target — for bridged log events, use log.target; otherwise metadata target + let mut event_fields = HashMap::new(); + event.record(&mut FieldCollector(&mut event_fields)); + + let target = event_fields + .get("log.target") + .map(String::as_str) + .unwrap_or_else(|| event.metadata().target()); + let _ = write!(buf, " target={target}"); + + // 4. msg + if let Some(msg) = event_fields.get("message") { + write_logfmt_value(&mut buf, "msg", msg); + } + + // 5. 
Selected span fields in order + let mut seen = [false; SPAN_FIELDS.len()]; + if let Some(scope) = ctx.event_scope(event) { + for span in scope { + let ext = span.extensions(); + if let Some(fields) = ext.get::() { + for (i, &(span_key, logfmt_key)) in SPAN_FIELDS.iter().enumerate() { + if !seen[i] { + if let Some(val) = fields.0.get(span_key) { + write_logfmt_value(&mut buf, logfmt_key, val); + seen[i] = true; + } + } + } + } + } + } + + // 6. trace_id from OpenTelemetry span context + if let Some(scope) = ctx.event_scope(event) { + 'outer: for span in scope { + let ext = span.extensions(); + if let Some(otel_data) = ext.get::() { + if let Some(trace_id) = otel_data.trace_id() { + let _ = write!(buf, " trace_id={trace_id}"); + break 'outer; + } + } + } + } + + buf.push('\n'); + + // Write atomically to stderr + let _ = io::Write::write_all(&mut io::stderr().lock(), buf.as_bytes()); + } + } + + /// Write a logfmt key=value pair, quoting the value if it contains spaces or special chars. + fn write_logfmt_value(buf: &mut String, key: &str, value: &str) { + // Replace newlines with spaces for single-line output + let needs_quoting = value.contains(' ') + || value.contains('"') + || value.contains('=') + || value.contains('\n') + || value.is_empty(); + + if needs_quoting { + let escaped = value.replace('\n', " ").replace('"', "\\\""); + let _ = write!(buf, " {key}=\"{escaped}\""); + } else { + let _ = write!(buf, " {key}={value}"); + } + } +} diff --git a/src/webserver/database/execute_queries.rs b/src/webserver/database/execute_queries.rs index 81edaa6a..80d08c9b 100644 --- a/src/webserver/database/execute_queries.rs +++ b/src/webserver/database/execute_queries.rs @@ -66,10 +66,10 @@ pub fn stream_query_results_with_conn<'a>( log::trace!("Executing query {:?}", query.sql); let _query_span = tracing::info_span!( "db.query", - db.statement = query.sql, - db.system = %request.app_state.db.info.dbms_name, - code.filepath = %source_file.display(), - code.lineno = 
stmt.query_position.start.line, + db.query.text = query.sql, + db.system.name = %request.app_state.db.info.dbms_name, + code.file.path = %source_file.display(), + code.line.number = stmt.query_position.start.line, ) .entered(); let mut stream = connection.fetch_many(query); @@ -227,10 +227,10 @@ async fn execute_set_variable_query<'a>( let _query_span = tracing::info_span!( "db.query", - db.statement = query.sql, - db.system = %request.app_state.db.info.dbms_name, - code.filepath = %source_file.display(), - code.lineno = statement.query_position.start.line, + db.query.text = query.sql, + db.system.name = %request.app_state.db.info.dbms_name, + code.file.path = %source_file.display(), + code.line.number = statement.query_position.start.line, ) .entered(); let value = match connection.fetch_optional(query).await { diff --git a/src/webserver/http.rs b/src/webserver/http.rs index 807bf74e..0d01362d 100644 --- a/src/webserver/http.rs +++ b/src/webserver/http.rs @@ -232,8 +232,7 @@ async fn render_sql( let exec_span = tracing::info_span!( "sqlpage.file", otel.name = %sql_execution_span_name(&source_path), - sqlpage.file = %source_path.display(), - code.filepath = %source_path.display(), + code.file.path = %source_path.display(), ); actix_web::rt::spawn(tracing::Instrument::instrument( async move { From 85dd9b8026772a3f00bb764f014b13b782b93d6b Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Sun, 8 Mar 2026 22:49:25 +0100 Subject: [PATCH 09/39] use OTel semantic convention values for db.system.name Use well-known values from the OpenTelemetry registry instead of raw DBMS name strings. Cast line numbers to i64 for correct span recording. 
Co-Authored-By: Claude Opus 4.6 --- src/webserver/database/connect.rs | 1 - src/webserver/database/execute_queries.rs | 8 ++++---- src/webserver/database/mod.rs | 16 ++++++++++++++++ 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/src/webserver/database/connect.rs b/src/webserver/database/connect.rs index d96f2b5a..573df2fa 100644 --- a/src/webserver/database/connect.rs +++ b/src/webserver/database/connect.rs @@ -58,7 +58,6 @@ impl Database { }; let dbms_name: String = pool.acquire().await?.dbms_name().await?; let database_type = SupportedDatabase::from_dbms_name(&dbms_name); - log::debug!("Initialized {dbms_name:?} database pool: {pool:#?}"); Ok(Database { connection: pool, diff --git a/src/webserver/database/execute_queries.rs b/src/webserver/database/execute_queries.rs index 80d08c9b..b120f629 100644 --- a/src/webserver/database/execute_queries.rs +++ b/src/webserver/database/execute_queries.rs @@ -67,9 +67,9 @@ pub fn stream_query_results_with_conn<'a>( let _query_span = tracing::info_span!( "db.query", db.query.text = query.sql, - db.system.name = %request.app_state.db.info.dbms_name, + db.system.name = request.app_state.db.info.database_type.otel_name(), code.file.path = %source_file.display(), - code.line.number = stmt.query_position.start.line, + code.line.number = stmt.query_position.start.line as i64, ) .entered(); let mut stream = connection.fetch_many(query); @@ -228,9 +228,9 @@ async fn execute_set_variable_query<'a>( let _query_span = tracing::info_span!( "db.query", db.query.text = query.sql, - db.system.name = %request.app_state.db.info.dbms_name, + db.system.name = request.app_state.db.info.database_type.otel_name(), code.file.path = %source_file.display(), - code.line.number = statement.query_position.start.line, + code.line.number = statement.query_position.start.line as i64, ) .entered(); let value = match connection.fetch_optional(query).await { diff --git a/src/webserver/database/mod.rs b/src/webserver/database/mod.rs index 
b354c215..00e0fff2 100644 --- a/src/webserver/database/mod.rs +++ b/src/webserver/database/mod.rs @@ -58,6 +58,22 @@ impl SupportedDatabase { Self::Generic => "Generic", } } + + /// Returns the OTel `db.system.name` well-known value. + /// See + #[must_use] + pub fn otel_name(self) -> &'static str { + match self { + Self::Sqlite => "sqlite", + Self::Duckdb => "duckdb", + Self::Oracle => "oracle.db", + Self::Postgres => "postgresql", + Self::MySql => "mysql", + Self::Mssql => "microsoft.sql_server", + Self::Snowflake => "snowflake", + Self::Generic => "other_sql", + } + } } pub struct Database { From dafbac2d6cd28319f6c3984832b555ae6b4d2a20 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Sun, 8 Mar 2026 22:49:31 +0100 Subject: [PATCH 10/39] remove json-subscriber dependency The custom logfmt layer in telemetry.rs replaces it with zero extra dependencies and precise control over field selection and ordering. Co-Authored-By: Claude Opus 4.6 --- Cargo.lock | 28 ---------------------------- Cargo.toml | 1 - 2 files changed, 29 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ee55f3f0..b5eef97a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2552,23 +2552,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "json-subscriber" -version = "0.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6456771bcb3511e2afa6a6a8c9ce2730be80803116a2839931476740ab3b86bc" -dependencies = [ - "opentelemetry", - "serde", - "serde_json", - "tracing", - "tracing-core", - "tracing-opentelemetry", - "tracing-serde", - "tracing-subscriber", - "uuid", -] - [[package]] name = "json5" version = "0.4.1" @@ -4597,7 +4580,6 @@ dependencies = [ "handlebars", "hmac", "include_dir", - "json-subscriber", "lambda-web", "libflate", "log", @@ -5234,16 +5216,6 @@ dependencies = [ "web-time", ] -[[package]] -name = "tracing-serde" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"704b1aeb7be0d0a84fc9828cae51dab5970fee5088f83d1dd7ee6f6246fc6ff1" -dependencies = [ - "serde", - "tracing-core", -] - [[package]] name = "tracing-subscriber" version = "0.3.22" diff --git a/Cargo.toml b/Cargo.toml index 8ea102ae..83d26619 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -84,7 +84,6 @@ odbc-sys = { version = "0.29.0", optional = true } # OpenTelemetry / tracing tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["registry", "env-filter"] } -json-subscriber = { version = "0.2.7", features = ["tracing-opentelemetry-0-32"] } tracing-opentelemetry = "0.32" tracing-actix-web = { version = "0.7", default-features = false, features = ["opentelemetry_0_31"] } tracing-log = "0.2" From 2a8a89a250d128c08116bacd4aa645a6cf7765c3 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Sun, 8 Mar 2026 22:49:38 +0100 Subject: [PATCH 11/39] add Loki + Promtail log aggregation to OTel example Adds two new services (Loki, Promtail) to scrape SQLPage container logs and display them in Grafana alongside traces. The home dashboard now shows a logs panel with trace_id derived fields linking to Tempo. 
Co-Authored-By: Claude Opus 4.6 --- examples/opentelemetry-grafana/README.md | 55 +++++++++++++------ .../opentelemetry-grafana/docker-compose.yml | 14 +++++ .../grafana/datasources.yaml | 12 ++++ .../grafana/sqlpage-home.json | 54 +++++++++++++++--- examples/opentelemetry-grafana/promtail.yaml | 51 +++++++++++++++++ 5 files changed, 161 insertions(+), 25 deletions(-) create mode 100644 examples/opentelemetry-grafana/promtail.yaml diff --git a/examples/opentelemetry-grafana/README.md b/examples/opentelemetry-grafana/README.md index 9d0a941c..ce39d511 100644 --- a/examples/opentelemetry-grafana/README.md +++ b/examples/opentelemetry-grafana/README.md @@ -1,10 +1,12 @@ -# Distributed Tracing for SQLPage with OpenTelemetry +# Distributed Tracing and Logs for SQLPage with OpenTelemetry and Grafana SQLPage has built-in support for [OpenTelemetry](https://opentelemetry.io/) (OTel), an open standard for collecting traces, metrics, and logs from your applications. When enabled, every HTTP request to SQLPage produces a **trace** — a timeline of everything that happened to serve that request, from receiving it to querying the -database and rendering the response. +database and rendering the response. SQLPage also emits structured request-aware +logs, which this example forwards to Grafana Loki so you can inspect logs and traces +side by side. This is useful for: @@ -16,7 +18,7 @@ This is useful for: ## Quick start (this example) This directory contains a ready-to-run Docker Compose stack that demonstrates -the full tracing pipeline. No prior OpenTelemetry experience is needed. +the full tracing and logging pipeline. No prior OpenTelemetry experience is needed. 
### Prerequisites @@ -30,7 +32,7 @@ cd examples/opentelemetry-grafana docker compose up --build ``` -This starts six services: +This starts eight services: | Service | Role | Port | |------------------|-----------------------------------------------------------|---------------| @@ -39,16 +41,19 @@ This starts six services: | **PostgreSQL** | Database | (internal 5432) | | **OTel Collector** | Receives traces and forwards them to Tempo | `localhost:4318` | | **Tempo** | Trace storage backend | (internal 3200) | -| **Grafana** | Web UI to explore traces | `localhost:3000` | +| **Loki** | Log storage backend | (internal 3100) | +| **Promtail** | Scrapes container logs and pushes them to Loki | (internal) | +| **Grafana** | Web UI to explore traces and logs | `localhost:3000` | -### Explore traces +### Explore traces and logs 1. Open the todo app at [http://localhost](http://localhost) — add a few items, click to toggle them. 2. Open Grafana at [http://localhost:3000](http://localhost:3000). -3. In the left sidebar, click **Explore** (compass icon). -4. Make sure **Tempo** is selected as the datasource (top-left dropdown). -5. Click **Search** tab, then **Run query**. You should see a list of recent traces. -6. Click any trace to see the full span waterfall. +3. The default home dashboard now shows recent traces and recent SQLPage logs. +4. Click any trace ID in the trace table to see the full span waterfall. +5. In the logs panel, click a `trace_id` derived field to jump straight to the matching trace. +6. In the left sidebar, click **Explore** (compass icon) if you want to search manually. +7. Select **Tempo** to search traces or **Loki** to search logs. 
### What you will see in a trace @@ -73,6 +78,18 @@ Key attributes on each span: | `db.pool.acquire` | `db.pool.size` — current pool size when acquiring | | `db.query` | `db.statement` — the full SQL text; `db.system` — database type | +### What you will see in the logs + +SQLPage writes one structured log line per event, for example: + +```text +ts=2026-03-08T20:56:15.000Z level=info target=sqlpage::webserver::http msg="request completed" method=GET path=/ trace_id=4f2d... +``` + +Promtail scrapes these container logs, parses the logfmt fields, and sends them to Loki. +The homepage dashboard filters to the `sqlpage` container so you can see request logs update +live while you use the sample app. + ### PostgreSQL correlation SQLPage automatically sets the @@ -139,18 +156,16 @@ understood by all OTel-compatible tools. SQLPage reads them directly — no sending trace data. Here is how the pieces fit together: ``` - Your app (SQLPage) Collector (optional) Backend (storage + UI) -┌─────────────────┐ ┌──────────────────┐ ┌─────────────────────┐ -│ Creates spans │──OTLP──│ Routes, batches, │──OTLP──│ Stores traces and │ -│ for each request │ │ and exports data │ │ provides a query UI │ -└─────────────────┘ └──────────────────┘ └─────────────────────┘ + Traces: SQLPage -> OTel Collector -> Tempo -> Grafana + Logs: SQLPage stdout/stderr -> Promtail -> Loki -> Grafana ``` - **SQLPage** generates trace data and sends it via the OTLP HTTP protocol. - A **collector** (optional) receives traces and forwards them to one or more backends. Useful for buffering, sampling, or fanning out to multiple destinations. You can skip the collector and send directly from SQLPage to most backends. -- A **backend** stores the traces and lets you search and visualize them. +- **Promtail** scrapes Docker container logs and forwards them to Loki. +- **Tempo** stores traces, **Loki** stores logs, and **Grafana** lets you search both. 
### Trace context propagation @@ -169,16 +184,20 @@ For nginx specifically, use the [`ngx_otel_module`](https://nginx.org/en/docs/ng ## Setup guides by deployment scenario -### Self-hosted with Grafana Tempo +### Self-hosted with Grafana Tempo and Loki This is what the Docker Compose example in this directory uses. -[Grafana Tempo](https://grafana.com/oss/tempo/) is a free, open-source trace backend. +[Grafana Tempo](https://grafana.com/oss/tempo/) is a free, open-source trace backend, and +[Grafana Loki](https://grafana.com/oss/loki/) is the corresponding log backend. **Components:** - [Grafana Tempo](https://grafana.com/docs/tempo/latest/) stores the traces. +- [Grafana Loki](https://grafana.com/docs/loki/latest/) stores the logs. - [Grafana](https://grafana.com/docs/grafana/latest/) provides the web UI. - An [OTel Collector](https://opentelemetry.io/docs/collector/) sits between SQLPage and Tempo (optional but recommended for production). +- [Promtail](https://grafana.com/docs/loki/latest/send-data/promtail/) scrapes container logs + and forwards them to Loki. 
**SQLPage environment variables:** diff --git a/examples/opentelemetry-grafana/docker-compose.yml b/examples/opentelemetry-grafana/docker-compose.yml index 914736fd..28361c6a 100644 --- a/examples/opentelemetry-grafana/docker-compose.yml +++ b/examples/opentelemetry-grafana/docker-compose.yml @@ -52,6 +52,19 @@ services: - ./tempo.yaml:/etc/tempo/config.yaml:ro command: ["-config.file=/etc/tempo/config.yaml"] + loki: + image: grafana/loki:3.4.1 + command: ["-config.file=/etc/loki/local-config.yaml"] + + promtail: + image: grafana/promtail:3.4.1 + command: ["-config.file=/etc/promtail/config.yml"] + volumes: + - ./promtail.yaml:/etc/promtail/config.yml:ro + - /var/run/docker.sock:/var/run/docker.sock:ro + depends_on: + - loki + grafana: image: grafana/grafana:latest ports: @@ -67,3 +80,4 @@ services: - ./grafana:/etc/grafana/provisioning/dashboards/sqlpage:ro depends_on: - tempo + - loki diff --git a/examples/opentelemetry-grafana/grafana/datasources.yaml b/examples/opentelemetry-grafana/grafana/datasources.yaml index 01a0b49b..f2134a1a 100644 --- a/examples/opentelemetry-grafana/grafana/datasources.yaml +++ b/examples/opentelemetry-grafana/grafana/datasources.yaml @@ -3,6 +3,18 @@ apiVersion: 1 datasources: - name: Tempo type: tempo + uid: tempo access: proxy url: http://tempo:3200 isDefault: true + - name: Loki + type: loki + uid: loki + access: proxy + url: http://loki:3100 + jsonData: + derivedFields: + - name: trace_id + matcherRegex: 'trace_id=([0-9a-f]+)' + datasourceUid: tempo + url: '$${__value.raw}' diff --git a/examples/opentelemetry-grafana/grafana/sqlpage-home.json b/examples/opentelemetry-grafana/grafana/sqlpage-home.json index 7f7cb1de..e1d366eb 100644 --- a/examples/opentelemetry-grafana/grafana/sqlpage-home.json +++ b/examples/opentelemetry-grafana/grafana/sqlpage-home.json @@ -39,7 +39,7 @@ "showLineNumbers": false, "showMiniMap": false }, - "content": "

Recent SQLPage traces

Open http://localhost and interact with the app. New requests will appear here automatically.

Click any trace ID to open the full span waterfall. Use the time picker in the top right if the table is empty.

", + "content": "

Recent SQLPage traces and logs

Open http://localhost and interact with the app. New requests will appear here automatically.

The trace table shows recent requests. The logs panel shows SQLPage container logs, including the trace ID so you can jump from a log line to the full span waterfall.

", "mode": "html" }, "pluginVersion": "12.4.0", @@ -49,7 +49,7 @@ { "datasource": { "type": "tempo", - "uid": "P214B5B846CF3925F" + "uid": "tempo" }, "fieldConfig": { "defaults": { @@ -125,7 +125,7 @@ ] }, "gridPos": { - "h": 18, + "h": 12, "w": 24, "x": 0, "y": 4 @@ -153,7 +153,7 @@ { "datasource": { "type": "tempo", - "uid": "P214B5B846CF3925F" + "uid": "tempo" }, "limit": 50, "query": "{resource.service.name != nil}", @@ -189,12 +189,52 @@ } ], "type": "table" + }, + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 3, + "options": { + "dedupStrategy": "none", + "enableInfiniteScrolling": false, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": true, + "showTime": true, + "sortOrder": "Descending", + "wrapLogMessage": true + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "direction": "backward", + "editorMode": "builder", + "expr": "{compose_service=\"sqlpage\"}", + "queryType": "range", + "refId": "A" + } + ], + "timeFrom": "1h", + "title": "SQLPage Logs", + "type": "logs" } ], "refresh": "5s", "schemaVersion": 41, "style": "dark", - "tags": ["sqlpage", "tracing"], + "tags": ["sqlpage", "tracing", "logs"], "templating": { "list": [] }, @@ -204,7 +244,7 @@ }, "timepicker": {}, "timezone": "browser", - "title": "SQLPage Tracing Home", + "title": "SQLPage Tracing and Logs Home", "uid": "sqlpage-tracing-home", - "version": 2 + "version": 3 } diff --git a/examples/opentelemetry-grafana/promtail.yaml b/examples/opentelemetry-grafana/promtail.yaml new file mode 100644 index 00000000..98ecce47 --- /dev/null +++ b/examples/opentelemetry-grafana/promtail.yaml @@ -0,0 +1,51 @@ +server: + http_listen_port: 9080 + grpc_listen_port: 0 + +positions: + filename: /tmp/positions.yaml + +clients: + - url: http://loki:3100/loki/api/v1/push + +scrape_configs: + - job_name: docker + docker_sd_configs: + - host: 
unix:///var/run/docker.sock + refresh_interval: 5s + relabel_configs: + - source_labels: [__meta_docker_container_label_com_docker_compose_service] + regex: .+ + action: keep + - source_labels: [__meta_docker_container_label_com_docker_compose_service] + target_label: compose_service + - source_labels: [__meta_docker_container_name] + regex: /(.+) + target_label: container + - source_labels: [__meta_docker_container_log_stream] + target_label: stream + pipeline_stages: + - docker: {} + - match: + selector: '{compose_service="sqlpage"}' + stages: + - logfmt: + mapping: + ts: + level: + target: + msg: + method: + path: + file: + client_ip: + trace_id: + - labels: + level: + target: + method: + path: + file: + - timestamp: + source: ts + format: RFC3339Nano From d492cccd4c620e1ee38cad16a0d0c01d0f04fea3 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Sun, 8 Mar 2026 22:57:57 +0100 Subject: [PATCH 12/39] add OTel spans for request parsing, rendering, sqlpage functions, and OIDC Add targeted spans to account for previously untraced time: - http.parse_request: request/form parsing before SQL execution - render: template rendering and response streaming - subprocess: sqlpage.exec() with process.command attribute - http.client: sqlpage.fetch()/fetch_with_meta() with OTel HTTP client semantic conventions (http.request.method, url.full, http.response.status_code) - sqlpage.file: sqlpage.run_sql() nested file execution - oidc.callback + http.client: OIDC token exchange Co-Authored-By: Claude Opus 4.6 --- .../database/sqlpage_functions/functions.rs | 36 +++++++++++++++++++ src/webserver/http.rs | 15 +++++--- src/webserver/oidc.rs | 7 ++++ 3 files changed, 54 insertions(+), 4 deletions(-) diff --git a/src/webserver/database/sqlpage_functions/functions.rs b/src/webserver/database/sqlpage_functions/functions.rs index a41b18a1..4a8ddaaf 100644 --- a/src/webserver/database/sqlpage_functions/functions.rs +++ b/src/webserver/database/sqlpage_functions/functions.rs @@ -136,6 +136,13 
@@ async fn exec<'a>( Make sure you understand the security implications before enabling it, and never allow user input to be passed as the first argument to this function. You can enable it by setting the allow_exec option to true in the sqlpage.json configuration file.") } + let _exec_span = tracing::info_span!( + "subprocess", + otel.name = format!("EXEC {program_name}"), + process.command = %program_name, + process.args_count = args.len(), + ) + .entered(); let res = tokio::process::Command::new(&*program_name) .args(args.iter().map(|x| &**x)) .output() @@ -205,6 +212,16 @@ async fn fetch( let Some(http_request) = http_request else { return Ok(None); }; + let method = http_request.method.as_deref().unwrap_or("GET"); + let fetch_span = tracing::info_span!( + "http.client", + otel.name = format!("{method}"), + http.request.method = method, + url.full = %http_request.url, + http.response.status_code = tracing::field::Empty, + ); + let _guard = fetch_span.enter(); + let client = make_http_client(&request.app_state.config) .with_context(|| "Unable to create an HTTP client")?; let req = build_request(&client, &http_request)?; @@ -219,6 +236,8 @@ async fn fetch( .await .map_err(|e| anyhow!("Unable to fetch {}: {e}", http_request.url))?; + fetch_span.record("http.response.status_code", response.status().as_u16() as i64); + log::debug!( "Finished fetching {}. 
Status: {}", http_request.url, @@ -285,6 +304,16 @@ async fn fetch_with_meta( return Ok(None); }; + let method = http_request.method.as_deref().unwrap_or("GET"); + let fetch_span = tracing::info_span!( + "http.client", + otel.name = format!("{method}"), + http.request.method = method, + url.full = %http_request.url, + http.response.status_code = tracing::field::Empty, + ); + let _guard = fetch_span.enter(); + let client = make_http_client(&request.app_state.config) .with_context(|| "Unable to create an HTTP client")?; let req = build_request(&client, &http_request)?; @@ -303,6 +332,7 @@ async fn fetch_with_meta( match response_result { Ok(mut response) => { let status = response.status(); + fetch_span.record("http.response.status_code", status.as_u16() as i64); obj.serialize_entry("status", &status.as_u16())?; let mut has_error = false; if status.is_server_error() { @@ -577,6 +607,12 @@ async fn run_sql<'a>( log::debug!("run_sql: first argument is NULL, returning NULL"); return Ok(None); }; + let _run_sql_span = tracing::info_span!( + "sqlpage.file", + otel.name = format!("SQL {sql_file_path}"), + code.file.path = %sql_file_path, + ) + .entered(); let app_state = &request.app_state; let sql_file = app_state .sql_file_cache diff --git a/src/webserver/http.rs b/src/webserver/http.rs index 0d01362d..5633d577 100644 --- a/src/webserver/http.rs +++ b/src/webserver/http.rs @@ -220,9 +220,12 @@ async fn render_sql( .map(|accept| ResponseFormat::from_accept_header(&accept)) .unwrap_or_default(); - let exec_ctx = extract_request_info(srv_req, Arc::clone(&app_state), server_timing) - .await - .map_err(|e| anyhow_err_to_actix(e, &app_state))?; + let exec_ctx = { + let _parse_span = tracing::info_span!("http.parse_request").entered(); + extract_request_info(srv_req, Arc::clone(&app_state), server_timing) + .await + .map_err(|e| anyhow_err_to_actix(e, &app_state))? 
+ }; log::debug!("Received a request with the following parameters: {exec_ctx:?}"); exec_ctx.request().server_timing.record("parse_req"); @@ -263,7 +266,11 @@ async fn render_sql( resp_send .send(http_response) .unwrap_or_else(|e| log::error!("could not send headers {e:?}")); - stream_response(database_entries_stream, renderer).await; + tracing::Instrument::instrument( + stream_response(database_entries_stream, renderer), + tracing::info_span!("render"), + ) + .await; } Ok(ResponseWithWriter::FinishedResponse { http_response }) => { resp_send diff --git a/src/webserver/oidc.rs b/src/webserver/oidc.rs index 21a40e24..7b00dba4 100644 --- a/src/webserver/oidc.rs +++ b/src/webserver/oidc.rs @@ -463,6 +463,7 @@ async fn handle_oidc_callback( oidc_state: &Arc, request: ServiceRequest, ) -> ServiceResponse { + let _span = tracing::info_span!("oidc.callback").entered(); match process_oidc_callback(oidc_state, &request).await { Ok(mut response) => { clear_redirect_count_cookie(&mut response); @@ -698,6 +699,12 @@ async fn exchange_code_for_token( http_client: &awc::Client, oidc_callback_params: OidcCallbackParams, ) -> anyhow::Result { + let _span = tracing::info_span!( + "http.client", + otel.name = "POST token_endpoint", + http.request.method = "POST", + ) + .entered(); let token_response = oidc_client .exchange_code(openidconnect::AuthorizationCode::new( oidc_callback_params.code, From cb7bdf62cb71aeca6a53825a8df378512948ab37 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Sun, 8 Mar 2026 23:05:08 +0100 Subject: [PATCH 13/39] add oidc.jwt.verify span for OIDC token verification This span covers JWT signature verification and claims validation, which runs on every authenticated request via get_token_claims(). 
Co-Authored-By: Claude Opus 4.6 --- src/webserver/oidc.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/webserver/oidc.rs b/src/webserver/oidc.rs index 7b00dba4..6da99e6c 100644 --- a/src/webserver/oidc.rs +++ b/src/webserver/oidc.rs @@ -265,6 +265,7 @@ impl OidcState { id_token: OidcToken, expected_nonce: &Nonce, ) -> anyhow::Result { + let _span = tracing::info_span!("oidc.jwt.verify").entered(); let snapshot = self.snapshot(); let verifier = self.config.create_id_token_verifier(&snapshot.client); let nonce_verifier = |nonce: Option<&Nonce>| check_nonce(nonce, expected_nonce); From 0eeb5c3507a21104db9bc3949aaac35d4693d876 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Sun, 8 Mar 2026 23:06:16 +0100 Subject: [PATCH 14/39] add enduser.id attribute to oidc.jwt.verify span Records the OIDC subject claim (sub) as enduser.id after successful JWT verification, following OTel semantic conventions. Co-Authored-By: Claude Opus 4.6 --- src/webserver/oidc.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/webserver/oidc.rs b/src/webserver/oidc.rs index 6da99e6c..4b431f95 100644 --- a/src/webserver/oidc.rs +++ b/src/webserver/oidc.rs @@ -265,13 +265,18 @@ impl OidcState { id_token: OidcToken, expected_nonce: &Nonce, ) -> anyhow::Result { - let _span = tracing::info_span!("oidc.jwt.verify").entered(); + let span = tracing::info_span!( + "oidc.jwt.verify", + enduser.id = tracing::field::Empty, + ); + let _guard = span.enter(); let snapshot = self.snapshot(); let verifier = self.config.create_id_token_verifier(&snapshot.client); let nonce_verifier = |nonce: Option<&Nonce>| check_nonce(nonce, expected_nonce); let claims: OidcClaims = id_token .into_claims(&verifier, nonce_verifier) .map_err(|e| anyhow::anyhow!("Could not verify the ID token: {e}"))?; + span.record("enduser.id", claims.subject().as_str()); Ok(claims) } From fab9350af3827dfdd126a0a668ab39f5e8400659 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Sun, 8 Mar 2026 23:09:11 
+0100 Subject: [PATCH 15/39] add OTel user.* attributes to oidc.jwt.verify span Record user.id (sub), user.name (preferred_username), user.full_name (name), and user.email from OIDC claims after JWT verification. Co-Authored-By: Claude Opus 4.6 --- src/webserver/oidc.rs | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/webserver/oidc.rs b/src/webserver/oidc.rs index 4b431f95..123603c5 100644 --- a/src/webserver/oidc.rs +++ b/src/webserver/oidc.rs @@ -268,6 +268,10 @@ impl OidcState { let span = tracing::info_span!( "oidc.jwt.verify", enduser.id = tracing::field::Empty, + user.id = tracing::field::Empty, + user.name = tracing::field::Empty, + user.full_name = tracing::field::Empty, + user.email = tracing::field::Empty, ); let _guard = span.enter(); let snapshot = self.snapshot(); @@ -276,7 +280,18 @@ impl OidcState { let claims: OidcClaims = id_token .into_claims(&verifier, nonce_verifier) .map_err(|e| anyhow::anyhow!("Could not verify the ID token: {e}"))?; - span.record("enduser.id", claims.subject().as_str()); + let sub = claims.subject().as_str(); + span.record("enduser.id", sub); + span.record("user.id", sub); + if let Some(name) = claims.preferred_username() { + span.record("user.name", name.as_str()); + } + if let Some(name) = claims.name().and_then(|n| n.get(None)) { + span.record("user.full_name", name.as_str()); + } + if let Some(email) = claims.email() { + span.record("user.email", email.as_str()); + } Ok(claims) } From ff8800ef793fce1d154f1a2cb2d413265ac8e247 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Sun, 8 Mar 2026 23:20:27 +0100 Subject: [PATCH 16/39] add sqlpage.file.load span and attributes to http.parse_request The gap before http.parse_request was the SQL file cache lookup - now covered by the sqlpage.file.load span with code.file.path. http.parse_request now records http.request.method and content_type, which helps identify slow multipart/form-data parsing. 
Co-Authored-By: Claude Opus 4.6 --- src/webserver/http.rs | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/src/webserver/http.rs b/src/webserver/http.rs index 5633d577..4dc5b49f 100644 --- a/src/webserver/http.rs +++ b/src/webserver/http.rs @@ -221,7 +221,17 @@ async fn render_sql( .unwrap_or_default(); let exec_ctx = { - let _parse_span = tracing::info_span!("http.parse_request").entered(); + let content_type = srv_req + .headers() + .get("content-type") + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + let _parse_span = tracing::info_span!( + "http.parse_request", + http.request.method = %srv_req.method(), + http.request.header.content_type = content_type, + ) + .entered(); extract_request_info(srv_req, Arc::clone(&app_state), server_timing) .await .map_err(|e| anyhow_err_to_actix(e, &app_state))? @@ -356,12 +366,19 @@ async fn process_sql_request( let app_state: &web::Data = req.app_data().expect("app_state"); let server_timing = ServerTiming::for_env(app_state.config.environment); - let sql_file = app_state - .sql_file_cache - .get_with_privilege(app_state, &sql_path, false) - .await - .with_context(|| format!("Unable to read SQL file \"{}\"", sql_path.display())) - .map_err(|e| anyhow_err_to_actix(e, app_state))?; + let sql_file = { + let _span = tracing::info_span!( + "sqlpage.file.load", + code.file.path = %sql_path.display(), + ) + .entered(); + app_state + .sql_file_cache + .get_with_privilege(app_state, &sql_path, false) + .await + .with_context(|| format!("Unable to read SQL file \"{}\"", sql_path.display())) + .map_err(|e| anyhow_err_to_actix(e, app_state))? 
+ }; server_timing.record("sql_file"); render_sql(req, sql_file, server_timing).await From f8fdc242bba7f0eb78297ecc5516f0e891820494 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Sun, 8 Mar 2026 23:23:02 +0100 Subject: [PATCH 17/39] Fix clippy pedantic warnings --- src/telemetry.rs | 16 +++++++--------- src/webserver/database/execute_queries.rs | 8 ++++++-- src/webserver/database/mod.rs | 2 +- .../database/sqlpage_functions/functions.rs | 7 +++++-- 4 files changed, 19 insertions(+), 14 deletions(-) diff --git a/src/telemetry.rs b/src/telemetry.rs index e206e3bd..8209dcdb 100644 --- a/src/telemetry.rs +++ b/src/telemetry.rs @@ -10,12 +10,11 @@ use opentelemetry_sdk::trace::SdkTracerProvider; static TRACER_PROVIDER: OnceLock = OnceLock::new(); -/// Initializes logging / tracing. Returns `true` if OTel was activated. +/// Initializes logging / tracing. Returns `true` if `OTel` was activated. +#[must_use] pub fn init_telemetry() -> bool { let otel_endpoint = env::var("OTEL_EXPORTER_OTLP_ENDPOINT").ok(); - let otel_active = otel_endpoint - .as_deref() - .is_some_and(|v| !v.is_empty()); + let otel_active = otel_endpoint.as_deref().is_some_and(|v| !v.is_empty()); if otel_active { init_otel_tracing(); @@ -26,7 +25,7 @@ pub fn init_telemetry() -> bool { otel_active } -/// Shuts down the OTel tracer provider, flushing pending spans. +/// Shuts down the `OTel` tracer provider, flushing pending spans. pub fn shutdown_telemetry() { if let Some(provider) = TRACER_PROVIDER.get() { if let Err(e) = provider.shutdown() { @@ -107,7 +106,7 @@ mod logfmt { #[derive(Default)] struct SpanFields(HashMap<&'static str, String>); - /// Visitor that collects fields into a HashMap. + /// Visitor that collects fields into a `HashMap`. struct FieldCollector<'a>(&'a mut HashMap<&'static str, String>); impl Visit for FieldCollector<'_> { @@ -129,7 +128,7 @@ mod logfmt { } /// Fields we pick from spans, in display order. 
- /// (span_field_name, logfmt_key) + /// (`span_field_name`, `logfmt_key`) const SPAN_FIELDS: &[(&str, &str)] = &[ ("http.method", "method"), ("http.target", "path"), @@ -187,8 +186,7 @@ mod logfmt { let target = event_fields .get("log.target") - .map(String::as_str) - .unwrap_or_else(|| event.metadata().target()); + .map_or_else(|| event.metadata().target(), String::as_str); let _ = write!(buf, " target={target}"); // 4. msg diff --git a/src/webserver/database/execute_queries.rs b/src/webserver/database/execute_queries.rs index b120f629..2baed05d 100644 --- a/src/webserver/database/execute_queries.rs +++ b/src/webserver/database/execute_queries.rs @@ -29,6 +29,10 @@ use sqlx::{ pub type DbConn = Option>; +fn source_line_number(line: usize) -> i64 { + i64::try_from(line).unwrap_or(i64::MAX) +} + impl Database { pub(crate) async fn prepare_with( &self, @@ -69,7 +73,7 @@ pub fn stream_query_results_with_conn<'a>( db.query.text = query.sql, db.system.name = request.app_state.db.info.database_type.otel_name(), code.file.path = %source_file.display(), - code.line.number = stmt.query_position.start.line as i64, + code.line.number = source_line_number(stmt.query_position.start.line), ) .entered(); let mut stream = connection.fetch_many(query); @@ -230,7 +234,7 @@ async fn execute_set_variable_query<'a>( db.query.text = query.sql, db.system.name = request.app_state.db.info.database_type.otel_name(), code.file.path = %source_file.display(), - code.line.number = statement.query_position.start.line as i64, + code.line.number = source_line_number(statement.query_position.start.line), ) .entered(); let value = match connection.fetch_optional(query).await { diff --git a/src/webserver/database/mod.rs b/src/webserver/database/mod.rs index 00e0fff2..950096db 100644 --- a/src/webserver/database/mod.rs +++ b/src/webserver/database/mod.rs @@ -59,7 +59,7 @@ impl SupportedDatabase { } } - /// Returns the OTel `db.system.name` well-known value. 
+ /// Returns the `OTel` `db.system.name` well-known value. /// See #[must_use] pub fn otel_name(self) -> &'static str { diff --git a/src/webserver/database/sqlpage_functions/functions.rs b/src/webserver/database/sqlpage_functions/functions.rs index 4a8ddaaf..899eb22c 100644 --- a/src/webserver/database/sqlpage_functions/functions.rs +++ b/src/webserver/database/sqlpage_functions/functions.rs @@ -236,7 +236,10 @@ async fn fetch( .await .map_err(|e| anyhow!("Unable to fetch {}: {e}", http_request.url))?; - fetch_span.record("http.response.status_code", response.status().as_u16() as i64); + fetch_span.record( + "http.response.status_code", + i64::from(response.status().as_u16()), + ); log::debug!( "Finished fetching {}. Status: {}", @@ -332,7 +335,7 @@ async fn fetch_with_meta( match response_result { Ok(mut response) => { let status = response.status(); - fetch_span.record("http.response.status_code", status.as_u16() as i64); + fetch_span.record("http.response.status_code", i64::from(status.as_u16())); obj.serialize_entry("status", &status.as_u16())?; let mut has_error = false; if status.is_server_error() { From 3b0c37c23e3b67302fd6aa5c3ee565eb562bc48a Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Sun, 8 Mar 2026 23:45:54 +0100 Subject: [PATCH 18/39] unify log format: logfmt with colors, no OTel noise MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the custom logfmt layer for both OTel and non-OTel modes instead of falling back to env_logger. This eliminates the tracing→log bridge dumping all span fields (user agents, otel.kind, request_id, etc.) and only shows: ts, level, target, msg, method, path, status, file, client_ip, and trace_id (when valid). Adds terminal color support (bold red for errors, green for info, dim for timestamps/targets). Emits one log line per completed successful request. Errors are logged once by the error handler. Suppresses trace_id=000...0 when no real trace context exists. 
Co-Authored-By: Claude Opus 4.6 --- src/telemetry.rs | 117 ++++++++++++++++++++++++++++++++---------- src/webserver/http.rs | 11 ++++ 2 files changed, 100 insertions(+), 28 deletions(-) diff --git a/src/telemetry.rs b/src/telemetry.rs index 8209dcdb..5aa10e86 100644 --- a/src/telemetry.rs +++ b/src/telemetry.rs @@ -1,7 +1,10 @@ //! OpenTelemetry initialization and shutdown. //! //! When `OTEL_EXPORTER_OTLP_ENDPOINT` is set, sets up a full tracing pipeline -//! with OTLP export and logfmt log output. Otherwise, falls back to `env_logger`. +//! with OTLP export. Otherwise, sets up tracing with logfmt output only. +//! +//! In both cases, the same logfmt log format is used, with carefully chosen +//! fields for human readability and machine parseability. use std::env; use std::sync::OnceLock; @@ -10,22 +13,23 @@ use opentelemetry_sdk::trace::SdkTracerProvider; static TRACER_PROVIDER: OnceLock = OnceLock::new(); -/// Initializes logging / tracing. Returns `true` if `OTel` was activated. -#[must_use] +/// Initializes logging / tracing. Returns `true` if OTel was activated. pub fn init_telemetry() -> bool { let otel_endpoint = env::var("OTEL_EXPORTER_OTLP_ENDPOINT").ok(); - let otel_active = otel_endpoint.as_deref().is_some_and(|v| !v.is_empty()); + let otel_active = otel_endpoint + .as_deref() + .is_some_and(|v| !v.is_empty()); if otel_active { init_otel_tracing(); } else { - init_env_logger(); + init_tracing(); } otel_active } -/// Shuts down the `OTel` tracer provider, flushing pending spans. +/// Shuts down the OTel tracer provider, flushing pending spans. 
pub fn shutdown_telemetry() { if let Some(provider) = TRACER_PROVIDER.get() { if let Err(e) = provider.shutdown() { @@ -34,12 +38,21 @@ pub fn shutdown_telemetry() { } } -fn init_env_logger() { - let env = - env_logger::Env::new().default_filter_or("sqlpage=info,actix_web::middleware::logger=info"); - let mut logging = env_logger::Builder::from_env(env); - logging.format_timestamp_millis(); - logging.init(); +/// Tracing subscriber without OTel export — logfmt output only. +fn init_tracing() { + use tracing_subscriber::layer::SubscriberExt; + + let filter = tracing_subscriber::EnvFilter::try_from_default_env().unwrap_or_else(|_| { + "sqlpage=info,actix_web=info,tracing_actix_web=info".into() + }); + + let subscriber = tracing_subscriber::registry() + .with(filter) + .with(logfmt::LogfmtLayer::new()); + + tracing::subscriber::set_global_default(subscriber) + .expect("Failed to set global tracing subscriber"); + tracing_log::LogTracer::init().expect("Failed to set log→tracing bridge"); } fn init_otel_tracing() { @@ -72,12 +85,9 @@ fn init_otel_tracing() { let filter = tracing_subscriber::EnvFilter::try_from_default_env() .unwrap_or_else(|_| "sqlpage=info,actix_web=info,tracing_actix_web=info".into()); - // Build the subscriber and set it as global default. - // We use tracing_log::LogTracer to bridge log::* → tracing, - // and set_global_default (not .init()) to avoid double-setting the log logger. let subscriber = tracing_subscriber::registry() .with(filter) - .with(logfmt::LogfmtLayer) + .with(logfmt::LogfmtLayer::new()) .with(otel_layer); tracing::subscriber::set_global_default(subscriber) @@ -91,10 +101,12 @@ fn init_otel_tracing() { /// ```text /// ts=2026-03-08T20:56:15Z level=error target=sqlpage::webserver::error msg="..." method=GET path=/foo client_ip=1.2.3.4 trace_id=abc123 /// ``` +/// +/// With terminal colors when stderr is a TTY. 
mod logfmt { use std::collections::HashMap; use std::fmt::Write; - use std::io; + use std::io::{self, IsTerminal}; use tracing::field::{Field, Visit}; use tracing::Subscriber; @@ -106,7 +118,7 @@ mod logfmt { #[derive(Default)] struct SpanFields(HashMap<&'static str, String>); - /// Visitor that collects fields into a `HashMap`. + /// Visitor that collects fields into a HashMap. struct FieldCollector<'a>(&'a mut HashMap<&'static str, String>); impl Visit for FieldCollector<'_> { @@ -128,15 +140,38 @@ mod logfmt { } /// Fields we pick from spans, in display order. - /// (`span_field_name`, `logfmt_key`) + /// (span_field_name, logfmt_key) const SPAN_FIELDS: &[(&str, &str)] = &[ ("http.method", "method"), ("http.target", "path"), + ("http.status_code", "status"), ("sqlpage.file", "file"), ("http.client_ip", "client_ip"), ]; - pub(super) struct LogfmtLayer; + /// All-zeros trace ID means no real trace context. + const INVALID_TRACE_ID: &str = "00000000000000000000000000000000"; + + // ANSI color codes + const RED: &str = "\x1b[31m"; + const YELLOW: &str = "\x1b[33m"; + const GREEN: &str = "\x1b[32m"; + const BLUE: &str = "\x1b[34m"; + const DIM: &str = "\x1b[2m"; + const BOLD: &str = "\x1b[1m"; + const RESET: &str = "\x1b[0m"; + + pub(super) struct LogfmtLayer { + use_colors: bool, + } + + impl LogfmtLayer { + pub fn new() -> Self { + Self { + use_colors: io::stderr().is_terminal(), + } + } + } impl Layer for LogfmtLayer where @@ -171,14 +206,30 @@ mod logfmt { fn on_event(&self, event: &tracing::Event<'_>, ctx: Context<'_, S>) { let mut buf = String::with_capacity(256); + let colors = self.use_colors; // 1. ts let now = chrono::Utc::now().format("%Y-%m-%dT%H:%M:%S%.3fZ"); - let _ = write!(buf, "ts={now}"); + if colors { + let _ = write!(buf, "{DIM}ts={now}{RESET}"); + } else { + let _ = write!(buf, "ts={now}"); + } - // 2. level + // 2. 
level (with color) let level = event.metadata().level(); - let _ = write!(buf, " level={level}"); + if colors { + let (color, label) = match *level { + tracing::Level::ERROR => (RED, "error"), + tracing::Level::WARN => (YELLOW, "warn"), + tracing::Level::INFO => (GREEN, "info"), + tracing::Level::DEBUG => (BLUE, "debug"), + tracing::Level::TRACE => (DIM, "trace"), + }; + let _ = write!(buf, " {BOLD}{color}level={label}{RESET}"); + } else { + let _ = write!(buf, " level={}", level.as_str().to_ascii_lowercase()); + } // 3. target — for bridged log events, use log.target; otherwise metadata target let mut event_fields = HashMap::new(); @@ -187,7 +238,11 @@ mod logfmt { let target = event_fields .get("log.target") .map_or_else(|| event.metadata().target(), String::as_str); - let _ = write!(buf, " target={target}"); + if colors { + let _ = write!(buf, " {DIM}target={target}{RESET}"); + } else { + let _ = write!(buf, " target={target}"); + } // 4. msg if let Some(msg) = event_fields.get("message") { @@ -212,14 +267,21 @@ mod logfmt { } } - // 6. trace_id from OpenTelemetry span context + // 6. trace_id from OpenTelemetry span context (only if valid) if let Some(scope) = ctx.event_scope(event) { 'outer: for span in scope { let ext = span.extensions(); if let Some(otel_data) = ext.get::() { if let Some(trace_id) = otel_data.trace_id() { - let _ = write!(buf, " trace_id={trace_id}"); - break 'outer; + let id = trace_id.to_string(); + if id != INVALID_TRACE_ID { + if colors { + let _ = write!(buf, " {DIM}trace_id={id}{RESET}"); + } else { + let _ = write!(buf, " trace_id={id}"); + } + break 'outer; + } } } } @@ -234,7 +296,6 @@ mod logfmt { /// Write a logfmt key=value pair, quoting the value if it contains spaces or special chars. 
fn write_logfmt_value(buf: &mut String, key: &str, value: &str) { - // Replace newlines with spaces for single-line output let needs_quoting = value.contains(' ') || value.contains('"') || value.contains('=') diff --git a/src/webserver/http.rs b/src/webserver/http.rs index 4dc5b49f..4ab95883 100644 --- a/src/webserver/http.rs +++ b/src/webserver/http.rs @@ -355,7 +355,18 @@ impl RootSpanBuilder for SqlPageRootSpanBuilder { } fn on_request_end(span: Span, outcome: &Result, Error>) { + let span_ref = span.clone(); DefaultRootSpanBuilder::on_request_end(span, outcome); + + // Emit a single log event per completed request so it appears in logs. + // Error responses (4xx/5xx) are already logged by our error handlers. + let _enter = span_ref.enter(); + if let Ok(response) = outcome { + let status = response.response().status(); + if status.is_success() || status.is_redirection() { + log::info!("{}", status.canonical_reason().unwrap_or("ok")); + } + } } } From 130972590f2f88c70ae7e4a39bf4327b98b2fd98 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Sun, 8 Mar 2026 23:52:16 +0100 Subject: [PATCH 19/39] use .instrument() instead of .entered() for async spans Span guards from .entered() do not propagate correctly across await points. Switch to tracing::Instrument to ensure spans are properly associated with their async tasks throughout their lifetime. 
Co-Authored-By: Claude Opus 4.6 --- src/webserver/database/execute_queries.rs | 21 +- .../database/sqlpage_functions/functions.rs | 230 +++++++++--------- src/webserver/http.rs | 14 +- src/webserver/oidc.rs | 11 +- 4 files changed, 142 insertions(+), 134 deletions(-) diff --git a/src/webserver/database/execute_queries.rs b/src/webserver/database/execute_queries.rs index 2baed05d..57d5bef4 100644 --- a/src/webserver/database/execute_queries.rs +++ b/src/webserver/database/execute_queries.rs @@ -1,6 +1,7 @@ use anyhow::{anyhow, Context}; use futures_util::stream::Stream; use futures_util::StreamExt; +use tracing::Instrument; use serde_json::Value; use std::borrow::Cow; use std::path::Path; @@ -68,24 +69,23 @@ pub fn stream_query_results_with_conn<'a>( request.server_timing.record("bind_params"); let connection = take_connection(&request.app_state.db, db_connection, request).await?; log::trace!("Executing query {:?}", query.sql); - let _query_span = tracing::info_span!( + let query_span = tracing::info_span!( "db.query", db.query.text = query.sql, db.system.name = request.app_state.db.info.database_type.otel_name(), code.file.path = %source_file.display(), code.line.number = source_line_number(stmt.query_position.start.line), - ) - .entered(); + ); let mut stream = connection.fetch_many(query); let mut error = None; - while let Some(elem) = stream.next().await { + while let Some(elem) = stream.next().instrument(query_span.clone()).await { let mut query_result = parse_single_sql_result(source_file, stmt, elem); if let DbItem::Error(e) = query_result { error = Some(e); break; } apply_json_columns(&mut query_result, &stmt.json_columns); - apply_delayed_functions(request, &stmt.delayed_functions, &mut query_result).await?; + apply_delayed_functions(request, &stmt.delayed_functions, &mut query_result).instrument(query_span.clone()).await?; for db_item in parse_dynamic_rows(query_result) { yield db_item; } @@ -229,15 +229,14 @@ async fn execute_set_variable_query<'a>( 
query.sql ); - let _query_span = tracing::info_span!( + let query_span = tracing::info_span!( "db.query", db.query.text = query.sql, db.system.name = request.app_state.db.info.database_type.otel_name(), code.file.path = %source_file.display(), code.line.number = source_line_number(statement.query_position.start.line), - ) - .entered(); - let value = match connection.fetch_optional(query).await { + ); + let value = match connection.fetch_optional(query).instrument(query_span).await { Ok(Some(row)) => row_to_string(&row), Ok(None) => None, Err(e) => { @@ -309,8 +308,8 @@ async fn take_connection<'a>( return Ok(c); } let pool_size = db.connection.size(); - let _acquire_span = tracing::info_span!("db.pool.acquire", db.pool.size = pool_size,).entered(); - match db.connection.acquire().await { + let acquire_span = tracing::info_span!("db.pool.acquire", db.pool.size = pool_size,); + match db.connection.acquire().instrument(acquire_span).await { Ok(c) => { log::debug!("Acquired a database connection"); request.server_timing.record("db_conn"); diff --git a/src/webserver/database/sqlpage_functions/functions.rs b/src/webserver/database/sqlpage_functions/functions.rs index 899eb22c..dbe821b5 100644 --- a/src/webserver/database/sqlpage_functions/functions.rs +++ b/src/webserver/database/sqlpage_functions/functions.rs @@ -12,6 +12,7 @@ use crate::webserver::{ }; use anyhow::{anyhow, Context}; use futures_util::StreamExt; +use tracing::Instrument; use mime_guess::mime; use std::fmt::Write; use std::{borrow::Cow, ffi::OsStr, str::FromStr}; @@ -136,16 +137,16 @@ async fn exec<'a>( Make sure you understand the security implications before enabling it, and never allow user input to be passed as the first argument to this function. 
You can enable it by setting the allow_exec option to true in the sqlpage.json configuration file.") } - let _exec_span = tracing::info_span!( + let exec_span = tracing::info_span!( "subprocess", otel.name = format!("EXEC {program_name}"), process.command = %program_name, process.args_count = args.len(), - ) - .entered(); + ); let res = tokio::process::Command::new(&*program_name) .args(args.iter().map(|x| &**x)) .output() + .instrument(exec_span) .await .with_context(|| { let mut s = format!("Unable to execute command: {program_name}"); @@ -220,46 +221,49 @@ async fn fetch( url.full = %http_request.url, http.response.status_code = tracing::field::Empty, ); - let _guard = fetch_span.enter(); - let client = make_http_client(&request.app_state.config) - .with_context(|| "Unable to create an HTTP client")?; - let req = build_request(&client, &http_request)?; + async { + let client = make_http_client(&request.app_state.config) + .with_context(|| "Unable to create an HTTP client")?; + let req = build_request(&client, &http_request)?; - log::info!("Fetching {}", http_request.url); - let mut response = if let Some(body) = &http_request.body { - let (body, req) = prepare_request_body(body, req)?; - req.send_body(body) - } else { - req.send() - } - .await - .map_err(|e| anyhow!("Unable to fetch {}: {e}", http_request.url))?; + log::info!("Fetching {}", http_request.url); + let mut response = if let Some(body) = &http_request.body { + let (body, req) = prepare_request_body(body, req)?; + req.send_body(body) + } else { + req.send() + } + .await + .map_err(|e| anyhow!("Unable to fetch {}: {e}", http_request.url))?; - fetch_span.record( - "http.response.status_code", - i64::from(response.status().as_u16()), - ); + tracing::Span::current().record( + "http.response.status_code", + i64::from(response.status().as_u16()), + ); - log::debug!( - "Finished fetching {}. Status: {}", - http_request.url, - response.status() - ); + log::debug!( + "Finished fetching {}. 
Status: {}", + http_request.url, + response.status() + ); - let body = response - .body() - .await - .with_context(|| { - format!( - "Unable to read the body of the response from {}", - http_request.url - ) - })? - .to_vec(); - let response_str = decode_response(body, http_request.response_encoding.as_deref())?; - log::debug!("Fetch response: {response_str}"); - Ok(Some(response_str)) + let body = response + .body() + .await + .with_context(|| { + format!( + "Unable to read the body of the response from {}", + http_request.url + ) + })? + .to_vec(); + let response_str = decode_response(body, http_request.response_encoding.as_deref())?; + log::debug!("Fetch response: {response_str}"); + Ok(Some(response_str)) + } + .instrument(fetch_span) + .await } fn decode_response(response: Vec, encoding: Option<&str>) -> anyhow::Result { @@ -315,84 +319,88 @@ async fn fetch_with_meta( url.full = %http_request.url, http.response.status_code = tracing::field::Empty, ); - let _guard = fetch_span.enter(); - let client = make_http_client(&request.app_state.config) - .with_context(|| "Unable to create an HTTP client")?; - let req = build_request(&client, &http_request)?; + async { + let client = make_http_client(&request.app_state.config) + .with_context(|| "Unable to create an HTTP client")?; + let req = build_request(&client, &http_request)?; - log::info!("Fetching {} with metadata", http_request.url); - let response_result = if let Some(body) = &http_request.body { - let (body, req) = prepare_request_body(body, req)?; - req.send_body(body).await - } else { - req.send().await - }; - - let mut resp_str = Vec::new(); - let mut encoder = serde_json::Serializer::new(&mut resp_str); - let mut obj = encoder.serialize_map(Some(3))?; - match response_result { - Ok(mut response) => { - let status = response.status(); - fetch_span.record("http.response.status_code", i64::from(status.as_u16())); - obj.serialize_entry("status", &status.as_u16())?; - let mut has_error = false; - if 
status.is_server_error() { - has_error = true; - obj.serialize_entry("error", &format!("Server error: {status}"))?; - } + log::info!("Fetching {} with metadata", http_request.url); + let response_result = if let Some(body) = &http_request.body { + let (body, req) = prepare_request_body(body, req)?; + req.send_body(body).await + } else { + req.send().await + }; + + let mut resp_str = Vec::new(); + let mut encoder = serde_json::Serializer::new(&mut resp_str); + let mut obj = encoder.serialize_map(Some(3))?; + match response_result { + Ok(mut response) => { + let status = response.status(); + tracing::Span::current() + .record("http.response.status_code", i64::from(status.as_u16())); + obj.serialize_entry("status", &status.as_u16())?; + let mut has_error = false; + if status.is_server_error() { + has_error = true; + obj.serialize_entry("error", &format!("Server error: {status}"))?; + } - let headers = response.headers(); - - let is_json = headers - .get("content-type") - .and_then(|v| v.to_str().ok()) - .unwrap_or_default() - .starts_with("application/json"); - - obj.serialize_entry( - "headers", - &headers - .iter() - .map(|(k, v)| (k.to_string(), v.to_str().unwrap_or_default())) - .collect::>(), - )?; - - match response.body().await { - Ok(body) => { - let body_bytes = body.to_vec(); - let body_str = - decode_response(body_bytes, http_request.response_encoding.as_deref())?; - if is_json { - obj.serialize_entry( - "json_body", - &serde_json::value::RawValue::from_string(body_str)?, - )?; - } else { - obj.serialize_entry("body", &body_str)?; + let headers = response.headers(); + + let is_json = headers + .get("content-type") + .and_then(|v| v.to_str().ok()) + .unwrap_or_default() + .starts_with("application/json"); + + obj.serialize_entry( + "headers", + &headers + .iter() + .map(|(k, v)| (k.to_string(), v.to_str().unwrap_or_default())) + .collect::>(), + )?; + + match response.body().await { + Ok(body) => { + let body_bytes = body.to_vec(); + let body_str = + 
decode_response(body_bytes, http_request.response_encoding.as_deref())?; + if is_json { + obj.serialize_entry( + "json_body", + &serde_json::value::RawValue::from_string(body_str)?, + )?; + } else { + obj.serialize_entry("body", &body_str)?; + } } - } - Err(e) => { - log::warn!("Failed to read response body: {e}"); - if !has_error { - obj.serialize_entry( - "error", - &format!("Failed to read response body: {e}"), - )?; + Err(e) => { + log::warn!("Failed to read response body: {e}"); + if !has_error { + obj.serialize_entry( + "error", + &format!("Failed to read response body: {e}"), + )?; + } } } } + Err(e) => { + log::warn!("Request failed: {e}"); + obj.serialize_entry("error", &format!("Request failed: {e}"))?; + } } - Err(e) => { - log::warn!("Request failed: {e}"); - obj.serialize_entry("error", &format!("Request failed: {e}"))?; - } - } - obj.end()?; - let return_value = String::from_utf8(resp_str)?; - Ok(Some(return_value)) + obj.end()?; + let return_value = String::from_utf8(resp_str)?; + Ok(Some(return_value)) + } + .instrument(fetch_span) + .await } pub(crate) async fn hash_password(password: Option) -> anyhow::Result> { @@ -610,12 +618,11 @@ async fn run_sql<'a>( log::debug!("run_sql: first argument is NULL, returning NULL"); return Ok(None); }; - let _run_sql_span = tracing::info_span!( + let run_sql_span = tracing::info_span!( "sqlpage.file", otel.name = format!("SQL {sql_file_path}"), code.file.path = %sql_file_path, - ) - .entered(); + ); let app_state = &request.app_state; let sql_file = app_state .sql_file_cache @@ -624,6 +631,7 @@ async fn run_sql<'a>( std::path::Path::new(sql_file_path.as_ref()), true, ) + .instrument(run_sql_span.clone()) .await .with_context(|| format!("run_sql: invalid path {sql_file_path:?}"))?; let tmp_req = if let Some(variables) = variables { @@ -652,7 +660,7 @@ async fn run_sql<'a>( let mut json_results_bytes = Vec::new(); let mut json_encoder = serde_json::Serializer::new(&mut json_results_bytes); let mut seq = 
json_encoder.serialize_seq(None)?; - while let Some(db_item) = results_stream.next().await { + while let Some(db_item) = results_stream.next().instrument(run_sql_span.clone()).await { use crate::webserver::database::DbItem::{Error, FinishedQuery, Row}; match db_item { Row(row) => { diff --git a/src/webserver/http.rs b/src/webserver/http.rs index 4ab95883..99fc2f79 100644 --- a/src/webserver/http.rs +++ b/src/webserver/http.rs @@ -17,7 +17,7 @@ use actix_web::http::header::{ContentType, Header, HttpDate, IfModifiedSince, La use actix_web::http::{header, StatusCode}; use actix_web::web::PayloadConfig; use actix_web::{dev::ServiceResponse, middleware, web, App, Error, HttpResponse, HttpServer}; -use tracing::Span; +use tracing::{Instrument, Span}; use tracing_actix_web::{DefaultRootSpanBuilder, RootSpanBuilder, TracingLogger}; use super::error::{anyhow_err_to_actix, bind_error, send_anyhow_error}; @@ -226,13 +226,13 @@ async fn render_sql( .get("content-type") .and_then(|v| v.to_str().ok()) .unwrap_or(""); - let _parse_span = tracing::info_span!( + let parse_span = tracing::info_span!( "http.parse_request", http.request.method = %srv_req.method(), http.request.header.content_type = content_type, - ) - .entered(); + ); extract_request_info(srv_req, Arc::clone(&app_state), server_timing) + .instrument(parse_span) .await .map_err(|e| anyhow_err_to_actix(e, &app_state))? }; @@ -378,14 +378,14 @@ async fn process_sql_request( let server_timing = ServerTiming::for_env(app_state.config.environment); let sql_file = { - let _span = tracing::info_span!( + let span = tracing::info_span!( "sqlpage.file.load", code.file.path = %sql_path.display(), - ) - .entered(); + ); app_state .sql_file_cache .get_with_privilege(app_state, &sql_path, false) + .instrument(span) .await .with_context(|| format!("Unable to read SQL file \"{}\"", sql_path.display())) .map_err(|e| anyhow_err_to_actix(e, app_state))? 
diff --git a/src/webserver/oidc.rs b/src/webserver/oidc.rs index 123603c5..6dca9b79 100644 --- a/src/webserver/oidc.rs +++ b/src/webserver/oidc.rs @@ -17,6 +17,7 @@ use actix_web::{ Error, HttpMessage, HttpResponse, }; use anyhow::{anyhow, Context}; +use tracing::Instrument; use awc::Client; use openidconnect::core::{ CoreAuthDisplay, CoreAuthPrompt, CoreErrorResponseType, CoreGenderClaim, CoreJsonWebKey, @@ -484,8 +485,8 @@ async fn handle_oidc_callback( oidc_state: &Arc, request: ServiceRequest, ) -> ServiceResponse { - let _span = tracing::info_span!("oidc.callback").entered(); - match process_oidc_callback(oidc_state, &request).await { + let span = tracing::info_span!("oidc.callback"); + match process_oidc_callback(oidc_state, &request).instrument(span).await { Ok(mut response) => { clear_redirect_count_cookie(&mut response); request.into_response(response) @@ -720,17 +721,17 @@ async fn exchange_code_for_token( http_client: &awc::Client, oidc_callback_params: OidcCallbackParams, ) -> anyhow::Result { - let _span = tracing::info_span!( + let span = tracing::info_span!( "http.client", otel.name = "POST token_endpoint", http.request.method = "POST", - ) - .entered(); + ); let token_response = oidc_client .exchange_code(openidconnect::AuthorizationCode::new( oidc_callback_params.code, ))? .request_async(&AwcHttpClient::from_client(http_client)) + .instrument(span) .await .context("Failed to exchange code for token")?; let access_token = token_response.access_token(); From f884eabefea636a6feaf326593104049eafab56c Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Sun, 8 Mar 2026 23:55:23 +0100 Subject: [PATCH 20/39] preserve multi-line error formatting in terminal log output When stderr is a terminal and the log message contains newlines (e.g. SQL syntax errors with source highlighting and arrows), print the metadata on the first line and the message below with its original formatting. Machine output (non-terminal) remains single-line logfmt. 
Co-Authored-By: Claude Opus 4.6 --- src/telemetry.rs | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/telemetry.rs b/src/telemetry.rs index 5aa10e86..97295614 100644 --- a/src/telemetry.rs +++ b/src/telemetry.rs @@ -244,9 +244,14 @@ mod logfmt { let _ = write!(buf, " target={target}"); } - // 4. msg - if let Some(msg) = event_fields.get("message") { - write_logfmt_value(&mut buf, "msg", msg); + // 4. msg — for terminal, preserve multi-line formatting (e.g. SQL + // error highlighting with arrows); for machine output, flatten. + let msg = event_fields.get("message"); + let multiline_msg = colors && msg.is_some_and(|m| m.contains('\n')); + if !multiline_msg { + if let Some(msg) = msg { + write_logfmt_value(&mut buf, "msg", msg); + } } // 5. Selected span fields in order @@ -289,6 +294,15 @@ mod logfmt { buf.push('\n'); + // For multi-line messages on a terminal, print the message below + // the metadata line with its original formatting preserved. + if multiline_msg { + if let Some(msg) = msg { + buf.push_str(msg); + buf.push('\n'); + } + } + // Write atomically to stderr let _ = io::Write::write_all(&mut io::stderr().lock(), buf.as_bytes()); } From 5bfaa0813a671303c3a49454b0a61c5007acf2c4 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Mon, 9 Mar 2026 00:23:58 +0100 Subject: [PATCH 21/39] use root Dockerfile for OTel example, add CARGO_PROFILE build arg Remove the example's custom Dockerfile and use the main one with a CARGO_PROFILE=release build arg to avoid OOM from fat LTO in memory-constrained Docker environments. The build scripts now read CARGO_PROFILE from the environment, defaulting to superoptimized for backward compatibility. 
Co-Authored-By: Claude Opus 4.6 --- Dockerfile | 2 ++ examples/opentelemetry-grafana/Dockerfile | 19 ------------------- .../opentelemetry-grafana/docker-compose.yml | 3 ++- scripts/build-dependencies.sh | 5 +++-- scripts/build-project.sh | 7 ++++--- 5 files changed, 11 insertions(+), 25 deletions(-) delete mode 100644 examples/opentelemetry-grafana/Dockerfile diff --git a/Dockerfile b/Dockerfile index a6ceb323..e4a6481b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,6 +3,8 @@ FROM --platform=$BUILDPLATFORM rust:1.91-slim AS builder WORKDIR /usr/src/sqlpage ARG TARGETARCH ARG BUILDARCH +ARG CARGO_PROFILE=superoptimized +ENV CARGO_PROFILE=$CARGO_PROFILE COPY scripts/ /usr/local/bin/ RUN cargo init . diff --git a/examples/opentelemetry-grafana/Dockerfile b/examples/opentelemetry-grafana/Dockerfile deleted file mode 100644 index 396a3789..00000000 --- a/examples/opentelemetry-grafana/Dockerfile +++ /dev/null @@ -1,19 +0,0 @@ -FROM rust:1.91-slim AS builder -WORKDIR /usr/src/sqlpage -RUN apt-get update && apt-get install -y --no-install-recommends \ - pkg-config libssl-dev unixodbc-dev && rm -rf /var/lib/apt/lists/* -COPY Cargo.toml Cargo.lock ./ -RUN mkdir src && echo "fn main() {}" > src/main.rs && mkdir -p src/webserver && cargo build --release 2>/dev/null || true -COPY . . 
-RUN touch src/main.rs && cargo build --release -RUN cp target/release/sqlpage /usr/local/bin/sqlpage - -FROM debian:trixie-slim -RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates unixodbc && rm -rf /var/lib/apt/lists/* -COPY --from=builder /usr/local/bin/sqlpage /usr/local/bin/sqlpage -COPY --from=builder /usr/src/sqlpage/sqlpage/sqlpage.db /etc/sqlpage/sqlpage.db -ENV SQLPAGE_WEB_ROOT=/var/www -ENV SQLPAGE_CONFIGURATION_DIRECTORY=/etc/sqlpage -WORKDIR /var/www -EXPOSE 8080 -CMD ["sqlpage"] diff --git a/examples/opentelemetry-grafana/docker-compose.yml b/examples/opentelemetry-grafana/docker-compose.yml index 28361c6a..d45f7ac7 100644 --- a/examples/opentelemetry-grafana/docker-compose.yml +++ b/examples/opentelemetry-grafana/docker-compose.yml @@ -12,7 +12,8 @@ services: sqlpage: build: context: ../.. - dockerfile: examples/opentelemetry-grafana/Dockerfile + args: + CARGO_PROFILE: release environment: - DATABASE_URL=postgres://sqlpage:sqlpage@postgres:5432/sqlpage - OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 diff --git a/scripts/build-dependencies.sh b/scripts/build-dependencies.sh index 5a55754a..af845432 100755 --- a/scripts/build-dependencies.sh +++ b/scripts/build-dependencies.sh @@ -3,10 +3,11 @@ set -euo pipefail source /tmp/build-env.sh -echo "Building dependencies for target: $TARGET" +PROFILE="${CARGO_PROFILE:-superoptimized}" +echo "Building dependencies for target: $TARGET (profile: $PROFILE)" cargo build \ --target "$TARGET" \ --config "target.$TARGET.linker=\"$LINKER\"" \ --features odbc-static \ - --profile superoptimized + --profile "$PROFILE" diff --git a/scripts/build-project.sh b/scripts/build-project.sh index ac595b61..f9a8781a 100755 --- a/scripts/build-project.sh +++ b/scripts/build-project.sh @@ -3,12 +3,13 @@ set -euo pipefail source /tmp/build-env.sh -echo "Building project for target: $TARGET" +PROFILE="${CARGO_PROFILE:-superoptimized}" +echo "Building project for target: $TARGET (profile: 
$PROFILE)" cargo build \ --target "$TARGET" \ --config "target.$TARGET.linker=\"$LINKER\"" \ --features odbc-static \ - --profile superoptimized + --profile "$PROFILE" -mv "target/$TARGET/superoptimized/sqlpage" sqlpage.bin +mv "target/$TARGET/$PROFILE/sqlpage" sqlpage.bin From 3aaf82efe20d3a6d9c222c4a4da672eabe5af293 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Mon, 9 Mar 2026 01:10:09 +0100 Subject: [PATCH 22/39] add db.query.parameter and db.response.returned_rows span attributes Co-Authored-By: Claude Opus 4.6 --- src/webserver/database/execute_queries.rs | 37 +++++++++++++++++++++-- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/src/webserver/database/execute_queries.rs b/src/webserver/database/execute_queries.rs index 57d5bef4..41f4b0d3 100644 --- a/src/webserver/database/execute_queries.rs +++ b/src/webserver/database/execute_queries.rs @@ -30,6 +30,18 @@ use sqlx::{ pub type DbConn = Option>; +fn record_query_params(span: &tracing::Span, params: &[Option]) { + use tracing_opentelemetry::OpenTelemetrySpanExt; + for (idx, value) in params.iter().enumerate() { + let key = opentelemetry::Key::new(format!("db.query.parameter.{idx}")); + let otel_value = match value { + Some(v) => opentelemetry::Value::String(v.clone().into()), + None => opentelemetry::Value::String("NULL".into()), + }; + span.set_attribute(key, otel_value); + } +} + fn source_line_number(line: usize) -> i64 { i64::try_from(line).unwrap_or(i64::MAX) } @@ -75,15 +87,21 @@ pub fn stream_query_results_with_conn<'a>( db.system.name = request.app_state.db.info.database_type.otel_name(), code.file.path = %source_file.display(), code.line.number = source_line_number(stmt.query_position.start.line), + db.response.returned_rows = tracing::field::Empty, ); + record_query_params(&query_span, &query.param_values); let mut stream = connection.fetch_many(query); let mut error = None; + let mut returned_rows: i64 = 0; while let Some(elem) = stream.next().instrument(query_span.clone()).await 
{ let mut query_result = parse_single_sql_result(source_file, stmt, elem); if let DbItem::Error(e) = query_result { error = Some(e); break; } + if matches!(query_result, DbItem::Row(_)) { + returned_rows += 1; + } apply_json_columns(&mut query_result, &stmt.json_columns); apply_delayed_functions(request, &stmt.delayed_functions, &mut query_result).instrument(query_span.clone()).await?; for db_item in parse_dynamic_rows(query_result) { @@ -91,6 +109,7 @@ pub fn stream_query_results_with_conn<'a>( } } drop(stream); + query_span.record("db.response.returned_rows", returned_rows); if let Some(error) = error { try_rollback_transaction(connection).await; yield DbItem::Error(error); @@ -235,10 +254,18 @@ async fn execute_set_variable_query<'a>( db.system.name = request.app_state.db.info.database_type.otel_name(), code.file.path = %source_file.display(), code.line.number = source_line_number(statement.query_position.start.line), + db.response.returned_rows = tracing::field::Empty, ); - let value = match connection.fetch_optional(query).instrument(query_span).await { - Ok(Some(row)) => row_to_string(&row), - Ok(None) => None, + record_query_params(&query_span, &query.param_values); + let value = match connection.fetch_optional(query).instrument(query_span.clone()).await { + Ok(Some(row)) => { + query_span.record("db.response.returned_rows", 1_i64); + row_to_string(&row) + } + Ok(None) => { + query_span.record("db.response.returned_rows", 0_i64); + None + } Err(e) => { try_rollback_transaction(connection).await; let err = display_stmt_db_error(source_file, statement, e); @@ -431,6 +458,7 @@ async fn bind_parameters<'a>( let sql = stmt.query.as_str(); log::debug!("Preparing statement: {sql}"); let mut arguments = AnyArguments::default(); + let mut param_values = Vec::with_capacity(stmt.params.len()); for (param_idx, param) in stmt.params.iter().enumerate() { log::trace!("\tevaluating parameter {}: {}", param_idx + 1, param); let argument = extract_req_param(param, request, 
db_connection).await?; @@ -439,6 +467,7 @@ async fn bind_parameters<'a>( param_idx + 1, argument.as_ref().unwrap_or(&Cow::Borrowed("NULL")) ); + param_values.push(argument.as_deref().map(str::to_owned)); match argument { None => arguments.add(None::), Some(Cow::Owned(s)) => arguments.add(s), @@ -450,6 +479,7 @@ async fn bind_parameters<'a>( sql, arguments, has_arguments, + param_values, }) } @@ -535,6 +565,7 @@ pub struct StatementWithParams<'a> { sql: &'a str, arguments: AnyArguments<'a>, has_arguments: bool, + param_values: Vec>, } impl<'q> sqlx::Execute<'q, Any> for StatementWithParams<'q> { From 4c88b1e1de05864c71e65d694906e3520f9441c7 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Mon, 9 Mar 2026 01:37:28 +0100 Subject: [PATCH 23/39] Add official blog post about tracing --- .../sqlpage/migrations/73_blog_tracing.sql | 174 ++++++++++++++++++ 1 file changed, 174 insertions(+) create mode 100644 examples/official-site/sqlpage/migrations/73_blog_tracing.sql diff --git a/examples/official-site/sqlpage/migrations/73_blog_tracing.sql b/examples/official-site/sqlpage/migrations/73_blog_tracing.sql new file mode 100644 index 00000000..4f7e40d2 --- /dev/null +++ b/examples/official-site/sqlpage/migrations/73_blog_tracing.sql @@ -0,0 +1,174 @@ +INSERT INTO blog_posts (title, description, icon, created_at, content) +VALUES + ( + 'Tracing SQLPage with OpenTelemetry and Grafana', + 'How to inspect requests, SQL queries, and database wait time with distributed tracing', + 'route-2', + '2026-03-09', + ' +# Tracing SQLPage with OpenTelemetry and Grafana + +When a page is slow, a log line telling you that the request took 1.8 seconds is only the start of the investigation. What you usually want to know next is where that time went: + +- Did the request wait for a database connection? +- Which SQL file was executed? +- Which query took the longest? +- Did the delay start in SQLPage, in the reverse proxy, or in the database? 
+ +SQLPage now supports [OpenTelemetry](https://opentelemetry.io/), the standard way to emit distributed traces. Combined with Grafana, Tempo, Loki, and an OpenTelemetry collector, this gives you a detailed timeline of each request and lets you jump directly from logs to traces. + +If you want a ready-to-run demo, see the [OpenTelemetry + Grafana example](https://github.com/sqlpage/SQLPage/tree/main/examples/opentelemetry-grafana), which this article is based on. + +## What tracing gives you + +With tracing enabled, one HTTP request becomes a tree of timed operations called spans. In a typical SQLPage app, you will see something like: + +```text +[nginx] GET /todos + └─ [sqlpage] GET /todos + └─ [sqlpage] SQL website/todos.sql + ├─ db.pool.acquire + └─ db.query +``` + +This is immediately useful for: + +- Debugging slow pages by seeing exactly which query consumed the time +- Detecting connection pool pressure by measuring time spent in `db.pool.acquire` +- Following one request end-to-end from the reverse proxy to SQLPage to PostgreSQL + +Tracing is especially helpful in SQLPage because one request often maps cleanly to one SQL file. That makes traces easy to interpret even when you are not used to application performance tooling. + +## The easiest way to try it + +The simplest way to explore tracing is to run the example shipped with SQLPage: + +```bash +cd examples/opentelemetry-grafana +docker compose up --build +``` + +That stack starts: + +- nginx as the reverse proxy +- SQLPage +- PostgreSQL +- an OpenTelemetry collector +- Grafana Tempo for traces +- Grafana Loki for logs +- Promtail for log shipping +- Grafana for visualization + +Then: + +1. Open [http://localhost](http://localhost) and use the sample todo application. +2. Open [http://localhost:3000](http://localhost:3000) to access Grafana. +3. Inspect recent traces and logs from the default dashboard. +4. Open a trace to see the full span waterfall for a single request. 
+ +This setup is useful both as a demo and as a reference architecture for production deployments. + +## Enabling tracing in SQLPage + +Tracing is built into SQLPage. There is no plugin to install and no SQLPage-specific tracing configuration file to write. + +You only need to set standard OpenTelemetry environment variables before starting SQLPage: + +```bash +export OTEL_EXPORTER_OTLP_ENDPOINT="http://localhost:4318" +export OTEL_SERVICE_NAME="sqlpage" +sqlpage +``` + +The `OTEL_EXPORTER_OTLP_ENDPOINT` variable tells SQLPage where to send traces. +The `OTEL_SERVICE_NAME` variable controls how the service appears in your tracing backend. + +If `OTEL_EXPORTER_OTLP_ENDPOINT` is not set, SQLPage falls back to normal logging and tracing stays disabled. + +## What you will see in the trace + +The most useful spans emitted by SQLPage are: + +- The HTTP request span, with attributes such as method, path, status code, and user agent +- The SQL file execution span, showing which `.sql` file handled the request +- The `db.pool.acquire` span, showing time spent waiting for a database connection +- The `db.query` span, containing the SQL statement and database system + +In practice, that means you can answer questions like: + +- Is the page slow because the SQL itself is slow? +- Is the request queued because the connection pool is exhausted? +- Is the delay happening before SQLPage even receives the request? + +This is much more actionable than a single request duration number. + +## Logs and traces together + +Tracing is even more useful when logs and traces are connected. + +In the example stack, SQLPage writes structured logs to stdout, Promtail forwards them to Loki, and Grafana lets you move from a log line to the matching trace using the trace id. This makes it possible to start from an error log and immediately inspect the full request timeline. + +That workflow is often the difference between guessing and knowing. 
+ +## PostgreSQL correlation + +SQLPage also propagates trace context to PostgreSQL through the connection `application_name`. +This makes it possible to correlate live PostgreSQL activity or database logs with the trace that triggered it. + +For example, inspecting `pg_stat_activity` can show which trace is attached to a running query: + +```sql +SELECT application_name, query, state +FROM pg_stat_activity; +``` + +If you also include `%a` in PostgreSQL''s `log_line_prefix`, your database logs can contain the same trace context. + +## A practical debugging example + +Suppose users report that a page occasionally becomes slow under load. + +With tracing enabled, you might see that: + +- the HTTP span is long +- the SQL file execution span is also long +- the `db.query` span is short +- but `db.pool.acquire` takes several hundred milliseconds + +That immediately tells you the database query itself is not the problem. The real issue is contention on the connection pool. You can then increase `max_database_pool_connections`, reduce concurrent load, or review long-running requests that keep connections busy. + +Without tracing, this kind of diagnosis usually requires guesswork. + +## Deployment options + +The example uses Grafana Tempo and Loki, but SQLPage is not tied to a single backend. Because it emits standard OTLP traces, you can also send data to: + +- Jaeger +- Grafana Cloud +- Datadog +- Honeycomb +- New Relic +- Axiom + +In small setups, SQLPage can often send traces directly to the backend. In larger deployments, placing an OpenTelemetry collector in the middle is usually better because it centralizes routing, batching, and authentication. 
+ +## When to enable tracing + +Tracing is particularly valuable when: + +- you are running SQLPage behind a reverse proxy +- several SQL files participate in user-facing workflows +- you want to understand production latency, not just local development behavior +- you need a shared debugging tool for developers and operators + +If your application is already important enough to monitor, it is important enough to trace. + +## Conclusion + +SQLPage already makes the application logic easy to inspect because it lives in SQL files. Tracing extends that visibility to runtime behavior. + +By enabling OpenTelemetry and connecting SQLPage to Grafana, you can see not just that a request was slow, but why it was slow, where the time was spent, and which query or resource caused the delay. + +For a complete working setup, start with the [OpenTelemetry + Grafana example](https://github.com/sqlpage/SQLPage/tree/main/examples/opentelemetry-grafana) and adapt it to your own deployment. +' + ); From bf7a863279e38b7a2cbbd81bc8615eb5bb4900d9 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Mon, 9 Mar 2026 10:48:27 +0100 Subject: [PATCH 24/39] add http.request.body.size and url.query span attributes Add http.request.body.size to HTTP client spans (fetch, fetch_with_meta) and to the server-side http.parse_request span (from Content-Length header). Add url.query to the http.parse_request span. 
Co-Authored-By: Claude Opus 4.6 --- .../database/sqlpage_functions/functions.rs | 8 +++++++- src/webserver/http.rs | 13 +++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/webserver/database/sqlpage_functions/functions.rs b/src/webserver/database/sqlpage_functions/functions.rs index dbe821b5..f98860be 100644 --- a/src/webserver/database/sqlpage_functions/functions.rs +++ b/src/webserver/database/sqlpage_functions/functions.rs @@ -12,10 +12,10 @@ use crate::webserver::{ }; use anyhow::{anyhow, Context}; use futures_util::StreamExt; -use tracing::Instrument; use mime_guess::mime; use std::fmt::Write; use std::{borrow::Cow, ffi::OsStr, str::FromStr}; +use tracing::Instrument; super::function_definition_macro::sqlpage_functions! { basic_auth_password((&RequestInfo)); @@ -219,6 +219,7 @@ async fn fetch( otel.name = format!("{method}"), http.request.method = method, url.full = %http_request.url, + http.request.body.size = tracing::field::Empty, http.response.status_code = tracing::field::Empty, ); @@ -230,6 +231,8 @@ async fn fetch( log::info!("Fetching {}", http_request.url); let mut response = if let Some(body) = &http_request.body { let (body, req) = prepare_request_body(body, req)?; + tracing::Span::current() + .record("http.request.body.size", body.len() as i64); req.send_body(body) } else { req.send() @@ -317,6 +320,7 @@ async fn fetch_with_meta( otel.name = format!("{method}"), http.request.method = method, url.full = %http_request.url, + http.request.body.size = tracing::field::Empty, http.response.status_code = tracing::field::Empty, ); @@ -328,6 +332,8 @@ async fn fetch_with_meta( log::info!("Fetching {} with metadata", http_request.url); let response_result = if let Some(body) = &http_request.body { let (body, req) = prepare_request_body(body, req)?; + tracing::Span::current() + .record("http.request.body.size", body.len() as i64); req.send_body(body).await } else { req.send().await diff --git a/src/webserver/http.rs 
b/src/webserver/http.rs index 99fc2f79..0dae7e71 100644 --- a/src/webserver/http.rs +++ b/src/webserver/http.rs @@ -226,10 +226,23 @@ async fn render_sql( .get("content-type") .and_then(|v| v.to_str().ok()) .unwrap_or(""); + let content_length = srv_req + .headers() + .get("content-length") + .and_then(|v| v.to_str().ok()) + .and_then(|v| v.parse::().ok()); + let url_query = srv_req.query_string(); + let url_query = if url_query.is_empty() { + None + } else { + Some(url_query) + }; let parse_span = tracing::info_span!( "http.parse_request", http.request.method = %srv_req.method(), http.request.header.content_type = content_type, + http.request.body.size = content_length, + url.query = url_query, ); extract_request_info(srv_req, Arc::clone(&app_state), server_timing) .instrument(parse_span) From 10dec8708f6bdd561b336fecca34abe79ae6c0ba Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Mon, 9 Mar 2026 11:21:32 +0100 Subject: [PATCH 25/39] Replace Promtail with the OpenTelemetry Collector --- examples/opentelemetry-grafana/README.md | 32 ++-- .../opentelemetry-grafana/docker-compose.yml | 37 +++- .../grafana/datasources.yaml | 15 ++ .../grafana/sqlpage-home.json | 168 +++++++++++++++++- .../opentelemetry-grafana/otel-collector.yaml | 73 ++++++++ .../postgres-init/001_pg_stat_statements.sql | 5 + examples/opentelemetry-grafana/prometheus.yml | 9 + examples/opentelemetry-grafana/promtail.yaml | 51 ------ 8 files changed, 309 insertions(+), 81 deletions(-) create mode 100644 examples/opentelemetry-grafana/postgres-init/001_pg_stat_statements.sql create mode 100644 examples/opentelemetry-grafana/prometheus.yml delete mode 100644 examples/opentelemetry-grafana/promtail.yaml diff --git a/examples/opentelemetry-grafana/README.md b/examples/opentelemetry-grafana/README.md index ce39d511..ecf03487 100644 --- a/examples/opentelemetry-grafana/README.md +++ b/examples/opentelemetry-grafana/README.md @@ -18,7 +18,8 @@ This is useful for: ## Quick start (this example) This 
directory contains a ready-to-run Docker Compose stack that demonstrates -the full tracing and logging pipeline. No prior OpenTelemetry experience is needed. +the full tracing, logging, and PostgreSQL metrics pipeline. No prior +OpenTelemetry experience is needed. ### Prerequisites @@ -39,21 +40,22 @@ This starts eight services: | **nginx** | Reverse proxy, creates the root trace span | `localhost:80` | | **SQLPage** | Your application, sends traces to the collector | (internal 8080) | | **PostgreSQL** | Database | (internal 5432) | -| **OTel Collector** | Receives traces and forwards them to Tempo | `localhost:4318` | +| **Prometheus** | Stores PostgreSQL metrics scraped from the OTel Collector | (internal 9090) | | **Tempo** | Trace storage backend | (internal 3200) | | **Loki** | Log storage backend | (internal 3100) | -| **Promtail** | Scrapes container logs and pushes them to Loki | (internal) | +| **OTel Collector** | Receives traces, PostgreSQL metrics, and SQLPage logs | `localhost:4318`, `localhost:1514` | | **Grafana** | Web UI to explore traces and logs | `localhost:3000` | ### Explore traces and logs 1. Open the todo app at [http://localhost](http://localhost) — add a few items, click to toggle them. 2. Open Grafana at [http://localhost:3000](http://localhost:3000). -3. The default home dashboard now shows recent traces and recent SQLPage logs. +3. The default home dashboard now shows recent traces, recent SQLPage logs, and PostgreSQL metrics. 4. Click any trace ID in the trace table to see the full span waterfall. 5. In the logs panel, click a `trace_id` derived field to jump straight to the matching trace. -6. In the left sidebar, click **Explore** (compass icon) if you want to search manually. -7. Select **Tempo** to search traces or **Loki** to search logs. +6. The PostgreSQL metrics panels are populated by the collector's `postgresqlreceiver`. +7. In the left sidebar, click **Explore** (compass icon) if you want to search manually. +8. 
Select **Tempo** to search traces, **Loki** to search logs, or **Prometheus** to query metrics. ### What you will see in a trace @@ -86,8 +88,9 @@ SQLPage writes one structured log line per event, for example: ts=2026-03-08T20:56:15.000Z level=info target=sqlpage::webserver::http msg="request completed" method=GET path=/ trace_id=4f2d... ``` -Promtail scrapes these container logs, parses the logfmt fields, and sends them to Loki. -The homepage dashboard filters to the `sqlpage` container so you can see request logs update +The OpenTelemetry Collector receives these SQLPage container logs through Docker's syslog +logging driver and forwards them to Loki. +The homepage dashboard filters to the `sqlpage` service so you can see request logs update live while you use the sample app. ### PostgreSQL correlation @@ -156,15 +159,16 @@ understood by all OTel-compatible tools. SQLPage reads them directly — no sending trace data. Here is how the pieces fit together: ``` - Traces: SQLPage -> OTel Collector -> Tempo -> Grafana - Logs: SQLPage stdout/stderr -> Promtail -> Loki -> Grafana + Traces: SQLPage -> OTel Collector -> Tempo -> Grafana + Logs: SQLPage -> Docker syslog logging driver -> OTel Collector -> Loki -> Grafana + Metrics: PostgreSQL -> OTel Collector postgresqlreceiver -> Prometheus -> Grafana ``` - **SQLPage** generates trace data and sends it via the OTLP HTTP protocol. - A **collector** (optional) receives traces and forwards them to one or more backends. Useful for buffering, sampling, or fanning out to multiple destinations. You can skip the collector and send directly from SQLPage to most backends. -- **Promtail** scrapes Docker container logs and forwards them to Loki. +- The **OTel Collector** also receives SQLPage container logs and forwards them to Loki. - **Tempo** stores traces, **Loki** stores logs, and **Grafana** lets you search both. ### Trace context propagation @@ -194,10 +198,8 @@ This is what the Docker Compose example in this directory uses. 
- [Grafana Tempo](https://grafana.com/docs/tempo/latest/) stores the traces. - [Grafana Loki](https://grafana.com/docs/loki/latest/) stores the logs. - [Grafana](https://grafana.com/docs/grafana/latest/) provides the web UI. -- An [OTel Collector](https://opentelemetry.io/docs/collector/) sits between - SQLPage and Tempo (optional but recommended for production). -- [Promtail](https://grafana.com/docs/loki/latest/send-data/promtail/) scrapes container logs - and forwards them to Loki. +- An [OTel Collector](https://opentelemetry.io/docs/collector/) receives SQLPage traces, + SQLPage logs, and PostgreSQL metrics in this example. **SQLPage environment variables:** diff --git a/examples/opentelemetry-grafana/docker-compose.yml b/examples/opentelemetry-grafana/docker-compose.yml index d45f7ac7..3efdcc47 100644 --- a/examples/opentelemetry-grafana/docker-compose.yml +++ b/examples/opentelemetry-grafana/docker-compose.yml @@ -22,15 +22,31 @@ services: - ./website:/var/www - ./sqlpage:/etc/sqlpage depends_on: + otel-collector: + condition: service_started postgres: condition: service_healthy + logging: + driver: syslog + options: + syslog-address: "tcp://localhost:1514" + syslog-format: "rfc5424micro" + tag: "sqlpage" postgres: image: postgres:16 + command: + - postgres + - -c + - shared_preload_libraries=pg_stat_statements + - -c + - pg_stat_statements.track_utility=off environment: POSTGRES_USER: sqlpage POSTGRES_PASSWORD: sqlpage POSTGRES_DB: sqlpage + volumes: + - ./postgres-init:/docker-entrypoint-initdb.d:ro healthcheck: test: ["CMD-SHELL", "pg_isready -U sqlpage"] interval: 2s @@ -44,8 +60,19 @@ services: ports: - "4317:4317" # OTLP gRPC - "4318:4318" # OTLP HTTP + - "1514:1514" # Syslog TCP (SQLPage container logs) depends_on: - tempo + - postgres + + prometheus: + image: prom/prometheus:v3.2.1 + command: + - --config.file=/etc/prometheus/prometheus.yml + - --storage.tsdb.path=/prometheus + - --web.enable-otlp-receiver + volumes: + - 
./prometheus.yml:/etc/prometheus/prometheus.yml:ro tempo: image: grafana/tempo:2.6.1 @@ -57,15 +84,6 @@ services: image: grafana/loki:3.4.1 command: ["-config.file=/etc/loki/local-config.yaml"] - promtail: - image: grafana/promtail:3.4.1 - command: ["-config.file=/etc/promtail/config.yml"] - volumes: - - ./promtail.yaml:/etc/promtail/config.yml:ro - - /var/run/docker.sock:/var/run/docker.sock:ro - depends_on: - - loki - grafana: image: grafana/grafana:latest ports: @@ -82,3 +100,4 @@ services: depends_on: - tempo - loki + - prometheus diff --git a/examples/opentelemetry-grafana/grafana/datasources.yaml b/examples/opentelemetry-grafana/grafana/datasources.yaml index f2134a1a..c44e9c48 100644 --- a/examples/opentelemetry-grafana/grafana/datasources.yaml +++ b/examples/opentelemetry-grafana/grafana/datasources.yaml @@ -7,6 +7,16 @@ datasources: access: proxy url: http://tempo:3200 isDefault: true + jsonData: + nodeGraph: + enabled: true + tracesToLogsV2: + datasourceUid: loki + filterByTraceID: true + filterBySpanID: true + spanStartTimeShift: "-5m" + spanEndTimeShift: "5m" + customQuery: false - name: Loki type: loki uid: loki @@ -18,3 +28,8 @@ datasources: matcherRegex: 'trace_id=([0-9a-f]+)' datasourceUid: tempo url: '$${__value.raw}' + - name: Prometheus + type: prometheus + uid: prometheus + access: proxy + url: http://prometheus:9090 diff --git a/examples/opentelemetry-grafana/grafana/sqlpage-home.json b/examples/opentelemetry-grafana/grafana/sqlpage-home.json index e1d366eb..943fe02e 100644 --- a/examples/opentelemetry-grafana/grafana/sqlpage-home.json +++ b/examples/opentelemetry-grafana/grafana/sqlpage-home.json @@ -39,13 +39,169 @@ "showLineNumbers": false, "showMiniMap": false }, - "content": "

Recent SQLPage traces and logs

Open http://localhost and interact with the app. New requests will appear here automatically.

The trace table shows recent requests. The logs panel shows SQLPage container logs, including the trace ID so you can jump from a log line to the full span waterfall.

", + "content": "

Recent SQLPage traces, logs, and PostgreSQL metrics

Open http://localhost and interact with the app. New requests will appear here automatically.

The trace table shows recent requests. Click any trace ID to open the full span waterfall in Grafana. The metrics panels come from the OpenTelemetry PostgreSQL receiver via Prometheus.

", "mode": "html" }, "pluginVersion": "12.4.0", "title": "Overview", "type": "text" }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 10 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 12, + "x": 0, + "y": 4 + }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(postgresql_backends)", + "refId": "A" + } + ], + "title": "PostgreSQL Backends", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 4, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + 
}, + "gridPos": { + "h": 4, + "w": 12, + "x": 12, + "y": 4 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(postgresql_db_size) by (postgresql_database_name)", + "legendFormat": "{{postgresql_database_name}}", + "refId": "A" + } + ], + "title": "PostgreSQL Database Size", + "type": "timeseries" + }, { "datasource": { "type": "tempo", @@ -128,7 +284,7 @@ "h": 12, "w": 24, "x": 0, - "y": 4 + "y": 8 }, "id": 2, "options": { @@ -199,7 +355,7 @@ "h": 10, "w": 24, "x": 0, - "y": 16 + "y": 20 }, "id": 3, "options": { @@ -221,7 +377,7 @@ }, "direction": "backward", "editorMode": "builder", - "expr": "{compose_service=\"sqlpage\"}", + "expr": "{service_name=\"sqlpage\"}", "queryType": "range", "refId": "A" } @@ -244,7 +400,7 @@ }, "timepicker": {}, "timezone": "browser", - "title": "SQLPage Tracing and Logs Home", + "title": "SQLPage Observability Home", "uid": "sqlpage-tracing-home", - "version": 3 + "version": 4 } diff --git a/examples/opentelemetry-grafana/otel-collector.yaml b/examples/opentelemetry-grafana/otel-collector.yaml index 692035c4..b5bc2588 100644 --- a/examples/opentelemetry-grafana/otel-collector.yaml +++ b/examples/opentelemetry-grafana/otel-collector.yaml @@ -5,8 +5,63 @@ receivers: endpoint: 0.0.0.0:4317 http: endpoint: 0.0.0.0:4318 + syslog/sqlpage: + protocol: rfc5424 + tcp: + listen_address: 0.0.0.0:1514 + postgresql: + endpoint: postgres:5432 + transport: tcp + username: sqlpage + password: sqlpage + tls: + insecure: true + databases: + - sqlpage + collection_interval: 1s + events: + db.server.query_sample: + enabled: true + db.server.top_query: + enabled: true + query_sample_collection: + max_rows_per_query: 1000 + top_query_collection: + collection_interval: 1s + 
max_rows_per_query: 1000 processors: + transform/sqlpage_logs: + error_mode: ignore + log_statements: + - context: resource + statements: + - set(attributes["service.name"], "sqlpage") + - context: log + statements: + - set(body, attributes["message"]) where attributes["message"] != nil + - merge_maps(cache, ExtractPatterns(body, "level=(?P[^ ]+)"), "upsert") where IsString(body) + - merge_maps(cache, ExtractPatterns(body, "trace_id=(?P[0-9a-f]+)"), "upsert") where IsString(body) + - set(attributes["level"], cache["level"]) where cache["level"] != nil + - set(attributes["trace_id"], cache["trace_id"]) where cache["trace_id"] != nil + - set(severity_text, "TRACE") where cache["level"] == "trace" + - set(severity_number, SEVERITY_NUMBER_TRACE) where cache["level"] == "trace" + - set(severity_text, "DEBUG") where cache["level"] == "debug" + - set(severity_number, SEVERITY_NUMBER_DEBUG) where cache["level"] == "debug" + - set(severity_text, "INFO") where cache["level"] == "info" + - set(severity_number, SEVERITY_NUMBER_INFO) where cache["level"] == "info" + - set(severity_text, "WARN") where cache["level"] == "warn" + - set(severity_number, SEVERITY_NUMBER_WARN) where cache["level"] == "warn" + - set(severity_text, "ERROR") where cache["level"] == "error" + - set(severity_number, SEVERITY_NUMBER_ERROR) where cache["level"] == "error" + transform/postgresql_events: + error_mode: ignore + log_statements: + - context: log + statements: + - set(log.attributes["trace_id"], log.trace_id.string) where log.trace_id.string != "" + - set(log.attributes["span_id"], log.span_id.string) where log.span_id.string != "" + - set(log.attributes["event_name"], log.event_name) where log.event_name != "" batch: timeout: 1s @@ -15,6 +70,12 @@ exporters: endpoint: tempo:4317 tls: insecure: true + otlp_http/loki: + endpoint: http://loki:3100/otlp + debug/postgresql_logs: + verbosity: detailed + prometheus: + endpoint: 0.0.0.0:9464 service: pipelines: @@ -22,3 +83,15 @@ service: receivers: 
[otlp] processors: [batch] exporters: [otlp_grpc/tempo] + metrics: + receivers: [postgresql] + processors: [batch] + exporters: [prometheus] + logs/sqlpage: + receivers: [syslog/sqlpage] + processors: [transform/sqlpage_logs, batch] + exporters: [otlp_http/loki] + logs: + receivers: [postgresql] + processors: [transform/postgresql_events, batch] + exporters: [otlp_http/loki, debug/postgresql_logs] diff --git a/examples/opentelemetry-grafana/postgres-init/001_pg_stat_statements.sql b/examples/opentelemetry-grafana/postgres-init/001_pg_stat_statements.sql new file mode 100644 index 00000000..c237a502 --- /dev/null +++ b/examples/opentelemetry-grafana/postgres-init/001_pg_stat_statements.sql @@ -0,0 +1,5 @@ +\connect postgres +CREATE EXTENSION IF NOT EXISTS pg_stat_statements; + +\connect sqlpage +CREATE EXTENSION IF NOT EXISTS pg_stat_statements; diff --git a/examples/opentelemetry-grafana/prometheus.yml b/examples/opentelemetry-grafana/prometheus.yml new file mode 100644 index 00000000..29b26bf8 --- /dev/null +++ b/examples/opentelemetry-grafana/prometheus.yml @@ -0,0 +1,9 @@ +global: + scrape_interval: 10s + evaluation_interval: 10s + +scrape_configs: + - job_name: otel-collector + static_configs: + - targets: ["otel-collector:9464"] + diff --git a/examples/opentelemetry-grafana/promtail.yaml b/examples/opentelemetry-grafana/promtail.yaml deleted file mode 100644 index 98ecce47..00000000 --- a/examples/opentelemetry-grafana/promtail.yaml +++ /dev/null @@ -1,51 +0,0 @@ -server: - http_listen_port: 9080 - grpc_listen_port: 0 - -positions: - filename: /tmp/positions.yaml - -clients: - - url: http://loki:3100/loki/api/v1/push - -scrape_configs: - - job_name: docker - docker_sd_configs: - - host: unix:///var/run/docker.sock - refresh_interval: 5s - relabel_configs: - - source_labels: [__meta_docker_container_label_com_docker_compose_service] - regex: .+ - action: keep - - source_labels: [__meta_docker_container_label_com_docker_compose_service] - target_label: 
compose_service - - source_labels: [__meta_docker_container_name] - regex: /(.+) - target_label: container - - source_labels: [__meta_docker_container_log_stream] - target_label: stream - pipeline_stages: - - docker: {} - - match: - selector: '{compose_service="sqlpage"}' - stages: - - logfmt: - mapping: - ts: - level: - target: - msg: - method: - path: - file: - client_ip: - trace_id: - - labels: - level: - target: - method: - path: - file: - - timestamp: - source: ts - format: RFC3339Nano From 7e057e24028a4f914c64001b4856557c1953277d Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Mon, 9 Mar 2026 12:15:49 +0100 Subject: [PATCH 26/39] Refactor telemetry logging helpers --- src/telemetry.rs | 242 +++++++++++++++++++++++++++-------------------- 1 file changed, 141 insertions(+), 101 deletions(-) diff --git a/src/telemetry.rs b/src/telemetry.rs index 97295614..19032be2 100644 --- a/src/telemetry.rs +++ b/src/telemetry.rs @@ -13,12 +13,11 @@ use opentelemetry_sdk::trace::SdkTracerProvider; static TRACER_PROVIDER: OnceLock = OnceLock::new(); -/// Initializes logging / tracing. Returns `true` if OTel was activated. +/// Initializes logging / tracing. Returns `true` if `OTel` was activated. +#[must_use] pub fn init_telemetry() -> bool { let otel_endpoint = env::var("OTEL_EXPORTER_OTLP_ENDPOINT").ok(); - let otel_active = otel_endpoint - .as_deref() - .is_some_and(|v| !v.is_empty()); + let otel_active = otel_endpoint.as_deref().is_some_and(|v| !v.is_empty()); if otel_active { init_otel_tracing(); @@ -29,7 +28,7 @@ pub fn init_telemetry() -> bool { otel_active } -/// Shuts down the OTel tracer provider, flushing pending spans. +/// Shuts down the `OTel` tracer provider, flushing pending spans. pub fn shutdown_telemetry() { if let Some(provider) = TRACER_PROVIDER.get() { if let Err(e) = provider.shutdown() { @@ -38,21 +37,15 @@ pub fn shutdown_telemetry() { } } -/// Tracing subscriber without OTel export — logfmt output only. 
+/// Tracing subscriber without `OTel` export — logfmt output only. fn init_tracing() { use tracing_subscriber::layer::SubscriberExt; - let filter = tracing_subscriber::EnvFilter::try_from_default_env().unwrap_or_else(|_| { - "sqlpage=info,actix_web=info,tracing_actix_web=info".into() - }); - let subscriber = tracing_subscriber::registry() - .with(filter) + .with(default_env_filter()) .with(logfmt::LogfmtLayer::new()); - tracing::subscriber::set_global_default(subscriber) - .expect("Failed to set global tracing subscriber"); - tracing_log::LogTracer::init().expect("Failed to set log→tracing bridge"); + set_global_subscriber(subscriber); } fn init_otel_tracing() { @@ -82,14 +75,20 @@ fn init_otel_tracing() { .with_tracer(tracer) .with_location(false); - let filter = tracing_subscriber::EnvFilter::try_from_default_env() - .unwrap_or_else(|_| "sqlpage=info,actix_web=info,tracing_actix_web=info".into()); - let subscriber = tracing_subscriber::registry() - .with(filter) + .with(default_env_filter()) .with(logfmt::LogfmtLayer::new()) .with(otel_layer); + set_global_subscriber(subscriber); +} + +fn default_env_filter() -> tracing_subscriber::EnvFilter { + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| "sqlpage=info,actix_web=info,tracing_actix_web=info".into()) +} + +fn set_global_subscriber(subscriber: impl tracing::Subscriber + Send + Sync) { tracing::subscriber::set_global_default(subscriber) .expect("Failed to set global tracing subscriber"); tracing_log::LogTracer::init().expect("Failed to set log→tracing bridge"); @@ -118,7 +117,7 @@ mod logfmt { #[derive(Default)] struct SpanFields(HashMap<&'static str, String>); - /// Visitor that collects fields into a HashMap. + /// Visitor that collects fields into a `HashMap`. struct FieldCollector<'a>(&'a mut HashMap<&'static str, String>); impl Visit for FieldCollector<'_> { @@ -140,7 +139,7 @@ mod logfmt { } /// Fields we pick from spans, in display order. 
- /// (span_field_name, logfmt_key) + /// (`span_field_name`, `logfmt_key`) const SPAN_FIELDS: &[(&str, &str)] = &[ ("http.method", "method"), ("http.target", "path"), @@ -207,104 +206,145 @@ mod logfmt { fn on_event(&self, event: &tracing::Event<'_>, ctx: Context<'_, S>) { let mut buf = String::with_capacity(256); let colors = self.use_colors; + let mut event_fields = HashMap::new(); + event.record(&mut FieldCollector(&mut event_fields)); + let target = event_target(event, &event_fields); + let msg = event_fields.get("message"); + let multiline_msg = is_multiline_terminal_message(colors, msg); - // 1. ts - let now = chrono::Utc::now().format("%Y-%m-%dT%H:%M:%S%.3fZ"); - if colors { - let _ = write!(buf, "{DIM}ts={now}{RESET}"); - } else { - let _ = write!(buf, "ts={now}"); - } + write_timestamp(&mut buf, colors); + write_level(&mut buf, *event.metadata().level(), colors); + write_dimmed_field(&mut buf, "target", target, colors); + write_message(&mut buf, msg, multiline_msg); + write_span_fields(&mut buf, ctx.event_scope(event)); + write_trace_id(&mut buf, ctx.event_scope(event), colors); - // 2. level (with color) - let level = event.metadata().level(); - if colors { - let (color, label) = match *level { - tracing::Level::ERROR => (RED, "error"), - tracing::Level::WARN => (YELLOW, "warn"), - tracing::Level::INFO => (GREEN, "info"), - tracing::Level::DEBUG => (BLUE, "debug"), - tracing::Level::TRACE => (DIM, "trace"), - }; - let _ = write!(buf, " {BOLD}{color}level={label}{RESET}"); - } else { - let _ = write!(buf, " level={}", level.as_str().to_ascii_lowercase()); - } + buf.push('\n'); + write_multiline_message(&mut buf, msg, multiline_msg); + let _ = io::Write::write_all(&mut io::stderr().lock(), buf.as_bytes()); + } + } - // 3. 
target — for bridged log events, use log.target; otherwise metadata target - let mut event_fields = HashMap::new(); - event.record(&mut FieldCollector(&mut event_fields)); + fn event_target<'a>( + event: &'a tracing::Event<'_>, + event_fields: &'a HashMap<&'static str, String>, + ) -> &'a str { + event_fields + .get("log.target") + .map_or_else(|| event.metadata().target(), String::as_str) + } - let target = event_fields - .get("log.target") - .map_or_else(|| event.metadata().target(), String::as_str); - if colors { - let _ = write!(buf, " {DIM}target={target}{RESET}"); - } else { - let _ = write!(buf, " target={target}"); - } + fn is_multiline_terminal_message(colors: bool, msg: Option<&String>) -> bool { + colors && msg.is_some_and(|message| message.contains('\n')) + } - // 4. msg — for terminal, preserve multi-line formatting (e.g. SQL - // error highlighting with arrows); for machine output, flatten. - let msg = event_fields.get("message"); - let multiline_msg = colors && msg.is_some_and(|m| m.contains('\n')); - if !multiline_msg { - if let Some(msg) = msg { - write_logfmt_value(&mut buf, "msg", msg); - } - } + fn write_timestamp(buf: &mut String, colors: bool) { + let now = chrono::Utc::now().format("%Y-%m-%dT%H:%M:%S%.3fZ"); + if colors { + let _ = write!(buf, "{DIM}ts={now}{RESET}"); + } else { + let _ = write!(buf, "ts={now}"); + } + } - // 5. 
Selected span fields in order - let mut seen = [false; SPAN_FIELDS.len()]; - if let Some(scope) = ctx.event_scope(event) { - for span in scope { - let ext = span.extensions(); - if let Some(fields) = ext.get::() { - for (i, &(span_key, logfmt_key)) in SPAN_FIELDS.iter().enumerate() { - if !seen[i] { - if let Some(val) = fields.0.get(span_key) { - write_logfmt_value(&mut buf, logfmt_key, val); - seen[i] = true; - } - } - } - } - } + fn write_level(buf: &mut String, level: tracing::Level, colors: bool) { + if colors { + let (color, label) = level_style(level); + let _ = write!(buf, " {BOLD}{color}level={label}{RESET}"); + } else { + let _ = write!(buf, " level={}", level.as_str().to_ascii_lowercase()); + } + } + + fn level_style(level: tracing::Level) -> (&'static str, &'static str) { + match level { + tracing::Level::ERROR => (RED, "error"), + tracing::Level::WARN => (YELLOW, "warn"), + tracing::Level::INFO => (GREEN, "info"), + tracing::Level::DEBUG => (BLUE, "debug"), + tracing::Level::TRACE => (DIM, "trace"), + } + } + + fn write_dimmed_field(buf: &mut String, key: &str, value: &str, colors: bool) { + if colors { + let _ = write!(buf, " {DIM}{key}={value}{RESET}"); + } else { + let _ = write!(buf, " {key}={value}"); + } + } + + fn write_message(buf: &mut String, msg: Option<&String>, multiline_msg: bool) { + if !multiline_msg { + if let Some(msg) = msg { + write_logfmt_value(buf, "msg", msg); } + } + } - // 6. 
trace_id from OpenTelemetry span context (only if valid) - if let Some(scope) = ctx.event_scope(event) { - 'outer: for span in scope { - let ext = span.extensions(); - if let Some(otel_data) = ext.get::() { - if let Some(trace_id) = otel_data.trace_id() { - let id = trace_id.to_string(); - if id != INVALID_TRACE_ID { - if colors { - let _ = write!(buf, " {DIM}trace_id={id}{RESET}"); - } else { - let _ = write!(buf, " trace_id={id}"); - } - break 'outer; - } + fn write_span_fields( + buf: &mut String, + scope: Option>, + ) where + S: Subscriber + for<'a> LookupSpan<'a>, + { + let mut seen = [false; SPAN_FIELDS.len()]; + if let Some(scope) = scope { + for span in scope { + let ext = span.extensions(); + if let Some(fields) = ext.get::() { + for (i, &(span_key, logfmt_key)) in SPAN_FIELDS.iter().enumerate() { + if seen[i] { + continue; + } + if let Some(val) = fields.0.get(span_key) { + write_logfmt_value(buf, logfmt_key, val); + seen[i] = true; } } } } + } + } - buf.push('\n'); + fn write_trace_id( + buf: &mut String, + scope: Option>, + colors: bool, + ) where + S: Subscriber + for<'a> LookupSpan<'a>, + { + if let Some(trace_id) = first_valid_trace_id(scope) { + write_dimmed_field(buf, "trace_id", &trace_id, colors); + } + } - // For multi-line messages on a terminal, print the message below - // the metadata line with its original formatting preserved. - if multiline_msg { - if let Some(msg) = msg { - buf.push_str(msg); - buf.push('\n'); + fn first_valid_trace_id( + scope: Option>, + ) -> Option + where + S: Subscriber + for<'a> LookupSpan<'a>, + { + for span in scope? 
{ + let ext = span.extensions(); + if let Some(otel_data) = ext.get::() { + if let Some(trace_id) = otel_data.trace_id() { + let trace_id = trace_id.to_string(); + if trace_id != INVALID_TRACE_ID { + return Some(trace_id); + } } } + } + None + } - // Write atomically to stderr - let _ = io::Write::write_all(&mut io::stderr().lock(), buf.as_bytes()); + fn write_multiline_message(buf: &mut String, msg: Option<&String>, multiline_msg: bool) { + if multiline_msg { + if let Some(msg) = msg { + buf.push_str(msg); + buf.push('\n'); + } } } From 7e156c5b10711282e2d31a39e9e85fbeae6abe3a Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Mon, 9 Mar 2026 12:16:18 +0100 Subject: [PATCH 27/39] Clamp traced fetch body size --- src/webserver/database/sqlpage_functions/functions.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/webserver/database/sqlpage_functions/functions.rs b/src/webserver/database/sqlpage_functions/functions.rs index f98860be..bce629cf 100644 --- a/src/webserver/database/sqlpage_functions/functions.rs +++ b/src/webserver/database/sqlpage_functions/functions.rs @@ -231,8 +231,10 @@ async fn fetch( log::info!("Fetching {}", http_request.url); let mut response = if let Some(body) = &http_request.body { let (body, req) = prepare_request_body(body, req)?; - tracing::Span::current() - .record("http.request.body.size", body.len() as i64); + tracing::Span::current().record( + "http.request.body.size", + i64::try_from(body.len()).unwrap_or(i64::MAX), + ); req.send_body(body) } else { req.send() @@ -332,8 +334,7 @@ async fn fetch_with_meta( log::info!("Fetching {} with metadata", http_request.url); let response_result = if let Some(body) = &http_request.body { let (body, req) = prepare_request_body(body, req)?; - tracing::Span::current() - .record("http.request.body.size", body.len() as i64); + tracing::Span::current().record("http.request.body.size", body.len() as i64); req.send_body(body).await } else { req.send().await From 
36aa4653bfa8aaa272f3b9e9e8d81ece60a7bcb5 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Mon, 9 Mar 2026 12:17:09 +0100 Subject: [PATCH 28/39] Clamp traced fetch_with_meta body size --- src/webserver/database/sqlpage_functions/functions.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/webserver/database/sqlpage_functions/functions.rs b/src/webserver/database/sqlpage_functions/functions.rs index bce629cf..32011f48 100644 --- a/src/webserver/database/sqlpage_functions/functions.rs +++ b/src/webserver/database/sqlpage_functions/functions.rs @@ -334,7 +334,10 @@ async fn fetch_with_meta( log::info!("Fetching {} with metadata", http_request.url); let response_result = if let Some(body) = &http_request.body { let (body, req) = prepare_request_body(body, req)?; - tracing::Span::current().record("http.request.body.size", body.len() as i64); + tracing::Span::current().record( + "http.request.body.size", + i64::try_from(body.len()).unwrap_or(i64::MAX), + ); req.send_body(body).await } else { req.send().await From e02ed672799f757d10bb50d713cecd4ff995484a Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Mon, 9 Mar 2026 12:32:34 +0100 Subject: [PATCH 29/39] Silence noisy PostgreSQL collector logs --- .../opentelemetry-grafana/otel-collector.yaml | 56 ++++++------------- 1 file changed, 16 insertions(+), 40 deletions(-) diff --git a/examples/opentelemetry-grafana/otel-collector.yaml b/examples/opentelemetry-grafana/otel-collector.yaml index b5bc2588..f2d65e80 100644 --- a/examples/opentelemetry-grafana/otel-collector.yaml +++ b/examples/opentelemetry-grafana/otel-collector.yaml @@ -19,16 +19,6 @@ receivers: databases: - sqlpage collection_interval: 1s - events: - db.server.query_sample: - enabled: true - db.server.top_query: - enabled: true - query_sample_collection: - max_rows_per_query: 1000 - top_query_collection: - collection_interval: 1s - max_rows_per_query: 1000 processors: transform/sqlpage_logs: @@ -36,32 +26,24 @@ processors: 
log_statements: - context: resource statements: - - set(attributes["service.name"], "sqlpage") + - set(resource.attributes["service.name"], "sqlpage") - context: log statements: - - set(body, attributes["message"]) where attributes["message"] != nil - - merge_maps(cache, ExtractPatterns(body, "level=(?P[^ ]+)"), "upsert") where IsString(body) - - merge_maps(cache, ExtractPatterns(body, "trace_id=(?P[0-9a-f]+)"), "upsert") where IsString(body) - - set(attributes["level"], cache["level"]) where cache["level"] != nil - - set(attributes["trace_id"], cache["trace_id"]) where cache["trace_id"] != nil - - set(severity_text, "TRACE") where cache["level"] == "trace" - - set(severity_number, SEVERITY_NUMBER_TRACE) where cache["level"] == "trace" - - set(severity_text, "DEBUG") where cache["level"] == "debug" - - set(severity_number, SEVERITY_NUMBER_DEBUG) where cache["level"] == "debug" - - set(severity_text, "INFO") where cache["level"] == "info" - - set(severity_number, SEVERITY_NUMBER_INFO) where cache["level"] == "info" - - set(severity_text, "WARN") where cache["level"] == "warn" - - set(severity_number, SEVERITY_NUMBER_WARN) where cache["level"] == "warn" - - set(severity_text, "ERROR") where cache["level"] == "error" - - set(severity_number, SEVERITY_NUMBER_ERROR) where cache["level"] == "error" - transform/postgresql_events: - error_mode: ignore - log_statements: - - context: log - statements: - - set(log.attributes["trace_id"], log.trace_id.string) where log.trace_id.string != "" - - set(log.attributes["span_id"], log.span_id.string) where log.span_id.string != "" - - set(log.attributes["event_name"], log.event_name) where log.event_name != "" + - set(log.body, log.attributes["message"]) where log.attributes["message"] != nil + - merge_maps(log.cache, ExtractPatterns(log.body, "level=(?P[^ ]+)"), "upsert") where IsString(log.body) + - merge_maps(log.cache, ExtractPatterns(log.body, "trace_id=(?P[0-9a-f]+)"), "upsert") where IsString(log.body) + - 
set(log.attributes["level"], log.cache["level"]) where log.cache["level"] != nil + - set(log.attributes["trace_id"], log.cache["trace_id"]) where log.cache["trace_id"] != nil + - set(log.severity_text, "TRACE") where log.cache["level"] == "trace" + - set(log.severity_number, SEVERITY_NUMBER_TRACE) where log.cache["level"] == "trace" + - set(log.severity_text, "DEBUG") where log.cache["level"] == "debug" + - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.cache["level"] == "debug" + - set(log.severity_text, "INFO") where log.cache["level"] == "info" + - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.cache["level"] == "info" + - set(log.severity_text, "WARN") where log.cache["level"] == "warn" + - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.cache["level"] == "warn" + - set(log.severity_text, "ERROR") where log.cache["level"] == "error" + - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.cache["level"] == "error" batch: timeout: 1s @@ -72,8 +54,6 @@ exporters: insecure: true otlp_http/loki: endpoint: http://loki:3100/otlp - debug/postgresql_logs: - verbosity: detailed prometheus: endpoint: 0.0.0.0:9464 @@ -91,7 +71,3 @@ service: receivers: [syslog/sqlpage] processors: [transform/sqlpage_logs, batch] exporters: [otlp_http/loki] - logs: - receivers: [postgresql] - processors: [transform/postgresql_events, batch] - exporters: [otlp_http/loki, debug/postgresql_logs] From d285c57015dd0a283c19d371d4c1b15d6b3ebb85 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Mon, 9 Mar 2026 14:05:15 +0100 Subject: [PATCH 30/39] make startup logs parseable --- src/webserver/http.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/webserver/http.rs b/src/webserver/http.rs index 0dae7e71..a95b7ea2 100644 --- a/src/webserver/http.rs +++ b/src/webserver/http.rs @@ -680,8 +680,8 @@ fn log_welcome_message(config: &AppConfig) { let version = env!("CARGO_PKG_VERSION"); let web_root = config.web_root.display(); - eprintln!( - 
"{sparkle} SQLPage v{version} started successfully! {sparkle}\n\n\ + log::info!( + "\n{sparkle} SQLPage v{version} started successfully! {sparkle}\n\n\ View your website at:\n{link} {address_message}\n\n\ Create your pages with SQL files in:\n{computer} {web_root}\n\n\ Happy coding! {rocket}" From 0deb65032422492771af15c610665f079e80de56 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Mon, 9 Mar 2026 14:29:05 +0100 Subject: [PATCH 31/39] update terminal log formats --- src/telemetry.rs | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/telemetry.rs b/src/telemetry.rs index 19032be2..e401eaf9 100644 --- a/src/telemetry.rs +++ b/src/telemetry.rs @@ -214,8 +214,8 @@ mod logfmt { write_timestamp(&mut buf, colors); write_level(&mut buf, *event.metadata().level(), colors); - write_dimmed_field(&mut buf, "target", target, colors); write_message(&mut buf, msg, multiline_msg); + write_dimmed_field(&mut buf, "target", target, colors); write_span_fields(&mut buf, ctx.event_scope(event)); write_trace_id(&mut buf, ctx.event_scope(event), colors); @@ -250,7 +250,7 @@ mod logfmt { fn write_level(buf: &mut String, level: tracing::Level, colors: bool) { if colors { let (color, label) = level_style(level); - let _ = write!(buf, " {BOLD}{color}level={label}{RESET}"); + let _ = write!(buf, " {DIM}level={RESET}{BOLD}{color}{label}{RESET}"); } else { let _ = write!(buf, " level={}", level.as_str().to_ascii_lowercase()); } @@ -350,11 +350,7 @@ mod logfmt { /// Write a logfmt key=value pair, quoting the value if it contains spaces or special chars. 
fn write_logfmt_value(buf: &mut String, key: &str, value: &str) { - let needs_quoting = value.contains(' ') - || value.contains('"') - || value.contains('=') - || value.contains('\n') - || value.is_empty(); + let needs_quoting = value.contains([' ', '"', '=', '\n', '\t']) || value.is_empty(); if needs_quoting { let escaped = value.replace('\n', " ").replace('"', "\\\""); From 8f7aeea45dd553d9a556b7f93b93fafac4873dcb Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Mon, 9 Mar 2026 15:12:57 +0100 Subject: [PATCH 32/39] Ingest real PostgreSQL logs with trace IDs --- .../opentelemetry-grafana/docker-compose.yml | 21 ++++++++++--- .../grafana/datasources.yaml | 7 ++--- .../opentelemetry-grafana/otel-collector.yaml | 31 ++++++++++++++++++- 3 files changed, 50 insertions(+), 9 deletions(-) diff --git a/examples/opentelemetry-grafana/docker-compose.yml b/examples/opentelemetry-grafana/docker-compose.yml index 3efdcc47..1fa79cb4 100644 --- a/examples/opentelemetry-grafana/docker-compose.yml +++ b/examples/opentelemetry-grafana/docker-compose.yml @@ -29,7 +29,7 @@ services: logging: driver: syslog options: - syslog-address: "tcp://localhost:1514" + syslog-address: "udp://localhost:1514" syslog-format: "rfc5424micro" tag: "sqlpage" @@ -41,6 +41,12 @@ services: - shared_preload_libraries=pg_stat_statements - -c - pg_stat_statements.track_utility=off + - -c + - log_destination=stderr + - -c + - log_line_prefix=ts=%m pid=%p db=%d user=%u app=[%a] + - -c + - log_min_duration_statement=25 environment: POSTGRES_USER: sqlpage POSTGRES_PASSWORD: sqlpage @@ -52,15 +58,22 @@ services: interval: 2s timeout: 5s retries: 5 + logging: + driver: syslog + options: + syslog-address: "udp://localhost:1515" + syslog-format: "rfc5424micro" + tag: "postgres" otel-collector: image: otel/opentelemetry-collector-contrib:latest volumes: - ./otel-collector.yaml:/etc/otelcol-contrib/config.yaml:ro ports: - - "4317:4317" # OTLP gRPC - - "4318:4318" # OTLP HTTP - - "1514:1514" # Syslog TCP (SQLPage 
container logs) + - "4317:4317" + - "4318:4318" + - "1514:1514/udp" + - "1515:1515/udp" depends_on: - tempo - postgres diff --git a/examples/opentelemetry-grafana/grafana/datasources.yaml b/examples/opentelemetry-grafana/grafana/datasources.yaml index c44e9c48..6c7736aa 100644 --- a/examples/opentelemetry-grafana/grafana/datasources.yaml +++ b/examples/opentelemetry-grafana/grafana/datasources.yaml @@ -12,11 +12,10 @@ datasources: enabled: true tracesToLogsV2: datasourceUid: loki - filterByTraceID: true - filterBySpanID: true spanStartTimeShift: "-5m" spanEndTimeShift: "5m" - customQuery: false + customQuery: true + query: '{service_name="postgresql"} | trace_id="$${__span.traceId}" | span_id="$${__span.spanId}"' - name: Loki type: loki uid: loki @@ -25,7 +24,7 @@ datasources: jsonData: derivedFields: - name: trace_id - matcherRegex: 'trace_id=([0-9a-f]+)' + matcherRegex: '(?:trace_id=|00-)([0-9a-f]{32})' datasourceUid: tempo url: '$${__value.raw}' - name: Prometheus diff --git a/examples/opentelemetry-grafana/otel-collector.yaml b/examples/opentelemetry-grafana/otel-collector.yaml index f2d65e80..a5b7e645 100644 --- a/examples/opentelemetry-grafana/otel-collector.yaml +++ b/examples/opentelemetry-grafana/otel-collector.yaml @@ -7,8 +7,12 @@ receivers: endpoint: 0.0.0.0:4318 syslog/sqlpage: protocol: rfc5424 - tcp: + udp: listen_address: 0.0.0.0:1514 + syslog/postgresql: + protocol: rfc5424 + udp: + listen_address: 0.0.0.0:1515 postgresql: endpoint: postgres:5432 transport: tcp @@ -44,6 +48,27 @@ processors: - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.cache["level"] == "warn" - set(log.severity_text, "ERROR") where log.cache["level"] == "error" - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.cache["level"] == "error" + transform/postgresql_logs: + error_mode: ignore + log_statements: + - context: resource + statements: + - set(resource.attributes["service.name"], "postgresql") + - context: log + statements: + - set(log.body, 
log.attributes["message"]) where log.attributes["message"] != nil + - merge_maps(log.cache, ExtractPatterns(log.body, "app=\\[[^\\]]*00-(?P[0-9a-f]{32})-[0-9a-f]{16}-[0-9a-f]{2}\\]"), "upsert") where IsString(log.body) + - merge_maps(log.cache, ExtractPatterns(log.body, "(?PLOG|ERROR|WARNING|FATAL|PANIC|NOTICE|INFO|DEBUG):"), "upsert") where IsString(log.body) + - set(log.attributes["trace_id"], log.cache["trace_id"]) where log.cache["trace_id"] != nil + - set(log.attributes["level"], log.cache["level"]) where log.cache["level"] != nil + - set(log.severity_text, "DEBUG") where log.cache["level"] == "DEBUG" + - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.cache["level"] == "DEBUG" + - set(log.severity_text, "INFO") where log.cache["level"] == "LOG" or log.cache["level"] == "INFO" or log.cache["level"] == "NOTICE" + - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.cache["level"] == "LOG" or log.cache["level"] == "INFO" or log.cache["level"] == "NOTICE" + - set(log.severity_text, "WARN") where log.cache["level"] == "WARNING" + - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.cache["level"] == "WARNING" + - set(log.severity_text, "ERROR") where log.cache["level"] == "ERROR" or log.cache["level"] == "FATAL" or log.cache["level"] == "PANIC" + - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.cache["level"] == "ERROR" or log.cache["level"] == "FATAL" or log.cache["level"] == "PANIC" batch: timeout: 1s @@ -71,3 +96,7 @@ service: receivers: [syslog/sqlpage] processors: [transform/sqlpage_logs, batch] exporters: [otlp_http/loki] + logs/postgresql: + receivers: [syslog/postgresql] + processors: [transform/postgresql_logs, batch] + exporters: [otlp_http/loki] From 2ee9aa25d943186973bbacefcdcd2db426161690 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Mon, 9 Mar 2026 15:19:08 +0100 Subject: [PATCH 33/39] Use raw traceparent for PostgreSQL tracing --- src/webserver/database/execute_queries.rs | 11 ++++++++--- src/webserver/oidc.rs | 
7 +++++-- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/webserver/database/execute_queries.rs b/src/webserver/database/execute_queries.rs index 41f4b0d3..8235f0b4 100644 --- a/src/webserver/database/execute_queries.rs +++ b/src/webserver/database/execute_queries.rs @@ -1,11 +1,11 @@ use anyhow::{anyhow, Context}; use futures_util::stream::Stream; use futures_util::StreamExt; -use tracing::Instrument; use serde_json::Value; use std::borrow::Cow; use std::path::Path; use std::pin::Pin; +use tracing::Instrument; use super::csv_import::run_csv_import; use super::error_highlighting::{display_stmt_db_error, display_stmt_error}; @@ -257,7 +257,11 @@ async fn execute_set_variable_query<'a>( db.response.returned_rows = tracing::field::Empty, ); record_query_params(&query_span, &query.param_values); - let value = match connection.fetch_optional(query).instrument(query_span.clone()).await { + let value = match connection + .fetch_optional(query) + .instrument(query_span.clone()) + .await + { Ok(Some(row)) => { query_span.record("db.response.returned_rows", 1_i64); row_to_string(&row) @@ -376,7 +380,8 @@ async fn set_trace_context(connection: &mut AnyConnection, db: &Database) { ); let sql = match db.info.kind { sqlx::any::AnyKind::Postgres => { - format!("SET application_name = 'sqlpage {traceparent}'") + // postgresqlreceiver expects application_name to be a raw W3C traceparent value. 
+ format!("SET application_name = '{traceparent}'") } sqlx::any::AnyKind::MySql => { format!("SET @traceparent = '{traceparent}'") diff --git a/src/webserver/oidc.rs b/src/webserver/oidc.rs index 6dca9b79..7c61039c 100644 --- a/src/webserver/oidc.rs +++ b/src/webserver/oidc.rs @@ -17,7 +17,6 @@ use actix_web::{ Error, HttpMessage, HttpResponse, }; use anyhow::{anyhow, Context}; -use tracing::Instrument; use awc::Client; use openidconnect::core::{ CoreAuthDisplay, CoreAuthPrompt, CoreErrorResponseType, CoreGenderClaim, CoreJsonWebKey, @@ -36,6 +35,7 @@ use openidconnect::{ StandardTokenResponse, }; use serde::{Deserialize, Serialize}; +use tracing::Instrument; use super::error::anyhow_err_to_actix_resp; use super::http_client::make_http_client; @@ -486,7 +486,10 @@ async fn handle_oidc_callback( request: ServiceRequest, ) -> ServiceResponse { let span = tracing::info_span!("oidc.callback"); - match process_oidc_callback(oidc_state, &request).instrument(span).await { + match process_oidc_callback(oidc_state, &request) + .instrument(span) + .await + { Ok(mut response) => { clear_redirect_count_cookie(&mut response); request.into_response(response) From 80da041a1e697e85f9014eb3faf2c0f5c0e2ae38 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Mon, 9 Mar 2026 15:19:17 +0100 Subject: [PATCH 34/39] Update opentelemetry example for PostgreSQL query events --- .../grafana/sqlpage-home.json | 4 +-- .../opentelemetry-grafana/otel-collector.yaml | 36 ++++++++----------- .../opentelemetry-grafana/website/slow.sql | 7 ++++ 3 files changed, 23 insertions(+), 24 deletions(-) create mode 100644 examples/opentelemetry-grafana/website/slow.sql diff --git a/examples/opentelemetry-grafana/grafana/sqlpage-home.json b/examples/opentelemetry-grafana/grafana/sqlpage-home.json index 943fe02e..03ca9b42 100644 --- a/examples/opentelemetry-grafana/grafana/sqlpage-home.json +++ b/examples/opentelemetry-grafana/grafana/sqlpage-home.json @@ -88,9 +88,7 @@ "orientation": "auto", 
"percentChangeColorMode": "standard", "reduceOptions": { - "calcs": [ - "lastNotNull" - ], + "calcs": ["lastNotNull"], "fields": "", "values": false }, diff --git a/examples/opentelemetry-grafana/otel-collector.yaml b/examples/opentelemetry-grafana/otel-collector.yaml index a5b7e645..e3b668da 100644 --- a/examples/opentelemetry-grafana/otel-collector.yaml +++ b/examples/opentelemetry-grafana/otel-collector.yaml @@ -7,12 +7,8 @@ receivers: endpoint: 0.0.0.0:4318 syslog/sqlpage: protocol: rfc5424 - udp: + tcp: listen_address: 0.0.0.0:1514 - syslog/postgresql: - protocol: rfc5424 - udp: - listen_address: 0.0.0.0:1515 postgresql: endpoint: postgres:5432 transport: tcp @@ -23,6 +19,16 @@ receivers: databases: - sqlpage collection_interval: 1s + events: + db.server.query_sample: + enabled: true + db.server.top_query: + enabled: true + query_sample_collection: + max_rows_per_query: 1000 + top_query_collection: + collection_interval: 1s + max_rows_per_query: 1000 processors: transform/sqlpage_logs: @@ -48,7 +54,7 @@ processors: - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.cache["level"] == "warn" - set(log.severity_text, "ERROR") where log.cache["level"] == "error" - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.cache["level"] == "error" - transform/postgresql_logs: + transform/postgresql_events: error_mode: ignore log_statements: - context: resource @@ -56,19 +62,7 @@ processors: - set(resource.attributes["service.name"], "postgresql") - context: log statements: - - set(log.body, log.attributes["message"]) where log.attributes["message"] != nil - - merge_maps(log.cache, ExtractPatterns(log.body, "app=\\[[^\\]]*00-(?P[0-9a-f]{32})-[0-9a-f]{16}-[0-9a-f]{2}\\]"), "upsert") where IsString(log.body) - - merge_maps(log.cache, ExtractPatterns(log.body, "(?PLOG|ERROR|WARNING|FATAL|PANIC|NOTICE|INFO|DEBUG):"), "upsert") where IsString(log.body) - - set(log.attributes["trace_id"], log.cache["trace_id"]) where log.cache["trace_id"] != nil - - 
set(log.attributes["level"], log.cache["level"]) where log.cache["level"] != nil - - set(log.severity_text, "DEBUG") where log.cache["level"] == "DEBUG" - - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.cache["level"] == "DEBUG" - - set(log.severity_text, "INFO") where log.cache["level"] == "LOG" or log.cache["level"] == "INFO" or log.cache["level"] == "NOTICE" - - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.cache["level"] == "LOG" or log.cache["level"] == "INFO" or log.cache["level"] == "NOTICE" - - set(log.severity_text, "WARN") where log.cache["level"] == "WARNING" - - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.cache["level"] == "WARNING" - - set(log.severity_text, "ERROR") where log.cache["level"] == "ERROR" or log.cache["level"] == "FATAL" or log.cache["level"] == "PANIC" - - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.cache["level"] == "ERROR" or log.cache["level"] == "FATAL" or log.cache["level"] == "PANIC" + - set(log.body, log.attributes["db.query.text"]) where log.attributes["db.query.text"] != nil batch: timeout: 1s @@ -97,6 +91,6 @@ service: processors: [transform/sqlpage_logs, batch] exporters: [otlp_http/loki] logs/postgresql: - receivers: [syslog/postgresql] - processors: [transform/postgresql_logs, batch] + receivers: [postgresql] + processors: [transform/postgresql_events, batch] exporters: [otlp_http/loki] diff --git a/examples/opentelemetry-grafana/website/slow.sql b/examples/opentelemetry-grafana/website/slow.sql new file mode 100644 index 00000000..fead830c --- /dev/null +++ b/examples/opentelemetry-grafana/website/slow.sql @@ -0,0 +1,7 @@ +SELECT pg_sleep(15); + +SELECT 'list' AS component, + 'Slow Query Complete' AS title; + +SELECT 'The slow query finished successfully.' AS title, + 'This page exists to make PostgreSQL query-sample events easy to capture.' 
AS description; From 0a4920f5eb2394c56ca8214e44dcb61510e4394d Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Mon, 9 Mar 2026 15:24:33 +0100 Subject: [PATCH 35/39] Add nginx logs to opentelemetry example --- .../opentelemetry-grafana/docker-compose.yml | 7 ++ .../grafana/datasources.yaml | 2 +- .../opentelemetry-grafana/nginx/nginx.conf | 11 ++++ .../opentelemetry-grafana/otel-collector.yaml | 65 ++++++++++++++----- 4 files changed, 69 insertions(+), 16 deletions(-) diff --git a/examples/opentelemetry-grafana/docker-compose.yml b/examples/opentelemetry-grafana/docker-compose.yml index 1fa79cb4..5e32387b 100644 --- a/examples/opentelemetry-grafana/docker-compose.yml +++ b/examples/opentelemetry-grafana/docker-compose.yml @@ -8,6 +8,12 @@ services: depends_on: - sqlpage - otel-collector + logging: + driver: syslog + options: + syslog-address: "udp://localhost:1516" + syslog-format: "rfc5424micro" + tag: "nginx" sqlpage: build: @@ -74,6 +80,7 @@ services: - "4318:4318" - "1514:1514/udp" - "1515:1515/udp" + - "1516:1516/udp" depends_on: - tempo - postgres diff --git a/examples/opentelemetry-grafana/grafana/datasources.yaml b/examples/opentelemetry-grafana/grafana/datasources.yaml index 6c7736aa..6b7fcba7 100644 --- a/examples/opentelemetry-grafana/grafana/datasources.yaml +++ b/examples/opentelemetry-grafana/grafana/datasources.yaml @@ -15,7 +15,7 @@ datasources: spanStartTimeShift: "-5m" spanEndTimeShift: "5m" customQuery: true - query: '{service_name="postgresql"} | trace_id="$${__span.traceId}" | span_id="$${__span.spanId}"' + query: '{service_name=~"nginx|sqlpage|postgresql"} | trace_id="$${__span.traceId}"' - name: Loki type: loki uid: loki diff --git a/examples/opentelemetry-grafana/nginx/nginx.conf b/examples/opentelemetry-grafana/nginx/nginx.conf index 41fa87fe..61a7f978 100644 --- a/examples/opentelemetry-grafana/nginx/nginx.conf +++ b/examples/opentelemetry-grafana/nginx/nginx.conf @@ -9,6 +9,17 @@ http { otel_service_name nginx; + log_format otel_combined 
'ts=$time_iso8601 level=info target=nginx.access ' + 'client_ip=$remote_addr method=$request_method path="$uri" ' + 'status=$status request_time=$request_time ' + 'upstream_addr="$upstream_addr" upstream_status="$upstream_status" ' + 'upstream_response_time="$upstream_response_time" ' + 'referer="$http_referer" user_agent="$http_user_agent" ' + 'trace_id=$otel_trace_id span_id=$otel_span_id parent_id=$otel_parent_id'; + + access_log /dev/stdout otel_combined; + error_log /dev/stderr info; + upstream sqlpage { server sqlpage:8080; } diff --git a/examples/opentelemetry-grafana/otel-collector.yaml b/examples/opentelemetry-grafana/otel-collector.yaml index e3b668da..9eab632a 100644 --- a/examples/opentelemetry-grafana/otel-collector.yaml +++ b/examples/opentelemetry-grafana/otel-collector.yaml @@ -7,8 +7,16 @@ receivers: endpoint: 0.0.0.0:4318 syslog/sqlpage: protocol: rfc5424 - tcp: + udp: listen_address: 0.0.0.0:1514 + syslog/postgresql: + protocol: rfc5424 + udp: + listen_address: 0.0.0.0:1515 + syslog/nginx: + protocol: rfc5424 + udp: + listen_address: 0.0.0.0:1516 postgresql: endpoint: postgres:5432 transport: tcp @@ -19,16 +27,6 @@ receivers: databases: - sqlpage collection_interval: 1s - events: - db.server.query_sample: - enabled: true - db.server.top_query: - enabled: true - query_sample_collection: - max_rows_per_query: 1000 - top_query_collection: - collection_interval: 1s - max_rows_per_query: 1000 processors: transform/sqlpage_logs: @@ -54,7 +52,7 @@ processors: - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.cache["level"] == "warn" - set(log.severity_text, "ERROR") where log.cache["level"] == "error" - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.cache["level"] == "error" - transform/postgresql_events: + transform/postgresql_logs: error_mode: ignore log_statements: - context: resource @@ -62,7 +60,40 @@ processors: - set(resource.attributes["service.name"], "postgresql") - context: log statements: - - set(log.body, 
log.attributes["db.query.text"]) where log.attributes["db.query.text"] != nil + - set(log.body, log.attributes["message"]) where log.attributes["message"] != nil + - merge_maps(log.cache, ExtractPatterns(log.body, "app=\\[[^\\]]*00-(?P[0-9a-f]{32})-[0-9a-f]{16}-[0-9a-f]{2}\\]"), "upsert") where IsString(log.body) + - merge_maps(log.cache, ExtractPatterns(log.body, "(?PLOG|ERROR|WARNING|FATAL|PANIC|NOTICE|INFO|DEBUG):"), "upsert") where IsString(log.body) + - set(log.attributes["trace_id"], log.cache["trace_id"]) where log.cache["trace_id"] != nil + - set(log.attributes["level"], log.cache["level"]) where log.cache["level"] != nil + - set(log.severity_text, "DEBUG") where log.cache["level"] == "DEBUG" + - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.cache["level"] == "DEBUG" + - set(log.severity_text, "INFO") where log.cache["level"] == "LOG" or log.cache["level"] == "INFO" or log.cache["level"] == "NOTICE" + - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.cache["level"] == "LOG" or log.cache["level"] == "INFO" or log.cache["level"] == "NOTICE" + - set(log.severity_text, "WARN") where log.cache["level"] == "WARNING" + - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.cache["level"] == "WARNING" + - set(log.severity_text, "ERROR") where log.cache["level"] == "ERROR" or log.cache["level"] == "FATAL" or log.cache["level"] == "PANIC" + - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.cache["level"] == "ERROR" or log.cache["level"] == "FATAL" or log.cache["level"] == "PANIC" + transform/nginx_logs: + error_mode: ignore + log_statements: + - context: resource + statements: + - set(resource.attributes["service.name"], "nginx") + - context: log + statements: + - set(log.body, log.attributes["message"]) where log.attributes["message"] != nil + - merge_maps(log.cache, ExtractPatterns(log.body, "level=(?P[^ ]+)"), "upsert") where IsString(log.body) + - merge_maps(log.cache, ExtractPatterns(log.body, "trace_id=(?P[0-9a-f]{32})"), 
"upsert") where IsString(log.body) + - merge_maps(log.cache, ExtractPatterns(log.body, "span_id=(?P[0-9a-f]{16})"), "upsert") where IsString(log.body) + - set(log.attributes["trace_id"], log.cache["trace_id"]) where log.cache["trace_id"] != nil + - set(log.attributes["span_id"], log.cache["span_id"]) where log.cache["span_id"] != nil + - set(log.attributes["level"], log.cache["level"]) where log.cache["level"] != nil + - set(log.severity_text, "INFO") where log.cache["level"] == "info" + - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.cache["level"] == "info" + - set(log.severity_text, "ERROR") where log.cache["level"] == "error" + - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.cache["level"] == "error" + - set(log.severity_text, "ERROR") where IsMatch(log.body, "\\[error\\]") + - set(log.severity_number, SEVERITY_NUMBER_ERROR) where IsMatch(log.body, "\\[error\\]") batch: timeout: 1s @@ -91,6 +122,10 @@ service: processors: [transform/sqlpage_logs, batch] exporters: [otlp_http/loki] logs/postgresql: - receivers: [postgresql] - processors: [transform/postgresql_events, batch] + receivers: [syslog/postgresql] + processors: [transform/postgresql_logs, batch] + exporters: [otlp_http/loki] + logs/nginx: + receivers: [syslog/nginx] + processors: [transform/nginx_logs, batch] exporters: [otlp_http/loki] From a2622152ed0ec38c2bdc5c19ac28140ba49c3e92 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Mon, 9 Mar 2026 16:34:13 +0100 Subject: [PATCH 36/39] Parse nginx error log severity correctly --- .../opentelemetry-grafana/otel-collector.yaml | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/examples/opentelemetry-grafana/otel-collector.yaml b/examples/opentelemetry-grafana/otel-collector.yaml index 9eab632a..c96ff641 100644 --- a/examples/opentelemetry-grafana/otel-collector.yaml +++ b/examples/opentelemetry-grafana/otel-collector.yaml @@ -83,17 +83,21 @@ processors: statements: - set(log.body, 
log.attributes["message"]) where log.attributes["message"] != nil - merge_maps(log.cache, ExtractPatterns(log.body, "level=(?P[^ ]+)"), "upsert") where IsString(log.body) + - merge_maps(log.cache, ExtractPatterns(log.body, "\\[(?Pinfo|error|warn|notice|debug)\\]"), "upsert") where IsString(log.body) - merge_maps(log.cache, ExtractPatterns(log.body, "trace_id=(?P[0-9a-f]{32})"), "upsert") where IsString(log.body) - merge_maps(log.cache, ExtractPatterns(log.body, "span_id=(?P[0-9a-f]{16})"), "upsert") where IsString(log.body) - set(log.attributes["trace_id"], log.cache["trace_id"]) where log.cache["trace_id"] != nil - set(log.attributes["span_id"], log.cache["span_id"]) where log.cache["span_id"] != nil - set(log.attributes["level"], log.cache["level"]) where log.cache["level"] != nil - - set(log.severity_text, "INFO") where log.cache["level"] == "info" - - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.cache["level"] == "info" - - set(log.severity_text, "ERROR") where log.cache["level"] == "error" - - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.cache["level"] == "error" - - set(log.severity_text, "ERROR") where IsMatch(log.body, "\\[error\\]") - - set(log.severity_number, SEVERITY_NUMBER_ERROR) where IsMatch(log.body, "\\[error\\]") + - set(log.attributes["level"], log.cache["nginx_level"]) where log.cache["level"] == nil and log.cache["nginx_level"] != nil + - set(log.severity_text, "DEBUG") where log.attributes["level"] == "debug" + - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.attributes["level"] == "debug" + - set(log.severity_text, "INFO") where log.attributes["level"] == "info" or log.attributes["level"] == "notice" + - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.attributes["level"] == "info" or log.attributes["level"] == "notice" + - set(log.severity_text, "WARN") where log.attributes["level"] == "warn" + - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.attributes["level"] == "warn" + - 
set(log.severity_text, "ERROR") where log.attributes["level"] == "error" + - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.attributes["level"] == "error" batch: timeout: 1s From 57047153c3e5f520b04c6fe1eaf2f24f1ecdf45a Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Mon, 9 Mar 2026 16:46:48 +0100 Subject: [PATCH 37/39] Log all span fields when debug logging is enabled --- src/telemetry.rs | 103 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 98 insertions(+), 5 deletions(-) diff --git a/src/telemetry.rs b/src/telemetry.rs index e401eaf9..ef86d8dd 100644 --- a/src/telemetry.rs +++ b/src/telemetry.rs @@ -103,6 +103,7 @@ fn set_global_subscriber(subscriber: impl tracing::Subscriber + Send + Sync) { /// /// With terminal colors when stderr is a TTY. mod logfmt { + use std::collections::BTreeMap; use std::collections::HashMap; use std::fmt::Write; use std::io::{self, IsTerminal}; @@ -206,6 +207,8 @@ mod logfmt { fn on_event(&self, event: &tracing::Event<'_>, ctx: Context<'_, S>) { let mut buf = String::with_capacity(256); let colors = self.use_colors; + let level = *event.metadata().level(); + let include_all_span_fields = includes_all_span_fields(); let mut event_fields = HashMap::new(); event.record(&mut FieldCollector(&mut event_fields)); let target = event_target(event, &event_fields); @@ -213,10 +216,10 @@ mod logfmt { let multiline_msg = is_multiline_terminal_message(colors, msg); write_timestamp(&mut buf, colors); - write_level(&mut buf, *event.metadata().level(), colors); + write_level(&mut buf, level, colors); write_message(&mut buf, msg, multiline_msg); write_dimmed_field(&mut buf, "target", target, colors); - write_span_fields(&mut buf, ctx.event_scope(event)); + write_span_fields(&mut buf, ctx.event_scope(event), include_all_span_fields); write_trace_id(&mut buf, ctx.event_scope(event), colors); buf.push('\n'); @@ -285,25 +288,82 @@ mod logfmt { fn write_span_fields( buf: &mut String, scope: Option>, + include_all_span_fields: 
bool, ) where S: Subscriber + for<'a> LookupSpan<'a>, { - let mut seen = [false; SPAN_FIELDS.len()]; if let Some(scope) = scope { + let mut seen_mapped_fields = [false; SPAN_FIELDS.len()]; + let mut extra_fields = BTreeMap::new(); + for span in scope { let ext = span.extensions(); if let Some(fields) = ext.get::() { for (i, &(span_key, logfmt_key)) in SPAN_FIELDS.iter().enumerate() { - if seen[i] { + if seen_mapped_fields[i] { continue; } if let Some(val) = fields.0.get(span_key) { write_logfmt_value(buf, logfmt_key, val); - seen[i] = true; + seen_mapped_fields[i] = true; + } + } + if include_all_span_fields { + for (&key, val) in &fields.0 { + if SPAN_FIELDS.iter().any(|(span_key, _)| key == *span_key) { + continue; + } + extra_fields.entry(key).or_insert_with(|| val.clone()); } } } } + + if include_all_span_fields { + for (key, val) in extra_fields { + write_logfmt_value(buf, key, &val); + } + } + } + } + + fn includes_all_span_fields() -> bool { + tracing::level_filters::LevelFilter::current() >= tracing::level_filters::LevelFilter::DEBUG + } + + #[cfg(test)] + fn write_span_field_maps<'a>( + buf: &mut String, + span_fields: impl IntoIterator>, + include_all_span_fields: bool, + ) { + let mut seen_mapped_fields = [false; SPAN_FIELDS.len()]; + let mut extra_fields = BTreeMap::new(); + + for fields in span_fields { + for (i, &(span_key, logfmt_key)) in SPAN_FIELDS.iter().enumerate() { + if seen_mapped_fields[i] { + continue; + } + if let Some(val) = fields.get(span_key) { + write_logfmt_value(buf, logfmt_key, val); + seen_mapped_fields[i] = true; + } + } + if include_all_span_fields { + for (&key, val) in fields { + if SPAN_FIELDS.iter().any(|(span_key, _)| key == *span_key) { + continue; + } + extra_fields.entry(key).or_insert_with(|| val.clone()); + } + } + } + + if include_all_span_fields { + for (key, val) in extra_fields { + write_logfmt_value(buf, key, &val); + } } } @@ -359,4 +419,37 @@ mod logfmt { let _ = write!(buf, " {key}={value}"); } } + + 
#[cfg(test)] + mod tests { + use super::*; + + #[test] + fn debug_logs_include_unmapped_span_fields() { + let mut buf = String::new(); + let span_fields = HashMap::from([ + ("http.method", "GET".to_string()), + ("http.route", "/users/:id".to_string()), + ("otel.kind", "server".to_string()), + ]); + + write_span_field_maps(&mut buf, [&span_fields], true); + + assert_eq!(buf, " method=GET http.route=/users/:id otel.kind=server"); + } + + #[test] + fn info_logs_keep_only_mapped_span_fields_when_not_in_debug_mode() { + let mut buf = String::new(); + let span_fields = HashMap::from([ + ("http.method", "GET".to_string()), + ("http.route", "/users/:id".to_string()), + ("otel.kind", "server".to_string()), + ]); + + write_span_field_maps(&mut buf, [&span_fields], false); + + assert_eq!(buf, " method=GET"); + } + } } From ec43feb2275d9dd8c74bb1c24596b471953815e8 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Mon, 9 Mar 2026 16:59:27 +0100 Subject: [PATCH 38/39] Rename telemetry example directory --- .../sqlpage/migrations/73_blog_tracing.sql | 6 +-- .../opentelemetry-grafana/nginx/nginx.conf | 39 --------------- .../README.md | 2 +- .../docker-compose.yml | 0 .../grafana/dashboards.yaml | 0 .../grafana/datasources.yaml | 0 .../grafana/sqlpage-home.json | 0 examples/telemetry/nginx/nginx.conf | 50 +++++++++++++++++++ .../otel-collector.yaml | 0 .../postgres-init/001_pg_stat_statements.sql | 0 .../prometheus.yml | 0 .../sqlpage/migrations/001_init.sql | 0 .../sqlpage/sqlpage.json | 0 .../tempo.yaml | 0 .../website/add_todo.sql | 0 .../website/index.sql | 0 .../website/slow.sql | 0 17 files changed, 54 insertions(+), 43 deletions(-) delete mode 100644 examples/opentelemetry-grafana/nginx/nginx.conf rename examples/{opentelemetry-grafana => telemetry}/README.md (99%) rename examples/{opentelemetry-grafana => telemetry}/docker-compose.yml (100%) rename examples/{opentelemetry-grafana => telemetry}/grafana/dashboards.yaml (100%) rename examples/{opentelemetry-grafana => 
telemetry}/grafana/datasources.yaml (100%) rename examples/{opentelemetry-grafana => telemetry}/grafana/sqlpage-home.json (100%) create mode 100644 examples/telemetry/nginx/nginx.conf rename examples/{opentelemetry-grafana => telemetry}/otel-collector.yaml (100%) rename examples/{opentelemetry-grafana => telemetry}/postgres-init/001_pg_stat_statements.sql (100%) rename examples/{opentelemetry-grafana => telemetry}/prometheus.yml (100%) rename examples/{opentelemetry-grafana => telemetry}/sqlpage/migrations/001_init.sql (100%) rename examples/{opentelemetry-grafana => telemetry}/sqlpage/sqlpage.json (100%) rename examples/{opentelemetry-grafana => telemetry}/tempo.yaml (100%) rename examples/{opentelemetry-grafana => telemetry}/website/add_todo.sql (100%) rename examples/{opentelemetry-grafana => telemetry}/website/index.sql (100%) rename examples/{opentelemetry-grafana => telemetry}/website/slow.sql (100%) diff --git a/examples/official-site/sqlpage/migrations/73_blog_tracing.sql b/examples/official-site/sqlpage/migrations/73_blog_tracing.sql index 4f7e40d2..233c65ed 100644 --- a/examples/official-site/sqlpage/migrations/73_blog_tracing.sql +++ b/examples/official-site/sqlpage/migrations/73_blog_tracing.sql @@ -17,7 +17,7 @@ When a page is slow, a log line telling you that the request took 1.8 seconds is SQLPage now supports [OpenTelemetry](https://opentelemetry.io/), the standard way to emit distributed traces. Combined with Grafana, Tempo, Loki, and an OpenTelemetry collector, this gives you a detailed timeline of each request and lets you jump directly from logs to traces. -If you want a ready-to-run demo, see the [OpenTelemetry + Grafana example](https://github.com/sqlpage/SQLPage/tree/main/examples/opentelemetry-grafana), which this article is based on. +If you want a ready-to-run demo, see the [OpenTelemetry + Grafana example](https://github.com/sqlpage/SQLPage/tree/main/examples/telemetry), which this article is based on. 
## What tracing gives you @@ -44,7 +44,7 @@ Tracing is especially helpful in SQLPage because one request often maps cleanly The simplest way to explore tracing is to run the example shipped with SQLPage: ```bash -cd examples/opentelemetry-grafana +cd examples/telemetry docker compose up --build ``` @@ -169,6 +169,6 @@ SQLPage already makes the application logic easy to inspect because it lives in By enabling OpenTelemetry and connecting SQLPage to Grafana, you can see not just that a request was slow, but why it was slow, where the time was spent, and which query or resource caused the delay. -For a complete working setup, start with the [OpenTelemetry + Grafana example](https://github.com/sqlpage/SQLPage/tree/main/examples/opentelemetry-grafana) and adapt it to your own deployment. +For a complete working setup, start with the [OpenTelemetry + Grafana example](https://github.com/sqlpage/SQLPage/tree/main/examples/telemetry) and adapt it to your own deployment. ' ); diff --git a/examples/opentelemetry-grafana/nginx/nginx.conf b/examples/opentelemetry-grafana/nginx/nginx.conf deleted file mode 100644 index 61a7f978..00000000 --- a/examples/opentelemetry-grafana/nginx/nginx.conf +++ /dev/null @@ -1,39 +0,0 @@ -load_module modules/ngx_otel_module.so; - -events {} - -http { - otel_exporter { - endpoint otel-collector:4317; - } - - otel_service_name nginx; - - log_format otel_combined 'ts=$time_iso8601 level=info target=nginx.access ' - 'client_ip=$remote_addr method=$request_method path="$uri" ' - 'status=$status request_time=$request_time ' - 'upstream_addr="$upstream_addr" upstream_status="$upstream_status" ' - 'upstream_response_time="$upstream_response_time" ' - 'referer="$http_referer" user_agent="$http_user_agent" ' - 'trace_id=$otel_trace_id span_id=$otel_span_id parent_id=$otel_parent_id'; - - access_log /dev/stdout otel_combined; - error_log /dev/stderr info; - - upstream sqlpage { - server sqlpage:8080; - } - - server { - listen 80; - - location / { - 
otel_trace on; - otel_span_name "$request_method $uri"; - otel_trace_context propagate; - proxy_pass http://sqlpage; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - } - } -} diff --git a/examples/opentelemetry-grafana/README.md b/examples/telemetry/README.md similarity index 99% rename from examples/opentelemetry-grafana/README.md rename to examples/telemetry/README.md index ecf03487..a25831c6 100644 --- a/examples/opentelemetry-grafana/README.md +++ b/examples/telemetry/README.md @@ -29,7 +29,7 @@ OpenTelemetry experience is needed. ### Run ```bash -cd examples/opentelemetry-grafana +cd examples/telemetry docker compose up --build ``` diff --git a/examples/opentelemetry-grafana/docker-compose.yml b/examples/telemetry/docker-compose.yml similarity index 100% rename from examples/opentelemetry-grafana/docker-compose.yml rename to examples/telemetry/docker-compose.yml diff --git a/examples/opentelemetry-grafana/grafana/dashboards.yaml b/examples/telemetry/grafana/dashboards.yaml similarity index 100% rename from examples/opentelemetry-grafana/grafana/dashboards.yaml rename to examples/telemetry/grafana/dashboards.yaml diff --git a/examples/opentelemetry-grafana/grafana/datasources.yaml b/examples/telemetry/grafana/datasources.yaml similarity index 100% rename from examples/opentelemetry-grafana/grafana/datasources.yaml rename to examples/telemetry/grafana/datasources.yaml diff --git a/examples/opentelemetry-grafana/grafana/sqlpage-home.json b/examples/telemetry/grafana/sqlpage-home.json similarity index 100% rename from examples/opentelemetry-grafana/grafana/sqlpage-home.json rename to examples/telemetry/grafana/sqlpage-home.json diff --git a/examples/telemetry/nginx/nginx.conf b/examples/telemetry/nginx/nginx.conf new file mode 100644 index 00000000..bbedcb09 --- /dev/null +++ b/examples/telemetry/nginx/nginx.conf @@ -0,0 +1,50 @@ +load_module modules/ngx_otel_module.so; + +events {} + +http { + otel_exporter { + endpoint 
otel-collector:4317; + } + + otel_service_name nginx; + + map $status $log_level { + ~^[45] error; + default info; + } + + map $status $log_target { + ~^[45] nginx.request_error; + default nginx.access; + } + + log_format otel_request 'ts=$time_iso8601 level=$log_level target=$log_target ' + 'client_ip=$remote_addr method=$request_method path="$uri" ' + 'status=$status request_time=$request_time ' + 'connection=$connection ' + 'upstream_addr="$upstream_addr" upstream_status="$upstream_status" ' + 'upstream_response_time="$upstream_response_time" ' + 'referer="$http_referer" user_agent="$http_user_agent" ' + 'trace_id=$otel_trace_id span_id=$otel_span_id parent_id=$otel_parent_id'; + + access_log /dev/stdout otel_request; + error_log /dev/stderr warn; + + upstream sqlpage { + server sqlpage:8080; + } + + server { + listen 80; + + location / { + otel_trace on; + otel_span_name "$request_method $uri"; + otel_trace_context propagate; + proxy_pass http://sqlpage; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + } + } +} diff --git a/examples/opentelemetry-grafana/otel-collector.yaml b/examples/telemetry/otel-collector.yaml similarity index 100% rename from examples/opentelemetry-grafana/otel-collector.yaml rename to examples/telemetry/otel-collector.yaml diff --git a/examples/opentelemetry-grafana/postgres-init/001_pg_stat_statements.sql b/examples/telemetry/postgres-init/001_pg_stat_statements.sql similarity index 100% rename from examples/opentelemetry-grafana/postgres-init/001_pg_stat_statements.sql rename to examples/telemetry/postgres-init/001_pg_stat_statements.sql diff --git a/examples/opentelemetry-grafana/prometheus.yml b/examples/telemetry/prometheus.yml similarity index 100% rename from examples/opentelemetry-grafana/prometheus.yml rename to examples/telemetry/prometheus.yml diff --git a/examples/opentelemetry-grafana/sqlpage/migrations/001_init.sql b/examples/telemetry/sqlpage/migrations/001_init.sql similarity index 100% rename 
from examples/opentelemetry-grafana/sqlpage/migrations/001_init.sql rename to examples/telemetry/sqlpage/migrations/001_init.sql diff --git a/examples/opentelemetry-grafana/sqlpage/sqlpage.json b/examples/telemetry/sqlpage/sqlpage.json similarity index 100% rename from examples/opentelemetry-grafana/sqlpage/sqlpage.json rename to examples/telemetry/sqlpage/sqlpage.json diff --git a/examples/opentelemetry-grafana/tempo.yaml b/examples/telemetry/tempo.yaml similarity index 100% rename from examples/opentelemetry-grafana/tempo.yaml rename to examples/telemetry/tempo.yaml diff --git a/examples/opentelemetry-grafana/website/add_todo.sql b/examples/telemetry/website/add_todo.sql similarity index 100% rename from examples/opentelemetry-grafana/website/add_todo.sql rename to examples/telemetry/website/add_todo.sql diff --git a/examples/opentelemetry-grafana/website/index.sql b/examples/telemetry/website/index.sql similarity index 100% rename from examples/opentelemetry-grafana/website/index.sql rename to examples/telemetry/website/index.sql diff --git a/examples/opentelemetry-grafana/website/slow.sql b/examples/telemetry/website/slow.sql similarity index 100% rename from examples/opentelemetry-grafana/website/slow.sql rename to examples/telemetry/website/slow.sql From 322f675284487e506ead5e8bf620593949fb9b3c Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Tue, 10 Mar 2026 11:47:30 +0100 Subject: [PATCH 39/39] Fix PostgreSQL Loki log ingestion --- examples/telemetry/README.md | 9 +++- examples/telemetry/docker-compose.yml | 54 ++++++++++++++++---- examples/telemetry/grafana/sqlpage-home.json | 46 +++++++++++++++-- examples/telemetry/otel-collector.yaml | 36 +++++++------ 4 files changed, 115 insertions(+), 30 deletions(-) diff --git a/examples/telemetry/README.md b/examples/telemetry/README.md index a25831c6..792e12eb 100644 --- a/examples/telemetry/README.md +++ b/examples/telemetry/README.md @@ -93,7 +93,7 @@ logging driver and forwards them to Loki. 
The homepage dashboard filters to the `sqlpage` service so you can see request logs update live while you use the sample app. -### PostgreSQL correlation +### PostgreSQL correlation and explain plans SQLPage automatically sets the [`application_name`](https://www.postgresql.org/docs/current/runtime-config-logging.html#GUC-APPLICATION-NAME) @@ -109,6 +109,13 @@ This means you can: - Include trace IDs in PostgreSQL logs by adding `%a` to [`log_line_prefix`](https://www.postgresql.org/docs/current/runtime-config-logging.html#GUC-LOG-LINE-PREFIX). +This example also enables PostgreSQL's +[`auto_explain`](https://www.postgresql.org/docs/current/auto-explain.html) +extension for queries slower than 25 ms. The plans are logged in JSON and keep +the SQLPage trace context in the `app=[...]` prefix, so Grafana's Loki +`trace_id` derived field links each slow-query plan back to the originating +SQLPage trace. + ### Testing pool pressure To simulate database connection pool exhaustion (a common production issue), diff --git a/examples/telemetry/docker-compose.yml b/examples/telemetry/docker-compose.yml index 5e32387b..e5883deb 100644 --- a/examples/telemetry/docker-compose.yml +++ b/examples/telemetry/docker-compose.yml @@ -44,42 +44,73 @@ services: command: - postgres - -c - - shared_preload_libraries=pg_stat_statements + - shared_preload_libraries=pg_stat_statements,auto_explain - -c - pg_stat_statements.track_utility=off - -c - - log_destination=stderr + - logging_collector=on + - -c + - log_destination=jsonlog + - -c + - log_directory=/var/log/postgresql + - -c + - log_filename=postgresql + - -c + - log_file_mode=0644 + - -c + - log_rotation_age=0 + - -c + - log_rotation_size=0 - -c - log_line_prefix=ts=%m pid=%p db=%d user=%u app=[%a] - -c + - log_min_messages=info + - -c + - log_error_verbosity=verbose + - -c + - log_connections=on + - -c + - log_disconnections=on + - -c + - log_duration=on + - -c + - log_statement=all + - -c - log_min_duration_statement=25 + - -c + - 
auto_explain.log_min_duration=25ms + - -c + - auto_explain.log_analyze=on + - -c + - auto_explain.log_buffers=on + - -c + - auto_explain.log_timing=on + - -c + - auto_explain.log_verbose=on + - -c + - auto_explain.log_format=json environment: POSTGRES_USER: sqlpage POSTGRES_PASSWORD: sqlpage POSTGRES_DB: sqlpage volumes: - ./postgres-init:/docker-entrypoint-initdb.d:ro + - postgres-logs:/var/log/postgresql healthcheck: test: ["CMD-SHELL", "pg_isready -U sqlpage"] interval: 2s timeout: 5s retries: 5 - logging: - driver: syslog - options: - syslog-address: "udp://localhost:1515" - syslog-format: "rfc5424micro" - tag: "postgres" - otel-collector: image: otel/opentelemetry-collector-contrib:latest + user: "0:0" volumes: - ./otel-collector.yaml:/etc/otelcol-contrib/config.yaml:ro + - postgres-logs:/var/log/postgresql:ro ports: - "4317:4317" - "4318:4318" - "1514:1514/udp" - - "1515:1515/udp" - "1516:1516/udp" depends_on: - tempo @@ -121,3 +152,6 @@ services: - tempo - loki - prometheus + +volumes: + postgres-logs: diff --git a/examples/telemetry/grafana/sqlpage-home.json b/examples/telemetry/grafana/sqlpage-home.json index 03ca9b42..6c19ad36 100644 --- a/examples/telemetry/grafana/sqlpage-home.json +++ b/examples/telemetry/grafana/sqlpage-home.json @@ -39,7 +39,7 @@ "showLineNumbers": false, "showMiniMap": false }, - "content": "

Recent SQLPage traces, logs, and PostgreSQL metrics

Open http://localhost and interact with the app. New requests will appear here automatically.

The trace table shows recent requests. Click any trace ID to open the full span waterfall in Grafana. The metrics panels come from the OpenTelemetry PostgreSQL receiver via Prometheus.

", + "content": "

Recent SQLPage traces, logs, and PostgreSQL metrics

Open http://localhost and interact with the app. New requests will appear here automatically.

The trace table shows recent requests. Click any trace ID to open the full span waterfall in Grafana. PostgreSQL slow-query explain plans appear in the PostgreSQL Logs panel and link back to the same trace via the extracted trace ID. The metrics panels come from the OpenTelemetry PostgreSQL receiver via Prometheus.

", "mode": "html" }, "pluginVersion": "12.4.0", @@ -351,7 +351,7 @@ }, "gridPos": { "h": 10, - "w": 24, + "w": 12, "x": 0, "y": 20 }, @@ -383,6 +383,46 @@ "timeFrom": "1h", "title": "SQLPage Logs", "type": "logs" + }, + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 20 + }, + "id": 6, + "options": { + "dedupStrategy": "none", + "enableInfiniteScrolling": false, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": true, + "showTime": true, + "sortOrder": "Descending", + "wrapLogMessage": true + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "direction": "backward", + "editorMode": "builder", + "expr": "{service_name=\"postgresql\"}", + "queryType": "range", + "refId": "A" + } + ], + "timeFrom": "1h", + "title": "PostgreSQL Logs", + "type": "logs" } ], "refresh": "5s", @@ -400,5 +440,5 @@ "timezone": "browser", "title": "SQLPage Observability Home", "uid": "sqlpage-tracing-home", - "version": 4 + "version": 5 } diff --git a/examples/telemetry/otel-collector.yaml b/examples/telemetry/otel-collector.yaml index c96ff641..032d2919 100644 --- a/examples/telemetry/otel-collector.yaml +++ b/examples/telemetry/otel-collector.yaml @@ -9,14 +9,19 @@ receivers: protocol: rfc5424 udp: listen_address: 0.0.0.0:1514 - syslog/postgresql: - protocol: rfc5424 - udp: - listen_address: 0.0.0.0:1515 syslog/nginx: protocol: rfc5424 udp: listen_address: 0.0.0.0:1516 + filelog/postgresql: + include: + - /var/log/postgresql/postgresql.json + start_at: end + include_file_path: true + operators: + - type: json_parser + parse_from: body + parse_to: attributes postgresql: endpoint: postgres:5432 transport: tcp @@ -61,18 +66,17 @@ processors: - context: log statements: - set(log.body, log.attributes["message"]) where log.attributes["message"] != nil - - merge_maps(log.cache, ExtractPatterns(log.body, 
"app=\\[[^\\]]*00-(?P<trace_id>[0-9a-f]{32})-[0-9a-f]{16}-[0-9a-f]{2}\\]"), "upsert") where IsString(log.body) - - merge_maps(log.cache, ExtractPatterns(log.body, "(?P<level>LOG|ERROR|WARNING|FATAL|PANIC|NOTICE|INFO|DEBUG):"), "upsert") where IsString(log.body) + - merge_maps(log.cache, ExtractPatterns(log.attributes["application_name"], "00-(?P<trace_id>[0-9a-f]{32})-[0-9a-f]{16}-[0-9a-f]{2}"), "upsert") where IsString(log.attributes["application_name"]) + - set(log.attributes["level"], log.attributes["error_severity"]) where log.attributes["error_severity"] != nil - set(log.attributes["trace_id"], log.cache["trace_id"]) where log.cache["trace_id"] != nil - - set(log.attributes["level"], log.cache["level"]) where log.cache["level"] != nil - - set(log.severity_text, "DEBUG") where log.cache["level"] == "DEBUG" - - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.cache["level"] == "DEBUG" - - set(log.severity_text, "INFO") where log.cache["level"] == "LOG" or log.cache["level"] == "INFO" or log.cache["level"] == "NOTICE" - - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.cache["level"] == "LOG" or log.cache["level"] == "INFO" or log.cache["level"] == "NOTICE" - - set(log.severity_text, "WARN") where log.cache["level"] == "WARNING" - - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.cache["level"] == "WARNING" - - set(log.severity_text, "ERROR") where log.cache["level"] == "ERROR" or log.cache["level"] == "FATAL" or log.cache["level"] == "PANIC" - - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.cache["level"] == "ERROR" or log.cache["level"] == "FATAL" or log.cache["level"] == "PANIC" + - set(log.severity_text, "DEBUG") where log.attributes["level"] == "DEBUG1" or log.attributes["level"] == "DEBUG2" or log.attributes["level"] == "DEBUG3" or log.attributes["level"] == "DEBUG4" or log.attributes["level"] == "DEBUG5" or log.attributes["level"] == "DEBUG" + - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == "DEBUG" + - 
set(log.severity_text, "INFO") where log.attributes["level"] == "LOG" or log.attributes["level"] == "INFO" or log.attributes["level"] == "NOTICE" + - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == "INFO" + - set(log.severity_text, "WARN") where log.attributes["level"] == "WARNING" + - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == "WARN" + - set(log.severity_text, "ERROR") where log.attributes["level"] == "ERROR" or log.attributes["level"] == "FATAL" or log.attributes["level"] == "PANIC" + - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == "ERROR" transform/nginx_logs: error_mode: ignore log_statements: @@ -126,7 +130,7 @@ service: processors: [transform/sqlpage_logs, batch] exporters: [otlp_http/loki] logs/postgresql: - receivers: [syslog/postgresql] + receivers: [filelog/postgresql] processors: [transform/postgresql_logs, batch] exporters: [otlp_http/loki] logs/nginx: