diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..bbc0032 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,13 @@ +target/ +.git/ +.github/ +.codex/ + +.env +healthcheck/ +dev/lgtm/runtime-logs/ + +.DS_Store +*.iml +.idea/ +.vscode/ diff --git a/.env.example b/.env.example index 40a1135..a8539c5 100644 --- a/.env.example +++ b/.env.example @@ -1,11 +1,11 @@ # WHEN DEVELOPING LOCALLY, WE NEED TO ACCESS THE HOST NETWORK FROM K8S (FOR POSTGRES/KAFKA/ELASTIC/ETC) -LOCAL_HOST_IP=$(ifconfig en0 | grep inet | grep -v inet6 | awk '{print $2}') +LOCAL_HOST_IP=127.0.0.1 # RUST version RUST_VERSION=stable # KAFKA -KAFKA_HOST="localhost\,$LOCAL_HOST_IP" +KAFKA_HOST="localhost,$LOCAL_HOST_IP" KAFKA_PORT="9094" KAFKA_CLIENT_ID="chronos" KAFKA_GROUP_ID="chronos" diff --git a/.github/config.json b/.github/config.json new file mode 100644 index 0000000..93f5fa1 --- /dev/null +++ b/.github/config.json @@ -0,0 +1,40 @@ +{ + "repository": { + "allow_auto_merge": true, + "allow_merge_commit": false, + "allow_rebase_merge": true, + "allow_squash_merge": true, + "delete_branch_on_merge": true, + "squash_merge_commit_message": "PR_BODY", + "squash_merge_commit_title": "PR_TITLE", + "web_commit_signoff_required": false + }, + "actions": { + "default_workflow_permissions": "read", + "can_approve_pull_request_reviews": false + }, + "branches": { + "master": { + "protection": { + "required_status_checks": { + "strict": true, + "contexts": [ + "CI" + ] + }, + "enforce_admins": true, + "required_pull_request_reviews": { + "dismiss_stale_reviews": true, + "require_code_owner_reviews": false, + "required_approving_review_count": 1 + }, + "restrictions": null, + "required_linear_history": true, + "allow_force_pushes": false, + "allow_deletions": false, + "block_creations": false, + "required_conversation_resolution": true + } + } + } +} diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml new file mode 100644 index 0000000..f8b5168 --- /dev/null +++ 
b/.github/workflows/CI.yaml @@ -0,0 +1,33 @@ +name: CI + +on: + push: + branches-ignore: + - main + +permissions: + contents: read + security-events: write + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + pre-commit: + uses: ./.github/workflows/pre-commit.yml + + test: + uses: ./.github/workflows/test.yml + + weaver-live-check: + uses: ./.github/workflows/weaver-live-check.yml + + scan: + uses: ./.github/workflows/scan.yml + + build-binary: + uses: ./.github/workflows/build-binary.yml + + build-container: + uses: ./.github/workflows/build-container.yml diff --git a/.github/workflows/build-binary.yml b/.github/workflows/build-binary.yml new file mode 100644 index 0000000..819e341 --- /dev/null +++ b/.github/workflows/build-binary.yml @@ -0,0 +1,37 @@ +name: build-binary + +on: + workflow_call: + workflow_dispatch: + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-build-binary-${{ github.ref }} + cancel-in-progress: true + +jobs: + build-binary: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Build binary image stage + run: docker build --target builder -f docker/Dockerfile.chronos-slim -t chronos-binary-builder:${{ github.sha }} . 
+ + - name: Extract binary + run: | + mkdir -p dist + container_id="$(docker create chronos-binary-builder:${{ github.sha }})" + trap 'docker rm -f "${container_id}" >/dev/null 2>&1 || true' EXIT + docker cp "${container_id}:/build/target/release/chronos" dist/chronos-linux-x86_64-alpine + chmod 0755 dist/chronos-linux-x86_64-alpine + + - name: Upload binary artifact + uses: actions/upload-artifact@v4 + with: + name: chronos-linux-x86_64-alpine + path: dist/chronos-linux-x86_64-alpine + if-no-files-found: error diff --git a/.github/workflows/build-container.yml b/.github/workflows/build-container.yml new file mode 100644 index 0000000..186418b --- /dev/null +++ b/.github/workflows/build-container.yml @@ -0,0 +1,25 @@ +name: build-container + +on: + workflow_call: + workflow_dispatch: + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-build-container-${{ github.ref }} + cancel-in-progress: true + +jobs: + build-container: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Build regular container + run: docker build -f docker/Dockerfile.chronos -t chronos:${{ github.sha }} . + + - name: Build scratch container + run: docker build -f docker/Dockerfile.chronos-slim -t chronos-scratch:${{ github.sha }} . 
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml deleted file mode 100644 index 152c4e0..0000000 --- a/.github/workflows/build.yml +++ /dev/null @@ -1,10 +0,0 @@ -name: release app binary on tag - -# push to branch -on: - push: - -jobs: - build: - uses: ./.github/workflows/rust_build.yml - \ No newline at end of file diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 0000000..88f71ed --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,31 @@ +name: pre-commit + +on: + workflow_call: + workflow_dispatch: + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-pre-commit-${{ github.ref }} + cancel-in-progress: true + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@master + with: + toolchain: "1.94" # quoted so YAML keeps the version a string (an unquoted 1.90 would load as 1.9) + components: rustfmt,clippy + + - name: Install system dependencies + run: scripts/ubuntu-setup.sh + + - name: Run pre-commit checks + run: make pre-commit diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index c26d2a7..b04289a 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -25,9 +25,9 @@ jobs: if: needs.build.result == 'success' steps: - name: Checkout the repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Login to GitHub Container Registry - uses: docker/login-action@v1 + uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.actor }} @@ -35,14 +35,14 @@ jobs: - name: Build and publish chronos for chronos image with ver run: | - docker build -f Dockerfile.chronos . --tag ghcr.io/$GITHUB_REPOSITORY:$GITHUB_REF_NAME --tag ghcr.io/$GITHUB_REPOSITORY:latest - docker push ghcr.io/$GITHUB_REPOSITORY:$GITHUB_REF_NAME + docker build -f Dockerfile.chronos . 
--tag "ghcr.io/${GITHUB_REPOSITORY}:${GITHUB_REF_NAME}" --tag "ghcr.io/${GITHUB_REPOSITORY}:latest" + docker push "ghcr.io/${GITHUB_REPOSITORY}:${GITHUB_REF_NAME}" - name: publish chronos latest for chronos image replace latest run: | - docker push ghcr.io/$GITHUB_REPOSITORY:latest + docker push "ghcr.io/${GITHUB_REPOSITORY}:latest" - name: Build and publish the chronos-pg-migration Docker image run: | - docker build -f Dockerfile.chronos-pg-migrations . --tag ghcr.io/$GITHUB_REPOSITORY/db-migration:$GITHUB_REF_NAME - docker push ghcr.io/$GITHUB_REPOSITORY/db-migration:$GITHUB_REF_NAME \ No newline at end of file + docker build -f Dockerfile.chronos-pg-migrations . --tag "ghcr.io/${GITHUB_REPOSITORY}/db-migration:${GITHUB_REF_NAME}" + docker push "ghcr.io/${GITHUB_REPOSITORY}/db-migration:${GITHUB_REF_NAME}" diff --git a/.github/workflows/rust_build.yml b/.github/workflows/rust_build.yml index 0a74de4..6f3a8e7 100644 --- a/.github/workflows/rust_build.yml +++ b/.github/workflows/rust_build.yml @@ -8,7 +8,7 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@stable with: toolchain: stable @@ -18,4 +18,4 @@ jobs: run: | cargo clean cargo build --release - - run: scripts/pre-commit-checks.sh \ No newline at end of file + - run: make pre-commit diff --git a/.github/workflows/sbom.yml b/.github/workflows/sbom.yml new file mode 100644 index 0000000..e11b530 --- /dev/null +++ b/.github/workflows/sbom.yml @@ -0,0 +1,72 @@ +name: sbom + +on: + workflow_call: + inputs: + target-type: + description: Use "container" for an image SBOM or "release" for a filesystem/release artifact SBOM. + required: true + type: string + target-ref: + description: Container image reference or release artifact path to scan. + required: true + type: string + workflow_dispatch: + inputs: + target-type: + description: Use "container" for an image SBOM or "release" for a filesystem/release artifact SBOM. 
+ required: true + type: choice + options: + - container + - release + target-ref: + description: Container image reference or release artifact path to scan. + required: true + type: string + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-sbom-${{ github.ref }} + cancel-in-progress: true + +jobs: + sbom: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Validate inputs + env: + TARGET_TYPE: ${{ inputs.target-type }} # handed over via env so the caller-supplied value is never spliced into the script text + run: | + case "${TARGET_TYPE}" in container|release) exit 0 ;; esac + echo "target-type must be container or release" >&2; exit 2 + + - name: Generate container SBOM + if: inputs.target-type == 'container' + uses: aquasecurity/trivy-action@v0.32.0 + with: + scan-type: image + scan-ref: ${{ inputs.target-ref }} + format: cyclonedx + output: chronos-sbom.cdx.json + + - name: Generate release SBOM + if: inputs.target-type == 'release' + uses: aquasecurity/trivy-action@v0.32.0 + with: + scan-type: fs + scan-ref: ${{ inputs.target-ref }} + format: cyclonedx + output: chronos-sbom.cdx.json + + - name: Upload SBOM artifact + uses: actions/upload-artifact@v4 + with: + name: chronos-sbom-${{ inputs.target-type }} + path: chronos-sbom.cdx.json + if-no-files-found: error diff --git a/.github/workflows/scan.yml b/.github/workflows/scan.yml new file mode 100644 index 0000000..9077382 --- /dev/null +++ b/.github/workflows/scan.yml @@ -0,0 +1,43 @@ +name: scan + +on: + workflow_call: + workflow_dispatch: + +permissions: + contents: read + security-events: write + +concurrency: + group: ${{ github.workflow }}-scan-${{ github.ref }} + cancel-in-progress: true + +jobs: + scan: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@master + with: + toolchain: "1.94" # quoted so YAML keeps the version a string (an unquoted 1.90 would load as 1.9) + + - name: Install system dependencies + run: scripts/ubuntu-setup.sh + + - name: Build release binary + run: cargo build --release -p chronos_bin + + - name: Scan Rust build 
output + run: | + docker run --rm \ + -v "$PWD:/work:ro" \ + aquasec/trivy:0.64.1 \ + fs \ + --scanners vuln \ + --severity CRITICAL,HIGH \ + --ignore-unfixed \ + --exit-code 1 \ + /work/target/release/chronos diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..3a5ded1 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,30 @@ +name: test + +on: + workflow_call: + workflow_dispatch: + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-test-${{ github.ref }} + cancel-in-progress: true + +jobs: + test: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@master + with: + toolchain: "1.94" # quoted so YAML keeps the version a string (an unquoted 1.90 would load as 1.9) + + - name: Install system dependencies + run: scripts/ubuntu-setup.sh + + - name: Run unit tests + run: cargo test diff --git a/.github/workflows/weaver-live-check.yml b/.github/workflows/weaver-live-check.yml new file mode 100644 index 0000000..f04a864 --- /dev/null +++ b/.github/workflows/weaver-live-check.yml @@ -0,0 +1,30 @@ +name: weaver-live-check + +on: + workflow_call: + workflow_dispatch: + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-weaver-live-check-${{ github.ref }} + cancel-in-progress: true + +jobs: + weaver-live-check: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@master + with: + toolchain: "1.94" # quoted so YAML keeps the version a string (an unquoted 1.90 would load as 1.9) + + - name: Install system dependencies + run: scripts/ubuntu-setup.sh + + - name: Run Weaver live check + run: make weaver.live-check diff --git a/.gitignore b/.gitignore index fb16bbd..ee5d169 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ /target .env /healthcheck +dev/lgtm/runtime-logs/*.jsonl ### Linux ### diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..727d31d --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,122 @@ +# Agent Instructions 
for Chronos + +## Project Context + +Chronos is an open source Rust project. It listens for messages on a Kafka input topic, stores delayed messages in PostgreSQL, and publishes those messages to a Kafka output topic at a later time. More project background, design notes, and runtime details are in [README.md](README.md). + +This is an existing project. Agents must preserve the project's current structure, style, testing practices, and conventions unless the user explicitly asks for a larger change. + +The active working branch for current work is `feat/prom_metrics`. + +## Working Principles + +- Optimize for a clear action trail. Future agents may start with no conversation history, so decisions must be recoverable from files, commits, and command output summaries. +- Document material changes when making them. At minimum, the commit message must explain the intent, relevant implementation notes, and verification performed. +- Keep edits scoped to the requested change. Do not reformat unrelated files or rewrite working code for style only. +- Do not discard or revert user changes. If the worktree has unrelated modifications, leave them alone. +- Prefer existing module boundaries and patterns over new abstractions. +- Update README, How-to notes, examples, or this file when behavior, setup, tests, or agent workflow expectations change. + +## Rust Conventions + +- This is a Cargo workspace with these members: + - `chronos_bin`: main Chronos binary and library code. + - `pg_mig`: PostgreSQL migration binary. + - `examples/*`: example clients and utilities. +- The Rust toolchain is pinned in [rust-toolchain.toml](rust-toolchain.toml). Use that version unless the user asks to change it. +- Formatting is controlled by [rustfmt.toml](rustfmt.toml): 4-space tabs, `max_width = 160`, Unix newlines. +- Keep tests close to the code under `#[cfg(test)] mod tests` when following the existing unit-test style. 
+- Prefer typed Rust APIs and project helpers over ad hoc parsing or shelling out from Rust code. +- Preserve the project's async style based on Tokio, Kafka, PostgreSQL, tracing, and Prometheus metrics crates already in use. + +## Verification Commands + +Use the repository's Make targets and scripts as the source of truth. + +- Default pre-commit verification: + + ```sh + make pre-commit + ``` + +- Lint-only check: + + ```sh + make lint + ``` + + This runs `cargo check`, `cargo fmt -- --check`, and `cargo clippy --all-targets`. + +- Unit tests: + + ```sh + make test + ``` + + This runs `cargo test`. + +- Build: + + ```sh + make build + ``` + + This runs `cargo build`. + +- Metrics/integration verification: + + ```sh + make integration + ``` + + This starts Docker-backed PostgreSQL and Kafka dependencies, runs migrations, starts Chronos, publishes a test message, verifies delivery, and checks the Prometheus `/metrics` endpoint. + +- Stop integration services: + + ```sh + make integration.down + ``` + +Run the narrowest useful checks while iterating, then run the default pre-commit verification before committing. Run `make integration` for changes touching Kafka/PostgreSQL behavior, runtime wiring, Docker setup, migrations, metrics exposure, or end-to-end message flow. + +If a verification command cannot be run, document the reason in the final response and in the commit message. + +## Commit and Push Policy + +Agents should commit and push their changes unless the user explicitly says not to. + +Commit messages must include a footer named `Model-version` containing the model that generated the commit. Example: + +```text +docs: add agent workflow guidance + +Document Chronos project conventions, verification commands, and agent +handoff expectations. + +Verification: +- make pre-commit + +Model-version: GPT-5 +``` + +Use concise subject lines that match the existing repository style, such as `feat(...)`, `fix(...)`, `docs:`, or `chore:`. 
Include enough body detail for a future agent to understand why the change was made and what was verified. + +## Paper Trail Expectations + +For each non-trivial change, leave evidence in one or more of these places: + +- Code comments only where they clarify non-obvious behavior. +- Tests that encode behavioral expectations. +- Documentation updates for changed workflows, configuration, metrics, or operational behavior. +- Commit message body with the reasoning and verification. +- Final response summarizing changed files and checks run. + +When making tradeoffs, record the chosen path and the reason. Avoid relying on chat history as the only explanation. + +## Project-Specific Notes + +- Chronos treats Kafka message bodies opaquely and forwards messages after delay; avoid adding application-level assumptions about payload shape. +- The README describes at-least-once delivery semantics. Preserve behavior that supports persistence, recovery from suspected node failure, and duplicate-safe processing. +- Metrics work on the `feat/prom_metrics` branch currently includes a Prometheus endpoint and metric-family checks in the integration script. Production Weaver inputs live under `dev/weaver/production`; example Weaver inputs live under `examples/weaver`. `WEAVER_TARGET` defaults to `production`, and example artifacts are generated explicitly with `make weaver.generate WEAVER_TARGET=example`. Changes to metrics should preserve unit tests for registry output and integration checks for expected metric families. +- Local development commonly uses `.env` copied from [.env.example](.env.example) through `make setup` or `make withenv`. +- Docker Compose files live in `dev/docker-compose`. `make up` starts Chronos with PostgreSQL, Kafka, Jaeger, and the OpenTelemetry Collector by default; `make up lgtm` uses the LGTM backend. 
diff --git a/Cargo.lock b/Cargo.lock index 7df7f80..0f3fcc2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "addr2line" @@ -95,18 +95,6 @@ version = "1.0.75" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" -[[package]] -name = "arrayref" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" - -[[package]] -name = "arrayvec" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" - [[package]] name = "async-trait" version = "0.1.74" @@ -142,13 +130,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b829e4e32b91e643de6eafe82b1d90675f5874230191a4ffbc1b336dec4d6bf" dependencies = [ "async-trait", - "axum-core", + "axum-core 0.3.4", "bitflags 1.3.2", "bytes", "futures-util", - "http", - "http-body", - "hyper", + "http 0.2.9", + "http-body 0.4.5", + "hyper 0.14.27", "itoa", "matchit", "memchr", @@ -157,7 +145,37 @@ dependencies = [ "pin-project-lite", "rustversion", "serde", - "sync_wrapper", + "sync_wrapper 0.1.2", + "tower", + "tower-layer", + "tower-service", +] + +[[package]] +name = "axum" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf" +dependencies = [ + "async-trait", + "axum-core 0.4.5", + "bytes", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.5.2", + "hyper-util", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "sync_wrapper 1.0.2", + "tokio", "tower", 
"tower-layer", "tower-service", @@ -172,14 +190,34 @@ dependencies = [ "async-trait", "bytes", "futures-util", - "http", - "http-body", + "http 0.2.9", + "http-body 0.4.5", "mime", "rustversion", "tower-layer", "tower-service", ] +[[package]] +name = "axum-core" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" +dependencies = [ + "async-trait", + "bytes", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "mime", + "pin-project-lite", + "rustversion", + "sync_wrapper 1.0.2", + "tower-layer", + "tower-service", +] + [[package]] name = "backtrace" version = "0.3.69" @@ -195,12 +233,6 @@ dependencies = [ "rustc-demangle", ] -[[package]] -name = "base64" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" - [[package]] name = "base64" version = "0.21.4" @@ -219,17 +251,6 @@ version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" -[[package]] -name = "blake2b_simd" -version = "0.5.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afa748e348ad3be8263be728124b24a24f268266f6f5d58af9d75f6a40b5c587" -dependencies = [ - "arrayref", - "arrayvec", - "constant_time_eq", -] - [[package]] name = "block-buffer" version = "0.10.4" @@ -265,11 +286,12 @@ checksum = "7b02b629252fe8ef6460461409564e2c21d0c8e77e0944f3d189ff06c4e932ad" [[package]] name = "cc" -version = "1.0.83" +version = "1.2.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +checksum = "d16d90359e986641506914ba71350897565610e87ce0ad9e6f28569db3dd5c6d" dependencies = [ - "libc", + "find-msvc-tools", + "shlex", ] [[package]] @@ -298,10 
+320,10 @@ version = "0.2.1" dependencies = [ "anyhow", "async-trait", + "axum 0.7.5", "cargo-husky", "chrono", "clap", - "clippy", "deadpool-postgres", "dotenvy", "env_logger 0.10.0", @@ -313,6 +335,7 @@ dependencies = [ "opentelemetry-otlp", "opentelemetry_api", "opentelemetry_sdk", + "prometheus", "rand", "rdkafka", "refinery", @@ -332,6 +355,7 @@ dependencies = [ name = "chronos_ex" version = "0.0.2" dependencies = [ + "chrono", "chronos_bin", "dotenv", "env_logger 0.9.3", @@ -343,10 +367,12 @@ dependencies = [ "opentelemetry-stdout", "opentelemetry_api", "opentelemetry_sdk", + "serde_json", "tokio", "tracing", "tracing-opentelemetry", "tracing-subscriber", + "uuid", ] [[package]] @@ -389,27 +415,12 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd7cc57abe963c6d3b9d8be5b06ba7c8957a930305ca90304f24ef040aa6f961" -[[package]] -name = "clippy" -version = "0.0.302" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d911ee15579a3f50880d8c1d59ef6e79f9533127a3bd342462f5d584f5e8c294" -dependencies = [ - "term", -] - [[package]] name = "colorchoice" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" -[[package]] -name = "constant_time_eq" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" - [[package]] name = "core-foundation" version = "0.9.3" @@ -513,9 +524,9 @@ dependencies = [ [[package]] name = "deranged" -version = "0.3.9" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f32d04922c60427da6f9fef14d042d9edddef64cb9d4ce0d64d0685fbeb1fd3" +checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" dependencies = [ "powerfmt", ] @@ -531,17 +542,6 @@ dependencies = [ "subtle", ] -[[package]] -name = "dirs" 
-version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fd78930633bd1c6e35c4b42b1df7b0cbc6bc191146e512bb3bedf243fcc3901" -dependencies = [ - "libc", - "redox_users", - "winapi", -] - [[package]] name = "dotenv" version = "0.15.0" @@ -638,6 +638,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + [[package]] name = "finl_unicode" version = "1.2.0" @@ -773,17 +779,6 @@ dependencies = [ "version_check", ] -[[package]] -name = "getrandom" -version = "0.1.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" -dependencies = [ - "cfg-if", - "libc", - "wasi 0.9.0+wasi-snapshot-preview1", -] - [[package]] name = "getrandom" version = "0.2.10" @@ -792,7 +787,7 @@ checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" dependencies = [ "cfg-if", "libc", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi", ] [[package]] @@ -812,7 +807,7 @@ dependencies = [ "futures-core", "futures-sink", "futures-util", - "http", + "http 0.2.9", "indexmap 1.9.3", "slab", "tokio", @@ -873,6 +868,16 @@ dependencies = [ "itoa", ] +[[package]] +name = "http" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" +dependencies = [ + "bytes", + "itoa", +] + [[package]] name = "http-body" version = "0.4.5" @@ -880,7 +885,30 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" dependencies = [ "bytes", - "http", + "http 0.2.9", + 
"pin-project-lite", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http 1.4.0", +] + +[[package]] +name = "http-body-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" +dependencies = [ + "bytes", + "futures-core", + "http 1.4.0", + "http-body 1.0.1", "pin-project-lite", ] @@ -913,8 +941,8 @@ dependencies = [ "futures-core", "futures-util", "h2", - "http", - "http-body", + "http 0.2.9", + "http-body 0.4.5", "httparse", "httpdate", "itoa", @@ -926,18 +954,52 @@ dependencies = [ "want", ] +[[package]] +name = "hyper" +version = "1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "256fb8d4bd6413123cc9d91832d78325c48ff41677595be797d90f42969beae0" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "smallvec", + "tokio", +] + [[package]] name = "hyper-timeout" version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbb958482e8c7be4bc3cf272a766a2b0bf1a6755e7a6ae777f017a31d11b13b1" dependencies = [ - "hyper", + "hyper 0.14.27", "pin-project-lite", "tokio", "tokio-io-timeout", ] +[[package]] +name = "hyper-util" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cde7055719c54e36e95e8719f95883f22072a48ede39db7fc17a4e1d5281e9b9" +dependencies = [ + "bytes", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "hyper 1.5.2", + "pin-project-lite", + "tokio", +] + [[package]] name = "iana-time-zone" version = "0.1.57" @@ -1046,15 +1108,15 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" 
-version = "0.2.149" +version = "0.2.186" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" [[package]] name = "libz-sys" -version = "1.1.12" +version = "1.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d97137b25e321a73eef1418d1d5d2eda4d77e12813f8e6dead84bc52c5870a7b" +checksum = "fc3a226e576f50782b3305c5ccf458698f92798987f551c6a02efe8276721e22" dependencies = [ "cc", "libc", @@ -1132,7 +1194,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "927a765cd3fc26206e66b296465fa9d3e5ab003e651c1b3c060e7956d96b19d2" dependencies = [ "libc", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi", "windows-sys", ] @@ -1146,6 +1208,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + [[package]] name = "num-traits" version = "0.2.17" @@ -1167,23 +1235,24 @@ dependencies = [ [[package]] name = "num_enum" -version = "0.5.11" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f646caf906c20226733ed5b1374287eb97e3c2a5c227ce668c1f2ce20ae57c9" +checksum = "5d0bca838442ec211fa11de3a8b0e0e8f3a4522575b5c4c06ed722e005036f26" dependencies = [ "num_enum_derive", + "rustversion", ] [[package]] name = "num_enum_derive" -version = "0.5.11" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcbff9bc912032c62bf65ef1d5aea88983b420f4f839db1e9b0c281a25c9c799" +checksum = "680998035259dcfcafe653688bf2aa6d3e2dc05e98be6ab46afb089dc84f1df8" dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.38", ] [[package]] @@ -1229,9 +1298,9 @@ dependencies = [ [[package]] name = 
"openssl-sys" -version = "0.9.93" +version = "0.9.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db4d56a4c0478783083cfafcc42493dd4a981d41669da64b4572a2a089b51b1d" +checksum = "13ce1245cd07fcc4cfdb438f7507b0c7e4f3849a69fd84d52374c66d83741bb6" dependencies = [ "cc", "libc", @@ -1257,7 +1326,7 @@ checksum = "c7594ec0e11d8e33faf03530a4c49af7064ebba81c1480e01be67d90b356508b" dependencies = [ "async-trait", "bytes", - "http", + "http 0.2.9", "opentelemetry_api", "reqwest", ] @@ -1285,7 +1354,7 @@ checksum = "7e5e5a5c4135864099f3faafbe939eb4d7f9b80ebf68a8448da961b32a7c1275" dependencies = [ "async-trait", "futures-core", - "http", + "http 0.2.9", "opentelemetry-http", "opentelemetry-proto", "opentelemetry-semantic-conventions", @@ -1424,7 +1493,7 @@ checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.3.5", + "redox_syscall", "smallvec", "windows-targets", ] @@ -1498,9 +1567,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.27" +version = "0.3.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" +checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" [[package]] name = "postgres-protocol" @@ -1508,7 +1577,7 @@ version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49b6c5ef183cd3ab4ba005f1ca64c21e8bd97ce4699cfea9e8d9a2c4958ca520" dependencies = [ - "base64 0.21.4", + "base64", "byteorder", "bytes", "fallible-iterator", @@ -1566,6 +1635,29 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "prom_otlp_mock_runner" +version = "0.0.0" +dependencies = [ + "chronos_bin", + "tokio", +] + +[[package]] +name = "prometheus" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3d33c28a30771f7f96db69893f78b857f7450d7e0237e9c8fc6427a81bae7ed1" +dependencies = [ + "cfg-if", + "fnv", + "lazy_static", + "memchr", + "parking_lot", + "protobuf", + "thiserror", +] + [[package]] name = "prost" version = "0.11.9" @@ -1589,6 +1681,12 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "protobuf" +version = "2.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" + [[package]] name = "quote" version = "1.0.33" @@ -1625,7 +1723,7 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.10", + "getrandom", ] [[package]] @@ -1648,9 +1746,9 @@ dependencies = [ [[package]] name = "rdkafka-sys" -version = "4.6.0+2.2.0" +version = "4.10.0+2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad63c279fca41a27c231c450a2d2ad18288032e9cbb159ad16c9d96eba35aaaf" +checksum = "e234cf318915c1059d4921ef7f75616b5219b10b46e9f3a511a15eb4b56a3f77" dependencies = [ "libc", "libz-sys", @@ -1660,12 +1758,6 @@ dependencies = [ "sasl2-sys", ] -[[package]] -name = "redox_syscall" -version = "0.1.57" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" - [[package]] name = "redox_syscall" version = "0.3.5" @@ -1675,17 +1767,6 @@ dependencies = [ "bitflags 1.3.2", ] -[[package]] -name = "redox_users" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de0737333e7a9502c789a36d7c7fa6092a49895d4faa31ca5df163857ded2e9d" -dependencies = [ - "getrandom 0.1.16", - "redox_syscall 0.1.57", - "rust-argon2", -] - [[package]] name = "refinery" version = "0.8.11" @@ -1766,15 +1847,15 @@ version = "0.11.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"046cd98826c46c2ac8ddecae268eb5c2e58628688a5fc7a2643704a73faba95b" dependencies = [ - "base64 0.21.4", + "base64", "bytes", "encoding_rs", "futures-core", "futures-util", "h2", - "http", - "http-body", - "hyper", + "http 0.2.9", + "http-body 0.4.5", + "hyper 0.14.27", "ipnet", "js-sys", "log", @@ -1801,18 +1882,6 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4389f1d5789befaf6029ebd9f7dac4af7f7e3d61b69d4f30e2ac02b57e7712b0" -[[package]] -name = "rust-argon2" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b18820d944b33caa75a71378964ac46f58517c92b6ae5f762636247c09e78fb" -dependencies = [ - "base64 0.13.1", - "blake2b_simd", - "constant_time_eq", - "crossbeam-utils", -] - [[package]] name = "rustc-demangle" version = "0.1.23" @@ -1855,9 +1924,9 @@ dependencies = [ [[package]] name = "sasl2-sys" -version = "0.1.20+2.1.28" +version = "0.1.22+2.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e645bd98535fc8fd251c43ba7c7c1f9be1e0369c99b6a5ea719052a773e655c" +checksum = "05f2a7f7efd9fc98b3a9033272df10709f5ee3fa0eabbd61a527a3a1ed6bd3c6" dependencies = [ "cc", "duct", @@ -1987,6 +2056,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "signal-hook-registry" version = "1.4.1" @@ -2019,9 +2094,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.11.1" +version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "942b4a808e05215192e39f4ab80813e599068285906cc91aa64f923db842bd5a" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "socket2" @@ -2162,6 +2237,12 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" +[[package]] +name = "sync_wrapper" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" + [[package]] name = "system-configuration" version = "0.5.1" @@ -2183,17 +2264,6 @@ dependencies = [ "libc", ] -[[package]] -name = "term" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edd106a334b7657c10b7c540a0106114feadeb4dc314513e97df481d5d966f42" -dependencies = [ - "byteorder", - "dirs", - "winapi", -] - [[package]] name = "termcolor" version = "1.3.0" @@ -2257,12 +2327,13 @@ dependencies = [ [[package]] name = "time" -version = "0.3.30" +version = "0.3.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4a34ab300f2dee6e562c10a046fc05e358b29f9bf92277f30c3c8d82275f6f5" +checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d" dependencies = [ "deranged", "itoa", + "num-conv", "powerfmt", "serde", "time-core", @@ -2271,16 +2342,17 @@ dependencies = [ [[package]] name = "time-core" -version = "0.1.2" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" +checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b" [[package]] name = "time-macros" -version = "0.2.15" +version = "0.2.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ad70d68dba9e1f8aceda7aa6711965dfec1cac869f311a51bd08b3a2ccbce20" +checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3" dependencies = [ + "num-conv", "time-core", ] @@ -2432,15 +2504,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3082666a3a6433f7f511c7192923fa1fe07c69332d3c6a2e6bb040b569199d5a" dependencies = [ "async-trait", - "axum", - "base64 0.21.4", + 
"axum 0.6.20", + "base64", "bytes", "futures-core", "futures-util", "h2", - "http", - "http-body", - "hyper", + "http 0.2.9", + "http-body 0.4.5", + "hyper 0.14.27", "hyper-timeout", "percent-encoding", "pin-project", @@ -2620,7 +2692,7 @@ version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "79daa5ed5740825c40b389c5e50312b9c86df53fccd33f281df655642b43869d" dependencies = [ - "getrandom 0.2.10", + "getrandom", "rand", "uuid-macro-internal", ] @@ -2709,12 +2781,6 @@ dependencies = [ "try-lock", ] -[[package]] -name = "wasi" -version = "0.9.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" - [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" diff --git a/Cargo.toml b/Cargo.toml index 16987fa..7e48054 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,6 +10,9 @@ members = [ # migrations binary "pg_mig" ] +exclude = [ + "examples/weaver" +] [workspace.dependencies] @@ -25,7 +28,6 @@ deadpool-postgres = "0.10" refinery = { version = "0.8.7", features = ["tokio-postgres"] } -clippy = "0.0.302" rand = "0.8.5" serial_test = "2.0.0" @@ -34,12 +36,15 @@ tracing = "0.1" tracing-subscriber = "0.3" tracing-opentelemetry = "0.21.0" -opentelemetry = { version = "0.20.0", features = ["rt-tokio", "trace"]} -opentelemetry_sdk = { version = "0.20.0", features = ["rt-tokio", "trace"]} +opentelemetry = { version = "0.20.0", features = ["rt-tokio", "trace", "metrics"]} +opentelemetry_sdk = { version = "0.20.0", features = ["rt-tokio", "trace", "metrics"]} opentelemetry_api = { version = "0.20.0"} # Collector opentelemetry-jaeger = {version="0.19.0", features=["rt-tokio"]} opentelemetry-stdout = { version = "0.1.0", features = ["trace"] } -opentelemetry-otlp = { version = "0.13.0", features = ["http-proto", "reqwest-client"] } +opentelemetry-otlp = { version = "0.13.0", features = ["http-proto", "reqwest-client", 
"metrics", "tonic"] } opentelemetry-http = "0.9.0" +# metrics +prometheus = "0.13" +axum = { version = "0.7", default-features = false, features = ["http1", "tokio"] } diff --git a/How-to.md b/How-to.md index a7cd568..99c8470 100644 --- a/How-to.md +++ b/How-to.md @@ -17,9 +17,15 @@ Input messages with headers 2. Delete any existing .env file, use `make withenv RECIPE=run` ## Run Chronos docker image -Using [docker-compose](./docker-compose.yml) docker conatiner can host Chronos image with mentioned env variables for Kafka, PG and Chronos configuration variables. +Using [Docker Compose](./dev/docker-compose/compose.yaml), containers can host Chronos, PostgreSQL, Kafka, and observability backends with the environment variables mentioned below. -Use `make withenv RECIPE=docker.up` +Use `make up` to build and start Chronos with PostgreSQL, Kafka, Jaeger, and the OpenTelemetry Collector. + +Use `make up lgtm` or `make up BACKEND=lgtm` to start the same Chronos stack with the Grafana LGTM backend instead of Jaeger. + +Use `make docker.build` to build the Chronos and PostgreSQL migration container images without starting the Compose stack. + +Use `make down` to stop the running stack. 
## ENV vars All the required configurations for Chronos can be passed in environment variables mentioned below @@ -51,6 +57,8 @@ These values are set to fine tune performance Chrono in need, refer to [Chronos] | TIMING_ADVANCE|0 sec | FAIL_DETECT_INTERVAL|10 sec | HEALTHCHECK_FILE|healthcheck/chronos_healthcheck +| OTEL_EXPORTER_PROMETHEUS_HOST|0.0.0.0 +| OTEL_EXPORTER_PROMETHEUS_PORT|9090 ## Observability @@ -62,15 +70,30 @@ At this time Chronos supports Http protocol based connectivity to the Otel colle | OTEL_EXPORTER_OTLP_TRACES_ENDPOINT|"http://localhost:4318/v1/traces" | OTEL_EXPORTER_OTLP_PROTOCOL|"http/json" -## Chronos Images -Two images are published for each [RELEASE]( `https://github.com/kindredgroup/chronos/pkgs/container/chronos`) -- migrations image -- chornos image +### Local Grafana LGTM stack +Use the Grafana LGTM compose overlay to run Grafana, Loki, Tempo, Prometheus, Pyroscope, and the OpenTelemetry Collector in one container: + +```sh +make up lgtm +``` +The overlay mounts local override files from `dev/lgtm` for Prometheus, the OpenTelemetry Collector, and Grafana dashboard provisioning. Chronos exposes its Prometheus metrics endpoint with `OTEL_EXPORTER_PROMETHEUS_HOST` and `OTEL_EXPORTER_PROMETHEUS_PORT`; when run from Docker Compose the endpoint is `chronos:9091`. +The LGTM overlay also starts local infrastructure exporters for container, PostgreSQL, Kafka, and SQL-derived database metrics. Prometheus scrapes cAdvisor, postgres_exporter, KMinion, and sql_exporter from `dev/lgtm/prometheus.yaml`; the SQL exporter emits `chronos_rows`, the current row count of the Chronos `hanger` table. The exporter-specific configuration lives in `dev/lgtm/kminion.yaml` and `dev/lgtm/sql_exporter.yaml`. +The local Compose stack limits the Chronos container to 2 CPUs and 2 GiB of memory. k6 runner containers launched by `make k6.contract` and `make k6.load` are limited to 1 CPU and 1 GiB of memory. 
+Chronos production metrics are generated from the OpenTelemetry Weaver registry in `dev/weaver/production/registry/chronos/metrics.yaml`. Rust definitions are generated into `chronos_bin/src/metrics/generated`, Markdown docs into `docs/chronos_metrics.md`, and the resolved registry schema into `docs/schema/resolved-registry.schema.json`. `OTEL_METRICS_EXPORTER=prometheus` is the default and exposes `/metrics` with the `chronos_` Prometheus namespace, for example `chronos_message_jitter`. `OTEL_METRICS_EXPORTER=otlp` records the same generated metric IDs through the OTLP gRPC metrics exporter. +`make build` runs `make weaver.generate WEAVER_TARGET=production` before compiling, which refreshes the production Rust definitions, Markdown metric docs, and resolved registry JSON schema. `WEAVER_TARGET` defaults to `production`; generate example Weaver artifacts explicitly with `make weaver.generate WEAVER_TARGET=example`. +Validate the LGTM configuration files with: +```sh +make lgtm.validate +``` +## Chronos Images +Two images are published for each [RELEASE](https://github.com/kindredgroup/chronos/pkgs/container/chronos) +- migrations image +- chronos image diff --git a/Makefile b/Makefile index 84767ef..8a045df 100644 --- a/Makefile +++ b/Makefile @@ -1,120 +1,16 @@ -#!make -SHELL:=/bin/bash +SHELL := /usr/bin/env bash +.DEFAULT_GOAL := help -# pp - pretty print function -yellow := $(shell tput setaf 3) -normal := $(shell tput sgr0) -define pp - @printf '$(yellow)$(1)$(normal)\n' -endef +MAKEFILES_DIR := dev/makefiles +COMMON_MAKEFILE := $(MAKEFILES_DIR)/common.mk +MAKEFILE_PARTS := $(filter-out $(COMMON_MAKEFILE),$(sort $(wildcard $(MAKEFILES_DIR)/*.mk))) +include $(COMMON_MAKEFILE) +include $(MAKEFILE_PARTS) -help: Makefile - @echo " Choose a command to run:" - @sed -n 's/^##//p' $< | column -t -s ':' | sed -e 's/^/ /' +## help: Print available make targets +help: + @echo "Choose a command to run:" + @awk '/^## / { help=substr($$0, 4); sub(/^[^:]+: /, "", help); next
} /^[A-Za-z0-9_.-]+:/ { if (help != "") { split($$0, target, ":"); printf " %-28s %s\n", target[1], help; help="" } }' Makefile $(COMMON_MAKEFILE) $(MAKEFILE_PARTS) | sort - -# DEV ############################################################################################# - -## withenv: ๐Ÿ˜ญ CALL TARGETS LIKE THIS `make withenv RECIPE=dev.init` -withenv: -# NB: IT APPEARS THAT LOADING ENVIRONMENT VARIABLES INTO make SUUUUCKS. -# NB: THIS RECIPE IS A HACK TO MAKE IT WORK. -# NB: THAT'S WHY THIS MAKEFILE NEEDS TO BE CALLED LIKE `make withenv RECIPE=dev.init` - test -e .env || cp .env.example .env - bash -c 'set -o allexport; source .env; set +o allexport; make "$$RECIPE"' - -## dev.init: ๐ŸŒ Initialize local dev environment -# If rdkafka compilation fails with SSL error then install openssl@1.1 or later and export: -# export LDFLAGS=-L/opt/homebrew/opt/openssl@1.1/lib -# export CPPFLAGS=-I/opt/homebrew/opt/openssl@1.1/include -dev.init: install - $(call pp,install git hooks...) - cargo install cargo-watch - cargo test - -## dev.kafka_init: ๐Ÿฅ Init kafka topic -# dev.kafka_init: -# $(call pp,creating kafka topic...) -# cargo run --example kafka_create_topic - -dev.chronos_ex: - $(call pp,creating kafka topic...) - cargo run --example chronos_ex - -## pg.create: ๐Ÿฅ Create database -pg.create: - $(call pp,creating database...) - cargo run --example pg_create_database - -## pg.migrate: ๐Ÿฅ Run migrations on database -pg.migrate: - $(call pp,running migrations on database...) - cargo run --package pg_mig --bin chronos-pg-migrations - -# TEST / DEPLOY ################################################################################### - -## install: ๐Ÿงน Installs dependencies -install: - $(call pp,pull rust dependencies...) 
- rustup install "${RUST_VERSION}" - rustup component add rust-src clippy llvm-tools-preview - rustup toolchain install nightly - rustup override set "${RUST_VERSION}" - cargo install cargo2junit grcov - cargo fetch - -## build: ๐Ÿงช Compiles rust -build: - $(call pp,build rust...) - cargo build - - -## dev.run: ๐Ÿงช Runs rust app in watch mode -dev.run: - $(call pp,run app...) - cargo watch -q -c -x 'run --package chronos_bin --bin chronos' - -## run: ๐Ÿงช Runs rust app -run: - $(call pp,run app...) - cargo run --package chronos_bin --bin chronos - -## run: ๐Ÿงช Runs rust app in release mode -run.release: - $(call pp,run app...) - cargo run --package chronos_bin -r --bin chronos - - -## lint: ๐Ÿงน Checks for lint failures on rust -lint: - $(call pp,lint rust...) - cargo check - cargo fmt -- --check - cargo clippy --all-targets - -## test.unit: ๐Ÿงช Runs unit tests -test.unit: - $(call pp,rust unit tests...) - cargo test - -## test.unit.coverage: ๐Ÿงช Runs rust unit tests with coverage 'cobertura' and 'junit' reports -test.unit.coverage: - $(call pp,rust unit tests...) - sh scripts/coverage-report.sh - -## docker.up: ๐Ÿงช Runs rust app in docker container along with kafka and postgres -docker.up: - $(call pp,run app...) - docker-compose --env-file /dev/null up -d - -## docker.down: bring down the docker containers -docker.down: - $(call pp,run app...) - docker-compose down -# PHONY ########################################################################################### - -# To force rebuild of not-file-related targets, make the targets "phony". -# A phony target is one that is not really the name of a file; -# Rather it is just a name for a recipe to be executed when you make an explicit request. 
-.PHONY: build +.PHONY: help diff --git a/chronos_bin/Cargo.toml b/chronos_bin/Cargo.toml index cee9e01..fd11e2d 100644 --- a/chronos_bin/Cargo.toml +++ b/chronos_bin/Cargo.toml @@ -39,7 +39,6 @@ anyhow = "1.0.42" chrono = "0.4.23" #config -clippy.workspace = true clap = { version="4.1.4", features = ["derive"] } dotenvy = "0.15" uuid = { version="1.3.0", features = [ @@ -64,6 +63,10 @@ opentelemetry-jaeger.workspace = true # opentelemetry-jaeger.workspace = true opentelemetry-otlp.workspace = true +# metrics +prometheus.workspace = true +axum.workspace = true + [dev-dependencies] serial_test.workspace = true diff --git a/chronos_bin/src/bin/chronos.rs b/chronos_bin/src/bin/chronos.rs index 0d9a36b..c07c5af 100644 --- a/chronos_bin/src/bin/chronos.rs +++ b/chronos_bin/src/bin/chronos.rs @@ -1,6 +1,7 @@ use chronos_bin::kafka::config::KafkaConfig; use chronos_bin::kafka::consumer::KafkaConsumer; use chronos_bin::kafka::producer::KafkaProducer; +use chronos_bin::metrics::ChronosMetrics; use chronos_bin::postgres::config::PgConfig; use chronos_bin::postgres::pg::Pg; use chronos_bin::runner::Runner; @@ -13,6 +14,10 @@ use std::time::Duration; async fn main() { env_logger::init(); dotenvy::dotenv().ok(); + std::env::set_var( + "OTEL_SERVICE_NAME", + std::env::var("OTEL_SERVICE_NAME").unwrap_or_else(|_| "chronos".to_string()), + ); let protocol = std::env::var("OTEL_EXPORTER_OTLP_PROTOCOL").unwrap_or_else(|_| "http/json".to_string()); @@ -40,11 +45,14 @@ async fn main() { }, }; + let metrics = Arc::new(ChronosMetrics::new().expect("Failed to initialize metrics registry")); + info!("starting chronos establish connections"); let r = Runner { data_store: Arc::new(data_store), producer: Arc::new(kafka_producer), consumer: Arc::new(kafka_consumer), + metrics, }; debug!("debug logs starting chronos"); diff --git a/chronos_bin/src/kafka/consumer.rs b/chronos_bin/src/kafka/consumer.rs index 336f0e8..04caa3e 100644 --- a/chronos_bin/src/kafka/consumer.rs +++ 
b/chronos_bin/src/kafka/consumer.rs @@ -7,7 +7,7 @@ use rdkafka::message::BorrowedMessage; use super::config::KafkaConfig; -use tracing::{instrument, trace, warn}; +use tracing::{trace, warn}; // Kafka Consumer Client pub struct KafkaConsumer { @@ -65,7 +65,7 @@ impl KafkaConsumer { }; } - pub(crate) async fn kafka_consume_message(&self) -> Result { + pub(crate) async fn kafka_consume_message(&self) -> Result, KafkaAdapterError> { self.consumer.recv().await.map_err(KafkaAdapterError::ReceiveMessage) } } diff --git a/chronos_bin/src/kafka/producer.rs b/chronos_bin/src/kafka/producer.rs index 3fc3b94..dd83d15 100644 --- a/chronos_bin/src/kafka/producer.rs +++ b/chronos_bin/src/kafka/producer.rs @@ -3,6 +3,7 @@ use std::time::Duration; use crate::utils::util::into_headers; use crate::{kafka::errors::KafkaAdapterError, utils::util::CHRONOS_ID}; +use chrono::{DateTime, Utc}; use rdkafka::producer::{FutureProducer, FutureRecord}; use super::config::KafkaConfig; @@ -16,6 +17,11 @@ pub struct KafkaProducer { topic: String, } +pub struct PublishedMessage { + pub id: String, + pub timestamp: DateTime, +} + impl KafkaProducer { pub fn new(config: &KafkaConfig) -> Self { // rdlibkafka goes infinitely trying to connect to kafka broker @@ -25,11 +31,12 @@ impl KafkaProducer { Self { producer, topic } } #[instrument(skip_all, fields(topic = %self.topic))] - pub(crate) async fn kafka_publish(&self, message: String, headers: Option>, key: String) -> Result { + pub async fn kafka_publish(&self, message: String, headers: Option>, key: String) -> Result { // Only because never expecting wrong headers to reach here let unwrap_header = &headers.unwrap_or_default(); let o_header = into_headers(unwrap_header); + let published_at = Utc::now(); // println!("headers {:?}", o_header); // println!("headers {:?} headers--{:?}", &headers["chronosId)"].to_string(), &headers["chronosDeadline)"].to_string()); @@ -39,11 +46,15 @@ impl KafkaProducer { FutureRecord::to(self.topic.as_str()) 
.payload(message.as_str()) .key(key.as_str()) - .headers(o_header), + .headers(o_header) + .timestamp(published_at.timestamp_millis()), Duration::from_secs(0), ) .await .map_err(|(kafka_error, _record)| KafkaAdapterError::PublishMessage(kafka_error, "message publishing failed".to_string()))?; - Ok(unwrap_header[CHRONOS_ID].to_string()) + Ok(PublishedMessage { + id: unwrap_header[CHRONOS_ID].to_string(), + timestamp: published_at, + }) } } diff --git a/chronos_bin/src/lib.rs b/chronos_bin/src/lib.rs index 98bac13..3f969e8 100644 --- a/chronos_bin/src/lib.rs +++ b/chronos_bin/src/lib.rs @@ -2,6 +2,7 @@ pub mod core; mod message_processor; mod message_receiver; +pub mod metrics; mod monitor; pub mod runner; diff --git a/chronos_bin/src/message_processor.rs b/chronos_bin/src/message_processor.rs index 9e03172..3350353 100644 --- a/chronos_bin/src/message_processor.rs +++ b/chronos_bin/src/message_processor.rs @@ -1,4 +1,5 @@ use crate::kafka::producer::KafkaProducer; +use crate::metrics::ChronosMetrics; use crate::postgres::pg::{GetReady, Pg, TableRow}; use crate::utils::config::ChronosConfig; use crate::utils::delay_controller::DelayController; @@ -12,6 +13,7 @@ use uuid::Uuid; pub struct MessageProcessor { pub(crate) data_store: Arc, pub(crate) producer: Arc, + pub(crate) metrics: Arc, } impl MessageProcessor { @@ -49,18 +51,24 @@ impl MessageProcessor { } }; + // Capture deadline before updated_row fields are moved into the publish call. 
+ let deadline = updated_row.deadline; + let readied_by_column = Some(updated_row.readied_by.to_string()); tracing::Span::current().record("correlationId", &readied_by_column); match readied_by_column { Some(id) => { headers.insert("readied_by".to_string(), id); - if let Ok(id) = self + if let Ok(published) = self .producer .kafka_publish(updated_row.message_value.to_string(), Some(headers), updated_row.message_key.to_string()) .await { - Ok(id) + // chronos.message.jitter: difference between actual publish time and client-requested deadline. + // Floored at 0 to guard against clock skew producing negative jitter. + self.metrics.observe_jitter(jitter_seconds(published.timestamp, deadline)); + Ok(published.id) } else { Err("error occurred while publishing".to_string()) } @@ -88,11 +96,14 @@ impl MessageProcessor { } } + /// Returns `(returned, status)` where: + /// - `returned = true` means the loop exited early (no rows ready to fire) + /// - `returned = false` means rows were processed (or a terminal error occurred) + /// - `status = "pass"` on success, `"fail"` on unrecoverable error #[tracing::instrument(skip_all)] - async fn processor_message_ready(&self, node_id: Uuid) { + async fn processor_message_ready(&self, node_id: Uuid) -> (bool, &'static str) { loop { log::debug!("retry loop"); - // thread::sleep(Duration::from_millis(100)); let max_retry_count = 3; let mut retry_count = 0; @@ -102,8 +113,6 @@ impl MessageProcessor { readied_at: deadline, readied_by: node_id, deadline, - // limit: 1000, - // order: "asc", }; let readied_by_column: Option = None; @@ -111,31 +120,28 @@ impl MessageProcessor { match resp { Ok(ready_to_publish_rows) => { if ready_to_publish_rows.is_empty() { - log::debug!("no rows ready to fire for dealine {}", deadline); - break; + log::debug!("no rows ready to fire for deadline {}", deadline); + return (true, "pass"); } else { let publish_futures = ready_to_publish_rows.into_iter().map(|row| self.prepare_to_publish(row)); let results = 
futures::future::join_all(publish_futures).await; - // closure to gather ids from results vector and ignore error from result - let ids: Vec = results.into_iter().filter_map(|result| result.ok()).collect(); if !ids.is_empty() { let _ = self.delete_fired_records_from_db(&ids).await; log::debug!("number of rows published successfully and deleted from DB {}", ids.len()); - break; + return (false, "pass"); } } } Err(e) => { if e.contains("could not serialize access due to concurrent update") && retry_count < max_retry_count { - //retry goes here retry_count += 1; if retry_count == max_retry_count { log::error!("Error: max retry count {} reached by node {:?} for row ", max_retry_count, readied_by_column); - break; + return (false, "fail"); } } log::error!("Error: error occurred in message processor while publishing {}", e); @@ -143,6 +149,7 @@ impl MessageProcessor { } } } + pub async fn run(&self) { log::info!("MessageProcessor ON!"); @@ -154,9 +161,60 @@ impl MessageProcessor { loop { log::debug!("MessageProcessor loop"); tokio::time::sleep(Duration::from_millis(10)).await; - self.processor_message_ready(node_id).await; + + // chronos.message.process.duration: time the full processor_message_ready() call. 
+ let timer = std::time::Instant::now(); + let (returned, status) = self.processor_message_ready(node_id).await; + let elapsed = timer.elapsed().as_secs_f64(); + self.metrics.observe_process_latency(elapsed, returned, status); delay_controller.sleep().await; } } } + +fn jitter_seconds(published_at: chrono::DateTime, deadline: chrono::DateTime) -> f64 { + (published_at - deadline).num_milliseconds().max(0) as f64 / 1000.0 +} + +#[cfg(test)] +mod tests { + use super::jitter_seconds; + use crate::metrics::ChronosMetrics; + + #[test] + fn test_jitter_calculation_positive() { + use chrono::{Duration, Utc}; + let deadline = Utc::now() - Duration::milliseconds(300); + let jitter_ms = (Utc::now() - deadline).num_milliseconds().max(0); + assert!(jitter_ms >= 300, "jitter should be at least 300ms when deadline was 300ms ago"); + } + + #[test] + fn test_jitter_seconds_converts_milliseconds_to_seconds() { + use chrono::{Duration, Utc}; + let deadline = Utc::now(); + let published_at = deadline + Duration::milliseconds(300); + assert!((jitter_seconds(published_at, deadline) - 0.3).abs() < f64::EPSILON); + } + + #[test] + fn test_jitter_seconds_floors_clock_skew_at_zero() { + use chrono::{Duration, Utc}; + let deadline = Utc::now(); + let published_at = deadline - Duration::milliseconds(300); + assert_eq!(jitter_seconds(published_at, deadline), 0.0); + } + + #[test] + fn test_jitter_below_500ms_within_sla() { + let metrics = ChronosMetrics::new().unwrap(); + // A 300ms jitter is within the 500ms SLA — must land in the <=0.5s bucket + metrics.observe_jitter(0.3); + let output = metrics.render_prometheus().unwrap(); + assert!( + output.contains("chronos_message_jitter_bucket{le=\"0.5\"} 1"), + "300ms jitter must be counted in the <=500ms bucket" + ); + } +} diff --git a/chronos_bin/src/message_receiver.rs b/chronos_bin/src/message_receiver.rs index 93c0ea8..01ac4cc 100644 --- a/chronos_bin/src/message_receiver.rs +++ b/chronos_bin/src/message_receiver.rs @@ -1,18 +1,21 @@ use
chrono::{DateTime, Utc}; +use rdkafka::message::BorrowedMessage; +use rdkafka::Message; use serde_json::json; +use std::{collections::HashMap, str::FromStr, sync::Arc}; use tracing::instrument; use crate::kafka::consumer::KafkaConsumer; use crate::kafka::producer::KafkaProducer; +use crate::metrics::ChronosMetrics; use crate::postgres::pg::{Pg, TableInsertRow}; use crate::utils::util::{get_message_key, get_payload_utf8, required_headers, CHRONOS_ID, DEADLINE}; -use rdkafka::message::BorrowedMessage; -use std::{collections::HashMap, str::FromStr, sync::Arc}; pub struct MessageReceiver { pub(crate) consumer: Arc, pub(crate) producer: Arc, pub(crate) data_store: Arc, + pub(crate) metrics: Arc, } impl MessageReceiver { @@ -81,21 +84,44 @@ impl MessageReceiver { #[tracing::instrument(name = "receiver_handle_message", skip_all, fields(correlationId, error))] pub async fn handle_message(&self, message: &BorrowedMessage<'_>) { + // chronos.message.wait.duration: record how long the message waited in the Kafka input queue. + // Uses the Kafka-assigned message timestamp; guards against clock skew with max(0). 
+ if let Some(kafka_ts_ms) = message.timestamp().to_millis() { + let wait_secs = (Utc::now().timestamp_millis() - kafka_ts_ms).max(0) as f64 / 1000.0; + self.metrics.observe_wait_time(wait_secs); + } + + let timer = std::time::Instant::now(); + let mut destination = "unknown"; + let mut status = "pass"; + let new_message = &message; if let Some(reqd_headers) = required_headers(new_message) { tracing::Span::current().record("correlationId", &reqd_headers[CHRONOS_ID]); if let Ok(message_deadline) = DateTime::::from_str(&reqd_headers[DEADLINE]) { if message_deadline <= Utc::now() { + destination = "kafka"; if let Some(err) = self.prepare_and_publish(new_message, reqd_headers).await { + status = "fail"; log::error!("{}", err); tracing::Span::current().record("error", &err); } - } else if let Some(err_string) = self.insert_into_db(new_message, reqd_headers, message_deadline).await { - log::error!("{}", err_string); - tracing::Span::current().record("error", &err_string); + } else { + destination = "postgres"; + if let Some(err_string) = self.insert_into_db(new_message, reqd_headers, message_deadline).await { + status = "fail"; + log::error!("{}", err_string); + tracing::Span::current().record("error", &err_string); + } } } } + + // chronos.message.consume.duration: only record when destination was determined (valid message headers). 
+ if destination != "unknown" { + let elapsed = timer.elapsed().as_secs_f64(); + self.metrics.observe_consume_latency(elapsed, destination, status); + } } pub async fn run(&self) { @@ -110,9 +136,26 @@ impl MessageReceiver { log::error!("error while consuming message {:?}", e); } } - // if let Ok(message) = &self.consumer.kafka_consume_message().await { - // self.handle_message(message).await; - // } } } } + +#[cfg(test)] +mod tests { + #[test] + fn test_wait_time_calculation_non_negative() { + let kafka_ts_ms: i64 = 1_700_000_000_000; + let now_ms: i64 = kafka_ts_ms + 5_000; + let wait_secs = (now_ms - kafka_ts_ms).max(0) as f64 / 1000.0; + assert!((wait_secs - 5.0).abs() < 1e-9); + } + + #[test] + fn test_wait_time_calculation_clock_skew() { + // Simulates a future Kafka timestamp (clock skew) — should floor to 0.0 + let kafka_ts_ms: i64 = 9_999_999_999_999; + let now_ms: i64 = 1_700_000_000_000; + let wait_secs = (now_ms - kafka_ts_ms).max(0) as f64 / 1000.0; + assert_eq!(wait_secs, 0.0); + } +} diff --git a/chronos_bin/src/metrics/generated/chronos_metric_definitions.rs b/chronos_bin/src/metrics/generated/chronos_metric_definitions.rs new file mode 100644 index 0000000..3ee2792 --- /dev/null +++ b/chronos_bin/src/metrics/generated/chronos_metric_definitions.rs @@ -0,0 +1,103 @@ +// Generated from dev/weaver/production/registry/chronos/metrics.yaml by OpenTelemetry Weaver. +// Do not edit by hand.
+ +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub enum MetricId { + MsgConsumeLatency, + MsgJitter, + MsgProcessLatency, + MsgReset, + MsgWaitTime, +} + +#[derive(Clone, Copy, Debug)] +pub enum MetricKind { + Counter, + Histogram, +} + +impl MetricKind { + pub fn is_counter(self) -> bool { + matches!(self, Self::Counter) + } + + pub fn is_histogram(self) -> bool { + matches!(self, Self::Histogram) + } +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum MetricTemporality { + Cumulative, +} + +#[derive(Clone, Copy, Debug)] +pub struct MetricDefinition { + pub id: MetricId, + pub name: &'static str, + pub description: &'static str, + pub unit: Option<&'static str>, + pub label_names: &'static [&'static str], + pub kind: MetricKind, + pub temporality: Option, + pub buckets: Option<&'static [f64]>, + pub prewarm_label_values: &'static [&'static [&'static str]], +} + +pub const METRIC_DEFINITIONS: &[MetricDefinition] = &[ + MetricDefinition { + id: MetricId::MsgConsumeLatency, + name: "chronos.message.consume.duration", + description: "Duration of handle_message() in message_receiver.", + unit: Some("s"), + label_names: &["chronos.consume.status", "chronos.destination"], + kind: MetricKind::Histogram, + temporality: Some(MetricTemporality::Cumulative), + buckets: Some(&[0.001, 0.002, 0.004, 0.008, 0.016, 0.032, 0.064, 0.128, 0.256, 0.512, 1.024, 2.048]), + prewarm_label_values: &[&["pass", "kafka"], &["fail", "kafka"], &["pass", "postgres"], &["fail", "postgres"]], + }, + MetricDefinition { + id: MetricId::MsgJitter, + name: "chronos.message.jitter", + description: "Difference between actual publish time and client-requested deadline.", + unit: Some("s"), + label_names: &[], + kind: MetricKind::Histogram, + temporality: Some(MetricTemporality::Cumulative), + buckets: Some(&[0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0]), + prewarm_label_values: &[], + }, + MetricDefinition { + id: MetricId::MsgProcessLatency, + name: 
"chronos.message.process.duration", + description: "Duration of processor_message_ready() loop in message_processor.", + unit: Some("s"), + label_names: &["chronos.process.status", "chronos.processor.returned"], + kind: MetricKind::Histogram, + temporality: Some(MetricTemporality::Cumulative), + buckets: Some(&[0.001, 0.002, 0.004, 0.008, 0.016, 0.032, 0.064, 0.128, 0.256, 0.512, 1.024, 2.048]), + prewarm_label_values: &[&["pass", "true"], &["fail", "true"], &["pass", "false"], &["fail", "false"]], + }, + MetricDefinition { + id: MetricId::MsgReset, + name: "chronos.message.reset", + description: "Number of records reset by reset_to_init_db() in the monitor task.", + unit: Some("{message}"), + label_names: &[], + kind: MetricKind::Counter, + temporality: None, + buckets: None, + prewarm_label_values: &[], + }, + MetricDefinition { + id: MetricId::MsgWaitTime, + name: "chronos.message.wait.duration", + description: "Time a message spent in the Kafka input queue before processing.", + unit: Some("s"), + label_names: &[], + kind: MetricKind::Histogram, + temporality: Some(MetricTemporality::Cumulative), + buckets: Some(&[0.1, 0.2, 0.4, 0.8, 1.6, 3.2, 6.4, 12.8, 25.6, 51.2, 102.4, 204.8, 409.6, 819.2]), + prewarm_label_values: &[], + }, +]; diff --git a/chronos_bin/src/metrics/generated/mod.rs b/chronos_bin/src/metrics/generated/mod.rs new file mode 100644 index 0000000..6851991 --- /dev/null +++ b/chronos_bin/src/metrics/generated/mod.rs @@ -0,0 +1,3 @@ +pub mod chronos_metric_definitions; + +pub use chronos_metric_definitions::*; diff --git a/chronos_bin/src/metrics/mod.rs b/chronos_bin/src/metrics/mod.rs new file mode 100644 index 0000000..311bf4e --- /dev/null +++ b/chronos_bin/src/metrics/mod.rs @@ -0,0 +1,4 @@ +pub mod generated; +pub mod registry; +pub mod server; +pub use registry::ChronosMetrics; diff --git a/chronos_bin/src/metrics/registry.rs b/chronos_bin/src/metrics/registry.rs new file mode 100644 index 0000000..10a64d7 --- /dev/null +++ 
b/chronos_bin/src/metrics/registry.rs @@ -0,0 +1,520 @@ +use std::collections::HashMap; +use std::env; + +use opentelemetry::global; +use opentelemetry::metrics::{Counter as OtlpCounter, Histogram as OtlpHistogram, Unit}; +use opentelemetry::KeyValue; +use opentelemetry_otlp::WithExportConfig; +use opentelemetry_sdk::metrics::data::Temporality; +use opentelemetry_sdk::metrics::reader::{AggregationSelector, DefaultAggregationSelector, TemporalitySelector}; +use opentelemetry_sdk::metrics::{Aggregation, InstrumentKind}; +use prometheus::{histogram_opts, opts, CounterVec as PromCounterVec, HistogramVec as PromHistogramVec, Registry}; + +use crate::metrics::generated::{MetricDefinition, MetricId, MetricKind, MetricTemporality, METRIC_DEFINITIONS}; + +const OTEL_METRICS_EXPORTER: &str = "OTEL_METRICS_EXPORTER"; +const OTEL_EXPORTER_OTLP_ENDPOINT: &str = "OTEL_EXPORTER_OTLP_ENDPOINT"; +const OTEL_EXPORTER_OTLP_METRICS_ENDPOINT: &str = "OTEL_EXPORTER_OTLP_METRICS_ENDPOINT"; +const OTEL_EXPORTER_OTLP_PROTOCOL: &str = "OTEL_EXPORTER_OTLP_PROTOCOL"; +const OTEL_EXPORTER_OTLP_METRICS_PROTOCOL: &str = "OTEL_EXPORTER_OTLP_METRICS_PROTOCOL"; +type MetricLabels<'a> = &'a [(&'static str, String)]; + +trait MetricsBackend: Send + Sync { + fn inc_counter(&self, id: MetricId, value: u64, labels: MetricLabels<'_>); + fn observe_histogram(&self, id: MetricId, value: f64, labels: MetricLabels<'_>); + fn render_prometheus(&self) -> Option; + fn shutdown(&self); +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum MetricsExporter { + Prometheus, + Otlp, +} + +impl MetricsExporter { + pub fn from_env() -> Result> { + match env::var(OTEL_METRICS_EXPORTER).unwrap_or_else(|_| "prometheus".to_string()).as_str() { + "prometheus" => Ok(Self::Prometheus), + "otlp" => { + require_grpc_protocol()?; + Ok(Self::Otlp) + } + "none" => Err("metrics exporter disabled by OTEL_METRICS_EXPORTER=none".into()), + other => Err(format!("unsupported {OTEL_METRICS_EXPORTER} value: {other}").into()), + } + 
} +} + +/// Runtime metrics facade for Chronos. +/// +/// Metric definitions are generated by OpenTelemetry Weaver in +/// `metrics/generated/chronos_metric_definitions.rs`; this facade only chooses +/// a Prometheus or OTLP backend and records by generated metric IDs. +pub struct ChronosMetrics { + exporter: MetricsExporter, + backend: Box, +} + +impl std::fmt::Debug for ChronosMetrics { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ChronosMetrics").field("exporter", &self.exporter).finish() + } +} + +impl ChronosMetrics { + pub fn new() -> Result> { + Self::from_env() + } + + pub fn from_env() -> Result> { + let exporter = MetricsExporter::from_env()?; + let backend: Box = match exporter { + MetricsExporter::Prometheus => Box::new(PrometheusMetricsBackend::new()?), + MetricsExporter::Otlp => Box::new(OtlpMetricsBackend::new()?), + }; + + Ok(Self { exporter, backend }) + } + + pub fn is_prometheus(&self) -> bool { + self.exporter == MetricsExporter::Prometheus + } + + pub fn render_prometheus(&self) -> Option { + self.backend.render_prometheus() + } + + pub fn shutdown(&self) { + self.backend.shutdown(); + } + + pub fn observe_consume_latency(&self, seconds: f64, destination: &'static str, status: &'static str) { + self.observe_histogram(MetricId::MsgConsumeLatency, seconds, consume_labels(destination, status)); + } + + pub fn observe_process_latency(&self, seconds: f64, returned: bool, status: &'static str) { + self.observe_histogram(MetricId::MsgProcessLatency, seconds, process_labels(returned, status)); + } + + pub fn observe_wait_time(&self, seconds: f64) { + self.observe_histogram(MetricId::MsgWaitTime, seconds, Vec::new()); + } + + pub fn observe_jitter(&self, seconds: f64) { + self.observe_histogram(MetricId::MsgJitter, seconds, Vec::new()); + } + + pub fn messages_reset(&self, count: u64) { + self.inc_counter(MetricId::MsgReset, count, Vec::new()); + } + + fn inc_counter(&self, id: MetricId, value: u64, labels: 
Vec<(&'static str, String)>) { + self.backend.inc_counter(id, value, &labels); + } + + fn observe_histogram(&self, id: MetricId, value: f64, labels: Vec<(&'static str, String)>) { + self.backend.observe_histogram(id, value, &labels); + } +} + +impl Drop for ChronosMetrics { + fn drop(&mut self) { + self.backend.shutdown(); + } +} + +struct PrometheusMetricsBackend { + registry: Registry, + counters: HashMap, + histograms: HashMap, +} + +impl PrometheusMetricsBackend { + fn new() -> Result { + let registry = Registry::new(); + let mut counters = HashMap::new(); + let mut histograms = HashMap::new(); + + for definition in METRIC_DEFINITIONS { + let prometheus_name = prometheus_metric_name(definition.name); + let prometheus_label_names = prometheus_label_names(definition.label_names); + let prometheus_label_refs = prometheus_label_names.iter().map(String::as_str).collect::>(); + match definition.kind { + MetricKind::Counter => { + let metric = PromCounterVec::new(opts!(prometheus_name, definition.description), &prometheus_label_refs)?; + registry.register(Box::new(metric.clone()))?; + prewarm_counter(definition, &metric)?; + counters.insert(definition.id, metric); + } + MetricKind::Histogram => { + let opts = match definition.buckets { + Some(buckets) => histogram_opts!(prometheus_name, definition.description, buckets.to_vec()), + None => histogram_opts!(prometheus_name, definition.description), + }; + let metric = PromHistogramVec::new(opts, &prometheus_label_refs)?; + registry.register(Box::new(metric.clone()))?; + prewarm_histogram(definition, &metric)?; + histograms.insert(definition.id, metric); + } + } + } + + Ok(Self { + registry, + counters, + histograms, + }) + } +} + +fn prewarm_counter(definition: &MetricDefinition, metric: &PromCounterVec) -> Result<(), prometheus::Error> { + if definition.label_names.is_empty() { + metric.get_metric_with_label_values(&[])?; + return Ok(()); + } + + for label_values in definition.prewarm_label_values { + 
metric.get_metric_with_label_values(label_values)?; + } + + Ok(()) +} + +fn prewarm_histogram(definition: &MetricDefinition, metric: &PromHistogramVec) -> Result<(), prometheus::Error> { + if definition.label_names.is_empty() { + metric.get_metric_with_label_values(&[])?; + return Ok(()); + } + + for label_values in definition.prewarm_label_values { + metric.get_metric_with_label_values(label_values)?; + } + + Ok(()) +} + +impl MetricsBackend for PrometheusMetricsBackend { + fn inc_counter(&self, id: MetricId, value: u64, labels: MetricLabels<'_>) { + if let Some(counter) = self.counters.get(&id) { + let label_values = prometheus_label_values(id, labels); + match counter.get_metric_with_label_values(&label_values) { + Ok(metric) => metric.inc_by(value as f64), + Err(err) => log::error!("metrics: failed to record counter {:?}: {}", id, err), + } + } + } + + fn observe_histogram(&self, id: MetricId, value: f64, labels: MetricLabels<'_>) { + if let Some(histogram) = self.histograms.get(&id) { + let label_values = prometheus_label_values(id, labels); + match histogram.get_metric_with_label_values(&label_values) { + Ok(metric) => metric.observe(value), + Err(err) => log::error!("metrics: failed to observe histogram {:?}: {}", id, err), + } + } + } + + fn render_prometheus(&self) -> Option { + use prometheus::{Encoder, TextEncoder}; + + let encoder = TextEncoder::new(); + let mut buffer = Vec::new(); + encoder.encode(&self.registry.gather(), &mut buffer).ok()?; + String::from_utf8(buffer).ok() + } + + fn shutdown(&self) {} +} + +struct ChronosAggregationSelector; + +impl AggregationSelector for ChronosAggregationSelector { + fn aggregation(&self, kind: InstrumentKind) -> Aggregation { + if kind != InstrumentKind::Histogram { + return DefaultAggregationSelector::new().aggregation(kind); + } + + Aggregation::ExplicitBucketHistogram { + boundaries: otlp_histogram_boundaries(), + record_min_max: true, + } + } +} + +struct ChronosTemporalitySelector; + +impl 
TemporalitySelector for ChronosTemporalitySelector { + fn temporality(&self, kind: InstrumentKind) -> Temporality { + if kind == InstrumentKind::Histogram { + Temporality::Cumulative + } else { + opentelemetry_sdk::metrics::reader::DefaultTemporalitySelector::new().temporality(kind) + } + } +} + +fn otlp_histogram_boundaries() -> Vec { + let mut boundaries = METRIC_DEFINITIONS + .iter() + .filter(|definition| definition.kind.is_histogram()) + .filter(|definition| definition.temporality == Some(MetricTemporality::Cumulative)) + .filter_map(|definition| definition.buckets) + .flat_map(|buckets| buckets.iter().copied()) + .collect::>(); + boundaries.sort_by(f64::total_cmp); + boundaries.dedup(); + boundaries +} + +struct OtlpMetricsBackend { + provider: opentelemetry_sdk::metrics::MeterProvider, + counters: HashMap>, + histograms: HashMap>, +} + +impl OtlpMetricsBackend { + fn new() -> Result> { + let endpoint = env::var(OTEL_EXPORTER_OTLP_METRICS_ENDPOINT) + .or_else(|_| env::var(OTEL_EXPORTER_OTLP_ENDPOINT)) + .unwrap_or_else(|_| "http://127.0.0.1:4317".to_string()); + let exporter = opentelemetry_otlp::new_exporter().tonic().with_env().with_endpoint(endpoint); + let provider = opentelemetry_otlp::new_pipeline() + .metrics(opentelemetry::runtime::Tokio) + .with_exporter(exporter) + .with_aggregation_selector(ChronosAggregationSelector) + .with_temporality_selector(ChronosTemporalitySelector) + .build()?; + + global::set_meter_provider(provider.clone()); + let meter = global::meter("chronos"); + + let mut counters = HashMap::new(); + let mut histograms = HashMap::new(); + + for definition in METRIC_DEFINITIONS { + match definition.kind { + MetricKind::Counter => { + let mut builder = meter.u64_counter(definition.name).with_description(definition.description); + if let Some(unit) = definition.unit { + builder = builder.with_unit(Unit::new(unit)); + } + counters.insert(definition.id, builder.init()); + } + MetricKind::Histogram => { + let mut builder = 
meter.f64_histogram(definition.name).with_description(definition.description); + if let Some(unit) = definition.unit { + builder = builder.with_unit(Unit::new(unit)); + } + histograms.insert(definition.id, builder.init()); + } + } + } + + Ok(Self { + provider, + counters, + histograms, + }) + } +} + +impl MetricsBackend for OtlpMetricsBackend { + fn inc_counter(&self, id: MetricId, value: u64, labels: MetricLabels<'_>) { + if let Some(counter) = self.counters.get(&id) { + counter.add(value, &labels_to_key_values(labels)); + } + } + + fn observe_histogram(&self, id: MetricId, value: f64, labels: MetricLabels<'_>) { + if let Some(histogram) = self.histograms.get(&id) { + histogram.record(value, &labels_to_key_values(labels)); + } + } + + fn render_prometheus(&self) -> Option { + None + } + + fn shutdown(&self) { + if let Err(err) = self.provider.force_flush(&opentelemetry::Context::current()) { + log::error!("failed to flush OTLP metrics: {}", err); + } + if let Err(err) = self.provider.shutdown() { + log::error!("failed to shut down OTLP metrics provider: {}", err); + } + } +} + +fn require_grpc_protocol() -> Result<(), Box> { + let protocol = env::var(OTEL_EXPORTER_OTLP_METRICS_PROTOCOL) + .or_else(|_| env::var(OTEL_EXPORTER_OTLP_PROTOCOL)) + .unwrap_or_else(|_| "grpc".to_string()); + + if protocol == "grpc" { + Ok(()) + } else { + Err(format!("unsupported OTLP metrics protocol {protocol:?}; use grpc").into()) + } +} + +fn consume_labels(destination: &'static str, status: &'static str) -> Vec<(&'static str, String)> { + vec![("chronos.destination", destination.to_string()), ("chronos.consume.status", status.to_string())] +} + +fn process_labels(returned: bool, status: &'static str) -> Vec<(&'static str, String)> { + vec![ + ("chronos.processor.returned", returned.to_string()), + ("chronos.process.status", status.to_string()), + ] +} + +fn metric_definition(id: MetricId) -> Option<&'static MetricDefinition> { + METRIC_DEFINITIONS.iter().find(|definition| 
definition.id == id) +} + +fn prometheus_label_values<'a>(id: MetricId, labels: MetricLabels<'a>) -> Vec<&'a str> { + let Some(definition) = metric_definition(id) else { + return Vec::new(); + }; + + definition + .label_names + .iter() + .map(|name| { + labels + .iter() + .find(|(label_name, _)| label_name == name) + .map(|(_, value)| value.as_str()) + .unwrap_or("unknown") + }) + .collect() +} + +fn labels_to_key_values(labels: MetricLabels<'_>) -> Vec { + labels.iter().map(|(key, value)| KeyValue::new(*key, value.clone())).collect() +} + +fn prometheus_metric_name(name: &str) -> String { + normalize_prometheus_identifier(name, true) +} + +fn prometheus_label_names(names: &[&str]) -> Vec { + names.iter().map(|name| normalize_prometheus_identifier(name, false)).collect() +} + +fn normalize_prometheus_identifier(name: &str, allow_colon: bool) -> String { + let mut output = String::with_capacity(name.len()); + + for (index, character) in name.chars().enumerate() { + let is_allowed = character.is_ascii_alphanumeric() || character == '_' || (allow_colon && character == ':'); + let is_valid_first = character.is_ascii_alphabetic() || character == '_' || (allow_colon && character == ':'); + + if (index == 0 && !is_valid_first) || (index > 0 && !is_allowed) { + output.push('_'); + } else { + output.push(character); + } + } + + output +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::metrics::generated::METRIC_DEFINITIONS; + use serial_test::serial; + + fn prometheus_metrics() -> ChronosMetrics { + env::remove_var(OTEL_METRICS_EXPORTER); + ChronosMetrics::new().unwrap() + } + + #[test] + #[serial] + fn metrics_registry_creates_successfully() { + env::remove_var(OTEL_METRICS_EXPORTER); + assert!(ChronosMetrics::new().is_ok()); + } + + #[test] + #[serial] + fn generated_definitions_drive_all_runtime_metrics() { + let metrics = prometheus_metrics(); + let output = metrics.render_prometheus().unwrap(); + + for definition in METRIC_DEFINITIONS { + let prometheus_name 
= prometheus_metric_name(definition.name); + assert!( + output.contains(&format!("# HELP {prometheus_name}")), + "metric {} must be registered from generated definitions", + definition.name + ); + } + } + + #[test] + #[serial] + fn prometheus_metrics_normalize_otel_names() { + let metrics = prometheus_metrics(); + metrics.observe_jitter(0.499); + let output = metrics.render_prometheus().unwrap(); + + assert!(output.contains("# HELP chronos_message_jitter")); + assert!(!output.contains("# HELP chronos.message.jitter")); + } + + #[test] + #[serial] + fn msg_jitter_has_500ms_bucket() { + let metrics = prometheus_metrics(); + metrics.observe_jitter(0.499); + let output = metrics.render_prometheus().unwrap(); + + assert!(output.contains("chronos_message_jitter_bucket{le=\"0.5\"} 1")); + } + + #[test] + fn otlp_histograms_use_generated_second_boundaries() { + let boundaries = otlp_histogram_boundaries(); + + assert!(boundaries.contains(&0.5)); + assert!(boundaries.contains(&2.048)); + assert!(boundaries.contains(&5.0)); + assert!(boundaries.windows(2).all(|window| window[0] < window[1])); + } + + #[test] + #[serial] + fn msg_reset_increments_correctly() { + let metrics = prometheus_metrics(); + metrics.messages_reset(3); + metrics.messages_reset(2); + let output = metrics.render_prometheus().unwrap(); + + assert!(output.contains("chronos_message_reset 5")); + } + + #[test] + #[serial] + fn msg_wait_time_records_observation() { + let metrics = prometheus_metrics(); + metrics.observe_wait_time(1.5); + let output = metrics.render_prometheus().unwrap(); + + assert!(output.contains("chronos_message_wait_duration_count 1")); + } + + #[test] + #[serial] + fn labeled_metrics_record_issue_dimensions() { + let metrics = prometheus_metrics(); + metrics.observe_consume_latency(0.05, "postgres", "pass"); + metrics.observe_process_latency(0.01, false, "fail"); + let output = metrics.render_prometheus().unwrap(); + + 
assert!(output.contains("chronos_message_consume_duration_count{chronos_consume_status=\"pass\",chronos_destination=\"postgres\"} 1")); + assert!(output.contains("chronos_message_process_duration_count{chronos_process_status=\"fail\",chronos_processor_returned=\"false\"} 1")); + } +} diff --git a/chronos_bin/src/metrics/server.rs b/chronos_bin/src/metrics/server.rs new file mode 100644 index 0000000..9ef1995 --- /dev/null +++ b/chronos_bin/src/metrics/server.rs @@ -0,0 +1,25 @@ +use crate::metrics::ChronosMetrics; +use axum::{extract::State, http::StatusCode, response::IntoResponse, routing::get, Router}; +use std::sync::Arc; + +async fn metrics_handler(State(metrics): State>) -> impl IntoResponse { + match metrics.render_prometheus() { + Some(body) => (StatusCode::OK, [("content-type", "text/plain; version=0.0.4; charset=utf-8")], body).into_response(), + None => StatusCode::NOT_FOUND.into_response(), + } +} + +pub async fn run_metrics_server(metrics: Arc, host: String, port: u16) { + if !metrics.is_prometheus() { + log::info!("Prometheus metrics server disabled because OTEL_METRICS_EXPORTER is not prometheus"); + return; + } + + let app = Router::new().route("/metrics", get(metrics_handler)).with_state(metrics); + + let addr = format!("{}:{}", host, port); + log::info!("Metrics server listening on {}", addr); + + let listener = tokio::net::TcpListener::bind(&addr).await.expect("Failed to bind metrics server port"); + axum::serve(listener, app).await.expect("Metrics server failed"); +} diff --git a/chronos_bin/src/monitor.rs b/chronos_bin/src/monitor.rs index aaaffd3..1825d93 100644 --- a/chronos_bin/src/monitor.rs +++ b/chronos_bin/src/monitor.rs @@ -1,3 +1,4 @@ +use crate::metrics::ChronosMetrics; use crate::postgres::pg::Pg; use crate::utils::config::ChronosConfig; use chrono::Utc; @@ -7,6 +8,7 @@ use std::time::Duration; #[derive(Debug)] pub struct FailureDetector { pub(crate) data_store: Arc, + pub(crate) metrics: Arc, } impl FailureDetector { @@ -22,11 
+24,16 @@ impl FailureDetector { #[tracing::instrument(skip_all, fields(error))] async fn reset_to_init_db(&self, fetched_rows: &std::vec::Vec) { if !fetched_rows.is_empty() { - if let Err(e) = &self.data_store.reset_to_init_db(fetched_rows).await { - tracing::Span::current().record("error", e); - log::error!("error in monitor reset_to_init {}", e); - } else { - log::debug!("reset_to_init_db success for {:?}", fetched_rows) + match &self.data_store.reset_to_init_db(fetched_rows).await { + Ok(reset_ids) => { + // chronos.message.reset: count the number of messages reset by the monitor task. + self.metrics.messages_reset(reset_ids.len() as u64); + log::debug!("reset_to_init_db success for {:?}", fetched_rows) + } + Err(e) => { + tracing::Span::current().record("error", e); + log::error!("error in monitor reset_to_init {}", e); + } } } } diff --git a/chronos_bin/src/postgres/pg.rs b/chronos_bin/src/postgres/pg.rs index 9e39462..4be47e9 100644 --- a/chronos_bin/src/postgres/pg.rs +++ b/chronos_bin/src/postgres/pg.rs @@ -63,7 +63,7 @@ struct PgAccess { } impl PgAccess { - pub async fn build_txn(&mut self) -> Result { + pub async fn build_txn(&mut self) -> Result, PgError> { let txn = self .client .build_transaction() @@ -191,7 +191,7 @@ impl Pg { } #[tracing::instrument(skip_all)] - pub(crate) async fn delete_fired(&self, ids: &Vec) -> Result { + pub(crate) async fn delete_fired(&self, ids: &[String]) -> Result { // let query_execute_instant = Instant::now(); let pg_client = match self.get_client().await { Ok(client) => client, diff --git a/chronos_bin/src/runner.rs b/chronos_bin/src/runner.rs index 7431ec7..0649af5 100644 --- a/chronos_bin/src/runner.rs +++ b/chronos_bin/src/runner.rs @@ -2,8 +2,11 @@ use crate::kafka::consumer::KafkaConsumer; use crate::kafka::producer::KafkaProducer; use crate::message_processor::MessageProcessor; use crate::message_receiver::MessageReceiver; +use crate::metrics::server::run_metrics_server; +use crate::metrics::ChronosMetrics; use 
crate::monitor::FailureDetector; use crate::postgres::pg::Pg; +use crate::utils::config::ChronosConfig; use std::fs::{create_dir, read, write}; use std::sync::Arc; @@ -11,27 +14,46 @@ pub struct Runner { pub consumer: Arc, pub producer: Arc, pub data_store: Arc, + pub metrics: Arc, } impl Runner { pub async fn run(&self) { let monitor_ds = Arc::clone(&self.data_store); + let monitor_metrics = Arc::clone(&self.metrics); let process_ds = Arc::clone(&self.data_store); let process_producer = self.producer.clone(); + let process_metrics = Arc::clone(&self.metrics); let receiver_ds = Arc::clone(&self.data_store); let receiver_prod = self.producer.clone(); let receiver_consumer = self.consumer.clone(); + let receiver_metrics = Arc::clone(&self.metrics); + + let chronos_config = ChronosConfig::from_env(); + let metrics_host = chronos_config.metrics_host; + let metrics_port = chronos_config.metrics_port; + let metrics_for_server = Arc::clone(&self.metrics); + + // Spawn metrics server as an independent background task. + // A failure here is logged but does not stop the processing tasks. 
+ tokio::task::spawn(async move { + run_metrics_server(metrics_for_server, metrics_host, metrics_port).await; + }); let monitor_handler = tokio::task::spawn(async { - let monitor = FailureDetector { data_store: monitor_ds }; + let monitor = FailureDetector { + data_store: monitor_ds, + metrics: monitor_metrics, + }; monitor.run().await; }); let message_processor_handler = tokio::task::spawn(async { let message_processor = MessageProcessor { producer: process_producer, data_store: process_ds, + metrics: process_metrics, }; message_processor.run().await; }); @@ -40,6 +62,7 @@ impl Runner { consumer: receiver_consumer, producer: receiver_prod, data_store: receiver_ds, + metrics: receiver_metrics, }; message_receiver.run().await; diff --git a/chronos_bin/src/telemetry/jaegar_backend.rs b/chronos_bin/src/telemetry/jaegar_backend.rs index 887f9fa..92f674e 100644 --- a/chronos_bin/src/telemetry/jaegar_backend.rs +++ b/chronos_bin/src/telemetry/jaegar_backend.rs @@ -2,11 +2,10 @@ use opentelemetry_api::trace::TraceError; use opentelemetry_sdk::trace::Tracer; pub fn instrument_jaegar_pipleline() -> Result { - let service_name = std::env::var("OTEL_SERVICE_NAME"); - if service_name.is_err() { - std::env::set_var("OTEL_SERVICE_NAME", "chronos"); - } - opentelemetry_jaeger::new_agent_pipeline() - .with_service_name(format!("{:?}", service_name)) - .install_simple() + let service_name = std::env::var("OTEL_SERVICE_NAME").unwrap_or_else(|_| { + let service_name = "chronos".to_string(); + std::env::set_var("OTEL_SERVICE_NAME", &service_name); + service_name + }); + opentelemetry_jaeger::new_agent_pipeline().with_service_name(service_name).install_simple() } diff --git a/chronos_bin/src/telemetry/otlp_collector.rs b/chronos_bin/src/telemetry/otlp_collector.rs index db972d0..a045bdc 100644 --- a/chronos_bin/src/telemetry/otlp_collector.rs +++ b/chronos_bin/src/telemetry/otlp_collector.rs @@ -31,10 +31,7 @@ impl OtlpCollector { log::error!("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT not 
set"); // trace error - Err(TraceError::Other(Box::new(std::io::Error::new( - std::io::ErrorKind::Other, - "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT not set", - )))) + Err(TraceError::Other(Box::new(std::io::Error::other("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT not set")))) } } } diff --git a/chronos_bin/src/utils/config.rs b/chronos_bin/src/utils/config.rs index 03d0fbb..fa6f14f 100644 --- a/chronos_bin/src/utils/config.rs +++ b/chronos_bin/src/utils/config.rs @@ -5,6 +5,8 @@ pub struct ChronosConfig { pub processor_db_poll: u64, pub time_advance: u64, pub fail_detect_interval: u64, + pub metrics_host: String, + pub metrics_port: u16, } impl ChronosConfig { @@ -15,6 +17,52 @@ impl ChronosConfig { processor_db_poll: std::env::var("PROCESSOR_DB_POLL").unwrap_or_else(|_| 5.to_string()).parse().unwrap_or(5), time_advance: std::env::var("TIMING_ADVANCE").unwrap_or_else(|_| 0.to_string()).parse().unwrap_or(0), fail_detect_interval: std::env::var("FAIL_DETECT_INTERVAL").unwrap_or_else(|_| 10.to_string()).parse().unwrap_or(10), + metrics_host: std::env::var("OTEL_EXPORTER_PROMETHEUS_HOST").unwrap_or_else(|_| "0.0.0.0".to_string()), + metrics_port: std::env::var("OTEL_EXPORTER_PROMETHEUS_PORT") + .or_else(|_| std::env::var("METRICS_PORT")) + .unwrap_or_else(|_| "9090".to_string()) + .parse() + .unwrap_or(9090), } } } + +#[cfg(test)] +mod tests { + use super::ChronosConfig; + use serial_test::serial; + + fn remove_metrics_env() { + std::env::remove_var("OTEL_EXPORTER_PROMETHEUS_HOST"); + std::env::remove_var("OTEL_EXPORTER_PROMETHEUS_PORT"); + std::env::remove_var("METRICS_PORT"); + } + + #[test] + #[serial] + fn prometheus_spec_env_overrides_metrics_binding() { + remove_metrics_env(); + std::env::set_var("OTEL_EXPORTER_PROMETHEUS_HOST", "127.0.0.1"); + std::env::set_var("OTEL_EXPORTER_PROMETHEUS_PORT", "9464"); + std::env::set_var("METRICS_PORT", "9090"); + + let config = ChronosConfig::from_env(); + + assert_eq!(config.metrics_host, "127.0.0.1"); + assert_eq!(config.metrics_port, 
9464); + remove_metrics_env(); + } + + #[test] + #[serial] + fn metrics_port_remains_backward_compatible_fallback() { + remove_metrics_env(); + std::env::set_var("METRICS_PORT", "9091"); + + let config = ChronosConfig::from_env(); + + assert_eq!(config.metrics_host, "0.0.0.0"); + assert_eq!(config.metrics_port, 9091); + remove_metrics_env(); + } +} diff --git a/chronos_bin/src/utils/env.rs b/chronos_bin/src/utils/env.rs index 7dc5803..1860be9 100644 --- a/chronos_bin/src/utils/env.rs +++ b/chronos_bin/src/utils/env.rs @@ -56,7 +56,7 @@ pub fn get_env_vars_with_prefix(prefix: &str) -> Option> /// ``` /// /// - When the `key` and value return `type` is passed, the environment variable is -/// read for the key and the value is parsed into the `type` passed as argument. +/// read for the key and the value is parsed into the `type` passed as argument. /// /// ## Example /// ```ignore @@ -65,7 +65,7 @@ pub fn get_env_vars_with_prefix(prefix: &str) -> Option> /// ``` /// /// - Special scenario to convert the string value to Vector. -/// When the `key` and value return `type` is passed as `Vec` +/// When the `key` and value return `type` is passed as `Vec` /// - the environment variable is read for the key. /// - the string value returned is split on `,` to create a Vec. /// - each value of the vec is parsed into the `type` passed as argument. 
diff --git a/chronos_bin/src/utils/util.rs b/chronos_bin/src/utils/util.rs index b9375ae..2eae6f2 100644 --- a/chronos_bin/src/utils/util.rs +++ b/chronos_bin/src/utils/util.rs @@ -9,14 +9,10 @@ pub fn required_headers(message: &BorrowedMessage) -> Option::new(), |mut acc, header| { - if let Ok(key) = header.key.parse() { - if let Some(value) = header.value { - let value: String = String::from_utf8_lossy(value).into_owned(); - acc.insert(key, value); - acc - } else { - acc - } + if let Some(value) = header.value { + let value: String = String::from_utf8_lossy(value).into_owned(); + acc.insert(header.key.to_string(), value); + acc } else { acc } @@ -48,7 +44,7 @@ pub fn headers_check(headers: &BorrowedHeaders) -> bool { outcome == 2 } -pub fn get_payload_utf8<'a>(message: &'a BorrowedMessage) -> Option<&'a [u8]> { +pub fn get_payload_utf8<'a>(message: &'a BorrowedMessage<'_>) -> Option<&'a [u8]> { message.payload() } diff --git a/dev/dashboards/.gitkeep b/dev/dashboards/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/dev/dashboards/.gitkeep @@ -0,0 +1 @@ + diff --git a/dev/dashboards/chronos.json b/dev/dashboards/chronos.json new file mode 100644 index 0000000..20c005e --- /dev/null +++ b/dev/dashboards/chronos.json @@ -0,0 +1,950 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "A dashboard for monitoring Chronos\nhttps://github.com/kindredgroup/chronos", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } 
+ ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 0, + "y": 0 + }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "value", + "wideLayout": true + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(increase(chronos_message_reset_total{}[$__range]))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "msgs reset", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "Over time range", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "msg/s" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 6, + "y": 0 + }, + "id": 5, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "value_and_name", + "wideLayout": true + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(\n increase(\n chronos_message_consume_duration_seconds_count[$__range]\n )\n) by (chronos_consume_status, chronos_destination)", + "legendFormat": "{{ chronos_consume_status }}:{{ chronos_destination }}", + "range": true, + "refId": "A" + } + ], + "title": "Messages consumed", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + 
"custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 0.5, + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "dashed" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 0.5 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(rate(chronos_message_jitter_seconds_bucket[$__rate_interval])) by (le))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "p99 message jitter", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "{{ chronos_destination }}:{{ chronos_consume_status }}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": 
false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "msg/s" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(\n rate(\n chronos_message_consume_duration_seconds_count[$__rate_interval]\n )\n) by (chronos_consume_status, chronos_destination)", + "legendFormat": "{{ chronos_destination }}:{{ chronos_consume_status }}", + "range": true, + "refId": "A" + } + ], + "title": "Messages consumed p/s", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "p99 time spent in queue", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": 
{ + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "histogram_quantile(0.99,\n sum(\n rate(\n chronos_message_wait_duration_seconds_bucket[$__rate_interval]\n )\n ) by (le)\n)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Messages wait time", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "The % of messages routed to either Postgres or Kafka\n{{ chronos_destination }}:{{ chronos_consume_status }}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "percentunit" + }, + "overrides": 
[] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 11, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(\n rate(\n chronos_message_consume_duration_seconds_count[$__rate_interval]\n )\n) by (chronos_consume_status, chronos_destination) / on() group_left() sum(\n rate(\n chronos_message_consume_duration_seconds_count[$__rate_interval]\n )\n) by (chronos_consume_status)", + "legendFormat": "{{ chronos_destination }}:{{ chronos_consume_status }}", + "range": true, + "refId": "A" + } + ], + "title": "Consumed msg destination %", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "The percentage of time spent running the consumption or processing of messages", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 18 + }, + "id": 
7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(\n rate(\n chronos_message_process_duration_seconds_sum[$__rate_interval]\n )\n)", + "instant": false, + "legendFormat": "processing", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(\n rate(\n chronos_message_consume_duration_seconds_sum[$__rate_interval]\n )\n)", + "instant": false, + "legendFormat": "consumption", + "range": true, + "refId": "C" + } + ], + "title": "Running time p/s", + "transformations": [ + { + "id": "calculateField", + "options": {} + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "tempo", + "uid": "tempo" + }, + "description": "Use the TRACEID variables to see a trace", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "footer": { + "reducers": [] + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 17, + "w": 12, + "x": 0, + "y": 27 + }, + "id": 10, + "options": { + "cellHeight": "sm", + "showHeader": true + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "datasource": { + "type": "tempo", + "uid": "tempo" + }, + "limit": 20, + "metricsQueryType": "range", + "query": "{resource.service.name=\"chronos\"}", + "queryType": "traceql", + "refId": "A", + "serviceMapUseNativeHistograms": false, + "spss": 1, + "tableType": "traces" + } + ], + "title": "Traces", + 
"transformations": [ + { + "id": "calculateField", + "options": {} + } + ], + "type": "table" + }, + { + "datasource": { + "type": "tempo", + "uid": "tempo" + }, + "description": "Use the TRACEID variables to see a trace", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 17, + "w": 12, + "x": 12, + "y": 27 + }, + "id": 9, + "options": { + "spanFilters": { + "adhocFilters": [], + "criticalPathOnly": false, + "matchesOnly": false + } + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "datasource": { + "type": "tempo", + "uid": "tempo" + }, + "limit": 20, + "metricsQueryType": "range", + "query": "${TRACEID}", + "queryType": "traceql", + "refId": "A", + "serviceMapUseNativeHistograms": false, + "tableType": "traces" + } + ], + "title": "Traces", + "transformations": [ + { + "id": "calculateField", + "options": {} + } + ], + "type": "traces" + }, + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 13, + "w": 24, + "x": 0, + "y": 44 + }, + "id": 1, + "options": { + "dedupStrategy": "none", + "enableInfiniteScrolling": false, + "enableLogDetails": true, + "showControls": false, + "showTime": false, + "sortOrder": "Descending", + "syntaxHighlighting": true, + "unwrappedColumns": false, + "wrapLogMessage": true + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "direction": "backward", + "editorMode": "code", + "expr": "{service_name=\"chronos\"} | log_file_name=\"chronos.jsonl\"", + "queryType": "range", + "refId": "A" + } + ], + "title": "logs", + "type": "logs" + }, + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + 
"barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 13, + "w": 24, + "x": 0, + "y": 57 + }, + "id": 12, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "direction": "backward", + "editorMode": "code", + "expr": "sum(count_over_time({service_name=\"chronos\"} | log_file_name=\"chronos.jsonl\"[$__auto])) by (detected_level)", + "queryType": "range", + "refId": "A" + } + ], + "title": "logs", + "type": "timeseries" + } + ], + "preload": false, + "refresh": "5s", + "schemaVersion": 42, + "tags": [], + "templating": { + "list": [ + { + "current": { + "text": "", + "value": "" + }, + "description": "A trace ID to lookup", + "label": "TRACEID", + "name": "TRACEID", + "options": [ + { + "selected": true, + "text": "", + "value": "" + } + ], + "query": "", + "type": "textbox" + } + ] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Chronos", + "uid": "gk65ns", + "version": 1, + "weekStart": "" +} \ No newline at end of file diff --git a/dev/dashboards/testing.json b/dev/dashboards/testing.json new file mode 100644 index 
0000000..51db6da --- /dev/null +++ b/dev/dashboards/testing.json @@ -0,0 +1,2927 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "A dashboard for working with the local testing infrastructure", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "panels": [ + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 5, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "count(up)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Targets", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + 
"hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Name", + "sortDesc": false + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(up{service_name!~\".+\"}) by (job, instance, target)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Target status", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + 
"mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(scrape_samples_post_metric_relabeling) by (job)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Series per job", + "transformations": [ + { + "id": "calculateField", + "options": {} + } + ], + "type": "timeseries" + } + ], + "title": "exporters", + "type": "row" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 31, + "panels": [], + "title": "k6", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "{{ scenario }}:{{ topic }}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + 
"h": 8, + "w": 12, + "x": 0, + "y": 2 + }, + "id": 33, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "histogram_quantile(0.99,\n sum(\n rate(\n k6_iteration_duration_milliseconds_bucket[$__rate_interval]\n )\n ) by (le, scenario)\n)", + "legendFormat": "{{ scenario }}:99%", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.5,\n sum(\n rate(\n k6_iteration_duration_milliseconds_bucket[$__rate_interval]\n )\n ) by (le, scenario)\n)", + "instant": false, + "legendFormat": "{{ scenario }}:50%", + "range": true, + "refId": "B" + } + ], + "title": "kafka p99 iteration duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + 
}, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 2 + }, + "id": 34, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(\n rate(\n k6_iteration_duration_milliseconds_count[$__rate_interval]\n )\n) by (scenario)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "iterations per second", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "The path the message intends to exercise.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 37, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": 
"12.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(\n rate(\n k6_chronos_messages_published_total{job=\"k6-chronos\"}[$__rate_interval]\n )\n) by (chronos_path)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Publish path", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "The path the message intends to exercise.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 38, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(\n rate(\n k6_chronos_messages_published_total{job=\"k6-chronos\"}[$__rate_interval]\n )\n) by (chronos_path) / on() group_left() sum(\n rate(\n k6_chronos_messages_published_total{job=\"k6-chronos\"}[$__rate_interval]\n )\n)", + "legendFormat": "__auto", + 
"range": true, + "refId": "A" + } + ], + "title": "Publish path %", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "{{ scenario }}:{{ topic }}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 32, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "histogram_quantile(0.99,\n sum(\n rate(\n k6_kafka_writer_write_seconds_milliseconds_bucket{job=\"k6-chronos\"}[$__rate_interval]\n )\n ) by (le, scenario, topic)\n)", + "legendFormat": "{{ scenario }}:{{ topic }}", + "range": true, + "refId": "A" + } + ], + "title": "kafka p99 write seconds", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "The % of iterations dropped per second", + "fieldConfig": { + "defaults": { + 
"color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 1, + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "dashed" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "#EAB839", + "value": 0.05 + }, + { + "color": "red", + "value": 0.1 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 18 + }, + "id": 36, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(\n increase(\n k6_dropped_iterations_total{}[$__rate_interval]\n )\n) by (scenario) / sum(\n increase(\n k6_iterations_total[$__rate_interval]\n )\n ) by (scenario)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Dropped iterations %", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "The % of iterations dropped over the range", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + 
"value": 0 + }, + { + "color": "#EAB839", + "value": 0.05 + }, + { + "color": "red", + "value": 0.1 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 18 + }, + "id": 35, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(increase(k6_dropped_iterations_total{}[$__range])) / sum(increase(k6_iterations_total[$__range]))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Dropped iterations %", + "type": "stat" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 26 + }, + "id": 16, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "{{ topic_name }}:{{ group_id }}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } 
+ }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 76 + }, + "id": 27, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(kminion_kafka_consumer_group_topic_lag) by (group_id, topic_name)", + "legendFormat": "{{ topic_name }}:{{ group_id }}", + "range": true, + "refId": "A" + } + ], + "title": "Topic lag", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "{{ topic_name }}:{{ group_id }}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 76 + }, + "id": 28, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } 
+ }, + "pluginVersion": "12.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(\n kminion_kafka_topic_log_dir_size_total_bytes{topic_name!~\"__.+\"}\n) by (topic_name)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "log size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 84 + }, + "id": 29, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(\n rate(\n kminion_kafka_received_bytes{}[$__rate_interval]\n )\n) * -1", + "legendFormat": "received", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(\n rate(\n 
kminion_kafka_sent_bytes{}[$__rate_interval]\n )\n)", + "instant": false, + "legendFormat": "sent", + "range": true, + "refId": "B" + } + ], + "title": "Network I/O", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 84 + }, + "id": 30, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(rate(kminion_kafka_requests_received_total[$__rate_interval]))", + "legendFormat": "sent", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(rate(kminion_kafka_requests_sent_total{}[$__rate_interval])) *-1", + "instant": false, + "legendFormat": "received", + "range": true, + "refId": "B" + } + ], + 
"title": "Requests p/s", + "type": "timeseries" + } + ], + "title": "kafka", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 27 + }, + "id": 15, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "Number of rows in the hangfire table", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 28 + }, + "id": 1, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "chronos_rows{job=\"sql-exporter\"}", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Hangfire rows", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + 
"mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 28 + }, + "id": 21, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "editorMode": "code", + "exemplar": false, + "expr": "sum(pg_database_size_bytes{datname!~\"postgres|template\\\\d\"}) by (datname)", + "format": "table", + "instant": false, + "legendFormat": "{{ name }}", + "range": true, + "refId": "A" + } + ], + "title": "Database size", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "Number of entries in the hangfire table now", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 28 + }, + "id": 22, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "value", + "wideLayout": true + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(chronos_rows{job=\"sql-exporter\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Active hangfire rows", + 
"type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "Requires:\\\npg_settings_track_io_timing == 1", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 9, + "x": 0, + "y": 36 + }, + "id": 26, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(rate(pg_stat_database_blk_read_time{datname!~\"postgres|template\\\\d\"}[$__rate_interval])) by (datname) *-1", + "legendFormat": "{{ datname }}:read", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(rate(pg_stat_database_blk_write_time{datname!~\"postgres|template\\\\d\"}[$__rate_interval])) by (datname)", + "instant": false, + "legendFormat": "{{ datname }}:write", + 
"range": true, + "refId": "B" + } + ], + "title": "PG blk I/O time", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "{{ datname }}:returned", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 9, + "x": 9, + "y": 36 + }, + "id": 25, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Name", + "sortDesc": false + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(rate(pg_stat_database_tup_updated{datname!~\"postgres|template\\\\d\"}[$__rate_interval])) by (datname)", + "instant": false, + "legendFormat": "{{ datname }}:updated", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": 
"sum(rate(pg_stat_database_tup_returned{datname!~\"postgres|template\\\\d\"}[$__rate_interval])) by (datname)", + "instant": false, + "legendFormat": "{{ datname }}:returned", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(rate(pg_stat_database_tup_inserted{datname!~\"postgres|template\\\\d\"}[$__rate_interval])) by (datname)", + "instant": false, + "legendFormat": "{{ datname }}:inserted", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(rate(pg_stat_database_tup_fetched{datname!~\"postgres|template\\\\d\"}[$__rate_interval])) by (datname)", + "instant": false, + "legendFormat": "{{ datname }}:fetched", + "range": true, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(rate(pg_stat_database_tup_deleted{datname!~\"postgres|template\\\\d\"}[$__rate_interval])) by (datname)", + "instant": false, + "legendFormat": "{{ datname }}:deleted", + "range": true, + "refId": "F" + } + ], + "title": "Database operations", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "Lookup time for sql-exporter", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "left", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + 
"group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 18, + "y": 36 + }, + "id": 24, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(scrape_duration_seconds{\n job=\"sql-exporter\", target=~\".+\"\n}) by (target)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Hangfire table count query latency", + "type": "timeseries" + } + ], + "title": "postgres", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 28 + }, + "id": 2, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + 
"steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 38 + }, + "id": 8, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(rate(container_cpu_usage_seconds_total{image!=\"\"}[$__rate_interval])) by (image)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "CPU seconds", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 38 + }, + "id": 9, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + 
"showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(container_memory_usage_bytes{image!=\"\"}) by (image)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Memory usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "CPU usage compared to the node total", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 46 + }, + "id": 19, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(rate(container_cpu_usage_seconds_total{image!=\"\"}[$__rate_interval])) by (image) 
/ on() group_left() sum(machine_cpu_cores)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "CPU usage %", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "Memory usage compared to node total", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 46 + }, + "id": 20, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(\n container_memory_usage_bytes{id!=\"/\"}\n) by (image) / on() group_left() sum(\n machine_memory_bytes{}\n)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Memory usage %", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + 
"fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 54 + }, + "id": 17, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(rate(container_cpu_cfs_throttled_periods_total{}[$__rate_interval])) by (image) / sum( rate(container_cpu_cfs_periods_total{}[$__rate_interval]) ) by (image)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Throttled periods %", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + 
"fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 54 + }, + "id": 18, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(rate(container_fs_reads_bytes_total{image!=\"\", name!=\"\"}[$__rate_interval])) by (image) * -1", + "legendFormat": "{{ image }}:read", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(rate(container_fs_writes_bytes_total{image!=\"\", name!=\"\"}[$__rate_interval])) by (image)", + "instant": false, + "legendFormat": "{{ image }}:write", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "C" + } + ], + "title": "Disk I/O bytes", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": 
false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 62 + }, + "id": 13, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(\n rate(\n container_network_receive_bytes_total{image!=\"\", name!=\"\"}[$__rate_interval]\n )\n) by (image)", + "legendFormat": "{{ image }}:rx", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(\n rate(\n container_network_transmit_bytes_total{image!=\"\", name!=\"\"}[$__rate_interval]\n )\n) by (image) * -1", + "instant": false, + "legendFormat": "{{ image }}:tx", + "range": true, + "refId": "B" + } + ], + "title": "Network I/O", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + 
"axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 62 + }, + "id": 12, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(rate(container_fs_read_seconds_total{image!=\"\", name!=\"\"}[$__rate_interval])) by (image) * -1", + "legendFormat": "{{ image }}:read", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(rate(container_fs_write_seconds_total{image!=\"\", name!=\"\"}[$__rate_interval])) by (image)", + "instant": false, + "legendFormat": "{{ image }}:write", + "range": true, + "refId": "B" + } + ], + "title": "Disk I/O seconds", + "type": "timeseries" + } + ], + "title": "containers", + "type": "row" + } + ], + "preload": false, + "refresh": "5s", + "schemaVersion": 42, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": 
"now-15m", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Testing", + "uid": "gwvvwj", + "version": 4, + "weekStart": "" +} \ No newline at end of file diff --git a/dev/docker-compose/compose.yaml b/dev/docker-compose/compose.yaml new file mode 100644 index 0000000..682dfcc --- /dev/null +++ b/dev/docker-compose/compose.yaml @@ -0,0 +1,62 @@ +include: + - infra.yaml + +services: + chronos-pg-migrations: + build: + context: ../.. + dockerfile: docker/Dockerfile.chronos-pg-migrations + environment: + PG_HOST: postgres + PG_PORT: "5432" + PG_USER: admin + PG_PASSWORD: admin + PG_DATABASE: chronos_db + depends_on: + postgres: + condition: service_healthy + restart: "no" + networks: + - chronos + + chronos: + build: + context: ../.. + dockerfile: docker/Dockerfile.chronos + cpus: 2 + mem_limit: 2g + ports: + - "9091:9091" + environment: + KAFKA_HOST: kafka + KAFKA_PORT: "9092" + KAFKA_CLIENT_ID: chronos + KAFKA_GROUP_ID: chronos + KAFKA_IN_TOPIC: chronos.in + KAFKA_OUT_TOPIC: chronos.out + KAFKA_USERNAME: "" + KAFKA_PASSWORD: "" + PG_HOST: postgres + PG_PORT: "5432" + PG_USER: admin + PG_PASSWORD: admin + PG_DATABASE: chronos_db + PG_POOL_SIZE: "10" + RUST_LOG: info + OTEL_METRICS_EXPORTER: "prometheus" + OTEL_EXPORTER_PROMETHEUS_HOST: "0.0.0.0" + OTEL_EXPORTER_PROMETHEUS_PORT: "9091" + OTEL_METRIC_EXPORT_INTERVAL: "1500" + MONITOR_DB_POLL: "5" + PROCESSOR_DB_POLL: "5" + TIMING_ADVANCE: "0" + FAIL_DETECT_INTERVAL: "10" + depends_on: + postgres: + condition: service_healthy + kafka: + condition: service_healthy + chronos-pg-migrations: + condition: service_completed_successfully + networks: + - chronos diff --git a/dev/docker-compose/infra.yaml b/dev/docker-compose/infra.yaml new file mode 100644 index 0000000..0e72813 --- /dev/null +++ b/dev/docker-compose/infra.yaml @@ -0,0 +1,49 @@ +services: + postgres: + image: postgres:16 + ports: + - "5432:5432" + environment: + POSTGRES_USER: admin + POSTGRES_PASSWORD: admin + POSTGRES_DB: chronos_db 
+ volumes: + - postgres:/var/lib/postgresql/data/ + healthcheck: + test: ["CMD-SHELL", "pg_isready -U admin -d chronos_db"] + interval: 5s + timeout: 5s + retries: 10 + networks: + - chronos + + kafka: + image: bitnami/kafka:latest + ports: + - "9094:9094" + environment: + KAFKA_CFG_NODE_ID: "0" + KAFKA_CFG_PROCESS_ROLES: controller,broker + KAFKA_CFG_CONTROLLER_QUORUM_VOTERS: 0@kafka:9093 + KAFKA_CFG_LISTENERS: PLAINTEXT://:9092,CONTROLLER://:9093,EXTERNAL://:9094 + KAFKA_CFG_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092,EXTERNAL://localhost:9094 + KAFKA_CFG_LISTENER_SECURITY_PROTOCOL_MAP: CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT,EXTERNAL:PLAINTEXT + KAFKA_CFG_CONTROLLER_LISTENER_NAMES: CONTROLLER + KAFKA_CFG_INTER_BROKER_LISTENER_NAME: PLAINTEXT + KAFKA_CFG_AUTO_CREATE_TOPICS_ENABLE: "true" + healthcheck: + test: ["CMD-SHELL", "kafka-topics.sh --bootstrap-server localhost:9092 --list"] + interval: 10s + timeout: 10s + retries: 15 + start_period: 30s + networks: + - chronos + +networks: + chronos: + name: chronos + +volumes: + postgres: + driver: local diff --git a/dev/docker-compose/jaeger.yaml b/dev/docker-compose/jaeger.yaml new file mode 100644 index 0000000..0bd9ad0 --- /dev/null +++ b/dev/docker-compose/jaeger.yaml @@ -0,0 +1,34 @@ +services: + chronos: + environment: + OTEL_EXPORTER_OTLP_TRACES_ENDPOINT: http://otel-collector:4318/v1/traces + depends_on: + otel-collector: + condition: service_started + + jaeger-all-in-one: + image: jaegertracing/all-in-one:latest + container_name: Jaeger + ports: + - "16686:16686" + environment: + COLLECTOR_OTLP_ENABLED: "true" + networks: + - chronos + + otel-collector: + image: otel/opentelemetry-collector:latest + container_name: otelcol + restart: unless-stopped + command: ["--config=/etc/otelcol-config.yml"] + volumes: + - ../otel/otelcol-config.yml:/etc/otelcol-config.yml:ro + ports: + - "1888:1888" + - "13133:13133" + - "4317:4317" + - "4318:4318" + depends_on: + - jaeger-all-in-one + networks: + - chronos diff --git 
# LGTM observability overlay; applied together with the base compose file.
services:
  chronos:
    # Run through a shell so stdout and stderr are appended to the log
    # directory that the lgtm service mounts as well.
    command:
      - sh
      - "-c"
      - '/opt/build/chronos >> /data/lgtm/logs/chronos.jsonl 2>&1'
    environment:
      OTEL_METRICS_EXPORTER: otlp
      OTEL_EXPORTER_OTLP_METRICS_PROTOCOL: grpc
      OTEL_EXPORTER_OTLP_METRICS_ENDPOINT: http://lgtm:4317
      OTEL_EXPORTER_OTLP_TRACES_ENDPOINT: http://lgtm:4318/v1/traces
    depends_on:
      lgtm:
        condition: service_healthy
    volumes:
      - ../lgtm/runtime-logs:/data/lgtm/logs

  lgtm:
    image: grafana/otel-lgtm:0.24.1
    container_name: lgtm
    environment:
      ENABLE_LOGS_ALL: "true"
      GF_LOG_CONSOLE_FORMAT: json
      GF_LOG_FORMAT: json
      GF_LOG_MODE: console
      LGTM_LOG_DIR: /data/lgtm/logs
      LOKI_EXTRA_ARGS: "-log.format=json"
      PROMETHEUS_EXTRA_ARGS: "--log.format=json"
      PYROSCOPE_EXTRA_ARGS: "-log.format=json"
    ports:
      - "3000:3000"
      - "3100:3100"
      - "3200:3200"
      - "4040:4040"
      - "4317:4317"
      - "4318:4318"
      - "9090:9090"
    healthcheck:
      # The script is bind-mounted from ../lgtm/healthcheck.sh below.
      test: ["CMD-SHELL", "sh /otel-lgtm/chronos-healthcheck.sh"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 30s
    # Every exporter scraped by the bundled Prometheus must be healthy first.
    depends_on:
      cadvisor:
        condition: service_healthy
      postgres-exporter:
        condition: service_healthy
      kminion:
        condition: service_healthy
      sql-exporter:
        condition: service_healthy
    volumes:
      - ../lgtm/prometheus.yaml:/otel-lgtm/prometheus.yaml:ro
      - ../lgtm/otelcol-contrib.yaml:/otel-lgtm/otelcol-config.yaml:ro
      - ../lgtm/healthcheck.sh:/otel-lgtm/chronos-healthcheck.sh:ro
      - ../lgtm/logging.sh:/otel-lgtm/logging.sh:ro
      - ../lgtm/runtime-logs:/data/lgtm/logs
      - ../lgtm/dashboards.yaml:/otel-lgtm/grafana/conf/provisioning/dashboards/chronos.yaml:ro
      - ../dashboards:/otel-lgtm/grafana/conf/provisioning/dashboards/chronos:ro
    networks:
      - chronos

  cadvisor:
    image: gcr.io/cadvisor/cadvisor:v0.52.1
    container_name: cadvisor
    command:
      - "--docker_only=true"
      - "--housekeeping_interval=10s"
      - "--store_container_labels=false"
    privileged: true
    devices:
      - /dev/kmsg:/dev/kmsg
    volumes:
      - /:/rootfs:ro
      # Read-only like every other host mount: this container is privileged,
      # so it should not get write access to the host's /var/run.
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker:/var/lib/docker:ro
      - /dev/disk:/dev/disk:ro
    healthcheck:
      test: ["CMD-SHELL", "wget -qO- http://127.0.0.1:8080/healthz >/dev/null"]
      interval: 15s
      timeout: 5s
      retries: 10
      start_period: 15s
    networks:
      - chronos

  postgres-exporter:
    image: quay.io/prometheuscommunity/postgres-exporter:v0.19.1
    container_name: postgres-exporter
    environment:
      DATA_SOURCE_URI: postgres:5432/chronos_db?sslmode=disable
      DATA_SOURCE_USER: admin
      DATA_SOURCE_PASS: admin
    depends_on:
      postgres:
        condition: service_healthy
    healthcheck:
      test: ["CMD-SHELL", "wget -qO- http://127.0.0.1:9187/metrics >/dev/null"]
      interval: 15s
      timeout: 5s
      retries: 10
      start_period: 15s
    networks:
      - chronos

  kminion:
    image: redpandadata/kminion:v2.2.14
    container_name: kminion
    environment:
      CONFIG_FILEPATH: /etc/kminion/kminion.yaml
    depends_on:
      kafka:
        condition: service_healthy
    healthcheck:
      test: ["CMD-SHELL", "wget -qO- http://127.0.0.1:8080/metrics >/dev/null"]
      interval: 15s
      timeout: 5s
      retries: 10
      start_period: 30s
    volumes:
      - ../lgtm/kminion.yaml:/etc/kminion/kminion.yaml:ro
    networks:
      - chronos

  sql-exporter:
    image: burningalchemist/sql_exporter:0.18.3
    container_name: sql-exporter
    command:
      - "--config.file=/etc/sql_exporter/sql_exporter.yaml"
    # The migrations must have completed before the exporter starts querying.
    depends_on:
      postgres:
        condition: service_healthy
      chronos-pg-migrations:
        condition: service_completed_successfully
    healthcheck:
      test: ["CMD-SHELL", "wget -qO- http://127.0.0.1:9399/metrics >/dev/null"]
      interval: 15s
      timeout: 5s
      retries: 10
      start_period: 15s
    volumes:
      - ../lgtm/sql_exporter.yaml:/etc/sql_exporter/sql_exporter.yaml:ro
    networks:
      - chronos
+++ b/dev/k6/README.md @@ -0,0 +1,21 @@ +# Chronos k6 Integration Tests + +The k6 image is built with `xk6-kafka` so tests can publish to and consume from the Chronos Kafka topics. k6 run metrics are exported with the built-in OpenTelemetry output. + +## Targets + +- `make k6.build` builds the custom k6 image. +- `make k6.contract` runs one pass through the important Chronos processing paths. +- `make k6.load` runs a constant-arrival-rate producer load test. The default profile tops out at 100 messages/sec. +- `K6_FULL_LOAD=true make k6.load` runs the full load profile at 1,000 messages/sec for one minute. +- `make k6.test` runs contract and load tests. + +The full load profile is a production-scale signal, not a guaranteed local-dev pass. It depends on k6 producer speed, k6 consumer drain speed, Docker host capacity, Kafka throughput, PostgreSQL throughput, and Chronos capacity. It may require production-like infrastructure to satisfy the 1,000 messages/sec throughput target and the 500 ms p99.9 observed scheduling jitter threshold. + +The load test publishes a default mix of 10% immediate messages and 90% delayed messages. Immediate messages use an already-expired deadline and exercise the receiver-to-Kafka path. Delayed messages use a future deadline, are inserted into PostgreSQL, and exercise the processor-to-Kafka path. Set `K6_LOAD_IMMEDIATE_RATIO` to change the immediate-message fraction, `K6_LOAD_IMMEDIATE_DELAY_MS` to change the immediate deadline offset, and `K6_LOAD_DELAY_MS` to change the delayed deadline offset. + +The load test records `chronos_scheduling_jitter` only for delayed messages, using the Kafka output record timestamp minus the requested scheduled timestamp. It does not use the time k6 consumes or drains the output topic. Immediate messages record `chronos_immediate_output_delay` from the Kafka output record timestamp minus the input publish timestamp. 
/**
 * Build the ISO-8601 timestamp that lies `offsetMs` milliseconds from now.
 * A negative offset yields a timestamp in the past.
 */
function deadline(offsetMs) {
  const targetEpochMs = Date.now() + offsetMs;
  const target = new Date(targetEpochMs);
  return target.toISOString();
}
/**
 * Convert a consumed Kafka field to a string.
 *
 * Strings are returned unchanged. Anything else is treated as a sequence of
 * character codes (a missing value counts as an empty sequence) and mapped
 * one code per character.
 */
function bytesToString(value) {
  if (typeof value === "string") {
    return value;
  }
  const codes = Array.from(value || []);
  // `apply` passes every element as a separate call argument and engines cap
  // the argument count, so large payloads are converted slice by slice.
  const sliceSize = 8192;
  let text = "";
  for (let start = 0; start < codes.length; start += sliceSize) {
    text += String.fromCharCode.apply(null, codes.slice(start, start + sliceSize));
  }
  return text;
}
// k6 `teardown` export: closes the module-level Kafka producer and consumer
// that this script creates at load time.
export function teardown() {
  producer.close();
  consumer.close();
}
/**
 * Convert a k6-style duration string to seconds.
 *
 * Accepts one or more `<number><unit>` segments with units `ms`, `s`, `m`,
 * `h`, or `d`, e.g. "90s", "1m30s", "1.5m". Single-unit inputs produce the
 * same values as before; compound and fractional inputs used to fall back
 * silently to 60, which skewed the expected message count.
 *
 * @param {*} value Duration; it is stringified before parsing.
 * @returns {number} Seconds, or 60 when the value cannot be parsed.
 */
function durationSeconds(value) {
  const text = String(value);
  const unitMs = { ms: 1, s: 1000, m: 60000, h: 3600000, d: 86400000 };
  // The whole string must consist of duration segments; anything else keeps
  // the historical 60-second fallback.
  if (!/^(?:\d+(?:\.\d+)?(?:ms|s|m|h|d))+$/.test(text)) {
    return 60;
  }
  const segment = /(\d+(?:\.\d+)?)(ms|s|m|h|d)/g;
  let totalMs = 0;
  let match = segment.exec(text);
  while (match !== null) {
    totalMs += Number(match[1]) * unitMs[match[2]];
    match = segment.exec(text);
  }
  // Sum in milliseconds and divide once so "500ms" stays exactly 0.5.
  return totalMs / 1000;
}
/**
 * Convert a consumed Kafka field to a string.
 *
 * Strings are returned unchanged. Anything else is treated as a sequence of
 * character codes (a missing value counts as an empty sequence) and mapped
 * one code per character.
 */
function bytesToString(value) {
  if (typeof value === "string") {
    return value;
  }
  const codes = Array.from(value || []);
  // `apply` passes every element as a separate call argument and engines cap
  // the argument count, so large payloads are converted slice by slice.
  const sliceSize = 8192;
  let text = "";
  for (let start = 0; start < codes.length; start += sliceSize) {
    text += String.fromCharCode.apply(null, codes.slice(start, start + sliceSize));
  }
  return text;
}
/**
 * Scenario function for the output-drain VU: read one batch from the output
 * topic and record metrics for the records that belong to this run.
 *
 * @param {{runId: string}} data Value returned by `setup()`.
 */
export function consumeOutput(data) {
  // Read at most 500 records per iteration.
  const messages = getConsumer(data).consume({ maxMessages: 500, expectTimeout: true });
  let matched = 0;
  for (const message of messages) {
    const value = bytesToString(message.value);
    // Ignore records that do not mention this run's id.
    if (!value.includes(data.runId)) {
      continue;
    }
    // NOTE(review): JSON.parse throws on a non-JSON record that contains the
    // run id, which would end this iteration mid-batch; confirm acceptable.
    const parsed = JSON.parse(value);
    // Count each message id once; the id is marked as seen before the
    // timestamp checks, so a record with a bad timestamp is not retried.
    if (seen[parsed.message_id]) {
      continue;
    }
    seen[parsed.message_id] = true;
    // Latency is measured from the output record's own timestamp, not from
    // the moment this VU reads it.
    const outputPublishedAtMs = Date.parse(message.time);
    if (Number.isNaN(outputPublishedAtMs)) {
      timestampErrors.add(1);
      continue;
    }
    consumed.add(1, { chronos_path: parsed.chronos_path || "unknown" });
    if (parsed.chronos_path === "delayed") {
      const scheduledAtMs = Number(parsed.scheduled_at_ms);
      if (Number.isNaN(scheduledAtMs)) {
        timestampErrors.add(1);
        continue;
      }
      // Jitter is clamped at zero, so early deliveries count as on time.
      schedulingJitter.add(Math.max(0, outputPublishedAtMs - scheduledAtMs), { chronos_path: "delayed" });
    } else {
      // Every path other than "delayed" is recorded as immediate.
      const publishedAtMs = Number(parsed.published_at_ms);
      if (Number.isNaN(publishedAtMs)) {
        timestampErrors.add(1);
        continue;
      }
      immediateOutputDelay.add(Math.max(0, outputPublishedAtMs - publishedAtMs), { chronos_path: "immediate" });
    }
    matched += 1;
  }
  // Pause briefly when nothing in this batch was recorded.
  if (matched === 0) {
    sleep(0.1);
  }
}
#!/usr/bin/env sh

# Probe each HTTP readiness endpoint in turn; with `set -e` the first failing
# probe ends the script with a non-zero status.
set -eu

# check_service NAME URL
check_service() {
  echo "Checking $1 at $2"
  curl -sf "$2" >/dev/null
}

# Name/URL pairs, consumed two at a time by the loop below.
set -- \
  "Grafana" "http://127.0.0.1:3000/api/health" \
  "Loki" "http://127.0.0.1:3100/ready" \
  "Tempo" "http://127.0.0.1:3200/ready" \
  "Pyroscope" "http://127.0.0.1:4040/ready" \
  "Prometheus" "http://127.0.0.1:9090/-/ready" \
  "OpenTelemetry Collector" "http://127.0.0.1:13133/ready"

while [ "$#" -ge 2 ]; do
  check_service "$1" "$2"
  shift 2
done

echo "All LGTM services healthy"
receivers:
  otlp:
    protocols:
      grpc:
        endpoint: 0.0.0.0:4317
      http:
        endpoint: 0.0.0.0:4318
        cors:
          allowed_origins:
            - "http://*"
  prometheus/collector:
    config:
      scrape_configs:
        - job_name: "opentelemetry-collector"
          scrape_interval: 1s
          static_configs:
            - targets: ["127.0.0.1:8888"]
  # Tails the JSON-lines files written into the shared runtime log directory.
  file_log/lgtm:
    include:
      - "/data/lgtm/logs/*.jsonl"
    include_file_name: true
    include_file_path: true
    start_at: beginning

extensions:
  health_check:
    endpoint: 0.0.0.0:13133
    path: "/ready"

processors:
  transform/lgtm_logs:
    log_statements:
      - context: log
        statements:
          # Derive service.name from the log file name (chronos.jsonl gives
          # "chronos") when the record has none. The capture group must be
          # named service_name because the result is indexed by that key.
          - 'set(resource.attributes["service.name"], ExtractPatterns(attributes["log.file.name"], "^(?P<service_name>.*)\\.jsonl$")["service_name"]) where resource.attributes["service.name"] == nil'
  batch:

exporters:
  otlp_http/metrics:
    endpoint: http://127.0.0.1:9090/api/v1/otlp
    tls:
      insecure: true
  otlp_http/traces:
    endpoint: http://127.0.0.1:4418
    tls:
      insecure: true
  otlp_http/logs:
    endpoint: http://127.0.0.1:3100/otlp
    tls:
      insecure: true
  otlp/profiles:
    endpoint: http://127.0.0.1:4040
    tls:
      insecure: true
  # The debug exporters are defined but not referenced by any pipeline below.
  debug/metrics:
    verbosity: detailed
  debug/traces:
    verbosity: detailed
  debug/logs:
    verbosity: detailed

service:
  telemetry:
    logs:
      encoding: json
    metrics:
      level: detailed
  extensions: [health_check]
  pipelines:
    traces:
      receivers: [otlp]
      processors: [batch]
      exporters: [otlp_http/traces]
    metrics:
      receivers: [otlp, prometheus/collector]
      processors: [batch]
      exporters: [otlp_http/metrics]
    logs:
      receivers: [otlp, file_log/lgtm]
      processors: [transform/lgtm_logs, batch]
      exporters: [otlp_http/logs]
    profiles:
      receivers: [otlp]
      exporters: [otlp/profiles]
static_configs: + - targets: ["cadvisor:8080"] + + - job_name: postgres-exporter + static_configs: + - targets: ["postgres-exporter:9187"] + + - job_name: kminion + static_configs: + - targets: ["kminion:8080"] + + - job_name: sql-exporter + static_configs: + - targets: ["sql-exporter:9399"] + + - job_name: grafana + static_configs: + - targets: ["127.0.0.1:3000"] + + - job_name: loki + static_configs: + - targets: ["127.0.0.1:3100"] + + - job_name: prometheus + static_configs: + - targets: ["127.0.0.1:9090"] + + - job_name: pyroscope + static_configs: + - targets: ["127.0.0.1:4040"] + + - job_name: tempo + static_configs: + - targets: ["127.0.0.1:3200"] + + - job_name: opentelemetry-collector + scrape_interval: 1s + static_configs: + - targets: ["127.0.0.1:8888"] diff --git a/dev/lgtm/sql_exporter.yaml b/dev/lgtm/sql_exporter.yaml new file mode 100644 index 0000000..ba77c2c --- /dev/null +++ b/dev/lgtm/sql_exporter.yaml @@ -0,0 +1,24 @@ +global: + scrape_timeout_offset: 500ms + min_interval: 15s + max_connections: 2 + max_idle_connections: 2 + +target: + name: chronos_db + data_source_name: "postgresql://admin:admin@postgres:5432/chronos_db?sslmode=disable" + collectors: + - chronos + enable_ping: true + +collectors: + - collector_name: chronos + metrics: + - metric_name: chronos_rows + type: gauge + help: "Number of rows stored in the Chronos hanger table." 
# Local execution of the GitHub Actions workflows through `act`.
# ACT_EVENT selects the event used for the CI workflow targets (act.ci,
# act.ci.job); the single-workflow targets always use workflow_dispatch.
ACT_EVENT ?= push
ACT_JOB ?= pre-commit
ACT_RUNNER_IMAGE ?= catthehacker/ubuntu:act-latest
ACT_ARTIFACT_DIR ?= /tmp/chronos-act-artifacts
ACT_EVENT_DIR ?= /tmp/chronos-act-events
ACT_ARTIFACT_ADDR ?= 127.0.0.1
ACT_FLAGS ?= -P ubuntu-latest=$(ACT_RUNNER_IMAGE) --artifact-server-path $(ACT_ARTIFACT_DIR) --artifact-server-addr $(ACT_ARTIFACT_ADDR)

CI_WORKFLOW ?= .github/workflows/CI.yaml
PRE_COMMIT_WORKFLOW ?= .github/workflows/pre-commit.yml
TEST_WORKFLOW ?= .github/workflows/test.yml
SCAN_WORKFLOW ?= .github/workflows/scan.yml
BUILD_BINARY_WORKFLOW ?= .github/workflows/build-binary.yml
BUILD_CONTAINER_WORKFLOW ?= .github/workflows/build-container.yml
SBOM_WORKFLOW ?= .github/workflows/sbom.yml

SBOM_TARGET_TYPE ?= release
SBOM_TARGET_REF ?= .

.PHONY: act.ci act.ci.job act.pre-commit workflow.pre-commit.act act.test act.scan act.build-binary act.build-container act.sbom act.sbom.container act.sbom.release

# Run the whole CI workflow. The event comes from ACT_EVENT (default: push),
# which was previously declared but ignored in favour of a literal `push`.
act.ci:
	mkdir -p "$(ACT_ARTIFACT_DIR)"
	act $(ACT_EVENT) -W "$(CI_WORKFLOW)" $(ACT_FLAGS)

# Run one job of the CI workflow, selected with ACT_JOB.
act.ci.job:
	mkdir -p "$(ACT_ARTIFACT_DIR)"
	act $(ACT_EVENT) -W "$(CI_WORKFLOW)" -j "$(ACT_JOB)" $(ACT_FLAGS)

act.pre-commit:
	act workflow_dispatch -W "$(PRE_COMMIT_WORKFLOW)" $(ACT_FLAGS)

## workflow.pre-commit.act: Run the pre-commit GitHub Actions workflow locally with act
workflow.pre-commit.act: act.pre-commit

act.test:
	act workflow_dispatch -W "$(TEST_WORKFLOW)" $(ACT_FLAGS)

act.scan:
	act workflow_dispatch -W "$(SCAN_WORKFLOW)" $(ACT_FLAGS)

act.build-binary:
	mkdir -p "$(ACT_ARTIFACT_DIR)"
	act workflow_dispatch -W "$(BUILD_BINARY_WORKFLOW)" $(ACT_FLAGS)

act.build-container:
	act workflow_dispatch -W "$(BUILD_CONTAINER_WORKFLOW)" $(ACT_FLAGS)

# Write the workflow_dispatch inputs to an event file, then run the SBOM
# workflow with it.
act.sbom:
	mkdir -p "$(ACT_ARTIFACT_DIR)" "$(ACT_EVENT_DIR)"
	printf '{"inputs":{"target-type":"%s","target-ref":"%s"}}\n' "$(SBOM_TARGET_TYPE)" "$(SBOM_TARGET_REF)" > "$(ACT_EVENT_DIR)/sbom.json"
	act workflow_dispatch -W "$(SBOM_WORKFLOW)" -e "$(ACT_EVENT_DIR)/sbom.json" $(ACT_FLAGS)

act.sbom.container:
	$(MAKE) -f dev/makefiles/act.mk act.sbom SBOM_TARGET_TYPE=container

act.sbom.release:
	$(MAKE) -f dev/makefiles/act.mk act.sbom SBOM_TARGET_TYPE=release
# Local development targets. Most recipes load .env (created from
# .env.example when missing) into the environment before running cargo.
RECIPE ?= help

## setup: Check local development dependencies and prepare .env
setup:
	$(call pp,checking development dependencies...)
	$(call require_cmd,cargo)
	$(call require_cmd,rustup)
	$(call require_cmd,docker)
	$(call require_cmd,curl)
	$(call require_cmd,awk)
	@test -e .env || cp .env.example .env
	@rustup component list --installed | grep -q '^rustfmt' || { echo 'Missing Rust component: rustfmt. Install with: rustup component add rustfmt' >&2; exit 1; }
	@rustup component list --installed | grep -q '^clippy' || { echo 'Missing Rust component: clippy. Install with: rustup component add clippy' >&2; exit 1; }
	@printf 'Development dependencies look ready.\n'

# Recurse through $(MAKE) rather than a literal `make`, so the sub-make is the
# same binary and inherits this invocation's flags.
## withenv: Run a make recipe with variables loaded from .env, for example make withenv RECIPE=run
withenv:
	test -e .env || cp .env.example .env
	bash -c 'set -o allexport; source .env; set +o allexport; $(MAKE) "$(RECIPE)"'

## dev.init: Initialize local dev environment
dev.init: setup
	$(call pp,checking rust tests...)
	cargo test

dev.chronos_ex:
	$(call pp,creating kafka topic...)
	test -e .env || cp .env.example .env
	bash -c 'set -o allexport; source .env; set +o allexport; cargo run --example chronos_ex'

## pg.create: Create database
pg.create:
	$(call pp,creating database...)
	test -e .env || cp .env.example .env
	bash -c 'set -o allexport; source .env; set +o allexport; cargo run --example pg_create_database'

## pg.migrate: Run migrations on database
pg.migrate:
	$(call pp,running migrations on database...)
	test -e .env || cp .env.example .env
	bash -c 'set -o allexport; source .env; set +o allexport; cargo run --package pg_mig --bin chronos-pg-migrations'

## run: Run Chronos locally
run:
	$(call pp,run app...)
	test -e .env || cp .env.example .env
	bash -c 'set -o allexport; source .env; set +o allexport; cargo run --package chronos_bin --bin chronos'

## run.release: Run Chronos locally in release mode
run.release:
	$(call pp,run app...)
	test -e .env || cp .env.example .env
	bash -c 'set -o allexport; source .env; set +o allexport; cargo run --package chronos_bin -r --bin chronos'

# Uses cargo-watch when it is installed; otherwise runs Chronos once.
## dev.run: Run Chronos in cargo-watch mode
dev.run:
	$(call pp,run app...)
	test -e .env || cp .env.example .env
	@if cargo watch --version >/dev/null 2>&1; then \
		bash -c 'set -o allexport; source .env; set +o allexport; cargo watch -q -c -x "run --package chronos_bin --bin chronos"'; \
	else \
		printf 'cargo-watch not installed; falling back to one cargo run invocation.\n' >&2; \
		bash -c 'set -o allexport; source .env; set +o allexport; cargo run --package chronos_bin --bin chronos'; \
	fi

.PHONY: setup withenv dev.init dev.chronos_ex pg.create pg.migrate run run.release dev.run
+ $(DOCKER_COMPOSE_LGTM) down 2>/dev/null || true + $(DOCKER_COMPOSE_JAEGER) down 2>/dev/null || true + +## docker.config: Render the docker compose configuration +docker.config: + $(DOCKER_COMPOSE) config + +## docker.build: Build the Chronos, PostgreSQL migration, and k6 container images +docker.build: docker.build.chronos docker.build.migrations k6.build + +## docker.build.chronos: Build the Chronos container image +docker.build.chronos: + $(call pp,building Chronos container image $(CHRONOS_IMAGE)...) + docker build -f docker/Dockerfile.chronos -t $(CHRONOS_IMAGE) . + +## docker.build.migrations: Build the PostgreSQL migration container image +docker.build.migrations: + $(call pp,building Chronos PostgreSQL migration container image $(CHRONOS_MIGRATIONS_IMAGE)...) + docker build -f docker/Dockerfile.chronos-pg-migrations -t $(CHRONOS_MIGRATIONS_IMAGE) . + +## docker.up: Legacy alias for make up +docker.up: up + +## docker.down: Legacy alias for make down +docker.down: down + +jaeger lgtm: + @: + +.PHONY: up down docker.config docker.build docker.build.chronos docker.build.migrations docker.up docker.down jaeger lgtm diff --git a/dev/makefiles/integration.mk b/dev/makefiles/integration.mk new file mode 100644 index 0000000..3e58e2d --- /dev/null +++ b/dev/makefiles/integration.mk @@ -0,0 +1,12 @@ +## integration: Start deps, migrate, run Chronos, publish test message, verify metrics +integration: build + $(call pp,running integration test...) + @bash scripts/integration.sh + +## integration.down: Stop docker services started by make integration +integration.down: + $(call pp,stopping integration services...) 
+ docker compose --project-name chronos -f dev/docker-compose/compose.yaml stop postgres kafka 2>/dev/null || true + docker compose --project-name chronos -f dev/docker-compose/compose.yaml rm -f postgres kafka 2>/dev/null || true + +.PHONY: integration integration.down diff --git a/dev/makefiles/k6.mk b/dev/makefiles/k6.mk new file mode 100644 index 0000000..e549cba --- /dev/null +++ b/dev/makefiles/k6.mk @@ -0,0 +1,54 @@ +K6_VERSION ?= 1.7.1 +XK6_KAFKA_VERSION ?= latest +K6_IMAGE ?= chronos-k6:$(K6_VERSION) +K6_LOG_DIR ?= $(PWD)/dev/lgtm/runtime-logs +K6_RUN_ID ?= chronos-k6-$(shell date +%Y%m%d%H%M%S) +K6_CI_OTLP_ENDPOINT ?= host.docker.internal:4317 +K6_DEFAULT_OTEL_ENDPOINT := $(if $(and $(GITHUB_ACTIONS),$(if $(ACT),,1)),$(K6_CI_OTLP_ENDPOINT),lgtm:4317) +K6_OTEL_GRPC_EXPORTER_ENDPOINT ?= $(K6_DEFAULT_OTEL_ENDPOINT) +K6_DOCKER_NETWORK ?= $(if $(and $(GITHUB_ACTIONS),$(if $(ACT),,1)),bridge,chronos) +K6_FULL_LOAD ?= false +K6_LOAD_DEFAULT_RATE := $(if $(filter true 1 yes,$(K6_FULL_LOAD)),1000,100) +K6_LOAD_DEFAULT_DURATION := $(if $(filter true 1 yes,$(K6_FULL_LOAD)),1m,1m) +K6_LOAD_DEFAULT_CONSUME_DURATION := $(if $(filter true 1 yes,$(K6_FULL_LOAD)),2m,90s) +K6_LOAD_PROFILE := $(if $(filter true 1 yes,$(K6_FULL_LOAD)),full load,load) +K6_COMMON_ENV := \ + -e KAFKA_BROKERS=$${KAFKA_BROKERS:-kafka:9092} \ + -e KAFKA_IN_TOPIC=$${KAFKA_IN_TOPIC:-chronos.in} \ + -e KAFKA_OUT_TOPIC=$${KAFKA_OUT_TOPIC:-chronos.out} \ + -e K6_OTEL_SERVICE_NAME=$${K6_OTEL_SERVICE_NAME:-k6-chronos} \ + -e K6_OTEL_METRIC_PREFIX=$${K6_OTEL_METRIC_PREFIX:-k6_} \ + -e K6_OTEL_GRPC_EXPORTER_INSECURE=$${K6_OTEL_GRPC_EXPORTER_INSECURE:-true} \ + -e K6_OTEL_GRPC_EXPORTER_ENDPOINT=$(K6_OTEL_GRPC_EXPORTER_ENDPOINT) \ + -e K6_RUN_ID=$(K6_RUN_ID) +K6_DOCKER_RUN := docker run --rm --cpus 1 --memory 1g --network $(K6_DOCKER_NETWORK) --add-host=host.docker.internal:host-gateway -v "$(PWD)/dev/k6:/scripts:ro" -v "$(K6_LOG_DIR):/data/lgtm/logs" $(K6_COMMON_ENV) + +## k6.build: Build the custom k6 
image with xk6-kafka +k6.build: + $(call pp,building k6 image $(K6_IMAGE) with k6 $(K6_VERSION) and xk6-kafka $(XK6_KAFKA_VERSION)...) + docker build -f docker/Dockerfile.k6 --build-arg K6_VERSION=$(K6_VERSION) --build-arg XK6_KAFKA_VERSION=$(XK6_KAFKA_VERSION) -t $(K6_IMAGE) . + +## k6.contract: Run the k6 Chronos contract integration test with OTLP output +k6.contract: + $(call pp,running k6 contract test with OTLP endpoint $(K6_OTEL_GRPC_EXPORTER_ENDPOINT)...) + mkdir -p "$(K6_LOG_DIR)" + $(K6_DOCKER_RUN) --entrypoint bash $(K6_IMAGE) -lc 'k6 run --out opentelemetry /scripts/contract.js 2>&1 | tee -a /data/lgtm/logs/k6-contract.jsonl; exit $${PIPESTATUS[0]}' + +## k6.load: Run the k6 Chronos load test with OTLP output. Use K6_FULL_LOAD=true for the 1,000 rps full load profile +k6.load: + $(call pp,running k6 $(K6_LOAD_PROFILE) test with OTLP endpoint $(K6_OTEL_GRPC_EXPORTER_ENDPOINT)...) + mkdir -p "$(K6_LOG_DIR)" + $(K6_DOCKER_RUN) \ + -e K6_LOAD_RATE=$${K6_LOAD_RATE:-$(K6_LOAD_DEFAULT_RATE)} \ + -e K6_LOAD_DURATION=$${K6_LOAD_DURATION:-$(K6_LOAD_DEFAULT_DURATION)} \ + -e K6_LOAD_CONSUME_DURATION=$${K6_LOAD_CONSUME_DURATION:-$(K6_LOAD_DEFAULT_CONSUME_DURATION)} \ + -e K6_LOAD_DELAY_MS=$${K6_LOAD_DELAY_MS:-1000} \ + -e K6_LOAD_IMMEDIATE_DELAY_MS=$${K6_LOAD_IMMEDIATE_DELAY_MS:--1000} \ + -e K6_LOAD_IMMEDIATE_RATIO=$${K6_LOAD_IMMEDIATE_RATIO:-0.1} \ + -e K6_LOAD_EXPECTED_MESSAGES=$${K6_LOAD_EXPECTED_MESSAGES:-} \ + --entrypoint bash $(K6_IMAGE) -lc 'k6 run --out opentelemetry /scripts/load.js 2>&1 | tee -a /data/lgtm/logs/k6-load.jsonl; exit $${PIPESTATUS[0]}' + +## k6.test: Run k6 contract and load integration tests +k6.test: k6.contract k6.load + +.PHONY: k6.build k6.contract k6.load k6.test diff --git a/dev/makefiles/rust.mk b/dev/makefiles/rust.mk new file mode 100644 index 0000000..f0ec866 --- /dev/null +++ b/dev/makefiles/rust.mk @@ -0,0 +1,133 @@ +EXPORTER ?= prom +WEAVER_VERSION ?= 0.23.0 +WEAVER_IMAGE ?= otel/weaver:v$(WEAVER_VERSION) +WEAVER_TARGET ?= 
production +WEAVER_LIVE_CHECK_PORT ?= 4319 +WEAVER_LIVE_CHECK_ADMIN_PORT ?= 4320 +WEAVER_LIVE_CHECK_OUT ?= /tmp/chronos-weaver-live-check + +ifeq ($(WEAVER_TARGET),production) +WEAVER_REGISTRY ?= dev/weaver/production/registry +WEAVER_TEMPLATES ?= dev/weaver/production/templates +WEAVER_RUST_OUT ?= chronos_bin/src/metrics/generated +WEAVER_DOCS_OUT ?= docs +WEAVER_SCHEMA_OUT ?= docs/schema +else ifeq ($(WEAVER_TARGET),example) +WEAVER_REGISTRY ?= examples/weaver/registry +WEAVER_TEMPLATES ?= examples/weaver/templates +WEAVER_RUST_OUT ?= examples/weaver/generated +WEAVER_DOCS_OUT ?= examples/weaver/generated +WEAVER_SCHEMA_OUT ?= examples/weaver/generated +else +$(error Unsupported WEAVER_TARGET=$(WEAVER_TARGET); use production or example) +endif + +## build: Build Rust binaries +build: + $(MAKE) weaver.generate WEAVER_TARGET=production + $(call pp,build rust...) + cargo build + +## fmt: Format Rust code +fmt: + $(call pp,format rust...) + cargo fmt + +## lint: Check Rust formatting, clippy, and cargo check +lint: + $(call pp,lint rust...) + RUSTFLAGS="-D warnings" cargo check + cargo fmt -- --check + RUSTFLAGS="-D warnings" cargo clippy --all-targets -- -D warnings + +## test: Run Rust unit tests +test: test.unit + +## test.unit: Run Rust unit tests +test.unit: + $(call pp,rust unit tests...) + RUSTFLAGS="-D warnings" cargo test + +## pre-commit: Run pre-commit checks +pre-commit: lint test.unit + +## test.unit.coverage: Run Rust unit tests with coverage reports +test.unit.coverage: + $(call pp,rust unit tests...) + sh scripts/coverage-report.sh + +## metrics.check: Verify /metrics endpoint responds and print its first 20 lines (fails when curl fails) +metrics.check: + $(call pp,check metrics endpoint...) + metrics=$$(curl -sf "http://localhost:$${OTEL_EXPORTER_PROMETHEUS_PORT:-$${METRICS_PORT:-9090}}/metrics") && printf '%s\n' "$$metrics" | head -20 + +## metrics.mock: Run Prometheus/OTLP metrics mock example with EXPORTER=prom|otlp +metrics.mock: + $(call pp,run metrics mock example with exporter $(EXPORTER)...) 
+ @case "$(EXPORTER)" in \ + prom|prometheus) OTEL_METRICS_EXPORTER=prometheus OTEL_EXPORTER_PROMETHEUS_HOST=$${OTEL_EXPORTER_PROMETHEUS_HOST:-127.0.0.1} OTEL_EXPORTER_PROMETHEUS_PORT=$${OTEL_EXPORTER_PROMETHEUS_PORT:-9092} cargo run --package prom_otlp_mock_runner --bin prom_otlp_mock ;; \ + otlp) OTEL_SERVICE_NAME=chronos-metrics-mock OTEL_RESOURCE_ATTRIBUTES=service.instance.id=chronos-metrics-mock-local OTEL_METRICS_EXPORTER=otlp OTEL_EXPORTER_OTLP_PROTOCOL=grpc OTEL_EXPORTER_OTLP_METRICS_ENDPOINT=$${OTEL_EXPORTER_OTLP_METRICS_ENDPOINT:-http://127.0.0.1:4317} OTEL_METRIC_EXPORT_INTERVAL=$${OTEL_METRIC_EXPORT_INTERVAL:-1000} cargo run --package prom_otlp_mock_runner --bin prom_otlp_mock ;; \ + *) echo "unsupported EXPORTER=$(EXPORTER); use EXPORTER=prom or EXPORTER=otlp" >&2; exit 2 ;; \ + esac + +## weaver.check: Validate the selected Chronos Weaver registry with WEAVER_TARGET=production|example +weaver.check: + $(call pp,check $(WEAVER_TARGET) Weaver registry with $(WEAVER_IMAGE)...) + docker run --rm -v "$(PWD):/work" -w /work $(WEAVER_IMAGE) registry check -r $(WEAVER_REGISTRY) + +## weaver.generate.rust: Generate selected Rust metric definitions with WEAVER_TARGET=production|example +weaver.generate.rust: + $(call pp,generate $(WEAVER_TARGET) Rust metric definitions with $(WEAVER_IMAGE)...) + docker run --rm -v "$(PWD):/work" -w /work $(WEAVER_IMAGE) registry generate -r $(WEAVER_REGISTRY) --templates $(WEAVER_TEMPLATES) rust $(WEAVER_RUST_OUT) + rustfmt --config-path rustfmt.toml $(WEAVER_RUST_OUT)/chronos_metric_definitions.rs + +## weaver.generate.docs: Generate selected Chronos metrics docs with WEAVER_TARGET=production|example +weaver.generate.docs: + $(call pp,generate $(WEAVER_TARGET) metrics markdown docs with $(WEAVER_IMAGE)...) 
+ docker run --rm -v "$(PWD):/work" -w /work $(WEAVER_IMAGE) registry generate -r $(WEAVER_REGISTRY) --templates $(WEAVER_TEMPLATES) markdown $(WEAVER_DOCS_OUT) + +## weaver.generate.schema: Generate selected Weaver resolved-registry JSON schema with WEAVER_TARGET=production|example +weaver.generate.schema: + $(call pp,generate $(WEAVER_TARGET) Weaver JSON schema with $(WEAVER_IMAGE)...) + mkdir -p $(WEAVER_SCHEMA_OUT) + docker run --rm -v "$(PWD):/work" -w /work $(WEAVER_IMAGE) registry json-schema -o $(WEAVER_SCHEMA_OUT)/resolved-registry.schema.json + +## weaver.generate: Generate selected Weaver Rust, docs, and schema artifacts with WEAVER_TARGET=production|example +weaver.generate: weaver.generate.rust weaver.generate.docs weaver.generate.schema + +## weaver.live-check: Run Weaver live-check against the OTLP metrics mock +weaver.live-check: + $(call pp,run Weaver live-check against metrics mock...) + @set -euo pipefail; \ + cargo build --package prom_otlp_mock_runner; \ + rm -rf "$(WEAVER_LIVE_CHECK_OUT)"; \ + mkdir -p "$(WEAVER_LIVE_CHECK_OUT)"; \ + chmod 0777 "$(WEAVER_LIVE_CHECK_OUT)"; \ + docker run --rm --network host \ + -v "$(PWD):/work" \ + -v "$(WEAVER_LIVE_CHECK_OUT):/out" \ + -w /work \ + $(WEAVER_IMAGE) registry live-check \ + -r $(WEAVER_REGISTRY) \ + --input-source otlp \ + --otlp-grpc-address 127.0.0.1 \ + --otlp-grpc-port $(WEAVER_LIVE_CHECK_PORT) \ + --admin-port $(WEAVER_LIVE_CHECK_ADMIN_PORT) \ + --inactivity-timeout 5 \ + --no-stream \ + --format json \ + -o /out & \ + live_check_pid=$$!; \ + trap 'kill "$$live_check_pid" 2>/dev/null || true' EXIT; \ + sleep 2; \ + OTEL_SERVICE_NAME=chronos-metrics-mock \ + OTEL_RESOURCE_ATTRIBUTES=service.instance.id=chronos-metrics-mock-live-check \ + OTEL_METRICS_EXPORTER=otlp \ + OTEL_EXPORTER_OTLP_PROTOCOL=grpc \ + OTEL_EXPORTER_OTLP_METRICS_ENDPOINT=http://127.0.0.1:$(WEAVER_LIVE_CHECK_PORT) \ + OTEL_METRIC_EXPORT_INTERVAL=500 \ + timeout -s INT 10 cargo run --quiet --package prom_otlp_mock_runner 
--bin prom_otlp_mock || test "$$?" -eq 124; \ + wait "$$live_check_pid"; \ + find "$(WEAVER_LIVE_CHECK_OUT)" -maxdepth 1 -type f -print + +.PHONY: build fmt lint test test.unit pre-commit test.unit.coverage metrics.check metrics.mock weaver.check weaver.generate.rust weaver.generate.docs weaver.generate.schema weaver.generate weaver.live-check diff --git a/dev/makefiles/telemetry.mk b/dev/makefiles/telemetry.mk new file mode 100644 index 0000000..59bea7b --- /dev/null +++ b/dev/makefiles/telemetry.mk @@ -0,0 +1,24 @@ +LGTM_IMAGE ?= grafana/otel-lgtm:0.24.1 + +## lgtm.validate: Validate LGTM Prometheus and OpenTelemetry Collector configs +lgtm.validate: + $(call pp,validate LGTM Prometheus config with $(LGTM_IMAGE)...) + docker run --rm \ + -v "$(PWD)/dev/lgtm/prometheus.yaml:/otel-lgtm/prometheus.yaml:ro" \ + --entrypoint /otel-lgtm/prometheus/promtool \ + $(LGTM_IMAGE) check config /otel-lgtm/prometheus.yaml + $(call pp,validate LGTM OpenTelemetry Collector config with $(LGTM_IMAGE)...) 
+ docker run --rm \ + -v "$(PWD)/dev/lgtm/otelcol-contrib.yaml:/otel-lgtm/otelcol-config.yaml:ro" \ + --entrypoint /otel-lgtm/otelcol-contrib/otelcol-contrib \ + $(LGTM_IMAGE) validate --config=file:/otel-lgtm/otelcol-config.yaml --feature-gates=service.profilesSupport + +## lgtm.up: Start the docker compose stack with Grafana LGTM +lgtm.up: + $(MAKE) up BACKEND=lgtm + +## lgtm.down: Stop the docker compose stack with Grafana LGTM +lgtm.down: + $(MAKE) down BACKEND=lgtm + +.PHONY: lgtm.validate lgtm.up lgtm.down diff --git a/infra/otelcol-config.yml b/dev/otel/otelcol-config.yml similarity index 100% rename from infra/otelcol-config.yml rename to dev/otel/otelcol-config.yml diff --git a/dev/weaver/production/registry/chronos/metrics.yaml b/dev/weaver/production/registry/chronos/metrics.yaml new file mode 100644 index 0000000..0b37840 --- /dev/null +++ b/dev/weaver/production/registry/chronos/metrics.yaml @@ -0,0 +1,134 @@ +groups: + - id: resource.chronos.service + type: attribute_group + stability: development + brief: Resource attributes emitted by the Chronos metrics mock. + attributes: + - id: service.name + type: string + stability: stable + brief: Logical name of the service. + examples: ["chronos-metrics-mock"] + requirement_level: required + - id: service.instance.id + type: string + stability: stable + brief: The string ID of the service instance. + examples: ["chronos-metrics-mock-live-check"] + requirement_level: required + + - id: metric_attributes.chronos.consume_result + type: attribute_group + stability: development + brief: Attributes for Chronos input message handling outcomes. + attributes: + - id: chronos.destination + type: string + stability: development + brief: Downstream selected by message_receiver::handle_message. + examples: ["kafka", "postgres"] + requirement_level: required + - id: chronos.consume.status + type: string + stability: development + brief: Whether the consume path completed successfully. 
+ examples: ["pass", "fail"] + requirement_level: required + + - id: metric_attributes.chronos.process_result + type: attribute_group + stability: development + brief: Attributes for Chronos ready-message processor loop outcomes. + attributes: + - id: chronos.processor.returned + type: string + stability: development + brief: Whether the processor loop returned early because no rows were ready. + examples: ["true", "false"] + requirement_level: required + - id: chronos.process.status + type: string + stability: development + brief: Whether the processor loop completed successfully. + examples: ["pass", "fail"] + requirement_level: required + + - id: metric.chronos.message.consume.duration + type: metric + metric_name: chronos.message.consume.duration + stability: development + brief: Duration of handle_message() in message_receiver. + instrument: histogram + unit: s + extends: metric_attributes.chronos.consume_result + annotations: + code_generation: + rust_name: msg_consume_latency + metric_value_type: double + temporality: cumulative + buckets: [0.001, 0.002, 0.004, 0.008, 0.016, 0.032, 0.064, 0.128, 0.256, 0.512, 1.024, 2.048] + prewarm_label_values: + - [pass, kafka] + - [fail, kafka] + - [pass, postgres] + - [fail, postgres] + + - id: metric.chronos.message.process.duration + type: metric + metric_name: chronos.message.process.duration + stability: development + brief: Duration of processor_message_ready() loop in message_processor. 
+ instrument: histogram + unit: s + extends: metric_attributes.chronos.process_result + annotations: + code_generation: + rust_name: msg_process_latency + metric_value_type: double + temporality: cumulative + buckets: [0.001, 0.002, 0.004, 0.008, 0.016, 0.032, 0.064, 0.128, 0.256, 0.512, 1.024, 2.048] + prewarm_label_values: + - [pass, "true"] + - [fail, "true"] + - [pass, "false"] + - [fail, "false"] + + - id: metric.chronos.message.wait.duration + type: metric + metric_name: chronos.message.wait.duration + stability: development + brief: Time a message spent in the Kafka input queue before processing. + instrument: histogram + unit: s + annotations: + code_generation: + rust_name: msg_wait_time + metric_value_type: double + temporality: cumulative + buckets: [0.1, 0.2, 0.4, 0.8, 1.6, 3.2, 6.4, 12.8, 25.6, 51.2, 102.4, 204.8, 409.6, 819.2] + + - id: metric.chronos.message.jitter + type: metric + metric_name: chronos.message.jitter + stability: development + brief: Difference between actual publish time and client-requested deadline. + instrument: histogram + unit: s + annotations: + code_generation: + rust_name: msg_jitter + metric_value_type: double + temporality: cumulative + buckets: [0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0] + + - id: metric.chronos.message.reset + type: metric + metric_name: chronos.message.reset + stability: development + brief: Number of records reset by reset_to_init_db() in the monitor task. + instrument: counter + unit: "{message}" + annotations: + code_generation: + rust_name: msg_reset + metric_value_type: int diff --git a/dev/weaver/production/templates/registry/markdown/metrics.md.j2 b/dev/weaver/production/templates/registry/markdown/metrics.md.j2 new file mode 100644 index 0000000..7df98ee --- /dev/null +++ b/dev/weaver/production/templates/registry/markdown/metrics.md.j2 @@ -0,0 +1,9 @@ +# Chronos Metrics + +Generated from `dev/weaver/production/registry/chronos/metrics.yaml` by OpenTelemetry Weaver. 
+ +| Metric | Prometheus Name | Instrument | Unit | Attributes | Description | +| --- | --- | --- | --- | --- | --- | +{% for metric in ctx.metrics -%} +| `{{ metric.metric_name }}` | `{{ metric.prometheus_name }}` | `{{ metric.instrument }}` | `{{ metric.unit }}` | {% if metric.attributes %}{% for attribute in metric.attributes %}`{{ attribute }}`{% if not loop.last %}, {% endif %}{% endfor %}{% else %}-{% endif %} | {{ metric.brief }} | +{% endfor -%} diff --git a/dev/weaver/production/templates/registry/markdown/weaver.yaml b/dev/weaver/production/templates/registry/markdown/weaver.yaml new file mode 100644 index 0000000..ae68a0e --- /dev/null +++ b/dev/weaver/production/templates/registry/markdown/weaver.yaml @@ -0,0 +1,17 @@ +templates: + - pattern: metrics.md.j2 + filter: > + { + metrics: (.groups + | map(select(.type == "metric")) + | map({ + metric_name, + prometheus_name: (.metric_name | split(".") | join("_")), + brief, + instrument, + unit, + attributes: (.attributes // [] | map(.name // .id // .ref)) + })) + } + application_mode: single + file_name: chronos_metrics.md diff --git a/dev/weaver/production/templates/registry/rust/registry.rs.j2 b/dev/weaver/production/templates/registry/rust/registry.rs.j2 new file mode 100644 index 0000000..4f667b3 --- /dev/null +++ b/dev/weaver/production/templates/registry/rust/registry.rs.j2 @@ -0,0 +1,61 @@ +// Generated from dev/weaver/production/registry/chronos/metrics.yaml by OpenTelemetry Weaver. +// Do not edit by hand. 
+ +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub enum MetricId { +{%- for metric in ctx.metrics %} + {{ metric.rust_name | pascal_case }}, +{%- endfor %} +} + +#[derive(Clone, Copy, Debug)] +pub enum MetricKind { + Counter, + Histogram, +} + +impl MetricKind { + pub fn is_counter(self) -> bool { + matches!(self, Self::Counter) + } + + pub fn is_histogram(self) -> bool { + matches!(self, Self::Histogram) + } +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum MetricTemporality { + Cumulative, +} + +#[derive(Clone, Copy, Debug)] +pub struct MetricDefinition { + pub id: MetricId, + pub name: &'static str, + pub description: &'static str, + pub unit: Option<&'static str>, + pub label_names: &'static [&'static str], + pub kind: MetricKind, + pub temporality: Option, + pub buckets: Option<&'static [f64]>, + pub prewarm_label_values: &'static [&'static [&'static str]], +} + +pub const METRIC_DEFINITIONS: &[MetricDefinition] = &[ +{%- for metric in ctx.metrics %} + MetricDefinition { + id: MetricId::{{ metric.rust_name | pascal_case }}, + name: "{{ metric.metric_name }}", + description: "{{ metric.brief }}", + unit: {% if metric.unit %}Some("{{ metric.unit }}"){% else %}None{% endif %}, + label_names: &[{% for attribute in metric.attributes %}"{{ attribute }}"{% if not loop.last %}, {% endif %}{% endfor %}], + kind: MetricKind::{{ metric.instrument | pascal_case }}, + temporality: {% if metric.temporality == "cumulative" %}Some(MetricTemporality::Cumulative){% else %}None{% endif %}, + buckets: {% if metric.buckets %}{% if metric.buckets | length > 10 %}Some(&[ + {{ metric.buckets | join(", ") }}, + ]){% else %}Some(&[{{ metric.buckets | join(", ") }}]){% endif %}{% else %}None{% endif %}, + prewarm_label_values: &[{% for values in metric.prewarm_label_values %}&[{% for value in values %}"{{ value }}"{% if not loop.last %}, {% endif %}{% endfor %}]{% if not loop.last %}, {% endif %}{% endfor %}], + }, +{%- endfor %} +]; diff --git 
a/dev/weaver/production/templates/registry/rust/weaver.yaml b/dev/weaver/production/templates/registry/rust/weaver.yaml new file mode 100644 index 0000000..829701e --- /dev/null +++ b/dev/weaver/production/templates/registry/rust/weaver.yaml @@ -0,0 +1,21 @@ +templates: + - pattern: registry.rs.j2 + filter: > + { + metrics: (.groups + | map(select(.type == "metric")) + | map({ + id, + metric_name, + rust_name: .annotations.code_generation.rust_name, + brief, + instrument, + unit, + attributes: (.attributes // [] | map(.name // .id // .ref)), + temporality: .annotations.code_generation.temporality, + buckets: .annotations.code_generation.buckets, + prewarm_label_values: (.annotations.code_generation.prewarm_label_values // []) + })) + } + application_mode: single + file_name: chronos_metric_definitions.rs diff --git a/docker-compose.yml b/docker-compose.yml deleted file mode 100644 index 0f4f21f..0000000 --- a/docker-compose.yml +++ /dev/null @@ -1,179 +0,0 @@ -version: '3.1' -services: - -#postgres DB - # postgres: - # image: postgres:13.3 - # ports: - # - 5432:5432 - # volumes: - # - postgres:/var/lib/postgresql/data/ - # environment: - # POSTGRES_USER: admin - # POSTGRES_PASSWORD: admin - # POSTGRES_DB: chronos_db - # networks: - # - chronos - # migration / init container - # chronos-pg-mig: - # image: mig - # networks: - # - chronos - # working_dir: /opt/chronos - # volumes: - # - ../../:/opt/chronos - # environment: - # PG_HOST: postgres - # PG_PORT: 5432 - # PG_USER: admin - # PG_PASSWORD: admin - # PG_DATABASE: chronos_db - # PG_POOL_SIZE: 50 - # RUST_LOG: "${RUST_LOG:-info}" - # depends_on: - # - postgres - - # zookeeper: - # image: bitnami/zookeeper:3.7.0 - # ports: - # - 2180:2181 - # volumes: - # - zookeeper:/bitnami/zookeeper - # environment: - # ALLOW_ANONYMOUS_LOGIN: "yes" - # networks: - # - chronos - - # kafka: - # image: bitnami/kafka:2.8.0 - # ports: - # - 9092:9092 - # - 9093:9093 - # - 9094:9094 - # volumes: - # - kafka:/bitnami/kafka - # - 
./infra:/opt/infra - # environment: - # KAFKA_BROKER_ID: "1" - # KAFKA_CFG_LISTENERS: "INTERNAL://:9092, EXTERNAL://:9093, K8S://:9094" - # KAFKA_CFG_ADVERTISED_LISTENERS: "INTERNAL://kafka:9092, EXTERNAL://localhost:9093" - # KAFKA_CFG_LISTENER_SECURITY_PROTOCOL_MAP: "INTERNAL:PLAINTEXT, EXTERNAL:PLAINTEXT, K8S:PLAINTEXT" - # KAFKA_CFG_ZOOKEEPER_CONNECT: "zookeeper:2181" - # KAFKA_INTER_BROKER_LISTENER_NAME: "INTERNAL" - # KAFKA_INTER_BROKER_USER: "admin" - # KAFKA_INTER_BROKER_PASSWORD: "admin-secret" - # KAFKA_CFG_NUM_PARTITIONS: "1" - # KAFKA_LOG_RETENTION_BYTES: -1 - # KAFKA_LOG_RETENTION_MS: -1 - # KAFKA_CFG_AUTO_CREATE_TOPICS_ENABLE: "true" - # KAFKA_CFG_SUPER_USERS: "User:admin" - # KAFKA_CLIENT_USERS: "admin,kafdrop" - # KAFKA_CLIENT_PASSWORDS: "admin-secret,admin-secret" - # ALLOW_PLAINTEXT_LISTENER: "yes" - # networks: - # - chronos - # depends_on: - # - zookeeper - - # kowl: - # image: quay.io/cloudhut/kowl:master - # ports: - # - 9091:8080 - # environment: - # KAFKA_BROKERS: "kafka:9092" - # networks: - # - chronos - # depends_on: - # - kafka - # - zookeeper - - # chronos - # chronos-delay-scheduler: - # image: chronos - # networks: - # - chronos - # ports: - # - 8181:8181 - # working_dir: /opt/chronos - # volumes: - # - ../../:/opt/chronos - # environment: - # ENVIRONMENT: "dev" - # SERVICE_NAME: "chronos-delay-scheduler" - # BUILD_VERSION: "0.0.0" - # KAFKA_HOST: kafka - # KAFKA_PORT: 9092 - # KAFKA_CLIENT_ID: "chronos" - # KAFKA_GROUP_ID: "chronos" - # KAFKA_IN_TOPIC: "chronos.in" - # KAFKA_OUT_TOPIC: "chronos.out" - # KAFKA_USERNAME: - # KAFKA_PASSWORD: - # PG_HOST: postgres - # PG_PORT: 5432 - # PG_USER: admin - # PG_PASSWORD: admin - # PG_DATABASE: chronos_db - # PG_POOL_SIZE: 50 - # RUST_LOG: "${RUST_LOG:-info}" - # # App config (optional) - # # DELAY_TIME: 0 - # # RANDOMNESS_DELAY: 100 - # # MONITOR_POLL_INTERVAL: 5 - # # TIMING_ADVANCE: 0 - # # FAIL_DETECT_INTERVAL: 500 - # depends_on: - # - postgres - # - zookeeper - # - kafka - - # 
******************** - # Telemetry Components - # ******************** - # Jaeger - - jaeger-all-in-one: - image: jaegertracing/all-in-one:latest - ports: - - "16686:16686" - - "14268" - - "14250" - container_name: Jaeger - environment: - # COLLECTOR_OTLP_ENABLED is false in case running Jaeger as Backend - - COLLECTOR_OTLP_ENABLED=true - networks: - - chronos - - # Collector - otel-collector: - image: otel/opentelemetry-collector:latest - container_name: otelcol - networks: - - chronos - restart: unless-stopped - command: [ "--config=/etc/otelcol-config.yml" ] - volumes: - - ./infra/otelcol-config.yml:/etc/otelcol-config.yml - ports: - - "1888:1888" # pprof extension - - "13133:13133" # health_check extension - - "4317:4317" # OTLP gRPC receiver - - "4318:4318" # OTLP HTTP receiver - - "55670:55679" # zpages extension - depends_on: - - jaeger-all-in-one - - - -networks: - chronos: - name: chronos - -volumes: - postgres: - driver: local - zookeeper: - driver: local - kafka: - driver: local \ No newline at end of file diff --git a/Dockerfile.chronos b/docker/Dockerfile.chronos similarity index 76% rename from Dockerfile.chronos rename to docker/Dockerfile.chronos index 0a081e8..244ff86 100644 --- a/Dockerfile.chronos +++ b/docker/Dockerfile.chronos @@ -1,4 +1,4 @@ -FROM rust:1.75.0-bookworm AS BUILD +FROM rust:1.94-bookworm AS build # Install software RUN update-ca-certificates && apt-get update && apt-get install -y libsasl2-dev # Create appuser @@ -21,16 +21,16 @@ RUN cargo build -p chronos_bin --release # # Run image based on bookworm-slim to reduce image size while still using glibc # -FROM debian:bookworm-slim AS RUN +FROM debian:bookworm-slim AS run # SASL supports RUN apt-get update && apt-get install -y libsasl2-dev WORKDIR /opt/build # Import users from build -COPY --from=BUILD /etc/passwd /etc/passwd -COPY --from=BUILD /etc/group /etc/group +COPY --from=build /etc/passwd /etc/passwd +COPY --from=build /etc/group /etc/group # Copy binary from build -COPY 
--from=BUILD /tmp/target/release/chronos ./ +COPY --from=build /tmp/target/release/chronos ./ # Use an unprivileged user -USER ${USER}:${USER} +USER chronos:chronos # Entry point CMD ["/opt/build/chronos"] diff --git a/Dockerfile.chronos-pg-migrations b/docker/Dockerfile.chronos-pg-migrations similarity index 75% rename from Dockerfile.chronos-pg-migrations rename to docker/Dockerfile.chronos-pg-migrations index d6e7026..3a2971d 100644 --- a/Dockerfile.chronos-pg-migrations +++ b/docker/Dockerfile.chronos-pg-migrations @@ -1,4 +1,4 @@ -FROM rust:1.75.0-bookworm AS BUILD +FROM rust:1.94-bookworm AS build # Install software RUN update-ca-certificates && apt-get update && apt-get install -y libsasl2-dev # Create appuser @@ -21,16 +21,16 @@ RUN cargo build -p pg_mig --release # # Run image based on bookworm-slim to reduce image size while still using glibc # -FROM debian:bookworm-slim AS RUN +FROM debian:bookworm-slim AS run # SASL supports RUN apt-get update && apt-get install -y libsasl2-dev WORKDIR /opt/build # Import users from build -COPY --from=BUILD /etc/passwd /etc/passwd -COPY --from=BUILD /etc/group /etc/group +COPY --from=build /etc/passwd /etc/passwd +COPY --from=build /etc/group /etc/group # Copy binary from build -COPY --from=BUILD /tmp/target/release/chronos-pg-migrations ./ +COPY --from=build /tmp/target/release/chronos-pg-migrations ./ # Use an unprivileged user -USER ${USER}:${USER} +USER chronos:chronos # Entry point CMD ["/opt/build/chronos-pg-migrations"] diff --git a/docker/Dockerfile.chronos-slim b/docker/Dockerfile.chronos-slim new file mode 100644 index 0000000..0e9da01 --- /dev/null +++ b/docker/Dockerfile.chronos-slim @@ -0,0 +1,79 @@ +# syntax=docker/dockerfile:1 +# +# Dockerfile.chronos-slim — Alpine image for minimal production deployments. 
+# +# Key differences from Dockerfile.chronos ("fat" / glibc image): +# - Uses Alpine + musl to keep runtime dependencies small +# - Unit tests are executed during the build stage; the image build fails if +# any test fails +# - The final stage keeps only the small runtime packages needed by rdkafka +# +# To use this image in Docker Compose, change the chronos service to: +# build: +# context: . +# dockerfile: docker/Dockerfile.chronos-slim + +# ───────────────────────────────────────────────────────────────────────────── +# Build stage +# Alpine's musl toolchain is used throughout. rdkafka compiles librdkafka from +# source (cmake). The SASL feature requires Cyrus SASL. Alpine's static SASL +# archive pulls in GSSAPI, GDBM, and SQLite plugin symbols, so this image links +# SASL dynamically and carries the small runtime libraries in the final stage. +# librdkafka's configure script is run by rdkafka-sys and requires bash. +# ───────────────────────────────────────────────────────────────────────────── +FROM rust:1.94-alpine AS builder + +RUN apk add --no-cache \ + bash \ + musl-dev \ + cmake \ + make \ + g++ \ + cyrus-sasl-dev \ + openssl-dev \ + pkgconfig \ + perl + +WORKDIR /build +COPY ./ . + +# ── Run unit tests ─────────────────────────────────────────────────────────── +# Library unit tests run without external services (no Kafka or Postgres). +# Building the test binary also verifies that the release code compiles cleanly +# under musl. The image build is aborted here if any test fails. 
+RUN RUSTFLAGS="-C target-feature=-crt-static" \ + cargo test --lib -p chronos_bin + +# ── Build release binary ───────────────────────────────────────────────────── +# The release profile does not reuse the dev-profile artifacts produced by the +# test step above, so the workspace and its dependencies are compiled again here. +# +RUN RUSTFLAGS="-C target-feature=-crt-static" \ + cargo build --release -p chronos_bin + +# Minimal passwd/group entries for the non-root user in the runtime image +RUN printf 'chronos:x:1000:1000::/nonexistent:/sbin/nologin\n' > /tmp/passwd && \ + printf 'chronos:x:1000:\n' > /tmp/group + +# ───────────────────────────────────────────────────────────────────────────── +# Runtime stage. +# ───────────────────────────────────────────────────────────────────────────── +FROM alpine:3.23 + +RUN apk add --no-cache \ + ca-certificates \ + cyrus-sasl \ + cyrus-sasl-scram \ + libgcc \ + openssl + +# User and group files so the container runs as non-root (uid 1000) +COPY --from=builder /tmp/passwd /etc/passwd +COPY --from=builder /tmp/group /etc/group + +COPY --from=builder \ + /build/target/release/chronos \ + /chronos + +USER 1000:1000 +ENTRYPOINT ["/chronos"] diff --git a/docker/Dockerfile.k6 b/docker/Dockerfile.k6 new file mode 100644 index 0000000..e1c251a --- /dev/null +++ b/docker/Dockerfile.k6 @@ -0,0 +1,23 @@ +FROM golang:1.26-bookworm AS build + +ARG K6_VERSION=1.7.1 +ARG XK6_KAFKA_VERSION=latest + +RUN apt-get update \ + && apt-get install -y --no-install-recommends build-essential ca-certificates git pkg-config \ + && rm -rf /var/lib/apt/lists/* + +RUN go install 
go.k6.io/xk6/cmd/xk6@latest +RUN CGO_ENABLED=1 xk6 build --k6-version "v${K6_VERSION}" \ + --with "github.com/mostafa/xk6-kafka/v2@${XK6_KAFKA_VERSION}" \ + --output /tmp/k6 + +FROM debian:bookworm-slim + +RUN apt-get update \ + && apt-get install -y --no-install-recommends bash ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +COPY --from=build /tmp/k6 /usr/local/bin/k6 + +ENTRYPOINT ["k6"] diff --git a/docs/chronos_metrics.md b/docs/chronos_metrics.md new file mode 100644 index 0000000..8a5d111 --- /dev/null +++ b/docs/chronos_metrics.md @@ -0,0 +1,11 @@ +# Chronos Metrics + +Generated from `dev/weaver/production/registry/chronos/metrics.yaml` by OpenTelemetry Weaver. + +| Metric | Prometheus Name | Instrument | Unit | Attributes | Description | +| --- | --- | --- | --- | --- | --- | +| `chronos.message.consume.duration` | `chronos_message_consume_duration` | `histogram` | `s` | `chronos.consume.status`, `chronos.destination` | Duration of handle_message() in message_receiver. | +| `chronos.message.jitter` | `chronos_message_jitter` | `histogram` | `s` | - | Difference between actual publish time and client-requested deadline. | +| `chronos.message.process.duration` | `chronos_message_process_duration` | `histogram` | `s` | `chronos.process.status`, `chronos.processor.returned` | Duration of processor_message_ready() loop in message_processor. | +| `chronos.message.reset` | `chronos_message_reset` | `counter` | `{message}` | - | Number of records reset by reset_to_init_db() in the monitor task. | +| `chronos.message.wait.duration` | `chronos_message_wait_duration` | `histogram` | `s` | - | Time a message spent in the Kafka input queue before processing. 
| diff --git a/docs/schema/resolved-registry.schema.json b/docs/schema/resolved-registry.schema.json new file mode 100644 index 0000000..081f975 --- /dev/null +++ b/docs/schema/resolved-registry.schema.json @@ -0,0 +1,1830 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "ResolvedRegistry", + "description": "A resolved semantic convention registry used in the context of the template and policy\nengines.", + "type": "object", + "properties": { + "groups": { + "description": "A list of semantic convention groups.", + "type": "array", + "items": { + "$ref": "#/$defs/ResolvedGroup" + } + }, + "registry_url": { + "description": "The semantic convention registry url.", + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "registry_url", + "groups" + ], + "$defs": { + "AnyValueSpec": { + "description": "The AnyValueTypeSpec is a specification of a value that can be of any type.", + "oneOf": [ + { + "description": "A boolean attribute.", + "type": "object", + "properties": { + "brief": { + "description": "A brief description of the value", + "type": "string" + }, + "examples": { + "description": "Sequence of examples for the value or single example\nvalue. If only a single example is provided, it can\ndirectly be reported without encapsulating it\ninto a sequence/dictionary.", + "anyOf": [ + { + "$ref": "#/$defs/Examples" + }, + { + "type": "null" + } + ] + }, + "id": { + "description": "String that uniquely identifies the enum entry.", + "type": "string" + }, + "note": { + "description": "A more elaborate description of the value.\nIt defaults to an empty string.", + "type": "string" + }, + "requirement_level": { + "description": "Specifies if the field is mandatory. Can be \"required\",\n\"conditionally_required\", \"recommended\" or \"opt_in\". When omitted,\nthe field is \"recommended\". 
When set to\n\"conditionally_required\", the string provided as MUST\nspecify the conditions under which the field is required.", + "$ref": "#/$defs/RequirementLevel" + }, + "stability": { + "description": "Specifies the stability of the value.", + "anyOf": [ + { + "$ref": "#/$defs/Stability" + }, + { + "type": "null" + } + ] + }, + "type": { + "type": "string", + "const": "boolean" + } + }, + "required": [ + "type", + "id", + "requirement_level" + ] + }, + { + "description": "A integer attribute (signed 64 bit integer).", + "type": "object", + "properties": { + "brief": { + "description": "A brief description of the value", + "type": "string" + }, + "examples": { + "description": "Sequence of examples for the value or single example\nvalue. If only a single example is provided, it can\ndirectly be reported without encapsulating it\ninto a sequence/dictionary.", + "anyOf": [ + { + "$ref": "#/$defs/Examples" + }, + { + "type": "null" + } + ] + }, + "id": { + "description": "String that uniquely identifies the enum entry.", + "type": "string" + }, + "note": { + "description": "A more elaborate description of the value.\nIt defaults to an empty string.", + "type": "string" + }, + "requirement_level": { + "description": "Specifies if the field is mandatory. Can be \"required\",\n\"conditionally_required\", \"recommended\" or \"opt_in\". When omitted,\nthe field is \"recommended\". 
When set to\n\"conditionally_required\", the string provided as MUST\nspecify the conditions under which the field is required.", + "$ref": "#/$defs/RequirementLevel" + }, + "stability": { + "description": "Specifies the stability of the value.", + "anyOf": [ + { + "$ref": "#/$defs/Stability" + }, + { + "type": "null" + } + ] + }, + "type": { + "type": "string", + "const": "int" + } + }, + "required": [ + "type", + "id", + "requirement_level" + ] + }, + { + "description": "A double attribute (double precision floating point (IEEE 754-1985)).", + "type": "object", + "properties": { + "brief": { + "description": "A brief description of the value", + "type": "string" + }, + "examples": { + "description": "Sequence of examples for the value or single example\nvalue. If only a single example is provided, it can\ndirectly be reported without encapsulating it\ninto a sequence/dictionary.", + "anyOf": [ + { + "$ref": "#/$defs/Examples" + }, + { + "type": "null" + } + ] + }, + "id": { + "description": "String that uniquely identifies the enum entry.", + "type": "string" + }, + "note": { + "description": "A more elaborate description of the value.\nIt defaults to an empty string.", + "type": "string" + }, + "requirement_level": { + "description": "Specifies if the field is mandatory. Can be \"required\",\n\"conditionally_required\", \"recommended\" or \"opt_in\". When omitted,\nthe field is \"recommended\". 
When set to\n\"conditionally_required\", the string provided as MUST\nspecify the conditions under which the field is required.", + "$ref": "#/$defs/RequirementLevel" + }, + "stability": { + "description": "Specifies the stability of the value.", + "anyOf": [ + { + "$ref": "#/$defs/Stability" + }, + { + "type": "null" + } + ] + }, + "type": { + "type": "string", + "const": "double" + } + }, + "required": [ + "type", + "id", + "requirement_level" + ] + }, + { + "description": "A string attribute.", + "type": "object", + "properties": { + "brief": { + "description": "A brief description of the value", + "type": "string" + }, + "examples": { + "description": "Sequence of examples for the value or single example\nvalue. If only a single example is provided, it can\ndirectly be reported without encapsulating it\ninto a sequence/dictionary.", + "anyOf": [ + { + "$ref": "#/$defs/Examples" + }, + { + "type": "null" + } + ] + }, + "id": { + "description": "String that uniquely identifies the enum entry.", + "type": "string" + }, + "note": { + "description": "A more elaborate description of the value.\nIt defaults to an empty string.", + "type": "string" + }, + "requirement_level": { + "description": "Specifies if the field is mandatory. Can be \"required\",\n\"conditionally_required\", \"recommended\" or \"opt_in\". When omitted,\nthe field is \"recommended\". 
When set to\n\"conditionally_required\", the string provided as MUST\nspecify the conditions under which the field is required.", + "$ref": "#/$defs/RequirementLevel" + }, + "stability": { + "description": "Specifies the stability of the value.", + "anyOf": [ + { + "$ref": "#/$defs/Stability" + }, + { + "type": "null" + } + ] + }, + "type": { + "type": "string", + "const": "string" + } + }, + "required": [ + "type", + "id", + "requirement_level" + ] + }, + { + "description": "An array of strings attribute.", + "type": "object", + "properties": { + "brief": { + "description": "A brief description of the value", + "type": "string" + }, + "examples": { + "description": "Sequence of examples for the value or single example\nvalue. If only a single example is provided, it can\ndirectly be reported without encapsulating it\ninto a sequence/dictionary.", + "anyOf": [ + { + "$ref": "#/$defs/Examples" + }, + { + "type": "null" + } + ] + }, + "id": { + "description": "String that uniquely identifies the enum entry.", + "type": "string" + }, + "note": { + "description": "A more elaborate description of the value.\nIt defaults to an empty string.", + "type": "string" + }, + "requirement_level": { + "description": "Specifies if the field is mandatory. Can be \"required\",\n\"conditionally_required\", \"recommended\" or \"opt_in\". When omitted,\nthe field is \"recommended\". 
When set to\n\"conditionally_required\", the string provided as MUST\nspecify the conditions under which the field is required.", + "$ref": "#/$defs/RequirementLevel" + }, + "stability": { + "description": "Specifies the stability of the value.", + "anyOf": [ + { + "$ref": "#/$defs/Stability" + }, + { + "type": "null" + } + ] + }, + "type": { + "type": "string", + "const": "string[]" + } + }, + "required": [ + "type", + "id", + "requirement_level" + ] + }, + { + "description": "An array of integer attribute.", + "type": "object", + "properties": { + "brief": { + "description": "A brief description of the value", + "type": "string" + }, + "examples": { + "description": "Sequence of examples for the value or single example\nvalue. If only a single example is provided, it can\ndirectly be reported without encapsulating it\ninto a sequence/dictionary.", + "anyOf": [ + { + "$ref": "#/$defs/Examples" + }, + { + "type": "null" + } + ] + }, + "id": { + "description": "String that uniquely identifies the enum entry.", + "type": "string" + }, + "note": { + "description": "A more elaborate description of the value.\nIt defaults to an empty string.", + "type": "string" + }, + "requirement_level": { + "description": "Specifies if the field is mandatory. Can be \"required\",\n\"conditionally_required\", \"recommended\" or \"opt_in\". When omitted,\nthe field is \"recommended\". 
When set to\n\"conditionally_required\", the string provided as MUST\nspecify the conditions under which the field is required.", + "$ref": "#/$defs/RequirementLevel" + }, + "stability": { + "description": "Specifies the stability of the value.", + "anyOf": [ + { + "$ref": "#/$defs/Stability" + }, + { + "type": "null" + } + ] + }, + "type": { + "type": "string", + "const": "int[]" + } + }, + "required": [ + "type", + "id", + "requirement_level" + ] + }, + { + "description": "An array of double attribute.", + "type": "object", + "properties": { + "brief": { + "description": "A brief description of the value", + "type": "string" + }, + "examples": { + "description": "Sequence of examples for the value or single example\nvalue. If only a single example is provided, it can\ndirectly be reported without encapsulating it\ninto a sequence/dictionary.", + "anyOf": [ + { + "$ref": "#/$defs/Examples" + }, + { + "type": "null" + } + ] + }, + "id": { + "description": "String that uniquely identifies the enum entry.", + "type": "string" + }, + "note": { + "description": "A more elaborate description of the value.\nIt defaults to an empty string.", + "type": "string" + }, + "requirement_level": { + "description": "Specifies if the field is mandatory. Can be \"required\",\n\"conditionally_required\", \"recommended\" or \"opt_in\". When omitted,\nthe field is \"recommended\". 
When set to\n\"conditionally_required\", the string provided as MUST\nspecify the conditions under which the field is required.", + "$ref": "#/$defs/RequirementLevel" + }, + "stability": { + "description": "Specifies the stability of the value.", + "anyOf": [ + { + "$ref": "#/$defs/Stability" + }, + { + "type": "null" + } + ] + }, + "type": { + "type": "string", + "const": "double[]" + } + }, + "required": [ + "type", + "id", + "requirement_level" + ] + }, + { + "description": "An array of boolean attribute.", + "type": "object", + "properties": { + "brief": { + "description": "A brief description of the value", + "type": "string" + }, + "examples": { + "description": "Sequence of examples for the value or single example\nvalue. If only a single example is provided, it can\ndirectly be reported without encapsulating it\ninto a sequence/dictionary.", + "anyOf": [ + { + "$ref": "#/$defs/Examples" + }, + { + "type": "null" + } + ] + }, + "id": { + "description": "String that uniquely identifies the enum entry.", + "type": "string" + }, + "note": { + "description": "A more elaborate description of the value.\nIt defaults to an empty string.", + "type": "string" + }, + "requirement_level": { + "description": "Specifies if the field is mandatory. Can be \"required\",\n\"conditionally_required\", \"recommended\" or \"opt_in\". When omitted,\nthe field is \"recommended\". 
When set to\n\"conditionally_required\", the string provided as MUST\nspecify the conditions under which the field is required.", + "$ref": "#/$defs/RequirementLevel" + }, + "stability": { + "description": "Specifies the stability of the value.", + "anyOf": [ + { + "$ref": "#/$defs/Stability" + }, + { + "type": "null" + } + ] + }, + "type": { + "type": "string", + "const": "boolean[]" + } + }, + "required": [ + "type", + "id", + "requirement_level" + ] + }, + { + "description": "The value type is a map of key, value pairs", + "type": "object", + "properties": { + "brief": { + "description": "A brief description of the value", + "type": "string" + }, + "examples": { + "description": "Sequence of examples for the value or single example\nvalue. If only a single example is provided, it can\ndirectly be reported without encapsulating it\ninto a sequence/dictionary.", + "anyOf": [ + { + "$ref": "#/$defs/Examples" + }, + { + "type": "null" + } + ] + }, + "fields": { + "description": "The collection of key, values where the value is an `AnyValueSpec`", + "type": "array", + "items": { + "$ref": "#/$defs/AnyValueSpec" + } + }, + "id": { + "description": "String that uniquely identifies the enum entry.", + "type": "string" + }, + "note": { + "description": "A more elaborate description of the value.\nIt defaults to an empty string.", + "type": "string" + }, + "requirement_level": { + "description": "Specifies if the field is mandatory. Can be \"required\",\n\"conditionally_required\", \"recommended\" or \"opt_in\". When omitted,\nthe field is \"recommended\". 
When set to\n\"conditionally_required\", the string provided as MUST\nspecify the conditions under which the field is required.", + "$ref": "#/$defs/RequirementLevel" + }, + "stability": { + "description": "Specifies the stability of the value.", + "anyOf": [ + { + "$ref": "#/$defs/Stability" + }, + { + "type": "null" + } + ] + }, + "type": { + "type": "string", + "const": "map" + } + }, + "required": [ + "type", + "id", + "requirement_level", + "fields" + ] + }, + { + "description": "The value type is a map of key, value pairs", + "type": "object", + "properties": { + "brief": { + "description": "A brief description of the value", + "type": "string" + }, + "examples": { + "description": "Sequence of examples for the value or single example\nvalue. If only a single example is provided, it can\ndirectly be reported without encapsulating it\ninto a sequence/dictionary.", + "anyOf": [ + { + "$ref": "#/$defs/Examples" + }, + { + "type": "null" + } + ] + }, + "fields": { + "description": "The collection of key, values where the value is an `AnyValueSpec`", + "type": "array", + "items": { + "$ref": "#/$defs/AnyValueSpec" + } + }, + "id": { + "description": "String that uniquely identifies the enum entry.", + "type": "string" + }, + "note": { + "description": "A more elaborate description of the value.\nIt defaults to an empty string.", + "type": "string" + }, + "requirement_level": { + "description": "Specifies if the field is mandatory. Can be \"required\",\n\"conditionally_required\", \"recommended\" or \"opt_in\". When omitted,\nthe field is \"recommended\". 
When set to\n\"conditionally_required\", the string provided as MUST\nspecify the conditions under which the field is required.", + "$ref": "#/$defs/RequirementLevel" + }, + "stability": { + "description": "Specifies the stability of the value.", + "anyOf": [ + { + "$ref": "#/$defs/Stability" + }, + { + "type": "null" + } + ] + }, + "type": { + "type": "string", + "const": "map[]" + } + }, + "required": [ + "type", + "id", + "requirement_level", + "fields" + ] + }, + { + "description": "The value type will just be a bytes.", + "type": "object", + "properties": { + "brief": { + "description": "A brief description of the value", + "type": "string" + }, + "examples": { + "description": "Sequence of examples for the value or single example\nvalue. If only a single example is provided, it can\ndirectly be reported without encapsulating it\ninto a sequence/dictionary.", + "anyOf": [ + { + "$ref": "#/$defs/Examples" + }, + { + "type": "null" + } + ] + }, + "id": { + "description": "String that uniquely identifies the enum entry.", + "type": "string" + }, + "note": { + "description": "A more elaborate description of the value.\nIt defaults to an empty string.", + "type": "string" + }, + "requirement_level": { + "description": "Specifies if the field is mandatory. Can be \"required\",\n\"conditionally_required\", \"recommended\" or \"opt_in\". When omitted,\nthe field is \"recommended\". 
When set to\n\"conditionally_required\", the string provided as MUST\nspecify the conditions under which the field is required.", + "$ref": "#/$defs/RequirementLevel" + }, + "stability": { + "description": "Specifies the stability of the value.", + "anyOf": [ + { + "$ref": "#/$defs/Stability" + }, + { + "type": "null" + } + ] + }, + "type": { + "type": "string", + "const": "bytes" + } + }, + "required": [ + "type", + "id", + "requirement_level" + ] + }, + { + "description": "The value type is not specified.", + "type": "object", + "properties": { + "brief": { + "description": "A brief description of the value", + "type": "string" + }, + "examples": { + "description": "Sequence of examples for the value or single example\nvalue. If only a single example is provided, it can\ndirectly be reported without encapsulating it\ninto a sequence/dictionary.", + "anyOf": [ + { + "$ref": "#/$defs/Examples" + }, + { + "type": "null" + } + ] + }, + "id": { + "description": "String that uniquely identifies the enum entry.", + "type": "string" + }, + "note": { + "description": "A more elaborate description of the value.\nIt defaults to an empty string.", + "type": "string" + }, + "requirement_level": { + "description": "Specifies if the field is mandatory. Can be \"required\",\n\"conditionally_required\", \"recommended\" or \"opt_in\". When omitted,\nthe field is \"recommended\". 
When set to\n\"conditionally_required\", the string provided as MUST\nspecify the conditions under which the field is required.", + "$ref": "#/$defs/RequirementLevel" + }, + "stability": { + "description": "Specifies the stability of the value.", + "anyOf": [ + { + "$ref": "#/$defs/Stability" + }, + { + "type": "null" + } + ] + }, + "type": { + "type": "string", + "const": "undefined" + } + }, + "required": [ + "type", + "id", + "requirement_level" + ] + }, + { + "description": "An enum definition type.", + "type": "object", + "properties": { + "brief": { + "description": "A brief description of the value", + "type": "string" + }, + "examples": { + "description": "Sequence of examples for the value or single example\nvalue. If only a single example is provided, it can\ndirectly be reported without encapsulating it\ninto a sequence/dictionary.", + "anyOf": [ + { + "$ref": "#/$defs/Examples" + }, + { + "type": "null" + } + ] + }, + "id": { + "description": "String that uniquely identifies the enum entry.", + "type": "string" + }, + "members": { + "description": "List of enum entries.", + "type": "array", + "items": { + "$ref": "#/$defs/EnumEntriesSpec" + } + }, + "note": { + "description": "A more elaborate description of the value.\nIt defaults to an empty string.", + "type": "string" + }, + "requirement_level": { + "description": "Specifies if the field is mandatory. Can be \"required\",\n\"conditionally_required\", \"recommended\" or \"opt_in\". When omitted,\nthe field is \"recommended\". 
When set to\n\"conditionally_required\", the string provided as MUST\nspecify the conditions under which the field is required.", + "$ref": "#/$defs/RequirementLevel" + }, + "stability": { + "description": "Specifies the stability of the value.", + "anyOf": [ + { + "$ref": "#/$defs/Stability" + }, + { + "type": "null" + } + ] + }, + "type": { + "type": "string", + "const": "enum" + } + }, + "required": [ + "type", + "id", + "requirement_level", + "members" + ] + } + ] + }, + "Attribute": { + "description": "An attribute definition.", + "type": "object", + "properties": { + "annotations": { + "description": "Annotations for the group.", + "type": [ + "object", + "null" + ], + "additionalProperties": { + "$ref": "#/$defs/YamlValue" + } + }, + "brief": { + "description": "A brief description of the attribute.", + "type": "string" + }, + "deprecated": { + "description": "Specifies if the attribute is deprecated.", + "anyOf": [ + { + "$ref": "#/$defs/Deprecated" + }, + { + "type": "null" + } + ] + }, + "examples": { + "description": "Sequence of example values for the attribute or single example\nvalue. They are required only for string and string array\nattributes. Example values must be of the same type of the\nattribute. If only a single example is provided, it can directly\nbe reported without encapsulating it into a sequence/dictionary.", + "anyOf": [ + { + "$ref": "#/$defs/Examples" + }, + { + "type": "null" + } + ] + }, + "name": { + "description": "Attribute name.", + "type": "string" + }, + "note": { + "description": "A more elaborate description of the attribute.\nIt defaults to an empty string.", + "type": "string" + }, + "prefix": { + "description": "Specifies the prefix of the attribute.\nIf this parameter is set, the resolved id of the referenced attribute will\nhave group prefix added to it.\nIt defaults to false.", + "type": "boolean" + }, + "requirement_level": { + "description": "Specifies if the attribute is mandatory. 
Can be \"required\",\n\"conditionally_required\", \"recommended\" or \"opt_in\". When omitted,\nthe attribute is \"recommended\". When set to\n\"conditionally_required\", the string provided as MUST\nspecify the conditions under which the attribute is required.", + "$ref": "#/$defs/RequirementLevel" + }, + "role": { + "description": "Whether the attribute is identifying or descriptive.", + "anyOf": [ + { + "$ref": "#/$defs/AttributeRole" + }, + { + "type": "null" + } + ] + }, + "sampling_relevant": { + "description": "Specifies if the attribute is (especially) relevant for sampling\nand thus should be set at span start. It defaults to false.\nNote: this field is experimental.", + "type": [ + "boolean", + "null" + ] + }, + "stability": { + "description": "Specifies the stability of the attribute.\nNote that, if stability is missing but deprecated is present, it will\nautomatically set the stability to deprecated. If deprecated is\npresent and stability differs from deprecated, this will result in an\nerror.", + "anyOf": [ + { + "$ref": "#/$defs/Stability" + }, + { + "type": "null" + } + ] + }, + "tag": { + "description": "Associates a tag (\"sub-group\") to the attribute. It carries no\nparticular semantic meaning but can be used e.g. 
for filtering\nin the markdown generator.", + "type": [ + "string", + "null" + ] + }, + "tags": { + "description": "A set of tags for the attribute.", + "anyOf": [ + { + "$ref": "#/$defs/Tags" + }, + { + "type": "null" + } + ] + }, + "type": { + "description": "Either a string literal denoting the type as a primitive or an\narray type, a template type or an enum definition.", + "$ref": "#/$defs/AttributeType" + }, + "value": { + "description": "The value of the attribute.\nNote: This is only used in a telemetry schema specification.", + "anyOf": [ + { + "$ref": "#/$defs/Value" + }, + { + "type": "null" + } + ] + } + }, + "additionalProperties": false, + "required": [ + "name", + "type", + "brief", + "requirement_level" + ] + }, + "AttributeLineage": { + "description": "Attribute lineage (at the field level).", + "type": "object", + "properties": { + "inherited_fields": { + "description": "A list of fields that are inherited from the source group.", + "type": "array", + "items": { + "type": "string" + }, + "uniqueItems": true + }, + "locally_overridden_fields": { + "description": "A list of fields that are overridden in the local group.", + "type": "array", + "items": { + "type": "string" + }, + "uniqueItems": true + }, + "source_group": { + "description": "The group id where the attribute is coming from.", + "type": "string" + } + }, + "required": [ + "source_group" + ] + }, + "AttributeRole": { + "description": "The different roles for attributes in groups.", + "oneOf": [ + { + "description": "The attribute is considered identifying for the signal it is associated with.", + "type": "string", + "const": "identifying" + }, + { + "description": "The attribute is considered descriptive for the signal it is associated with.", + "type": "string", + "const": "descriptive" + } + ] + }, + "AttributeType": { + "description": "The different types of attributes (specification).", + "anyOf": [ + { + "description": "Primitive or array type.", + "$ref": 
"#/$defs/PrimitiveOrArrayTypeSpec" + }, + { + "description": "A template type.", + "$ref": "#/$defs/TemplateTypeSpec" + }, + { + "description": "An enum definition type.", + "type": "object", + "properties": { + "members": { + "description": "List of enum entries.", + "type": "array", + "items": { + "$ref": "#/$defs/EnumEntriesSpec" + } + } + }, + "required": [ + "members" + ] + } + ] + }, + "BasicRequirementLevelSpec": { + "description": "The different types of basic requirement levels.", + "oneOf": [ + { + "description": "A required requirement level.", + "type": "string", + "const": "required" + }, + { + "description": "An optional requirement level.", + "type": "string", + "const": "recommended" + }, + { + "description": "An opt-in requirement level.", + "type": "string", + "const": "opt_in" + } + ] + }, + "Deprecated": { + "description": "The different ways to deprecate an attribute, a metric, ...", + "oneOf": [ + { + "description": "The telemetry object containing the deprecated field has been renamed to an\nexisting or a new telemetry object.", + "type": "object", + "properties": { + "note": { + "description": "The note to provide more context about the deprecation.", + "type": "string" + }, + "reason": { + "type": "string", + "const": "renamed" + }, + "renamed_to": { + "description": "The new name of the telemetry object.", + "type": "string" + } + }, + "required": [ + "reason", + "renamed_to", + "note" + ] + }, + { + "description": "The telemetry object containing the deprecated field has been obsoleted\nbecause it no longer exists and has no valid replacement.\n\nThe `brief` field should contain the reason why the field has been obsoleted.", + "type": "object", + "properties": { + "note": { + "description": "The note to provide more context about the deprecation.", + "type": "string" + }, + "reason": { + "type": "string", + "const": "obsoleted" + } + }, + "required": [ + "reason", + "note" + ] + }, + { + "description": "The telemetry object containing the 
deprecated field has been deprecated for\ncomplex reasons (split, merge, ...) which are currently not precisely defined\nin the supported deprecation reasons.\n\nThe `brief` field should contain the reason for this uncategorized deprecation.", + "type": "object", + "properties": { + "note": { + "description": "The note to provide more context about the deprecation.", + "type": "string" + }, + "reason": { + "type": "string", + "const": "uncategorized" + } + }, + "required": [ + "reason", + "note" + ] + }, + { + "description": "This variant is used to capture old, unstructured deprecated \"string\".\nUsed for backward-compatibility only.", + "type": "object", + "properties": { + "note": { + "description": "The note to provide more context about the deprecation.", + "type": "string" + }, + "reason": { + "type": "string", + "const": "unspecified" + } + }, + "required": [ + "reason", + "note" + ] + } + ] + }, + "EnumEntriesSpec": { + "description": "Possible enum entries.", + "type": "object", + "properties": { + "annotations": { + "description": "Annotations for the member.", + "type": [ + "object", + "null" + ], + "additionalProperties": { + "$ref": "#/$defs/YamlValue" + } + }, + "brief": { + "description": "Brief description of the enum entry value.\nIt defaults to the value of id.", + "type": [ + "string", + "null" + ] + }, + "deprecated": { + "description": "Deprecation note.", + "anyOf": [ + { + "$ref": "#/$defs/Deprecated" + }, + { + "type": "null" + } + ] + }, + "id": { + "description": "String that uniquely identifies the enum entry.", + "type": "string" + }, + "note": { + "description": "Longer description.\nIt defaults to an empty string.", + "type": [ + "string", + "null" + ] + }, + "stability": { + "description": "Stability of this enum value.", + "anyOf": [ + { + "$ref": "#/$defs/Stability" + }, + { + "type": "null" + } + ] + }, + "value": { + "description": "String, int, or boolean; value of the enum entry.", + "$ref": "#/$defs/ValueSpec" + } + }, + 
"additionalProperties": false, + "required": [ + "id", + "value" + ] + }, + "Examples": { + "description": "The different types of examples.", + "anyOf": [ + { + "description": "A boolean example.", + "type": "boolean" + }, + { + "description": "A integer example.", + "type": "integer", + "format": "int64" + }, + { + "description": "A double example.", + "type": "number", + "format": "double" + }, + { + "description": "A string example.", + "type": "string" + }, + { + "description": "A any example.", + "$ref": "#/$defs/ValueSpec" + }, + { + "description": "A array of integers example.", + "type": "array", + "items": { + "type": "integer", + "format": "int64" + } + }, + { + "description": "A array of doubles example.", + "type": "array", + "items": { + "type": "number", + "format": "double" + } + }, + { + "description": "A array of bools example.", + "type": "array", + "items": { + "type": "boolean" + } + }, + { + "description": "A array of strings example.", + "type": "array", + "items": { + "type": "string" + } + }, + { + "description": "A array of anys example.", + "type": "array", + "items": { + "$ref": "#/$defs/ValueSpec" + } + }, + { + "description": "List of arrays of integers example.", + "type": "array", + "items": { + "type": "array", + "items": { + "type": "integer", + "format": "int64" + } + } + }, + { + "description": "List of arrays of doubles example.", + "type": "array", + "items": { + "type": "array", + "items": { + "type": "number", + "format": "double" + } + } + }, + { + "description": "List of arrays of bools example.", + "type": "array", + "items": { + "type": "array", + "items": { + "type": "boolean" + } + } + }, + { + "description": "List of arrays of strings example.", + "type": "array", + "items": { + "type": "array", + "items": { + "type": "string" + } + } + } + ] + }, + "GroupLineage": { + "description": "Group lineage.", + "type": "object", + "properties": { + "attributes": { + "description": "The lineage per attribute.\n\nNote: Use a 
BTreeMap to ensure a deterministic order of attributes.\nThis is important to keep unit tests stable.", + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/AttributeLineage" + } + }, + "extends_group": { + "description": "The group that this group extended, if available.", + "type": [ + "string", + "null" + ] + }, + "includes_group": { + "description": "(V2 Only) Attribute groups included in this group.", + "type": "array", + "items": { + "type": "string" + } + }, + "provenance": { + "description": "The provenance of the source file where the group is defined.", + "$ref": "#/$defs/Provenance" + } + }, + "required": [ + "provenance" + ] + }, + "GroupType": { + "description": "The different types of groups: `attribute_group`, `span`, `event`, `metric`, `entity`, `scope`.\n\nNote: The `resource` type is no longer used and is an alias for `entity`.", + "oneOf": [ + { + "description": "Attribute group (attribute_group type) defines a set of attributes that\ncan be declared once and referenced by semantic conventions for\ndifferent signals, for example spans and logs. 
Attribute groups don't\nhave any specific fields and follow the general semconv semantics.", + "type": "string", + "const": "attribute_group" + }, + { + "description": "Span semantic convention.", + "type": "string", + "const": "span" + }, + { + "description": "Event semantic convention.", + "type": "string", + "const": "event" + }, + { + "description": "Metric semantic convention.", + "type": "string", + "const": "metric" + }, + { + "description": "The metric group semconv is a group where related metric attributes can\nbe defined and then referenced from other metric groups using ref.", + "type": "string", + "const": "metric_group" + }, + { + "description": "Entity semantic convention.", + "type": "string", + "const": "entity" + }, + { + "description": "Scope.", + "type": "string", + "const": "scope" + }, + { + "description": "Undefined group type.", + "type": "string", + "const": "undefined" + } + ] + }, + "InstrumentSpec": { + "description": "The type of the metric.", + "oneOf": [ + { + "description": "An up-down counter metric.", + "type": "string", + "const": "updowncounter" + }, + { + "description": "A counter metric.", + "type": "string", + "const": "counter" + }, + { + "description": "A gauge metric.", + "type": "string", + "const": "gauge" + }, + { + "description": "A histogram metric.", + "type": "string", + "const": "histogram" + } + ] + }, + "PrimitiveOrArrayTypeSpec": { + "description": "Primitive or array types.", + "oneOf": [ + { + "description": "A boolean attribute.", + "type": "string", + "const": "boolean" + }, + { + "description": "A integer attribute (signed 64 bit integer).", + "type": "string", + "const": "int" + }, + { + "description": "A double attribute (double precision floating point (IEEE 754-1985)).", + "type": "string", + "const": "double" + }, + { + "description": "A string attribute.", + "type": "string", + "const": "string" + }, + { + "description": "An any type attribute (accepts any valid value).", + "type": "string", + "const": 
"any" + }, + { + "description": "An array of strings attribute.", + "type": "string", + "const": "string[]" + }, + { + "description": "An array of integer attribute.", + "type": "string", + "const": "int[]" + }, + { + "description": "An array of double attribute.", + "type": "string", + "const": "double[]" + }, + { + "description": "An array of boolean attribute.", + "type": "string", + "const": "boolean[]" + } + ] + }, + "Provenance": { + "description": "The provenance of a semantic convention specification file.", + "type": "object", + "properties": { + "path": { + "description": "The path to the specification file.\n\nThis path is only available *locally*. When publishing resolved schemas,\nthis field is not included.", + "type": "string" + }, + "schema_url": { + "description": "The schema URL where this was specified.\n\nThe Schema url contains the registry id and the version of the schema.\nIt can be used to detect conflicts or resolve multiple \"ids\" existing across\ndependency chains but being the same thing, conceptually.", + "$ref": "#/$defs/SchemaUrl" + } + }, + "required": [ + "schema_url", + "path" + ] + }, + "RequirementLevel": { + "description": "The different requirement level specifications.", + "anyOf": [ + { + "description": "A basic requirement level.", + "$ref": "#/$defs/BasicRequirementLevelSpec" + }, + { + "description": "A conditional requirement level.", + "type": "object", + "properties": { + "conditionally_required": { + "description": "The description of the condition.", + "type": "string" + } + }, + "required": [ + "conditionally_required" + ] + }, + { + "description": "A recommended requirement level.", + "type": "object", + "properties": { + "recommended": { + "description": "The description of the recommendation.", + "type": "string" + } + }, + "required": [ + "recommended" + ] + }, + { + "description": "An opt in requirement level.", + "type": "object", + "properties": { + "opt_in": { + "description": "The description of the 
recommendation.", + "type": "string" + } + }, + "required": [ + "opt_in" + ] + } + ] + }, + "ResolvedGroup": { + "description": "Resolved group specification used in the context of the template engine.", + "type": "object", + "properties": { + "annotations": { + "description": "Annotations for the group.", + "type": [ + "object", + "null" + ], + "additionalProperties": { + "$ref": "#/$defs/YamlValue" + } + }, + "attributes": { + "description": "List of attributes that belong to the semantic convention.", + "type": "array", + "items": { + "$ref": "#/$defs/Attribute" + } + }, + "body": { + "description": "The body specification used for event semantic conventions.", + "anyOf": [ + { + "$ref": "#/$defs/AnyValueSpec" + }, + { + "type": "null" + } + ] + }, + "brief": { + "description": "A brief description of the semantic convention.", + "type": "string" + }, + "deprecated": { + "description": "Specifies if the semantic convention is deprecated. The string\nprovided as `description` MUST specify why it's deprecated and/or what\nto use instead. See also stability.", + "anyOf": [ + { + "$ref": "#/$defs/Deprecated" + }, + { + "type": "null" + } + ] + }, + "display_name": { + "description": "The readable name for attribute groups used when generating registry tables.", + "type": [ + "string", + "null" + ] + }, + "entity_associations": { + "description": "The associated entities of this group.", + "type": "array", + "items": { + "type": "string" + } + }, + "events": { + "description": "List of strings that specify the ids of event semantic conventions\nassociated with this span semantic convention.\nNote: only valid if type is span", + "type": "array", + "items": { + "type": "string" + } + }, + "extends": { + "description": "Reference another semantic convention id. 
It inherits\nall attributes defined in the specified semantic\nconvention.", + "type": [ + "string", + "null" + ] + }, + "id": { + "description": "The id that uniquely identifies the semantic convention.", + "type": "string" + }, + "instrument": { + "description": "The instrument type that should be used to record the metric. Note that\nthe semantic conventions must be written using the names of the\nsynchronous instrument types (counter, gauge, updowncounter and\nhistogram).\nFor more details: [Metrics semantic conventions - Instrument types](https://github.com/open-telemetry/opentelemetry-specification/tree/main/specification/metrics/semantic_conventions#instrument-types).\nNote: This field is required if type is metric.", + "anyOf": [ + { + "$ref": "#/$defs/InstrumentSpec" + }, + { + "type": "null" + } + ] + }, + "lineage": { + "description": "The lineage of the group.", + "anyOf": [ + { + "$ref": "#/$defs/GroupLineage" + }, + { + "type": "null" + } + ] + }, + "metric_name": { + "description": "The metric name as described by the [OpenTelemetry Specification](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/data-model.md#timeseries-model).\nNote: This field is required if type is metric.", + "type": [ + "string", + "null" + ] + }, + "name": { + "description": "The name of the event. 
If not specified, the prefix is used.\nIf prefix is empty (or unspecified), name is required.", + "type": [ + "string", + "null" + ] + }, + "note": { + "description": "A more elaborate description of the semantic convention.\nIt defaults to an empty string.", + "type": "string" + }, + "prefix": { + "description": "Prefix for the attributes for this semantic convention.\nIt defaults to an empty string.", + "type": "string" + }, + "span_kind": { + "description": "Specifies the kind of the span.\nNote: only valid if type is span", + "anyOf": [ + { + "$ref": "#/$defs/SpanKindSpec" + }, + { + "type": "null" + } + ] + }, + "stability": { + "description": "Specifies the stability of the semantic convention.\nNote that, if stability is missing but deprecated is present, it will\nautomatically set the stability to deprecated. If deprecated is\npresent and stability differs from deprecated, this will result in an\nerror.", + "anyOf": [ + { + "$ref": "#/$defs/Stability" + }, + { + "type": "null" + } + ] + }, + "type": { + "description": "The type of the group including the specific fields for each type.", + "$ref": "#/$defs/GroupType" + }, + "unit": { + "description": "The unit in which the metric is measured, which should adhere to the\n[guidelines](https://github.com/open-telemetry/opentelemetry-specification/tree/main/specification/metrics/semantic_conventions#instrument-units).\nNote: This field is required if type is metric.", + "type": [ + "string", + "null" + ] + } + }, + "required": [ + "id", + "type", + "brief" + ] + }, + "SchemaUrl": { + "description": "Represents the schema URL of a registry, which serves as a unique identifier for the registry\nalong with its version.", + "type": "object", + "properties": { + "url": { + "description": "The schema URL string.", + "type": "string" + } + }, + "required": [ + "url" + ] + }, + "SpanKindSpec": { + "description": "The span kind.", + "oneOf": [ + { + "description": "An internal span.", + "type": "string", + "const": 
"internal" + }, + { + "description": "A client span.", + "type": "string", + "const": "client" + }, + { + "description": "A server span.", + "type": "string", + "const": "server" + }, + { + "description": "A producer span.", + "type": "string", + "const": "producer" + }, + { + "description": "A consumer span.", + "type": "string", + "const": "consumer" + } + ] + }, + "Stability": { + "description": "The level of stability for a definition. Defined in [OTEP-232](https://github.com/open-telemetry/oteps/blob/main/text/0232-maturity-of-otel.md)", + "oneOf": [ + { + "description": "A deprecated definition.", + "type": "string", + "const": "deprecated", + "deprecated": true + }, + { + "description": "A stable definition.", + "type": "string", + "const": "stable" + }, + { + "description": "A definition in development. Formerly known as experimental.", + "type": "string", + "const": "development" + }, + { + "description": "An alpha definition.", + "type": "string", + "const": "alpha" + }, + { + "description": "A beta definition.", + "type": "string", + "const": "beta" + }, + { + "description": "A release candidate definition.", + "type": "string", + "const": "release_candidate" + } + ] + }, + "Tags": { + "description": "A set of tags.\n\nExamples of tags:\n- sensitivity: pii\n- sensitivity: phi\n- data_classification: restricted\n- semantic_type: email\n- semantic_type: first_name\n- owner:\n- provenance: browser_sensor", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "TemplateTypeSpec": { + "description": "Template types.", + "oneOf": [ + { + "description": "A boolean attribute.", + "type": "string", + "const": "template[boolean]" + }, + { + "description": "An integer attribute.", + "type": "string", + "const": "template[int]" + }, + { + "description": "A double attribute.", + "type": "string", + "const": "template[double]" + }, + { + "description": "A string attribute.", + "type": "string", + "const": "template[string]" + }, + { + 
"description": "A any attribute.", + "type": "string", + "const": "template[any]" + }, + { + "description": "An array of strings attribute.", + "type": "string", + "const": "template[string[]]" + }, + { + "description": "An array of integer attribute.", + "type": "string", + "const": "template[int[]]" + }, + { + "description": "An array of double attribute.", + "type": "string", + "const": "template[double[]]" + }, + { + "description": "An array of boolean attribute.", + "type": "string", + "const": "template[boolean[]]" + } + ] + }, + "Value": { + "description": "The different types of values.", + "oneOf": [ + { + "description": "A integer value.", + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "Int" + }, + "value": { + "description": "The value", + "type": "integer", + "format": "int64" + } + }, + "required": [ + "type", + "value" + ] + }, + { + "description": "A double value.", + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "Double" + }, + "value": { + "description": "The value", + "type": "number", + "format": "double" + } + }, + "required": [ + "type", + "value" + ] + }, + { + "description": "A string value.", + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "String" + }, + "value": { + "description": "The value", + "type": "string" + } + }, + "required": [ + "type", + "value" + ] + } + ] + }, + "ValueSpec": { + "description": "The different types of values.", + "anyOf": [ + { + "description": "A integer value.", + "type": "integer", + "format": "int64" + }, + { + "description": "A double value.", + "type": "number", + "format": "double" + }, + { + "description": "A string value.", + "type": "string" + }, + { + "description": "A boolean value.", + "type": "boolean" + } + ] + }, + "YamlValue": { + "type": [ + "null", + "boolean", + "object", + "array", + "number", + "string" + ] + } + } +} \ No newline at end of file diff --git a/examples/chronos_ex/Cargo.toml 
b/examples/chronos_ex/Cargo.toml index 3f8d40a..4c17705 100644 --- a/examples/chronos_ex/Cargo.toml +++ b/examples/chronos_ex/Cargo.toml @@ -12,6 +12,9 @@ tokio.workspace = true futures.workspace = true chronos_bin={path="../../chronos_bin"} +chrono = "0.4.23" +uuid = { version = "1.3.0", features = ["v4", "fast-rng"] } +serde_json = "1.0.93" #tracing tracing.workspace = true diff --git a/examples/chronos_ex/examples/chronos_ex.rs b/examples/chronos_ex/examples/chronos_ex.rs index d1567b8..3b0ffdf 100644 --- a/examples/chronos_ex/examples/chronos_ex.rs +++ b/examples/chronos_ex/examples/chronos_ex.rs @@ -1,6 +1,7 @@ use chronos_bin::kafka::config::KafkaConfig; use chronos_bin::kafka::consumer::KafkaConsumer; use chronos_bin::kafka::producer::KafkaProducer; +use chronos_bin::metrics::ChronosMetrics; use chronos_bin::postgres::config::PgConfig; use chronos_bin::postgres::pg::Pg; use chronos_bin::runner::Runner; @@ -30,7 +31,7 @@ fn init_tracer() -> Result { if service_name.is_err() { std::env::set_var("OTEL_SERVICE_NAME", "chronos"); } - if trace_exporter.is_ok() { + if let Ok(trace_exporter) = trace_exporter { global::set_text_map_propagator(TraceContextPropagator::new()); let os_resource = OsResourceDetector.detect(Duration::from_secs(0)); let process_resource = ProcessResourceDetector.detect(Duration::from_secs(0)); @@ -39,7 +40,7 @@ fn init_tracer() -> Result { let telemetry_resource = TelemetryResourceDetector.detect(Duration::from_secs(0)); opentelemetry_otlp::new_pipeline() .tracing() - .with_exporter(opentelemetry_otlp::new_exporter().http().with_endpoint(format!("{:?}", service_name))) + .with_exporter(opentelemetry_otlp::new_exporter().http().with_endpoint(trace_exporter)) .with_trace_config( sdktrace::config().with_resource( os_resource @@ -54,10 +55,7 @@ fn init_tracer() -> Result { log::error!("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT not set"); // trace error - Err(TraceError::Other(Box::new(std::io::Error::new( - std::io::ErrorKind::Other, - 
"OTEL_EXPORTER_OTLP_TRACES_ENDPOINT not set", - )))) + Err(TraceError::Other(Box::new(std::io::Error::other("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT not set")))) } } @@ -91,10 +89,13 @@ async fn main() { let kafka_producer = KafkaProducer::new(&kafka_config); let data_store = Pg::new(pg_config).await.unwrap(); + let metrics = Arc::new(ChronosMetrics::new().expect("Failed to initialize metrics registry")); + let r = Runner { data_store: Arc::new(data_store), producer: Arc::new(kafka_producer), consumer: Arc::new(kafka_consumer), + metrics, }; debug!("debug logs starting chronos"); diff --git a/examples/chronos_ex/examples/publish_test_message.rs b/examples/chronos_ex/examples/publish_test_message.rs new file mode 100644 index 0000000..8343ca1 --- /dev/null +++ b/examples/chronos_ex/examples/publish_test_message.rs @@ -0,0 +1,56 @@ +/// Publishes a single test message to the Chronos input Kafka topic. +/// +/// Required environment variables (same as the main Chronos service): +/// KAFKA_HOST, KAFKA_PORT, KAFKA_CLIENT_ID, KAFKA_GROUP_ID, +/// KAFKA_IN_TOPIC, KAFKA_OUT_TOPIC, KAFKA_USERNAME, KAFKA_PASSWORD +/// +/// Optional environment variables: +/// CHRONOS_DEADLINE RFC3339 timestamp for the message deadline. +/// Defaults to 1 minute in the past, which causes +/// Chronos to fire the message immediately and generate +/// observable msg_jitter metrics. +/// CHRONOS_MSG_ID Override the generated message UUID. +use chrono::{Duration, Utc}; +use chronos_bin::kafka::config::KafkaConfig; +use chronos_bin::kafka::producer::KafkaProducer; +use std::collections::HashMap; +use uuid::Uuid; + +#[tokio::main] +async fn main() { + env_logger::init(); + dotenv::dotenv().ok(); + + let msg_id = std::env::var("CHRONOS_MSG_ID").unwrap_or_else(|_| Uuid::new_v4().to_string()); + + // Default: 1 minute in the past so Chronos fires immediately (exercises jitter metrics). + // Override with a future timestamp to test the "store and delay" path. 
+ let deadline = std::env::var("CHRONOS_DEADLINE").unwrap_or_else(|_| (Utc::now() - Duration::minutes(1)).to_rfc3339()); + + let payload = serde_json::json!({ + "source": "integration-test", + "message_id": msg_id, + "sent_at": Utc::now().to_rfc3339(), + }) + .to_string(); + + let mut headers = HashMap::new(); + headers.insert("chronosMessageId".to_string(), msg_id.clone()); + headers.insert("chronosDeadline".to_string(), deadline.clone()); + + println!("Publishing test message"); + println!(" id: {}", msg_id); + println!(" deadline: {}", deadline); + println!(" payload: {}", payload); + + let kafka_config = KafkaConfig::from_env(); + let producer = KafkaProducer::new(&kafka_config); + + match producer.kafka_publish(payload, Some(headers), msg_id.clone()).await { + Ok(published) => println!("✓ Published successfully (returned id: {})", published.id), + Err(e) => { + eprintln!("✗ Failed to publish: {}", e); + std::process::exit(1); + } + } +} diff --git a/examples/chronos_ex/examples/telemetry_simple.rs b/examples/chronos_ex/examples/telemetry_simple.rs index 25faded..5131a25 100644 --- a/examples/chronos_ex/examples/telemetry_simple.rs +++ b/examples/chronos_ex/examples/telemetry_simple.rs @@ -1,8 +1,7 @@ use opentelemetry::trace::TracerProvider as _; -use opentelemetry_otlp::ExportConfig; + use opentelemetry_sdk::{runtime::Tokio, trace::TracerProvider}; use tracing::{info_span, instrument}; -use tracing_subscriber::prelude::*; use tokio::time::Duration; @@ -122,9 +121,9 @@ async fn main() { let handler = tokio::task::spawn(async { println!("this is spawning"); - // let runner = Runner {}; - // runner.run(); - // runner.run_sub_db().await; + let runner = Runner {}; + runner.run(); + runner.run_sub_db().await; let mut count = 0; loop { count += 1; diff --git a/examples/chronos_ex/examples/tracing_example.rs b/examples/chronos_ex/examples/tracing_example.rs index 358d738..d8c5866 100644 --- a/examples/chronos_ex/examples/tracing_example.rs +++ 
b/examples/chronos_ex/examples/tracing_example.rs @@ -39,7 +39,7 @@ pub fn shave(yak: usize) -> Result<(), Box> { // note that this is intended to demonstrate `tracing`'s features, not idiomatic // error handling! in a library or application, you should consider returning // a dedicated `YakError`. libraries like snafu or thiserror make this easy. - return Err(io::Error::new(io::ErrorKind::Other, "shaving yak failed!").into()); + return Err(io::Error::other("shaving yak failed!").into()); } else { debug!("yak shaved successfully"); } diff --git a/examples/prom_otlp_mock.rs b/examples/prom_otlp_mock.rs new file mode 100644 index 0000000..426edb5 --- /dev/null +++ b/examples/prom_otlp_mock.rs @@ -0,0 +1,148 @@ +//! Mock runner for exercising Chronos metrics through Prometheus or OTLP. +//! +//! This intentionally uses the production `ChronosMetrics` facade so Weaver +//! live checks validate the same generated metric definitions as the runtime. + +use std::env; +use std::sync::Arc; +use std::time::Duration; + +use chronos_bin::metrics::ChronosMetrics; + +const OTEL_METRICS_EXPORTER: &str = "OTEL_METRICS_EXPORTER"; +const OTEL_METRIC_EXPORT_INTERVAL: &str = "OTEL_METRIC_EXPORT_INTERVAL"; +const OTEL_EXPORTER_PROMETHEUS_HOST: &str = "OTEL_EXPORTER_PROMETHEUS_HOST"; +const OTEL_EXPORTER_PROMETHEUS_PORT: &str = "OTEL_EXPORTER_PROMETHEUS_PORT"; + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum MetricsExporter { + Prometheus, + Otlp, +} + +impl MetricsExporter { + fn from_env() -> Result> { + match env::var(OTEL_METRICS_EXPORTER).unwrap_or_else(|_| "prometheus".to_string()).as_str() { + "prometheus" => Ok(Self::Prometheus), + "otlp" => Ok(Self::Otlp), + "none" => Err("metrics exporter disabled by OTEL_METRICS_EXPORTER=none".into()), + other => Err(format!("unsupported {OTEL_METRICS_EXPORTER} value: {other}").into()), + } + } +} + +struct MockRuntimeConfig { + interval: Duration, + prometheus_host: String, + prometheus_port: u16, +} + +impl MockRuntimeConfig { + fn 
from_env() -> Result> { + Ok(Self { + interval: env_duration_ms(OTEL_METRIC_EXPORT_INTERVAL, 1_000)?, + prometheus_host: env::var(OTEL_EXPORTER_PROMETHEUS_HOST).unwrap_or_else(|_| "127.0.0.1".to_string()), + prometheus_port: env::var(OTEL_EXPORTER_PROMETHEUS_PORT) + .unwrap_or_else(|_| "9092".to_string()) + .parse() + .map_err(|err| format!("invalid {OTEL_EXPORTER_PROMETHEUS_PORT}: {err}"))?, + }) + } +} + +fn env_duration_ms(name: &'static str, default_ms: u64) -> Result> { + let millis = env::var(name) + .unwrap_or_else(|_| default_ms.to_string()) + .parse() + .map_err(|err| format!("invalid {name}: {err}"))?; + Ok(Duration::from_millis(millis)) +} + +async fn spawn_prometheus_server( + metrics: Arc, + host: String, + port: u16, +) -> Result, Box> { + use tokio::io::{AsyncReadExt, AsyncWriteExt}; + + let listener = tokio::net::TcpListener::bind(format!("{host}:{port}")).await?; + eprintln!("Prometheus metrics mock listening on http://{host}:{port}/metrics"); + + Ok(tokio::spawn(async move { + loop { + let Ok((mut stream, _)) = listener.accept().await else { + continue; + }; + let metrics = Arc::clone(&metrics); + tokio::spawn(async move { + let mut request = [0_u8; 1024]; + let bytes_read = stream.read(&mut request).await.unwrap_or(0); + let request_line = String::from_utf8_lossy(&request[..bytes_read]); + let (status, body) = if request_line.starts_with("GET /metrics ") { + ("200 OK", metrics.render_prometheus().unwrap_or_default()) + } else { + ("404 Not Found", "not found\n".to_string()) + }; + let response = format!( + "HTTP/1.1 {status}\r\ncontent-type: text/plain; version=0.0.4; charset=utf-8\r\ncontent-length: {}\r\nconnection: close\r\n\r\n{body}", + body.len() + ); + let _ = stream.write_all(response.as_bytes()).await; + }); + } + })) +} + +async fn run_workload(metrics: Arc, config: &MockRuntimeConfig) { + let mut cycle = 0_u64; + loop { + cycle += 1; + + let consume_destination = if cycle.is_multiple_of(2) { "kafka" } else { "postgres" }; + let 
consume_status = if cycle.is_multiple_of(5) { "fail" } else { "pass" }; + let process_returned = cycle.is_multiple_of(3); + let process_status = if cycle.is_multiple_of(7) { "fail" } else { "pass" }; + let duration_seconds = 0.005 + ((cycle % 20) as f64 * 0.0025); + + metrics.observe_consume_latency(duration_seconds, consume_destination, consume_status); + metrics.observe_process_latency(duration_seconds * 1.5, process_returned, process_status); + metrics.observe_wait_time(0.1 + ((cycle % 10) as f64 * 0.05)); + metrics.observe_jitter(0.01 + ((cycle % 10) as f64 * 0.025)); + metrics.messages_reset(1); + + tokio::time::sleep(config.interval).await; + } +} + +#[tokio::main] +async fn main() -> Result<(), Box> { + let exporter = MetricsExporter::from_env()?; + let config = MockRuntimeConfig::from_env()?; + let metrics = Arc::new(ChronosMetrics::from_env()?); + + let prometheus_server = if exporter == MetricsExporter::Prometheus { + let metrics_for_server = Arc::clone(&metrics); + Some(spawn_prometheus_server(metrics_for_server, config.prometheus_host.clone(), config.prometheus_port).await?) + } else { + None + }; + + eprintln!("Metrics mock running until interrupted"); + + tokio::select! 
{ + _ = run_workload(Arc::clone(&metrics), &config) => {} + result = tokio::signal::ctrl_c() => { + result?; + } + } + + if exporter == MetricsExporter::Otlp { + tokio::time::sleep(Duration::from_secs(2)).await; + } + + metrics.shutdown(); + if let Some(server) = prometheus_server { + server.abort(); + } + Ok(()) +} diff --git a/examples/prom_otlp_mock_runner/Cargo.toml b/examples/prom_otlp_mock_runner/Cargo.toml new file mode 100644 index 0000000..53717b7 --- /dev/null +++ b/examples/prom_otlp_mock_runner/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "prom_otlp_mock_runner" +version = "0.0.0" +edition = "2021" + +[[bin]] +name = "prom_otlp_mock" +path = "../prom_otlp_mock.rs" + +[dependencies] +chronos_bin = { path = "../../chronos_bin" } +tokio.workspace = true diff --git a/examples/weaver/generated/chronos_metric_definitions.rs b/examples/weaver/generated/chronos_metric_definitions.rs new file mode 100644 index 0000000..f849fb9 --- /dev/null +++ b/examples/weaver/generated/chronos_metric_definitions.rs @@ -0,0 +1,104 @@ +// Generated from examples/weaver/registry/chronos/metrics.yaml by OpenTelemetry Weaver. +// Do not edit by hand. 
+ +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub enum MetricId { + MsgConsumeLatency, + MsgJitter, + MsgProcessLatency, + MsgReset, + MsgWaitTime, +} + +#[derive(Clone, Copy, Debug)] +pub enum MetricKind { + Counter, + Histogram, +} + +impl MetricKind { + pub fn is_counter(self) -> bool { + matches!(self, Self::Counter) + } + + pub fn is_histogram(self) -> bool { + matches!(self, Self::Histogram) + } +} + +#[derive(Clone, Copy, Debug)] +pub struct MetricDefinition { + pub id: MetricId, + pub otel_name: &'static str, + pub prometheus_name: &'static str, + pub description: &'static str, + pub unit: Option<&'static str>, + pub label_names: &'static [&'static str], + pub prometheus_label_names: &'static [&'static str], + pub kind: MetricKind, + pub buckets: Option<&'static [f64]>, + pub prewarm_label_values: &'static [&'static [&'static str]], +} + +pub const METRIC_DEFINITIONS: &[MetricDefinition] = &[ + MetricDefinition { + id: MetricId::MsgConsumeLatency, + otel_name: "chronos.message.consume.duration", + prometheus_name: "msg_consume_latency", + description: "Duration of handle_message() in message_receiver.", + unit: Some("s"), + label_names: &["destination", "status"], + prometheus_label_names: &["destination", "status"], + kind: MetricKind::Histogram, + buckets: Some(&[0.001, 0.002, 0.004, 0.008, 0.016, 0.032, 0.064, 0.128, 0.256, 0.512, 1.024, 2.048]), + prewarm_label_values: &[&["kafka", "pass"], &["kafka", "fail"], &["postgres", "pass"], &["postgres", "fail"]], + }, + MetricDefinition { + id: MetricId::MsgJitter, + otel_name: "chronos.message.jitter", + prometheus_name: "msg_jitter", + description: "Difference between actual publish time and client-requested deadline.", + unit: Some("s"), + label_names: &[], + prometheus_label_names: &[], + kind: MetricKind::Histogram, + buckets: Some(&[0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0]), + prewarm_label_values: &[], + }, + MetricDefinition { + id: MetricId::MsgProcessLatency, + otel_name: 
"chronos.message.process.duration", + prometheus_name: "msg_process_latency", + description: "Duration of processor_message_ready() loop in message_processor.", + unit: Some("s"), + label_names: &["returned", "status"], + prometheus_label_names: &["returned", "status"], + kind: MetricKind::Histogram, + buckets: Some(&[0.001, 0.002, 0.004, 0.008, 0.016, 0.032, 0.064, 0.128, 0.256, 0.512, 1.024, 2.048]), + prewarm_label_values: &[&["true", "pass"], &["true", "fail"], &["false", "pass"], &["false", "fail"]], + }, + MetricDefinition { + id: MetricId::MsgReset, + otel_name: "chronos.message.reset", + prometheus_name: "msg_reset", + description: "Number of records reset by reset_to_init_db() in the monitor task.", + unit: Some("{message}"), + label_names: &[], + prometheus_label_names: &[], + kind: MetricKind::Counter, + buckets: None, + prewarm_label_values: &[], + }, + MetricDefinition { + id: MetricId::MsgWaitTime, + otel_name: "chronos.message.wait.duration", + prometheus_name: "msg_wait_time", + description: "Time a message spent in the Kafka input queue before processing.", + unit: Some("s"), + label_names: &[], + prometheus_label_names: &[], + kind: MetricKind::Histogram, + buckets: Some(&[0.1, 0.2, 0.4, 0.8, 1.6, 3.2, 6.4, 12.8, 25.6, 51.2, 102.4, 204.8, 409.6, 819.2]), + prewarm_label_values: &[], + }, +]; diff --git a/examples/weaver/generated/chronos_metrics.md b/examples/weaver/generated/chronos_metrics.md new file mode 100644 index 0000000..123cbe5 --- /dev/null +++ b/examples/weaver/generated/chronos_metrics.md @@ -0,0 +1,11 @@ +# Chronos Metrics + +Generated from `examples/weaver/registry/chronos/metrics.yaml` by OpenTelemetry Weaver. + +| Metric | Prometheus Name | Instrument | Unit | Attributes | Description | +| --- | --- | --- | --- | --- | --- | +| `chronos.message.consume.duration` | `msg_consume_latency` | `histogram` | `s` | `destination`, `status` | Duration of handle_message() in message_receiver. 
| +| `chronos.message.jitter` | `msg_jitter` | `histogram` | `s` | - | Difference between actual publish time and client-requested deadline. | +| `chronos.message.process.duration` | `msg_process_latency` | `histogram` | `s` | `returned`, `status` | Duration of processor_message_ready() loop in message_processor. | +| `chronos.message.reset` | `msg_reset` | `counter` | `{message}` | - | Number of records reset by reset_to_init_db() in the monitor task. | +| `chronos.message.wait.duration` | `msg_wait_time` | `histogram` | `s` | - | Time a message spent in the Kafka input queue before processing. | diff --git a/examples/weaver/generated/resolved-registry.schema.json b/examples/weaver/generated/resolved-registry.schema.json new file mode 100644 index 0000000..081f975 --- /dev/null +++ b/examples/weaver/generated/resolved-registry.schema.json @@ -0,0 +1,1830 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "ResolvedRegistry", + "description": "A resolved semantic convention registry used in the context of the template and policy\nengines.", + "type": "object", + "properties": { + "groups": { + "description": "A list of semantic convention groups.", + "type": "array", + "items": { + "$ref": "#/$defs/ResolvedGroup" + } + }, + "registry_url": { + "description": "The semantic convention registry url.", + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "registry_url", + "groups" + ], + "$defs": { + "AnyValueSpec": { + "description": "The AnyValueTypeSpec is a specification of a value that can be of any type.", + "oneOf": [ + { + "description": "A boolean attribute.", + "type": "object", + "properties": { + "brief": { + "description": "A brief description of the value", + "type": "string" + }, + "examples": { + "description": "Sequence of examples for the value or single example\nvalue. 
If only a single example is provided, it can\ndirectly be reported without encapsulating it\ninto a sequence/dictionary.", + "anyOf": [ + { + "$ref": "#/$defs/Examples" + }, + { + "type": "null" + } + ] + }, + "id": { + "description": "String that uniquely identifies the enum entry.", + "type": "string" + }, + "note": { + "description": "A more elaborate description of the value.\nIt defaults to an empty string.", + "type": "string" + }, + "requirement_level": { + "description": "Specifies if the field is mandatory. Can be \"required\",\n\"conditionally_required\", \"recommended\" or \"opt_in\". When omitted,\nthe field is \"recommended\". When set to\n\"conditionally_required\", the string provided as MUST\nspecify the conditions under which the field is required.", + "$ref": "#/$defs/RequirementLevel" + }, + "stability": { + "description": "Specifies the stability of the value.", + "anyOf": [ + { + "$ref": "#/$defs/Stability" + }, + { + "type": "null" + } + ] + }, + "type": { + "type": "string", + "const": "boolean" + } + }, + "required": [ + "type", + "id", + "requirement_level" + ] + }, + { + "description": "A integer attribute (signed 64 bit integer).", + "type": "object", + "properties": { + "brief": { + "description": "A brief description of the value", + "type": "string" + }, + "examples": { + "description": "Sequence of examples for the value or single example\nvalue. If only a single example is provided, it can\ndirectly be reported without encapsulating it\ninto a sequence/dictionary.", + "anyOf": [ + { + "$ref": "#/$defs/Examples" + }, + { + "type": "null" + } + ] + }, + "id": { + "description": "String that uniquely identifies the enum entry.", + "type": "string" + }, + "note": { + "description": "A more elaborate description of the value.\nIt defaults to an empty string.", + "type": "string" + }, + "requirement_level": { + "description": "Specifies if the field is mandatory. 
Can be \"required\",\n\"conditionally_required\", \"recommended\" or \"opt_in\". When omitted,\nthe field is \"recommended\". When set to\n\"conditionally_required\", the string provided as MUST\nspecify the conditions under which the field is required.", + "$ref": "#/$defs/RequirementLevel" + }, + "stability": { + "description": "Specifies the stability of the value.", + "anyOf": [ + { + "$ref": "#/$defs/Stability" + }, + { + "type": "null" + } + ] + }, + "type": { + "type": "string", + "const": "int" + } + }, + "required": [ + "type", + "id", + "requirement_level" + ] + }, + { + "description": "A double attribute (double precision floating point (IEEE 754-1985)).", + "type": "object", + "properties": { + "brief": { + "description": "A brief description of the value", + "type": "string" + }, + "examples": { + "description": "Sequence of examples for the value or single example\nvalue. If only a single example is provided, it can\ndirectly be reported without encapsulating it\ninto a sequence/dictionary.", + "anyOf": [ + { + "$ref": "#/$defs/Examples" + }, + { + "type": "null" + } + ] + }, + "id": { + "description": "String that uniquely identifies the enum entry.", + "type": "string" + }, + "note": { + "description": "A more elaborate description of the value.\nIt defaults to an empty string.", + "type": "string" + }, + "requirement_level": { + "description": "Specifies if the field is mandatory. Can be \"required\",\n\"conditionally_required\", \"recommended\" or \"opt_in\". When omitted,\nthe field is \"recommended\". 
When set to\n\"conditionally_required\", the string provided as MUST\nspecify the conditions under which the field is required.", + "$ref": "#/$defs/RequirementLevel" + }, + "stability": { + "description": "Specifies the stability of the value.", + "anyOf": [ + { + "$ref": "#/$defs/Stability" + }, + { + "type": "null" + } + ] + }, + "type": { + "type": "string", + "const": "double" + } + }, + "required": [ + "type", + "id", + "requirement_level" + ] + }, + { + "description": "A string attribute.", + "type": "object", + "properties": { + "brief": { + "description": "A brief description of the value", + "type": "string" + }, + "examples": { + "description": "Sequence of examples for the value or single example\nvalue. If only a single example is provided, it can\ndirectly be reported without encapsulating it\ninto a sequence/dictionary.", + "anyOf": [ + { + "$ref": "#/$defs/Examples" + }, + { + "type": "null" + } + ] + }, + "id": { + "description": "String that uniquely identifies the enum entry.", + "type": "string" + }, + "note": { + "description": "A more elaborate description of the value.\nIt defaults to an empty string.", + "type": "string" + }, + "requirement_level": { + "description": "Specifies if the field is mandatory. Can be \"required\",\n\"conditionally_required\", \"recommended\" or \"opt_in\". When omitted,\nthe field is \"recommended\". 
When set to\n\"conditionally_required\", the string provided as MUST\nspecify the conditions under which the field is required.", + "$ref": "#/$defs/RequirementLevel" + }, + "stability": { + "description": "Specifies the stability of the value.", + "anyOf": [ + { + "$ref": "#/$defs/Stability" + }, + { + "type": "null" + } + ] + }, + "type": { + "type": "string", + "const": "string" + } + }, + "required": [ + "type", + "id", + "requirement_level" + ] + }, + { + "description": "An array of strings attribute.", + "type": "object", + "properties": { + "brief": { + "description": "A brief description of the value", + "type": "string" + }, + "examples": { + "description": "Sequence of examples for the value or single example\nvalue. If only a single example is provided, it can\ndirectly be reported without encapsulating it\ninto a sequence/dictionary.", + "anyOf": [ + { + "$ref": "#/$defs/Examples" + }, + { + "type": "null" + } + ] + }, + "id": { + "description": "String that uniquely identifies the enum entry.", + "type": "string" + }, + "note": { + "description": "A more elaborate description of the value.\nIt defaults to an empty string.", + "type": "string" + }, + "requirement_level": { + "description": "Specifies if the field is mandatory. Can be \"required\",\n\"conditionally_required\", \"recommended\" or \"opt_in\". When omitted,\nthe field is \"recommended\". 
When set to\n\"conditionally_required\", the string provided as MUST\nspecify the conditions under which the field is required.", + "$ref": "#/$defs/RequirementLevel" + }, + "stability": { + "description": "Specifies the stability of the value.", + "anyOf": [ + { + "$ref": "#/$defs/Stability" + }, + { + "type": "null" + } + ] + }, + "type": { + "type": "string", + "const": "string[]" + } + }, + "required": [ + "type", + "id", + "requirement_level" + ] + }, + { + "description": "An array of integer attribute.", + "type": "object", + "properties": { + "brief": { + "description": "A brief description of the value", + "type": "string" + }, + "examples": { + "description": "Sequence of examples for the value or single example\nvalue. If only a single example is provided, it can\ndirectly be reported without encapsulating it\ninto a sequence/dictionary.", + "anyOf": [ + { + "$ref": "#/$defs/Examples" + }, + { + "type": "null" + } + ] + }, + "id": { + "description": "String that uniquely identifies the enum entry.", + "type": "string" + }, + "note": { + "description": "A more elaborate description of the value.\nIt defaults to an empty string.", + "type": "string" + }, + "requirement_level": { + "description": "Specifies if the field is mandatory. Can be \"required\",\n\"conditionally_required\", \"recommended\" or \"opt_in\". When omitted,\nthe field is \"recommended\". 
When set to\n\"conditionally_required\", the string provided as MUST\nspecify the conditions under which the field is required.", + "$ref": "#/$defs/RequirementLevel" + }, + "stability": { + "description": "Specifies the stability of the value.", + "anyOf": [ + { + "$ref": "#/$defs/Stability" + }, + { + "type": "null" + } + ] + }, + "type": { + "type": "string", + "const": "int[]" + } + }, + "required": [ + "type", + "id", + "requirement_level" + ] + }, + { + "description": "An array of double attribute.", + "type": "object", + "properties": { + "brief": { + "description": "A brief description of the value", + "type": "string" + }, + "examples": { + "description": "Sequence of examples for the value or single example\nvalue. If only a single example is provided, it can\ndirectly be reported without encapsulating it\ninto a sequence/dictionary.", + "anyOf": [ + { + "$ref": "#/$defs/Examples" + }, + { + "type": "null" + } + ] + }, + "id": { + "description": "String that uniquely identifies the enum entry.", + "type": "string" + }, + "note": { + "description": "A more elaborate description of the value.\nIt defaults to an empty string.", + "type": "string" + }, + "requirement_level": { + "description": "Specifies if the field is mandatory. Can be \"required\",\n\"conditionally_required\", \"recommended\" or \"opt_in\". When omitted,\nthe field is \"recommended\". 
When set to\n\"conditionally_required\", the string provided as MUST\nspecify the conditions under which the field is required.", + "$ref": "#/$defs/RequirementLevel" + }, + "stability": { + "description": "Specifies the stability of the value.", + "anyOf": [ + { + "$ref": "#/$defs/Stability" + }, + { + "type": "null" + } + ] + }, + "type": { + "type": "string", + "const": "double[]" + } + }, + "required": [ + "type", + "id", + "requirement_level" + ] + }, + { + "description": "An array of boolean attribute.", + "type": "object", + "properties": { + "brief": { + "description": "A brief description of the value", + "type": "string" + }, + "examples": { + "description": "Sequence of examples for the value or single example\nvalue. If only a single example is provided, it can\ndirectly be reported without encapsulating it\ninto a sequence/dictionary.", + "anyOf": [ + { + "$ref": "#/$defs/Examples" + }, + { + "type": "null" + } + ] + }, + "id": { + "description": "String that uniquely identifies the enum entry.", + "type": "string" + }, + "note": { + "description": "A more elaborate description of the value.\nIt defaults to an empty string.", + "type": "string" + }, + "requirement_level": { + "description": "Specifies if the field is mandatory. Can be \"required\",\n\"conditionally_required\", \"recommended\" or \"opt_in\". When omitted,\nthe field is \"recommended\". 
When set to\n\"conditionally_required\", the string provided as MUST\nspecify the conditions under which the field is required.", + "$ref": "#/$defs/RequirementLevel" + }, + "stability": { + "description": "Specifies the stability of the value.", + "anyOf": [ + { + "$ref": "#/$defs/Stability" + }, + { + "type": "null" + } + ] + }, + "type": { + "type": "string", + "const": "boolean[]" + } + }, + "required": [ + "type", + "id", + "requirement_level" + ] + }, + { + "description": "The value type is a map of key, value pairs", + "type": "object", + "properties": { + "brief": { + "description": "A brief description of the value", + "type": "string" + }, + "examples": { + "description": "Sequence of examples for the value or single example\nvalue. If only a single example is provided, it can\ndirectly be reported without encapsulating it\ninto a sequence/dictionary.", + "anyOf": [ + { + "$ref": "#/$defs/Examples" + }, + { + "type": "null" + } + ] + }, + "fields": { + "description": "The collection of key, values where the value is an `AnyValueSpec`", + "type": "array", + "items": { + "$ref": "#/$defs/AnyValueSpec" + } + }, + "id": { + "description": "String that uniquely identifies the enum entry.", + "type": "string" + }, + "note": { + "description": "A more elaborate description of the value.\nIt defaults to an empty string.", + "type": "string" + }, + "requirement_level": { + "description": "Specifies if the field is mandatory. Can be \"required\",\n\"conditionally_required\", \"recommended\" or \"opt_in\". When omitted,\nthe field is \"recommended\". 
When set to\n\"conditionally_required\", the string provided as MUST\nspecify the conditions under which the field is required.", + "$ref": "#/$defs/RequirementLevel" + }, + "stability": { + "description": "Specifies the stability of the value.", + "anyOf": [ + { + "$ref": "#/$defs/Stability" + }, + { + "type": "null" + } + ] + }, + "type": { + "type": "string", + "const": "map" + } + }, + "required": [ + "type", + "id", + "requirement_level", + "fields" + ] + }, + { + "description": "The value type is a map of key, value pairs", + "type": "object", + "properties": { + "brief": { + "description": "A brief description of the value", + "type": "string" + }, + "examples": { + "description": "Sequence of examples for the value or single example\nvalue. If only a single example is provided, it can\ndirectly be reported without encapsulating it\ninto a sequence/dictionary.", + "anyOf": [ + { + "$ref": "#/$defs/Examples" + }, + { + "type": "null" + } + ] + }, + "fields": { + "description": "The collection of key, values where the value is an `AnyValueSpec`", + "type": "array", + "items": { + "$ref": "#/$defs/AnyValueSpec" + } + }, + "id": { + "description": "String that uniquely identifies the enum entry.", + "type": "string" + }, + "note": { + "description": "A more elaborate description of the value.\nIt defaults to an empty string.", + "type": "string" + }, + "requirement_level": { + "description": "Specifies if the field is mandatory. Can be \"required\",\n\"conditionally_required\", \"recommended\" or \"opt_in\". When omitted,\nthe field is \"recommended\". 
When set to\n\"conditionally_required\", the string provided as MUST\nspecify the conditions under which the field is required.", + "$ref": "#/$defs/RequirementLevel" + }, + "stability": { + "description": "Specifies the stability of the value.", + "anyOf": [ + { + "$ref": "#/$defs/Stability" + }, + { + "type": "null" + } + ] + }, + "type": { + "type": "string", + "const": "map[]" + } + }, + "required": [ + "type", + "id", + "requirement_level", + "fields" + ] + }, + { + "description": "The value type will just be a bytes.", + "type": "object", + "properties": { + "brief": { + "description": "A brief description of the value", + "type": "string" + }, + "examples": { + "description": "Sequence of examples for the value or single example\nvalue. If only a single example is provided, it can\ndirectly be reported without encapsulating it\ninto a sequence/dictionary.", + "anyOf": [ + { + "$ref": "#/$defs/Examples" + }, + { + "type": "null" + } + ] + }, + "id": { + "description": "String that uniquely identifies the enum entry.", + "type": "string" + }, + "note": { + "description": "A more elaborate description of the value.\nIt defaults to an empty string.", + "type": "string" + }, + "requirement_level": { + "description": "Specifies if the field is mandatory. Can be \"required\",\n\"conditionally_required\", \"recommended\" or \"opt_in\". When omitted,\nthe field is \"recommended\". 
When set to\n\"conditionally_required\", the string provided as MUST\nspecify the conditions under which the field is required.", + "$ref": "#/$defs/RequirementLevel" + }, + "stability": { + "description": "Specifies the stability of the value.", + "anyOf": [ + { + "$ref": "#/$defs/Stability" + }, + { + "type": "null" + } + ] + }, + "type": { + "type": "string", + "const": "bytes" + } + }, + "required": [ + "type", + "id", + "requirement_level" + ] + }, + { + "description": "The value type is not specified.", + "type": "object", + "properties": { + "brief": { + "description": "A brief description of the value", + "type": "string" + }, + "examples": { + "description": "Sequence of examples for the value or single example\nvalue. If only a single example is provided, it can\ndirectly be reported without encapsulating it\ninto a sequence/dictionary.", + "anyOf": [ + { + "$ref": "#/$defs/Examples" + }, + { + "type": "null" + } + ] + }, + "id": { + "description": "String that uniquely identifies the enum entry.", + "type": "string" + }, + "note": { + "description": "A more elaborate description of the value.\nIt defaults to an empty string.", + "type": "string" + }, + "requirement_level": { + "description": "Specifies if the field is mandatory. Can be \"required\",\n\"conditionally_required\", \"recommended\" or \"opt_in\". When omitted,\nthe field is \"recommended\". 
When set to\n\"conditionally_required\", the string provided as MUST\nspecify the conditions under which the field is required.", + "$ref": "#/$defs/RequirementLevel" + }, + "stability": { + "description": "Specifies the stability of the value.", + "anyOf": [ + { + "$ref": "#/$defs/Stability" + }, + { + "type": "null" + } + ] + }, + "type": { + "type": "string", + "const": "undefined" + } + }, + "required": [ + "type", + "id", + "requirement_level" + ] + }, + { + "description": "An enum definition type.", + "type": "object", + "properties": { + "brief": { + "description": "A brief description of the value", + "type": "string" + }, + "examples": { + "description": "Sequence of examples for the value or single example\nvalue. If only a single example is provided, it can\ndirectly be reported without encapsulating it\ninto a sequence/dictionary.", + "anyOf": [ + { + "$ref": "#/$defs/Examples" + }, + { + "type": "null" + } + ] + }, + "id": { + "description": "String that uniquely identifies the enum entry.", + "type": "string" + }, + "members": { + "description": "List of enum entries.", + "type": "array", + "items": { + "$ref": "#/$defs/EnumEntriesSpec" + } + }, + "note": { + "description": "A more elaborate description of the value.\nIt defaults to an empty string.", + "type": "string" + }, + "requirement_level": { + "description": "Specifies if the field is mandatory. Can be \"required\",\n\"conditionally_required\", \"recommended\" or \"opt_in\". When omitted,\nthe field is \"recommended\". 
When set to\n\"conditionally_required\", the string provided as MUST\nspecify the conditions under which the field is required.", + "$ref": "#/$defs/RequirementLevel" + }, + "stability": { + "description": "Specifies the stability of the value.", + "anyOf": [ + { + "$ref": "#/$defs/Stability" + }, + { + "type": "null" + } + ] + }, + "type": { + "type": "string", + "const": "enum" + } + }, + "required": [ + "type", + "id", + "requirement_level", + "members" + ] + } + ] + }, + "Attribute": { + "description": "An attribute definition.", + "type": "object", + "properties": { + "annotations": { + "description": "Annotations for the group.", + "type": [ + "object", + "null" + ], + "additionalProperties": { + "$ref": "#/$defs/YamlValue" + } + }, + "brief": { + "description": "A brief description of the attribute.", + "type": "string" + }, + "deprecated": { + "description": "Specifies if the attribute is deprecated.", + "anyOf": [ + { + "$ref": "#/$defs/Deprecated" + }, + { + "type": "null" + } + ] + }, + "examples": { + "description": "Sequence of example values for the attribute or single example\nvalue. They are required only for string and string array\nattributes. Example values must be of the same type of the\nattribute. If only a single example is provided, it can directly\nbe reported without encapsulating it into a sequence/dictionary.", + "anyOf": [ + { + "$ref": "#/$defs/Examples" + }, + { + "type": "null" + } + ] + }, + "name": { + "description": "Attribute name.", + "type": "string" + }, + "note": { + "description": "A more elaborate description of the attribute.\nIt defaults to an empty string.", + "type": "string" + }, + "prefix": { + "description": "Specifies the prefix of the attribute.\nIf this parameter is set, the resolved id of the referenced attribute will\nhave group prefix added to it.\nIt defaults to false.", + "type": "boolean" + }, + "requirement_level": { + "description": "Specifies if the attribute is mandatory. 
Can be \"required\",\n\"conditionally_required\", \"recommended\" or \"opt_in\". When omitted,\nthe attribute is \"recommended\". When set to\n\"conditionally_required\", the string provided as MUST\nspecify the conditions under which the attribute is required.", + "$ref": "#/$defs/RequirementLevel" + }, + "role": { + "description": "Whether the attribute is identifying or descriptive.", + "anyOf": [ + { + "$ref": "#/$defs/AttributeRole" + }, + { + "type": "null" + } + ] + }, + "sampling_relevant": { + "description": "Specifies if the attribute is (especially) relevant for sampling\nand thus should be set at span start. It defaults to false.\nNote: this field is experimental.", + "type": [ + "boolean", + "null" + ] + }, + "stability": { + "description": "Specifies the stability of the attribute.\nNote that, if stability is missing but deprecated is present, it will\nautomatically set the stability to deprecated. If deprecated is\npresent and stability differs from deprecated, this will result in an\nerror.", + "anyOf": [ + { + "$ref": "#/$defs/Stability" + }, + { + "type": "null" + } + ] + }, + "tag": { + "description": "Associates a tag (\"sub-group\") to the attribute. It carries no\nparticular semantic meaning but can be used e.g. 
for filtering\nin the markdown generator.", + "type": [ + "string", + "null" + ] + }, + "tags": { + "description": "A set of tags for the attribute.", + "anyOf": [ + { + "$ref": "#/$defs/Tags" + }, + { + "type": "null" + } + ] + }, + "type": { + "description": "Either a string literal denoting the type as a primitive or an\narray type, a template type or an enum definition.", + "$ref": "#/$defs/AttributeType" + }, + "value": { + "description": "The value of the attribute.\nNote: This is only used in a telemetry schema specification.", + "anyOf": [ + { + "$ref": "#/$defs/Value" + }, + { + "type": "null" + } + ] + } + }, + "additionalProperties": false, + "required": [ + "name", + "type", + "brief", + "requirement_level" + ] + }, + "AttributeLineage": { + "description": "Attribute lineage (at the field level).", + "type": "object", + "properties": { + "inherited_fields": { + "description": "A list of fields that are inherited from the source group.", + "type": "array", + "items": { + "type": "string" + }, + "uniqueItems": true + }, + "locally_overridden_fields": { + "description": "A list of fields that are overridden in the local group.", + "type": "array", + "items": { + "type": "string" + }, + "uniqueItems": true + }, + "source_group": { + "description": "The group id where the attribute is coming from.", + "type": "string" + } + }, + "required": [ + "source_group" + ] + }, + "AttributeRole": { + "description": "The different roles for attributes in groups.", + "oneOf": [ + { + "description": "The attribute is considered identifying for the signal it is associated with.", + "type": "string", + "const": "identifying" + }, + { + "description": "The attribute is considered descriptive for the signal it is associated with.", + "type": "string", + "const": "descriptive" + } + ] + }, + "AttributeType": { + "description": "The different types of attributes (specification).", + "anyOf": [ + { + "description": "Primitive or array type.", + "$ref": 
"#/$defs/PrimitiveOrArrayTypeSpec" + }, + { + "description": "A template type.", + "$ref": "#/$defs/TemplateTypeSpec" + }, + { + "description": "An enum definition type.", + "type": "object", + "properties": { + "members": { + "description": "List of enum entries.", + "type": "array", + "items": { + "$ref": "#/$defs/EnumEntriesSpec" + } + } + }, + "required": [ + "members" + ] + } + ] + }, + "BasicRequirementLevelSpec": { + "description": "The different types of basic requirement levels.", + "oneOf": [ + { + "description": "A required requirement level.", + "type": "string", + "const": "required" + }, + { + "description": "An optional requirement level.", + "type": "string", + "const": "recommended" + }, + { + "description": "An opt-in requirement level.", + "type": "string", + "const": "opt_in" + } + ] + }, + "Deprecated": { + "description": "The different ways to deprecate an attribute, a metric, ...", + "oneOf": [ + { + "description": "The telemetry object containing the deprecated field has been renamed to an\nexisting or a new telemetry object.", + "type": "object", + "properties": { + "note": { + "description": "The note to provide more context about the deprecation.", + "type": "string" + }, + "reason": { + "type": "string", + "const": "renamed" + }, + "renamed_to": { + "description": "The new name of the telemetry object.", + "type": "string" + } + }, + "required": [ + "reason", + "renamed_to", + "note" + ] + }, + { + "description": "The telemetry object containing the deprecated field has been obsoleted\nbecause it no longer exists and has no valid replacement.\n\nThe `brief` field should contain the reason why the field has been obsoleted.", + "type": "object", + "properties": { + "note": { + "description": "The note to provide more context about the deprecation.", + "type": "string" + }, + "reason": { + "type": "string", + "const": "obsoleted" + } + }, + "required": [ + "reason", + "note" + ] + }, + { + "description": "The telemetry object containing the 
deprecated field has been deprecated for\ncomplex reasons (split, merge, ...) which are currently not precisely defined\nin the supported deprecation reasons.\n\nThe `brief` field should contain the reason for this uncategorized deprecation.", + "type": "object", + "properties": { + "note": { + "description": "The note to provide more context about the deprecation.", + "type": "string" + }, + "reason": { + "type": "string", + "const": "uncategorized" + } + }, + "required": [ + "reason", + "note" + ] + }, + { + "description": "This variant is used to capture old, unstructured deprecated \"string\".\nUsed for backward-compatibility only.", + "type": "object", + "properties": { + "note": { + "description": "The note to provide more context about the deprecation.", + "type": "string" + }, + "reason": { + "type": "string", + "const": "unspecified" + } + }, + "required": [ + "reason", + "note" + ] + } + ] + }, + "EnumEntriesSpec": { + "description": "Possible enum entries.", + "type": "object", + "properties": { + "annotations": { + "description": "Annotations for the member.", + "type": [ + "object", + "null" + ], + "additionalProperties": { + "$ref": "#/$defs/YamlValue" + } + }, + "brief": { + "description": "Brief description of the enum entry value.\nIt defaults to the value of id.", + "type": [ + "string", + "null" + ] + }, + "deprecated": { + "description": "Deprecation note.", + "anyOf": [ + { + "$ref": "#/$defs/Deprecated" + }, + { + "type": "null" + } + ] + }, + "id": { + "description": "String that uniquely identifies the enum entry.", + "type": "string" + }, + "note": { + "description": "Longer description.\nIt defaults to an empty string.", + "type": [ + "string", + "null" + ] + }, + "stability": { + "description": "Stability of this enum value.", + "anyOf": [ + { + "$ref": "#/$defs/Stability" + }, + { + "type": "null" + } + ] + }, + "value": { + "description": "String, int, or boolean; value of the enum entry.", + "$ref": "#/$defs/ValueSpec" + } + }, +
"additionalProperties": false, + "required": [ + "id", + "value" + ] + }, + "Examples": { + "description": "The different types of examples.", + "anyOf": [ + { + "description": "A boolean example.", + "type": "boolean" + }, + { + "description": "An integer example.", + "type": "integer", + "format": "int64" + }, + { + "description": "A double example.", + "type": "number", + "format": "double" + }, + { + "description": "A string example.", + "type": "string" + }, + { + "description": "An any example.", + "$ref": "#/$defs/ValueSpec" + }, + { + "description": "An array of integers example.", + "type": "array", + "items": { + "type": "integer", + "format": "int64" + } + }, + { + "description": "An array of doubles example.", + "type": "array", + "items": { + "type": "number", + "format": "double" + } + }, + { + "description": "An array of bools example.", + "type": "array", + "items": { + "type": "boolean" + } + }, + { + "description": "An array of strings example.", + "type": "array", + "items": { + "type": "string" + } + }, + { + "description": "An array of anys example.", + "type": "array", + "items": { + "$ref": "#/$defs/ValueSpec" + } + }, + { + "description": "List of arrays of integers example.", + "type": "array", + "items": { + "type": "array", + "items": { + "type": "integer", + "format": "int64" + } + } + }, + { + "description": "List of arrays of doubles example.", + "type": "array", + "items": { + "type": "array", + "items": { + "type": "number", + "format": "double" + } + } + }, + { + "description": "List of arrays of bools example.", + "type": "array", + "items": { + "type": "array", + "items": { + "type": "boolean" + } + } + }, + { + "description": "List of arrays of strings example.", + "type": "array", + "items": { + "type": "array", + "items": { + "type": "string" + } + } + } + ] + }, + "GroupLineage": { + "description": "Group lineage.", + "type": "object", + "properties": { + "attributes": { + "description": "The lineage per attribute.\n\nNote: Use a
BTreeMap to ensure a deterministic order of attributes.\nThis is important to keep unit tests stable.", + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/AttributeLineage" + } + }, + "extends_group": { + "description": "The group that this group extended, if available.", + "type": [ + "string", + "null" + ] + }, + "includes_group": { + "description": "(V2 Only) Attribute groups included in this group.", + "type": "array", + "items": { + "type": "string" + } + }, + "provenance": { + "description": "The provenance of the source file where the group is defined.", + "$ref": "#/$defs/Provenance" + } + }, + "required": [ + "provenance" + ] + }, + "GroupType": { + "description": "The different types of groups: `attribute_group`, `span`, `event`, `metric`, `entity`, `scope`.\n\nNote: The `resource` type is no longer used and is an alias for `entity`.", + "oneOf": [ + { + "description": "Attribute group (attribute_group type) defines a set of attributes that\ncan be declared once and referenced by semantic conventions for\ndifferent signals, for example spans and logs. 
Attribute groups don't\nhave any specific fields and follow the general semconv semantics.", + "type": "string", + "const": "attribute_group" + }, + { + "description": "Span semantic convention.", + "type": "string", + "const": "span" + }, + { + "description": "Event semantic convention.", + "type": "string", + "const": "event" + }, + { + "description": "Metric semantic convention.", + "type": "string", + "const": "metric" + }, + { + "description": "The metric group semconv is a group where related metric attributes can\nbe defined and then referenced from other metric groups using ref.", + "type": "string", + "const": "metric_group" + }, + { + "description": "Entity semantic convention.", + "type": "string", + "const": "entity" + }, + { + "description": "Scope.", + "type": "string", + "const": "scope" + }, + { + "description": "Undefined group type.", + "type": "string", + "const": "undefined" + } + ] + }, + "InstrumentSpec": { + "description": "The type of the metric.", + "oneOf": [ + { + "description": "An up-down counter metric.", + "type": "string", + "const": "updowncounter" + }, + { + "description": "A counter metric.", + "type": "string", + "const": "counter" + }, + { + "description": "A gauge metric.", + "type": "string", + "const": "gauge" + }, + { + "description": "A histogram metric.", + "type": "string", + "const": "histogram" + } + ] + }, + "PrimitiveOrArrayTypeSpec": { + "description": "Primitive or array types.", + "oneOf": [ + { + "description": "A boolean attribute.", + "type": "string", + "const": "boolean" + }, + { + "description": "An integer attribute (signed 64 bit integer).", + "type": "string", + "const": "int" + }, + { + "description": "A double attribute (double precision floating point (IEEE 754-1985)).", + "type": "string", + "const": "double" + }, + { + "description": "A string attribute.", + "type": "string", + "const": "string" + }, + { + "description": "An any type attribute (accepts any valid value).", + "type": "string", + "const":
"any" + }, + { + "description": "An array of strings attribute.", + "type": "string", + "const": "string[]" + }, + { + "description": "An array of integer attribute.", + "type": "string", + "const": "int[]" + }, + { + "description": "An array of double attribute.", + "type": "string", + "const": "double[]" + }, + { + "description": "An array of boolean attribute.", + "type": "string", + "const": "boolean[]" + } + ] + }, + "Provenance": { + "description": "The provenance of a semantic convention specification file.", + "type": "object", + "properties": { + "path": { + "description": "The path to the specification file.\n\nThis path is only available *locally*. When publishing resolved schemas,\nthis field is not included.", + "type": "string" + }, + "schema_url": { + "description": "The schema URL where this was specified.\n\nThe Schema url contains the registry id and the version of the schema.\nIt can be used to detect conflicts or resolve multiple \"ids\" existing across\ndependency chains but being the same thing, conceptually.", + "$ref": "#/$defs/SchemaUrl" + } + }, + "required": [ + "schema_url", + "path" + ] + }, + "RequirementLevel": { + "description": "The different requirement level specifications.", + "anyOf": [ + { + "description": "A basic requirement level.", + "$ref": "#/$defs/BasicRequirementLevelSpec" + }, + { + "description": "A conditional requirement level.", + "type": "object", + "properties": { + "conditionally_required": { + "description": "The description of the condition.", + "type": "string" + } + }, + "required": [ + "conditionally_required" + ] + }, + { + "description": "A recommended requirement level.", + "type": "object", + "properties": { + "recommended": { + "description": "The description of the recommendation.", + "type": "string" + } + }, + "required": [ + "recommended" + ] + }, + { + "description": "An opt in requirement level.", + "type": "object", + "properties": { + "opt_in": { + "description": "The description of the
recommendation.", + "type": "string" + } + }, + "required": [ + "opt_in" + ] + } + ] + }, + "ResolvedGroup": { + "description": "Resolved group specification used in the context of the template engine.", + "type": "object", + "properties": { + "annotations": { + "description": "Annotations for the group.", + "type": [ + "object", + "null" + ], + "additionalProperties": { + "$ref": "#/$defs/YamlValue" + } + }, + "attributes": { + "description": "List of attributes that belong to the semantic convention.", + "type": "array", + "items": { + "$ref": "#/$defs/Attribute" + } + }, + "body": { + "description": "The body specification used for event semantic conventions.", + "anyOf": [ + { + "$ref": "#/$defs/AnyValueSpec" + }, + { + "type": "null" + } + ] + }, + "brief": { + "description": "A brief description of the semantic convention.", + "type": "string" + }, + "deprecated": { + "description": "Specifies if the semantic convention is deprecated. The string\nprovided as `description` MUST specify why it's deprecated and/or what\nto use instead. See also stability.", + "anyOf": [ + { + "$ref": "#/$defs/Deprecated" + }, + { + "type": "null" + } + ] + }, + "display_name": { + "description": "The readable name for attribute groups used when generating registry tables.", + "type": [ + "string", + "null" + ] + }, + "entity_associations": { + "description": "The associated entities of this group.", + "type": "array", + "items": { + "type": "string" + } + }, + "events": { + "description": "List of strings that specify the ids of event semantic conventions\nassociated with this span semantic convention.\nNote: only valid if type is span", + "type": "array", + "items": { + "type": "string" + } + }, + "extends": { + "description": "Reference another semantic convention id. 
It inherits\nall attributes defined in the specified semantic\nconvention.", + "type": [ + "string", + "null" + ] + }, + "id": { + "description": "The id that uniquely identifies the semantic convention.", + "type": "string" + }, + "instrument": { + "description": "The instrument type that should be used to record the metric. Note that\nthe semantic conventions must be written using the names of the\nsynchronous instrument types (counter, gauge, updowncounter and\nhistogram).\nFor more details: [Metrics semantic conventions - Instrument types](https://github.com/open-telemetry/opentelemetry-specification/tree/main/specification/metrics/semantic_conventions#instrument-types).\nNote: This field is required if type is metric.", + "anyOf": [ + { + "$ref": "#/$defs/InstrumentSpec" + }, + { + "type": "null" + } + ] + }, + "lineage": { + "description": "The lineage of the group.", + "anyOf": [ + { + "$ref": "#/$defs/GroupLineage" + }, + { + "type": "null" + } + ] + }, + "metric_name": { + "description": "The metric name as described by the [OpenTelemetry Specification](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/data-model.md#timeseries-model).\nNote: This field is required if type is metric.", + "type": [ + "string", + "null" + ] + }, + "name": { + "description": "The name of the event. 
If not specified, the prefix is used.\nIf prefix is empty (or unspecified), name is required.", + "type": [ + "string", + "null" + ] + }, + "note": { + "description": "A more elaborate description of the semantic convention.\nIt defaults to an empty string.", + "type": "string" + }, + "prefix": { + "description": "Prefix for the attributes for this semantic convention.\nIt defaults to an empty string.", + "type": "string" + }, + "span_kind": { + "description": "Specifies the kind of the span.\nNote: only valid if type is span", + "anyOf": [ + { + "$ref": "#/$defs/SpanKindSpec" + }, + { + "type": "null" + } + ] + }, + "stability": { + "description": "Specifies the stability of the semantic convention.\nNote that, if stability is missing but deprecated is present, it will\nautomatically set the stability to deprecated. If deprecated is\npresent and stability differs from deprecated, this will result in an\nerror.", + "anyOf": [ + { + "$ref": "#/$defs/Stability" + }, + { + "type": "null" + } + ] + }, + "type": { + "description": "The type of the group including the specific fields for each type.", + "$ref": "#/$defs/GroupType" + }, + "unit": { + "description": "The unit in which the metric is measured, which should adhere to the\n[guidelines](https://github.com/open-telemetry/opentelemetry-specification/tree/main/specification/metrics/semantic_conventions#instrument-units).\nNote: This field is required if type is metric.", + "type": [ + "string", + "null" + ] + } + }, + "required": [ + "id", + "type", + "brief" + ] + }, + "SchemaUrl": { + "description": "Represents the schema URL of a registry, which serves as a unique identifier for the registry\nalong with its version.", + "type": "object", + "properties": { + "url": { + "description": "The schema URL string.", + "type": "string" + } + }, + "required": [ + "url" + ] + }, + "SpanKindSpec": { + "description": "The span kind.", + "oneOf": [ + { + "description": "An internal span.", + "type": "string", + "const": 
"internal" + }, + { + "description": "A client span.", + "type": "string", + "const": "client" + }, + { + "description": "A server span.", + "type": "string", + "const": "server" + }, + { + "description": "A producer span.", + "type": "string", + "const": "producer" + }, + { + "description": "A consumer span.", + "type": "string", + "const": "consumer" + } + ] + }, + "Stability": { + "description": "The level of stability for a definition. Defined in [OTEP-232](https://github.com/open-telemetry/oteps/blob/main/text/0232-maturity-of-otel.md)", + "oneOf": [ + { + "description": "A deprecated definition.", + "type": "string", + "const": "deprecated", + "deprecated": true + }, + { + "description": "A stable definition.", + "type": "string", + "const": "stable" + }, + { + "description": "A definition in development. Formerly known as experimental.", + "type": "string", + "const": "development" + }, + { + "description": "An alpha definition.", + "type": "string", + "const": "alpha" + }, + { + "description": "A beta definition.", + "type": "string", + "const": "beta" + }, + { + "description": "A release candidate definition.", + "type": "string", + "const": "release_candidate" + } + ] + }, + "Tags": { + "description": "A set of tags.\n\nExamples of tags:\n- sensitivity: pii\n- sensitivity: phi\n- data_classification: restricted\n- semantic_type: email\n- semantic_type: first_name\n- owner:\n- provenance: browser_sensor", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "TemplateTypeSpec": { + "description": "Template types.", + "oneOf": [ + { + "description": "A boolean attribute.", + "type": "string", + "const": "template[boolean]" + }, + { + "description": "An integer attribute.", + "type": "string", + "const": "template[int]" + }, + { + "description": "A double attribute.", + "type": "string", + "const": "template[double]" + }, + { + "description": "A string attribute.", + "type": "string", + "const": "template[string]" + }, + { +
"description": "A any attribute.", + "type": "string", + "const": "template[any]" + }, + { + "description": "An array of strings attribute.", + "type": "string", + "const": "template[string[]]" + }, + { + "description": "An array of integer attribute.", + "type": "string", + "const": "template[int[]]" + }, + { + "description": "An array of double attribute.", + "type": "string", + "const": "template[double[]]" + }, + { + "description": "An array of boolean attribute.", + "type": "string", + "const": "template[boolean[]]" + } + ] + }, + "Value": { + "description": "The different types of values.", + "oneOf": [ + { + "description": "A integer value.", + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "Int" + }, + "value": { + "description": "The value", + "type": "integer", + "format": "int64" + } + }, + "required": [ + "type", + "value" + ] + }, + { + "description": "A double value.", + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "Double" + }, + "value": { + "description": "The value", + "type": "number", + "format": "double" + } + }, + "required": [ + "type", + "value" + ] + }, + { + "description": "A string value.", + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "String" + }, + "value": { + "description": "The value", + "type": "string" + } + }, + "required": [ + "type", + "value" + ] + } + ] + }, + "ValueSpec": { + "description": "The different types of values.", + "anyOf": [ + { + "description": "A integer value.", + "type": "integer", + "format": "int64" + }, + { + "description": "A double value.", + "type": "number", + "format": "double" + }, + { + "description": "A string value.", + "type": "string" + }, + { + "description": "A boolean value.", + "type": "boolean" + } + ] + }, + "YamlValue": { + "type": [ + "null", + "boolean", + "object", + "array", + "number", + "string" + ] + } + } +} \ No newline at end of file diff --git 
a/examples/weaver/registry/chronos/metrics.yaml b/examples/weaver/registry/chronos/metrics.yaml new file mode 100644 index 0000000..1814c20 --- /dev/null +++ b/examples/weaver/registry/chronos/metrics.yaml @@ -0,0 +1,183 @@ +groups: + - id: resource.chronos.service + type: attribute_group + stability: development + brief: Resource attributes emitted by the Chronos metrics mock. + attributes: + - id: service.name + type: string + stability: stable + brief: Logical name of the service. + examples: [ "chronos-metrics-mock" ] + requirement_level: required + - id: service.instance.id + type: string + stability: stable + brief: The string ID of the service instance. + examples: [ "chronos-metrics-mock-live-check" ] + requirement_level: required + + - id: metric_attributes.chronos.consume_result + type: attribute_group + stability: development + brief: Attributes for Chronos input message handling outcomes. + attributes: + - id: destination + type: string + stability: development + brief: Downstream selected by message_receiver::handle_message. + examples: [ "kafka", "postgres" ] + requirement_level: required + - id: chronos.consume.status + type: string + stability: development + brief: Whether the consume path completed successfully. + examples: [ "pass", "fail" ] + requirement_level: required + + - id: metric_attributes.chronos.process_result + type: attribute_group + stability: development + brief: Attributes for Chronos ready-message processor loop outcomes. + attributes: + - id: returned + type: string + stability: development + brief: Whether the processor loop returned early because no rows were ready. + examples: [ "true", "false" ] + requirement_level: required + - id: chronos.process.status + type: string + stability: development + brief: Whether the processor loop completed successfully. 
+ examples: [ "pass", "fail" ] + requirement_level: required + + - id: metric.chronos.message.consume.duration + type: metric + metric_name: chronos.message.consume.duration + stability: development + brief: Duration of handle_message() in message_receiver. + instrument: histogram + unit: s + extends: metric_attributes.chronos.consume_result + annotations: + code_generation: + rust_name: msg_consume_latency + metric_value_type: double + prometheus_name: msg_consume_latency + label_names: [ destination, status ] + prometheus_label_names: [ destination, status ] + buckets: + [ + 0.001, + 0.002, + 0.004, + 0.008, + 0.016, + 0.032, + 0.064, + 0.128, + 0.256, + 0.512, + 1.024, + 2.048, + ] + prewarm_label_values: + - [ kafka, pass ] + - [ kafka, fail ] + - [ postgres, pass ] + - [ postgres, fail ] + + - id: metric.chronos.message.process.duration + type: metric + metric_name: chronos.message.process.duration + stability: development + brief: Duration of processor_message_ready() loop in message_processor. + instrument: histogram + unit: s + extends: metric_attributes.chronos.process_result + annotations: + code_generation: + rust_name: msg_process_latency + metric_value_type: double + prometheus_name: msg_process_latency + label_names: [ returned, status ] + prometheus_label_names: [ returned, status ] + buckets: + [ + 0.001, + 0.002, + 0.004, + 0.008, + 0.016, + 0.032, + 0.064, + 0.128, + 0.256, + 0.512, + 1.024, + 2.048, + ] + prewarm_label_values: + - [ "true", pass ] + - [ "true", fail ] + - [ "false", pass ] + - [ "false", fail ] + + - id: metric.chronos.message.wait.duration + type: metric + metric_name: chronos.message.wait.duration + stability: development + brief: Time a message spent in the Kafka input queue before processing. 
+ instrument: histogram + unit: s + annotations: + code_generation: + rust_name: msg_wait_time + metric_value_type: double + prometheus_name: msg_wait_time + buckets: + [ + 0.1, + 0.2, + 0.4, + 0.8, + 1.6, + 3.2, + 6.4, + 12.8, + 25.6, + 51.2, + 102.4, + 204.8, + 409.6, + 819.2, + ] + + - id: metric.chronos.message.jitter + type: metric + metric_name: chronos.message.jitter + stability: development + brief: Difference between actual publish time and client-requested deadline. + instrument: histogram + unit: s + annotations: + code_generation: + rust_name: msg_jitter + metric_value_type: double + prometheus_name: msg_jitter + buckets: [ 0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0 ] + + - id: metric.chronos.message.reset + type: metric + metric_name: chronos.message.reset + stability: development + brief: Number of records reset by reset_to_init_db() in the monitor task. + instrument: counter + unit: "{message}" + annotations: + code_generation: + rust_name: msg_reset + metric_value_type: int + prometheus_name: msg_reset diff --git a/examples/weaver/templates/registry/markdown/metrics.md.j2 b/examples/weaver/templates/registry/markdown/metrics.md.j2 new file mode 100644 index 0000000..4dd3ffa --- /dev/null +++ b/examples/weaver/templates/registry/markdown/metrics.md.j2 @@ -0,0 +1,9 @@ +# Chronos Metrics + +Generated from `examples/weaver/registry/chronos/metrics.yaml` by OpenTelemetry Weaver. 
+ +| Metric | Prometheus Name | Instrument | Unit | Attributes | Description | +| --- | --- | --- | --- | --- | --- | +{% for metric in ctx.metrics -%} +| `{{ metric.metric_name }}` | `{{ metric.prometheus_name }}` | `{{ metric.instrument }}` | `{{ metric.unit }}` | {% if metric.attributes %}{% for attribute in metric.attributes %}`{{ attribute }}`{% if not loop.last %}, {% endif %}{% endfor %}{% else %}-{% endif %} | {{ metric.brief }} | +{% endfor -%} diff --git a/examples/weaver/templates/registry/markdown/weaver.yaml b/examples/weaver/templates/registry/markdown/weaver.yaml new file mode 100644 index 0000000..61b5edf --- /dev/null +++ b/examples/weaver/templates/registry/markdown/weaver.yaml @@ -0,0 +1,17 @@ +templates: + - pattern: metrics.md.j2 + filter: > + { + metrics: (.groups + | map(select(.type == "metric")) + | map({ + metric_name, + prometheus_name: .annotations.code_generation.prometheus_name, + brief, + instrument, + unit, + attributes: (.annotations.code_generation.label_names // (.attributes // [] | map(.name // .id // .ref))) + })) + } + application_mode: single + file_name: chronos_metrics.md diff --git a/examples/weaver/templates/registry/rust/registry.rs.j2 b/examples/weaver/templates/registry/rust/registry.rs.j2 new file mode 100644 index 0000000..a319be7 --- /dev/null +++ b/examples/weaver/templates/registry/rust/registry.rs.j2 @@ -0,0 +1,58 @@ +// Generated from examples/weaver/registry/chronos/metrics.yaml by OpenTelemetry Weaver. +// Do not edit by hand. 
+ +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub enum MetricId { +{%- for metric in ctx.metrics %} + {{ metric.rust_name | pascal_case }}, +{%- endfor %} +} + +#[derive(Clone, Copy, Debug)] +pub enum MetricKind { + Counter, + Histogram, +} + +impl MetricKind { + pub fn is_counter(self) -> bool { + matches!(self, Self::Counter) + } + + pub fn is_histogram(self) -> bool { + matches!(self, Self::Histogram) + } +} + +#[derive(Clone, Copy, Debug)] +pub struct MetricDefinition { + pub id: MetricId, + pub otel_name: &'static str, + pub prometheus_name: &'static str, + pub description: &'static str, + pub unit: Option<&'static str>, + pub label_names: &'static [&'static str], + pub prometheus_label_names: &'static [&'static str], + pub kind: MetricKind, + pub buckets: Option<&'static [f64]>, + pub prewarm_label_values: &'static [&'static [&'static str]], +} + +pub const METRIC_DEFINITIONS: &[MetricDefinition] = &[ +{%- for metric in ctx.metrics %} + MetricDefinition { + id: MetricId::{{ metric.rust_name | pascal_case }}, + otel_name: "{{ metric.metric_name }}", + prometheus_name: "{{ metric.prometheus_name }}", + description: "{{ metric.brief }}", + unit: {% if metric.unit %}Some("{{ metric.unit }}"){% else %}None{% endif %}, + label_names: &[{% for attribute in metric.attributes %}"{{ attribute }}"{% if not loop.last %}, {% endif %}{% endfor %}], + prometheus_label_names: &[{% for label in metric.prometheus_labels %}"{{ label }}"{% if not loop.last %}, {% endif %}{% endfor %}], + kind: MetricKind::{{ metric.instrument | pascal_case }}, + buckets: {% if metric.buckets %}{% if metric.buckets | length > 10 %}Some(&[ + {{ metric.buckets | join(", ") }}, + ]){% else %}Some(&[{{ metric.buckets | join(", ") }}]){% endif %}{% else %}None{% endif %}, + prewarm_label_values: &[{% for values in metric.prewarm_label_values %}&[{% for value in values %}"{{ value }}"{% if not loop.last %}, {% endif %}{% endfor %}]{% if not loop.last %}, {% endif %}{% endfor %}], + }, +{%- 
endfor %} +]; diff --git a/examples/weaver/templates/registry/rust/weaver.yaml b/examples/weaver/templates/registry/rust/weaver.yaml new file mode 100644 index 0000000..ed7f9fb --- /dev/null +++ b/examples/weaver/templates/registry/rust/weaver.yaml @@ -0,0 +1,22 @@ +templates: + - pattern: registry.rs.j2 + filter: > + { + metrics: (.groups + | map(select(.type == "metric")) + | map({ + id, + metric_name, + rust_name: .annotations.code_generation.rust_name, + prometheus_name: .annotations.code_generation.prometheus_name, + brief, + instrument, + unit, + attributes: (.annotations.code_generation.label_names // (.attributes // [] | map(.name // .id // .ref))), + prometheus_labels: (.annotations.code_generation.prometheus_label_names // (.attributes // [] | map((.name // .id // .ref) | gsub("\\."; "_")))), + buckets: .annotations.code_generation.buckets, + prewarm_label_values: (.annotations.code_generation.prewarm_label_values // []) + })) + } + application_mode: single + file_name: chronos_metric_definitions.rs diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 6d833ff..4683c9e 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,2 +1,2 @@ [toolchain] -channel = "1.75" +channel = "1.94" diff --git a/scripts/apply-github-config.sh b/scripts/apply-github-config.sh new file mode 100755 index 0000000..1c0cd1c --- /dev/null +++ b/scripts/apply-github-config.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +set -euo pipefail + +config_path="${1:-.github/config.json}" +repo="${GITHUB_REPOSITORY:-}" + +if [[ -z "${repo}" ]]; then + repo="$(gh repo view --json nameWithOwner --jq '.nameWithOwner')" +fi + +if [[ -z "${repo}" ]]; then + echo "Unable to determine GitHub repository. Set GITHUB_REPOSITORY=owner/name." >&2 + exit 2 +fi + +if [[ ! 
-f "${config_path}" ]]; then + echo "GitHub config file not found: ${config_path}" >&2 + exit 2 +fi + +echo "Applying repository settings to ${repo}" +jq -c '.repository' "${config_path}" | gh api --method PATCH "repos/${repo}" --input - + +echo "Applying Actions workflow permissions to ${repo}" +jq -c '.actions' "${config_path}" | gh api --method PUT "repos/${repo}/actions/permissions/workflow" --input - + +for branch in $(jq -r '.branches | keys[]' "${config_path}"); do + echo "Applying branch protection to ${repo}:${branch}" + jq -c --arg branch "${branch}" '.branches[$branch].protection' "${config_path}" | + gh api --method PUT "repos/${repo}/branches/${branch}/protection" --input - +done diff --git a/scripts/coverage-report.sh b/scripts/coverage-report.sh new file mode 100755 index 0000000..7d08e1e --- /dev/null +++ b/scripts/coverage-report.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env sh + +set -eu + +if ! cargo llvm-cov --version >/dev/null 2>&1; then + printf 'cargo-llvm-cov not installed; writing raw LLVM coverage profiles under target/coverage.\n' >&2 + coverage_dir="$(pwd)/target/coverage" + mkdir -p "${coverage_dir}" + CARGO_INCREMENTAL=0 \ + CARGO_HUSKY_DONT_INSTALL_HOOKS=true \ + RUSTFLAGS="${RUSTFLAGS:-} -Cinstrument-coverage" \ + LLVM_PROFILE_FILE="${coverage_dir}/chronos-%p-%m.profraw" \ + cargo test + exit 0 +fi + +cargo llvm-cov --workspace --all-targets diff --git a/scripts/integration.sh b/scripts/integration.sh new file mode 100755 index 0000000..806e2fd --- /dev/null +++ b/scripts/integration.sh @@ -0,0 +1,193 @@ +#!/usr/bin/env bash +# integration.sh โ€” starts Docker dependencies, runs migrations, starts Chronos +# locally, publishes a test message, and verifies metrics are being recorded. +# +# Usage: called by `make integration` from the repo root. 
set -euo pipefail

# ─── configuration ───────────────────────────────────────────────────────────
# Every setting can be overridden from the environment; the value after `:-`
# is the default used when the variable is unset or empty.
KAFKA_EXT_PORT="${KAFKA_EXT_PORT:-9094}"
PG_PORT="${PG_PORT:-5432}"
# OTEL_EXPORTER_PROMETHEUS_PORT takes precedence over METRICS_PORT.
METRICS_PORT="${OTEL_EXPORTER_PROMETHEUS_PORT:-${METRICS_PORT:-9090}}"
# Expanded unquoted below on purpose so it word-splits into command + arguments.
COMPOSE="docker compose --project-name chronos -f dev/docker-compose/compose.yaml"
CHRONOS_PID_FILE="/tmp/chronos_integration.pid"
CHRONOS_LOG="/tmp/chronos_integration.log"
MAX_WAIT=120 # seconds to wait for each readiness check

# Unique ID for this test run — used to identify our message on the output topic
MSG_ID="integration-test-$(date +%s)"

# ─── helpers ─────────────────────────────────────────────────────────────────
# Coloured status lines: log = yellow progress, ok = green success,
# fail = red error on stderr followed by `exit 1`.
log() { printf '\033[0;33m%s\033[0m\n' "→ $*"; }
ok() { printf '\033[0;32m%s\033[0m\n' "✓ $*"; }
fail() { printf '\033[0;31m%s\033[0m\n' "✗ $*" >&2; exit 1; }

# wait_for LABEL CMD [ARGS...]
# Re-runs CMD (output discarded) every 2 seconds until it exits 0, printing a
# dot per retry. Calls fail() once MAX_WAIT seconds of sleeping have elapsed.
wait_for() {
  local label="$1"; shift
  local elapsed=0
  printf '%s ' "→ Waiting for ${label}..."
  until "$@" > /dev/null 2>&1; do
    printf '.'
    sleep 2
    elapsed=$((elapsed + 2))
    if [ "${elapsed}" -ge "${MAX_WAIT}" ]; then
      echo ""
      fail "Timed out waiting for ${label} after ${MAX_WAIT}s"
    fi
  done
  echo " ready"
}

# NOTE(review): the cleanup trap below is disabled, so the Chronos process
# started in step 5 is still running when this script exits and its PID stays
# in ${CHRONOS_PID_FILE}. Confirm that something else stops it before either
# re-enabling or deleting this block.
# cleanup() {
#   if [ -f "${CHRONOS_PID_FILE}" ]; then
#     local pid
#     pid="$(cat "${CHRONOS_PID_FILE}")"
#     if kill -0 "${pid}" 2>/dev/null; then
#       log "Stopping Chronos (pid ${pid})..."
#       kill "${pid}" 2>/dev/null || true
#       wait "${pid}" 2>/dev/null || true
#     fi
#     rm -f "${CHRONOS_PID_FILE}"
#   fi
# }
# trap cleanup EXIT

# ─── 1. start infrastructure ─────────────────────────────────────────────────
log "Starting infrastructure (postgres + kafka)..."
${COMPOSE} up -d postgres kafka

# ─── 2. wait for postgres ────────────────────────────────────────────────────
wait_for "postgres" \
  ${COMPOSE} exec -T postgres pg_isready -U admin -d chronos_db

# ─── 3. wait for kafka ───────────────────────────────────────────────────────
wait_for "kafka" \
  ${COMPOSE} exec -T kafka \
  /opt/bitnami/kafka/bin/kafka-topics.sh --bootstrap-server localhost:9092 --list

# ─── 4. run migrations ───────────────────────────────────────────────────────
log "Running database migrations..."
PG_HOST=localhost \
PG_PORT="${PG_PORT}" \
PG_USER=admin \
PG_PASSWORD=admin \
PG_DATABASE=chronos_db \
  cargo run --quiet --package pg_mig --bin chronos-pg-migrations
ok "Migrations complete"

# ─── 5. start chronos in background ──────────────────────────────────────────
# stdout and stderr both go to ${CHRONOS_LOG}; the PID of the backgrounded
# `cargo run` is written to ${CHRONOS_PID_FILE}.
log "Starting Chronos (logs → ${CHRONOS_LOG})..."
KAFKA_HOST=localhost \
KAFKA_PORT="${KAFKA_EXT_PORT}" \
KAFKA_CLIENT_ID=chronos \
KAFKA_GROUP_ID=chronos \
KAFKA_IN_TOPIC=chronos.in \
KAFKA_OUT_TOPIC=chronos.out \
KAFKA_USERNAME="" \
KAFKA_PASSWORD="" \
PG_HOST=localhost \
PG_PORT="${PG_PORT}" \
PG_USER=admin \
PG_PASSWORD=admin \
PG_DATABASE=chronos_db \
PG_POOL_SIZE=10 \
RUST_LOG=warn \
OTEL_EXPORTER_PROMETHEUS_HOST=0.0.0.0 \
OTEL_EXPORTER_PROMETHEUS_PORT="${METRICS_PORT}" \
MONITOR_DB_POLL=5 \
PROCESSOR_DB_POLL=5 \
TIMING_ADVANCE=0 \
FAIL_DETECT_INTERVAL=10 \
  cargo run --quiet --package chronos_bin --bin chronos \
  > "${CHRONOS_LOG}" 2>&1 &
echo $! > "${CHRONOS_PID_FILE}"

# ─── 6. wait for metrics endpoint ────────────────────────────────────────────
wait_for "Chronos metrics endpoint" \
  curl -sf "http://localhost:${METRICS_PORT}/metrics"

# ─── 7. publish test message ─────────────────────────────────────────────────
# The deadline is 1 minute in the past so Chronos fires the message immediately
# to the output topic, exercising the full consume → store-or-fire path.
log "Publishing test message (id: ${MSG_ID})..."
CHRONOS_MSG_ID="${MSG_ID}" \
KAFKA_HOST=localhost \
KAFKA_PORT="${KAFKA_EXT_PORT}" \
KAFKA_CLIENT_ID=chronos-test-publisher \
KAFKA_GROUP_ID=chronos-test-publisher \
KAFKA_IN_TOPIC=chronos.in \
KAFKA_OUT_TOPIC=chronos.out \
KAFKA_USERNAME="" \
KAFKA_PASSWORD="" \
  cargo run --quiet --package chronos_ex --example publish_test_message
ok "Message published"

# ─── 8. verify message fired to output topic ─────────────────────────────────
# Consume from chronos.out from the beginning, waiting up to 30s for the message
# to appear. kafka-console-consumer exits when max-messages is reached OR when
# no new messages arrive within timeout-ms — whichever comes first.
# The || true prevents set -e from aborting on the consumer's non-zero exit
# (timeout reached) which is normal when the topic drains before max-messages.
# NOTE(review): with --from-beginning and --max-messages 50, a topic that
# already holds 50 or more older messages would end the read before this run's
# message is reached — confirm the topic is fresh for every run.
log "Waiting for message ${MSG_ID} on chronos.out (up to 30s)..."
FIRED_OUTPUT=$(
  ${COMPOSE} exec -T kafka \
    /opt/bitnami/kafka/bin/kafka-console-consumer.sh \
    --bootstrap-server localhost:9092 \
    --topic chronos.out \
    --from-beginning \
    --max-messages 50 \
    --timeout-ms 30000 \
    2>/dev/null || true
)

# A here-string is used instead of `echo ... | grep -q`: under `pipefail`,
# grep -q exiting on the first match can make the writer side of the pipe fail
# with SIGPIPE and turn a successful match into a reported failure.
# -F matches MSG_ID as a literal string rather than as a regular expression.
if grep -qF -- "${MSG_ID}" <<<"${FIRED_OUTPUT}"; then
  ok "Message ${MSG_ID} arrived on chronos.out"
else
  echo ""
  printf '\033[0;31m%s\033[0m\n' "✗ Message ${MSG_ID} was NOT found on chronos.out" >&2
  echo " Last 20 lines of Chronos log:" >&2
  tail -20 "${CHRONOS_LOG}" >&2
  fail "Message delivery test failed"
fi

# ─── 9. show metrics ─────────────────────────────────────────────────────────
# Display only. The pipeline is best-effort so that an empty grep result does
# not abort the run (set -e + pipefail) before step 10 reports exactly which
# metric families are missing.
echo ""
echo "═══════════════════════════════════════════════════════"
echo " Chronos metrics (http://localhost:${METRICS_PORT}/metrics)"
echo "═══════════════════════════════════════════════════════"
{
  curl -sf "http://localhost:${METRICS_PORT}/metrics" \
    | grep -E "^(# HELP|# TYPE|chronos_msg_)" \
    | sort
} || true
echo ""

# ─── 10. verify all five metric families are present ─────────────────────────
log "Verifying metric families..."
+METRICS_OUTPUT="$(curl -sf "http://localhost:${METRICS_PORT}/metrics")" +EXPECTED_METRICS=( + "chronos_msg_consume_latency" + "chronos_msg_process_latency" + "chronos_msg_wait_time" + "chronos_msg_jitter" + "chronos_msg_reset" +) +ALL_OK=true +for metric in "${EXPECTED_METRICS[@]}"; do + if echo "${METRICS_OUTPUT}" | grep -q "^# HELP ${metric}"; then + ok "${metric} present" + else + printf '\033[0;31m%s\033[0m\n' "โœ— ${metric} MISSING" >&2 + ALL_OK=false + fi +done + +echo "" +if [ "${ALL_OK}" = "true" ]; then + ok "All metrics verified" +else + fail "One or more metrics are missing โ€” check ${CHRONOS_LOG}" +fi + +echo "" +ok "Integration test complete" +echo " Chronos logs: ${CHRONOS_LOG}" +echo " Run 'make integration.down' to stop Docker services." diff --git a/scripts/pre-commit-checks.sh b/scripts/pre-commit-checks.sh deleted file mode 100755 index 4b7d15c..0000000 --- a/scripts/pre-commit-checks.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash -set -e - -make withenv RECIPE=lint -make withenv RECIPE=test.unit - -