From 00049d6e47f5b71ece23fd9271415ead8c40880d Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Tue, 28 Apr 2026 09:32:26 +0200 Subject: [PATCH 01/49] =?UTF-8?q?bump:=20version=200.4.0=20=E2=86=92=201.0?= =?UTF-8?q?.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 23 +++++++++++++++++++++++ osmsg/__version__.py | 2 +- pyproject.toml | 2 +- 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 10ccba6..1842e75 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,26 @@ +## v1.0.0 (2026-04-28) + +### Fix + +- **docker**: fixes docker images , replaced slim image with the distroless +- **ci**: fix lib creds on ci +- **data**: fix bug on data loss due to window changeset open +- **stat**: completeness test cases +- **stream**: fixes live streaming of the compressed osm files +- **test**: fixes test case strip issue +- **Dockerfile**: version upgrade in stage 1 - missed that one in the last commit +- **test_app.yml**: I had to remove "" from the python version number and change the number to python3.x + +### Refactor + +- **osmsg**: Updated the processing with this approach: Workers → write Parquet (independent) → final DuckDB merge. 
+- **osmsg**: Data type validation with pydantic models and multi-process implementations for processing of files +- **build.yml-test_app.yml**: remove unneeded installs in test_app.yml and let uv set up python in both test_app.yml and build.yml + +### Perf + +- **chore**: housekeeping removing dead links + ## v0.3.0 (2024-08-26) ### Feat diff --git a/osmsg/__version__.py b/osmsg/__version__.py index 6a9beea..5becc17 100644 --- a/osmsg/__version__.py +++ b/osmsg/__version__.py @@ -1 +1 @@ -__version__ = "0.4.0" +__version__ = "1.0.0" diff --git a/pyproject.toml b/pyproject.toml index 0174908..e5de6ac 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "osmsg" -version = "0.4.0" +version = "1.0.0" description = "OpenStreetMap Stats Generator: Commandline" readme = "README.md" authors = [ From 87b97834a3d725b81cdf85633446ee7977f4c8a0 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Tue, 28 Apr 2026 09:34:02 +0200 Subject: [PATCH 02/49] fix(lock): uv lock updates the lock file --- uv.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uv.lock b/uv.lock index 1b6be7c..6b2c03c 100644 --- a/uv.lock +++ b/uv.lock @@ -597,7 +597,7 @@ wheels = [ [[package]] name = "osmsg" -version = "0.4.0" +version = "1.0.0" source = { editable = "." 
} dependencies = [ { name = "duckdb" }, From 67a6c1fc8876132f775f9eef06a224515f939f48 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Tue, 28 Apr 2026 09:45:51 +0200 Subject: [PATCH 03/49] docs(bumps-readme): userfriendly docs and add token in ci --- .github/workflows/ci.yml | 2 +- .github/workflows/publish.yml | 15 +++-- CONTRIBUTING.md | 56 ++++++++++++++-- README.md | 118 +++++++++++++++++++++++++--------- docs/Installation.md | 4 +- docs/Manual.md | 14 ++-- pyproject.toml | 4 +- 7 files changed, 163 insertions(+), 50 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b2fda4d..658a2eb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -52,7 +52,7 @@ jobs: - run: uv build --no-sources smoke-last-hour: - name: Smoke — process last hour + name: Smoke (process last hour) runs-on: ubuntu-latest needs: lint-typecheck-test timeout-minutes: 10 diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 1f4f019..8a2358c 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -2,14 +2,19 @@ name: Publish to PyPI on: release: - types: [published] + types: [ published ] + push: + tags: + - "v*" workflow_dispatch: jobs: publish: + name: Build and publish to PyPI runs-on: ubuntu-latest - permissions: - id-token: write # PyPI trusted publishing + environment: + name: pypi + url: https://pypi.org/project/osmsg/ steps: - uses: actions/checkout@v6 @@ -22,5 +27,7 @@ jobs: - name: Build wheel + sdist run: uv build --no-sources - - name: Publish to PyPI (trusted publisher) + - name: Publish to PyPI + env: + UV_PUBLISH_TOKEN: ${{ secrets.PYPI_API_TOKEN }} run: uv publish dist/* diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c1cb96d..0fb7c44 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,6 +1,6 @@ # Contributing -Contributions are welcome. Please read the [Code of Conduct](./CODE_OF_CONDUCT.md) before starting. +Thanks for considering a contribution to **osmsg**. 
This is an [OSGeo Nepal](https://osgeonepal.org) project and we welcome PRs of every size: a typo fix, a new flag, a perf patch, a docs cleanup. Please read the [Code of Conduct](./CODE_OF_CONDUCT.md) before you start. ## Setup @@ -12,20 +12,64 @@ uv run pre-commit install uv run pytest -m "not network" ``` +`uv sync` installs runtime and dev dependencies (`ruff`, `ty`, `pytest`, `pre-commit`, `commitizen`) from `pyproject.toml`. `uv` is the only build/dev tool you need; no system OSM libraries are required. + +If you do not have `uv` yet: + +```bash +curl -LsSf https://astral.sh/uv/install.sh | sh +``` + ## Workflow -1. Open an issue first for non-trivial changes. +1. Open an issue first for non-trivial changes so we can agree on the approach. 2. Branch from `develop` (e.g. `fix/short-description`, `feat/short-description`). 3. Keep each PR to a single logical change. Squash intermediate commits before opening the PR. 4. Update the README or `docs/` for any user-visible behaviour change. +5. Open the PR against `develop`. `master` is reserved for releases. ## Coding standards -- **Format + lint**: `ruff` (config in `pyproject.toml`). Pre-commit auto-fixes. -- **Type-check**: `ty` (Astral). Must pass with zero errors. -- **Tests**: `pytest -m "not network"` for offline checks; `pytest -m network` for live Geofabrik / OSM integration. +- **Format and lint**: `ruff` (config in `pyproject.toml`). Pre-commit auto-fixes most issues. Run manually with `uv run ruff check osmsg tests` and `uv run ruff format osmsg tests`. +- **Type-check**: `ty` (Astral). Must pass with zero errors: `uv run ty check osmsg`. +- **Tests**: + - `uv run pytest -m "not network"` for offline checks. + - `uv run pytest -m network` for live Geofabrik / OSM integration (needs `OSM_USERNAME` and `OSM_PASSWORD`). - **Commits**: [Conventional Commits](https://www.conventionalcommits.org/) via `cz commit`. See [docs/Version_control.md](./docs/Version_control.md). 
+## CI + +Every PR runs: + +- `ruff check`, `ruff format --check`, `ty check`, `pytest -m "not network"` ([ci.yml](./.github/workflows/ci.yml)) +- Wheel and sdist build ([ci.yml](./.github/workflows/ci.yml)) +- Smoke run of `osmsg --last hour` to catch regressions on real planet data ([ci.yml](./.github/workflows/ci.yml)) +- Multi-arch Docker build ([docker.yml](./.github/workflows/docker.yml)) + +A green CI is a hard requirement before merge. + ## Releases -`cz bump` updates `pyproject.toml` + `osmsg/__version__.py`, refreshes `CHANGELOG.md`, and tags the release. Pushing the tag triggers PyPI publish + GHCR docker build via GitHub Actions. +Releases are cut from `master` by maintainers using `commitizen`: + +```bash +cz bump +git push --follow-tags +``` + +`cz bump` updates the version in `pyproject.toml` and `osmsg/__version__.py`, refreshes `CHANGELOG.md`, and tags the release. Pushing the tag (or publishing a GitHub Release) triggers: + +- PyPI publish via [publish.yml](./.github/workflows/publish.yml) using the `PYPI_API_TOKEN` repo secret. +- Multi-arch Docker image build to `ghcr.io/osgeonepal/osmsg` via [docker.yml](./.github/workflows/docker.yml). + +## Reporting issues + +Bugs and feature requests live in [GitHub issues](https://github.com/osgeonepal/osmsg/issues). For bugs, please include: + +- The `osmsg --version`, OS, and Python version. +- The exact command (or YAML config) you ran. +- The full traceback or error output. + +## License + +By contributing, you agree your contributions will be licensed under the [MIT License](./LICENSE). diff --git a/README.md b/README.md index 61db371..06ab8bc 100644 --- a/README.md +++ b/README.md @@ -1,42 +1,79 @@ # osmsg -Generate OpenStreetMap user stats from the command line. Point it at a time window, get back per-user counts of nodes/ways/relations created, modified, and deleted, in parquet, csv, json, markdown, or straight into Postgres. 
+[![CI](https://github.com/osgeonepal/osmsg/actions/workflows/ci.yml/badge.svg)](https://github.com/osgeonepal/osmsg/actions/workflows/ci.yml) +[![Docker](https://github.com/osgeonepal/osmsg/actions/workflows/docker.yml/badge.svg)](https://github.com/osgeonepal/osmsg/actions/workflows/docker.yml) +[![PyPI](https://img.shields.io/pypi/v/osmsg.svg)](https://pypi.org/project/osmsg/) +[![Python](https://img.shields.io/badge/python-3.11%2B-blue.svg)](https://www.python.org/downloads/) +[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](./LICENSE) +[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) +[![uv](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/uv/main/assets/badge/v0.json)](https://github.com/astral-sh/uv) +[![Container](https://img.shields.io/badge/ghcr.io-osgeonepal%2Fosmsg-2496ED?logo=docker)](https://github.com/osgeonepal/osmsg/pkgs/container/osmsg) + +**OpenStreetMap Stats Generator.** A tiny CLI (and Python library) that turns OSM history into per-user counts of nodes, ways, and relations created, modified, or deleted, written to parquet, csv, json, markdown, or Postgres. + +A Project of [OSGeo Nepal](https://osgeonepal.org). + +## What you get + +- Per-user create/modify/delete counts over any time window. +- Tag and hashtag breakdowns (e.g. `building`, `#hotosm`). +- Country and custom-boundary filters via Geofabrik. +- Cron-friendly resume with `--update`. +- Outputs you can query: parquet, csv, json, markdown, DuckDB, Postgres. ## Install +Pick the one that fits how you work. 
+ ```bash -pip install osmsg -# or, as a standalone CLI -uv tool install osmsg -# or, no install +pip install osmsg # into your project +uv tool install osmsg # standalone CLI docker run --rm -v "$PWD:/work" -w /work ghcr.io/osgeonepal/osmsg:latest --last hour ``` -## Examples +## Quick start ```bash -# What happened in the last hour, planet-wide -osmsg --last hour +osmsg --last hour # planet, last hour +osmsg --last day --tags building # last day with a tag breakdown +osmsg --hashtags hotosm --last day # only changesets tagged #hotosm +``` + +That's it. A `stats.duckdb` and a `stats.parquet` show up in your current folder. + +## Tutorials -# Yesterday's stats for a country (needs OSM credentials, see below) +### 1. Stats for a country + +```bash osmsg --country nepal --last day +``` + +`--country` resolves through Geofabrik and needs an OSM account. Set `OSM_USERNAME` and `OSM_PASSWORD` in your shell or a `.env` file: + +```bash +export OSM_USERNAME=you +export OSM_PASSWORD=secret +``` -# Custom range, with per-key tag breakdowns and a daily summary +### 2. A custom date range with summaries + +```bash osmsg --start "2026-04-01" --end "2026-04-08" \ --tags building --tags highway --summary +``` -# Only changesets tagged #hotosm (substring by default; --exact-lookup for whole-word) -osmsg --hashtags hotosm --last day +`--summary` adds a daily rollup file alongside the per-changeset stats. -# Cron-friendly: pick up where the last run left off -osmsg --country nepal --update -``` +### 3. Run on a schedule -YAML configs work too if your flag list gets long: `osmsg --config nepal.yaml`. +```bash +osmsg --country nepal --update # picks up where the last run stopped +``` -## Output +Drop that into cron or a GitHub Actions schedule. State is stored inside the DuckDB file, so reruns are safe. -Every run writes `stats.duckdb` (or `<--name>.duckdb`) plus whatever formats you ask for via `-f parquet|csv|json|markdown|psql`. Parquet is the default. 
Open it with duckdb, polars, pandas, whatever. +### 4. Query the output ```bash duckdb stats.duckdb -c "SELECT username, SUM(nodes_created) AS n @@ -44,15 +81,9 @@ duckdb stats.duckdb -c "SELECT username, SUM(nodes_created) AS n GROUP BY username ORDER BY n DESC LIMIT 10" ``` -The schema is the same in DuckDB and Postgres. Four tables: `users`, `changesets`, `changeset_stats`, and `state` (the resume marker for `--update`). - -## Credentials - -`--country` (and Geofabrik URLs) need an OSM account; public planet replication (`--url minute|hour|day`) doesn't. - -Set `OSM_USERNAME` and `OSM_PASSWORD` in your environment or a `.env` file. Or pass `--username` and pipe the password to `--password-stdin`. OAuth 2.0 happens behind the scenes. +Same schema in DuckDB and Postgres: `users`, `changesets`, `changeset_stats`, `state`. -## Library +### 5. Use it as a library ```python from datetime import datetime, UTC @@ -67,13 +98,40 @@ result = run(RunConfig( print(result["files"]["parquet"]) ``` -That's the same pipeline the CLI runs. See [docs/Manual.md](./docs/Manual.md) for everything else. +Same pipeline as the CLI. + +### 6. Long flag lists? Use a config + +```bash +osmsg --config nepal.yaml +``` + +Any flag works as a YAML key. See [docs/Manual.md](./docs/Manual.md) for the full list. + +## Output formats + +Every run writes `stats.duckdb` (or `<--name>.duckdb`) plus the formats you ask for via `-f parquet|csv|json|markdown|psql`. Parquet is the default. Open it with duckdb, polars, pandas, anything. + +## Documentation + +- [Installation](./docs/Installation.md) +- [Manual](./docs/Manual.md) (every flag, with examples) +- [Version control / release notes](./docs/Version_control.md) -## Develop +## Contributing + +Pull requests are welcome. 
Quick path: ```bash git clone https://github.com/osgeonepal/osmsg && cd osmsg +git switch develop uv sync -uv run pytest -uv run osmsg --help +uv run pre-commit install +uv run pytest -m "not network" ``` + +Please read [CONTRIBUTING.md](./CONTRIBUTING.md) and the [Code of Conduct](./CODE_OF_CONDUCT.md) before opening a PR. Use [Conventional Commits](https://www.conventionalcommits.org/) (`cz commit`). + +## License + +[MIT](./LICENSE) © OSGeo Nepal contributors. diff --git a/docs/Installation.md b/docs/Installation.md index 8b216a7..e4e8127 100644 --- a/docs/Installation.md +++ b/docs/Installation.md @@ -44,5 +44,5 @@ uv run pytest -m "not network" ### Tests -- `pytest -m "not network"` — offline unit tests (handlers, queries, exporters, CLI). -- `pytest -m network` — integration tests against Geofabrik / OSM (requires `OSM_USERNAME` / `OSM_PASSWORD`). +- `pytest -m "not network"` for offline unit tests (handlers, queries, exporters, CLI). +- `pytest -m network` for integration tests against Geofabrik / OSM (requires `OSM_USERNAME` / `OSM_PASSWORD`). diff --git a/docs/Manual.md b/docs/Manual.md index 0c7d633..da9aa95 100644 --- a/docs/Manual.md +++ b/docs/Manual.md @@ -1,5 +1,7 @@ # Manual +The full flag reference, grouped by what you're trying to do. New here? Start with the [README quick start](../README.md#quick-start), then come back for the details. + ## Time range ```bash @@ -30,7 +32,7 @@ osmsg --boundary region.geojson > Each `--users`, `--hashtags`, `--tags`, `--length`, `--country`, `--url`, `-f` > takes one value at a time; pass the flag again for additional values. - +> > Editor stats are always included when `--changeset` or `--hashtags` is on: > the `editors` column lists every `created_by` tag the user appeared with. @@ -52,9 +54,9 @@ osmsg --last day --summary # daily breakdown in each request osmsg --last day -f psql --psql-dsn "host=localhost dbname=osm user=osm" ``` -> Every run writes `.duckdb` plus the formats you ask for. 
Parquet is the canonical exchange — open with DuckDB, polars, or pandas directly. - -> `--summary` follows the same `-f` formats: requesting `-f csv --summary` produces both `.csv` and `_summary.csv`. The `psql` target is intentionally skipped for summary — the daily breakdown is just a query over the four base tables, so consumers derive it on demand instead of duplicating data. +> Every run writes `.duckdb` plus the formats you ask for. Parquet is the canonical exchange: open with DuckDB, polars, or pandas directly. +> +> `--summary` follows the same `-f` formats: requesting `-f csv --summary` produces both `.csv` and `_summary.csv`. The `psql` target is intentionally skipped for summary, since the daily breakdown is just a query over the four base tables, so consumers derive it on demand instead of duplicating data. ## Config file @@ -82,7 +84,7 @@ update: true ## Caching -Downloaded `.osc.gz` files cache to a per-user dir (`~/Library/Caches/osmsg` on macOS, `~/.cache/osmsg` on Linux). Re-running the same range reuses them — no network needed. `--cache-dir` to relocate, `--delete-temp` to clean up after a run. +Downloaded `.osc.gz` files cache to a per-user dir (`~/Library/Caches/osmsg` on macOS, `~/.cache/osmsg` on Linux). Re-running the same range reuses them, so no network is needed. `--cache-dir` to relocate, `--delete-temp` to clean up after a run. ## Credentials @@ -92,7 +94,7 @@ Downloaded `.osc.gz` files cache to a per-user dir (`~/Library/Caches/osmsg` on 2. `OSM_USERNAME` + `OSM_PASSWORD` env vars (auto-loaded from `.env`) 3. Interactive prompt (TTY only) -> The CLI does not accept `--password` directly — passwords on the command line leak into shell history and `ps` output. Use stdin or env vars. +> The CLI does not accept `--password` directly, because passwords on the command line leak into shell history and `ps` output. Use stdin or env vars. 
## Recipes diff --git a/pyproject.toml b/pyproject.toml index e5de6ac..08b6bf9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,8 @@ readme = "README.md" authors = [ {name = "Kshitij Raj Sharma", email = "skshitizraj@gmail.com"}, ] -license = {text = "GPL-3.0-only"} +license = {text = "MIT"} +license-files = ["LICENSE"] requires-python = ">=3.11" dependencies = [ "duckdb>=1.5.2", @@ -25,6 +26,7 @@ dependencies = [ keywords = ["osm", "stats", "commandline", "openstreetmap"] classifiers = [ + "License :: OSI Approved :: MIT License", "Topic :: Utilities", "Topic :: Scientific/Engineering :: GIS", "Programming Language :: Python :: 3", From 4aae09f11ca0e9d62380280ba2e323b9eb0490c8 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Tue, 28 Apr 2026 09:45:57 +0200 Subject: [PATCH 04/49] =?UTF-8?q?bump:=20version=201.0.0=20=E2=86=92=201.0?= =?UTF-8?q?.1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 6 ++++++ osmsg/__version__.py | 2 +- pyproject.toml | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1842e75..e74a0f4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +## v1.0.1 (2026-04-28) + +### Fix + +- **lock**: uv lock + ## v1.0.0 (2026-04-28) ### Fix diff --git a/osmsg/__version__.py b/osmsg/__version__.py index 5becc17..5c4105c 100644 --- a/osmsg/__version__.py +++ b/osmsg/__version__.py @@ -1 +1 @@ -__version__ = "1.0.0" +__version__ = "1.0.1" diff --git a/pyproject.toml b/pyproject.toml index 08b6bf9..0de5e61 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "osmsg" -version = "1.0.0" +version = "1.0.1" description = "OpenStreetMap Stats Generator: Commandline" readme = "README.md" authors = [ From 1c6f9ef4e5a8bebea8bb0a43f868f4a4355ab661 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Tue, 28 Apr 2026 09:48:55 +0200 Subject: [PATCH 05/49] fix(license): fix license text on build --- 
pyproject.toml | 3 +-- uv.lock | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 0de5e61..993e0f9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ readme = "README.md" authors = [ {name = "Kshitij Raj Sharma", email = "skshitizraj@gmail.com"}, ] -license = {text = "MIT"} +license = "MIT" license-files = ["LICENSE"] requires-python = ">=3.11" dependencies = [ @@ -26,7 +26,6 @@ dependencies = [ keywords = ["osm", "stats", "commandline", "openstreetmap"] classifiers = [ - "License :: OSI Approved :: MIT License", "Topic :: Utilities", "Topic :: Scientific/Engineering :: GIS", "Programming Language :: Python :: 3", diff --git a/uv.lock b/uv.lock index 6b2c03c..cb91d04 100644 --- a/uv.lock +++ b/uv.lock @@ -597,7 +597,7 @@ wheels = [ [[package]] name = "osmsg" -version = "1.0.0" +version = "1.0.1" source = { editable = "." } dependencies = [ { name = "duckdb" }, From a124461f2a9699621d2c45b89613268233898fa7 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Tue, 28 Apr 2026 09:52:01 +0200 Subject: [PATCH 06/49] fix(precommit): add lock to precommit hooks --- .pre-commit-config.yaml | 5 +++++ pyproject.toml | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index caffca4..cee3f20 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,6 +6,11 @@ repos: - id: commitizen stages: [commit-msg] + - repo: https://github.com/astral-sh/uv-pre-commit + rev: "0.11.8" + hooks: + - id: uv-lock + - repo: https://github.com/astral-sh/ruff-pre-commit rev: "v0.15.12" hooks: diff --git a/pyproject.toml b/pyproject.toml index 993e0f9..e6e85cd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,6 +77,10 @@ version_files = [ "pyproject.toml:version", "osmsg/__version__.py", ] +pre_bump_hooks = [ + "uv lock", + "git add uv.lock", +] [tool.ruff] fix = true From e8299bd47371ab74935241e0da0d849ce0fac46e Mon Sep 17 00:00:00 2001 From: 
kshitijrajsharma Date: Tue, 28 Apr 2026 09:52:34 +0200 Subject: [PATCH 07/49] =?UTF-8?q?bump:=20version=201.0.1=20=E2=86=92=201.0?= =?UTF-8?q?.2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 7 +++++++ osmsg/__version__.py | 2 +- pyproject.toml | 2 +- uv.lock | 2 +- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e74a0f4..d27c6b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## v1.0.2 (2026-04-28) + +### Fix + +- **precommit**: add lock to precommit hooks +- **license**: fix license text on build + ## v1.0.1 (2026-04-28) ### Fix diff --git a/osmsg/__version__.py b/osmsg/__version__.py index 5c4105c..7863915 100644 --- a/osmsg/__version__.py +++ b/osmsg/__version__.py @@ -1 +1 @@ -__version__ = "1.0.1" +__version__ = "1.0.2" diff --git a/pyproject.toml b/pyproject.toml index e6e85cd..d4813a4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "osmsg" -version = "1.0.1" +version = "1.0.2" description = "OpenStreetMap Stats Generator: Commandline" readme = "README.md" authors = [ diff --git a/uv.lock b/uv.lock index cb91d04..f08fbd1 100644 --- a/uv.lock +++ b/uv.lock @@ -597,7 +597,7 @@ wheels = [ [[package]] name = "osmsg" -version = "1.0.1" +version = "1.0.2" source = { editable = "." 
} dependencies = [ { name = "duckdb" }, From 2e3b7617c2d05ffa25023892c05e82c82d7f872f Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Tue, 28 Apr 2026 09:57:57 +0200 Subject: [PATCH 08/49] docs(lint): markdown --- .markdownlint.json | 12 ++++++++++++ CODE_OF_CONDUCT.md | 6 +++--- CONTRIBUTING.md | 13 +++++++++---- README.md | 12 ++++++++---- docs/Installation.md | 15 ++++++++++++++- docs/Manual.md | 24 +++++++++++++++++------- docs/Version_control.md | 38 ++++++++++++++++++++++---------------- 7 files changed, 85 insertions(+), 35 deletions(-) create mode 100644 .markdownlint.json diff --git a/.markdownlint.json b/.markdownlint.json new file mode 100644 index 0000000..5e1112e --- /dev/null +++ b/.markdownlint.json @@ -0,0 +1,12 @@ +{ + "default": true, + "MD013": { + "line_length": 120, + "code_blocks": false, + "tables": false, + "headings": false + }, + "MD024": { "siblings_only": true }, + "MD033": false, + "MD041": false +} diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 18c9147..999d8db 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -116,7 +116,7 @@ the community. This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.0, available at -https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. +. Community Impact Guidelines were inspired by [Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/diversity). @@ -124,5 +124,5 @@ enforcement ladder](https://github.com/mozilla/diversity). [homepage]: https://www.contributor-covenant.org For answers to common questions about this code of conduct, see the FAQ at -https://www.contributor-covenant.org/faq. Translations are available at -https://www.contributor-covenant.org/translations. +. Translations are available at +. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0fb7c44..22713c4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,6 +1,8 @@ # Contributing -Thanks for considering a contribution to **osmsg**. 
This is an [OSGeo Nepal](https://osgeonepal.org) project and we welcome PRs of every size: a typo fix, a new flag, a perf patch, a docs cleanup. Please read the [Code of Conduct](./CODE_OF_CONDUCT.md) before you start. +Thanks for considering a contribution to **osmsg**. This is an [OSGeo Nepal](https://osgeonepal.org) project and we +welcome PRs of every size: a typo fix, a new flag, a perf patch, a docs cleanup. Please read the +[Code of Conduct](./CODE_OF_CONDUCT.md) before you start. ## Setup @@ -12,7 +14,8 @@ uv run pre-commit install uv run pytest -m "not network" ``` -`uv sync` installs runtime and dev dependencies (`ruff`, `ty`, `pytest`, `pre-commit`, `commitizen`) from `pyproject.toml`. `uv` is the only build/dev tool you need; no system OSM libraries are required. +`uv sync` installs runtime and dev dependencies (`ruff`, `ty`, `pytest`, `pre-commit`, `commitizen`) from +`pyproject.toml`. `uv` is the only build/dev tool you need; no system OSM libraries are required. If you do not have `uv` yet: @@ -30,7 +33,8 @@ curl -LsSf https://astral.sh/uv/install.sh | sh ## Coding standards -- **Format and lint**: `ruff` (config in `pyproject.toml`). Pre-commit auto-fixes most issues. Run manually with `uv run ruff check osmsg tests` and `uv run ruff format osmsg tests`. +- **Format and lint**: `ruff` (config in `pyproject.toml`). Pre-commit auto-fixes most issues. Run manually with + `uv run ruff check osmsg tests` and `uv run ruff format osmsg tests`. - **Type-check**: `ty` (Astral). Must pass with zero errors: `uv run ty check osmsg`. - **Tests**: - `uv run pytest -m "not network"` for offline checks. @@ -57,7 +61,8 @@ cz bump git push --follow-tags ``` -`cz bump` updates the version in `pyproject.toml` and `osmsg/__version__.py`, refreshes `CHANGELOG.md`, and tags the release. Pushing the tag (or publishing a GitHub Release) triggers: +`cz bump` updates the version in `pyproject.toml` and `osmsg/__version__.py`, refreshes `CHANGELOG.md`, and tags +the release. 
Pushing the tag (or publishing a GitHub Release) triggers: - PyPI publish via [publish.yml](./.github/workflows/publish.yml) using the `PYPI_API_TOKEN` repo secret. - Multi-arch Docker image build to `ghcr.io/osgeonepal/osmsg` via [docker.yml](./.github/workflows/docker.yml). diff --git a/README.md b/README.md index 06ab8bc..243713d 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,8 @@ [![uv](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/uv/main/assets/badge/v0.json)](https://github.com/astral-sh/uv) [![Container](https://img.shields.io/badge/ghcr.io-osgeonepal%2Fosmsg-2496ED?logo=docker)](https://github.com/osgeonepal/osmsg/pkgs/container/osmsg) -**OpenStreetMap Stats Generator.** A tiny CLI (and Python library) that turns OSM history into per-user counts of nodes, ways, and relations created, modified, or deleted, written to parquet, csv, json, markdown, or Postgres. +**OpenStreetMap Stats Generator.** A tiny CLI (and Python library) that turns OSM history into per-user counts +of nodes, ways, and relations created, modified, or deleted, written to parquet, csv, json, markdown, or Postgres. A Project of [OSGeo Nepal](https://osgeonepal.org). @@ -49,7 +50,8 @@ That's it. A `stats.duckdb` and a `stats.parquet` show up in your current folder osmsg --country nepal --last day ``` -`--country` resolves through Geofabrik and needs an OSM account. Set `OSM_USERNAME` and `OSM_PASSWORD` in your shell or a `.env` file: +`--country` resolves through Geofabrik and needs an OSM account. Set `OSM_USERNAME` and `OSM_PASSWORD` +in your shell or a `.env` file: ```bash export OSM_USERNAME=you @@ -110,7 +112,8 @@ Any flag works as a YAML key. See [docs/Manual.md](./docs/Manual.md) for the ful ## Output formats -Every run writes `stats.duckdb` (or `<--name>.duckdb`) plus the formats you ask for via `-f parquet|csv|json|markdown|psql`. Parquet is the default. Open it with duckdb, polars, pandas, anything. 
+Every run writes `stats.duckdb` (or `<--name>.duckdb`) plus the formats you ask for via +`-f parquet|csv|json|markdown|psql`. Parquet is the default. Open it with duckdb, polars, pandas, anything. ## Documentation @@ -130,7 +133,8 @@ uv run pre-commit install uv run pytest -m "not network" ``` -Please read [CONTRIBUTING.md](./CONTRIBUTING.md) and the [Code of Conduct](./CODE_OF_CONDUCT.md) before opening a PR. Use [Conventional Commits](https://www.conventionalcommits.org/) (`cz commit`). +Please read [CONTRIBUTING.md](./CONTRIBUTING.md) and the [Code of Conduct](./CODE_OF_CONDUCT.md) before opening a PR. +Use [Conventional Commits](https://www.conventionalcommits.org/) (`cz commit`). ## License diff --git a/docs/Installation.md b/docs/Installation.md index e4e8127..0907344 100644 --- a/docs/Installation.md +++ b/docs/Installation.md @@ -40,7 +40,20 @@ uv run pytest -m "not network" ### Pre-commit hooks -`ruff` (lint + format), `ty` (Astral type checker), `markdownlint`, `commitizen` (conventional commits). +`ruff` (lint + format), `ty` (Astral type checker), `uv-lock` (keeps `uv.lock` in sync with `pyproject.toml`), +`markdownlint`, `commitizen` (conventional commits). + +Hooks run automatically on every `git commit` once you have run `uv run pre-commit install`. To run them manually: + +```bash +uv run pre-commit run --all-files # run every hook against the whole repo +uv run pre-commit run ruff --all-files # run a single hook +uv run pre-commit run --files osmsg/cli.py # run hooks against specific files +uv run pre-commit autoupdate # bump hook versions in .pre-commit-config.yaml +``` + +If a hook auto-fixes a file (e.g. `ruff` reformats, `uv-lock` refreshes the lockfile), the commit is aborted: +re-stage the changes and commit again. ### Tests diff --git a/docs/Manual.md b/docs/Manual.md index da9aa95..17c96ff 100644 --- a/docs/Manual.md +++ b/docs/Manual.md @@ -1,6 +1,7 @@ # Manual -The full flag reference, grouped by what you're trying to do. New here? 
Start with the [README quick start](../README.md#quick-start), then come back for the details. +The full flag reference, grouped by what you're trying to do. New here? Start with the +[README quick start](../README.md#quick-start), then come back for the details. ## Time range @@ -54,9 +55,13 @@ osmsg --last day --summary # daily breakdown in each request osmsg --last day -f psql --psql-dsn "host=localhost dbname=osm user=osm" ``` -> Every run writes `.duckdb` plus the formats you ask for. Parquet is the canonical exchange: open with DuckDB, polars, or pandas directly. +> Every run writes `.duckdb` plus the formats you ask for. Parquet is the canonical exchange: +> open with DuckDB, polars, or pandas directly. > -> `--summary` follows the same `-f` formats: requesting `-f csv --summary` produces both `.csv` and `_summary.csv`. The `psql` target is intentionally skipped for summary, since the daily breakdown is just a query over the four base tables, so consumers derive it on demand instead of duplicating data. +> `--summary` follows the same `-f` formats: requesting `-f csv --summary` produces both `.csv` +> and `_summary.csv`. The `psql` target is intentionally skipped for summary, since the daily +> breakdown is just a query over the four base tables, so consumers derive it on demand instead of +> duplicating data. ## Config file @@ -84,17 +89,21 @@ update: true ## Caching -Downloaded `.osc.gz` files cache to a per-user dir (`~/Library/Caches/osmsg` on macOS, `~/.cache/osmsg` on Linux). Re-running the same range reuses them, so no network is needed. `--cache-dir` to relocate, `--delete-temp` to clean up after a run. +Downloaded `.osc.gz` files cache to a per-user dir (`~/Library/Caches/osmsg` on macOS, +`~/.cache/osmsg` on Linux). Re-running the same range reuses them, so no network is needed. +`--cache-dir` to relocate, `--delete-temp` to clean up after a run. ## Credentials `--country` and any `geofabrik` URL need OSM credentials. Resolution order: -1. 
`--username` (CLI) + `OSM_PASSWORD` env var, or `--password-stdin` to pipe a password in (e.g. `cat secret | osmsg --password-stdin ...`) +1. `--username` (CLI) + `OSM_PASSWORD` env var, or `--password-stdin` to pipe a password in + (e.g. `cat secret | osmsg --password-stdin ...`) 2. `OSM_USERNAME` + `OSM_PASSWORD` env vars (auto-loaded from `.env`) 3. Interactive prompt (TTY only) -> The CLI does not accept `--password` directly, because passwords on the command line leak into shell history and `ps` output. Use stdin or env vars. +> The CLI does not accept `--password` directly, because passwords on the command line leak into +> shell history and `ps` output. Use stdin or env vars. ## Recipes @@ -114,4 +123,5 @@ osmsg --start "2025-01-01 00:00:00" --end "2026-01-01 00:00:00" \ osmsg --country nepal --update ``` -> `map_changes` per row is the sum of the nine element columns (`{nodes,ways,rels}_{created,modified,deleted}`); POI counters are tracked separately. +> `map_changes` per row is the sum of the nine element columns +> (`{nodes,ways,rels}_{created,modified,deleted}`); POI counters are tracked separately. diff --git a/docs/Version_control.md b/docs/Version_control.md index 8e6bf2d..6323fe2 100644 --- a/docs/Version_control.md +++ b/docs/Version_control.md @@ -2,7 +2,8 @@ ## Conventional Commits -A [specification](https://www.conventionalcommits.org/en/v1.0.0) for adding human and machine readable meaning to commit messages. +A [specification](https://www.conventionalcommits.org/en/v1.0.0) for adding human and machine +readable meaning to commit messages. **Format**: <type>[optional scope]: <description> @@ -13,7 +14,8 @@ Example `fix: fixed the bug in issue #123` ## Commitizen CLI -[Commitizen](https://commitizen-tools.github.io/commitizen) is a Python tool to help with creating **conventional commits** and automating version control. 
+[Commitizen](https://commitizen-tools.github.io/commitizen) is a Python tool to help with creating +**conventional commits** and automating version control. ### Install @@ -26,35 +28,39 @@ Example `fix: fixed the bug in issue #123` ### Bumping a Version -- When you decide it is time to create a new version: +When you decide it is time to create a new version: -1. Create a new branch +1. Create a new branch: -`git checkout -b bump/new_release` + ```bash + git checkout -b bump/new_release + ``` -2. Bump the version and push +2. Bump the version and push: -```bash -pip install commitizen # (if not installed) - -cz bump - -git push -``` + ```bash + pip install commitizen # (if not installed) + cz bump + git push + ``` This will: + - Update the SemVer version number in locations specific in `pyproject.toml`, throughout the codebase. - - If a `feat` commit is included, the version is bumped by a minor increment (0.x.0), if only `fix` is included a patch will be used (0.0.x). + - If a `feat` commit is included, the version is bumped by a minor increment (0.x.0); if only `fix` + is included a patch will be used (0.0.x). - Automatically update CHANGELOG.md with all changes since the last version. - Create a tag matching the version number. -> Note: in a repo where you have direct push access, you would simply update on main and push. As we are using Git-Flow, a PR is necessary. +> Note: in a repo where you have direct push access, you would simply update on main and push. +> As we are using Git-Flow, a PR is necessary. ## Creating Releases 1. Update the version throughout the code ([Bumping a Version](#bumping-a-version)). 2. Click `Draft a new release`. -3. Click `Choose a tag`, then input the current version number and press enter (this will automatically create a matching tag for your release). +3. Click `Choose a tag`, then input the current version number and press enter (this will + automatically create a matching tag for your release). 4. 
Set the `Release title` to v`x.x.x`, replacing with your version number. 5. Add a description if possible, then release. From 8c72df9b0dbfb86d62a3c9c0a62cd37ca1d4bebd Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Tue, 28 Apr 2026 10:13:02 +0200 Subject: [PATCH 09/49] docs(uvx): add uvx installation command --- README.md | 4 ++++ docs/Installation.md | 10 ++++++++++ osmsg/export/markdown.py | 27 ++++++++++++++++++++++++++- 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 243713d..a2df536 100644 --- a/README.md +++ b/README.md @@ -27,11 +27,15 @@ A Project of [OSGeo Nepal](https://osgeonepal.org). Pick the one that fits how you work. ```bash +uvx --from osmsg osmsg --last hour # zero-install, one-shot run pip install osmsg # into your project uv tool install osmsg # standalone CLI docker run --rm -v "$PWD:/work" -w /work ghcr.io/osgeonepal/osmsg:latest --last hour ``` +`uvx` can run osmsg in a throwaway environment , no install, no virtualenv to manage. Works +with any flag combination, e.g. `uvx --from osmsg osmsg --last hour --tags building --summary -f parquet -f markdown`. + ## Quick start ```bash diff --git a/docs/Installation.md b/docs/Installation.md index 0907344..26773d4 100644 --- a/docs/Installation.md +++ b/docs/Installation.md @@ -3,6 +3,8 @@ ## End user ```bash +uvx --from osmsg osmsg --last hour # zero-install, one-shot run +# or pip install osmsg # or uv tool install osmsg @@ -10,6 +12,14 @@ uv tool install osmsg Wheels include the compiled `pyosmium` extension; no system OSM tools are required. +`uvx` (from [uv](https://docs.astral.sh/uv/)) runs osmsg in a managed, throwaway environment. handy for +ad-hoc runs, CI jobs, and cron entries where you don't want to manage a venv. 
If a stale resolver cache +ever picks an older release, add `--refresh`: + +```bash +uvx --refresh --from osmsg osmsg --last hour +``` + ## Docker Pull a published image from GHCR: diff --git a/osmsg/export/markdown.py b/osmsg/export/markdown.py index e3d4f8f..b8f3a2a 100644 --- a/osmsg/export/markdown.py +++ b/osmsg/export/markdown.py @@ -80,8 +80,33 @@ def _sum(*cols: str) -> int: parts.append(f"\nFull stats: `{fname}.parquet`") parts.append("\n#### Top 5 users") + user_cols = ( + ("rank", "rank"), + ("name", "name"), + ("changesets", "changesets"), + ("map_changes", "map changes"), + ("nodes_create", "nodes created"), + ("ways_create", "ways created"), + ("rels_create", "rels created"), + ("poi_create", "poi created"), + ("hashtags", "hashtags"), + ) + parts.append("| " + " | ".join(label for _, label in user_cols) + " |") + parts.append("| " + " | ".join("---" for _ in user_cols) + " |") for r in rows[:5]: - parts.append(f"- {r['name']}: {_human(int(r.get('map_changes', 0) or 0))} map changes") + cells: list[str] = [] + for key, _ in user_cols: + v = r.get(key) + if key == "hashtags": + hts = v or [] + cells.append(", ".join(hts[:3]) + (f" (+{len(hts) - 3})" if len(hts) > 3 else "")) + elif key == "name": + cells.append(str(v or "")) + elif key == "rank": + cells.append(str(v if v is not None else "")) + else: + cells.append(_human(int(v or 0))) + parts.append("| " + " | ".join(cells) + " |") if tm_stats and any("tasks_mapped" in r for r in rows): parts.append("\n#### Top 5 TM mappers") From a85c0a03dfa7e5ed2c9c609d3f4fe245e3283721 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Tue, 28 Apr 2026 11:27:29 +0200 Subject: [PATCH 10/49] perf(url): auto switch the replication url base don the input span --- data/Readme.md | 4 +- docs/Manual.md | 7 +++ osmsg/cli.py | 1 + osmsg/pipeline.py | 56 ++++++++++++++++-- tests/test_pipeline_smoke.py | 112 ++++++++++++++++++++++++++++++++++- 5 files changed, 170 insertions(+), 10 deletions(-) diff --git a/data/Readme.md 
b/data/Readme.md index 0247ed3..31c583e 100644 --- a/data/Readme.md +++ b/data/Readme.md @@ -1,3 +1,3 @@ -Geofabrik Countries : https://download.geofabrik.de/index-v1.json +Geofabrik Countries : <https://download.geofabrik.de/index-v1.json> -UN Countries Boundary: https://public.opendatasoft.com/explore/dataset/world-administrative-boundaries/information/?dataChart=eyJxdWVyaWVzIjpbeyJjb25maWciOnsiZGF0YXNldCI6IndvcmxkLWFkbWluaXN0cmF0aXZlLWJvdW5kYXJpZXMiLCJvcHRpb25zIjp7fX0sImNoYXJ0cyI6W3siYWxpZ25Nb250aCI6dHJ1ZSwidHlwZSI6ImNvbHVtbiIsImZ1bmMiOiJDT1VOVCIsInNjaWVudGlmaWNEaXNwbGF5Ijp0cnVlLCJjb2xvciI6IiNGRjUxNUEifV0sInhBeGlzIjoic3RhdHVzIiwibWF4cG9pbnRzIjo1MCwic29ydCI6IiJ9XSwidGltZXNjYWxlIjoiIiwiZGlzcGxheUxlZ2VuZCI6dHJ1ZSwiYWxpZ25Nb250aCI6dHJ1ZX0%3D \ No newline at end of file +UN Countries Boundary: <https://public.opendatasoft.com/explore/dataset/world-administrative-boundaries/information/?dataChart=eyJxdWVyaWVzIjpbeyJjb25maWciOnsiZGF0YXNldCI6IndvcmxkLWFkbWluaXN0cmF0aXZlLWJvdW5kYXJpZXMiLCJvcHRpb25zIjp7fX0sImNoYXJ0cyI6W3siYWxpZ25Nb250aCI6dHJ1ZSwidHlwZSI6ImNvbHVtbiIsImZ1bmMiOiJDT1VOVCIsInNjaWVudGlmaWNEaXNwbGF5Ijp0cnVlLCJjb2xvciI6IiNGRjUxNUEifV0sInhBeGlzIjoic3RhdHVzIiwibWF4cG9pbnRzIjo1MCwic29ydCI6IiJ9XSwidGltZXNjYWxlIjoiIiwiZGlzcGxheUxlZ2VuZCI6dHJ1ZSwiYWxpZ25Nb250aCI6dHJ1ZX0%3D> diff --git a/docs/Manual.md b/docs/Manual.md index 17c96ff..117c515 100644 --- a/docs/Manual.md +++ b/docs/Manual.md @@ -10,6 +10,8 @@ osmsg --last hour|day|week|month|year osmsg --days 7 osmsg --start "2026-04-01 00:00:00" --end "2026-04-08 00:00:00" osmsg --update # resume from last finished run in .duckdb + # (must use the same --url as the prior run; switching + # granularity would double-count via changeset_stats) ``` > Times are UTC. @@ -22,6 +24,11 @@ osmsg --url https://... # any OSM replication base osmsg --country nepal --country india --country africa # Geofabrik regions, resolved live ``` +> When `--url` is omitted, osmsg picks a planet replication granularity that fits the requested +> span: minute for spans under 6h, hour for 6h–7d, day for ≥7d. A warning prints when the +> auto-switch happens; pass `--url` explicitly to override (also suppressed by `--country`, +> `--update`, or multiple `--url` values). 
+ ## Filters ```bash diff --git a/osmsg/cli.py b/osmsg/cli.py index 4956408..fc58832 100644 --- a/osmsg/cli.py +++ b/osmsg/cli.py @@ -194,6 +194,7 @@ def main( end_date=_parse_dt(end), countries=country, urls=url or ["minute"], + url_explicit=url is not None, workers=workers, additional_tags=tags, hashtags=hashtags, diff --git a/osmsg/pipeline.py b/osmsg/pipeline.py index 4dac067..410c442 100644 --- a/osmsg/pipeline.py +++ b/osmsg/pipeline.py @@ -24,7 +24,7 @@ from .export import summary_markdown, to_csv, to_json, to_parquet, to_psql from .fetch import download_osm_file from .geofabrik import country_update_url -from .replication import ChangesetReplication, changefile_download_urls, resolve_url +from .replication import SHORTCUTS, ChangesetReplication, changefile_download_urls, resolve_url from .ui import info, progress_bar, warn UTC = dt.UTC @@ -49,6 +49,7 @@ class RunConfig: end_date: dt.datetime | None = None countries: list[str] | None = None urls: list[str] = field(default_factory=lambda: ["https://planet.openstreetmap.org/replication/minute"]) + url_explicit: bool = False workers: int | None = None additional_tags: list[str] | None = None hashtags: list[str] | None = None @@ -83,6 +84,34 @@ def _normalize_urls(cfg: RunConfig) -> None: cfg.urls = list(dict.fromkeys(resolve_url(u) for u in cfg.urls)) +def _pick_replication_for_span(span: dt.timedelta) -> str: + span_h = span.total_seconds() / 3600 + if span_h < 6: + return "minute" + if span_h < 24 * 7: + return "hour" + return "day" + + +def _auto_switch_replication(cfg: RunConfig, span: dt.timedelta) -> None: + """Swap a single planet-shortcut --url for the cheapest one that covers `span`.""" + if cfg.url_explicit or cfg.update or cfg.countries or len(cfg.urls) != 1: + return + cur = cfg.urls[0] + if cur not in SHORTCUTS.values(): + return + target_label = _pick_replication_for_span(span) + target_url = SHORTCUTS[target_label] + if target_url == cur: + return + cur_label = next(label for label, url in 
SHORTCUTS.items() if url == cur) + warn( + f"Span is {span}; auto-switching --url from '{cur_label}' to '{target_label}' to reduce load. " + f"Pass --url {cur_label} to keep '{cur_label}'." + ) + cfg.urls = [target_url] + + def _canonical_hashtags(hashtags: list[str]) -> list[str]: # Force leading '#' so 'hotosm' and '#hotosm' both match the '#hotosm' tokens in changeset comments. return ["#" + h.lstrip("#") for h in hashtags] @@ -96,7 +125,18 @@ def _resolve_url_starts(conn, cfg: RunConfig) -> dict[str, dt.datetime]: for url in cfg.urls: last = get_state(conn, url) if not last: - raise OsmsgError(f"--update has no prior state for {url}. Run osmsg without --update first to seed it.") + known = [r[0] for r in conn.execute("SELECT source_url FROM state").fetchall()] + hint = ( + f" Existing state in this DuckDB is for: {', '.join(known)}. " + "Re-run --update with one of those URLs, or start fresh under a different --name." + if known + else " Run osmsg once without --update to seed state." + ) + raise OsmsgError( + f"--update cannot switch replication URL: no prior state for {url}.{hint} " + "(Replaying the same window through a different granularity would double-count " + "via the changeset_stats (seq_id, changeset_id) key.)" + ) starts[url] = last["last_ts"] return starts if cfg.start_date is None: @@ -219,14 +259,17 @@ def run(cfg: RunConfig) -> dict[str, Any]: dbmod.create_tables(conn) info(f"DuckDB: {db_path}") + if cfg.end_date is None: + cfg.end_date = dt.datetime.now(UTC) + if cfg.start_date is not None: + _auto_switch_replication(cfg, cfg.end_date - cfg.start_date) + url_starts = _resolve_url_starts(conn, cfg) if cfg.update: # Changeset-replication reads one planet-wide source; widest window covers every URL. 
cfg.start_date = min(url_starts.values()) info(f"--update: resuming each source from its own state row (earliest: {cfg.start_date.isoformat()})") - if cfg.end_date is None: - cfg.end_date = dt.datetime.now(UTC) # _resolve_url_starts guarantees start_date is set (or raised); narrow for ty. assert cfg.start_date is not None if cfg.start_date >= cfg.end_date: @@ -235,8 +278,9 @@ def run(cfg: RunConfig) -> dict[str, Any]: span = cfg.end_date - cfg.start_date info(f"Range: {cfg.start_date.isoformat()} → {cfg.end_date.isoformat()} ({span})") span_hours = span.total_seconds() / 3600 - # 72h on minute replication is ~4,300 files; beyond that, hour/day replication is much cheaper. - if span_hours >= 72 and any("minute" in u.lower() for u in cfg.urls): + # When auto-switch was suppressed (--url explicit, --update, --country, multi-URL), a long + # span on minute replication still floods the network. Hint the user. + if span_hours >= 72 and any(u == SHORTCUTS["minute"] for u in cfg.urls): warn( f"Range spans {span_hours:.0f}h on minute replication " f"(~{int(span_hours * 60):,} files). Consider --url hour or --url day." 
diff --git a/tests/test_pipeline_smoke.py b/tests/test_pipeline_smoke.py index ec745fc..8fc6360 100644 --- a/tests/test_pipeline_smoke.py +++ b/tests/test_pipeline_smoke.py @@ -10,7 +10,15 @@ from osmsg.db.schema import create_tables, upsert_state from osmsg.exceptions import OsmsgError -from osmsg.pipeline import RunConfig, _canonical_hashtags, _normalize_urls, _resolve_url_starts +from osmsg.pipeline import ( + RunConfig, + _auto_switch_replication, + _canonical_hashtags, + _normalize_urls, + _pick_replication_for_span, + _resolve_url_starts, +) +from osmsg.replication import SHORTCUTS def test_normalize_urls_expands_minute_shortcut(): @@ -96,5 +104,105 @@ def test_resolve_url_starts_update_missing_state_raises_per_url(tmp_path): updated_at=dt.datetime(2026, 4, 25, tzinfo=dt.UTC), ) cfg = RunConfig(urls=["https://x", "https://y"], update=True) - with pytest.raises(OsmsgError, match="--update has no prior state for https://y"): + with pytest.raises(OsmsgError, match="no prior state for https://y"): _resolve_url_starts(conn, cfg) + + +def test_resolve_url_starts_update_error_lists_known_urls_and_invariant(tmp_path): + """The error must surface (a) which URLs are seeded and (b) the seq_id double-count rationale — + so the user knows their two recovery options without spelunking the source.""" + conn = _open_db(tmp_path) + upsert_state( + conn, + source_url="https://planet.openstreetmap.org/replication/minute", + last_seq=1, + last_ts=dt.datetime(2026, 4, 25, tzinfo=dt.UTC), + updated_at=dt.datetime(2026, 4, 25, tzinfo=dt.UTC), + ) + cfg = RunConfig(urls=["https://planet.openstreetmap.org/replication/day"], update=True) + with pytest.raises(OsmsgError) as exc: + _resolve_url_starts(conn, cfg) + msg = str(exc.value) + assert "Existing state in this DuckDB is for" in msg + assert "minute" in msg # known URL surfaced + assert "different --name" in msg # recovery hint + assert "seq_id" in msg # invariant referenced + + +@pytest.mark.parametrize( + "span,expected", + [ + 
(dt.timedelta(hours=1), "minute"), + (dt.timedelta(hours=5, minutes=59), "minute"), + (dt.timedelta(hours=6), "hour"), # boundary: ≥6h flips to hour + (dt.timedelta(days=1), "hour"), + (dt.timedelta(days=6, hours=23), "hour"), + (dt.timedelta(days=7), "day"), # boundary: ≥7d flips to day + (dt.timedelta(days=30), "day"), + ], +) +def test_pick_replication_for_span(span, expected): + assert _pick_replication_for_span(span) == expected + + +def test_auto_switch_promotes_minute_to_hour_on_long_span(capsys): + cfg = RunConfig(urls=[SHORTCUTS["minute"]]) + _auto_switch_replication(cfg, dt.timedelta(hours=10)) + assert cfg.urls == [SHORTCUTS["hour"]] + err = capsys.readouterr().err + assert "auto-switching" in err + assert "from 'minute' to 'hour'" in err + + +def test_auto_switch_promotes_minute_to_day_on_multi_day_span(): + cfg = RunConfig(urls=[SHORTCUTS["minute"]]) + _auto_switch_replication(cfg, dt.timedelta(days=30)) + assert cfg.urls == [SHORTCUTS["day"]] + + +def test_auto_switch_demotes_day_to_minute_on_short_span(): + """A user defaulting to day for a 1h window should be moved back to minute too.""" + cfg = RunConfig(urls=[SHORTCUTS["day"]]) + _auto_switch_replication(cfg, dt.timedelta(hours=1)) + assert cfg.urls == [SHORTCUTS["minute"]] + + +def test_auto_switch_no_op_when_already_correct(capsys): + cfg = RunConfig(urls=[SHORTCUTS["hour"]]) + _auto_switch_replication(cfg, dt.timedelta(hours=10)) + assert cfg.urls == [SHORTCUTS["hour"]] + assert "auto-switching" not in capsys.readouterr().err + + +def test_auto_switch_suppressed_by_url_explicit(): + cfg = RunConfig(urls=[SHORTCUTS["minute"]], url_explicit=True) + _auto_switch_replication(cfg, dt.timedelta(days=30)) + assert cfg.urls == [SHORTCUTS["minute"]] + + +def test_auto_switch_suppressed_by_update(): + """--update must never auto-switch — cross-URL replay would double-count via (seq_id, changeset_id).""" + cfg = RunConfig(urls=[SHORTCUTS["minute"]], update=True) + _auto_switch_replication(cfg, 
dt.timedelta(days=30)) + assert cfg.urls == [SHORTCUTS["minute"]] + + +def test_auto_switch_suppressed_by_country(): + cfg = RunConfig(urls=[SHORTCUTS["minute"]], countries=["nepal"]) + _auto_switch_replication(cfg, dt.timedelta(days=30)) + assert cfg.urls == [SHORTCUTS["minute"]] + + +def test_auto_switch_suppressed_by_multi_url(): + urls = [SHORTCUTS["minute"], SHORTCUTS["hour"]] + cfg = RunConfig(urls=list(urls)) + _auto_switch_replication(cfg, dt.timedelta(days=30)) + assert cfg.urls == urls + + +def test_auto_switch_skips_non_shortcut_url(): + """A custom (e.g. Geofabrik) URL must not be silently swapped for a planet shortcut.""" + custom = "https://download.geofabrik.de/asia/nepal-updates" + cfg = RunConfig(urls=[custom]) + _auto_switch_replication(cfg, dt.timedelta(days=30)) + assert cfg.urls == [custom] From 6c933a863385db9b55344251371c5f68e43c9aa0 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Tue, 28 Apr 2026 11:27:39 +0200 Subject: [PATCH 11/49] =?UTF-8?q?bump:=20version=201.0.2=20=E2=86=92=201.0?= =?UTF-8?q?.3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 6 ++++++ osmsg/__version__.py | 2 +- pyproject.toml | 2 +- uv.lock | 2 +- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d27c6b3..9ce01a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +## v1.0.3 (2026-04-28) + +### Perf + +- **url**: auto switch the replication url base don the input span + ## v1.0.2 (2026-04-28) ### Fix diff --git a/osmsg/__version__.py b/osmsg/__version__.py index 7863915..976498a 100644 --- a/osmsg/__version__.py +++ b/osmsg/__version__.py @@ -1 +1 @@ -__version__ = "1.0.2" +__version__ = "1.0.3" diff --git a/pyproject.toml b/pyproject.toml index d4813a4..05485a9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "osmsg" -version = "1.0.2" +version = "1.0.3" description = "OpenStreetMap Stats Generator: Commandline" 
readme = "README.md" authors = [ diff --git a/uv.lock b/uv.lock index f08fbd1..234803b 100644 --- a/uv.lock +++ b/uv.lock @@ -597,7 +597,7 @@ wheels = [ [[package]] name = "osmsg" -version = "1.0.2" +version = "1.0.3" source = { editable = "." } dependencies = [ { name = "duckdb" }, From c4b49e6a3fb28d5db92aac579f3270d246d364a1 Mon Sep 17 00:00:00 2001 From: gauravbarall Date: Wed, 29 Apr 2026 07:59:39 +0545 Subject: [PATCH 12/49] fix(pipeline): Replace hardcoded "processing" label with stage-specific descriptions --- osmsg/pipeline.py | 31 +++++++++++++++++++++++++------ osmsg/ui.py | 4 ++-- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/osmsg/pipeline.py b/osmsg/pipeline.py index 410c442..f2896cb 100644 --- a/osmsg/pipeline.py +++ b/osmsg/pipeline.py @@ -194,10 +194,16 @@ def _processing_config(cfg: RunConfig, *, parquet_dir: Path, geom_wkt: str | Non def _download_all( - urls: list[str], mode: str, max_workers: int, cookie: str | None, cache_dir: Path, label: str + urls: list[str], + mode: str, + max_workers: int, + cookie: str | None, + cache_dir: Path, + label: str, + description: str = "downloading", ) -> None: with ( - progress_bar(len(urls), unit=label) as advance, + progress_bar(len(urls), unit=label, description=description) as advance, concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool, ): for _ in pool.map(lambda u: download_osm_file(u, mode=mode, cookie=cookie, cache_dir=cache_dir), urls): @@ -214,9 +220,10 @@ def _process_all( label: str, workers: int, extra_iterables: tuple[list, ...] 
= (), + description: str = "processing", ) -> None: with ( - progress_bar(len(items), unit=label) as advance, + progress_bar(len(items), unit=label, description=description) as advance, concurrent.futures.ProcessPoolExecutor( max_workers=workers, initializer=initializer, initargs=init_args ) as pool, @@ -314,7 +321,9 @@ def run(cfg: RunConfig) -> dict[str, Any]: cs_config["window_start_utc"] = cfg.start_date.astimezone(UTC) cs_config["window_end_utc"] = cfg.end_date.astimezone(UTC) - _download_all(urls, "changeset", max_workers, None, cfg.cache_dir, "changesets") + _download_all( + urls, "changeset", max_workers, None, cfg.cache_dir, "changesets", description="Downloading changesets" + ) _process_all( urls, target=process_changeset, @@ -323,6 +332,7 @@ def run(cfg: RunConfig) -> dict[str, Any]: chunksize=10, label="changesets", workers=max_workers, + description="Processing changesets", ) dbmod.merge_parquet_files(conn, cs_dir, cleanup=True) info("Changeset processing complete.") @@ -353,7 +363,15 @@ def run(cfg: RunConfig) -> dict[str, Any]: cf_config["start_date_utc"] = url_start_date_utc cf_config["end_date_utc"] = url_end_date_utc - _download_all(urls, "changefiles", max_workers, cookie, cfg.cache_dir, "changefiles") + _download_all( + urls, + "changefiles", + max_workers, + cookie, + cfg.cache_dir, + "changefiles", + description="Downloading changefiles", + ) chunksize = 10 if "minute" in url.lower() else 1 seq_ids = list(range(src_start_seq, src_end_seq + 1)) _process_all( @@ -365,6 +383,7 @@ def run(cfg: RunConfig) -> dict[str, Any]: label="changefiles", workers=max_workers, extra_iterables=(seq_ids,), + description="Processing changefiles", ) dbmod.merge_parquet_files(conn, cf_dir, cleanup=True) upsert_state( @@ -374,7 +393,7 @@ def run(cfg: RunConfig) -> dict[str, Any]: last_ts=url_end_date, updated_at=dt.datetime.now(UTC), ) - info(f"Done: {url}") + info(f"Changefile processing complete: {url}") if cfg.delete_temp: # Never rmtree cfg.cache_dir itself — it 
may be the user's platform cache root. diff --git a/osmsg/ui.py b/osmsg/ui.py index 2d3424a..dc3a0a6 100644 --- a/osmsg/ui.py +++ b/osmsg/ui.py @@ -27,7 +27,7 @@ def error(message: str) -> None: @contextmanager -def progress_bar(total: int, unit: str = "items"): +def progress_bar(total: int, unit: str = "items", description: str = "processing"): # transient=False keeps a one-line summary so cron logs / file-redirected stdout retain context. with Progress( TextColumn("[bold blue]{task.description}"), @@ -37,7 +37,7 @@ def progress_bar(total: int, unit: str = "items"): console=console, transient=False, ) as bar: - task = bar.add_task("processing", total=total) + task = bar.add_task(description, total=total) def advance() -> None: bar.advance(task) From c907aa57235f9e6f59202631c6c0f37e9b7c1984 Mon Sep 17 00:00:00 2001 From: NirrWorks Date: Sun, 3 May 2026 19:18:58 +0545 Subject: [PATCH 13/49] Add litestar API endpoints --- Procfile | 1 + osmsg/api/__init__.py | 5 + osmsg/api/app.py | 55 ++++ osmsg/api/db.py | 36 +++ osmsg/api/queries.py | 54 ++++ pyproject.toml | 3 + tests/test_api.py | 12 + uv.lock | 676 ++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 842 insertions(+) create mode 100644 Procfile create mode 100644 osmsg/api/__init__.py create mode 100644 osmsg/api/app.py create mode 100644 osmsg/api/db.py create mode 100644 osmsg/api/queries.py create mode 100644 tests/test_api.py diff --git a/Procfile b/Procfile new file mode 100644 index 0000000..bb1ed34 --- /dev/null +++ b/Procfile @@ -0,0 +1 @@ +web: litestar --app osmsg.api.app:app run --host 0.0.0.0 --port ${PORT:-8000} diff --git a/osmsg/api/__init__.py b/osmsg/api/__init__.py new file mode 100644 index 0000000..63a58b2 --- /dev/null +++ b/osmsg/api/__init__.py @@ -0,0 +1,5 @@ +"""Litestar API for querying osmsg PostgreSQL output.""" + +from .app import app + +__all__ = ["app"] diff --git a/osmsg/api/app.py b/osmsg/api/app.py new file mode 100644 index 0000000..f249e4c --- /dev/null +++ 
b/osmsg/api/app.py @@ -0,0 +1,55 @@ +from __future__ import annotations + +from contextlib import asynccontextmanager +from typing import Any + +from litestar import Litestar, get +from litestar.openapi.config import OpenAPIConfig +from litestar.params import Parameter + +from .db import close_pool, open_pool +from .queries import fetch_users + + +@asynccontextmanager +async def lifespan(app: Litestar): + await open_pool() + try: + yield + finally: + await close_pool() + + +@get("/health") +async def health() -> dict[str, str]: + return {"status": "ok"} + + +@get("/api/v1/users") +async def get_users( + limit: int = Parameter(default=100, ge=1, le=1000), + offset: int = Parameter(default=0, ge=0), +) -> dict[str, Any]: + users = await fetch_users(limit=limit, offset=offset) + return {"count": len(users), "limit": limit, "offset": offset, "users": users} + + +# @get("/api/v1/stats/summary") +# async def get_summary(start_date: datetime, end_date: datetime, hashtag: str | None = None) -> dict: +# if start_date > end_date: +# return {"error": "start_date must be before end_date"} +# return {"message": "Temporarily disabled"} + + +# @get("/api/v1/stats/timeseries") +# async def get_timeseries(start_date: datetime, end_date: datetime, hashtag: str | None = None) -> dict: +# if start_date > end_date: +# return {"error": "start_date must be before end_date"} +# return {"message": "Temporarily disabled"} + + +app = Litestar( + route_handlers=[health, get_users], + lifespan=[lifespan], + openapi_config=OpenAPIConfig(title="OSMSG API", version="1.0.0", path="/docs"), +) diff --git a/osmsg/api/db.py b/osmsg/api/db.py new file mode 100644 index 0000000..e7feb6f --- /dev/null +++ b/osmsg/api/db.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +import os + +import asyncpg +from dotenv import load_dotenv + +load_dotenv() + +_pool: asyncpg.Pool | None = None + + +def get_database_url() -> str: + database_url = os.getenv("DATABASE_URL") + if not database_url: + raise 
RuntimeError("DATABASE_URL environment variable is not set") + return database_url + + +async def open_pool() -> None: + global _pool + if _pool is None: + _pool = await asyncpg.create_pool(dsn=get_database_url(), min_size=1, max_size=10) + + +async def close_pool() -> None: + global _pool + if _pool is not None: + await _pool.close() + _pool = None + + +def get_pool() -> asyncpg.Pool: + if _pool is None: + raise RuntimeError("Database pool is not initialized") + return _pool diff --git a/osmsg/api/queries.py b/osmsg/api/queries.py new file mode 100644 index 0000000..b03be9d --- /dev/null +++ b/osmsg/api/queries.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +from typing import Any + +from .db import get_pool + + +async def fetch_users(*, limit: int = 100, offset: int = 0) -> list[dict[str, Any]]: + sql = """ + SELECT + u.uid, + u.username AS name, + COUNT(DISTINCT st.changeset_id) AS changesets, + COALESCE(SUM(st.nodes_created), 0) AS nodes_create, + COALESCE(SUM(st.nodes_modified), 0) AS nodes_modify, + COALESCE(SUM(st.nodes_deleted), 0) AS nodes_delete, + COALESCE(SUM(st.ways_created), 0) AS ways_create, + COALESCE(SUM(st.ways_modified), 0) AS ways_modify, + COALESCE(SUM(st.ways_deleted), 0) AS ways_delete, + COALESCE(SUM(st.rels_created), 0) AS rels_create, + COALESCE(SUM(st.rels_modified), 0) AS rels_modify, + COALESCE(SUM(st.rels_deleted), 0) AS rels_delete, + COALESCE(SUM(st.poi_created), 0) AS poi_create, + COALESCE(SUM(st.poi_modified), 0) AS poi_modify, + COALESCE( + SUM( + st.nodes_created + st.nodes_modified + st.nodes_deleted + + st.ways_created + st.ways_modified + st.ways_deleted + + st.rels_created + st.rels_modified + st.rels_deleted + ), + 0 + ) AS map_changes, + ROW_NUMBER() OVER ( + ORDER BY + COALESCE( + SUM( + st.nodes_created + st.nodes_modified + st.nodes_deleted + + st.ways_created + st.ways_modified + st.ways_deleted + + st.rels_created + st.rels_modified + st.rels_deleted + ), + 0 + ) DESC, + u.uid ASC + ) AS rank + FROM users 
u + JOIN changeset_stats st ON u.uid = st.uid + GROUP BY u.uid, u.username + ORDER BY map_changes DESC, u.uid ASC + LIMIT $1 OFFSET $2 + """ + + async with get_pool().acquire() as conn: + rows = await conn.fetch(sql, limit, offset) + return [dict(row) for row in rows] diff --git a/pyproject.toml b/pyproject.toml index 05485a9..67ca228 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,9 @@ license = "MIT" license-files = ["LICENSE"] requires-python = ">=3.11" dependencies = [ + "asyncpg>=0.30.0", "duckdb>=1.5.2", + "litestar[standard]>=2.18.0", "osmium>=4.3.1", "platformdirs>=4.5.1", "pyarrow>=24.0.0", @@ -22,6 +24,7 @@ dependencies = [ "shapely>=2.1.2", "typer>=0.25.0", "typer-config[yaml]>=1.5.1", + "uv>=0.9.0", ] keywords = ["osm", "stats", "commandline", "openstreetmap"] diff --git a/tests/test_api.py b/tests/test_api.py new file mode 100644 index 0000000..8cbd70d --- /dev/null +++ b/tests/test_api.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +from osmsg.api.app import app + + +def test_api_exposes_only_active_public_routes(): + paths = {route.path for route in app.routes} + + assert "/health" in paths + assert "/api/v1/users" in paths + assert "/api/v1/stats/summary" not in paths + assert "/api/v1/stats/timeseries" not in paths diff --git a/uv.lock b/uv.lock index 234803b..5745c14 100644 --- a/uv.lock +++ b/uv.lock @@ -20,6 +20,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, ] +[[package]] +name = "anyio" +version = "4.13.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "idna" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/19/14/2c5dd9f512b66549ae92767a9c7b330ae88e1932ca57876909410251fe13/anyio-4.13.0.tar.gz", hash = "sha256:334b70e641fd2221c1505b3890c69882fe4a2df910cba14d97019b90b24439dc", size = 231622, upload-time = "2026-03-24T12:59:09.671Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/da/42/e921fccf5015463e32a3cf6ee7f980a6ed0f395ceeaa45060b61d86486c2/anyio-4.13.0-py3-none-any.whl", hash = "sha256:08b310f9e24a9594186fd75b4f73f4a4152069e3853f1ed8bfbf58369f4ad708", size = 114353, upload-time = "2026-03-24T12:59:08.246Z" }, +] + [[package]] name = "argcomplete" version = "3.6.3" @@ -29,6 +42,54 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/74/f5/9373290775639cb67a2fce7f629a1c240dce9f12fe927bc32b2736e16dfc/argcomplete-3.6.3-py3-none-any.whl", hash = "sha256:f5007b3a600ccac5d25bbce33089211dfd49eab4a7718da3f10e3082525a92ce", size = 43846, upload-time = "2025-10-20T03:33:33.021Z" }, ] +[[package]] +name = "asyncpg" +version = "0.31.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fe/cc/d18065ce2380d80b1bcce927c24a2642efd38918e33fd724bc4bca904877/asyncpg-0.31.0.tar.gz", hash = "sha256:c989386c83940bfbd787180f2b1519415e2d3d6277a70d9d0f0145ac73500735", size = 993667, upload-time = "2025-11-24T23:27:00.812Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/08/17/cc02bc49bc350623d050fa139e34ea512cd6e020562f2a7312a7bcae4bc9/asyncpg-0.31.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:eee690960e8ab85063ba93af2ce128c0f52fd655fdff9fdb1a28df01329f031d", size = 643159, upload-time = "2025-11-24T23:25:36.443Z" }, + { url = "https://files.pythonhosted.org/packages/a4/62/4ded7d400a7b651adf06f49ea8f73100cca07c6df012119594d1e3447aa6/asyncpg-0.31.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2657204552b75f8288de08ca60faf4a99a65deef3a71d1467454123205a88fab", size = 638157, upload-time = "2025-11-24T23:25:37.89Z" }, + { url = 
"https://files.pythonhosted.org/packages/d6/5b/4179538a9a72166a0bf60ad783b1ef16efb7960e4d7b9afe9f77a5551680/asyncpg-0.31.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a429e842a3a4b4ea240ea52d7fe3f82d5149853249306f7ff166cb9948faa46c", size = 2918051, upload-time = "2025-11-24T23:25:39.461Z" }, + { url = "https://files.pythonhosted.org/packages/e6/35/c27719ae0536c5b6e61e4701391ffe435ef59539e9360959240d6e47c8c8/asyncpg-0.31.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c0807be46c32c963ae40d329b3a686356e417f674c976c07fa49f1b30303f109", size = 2972640, upload-time = "2025-11-24T23:25:41.512Z" }, + { url = "https://files.pythonhosted.org/packages/43/f4/01ebb9207f29e645a64699b9ce0eefeff8e7a33494e1d29bb53736f7766b/asyncpg-0.31.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e5d5098f63beeae93512ee513d4c0c53dc12e9aa2b7a1af5a81cddf93fe4e4da", size = 2851050, upload-time = "2025-11-24T23:25:43.153Z" }, + { url = "https://files.pythonhosted.org/packages/3e/f4/03ff1426acc87be0f4e8d40fa2bff5c3952bef0080062af9efc2212e3be8/asyncpg-0.31.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:37fc6c00a814e18eef51833545d1891cac9aa69140598bb076b4cd29b3e010b9", size = 2962574, upload-time = "2025-11-24T23:25:44.942Z" }, + { url = "https://files.pythonhosted.org/packages/c7/39/cc788dfca3d4060f9d93e67be396ceec458dfc429e26139059e58c2c244d/asyncpg-0.31.0-cp311-cp311-win32.whl", hash = "sha256:5a4af56edf82a701aece93190cc4e094d2df7d33f6e915c222fb09efbb5afc24", size = 521076, upload-time = "2025-11-24T23:25:46.486Z" }, + { url = "https://files.pythonhosted.org/packages/28/fc/735af5384c029eb7f1ca60ccb8fa95521dbdaeef788edf4cecfc604c3cab/asyncpg-0.31.0-cp311-cp311-win_amd64.whl", hash = "sha256:480c4befbdf079c14c9ca43c8c5e1fe8b6296c96f1f927158d4f1e750aacc047", size = 584980, upload-time = "2025-11-24T23:25:47.938Z" }, + { url = 
"https://files.pythonhosted.org/packages/2a/a6/59d0a146e61d20e18db7396583242e32e0f120693b67a8de43f1557033e2/asyncpg-0.31.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b44c31e1efc1c15188ef183f287c728e2046abb1d26af4d20858215d50d91fad", size = 662042, upload-time = "2025-11-24T23:25:49.578Z" }, + { url = "https://files.pythonhosted.org/packages/36/01/ffaa189dcb63a2471720615e60185c3f6327716fdc0fc04334436fbb7c65/asyncpg-0.31.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0c89ccf741c067614c9b5fc7f1fc6f3b61ab05ae4aaa966e6fd6b93097c7d20d", size = 638504, upload-time = "2025-11-24T23:25:51.501Z" }, + { url = "https://files.pythonhosted.org/packages/9f/62/3f699ba45d8bd24c5d65392190d19656d74ff0185f42e19d0bbd973bb371/asyncpg-0.31.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:12b3b2e39dc5470abd5e98c8d3373e4b1d1234d9fbdedf538798b2c13c64460a", size = 3426241, upload-time = "2025-11-24T23:25:53.278Z" }, + { url = "https://files.pythonhosted.org/packages/8c/d1/a867c2150f9c6e7af6462637f613ba67f78a314b00db220cd26ff559d532/asyncpg-0.31.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:aad7a33913fb8bcb5454313377cc330fbb19a0cd5faa7272407d8a0c4257b671", size = 3520321, upload-time = "2025-11-24T23:25:54.982Z" }, + { url = "https://files.pythonhosted.org/packages/7a/1a/cce4c3f246805ecd285a3591222a2611141f1669d002163abef999b60f98/asyncpg-0.31.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3df118d94f46d85b2e434fd62c84cb66d5834d5a890725fe625f498e72e4d5ec", size = 3316685, upload-time = "2025-11-24T23:25:57.43Z" }, + { url = "https://files.pythonhosted.org/packages/40/ae/0fc961179e78cc579e138fad6eb580448ecae64908f95b8cb8ee2f241f67/asyncpg-0.31.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bd5b6efff3c17c3202d4b37189969acf8927438a238c6257f66be3c426beba20", size = 3471858, upload-time = "2025-11-24T23:25:59.636Z" }, + { url = 
"https://files.pythonhosted.org/packages/52/b2/b20e09670be031afa4cbfabd645caece7f85ec62d69c312239de568e058e/asyncpg-0.31.0-cp312-cp312-win32.whl", hash = "sha256:027eaa61361ec735926566f995d959ade4796f6a49d3bde17e5134b9964f9ba8", size = 527852, upload-time = "2025-11-24T23:26:01.084Z" }, + { url = "https://files.pythonhosted.org/packages/b5/f0/f2ed1de154e15b107dc692262395b3c17fc34eafe2a78fc2115931561730/asyncpg-0.31.0-cp312-cp312-win_amd64.whl", hash = "sha256:72d6bdcbc93d608a1158f17932de2321f68b1a967a13e014998db87a72ed3186", size = 597175, upload-time = "2025-11-24T23:26:02.564Z" }, + { url = "https://files.pythonhosted.org/packages/95/11/97b5c2af72a5d0b9bc3fa30cd4b9ce22284a9a943a150fdc768763caf035/asyncpg-0.31.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c204fab1b91e08b0f47e90a75d1b3c62174dab21f670ad6c5d0f243a228f015b", size = 661111, upload-time = "2025-11-24T23:26:04.467Z" }, + { url = "https://files.pythonhosted.org/packages/1b/71/157d611c791a5e2d0423f09f027bd499935f0906e0c2a416ce712ba51ef3/asyncpg-0.31.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:54a64f91839ba59008eccf7aad2e93d6e3de688d796f35803235ea1c4898ae1e", size = 636928, upload-time = "2025-11-24T23:26:05.944Z" }, + { url = "https://files.pythonhosted.org/packages/2e/fc/9e3486fb2bbe69d4a867c0b76d68542650a7ff1574ca40e84c3111bb0c6e/asyncpg-0.31.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c0e0822b1038dc7253b337b0f3f676cadc4ac31b126c5d42691c39691962e403", size = 3424067, upload-time = "2025-11-24T23:26:07.957Z" }, + { url = "https://files.pythonhosted.org/packages/12/c6/8c9d076f73f07f995013c791e018a1cd5f31823c2a3187fc8581706aa00f/asyncpg-0.31.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bef056aa502ee34204c161c72ca1f3c274917596877f825968368b2c33f585f4", size = 3518156, upload-time = "2025-11-24T23:26:09.591Z" }, + { url = 
"https://files.pythonhosted.org/packages/ae/3b/60683a0baf50fbc546499cfb53132cb6835b92b529a05f6a81471ab60d0c/asyncpg-0.31.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0bfbcc5b7ffcd9b75ab1558f00db2ae07db9c80637ad1b2469c43df79d7a5ae2", size = 3319636, upload-time = "2025-11-24T23:26:11.168Z" }, + { url = "https://files.pythonhosted.org/packages/50/dc/8487df0f69bd398a61e1792b3cba0e47477f214eff085ba0efa7eac9ce87/asyncpg-0.31.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:22bc525ebbdc24d1261ecbf6f504998244d4e3be1721784b5f64664d61fbe602", size = 3472079, upload-time = "2025-11-24T23:26:13.164Z" }, + { url = "https://files.pythonhosted.org/packages/13/a1/c5bbeeb8531c05c89135cb8b28575ac2fac618bcb60119ee9696c3faf71c/asyncpg-0.31.0-cp313-cp313-win32.whl", hash = "sha256:f890de5e1e4f7e14023619399a471ce4b71f5418cd67a51853b9910fdfa73696", size = 527606, upload-time = "2025-11-24T23:26:14.78Z" }, + { url = "https://files.pythonhosted.org/packages/91/66/b25ccb84a246b470eb943b0107c07edcae51804912b824054b3413995a10/asyncpg-0.31.0-cp313-cp313-win_amd64.whl", hash = "sha256:dc5f2fa9916f292e5c5c8b2ac2813763bcd7f58e130055b4ad8a0531314201ab", size = 596569, upload-time = "2025-11-24T23:26:16.189Z" }, + { url = "https://files.pythonhosted.org/packages/3c/36/e9450d62e84a13aea6580c83a47a437f26c7ca6fa0f0fd40b6670793ea30/asyncpg-0.31.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:f6b56b91bb0ffc328c4e3ed113136cddd9deefdf5f79ab448598b9772831df44", size = 660867, upload-time = "2025-11-24T23:26:17.631Z" }, + { url = "https://files.pythonhosted.org/packages/82/4b/1d0a2b33b3102d210439338e1beea616a6122267c0df459ff0265cd5807a/asyncpg-0.31.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:334dec28cf20d7f5bb9e45b39546ddf247f8042a690bff9b9573d00086e69cb5", size = 638349, upload-time = "2025-11-24T23:26:19.689Z" }, + { url = 
"https://files.pythonhosted.org/packages/41/aa/e7f7ac9a7974f08eff9183e392b2d62516f90412686532d27e196c0f0eeb/asyncpg-0.31.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:98cc158c53f46de7bb677fd20c417e264fc02b36d901cc2a43bd6cb0dc6dbfd2", size = 3410428, upload-time = "2025-11-24T23:26:21.275Z" }, + { url = "https://files.pythonhosted.org/packages/6f/de/bf1b60de3dede5c2731e6788617a512bc0ebd9693eac297ee74086f101d7/asyncpg-0.31.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9322b563e2661a52e3cdbc93eed3be7748b289f792e0011cb2720d278b366ce2", size = 3471678, upload-time = "2025-11-24T23:26:23.627Z" }, + { url = "https://files.pythonhosted.org/packages/46/78/fc3ade003e22d8bd53aaf8f75f4be48f0b460fa73738f0391b9c856a9147/asyncpg-0.31.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:19857a358fc811d82227449b7ca40afb46e75b33eb8897240c3839dd8b744218", size = 3313505, upload-time = "2025-11-24T23:26:25.235Z" }, + { url = "https://files.pythonhosted.org/packages/bf/e9/73eb8a6789e927816f4705291be21f2225687bfa97321e40cd23055e903a/asyncpg-0.31.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ba5f8886e850882ff2c2ace5732300e99193823e8107e2c53ef01c1ebfa1e85d", size = 3434744, upload-time = "2025-11-24T23:26:26.944Z" }, + { url = "https://files.pythonhosted.org/packages/08/4b/f10b880534413c65c5b5862f79b8e81553a8f364e5238832ad4c0af71b7f/asyncpg-0.31.0-cp314-cp314-win32.whl", hash = "sha256:cea3a0b2a14f95834cee29432e4ddc399b95700eb1d51bbc5bfee8f31fa07b2b", size = 532251, upload-time = "2025-11-24T23:26:28.404Z" }, + { url = "https://files.pythonhosted.org/packages/d3/2d/7aa40750b7a19efa5d66e67fc06008ca0f27ba1bd082e457ad82f59aba49/asyncpg-0.31.0-cp314-cp314-win_amd64.whl", hash = "sha256:04d19392716af6b029411a0264d92093b6e5e8285ae97a39957b9a9c14ea72be", size = 604901, upload-time = "2025-11-24T23:26:30.34Z" }, + { url = 
"https://files.pythonhosted.org/packages/ce/fe/b9dfe349b83b9dee28cc42360d2c86b2cdce4cb551a2c2d27e156bcac84d/asyncpg-0.31.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:bdb957706da132e982cc6856bb2f7b740603472b54c3ebc77fe60ea3e57e1bd2", size = 702280, upload-time = "2025-11-24T23:26:32Z" }, + { url = "https://files.pythonhosted.org/packages/6a/81/e6be6e37e560bd91e6c23ea8a6138a04fd057b08cf63d3c5055c98e81c1d/asyncpg-0.31.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:6d11b198111a72f47154fa03b85799f9be63701e068b43f84ac25da0bda9cb31", size = 682931, upload-time = "2025-11-24T23:26:33.572Z" }, + { url = "https://files.pythonhosted.org/packages/a6/45/6009040da85a1648dd5bc75b3b0a062081c483e75a1a29041ae63a0bf0dc/asyncpg-0.31.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:18c83b03bc0d1b23e6230f5bf8d4f217dc9bc08644ce0502a9d91dc9e634a9c7", size = 3581608, upload-time = "2025-11-24T23:26:35.638Z" }, + { url = "https://files.pythonhosted.org/packages/7e/06/2e3d4d7608b0b2b3adbee0d0bd6a2d29ca0fc4d8a78f8277df04e2d1fd7b/asyncpg-0.31.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e009abc333464ff18b8f6fd146addffd9aaf63e79aa3bb40ab7a4c332d0c5e9e", size = 3498738, upload-time = "2025-11-24T23:26:37.275Z" }, + { url = "https://files.pythonhosted.org/packages/7d/aa/7d75ede780033141c51d83577ea23236ba7d3a23593929b32b49db8ed36e/asyncpg-0.31.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:3b1fbcb0e396a5ca435a8826a87e5c2c2cc0c8c68eb6fadf82168056b0e53a8c", size = 3401026, upload-time = "2025-11-24T23:26:39.423Z" }, + { url = "https://files.pythonhosted.org/packages/ba/7a/15e37d45e7f7c94facc1e9148c0e455e8f33c08f0b8a0b1deb2c5171771b/asyncpg-0.31.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8df714dba348efcc162d2adf02d213e5fab1bd9f557e1305633e851a61814a7a", size = 3429426, upload-time = "2025-11-24T23:26:41.032Z" }, + { url = 
"https://files.pythonhosted.org/packages/13/d5/71437c5f6ae5f307828710efbe62163974e71237d5d46ebd2869ea052d10/asyncpg-0.31.0-cp314-cp314t-win32.whl", hash = "sha256:1b41f1afb1033f2b44f3234993b15096ddc9cd71b21a42dbd87fc6a57b43d65d", size = 614495, upload-time = "2025-11-24T23:26:42.659Z" }, + { url = "https://files.pythonhosted.org/packages/3c/d7/8fb3044eaef08a310acfe23dae9a8e2e07d305edc29a53497e52bc76eca7/asyncpg-0.31.0-cp314-cp314t-win_amd64.whl", hash = "sha256:bd4107bb7cdd0e9e65fae66a62afd3a249663b844fa34d479f6d5b3bef9c04c3", size = 706062, upload-time = "2025-11-24T23:26:44.086Z" }, +] + [[package]] name = "certifi" version = "2026.1.4" @@ -317,6 +378,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7a/e3/9d34173ec068631faea3ea6e73050700729363e7e33306a9a3218e5cdc61/duckdb-1.5.2-cp314-cp314-win_arm64.whl", hash = "sha256:c9f3e0b71b8a50fccfb42794899285d9d318ce2503782b9dd54868e5ecd0ad31", size = 14402513, upload-time = "2026-04-13T11:30:06.609Z" }, ] +[[package]] +name = "editorconfig" +version = "0.17.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/88/3a/a61d9a1f319a186b05d14df17daea42fcddea63c213bcd61a929fb3a6796/editorconfig-0.17.1.tar.gz", hash = "sha256:23c08b00e8e08cc3adcddb825251c497478df1dada6aefeb01e626ad37303745", size = 14695, upload-time = "2025-06-09T08:21:37.097Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/96/fd/a40c621ff207f3ce8e484aa0fc8ba4eb6e3ecf52e15b42ba764b457a9550/editorconfig-0.17.1-py3-none-any.whl", hash = "sha256:1eda9c2c0db8c16dbd50111b710572a5e6de934e39772de1959d41f64fc17c82", size = 16360, upload-time = "2025-06-09T08:21:35.654Z" }, +] + +[[package]] +name = "faker" +version = "40.15.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "tzdata", marker = "sys_platform == 'win32'" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/7f/13/6741787bd91c4109c7bed047d68273965cd52ce8a5f773c471b949334b6d/faker-40.15.0.tar.gz", hash = "sha256:20f3a6ec8c266b74d4c554e34118b21c3c2056c0b4a519d15c8decb3a4e6e795", size = 1967447, upload-time = "2026-04-17T20:05:27.555Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a7/a7/a600f8f30d4505e89166de51dd121bd540ab8e560e8cf0901de00a81de8c/faker-40.15.0-py3-none-any.whl", hash = "sha256:71ab3c3370da9d2205ab74ffb0fd51273063ad562b3a3bb69d0026a20923e318", size = 2004447, upload-time = "2026-04-17T20:05:25.437Z" }, +] + [[package]] name = "filelock" version = "3.29.0" @@ -326,6 +408,79 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/47/dd9a212ef6e343a6857485ffe25bba537304f1913bdbed446a23f7f592e1/filelock-3.29.0-py3-none-any.whl", hash = "sha256:96f5f6344709aa1572bbf631c640e4ebeeb519e08da902c39a001882f30ac258", size = 39812, upload-time = "2026-04-19T15:39:08.752Z" }, ] +[[package]] +name = "h11" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, +] + +[[package]] +name = "httpcore" +version = "1.0.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = 
"sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" }, +] + +[[package]] +name = "httptools" +version = "0.7.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/46/120a669232c7bdedb9d52d4aeae7e6c7dfe151e99dc70802e2fc7a5e1993/httptools-0.7.1.tar.gz", hash = "sha256:abd72556974f8e7c74a259655924a717a2365b236c882c3f6f8a45fe94703ac9", size = 258961, upload-time = "2025-10-10T03:55:08.559Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9c/08/17e07e8d89ab8f343c134616d72eebfe03798835058e2ab579dcc8353c06/httptools-0.7.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:474d3b7ab469fefcca3697a10d11a32ee2b9573250206ba1e50d5980910da657", size = 206521, upload-time = "2025-10-10T03:54:31.002Z" }, + { url = "https://files.pythonhosted.org/packages/aa/06/c9c1b41ff52f16aee526fd10fbda99fa4787938aa776858ddc4a1ea825ec/httptools-0.7.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a3c3b7366bb6c7b96bd72d0dbe7f7d5eead261361f013be5f6d9590465ea1c70", size = 110375, upload-time = "2025-10-10T03:54:31.941Z" }, + { url = "https://files.pythonhosted.org/packages/cc/cc/10935db22fda0ee34c76f047590ca0a8bd9de531406a3ccb10a90e12ea21/httptools-0.7.1-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:379b479408b8747f47f3b253326183d7c009a3936518cdb70db58cffd369d9df", size = 456621, upload-time = "2025-10-10T03:54:33.176Z" }, + { url = 
"https://files.pythonhosted.org/packages/0e/84/875382b10d271b0c11aa5d414b44f92f8dd53e9b658aec338a79164fa548/httptools-0.7.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cad6b591a682dcc6cf1397c3900527f9affef1e55a06c4547264796bbd17cf5e", size = 454954, upload-time = "2025-10-10T03:54:34.226Z" }, + { url = "https://files.pythonhosted.org/packages/30/e1/44f89b280f7e46c0b1b2ccee5737d46b3bb13136383958f20b580a821ca0/httptools-0.7.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:eb844698d11433d2139bbeeb56499102143beb582bd6c194e3ba69c22f25c274", size = 440175, upload-time = "2025-10-10T03:54:35.942Z" }, + { url = "https://files.pythonhosted.org/packages/6f/7e/b9287763159e700e335028bc1824359dc736fa9b829dacedace91a39b37e/httptools-0.7.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f65744d7a8bdb4bda5e1fa23e4ba16832860606fcc09d674d56e425e991539ec", size = 440310, upload-time = "2025-10-10T03:54:37.1Z" }, + { url = "https://files.pythonhosted.org/packages/b3/07/5b614f592868e07f5c94b1f301b5e14a21df4e8076215a3bccb830a687d8/httptools-0.7.1-cp311-cp311-win_amd64.whl", hash = "sha256:135fbe974b3718eada677229312e97f3b31f8a9c8ffa3ae6f565bf808d5b6bcb", size = 86875, upload-time = "2025-10-10T03:54:38.421Z" }, + { url = "https://files.pythonhosted.org/packages/53/7f/403e5d787dc4942316e515e949b0c8a013d84078a915910e9f391ba9b3ed/httptools-0.7.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:38e0c83a2ea9746ebbd643bdfb521b9aa4a91703e2cd705c20443405d2fd16a5", size = 206280, upload-time = "2025-10-10T03:54:39.274Z" }, + { url = "https://files.pythonhosted.org/packages/2a/0d/7f3fd28e2ce311ccc998c388dd1c53b18120fda3b70ebb022b135dc9839b/httptools-0.7.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f25bbaf1235e27704f1a7b86cd3304eabc04f569c828101d94a0e605ef7205a5", size = 110004, upload-time = "2025-10-10T03:54:40.403Z" }, + { url = 
"https://files.pythonhosted.org/packages/84/a6/b3965e1e146ef5762870bbe76117876ceba51a201e18cc31f5703e454596/httptools-0.7.1-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2c15f37ef679ab9ecc06bfc4e6e8628c32a8e4b305459de7cf6785acd57e4d03", size = 517655, upload-time = "2025-10-10T03:54:41.347Z" }, + { url = "https://files.pythonhosted.org/packages/11/7d/71fee6f1844e6fa378f2eddde6c3e41ce3a1fb4b2d81118dd544e3441ec0/httptools-0.7.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7fe6e96090df46b36ccfaf746f03034e5ab723162bc51b0a4cf58305324036f2", size = 511440, upload-time = "2025-10-10T03:54:42.452Z" }, + { url = "https://files.pythonhosted.org/packages/22/a5/079d216712a4f3ffa24af4a0381b108aa9c45b7a5cc6eb141f81726b1823/httptools-0.7.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f72fdbae2dbc6e68b8239defb48e6a5937b12218e6ffc2c7846cc37befa84362", size = 495186, upload-time = "2025-10-10T03:54:43.937Z" }, + { url = "https://files.pythonhosted.org/packages/e9/9e/025ad7b65278745dee3bd0ebf9314934c4592560878308a6121f7f812084/httptools-0.7.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e99c7b90a29fd82fea9ef57943d501a16f3404d7b9ee81799d41639bdaae412c", size = 499192, upload-time = "2025-10-10T03:54:45.003Z" }, + { url = "https://files.pythonhosted.org/packages/6d/de/40a8f202b987d43afc4d54689600ff03ce65680ede2f31df348d7f368b8f/httptools-0.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:3e14f530fefa7499334a79b0cf7e7cd2992870eb893526fb097d51b4f2d0f321", size = 86694, upload-time = "2025-10-10T03:54:45.923Z" }, + { url = "https://files.pythonhosted.org/packages/09/8f/c77b1fcbfd262d422f12da02feb0d218fa228d52485b77b953832105bb90/httptools-0.7.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6babce6cfa2a99545c60bfef8bee0cc0545413cb0018f617c8059a30ad985de3", size = 202889, upload-time = "2025-10-10T03:54:47.089Z" }, + { url = 
"https://files.pythonhosted.org/packages/0a/1a/22887f53602feaa066354867bc49a68fc295c2293433177ee90870a7d517/httptools-0.7.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:601b7628de7504077dd3dcb3791c6b8694bbd967148a6d1f01806509254fb1ca", size = 108180, upload-time = "2025-10-10T03:54:48.052Z" }, + { url = "https://files.pythonhosted.org/packages/32/6a/6aaa91937f0010d288d3d124ca2946d48d60c3a5ee7ca62afe870e3ea011/httptools-0.7.1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:04c6c0e6c5fb0739c5b8a9eb046d298650a0ff38cf42537fc372b28dc7e4472c", size = 478596, upload-time = "2025-10-10T03:54:48.919Z" }, + { url = "https://files.pythonhosted.org/packages/6d/70/023d7ce117993107be88d2cbca566a7c1323ccbaf0af7eabf2064fe356f6/httptools-0.7.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:69d4f9705c405ae3ee83d6a12283dc9feba8cc6aaec671b412917e644ab4fa66", size = 473268, upload-time = "2025-10-10T03:54:49.993Z" }, + { url = "https://files.pythonhosted.org/packages/32/4d/9dd616c38da088e3f436e9a616e1d0cc66544b8cdac405cc4e81c8679fc7/httptools-0.7.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:44c8f4347d4b31269c8a9205d8a5ee2df5322b09bbbd30f8f862185bb6b05346", size = 455517, upload-time = "2025-10-10T03:54:51.066Z" }, + { url = "https://files.pythonhosted.org/packages/1d/3a/a6c595c310b7df958e739aae88724e24f9246a514d909547778d776799be/httptools-0.7.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:465275d76db4d554918aba40bf1cbebe324670f3dfc979eaffaa5d108e2ed650", size = 458337, upload-time = "2025-10-10T03:54:52.196Z" }, + { url = "https://files.pythonhosted.org/packages/fd/82/88e8d6d2c51edc1cc391b6e044c6c435b6aebe97b1abc33db1b0b24cd582/httptools-0.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:322d00c2068d125bd570f7bf78b2d367dad02b919d8581d7476d8b75b294e3e6", size = 85743, upload-time = "2025-10-10T03:54:53.448Z" }, + { url = 
"https://files.pythonhosted.org/packages/34/50/9d095fcbb6de2d523e027a2f304d4551855c2f46e0b82befd718b8b20056/httptools-0.7.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:c08fe65728b8d70b6923ce31e3956f859d5e1e8548e6f22ec520a962c6757270", size = 203619, upload-time = "2025-10-10T03:54:54.321Z" }, + { url = "https://files.pythonhosted.org/packages/07/f0/89720dc5139ae54b03f861b5e2c55a37dba9a5da7d51e1e824a1f343627f/httptools-0.7.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:7aea2e3c3953521c3c51106ee11487a910d45586e351202474d45472db7d72d3", size = 108714, upload-time = "2025-10-10T03:54:55.163Z" }, + { url = "https://files.pythonhosted.org/packages/b3/cb/eea88506f191fb552c11787c23f9a405f4c7b0c5799bf73f2249cd4f5228/httptools-0.7.1-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:0e68b8582f4ea9166be62926077a3334064d422cf08ab87d8b74664f8e9058e1", size = 472909, upload-time = "2025-10-10T03:54:56.056Z" }, + { url = "https://files.pythonhosted.org/packages/e0/4a/a548bdfae6369c0d078bab5769f7b66f17f1bfaa6fa28f81d6be6959066b/httptools-0.7.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:df091cf961a3be783d6aebae963cc9b71e00d57fa6f149025075217bc6a55a7b", size = 470831, upload-time = "2025-10-10T03:54:57.219Z" }, + { url = "https://files.pythonhosted.org/packages/4d/31/14df99e1c43bd132eec921c2e7e11cda7852f65619bc0fc5bdc2d0cb126c/httptools-0.7.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f084813239e1eb403ddacd06a30de3d3e09a9b76e7894dcda2b22f8a726e9c60", size = 452631, upload-time = "2025-10-10T03:54:58.219Z" }, + { url = "https://files.pythonhosted.org/packages/22/d2/b7e131f7be8d854d48cb6d048113c30f9a46dca0c9a8b08fcb3fcd588cdc/httptools-0.7.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7347714368fb2b335e9063bc2b96f2f87a9ceffcd9758ac295f8bbcd3ffbc0ca", size = 452910, upload-time = "2025-10-10T03:54:59.366Z" }, + { url = 
"https://files.pythonhosted.org/packages/53/cf/878f3b91e4e6e011eff6d1fa9ca39f7eb17d19c9d7971b04873734112f30/httptools-0.7.1-cp314-cp314-win_amd64.whl", hash = "sha256:cfabda2a5bb85aa2a904ce06d974a3f30fb36cc63d7feaddec05d2050acede96", size = 88205, upload-time = "2025-10-10T03:55:00.389Z" }, +] + +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, +] + [[package]] name = "identify" version = "2.6.19" @@ -365,6 +520,59 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, ] +[[package]] +name = "jsbeautifier" +version = "1.15.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "editorconfig" }, + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ea/98/d6cadf4d5a1c03b2136837a435682418c29fdeb66be137128544cecc5b7a/jsbeautifier-1.15.4.tar.gz", hash = "sha256:5bb18d9efb9331d825735fbc5360ee8f1aac5e52780042803943aa7f854f7592", size = 75257, upload-time = "2025-02-27T17:53:53.252Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/2d/14/1c65fccf8413d5f5c6e8425f84675169654395098000d8bddc4e9d3390e1/jsbeautifier-1.15.4-py3-none-any.whl", hash = "sha256:72f65de312a3f10900d7685557f84cb61a9733c50dcc27271a39f5b0051bf528", size = 94707, upload-time = "2025-02-27T17:53:46.152Z" }, +] + +[[package]] +name = "litestar" +version = "2.21.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "click" }, + { name = "httpx" }, + { name = "litestar-htmx" }, + { name = "msgspec" }, + { name = "multidict" }, + { name = "multipart" }, + { name = "polyfactory" }, + { name = "pyyaml" }, + { name = "rich" }, + { name = "rich-click" }, + { name = "sniffio" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a1/fc/7ce2057ffd738be4d2abc5b69229f57181bbff8a84a4576004b021085773/litestar-2.21.1.tar.gz", hash = "sha256:28301438de7c5e77bb68a5d8684dff415b9f252b0dd8413b356e8e6794c6863a", size = 376270, upload-time = "2026-03-07T13:49:16.053Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f3/67/139c0fa6e1dd9e558910c02a383cd0ae12c2a1d6d3f0ea0d42dbeb03d8b2/litestar-2.21.1-py3-none-any.whl", hash = "sha256:6321340195801454aeac4a12e72c28f54714a4c3e8172c33e577c593cc5982c6", size = 568342, upload-time = "2026-03-07T13:49:13.694Z" }, +] + +[package.optional-dependencies] +standard = [ + { name = "jinja2" }, + { name = "jsbeautifier" }, + { name = "uvicorn", extra = ["standard"] }, +] + +[[package]] +name = "litestar-htmx" +version = "0.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/3f/b9/7e296aa1adada25cce8e5f89a996b0e38d852d93b1b656a2058226c542a2/litestar_htmx-0.5.0.tar.gz", hash = "sha256:e02d1a3a92172c874835fa3e6749d65ae9fc626d0df46719490a16293e2146fb", size = 119755, upload-time = "2025-06-11T21:19:45.573Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/f2/24/8d99982f0aa9c1cd82073c6232b54a0dbe6797c7d63c0583a6c68ee3ddf2/litestar_htmx-0.5.0-py3-none-any.whl", hash = "sha256:92833aa47e0d0e868d2a7dbfab75261f124f4b83d4f9ad12b57b9a68f86c50e6", size = 9970, upload-time = "2025-06-11T21:19:44.465Z" }, +] + [[package]] name = "markdown-it-py" version = "4.0.0" @@ -460,6 +668,180 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, ] +[[package]] +name = "msgspec" +version = "0.21.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e3/60/f79b9b013a16fa3a58350c9295ddc6789f2e335f36ea61ed10a21b215364/msgspec-0.21.1.tar.gz", hash = "sha256:2313508e394b0d208f8f56892ca9b2799e2561329de9763b19619595a6c0f72c", size = 319193, upload-time = "2026-04-12T21:44:50.394Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ba/7f/bbc4e74cd33d316b75541149e4d35b163b63bce066530ae185a2ec3b5bfc/msgspec-0.21.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b504b6e7f7a22a24b27232b73034421692147865162daaec9f3bf62439007c87", size = 193131, upload-time = "2026-04-12T21:43:56.094Z" }, + { url = "https://files.pythonhosted.org/packages/c1/60/504886af1aaf854112663b842d5eea9a15d9588f9bf7d0d2df736424b84d/msgspec-0.21.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4692b7c1609155708c4418f88e92f63c13fdf08aa095c84bae82bad75b53389b", size = 186597, upload-time = "2026-04-12T21:43:57.242Z" }, + { url = "https://files.pythonhosted.org/packages/fa/54/d24ddeaa65b5278c9e67f48ce3c17a9831e8f3722f3c8322ee120aca22ef/msgspec-0.21.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d3124010b3815451494c85ff345e693cb9fe5889cfcbbef39ed8622e0e72319c", size = 
215158, upload-time = "2026-04-12T21:43:58.442Z" }, + { url = "https://files.pythonhosted.org/packages/9f/75/bb79c8b89a93ae23cd33c0d802373f16feaf9633f05d8af77091350dda0a/msgspec-0.21.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6badc03b9725352219cca017bfe71c61f2fbd0fb5982b410ac17c97c213deb30", size = 219856, upload-time = "2026-04-12T21:44:00.015Z" }, + { url = "https://files.pythonhosted.org/packages/b4/9c/c5ca26b46f0ebbd3a6683695ef89396712cb9e4199fd1f0bc1dd968216b1/msgspec-0.21.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:5d2d4116ebe3035a78d9ec76e99a9d64e5fa6d44fe61a9c5de7fd1acf54bcc69", size = 220314, upload-time = "2026-04-12T21:44:01.548Z" }, + { url = "https://files.pythonhosted.org/packages/c8/31/645a351c4285dce40ed6755c3dcc0aa648e26dacb20a98018fe2cce5e87b/msgspec-0.21.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0d1009f6715f5bff3b54d4ff5c7428ad96197e0534e1645b8e9b955890c84664", size = 223215, upload-time = "2026-04-12T21:44:02.884Z" }, + { url = "https://files.pythonhosted.org/packages/09/af/8bf15736a6dd3cb4f90c5467f6dc39197d2daaf10754490cdc0aa17b7312/msgspec-0.21.1-cp311-cp311-win_amd64.whl", hash = "sha256:c6faffe5bb644ec884052679af4dfd776d4b5ca90e4a7ec7e7e319e4e6b93a6e", size = 188554, upload-time = "2026-04-12T21:44:04.151Z" }, + { url = "https://files.pythonhosted.org/packages/ef/29/cc7db3a165b62d16e64a83f82eccb79655055cb5bc1f60459a6f9d7c82f2/msgspec-0.21.1-cp311-cp311-win_arm64.whl", hash = "sha256:ee9e3f11fa94603f7d673bf795cfa31b549c4a2c723bc39b45beb1e7f5a3fb99", size = 174517, upload-time = "2026-04-12T21:44:05.66Z" }, + { url = "https://files.pythonhosted.org/packages/6e/cf/317224852c00248c620a9bcf4b26e2e4ab8afd752f18d2a6ef73ebd423b6/msgspec-0.21.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d4248cf0b6129b7d230eacd493c17cc2d4f3989f3bb7f633a928a85b7dcfa251", size = 196188, upload-time = "2026-04-12T21:44:07.181Z" }, + { url = 
"https://files.pythonhosted.org/packages/6d/81/074612945c0666078f7366f40000013de9f6ba687491d450df699bceebc9/msgspec-0.21.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5102c7e9b3acff82178449b85006d96310e690291bb1ea0142f1b24bcb8aabcb", size = 188473, upload-time = "2026-04-12T21:44:08.736Z" }, + { url = "https://files.pythonhosted.org/packages/8a/37/655101799590bcc5fddb2bd3fe0e6194e816c2d1da7c361725f5eb89a910/msgspec-0.21.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:846758412e9518252b2ac9bffd6f0e54d9ff614f5f9488df7749f81ff5c80920", size = 218871, upload-time = "2026-04-12T21:44:09.917Z" }, + { url = "https://files.pythonhosted.org/packages/b5/d1/d4cd9fe89c7d400d7a18f86ccc94daa3f0927f53558846fcb60791dce5d6/msgspec-0.21.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:21995e74b5c598c2e004110ad66ec7f1b8c20bf2bcf3b2de8fd9a3094422d3ff", size = 225025, upload-time = "2026-04-12T21:44:11.191Z" }, + { url = "https://files.pythonhosted.org/packages/24/bf/e20549e602b9edccadeeff98760345a416f9cce846a657e8b18e3396b212/msgspec-0.21.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6129f0cca52992e898fd5344187f7c8127b63d810b2fd73e36fca73b4c6475ee", size = 222672, upload-time = "2026-04-12T21:44:12.481Z" }, + { url = "https://files.pythonhosted.org/packages/b4/68/04d7a8f0f786545cf9b8c280c57aa6befb5977af6e884b8b54191cbe44b3/msgspec-0.21.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ef3ec2296248d1f8b9231acb051b6d471dfde8f21819e86c9adaaa9f42918521", size = 227303, upload-time = "2026-04-12T21:44:13.709Z" }, + { url = "https://files.pythonhosted.org/packages/cc/4d/619866af2840875be408047bf9e70ceafbae6ab50660de7134ed1b25eb86/msgspec-0.21.1-cp312-cp312-win_amd64.whl", hash = "sha256:d4ab834a054c6f0cbeef6df9e7e1b33d5f1bc7b86dea1d2fd7cad003873e783d", size = 190017, upload-time = "2026-04-12T21:44:14.977Z" }, + { url = 
"https://files.pythonhosted.org/packages/5e/2e/a8f9eca8fd00e097d7a9e99ba8a4685db994494448e3d4f0b7f6e9a3c0f7/msgspec-0.21.1-cp312-cp312-win_arm64.whl", hash = "sha256:628aaa35c74950a8c59da330d7e98917e1c7188f983745782027748ee4ca573e", size = 175345, upload-time = "2026-04-12T21:44:16.431Z" }, + { url = "https://files.pythonhosted.org/packages/7e/74/f11ede02839b19ff459f88e3145df5d711626ca84da4e23520cebf819367/msgspec-0.21.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:764173717a01743f007e9f74520ed281f24672c604514f7d76c1c3a10e8edb66", size = 196176, upload-time = "2026-04-12T21:44:17.613Z" }, + { url = "https://files.pythonhosted.org/packages/bb/40/4476c1bd341418a046c4955aff632ec769315d1e3cb94e6acf86d461f9ed/msgspec-0.21.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:344c7cd0eaed1fb81d7959f99100ef71ec9b536881a376f11b9a6c4803365697", size = 188524, upload-time = "2026-04-12T21:44:18.815Z" }, + { url = "https://files.pythonhosted.org/packages/ca/d9/9e9d7d7e5061b47540d03d640fab9b3965ba7ae49c1b2154861c8f007518/msgspec-0.21.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:48943e278b3854c2f89f955ddc6f9f430d3f0784b16e47d10604ee0463cd21f5", size = 218880, upload-time = "2026-04-12T21:44:20.028Z" }, + { url = "https://files.pythonhosted.org/packages/74/66/2bb344f34abb4b57e60c7c9c761994e0417b9718ec1460bf00c296f2a7ea/msgspec-0.21.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a9aa659ebb0101b1cbc31461212b87e341d961f0ab0772aaf068a99e001ec4aa", size = 225050, upload-time = "2026-04-12T21:44:21.577Z" }, + { url = "https://files.pythonhosted.org/packages/1a/84/7c1e412f76092277bf760cef12b7979d03314d259ab5b5cafde5d0c1722d/msgspec-0.21.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7b27d1a8ead2b6f5b0c4f2d07b8be1ccfcc041c8a0e704781edebe3ae13c484", size = 222713, upload-time = "2026-04-12T21:44:22.83Z" }, + { url = 
"https://files.pythonhosted.org/packages/4e/27/0bba04b2b4ef05f3d068429410bc71d2cea925f1596a8f41152cccd5edb8/msgspec-0.21.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:38fe93e86b61328fe544cb7fd871fad5a27c8734bfda90f65e5dbe288ae50f61", size = 227259, upload-time = "2026-04-12T21:44:24.11Z" }, + { url = "https://files.pythonhosted.org/packages/b0/2d/09574b0eea02fed2c2c1383dbaae2c7f79dc16dcd6487a886000afb5d7c4/msgspec-0.21.1-cp313-cp313-win_amd64.whl", hash = "sha256:8bc666331c35fcce05a7cd2d6221adbe0f6058f8e750711413d22793c080ac6a", size = 189857, upload-time = "2026-04-12T21:44:25.359Z" }, + { url = "https://files.pythonhosted.org/packages/46/34/105b1576ad182879914f0c821f17ee1d13abb165cb060448f96fe2aff078/msgspec-0.21.1-cp313-cp313-win_arm64.whl", hash = "sha256:42bb1241e0750c1a4346f2aa84db26c5ffd99a4eb3a954927d9f149ff2f42898", size = 175403, upload-time = "2026-04-12T21:44:26.608Z" }, + { url = "https://files.pythonhosted.org/packages/5a/ad/86954e987d1d6a5c579e2c2e7832b65e0fff194179fdac4f581536086024/msgspec-0.21.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:fab48eb45fdbfbdb2c0edfec00ffc53b6b6085beefc6b50b61e01659f9f8757f", size = 196261, upload-time = "2026-04-12T21:44:27.807Z" }, + { url = "https://files.pythonhosted.org/packages/d1/a1/c5e46c3e42b866199365e35d11dddfd1fbd8bba4fdb3c52f965b1607ce94/msgspec-0.21.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:3cb779ea0c35bc807ff941d415875c1f69ca0be91a2e907ab99a171811d86a9a", size = 188729, upload-time = "2026-04-12T21:44:28.99Z" }, + { url = "https://files.pythonhosted.org/packages/85/7d/1e29a319d678d6cb962ae5bdf32a6858ebdf38f73bc654c0e9c742a0c2c8/msgspec-0.21.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:68604db36b3b4dd9bf160e436e12798a4738848144cea1aca1cb984011eb160f", size = 219866, upload-time = "2026-04-12T21:44:31.104Z" }, + { url = 
"https://files.pythonhosted.org/packages/25/1f/cca084ca2572810fff12ea9dbdcbe39eac048f40daf4a9077b49fcbe8cee/msgspec-0.21.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3d6b9dc50948eaf65df54d2fd0ff66e6d8c32f116037209ee861810eb9b676cb", size = 224993, upload-time = "2026-04-12T21:44:32.649Z" }, + { url = "https://files.pythonhosted.org/packages/71/94/d2120fc9d419a89a3a7c13e5b7078798c4b392a96a02a6e2b3ce43a8766c/msgspec-0.21.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:52c5e21930942302394429c5a582ce7e6b62c7f983b3760834c2ce107e0dd6df", size = 223535, upload-time = "2026-04-12T21:44:33.839Z" }, + { url = "https://files.pythonhosted.org/packages/75/17/42418b66a3ad972a89bab73dd78b79cc6282bb488a25e73c853cee7443b9/msgspec-0.21.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:abbb39d65681fa24ed394e01af3d59d869068324f900c61d06062b7fb9980f2f", size = 227222, upload-time = "2026-04-12T21:44:35.093Z" }, + { url = "https://files.pythonhosted.org/packages/c4/33/265c894268cca88ff67b144ca2b4c522fc8b9a6f1966a3640c70516e78e1/msgspec-0.21.1-cp314-cp314-win_amd64.whl", hash = "sha256:5666b1b560b97b6ec2eb3fca8a502298ebac56e13bbca1f88523538ce83d01ea", size = 193810, upload-time = "2026-04-12T21:44:36.612Z" }, + { url = "https://files.pythonhosted.org/packages/3b/8f/a6d35f25bf1fc63c492fdd88fdce01ba0875ead48c2b91f90f33653b4131/msgspec-0.21.1-cp314-cp314-win_arm64.whl", hash = "sha256:d8b8578e4c83b14ceea4cef0d0b747e31d9330fe4b03b2b2ad4063866a178f93", size = 179125, upload-time = "2026-04-12T21:44:38.198Z" }, + { url = "https://files.pythonhosted.org/packages/c6/39/74839641e64b99d87da55af0fc472854d42b46e2183b9e2a67fe1bb2a512/msgspec-0.21.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:15f523d51c00ebad412213bfe9f06f0a50ec2b93e0c19e824a2d267cabb48ea2", size = 200171, upload-time = "2026-04-12T21:44:39.414Z" }, + { url = 
"https://files.pythonhosted.org/packages/70/9b/ce0cca6d2d87fcd4b6ff97600790494e64f26a2c55d61507cd2755c16193/msgspec-0.21.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:4e47390360583ba3d5c6cb44cf0a9f61b0a06a899d3c2c00627cedebb2e2884b", size = 192879, upload-time = "2026-04-12T21:44:40.882Z" }, + { url = "https://files.pythonhosted.org/packages/a7/08/673a7bb05e5702dc787ddd3011195b509f9867927970da59052211929987/msgspec-0.21.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f60800e6299b798142dc40b0644da77ceac5ea0568be58228417eae14135c847", size = 226281, upload-time = "2026-04-12T21:44:42.181Z" }, + { url = "https://files.pythonhosted.org/packages/7d/45/86508cf57283e9070b3c447e3ab25b792a7a0855a3ea4e0c6d111ac34c97/msgspec-0.21.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5f8e9dfcd98419cf7568808470c4317a3fb30bef0e3715b568730a2b272a20d7", size = 229863, upload-time = "2026-04-12T21:44:43.442Z" }, + { url = "https://files.pythonhosted.org/packages/2c/62/e7c9367cd08d590559faacd711edbae36840342843e669440363f33c7d36/msgspec-0.21.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:92d89dfad13bd1ea640dc3e37e724ed380da1030b272bdf5ecafb983c3ad7c75", size = 230445, upload-time = "2026-04-12T21:44:44.806Z" }, + { url = "https://files.pythonhosted.org/packages/42/b4/c0f54632103846b658a10930025f4de41c8724b5e4805a5f3b395586cb7e/msgspec-0.21.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0d03867786e5d7ba25d666df4b11320c27170f4aeafcb8e3a8b0a50a4fb742ca", size = 231822, upload-time = "2026-04-12T21:44:46.343Z" }, + { url = "https://files.pythonhosted.org/packages/ea/1d/0d85cc79d0ccf5508e9c846cc66552a6a16bf92abd1dbd8362617f7b35cd/msgspec-0.21.1-cp314-cp314t-win_amd64.whl", hash = "sha256:740fbf1c9d59992ca3537d6fbe9ebbf9eaf726a65fbf31448e0ecbc710697a63", size = 206650, upload-time = "2026-04-12T21:44:47.601Z" }, + { url = 
"https://files.pythonhosted.org/packages/90/91/56c5d560f20e6c20e9e4f55bd0e458f7f162aa689ee350346c04c48eac0b/msgspec-0.21.1-cp314-cp314t-win_arm64.whl", hash = "sha256:0d2cc73df6058d811a126ac3a8ad63a4dfa210c82f9cf5a004802eaf4712de90", size = 183149, upload-time = "2026-04-12T21:44:48.833Z" }, +] + +[[package]] +name = "multidict" +version = "6.7.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1a/c2/c2d94cbe6ac1753f3fc980da97b3d930efe1da3af3c9f5125354436c073d/multidict-6.7.1.tar.gz", hash = "sha256:ec6652a1bee61c53a3e5776b6049172c53b6aaba34f18c9ad04f82712bac623d", size = 102010, upload-time = "2026-01-26T02:46:45.979Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ce/f1/a90635c4f88fb913fbf4ce660b83b7445b7a02615bda034b2f8eb38fd597/multidict-6.7.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:7ff981b266af91d7b4b3793ca3382e53229088d193a85dfad6f5f4c27fc73e5d", size = 76626, upload-time = "2026-01-26T02:43:26.485Z" }, + { url = "https://files.pythonhosted.org/packages/a6/9b/267e64eaf6fc637a15b35f5de31a566634a2740f97d8d094a69d34f524a4/multidict-6.7.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:844c5bca0b5444adb44a623fb0a1310c2f4cd41f402126bb269cd44c9b3f3e1e", size = 44706, upload-time = "2026-01-26T02:43:27.607Z" }, + { url = "https://files.pythonhosted.org/packages/dd/a4/d45caf2b97b035c57267791ecfaafbd59c68212004b3842830954bb4b02e/multidict-6.7.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f2a0a924d4c2e9afcd7ec64f9de35fcd96915149b2216e1cb2c10a56df483855", size = 44356, upload-time = "2026-01-26T02:43:28.661Z" }, + { url = "https://files.pythonhosted.org/packages/fd/d2/0a36c8473f0cbaeadd5db6c8b72d15bbceeec275807772bfcd059bef487d/multidict-6.7.1-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:8be1802715a8e892c784c0197c2ace276ea52702a0ede98b6310c8f255a5afb3", size = 244355, upload-time = "2026-01-26T02:43:31.165Z" }, + { url = 
"https://files.pythonhosted.org/packages/5d/16/8c65be997fd7dd311b7d39c7b6e71a0cb449bad093761481eccbbe4b42a2/multidict-6.7.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2e2d2ed645ea29f31c4c7ea1552fcfd7cb7ba656e1eafd4134a6620c9f5fdd9e", size = 246433, upload-time = "2026-01-26T02:43:32.581Z" }, + { url = "https://files.pythonhosted.org/packages/01/fb/4dbd7e848d2799c6a026ec88ad39cf2b8416aa167fcc903baa55ecaa045c/multidict-6.7.1-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:95922cee9a778659e91db6497596435777bd25ed116701a4c034f8e46544955a", size = 225376, upload-time = "2026-01-26T02:43:34.417Z" }, + { url = "https://files.pythonhosted.org/packages/b6/8a/4a3a6341eac3830f6053062f8fbc9a9e54407c80755b3f05bc427295c2d0/multidict-6.7.1-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6b83cabdc375ffaaa15edd97eb7c0c672ad788e2687004990074d7d6c9b140c8", size = 257365, upload-time = "2026-01-26T02:43:35.741Z" }, + { url = "https://files.pythonhosted.org/packages/f7/a2/dd575a69c1aa206e12d27d0770cdf9b92434b48a9ef0cd0d1afdecaa93c4/multidict-6.7.1-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:38fb49540705369bab8484db0689d86c0a33a0a9f2c1b197f506b71b4b6c19b0", size = 254747, upload-time = "2026-01-26T02:43:36.976Z" }, + { url = "https://files.pythonhosted.org/packages/5a/56/21b27c560c13822ed93133f08aa6372c53a8e067f11fbed37b4adcdac922/multidict-6.7.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:439cbebd499f92e9aa6793016a8acaa161dfa749ae86d20960189f5398a19144", size = 246293, upload-time = "2026-01-26T02:43:38.258Z" }, + { url = "https://files.pythonhosted.org/packages/5a/a4/23466059dc3854763423d0ad6c0f3683a379d97673b1b89ec33826e46728/multidict-6.7.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = 
"sha256:6d3bc717b6fe763b8be3f2bee2701d3c8eb1b2a8ae9f60910f1b2860c82b6c49", size = 242962, upload-time = "2026-01-26T02:43:40.034Z" }, + { url = "https://files.pythonhosted.org/packages/1f/67/51dd754a3524d685958001e8fa20a0f5f90a6a856e0a9dcabff69be3dbb7/multidict-6.7.1-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:619e5a1ac57986dbfec9f0b301d865dddf763696435e2962f6d9cf2fdff2bb71", size = 237360, upload-time = "2026-01-26T02:43:41.752Z" }, + { url = "https://files.pythonhosted.org/packages/64/3f/036dfc8c174934d4b55d86ff4f978e558b0e585cef70cfc1ad01adc6bf18/multidict-6.7.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:0b38ebffd9be37c1170d33bc0f36f4f262e0a09bc1aac1c34c7aa51a7293f0b3", size = 245940, upload-time = "2026-01-26T02:43:43.042Z" }, + { url = "https://files.pythonhosted.org/packages/3d/20/6214d3c105928ebc353a1c644a6ef1408bc5794fcb4f170bb524a3c16311/multidict-6.7.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:10ae39c9cfe6adedcdb764f5e8411d4a92b055e35573a2eaa88d3323289ef93c", size = 253502, upload-time = "2026-01-26T02:43:44.371Z" }, + { url = "https://files.pythonhosted.org/packages/b1/e2/c653bc4ae1be70a0f836b82172d643fcf1dade042ba2676ab08ec08bff0f/multidict-6.7.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:25167cc263257660290fba06b9318d2026e3c910be240a146e1f66dd114af2b0", size = 247065, upload-time = "2026-01-26T02:43:45.745Z" }, + { url = "https://files.pythonhosted.org/packages/c8/11/a854b4154cd3bd8b1fd375e8a8ca9d73be37610c361543d56f764109509b/multidict-6.7.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:128441d052254f42989ef98b7b6a6ecb1e6f708aa962c7984235316db59f50fa", size = 241870, upload-time = "2026-01-26T02:43:47.054Z" }, + { url = "https://files.pythonhosted.org/packages/13/bf/9676c0392309b5fdae322333d22a829715b570edb9baa8016a517b55b558/multidict-6.7.1-cp311-cp311-win32.whl", hash = "sha256:d62b7f64ffde3b99d06b707a280db04fb3855b55f5a06df387236051d0668f4a", size = 41302, upload-time = "2026-01-26T02:43:48.753Z" }, 
+ { url = "https://files.pythonhosted.org/packages/c9/68/f16a3a8ba6f7b6dc92a1f19669c0810bd2c43fc5a02da13b1cbf8e253845/multidict-6.7.1-cp311-cp311-win_amd64.whl", hash = "sha256:bdbf9f3b332abd0cdb306e7c2113818ab1e922dc84b8f8fd06ec89ed2a19ab8b", size = 45981, upload-time = "2026-01-26T02:43:49.921Z" }, + { url = "https://files.pythonhosted.org/packages/ac/ad/9dd5305253fa00cd3c7555dbef69d5bf4133debc53b87ab8d6a44d411665/multidict-6.7.1-cp311-cp311-win_arm64.whl", hash = "sha256:b8c990b037d2fff2f4e33d3f21b9b531c5745b33a49a7d6dbe7a177266af44f6", size = 43159, upload-time = "2026-01-26T02:43:51.635Z" }, + { url = "https://files.pythonhosted.org/packages/8d/9c/f20e0e2cf80e4b2e4b1c365bf5fe104ee633c751a724246262db8f1a0b13/multidict-6.7.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:a90f75c956e32891a4eda3639ce6dd86e87105271f43d43442a3aedf3cddf172", size = 76893, upload-time = "2026-01-26T02:43:52.754Z" }, + { url = "https://files.pythonhosted.org/packages/fe/cf/18ef143a81610136d3da8193da9d80bfe1cb548a1e2d1c775f26b23d024a/multidict-6.7.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3fccb473e87eaa1382689053e4a4618e7ba7b9b9b8d6adf2027ee474597128cd", size = 45456, upload-time = "2026-01-26T02:43:53.893Z" }, + { url = "https://files.pythonhosted.org/packages/a9/65/1caac9d4cd32e8433908683446eebc953e82d22b03d10d41a5f0fefe991b/multidict-6.7.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b0fa96985700739c4c7853a43c0b3e169360d6855780021bfc6d0f1ce7c123e7", size = 43872, upload-time = "2026-01-26T02:43:55.041Z" }, + { url = "https://files.pythonhosted.org/packages/cf/3b/d6bd75dc4f3ff7c73766e04e705b00ed6dbbaccf670d9e05a12b006f5a21/multidict-6.7.1-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:cb2a55f408c3043e42b40cc8eecd575afa27b7e0b956dfb190de0f8499a57a53", size = 251018, upload-time = "2026-01-26T02:43:56.198Z" }, + { url = 
"https://files.pythonhosted.org/packages/fd/80/c959c5933adedb9ac15152e4067c702a808ea183a8b64cf8f31af8ad3155/multidict-6.7.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eb0ce7b2a32d09892b3dd6cc44877a0d02a33241fafca5f25c8b6b62374f8b75", size = 258883, upload-time = "2026-01-26T02:43:57.499Z" }, + { url = "https://files.pythonhosted.org/packages/86/85/7ed40adafea3d4f1c8b916e3b5cc3a8e07dfcdcb9cd72800f4ed3ca1b387/multidict-6.7.1-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:c3a32d23520ee37bf327d1e1a656fec76a2edd5c038bf43eddfa0572ec49c60b", size = 242413, upload-time = "2026-01-26T02:43:58.755Z" }, + { url = "https://files.pythonhosted.org/packages/d2/57/b8565ff533e48595503c785f8361ff9a4fde4d67de25c207cd0ba3befd03/multidict-6.7.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:9c90fed18bffc0189ba814749fdcc102b536e83a9f738a9003e569acd540a733", size = 268404, upload-time = "2026-01-26T02:44:00.216Z" }, + { url = "https://files.pythonhosted.org/packages/e0/50/9810c5c29350f7258180dfdcb2e52783a0632862eb334c4896ac717cebcb/multidict-6.7.1-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:da62917e6076f512daccfbbde27f46fed1c98fee202f0559adec8ee0de67f71a", size = 269456, upload-time = "2026-01-26T02:44:02.202Z" }, + { url = "https://files.pythonhosted.org/packages/f3/8d/5e5be3ced1d12966fefb5c4ea3b2a5b480afcea36406559442c6e31d4a48/multidict-6.7.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bfde23ef6ed9db7eaee6c37dcec08524cb43903c60b285b172b6c094711b3961", size = 256322, upload-time = "2026-01-26T02:44:03.56Z" }, + { url = "https://files.pythonhosted.org/packages/31/6e/d8a26d81ac166a5592782d208dd90dfdc0a7a218adaa52b45a672b46c122/multidict-6.7.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:3758692429e4e32f1ba0df23219cd0b4fc0a52f476726fff9337d1a57676a582", size = 253955, upload-time = "2026-01-26T02:44:04.845Z" }, + { url = "https://files.pythonhosted.org/packages/59/4c/7c672c8aad41534ba619bcd4ade7a0dc87ed6b8b5c06149b85d3dd03f0cd/multidict-6.7.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:398c1478926eca669f2fd6a5856b6de9c0acf23a2cb59a14c0ba5844fa38077e", size = 251254, upload-time = "2026-01-26T02:44:06.133Z" }, + { url = "https://files.pythonhosted.org/packages/7b/bd/84c24de512cbafbdbc39439f74e967f19570ce7924e3007174a29c348916/multidict-6.7.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c102791b1c4f3ab36ce4101154549105a53dc828f016356b3e3bcae2e3a039d3", size = 252059, upload-time = "2026-01-26T02:44:07.518Z" }, + { url = "https://files.pythonhosted.org/packages/fa/ba/f5449385510825b73d01c2d4087bf6d2fccc20a2d42ac34df93191d3dd03/multidict-6.7.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:a088b62bd733e2ad12c50dad01b7d0166c30287c166e137433d3b410add807a6", size = 263588, upload-time = "2026-01-26T02:44:09.382Z" }, + { url = "https://files.pythonhosted.org/packages/d7/11/afc7c677f68f75c84a69fe37184f0f82fce13ce4b92f49f3db280b7e92b3/multidict-6.7.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:3d51ff4785d58d3f6c91bdbffcb5e1f7ddfda557727043aa20d20ec4f65e324a", size = 259642, upload-time = "2026-01-26T02:44:10.73Z" }, + { url = "https://files.pythonhosted.org/packages/2b/17/ebb9644da78c4ab36403739e0e6e0e30ebb135b9caf3440825001a0bddcb/multidict-6.7.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fc5907494fccf3e7d3f94f95c91d6336b092b5fc83811720fae5e2765890dfba", size = 251377, upload-time = "2026-01-26T02:44:12.042Z" }, + { url = "https://files.pythonhosted.org/packages/ca/a4/840f5b97339e27846c46307f2530a2805d9d537d8b8bd416af031cad7fa0/multidict-6.7.1-cp312-cp312-win32.whl", hash = "sha256:28ca5ce2fd9716631133d0e9a9b9a745ad7f60bac2bccafb56aa380fc0b6c511", size = 41887, upload-time = "2026-01-26T02:44:14.245Z" }, 
+ { url = "https://files.pythonhosted.org/packages/80/31/0b2517913687895f5904325c2069d6a3b78f66cc641a86a2baf75a05dcbb/multidict-6.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:fcee94dfbd638784645b066074b338bc9cc155d4b4bffa4adce1615c5a426c19", size = 46053, upload-time = "2026-01-26T02:44:15.371Z" }, + { url = "https://files.pythonhosted.org/packages/0c/5b/aba28e4ee4006ae4c7df8d327d31025d760ffa992ea23812a601d226e682/multidict-6.7.1-cp312-cp312-win_arm64.whl", hash = "sha256:ba0a9fb644d0c1a2194cf7ffb043bd852cea63a57f66fbd33959f7dae18517bf", size = 43307, upload-time = "2026-01-26T02:44:16.852Z" }, + { url = "https://files.pythonhosted.org/packages/f2/22/929c141d6c0dba87d3e1d38fbdf1ba8baba86b7776469f2bc2d3227a1e67/multidict-6.7.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:2b41f5fed0ed563624f1c17630cb9941cf2309d4df00e494b551b5f3e3d67a23", size = 76174, upload-time = "2026-01-26T02:44:18.509Z" }, + { url = "https://files.pythonhosted.org/packages/c7/75/bc704ae15fee974f8fccd871305e254754167dce5f9e42d88a2def741a1d/multidict-6.7.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:84e61e3af5463c19b67ced91f6c634effb89ef8bfc5ca0267f954451ed4bb6a2", size = 45116, upload-time = "2026-01-26T02:44:19.745Z" }, + { url = "https://files.pythonhosted.org/packages/79/76/55cd7186f498ed080a18440c9013011eb548f77ae1b297206d030eb1180a/multidict-6.7.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:935434b9853c7c112eee7ac891bc4cb86455aa631269ae35442cb316790c1445", size = 43524, upload-time = "2026-01-26T02:44:21.571Z" }, + { url = "https://files.pythonhosted.org/packages/e9/3c/414842ef8d5a1628d68edee29ba0e5bcf235dbfb3ccd3ea303a7fe8c72ff/multidict-6.7.1-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:432feb25a1cb67fe82a9680b4d65fb542e4635cb3166cd9c01560651ad60f177", size = 249368, upload-time = "2026-01-26T02:44:22.803Z" }, + { url = 
"https://files.pythonhosted.org/packages/f6/32/befed7f74c458b4a525e60519fe8d87eef72bb1e99924fa2b0f9d97a221e/multidict-6.7.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e82d14e3c948952a1a85503817e038cba5905a3352de76b9a465075d072fba23", size = 256952, upload-time = "2026-01-26T02:44:24.306Z" }, + { url = "https://files.pythonhosted.org/packages/03/d6/c878a44ba877f366630c860fdf74bfb203c33778f12b6ac274936853c451/multidict-6.7.1-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:4cfb48c6ea66c83bcaaf7e4dfa7ec1b6bbcf751b7db85a328902796dfde4c060", size = 240317, upload-time = "2026-01-26T02:44:25.772Z" }, + { url = "https://files.pythonhosted.org/packages/68/49/57421b4d7ad2e9e60e25922b08ceb37e077b90444bde6ead629095327a6f/multidict-6.7.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:1d540e51b7e8e170174555edecddbd5538105443754539193e3e1061864d444d", size = 267132, upload-time = "2026-01-26T02:44:27.648Z" }, + { url = "https://files.pythonhosted.org/packages/b7/fe/ec0edd52ddbcea2a2e89e174f0206444a61440b40f39704e64dc807a70bd/multidict-6.7.1-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:273d23f4b40f3dce4d6c8a821c741a86dec62cded82e1175ba3d99be128147ed", size = 268140, upload-time = "2026-01-26T02:44:29.588Z" }, + { url = "https://files.pythonhosted.org/packages/b0/73/6e1b01cbeb458807aa0831742232dbdd1fa92bfa33f52a3f176b4ff3dc11/multidict-6.7.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d624335fd4fa1c08a53f8b4be7676ebde19cd092b3895c421045ca87895b429", size = 254277, upload-time = "2026-01-26T02:44:30.902Z" }, + { url = "https://files.pythonhosted.org/packages/6a/b2/5fb8c124d7561a4974c342bc8c778b471ebbeb3cc17df696f034a7e9afe7/multidict-6.7.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:12fad252f8b267cc75b66e8fc51b3079604e8d43a75428ffe193cd9e2195dfd6", size = 252291, upload-time = "2026-01-26T02:44:32.31Z" }, + { url = "https://files.pythonhosted.org/packages/5a/96/51d4e4e06bcce92577fcd488e22600bd38e4fd59c20cb49434d054903bd2/multidict-6.7.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:03ede2a6ffbe8ef936b92cb4529f27f42be7f56afcdab5ab739cd5f27fb1cbf9", size = 250156, upload-time = "2026-01-26T02:44:33.734Z" }, + { url = "https://files.pythonhosted.org/packages/db/6b/420e173eec5fba721a50e2a9f89eda89d9c98fded1124f8d5c675f7a0c0f/multidict-6.7.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:90efbcf47dbe33dcf643a1e400d67d59abeac5db07dc3f27d6bdeae497a2198c", size = 249742, upload-time = "2026-01-26T02:44:35.222Z" }, + { url = "https://files.pythonhosted.org/packages/44/a3/ec5b5bd98f306bc2aa297b8c6f11a46714a56b1e6ef5ebda50a4f5d7c5fb/multidict-6.7.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:5c4b9bfc148f5a91be9244d6264c53035c8a0dcd2f51f1c3c6e30e30ebaa1c84", size = 262221, upload-time = "2026-01-26T02:44:36.604Z" }, + { url = "https://files.pythonhosted.org/packages/cd/f7/e8c0d0da0cd1e28d10e624604e1a36bcc3353aaebdfdc3a43c72bc683a12/multidict-6.7.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:401c5a650f3add2472d1d288c26deebc540f99e2fb83e9525007a74cd2116f1d", size = 258664, upload-time = "2026-01-26T02:44:38.008Z" }, + { url = "https://files.pythonhosted.org/packages/52/da/151a44e8016dd33feed44f730bd856a66257c1ee7aed4f44b649fb7edeb3/multidict-6.7.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:97891f3b1b3ffbded884e2916cacf3c6fc87b66bb0dde46f7357404750559f33", size = 249490, upload-time = "2026-01-26T02:44:39.386Z" }, + { url = "https://files.pythonhosted.org/packages/87/af/a3b86bf9630b732897f6fc3f4c4714b90aa4361983ccbdcd6c0339b21b0c/multidict-6.7.1-cp313-cp313-win32.whl", hash = "sha256:e1c5988359516095535c4301af38d8a8838534158f649c05dd1050222321bcb3", size = 41695, upload-time = "2026-01-26T02:44:41.318Z" }, 
+ { url = "https://files.pythonhosted.org/packages/b2/35/e994121b0e90e46134673422dd564623f93304614f5d11886b1b3e06f503/multidict-6.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:960c83bf01a95b12b08fd54324a4eb1d5b52c88932b5cba5d6e712bb3ed12eb5", size = 45884, upload-time = "2026-01-26T02:44:42.488Z" }, + { url = "https://files.pythonhosted.org/packages/ca/61/42d3e5dbf661242a69c97ea363f2d7b46c567da8eadef8890022be6e2ab0/multidict-6.7.1-cp313-cp313-win_arm64.whl", hash = "sha256:563fe25c678aaba333d5399408f5ec3c383ca5b663e7f774dd179a520b8144df", size = 43122, upload-time = "2026-01-26T02:44:43.664Z" }, + { url = "https://files.pythonhosted.org/packages/6d/b3/e6b21c6c4f314bb956016b0b3ef2162590a529b84cb831c257519e7fde44/multidict-6.7.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:c76c4bec1538375dad9d452d246ca5368ad6e1c9039dadcf007ae59c70619ea1", size = 83175, upload-time = "2026-01-26T02:44:44.894Z" }, + { url = "https://files.pythonhosted.org/packages/fb/76/23ecd2abfe0957b234f6c960f4ade497f55f2c16aeb684d4ecdbf1c95791/multidict-6.7.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:57b46b24b5d5ebcc978da4ec23a819a9402b4228b8a90d9c656422b4bdd8a963", size = 48460, upload-time = "2026-01-26T02:44:46.106Z" }, + { url = "https://files.pythonhosted.org/packages/c4/57/a0ed92b23f3a042c36bc4227b72b97eca803f5f1801c1ab77c8a212d455e/multidict-6.7.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e954b24433c768ce78ab7929e84ccf3422e46deb45a4dc9f93438f8217fa2d34", size = 46930, upload-time = "2026-01-26T02:44:47.278Z" }, + { url = "https://files.pythonhosted.org/packages/b5/66/02ec7ace29162e447f6382c495dc95826bf931d3818799bbef11e8f7df1a/multidict-6.7.1-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3bd231490fa7217cc832528e1cd8752a96f0125ddd2b5749390f7c3ec8721b65", size = 242582, upload-time = "2026-01-26T02:44:48.604Z" }, + { url = 
"https://files.pythonhosted.org/packages/58/18/64f5a795e7677670e872673aca234162514696274597b3708b2c0d276cce/multidict-6.7.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:253282d70d67885a15c8a7716f3a73edf2d635793ceda8173b9ecc21f2fb8292", size = 250031, upload-time = "2026-01-26T02:44:50.544Z" }, + { url = "https://files.pythonhosted.org/packages/c8/ed/e192291dbbe51a8290c5686f482084d31bcd9d09af24f63358c3d42fd284/multidict-6.7.1-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:0b4c48648d7649c9335cf1927a8b87fa692de3dcb15faa676c6a6f1f1aabda43", size = 228596, upload-time = "2026-01-26T02:44:51.951Z" }, + { url = "https://files.pythonhosted.org/packages/1e/7e/3562a15a60cf747397e7f2180b0a11dc0c38d9175a650e75fa1b4d325e15/multidict-6.7.1-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:98bc624954ec4d2c7cb074b8eefc2b5d0ce7d482e410df446414355d158fe4ca", size = 257492, upload-time = "2026-01-26T02:44:53.902Z" }, + { url = "https://files.pythonhosted.org/packages/24/02/7d0f9eae92b5249bb50ac1595b295f10e263dd0078ebb55115c31e0eaccd/multidict-6.7.1-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:1b99af4d9eec0b49927b4402bcbb58dea89d3e0db8806a4086117019939ad3dd", size = 255899, upload-time = "2026-01-26T02:44:55.316Z" }, + { url = "https://files.pythonhosted.org/packages/00/e3/9b60ed9e23e64c73a5cde95269ef1330678e9c6e34dd4eb6b431b85b5a10/multidict-6.7.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6aac4f16b472d5b7dc6f66a0d49dd57b0e0902090be16594dc9ebfd3d17c47e7", size = 247970, upload-time = "2026-01-26T02:44:56.783Z" }, + { url = "https://files.pythonhosted.org/packages/3e/06/538e58a63ed5cfb0bd4517e346b91da32fde409d839720f664e9a4ae4f9d/multidict-6.7.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = 
"sha256:21f830fe223215dffd51f538e78c172ed7c7f60c9b96a2bf05c4848ad49921c3", size = 245060, upload-time = "2026-01-26T02:44:58.195Z" }, + { url = "https://files.pythonhosted.org/packages/b2/2f/d743a3045a97c895d401e9bd29aaa09b94f5cbdf1bd561609e5a6c431c70/multidict-6.7.1-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:f5dd81c45b05518b9aa4da4aa74e1c93d715efa234fd3e8a179df611cc85e5f4", size = 235888, upload-time = "2026-01-26T02:44:59.57Z" }, + { url = "https://files.pythonhosted.org/packages/38/83/5a325cac191ab28b63c52f14f1131f3b0a55ba3b9aa65a6d0bf2a9b921a0/multidict-6.7.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:eb304767bca2bb92fb9c5bd33cedc95baee5bb5f6c88e63706533a1c06ad08c8", size = 243554, upload-time = "2026-01-26T02:45:01.054Z" }, + { url = "https://files.pythonhosted.org/packages/20/1f/9d2327086bd15da2725ef6aae624208e2ef828ed99892b17f60c344e57ed/multidict-6.7.1-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:c9035dde0f916702850ef66460bc4239d89d08df4d02023a5926e7446724212c", size = 252341, upload-time = "2026-01-26T02:45:02.484Z" }, + { url = "https://files.pythonhosted.org/packages/e8/2c/2a1aa0280cf579d0f6eed8ee5211c4f1730bd7e06c636ba2ee6aafda302e/multidict-6.7.1-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:af959b9beeb66c822380f222f0e0a1889331597e81f1ded7f374f3ecb0fd6c52", size = 246391, upload-time = "2026-01-26T02:45:03.862Z" }, + { url = "https://files.pythonhosted.org/packages/e5/03/7ca022ffc36c5a3f6e03b179a5ceb829be9da5783e6fe395f347c0794680/multidict-6.7.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:41f2952231456154ee479651491e94118229844dd7226541788be783be2b5108", size = 243422, upload-time = "2026-01-26T02:45:05.296Z" }, + { url = "https://files.pythonhosted.org/packages/dc/1d/b31650eab6c5778aceed46ba735bd97f7c7d2f54b319fa916c0f96e7805b/multidict-6.7.1-cp313-cp313t-win32.whl", hash = "sha256:df9f19c28adcb40b6aae30bbaa1478c389efd50c28d541d76760199fc1037c32", size = 47770, upload-time = 
"2026-01-26T02:45:06.754Z" }, + { url = "https://files.pythonhosted.org/packages/ac/5b/2d2d1d522e51285bd61b1e20df8f47ae1a9d80839db0b24ea783b3832832/multidict-6.7.1-cp313-cp313t-win_amd64.whl", hash = "sha256:d54ecf9f301853f2c5e802da559604b3e95bb7a3b01a9c295c6ee591b9882de8", size = 53109, upload-time = "2026-01-26T02:45:08.044Z" }, + { url = "https://files.pythonhosted.org/packages/3d/a3/cc409ba012c83ca024a308516703cf339bdc4b696195644a7215a5164a24/multidict-6.7.1-cp313-cp313t-win_arm64.whl", hash = "sha256:5a37ca18e360377cfda1d62f5f382ff41f2b8c4ccb329ed974cc2e1643440118", size = 45573, upload-time = "2026-01-26T02:45:09.349Z" }, + { url = "https://files.pythonhosted.org/packages/91/cc/db74228a8be41884a567e88a62fd589a913708fcf180d029898c17a9a371/multidict-6.7.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:8f333ec9c5eb1b7105e3b84b53141e66ca05a19a605368c55450b6ba208cb9ee", size = 75190, upload-time = "2026-01-26T02:45:10.651Z" }, + { url = "https://files.pythonhosted.org/packages/d5/22/492f2246bb5b534abd44804292e81eeaf835388901f0c574bac4eeec73c5/multidict-6.7.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:a407f13c188f804c759fc6a9f88286a565c242a76b27626594c133b82883b5c2", size = 44486, upload-time = "2026-01-26T02:45:11.938Z" }, + { url = "https://files.pythonhosted.org/packages/f1/4f/733c48f270565d78b4544f2baddc2fb2a245e5a8640254b12c36ac7ac68e/multidict-6.7.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:0e161ddf326db5577c3a4cc2d8648f81456e8a20d40415541587a71620d7a7d1", size = 43219, upload-time = "2026-01-26T02:45:14.346Z" }, + { url = "https://files.pythonhosted.org/packages/24/bb/2c0c2287963f4259c85e8bcbba9182ced8d7fca65c780c38e99e61629d11/multidict-6.7.1-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:1e3a8bb24342a8201d178c3b4984c26ba81a577c80d4d525727427460a50c22d", size = 245132, upload-time = "2026-01-26T02:45:15.712Z" }, + { url = 
"https://files.pythonhosted.org/packages/a7/f9/44d4b3064c65079d2467888794dea218d1601898ac50222ab8a9a8094460/multidict-6.7.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97231140a50f5d447d3164f994b86a0bed7cd016e2682f8650d6a9158e14fd31", size = 252420, upload-time = "2026-01-26T02:45:17.293Z" }, + { url = "https://files.pythonhosted.org/packages/8b/13/78f7275e73fa17b24c9a51b0bd9d73ba64bb32d0ed51b02a746eb876abe7/multidict-6.7.1-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:6b10359683bd8806a200fd2909e7c8ca3a7b24ec1d8132e483d58e791d881048", size = 233510, upload-time = "2026-01-26T02:45:19.356Z" }, + { url = "https://files.pythonhosted.org/packages/4b/25/8167187f62ae3cbd52da7893f58cb036b47ea3fb67138787c76800158982/multidict-6.7.1-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:283ddac99f7ac25a4acadbf004cb5ae34480bbeb063520f70ce397b281859362", size = 264094, upload-time = "2026-01-26T02:45:20.834Z" }, + { url = "https://files.pythonhosted.org/packages/a1/e7/69a3a83b7b030cf283fb06ce074a05a02322359783424d7edf0f15fe5022/multidict-6.7.1-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:538cec1e18c067d0e6103aa9a74f9e832904c957adc260e61cd9d8cf0c3b3d37", size = 260786, upload-time = "2026-01-26T02:45:22.818Z" }, + { url = "https://files.pythonhosted.org/packages/fe/3b/8ec5074bcfc450fe84273713b4b0a0dd47c0249358f5d82eb8104ffe2520/multidict-6.7.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7eee46ccb30ff48a1e35bb818cc90846c6be2b68240e42a78599166722cea709", size = 248483, upload-time = "2026-01-26T02:45:24.368Z" }, + { url = "https://files.pythonhosted.org/packages/48/5a/d5a99e3acbca0e29c5d9cba8f92ceb15dce78bab963b308ae692981e3a5d/multidict-6.7.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = 
"sha256:fa263a02f4f2dd2d11a7b1bb4362aa7cb1049f84a9235d31adf63f30143469a0", size = 248403, upload-time = "2026-01-26T02:45:25.982Z" }, + { url = "https://files.pythonhosted.org/packages/35/48/e58cd31f6c7d5102f2a4bf89f96b9cf7e00b6c6f3d04ecc44417c00a5a3c/multidict-6.7.1-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:2e1425e2f99ec5bd36c15a01b690a1a2456209c5deed58f95469ffb46039ccbb", size = 240315, upload-time = "2026-01-26T02:45:27.487Z" }, + { url = "https://files.pythonhosted.org/packages/94/33/1cd210229559cb90b6786c30676bb0c58249ff42f942765f88793b41fdce/multidict-6.7.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:497394b3239fc6f0e13a78a3e1b61296e72bf1c5f94b4c4eb80b265c37a131cd", size = 245528, upload-time = "2026-01-26T02:45:28.991Z" }, + { url = "https://files.pythonhosted.org/packages/64/f2/6e1107d226278c876c783056b7db43d800bb64c6131cec9c8dfb6903698e/multidict-6.7.1-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:233b398c29d3f1b9676b4b6f75c518a06fcb2ea0b925119fb2c1bc35c05e1601", size = 258784, upload-time = "2026-01-26T02:45:30.503Z" }, + { url = "https://files.pythonhosted.org/packages/4d/c1/11f664f14d525e4a1b5327a82d4de61a1db604ab34c6603bb3c2cc63ad34/multidict-6.7.1-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:93b1818e4a6e0930454f0f2af7dfce69307ca03cdcfb3739bf4d91241967b6c1", size = 251980, upload-time = "2026-01-26T02:45:32.603Z" }, + { url = "https://files.pythonhosted.org/packages/e1/9f/75a9ac888121d0c5bbd4ecf4eead45668b1766f6baabfb3b7f66a410e231/multidict-6.7.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:f33dc2a3abe9249ea5d8360f969ec7f4142e7ac45ee7014d8f8d5acddf178b7b", size = 243602, upload-time = "2026-01-26T02:45:34.043Z" }, + { url = "https://files.pythonhosted.org/packages/9a/e7/50bf7b004cc8525d80dbbbedfdc7aed3e4c323810890be4413e589074032/multidict-6.7.1-cp314-cp314-win32.whl", hash = "sha256:3ab8b9d8b75aef9df299595d5388b14530839f6422333357af1339443cff777d", size = 40930, upload-time = "2026-01-26T02:45:36.278Z" }, 
+ { url = "https://files.pythonhosted.org/packages/e0/bf/52f25716bbe93745595800f36fb17b73711f14da59ed0bb2eba141bc9f0f/multidict-6.7.1-cp314-cp314-win_amd64.whl", hash = "sha256:5e01429a929600e7dab7b166062d9bb54a5eed752384c7384c968c2afab8f50f", size = 45074, upload-time = "2026-01-26T02:45:37.546Z" }, + { url = "https://files.pythonhosted.org/packages/97/ab/22803b03285fa3a525f48217963da3a65ae40f6a1b6f6cf2768879e208f9/multidict-6.7.1-cp314-cp314-win_arm64.whl", hash = "sha256:4885cb0e817aef5d00a2e8451d4665c1808378dc27c2705f1bf4ef8505c0d2e5", size = 42471, upload-time = "2026-01-26T02:45:38.889Z" }, + { url = "https://files.pythonhosted.org/packages/e0/6d/f9293baa6146ba9507e360ea0292b6422b016907c393e2f63fc40ab7b7b5/multidict-6.7.1-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:0458c978acd8e6ea53c81eefaddbbee9c6c5e591f41b3f5e8e194780fe026581", size = 82401, upload-time = "2026-01-26T02:45:40.254Z" }, + { url = "https://files.pythonhosted.org/packages/7a/68/53b5494738d83558d87c3c71a486504d8373421c3e0dbb6d0db48ad42ee0/multidict-6.7.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:c0abd12629b0af3cf590982c0b413b1e7395cd4ec026f30986818ab95bfaa94a", size = 48143, upload-time = "2026-01-26T02:45:41.635Z" }, + { url = "https://files.pythonhosted.org/packages/37/e8/5284c53310dcdc99ce5d66563f6e5773531a9b9fe9ec7a615e9bc306b05f/multidict-6.7.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:14525a5f61d7d0c94b368a42cff4c9a4e7ba2d52e2672a7b23d84dc86fb02b0c", size = 46507, upload-time = "2026-01-26T02:45:42.99Z" }, + { url = "https://files.pythonhosted.org/packages/e4/fc/6800d0e5b3875568b4083ecf5f310dcf91d86d52573160834fb4bfcf5e4f/multidict-6.7.1-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:17307b22c217b4cf05033dabefe68255a534d637c6c9b0cc8382718f87be4262", size = 239358, upload-time = "2026-01-26T02:45:44.376Z" }, + { url = 
"https://files.pythonhosted.org/packages/41/75/4ad0973179361cdf3a113905e6e088173198349131be2b390f9fa4da5fc6/multidict-6.7.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7a7e590ff876a3eaf1c02a4dfe0724b6e69a9e9de6d8f556816f29c496046e59", size = 246884, upload-time = "2026-01-26T02:45:47.167Z" }, + { url = "https://files.pythonhosted.org/packages/c3/9c/095bb28b5da139bd41fb9a5d5caff412584f377914bd8787c2aa98717130/multidict-6.7.1-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:5fa6a95dfee63893d80a34758cd0e0c118a30b8dcb46372bf75106c591b77889", size = 225878, upload-time = "2026-01-26T02:45:48.698Z" }, + { url = "https://files.pythonhosted.org/packages/07/d0/c0a72000243756e8f5a277b6b514fa005f2c73d481b7d9e47cd4568aa2e4/multidict-6.7.1-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a0543217a6a017692aa6ae5cc39adb75e587af0f3a82288b1492eb73dd6cc2a4", size = 253542, upload-time = "2026-01-26T02:45:50.164Z" }, + { url = "https://files.pythonhosted.org/packages/c0/6b/f69da15289e384ecf2a68837ec8b5ad8c33e973aa18b266f50fe55f24b8c/multidict-6.7.1-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f99fe611c312b3c1c0ace793f92464d8cd263cc3b26b5721950d977b006b6c4d", size = 252403, upload-time = "2026-01-26T02:45:51.779Z" }, + { url = "https://files.pythonhosted.org/packages/a2/76/b9669547afa5a1a25cd93eaca91c0da1c095b06b6d2d8ec25b713588d3a1/multidict-6.7.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9004d8386d133b7e6135679424c91b0b854d2d164af6ea3f289f8f2761064609", size = 244889, upload-time = "2026-01-26T02:45:53.27Z" }, + { url = "https://files.pythonhosted.org/packages/7e/a9/a50d2669e506dad33cfc45b5d574a205587b7b8a5f426f2fbb2e90882588/multidict-6.7.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = 
"sha256:e628ef0e6859ffd8273c69412a2465c4be4a9517d07261b33334b5ec6f3c7489", size = 241982, upload-time = "2026-01-26T02:45:54.919Z" }, + { url = "https://files.pythonhosted.org/packages/c5/bb/1609558ad8b456b4827d3c5a5b775c93b87878fd3117ed3db3423dfbce1b/multidict-6.7.1-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:841189848ba629c3552035a6a7f5bf3b02eb304e9fea7492ca220a8eda6b0e5c", size = 232415, upload-time = "2026-01-26T02:45:56.981Z" }, + { url = "https://files.pythonhosted.org/packages/d8/59/6f61039d2aa9261871e03ab9dc058a550d240f25859b05b67fd70f80d4b3/multidict-6.7.1-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:ce1bbd7d780bb5a0da032e095c951f7014d6b0a205f8318308140f1a6aba159e", size = 240337, upload-time = "2026-01-26T02:45:58.698Z" }, + { url = "https://files.pythonhosted.org/packages/a1/29/fdc6a43c203890dc2ae9249971ecd0c41deaedfe00d25cb6564b2edd99eb/multidict-6.7.1-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:b26684587228afed0d50cf804cc71062cc9c1cdf55051c4c6345d372947b268c", size = 248788, upload-time = "2026-01-26T02:46:00.862Z" }, + { url = "https://files.pythonhosted.org/packages/a9/14/a153a06101323e4cf086ecee3faadba52ff71633d471f9685c42e3736163/multidict-6.7.1-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:9f9af11306994335398293f9958071019e3ab95e9a707dc1383a35613f6abcb9", size = 242842, upload-time = "2026-01-26T02:46:02.824Z" }, + { url = "https://files.pythonhosted.org/packages/41/5f/604ae839e64a4a6efc80db94465348d3b328ee955e37acb24badbcd24d83/multidict-6.7.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:b4938326284c4f1224178a560987b6cf8b4d38458b113d9b8c1db1a836e640a2", size = 240237, upload-time = "2026-01-26T02:46:05.898Z" }, + { url = "https://files.pythonhosted.org/packages/5f/60/c3a5187bf66f6fb546ff4ab8fb5a077cbdd832d7b1908d4365c7f74a1917/multidict-6.7.1-cp314-cp314t-win32.whl", hash = "sha256:98655c737850c064a65e006a3df7c997cd3b220be4ec8fe26215760b9697d4d7", size = 48008, upload-time = 
"2026-01-26T02:46:07.468Z" }, + { url = "https://files.pythonhosted.org/packages/0c/f7/addf1087b860ac60e6f382240f64fb99f8bfb532bb06f7c542b83c29ca61/multidict-6.7.1-cp314-cp314t-win_amd64.whl", hash = "sha256:497bde6223c212ba11d462853cfa4f0ae6ef97465033e7dc9940cdb3ab5b48e5", size = 53542, upload-time = "2026-01-26T02:46:08.809Z" }, + { url = "https://files.pythonhosted.org/packages/4c/81/4629d0aa32302ef7b2ec65c75a728cc5ff4fa410c50096174c1632e70b3e/multidict-6.7.1-cp314-cp314t-win_arm64.whl", hash = "sha256:2bbd113e0d4af5db41d5ebfe9ccaff89de2120578164f86a5d17d5a576d1e5b2", size = 44719, upload-time = "2026-01-26T02:46:11.146Z" }, + { url = "https://files.pythonhosted.org/packages/81/08/7036c080d7117f28a4af526d794aab6a84463126db031b007717c1a6676e/multidict-6.7.1-py3-none-any.whl", hash = "sha256:55d97cc6dae627efa6a6e548885712d4864b81110ac76fa4e534c03819fa4a56", size = 12319, upload-time = "2026-01-26T02:46:44.004Z" }, +] + +[[package]] +name = "multipart" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8e/d6/9c4f366d6f9bb8f8fb5eae3acac471335c39510c42b537fd515213d7d8c3/multipart-1.3.1.tar.gz", hash = "sha256:211d7cfc1a7a43e75c4d24ee0e8e0f4f61d522f1a21575303ae85333dea687bf", size = 38929, upload-time = "2026-02-27T10:17:13.7Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/19/ed/e1f03200ee1f0bf4a2b9b72709afefbf5319b68df654e0b84b35c65613ee/multipart-1.3.1-py3-none-any.whl", hash = "sha256:a82b59e1befe74d3d30b3d3f70efd5a2eba4d938f845dcff9faace968888ff29", size = 15061, upload-time = "2026-02-27T10:17:11.943Z" }, +] + [[package]] name = "nodeenv" version = "1.10.0" @@ -600,7 +982,9 @@ name = "osmsg" version = "1.0.3" source = { editable = "." 
} dependencies = [ + { name = "asyncpg" }, { name = "duckdb" }, + { name = "litestar", extra = ["standard"] }, { name = "osmium" }, { name = "platformdirs" }, { name = "pyarrow" }, @@ -612,6 +996,7 @@ dependencies = [ { name = "shapely" }, { name = "typer" }, { name = "typer-config", extra = ["yaml"] }, + { name = "uv" }, ] [package.dev-dependencies] @@ -627,7 +1012,9 @@ dev = [ [package.metadata] requires-dist = [ + { name = "asyncpg", specifier = ">=0.30.0" }, { name = "duckdb", specifier = ">=1.5.2" }, + { name = "litestar", extras = ["standard"], specifier = ">=2.18.0" }, { name = "osmium", specifier = ">=4.3.1" }, { name = "platformdirs", specifier = ">=4.5.1" }, { name = "pyarrow", specifier = ">=24.0.0" }, @@ -639,6 +1026,7 @@ requires-dist = [ { name = "shapely", specifier = ">=2.1.2" }, { name = "typer", specifier = ">=0.25.0" }, { name = "typer-config", extras = ["yaml"], specifier = ">=1.5.1" }, + { name = "uv", specifier = ">=0.9.0" }, ] [package.metadata.requires-dev] @@ -679,6 +1067,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] +[[package]] +name = "polyfactory" +version = "3.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "faker" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/85/68/7717bd9e63ed254617a7d3dc9260904fb736d6ea203e58ffddcb186c64e4/polyfactory-3.3.0.tar.gz", hash = "sha256:237258b6ff43edf362ffd1f68086bb796466f786adfa002b0ac256dbf2246e9a", size = 348668, upload-time = "2026-02-22T09:46:28.01Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dd/34/b6f19941adcdaf415b5e8a8d577499f5b6a76b59cbae37f9b125a9ffe9f2/polyfactory-3.3.0-py3-none-any.whl", hash = 
"sha256:686abcaa761930d3df87b91e95b26b8d8cb9fdbbbe0b03d5f918acff5c72606e", size = 62707, upload-time = "2026-02-22T09:46:25.985Z" }, +] + [[package]] name = "pre-commit" version = "4.6.0" @@ -1037,6 +1438,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/82/3b/64d4899d73f91ba49a8c18a8ff3f0ea8f1c1d75481760df8c68ef5235bf5/rich-15.0.0-py3-none-any.whl", hash = "sha256:33bd4ef74232fb73fe9279a257718407f169c09b78a87ad3d296f548e27de0bb", size = 310654, upload-time = "2026-04-12T08:24:02.83Z" }, ] +[[package]] +name = "rich-click" +version = "1.9.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "rich" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/04/27/091e140ea834272188e63f8dd6faac1f5c687582b687197b3e0ec3c78ebf/rich_click-1.9.7.tar.gz", hash = "sha256:022997c1e30731995bdbc8ec2f82819340d42543237f033a003c7b1f843fc5dc", size = 74838, upload-time = "2026-01-31T04:29:27.707Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ca/e5/d708d262b600a352abe01c2ae360d8ff75b0af819b78e9af293191d928e6/rich_click-1.9.7-py3-none-any.whl", hash = "sha256:2f99120fca78f536e07b114d3b60333bc4bb2a0969053b1250869bcdc1b5351b", size = 71491, upload-time = "2026-01-31T04:29:26.777Z" }, +] + [[package]] name = "ruff" version = "0.15.12" @@ -1130,6 +1545,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, ] +[[package]] +name = "six" +version = "1.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = 
"sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload-time = "2024-12-04T17:35:28.174Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, +] + +[[package]] +name = "sniffio" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload-time = "2024-02-25T23:20:04.057Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" }, +] + [[package]] name = "termcolor" version = "3.3.0" @@ -1225,6 +1658,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" }, ] +[[package]] +name = "tzdata" +version = "2026.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ba/19/1b9b0e29f30c6d35cb345486df41110984ea67ae69dddbc0e8a100999493/tzdata-2026.2.tar.gz", hash = "sha256:9173fde7d80d9018e02a662e168e5a2d04f87c41ea174b139fbef642eda62d10", size = 198254, upload-time = "2026-04-24T15:22:08.651Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/ce/e4/dccd7f47c4b64213ac01ef921a1337ee6e30e8c6466046018326977efd95/tzdata-2026.2-py2.py3-none-any.whl", hash = "sha256:bbe9af844f658da81a5f95019480da3a89415801f6cc966806612cc7169bffe7", size = 349321, upload-time = "2026-04-24T15:22:05.876Z" }, +] + [[package]] name = "urllib3" version = "2.6.3" @@ -1234,6 +1676,94 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" }, ] +[[package]] +name = "uv" +version = "0.11.8" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c1/cd/4393fecb083897e956f016d4e66d0b8a496a08fe2e03cbda32a1e91da7ee/uv-0.11.8.tar.gz", hash = "sha256:bb2cf302b8503629aab6f0090a05551e6f8cfc2d687ca059cad7ec9e11214335", size = 4098020, upload-time = "2026-04-27T13:15:31.625Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/99/84/dcb676a3e36a3a2b44dc2e4dfea471b8cd709025e27cce3e588b176fd899/uv-0.11.8-py3-none-linux_armv6l.whl", hash = "sha256:a53e704a780a9e78a50f5a880e99a690f84e6fb9e82610903ce26f47c271d74c", size = 23664296, upload-time = "2026-04-27T13:15:15.644Z" }, + { url = "https://files.pythonhosted.org/packages/86/05/557aa070fda7b8460bbbe1e867e8e5b80602c5b30ed77d1d94fc5acae518/uv-0.11.8-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:d414fc3795b6f56fb6b1fa359537930924fdfe857750a144d2aedf3077be3f1d", size = 23087321, upload-time = "2026-04-27T13:15:36.193Z" }, + { url = "https://files.pythonhosted.org/packages/d5/62/82953018801a250e16b091ef4b5e95e939b2f01224363d6fc80f600b7eff/uv-0.11.8-py3-none-macosx_11_0_arm64.whl", hash = "sha256:f0d402e182ab581e934c159cc9edf25ec6e08d32f29aa797980e949afefc87cd", size = 21747142, upload-time = "2026-04-27T13:15:20.4Z" }, + { url = 
"https://files.pythonhosted.org/packages/af/4c/477f2abe16f9a3d3c73077f15615878a303eef3760115ec946be58ecb9b2/uv-0.11.8-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:877c9af3b3955a35ef739e5b2ba79c56dae5c4d50420a7ed908c0901e1c8c807", size = 23425861, upload-time = "2026-04-27T13:15:10.374Z" }, + { url = "https://files.pythonhosted.org/packages/2a/63/19f46193e49f0c9bf33346a4d726313871864db16e7cdd1c0a63bc112000/uv-0.11.8-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.musllinux_1_1_armv7l.whl", hash = "sha256:8278144df8d80a83f770c264a5e79ea50791316d2a0dda869e53b3c1174142a8", size = 23215551, upload-time = "2026-04-27T13:15:38.706Z" }, + { url = "https://files.pythonhosted.org/packages/72/3e/5595b265df848a33cd060b10e8f763a46d67521ac9f6c314e8a4ad5329d7/uv-0.11.8-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b3494ad32465f4e02259cfb104d24efe5bb8f7a782351f0354de9385415fb310", size = 23224170, upload-time = "2026-04-27T13:15:18.083Z" }, + { url = "https://files.pythonhosted.org/packages/a6/b3/6ca95e690b52542caa1dae10ede57732f90c629946ab5f027ff746f87deb/uv-0.11.8-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a4421e27e81f85bce3bdb75986c38b5f9bfab9cdccaf3d977cf124b3f0f0b989", size = 24730048, upload-time = "2026-04-27T13:15:13.254Z" }, + { url = "https://files.pythonhosted.org/packages/ea/49/71b7322067c85a3736a22a300072b0566991fe3f95b81bed793508ff5315/uv-0.11.8-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91943e77fc962752d4f64ad5739219858395981078051c740b28b52963b366aa", size = 25585906, upload-time = "2026-04-27T13:15:41.455Z" }, + { url = "https://files.pythonhosted.org/packages/37/16/4e84cd5131327fe86d4784ebfc8a983149f4e6b811476ef271fc548b29e6/uv-0.11.8-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:41fbba287efcc9bc9505a60549b3a223220da720eacd03be8c23d9daaafa44f4", size = 24795740, upload-time = 
"2026-04-27T13:15:49.842Z" }, + { url = "https://files.pythonhosted.org/packages/5b/01/df175979018743cc5ba6e2fb9dcec916868271e8d88cf0b9df8fd805a0df/uv-0.11.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d97bb2920d6cddc07faa475013461294cc09b77ec8139278416c6e54b938d037", size = 24824980, upload-time = "2026-04-27T13:15:53.506Z" }, + { url = "https://files.pythonhosted.org/packages/1c/95/93c7f595f7136fb32807442860c55d0faed2cd3d7da4b7105ed3c2535d5f/uv-0.11.8-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:fb6a755305eb1e081dfe6a8bc007dbae2d26fe75e551656ca7c9cd08fba21d26", size = 23526790, upload-time = "2026-04-27T13:15:04.955Z" }, + { url = "https://files.pythonhosted.org/packages/04/02/77430b89e172c20cc549b07a5b1dfda0c882c161b6d82781d3150a7063ac/uv-0.11.8-py3-none-manylinux_2_31_riscv64.musllinux_1_1_riscv64.whl", hash = "sha256:841ecbb38532698f73b14b49dc5f0c5e756194c7fcf6e5c6b7ed3859200fe91b", size = 24280498, upload-time = "2026-04-27T13:15:43.978Z" }, + { url = "https://files.pythonhosted.org/packages/8a/e3/23e4a2bb91e3880e017e6116886e2d0bde14ba6aa95ddc458160ee630e7c/uv-0.11.8-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:b3ff2b20c1897105ebe7ed7f9b1b331c7171da029bc1e35970ce31dc086141c1", size = 24375233, upload-time = "2026-04-27T13:15:25.753Z" }, + { url = "https://files.pythonhosted.org/packages/d9/67/fb7dc17cea816a667d1be2632525aa1687566bfafd17bdac561a7a6c9484/uv-0.11.8-py3-none-musllinux_1_1_i686.whl", hash = "sha256:ad381228b0170ef9646902c7e908d4a10a7ecc3da8139450506cf70c7e7f3e80", size = 23904818, upload-time = "2026-04-27T13:15:23.21Z" }, + { url = "https://files.pythonhosted.org/packages/4b/91/b920e35f54f8c6b51f2c639e8170bb80a47a739a1442fea33a479bc93a3d/uv-0.11.8-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:0172b5215544844cd3db0fa3c73a2eb74999b3f00cd2527dde578725076d7b65", size = 25015448, upload-time = "2026-04-27T13:15:46.666Z" }, + { url = 
"https://files.pythonhosted.org/packages/05/e8/3771956dc1c94b8484789bb8070d91872080d0af99332b8bdec7218c2bfd/uv-0.11.8-py3-none-win32.whl", hash = "sha256:e71c1dd23cbb480f3952c3a95b4fd00f96bd618e2a94583fc9388c500af3070d", size = 22823583, upload-time = "2026-04-27T13:15:33.674Z" }, + { url = "https://files.pythonhosted.org/packages/f9/9b/a91a9c60dcae0e1e3da06377d38f32118a523697d461fe41bc9f117ecf59/uv-0.11.8-py3-none-win_amd64.whl", hash = "sha256:306c624c68d95dd7ea3647675323d72c1abc25f91c3e92ae4cd6f0f11b508726", size = 25407438, upload-time = "2026-04-27T13:15:28.957Z" }, + { url = "https://files.pythonhosted.org/packages/61/5d/defa29fe617e6f07d4e514089e9d36fd9f44ede869e597e39ff7d69f6917/uv-0.11.8-py3-none-win_arm64.whl", hash = "sha256:a9853456696d579f206135c9dda7227a6ed8311b8a9a0b9b2008c4ae81950efe", size = 23914243, upload-time = "2026-04-27T13:15:07.717Z" }, +] + +[[package]] +name = "uvicorn" +version = "0.46.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1f/93/041fca8274050e40e6791f267d82e0e2e27dd165627bd640d3e0e378d877/uvicorn-0.46.0.tar.gz", hash = "sha256:fb9da0926999cc6cb22dc7cd71a94a632f078e6ae47ff683c5c420750fb7413d", size = 88758, upload-time = "2026-04-23T07:16:00.151Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/31/a3/5b1562db76a5a488274b2332a97199b32d0442aca0ed193697fd47786316/uvicorn-0.46.0-py3-none-any.whl", hash = "sha256:bbebbcbed972d162afca128605223022bedd345b7bc7855ce66deb31487a9048", size = 70926, upload-time = "2026-04-23T07:15:58.355Z" }, +] + +[package.optional-dependencies] +standard = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "httptools" }, + { name = "python-dotenv" }, + { name = "pyyaml" }, + { name = "uvloop", marker = "platform_python_implementation != 'PyPy' and sys_platform != 'cygwin' and sys_platform != 'win32'" }, + { name = "watchfiles" }, + { name = 
"websockets" }, +] + +[[package]] +name = "uvloop" +version = "0.22.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/06/f0/18d39dbd1971d6d62c4629cc7fa67f74821b0dc1f5a77af43719de7936a7/uvloop-0.22.1.tar.gz", hash = "sha256:6c84bae345b9147082b17371e3dd5d42775bddce91f885499017f4607fdaf39f", size = 2443250, upload-time = "2025-10-16T22:17:19.342Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/d5/69900f7883235562f1f50d8184bb7dd84a2fb61e9ec63f3782546fdbd057/uvloop-0.22.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c60ebcd36f7b240b30788554b6f0782454826a0ed765d8430652621b5de674b9", size = 1352420, upload-time = "2025-10-16T22:16:21.187Z" }, + { url = "https://files.pythonhosted.org/packages/a8/73/c4e271b3bce59724e291465cc936c37758886a4868787da0278b3b56b905/uvloop-0.22.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3b7f102bf3cb1995cfeaee9321105e8f5da76fdb104cdad8986f85461a1b7b77", size = 748677, upload-time = "2025-10-16T22:16:22.558Z" }, + { url = "https://files.pythonhosted.org/packages/86/94/9fb7fad2f824d25f8ecac0d70b94d0d48107ad5ece03769a9c543444f78a/uvloop-0.22.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:53c85520781d84a4b8b230e24a5af5b0778efdb39142b424990ff1ef7c48ba21", size = 3753819, upload-time = "2025-10-16T22:16:23.903Z" }, + { url = "https://files.pythonhosted.org/packages/74/4f/256aca690709e9b008b7108bc85fba619a2bc37c6d80743d18abad16ee09/uvloop-0.22.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:56a2d1fae65fd82197cb8c53c367310b3eabe1bbb9fb5a04d28e3e3520e4f702", size = 3804529, upload-time = "2025-10-16T22:16:25.246Z" }, + { url = "https://files.pythonhosted.org/packages/7f/74/03c05ae4737e871923d21a76fe28b6aad57f5c03b6e6bfcfa5ad616013e4/uvloop-0.22.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = 
"sha256:40631b049d5972c6755b06d0bfe8233b1bd9a8a6392d9d1c45c10b6f9e9b2733", size = 3621267, upload-time = "2025-10-16T22:16:26.819Z" }, + { url = "https://files.pythonhosted.org/packages/75/be/f8e590fe61d18b4a92070905497aec4c0e64ae1761498cad09023f3f4b3e/uvloop-0.22.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:535cc37b3a04f6cd2c1ef65fa1d370c9a35b6695df735fcff5427323f2cd5473", size = 3723105, upload-time = "2025-10-16T22:16:28.252Z" }, + { url = "https://files.pythonhosted.org/packages/3d/ff/7f72e8170be527b4977b033239a83a68d5c881cc4775fca255c677f7ac5d/uvloop-0.22.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:fe94b4564e865d968414598eea1a6de60adba0c040ba4ed05ac1300de402cd42", size = 1359936, upload-time = "2025-10-16T22:16:29.436Z" }, + { url = "https://files.pythonhosted.org/packages/c3/c6/e5d433f88fd54d81ef4be58b2b7b0cea13c442454a1db703a1eea0db1a59/uvloop-0.22.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:51eb9bd88391483410daad430813d982010f9c9c89512321f5b60e2cddbdddd6", size = 752769, upload-time = "2025-10-16T22:16:30.493Z" }, + { url = "https://files.pythonhosted.org/packages/24/68/a6ac446820273e71aa762fa21cdcc09861edd3536ff47c5cd3b7afb10eeb/uvloop-0.22.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:700e674a166ca5778255e0e1dc4e9d79ab2acc57b9171b79e65feba7184b3370", size = 4317413, upload-time = "2025-10-16T22:16:31.644Z" }, + { url = "https://files.pythonhosted.org/packages/5f/6f/e62b4dfc7ad6518e7eff2516f680d02a0f6eb62c0c212e152ca708a0085e/uvloop-0.22.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b5b1ac819a3f946d3b2ee07f09149578ae76066d70b44df3fa990add49a82e4", size = 4426307, upload-time = "2025-10-16T22:16:32.917Z" }, + { url = "https://files.pythonhosted.org/packages/90/60/97362554ac21e20e81bcef1150cb2a7e4ffdaf8ea1e5b2e8bf7a053caa18/uvloop-0.22.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:e047cc068570bac9866237739607d1313b9253c3051ad84738cbb095be0537b2", size = 4131970, upload-time = "2025-10-16T22:16:34.015Z" }, + { url = "https://files.pythonhosted.org/packages/99/39/6b3f7d234ba3964c428a6e40006340f53ba37993f46ed6e111c6e9141d18/uvloop-0.22.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:512fec6815e2dd45161054592441ef76c830eddaad55c8aa30952e6fe1ed07c0", size = 4296343, upload-time = "2025-10-16T22:16:35.149Z" }, + { url = "https://files.pythonhosted.org/packages/89/8c/182a2a593195bfd39842ea68ebc084e20c850806117213f5a299dfc513d9/uvloop-0.22.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:561577354eb94200d75aca23fbde86ee11be36b00e52a4eaf8f50fb0c86b7705", size = 1358611, upload-time = "2025-10-16T22:16:36.833Z" }, + { url = "https://files.pythonhosted.org/packages/d2/14/e301ee96a6dc95224b6f1162cd3312f6d1217be3907b79173b06785f2fe7/uvloop-0.22.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1cdf5192ab3e674ca26da2eada35b288d2fa49fdd0f357a19f0e7c4e7d5077c8", size = 751811, upload-time = "2025-10-16T22:16:38.275Z" }, + { url = "https://files.pythonhosted.org/packages/b7/02/654426ce265ac19e2980bfd9ea6590ca96a56f10c76e63801a2df01c0486/uvloop-0.22.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e2ea3d6190a2968f4a14a23019d3b16870dd2190cd69c8180f7c632d21de68d", size = 4288562, upload-time = "2025-10-16T22:16:39.375Z" }, + { url = "https://files.pythonhosted.org/packages/15/c0/0be24758891ef825f2065cd5db8741aaddabe3e248ee6acc5e8a80f04005/uvloop-0.22.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0530a5fbad9c9e4ee3f2b33b148c6a64d47bbad8000ea63704fa8260f4cf728e", size = 4366890, upload-time = "2025-10-16T22:16:40.547Z" }, + { url = "https://files.pythonhosted.org/packages/d2/53/8369e5219a5855869bcee5f4d317f6da0e2c669aecf0ef7d371e3d084449/uvloop-0.22.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:bc5ef13bbc10b5335792360623cc378d52d7e62c2de64660616478c32cd0598e", size = 4119472, upload-time = "2025-10-16T22:16:41.694Z" }, + { url = "https://files.pythonhosted.org/packages/f8/ba/d69adbe699b768f6b29a5eec7b47dd610bd17a69de51b251126a801369ea/uvloop-0.22.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1f38ec5e3f18c8a10ded09742f7fb8de0108796eb673f30ce7762ce1b8550cad", size = 4239051, upload-time = "2025-10-16T22:16:43.224Z" }, + { url = "https://files.pythonhosted.org/packages/90/cd/b62bdeaa429758aee8de8b00ac0dd26593a9de93d302bff3d21439e9791d/uvloop-0.22.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3879b88423ec7e97cd4eba2a443aa26ed4e59b45e6b76aabf13fe2f27023a142", size = 1362067, upload-time = "2025-10-16T22:16:44.503Z" }, + { url = "https://files.pythonhosted.org/packages/0d/f8/a132124dfda0777e489ca86732e85e69afcd1ff7686647000050ba670689/uvloop-0.22.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:4baa86acedf1d62115c1dc6ad1e17134476688f08c6efd8a2ab076e815665c74", size = 752423, upload-time = "2025-10-16T22:16:45.968Z" }, + { url = "https://files.pythonhosted.org/packages/a3/94/94af78c156f88da4b3a733773ad5ba0b164393e357cc4bd0ab2e2677a7d6/uvloop-0.22.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:297c27d8003520596236bdb2335e6b3f649480bd09e00d1e3a99144b691d2a35", size = 4272437, upload-time = "2025-10-16T22:16:47.451Z" }, + { url = "https://files.pythonhosted.org/packages/b5/35/60249e9fd07b32c665192cec7af29e06c7cd96fa1d08b84f012a56a0b38e/uvloop-0.22.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c1955d5a1dd43198244d47664a5858082a3239766a839b2102a269aaff7a4e25", size = 4292101, upload-time = "2025-10-16T22:16:49.318Z" }, + { url = "https://files.pythonhosted.org/packages/02/62/67d382dfcb25d0a98ce73c11ed1a6fba5037a1a1d533dcbb7cab033a2636/uvloop-0.22.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = 
"sha256:b31dc2fccbd42adc73bc4e7cdbae4fc5086cf378979e53ca5d0301838c5682c6", size = 4114158, upload-time = "2025-10-16T22:16:50.517Z" }, + { url = "https://files.pythonhosted.org/packages/f0/7a/f1171b4a882a5d13c8b7576f348acfe6074d72eaf52cccef752f748d4a9f/uvloop-0.22.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:93f617675b2d03af4e72a5333ef89450dfaa5321303ede6e67ba9c9d26878079", size = 4177360, upload-time = "2025-10-16T22:16:52.646Z" }, + { url = "https://files.pythonhosted.org/packages/79/7b/b01414f31546caf0919da80ad57cbfe24c56b151d12af68cee1b04922ca8/uvloop-0.22.1-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:37554f70528f60cad66945b885eb01f1bb514f132d92b6eeed1c90fd54ed6289", size = 1454790, upload-time = "2025-10-16T22:16:54.355Z" }, + { url = "https://files.pythonhosted.org/packages/d4/31/0bb232318dd838cad3fa8fb0c68c8b40e1145b32025581975e18b11fab40/uvloop-0.22.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:b76324e2dc033a0b2f435f33eb88ff9913c156ef78e153fb210e03c13da746b3", size = 796783, upload-time = "2025-10-16T22:16:55.906Z" }, + { url = "https://files.pythonhosted.org/packages/42/38/c9b09f3271a7a723a5de69f8e237ab8e7803183131bc57c890db0b6bb872/uvloop-0.22.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:badb4d8e58ee08dad957002027830d5c3b06aea446a6a3744483c2b3b745345c", size = 4647548, upload-time = "2025-10-16T22:16:57.008Z" }, + { url = "https://files.pythonhosted.org/packages/c1/37/945b4ca0ac27e3dc4952642d4c900edd030b3da6c9634875af6e13ae80e5/uvloop-0.22.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b91328c72635f6f9e0282e4a57da7470c7350ab1c9f48546c0f2866205349d21", size = 4467065, upload-time = "2025-10-16T22:16:58.206Z" }, + { url = "https://files.pythonhosted.org/packages/97/cc/48d232f33d60e2e2e0b42f4e73455b146b76ebe216487e862700457fbf3c/uvloop-0.22.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = 
"sha256:daf620c2995d193449393d6c62131b3fbd40a63bf7b307a1527856ace637fe88", size = 4328384, upload-time = "2025-10-16T22:16:59.36Z" }, + { url = "https://files.pythonhosted.org/packages/e4/16/c1fd27e9549f3c4baf1dc9c20c456cd2f822dbf8de9f463824b0c0357e06/uvloop-0.22.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6cde23eeda1a25c75b2e07d39970f3374105d5eafbaab2a4482be82f272d5a5e", size = 4296730, upload-time = "2025-10-16T22:17:00.744Z" }, +] + [[package]] name = "virtualenv" version = "21.2.4" @@ -1249,6 +1779,93 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/27/8d/edd0bd910ff803c308ee9a6b7778621af0d10252219ad9f19ef4d4982a61/virtualenv-21.2.4-py3-none-any.whl", hash = "sha256:29d21e941795206138d0f22f4e45ff7050e5da6c6472299fb7103318763861ac", size = 5831232, upload-time = "2026-04-14T22:15:29.342Z" }, ] +[[package]] +name = "watchfiles" +version = "1.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c2/c9/8869df9b2a2d6c59d79220a4db37679e74f807c559ffe5265e08b227a210/watchfiles-1.1.1.tar.gz", hash = "sha256:a173cb5c16c4f40ab19cecf48a534c409f7ea983ab8fed0741304a1c0a31b3f2", size = 94440, upload-time = "2025-10-14T15:06:21.08Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1f/f8/2c5f479fb531ce2f0564eda479faecf253d886b1ab3630a39b7bf7362d46/watchfiles-1.1.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:f57b396167a2565a4e8b5e56a5a1c537571733992b226f4f1197d79e94cf0ae5", size = 406529, upload-time = "2025-10-14T15:04:32.899Z" }, + { url = "https://files.pythonhosted.org/packages/fe/cd/f515660b1f32f65df671ddf6f85bfaca621aee177712874dc30a97397977/watchfiles-1.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:421e29339983e1bebc281fab40d812742268ad057db4aee8c4d2bce0af43b741", size = 394384, upload-time = "2025-10-14T15:04:33.761Z" }, + { url = 
"https://files.pythonhosted.org/packages/7b/c3/28b7dc99733eab43fca2d10f55c86e03bd6ab11ca31b802abac26b23d161/watchfiles-1.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6e43d39a741e972bab5d8100b5cdacf69db64e34eb19b6e9af162bccf63c5cc6", size = 448789, upload-time = "2025-10-14T15:04:34.679Z" }, + { url = "https://files.pythonhosted.org/packages/4a/24/33e71113b320030011c8e4316ccca04194bf0cbbaeee207f00cbc7d6b9f5/watchfiles-1.1.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f537afb3276d12814082a2e9b242bdcf416c2e8fd9f799a737990a1dbe906e5b", size = 460521, upload-time = "2025-10-14T15:04:35.963Z" }, + { url = "https://files.pythonhosted.org/packages/f4/c3/3c9a55f255aa57b91579ae9e98c88704955fa9dac3e5614fb378291155df/watchfiles-1.1.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b2cd9e04277e756a2e2d2543d65d1e2166d6fd4c9b183f8808634fda23f17b14", size = 488722, upload-time = "2025-10-14T15:04:37.091Z" }, + { url = "https://files.pythonhosted.org/packages/49/36/506447b73eb46c120169dc1717fe2eff07c234bb3232a7200b5f5bd816e9/watchfiles-1.1.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5f3f58818dc0b07f7d9aa7fe9eb1037aecb9700e63e1f6acfed13e9fef648f5d", size = 596088, upload-time = "2025-10-14T15:04:38.39Z" }, + { url = "https://files.pythonhosted.org/packages/82/ab/5f39e752a9838ec4d52e9b87c1e80f1ee3ccdbe92e183c15b6577ab9de16/watchfiles-1.1.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9bb9f66367023ae783551042d31b1d7fd422e8289eedd91f26754a66f44d5cff", size = 472923, upload-time = "2025-10-14T15:04:39.666Z" }, + { url = "https://files.pythonhosted.org/packages/af/b9/a419292f05e302dea372fa7e6fda5178a92998411f8581b9830d28fb9edb/watchfiles-1.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aebfd0861a83e6c3d1110b78ad54704486555246e542be3e2bb94195eabb2606", size = 456080, upload-time = 
"2025-10-14T15:04:40.643Z" }, + { url = "https://files.pythonhosted.org/packages/b0/c3/d5932fd62bde1a30c36e10c409dc5d54506726f08cb3e1d8d0ba5e2bc8db/watchfiles-1.1.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:5fac835b4ab3c6487b5dbad78c4b3724e26bcc468e886f8ba8cc4306f68f6701", size = 629432, upload-time = "2025-10-14T15:04:41.789Z" }, + { url = "https://files.pythonhosted.org/packages/f7/77/16bddd9779fafb795f1a94319dc965209c5641db5bf1edbbccace6d1b3c0/watchfiles-1.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:399600947b170270e80134ac854e21b3ccdefa11a9529a3decc1327088180f10", size = 623046, upload-time = "2025-10-14T15:04:42.718Z" }, + { url = "https://files.pythonhosted.org/packages/46/ef/f2ecb9a0f342b4bfad13a2787155c6ee7ce792140eac63a34676a2feeef2/watchfiles-1.1.1-cp311-cp311-win32.whl", hash = "sha256:de6da501c883f58ad50db3a32ad397b09ad29865b5f26f64c24d3e3281685849", size = 271473, upload-time = "2025-10-14T15:04:43.624Z" }, + { url = "https://files.pythonhosted.org/packages/94/bc/f42d71125f19731ea435c3948cad148d31a64fccde3867e5ba4edee901f9/watchfiles-1.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:35c53bd62a0b885bf653ebf6b700d1bf05debb78ad9292cf2a942b23513dc4c4", size = 287598, upload-time = "2025-10-14T15:04:44.516Z" }, + { url = "https://files.pythonhosted.org/packages/57/c9/a30f897351f95bbbfb6abcadafbaca711ce1162f4db95fc908c98a9165f3/watchfiles-1.1.1-cp311-cp311-win_arm64.whl", hash = "sha256:57ca5281a8b5e27593cb7d82c2ac927ad88a96ed406aa446f6344e4328208e9e", size = 277210, upload-time = "2025-10-14T15:04:45.883Z" }, + { url = "https://files.pythonhosted.org/packages/74/d5/f039e7e3c639d9b1d09b07ea412a6806d38123f0508e5f9b48a87b0a76cc/watchfiles-1.1.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:8c89f9f2f740a6b7dcc753140dd5e1ab9215966f7a3530d0c0705c83b401bd7d", size = 404745, upload-time = "2025-10-14T15:04:46.731Z" }, + { url = 
"https://files.pythonhosted.org/packages/a5/96/a881a13aa1349827490dab2d363c8039527060cfcc2c92cc6d13d1b1049e/watchfiles-1.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:bd404be08018c37350f0d6e34676bd1e2889990117a2b90070b3007f172d0610", size = 391769, upload-time = "2025-10-14T15:04:48.003Z" }, + { url = "https://files.pythonhosted.org/packages/4b/5b/d3b460364aeb8da471c1989238ea0e56bec24b6042a68046adf3d9ddb01c/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8526e8f916bb5b9a0a777c8317c23ce65de259422bba5b31325a6fa6029d33af", size = 449374, upload-time = "2025-10-14T15:04:49.179Z" }, + { url = "https://files.pythonhosted.org/packages/b9/44/5769cb62d4ed055cb17417c0a109a92f007114a4e07f30812a73a4efdb11/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2edc3553362b1c38d9f06242416a5d8e9fe235c204a4072e988ce2e5bb1f69f6", size = 459485, upload-time = "2025-10-14T15:04:50.155Z" }, + { url = "https://files.pythonhosted.org/packages/19/0c/286b6301ded2eccd4ffd0041a1b726afda999926cf720aab63adb68a1e36/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:30f7da3fb3f2844259cba4720c3fc7138eb0f7b659c38f3bfa65084c7fc7abce", size = 488813, upload-time = "2025-10-14T15:04:51.059Z" }, + { url = "https://files.pythonhosted.org/packages/c7/2b/8530ed41112dd4a22f4dcfdb5ccf6a1baad1ff6eed8dc5a5f09e7e8c41c7/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8979280bdafff686ba5e4d8f97840f929a87ed9cdf133cbbd42f7766774d2aa", size = 594816, upload-time = "2025-10-14T15:04:52.031Z" }, + { url = "https://files.pythonhosted.org/packages/ce/d2/f5f9fb49489f184f18470d4f99f4e862a4b3e9ac2865688eb2099e3d837a/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dcc5c24523771db3a294c77d94771abcfcb82a0e0ee8efd910c37c59ec1b31bb", size = 475186, upload-time = "2025-10-14T15:04:53.064Z" }, + { url = 
"https://files.pythonhosted.org/packages/cf/68/5707da262a119fb06fbe214d82dd1fe4a6f4af32d2d14de368d0349eb52a/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db5d7ae38ff20153d542460752ff397fcf5c96090c1230803713cf3147a6803", size = 456812, upload-time = "2025-10-14T15:04:55.174Z" }, + { url = "https://files.pythonhosted.org/packages/66/ab/3cbb8756323e8f9b6f9acb9ef4ec26d42b2109bce830cc1f3468df20511d/watchfiles-1.1.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:28475ddbde92df1874b6c5c8aaeb24ad5be47a11f87cde5a28ef3835932e3e94", size = 630196, upload-time = "2025-10-14T15:04:56.22Z" }, + { url = "https://files.pythonhosted.org/packages/78/46/7152ec29b8335f80167928944a94955015a345440f524d2dfe63fc2f437b/watchfiles-1.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:36193ed342f5b9842edd3532729a2ad55c4160ffcfa3700e0d54be496b70dd43", size = 622657, upload-time = "2025-10-14T15:04:57.521Z" }, + { url = "https://files.pythonhosted.org/packages/0a/bf/95895e78dd75efe9a7f31733607f384b42eb5feb54bd2eb6ed57cc2e94f4/watchfiles-1.1.1-cp312-cp312-win32.whl", hash = "sha256:859e43a1951717cc8de7f4c77674a6d389b106361585951d9e69572823f311d9", size = 272042, upload-time = "2025-10-14T15:04:59.046Z" }, + { url = "https://files.pythonhosted.org/packages/87/0a/90eb755f568de2688cb220171c4191df932232c20946966c27a59c400850/watchfiles-1.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:91d4c9a823a8c987cce8fa2690923b069966dabb196dd8d137ea2cede885fde9", size = 288410, upload-time = "2025-10-14T15:05:00.081Z" }, + { url = "https://files.pythonhosted.org/packages/36/76/f322701530586922fbd6723c4f91ace21364924822a8772c549483abed13/watchfiles-1.1.1-cp312-cp312-win_arm64.whl", hash = "sha256:a625815d4a2bdca61953dbba5a39d60164451ef34c88d751f6c368c3ea73d404", size = 278209, upload-time = "2025-10-14T15:05:01.168Z" }, + { url = 
"https://files.pythonhosted.org/packages/bb/f4/f750b29225fe77139f7ae5de89d4949f5a99f934c65a1f1c0b248f26f747/watchfiles-1.1.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:130e4876309e8686a5e37dba7d5e9bc77e6ed908266996ca26572437a5271e18", size = 404321, upload-time = "2025-10-14T15:05:02.063Z" }, + { url = "https://files.pythonhosted.org/packages/2b/f9/f07a295cde762644aa4c4bb0f88921d2d141af45e735b965fb2e87858328/watchfiles-1.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5f3bde70f157f84ece3765b42b4a52c6ac1a50334903c6eaf765362f6ccca88a", size = 391783, upload-time = "2025-10-14T15:05:03.052Z" }, + { url = "https://files.pythonhosted.org/packages/bc/11/fc2502457e0bea39a5c958d86d2cb69e407a4d00b85735ca724bfa6e0d1a/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14e0b1fe858430fc0251737ef3824c54027bedb8c37c38114488b8e131cf8219", size = 449279, upload-time = "2025-10-14T15:05:04.004Z" }, + { url = "https://files.pythonhosted.org/packages/e3/1f/d66bc15ea0b728df3ed96a539c777acfcad0eb78555ad9efcaa1274688f0/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f27db948078f3823a6bb3b465180db8ebecf26dd5dae6f6180bd87383b6b4428", size = 459405, upload-time = "2025-10-14T15:05:04.942Z" }, + { url = "https://files.pythonhosted.org/packages/be/90/9f4a65c0aec3ccf032703e6db02d89a157462fbb2cf20dd415128251cac0/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:059098c3a429f62fc98e8ec62b982230ef2c8df68c79e826e37b895bc359a9c0", size = 488976, upload-time = "2025-10-14T15:05:05.905Z" }, + { url = "https://files.pythonhosted.org/packages/37/57/ee347af605d867f712be7029bb94c8c071732a4b44792e3176fa3c612d39/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bfb5862016acc9b869bb57284e6cb35fdf8e22fe59f7548858e2f971d045f150", size = 595506, upload-time = "2025-10-14T15:05:06.906Z" }, + { url = 
"https://files.pythonhosted.org/packages/a8/78/cc5ab0b86c122047f75e8fc471c67a04dee395daf847d3e59381996c8707/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:319b27255aacd9923b8a276bb14d21a5f7ff82564c744235fc5eae58d95422ae", size = 474936, upload-time = "2025-10-14T15:05:07.906Z" }, + { url = "https://files.pythonhosted.org/packages/62/da/def65b170a3815af7bd40a3e7010bf6ab53089ef1b75d05dd5385b87cf08/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c755367e51db90e75b19454b680903631d41f9e3607fbd941d296a020c2d752d", size = 456147, upload-time = "2025-10-14T15:05:09.138Z" }, + { url = "https://files.pythonhosted.org/packages/57/99/da6573ba71166e82d288d4df0839128004c67d2778d3b566c138695f5c0b/watchfiles-1.1.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c22c776292a23bfc7237a98f791b9ad3144b02116ff10d820829ce62dff46d0b", size = 630007, upload-time = "2025-10-14T15:05:10.117Z" }, + { url = "https://files.pythonhosted.org/packages/a8/51/7439c4dd39511368849eb1e53279cd3454b4a4dbace80bab88feeb83c6b5/watchfiles-1.1.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:3a476189be23c3686bc2f4321dd501cb329c0a0469e77b7b534ee10129ae6374", size = 622280, upload-time = "2025-10-14T15:05:11.146Z" }, + { url = "https://files.pythonhosted.org/packages/95/9c/8ed97d4bba5db6fdcdb2b298d3898f2dd5c20f6b73aee04eabe56c59677e/watchfiles-1.1.1-cp313-cp313-win32.whl", hash = "sha256:bf0a91bfb5574a2f7fc223cf95eeea79abfefa404bf1ea5e339c0c1560ae99a0", size = 272056, upload-time = "2025-10-14T15:05:12.156Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f3/c14e28429f744a260d8ceae18bf58c1d5fa56b50d006a7a9f80e1882cb0d/watchfiles-1.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:52e06553899e11e8074503c8e716d574adeeb7e68913115c4b3653c53f9bae42", size = 288162, upload-time = "2025-10-14T15:05:13.208Z" }, + { url = 
"https://files.pythonhosted.org/packages/dc/61/fe0e56c40d5cd29523e398d31153218718c5786b5e636d9ae8ae79453d27/watchfiles-1.1.1-cp313-cp313-win_arm64.whl", hash = "sha256:ac3cc5759570cd02662b15fbcd9d917f7ecd47efe0d6b40474eafd246f91ea18", size = 277909, upload-time = "2025-10-14T15:05:14.49Z" }, + { url = "https://files.pythonhosted.org/packages/79/42/e0a7d749626f1e28c7108a99fb9bf524b501bbbeb9b261ceecde644d5a07/watchfiles-1.1.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:563b116874a9a7ce6f96f87cd0b94f7faf92d08d0021e837796f0a14318ef8da", size = 403389, upload-time = "2025-10-14T15:05:15.777Z" }, + { url = "https://files.pythonhosted.org/packages/15/49/08732f90ce0fbbc13913f9f215c689cfc9ced345fb1bcd8829a50007cc8d/watchfiles-1.1.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3ad9fe1dae4ab4212d8c91e80b832425e24f421703b5a42ef2e4a1e215aff051", size = 389964, upload-time = "2025-10-14T15:05:16.85Z" }, + { url = "https://files.pythonhosted.org/packages/27/0d/7c315d4bd5f2538910491a0393c56bf70d333d51bc5b34bee8e68e8cea19/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce70f96a46b894b36eba678f153f052967a0d06d5b5a19b336ab0dbbd029f73e", size = 448114, upload-time = "2025-10-14T15:05:17.876Z" }, + { url = "https://files.pythonhosted.org/packages/c3/24/9e096de47a4d11bc4df41e9d1e61776393eac4cb6eb11b3e23315b78b2cc/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cb467c999c2eff23a6417e58d75e5828716f42ed8289fe6b77a7e5a91036ca70", size = 460264, upload-time = "2025-10-14T15:05:18.962Z" }, + { url = "https://files.pythonhosted.org/packages/cc/0f/e8dea6375f1d3ba5fcb0b3583e2b493e77379834c74fd5a22d66d85d6540/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:836398932192dae4146c8f6f737d74baeac8b70ce14831a239bdb1ca882fc261", size = 487877, upload-time = "2025-10-14T15:05:20.094Z" }, + { url = 
"https://files.pythonhosted.org/packages/ac/5b/df24cfc6424a12deb41503b64d42fbea6b8cb357ec62ca84a5a3476f654a/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:743185e7372b7bc7c389e1badcc606931a827112fbbd37f14c537320fca08620", size = 595176, upload-time = "2025-10-14T15:05:21.134Z" }, + { url = "https://files.pythonhosted.org/packages/8f/b5/853b6757f7347de4e9b37e8cc3289283fb983cba1ab4d2d7144694871d9c/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:afaeff7696e0ad9f02cbb8f56365ff4686ab205fcf9c4c5b6fdfaaa16549dd04", size = 473577, upload-time = "2025-10-14T15:05:22.306Z" }, + { url = "https://files.pythonhosted.org/packages/e1/f7/0a4467be0a56e80447c8529c9fce5b38eab4f513cb3d9bf82e7392a5696b/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f7eb7da0eb23aa2ba036d4f616d46906013a68caf61b7fdbe42fc8b25132e77", size = 455425, upload-time = "2025-10-14T15:05:23.348Z" }, + { url = "https://files.pythonhosted.org/packages/8e/e0/82583485ea00137ddf69bc84a2db88bd92ab4a6e3c405e5fb878ead8d0e7/watchfiles-1.1.1-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:831a62658609f0e5c64178211c942ace999517f5770fe9436be4c2faeba0c0ef", size = 628826, upload-time = "2025-10-14T15:05:24.398Z" }, + { url = "https://files.pythonhosted.org/packages/28/9a/a785356fccf9fae84c0cc90570f11702ae9571036fb25932f1242c82191c/watchfiles-1.1.1-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:f9a2ae5c91cecc9edd47e041a930490c31c3afb1f5e6d71de3dc671bfaca02bf", size = 622208, upload-time = "2025-10-14T15:05:25.45Z" }, + { url = "https://files.pythonhosted.org/packages/c3/f4/0872229324ef69b2c3edec35e84bd57a1289e7d3fe74588048ed8947a323/watchfiles-1.1.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:d1715143123baeeaeadec0528bb7441103979a1d5f6fd0e1f915383fea7ea6d5", size = 404315, upload-time = "2025-10-14T15:05:26.501Z" }, + { url = 
"https://files.pythonhosted.org/packages/7b/22/16d5331eaed1cb107b873f6ae1b69e9ced582fcf0c59a50cd84f403b1c32/watchfiles-1.1.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:39574d6370c4579d7f5d0ad940ce5b20db0e4117444e39b6d8f99db5676c52fd", size = 390869, upload-time = "2025-10-14T15:05:27.649Z" }, + { url = "https://files.pythonhosted.org/packages/b2/7e/5643bfff5acb6539b18483128fdc0ef2cccc94a5b8fbda130c823e8ed636/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7365b92c2e69ee952902e8f70f3ba6360d0d596d9299d55d7d386df84b6941fb", size = 449919, upload-time = "2025-10-14T15:05:28.701Z" }, + { url = "https://files.pythonhosted.org/packages/51/2e/c410993ba5025a9f9357c376f48976ef0e1b1aefb73b97a5ae01a5972755/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bfff9740c69c0e4ed32416f013f3c45e2ae42ccedd1167ef2d805c000b6c71a5", size = 460845, upload-time = "2025-10-14T15:05:30.064Z" }, + { url = "https://files.pythonhosted.org/packages/8e/a4/2df3b404469122e8680f0fcd06079317e48db58a2da2950fb45020947734/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b27cf2eb1dda37b2089e3907d8ea92922b673c0c427886d4edc6b94d8dfe5db3", size = 489027, upload-time = "2025-10-14T15:05:31.064Z" }, + { url = "https://files.pythonhosted.org/packages/ea/84/4587ba5b1f267167ee715b7f66e6382cca6938e0a4b870adad93e44747e6/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:526e86aced14a65a5b0ec50827c745597c782ff46b571dbfe46192ab9e0b3c33", size = 595615, upload-time = "2025-10-14T15:05:32.074Z" }, + { url = "https://files.pythonhosted.org/packages/6a/0f/c6988c91d06e93cd0bb3d4a808bcf32375ca1904609835c3031799e3ecae/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:04e78dd0b6352db95507fd8cb46f39d185cf8c74e4cf1e4fbad1d3df96faf510", size = 474836, upload-time = "2025-10-14T15:05:33.209Z" }, + { url = 
"https://files.pythonhosted.org/packages/b4/36/ded8aebea91919485b7bbabbd14f5f359326cb5ec218cd67074d1e426d74/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c85794a4cfa094714fb9c08d4a218375b2b95b8ed1666e8677c349906246c05", size = 455099, upload-time = "2025-10-14T15:05:34.189Z" }, + { url = "https://files.pythonhosted.org/packages/98/e0/8c9bdba88af756a2fce230dd365fab2baf927ba42cd47521ee7498fd5211/watchfiles-1.1.1-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:74d5012b7630714b66be7b7b7a78855ef7ad58e8650c73afc4c076a1f480a8d6", size = 630626, upload-time = "2025-10-14T15:05:35.216Z" }, + { url = "https://files.pythonhosted.org/packages/2a/84/a95db05354bf2d19e438520d92a8ca475e578c647f78f53197f5a2f17aaf/watchfiles-1.1.1-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:8fbe85cb3201c7d380d3d0b90e63d520f15d6afe217165d7f98c9c649654db81", size = 622519, upload-time = "2025-10-14T15:05:36.259Z" }, + { url = "https://files.pythonhosted.org/packages/1d/ce/d8acdc8de545de995c339be67711e474c77d643555a9bb74a9334252bd55/watchfiles-1.1.1-cp314-cp314-win32.whl", hash = "sha256:3fa0b59c92278b5a7800d3ee7733da9d096d4aabcfabb9a928918bd276ef9b9b", size = 272078, upload-time = "2025-10-14T15:05:37.63Z" }, + { url = "https://files.pythonhosted.org/packages/c4/c9/a74487f72d0451524be827e8edec251da0cc1fcf111646a511ae752e1a3d/watchfiles-1.1.1-cp314-cp314-win_amd64.whl", hash = "sha256:c2047d0b6cea13b3316bdbafbfa0c4228ae593d995030fda39089d36e64fc03a", size = 287664, upload-time = "2025-10-14T15:05:38.95Z" }, + { url = "https://files.pythonhosted.org/packages/df/b8/8ac000702cdd496cdce998c6f4ee0ca1f15977bba51bdf07d872ebdfc34c/watchfiles-1.1.1-cp314-cp314-win_arm64.whl", hash = "sha256:842178b126593addc05acf6fce960d28bc5fae7afbaa2c6c1b3a7b9460e5be02", size = 277154, upload-time = "2025-10-14T15:05:39.954Z" }, + { url = 
"https://files.pythonhosted.org/packages/47/a8/e3af2184707c29f0f14b1963c0aace6529f9d1b8582d5b99f31bbf42f59e/watchfiles-1.1.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:88863fbbc1a7312972f1c511f202eb30866370ebb8493aef2812b9ff28156a21", size = 403820, upload-time = "2025-10-14T15:05:40.932Z" }, + { url = "https://files.pythonhosted.org/packages/c0/ec/e47e307c2f4bd75f9f9e8afbe3876679b18e1bcec449beca132a1c5ffb2d/watchfiles-1.1.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:55c7475190662e202c08c6c0f4d9e345a29367438cf8e8037f3155e10a88d5a5", size = 390510, upload-time = "2025-10-14T15:05:41.945Z" }, + { url = "https://files.pythonhosted.org/packages/d5/a0/ad235642118090f66e7b2f18fd5c42082418404a79205cdfca50b6309c13/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f53fa183d53a1d7a8852277c92b967ae99c2d4dcee2bfacff8868e6e30b15f7", size = 448408, upload-time = "2025-10-14T15:05:43.385Z" }, + { url = "https://files.pythonhosted.org/packages/df/85/97fa10fd5ff3332ae17e7e40e20784e419e28521549780869f1413742e9d/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6aae418a8b323732fa89721d86f39ec8f092fc2af67f4217a2b07fd3e93c6101", size = 458968, upload-time = "2025-10-14T15:05:44.404Z" }, + { url = "https://files.pythonhosted.org/packages/47/c2/9059c2e8966ea5ce678166617a7f75ecba6164375f3b288e50a40dc6d489/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f096076119da54a6080e8920cbdaac3dbee667eb91dcc5e5b78840b87415bd44", size = 488096, upload-time = "2025-10-14T15:05:45.398Z" }, + { url = "https://files.pythonhosted.org/packages/94/44/d90a9ec8ac309bc26db808a13e7bfc0e4e78b6fc051078a554e132e80160/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:00485f441d183717038ed2e887a7c868154f216877653121068107b227a2f64c", size = 596040, upload-time = "2025-10-14T15:05:46.502Z" }, + { url = 
"https://files.pythonhosted.org/packages/95/68/4e3479b20ca305cfc561db3ed207a8a1c745ee32bf24f2026a129d0ddb6e/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a55f3e9e493158d7bfdb60a1165035f1cf7d320914e7b7ea83fe22c6023b58fc", size = 473847, upload-time = "2025-10-14T15:05:47.484Z" }, + { url = "https://files.pythonhosted.org/packages/4f/55/2af26693fd15165c4ff7857e38330e1b61ab8c37d15dc79118cdba115b7a/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c91ed27800188c2ae96d16e3149f199d62f86c7af5f5f4d2c61a3ed8cd3666c", size = 455072, upload-time = "2025-10-14T15:05:48.928Z" }, + { url = "https://files.pythonhosted.org/packages/66/1d/d0d200b10c9311ec25d2273f8aad8c3ef7cc7ea11808022501811208a750/watchfiles-1.1.1-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:311ff15a0bae3714ffb603e6ba6dbfba4065ab60865d15a6ec544133bdb21099", size = 629104, upload-time = "2025-10-14T15:05:49.908Z" }, + { url = "https://files.pythonhosted.org/packages/e3/bd/fa9bb053192491b3867ba07d2343d9f2252e00811567d30ae8d0f78136fe/watchfiles-1.1.1-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:a916a2932da8f8ab582f242c065f5c81bed3462849ca79ee357dd9551b0e9b01", size = 622112, upload-time = "2025-10-14T15:05:50.941Z" }, + { url = "https://files.pythonhosted.org/packages/d3/8e/e500f8b0b77be4ff753ac94dc06b33d8f0d839377fee1b78e8c8d8f031bf/watchfiles-1.1.1-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:db476ab59b6765134de1d4fe96a1a9c96ddf091683599be0f26147ea1b2e4b88", size = 408250, upload-time = "2025-10-14T15:06:10.264Z" }, + { url = "https://files.pythonhosted.org/packages/bd/95/615e72cd27b85b61eec764a5ca51bd94d40b5adea5ff47567d9ebc4d275a/watchfiles-1.1.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:89eef07eee5e9d1fda06e38822ad167a044153457e6fd997f8a858ab7564a336", size = 396117, upload-time = "2025-10-14T15:06:11.28Z" }, + { url = 
"https://files.pythonhosted.org/packages/c9/81/e7fe958ce8a7fb5c73cc9fb07f5aeaf755e6aa72498c57d760af760c91f8/watchfiles-1.1.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce19e06cbda693e9e7686358af9cd6f5d61312ab8b00488bc36f5aabbaf77e24", size = 450493, upload-time = "2025-10-14T15:06:12.321Z" }, + { url = "https://files.pythonhosted.org/packages/6e/d4/ed38dd3b1767193de971e694aa544356e63353c33a85d948166b5ff58b9e/watchfiles-1.1.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e6f39af2eab0118338902798b5aa6664f46ff66bc0280de76fca67a7f262a49", size = 457546, upload-time = "2025-10-14T15:06:13.372Z" }, +] + [[package]] name = "wcwidth" version = "0.6.0" @@ -1258,6 +1875,65 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/68/5a/199c59e0a824a3db2b89c5d2dade7ab5f9624dbf6448dc291b46d5ec94d3/wcwidth-0.6.0-py3-none-any.whl", hash = "sha256:1a3a1e510b553315f8e146c54764f4fb6264ffad731b3d78088cdb1478ffbdad", size = 94189, upload-time = "2026-02-06T19:19:39.646Z" }, ] +[[package]] +name = "websockets" +version = "16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/04/24/4b2031d72e840ce4c1ccb255f693b15c334757fc50023e4db9537080b8c4/websockets-16.0.tar.gz", hash = "sha256:5f6261a5e56e8d5c42a4497b364ea24d94d9563e8fbd44e78ac40879c60179b5", size = 179346, upload-time = "2026-01-10T09:23:47.181Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f2/db/de907251b4ff46ae804ad0409809504153b3f30984daf82a1d84a9875830/websockets-16.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:31a52addea25187bde0797a97d6fc3d2f92b6f72a9370792d65a6e84615ac8a8", size = 177340, upload-time = "2026-01-10T09:22:34.539Z" }, + { url = "https://files.pythonhosted.org/packages/f3/fa/abe89019d8d8815c8781e90d697dec52523fb8ebe308bf11664e8de1877e/websockets-16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:417b28978cdccab24f46400586d128366313e8a96312e4b9362a4af504f3bbad", size = 175022, upload-time = "2026-01-10T09:22:36.332Z" }, + { url = "https://files.pythonhosted.org/packages/58/5d/88ea17ed1ded2079358b40d31d48abe90a73c9e5819dbcde1606e991e2ad/websockets-16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:af80d74d4edfa3cb9ed973a0a5ba2b2a549371f8a741e0800cb07becdd20f23d", size = 175319, upload-time = "2026-01-10T09:22:37.602Z" }, + { url = "https://files.pythonhosted.org/packages/d2/ae/0ee92b33087a33632f37a635e11e1d99d429d3d323329675a6022312aac2/websockets-16.0-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:08d7af67b64d29823fed316505a89b86705f2b7981c07848fb5e3ea3020c1abe", size = 184631, upload-time = "2026-01-10T09:22:38.789Z" }, + { url = "https://files.pythonhosted.org/packages/c8/c5/27178df583b6c5b31b29f526ba2da5e2f864ecc79c99dae630a85d68c304/websockets-16.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7be95cfb0a4dae143eaed2bcba8ac23f4892d8971311f1b06f3c6b78952ee70b", size = 185870, upload-time = "2026-01-10T09:22:39.893Z" }, + { url = "https://files.pythonhosted.org/packages/87/05/536652aa84ddc1c018dbb7e2c4cbcd0db884580bf8e95aece7593fde526f/websockets-16.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d6297ce39ce5c2e6feb13c1a996a2ded3b6832155fcfc920265c76f24c7cceb5", size = 185361, upload-time = "2026-01-10T09:22:41.016Z" }, + { url = "https://files.pythonhosted.org/packages/6d/e2/d5332c90da12b1e01f06fb1b85c50cfc489783076547415bf9f0a659ec19/websockets-16.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1c1b30e4f497b0b354057f3467f56244c603a79c0d1dafce1d16c283c25f6e64", size = 184615, upload-time = "2026-01-10T09:22:42.442Z" }, + { url = "https://files.pythonhosted.org/packages/77/fb/d3f9576691cae9253b51555f841bc6600bf0a983a461c79500ace5a5b364/websockets-16.0-cp311-cp311-win32.whl", hash = 
"sha256:5f451484aeb5cafee1ccf789b1b66f535409d038c56966d6101740c1614b86c6", size = 178246, upload-time = "2026-01-10T09:22:43.654Z" }, + { url = "https://files.pythonhosted.org/packages/54/67/eaff76b3dbaf18dcddabc3b8c1dba50b483761cccff67793897945b37408/websockets-16.0-cp311-cp311-win_amd64.whl", hash = "sha256:8d7f0659570eefb578dacde98e24fb60af35350193e4f56e11190787bee77dac", size = 178684, upload-time = "2026-01-10T09:22:44.941Z" }, + { url = "https://files.pythonhosted.org/packages/84/7b/bac442e6b96c9d25092695578dda82403c77936104b5682307bd4deb1ad4/websockets-16.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:71c989cbf3254fbd5e84d3bff31e4da39c43f884e64f2551d14bb3c186230f00", size = 177365, upload-time = "2026-01-10T09:22:46.787Z" }, + { url = "https://files.pythonhosted.org/packages/b0/fe/136ccece61bd690d9c1f715baaeefd953bb2360134de73519d5df19d29ca/websockets-16.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:8b6e209ffee39ff1b6d0fa7bfef6de950c60dfb91b8fcead17da4ee539121a79", size = 175038, upload-time = "2026-01-10T09:22:47.999Z" }, + { url = "https://files.pythonhosted.org/packages/40/1e/9771421ac2286eaab95b8575b0cb701ae3663abf8b5e1f64f1fd90d0a673/websockets-16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:86890e837d61574c92a97496d590968b23c2ef0aeb8a9bc9421d174cd378ae39", size = 175328, upload-time = "2026-01-10T09:22:49.809Z" }, + { url = "https://files.pythonhosted.org/packages/18/29/71729b4671f21e1eaa5d6573031ab810ad2936c8175f03f97f3ff164c802/websockets-16.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9b5aca38b67492ef518a8ab76851862488a478602229112c4b0d58d63a7a4d5c", size = 184915, upload-time = "2026-01-10T09:22:51.071Z" }, + { url = "https://files.pythonhosted.org/packages/97/bb/21c36b7dbbafc85d2d480cd65df02a1dc93bf76d97147605a8e27ff9409d/websockets-16.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:e0334872c0a37b606418ac52f6ab9cfd17317ac26365f7f65e203e2d0d0d359f", size = 186152, upload-time = "2026-01-10T09:22:52.224Z" }, + { url = "https://files.pythonhosted.org/packages/4a/34/9bf8df0c0cf88fa7bfe36678dc7b02970c9a7d5e065a3099292db87b1be2/websockets-16.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a0b31e0b424cc6b5a04b8838bbaec1688834b2383256688cf47eb97412531da1", size = 185583, upload-time = "2026-01-10T09:22:53.443Z" }, + { url = "https://files.pythonhosted.org/packages/47/88/4dd516068e1a3d6ab3c7c183288404cd424a9a02d585efbac226cb61ff2d/websockets-16.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:485c49116d0af10ac698623c513c1cc01c9446c058a4e61e3bf6c19dff7335a2", size = 184880, upload-time = "2026-01-10T09:22:55.033Z" }, + { url = "https://files.pythonhosted.org/packages/91/d6/7d4553ad4bf1c0421e1ebd4b18de5d9098383b5caa1d937b63df8d04b565/websockets-16.0-cp312-cp312-win32.whl", hash = "sha256:eaded469f5e5b7294e2bdca0ab06becb6756ea86894a47806456089298813c89", size = 178261, upload-time = "2026-01-10T09:22:56.251Z" }, + { url = "https://files.pythonhosted.org/packages/c3/f0/f3a17365441ed1c27f850a80b2bc680a0fa9505d733fe152fdf5e98c1c0b/websockets-16.0-cp312-cp312-win_amd64.whl", hash = "sha256:5569417dc80977fc8c2d43a86f78e0a5a22fee17565d78621b6bb264a115d4ea", size = 178693, upload-time = "2026-01-10T09:22:57.478Z" }, + { url = "https://files.pythonhosted.org/packages/cc/9c/baa8456050d1c1b08dd0ec7346026668cbc6f145ab4e314d707bb845bf0d/websockets-16.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:878b336ac47938b474c8f982ac2f7266a540adc3fa4ad74ae96fea9823a02cc9", size = 177364, upload-time = "2026-01-10T09:22:59.333Z" }, + { url = "https://files.pythonhosted.org/packages/7e/0c/8811fc53e9bcff68fe7de2bcbe75116a8d959ac699a3200f4847a8925210/websockets-16.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:52a0fec0e6c8d9a784c2c78276a48a2bdf099e4ccc2a4cad53b27718dbfd0230", size = 175039, upload-time = "2026-01-10T09:23:01.171Z" }, + { 
url = "https://files.pythonhosted.org/packages/aa/82/39a5f910cb99ec0b59e482971238c845af9220d3ab9fa76dd9162cda9d62/websockets-16.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e6578ed5b6981005df1860a56e3617f14a6c307e6a71b4fff8c48fdc50f3ed2c", size = 175323, upload-time = "2026-01-10T09:23:02.341Z" }, + { url = "https://files.pythonhosted.org/packages/bd/28/0a25ee5342eb5d5f297d992a77e56892ecb65e7854c7898fb7d35e9b33bd/websockets-16.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:95724e638f0f9c350bb1c2b0a7ad0e83d9cc0c9259f3ea94e40d7b02a2179ae5", size = 184975, upload-time = "2026-01-10T09:23:03.756Z" }, + { url = "https://files.pythonhosted.org/packages/f9/66/27ea52741752f5107c2e41fda05e8395a682a1e11c4e592a809a90c6a506/websockets-16.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c0204dc62a89dc9d50d682412c10b3542d748260d743500a85c13cd1ee4bde82", size = 186203, upload-time = "2026-01-10T09:23:05.01Z" }, + { url = "https://files.pythonhosted.org/packages/37/e5/8e32857371406a757816a2b471939d51c463509be73fa538216ea52b792a/websockets-16.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:52ac480f44d32970d66763115edea932f1c5b1312de36df06d6b219f6741eed8", size = 185653, upload-time = "2026-01-10T09:23:06.301Z" }, + { url = "https://files.pythonhosted.org/packages/9b/67/f926bac29882894669368dc73f4da900fcdf47955d0a0185d60103df5737/websockets-16.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6e5a82b677f8f6f59e8dfc34ec06ca6b5b48bc4fcda346acd093694cc2c24d8f", size = 184920, upload-time = "2026-01-10T09:23:07.492Z" }, + { url = "https://files.pythonhosted.org/packages/3c/a1/3d6ccdcd125b0a42a311bcd15a7f705d688f73b2a22d8cf1c0875d35d34a/websockets-16.0-cp313-cp313-win32.whl", hash = "sha256:abf050a199613f64c886ea10f38b47770a65154dc37181bfaff70c160f45315a", size = 178255, upload-time = "2026-01-10T09:23:09.245Z" }, + { url = 
"https://files.pythonhosted.org/packages/6b/ae/90366304d7c2ce80f9b826096a9e9048b4bb760e44d3b873bb272cba696b/websockets-16.0-cp313-cp313-win_amd64.whl", hash = "sha256:3425ac5cf448801335d6fdc7ae1eb22072055417a96cc6b31b3861f455fbc156", size = 178689, upload-time = "2026-01-10T09:23:10.483Z" }, + { url = "https://files.pythonhosted.org/packages/f3/1d/e88022630271f5bd349ed82417136281931e558d628dd52c4d8621b4a0b2/websockets-16.0-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:8cc451a50f2aee53042ac52d2d053d08bf89bcb31ae799cb4487587661c038a0", size = 177406, upload-time = "2026-01-10T09:23:12.178Z" }, + { url = "https://files.pythonhosted.org/packages/f2/78/e63be1bf0724eeb4616efb1ae1c9044f7c3953b7957799abb5915bffd38e/websockets-16.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:daa3b6ff70a9241cf6c7fc9e949d41232d9d7d26fd3522b1ad2b4d62487e9904", size = 175085, upload-time = "2026-01-10T09:23:13.511Z" }, + { url = "https://files.pythonhosted.org/packages/bb/f4/d3c9220d818ee955ae390cf319a7c7a467beceb24f05ee7aaaa2414345ba/websockets-16.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:fd3cb4adb94a2a6e2b7c0d8d05cb94e6f1c81a0cf9dc2694fb65c7e8d94c42e4", size = 175328, upload-time = "2026-01-10T09:23:14.727Z" }, + { url = "https://files.pythonhosted.org/packages/63/bc/d3e208028de777087e6fb2b122051a6ff7bbcca0d6df9d9c2bf1dd869ae9/websockets-16.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:781caf5e8eee67f663126490c2f96f40906594cb86b408a703630f95550a8c3e", size = 185044, upload-time = "2026-01-10T09:23:15.939Z" }, + { url = "https://files.pythonhosted.org/packages/ad/6e/9a0927ac24bd33a0a9af834d89e0abc7cfd8e13bed17a86407a66773cc0e/websockets-16.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:caab51a72c51973ca21fa8a18bd8165e1a0183f1ac7066a182ff27107b71e1a4", size = 186279, upload-time = "2026-01-10T09:23:17.148Z" }, + { url = 
"https://files.pythonhosted.org/packages/b9/ca/bf1c68440d7a868180e11be653c85959502efd3a709323230314fda6e0b3/websockets-16.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:19c4dc84098e523fd63711e563077d39e90ec6702aff4b5d9e344a60cb3c0cb1", size = 185711, upload-time = "2026-01-10T09:23:18.372Z" }, + { url = "https://files.pythonhosted.org/packages/c4/f8/fdc34643a989561f217bb477cbc47a3a07212cbda91c0e4389c43c296ebf/websockets-16.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:a5e18a238a2b2249c9a9235466b90e96ae4795672598a58772dd806edc7ac6d3", size = 184982, upload-time = "2026-01-10T09:23:19.652Z" }, + { url = "https://files.pythonhosted.org/packages/dd/d1/574fa27e233764dbac9c52730d63fcf2823b16f0856b3329fc6268d6ae4f/websockets-16.0-cp314-cp314-win32.whl", hash = "sha256:a069d734c4a043182729edd3e9f247c3b2a4035415a9172fd0f1b71658a320a8", size = 177915, upload-time = "2026-01-10T09:23:21.458Z" }, + { url = "https://files.pythonhosted.org/packages/8a/f1/ae6b937bf3126b5134ce1f482365fde31a357c784ac51852978768b5eff4/websockets-16.0-cp314-cp314-win_amd64.whl", hash = "sha256:c0ee0e63f23914732c6d7e0cce24915c48f3f1512ec1d079ed01fc629dab269d", size = 178381, upload-time = "2026-01-10T09:23:22.715Z" }, + { url = "https://files.pythonhosted.org/packages/06/9b/f791d1db48403e1f0a27577a6beb37afae94254a8c6f08be4a23e4930bc0/websockets-16.0-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:a35539cacc3febb22b8f4d4a99cc79b104226a756aa7400adc722e83b0d03244", size = 177737, upload-time = "2026-01-10T09:23:24.523Z" }, + { url = "https://files.pythonhosted.org/packages/bd/40/53ad02341fa33b3ce489023f635367a4ac98b73570102ad2cdd770dacc9a/websockets-16.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:b784ca5de850f4ce93ec85d3269d24d4c82f22b7212023c974c401d4980ebc5e", size = 175268, upload-time = "2026-01-10T09:23:25.781Z" }, + { url = 
"https://files.pythonhosted.org/packages/74/9b/6158d4e459b984f949dcbbb0c5d270154c7618e11c01029b9bbd1bb4c4f9/websockets-16.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:569d01a4e7fba956c5ae4fc988f0d4e187900f5497ce46339c996dbf24f17641", size = 175486, upload-time = "2026-01-10T09:23:27.033Z" }, + { url = "https://files.pythonhosted.org/packages/e5/2d/7583b30208b639c8090206f95073646c2c9ffd66f44df967981a64f849ad/websockets-16.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:50f23cdd8343b984957e4077839841146f67a3d31ab0d00e6b824e74c5b2f6e8", size = 185331, upload-time = "2026-01-10T09:23:28.259Z" }, + { url = "https://files.pythonhosted.org/packages/45/b0/cce3784eb519b7b5ad680d14b9673a31ab8dcb7aad8b64d81709d2430aa8/websockets-16.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:152284a83a00c59b759697b7f9e9cddf4e3c7861dd0d964b472b70f78f89e80e", size = 186501, upload-time = "2026-01-10T09:23:29.449Z" }, + { url = "https://files.pythonhosted.org/packages/19/60/b8ebe4c7e89fb5f6cdf080623c9d92789a53636950f7abacfc33fe2b3135/websockets-16.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:bc59589ab64b0022385f429b94697348a6a234e8ce22544e3681b2e9331b5944", size = 186062, upload-time = "2026-01-10T09:23:31.368Z" }, + { url = "https://files.pythonhosted.org/packages/88/a8/a080593f89b0138b6cba1b28f8df5673b5506f72879322288b031337c0b8/websockets-16.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:32da954ffa2814258030e5a57bc73a3635463238e797c7375dc8091327434206", size = 185356, upload-time = "2026-01-10T09:23:32.627Z" }, + { url = "https://files.pythonhosted.org/packages/c2/b6/b9afed2afadddaf5ebb2afa801abf4b0868f42f8539bfe4b071b5266c9fe/websockets-16.0-cp314-cp314t-win32.whl", hash = "sha256:5a4b4cc550cb665dd8a47f868c8d04c8230f857363ad3c9caf7a0c3bf8c61ca6", size = 178085, upload-time = "2026-01-10T09:23:33.816Z" }, + { url = 
"https://files.pythonhosted.org/packages/9f/3e/28135a24e384493fa804216b79a6a6759a38cc4ff59118787b9fb693df93/websockets-16.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b14dc141ed6d2dde437cddb216004bcac6a1df0935d79656387bd41632ba0bbd", size = 178531, upload-time = "2026-01-10T09:23:35.016Z" }, + { url = "https://files.pythonhosted.org/packages/72/07/c98a68571dcf256e74f1f816b8cc5eae6eb2d3d5cfa44d37f801619d9166/websockets-16.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:349f83cd6c9a415428ee1005cadb5c2c56f4389bc06a9af16103c3bc3dcc8b7d", size = 174947, upload-time = "2026-01-10T09:23:36.166Z" }, + { url = "https://files.pythonhosted.org/packages/7e/52/93e166a81e0305b33fe416338be92ae863563fe7bce446b0f687b9df5aea/websockets-16.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:4a1aba3340a8dca8db6eb5a7986157f52eb9e436b74813764241981ca4888f03", size = 175260, upload-time = "2026-01-10T09:23:37.409Z" }, + { url = "https://files.pythonhosted.org/packages/56/0c/2dbf513bafd24889d33de2ff0368190a0e69f37bcfa19009ef819fe4d507/websockets-16.0-pp311-pypy311_pp73-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:f4a32d1bd841d4bcbffdcb3d2ce50c09c3909fbead375ab28d0181af89fd04da", size = 176071, upload-time = "2026-01-10T09:23:39.158Z" }, + { url = "https://files.pythonhosted.org/packages/a5/8f/aea9c71cc92bf9b6cc0f7f70df8f0b420636b6c96ef4feee1e16f80f75dd/websockets-16.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0298d07ee155e2e9fda5be8a9042200dd2e3bb0b8a38482156576f863a9d457c", size = 176968, upload-time = "2026-01-10T09:23:41.031Z" }, + { url = "https://files.pythonhosted.org/packages/9a/3f/f70e03f40ffc9a30d817eef7da1be72ee4956ba8d7255c399a01b135902a/websockets-16.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:a653aea902e0324b52f1613332ddf50b00c06fdaf7e92624fbf8c77c78fa5767", size = 178735, upload-time = "2026-01-10T09:23:42.259Z" }, + { url = 
"https://files.pythonhosted.org/packages/6f/28/258ebab549c2bf3e64d2b0217b973467394a9cea8c42f70418ca2c5d0d2e/websockets-16.0-py3-none-any.whl", hash = "sha256:1637db62fad1dc833276dded54215f2c7fa46912301a24bd94d45d46a011ceec", size = 171598, upload-time = "2026-01-10T09:23:45.395Z" }, +] + [[package]] name = "wrapt" version = "2.1.2" From f0f3554e0b8b2ca1058399654b7c6b4982cf4616 Mon Sep 17 00:00:00 2001 From: NirrWorks Date: Sun, 3 May 2026 21:43:33 +0545 Subject: [PATCH 14/49] Add Railway deployment config --- railway.json | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 railway.json diff --git a/railway.json b/railway.json new file mode 100644 index 0000000..df67c9b --- /dev/null +++ b/railway.json @@ -0,0 +1,10 @@ +{ + "$schema": "https://railway.com/railway.schema.json", + "build": { + "builder": "NIXPACKS", + "buildCommand": "pip install uv && uv sync --frozen" + }, + "deploy": { + "startCommand": "uv run litestar --app osmsg.api.app:app run --host 0.0.0.0 --port $PORT" + } +} From 7614a5202fe13aa28cd9350dc473a96b9e472394 Mon Sep 17 00:00:00 2001 From: NirrWorks Date: Sun, 3 May 2026 23:18:50 +0545 Subject: [PATCH 15/49] Address API review comments --- .env.example | 1 + README.md | 34 ++++++++++++- osmsg/api/app.py | 28 ++++++++--- osmsg/api/queries.py | 19 ++++++-- pyproject.toml | 8 ++-- tests/test_api.py | 112 +++++++++++++++++++++++++++++++++++++++++-- uv.lock | 44 +++++------------ 7 files changed, 197 insertions(+), 49 deletions(-) create mode 100644 .env.example diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..641fc46 --- /dev/null +++ b/.env.example @@ -0,0 +1 @@ +DATABASE_URL=postgresql://user:password@localhost:5432/osmsg diff --git a/README.md b/README.md index a2df536..2d88542 100644 --- a/README.md +++ b/README.md @@ -89,7 +89,37 @@ duckdb stats.duckdb -c "SELECT username, SUM(nodes_created) AS n Same schema in DuckDB and Postgres: `users`, `changesets`, `changeset_stats`, `state`. -### 5. 
Use it as a library +### 5. Run the API + +The Litestar API reads osmsg data from Postgres. Set `DATABASE_URL` in your environment +or copy `.env.example` to `.env` and edit it. + +Insert the latest day of data into Postgres: + +```bash +uv run osmsg --last day --format psql --psql-dsn "$DATABASE_URL" --name api_last_day +``` + +Start the API: + +```bash +uv run --group api litestar --app osmsg.api.app:app run --host 0.0.0.0 --port 8000 +``` + +Then open: + +```text +http://localhost:8000/health +http://localhost:8000/api/v1/user-stats?start=2026-05-01T00:00:00Z&end=2026-05-02T00:00:00Z +``` + +Use `hashtag` to filter by one changeset hashtag: + +```text +http://localhost:8000/api/v1/user-stats?start=2026-05-01T00:00:00Z&end=2026-05-02T00:00:00Z&hashtag=%23mapathon +``` + +### 6. Use it as a library ```python from datetime import datetime, UTC @@ -106,7 +136,7 @@ print(result["files"]["parquet"]) Same pipeline as the CLI. -### 6. Long flag lists? Use a config +### 7. Long flag lists? Use a config ```bash osmsg --config nepal.yaml diff --git a/osmsg/api/app.py b/osmsg/api/app.py index f249e4c..7563489 100644 --- a/osmsg/api/app.py +++ b/osmsg/api/app.py @@ -1,14 +1,16 @@ from __future__ import annotations from contextlib import asynccontextmanager +from datetime import datetime from typing import Any from litestar import Litestar, get +from litestar.exceptions import HTTPException from litestar.openapi.config import OpenAPIConfig from litestar.params import Parameter from .db import close_pool, open_pool -from .queries import fetch_users +from .queries import fetch_user_stats @asynccontextmanager @@ -25,13 +27,27 @@ async def health() -> dict[str, str]: return {"status": "ok"} -@get("/api/v1/users") -async def get_users( +@get("/api/v1/user-stats") +async def get_user_stats( + start: datetime, + end: datetime, + hashtag: str | None = None, limit: int = Parameter(default=100, ge=1, le=1000), offset: int = Parameter(default=0, ge=0), ) -> dict[str, Any]: - users = 
await fetch_users(limit=limit, offset=offset) - return {"count": len(users), "limit": limit, "offset": offset, "users": users} + if start >= end: + raise HTTPException(status_code=400, detail="start must be before end") + + users = await fetch_user_stats(start=start, end=end, hashtag=hashtag, limit=limit, offset=offset) + return { + "count": len(users), + "start": start.isoformat(), + "end": end.isoformat(), + "hashtag": hashtag, + "limit": limit, + "offset": offset, + "users": users, + } # @get("/api/v1/stats/summary") @@ -49,7 +65,7 @@ async def get_users( app = Litestar( - route_handlers=[health, get_users], + route_handlers=[health, get_user_stats], lifespan=[lifespan], openapi_config=OpenAPIConfig(title="OSMSG API", version="1.0.0", path="/docs"), ) diff --git a/osmsg/api/queries.py b/osmsg/api/queries.py index b03be9d..2ce8f20 100644 --- a/osmsg/api/queries.py +++ b/osmsg/api/queries.py @@ -1,11 +1,19 @@ from __future__ import annotations +from datetime import datetime from typing import Any from .db import get_pool -async def fetch_users(*, limit: int = 100, offset: int = 0) -> list[dict[str, Any]]: +async def fetch_user_stats( + *, + start: datetime, + end: datetime, + hashtag: str | None = None, + limit: int = 100, + offset: int = 0, +) -> list[dict[str, Any]]: sql = """ SELECT u.uid, @@ -43,12 +51,17 @@ async def fetch_users(*, limit: int = 100, offset: int = 0) -> list[dict[str, An u.uid ASC ) AS rank FROM users u + JOIN changesets cs ON u.uid = cs.uid JOIN changeset_stats st ON u.uid = st.uid + AND cs.changeset_id = st.changeset_id + WHERE cs.created_at >= $1 + AND cs.created_at < $2 + AND ($3::TEXT IS NULL OR $3 = ANY(cs.hashtags)) GROUP BY u.uid, u.username ORDER BY map_changes DESC, u.uid ASC - LIMIT $1 OFFSET $2 + LIMIT $4 OFFSET $5 """ async with get_pool().acquire() as conn: - rows = await conn.fetch(sql, limit, offset) + rows = await conn.fetch(sql, start, end, hashtag, limit, offset) return [dict(row) for row in rows] diff --git a/pyproject.toml 
b/pyproject.toml index 67ca228..7040be0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,9 +10,7 @@ license = "MIT" license-files = ["LICENSE"] requires-python = ">=3.11" dependencies = [ - "asyncpg>=0.30.0", "duckdb>=1.5.2", - "litestar[standard]>=2.18.0", "osmium>=4.3.1", "platformdirs>=4.5.1", "pyarrow>=24.0.0", @@ -24,7 +22,6 @@ dependencies = [ "shapely>=2.1.2", "typer>=0.25.0", "typer-config[yaml]>=1.5.1", - "uv>=0.9.0", ] keywords = ["osm", "stats", "commandline", "openstreetmap"] @@ -53,7 +50,12 @@ module-name = "osmsg" module-root = "" [dependency-groups] +api = [ + "asyncpg>=0.30.0", + "litestar[standard]>=2.18.0", +] dev = [ + {include-group = "api"}, "pytest>=7.4.2", "pytest-mock>=3.14.0", "coverage>=7.6.1", diff --git a/tests/test_api.py b/tests/test_api.py index 8cbd70d..c14858a 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,12 +1,118 @@ from __future__ import annotations -from osmsg.api.app import app +from importlib import import_module + +from litestar import Litestar +from litestar.testing import TestClient + +from osmsg.api import app as api_app +from osmsg.api.app import get_user_stats, health + +api_module = import_module("osmsg.api.app") def test_api_exposes_only_active_public_routes(): - paths = {route.path for route in app.routes} + paths = {route.path for route in api_app.routes} assert "/health" in paths - assert "/api/v1/users" in paths + assert "/api/v1/user-stats" in paths assert "/api/v1/stats/summary" not in paths assert "/api/v1/stats/timeseries" not in paths + + +def test_health_endpoint_returns_ok(): + with TestClient(Litestar(route_handlers=[health])) as client: + response = client.get("/health") + + assert response.status_code == 200 + assert response.json() == {"status": "ok"} + + +def test_user_stats_endpoint_returns_expected_response(monkeypatch): + async def fake_fetch_user_stats(*, start, end, hashtag, limit, offset): + assert start.isoformat() == "2026-05-01T00:00:00+00:00" + assert end.isoformat() == 
"2026-05-02T00:00:00+00:00" + assert hashtag == "#mapathon" + assert limit == 1 + assert offset == 0 + return [ + { + "uid": 10, + "name": "alice", + "changesets": 2, + "nodes_create": 40, + "nodes_modify": 5, + "nodes_delete": 0, + "ways_create": 12, + "ways_modify": 1, + "ways_delete": 0, + "rels_create": 0, + "rels_modify": 0, + "rels_delete": 0, + "poi_create": 5, + "poi_modify": 1, + "map_changes": 58, + "rank": 1, + } + ] + + monkeypatch.setattr(api_module, "fetch_user_stats", fake_fetch_user_stats) + app = Litestar(route_handlers=[get_user_stats]) + + with TestClient(app) as client: + response = client.get( + "/api/v1/user-stats", + params={ + "start": "2026-05-01T00:00:00Z", + "end": "2026-05-02T00:00:00Z", + "hashtag": "#mapathon", + "limit": "1", + }, + ) + + assert response.status_code == 200 + assert response.json() == { + "count": 1, + "start": "2026-05-01T00:00:00+00:00", + "end": "2026-05-02T00:00:00+00:00", + "hashtag": "#mapathon", + "limit": 1, + "offset": 0, + "users": [ + { + "uid": 10, + "name": "alice", + "changesets": 2, + "nodes_create": 40, + "nodes_modify": 5, + "nodes_delete": 0, + "ways_create": 12, + "ways_modify": 1, + "ways_delete": 0, + "rels_create": 0, + "rels_modify": 0, + "rels_delete": 0, + "poi_create": 5, + "poi_modify": 1, + "map_changes": 58, + "rank": 1, + } + ], + } + + +def test_user_stats_endpoint_rejects_invalid_date_range(monkeypatch): + async def fake_fetch_user_stats(**kwargs): + raise AssertionError("fetch_user_stats should not be called") + + monkeypatch.setattr(api_module, "fetch_user_stats", fake_fetch_user_stats) + app = Litestar(route_handlers=[get_user_stats]) + + with TestClient(app) as client: + response = client.get( + "/api/v1/user-stats", + params={"start": "2026-05-02T00:00:00Z", "end": "2026-05-01T00:00:00Z"}, + ) + + assert response.status_code == 400 + assert response.json()["detail"] == "start must be before end" diff --git a/uv.lock b/uv.lock index 5745c14..e72385f 100644 --- a/uv.lock +++ b/uv.lock 
@@ -982,9 +982,7 @@ name = "osmsg" version = "1.0.3" source = { editable = "." } dependencies = [ - { name = "asyncpg" }, { name = "duckdb" }, - { name = "litestar", extra = ["standard"] }, { name = "osmium" }, { name = "platformdirs" }, { name = "pyarrow" }, @@ -996,13 +994,18 @@ dependencies = [ { name = "shapely" }, { name = "typer" }, { name = "typer-config", extra = ["yaml"] }, - { name = "uv" }, ] [package.dev-dependencies] +api = [ + { name = "asyncpg" }, + { name = "litestar", extra = ["standard"] }, +] dev = [ + { name = "asyncpg" }, { name = "commitizen" }, { name = "coverage" }, + { name = "litestar", extra = ["standard"] }, { name = "pre-commit" }, { name = "pytest" }, { name = "pytest-mock" }, @@ -1012,9 +1015,7 @@ dev = [ [package.metadata] requires-dist = [ - { name = "asyncpg", specifier = ">=0.30.0" }, { name = "duckdb", specifier = ">=1.5.2" }, - { name = "litestar", extras = ["standard"], specifier = ">=2.18.0" }, { name = "osmium", specifier = ">=4.3.1" }, { name = "platformdirs", specifier = ">=4.5.1" }, { name = "pyarrow", specifier = ">=24.0.0" }, @@ -1026,13 +1027,18 @@ requires-dist = [ { name = "shapely", specifier = ">=2.1.2" }, { name = "typer", specifier = ">=0.25.0" }, { name = "typer-config", extras = ["yaml"], specifier = ">=1.5.1" }, - { name = "uv", specifier = ">=0.9.0" }, ] [package.metadata.requires-dev] +api = [ + { name = "asyncpg", specifier = ">=0.30.0" }, + { name = "litestar", extras = ["standard"], specifier = ">=2.18.0" }, +] dev = [ + { name = "asyncpg", specifier = ">=0.30.0" }, { name = "commitizen", specifier = ">=4.13.10" }, { name = "coverage", specifier = ">=7.6.1" }, + { name = "litestar", extras = ["standard"], specifier = ">=2.18.0" }, { name = "pre-commit", specifier = ">=4.6.0" }, { name = "pytest", specifier = ">=7.4.2" }, { name = "pytest-mock", specifier = ">=3.14.0" }, @@ -1676,32 +1682,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" }, ] -[[package]] -name = "uv" -version = "0.11.8" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c1/cd/4393fecb083897e956f016d4e66d0b8a496a08fe2e03cbda32a1e91da7ee/uv-0.11.8.tar.gz", hash = "sha256:bb2cf302b8503629aab6f0090a05551e6f8cfc2d687ca059cad7ec9e11214335", size = 4098020, upload-time = "2026-04-27T13:15:31.625Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/99/84/dcb676a3e36a3a2b44dc2e4dfea471b8cd709025e27cce3e588b176fd899/uv-0.11.8-py3-none-linux_armv6l.whl", hash = "sha256:a53e704a780a9e78a50f5a880e99a690f84e6fb9e82610903ce26f47c271d74c", size = 23664296, upload-time = "2026-04-27T13:15:15.644Z" }, - { url = "https://files.pythonhosted.org/packages/86/05/557aa070fda7b8460bbbe1e867e8e5b80602c5b30ed77d1d94fc5acae518/uv-0.11.8-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:d414fc3795b6f56fb6b1fa359537930924fdfe857750a144d2aedf3077be3f1d", size = 23087321, upload-time = "2026-04-27T13:15:36.193Z" }, - { url = "https://files.pythonhosted.org/packages/d5/62/82953018801a250e16b091ef4b5e95e939b2f01224363d6fc80f600b7eff/uv-0.11.8-py3-none-macosx_11_0_arm64.whl", hash = "sha256:f0d402e182ab581e934c159cc9edf25ec6e08d32f29aa797980e949afefc87cd", size = 21747142, upload-time = "2026-04-27T13:15:20.4Z" }, - { url = "https://files.pythonhosted.org/packages/af/4c/477f2abe16f9a3d3c73077f15615878a303eef3760115ec946be58ecb9b2/uv-0.11.8-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:877c9af3b3955a35ef739e5b2ba79c56dae5c4d50420a7ed908c0901e1c8c807", size = 23425861, upload-time = "2026-04-27T13:15:10.374Z" }, - { url = 
"https://files.pythonhosted.org/packages/2a/63/19f46193e49f0c9bf33346a4d726313871864db16e7cdd1c0a63bc112000/uv-0.11.8-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.musllinux_1_1_armv7l.whl", hash = "sha256:8278144df8d80a83f770c264a5e79ea50791316d2a0dda869e53b3c1174142a8", size = 23215551, upload-time = "2026-04-27T13:15:38.706Z" }, - { url = "https://files.pythonhosted.org/packages/72/3e/5595b265df848a33cd060b10e8f763a46d67521ac9f6c314e8a4ad5329d7/uv-0.11.8-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b3494ad32465f4e02259cfb104d24efe5bb8f7a782351f0354de9385415fb310", size = 23224170, upload-time = "2026-04-27T13:15:18.083Z" }, - { url = "https://files.pythonhosted.org/packages/a6/b3/6ca95e690b52542caa1dae10ede57732f90c629946ab5f027ff746f87deb/uv-0.11.8-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a4421e27e81f85bce3bdb75986c38b5f9bfab9cdccaf3d977cf124b3f0f0b989", size = 24730048, upload-time = "2026-04-27T13:15:13.254Z" }, - { url = "https://files.pythonhosted.org/packages/ea/49/71b7322067c85a3736a22a300072b0566991fe3f95b81bed793508ff5315/uv-0.11.8-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91943e77fc962752d4f64ad5739219858395981078051c740b28b52963b366aa", size = 25585906, upload-time = "2026-04-27T13:15:41.455Z" }, - { url = "https://files.pythonhosted.org/packages/37/16/4e84cd5131327fe86d4784ebfc8a983149f4e6b811476ef271fc548b29e6/uv-0.11.8-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:41fbba287efcc9bc9505a60549b3a223220da720eacd03be8c23d9daaafa44f4", size = 24795740, upload-time = "2026-04-27T13:15:49.842Z" }, - { url = "https://files.pythonhosted.org/packages/5b/01/df175979018743cc5ba6e2fb9dcec916868271e8d88cf0b9df8fd805a0df/uv-0.11.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d97bb2920d6cddc07faa475013461294cc09b77ec8139278416c6e54b938d037", size = 24824980, upload-time = "2026-04-27T13:15:53.506Z" }, - { url = 
"https://files.pythonhosted.org/packages/1c/95/93c7f595f7136fb32807442860c55d0faed2cd3d7da4b7105ed3c2535d5f/uv-0.11.8-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:fb6a755305eb1e081dfe6a8bc007dbae2d26fe75e551656ca7c9cd08fba21d26", size = 23526790, upload-time = "2026-04-27T13:15:04.955Z" }, - { url = "https://files.pythonhosted.org/packages/04/02/77430b89e172c20cc549b07a5b1dfda0c882c161b6d82781d3150a7063ac/uv-0.11.8-py3-none-manylinux_2_31_riscv64.musllinux_1_1_riscv64.whl", hash = "sha256:841ecbb38532698f73b14b49dc5f0c5e756194c7fcf6e5c6b7ed3859200fe91b", size = 24280498, upload-time = "2026-04-27T13:15:43.978Z" }, - { url = "https://files.pythonhosted.org/packages/8a/e3/23e4a2bb91e3880e017e6116886e2d0bde14ba6aa95ddc458160ee630e7c/uv-0.11.8-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:b3ff2b20c1897105ebe7ed7f9b1b331c7171da029bc1e35970ce31dc086141c1", size = 24375233, upload-time = "2026-04-27T13:15:25.753Z" }, - { url = "https://files.pythonhosted.org/packages/d9/67/fb7dc17cea816a667d1be2632525aa1687566bfafd17bdac561a7a6c9484/uv-0.11.8-py3-none-musllinux_1_1_i686.whl", hash = "sha256:ad381228b0170ef9646902c7e908d4a10a7ecc3da8139450506cf70c7e7f3e80", size = 23904818, upload-time = "2026-04-27T13:15:23.21Z" }, - { url = "https://files.pythonhosted.org/packages/4b/91/b920e35f54f8c6b51f2c639e8170bb80a47a739a1442fea33a479bc93a3d/uv-0.11.8-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:0172b5215544844cd3db0fa3c73a2eb74999b3f00cd2527dde578725076d7b65", size = 25015448, upload-time = "2026-04-27T13:15:46.666Z" }, - { url = "https://files.pythonhosted.org/packages/05/e8/3771956dc1c94b8484789bb8070d91872080d0af99332b8bdec7218c2bfd/uv-0.11.8-py3-none-win32.whl", hash = "sha256:e71c1dd23cbb480f3952c3a95b4fd00f96bd618e2a94583fc9388c500af3070d", size = 22823583, upload-time = "2026-04-27T13:15:33.674Z" }, - { url = "https://files.pythonhosted.org/packages/f9/9b/a91a9c60dcae0e1e3da06377d38f32118a523697d461fe41bc9f117ecf59/uv-0.11.8-py3-none-win_amd64.whl", hash 
= "sha256:306c624c68d95dd7ea3647675323d72c1abc25f91c3e92ae4cd6f0f11b508726", size = 25407438, upload-time = "2026-04-27T13:15:28.957Z" }, - { url = "https://files.pythonhosted.org/packages/61/5d/defa29fe617e6f07d4e514089e9d36fd9f44ede869e597e39ff7d69f6917/uv-0.11.8-py3-none-win_arm64.whl", hash = "sha256:a9853456696d579f206135c9dda7227a6ed8311b8a9a0b9b2008c4ae81950efe", size = 23914243, upload-time = "2026-04-27T13:15:07.717Z" }, -] - [[package]] name = "uvicorn" version = "0.46.0" From 918a503ce46c6f02778271cd6bf38ae4b36a32a1 Mon Sep 17 00:00:00 2001 From: Niruta Neupane <133505105+NirrWorks@users.noreply.github.com> Date: Sun, 3 May 2026 23:25:48 +0545 Subject: [PATCH 16/49] Delete railway.json --- railway.json | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 railway.json diff --git a/railway.json b/railway.json deleted file mode 100644 index df67c9b..0000000 --- a/railway.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "$schema": "https://railway.com/railway.schema.json", - "build": { - "builder": "NIXPACKS", - "buildCommand": "pip install uv && uv sync --frozen" - }, - "deploy": { - "startCommand": "uv run litestar --app osmsg.api.app:app run --host 0.0.0.0 --port $PORT" - } -} From f610c987faec2b7ddd6ae7ae2556d974b2cdd162 Mon Sep 17 00:00:00 2001 From: NirrWorks Date: Mon, 4 May 2026 00:37:21 +0545 Subject: [PATCH 17/49] Handle user stats when changeset metadata is unavailable --- osmsg/api/queries.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/osmsg/api/queries.py b/osmsg/api/queries.py index 2ce8f20..3eb6b87 100644 --- a/osmsg/api/queries.py +++ b/osmsg/api/queries.py @@ -15,6 +15,26 @@ async def fetch_user_stats( offset: int = 0, ) -> list[dict[str, Any]]: sql = """ + WITH filtered_changesets AS ( + SELECT changeset_id + FROM changesets + WHERE created_at >= $1 + AND created_at < $2 + AND ($3::TEXT IS NULL OR $3 = ANY(hashtags)) + ), + matching_stats AS ( + SELECT st.* + FROM changeset_stats st + 
JOIN filtered_changesets fc ON st.changeset_id = fc.changeset_id + ), + stats_scope AS ( + SELECT * FROM matching_stats + UNION ALL + SELECT st.* + FROM changeset_stats st + WHERE $3::TEXT IS NULL + AND NOT EXISTS (SELECT 1 FROM matching_stats) + ) SELECT u.uid, u.username AS name, @@ -51,12 +71,7 @@ async def fetch_user_stats( u.uid ASC ) AS rank FROM users u - JOIN changesets cs ON u.uid = cs.uid - JOIN changeset_stats st ON u.uid = st.uid - AND cs.changeset_id = st.changeset_id - WHERE cs.created_at >= $1 - AND cs.created_at < $2 - AND ($3::TEXT IS NULL OR $3 = ANY(cs.hashtags)) + JOIN stats_scope st ON u.uid = st.uid GROUP BY u.uid, u.username ORDER BY map_changes DESC, u.uid ASC LIMIT $4 OFFSET $5 From a4b8e2a1893fda387fd2178eb9165223f14e275d Mon Sep 17 00:00:00 2001 From: NirrWorks Date: Wed, 6 May 2026 18:46:12 +0545 Subject: [PATCH 18/49] Addressed minor API review comments --- README.md | 2 +- osmsg/api/app.py | 4 +--- osmsg/api/db.py | 2 -- osmsg/api/queries.py | 52 +++++++++++++++++++++++++++++--------------- pyproject.toml | 1 - tests/test_api.py | 19 ++++++++-------- uv.lock | 4 ---- 7 files changed, 46 insertions(+), 38 deletions(-) diff --git a/README.md b/README.md index 2d88542..e486edc 100644 --- a/README.md +++ b/README.md @@ -113,7 +113,7 @@ http://localhost:8000/health http://localhost:8000/api/v1/user-stats?start=2026-05-01T00:00:00Z&end=2026-05-02T00:00:00Z ``` -Use `hashtag` to filter by one changeset hashtag: +Repeat `hashtag` to filter by one or more changeset hashtags: ```text http://localhost:8000/api/v1/user-stats?start=2026-05-01T00:00:00Z&end=2026-05-02T00:00:00Z&hashtag=%23mapathon diff --git a/osmsg/api/app.py b/osmsg/api/app.py index 7563489..53d9dfa 100644 --- a/osmsg/api/app.py +++ b/osmsg/api/app.py @@ -1,5 +1,3 @@ -from __future__ import annotations - from contextlib import asynccontextmanager from datetime import datetime from typing import Any @@ -31,7 +29,7 @@ async def health() -> dict[str, str]: async def get_user_stats( 
start: datetime, end: datetime, - hashtag: str | None = None, + hashtag: list[str] | None = None, limit: int = Parameter(default=100, ge=1, le=1000), offset: int = Parameter(default=0, ge=0), ) -> dict[str, Any]: diff --git a/osmsg/api/db.py b/osmsg/api/db.py index e7feb6f..72f6c30 100644 --- a/osmsg/api/db.py +++ b/osmsg/api/db.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import os import asyncpg diff --git a/osmsg/api/queries.py b/osmsg/api/queries.py index 3eb6b87..9e8579d 100644 --- a/osmsg/api/queries.py +++ b/osmsg/api/queries.py @@ -1,26 +1,28 @@ -from __future__ import annotations - from datetime import datetime from typing import Any from .db import get_pool -async def fetch_user_stats( - *, - start: datetime, - end: datetime, - hashtag: str | None = None, - limit: int = 100, - offset: int = 0, -) -> list[dict[str, Any]]: - sql = """ +def _user_stats_sql(*, filter_hashtags: bool) -> str: + changeset_filters = ["created_at >= $1", "created_at < $2"] + if filter_hashtags: + changeset_filters.append("hashtags && $3::TEXT[]") + limit_param = "$4" + offset_param = "$5" + enable_unfiltered_fallback = "FALSE" + else: + limit_param = "$3" + offset_param = "$4" + enable_unfiltered_fallback = "TRUE" + + changeset_where = " AND ".join(changeset_filters) + + return f""" WITH filtered_changesets AS ( SELECT changeset_id FROM changesets - WHERE created_at >= $1 - AND created_at < $2 - AND ($3::TEXT IS NULL OR $3 = ANY(hashtags)) + WHERE {changeset_where} ), matching_stats AS ( SELECT st.* @@ -32,7 +34,7 @@ async def fetch_user_stats( UNION ALL SELECT st.* FROM changeset_stats st - WHERE $3::TEXT IS NULL + WHERE {enable_unfiltered_fallback} AND NOT EXISTS (SELECT 1 FROM matching_stats) ) SELECT @@ -74,9 +76,25 @@ async def fetch_user_stats( JOIN stats_scope st ON u.uid = st.uid GROUP BY u.uid, u.username ORDER BY map_changes DESC, u.uid ASC - LIMIT $4 OFFSET $5 + LIMIT {limit_param} OFFSET {offset_param} """ + +async def fetch_user_stats( + *, + start: 
datetime, + end: datetime, + hashtag: list[str] | None = None, + limit: int = 100, + offset: int = 0, +) -> list[dict[str, Any]]: + filter_hashtags = bool(hashtag) + sql = _user_stats_sql(filter_hashtags=filter_hashtags) + params: list[Any] = [start, end] + if filter_hashtags: + params.append(hashtag) + params.extend([limit, offset]) + async with get_pool().acquire() as conn: - rows = await conn.fetch(sql, start, end, hashtag, limit, offset) + rows = await conn.fetch(sql, *params) return [dict(row) for row in rows] diff --git a/pyproject.toml b/pyproject.toml index 7040be0..79dbbec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,7 +55,6 @@ api = [ "litestar[standard]>=2.18.0", ] dev = [ - {include-group = "api"}, "pytest>=7.4.2", "pytest-mock>=3.14.0", "coverage>=7.6.1", diff --git a/tests/test_api.py b/tests/test_api.py index c14858a..effa9cd 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,5 +1,3 @@ -from __future__ import annotations - from importlib import import_module from litestar import Litestar @@ -32,7 +30,7 @@ def test_user_stats_endpoint_returns_expected_response(monkeypatch): async def fake_fetch_user_stats(*, start, end, hashtag, limit, offset): assert start.isoformat() == "2026-05-01T00:00:00+00:00" assert end.isoformat() == "2026-05-02T00:00:00+00:00" - assert hashtag == "#mapathon" + assert hashtag == ["#mapathon", "#roads"] assert limit == 1 assert offset == 0 return [ @@ -62,12 +60,13 @@ async def fake_fetch_user_stats(*, start, end, hashtag, limit, offset): with TestClient(app) as client: response = client.get( "/api/v1/user-stats", - params={ - "start": "2026-05-01T00:00:00Z", - "end": "2026-05-02T00:00:00Z", - "hashtag": "#mapathon", - "limit": "1", - }, + params=[ + ("start", "2026-05-01T00:00:00Z"), + ("end", "2026-05-02T00:00:00Z"), + ("hashtag", "#mapathon"), + ("hashtag", "#roads"), + ("limit", "1"), + ], ) assert response.status_code == 200 @@ -75,7 +74,7 @@ async def fake_fetch_user_stats(*, start, end, hashtag, 
limit, offset): "count": 1, "start": "2026-05-01T00:00:00+00:00", "end": "2026-05-02T00:00:00+00:00", - "hashtag": "#mapathon", + "hashtag": ["#mapathon", "#roads"], "limit": 1, "offset": 0, "users": [ diff --git a/uv.lock b/uv.lock index e72385f..b97be2c 100644 --- a/uv.lock +++ b/uv.lock @@ -1002,10 +1002,8 @@ api = [ { name = "litestar", extra = ["standard"] }, ] dev = [ - { name = "asyncpg" }, { name = "commitizen" }, { name = "coverage" }, - { name = "litestar", extra = ["standard"] }, { name = "pre-commit" }, { name = "pytest" }, { name = "pytest-mock" }, @@ -1035,10 +1033,8 @@ api = [ { name = "litestar", extras = ["standard"], specifier = ">=2.18.0" }, ] dev = [ - { name = "asyncpg", specifier = ">=0.30.0" }, { name = "commitizen", specifier = ">=4.13.10" }, { name = "coverage", specifier = ">=7.6.1" }, - { name = "litestar", extras = ["standard"], specifier = ">=2.18.0" }, { name = "pre-commit", specifier = ">=4.6.0" }, { name = "pytest", specifier = ">=7.4.2" }, { name = "pytest-mock", specifier = ">=3.14.0" }, From 92fe3f9ab3d49be7e39dd2401986ca377adc7934 Mon Sep 17 00:00:00 2001 From: NirrWorks Date: Wed, 6 May 2026 19:02:45 +0545 Subject: [PATCH 19/49] Install API dependencies in CI --- .github/workflows/ci.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 658a2eb..2995ca3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,19 +25,19 @@ jobs: cache-dependency-glob: "uv.lock" - name: Install dependencies - run: uv sync --locked + run: uv sync --locked --group api - name: Ruff lint - run: uv run ruff check osmsg tests + run: uv run --group api ruff check osmsg tests - name: Ruff format check - run: uv run ruff format --check osmsg tests + run: uv run --group api ruff format --check osmsg tests - name: ty (Astral type checker) - run: uv run ty check osmsg + run: uv run --group api ty check osmsg - name: Pytest (offline) - run: uv run pytest -m 
"not network" + run: uv run --group api pytest -m "not network" build: name: Build wheel + sdist From a4c254d39d2080ee0c14f82e004d820ecebd110a Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 7 May 2026 17:12:24 +0200 Subject: [PATCH 20/49] feat(infra): adds infra docker compose for hosting osmsg --- .dockerignore | 28 ++- .env.example | 13 +- .github/workflows/ci.yml | 68 +++---- .github/workflows/docker.yml | 49 +++-- .../workflows/{publish.yml => release.yml} | 24 +-- .gitignore | 2 +- .pre-commit-config.yaml | 51 +++-- Dockerfile | 71 ++++++- Procfile | 1 - README.md | 31 +-- {osmsg/api => api}/__init__.py | 0 {osmsg/api => api}/app.py | 3 +- api/db.py | 86 +++++++++ {osmsg/api => api}/queries.py | 0 data/example_boundary.geojson | 2 +- docker-compose.yml | 58 ++++++ docs/Manual.md | 12 +- docs/infra.md | 180 ++++++++++++++++++ justfile | 20 ++ osmsg/_tick.py | 84 ++++++++ osmsg/api/db.py | 34 ---- osmsg/boundary.py | 30 +-- osmsg/cli.py | 7 +- osmsg/geofabrik.py | 38 +++- osmsg/handlers.py | 14 +- osmsg/pipeline.py | 7 +- pyproject.toml | 2 +- tests/test_api.py | 12 +- tests/test_geofabrik.py | 64 ++++++- tests/test_handlers.py | 24 +++ worker-entrypoint.sh | 8 + 31 files changed, 840 insertions(+), 183 deletions(-) rename .github/workflows/{publish.yml => release.yml} (54%) delete mode 100644 Procfile rename {osmsg/api => api}/__init__.py (100%) rename {osmsg/api => api}/app.py (96%) create mode 100644 api/db.py rename {osmsg/api => api}/queries.py (100%) create mode 100644 docker-compose.yml create mode 100644 docs/infra.md create mode 100644 justfile create mode 100644 osmsg/_tick.py delete mode 100644 osmsg/api/db.py create mode 100755 worker-entrypoint.sh diff --git a/.dockerignore b/.dockerignore index 05c1fe1..1cdb9d2 100644 --- a/.dockerignore +++ b/.dockerignore @@ -3,18 +3,34 @@ .venv .ruff_cache .pytest_cache +.ty_cache +.mypy_cache +.coverage +.coverage.* +htmlcov +coverage.xml +dist +build +site +.cache __pycache__ *.pyc +*.pyo +.env 
+.env.* +!.env.example +tests +docs +.editorconfig +.pre-commit-config.yaml +justfile +CHANGELOG.md +.dockerignore *.duckdb *.parquet -dist -build -*.egg-info -docs -tests data temp temp_cs_parquet temp_cf_parquet -.env .DS_Store +*.egg-info diff --git a/.env.example b/.env.example index 641fc46..c8e576c 100644 --- a/.env.example +++ b/.env.example @@ -1 +1,12 @@ -DATABASE_URL=postgresql://user:password@localhost:5432/osmsg +DATABASE_URL=postgresql://osmsg:osmsg@localhost:5432/osmsg + +# Worker (docker compose) — leave blank for planet/minute defaults. +# OSMSG_NAME=nepal +# OSMSG_URL=minute # minute/hour/day or full URL; ignored when OSMSG_COUNTRY is set +# OSMSG_COUNTRY=nepal +# OSMSG_BOOTSTRAP=day +# OSMSG_BOOTSTRAP_DAYS= # alternative to OSMSG_BOOTSTRAP for arbitrary day counts +# OSMSG_BOUNDARY= # path to GeoJSON; overrides auto-derived country geometry +# OSMSG_SCHEDULE=0 * * * * +# OSM_USERNAME= +# OSM_PASSWORD= diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2995ca3..a0b35be 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,60 +11,62 @@ concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true +permissions: + contents: read + jobs: - lint-typecheck-test: - name: Lint, typecheck, test + pre-commit: + name: Pre-commit runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6 - - - name: Install uv - uses: astral-sh/setup-uv@v7 + - uses: actions/checkout@v4 + - uses: astral-sh/setup-uv@v5 with: enable-cache: true cache-dependency-glob: "uv.lock" + - run: uv sync --locked --group api + - run: uv run pre-commit run --all-files - - name: Install dependencies - run: uv sync --locked --group api - - - name: Ruff lint - run: uv run --group api ruff check osmsg tests - - - name: Ruff format check - run: uv run --group api ruff format --check osmsg tests - - - name: ty (Astral type checker) - run: uv run --group api ty check osmsg - - - name: Pytest (offline) - run: uv run --group api 
pytest -m "not network" + test: + name: Test (Python ${{ matrix.python-version }}) + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.11", "3.12", "3.13"] + steps: + - uses: actions/checkout@v4 + - uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + cache-dependency-glob: "uv.lock" + python-version: ${{ matrix.python-version }} + - run: uv sync --locked --group api + - run: uv run pytest -m "not network" build: name: Build wheel + sdist runs-on: ubuntu-latest - needs: lint-typecheck-test + needs: [pre-commit, test] steps: - - uses: actions/checkout@v6 - - uses: astral-sh/setup-uv@v7 + - uses: actions/checkout@v4 + - uses: astral-sh/setup-uv@v5 with: enable-cache: true cache-dependency-glob: "uv.lock" - run: uv build --no-sources - smoke-last-hour: - name: Smoke (process last hour) + smoke: + name: Smoke (last hour) runs-on: ubuntu-latest - needs: lint-typecheck-test + needs: [pre-commit, test] timeout-minutes: 10 steps: - - uses: actions/checkout@v6 - - uses: astral-sh/setup-uv@v7 + - uses: actions/checkout@v4 + - uses: astral-sh/setup-uv@v5 with: enable-cache: true cache-dependency-glob: "uv.lock" - run: uv sync --locked --no-group dev - - name: Run osmsg --last hour --summary - run: | - uv run osmsg --last hour --tags building --tags highway --summary -f parquet -f markdown --delete-temp - - name: Show artifacts - run: ls -lh stats.parquet stats.duckdb stats_summary.parquet stats_summary.md + - run: uv run osmsg --last hour --tags building --tags highway --summary -f parquet -f markdown --delete-temp + - run: ls -lh stats.parquet stats.duckdb stats_summary.parquet stats_summary.md diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 248adf2..393ac96 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -1,26 +1,37 @@ -name: Docker (multi-arch) +name: Docker on: - release: - types: [published] push: - branches: [master, develop] + branches: [master] + tags: [v*] 
pull_request: branches: [master, develop] workflow_dispatch: -env: - REGISTRY: ghcr.io - IMAGE_NAME: ${{ github.repository }} +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true jobs: - build-and-push: + build: + name: Build ${{ matrix.target }} runs-on: ubuntu-latest permissions: contents: read packages: write + attestations: write + id-token: write + strategy: + matrix: + include: + - target: cli + image: ghcr.io/${{ github.repository }} + - target: api + image: ghcr.io/${{ github.repository }}-api + - target: worker + image: ghcr.io/${{ github.repository }}-worker steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@v4 - name: Set up QEMU uses: docker/setup-qemu-action@v3 @@ -32,7 +43,7 @@ jobs: if: github.event_name != 'pull_request' uses: docker/login-action@v3 with: - registry: ${{ env.REGISTRY }} + registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} @@ -40,21 +51,33 @@ jobs: id: meta uses: docker/metadata-action@v5 with: - images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + images: ${{ matrix.image }} tags: | type=ref,event=branch type=ref,event=pr type=semver,pattern={{version}} type=semver,pattern={{major}}.{{minor}} + type=semver,pattern={{major}} + type=sha type=raw,value=latest,enable={{is_default_branch}} - - name: Build and push (multi-arch) - uses: docker/build-push-action@v6 + - name: Build and push + id: push + uses: docker/build-push-action@v5 with: context: . 
+ target: ${{ matrix.target }} platforms: linux/amd64,linux/arm64 push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} cache-from: type=gha cache-to: type=gha,mode=max + + - name: Generate artifact attestation + if: github.event_name != 'pull_request' + uses: actions/attest-build-provenance@v1 + with: + subject-name: ${{ matrix.image }} + subject-digest: ${{ steps.push.outputs.digest }} + push-to-registry: true diff --git a/.github/workflows/publish.yml b/.github/workflows/release.yml similarity index 54% rename from .github/workflows/publish.yml rename to .github/workflows/release.yml index 8a2358c..ed8531c 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/release.yml @@ -1,33 +1,27 @@ -name: Publish to PyPI +name: Release on: - release: - types: [ published ] push: tags: - "v*" - workflow_dispatch: + +permissions: + contents: read jobs: - publish: + release: name: Build and publish to PyPI runs-on: ubuntu-latest environment: name: pypi url: https://pypi.org/project/osmsg/ steps: - - uses: actions/checkout@v6 - - - name: Install uv - uses: astral-sh/setup-uv@v7 + - uses: actions/checkout@v4 + - uses: astral-sh/setup-uv@v5 with: enable-cache: true cache-dependency-glob: "uv.lock" - - - name: Build wheel + sdist - run: uv build --no-sources - - - name: Publish to PyPI + - run: uv build --no-sources + - run: uv publish env: UV_PUBLISH_TOKEN: ${{ secrets.PYPI_API_TOKEN }} - run: uv publish dist/* diff --git a/.gitignore b/.gitignore index 9485553..2fcac14 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,4 @@ dist .DS_Store node_modules/ stats.* -*.parquet \ No newline at end of file +*.parquet diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cee3f20..75610cd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,35 +1,52 @@ -repos: - # Versioning - - repo: https://github.com/commitizen-tools/commitizen - rev: v3.29.0 - hooks: - - id: 
commitizen - stages: [commit-msg] +default_language_version: + python: python3 - - repo: https://github.com/astral-sh/uv-pre-commit - rev: "0.11.8" +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 hooks: - - id: uv-lock + - id: detect-private-key + - id: end-of-file-fixer + - id: trailing-whitespace + - id: check-merge-conflict + - id: check-toml + - id: check-yaml + - id: check-json + - id: fix-byte-order-marker - repo: https://github.com/astral-sh/ruff-pre-commit - rev: "v0.15.12" + rev: v0.15.12 hooks: - id: ruff - args: [--fix, --exit-non-zero-on-fix] + args: [--fix] - id: ruff-format + - repo: https://github.com/astral-sh/uv-pre-commit + rev: "0.11.8" + hooks: + - id: uv-lock + - repo: local hooks: - id: ty - name: ty + name: ty (type checker) entry: uv run ty check language: system types_or: [python, pyi] pass_filenames: false require_serial: true + stages: [pre-commit] + + - id: pytest + name: pytest + entry: uv run pytest -m "not network" + language: system + stages: [pre-push] + pass_filenames: false + always_run: true - - repo: https://github.com/igorshubovych/markdownlint-cli - rev: v0.41.0 + - repo: https://github.com/commitizen-tools/commitizen + rev: v4.6.0 hooks: - - id: markdownlint - args: [--fix, --ignore, CHANGELOG.md] \ No newline at end of file + - id: commitizen + stages: [commit-msg] diff --git a/Dockerfile b/Dockerfile index 0bf0dc9..0e68b31 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,6 @@ # syntax=docker/dockerfile:1.7 -FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim AS builder +FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim AS builder-base ENV UV_COMPILE_BYTECODE=1 \ UV_LINK_MODE=copy \ @@ -16,17 +16,37 @@ COPY osmsg /app/osmsg RUN --mount=type=cache,target=/root/.cache/uv \ uv sync --frozen --no-dev --no-editable -RUN find /app/.venv -type d -name __pycache__ -exec rm -rf {} + \ - && sed -i 's|^home = .*|home = /usr/bin|' /app/.venv/pyvenv.cfg \ +RUN find /app/.venv -type d -name __pycache__ 
-exec rm -rf {} + + + +FROM builder-base AS builder-distroless +RUN sed -i 's|^home = .*|home = /usr/bin|' /app/.venv/pyvenv.cfg \ && rm -f /app/.venv/bin/python /app/.venv/bin/python3 /app/.venv/bin/python3.13 \ && ln -s /usr/bin/python3.13 /app/.venv/bin/python3.13 \ && ln -s python3.13 /app/.venv/bin/python3 \ && ln -s python3.13 /app/.venv/bin/python -FROM gcr.io/distroless/python3-debian13:nonroot AS runtime + +FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim AS builder-api + +ENV UV_COMPILE_BYTECODE=1 \ + UV_LINK_MODE=copy \ + UV_PYTHON_DOWNLOADS=never + +RUN --mount=type=cache,target=/root/.cache/uv \ + uv venv /app/.venv && \ + uv pip install --python /app/.venv \ + "litestar[standard]>=2.18.0" \ + "asyncpg>=0.30.0" \ + "python-dotenv>=1.2.2" + +RUN find /app/.venv -type d -name __pycache__ -exec rm -rf {} + + + +FROM gcr.io/distroless/python3-debian13:nonroot AS cli WORKDIR /work -COPY --from=builder --chown=nonroot:nonroot /app/.venv /app/.venv +COPY --from=builder-distroless --chown=nonroot:nonroot /app/.venv /app/.venv ENV PATH="/app/.venv/bin:$PATH" \ PYTHONDONTWRITEBYTECODE=1 \ @@ -34,3 +54,44 @@ ENV PATH="/app/.venv/bin:$PATH" \ ENTRYPOINT ["/app/.venv/bin/osmsg"] CMD ["--help"] + + +FROM python:3.13-slim AS api + +WORKDIR /app +COPY --from=builder-api /app/.venv /app/.venv +COPY api /app/api + +ENV PATH="/app/.venv/bin:$PATH" \ + PYTHONPATH=/app \ + PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + +EXPOSE 8000 +ENTRYPOINT ["/app/.venv/bin/litestar", "--app", "api.app:app", "run", "--host", "0.0.0.0", "--port", "8000"] + + +FROM python:3.13-slim AS worker + +RUN apt-get update \ + && apt-get install -y --no-install-recommends libexpat1 \ + && rm -rf /var/lib/apt/lists/* + +ARG SUPERCRONIC_VERSION=0.2.33 +ARG TARGETARCH=amd64 +ADD --chmod=755 https://github.com/aptible/supercronic/releases/download/v${SUPERCRONIC_VERSION}/supercronic-linux-${TARGETARCH} /usr/local/bin/supercronic + +WORKDIR /app +COPY --from=builder-base /app/.venv /app/.venv 
+COPY worker-entrypoint.sh /usr/local/bin/worker-entrypoint.sh +RUN chmod +x /usr/local/bin/worker-entrypoint.sh + +ENV PATH="/app/.venv/bin:$PATH" \ + PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + OSMSG_OUTPUT_DIR=/var/lib/osmsg \ + OSMSG_CACHE_DIR=/var/cache/osmsg + +RUN mkdir -p /var/lib/osmsg /var/cache/osmsg + +ENTRYPOINT ["/usr/local/bin/worker-entrypoint.sh"] diff --git a/Procfile b/Procfile deleted file mode 100644 index bb1ed34..0000000 --- a/Procfile +++ /dev/null @@ -1 +0,0 @@ -web: litestar --app osmsg.api.app:app run --host 0.0.0.0 --port ${PORT:-8000} diff --git a/README.md b/README.md index e486edc..155ec73 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ of nodes, ways, and relations created, modified, or deleted, written to parquet, A Project of [OSGeo Nepal](https://osgeonepal.org). -## What you get +## Features - Per-user create/modify/delete counts over any time window. - Tag and hashtag breakdowns (e.g. `building`, `#hotosm`). @@ -91,33 +91,20 @@ Same schema in DuckDB and Postgres: `users`, `changesets`, `changeset_stats`, `s ### 5. Run the API -The Litestar API reads osmsg data from Postgres. Set `DATABASE_URL` in your environment -or copy `.env.example` to `.env` and edit it. 
- -Insert the latest day of data into Postgres: - -```bash -uv run osmsg --last day --format psql --psql-dsn "$DATABASE_URL" --name api_last_day -``` - -Start the API: +Push stats into Postgres, then start the Litestar API: ```bash -uv run --group api litestar --app osmsg.api.app:app run --host 0.0.0.0 --port 8000 +osmsg --last day --format psql --psql-dsn "postgresql://user:pass@localhost/osmsg" +litestar --app api.app:app run --host 0.0.0.0 --port 8000 ``` -Then open: - ```text -http://localhost:8000/health -http://localhost:8000/api/v1/user-stats?start=2026-05-01T00:00:00Z&end=2026-05-02T00:00:00Z +GET /health +GET /api/v1/user-stats?start=2026-05-01T00:00:00Z&end=2026-05-02T00:00:00Z +GET /docs ``` -Repeat `hashtag` to filter by one or more changeset hashtags: - -```text -http://localhost:8000/api/v1/user-stats?start=2026-05-01T00:00:00Z&end=2026-05-02T00:00:00Z&hashtag=%23mapathon -``` +For self-hosting with Docker Compose and systemd, see [docs/infra.md](./docs/infra.md). ### 6. Use it as a library @@ -138,6 +125,7 @@ Same pipeline as the CLI. ### 7. Long flag lists? 
Use a config + ```bash osmsg --config nepal.yaml ``` @@ -153,6 +141,7 @@ Every run writes `stats.duckdb` (or `<--name>.duckdb`) plus the formats you ask - [Installation](./docs/Installation.md) - [Manual](./docs/Manual.md) (every flag, with examples) +- [Self-hosting / Docker Compose](./docs/infra.md) - [Version control / release notes](./docs/Version_control.md) ## Contributing diff --git a/osmsg/api/__init__.py b/api/__init__.py similarity index 100% rename from osmsg/api/__init__.py rename to api/__init__.py diff --git a/osmsg/api/app.py b/api/app.py similarity index 96% rename from osmsg/api/app.py rename to api/app.py index 53d9dfa..da457fc 100644 --- a/osmsg/api/app.py +++ b/api/app.py @@ -7,13 +7,14 @@ from litestar.openapi.config import OpenAPIConfig from litestar.params import Parameter -from .db import close_pool, open_pool +from .db import close_pool, ensure_schema, open_pool from .queries import fetch_user_stats @asynccontextmanager async def lifespan(app: Litestar): await open_pool() + await ensure_schema() try: yield finally: diff --git a/api/db.py b/api/db.py new file mode 100644 index 0000000..779f47d --- /dev/null +++ b/api/db.py @@ -0,0 +1,86 @@ +import os + +import asyncpg +from dotenv import load_dotenv + +load_dotenv() + + +PG_SCHEMA = """ +CREATE TABLE IF NOT EXISTS users ( + uid BIGINT PRIMARY KEY, + username TEXT NOT NULL +); +CREATE TABLE IF NOT EXISTS changesets ( + changeset_id BIGINT PRIMARY KEY, + uid BIGINT NOT NULL REFERENCES users(uid), + created_at TIMESTAMPTZ, + hashtags TEXT[], + editor TEXT, + min_lon DOUBLE PRECISION, + min_lat DOUBLE PRECISION, + max_lon DOUBLE PRECISION, + max_lat DOUBLE PRECISION +); +CREATE INDEX IF NOT EXISTS idx_changesets_created_at ON changesets(created_at); +CREATE TABLE IF NOT EXISTS changeset_stats ( + changeset_id BIGINT NOT NULL REFERENCES changesets(changeset_id), + seq_id BIGINT NOT NULL, + uid BIGINT NOT NULL REFERENCES users(uid), + nodes_created INTEGER DEFAULT 0, + nodes_modified INTEGER 
DEFAULT 0, + nodes_deleted INTEGER DEFAULT 0, + ways_created INTEGER DEFAULT 0, + ways_modified INTEGER DEFAULT 0, + ways_deleted INTEGER DEFAULT 0, + rels_created INTEGER DEFAULT 0, + rels_modified INTEGER DEFAULT 0, + rels_deleted INTEGER DEFAULT 0, + poi_created INTEGER DEFAULT 0, + poi_modified INTEGER DEFAULT 0, + tag_stats JSONB, + PRIMARY KEY (seq_id, changeset_id) +); +CREATE INDEX IF NOT EXISTS idx_changeset_stats_uid ON changeset_stats(uid); +CREATE TABLE IF NOT EXISTS state ( + source_url TEXT PRIMARY KEY, + last_seq BIGINT NOT NULL, + last_ts TIMESTAMPTZ NOT NULL, + updated_at TIMESTAMPTZ NOT NULL +); +""" + +_pool: asyncpg.Pool | None = None + + +def get_database_url() -> str: + database_url = os.getenv("DATABASE_URL") + if not database_url: + raise RuntimeError("DATABASE_URL environment variable is not set") + return database_url + + +async def open_pool() -> None: + global _pool + if _pool is None: + _pool = await asyncpg.create_pool(dsn=get_database_url(), min_size=1, max_size=10) + + +async def close_pool() -> None: + global _pool + if _pool is not None: + await _pool.close() + _pool = None + + +def get_pool() -> asyncpg.Pool: + if _pool is None: + raise RuntimeError("Database pool is not initialized") + return _pool + + +async def ensure_schema() -> None: + statements = [s.strip() for s in PG_SCHEMA.strip().split(";") if s.strip()] + async with get_pool().acquire() as conn: + for stmt in statements: + await conn.execute(stmt) diff --git a/osmsg/api/queries.py b/api/queries.py similarity index 100% rename from osmsg/api/queries.py rename to api/queries.py diff --git a/data/example_boundary.geojson b/data/example_boundary.geojson index f35c49e..cc01706 100644 --- a/data/example_boundary.geojson +++ b/data/example_boundary.geojson @@ -28,4 +28,4 @@ ], "type": "Polygon" } -} \ No newline at end of file +} diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..5eb75f4 --- /dev/null +++ b/docker-compose.yml @@ -0,0 
+1,58 @@ +services: + db: + image: postgres:17-alpine + environment: + POSTGRES_USER: osmsg + POSTGRES_PASSWORD: osmsg + POSTGRES_DB: osmsg + volumes: + - pgdata:/var/lib/postgresql/data + ports: + - "5432:5432" + healthcheck: + test: ["CMD-SHELL", "pg_isready -U osmsg -d osmsg"] + interval: 5s + timeout: 3s + retries: 10 + restart: unless-stopped + + api: + build: + context: . + target: api + environment: + DATABASE_URL: postgresql://osmsg:osmsg@db:5432/osmsg + ports: + - "8000:8000" + depends_on: + db: + condition: service_healthy + restart: unless-stopped + + worker: + build: + context: . + target: worker + environment: + DATABASE_URL: postgresql://osmsg:osmsg@db:5432/osmsg + OSMSG_NAME: ${OSMSG_NAME:-stats} + OSMSG_URL: ${OSMSG_URL:-minute} + OSMSG_COUNTRY: ${OSMSG_COUNTRY:-} + OSMSG_BOOTSTRAP: ${OSMSG_BOOTSTRAP:-hour} + OSMSG_BOOTSTRAP_DAYS: ${OSMSG_BOOTSTRAP_DAYS:-} + OSMSG_BOUNDARY: ${OSMSG_BOUNDARY:-} + OSMSG_SCHEDULE: ${OSMSG_SCHEDULE:-*/2 * * * *} + OSM_USERNAME: ${OSM_USERNAME:-} + OSM_PASSWORD: ${OSM_PASSWORD:-} + volumes: + - osmsg-data:/var/lib/osmsg + - osmsg-cache:/var/cache/osmsg + depends_on: + db: + condition: service_healthy + restart: unless-stopped + +volumes: + pgdata: + osmsg-data: + osmsg-cache: diff --git a/docs/Manual.md b/docs/Manual.md index 117c515..876bc74 100644 --- a/docs/Manual.md +++ b/docs/Manual.md @@ -35,9 +35,16 @@ osmsg --country nepal --country india --country africa # Geofabrik regions, re osmsg --hashtags hotosm-project-1234 --hashtags mapathon osmsg --hashtags mapathon --exact-lookup # match whole hashtag, not substring osmsg --users alice --users bob -osmsg --boundary region.geojson +osmsg --boundary nepal # Geofabrik region name +osmsg --boundary region.geojson # path to a GeoJSON file +osmsg --boundary '{"type":"Polygon",...}' # inline GeoJSON string ``` +> `--boundary` filters changesets whose bounding box intersects the given geometry. 
+> A Geofabrik region name resolves from the same index as `--country` — no separate file needed. +> `--boundary` only filters; it does not change the replication source. +> To scope the replication source to a country's diffs, use `--country` instead. +> > Each `--users`, `--hashtags`, `--tags`, `--length`, `--country`, `--url`, `-f` > takes one value at a time; pass the flag again for additional values. > @@ -126,6 +133,9 @@ osmsg --start "2025-01-01 00:00:00" --end "2026-01-01 00:00:00" \ --url day --all-tags -f parquet -f psql \ --psql-dsn "host=localhost dbname=osm_stats user=osm" +# All-time Nepal stats via planet/day (Geofabrik only keeps ~4 months per country) +osmsg --url day --boundary nepal --start "2012-09-13" -f parquet -f psql ... + # Cron / systemd: refresh Nepal nightly osmsg --country nepal --update ``` diff --git a/docs/infra.md b/docs/infra.md new file mode 100644 index 0000000..57d675f --- /dev/null +++ b/docs/infra.md @@ -0,0 +1,180 @@ +# Self-hosting osmsg + +This guide covers running osmsg continuously on a server: a Postgres database, a Litestar REST API, and a worker that keeps OSM stats refreshed on a cron schedule. + +## Stack overview + +| Service | Image target | Role | +| --- | --- | --- | +| `db` | `postgres:17-alpine` | Persistent stats store | +| `api` | `Dockerfile → api` | Litestar REST API at `:8000` | +| `worker` | `Dockerfile → worker` | osmsg cron worker (supercronic) | + +The worker bootstraps on first run (no existing state → `--last `) and switches to `--update` automatically on subsequent ticks. + +## Quick start : planet, every 2 minutes + +```bash +docker compose up -d +curl 'http://localhost:8000/health' +curl 'http://localhost:8000/api/v1/user-stats?start=2026-05-07T00:00:00Z&end=2026-05-08T00:00:00Z&limit=20' +``` + +No `.env` needed : defaults are planet replication, `*/2 * * * *` schedule, bootstrap from last hour. 
+ +## Country mode (Geofabrik) + +Create a `.env` file (copy `.env.example` and edit): + +```bash +OSMSG_NAME=nepal +OSMSG_COUNTRY=nepal # any Geofabrik region id +OSMSG_BOOTSTRAP=day # first-run window +OSMSG_SCHEDULE=0 * * * * # hourly +OSM_USERNAME=you +OSM_PASSWORD=secret +``` + +Then: + +```bash +docker compose up -d +``` + +The worker fetches Nepal-specific replication diffs from Geofabrik. +Changesets are filtered to those whose bounding box intersects the Nepal polygon (auto-derived from the Geofabrik index). +A custom boundary GeoJSON can override this via `OSMSG_BOUNDARY`. + +### Geofabrik credentials + +Geofabrik sub-daily replication uses your OSM account credentials directly via OAuth 2.0. +Set `OSM_USERNAME` and `OSM_PASSWORD` in `.env` — no browser opt-in or separate Geofabrik registration required. + +## Environment variables + +All variables are optional; defaults target the planet at minute granularity. + +| Variable | Default | Notes | +| --- | --- | --- | +| `OSMSG_NAME` | `stats` | DuckDB / output file basename | +| `OSMSG_URL` | `minute` | `minute`/`hour`/`day` shortcut or full replication URL. Ignored when `OSMSG_COUNTRY` is set | +| `OSMSG_COUNTRY` | _unset_ | Geofabrik region id (e.g. `nepal`). Needs `OSM_USERNAME`/`OSM_PASSWORD` | +| `OSMSG_BOOTSTRAP` | `hour` | First-run window: `hour`/`day`/`week`/`month`/`year` | +| `OSMSG_BOOTSTRAP_DAYS` | _unset_ | Exact day count for first-run bootstrap (alternative to `OSMSG_BOOTSTRAP`) | +| `OSMSG_BOUNDARY` | _unset_ | Path to a GeoJSON file. 
Overrides auto-derived country geometry | +| `OSMSG_SCHEDULE` | `*/2 * * * *` | supercronic cron expression for the worker tick | +| `DATABASE_URL` | (compose default) | libpq DSN; worker mirrors each tick to Postgres | +| `OSM_USERNAME` | _unset_ | OSM account username (Geofabrik auth) | +| `OSM_PASSWORD` | _unset_ | OSM account password (Geofabrik auth) | + +## API endpoints + +``` +GET /health +GET /api/v1/user-stats?start=&end=[&hashtag=][&limit=N][&offset=N] +GET /docs (Swagger UI) +``` + +## Populate all-time stats (backfill) + +For a long historical backfill, run osmsg directly before starting the continuous worker. +The worker will resume from where the backfill left off. + +**Example : all Nepal stats since 2012:** + +```bash +docker compose up -d db # start only the database + +docker compose run --rm worker python -m osmsg \ + --name nepal \ + --country nepal \ + --start "2012-09-12" \ + --end "2026-01-01" \ + --format psql \ + --psql-dsn "postgresql://osmsg:osmsg@db:5432/osmsg" + +docker compose up -d # start api + worker; worker resumes from last backfill seq +``` + +The `state` table records the last processed sequence per source URL. +When the worker starts, it detects existing state and switches to `--update` automatically. + +**Example : last 90 days then keep refreshing:** + +```bash +OSMSG_BOOTSTRAP_DAYS=90 docker compose up -d +``` + +## Run the API standalone (without compose) + +Push stats into Postgres first, then start litestar: + +```bash +uv run osmsg --last day --format psql --psql-dsn "$DATABASE_URL" --name api_last_day +uv run --group api litestar --app api.app:app run --host 0.0.0.0 --port 8000 +``` + +## Run as a systemd service + +Drop the project on the server and let systemd manage the compose stack across reboots. + +**1. Place files:** + +```bash +mkdir -p /opt/osmsg +cp docker-compose.yml Dockerfile worker-entrypoint.sh /opt/osmsg/ +cp .env.example /opt/osmsg/.env +# edit /opt/osmsg/.env with your values +``` + +**2. 
Create the unit file** at `/etc/systemd/system/osmsg.service`: + +```ini +[Unit] +Description=osmsg stats stack +Requires=docker.service +After=docker.service network-online.target +Wants=network-online.target + +[Service] +Type=simple +Restart=on-failure +RestartSec=10 +WorkingDirectory=/opt/osmsg +EnvironmentFile=/opt/osmsg/.env +ExecStart=/usr/bin/docker compose up +ExecStop=/usr/bin/docker compose down +TimeoutStartSec=300 +TimeoutStopSec=60 + +[Install] +WantedBy=multi-user.target +``` + +**3. Enable and start:** + +```bash +systemctl daemon-reload +systemctl enable --now osmsg +``` + +**Useful commands:** + +```bash +systemctl status osmsg +journalctl -u osmsg -f # follow logs (all three containers) +systemctl restart osmsg # pick up .env changes +systemctl stop osmsg # brings the full stack down cleanly +``` + +> `EnvironmentFile=/opt/osmsg/.env` loads your env vars into the service environment. +> Docker Compose inherits them, so `${OSMSG_COUNTRY}` and friends resolve without a separate +> `--env-file` flag. 
+ +## Volumes + +| Volume | Mount | Contents | +| --- | --- | --- | +| `pgdata` | `/var/lib/postgresql/data` | Postgres data directory | +| `osmsg-data` | `/var/lib/osmsg` | DuckDB state files + parquet output | +| `osmsg-cache` | `/var/cache/osmsg` | Downloaded replication diff cache | diff --git a/justfile b/justfile new file mode 100644 index 0000000..0606144 --- /dev/null +++ b/justfile @@ -0,0 +1,20 @@ +set shell := ["bash", "-uc"] + +default: + @just --list + +setup: + uv sync --all-groups + uv run pre-commit install --install-hooks --hook-type pre-commit --hook-type commit-msg + +lint: + uv run pre-commit run --all-files + +test *ARGS: + uv run pytest -m "not network" {{ARGS}} + +test-all *ARGS: + uv run pytest {{ARGS}} + +build: + uv build --no-sources diff --git a/osmsg/_tick.py b/osmsg/_tick.py new file mode 100644 index 0000000..242fd45 --- /dev/null +++ b/osmsg/_tick.py @@ -0,0 +1,84 @@ +"""Worker tick: bootstrap on first run, --update thereafter.""" + +from __future__ import annotations + +import fcntl +import os +import subprocess +import sys +from pathlib import Path + +from .db import connect, create_tables, get_state +from .geofabrik import country_update_url +from .replication import resolve_url + + +def _has_state(db_path: Path, source_url: str) -> bool: + if not db_path.exists(): + return False + conn = connect(str(db_path)) + try: + create_tables(conn) + return get_state(conn, source_url) is not None + finally: + conn.close() + + +def main() -> int: + name = os.environ.get("OSMSG_NAME", "stats") + out = Path(os.environ.get("OSMSG_OUTPUT_DIR", "/var/lib/osmsg")) + cache = Path(os.environ.get("OSMSG_CACHE_DIR", "/var/cache/osmsg")) + country = os.environ.get("OSMSG_COUNTRY") + url = os.environ.get("OSMSG_URL", "minute") + boundary = os.environ.get("OSMSG_BOUNDARY") + bootstrap = os.environ.get("OSMSG_BOOTSTRAP", "hour") + bootstrap_days = os.environ.get("OSMSG_BOOTSTRAP_DAYS") + psql_dsn = os.environ.get("DATABASE_URL") + + 
out.mkdir(parents=True, exist_ok=True) + cache.mkdir(parents=True, exist_ok=True) + + lock_path = out / f"{name}.lock" + lock_fd = os.open(str(lock_path), os.O_CREAT | os.O_RDWR, 0o644) + try: + fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + except BlockingIOError: + print("[osmsg-tick] previous tick still running, skipping", flush=True) + return 0 + + db_path = out / f"{name}.duckdb" + cmd = [ + "osmsg", + "--name", + name, + "--output-dir", + str(out), + "--cache-dir", + str(cache), + "--format", + "parquet", + ] + if country: + cmd.extend(["--country", country]) + source_url = country_update_url(country) + else: + cmd.extend(["--url", url]) + source_url = resolve_url(url) + if boundary: + cmd.extend(["--boundary", boundary]) + if psql_dsn: + cmd.extend(["--format", "psql", "--psql-dsn", psql_dsn]) + + if _has_state(db_path, source_url): + cmd.append("--update") + elif bootstrap_days: + cmd.extend(["--days", bootstrap_days]) + else: + cmd.extend(["--last", bootstrap]) + + print(f"[osmsg-tick] {' '.join(cmd)}", flush=True) + return subprocess.call(cmd) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/osmsg/api/db.py b/osmsg/api/db.py deleted file mode 100644 index 72f6c30..0000000 --- a/osmsg/api/db.py +++ /dev/null @@ -1,34 +0,0 @@ -import os - -import asyncpg -from dotenv import load_dotenv - -load_dotenv() - -_pool: asyncpg.Pool | None = None - - -def get_database_url() -> str: - database_url = os.getenv("DATABASE_URL") - if not database_url: - raise RuntimeError("DATABASE_URL environment variable is not set") - return database_url - - -async def open_pool() -> None: - global _pool - if _pool is None: - _pool = await asyncpg.create_pool(dsn=get_database_url(), min_size=1, max_size=10) - - -async def close_pool() -> None: - global _pool - if _pool is not None: - await _pool.close() - _pool = None - - -def get_pool() -> asyncpg.Pool: - if _pool is None: - raise RuntimeError("Database pool is not initialized") - return _pool diff --git 
a/osmsg/boundary.py b/osmsg/boundary.py index 35c087d..6b39cf4 100644 --- a/osmsg/boundary.py +++ b/osmsg/boundary.py @@ -1,4 +1,4 @@ -"""Geometry helpers: boundary parsing + bbox centroid.""" +"""Boundary GeoJSON parsing.""" from __future__ import annotations @@ -6,19 +6,27 @@ from pathlib import Path from typing import Any -from shapely.geometry import MultiPolygon, Polygon, box, shape +from shapely.geometry import MultiPolygon, Polygon, shape from shapely.geometry.base import BaseGeometry +from .exceptions import UnknownRegionError +from .geofabrik import country_geometry + def load_boundary(input_data: str) -> BaseGeometry: - """Accept either inline GeoJSON text or a path to a GeoJSON file.""" try: payload: Any = json.loads(input_data) - except json.JSONDecodeError as exc: + except json.JSONDecodeError: path = Path(input_data) - if not path.is_file(): - raise ValueError(f"Not valid JSON or a file path: {input_data!r}") from exc - payload = json.loads(path.read_text()) + if path.is_file(): + payload = json.loads(path.read_text()) + else: + try: + return country_geometry(input_data) + except UnknownRegionError: + raise ValueError( + f"--boundary {input_data!r} is not valid JSON, a file path, or a known Geofabrik region name." 
+ ) from None geometry = payload.get("geometry") if "geometry" in payload else payload if not geometry or geometry.get("type") not in ("Polygon", "MultiPolygon"): @@ -27,11 +35,3 @@ def load_boundary(input_data: str) -> BaseGeometry: if isinstance(geom, (Polygon, MultiPolygon)): return geom raise ValueError(f"Unexpected geometry type: {type(geom).__name__}") - - -def bbox_centroid(bounds) -> tuple[float, float] | None: - """Centroid of an osmium bounding box, or None if invalid.""" - if not bounds.valid(): - return None - geom = box(bounds.bottom_left.lon, bounds.bottom_left.lat, bounds.top_right.lon, bounds.top_right.lat) - return geom.centroid.x, geom.centroid.y diff --git a/osmsg/cli.py b/osmsg/cli.py index fc58832..4c064b6 100644 --- a/osmsg/cli.py +++ b/osmsg/cli.py @@ -139,7 +139,12 @@ def main( int | None, typer.Option(help="Cap rows shown in the console table. Files always carry the full set."), ] = None, - boundary: Annotated[str | None, typer.Option(help="Path to GeoJSON or inline geojson string.")] = None, + boundary: Annotated[ + str | None, + typer.Option( + help="Boundary filter: Geofabrik region name (e.g. 'nepal'), GeoJSON file path, or inline GeoJSON." 
+ ), + ] = None, formats: Annotated[list[Format] | None, typer.Option("--format", "-f", help="One or more output formats.")] = None, summary: Annotated[bool, typer.Option(help="Also write _summary.parquet + summary.md.")] = False, changeset: Annotated[bool, typer.Option(hidden=True)] = False, diff --git a/osmsg/geofabrik.py b/osmsg/geofabrik.py index 46b7078..1f4929f 100644 --- a/osmsg/geofabrik.py +++ b/osmsg/geofabrik.py @@ -3,6 +3,9 @@ from __future__ import annotations from functools import lru_cache +from typing import Any + +from shapely.geometry import MultiPolygon, Polygon, shape from ._http import session from .exceptions import UnknownRegionError @@ -11,20 +14,27 @@ @lru_cache(maxsize=1) -def load_index() -> dict[str, str]: - """Return `{region_id: updates_url}` parsed from the live Geofabrik index. Cached per process.""" +def _raw_index() -> dict[str, dict[str, Any]]: r = session.get(INDEX_URL, timeout=60) r.raise_for_status() - out: dict[str, str] = {} + out: dict[str, dict[str, Any]] = {} for f in r.json().get("features", []): props = f.get("properties") or {} rid = props.get("id") - url = (props.get("urls") or {}).get("updates") - if rid and url: - out[rid] = url + if not rid: + continue + out[rid] = { + "updates": (props.get("urls") or {}).get("updates"), + "geometry": f.get("geometry"), + } return out +def load_index() -> dict[str, str]: + """Return `{region_id: updates_url}` parsed from the live Geofabrik index. Cached per process.""" + return {rid: entry["updates"] for rid, entry in _raw_index().items() if entry.get("updates")} + + def country_update_url(region_id: str) -> str: """Resolve a Geofabrik region id (e.g. ``nepal``) to its `*-updates` base URL. 
@@ -38,4 +48,18 @@ def country_update_url(region_id: str) -> str: return idx[key] -__all__ = ["INDEX_URL", "country_update_url", "load_index"] +def country_geometry(region_id: str) -> Polygon | MultiPolygon: + """Resolve a Geofabrik region id to its published polygon.""" + idx = _raw_index() + key = region_id.lower() + entry = idx.get(key) + geom_dict = entry.get("geometry") if entry else None + if not geom_dict or geom_dict.get("type") not in ("Polygon", "MultiPolygon"): + raise UnknownRegionError(f"Geofabrik region '{region_id}' has no published polygon") + geom = shape(geom_dict) + if not isinstance(geom, (Polygon, MultiPolygon)): + raise UnknownRegionError(f"Geofabrik region '{region_id}' geometry is {type(geom).__name__}") + return geom + + +__all__ = ["INDEX_URL", "country_geometry", "country_update_url", "load_index"] diff --git a/osmsg/handlers.py b/osmsg/handlers.py index e0dadeb..8704441 100644 --- a/osmsg/handlers.py +++ b/osmsg/handlers.py @@ -9,9 +9,8 @@ import osmium import osmium.geom from shapely import wkt as shapely_wkt -from shapely.geometry import Point +from shapely.geometry import box -from .boundary import bbox_centroid from .models import Action, Changeset, ChangesetStats, TagValueStat, User HASHTAG_RE = re.compile(r"#[\w-]+") @@ -52,8 +51,15 @@ def changeset(self, c) -> None: return if self._geom is not None: - centroid_xy = bbox_centroid(c.bounds) - if centroid_xy is None or not self._geom.contains(Point(*centroid_xy)): + if not c.bounds.valid(): + return + bbox = box( + c.bounds.bottom_left.lon, + c.bounds.bottom_left.lat, + c.bounds.top_right.lon, + c.bounds.top_right.lat, + ) + if not self._geom.intersects(bbox): return keep = bool(cfg["changeset_meta"] and not cfg["hashtags"]) diff --git a/osmsg/pipeline.py b/osmsg/pipeline.py index f2896cb..de7a4d8 100644 --- a/osmsg/pipeline.py +++ b/osmsg/pipeline.py @@ -12,6 +12,7 @@ from typing import Any from platformdirs import user_cache_dir +from shapely.ops import unary_union from . 
import db as dbmod from . import tm @@ -23,7 +24,7 @@ from .exceptions import CredentialsRequiredError, NoDataFoundError, OsmsgError from .export import summary_markdown, to_csv, to_json, to_parquet, to_psql from .fetch import download_osm_file -from .geofabrik import country_update_url +from .geofabrik import country_geometry, country_update_url from .replication import SHORTCUTS, ChangesetReplication, changefile_download_urls, resolve_url from .ui import info, progress_bar, warn @@ -297,6 +298,10 @@ def run(cfg: RunConfig) -> dict[str, Any]: if cfg.boundary: cfg.changeset = cfg.changeset or not cfg.hashtags geom_wkt = load_boundary(cfg.boundary).wkt + elif cfg.countries: + geoms = [country_geometry(r) for r in cfg.countries] + cfg.changeset = cfg.changeset or not cfg.hashtags + geom_wkt = (unary_union(geoms) if len(geoms) > 1 else geoms[0]).wkt # summary/tm_stats read the changesets table — populate it even if user didn't ask. if (cfg.tm_stats or cfg.summary) and not cfg.changeset and not cfg.hashtags: diff --git a/pyproject.toml b/pyproject.toml index 79dbbec..65d2902 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -108,7 +108,7 @@ quote-style = "double" indent-style = "space" [tool.ty.src] -include = ["osmsg"] +include = ["osmsg", "api"] [tool.coverage.run] source = ["osmsg"] diff --git a/tests/test_api.py b/tests/test_api.py index effa9cd..915cf6e 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -3,10 +3,16 @@ from litestar import Litestar from litestar.testing import TestClient -from osmsg.api import app as api_app -from osmsg.api.app import get_user_stats, health +from api import app as api_app +from api.app import get_user_stats, health +from api.db import PG_SCHEMA as API_PG_SCHEMA +from osmsg.export.psql import PG_SCHEMA as CLI_PG_SCHEMA -api_module = import_module("osmsg.api.app") +api_module = import_module("api.app") + + +def test_pg_schema_is_in_sync_with_cli(): + assert API_PG_SCHEMA == CLI_PG_SCHEMA def 
test_api_exposes_only_active_public_routes(): diff --git a/tests/test_geofabrik.py b/tests/test_geofabrik.py index 63cc15b..7386e09 100644 --- a/tests/test_geofabrik.py +++ b/tests/test_geofabrik.py @@ -2,10 +2,14 @@ from __future__ import annotations +import json + import pytest +from shapely.geometry import MultiPolygon, Polygon +from osmsg.boundary import load_boundary from osmsg.exceptions import UnknownRegionError -from osmsg.geofabrik import country_update_url, load_index +from osmsg.geofabrik import country_geometry, country_update_url, load_index @pytest.mark.network @@ -26,3 +30,61 @@ def test_load_index_caches_in_memory(): a = load_index() b = load_index() assert a is b + + +@pytest.mark.network +def test_country_geometry_resolves_nepal(): + geom = country_geometry("nepal") + assert isinstance(geom, (Polygon, MultiPolygon)) + minx, miny, maxx, maxy = geom.bounds + assert 80 < minx < 90 and 26 < miny < 31 + assert 86 < maxx < 90 and 28 < maxy < 32 + + +@pytest.mark.network +def test_country_geometry_unknown_region(): + with pytest.raises(UnknownRegionError): + country_geometry("notarealcountry") + + +# --- load_boundary: region-name resolution --- + + +@pytest.mark.network +def test_load_boundary_accepts_region_name(): + geom = load_boundary("nepal") + assert isinstance(geom, (Polygon, MultiPolygon)) + minx, miny, maxx, maxy = geom.bounds + assert 80 < minx < 90 and 26 < miny < 31 + + +@pytest.mark.network +def test_load_boundary_unknown_name_raises(): + with pytest.raises(ValueError, match="not valid JSON"): + load_boundary("notarealcountry") + + +def test_load_boundary_accepts_geojson_file(tmp_path): + feat = { + "type": "Feature", + "geometry": { + "type": "Polygon", + "coordinates": [[[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]], + }, + "properties": {}, + } + p = tmp_path / "boundary.geojson" + p.write_text(json.dumps(feat)) + geom = load_boundary(str(p)) + assert isinstance(geom, Polygon) + + +def test_load_boundary_accepts_inline_geojson(): + inline = 
json.dumps( + { + "type": "Polygon", + "coordinates": [[[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]], + } + ) + geom = load_boundary(inline) + assert isinstance(geom, Polygon) diff --git a/tests/test_handlers.py b/tests/test_handlers.py index b403309..72b81de 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -27,6 +27,9 @@ def _write_changeset_xml(tmp_path, name, changesets): f'num_changes="{cs.get("num_changes", 1)}" user="{cs.get("user", "alice")}" ' f'uid="{cs.get("uid", 10)}" comments_count="0"' ) + if "bbox" in cs: + min_lon, min_lat, max_lon, max_lat = cs["bbox"] + attrs += f' min_lat="{min_lat}" min_lon="{min_lon}" max_lat="{max_lat}" max_lon="{max_lon}"' if is_open: attrs += ' open="true"' else: @@ -655,6 +658,27 @@ def test_changefile_handler_none_valid_changesets_means_no_filter(osc_factory, c assert set(handler.stats.keys()) == {1, 2} +def test_changeset_handler_geom_filter_intersects_not_centroid(tmp_path, changeset_config): + """Border-straddling changesets must be kept (intersects), not silently dropped.""" + from shapely.geometry import box + + boundary = box(0, 0, 10, 10) + changeset_config["geom_filter_wkt"] = boundary.wkt + p = _write_changeset_xml( + tmp_path, + "cs_geom.osm", + [ + {"id": 1, "bbox": (1, 1, 2, 2)}, + {"id": 2, "bbox": (9, 9, 12, 12)}, + {"id": 3, "bbox": (20, 20, 25, 25)}, + {"id": 4}, + ], + ) + h = ChangesetHandler(changeset_config) + h.apply_file(str(p)) + assert set(h.changesets.keys()) == {1, 2} + + @pytest.mark.parametrize("version,expected_bucket", [(1, "c"), (2, "m"), (0, "d")]) def test_changefile_handler_action_dispatch(osc_factory, changefile_config, version, expected_bucket): """version=1 → CREATE, >1 → MODIFY, 0 → DELETE.""" diff --git a/worker-entrypoint.sh b/worker-entrypoint.sh new file mode 100755 index 0000000..b9395b3 --- /dev/null +++ b/worker-entrypoint.sh @@ -0,0 +1,8 @@ +#!/bin/sh +set -e +SCHEDULE="${OSMSG_SCHEDULE:-*/2 * * * *}" +echo "$SCHEDULE /app/.venv/bin/python -m osmsg._tick" > 
/app/crontab +echo "[worker] schedule: $SCHEDULE" +echo "[worker] initial tick" +/app/.venv/bin/python -m osmsg._tick || echo "[worker] initial tick exit=$?" +exec /usr/local/bin/supercronic /app/crontab From fe8e2c35b82f52d11fd3fbdcbf49b2674e9730b3 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 7 May 2026 17:38:06 +0200 Subject: [PATCH 21/49] fix(schema): fixes shcmea being in multiple pieces , added test case to catch the change currently its duplicated in api and osmsg to keep api image slim --- api/db.py | 47 ++----------------------- api/pg_schema.py | 43 +++++++++++++++++++++++ osmsg/db/duckdb_schema.py | 43 +++++++++++++++++++++++ osmsg/db/schema.py | 74 ++++----------------------------------- osmsg/export/psql.py | 44 +---------------------- osmsg/pg_schema.py | 43 +++++++++++++++++++++++ tests/test_api.py | 6 ++-- 7 files changed, 141 insertions(+), 159 deletions(-) create mode 100644 api/pg_schema.py create mode 100644 osmsg/db/duckdb_schema.py create mode 100644 osmsg/pg_schema.py diff --git a/api/db.py b/api/db.py index 779f47d..c50dee1 100644 --- a/api/db.py +++ b/api/db.py @@ -3,52 +3,9 @@ import asyncpg from dotenv import load_dotenv -load_dotenv() - +from .pg_schema import PG_SCHEMA -PG_SCHEMA = """ -CREATE TABLE IF NOT EXISTS users ( - uid BIGINT PRIMARY KEY, - username TEXT NOT NULL -); -CREATE TABLE IF NOT EXISTS changesets ( - changeset_id BIGINT PRIMARY KEY, - uid BIGINT NOT NULL REFERENCES users(uid), - created_at TIMESTAMPTZ, - hashtags TEXT[], - editor TEXT, - min_lon DOUBLE PRECISION, - min_lat DOUBLE PRECISION, - max_lon DOUBLE PRECISION, - max_lat DOUBLE PRECISION -); -CREATE INDEX IF NOT EXISTS idx_changesets_created_at ON changesets(created_at); -CREATE TABLE IF NOT EXISTS changeset_stats ( - changeset_id BIGINT NOT NULL REFERENCES changesets(changeset_id), - seq_id BIGINT NOT NULL, - uid BIGINT NOT NULL REFERENCES users(uid), - nodes_created INTEGER DEFAULT 0, - nodes_modified INTEGER DEFAULT 0, - nodes_deleted INTEGER 
DEFAULT 0, - ways_created INTEGER DEFAULT 0, - ways_modified INTEGER DEFAULT 0, - ways_deleted INTEGER DEFAULT 0, - rels_created INTEGER DEFAULT 0, - rels_modified INTEGER DEFAULT 0, - rels_deleted INTEGER DEFAULT 0, - poi_created INTEGER DEFAULT 0, - poi_modified INTEGER DEFAULT 0, - tag_stats JSONB, - PRIMARY KEY (seq_id, changeset_id) -); -CREATE INDEX IF NOT EXISTS idx_changeset_stats_uid ON changeset_stats(uid); -CREATE TABLE IF NOT EXISTS state ( - source_url TEXT PRIMARY KEY, - last_seq BIGINT NOT NULL, - last_ts TIMESTAMPTZ NOT NULL, - updated_at TIMESTAMPTZ NOT NULL -); -""" +load_dotenv() _pool: asyncpg.Pool | None = None diff --git a/api/pg_schema.py b/api/pg_schema.py new file mode 100644 index 0000000..1afe55e --- /dev/null +++ b/api/pg_schema.py @@ -0,0 +1,43 @@ +PG_SCHEMA = """ +CREATE TABLE IF NOT EXISTS users ( + uid BIGINT PRIMARY KEY, + username TEXT NOT NULL +); +CREATE TABLE IF NOT EXISTS changesets ( + changeset_id BIGINT PRIMARY KEY, + uid BIGINT NOT NULL REFERENCES users(uid), + created_at TIMESTAMPTZ, + hashtags TEXT[], + editor TEXT, + min_lon DOUBLE PRECISION, + min_lat DOUBLE PRECISION, + max_lon DOUBLE PRECISION, + max_lat DOUBLE PRECISION +); +CREATE INDEX IF NOT EXISTS idx_changesets_created_at ON changesets(created_at); +CREATE TABLE IF NOT EXISTS changeset_stats ( + changeset_id BIGINT NOT NULL REFERENCES changesets(changeset_id), + seq_id BIGINT NOT NULL, + uid BIGINT NOT NULL REFERENCES users(uid), + nodes_created INTEGER DEFAULT 0, + nodes_modified INTEGER DEFAULT 0, + nodes_deleted INTEGER DEFAULT 0, + ways_created INTEGER DEFAULT 0, + ways_modified INTEGER DEFAULT 0, + ways_deleted INTEGER DEFAULT 0, + rels_created INTEGER DEFAULT 0, + rels_modified INTEGER DEFAULT 0, + rels_deleted INTEGER DEFAULT 0, + poi_created INTEGER DEFAULT 0, + poi_modified INTEGER DEFAULT 0, + tag_stats JSONB, + PRIMARY KEY (seq_id, changeset_id) +); +CREATE INDEX IF NOT EXISTS idx_changeset_stats_uid ON changeset_stats(uid); +CREATE TABLE IF NOT 
EXISTS state ( + source_url TEXT PRIMARY KEY, + last_seq BIGINT NOT NULL, + last_ts TIMESTAMPTZ NOT NULL, + updated_at TIMESTAMPTZ NOT NULL +); +""" diff --git a/osmsg/db/duckdb_schema.py b/osmsg/db/duckdb_schema.py new file mode 100644 index 0000000..5140bf7 --- /dev/null +++ b/osmsg/db/duckdb_schema.py @@ -0,0 +1,43 @@ +DUCKDB_SCHEMA = """ +CREATE TABLE IF NOT EXISTS users ( + uid BIGINT PRIMARY KEY, + username VARCHAR NOT NULL +); +CREATE TABLE IF NOT EXISTS changesets ( + changeset_id BIGINT PRIMARY KEY, + uid BIGINT NOT NULL REFERENCES users(uid), + created_at TIMESTAMPTZ, + hashtags VARCHAR[], + editor VARCHAR, + min_lon DOUBLE, + min_lat DOUBLE, + max_lon DOUBLE, + max_lat DOUBLE +); +CREATE INDEX IF NOT EXISTS idx_changesets_created_at ON changesets(created_at); +CREATE TABLE IF NOT EXISTS changeset_stats ( + changeset_id BIGINT NOT NULL REFERENCES changesets(changeset_id), + seq_id BIGINT NOT NULL, + uid BIGINT NOT NULL REFERENCES users(uid), + nodes_created INTEGER DEFAULT 0, + nodes_modified INTEGER DEFAULT 0, + nodes_deleted INTEGER DEFAULT 0, + ways_created INTEGER DEFAULT 0, + ways_modified INTEGER DEFAULT 0, + ways_deleted INTEGER DEFAULT 0, + rels_created INTEGER DEFAULT 0, + rels_modified INTEGER DEFAULT 0, + rels_deleted INTEGER DEFAULT 0, + poi_created INTEGER DEFAULT 0, + poi_modified INTEGER DEFAULT 0, + tag_stats JSON, + PRIMARY KEY (seq_id, changeset_id) +); +CREATE INDEX IF NOT EXISTS idx_changeset_stats_uid ON changeset_stats(uid); +CREATE TABLE IF NOT EXISTS state ( + source_url VARCHAR PRIMARY KEY, + last_seq BIGINT NOT NULL, + last_ts TIMESTAMPTZ NOT NULL, + updated_at TIMESTAMPTZ NOT NULL +); +""" diff --git a/osmsg/db/schema.py b/osmsg/db/schema.py index 1801dad..fb5115b 100644 --- a/osmsg/db/schema.py +++ b/osmsg/db/schema.py @@ -1,16 +1,13 @@ -"""DuckDB schema. Three data tables (`users`, `changesets`, `changeset_stats`) -plus a single-row-per-source `state` table for `--update` resume. 
Identical -schema works in PostgreSQL via the `psql` exporter.""" - from __future__ import annotations from typing import Any import duckdb +from .duckdb_schema import DUCKDB_SCHEMA + def connect(db_path: str) -> duckdb.DuckDBPyConnection: - """Open a DuckDB connection at `db_path`. Creates the file if absent.""" return duckdb.connect(db_path) @@ -19,71 +16,13 @@ def close(conn: duckdb.DuckDBPyConnection) -> None: def create_tables(conn: duckdb.DuckDBPyConnection) -> None: - """Create all osmsg tables if they don't exist. Idempotent.""" - conn.execute( - """ - CREATE TABLE IF NOT EXISTS users ( - uid BIGINT PRIMARY KEY, - username VARCHAR NOT NULL - ) - """ - ) - conn.execute( - """ - CREATE TABLE IF NOT EXISTS changesets ( - changeset_id BIGINT PRIMARY KEY, - uid BIGINT NOT NULL REFERENCES users(uid), - created_at TIMESTAMPTZ, - hashtags VARCHAR[], - editor VARCHAR, - min_lon DOUBLE, - min_lat DOUBLE, - max_lon DOUBLE, - max_lat DOUBLE - ) - """ - ) - conn.execute("CREATE INDEX IF NOT EXISTS idx_changesets_created_at ON changesets(created_at)") - - conn.execute( - """ - CREATE TABLE IF NOT EXISTS changeset_stats ( - changeset_id BIGINT NOT NULL REFERENCES changesets(changeset_id), - seq_id BIGINT NOT NULL, - uid BIGINT NOT NULL REFERENCES users(uid), - nodes_created INTEGER DEFAULT 0, - nodes_modified INTEGER DEFAULT 0, - nodes_deleted INTEGER DEFAULT 0, - ways_created INTEGER DEFAULT 0, - ways_modified INTEGER DEFAULT 0, - ways_deleted INTEGER DEFAULT 0, - rels_created INTEGER DEFAULT 0, - rels_modified INTEGER DEFAULT 0, - rels_deleted INTEGER DEFAULT 0, - poi_created INTEGER DEFAULT 0, - poi_modified INTEGER DEFAULT 0, - tag_stats JSON, - PRIMARY KEY (seq_id, changeset_id) - ) - """ - ) - conn.execute("CREATE INDEX IF NOT EXISTS idx_changeset_stats_uid ON changeset_stats(uid)") - - # One row per source_url — resume marker, not an audit log. 
- conn.execute( - """ - CREATE TABLE IF NOT EXISTS state ( - source_url VARCHAR PRIMARY KEY, - last_seq BIGINT NOT NULL, - last_ts TIMESTAMPTZ NOT NULL, - updated_at TIMESTAMPTZ NOT NULL - ) - """ - ) + for stmt in DUCKDB_SCHEMA.strip().split(";"): + stmt = stmt.strip() + if stmt: + conn.execute(stmt) def upsert_state(conn: duckdb.DuckDBPyConnection, *, source_url: str, last_seq: int, last_ts, updated_at) -> None: - """Record (or replace) the resume marker for `source_url`. Single row per URL.""" conn.execute( """ INSERT INTO state (source_url, last_seq, last_ts, updated_at) @@ -98,7 +37,6 @@ def upsert_state(conn: duckdb.DuckDBPyConnection, *, source_url: str, last_seq: def get_state(conn: duckdb.DuckDBPyConnection, source_url: str) -> dict[str, Any] | None: - """Return `{last_seq, last_ts, updated_at}` for `source_url`, or None if unseen.""" row = conn.execute( "SELECT last_seq, last_ts, updated_at FROM state WHERE source_url = ?", [source_url], diff --git a/osmsg/export/psql.py b/osmsg/export/psql.py index bfe86f3..1ef238c 100644 --- a/osmsg/export/psql.py +++ b/osmsg/export/psql.py @@ -10,49 +10,7 @@ import duckdb -PG_SCHEMA = """ -CREATE TABLE IF NOT EXISTS users ( - uid BIGINT PRIMARY KEY, - username TEXT NOT NULL -); -CREATE TABLE IF NOT EXISTS changesets ( - changeset_id BIGINT PRIMARY KEY, - uid BIGINT NOT NULL REFERENCES users(uid), - created_at TIMESTAMPTZ, - hashtags TEXT[], - editor TEXT, - min_lon DOUBLE PRECISION, - min_lat DOUBLE PRECISION, - max_lon DOUBLE PRECISION, - max_lat DOUBLE PRECISION -); -CREATE INDEX IF NOT EXISTS idx_changesets_created_at ON changesets(created_at); -CREATE TABLE IF NOT EXISTS changeset_stats ( - changeset_id BIGINT NOT NULL REFERENCES changesets(changeset_id), - seq_id BIGINT NOT NULL, - uid BIGINT NOT NULL REFERENCES users(uid), - nodes_created INTEGER DEFAULT 0, - nodes_modified INTEGER DEFAULT 0, - nodes_deleted INTEGER DEFAULT 0, - ways_created INTEGER DEFAULT 0, - ways_modified INTEGER DEFAULT 0, - ways_deleted 
INTEGER DEFAULT 0, - rels_created INTEGER DEFAULT 0, - rels_modified INTEGER DEFAULT 0, - rels_deleted INTEGER DEFAULT 0, - poi_created INTEGER DEFAULT 0, - poi_modified INTEGER DEFAULT 0, - tag_stats JSONB, - PRIMARY KEY (seq_id, changeset_id) -); -CREATE INDEX IF NOT EXISTS idx_changeset_stats_uid ON changeset_stats(uid); -CREATE TABLE IF NOT EXISTS state ( - source_url TEXT PRIMARY KEY, - last_seq BIGINT NOT NULL, - last_ts TIMESTAMPTZ NOT NULL, - updated_at TIMESTAMPTZ NOT NULL -); -""" +from ..pg_schema import PG_SCHEMA def to_psql(conn: duckdb.DuckDBPyConnection, dsn: str) -> None: diff --git a/osmsg/pg_schema.py b/osmsg/pg_schema.py new file mode 100644 index 0000000..1afe55e --- /dev/null +++ b/osmsg/pg_schema.py @@ -0,0 +1,43 @@ +PG_SCHEMA = """ +CREATE TABLE IF NOT EXISTS users ( + uid BIGINT PRIMARY KEY, + username TEXT NOT NULL +); +CREATE TABLE IF NOT EXISTS changesets ( + changeset_id BIGINT PRIMARY KEY, + uid BIGINT NOT NULL REFERENCES users(uid), + created_at TIMESTAMPTZ, + hashtags TEXT[], + editor TEXT, + min_lon DOUBLE PRECISION, + min_lat DOUBLE PRECISION, + max_lon DOUBLE PRECISION, + max_lat DOUBLE PRECISION +); +CREATE INDEX IF NOT EXISTS idx_changesets_created_at ON changesets(created_at); +CREATE TABLE IF NOT EXISTS changeset_stats ( + changeset_id BIGINT NOT NULL REFERENCES changesets(changeset_id), + seq_id BIGINT NOT NULL, + uid BIGINT NOT NULL REFERENCES users(uid), + nodes_created INTEGER DEFAULT 0, + nodes_modified INTEGER DEFAULT 0, + nodes_deleted INTEGER DEFAULT 0, + ways_created INTEGER DEFAULT 0, + ways_modified INTEGER DEFAULT 0, + ways_deleted INTEGER DEFAULT 0, + rels_created INTEGER DEFAULT 0, + rels_modified INTEGER DEFAULT 0, + rels_deleted INTEGER DEFAULT 0, + poi_created INTEGER DEFAULT 0, + poi_modified INTEGER DEFAULT 0, + tag_stats JSONB, + PRIMARY KEY (seq_id, changeset_id) +); +CREATE INDEX IF NOT EXISTS idx_changeset_stats_uid ON changeset_stats(uid); +CREATE TABLE IF NOT EXISTS state ( + source_url TEXT PRIMARY 
KEY, + last_seq BIGINT NOT NULL, + last_ts TIMESTAMPTZ NOT NULL, + updated_at TIMESTAMPTZ NOT NULL +); +""" diff --git a/tests/test_api.py b/tests/test_api.py index 915cf6e..d680831 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -5,13 +5,13 @@ from api import app as api_app from api.app import get_user_stats, health -from api.db import PG_SCHEMA as API_PG_SCHEMA -from osmsg.export.psql import PG_SCHEMA as CLI_PG_SCHEMA +from api.pg_schema import PG_SCHEMA as API_PG_SCHEMA +from osmsg.pg_schema import PG_SCHEMA as CLI_PG_SCHEMA api_module = import_module("api.app") -def test_pg_schema_is_in_sync_with_cli(): +def test_pg_schema_in_sync(): assert API_PG_SCHEMA == CLI_PG_SCHEMA From 1c5b8a916319c2bf3371ba01bb9721d3cd4f586b Mon Sep 17 00:00:00 2001 From: NirrWorks Date: Thu, 7 May 2026 21:57:31 +0545 Subject: [PATCH 22/49] Normalize API hashtag filters --- api/app.py | 23 +++++++++++++++++++++-- tests/test_api.py | 16 ++++++++++++++-- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/api/app.py b/api/app.py index da457fc..17e5ffa 100644 --- a/api/app.py +++ b/api/app.py @@ -11,6 +11,24 @@ from .queries import fetch_user_stats +def normalize_hashtags(hashtag: list[str] | None) -> list[str] | None: + if not hashtag: + return None + + normalized: list[str] = [] + seen: set[str] = set() + for value in hashtag: + cleaned = value.strip() + if not cleaned: + continue + cleaned = "#" + cleaned.lstrip("#") + key = cleaned.lower() + if key not in seen: + normalized.append(cleaned) + seen.add(key) + return normalized or None + + @asynccontextmanager async def lifespan(app: Litestar): await open_pool() @@ -37,12 +55,13 @@ async def get_user_stats( if start >= end: raise HTTPException(status_code=400, detail="start must be before end") - users = await fetch_user_stats(start=start, end=end, hashtag=hashtag, limit=limit, offset=offset) + normalized_hashtag = normalize_hashtags(hashtag) + users = await fetch_user_stats(start=start, end=end, 
hashtag=normalized_hashtag, limit=limit, offset=offset) return { "count": len(users), "start": start.isoformat(), "end": end.isoformat(), - "hashtag": hashtag, + "hashtag": normalized_hashtag, "limit": limit, "offset": offset, "users": users, diff --git a/tests/test_api.py b/tests/test_api.py index d680831..8f2350d 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -4,7 +4,7 @@ from litestar.testing import TestClient from api import app as api_app -from api.app import get_user_stats, health +from api.app import get_user_stats, health, normalize_hashtags from api.pg_schema import PG_SCHEMA as API_PG_SCHEMA from osmsg.pg_schema import PG_SCHEMA as CLI_PG_SCHEMA @@ -32,6 +32,18 @@ def test_health_endpoint_returns_ok(): assert response.json() == {"status": "ok"} +def test_normalize_hashtags_accepts_bare_or_prefixed_values(): + assert normalize_hashtags(["maproulette", "#HOTOSM", " #roads ", ""]) == [ + "#maproulette", + "#HOTOSM", + "#roads", + ] + + +def test_normalize_hashtags_dedupes_case_insensitively(): + assert normalize_hashtags(["maproulette", "#MapRoulette", "#roads"]) == ["#maproulette", "#roads"] + + def test_user_stats_endpoint_returns_expected_response(monkeypatch): async def fake_fetch_user_stats(*, start, end, hashtag, limit, offset): assert start.isoformat() == "2026-05-01T00:00:00+00:00" @@ -69,7 +81,7 @@ async def fake_fetch_user_stats(*, start, end, hashtag, limit, offset): params=[ ("start", "2026-05-01T00:00:00Z"), ("end", "2026-05-02T00:00:00Z"), - ("hashtag", "#mapathon"), + ("hashtag", "mapathon"), ("hashtag", "#roads"), ("limit", "1"), ], From 3100a651a863923ee24f630df515b78fecfc1718 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 7 May 2026 18:18:32 +0200 Subject: [PATCH 23/49] fix(caddy): adds caddy server and fix for the api rendering on 80 port --- .env.example | 12 +--- api/app.py | 28 ++++---- api/templates/home.html | 27 ++++++++ docker-compose.yml | 18 ++++- docs/infra.md | 143 +++++++++++++++++----------------------- 
infra/.env.example | 13 ++++ infra/Caddyfile | 3 + infra/osmsg.service | 19 ++++++ 8 files changed, 155 insertions(+), 108 deletions(-) create mode 100644 api/templates/home.html create mode 100644 infra/.env.example create mode 100644 infra/Caddyfile create mode 100644 infra/osmsg.service diff --git a/.env.example b/.env.example index c8e576c..f394bfb 100644 --- a/.env.example +++ b/.env.example @@ -1,12 +1,4 @@ +# Local dev — database for running osmsg CLI or the API standalone DATABASE_URL=postgresql://osmsg:osmsg@localhost:5432/osmsg -# Worker (docker compose) — leave blank for planet/minute defaults. -# OSMSG_NAME=nepal -# OSMSG_URL=minute # minute/hour/day or full URL; ignored when OSMSG_COUNTRY is set -# OSMSG_COUNTRY=nepal -# OSMSG_BOOTSTRAP=day -# OSMSG_BOOTSTRAP_DAYS= # alternative to OSMSG_BOOTSTRAP for arbitrary day counts -# OSMSG_BOUNDARY= # path to GeoJSON; overrides auto-derived country geometry -# OSMSG_SCHEDULE=0 * * * * -# OSM_USERNAME= -# OSM_PASSWORD= +# For self-hosting with docker compose, copy infra/.env.example instead diff --git a/api/app.py b/api/app.py index da457fc..7a53a52 100644 --- a/api/app.py +++ b/api/app.py @@ -1,15 +1,21 @@ from contextlib import asynccontextmanager from datetime import datetime +from pathlib import Path from typing import Any from litestar import Litestar, get +from litestar.contrib.jinja import JinjaTemplateEngine from litestar.exceptions import HTTPException from litestar.openapi.config import OpenAPIConfig from litestar.params import Parameter +from litestar.response import Template +from litestar.template.config import TemplateConfig from .db import close_pool, ensure_schema, open_pool from .queries import fetch_user_stats +TEMPLATES = Path(__file__).parent / "templates" + @asynccontextmanager async def lifespan(app: Litestar): @@ -21,6 +27,11 @@ async def lifespan(app: Litestar): await close_pool() +@get("/", include_in_schema=False) +async def home() -> Template: + return Template("home.html") + + 
@get("/health") async def health() -> dict[str, str]: return {"status": "ok"} @@ -49,22 +60,9 @@ async def get_user_stats( } -# @get("/api/v1/stats/summary") -# async def get_summary(start_date: datetime, end_date: datetime, hashtag: str | None = None) -> dict: -# if start_date > end_date: -# return {"error": "start_date must be before end_date"} -# return {"message": "Temporarily disabled"} - - -# @get("/api/v1/stats/timeseries") -# async def get_timeseries(start_date: datetime, end_date: datetime, hashtag: str | None = None) -> dict: -# if start_date > end_date: -# return {"error": "start_date must be before end_date"} -# return {"message": "Temporarily disabled"} - - app = Litestar( - route_handlers=[health, get_user_stats], + route_handlers=[home, health, get_user_stats], lifespan=[lifespan], openapi_config=OpenAPIConfig(title="OSMSG API", version="1.0.0", path="/docs"), + template_config=TemplateConfig(directory=TEMPLATES, engine=JinjaTemplateEngine), # ty: ignore[invalid-argument-type] ) diff --git a/api/templates/home.html b/api/templates/home.html new file mode 100644 index 0000000..133ad39 --- /dev/null +++ b/api/templates/home.html @@ -0,0 +1,27 @@ + + + + + + OSMSG API + + + +

OSMSG

+

OpenStreetMap contributor stats API

+
    +
  • API docs (Swagger)
  • +
  • GET /api/v1/user-stats : contributor stats by date range
  • +
  • GET /health : health check
  • +
+ + diff --git a/docker-compose.yml b/docker-compose.yml index 5eb75f4..ccf33f3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -22,8 +22,6 @@ services: target: api environment: DATABASE_URL: postgresql://osmsg:osmsg@db:5432/osmsg - ports: - - "8000:8000" depends_on: db: condition: service_healthy @@ -52,7 +50,23 @@ services: condition: service_healthy restart: unless-stopped + caddy: + image: caddy:2-alpine + ports: + - "80:80" + - "443:443" + - "443:443/udp" + volumes: + - ./infra/Caddyfile:/etc/caddy/Caddyfile:ro + - caddy-data:/data + - caddy-config:/config + depends_on: + - api + restart: unless-stopped + volumes: pgdata: osmsg-data: osmsg-cache: + caddy-data: + caddy-config: diff --git a/docs/infra.md b/docs/infra.md index 57d675f..0296469 100644 --- a/docs/infra.md +++ b/docs/infra.md @@ -7,80 +7,75 @@ This guide covers running osmsg continuously on a server: a Postgres database, a | Service | Image target | Role | | --- | --- | --- | | `db` | `postgres:17-alpine` | Persistent stats store | -| `api` | `Dockerfile → api` | Litestar REST API at `:8000` | +| `api` | `Dockerfile → api` | Litestar REST API (internal, port 8000) | | `worker` | `Dockerfile → worker` | osmsg cron worker (supercronic) | +| `caddy` | `caddy:2-alpine` | Reverse proxy — HTTP/HTTPS termination | The worker bootstraps on first run (no existing state → `--last `) and switches to `--update` automatically on subsequent ticks. -## Quick start : planet, every 2 minutes +## Quick start ```bash docker compose up -d -curl 'http://localhost:8000/health' -curl 'http://localhost:8000/api/v1/user-stats?start=2026-05-07T00:00:00Z&end=2026-05-08T00:00:00Z&limit=20' +curl 'http://localhost/health' +curl 'http://localhost/api/v1/user-stats?start=2026-05-07T00:00:00Z&end=2026-05-08T00:00:00Z&limit=20' ``` -No `.env` needed : defaults are planet replication, `*/2 * * * *` schedule, bootstrap from last hour. 
+No config needed: defaults are planet replication, `*/2 * * * *` schedule, bootstrap from last hour. -## Country mode (Geofabrik) +## Configuration -Create a `.env` file (copy `.env.example` and edit): +All deployment environment variables live in `infra/.env.example`. +Copy it to `/opt/osmsg/.env` and edit: ```bash -OSMSG_NAME=nepal -OSMSG_COUNTRY=nepal # any Geofabrik region id -OSMSG_BOOTSTRAP=day # first-run window -OSMSG_SCHEDULE=0 * * * * # hourly -OSM_USERNAME=you -OSM_PASSWORD=secret -``` - -Then: - -```bash -docker compose up -d +cp infra/.env.example /opt/osmsg/.env +$EDITOR /opt/osmsg/.env ``` -The worker fetches Nepal-specific replication diffs from Geofabrik. -Changesets are filtered to those whose bounding box intersects the Nepal polygon (auto-derived from the Geofabrik index). -A custom boundary GeoJSON can override this via `OSMSG_BOUNDARY`. - -### Geofabrik credentials - -Geofabrik sub-daily replication uses your OSM account credentials directly via OAuth 2.0. -Set `OSM_USERNAME` and `OSM_PASSWORD` in `.env` — no browser opt-in or separate Geofabrik registration required. - -## Environment variables - -All variables are optional; defaults target the planet at minute granularity. - | Variable | Default | Notes | | --- | --- | --- | +| `OSMSG_DOMAIN` | `localhost` | Caddy server name — set to your domain for automatic HTTPS | | `OSMSG_NAME` | `stats` | DuckDB / output file basename | | `OSMSG_URL` | `minute` | `minute`/`hour`/`day` shortcut or full replication URL. Ignored when `OSMSG_COUNTRY` is set | | `OSMSG_COUNTRY` | _unset_ | Geofabrik region id (e.g. `nepal`). Needs `OSM_USERNAME`/`OSM_PASSWORD` | | `OSMSG_BOOTSTRAP` | `hour` | First-run window: `hour`/`day`/`week`/`month`/`year` | | `OSMSG_BOOTSTRAP_DAYS` | _unset_ | Exact day count for first-run bootstrap (alternative to `OSMSG_BOOTSTRAP`) | -| `OSMSG_BOUNDARY` | _unset_ | Path to a GeoJSON file. 
Overrides auto-derived country geometry | +| `OSMSG_BOUNDARY` | _unset_ | GeoJSON path or Geofabrik region name — overrides auto-derived country geometry | | `OSMSG_SCHEDULE` | `*/2 * * * *` | supercronic cron expression for the worker tick | -| `DATABASE_URL` | (compose default) | libpq DSN; worker mirrors each tick to Postgres | | `OSM_USERNAME` | _unset_ | OSM account username (Geofabrik auth) | | `OSM_PASSWORD` | _unset_ | OSM account password (Geofabrik auth) | -## API endpoints +### Geofabrik credentials + +Geofabrik sub-daily replication uses your OSM account credentials directly via OAuth 2.0. +Set `OSM_USERNAME` and `OSM_PASSWORD` — no browser opt-in or separate Geofabrik registration required. + +## Country mode example +```bash +# infra/.env (or /opt/osmsg/.env on the server) +OSMSG_NAME=nepal +OSMSG_COUNTRY=nepal +OSMSG_BOOTSTRAP=day +OSMSG_SCHEDULE=0 * * * * +OSM_USERNAME=you +OSM_PASSWORD=secret ``` -GET /health -GET /api/v1/user-stats?start=&end=[&hashtag=][&limit=N][&offset=N] -GET /docs (Swagger UI) + +```bash +docker compose up -d ``` +The worker fetches Nepal-specific replication diffs from Geofabrik. +Changesets are filtered to those whose bounding box intersects the Nepal polygon. +Override with a GeoJSON file or a Geofabrik region name via `OSMSG_BOUNDARY`. + ## Populate all-time stats (backfill) -For a long historical backfill, run osmsg directly before starting the continuous worker. -The worker will resume from where the backfill left off. +Run osmsg directly before starting the continuous worker; it will resume from where the backfill left off. 
-**Example : all Nepal stats since 2012:** +**All Nepal stats since 2012 — then keep updating:** ```bash docker compose up -d db # start only the database @@ -93,62 +88,38 @@ docker compose run --rm worker python -m osmsg \ --format psql \ --psql-dsn "postgresql://osmsg:osmsg@db:5432/osmsg" -docker compose up -d # start api + worker; worker resumes from last backfill seq +docker compose up -d # api + worker resume from last backfill seq ``` -The `state` table records the last processed sequence per source URL. -When the worker starts, it detects existing state and switches to `--update` automatically. - -**Example : last 90 days then keep refreshing:** +**Last 90 days then keep refreshing:** ```bash OSMSG_BOOTSTRAP_DAYS=90 docker compose up -d ``` -## Run the API standalone (without compose) - -Push stats into Postgres first, then start litestar: +## API endpoints -```bash -uv run osmsg --last day --format psql --psql-dsn "$DATABASE_URL" --name api_last_day -uv run --group api litestar --app api.app:app run --host 0.0.0.0 --port 8000 +```text +GET /health +GET /api/v1/user-stats?start=&end=[&hashtag=][&limit=N][&offset=N] +GET /docs (Swagger UI) ``` ## Run as a systemd service -Drop the project on the server and let systemd manage the compose stack across reboots. - -**1. Place files:** +**1. Place files on the server:** ```bash mkdir -p /opt/osmsg -cp docker-compose.yml Dockerfile worker-entrypoint.sh /opt/osmsg/ -cp .env.example /opt/osmsg/.env -# edit /opt/osmsg/.env with your values +cp -r docker-compose.yml Dockerfile infra worker-entrypoint.sh /opt/osmsg/ +cp infra/.env.example /opt/osmsg/.env +$EDITOR /opt/osmsg/.env # set OSMSG_DOMAIN and other vars ``` -**2. 
Create the unit file** at `/etc/systemd/system/osmsg.service`: - -```ini -[Unit] -Description=osmsg stats stack -Requires=docker.service -After=docker.service network-online.target -Wants=network-online.target - -[Service] -Type=simple -Restart=on-failure -RestartSec=10 -WorkingDirectory=/opt/osmsg -EnvironmentFile=/opt/osmsg/.env -ExecStart=/usr/bin/docker compose up -ExecStop=/usr/bin/docker compose down -TimeoutStartSec=300 -TimeoutStopSec=60 - -[Install] -WantedBy=multi-user.target +**2. Install the unit file:** + +```bash +cp infra/osmsg.service /etc/systemd/system/osmsg.service ``` **3. Enable and start:** @@ -162,7 +133,7 @@ systemctl enable --now osmsg ```bash systemctl status osmsg -journalctl -u osmsg -f # follow logs (all three containers) +journalctl -u osmsg -f # follow logs (all containers) systemctl restart osmsg # pick up .env changes systemctl stop osmsg # brings the full stack down cleanly ``` @@ -171,6 +142,15 @@ systemctl stop osmsg # brings the full stack down cleanly > Docker Compose inherits them, so `${OSMSG_COUNTRY}` and friends resolve without a separate > `--env-file` flag. 
+## Run the API standalone (without compose) + +Push stats into Postgres first, then start litestar: + +```bash +uv run osmsg --last day --format psql --psql-dsn "$DATABASE_URL" --name api_last_day +uv run --group api litestar --app api.app:app run --host 0.0.0.0 --port 8000 +``` + ## Volumes | Volume | Mount | Contents | @@ -178,3 +158,4 @@ systemctl stop osmsg # brings the full stack down cleanly | `pgdata` | `/var/lib/postgresql/data` | Postgres data directory | | `osmsg-data` | `/var/lib/osmsg` | DuckDB state files + parquet output | | `osmsg-cache` | `/var/cache/osmsg` | Downloaded replication diff cache | +| `caddy-data` | `/data` | Caddy TLS certificates | diff --git a/infra/.env.example b/infra/.env.example new file mode 100644 index 0000000..c43c2cf --- /dev/null +++ b/infra/.env.example @@ -0,0 +1,13 @@ +# Caddy — set to your domain for automatic HTTPS +# OSMSG_DOMAIN=stats.example.com + +# Worker — leave blank for planet/minute defaults +# OSMSG_NAME=nepal +# OSMSG_URL=minute # minute/hour/day or full URL; ignored when OSMSG_COUNTRY is set +# OSMSG_COUNTRY=nepal +# OSMSG_BOOTSTRAP=day +# OSMSG_BOOTSTRAP_DAYS= # exact day count; alternative to OSMSG_BOOTSTRAP +# OSMSG_BOUNDARY= # GeoJSON path or Geofabrik region name; overrides OSMSG_COUNTRY geometry +# OSMSG_SCHEDULE=0 * * * * +# OSM_USERNAME= +# OSM_PASSWORD= diff --git a/infra/Caddyfile b/infra/Caddyfile new file mode 100644 index 0000000..a301726 --- /dev/null +++ b/infra/Caddyfile @@ -0,0 +1,3 @@ +{$OSMSG_DOMAIN:localhost} { + reverse_proxy api:8000 +} diff --git a/infra/osmsg.service b/infra/osmsg.service new file mode 100644 index 0000000..1a4d2e7 --- /dev/null +++ b/infra/osmsg.service @@ -0,0 +1,19 @@ +[Unit] +Description=osmsg stats stack +Requires=docker.service +After=docker.service network-online.target +Wants=network-online.target + +[Service] +Type=simple +Restart=on-failure +RestartSec=10 +WorkingDirectory=/opt/osmsg +EnvironmentFile=/opt/osmsg/.env +ExecStart=/usr/bin/docker compose up 
+ExecStop=/usr/bin/docker compose down +TimeoutStartSec=300 +TimeoutStopSec=60 + +[Install] +WantedBy=multi-user.target From 81f5642704808f25a79efd03c68695464d59f1fd Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 7 May 2026 18:46:58 +0200 Subject: [PATCH 24/49] fix(docker): docker compose prod cluster adds infra service and fixes tick not to run parquet when psql is active --- docker-compose.yml | 18 +---- docs/infra.md | 163 ++++++++++++++++++--------------------- infra/docker-compose.yml | 66 ++++++++++++++++ infra/osmsg.service | 4 +- osmsg/_tick.py | 13 ++-- 5 files changed, 152 insertions(+), 112 deletions(-) create mode 100644 infra/docker-compose.yml diff --git a/docker-compose.yml b/docker-compose.yml index ccf33f3..5eb75f4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -22,6 +22,8 @@ services: target: api environment: DATABASE_URL: postgresql://osmsg:osmsg@db:5432/osmsg + ports: + - "8000:8000" depends_on: db: condition: service_healthy @@ -50,23 +52,7 @@ services: condition: service_healthy restart: unless-stopped - caddy: - image: caddy:2-alpine - ports: - - "80:80" - - "443:443" - - "443:443/udp" - volumes: - - ./infra/Caddyfile:/etc/caddy/Caddyfile:ro - - caddy-data:/data - - caddy-config:/config - depends_on: - - api - restart: unless-stopped - volumes: pgdata: osmsg-data: osmsg-cache: - caddy-data: - caddy-config: diff --git a/docs/infra.md b/docs/infra.md index 0296469..40c41f0 100644 --- a/docs/infra.md +++ b/docs/infra.md @@ -1,84 +1,111 @@ # Self-hosting osmsg -This guide covers running osmsg continuously on a server: a Postgres database, a Litestar REST API, and a worker that keeps OSM stats refreshed on a cron schedule. +This guide covers running osmsg continuously on a server. 
-## Stack overview +## Two compose files -| Service | Image target | Role | +| File | Purpose | Images | | --- | --- | --- | -| `db` | `postgres:17-alpine` | Persistent stats store | -| `api` | `Dockerfile → api` | Litestar REST API (internal, port 8000) | -| `worker` | `Dockerfile → worker` | osmsg cron worker (supercronic) | -| `caddy` | `caddy:2-alpine` | Reverse proxy — HTTP/HTTPS termination | +| `docker-compose.yml` | Local development | Built from source | +| `infra/docker-compose.yml` | Production / server | Pulled from GHCR | -The worker bootstraps on first run (no existing state → `--last `) and switches to `--update` automatically on subsequent ticks. +The production compose adds Caddy for HTTPS termination and pulls pre-built images -## Quick start +## Local development ```bash docker compose up -d -curl 'http://localhost/health' -curl 'http://localhost/api/v1/user-stats?start=2026-05-07T00:00:00Z&end=2026-05-08T00:00:00Z&limit=20' +curl 'http://localhost:8000/health' ``` -No config needed: defaults are planet replication, `*/2 * * * *` schedule, bootstrap from last hour. +The API is available on port 8000 directly. No config needed: defaults to planet replication, +`*/2 * * * *` schedule, bootstrap from last hour. + +## Production deployment + +### Stack + +| Service | Image | Role | +| --- | --- | --- | +| `db` | `postgres:17-alpine` | Persistent stats store | +| `api` | `ghcr.io/osgeonepal/osmsg-api:latest` | Litestar REST API | +| `worker` | `ghcr.io/osgeonepal/osmsg-worker:latest` | osmsg cron worker | +| `caddy` | `caddy:2-alpine` | HTTPS reverse proxy | -## Configuration +### Configuration -All deployment environment variables live in `infra/.env.example`. 
-Copy it to `/opt/osmsg/.env` and edit: +Copy `infra/.env.example` and edit: ```bash -cp infra/.env.example /opt/osmsg/.env -$EDITOR /opt/osmsg/.env +cp infra/.env.example infra/.env +$EDITOR infra/.env ``` | Variable | Default | Notes | | --- | --- | --- | -| `OSMSG_DOMAIN` | `localhost` | Caddy server name — set to your domain for automatic HTTPS | +| `OSMSG_DOMAIN` | `localhost` | Your domain — enables automatic HTTPS via Caddy | | `OSMSG_NAME` | `stats` | DuckDB / output file basename | -| `OSMSG_URL` | `minute` | `minute`/`hour`/`day` shortcut or full replication URL. Ignored when `OSMSG_COUNTRY` is set | +| `OSMSG_URL` | `minute` | `minute`/`hour`/`day` or full replication URL. Ignored when `OSMSG_COUNTRY` is set | | `OSMSG_COUNTRY` | _unset_ | Geofabrik region id (e.g. `nepal`). Needs `OSM_USERNAME`/`OSM_PASSWORD` | | `OSMSG_BOOTSTRAP` | `hour` | First-run window: `hour`/`day`/`week`/`month`/`year` | -| `OSMSG_BOOTSTRAP_DAYS` | _unset_ | Exact day count for first-run bootstrap (alternative to `OSMSG_BOOTSTRAP`) | -| `OSMSG_BOUNDARY` | _unset_ | GeoJSON path or Geofabrik region name — overrides auto-derived country geometry | -| `OSMSG_SCHEDULE` | `*/2 * * * *` | supercronic cron expression for the worker tick | +| `OSMSG_BOOTSTRAP_DAYS` | _unset_ | Exact day count for bootstrap (alternative to `OSMSG_BOOTSTRAP`) | +| `OSMSG_BOUNDARY` | _unset_ | GeoJSON path or Geofabrik region name — overrides country geometry | +| `OSMSG_SCHEDULE` | `*/2 * * * *` | supercronic cron expression | | `OSM_USERNAME` | _unset_ | OSM account username (Geofabrik auth) | | `OSM_PASSWORD` | _unset_ | OSM account password (Geofabrik auth) | -### Geofabrik credentials +Geofabrik sub-daily replication uses your OSM credentials directly — no browser opt-in required. + +### Start + +```bash +cd infra +docker compose up -d +curl 'http://localhost/health' +``` + +Set `OSMSG_DOMAIN` to your server's hostname for automatic HTTPS. 
+ +## Run as a systemd service -Geofabrik sub-daily replication uses your OSM account credentials directly via OAuth 2.0. -Set `OSM_USERNAME` and `OSM_PASSWORD` — no browser opt-in or separate Geofabrik registration required. +Only the `infra/` directory needs to be on the server — no source code or build tools required. -## Country mode example +**1. Place files:** ```bash -# infra/.env (or /opt/osmsg/.env on the server) -OSMSG_NAME=nepal -OSMSG_COUNTRY=nepal -OSMSG_BOOTSTRAP=day -OSMSG_SCHEDULE=0 * * * * -OSM_USERNAME=you -OSM_PASSWORD=secret +mkdir -p /opt/osmsg/infra +cp infra/docker-compose.yml infra/Caddyfile infra/osmsg.service /opt/osmsg/infra/ +cp infra/.env.example /opt/osmsg/infra/.env +$EDITOR /opt/osmsg/infra/.env ``` +**2. Install and enable:** + ```bash -docker compose up -d +cp /opt/osmsg/infra/osmsg.service /etc/systemd/system/osmsg.service +systemctl daemon-reload +systemctl enable --now osmsg ``` -The worker fetches Nepal-specific replication diffs from Geofabrik. -Changesets are filtered to those whose bounding box intersects the Nepal polygon. -Override with a GeoJSON file or a Geofabrik region name via `OSMSG_BOUNDARY`. +**Useful commands:** + +```bash +systemctl status osmsg +journalctl -u osmsg -f # follow logs from all containers +systemctl restart osmsg # pick up .env changes +systemctl stop osmsg # graceful shutdown +``` ## Populate all-time stats (backfill) -Run osmsg directly before starting the continuous worker; it will resume from where the backfill left off. +Run the worker once with a date range before starting the continuous service. +The worker detects existing state and resumes from `--update` automatically on next ticks. 
-**All Nepal stats since 2012 — then keep updating:** +**Nepal stats since 2012:** ```bash -docker compose up -d db # start only the database +cd infra +docker compose up -d db docker compose run --rm worker python -m osmsg \ --name nepal \ @@ -88,7 +115,7 @@ docker compose run --rm worker python -m osmsg \ --format psql \ --psql-dsn "postgresql://osmsg:osmsg@db:5432/osmsg" -docker compose up -d # api + worker resume from last backfill seq +docker compose up -d ``` **Last 90 days then keep refreshing:** @@ -100,52 +127,14 @@ OSMSG_BOOTSTRAP_DAYS=90 docker compose up -d ## API endpoints ```text +GET / GET /health GET /api/v1/user-stats?start=&end=[&hashtag=][&limit=N][&offset=N] -GET /docs (Swagger UI) +GET /docs ``` -## Run as a systemd service - -**1. Place files on the server:** - -```bash -mkdir -p /opt/osmsg -cp -r docker-compose.yml Dockerfile infra worker-entrypoint.sh /opt/osmsg/ -cp infra/.env.example /opt/osmsg/.env -$EDITOR /opt/osmsg/.env # set OSMSG_DOMAIN and other vars -``` - -**2. Install the unit file:** - -```bash -cp infra/osmsg.service /etc/systemd/system/osmsg.service -``` - -**3. Enable and start:** - -```bash -systemctl daemon-reload -systemctl enable --now osmsg -``` - -**Useful commands:** - -```bash -systemctl status osmsg -journalctl -u osmsg -f # follow logs (all containers) -systemctl restart osmsg # pick up .env changes -systemctl stop osmsg # brings the full stack down cleanly -``` - -> `EnvironmentFile=/opt/osmsg/.env` loads your env vars into the service environment. -> Docker Compose inherits them, so `${OSMSG_COUNTRY}` and friends resolve without a separate -> `--env-file` flag. 
- ## Run the API standalone (without compose) -Push stats into Postgres first, then start litestar: - ```bash uv run osmsg --last day --format psql --psql-dsn "$DATABASE_URL" --name api_last_day uv run --group api litestar --app api.app:app run --host 0.0.0.0 --port 8000 @@ -153,9 +142,9 @@ uv run --group api litestar --app api.app:app run --host 0.0.0.0 --port 8000 ## Volumes -| Volume | Mount | Contents | -| --- | --- | --- | -| `pgdata` | `/var/lib/postgresql/data` | Postgres data directory | -| `osmsg-data` | `/var/lib/osmsg` | DuckDB state files + parquet output | -| `osmsg-cache` | `/var/cache/osmsg` | Downloaded replication diff cache | -| `caddy-data` | `/data` | Caddy TLS certificates | +| Volume | Contents | +| --- | --- | +| `pgdata` | Postgres data | +| `osmsg-data` | DuckDB state files + parquet output | +| `osmsg-cache` | Downloaded replication diff cache | +| `caddy-data` | TLS certificates | diff --git a/infra/docker-compose.yml b/infra/docker-compose.yml new file mode 100644 index 0000000..ec90e39 --- /dev/null +++ b/infra/docker-compose.yml @@ -0,0 +1,66 @@ +services: + db: + image: postgres:17-alpine + environment: + POSTGRES_USER: osmsg + POSTGRES_PASSWORD: osmsg + POSTGRES_DB: osmsg + volumes: + - pgdata:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U osmsg -d osmsg"] + interval: 5s + timeout: 3s + retries: 10 + restart: unless-stopped + + api: + image: ghcr.io/osgeonepal/osmsg-api:latest + environment: + DATABASE_URL: postgresql://osmsg:osmsg@db:5432/osmsg + depends_on: + db: + condition: service_healthy + restart: unless-stopped + + worker: + image: ghcr.io/osgeonepal/osmsg-worker:latest + environment: + DATABASE_URL: postgresql://osmsg:osmsg@db:5432/osmsg + OSMSG_NAME: ${OSMSG_NAME:-stats} + OSMSG_URL: ${OSMSG_URL:-minute} + OSMSG_COUNTRY: ${OSMSG_COUNTRY:-} + OSMSG_BOOTSTRAP: ${OSMSG_BOOTSTRAP:-hour} + OSMSG_BOOTSTRAP_DAYS: ${OSMSG_BOOTSTRAP_DAYS:-} + OSMSG_BOUNDARY: ${OSMSG_BOUNDARY:-} + OSMSG_SCHEDULE: 
${OSMSG_SCHEDULE:-*/2 * * * *} + OSM_USERNAME: ${OSM_USERNAME:-} + OSM_PASSWORD: ${OSM_PASSWORD:-} + volumes: + - osmsg-data:/var/lib/osmsg + - osmsg-cache:/var/cache/osmsg + depends_on: + db: + condition: service_healthy + restart: unless-stopped + + caddy: + image: caddy:2-alpine + ports: + - "80:80" + - "443:443" + - "443:443/udp" + volumes: + - ./Caddyfile:/etc/caddy/Caddyfile:ro + - caddy-data:/data + - caddy-config:/config + depends_on: + - api + restart: unless-stopped + +volumes: + pgdata: + osmsg-data: + osmsg-cache: + caddy-data: + caddy-config: diff --git a/infra/osmsg.service b/infra/osmsg.service index 1a4d2e7..4a1d68c 100644 --- a/infra/osmsg.service +++ b/infra/osmsg.service @@ -8,8 +8,8 @@ Wants=network-online.target Type=simple Restart=on-failure RestartSec=10 -WorkingDirectory=/opt/osmsg -EnvironmentFile=/opt/osmsg/.env +WorkingDirectory=/opt/osmsg/infra +EnvironmentFile=/opt/osmsg/infra/.env ExecStart=/usr/bin/docker compose up ExecStop=/usr/bin/docker compose down TimeoutStartSec=300 diff --git a/osmsg/_tick.py b/osmsg/_tick.py index 242fd45..ada17e0 100644 --- a/osmsg/_tick.py +++ b/osmsg/_tick.py @@ -17,11 +17,10 @@ def _has_state(db_path: Path, source_url: str) -> bool: if not db_path.exists(): return False conn = connect(str(db_path)) - try: - create_tables(conn) - return get_state(conn, source_url) is not None - finally: - conn.close() + create_tables(conn) + result = get_state(conn, source_url) is not None + conn.close() + return result def main() -> int: @@ -55,8 +54,6 @@ def main() -> int: str(out), "--cache-dir", str(cache), - "--format", - "parquet", ] if country: cmd.extend(["--country", country]) @@ -68,6 +65,8 @@ def main() -> int: cmd.extend(["--boundary", boundary]) if psql_dsn: cmd.extend(["--format", "psql", "--psql-dsn", psql_dsn]) + else: + cmd.extend(["--format", "parquet"]) if _has_state(db_path, source_url): cmd.append("--update") From c12be3ce81a71bea9ba71aa22ffc06755295ab64 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma 
Date: Thu, 7 May 2026 19:09:52 +0200 Subject: [PATCH 25/49] fix(docker): resource limit in docker compose --- docs/infra.md | 3 +++ infra/docker-compose.yml | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/docs/infra.md b/docs/infra.md index 40c41f0..e2909a3 100644 --- a/docs/infra.md +++ b/docs/infra.md @@ -77,6 +77,9 @@ mkdir -p /opt/osmsg/infra cp infra/docker-compose.yml infra/Caddyfile infra/osmsg.service /opt/osmsg/infra/ cp infra/.env.example /opt/osmsg/infra/.env $EDITOR /opt/osmsg/infra/.env + +# The pgdata Docker volume binds to /mnt — create the directory first +mkdir -p /mnt/osmsg/pgdata ``` **2. Install and enable:** diff --git a/infra/docker-compose.yml b/infra/docker-compose.yml index ec90e39..f090a87 100644 --- a/infra/docker-compose.yml +++ b/infra/docker-compose.yml @@ -5,6 +5,12 @@ services: POSTGRES_USER: osmsg POSTGRES_PASSWORD: osmsg POSTGRES_DB: osmsg + command: > + postgres + -c shared_buffers=128MB + -c effective_cache_size=256MB + -c work_mem=4MB + -c maintenance_work_mem=32MB volumes: - pgdata:/var/lib/postgresql/data healthcheck: @@ -12,6 +18,13 @@ services: interval: 5s timeout: 3s retries: 10 + deploy: + resources: + limits: + cpus: "0.30" + memory: 300M + reservations: + memory: 128M restart: unless-stopped api: @@ -21,6 +34,13 @@ services: depends_on: db: condition: service_healthy + deploy: + resources: + limits: + cpus: "0.15" + memory: 128M + reservations: + memory: 64M restart: unless-stopped worker: @@ -42,6 +62,13 @@ services: depends_on: db: condition: service_healthy + deploy: + resources: + limits: + cpus: "0.50" + memory: 400M + reservations: + memory: 128M restart: unless-stopped caddy: @@ -56,10 +83,22 @@ services: - caddy-config:/config depends_on: - api + deploy: + resources: + limits: + cpus: "0.10" + memory: 64M + reservations: + memory: 32M restart: unless-stopped volumes: pgdata: + driver: local + driver_opts: + type: none + o: bind + device: /mnt/mnt/osmsg/pgdata 
osmsg-data: osmsg-cache: caddy-data: From 2fa5b762df3b52fe80933c20702663165e929d87 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 7 May 2026 19:33:26 +0200 Subject: [PATCH 26/49] fix(docker): caddy add env variable to caddy for the domain configuration --- infra/docker-compose.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/infra/docker-compose.yml b/infra/docker-compose.yml index f090a87..73421e5 100644 --- a/infra/docker-compose.yml +++ b/infra/docker-compose.yml @@ -73,6 +73,8 @@ services: caddy: image: caddy:2-alpine + env_file: + - .env ports: - "80:80" - "443:443" From 606c8f66a8f9dec06f6f8112c24af8873efccb17 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 7 May 2026 19:45:03 +0200 Subject: [PATCH 27/49] fix(health): patch health endpoint to include the last sequence and updated at --- api/app.py | 11 ++++++++--- api/queries.py | 8 ++++++++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/api/app.py b/api/app.py index 0447bb3..fba56a8 100644 --- a/api/app.py +++ b/api/app.py @@ -12,7 +12,7 @@ from litestar.template.config import TemplateConfig from .db import close_pool, ensure_schema, open_pool -from .queries import fetch_user_stats +from .queries import fetch_state, fetch_user_stats TEMPLATES = Path(__file__).parent / "templates" @@ -51,8 +51,13 @@ async def home() -> Template: @get("/health") -async def health() -> dict[str, str]: - return {"status": "ok"} +async def health() -> dict[str, Any]: + state = await fetch_state() + return { + "status": "ok", + "last_seq": state["last_seq"] if state else None, + "last_updated": state["last_ts"].isoformat() if state else None, + } @get("/api/v1/user-stats") diff --git a/api/queries.py b/api/queries.py index 9e8579d..9383d16 100644 --- a/api/queries.py +++ b/api/queries.py @@ -80,6 +80,14 @@ def _user_stats_sql(*, filter_hashtags: bool) -> str: """ +async def fetch_state() -> dict[str, Any] | None: + async with get_pool().acquire() as conn: + row = await 
conn.fetchrow("SELECT last_seq, last_ts, updated_at FROM state ORDER BY updated_at DESC LIMIT 1") + if row is None: + return None + return dict(row) + + async def fetch_user_stats( *, start: datetime, From 61bb58c9d909f6cd26b97f7d591b1e91129a1b29 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 7 May 2026 19:48:52 +0200 Subject: [PATCH 28/49] fix(url): api url arg default start end --- api/app.py | 14 ++++++++------ api/queries.py | 38 +++++++++++++++++++++++++------------- 2 files changed, 33 insertions(+), 19 deletions(-) diff --git a/api/app.py b/api/app.py index fba56a8..657660d 100644 --- a/api/app.py +++ b/api/app.py @@ -1,5 +1,5 @@ from contextlib import asynccontextmanager -from datetime import datetime +from datetime import UTC, datetime from pathlib import Path from typing import Any @@ -62,21 +62,23 @@ async def health() -> dict[str, Any]: @get("/api/v1/user-stats") async def get_user_stats( - start: datetime, - end: datetime, + start: datetime | None = None, + end: datetime | None = None, hashtag: list[str] | None = None, limit: int = Parameter(default=100, ge=1, le=1000), offset: int = Parameter(default=0, ge=0), ) -> dict[str, Any]: - if start >= end: + start = start or (datetime.min.replace(tzinfo=UTC) if end else None) + end = end or (datetime.now(tz=UTC) if start else None) + if start and end and start >= end: raise HTTPException(status_code=400, detail="start must be before end") normalized_hashtag = normalize_hashtags(hashtag) users = await fetch_user_stats(start=start, end=end, hashtag=normalized_hashtag, limit=limit, offset=offset) return { "count": len(users), - "start": start.isoformat(), - "end": end.isoformat(), + "start": start.isoformat() if start else None, + "end": end.isoformat() if end else None, "hashtag": normalized_hashtag, "limit": limit, "offset": offset, diff --git a/api/queries.py b/api/queries.py index 9383d16..e741f36 100644 --- a/api/queries.py +++ b/api/queries.py @@ -4,25 +4,34 @@ from .db import get_pool -def 
_user_stats_sql(*, filter_hashtags: bool) -> str: - changeset_filters = ["created_at >= $1", "created_at < $2"] +def _user_stats_sql(*, filter_dates: bool, filter_hashtags: bool) -> str: + n = 1 + changeset_filters: list[str] = [] + + if filter_dates: + changeset_filters.append(f"created_at >= ${n}") + n += 1 + changeset_filters.append(f"created_at < ${n}") + n += 1 + if filter_hashtags: - changeset_filters.append("hashtags && $3::TEXT[]") - limit_param = "$4" - offset_param = "$5" + changeset_filters.append(f"hashtags && ${n}::TEXT[]") + n += 1 enable_unfiltered_fallback = "FALSE" else: - limit_param = "$3" - offset_param = "$4" enable_unfiltered_fallback = "TRUE" - changeset_where = " AND ".join(changeset_filters) + limit_param = f"${n}" + n += 1 + offset_param = f"${n}" + + changeset_where = f"WHERE {' AND '.join(changeset_filters)}" if changeset_filters else "" return f""" WITH filtered_changesets AS ( SELECT changeset_id FROM changesets - WHERE {changeset_where} + {changeset_where} ), matching_stats AS ( SELECT st.* @@ -90,15 +99,18 @@ async def fetch_state() -> dict[str, Any] | None: async def fetch_user_stats( *, - start: datetime, - end: datetime, + start: datetime | None = None, + end: datetime | None = None, hashtag: list[str] | None = None, limit: int = 100, offset: int = 0, ) -> list[dict[str, Any]]: + filter_dates = start is not None and end is not None filter_hashtags = bool(hashtag) - sql = _user_stats_sql(filter_hashtags=filter_hashtags) - params: list[Any] = [start, end] + sql = _user_stats_sql(filter_dates=filter_dates, filter_hashtags=filter_hashtags) + params: list[Any] = [] + if filter_dates: + params.extend([start, end]) if filter_hashtags: params.append(hashtag) params.extend([limit, offset]) From af5356faaf42a93247aba8dd5539c9d171d7f460 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 7 May 2026 20:19:55 +0200 Subject: [PATCH 29/49] fix(test): don't wait for fetch state to be there --- api/app.py | 5 ++++- tests/test_api.py | 5 
++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/api/app.py b/api/app.py index 657660d..6cf4941 100644 --- a/api/app.py +++ b/api/app.py @@ -52,7 +52,10 @@ async def home() -> Template: @get("/health") async def health() -> dict[str, Any]: - state = await fetch_state() + try: + state = await fetch_state() + except Exception: + state = None return { "status": "ok", "last_seq": state["last_seq"] if state else None, diff --git a/tests/test_api.py b/tests/test_api.py index 8f2350d..cc461ae 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -29,7 +29,10 @@ def test_health_endpoint_returns_ok(): response = client.get("/health") assert response.status_code == 200 - assert response.json() == {"status": "ok"} + data = response.json() + assert data["status"] == "ok" + assert data["last_seq"] is None + assert data["last_updated"] is None def test_normalize_hashtags_accepts_bare_or_prefixed_values(): From 361d160c1393ecb463dc82a156df55d9570ad274 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 7 May 2026 21:16:39 +0200 Subject: [PATCH 30/49] fix(validation): pydantic arg validation and docs with swagger --- Dockerfile | 3 +- api/app.py | 74 ++++++++++------------------------------- api/routers/__init__.py | 0 api/routers/v1.py | 60 +++++++++++++++++++++++++++++++++ api/schemas.py | 38 +++++++++++++++++++++ api/templates/home.html | 6 ++-- docker-compose.yml | 3 ++ pyproject.toml | 2 +- tests/test_api.py | 30 ++++++++--------- uv.lock | 44 ++++++++++++++++++++++-- 10 files changed, 181 insertions(+), 79 deletions(-) create mode 100644 api/routers/__init__.py create mode 100644 api/routers/v1.py create mode 100644 api/schemas.py diff --git a/Dockerfile b/Dockerfile index 0e68b31..20e77d4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -36,7 +36,7 @@ ENV UV_COMPILE_BYTECODE=1 \ RUN --mount=type=cache,target=/root/.cache/uv \ uv venv /app/.venv && \ uv pip install --python /app/.venv \ - "litestar[standard]>=2.18.0" \ + 
"litestar[standard,pydantic]>=2.18.0" \ "asyncpg>=0.30.0" \ "python-dotenv>=1.2.2" @@ -69,6 +69,7 @@ ENV PATH="/app/.venv/bin:$PATH" \ EXPOSE 8000 ENTRYPOINT ["/app/.venv/bin/litestar", "--app", "api.app:app", "run", "--host", "0.0.0.0", "--port", "8000"] +CMD [] FROM python:3.13-slim AS worker diff --git a/api/app.py b/api/app.py index 6cf4941..fe705d6 100644 --- a/api/app.py +++ b/api/app.py @@ -1,40 +1,21 @@ from contextlib import asynccontextmanager -from datetime import UTC, datetime from pathlib import Path -from typing import Any from litestar import Litestar, get from litestar.contrib.jinja import JinjaTemplateEngine -from litestar.exceptions import HTTPException from litestar.openapi.config import OpenAPIConfig -from litestar.params import Parameter +from litestar.openapi.plugins import SwaggerRenderPlugin from litestar.response import Template from litestar.template.config import TemplateConfig from .db import close_pool, ensure_schema, open_pool -from .queries import fetch_state, fetch_user_stats +from .queries import fetch_state +from .routers.v1 import v1_router +from .schemas import HealthResponse TEMPLATES = Path(__file__).parent / "templates" -def normalize_hashtags(hashtag: list[str] | None) -> list[str] | None: - if not hashtag: - return None - - normalized: list[str] = [] - seen: set[str] = set() - for value in hashtag: - cleaned = value.strip() - if not cleaned: - continue - cleaned = "#" + cleaned.lstrip("#") - key = cleaned.lower() - if key not in seen: - normalized.append(cleaned) - seen.add(key) - return normalized or None - - @asynccontextmanager async def lifespan(app: Litestar): await open_pool() @@ -51,47 +32,26 @@ async def home() -> Template: @get("/health") -async def health() -> dict[str, Any]: +async def health() -> HealthResponse: try: state = await fetch_state() except Exception: state = None - return { - "status": "ok", - "last_seq": state["last_seq"] if state else None, - "last_updated": state["last_ts"].isoformat() if state 
else None, - } - - -@get("/api/v1/user-stats") -async def get_user_stats( - start: datetime | None = None, - end: datetime | None = None, - hashtag: list[str] | None = None, - limit: int = Parameter(default=100, ge=1, le=1000), - offset: int = Parameter(default=0, ge=0), -) -> dict[str, Any]: - start = start or (datetime.min.replace(tzinfo=UTC) if end else None) - end = end or (datetime.now(tz=UTC) if start else None) - if start and end and start >= end: - raise HTTPException(status_code=400, detail="start must be before end") - - normalized_hashtag = normalize_hashtags(hashtag) - users = await fetch_user_stats(start=start, end=end, hashtag=normalized_hashtag, limit=limit, offset=offset) - return { - "count": len(users), - "start": start.isoformat() if start else None, - "end": end.isoformat() if end else None, - "hashtag": normalized_hashtag, - "limit": limit, - "offset": offset, - "users": users, - } + return HealthResponse( + status="ok", + last_seq=state["last_seq"] if state else None, + last_updated=state["last_ts"] if state else None, + ) app = Litestar( - route_handlers=[home, health, get_user_stats], + route_handlers=[home, health, v1_router], lifespan=[lifespan], - openapi_config=OpenAPIConfig(title="OSMSG API", version="1.0.0", path="/docs"), + openapi_config=OpenAPIConfig( + title="OSMSG API", + version="1.0.0", + path="/docs", + render_plugins=[SwaggerRenderPlugin()], + ), template_config=TemplateConfig(directory=TEMPLATES, engine=JinjaTemplateEngine), # ty: ignore[invalid-argument-type] ) diff --git a/api/routers/__init__.py b/api/routers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/api/routers/v1.py b/api/routers/v1.py new file mode 100644 index 0000000..7fb445d --- /dev/null +++ b/api/routers/v1.py @@ -0,0 +1,60 @@ +from datetime import UTC, datetime + +from litestar import Controller, Router, get +from litestar.exceptions import HTTPException +from litestar.params import Parameter + +from ..queries import fetch_user_stats 
+from ..schemas import UserStat, UserStatsResponse + + +def normalize_hashtags(hashtag: list[str] | None) -> list[str] | None: + if not hashtag: + return None + + normalized: list[str] = [] + seen: set[str] = set() + for value in hashtag: + cleaned = value.strip() + if not cleaned: + continue + cleaned = "#" + cleaned.lstrip("#") + key = cleaned.lower() + if key not in seen: + normalized.append(cleaned) + seen.add(key) + return normalized or None + + +class StatsController(Controller): + path = "/stats" + + @get() + async def get_user_stats( + self, + start: datetime | None = None, + end: datetime | None = None, + hashtag: list[str] | None = None, + limit: int = Parameter(default=100, ge=1, le=1000), + offset: int = Parameter(default=0, ge=0), + ) -> UserStatsResponse: + start = start or (datetime.min.replace(tzinfo=UTC) if end else None) + end = end or (datetime.now(tz=UTC) if start else None) + if start and end and start >= end: + raise HTTPException(status_code=400, detail="start must be before end") + + normalized_hashtag = normalize_hashtags(hashtag) + rows = await fetch_user_stats(start=start, end=end, hashtag=normalized_hashtag, limit=limit, offset=offset) + users = [UserStat(**row) for row in rows] + return UserStatsResponse( + count=len(users), + start=start, + end=end, + hashtag=normalized_hashtag, + limit=limit, + offset=offset, + users=users, + ) + + +v1_router = Router(path="/api/v1", route_handlers=[StatsController]) diff --git a/api/schemas.py b/api/schemas.py new file mode 100644 index 0000000..2dbbeea --- /dev/null +++ b/api/schemas.py @@ -0,0 +1,38 @@ +from datetime import datetime + +from pydantic import BaseModel + + +class UserStat(BaseModel): + uid: int + name: str + changesets: int + nodes_create: int + nodes_modify: int + nodes_delete: int + ways_create: int + ways_modify: int + ways_delete: int + rels_create: int + rels_modify: int + rels_delete: int + poi_create: int + poi_modify: int + map_changes: int + rank: int + + +class 
UserStatsResponse(BaseModel): + count: int + start: datetime | None + end: datetime | None + hashtag: list[str] | None + limit: int + offset: int + users: list[UserStat] + + +class HealthResponse(BaseModel): + status: str + last_seq: int | None + last_updated: datetime | None diff --git a/api/templates/home.html b/api/templates/home.html index 133ad39..3137e29 100644 --- a/api/templates/home.html +++ b/api/templates/home.html @@ -19,9 +19,9 @@

OSMSG

OpenStreetMap contributor stats API

diff --git a/docker-compose.yml b/docker-compose.yml index 5eb75f4..f437110 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -24,6 +24,9 @@ services: DATABASE_URL: postgresql://osmsg:osmsg@db:5432/osmsg ports: - "8000:8000" + volumes: + - ./api:/app/api + command: ["--reload"] depends_on: db: condition: service_healthy diff --git a/pyproject.toml b/pyproject.toml index 65d2902..b961a2e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,7 +52,7 @@ module-root = "" [dependency-groups] api = [ "asyncpg>=0.30.0", - "litestar[standard]>=2.18.0", + "litestar[standard,pydantic]>=2.18.0", ] dev = [ "pytest>=7.4.2", diff --git a/tests/test_api.py b/tests/test_api.py index cc461ae..221d34e 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -4,11 +4,12 @@ from litestar.testing import TestClient from api import app as api_app -from api.app import get_user_stats, health, normalize_hashtags +from api.app import health from api.pg_schema import PG_SCHEMA as API_PG_SCHEMA +from api.routers.v1 import normalize_hashtags, v1_router from osmsg.pg_schema import PG_SCHEMA as CLI_PG_SCHEMA -api_module = import_module("api.app") +v1_module = import_module("api.routers.v1") def test_pg_schema_in_sync(): @@ -19,7 +20,7 @@ def test_api_exposes_only_active_public_routes(): paths = {route.path for route in api_app.routes} assert "/health" in paths - assert "/api/v1/user-stats" in paths + assert "/api/v1/stats" in paths assert "/api/v1/stats/summary" not in paths assert "/api/v1/stats/timeseries" not in paths @@ -47,6 +48,11 @@ def test_normalize_hashtags_dedupes_case_insensitively(): assert normalize_hashtags(["maproulette", "#MapRoulette", "#roads"]) == ["#maproulette", "#roads"] +def _stats_app(monkeypatch, fake_fetch): + monkeypatch.setattr(v1_module, "fetch_user_stats", fake_fetch) + return Litestar(route_handlers=[v1_router]) + + def test_user_stats_endpoint_returns_expected_response(monkeypatch): async def fake_fetch_user_stats(*, start, end, hashtag, limit, 
offset): assert start.isoformat() == "2026-05-01T00:00:00+00:00" @@ -75,12 +81,9 @@ async def fake_fetch_user_stats(*, start, end, hashtag, limit, offset): } ] - monkeypatch.setattr(api_module, "fetch_user_stats", fake_fetch_user_stats) - app = Litestar(route_handlers=[get_user_stats]) - - with TestClient(app) as client: + with TestClient(_stats_app(monkeypatch, fake_fetch_user_stats)) as client: response = client.get( - "/api/v1/user-stats", + "/api/v1/stats", params=[ ("start", "2026-05-01T00:00:00Z"), ("end", "2026-05-02T00:00:00Z"), @@ -93,8 +96,8 @@ async def fake_fetch_user_stats(*, start, end, hashtag, limit, offset): assert response.status_code == 200 assert response.json() == { "count": 1, - "start": "2026-05-01T00:00:00+00:00", - "end": "2026-05-02T00:00:00+00:00", + "start": "2026-05-01T00:00:00Z", + "end": "2026-05-02T00:00:00Z", "hashtag": ["#mapathon", "#roads"], "limit": 1, "offset": 0, @@ -125,12 +128,9 @@ def test_user_stats_endpoint_rejects_invalid_date_range(monkeypatch): async def fake_fetch_user_stats(**kwargs): raise AssertionError("fetch_user_stats should not be called") - monkeypatch.setattr(api_module, "fetch_user_stats", fake_fetch_user_stats) - app = Litestar(route_handlers=[get_user_stats]) - - with TestClient(app) as client: + with TestClient(_stats_app(monkeypatch, fake_fetch_user_stats)) as client: response = client.get( - "/api/v1/user-stats", + "/api/v1/stats", params={"start": "2026-05-02T00:00:00Z", "end": "2026-05-01T00:00:00Z"}, ) diff --git a/uv.lock b/uv.lock index b97be2c..56e9883 100644 --- a/uv.lock +++ b/uv.lock @@ -342,6 +342,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" }, ] +[[package]] +name = "dnspython" +version = "2.8.0" +source = { registry = 
"https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8c/8b/57666417c0f90f08bcafa776861060426765fdb422eb10212086fb811d26/dnspython-2.8.0.tar.gz", hash = "sha256:181d3c6996452cb1189c4046c61599b84a5a86e099562ffde77d26984ff26d0f", size = 368251, upload-time = "2025-09-07T18:58:00.022Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ba/5a/18ad964b0086c6e62e2e7500f7edc89e3faa45033c71c1893d34eed2b2de/dnspython-2.8.0-py3-none-any.whl", hash = "sha256:01d9bbc4a2d76bf0db7c1f729812ded6d912bd318d3b1cf81d30c0f845dbf3af", size = 331094, upload-time = "2025-09-07T18:57:58.071Z" }, +] + [[package]] name = "duckdb" version = "1.5.2" @@ -387,6 +396,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/96/fd/a40c621ff207f3ce8e484aa0fc8ba4eb6e3ecf52e15b42ba764b457a9550/editorconfig-0.17.1-py3-none-any.whl", hash = "sha256:1eda9c2c0db8c16dbd50111b710572a5e6de934e39772de1959d41f64fc17c82", size = 16360, upload-time = "2025-06-09T08:21:35.654Z" }, ] +[[package]] +name = "email-validator" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "dnspython" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f5/22/900cb125c76b7aaa450ce02fd727f452243f2e91a61af068b40adba60ea9/email_validator-2.3.0.tar.gz", hash = "sha256:9fc05c37f2f6cf439ff414f8fc46d917929974a82244c20eb10231ba60c54426", size = 51238, upload-time = "2025-08-26T13:09:06.831Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/de/15/545e2b6cf2e3be84bc1ed85613edd75b8aea69807a71c26f4ca6a9258e82/email_validator-2.3.0-py3-none-any.whl", hash = "sha256:80f13f623413e6b197ae73bb10bf4eb0908faf509ad8362c5edeb0be7fd450b4", size = 35604, upload-time = "2025-08-26T13:09:05.858Z" }, +] + [[package]] name = "faker" version = "40.15.0" @@ -558,6 +580,11 @@ wheels = [ ] [package.optional-dependencies] +pydantic = [ + { name = "email-validator" }, + { name = "pydantic" }, + { name = 
"pydantic-extra-types" }, +] standard = [ { name = "jinja2" }, { name = "jsbeautifier" }, @@ -999,7 +1026,7 @@ dependencies = [ [package.dev-dependencies] api = [ { name = "asyncpg" }, - { name = "litestar", extra = ["standard"] }, + { name = "litestar", extra = ["pydantic", "standard"] }, ] dev = [ { name = "commitizen" }, @@ -1030,7 +1057,7 @@ requires-dist = [ [package.metadata.requires-dev] api = [ { name = "asyncpg", specifier = ">=0.30.0" }, - { name = "litestar", extras = ["standard"], specifier = ">=2.18.0" }, + { name = "litestar", extras = ["standard", "pydantic"], specifier = ">=2.18.0" }, ] dev = [ { name = "commitizen", specifier = ">=4.13.10" }, @@ -1277,6 +1304,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/07/0f/1c34a74c8d07136f0d729ffe5e1fdab04fbdaa7684f61a92f92511a84a15/pydantic_core-2.46.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:b00b76f7142fc60c762ce579bd29c8fa44aaa56592dd3c54fab3928d0d4ca6ff", size = 2184144, upload-time = "2026-04-20T14:42:57Z" }, ] +[[package]] +name = "pydantic-extra-types" +version = "2.11.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/71/dba38ee2651f84f7842206adbd2233d8bbdb59fb85e9fa14232486a8c471/pydantic_extra_types-2.11.1.tar.gz", hash = "sha256:46792d2307383859e923d8fcefa82108b1a141f8a9c0198982b3832ab5ef1049", size = 172002, upload-time = "2026-03-16T08:08:03.92Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/17/c1/3226e6d7f5a4f736f38ac11a6fbb262d701889802595cdb0f53a885ac2e0/pydantic_extra_types-2.11.1-py3-none-any.whl", hash = "sha256:1722ea2bddae5628ace25f2aa685b69978ef533123e5638cfbddb999e0100ec1", size = 79526, upload-time = "2026-03-16T08:08:02.533Z" }, +] + [[package]] name = "pygments" version = "2.19.2" From ffc850d3fbc16106e947321ee8037243ec3e1294 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 7 May 2026 
23:17:36 +0200 Subject: [PATCH 31/49] refactor(alltags): refactors all tags and schema changes lat lon to geom in the changesets tables --- api/pg_schema.py | 7 +-- api/queries.py | 52 +++++++++++++++-- api/routers/v1.py | 11 +++- api/schemas.py | 8 +++ docker-compose.yml | 12 ++-- docs/Manual.md | 6 +- docs/infra.md | 37 +++++++++---- infra/.env.example | 24 +++++--- infra/docker-compose.yml | 10 ++-- osmsg/_tick.py | 50 +++++++---------- osmsg/cli.py | 13 +++-- osmsg/db/duckdb_schema.py | 5 +- osmsg/db/ingest.py | 5 +- osmsg/db/queries.py | 31 +++++------ osmsg/db/schema.py | 1 + osmsg/export/markdown.py | 4 +- osmsg/export/psql.py | 21 +++---- osmsg/handlers.py | 2 +- osmsg/pg_schema.py | 7 +-- osmsg/pipeline.py | 22 +++----- tests/conftest.py | 8 +-- tests/test_api.py | 50 ++++++++++++++++- tests/test_cli.py | 4 +- tests/test_db_queries.py | 22 +++++--- tests/test_export.py | 2 +- tests/test_handlers.py | 2 +- tests/test_psql_export.py | 98 ++++++++++++++++++++++++++++++--- tests/test_stats_correctness.py | 2 +- 28 files changed, 354 insertions(+), 162 deletions(-) diff --git a/api/pg_schema.py b/api/pg_schema.py index 1afe55e..178670a 100644 --- a/api/pg_schema.py +++ b/api/pg_schema.py @@ -1,4 +1,5 @@ PG_SCHEMA = """ +CREATE EXTENSION IF NOT EXISTS postgis; CREATE TABLE IF NOT EXISTS users ( uid BIGINT PRIMARY KEY, username TEXT NOT NULL @@ -9,12 +10,10 @@ created_at TIMESTAMPTZ, hashtags TEXT[], editor TEXT, - min_lon DOUBLE PRECISION, - min_lat DOUBLE PRECISION, - max_lon DOUBLE PRECISION, - max_lat DOUBLE PRECISION + geom GEOMETRY(POLYGON) ); CREATE INDEX IF NOT EXISTS idx_changesets_created_at ON changesets(created_at); +CREATE INDEX IF NOT EXISTS idx_changesets_geom ON changesets USING GIST (geom); CREATE TABLE IF NOT EXISTS changeset_stats ( changeset_id BIGINT NOT NULL REFERENCES changesets(changeset_id), seq_id BIGINT NOT NULL, diff --git a/api/queries.py b/api/queries.py index e741f36..c804bbe 100644 --- a/api/queries.py +++ b/api/queries.py @@ -3,8 
+3,42 @@ from .db import get_pool +_TAG_CTES = """, + tag_agg AS ( + SELECT + st.uid, + tk.key AS tag_key, + tv.key AS tag_val, + SUM(COALESCE((tv.value->>'c')::bigint, 0)) AS total_c, + SUM(COALESCE((tv.value->>'m')::bigint, 0)) AS total_m, + SUM((tv.value->>'len')::double precision) AS total_len + FROM stats_scope st + JOIN LATERAL jsonb_each(st.tag_stats) tk ON st.tag_stats IS NOT NULL + JOIN LATERAL jsonb_each(tk.value) tv ON true + GROUP BY st.uid, tk.key, tv.key + ), + tag_per_key AS ( + SELECT + uid, + tag_key, + jsonb_object_agg( + tag_val, + CASE WHEN total_len IS NOT NULL + THEN jsonb_build_object('c', total_c, 'm', total_m, 'len', total_len) + ELSE jsonb_build_object('c', total_c, 'm', total_m) + END + ) AS tag_vals + FROM tag_agg + GROUP BY uid, tag_key + ), + tag_per_user AS ( + SELECT uid, jsonb_object_agg(tag_key, tag_vals) AS tag_stats + FROM tag_per_key + GROUP BY uid + )""" -def _user_stats_sql(*, filter_dates: bool, filter_hashtags: bool) -> str: + +def _user_stats_sql(*, filter_dates: bool, filter_hashtags: bool, include_tags: bool) -> str: n = 1 changeset_filters: list[str] = [] @@ -27,6 +61,11 @@ def _user_stats_sql(*, filter_dates: bool, filter_hashtags: bool) -> str: changeset_where = f"WHERE {' AND '.join(changeset_filters)}" if changeset_filters else "" + tag_ctes = _TAG_CTES if include_tags else "" + tag_select = "tpu.tag_stats" if include_tags else "NULL::jsonb AS tag_stats" + tag_join = "LEFT JOIN tag_per_user tpu ON tpu.uid = u.uid" if include_tags else "" + tag_group = ", tpu.tag_stats" if include_tags else "" + return f""" WITH filtered_changesets AS ( SELECT changeset_id @@ -45,7 +84,7 @@ def _user_stats_sql(*, filter_dates: bool, filter_hashtags: bool) -> str: FROM changeset_stats st WHERE {enable_unfiltered_fallback} AND NOT EXISTS (SELECT 1 FROM matching_stats) - ) + ){tag_ctes} SELECT u.uid, u.username AS name, @@ -80,10 +119,12 @@ def _user_stats_sql(*, filter_dates: bool, filter_hashtags: bool) -> str: 0 ) DESC, u.uid ASC - ) 
AS rank + ) AS rank, + {tag_select} FROM users u JOIN stats_scope st ON u.uid = st.uid - GROUP BY u.uid, u.username + {tag_join} + GROUP BY u.uid, u.username{tag_group} ORDER BY map_changes DESC, u.uid ASC LIMIT {limit_param} OFFSET {offset_param} """ @@ -102,12 +143,13 @@ async def fetch_user_stats( start: datetime | None = None, end: datetime | None = None, hashtag: list[str] | None = None, + tags: bool = True, limit: int = 100, offset: int = 0, ) -> list[dict[str, Any]]: filter_dates = start is not None and end is not None filter_hashtags = bool(hashtag) - sql = _user_stats_sql(filter_dates=filter_dates, filter_hashtags=filter_hashtags) + sql = _user_stats_sql(filter_dates=filter_dates, filter_hashtags=filter_hashtags, include_tags=tags) params: list[Any] = [] if filter_dates: params.extend([start, end]) diff --git a/api/routers/v1.py b/api/routers/v1.py index 7fb445d..15a1be2 100644 --- a/api/routers/v1.py +++ b/api/routers/v1.py @@ -35,6 +35,7 @@ async def get_user_stats( start: datetime | None = None, end: datetime | None = None, hashtag: list[str] | None = None, + tags: bool = True, limit: int = Parameter(default=100, ge=1, le=1000), offset: int = Parameter(default=0, ge=0), ) -> UserStatsResponse: @@ -44,13 +45,21 @@ async def get_user_stats( raise HTTPException(status_code=400, detail="start must be before end") normalized_hashtag = normalize_hashtags(hashtag) - rows = await fetch_user_stats(start=start, end=end, hashtag=normalized_hashtag, limit=limit, offset=offset) + rows = await fetch_user_stats( + start=start, + end=end, + hashtag=normalized_hashtag, + tags=tags, + limit=limit, + offset=offset, + ) users = [UserStat(**row) for row in rows] return UserStatsResponse( count=len(users), start=start, end=end, hashtag=normalized_hashtag, + tags=tags, limit=limit, offset=offset, users=users, diff --git a/api/schemas.py b/api/schemas.py index 2dbbeea..631b2ec 100644 --- a/api/schemas.py +++ b/api/schemas.py @@ -3,6 +3,12 @@ from pydantic import BaseModel 
+class TagValueStats(BaseModel): + c: int = 0 + m: int = 0 + len: float | None = None + + class UserStat(BaseModel): uid: int name: str @@ -20,6 +26,7 @@ class UserStat(BaseModel): poi_modify: int map_changes: int rank: int + tag_stats: dict[str, dict[str, TagValueStats]] | None = None class UserStatsResponse(BaseModel): @@ -27,6 +34,7 @@ class UserStatsResponse(BaseModel): start: datetime | None end: datetime | None hashtag: list[str] | None + tags: bool limit: int offset: int users: list[UserStat] diff --git a/docker-compose.yml b/docker-compose.yml index f437110..d70587d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,6 +1,7 @@ services: db: - image: postgres:17-alpine + image: postgis/postgis:17-3.5-alpine + platform: linux/amd64 environment: POSTGRES_USER: osmsg POSTGRES_PASSWORD: osmsg @@ -8,7 +9,7 @@ services: volumes: - pgdata:/var/lib/postgresql/data ports: - - "5432:5432" + - "${OSMSG_DB_PORT:-5432}:5432" healthcheck: test: ["CMD-SHELL", "pg_isready -U osmsg -d osmsg"] interval: 5s @@ -38,15 +39,12 @@ services: target: worker environment: DATABASE_URL: postgresql://osmsg:osmsg@db:5432/osmsg - OSMSG_NAME: ${OSMSG_NAME:-stats} - OSMSG_URL: ${OSMSG_URL:-minute} - OSMSG_COUNTRY: ${OSMSG_COUNTRY:-} + OSMSG_SCHEDULE: ${OSMSG_SCHEDULE:-*/2 * * * *} OSMSG_BOOTSTRAP: ${OSMSG_BOOTSTRAP:-hour} OSMSG_BOOTSTRAP_DAYS: ${OSMSG_BOOTSTRAP_DAYS:-} - OSMSG_BOUNDARY: ${OSMSG_BOUNDARY:-} - OSMSG_SCHEDULE: ${OSMSG_SCHEDULE:-*/2 * * * *} OSM_USERNAME: ${OSM_USERNAME:-} OSM_PASSWORD: ${OSM_PASSWORD:-} + OSMSG_EXTRA_ARGS: ${OSMSG_EXTRA_ARGS:---name stats --output-dir /var/lib/osmsg --cache-dir /var/cache/osmsg --url minute --format psql --psql-dsn postgresql://osmsg:osmsg@db:5432/osmsg} volumes: - osmsg-data:/var/lib/osmsg - osmsg-cache:/var/cache/osmsg diff --git a/docs/Manual.md b/docs/Manual.md index 876bc74..bd418db 100644 --- a/docs/Manual.md +++ b/docs/Manual.md @@ -56,8 +56,8 @@ osmsg --boundary '{"type":"Polygon",...}' # inline GeoJSON string ```bash osmsg 
--tags building --tags highway # per-key create/modify counts osmsg --length highway --length waterway # length in metres for created ways -osmsg --all-tags # every tag key -osmsg --all-tags --key-value # also key=value combos +osmsg --keys # every tag key (no value breakdown) +osmsg --all # every key=value combo + changeset metadata (hashtags, editors) ``` ## Output @@ -130,7 +130,7 @@ osmsg --hashtags smforst --days 6 --summary --tm-stats # Full year of global stats to Postgres (incremental-friendly) osmsg --start "2025-01-01 00:00:00" --end "2026-01-01 00:00:00" \ - --url day --all-tags -f parquet -f psql \ + --url day --all -f parquet -f psql \ --psql-dsn "host=localhost dbname=osm_stats user=osm" # All-time Nepal stats via planet/day (Geofabrik only keeps ~4 months per country) diff --git a/docs/infra.md b/docs/infra.md index e2909a3..bde9641 100644 --- a/docs/infra.md +++ b/docs/infra.md @@ -44,15 +44,15 @@ $EDITOR infra/.env | Variable | Default | Notes | | --- | --- | --- | | `OSMSG_DOMAIN` | `localhost` | Your domain — enables automatic HTTPS via Caddy | -| `OSMSG_NAME` | `stats` | DuckDB / output file basename | -| `OSMSG_URL` | `minute` | `minute`/`hour`/`day` or full replication URL. Ignored when `OSMSG_COUNTRY` is set | -| `OSMSG_COUNTRY` | _unset_ | Geofabrik region id (e.g. `nepal`). 
Needs `OSM_USERNAME`/`OSM_PASSWORD` | -| `OSMSG_BOOTSTRAP` | `hour` | First-run window: `hour`/`day`/`week`/`month`/`year` | -| `OSMSG_BOOTSTRAP_DAYS` | _unset_ | Exact day count for bootstrap (alternative to `OSMSG_BOOTSTRAP`) | -| `OSMSG_BOUNDARY` | _unset_ | GeoJSON path or Geofabrik region name — overrides country geometry | | `OSMSG_SCHEDULE` | `*/2 * * * *` | supercronic cron expression | -| `OSM_USERNAME` | _unset_ | OSM account username (Geofabrik auth) | -| `OSM_PASSWORD` | _unset_ | OSM account password (Geofabrik auth) | +| `OSMSG_BOOTSTRAP` | `hour` | First-run window: `hour`/`day`/`week`/`month`/`year` | +| `OSMSG_BOOTSTRAP_DAYS` | _unset_ | Exact day count for first run (alternative to `OSMSG_BOOTSTRAP`) | +| `OSM_USERNAME` | _unset_ | OSM account username (required for Geofabrik country replication) | +| `OSM_PASSWORD` | _unset_ | OSM account password (required for Geofabrik country replication) | +| `OSMSG_EXTRA_ARGS` | _see example_ | osmsg args applied on every tick — country, format, tags, boundary, etc. | + +`OSMSG_EXTRA_ARGS` runs on every tick. Do not put `--last`, `--days`, or `--update` here — +tick adds those automatically based on whether state exists. Geofabrik sub-daily replication uses your OSM credentials directly — no browser opt-in required. @@ -66,6 +66,13 @@ curl 'http://localhost/health' Set `OSMSG_DOMAIN` to your server's hostname for automatic HTTPS. +### Update to latest images + +```bash +cd infra +docker compose pull && docker compose up -d +``` + ## Run as a systemd service Only the `infra/` directory needs to be on the server — no source code or build tools required. @@ -102,7 +109,7 @@ systemctl stop osmsg # graceful shutdown ## Populate all-time stats (backfill) Run the worker once with a date range before starting the continuous service. -The worker detects existing state and resumes from `--update` automatically on next ticks. +The worker detects existing state and resumes with `--update` automatically on next ticks. 
**Nepal stats since 2012:** @@ -124,7 +131,9 @@ docker compose up -d **Last 90 days then keep refreshing:** ```bash -OSMSG_BOOTSTRAP_DAYS=90 docker compose up -d +# Set OSMSG_EXTRA_ARGS with --days 90 for first run, then start normally +OSMSG_EXTRA_ARGS="--name stats --output-dir /var/lib/osmsg --cache-dir /var/cache/osmsg --url minute --days 90 --format psql --psql-dsn postgresql://osmsg:osmsg@db:5432/osmsg" \ + docker compose up -d ``` ## API endpoints @@ -132,10 +141,14 @@ OSMSG_BOOTSTRAP_DAYS=90 docker compose up -d ```text GET / GET /health -GET /api/v1/user-stats?start=&end=[&hashtag=][&limit=N][&offset=N] -GET /docs +GET /api/v1/stats?start=&end=[&hashtag=][&tags=true|false][&limit=N][&offset=N] +GET /docs/swagger ``` +`tags=true` (default) returns the per-user `tag_stats` map populated when the worker +runs with `--all` or `--keys`. Set `tags=false` to skip the JSONB expansion for +cheaper / smaller responses. + ## Run the API standalone (without compose) ```bash diff --git a/infra/.env.example b/infra/.env.example index c43c2cf..fe088e6 100644 --- a/infra/.env.example +++ b/infra/.env.example @@ -1,13 +1,21 @@ -# Caddy — set to your domain for automatic HTTPS +# Caddy # OSMSG_DOMAIN=stats.example.com -# Worker — leave blank for planet/minute defaults -# OSMSG_NAME=nepal -# OSMSG_URL=minute # minute/hour/day or full URL; ignored when OSMSG_COUNTRY is set -# OSMSG_COUNTRY=nepal -# OSMSG_BOOTSTRAP=day -# OSMSG_BOOTSTRAP_DAYS= # exact day count; alternative to OSMSG_BOOTSTRAP -# OSMSG_BOUNDARY= # GeoJSON path or Geofabrik region name; overrides OSMSG_COUNTRY geometry # OSMSG_SCHEDULE=0 * * * * +# OSMSG_BOOTSTRAP=hour +# OSMSG_BOOTSTRAP_DAYS=90 + # OSM_USERNAME= # OSM_PASSWORD= + + +# --all collects every tag key=value AND auto-pulls changeset metadata (hashtags + editors). +# Drop --all (or swap for --keys) for lighter runs. 
+ +# Planet/minute: +# OSMSG_EXTRA_ARGS=--name stats --output-dir /var/lib/osmsg --cache-dir /var/cache/osmsg --url minute --format psql --psql-dsn postgresql://osmsg:osmsg@db:5432/osmsg --all + +# Country replication: +# OSMSG_EXTRA_ARGS=--name nepal --output-dir /var/lib/osmsg --cache-dir /var/cache/osmsg --country nepal --format psql --psql-dsn postgresql://osmsg:osmsg@db:5432/osmsg --all + +OSMSG_EXTRA_ARGS=--name stats --output-dir /var/lib/osmsg --cache-dir /var/cache/osmsg --url minute --format psql --psql-dsn postgresql://osmsg:osmsg@db:5432/osmsg --all diff --git a/infra/docker-compose.yml b/infra/docker-compose.yml index 73421e5..b0bc6fb 100644 --- a/infra/docker-compose.yml +++ b/infra/docker-compose.yml @@ -1,6 +1,7 @@ services: db: - image: postgres:17-alpine + image: postgis/postgis:17-3.5-alpine + platform: linux/amd64 environment: POSTGRES_USER: osmsg POSTGRES_PASSWORD: osmsg @@ -47,15 +48,12 @@ services: image: ghcr.io/osgeonepal/osmsg-worker:latest environment: DATABASE_URL: postgresql://osmsg:osmsg@db:5432/osmsg - OSMSG_NAME: ${OSMSG_NAME:-stats} - OSMSG_URL: ${OSMSG_URL:-minute} - OSMSG_COUNTRY: ${OSMSG_COUNTRY:-} + OSMSG_SCHEDULE: ${OSMSG_SCHEDULE:-*/2 * * * *} OSMSG_BOOTSTRAP: ${OSMSG_BOOTSTRAP:-hour} OSMSG_BOOTSTRAP_DAYS: ${OSMSG_BOOTSTRAP_DAYS:-} - OSMSG_BOUNDARY: ${OSMSG_BOUNDARY:-} - OSMSG_SCHEDULE: ${OSMSG_SCHEDULE:-*/2 * * * *} OSM_USERNAME: ${OSM_USERNAME:-} OSM_PASSWORD: ${OSM_PASSWORD:-} + OSMSG_EXTRA_ARGS: ${OSMSG_EXTRA_ARGS:-} volumes: - osmsg-data:/var/lib/osmsg - osmsg-cache:/var/cache/osmsg diff --git a/osmsg/_tick.py b/osmsg/_tick.py index ada17e0..9372a0e 100644 --- a/osmsg/_tick.py +++ b/osmsg/_tick.py @@ -1,9 +1,8 @@ """Worker tick: bootstrap on first run, --update thereafter.""" -from __future__ import annotations - import fcntl import os +import shlex import subprocess import sys from pathlib import Path @@ -23,19 +22,23 @@ def _has_state(db_path: Path, source_url: str) -> bool: return result +def _parse_arg(args: 
list[str], flag: str) -> str | None: + for i, arg in enumerate(args): + if arg == flag and i + 1 < len(args): + return args[i + 1] + return None + + def main() -> int: - name = os.environ.get("OSMSG_NAME", "stats") - out = Path(os.environ.get("OSMSG_OUTPUT_DIR", "/var/lib/osmsg")) - cache = Path(os.environ.get("OSMSG_CACHE_DIR", "/var/cache/osmsg")) - country = os.environ.get("OSMSG_COUNTRY") - url = os.environ.get("OSMSG_URL", "minute") - boundary = os.environ.get("OSMSG_BOUNDARY") + extra_args = shlex.split(os.environ.get("OSMSG_EXTRA_ARGS", "")) bootstrap = os.environ.get("OSMSG_BOOTSTRAP", "hour") bootstrap_days = os.environ.get("OSMSG_BOOTSTRAP_DAYS") - psql_dsn = os.environ.get("DATABASE_URL") + name = _parse_arg(extra_args, "--name") or "stats" + out = Path(_parse_arg(extra_args, "--output-dir") or "/var/lib/osmsg") + country = _parse_arg(extra_args, "--country") + url = _parse_arg(extra_args, "--url") or "minute" out.mkdir(parents=True, exist_ok=True) - cache.mkdir(parents=True, exist_ok=True) lock_path = out / f"{name}.lock" lock_fd = os.open(str(lock_path), os.O_CREAT | os.O_RDWR, 0o644) @@ -45,28 +48,13 @@ def main() -> int: print("[osmsg-tick] previous tick still running, skipping", flush=True) return 0 + source_url = country_update_url(country) if country else resolve_url(url) db_path = out / f"{name}.duckdb" - cmd = [ - "osmsg", - "--name", - name, - "--output-dir", - str(out), - "--cache-dir", - str(cache), - ] - if country: - cmd.extend(["--country", country]) - source_url = country_update_url(country) - else: - cmd.extend(["--url", url]) - source_url = resolve_url(url) - if boundary: - cmd.extend(["--boundary", boundary]) - if psql_dsn: - cmd.extend(["--format", "psql", "--psql-dsn", psql_dsn]) - else: - cmd.extend(["--format", "parquet"]) + + extra_set = set(extra_args) + cmd = ["osmsg"] + extra_args + if not (extra_set & {"--all", "--keys"}): + cmd.append("--all") if _has_state(db_path, source_url): cmd.append("--update") diff --git 
a/osmsg/cli.py b/osmsg/cli.py index 4c064b6..70044dc 100644 --- a/osmsg/cli.py +++ b/osmsg/cli.py @@ -148,8 +148,14 @@ def main( formats: Annotated[list[Format] | None, typer.Option("--format", "-f", help="One or more output formats.")] = None, summary: Annotated[bool, typer.Option(help="Also write _summary.parquet + summary.md.")] = False, changeset: Annotated[bool, typer.Option(hidden=True)] = False, - all_tags: Annotated[bool, typer.Option("--all-tags", help="Track every tag key.")] = False, - key_value: Annotated[bool, typer.Option("--key-value", help="Store key=value combos. Implies --all-tags.")] = False, + all_stats: Annotated[ + bool, + typer.Option( + "--all", + help="Collect all tag key=value stats and changeset metadata (hashtags, editors).", + ), + ] = False, + keys_only: Annotated[bool, typer.Option("--keys", help="Collect tag key stats only (no value breakdown).")] = False, exact_lookup: Annotated[ bool, typer.Option("--exact-lookup", help="Hashtag whole-word match. Only meaningful with --hashtags.") ] = False, @@ -205,8 +211,7 @@ def main( hashtags=hashtags, length_tags=length, users_filter=users, - all_tags=all_tags or key_value, - key_value=key_value, + tag_mode="all" if all_stats else ("keys" if keys_only else "none"), exact_lookup=exact_lookup, changeset=changeset, summary=summary, diff --git a/osmsg/db/duckdb_schema.py b/osmsg/db/duckdb_schema.py index 5140bf7..b3d4321 100644 --- a/osmsg/db/duckdb_schema.py +++ b/osmsg/db/duckdb_schema.py @@ -9,10 +9,7 @@ created_at TIMESTAMPTZ, hashtags VARCHAR[], editor VARCHAR, - min_lon DOUBLE, - min_lat DOUBLE, - max_lon DOUBLE, - max_lat DOUBLE + geom GEOMETRY ); CREATE INDEX IF NOT EXISTS idx_changesets_created_at ON changesets(created_at); CREATE TABLE IF NOT EXISTS changeset_stats ( diff --git a/osmsg/db/ingest.py b/osmsg/db/ingest.py index a824c66..d6f3836 100644 --- a/osmsg/db/ingest.py +++ b/osmsg/db/ingest.py @@ -114,11 +114,14 @@ def pattern(name: str) -> str: if 
any(parquet_dir.glob("temp_*_users_*.parquet")): conn.execute(f"INSERT OR IGNORE INTO users SELECT uid, username FROM read_parquet('{pattern('users')}')") if any(parquet_dir.glob("temp_*_changesets_*.parquet")): + conn.execute("LOAD spatial") conn.execute( f""" INSERT OR IGNORE INTO changesets SELECT changeset_id, uid, created_at, hashtags, editor, - min_lon, min_lat, max_lon, max_lat + CASE WHEN min_lon IS NOT NULL + THEN ST_MakeEnvelope(min_lon, min_lat, max_lon, max_lat) + END FROM read_parquet('{pattern("changesets")}') """ ) diff --git a/osmsg/db/queries.py b/osmsg/db/queries.py index 942ba9f..e8bcef5 100644 --- a/osmsg/db/queries.py +++ b/osmsg/db/queries.py @@ -83,8 +83,7 @@ def _accumulate_tags( tag_stats: dict[str, dict[str, dict[str, Any]]], *, additional_tags: list[str] | None, - all_tags: bool, - key_value: bool, + tag_mode: str, length_tags: list[str] | None, ) -> None: if additional_tags: @@ -101,13 +100,13 @@ def _accumulate_tags( continue total = sum(float(v.get("len", 0) or 0) for v in vd.values()) target[f"{k}_len_m"] = round(target.get(f"{k}_len_m", 0) + total) - if all_tags: + if tag_mode != "none": tc = target.setdefault("tags_create", {}) tm = target.setdefault("tags_modify", {}) for key, vd in tag_stats.items(): tc[key] = tc.get(key, 0) + sum(int(v.get("c", 0)) for v in vd.values()) tm[key] = tm.get(key, 0) + sum(int(v.get("m", 0)) for v in vd.values()) - if key_value: + if tag_mode == "all": for value, stat in vd.items(): kv = f"{key}={value}" tc[kv] = tc.get(kv, 0) + int(stat.get("c", 0)) @@ -119,19 +118,18 @@ def attach_tag_stats( rows: list[dict[str, Any]], *, additional_tags: list[str] | None = None, - all_tags: bool = False, - key_value: bool = False, + tag_mode: str = "none", length_tags: list[str] | None = None, ) -> None: """In-place: parse the JSON tag_stats column once per row, then aggregate per user.""" if not rows: return - if not (additional_tags or all_tags or length_tags): + if not (additional_tags or tag_mode != "none" or 
length_tags): return by_uid = {r["uid"]: r for r in rows} for r in rows: - if all_tags: + if tag_mode != "none": r.setdefault("tags_create", {}) r.setdefault("tags_modify", {}) for k in additional_tags or []: @@ -153,12 +151,11 @@ def attach_tag_stats( by_uid[uid], payload, additional_tags=additional_tags, - all_tags=all_tags, - key_value=key_value, + tag_mode=tag_mode, length_tags=length_tags, ) - if all_tags: + if tag_mode != "none": for r in rows: r["tags_create"] = dict(sorted(r.get("tags_create", {}).items(), key=lambda x: -x[1])) r["tags_modify"] = dict(sorted(r.get("tags_modify", {}).items(), key=lambda x: -x[1])) @@ -168,8 +165,7 @@ def daily_summary( conn: duckdb.DuckDBPyConnection, *, additional_tags: list[str] | None = None, - all_tags: bool = False, - key_value: bool = False, + tag_mode: str = "none", length_tags: list[str] | None = None, ) -> list[dict[str, Any]]: """One row per UTC day. Requires `changesets` populated (--changeset / --hashtags).""" @@ -218,11 +214,11 @@ def daily_summary( if date in by_date: by_date[date]["editors"] = editors or [] - if not (additional_tags or all_tags or length_tags): + if not (additional_tags or tag_mode != "none" or length_tags): return rows for r in rows: - if all_tags: + if tag_mode != "none": r.setdefault("tags_create", {}) r.setdefault("tags_modify", {}) for k in additional_tags or []: @@ -248,12 +244,11 @@ def daily_summary( by_date[date], payload, additional_tags=additional_tags, - all_tags=all_tags, - key_value=key_value, + tag_mode=tag_mode, length_tags=length_tags, ) - if all_tags: + if tag_mode != "none": for r in rows: r["tags_create"] = dict(sorted(r.get("tags_create", {}).items(), key=lambda x: -x[1])) r["tags_modify"] = dict(sorted(r.get("tags_modify", {}).items(), key=lambda x: -x[1])) diff --git a/osmsg/db/schema.py b/osmsg/db/schema.py index fb5115b..1c46a7b 100644 --- a/osmsg/db/schema.py +++ b/osmsg/db/schema.py @@ -16,6 +16,7 @@ def close(conn: duckdb.DuckDBPyConnection) -> None: def 
create_tables(conn: duckdb.DuckDBPyConnection) -> None: + conn.execute("LOAD spatial") for stmt in DUCKDB_SCHEMA.strip().split(";"): stmt = stmt.strip() if stmt: diff --git a/osmsg/export/markdown.py b/osmsg/export/markdown.py index b8f3a2a..cfac160 100644 --- a/osmsg/export/markdown.py +++ b/osmsg/export/markdown.py @@ -57,7 +57,7 @@ def summary_markdown( end_date, additional_tags: list[str] | None = None, length_tags: list[str] | None = None, - all_tags: bool = False, + tag_mode: str = "none", fname: str = "stats", tm_stats: bool = False, ) -> Path: @@ -124,7 +124,7 @@ def _sum(*cols: str) -> int: total_m = sum(int(r.get(f"{k}_len_m", 0) or 0) for r in rows) parts.append(f"- {k} length created: {_human(round(total_m / 1000))} km") - if all_tags: + if tag_mode != "none": merged_create: dict[str, int] = {} merged_modify: dict[str, int] = {} for r in rows: diff --git a/osmsg/export/psql.py b/osmsg/export/psql.py index 1ef238c..2d51158 100644 --- a/osmsg/export/psql.py +++ b/osmsg/export/psql.py @@ -1,12 +1,4 @@ -"""PostgreSQL exporter via DuckDB's postgres extension. - -No new Python dep — DuckDB attaches the target Postgres database, mirrors the -osmsg schema, and runs `INSERT … SELECT` so the same DuckDB → Postgres copy -benefits from streaming. The tables created on the Postgres side mirror the -osmsg DuckDB schema, which makes both backends queryable identically. -""" - -from __future__ import annotations +"""PostgreSQL exporter via DuckDB's postgres extension.""" import duckdb @@ -14,9 +6,13 @@ def to_psql(conn: duckdb.DuckDBPyConnection, dsn: str) -> None: - """Push every osmsg table into the libpq DSN target. DSN must be trusted (ATTACH interpolation).""" + """Push every osmsg table to the libpq DSN target. + + DSN must be trusted — it is interpolated directly into the ATTACH statement. 
+ """ conn.execute("INSTALL postgres") conn.execute("LOAD postgres") + conn.execute("LOAD spatial") safe_dsn = dsn.replace("'", "''") conn.execute(f"ATTACH '{safe_dsn}' AS pg_target (TYPE postgres)") try: @@ -25,11 +21,10 @@ def to_psql(conn: duckdb.DuckDBPyConnection, dsn: str) -> None: if stmt: conn.execute(f"CALL postgres_execute('pg_target', $${stmt}$$)") - # Tables with natural primary keys: ON CONFLICT DO NOTHING is a no-op safety net. + _copy = "INSERT INTO pg_target.{t} SELECT * FROM {t} ON CONFLICT DO NOTHING" for table in ("users", "changesets", "changeset_stats"): - conn.execute(f"INSERT INTO pg_target.{table} SELECT * FROM {table} ON CONFLICT DO NOTHING") + conn.execute(_copy.format(t=table)) - # state is single-row-per-source: UPSERT to mirror the DuckDB-side truth. conn.execute( """ INSERT INTO pg_target.state (source_url, last_seq, last_ts, updated_at) diff --git a/osmsg/handlers.py b/osmsg/handlers.py index 8704441..ff1b829 100644 --- a/osmsg/handlers.py +++ b/osmsg/handlers.py @@ -172,7 +172,7 @@ def _accumulate(self, uid, uname, cs_id, version, tags, kind, way_nodes=None) -> length_keys = cfg["length"] or () track_length = len_m > 0 and action is Action.CREATE - if cfg["all_tags"]: + if cfg["tag_mode"] != "none": for k, v in tags: tv = stats.tag_stats.setdefault(k, {}).setdefault(v, TagValueStat()) tv.add(action) diff --git a/osmsg/pg_schema.py b/osmsg/pg_schema.py index 1afe55e..178670a 100644 --- a/osmsg/pg_schema.py +++ b/osmsg/pg_schema.py @@ -1,4 +1,5 @@ PG_SCHEMA = """ +CREATE EXTENSION IF NOT EXISTS postgis; CREATE TABLE IF NOT EXISTS users ( uid BIGINT PRIMARY KEY, username TEXT NOT NULL @@ -9,12 +10,10 @@ created_at TIMESTAMPTZ, hashtags TEXT[], editor TEXT, - min_lon DOUBLE PRECISION, - min_lat DOUBLE PRECISION, - max_lon DOUBLE PRECISION, - max_lat DOUBLE PRECISION + geom GEOMETRY(POLYGON) ); CREATE INDEX IF NOT EXISTS idx_changesets_created_at ON changesets(created_at); +CREATE INDEX IF NOT EXISTS idx_changesets_geom ON changesets 
USING GIST (geom); CREATE TABLE IF NOT EXISTS changeset_stats ( changeset_id BIGINT NOT NULL REFERENCES changesets(changeset_id), seq_id BIGINT NOT NULL, diff --git a/osmsg/pipeline.py b/osmsg/pipeline.py index de7a4d8..4c6adcb 100644 --- a/osmsg/pipeline.py +++ b/osmsg/pipeline.py @@ -56,8 +56,7 @@ class RunConfig: hashtags: list[str] | None = None length_tags: list[str] | None = None users_filter: list[str] | None = None - all_tags: bool = False - key_value: bool = False + tag_mode: str = "none" exact_lookup: bool = False changeset: bool = False summary: bool = False @@ -181,8 +180,7 @@ def _processing_config(cfg: RunConfig, *, parquet_dir: Path, geom_wkt: str | Non return { "hashtags": cfg.hashtags, "additional_tags": cfg.additional_tags, - "all_tags": cfg.all_tags, - "key_value": cfg.key_value, + "tag_mode": cfg.tag_mode, "length": cfg.length_tags, "exact_lookup": cfg.exact_lookup, "changeset_meta": cfg.changeset, @@ -303,8 +301,8 @@ def run(cfg: RunConfig) -> dict[str, Any]: cfg.changeset = cfg.changeset or not cfg.hashtags geom_wkt = (unary_union(geoms) if len(geoms) > 1 else geoms[0]).wkt - # summary/tm_stats read the changesets table — populate it even if user didn't ask. - if (cfg.tm_stats or cfg.summary) and not cfg.changeset and not cfg.hashtags: + # summary/tm_stats/--all read the changesets table — populate it even if user didn't ask. 
+ if (cfg.tm_stats or cfg.summary or cfg.tag_mode == "all") and not cfg.changeset and not cfg.hashtags: cfg.changeset = True max_workers = cfg.workers or _cpu_count() @@ -416,13 +414,12 @@ def run(cfg: RunConfig) -> dict[str, Any]: if cfg.changeset or cfg.hashtags: attach_metadata(conn, rows) - if cfg.additional_tags or cfg.all_tags or cfg.length_tags: + if cfg.additional_tags or cfg.tag_mode != "none" or cfg.length_tags: attach_tag_stats( conn, rows, additional_tags=cfg.additional_tags, - all_tags=cfg.all_tags, - key_value=cfg.key_value, + tag_mode=cfg.tag_mode, length_tags=cfg.length_tags, ) @@ -449,7 +446,7 @@ def run(cfg: RunConfig) -> dict[str, Any]: end_date=end_date_utc, additional_tags=cfg.additional_tags, length_tags=cfg.length_tags, - all_tags=cfg.all_tags, + tag_mode=cfg.tag_mode, fname=cfg.name, tm_stats=cfg.tm_stats, ) @@ -460,8 +457,7 @@ def run(cfg: RunConfig) -> dict[str, Any]: summary_rows = daily_summary( conn, additional_tags=cfg.additional_tags, - all_tags=cfg.all_tags, - key_value=cfg.key_value, + tag_mode=cfg.tag_mode, length_tags=cfg.length_tags, ) if summary_rows: @@ -480,7 +476,7 @@ def run(cfg: RunConfig) -> dict[str, Any]: end_date=end_date_utc, additional_tags=cfg.additional_tags, length_tags=cfg.length_tags, - all_tags=cfg.all_tags, + tag_mode=cfg.tag_mode, fname=cfg.name, tm_stats=cfg.tm_stats, ) diff --git a/tests/conftest.py b/tests/conftest.py index ff2da03..eb0b0c1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -65,8 +65,9 @@ def _populate(conn: duckdb.DuckDBPyConnection) -> duckdb.DuckDBPyConnection: conn.execute( """ INSERT INTO changesets - VALUES (1, 10, '2026-04-01 10:00:00+00', ['#mapathon'], 'iD', 85.0, 27.0, 85.5, 27.5), - (2, 20, '2026-04-02 09:00:00+00', NULL, 'JOSM', NULL, NULL, NULL, NULL) + VALUES (1, 10, '2026-04-01 10:00:00+00', ['#mapathon'], 'iD', + ST_MakeEnvelope(85.0, 27.0, 85.5, 27.5)), + (2, 20, '2026-04-02 09:00:00+00', NULL, 'JOSM', NULL) """ ) conn.execute( @@ -87,8 +88,7 @@ def 
changefile_config(): return { "hashtags": None, "additional_tags": ["building", "highway"], - "all_tags": False, - "key_value": False, + "tag_mode": "none", "length": None, "exact_lookup": False, "changeset_meta": False, diff --git a/tests/test_api.py b/tests/test_api.py index 221d34e..4dd04b3 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -54,10 +54,11 @@ def _stats_app(monkeypatch, fake_fetch): def test_user_stats_endpoint_returns_expected_response(monkeypatch): - async def fake_fetch_user_stats(*, start, end, hashtag, limit, offset): + async def fake_fetch_user_stats(*, start, end, hashtag, tags, limit, offset): assert start.isoformat() == "2026-05-01T00:00:00+00:00" assert end.isoformat() == "2026-05-02T00:00:00+00:00" assert hashtag == ["#mapathon", "#roads"] + assert tags is True assert limit == 1 assert offset == 0 return [ @@ -78,6 +79,7 @@ async def fake_fetch_user_stats(*, start, end, hashtag, limit, offset): "poi_modify": 1, "map_changes": 58, "rank": 1, + "tag_stats": {"building": {"yes": {"c": 3, "m": 0}}}, } ] @@ -99,6 +101,7 @@ async def fake_fetch_user_stats(*, start, end, hashtag, limit, offset): "start": "2026-05-01T00:00:00Z", "end": "2026-05-02T00:00:00Z", "hashtag": ["#mapathon", "#roads"], + "tags": True, "limit": 1, "offset": 0, "users": [ @@ -119,6 +122,7 @@ async def fake_fetch_user_stats(*, start, end, hashtag, limit, offset): "poi_modify": 1, "map_changes": 58, "rank": 1, + "tag_stats": {"building": {"yes": {"c": 3, "m": 0, "len": None}}}, } ], } @@ -136,3 +140,47 @@ async def fake_fetch_user_stats(**kwargs): assert response.status_code == 400 assert response.json()["detail"] == "start must be before end" + + +def test_user_stats_endpoint_tags_false_drops_tag_stats(monkeypatch): + async def fake_fetch_user_stats(*, tags, **_kwargs): + assert tags is False + return [ + { + "uid": 10, + "name": "alice", + "changesets": 1, + "nodes_create": 0, + "nodes_modify": 0, + "nodes_delete": 0, + "ways_create": 0, + "ways_modify": 0, + 
"ways_delete": 0, + "rels_create": 0, + "rels_modify": 0, + "rels_delete": 0, + "poi_create": 0, + "poi_modify": 0, + "map_changes": 0, + "rank": 1, + "tag_stats": None, + } + ] + + with TestClient(_stats_app(monkeypatch, fake_fetch_user_stats)) as client: + response = client.get("/api/v1/stats", params={"tags": "false"}) + + assert response.status_code == 200 + body = response.json() + assert body["tags"] is False + assert body["users"][0]["tag_stats"] is None + + +def test_user_stats_sql_omits_tag_ctes_when_tags_false(): + from api.queries import _user_stats_sql + + sql_with = _user_stats_sql(filter_dates=False, filter_hashtags=False, include_tags=True) + sql_without = _user_stats_sql(filter_dates=False, filter_hashtags=False, include_tags=False) + assert "tag_per_user" in sql_with + assert "tag_per_user" not in sql_without + assert "NULL::jsonb AS tag_stats" in sql_without diff --git a/tests/test_cli.py b/tests/test_cli.py index 65b60ed..f6ec708 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -140,8 +140,8 @@ def fake_run(cfg): ["--users", "alice", "--users", "bob"], ["--hashtags", "mapathon"], ["--hashtags", "mapathon", "--exact-lookup"], - ["--all-tags"], - ["--all-tags", "--key-value"], + ["--keys"], + ["--all"], ["--workers", "2"], ["--rows", "5"], ["--name", "myrun"], diff --git a/tests/test_db_queries.py b/tests/test_db_queries.py index 8e5b859..625b0f3 100644 --- a/tests/test_db_queries.py +++ b/tests/test_db_queries.py @@ -24,11 +24,12 @@ def populated_db(fresh_db): conn.execute("INSERT INTO users VALUES (10, 'alice'), (20, 'bob')") conn.execute( """ - INSERT INTO changesets (changeset_id, uid, created_at, hashtags, editor, min_lon, min_lat, max_lon, max_lat) + INSERT INTO changesets (changeset_id, uid, created_at, hashtags, editor, geom) VALUES - (1, 10, '2026-04-01 10:00:00+00', ['#hotosm-project-1', '#mapathon'], 'iD', 85.0, 27.0, 85.5, 27.5), - (2, 10, '2026-04-01 14:00:00+00', ['#mapathon'], 'iD', NULL, NULL, NULL, NULL), - (3, 20, 
'2026-04-02 09:00:00+00', NULL, 'JOSM', NULL, NULL, NULL, NULL) + (1, 10, '2026-04-01 10:00:00+00', ['#hotosm-project-1', '#mapathon'], 'iD', + ST_MakeEnvelope(85.0, 27.0, 85.5, 27.5)), + (2, 10, '2026-04-01 14:00:00+00', ['#mapathon'], 'iD', NULL), + (3, 20, '2026-04-02 09:00:00+00', NULL, 'JOSM', NULL) """ ) tag_stats_alice = json.dumps( @@ -114,15 +115,22 @@ def test_attach_tag_stats_with_length(populated_db): def test_attach_tag_stats_all_tags_with_key_value(populated_db): rows = user_stats(populated_db) - attach_tag_stats(populated_db, rows, all_tags=True, key_value=True) + attach_tag_stats(populated_db, rows, tag_mode="all") alice = next(r for r in rows if r["name"] == "alice") - # plain key total assert alice["tags_create"]["building"] == 5 - # key=value combo assert alice["tags_create"]["building=yes"] == 5 assert alice["tags_create"]["highway=residential"] == 2 +def test_attach_tag_stats_keys_mode_omits_value_breakdown(populated_db): + rows = user_stats(populated_db) + attach_tag_stats(populated_db, rows, tag_mode="keys") + alice = next(r for r in rows if r["name"] == "alice") + assert alice["tags_create"]["building"] == 5 + assert "building=yes" not in alice["tags_create"] + assert "highway=residential" not in alice["tags_create"] + + def test_daily_summary_groups_by_utc_date(populated_db): rows = daily_summary(populated_db) by_date = {r["date"]: r for r in rows} diff --git a/tests/test_export.py b/tests/test_export.py index 7506fd9..7faf389 100644 --- a/tests/test_export.py +++ b/tests/test_export.py @@ -93,7 +93,7 @@ def test_summary_markdown_writes_top_users_and_totals(tmp_path: Path): output_path=tmp_path / "stats_summary.md", start_date="2026-04-01", end_date="2026-04-02", - all_tags=True, + tag_mode="all", fname="stats", ) body = out.read_text() diff --git a/tests/test_handlers.py b/tests/test_handlers.py index 72b81de..79636da 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -554,7 +554,7 @@ def 
test_changefile_handler_tracks_specified_tag_keys(osc_factory, changefile_co def test_changefile_handler_all_tags_captures_everything(osc_factory, changefile_config): - changefile_config["all_tags"] = True + changefile_config["tag_mode"] = "all" changefile_config["additional_tags"] = None osc = osc_factory( "005.osc", diff --git a/tests/test_psql_export.py b/tests/test_psql_export.py index d355158..7efae82 100644 --- a/tests/test_psql_export.py +++ b/tests/test_psql_export.py @@ -10,8 +10,11 @@ import re import duckdb +import pyarrow.parquet as pq import pytest +from osmsg.db.queries import user_stats +from osmsg.export.parquet import to_parquet from osmsg.export.psql import PG_SCHEMA, to_psql @@ -43,21 +46,100 @@ def test_pg_schema_statements_each_parse_with_postgres_extension(): """Each individual CREATE statement is well-formed enough that the postgres extension's parser would accept it — we use DuckDB's own parser as an approximation (DuckDB's CREATE TABLE syntax is compatible).""" - duckdb_clone = PG_SCHEMA.replace("DOUBLE PRECISION", "DOUBLE").replace("JSONB", "JSON").replace("TEXT", "VARCHAR") + duckdb_clone = ( + PG_SCHEMA.replace("DOUBLE PRECISION", "DOUBLE") + .replace("JSONB", "JSON") + .replace("TEXT", "VARCHAR") + .replace("GEOMETRY(POLYGON)", "GEOMETRY") + ) conn = duckdb.connect(":memory:") + conn.execute("LOAD spatial") for stmt in [s.strip() for s in duckdb_clone.split(";") if s.strip()]: + upper = stmt.upper() + if upper.startswith("CREATE EXTENSION") or "USING GIST" in upper: + continue conn.execute(stmt) tables = {r[0] for r in conn.execute("SELECT table_name FROM information_schema.tables").fetchall()} assert {"users", "changesets", "changeset_stats", "state"} <= tables +EXPECTED_USER_STATS = { + "alice": {"changesets": 1, "nodes_create": 30, "ways_create": 8, "poi_create": 5, "map_changes": 44}, + "bob": {"changesets": 1, "nodes_create": 50, "ways_create": 0, "poi_create": 50, "map_changes": 50}, +} + + +def _assert_user_stats_match(actual: 
list[dict], expected: dict[str, dict[str, int]]) -> None: + by_name = {r["name"]: r for r in actual} + assert set(by_name) == set(expected), f"users mismatch: {set(by_name)} vs {set(expected)}" + for name, fields in expected.items(): + for col, want in fields.items(): + assert by_name[name][col] == want, f"{name}.{col}: got {by_name[name][col]} want {want}" + + +def test_duckdb_user_stats_match_seed_data(fresh_db, populated_db_factory): + """Anchors EXPECTED_USER_STATS against the seed fixture; if this drifts, every + other roundtrip in this file silently compares against wrong numbers.""" + rows = user_stats(populated_db_factory(fresh_db)) + _assert_user_stats_match(rows, EXPECTED_USER_STATS) + + +def test_user_stats_roundtrip_through_parquet(tmp_path, fresh_db, populated_db_factory): + rows = user_stats(populated_db_factory(fresh_db)) + out = to_parquet(rows, tmp_path / "stats.parquet") + + table = pq.read_table(out).to_pylist() + _assert_user_stats_match(table, EXPECTED_USER_STATS) + + @pytest.mark.network @pytest.mark.skipif(not os.environ.get("OSMSG_PG_DSN"), reason="OSMSG_PG_DSN not set; live PG push not exercised") -def test_live_push_to_postgres(fresh_db, populated_db_factory): - """Live test: push a populated DuckDB into the PG instance specified by OSMSG_PG_DSN. 
- - Pre-requisite — a reachable PG with rights to create tables, e.g.: - export OSMSG_PG_DSN="host=localhost port=5432 dbname=osmsg_test user=osm password=osm" - """ +def test_user_stats_roundtrip_through_postgres(fresh_db, populated_db_factory): populated = populated_db_factory(fresh_db) - to_psql(populated, os.environ["OSMSG_PG_DSN"]) + dsn = os.environ["OSMSG_PG_DSN"] + + populated.execute("INSTALL postgres") + populated.execute("LOAD postgres") + safe_dsn = dsn.replace("'", "''") + populated.execute(f"ATTACH '{safe_dsn}' AS pg_wipe (TYPE postgres)") + try: + for stmt in PG_SCHEMA.strip().split(";"): + stmt = stmt.strip() + if stmt: + populated.execute(f"CALL postgres_execute('pg_wipe', $${stmt}$$)") + for table in ("changeset_stats", "changesets", "users", "state"): + populated.execute(f"CALL postgres_execute('pg_wipe', $$DELETE FROM {table}$$)") + finally: + populated.execute("DETACH pg_wipe") + + to_psql(populated, dsn) + + verifier = duckdb.connect(":memory:") + verifier.execute("INSTALL postgres") + verifier.execute("LOAD postgres") + verifier.execute(f"ATTACH '{safe_dsn}' AS pg_src (TYPE postgres, READ_ONLY)") + try: + rows = verifier.execute( + """ + SELECT u.username AS name, + COUNT(DISTINCT cs.changeset_id) AS changesets, + SUM(cs.nodes_created) AS nodes_create, + SUM(cs.ways_created) AS ways_create, + SUM(cs.poi_created) AS poi_create, + SUM( + cs.nodes_created + cs.nodes_modified + cs.nodes_deleted + + cs.ways_created + cs.ways_modified + cs.ways_deleted + + cs.rels_created + cs.rels_modified + cs.rels_deleted + ) AS map_changes + FROM pg_src.users u + JOIN pg_src.changeset_stats cs ON u.uid = cs.uid + GROUP BY u.username + """ + ).fetchall() + finally: + verifier.execute("DETACH pg_src") + verifier.close() + + cols = ("name", "changesets", "nodes_create", "ways_create", "poi_create", "map_changes") + actual = [dict(zip(cols, r, strict=True)) for r in rows] + _assert_user_stats_match(actual, EXPECTED_USER_STATS) diff --git 
a/tests/test_stats_correctness.py b/tests/test_stats_correctness.py index 01c6d32..e5a8924 100644 --- a/tests/test_stats_correctness.py +++ b/tests/test_stats_correctness.py @@ -62,7 +62,7 @@ def _flush(handler: ChangefileHandler, parquet_dir, pid: int = 1, batch: int = 1 def test_user_stats_match_hand_counted_changes(tmp_path, osc_factory, changefile_config): """Build a deterministic .osc, run the full pipeline, assert every counter.""" - changefile_config["all_tags"] = True + changefile_config["tag_mode"] = "all" changefile_config["additional_tags"] = None osc = osc_factory( From 669fddf8e7e395f0986be4b563f447d61d2180b2 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 7 May 2026 23:24:09 +0200 Subject: [PATCH 32/49] fix(ci): fixes spatial extension loading bug api --- osmsg/db/ingest.py | 1 + osmsg/db/schema.py | 1 + osmsg/export/psql.py | 1 + tests/test_psql_export.py | 1 + 4 files changed, 4 insertions(+) diff --git a/osmsg/db/ingest.py b/osmsg/db/ingest.py index d6f3836..edd534b 100644 --- a/osmsg/db/ingest.py +++ b/osmsg/db/ingest.py @@ -114,6 +114,7 @@ def pattern(name: str) -> str: if any(parquet_dir.glob("temp_*_users_*.parquet")): conn.execute(f"INSERT OR IGNORE INTO users SELECT uid, username FROM read_parquet('{pattern('users')}')") if any(parquet_dir.glob("temp_*_changesets_*.parquet")): + conn.execute("INSTALL spatial") conn.execute("LOAD spatial") conn.execute( f""" diff --git a/osmsg/db/schema.py b/osmsg/db/schema.py index 1c46a7b..0291c1b 100644 --- a/osmsg/db/schema.py +++ b/osmsg/db/schema.py @@ -16,6 +16,7 @@ def close(conn: duckdb.DuckDBPyConnection) -> None: def create_tables(conn: duckdb.DuckDBPyConnection) -> None: + conn.execute("INSTALL spatial") conn.execute("LOAD spatial") for stmt in DUCKDB_SCHEMA.strip().split(";"): stmt = stmt.strip() diff --git a/osmsg/export/psql.py b/osmsg/export/psql.py index 2d51158..f472a3e 100644 --- a/osmsg/export/psql.py +++ b/osmsg/export/psql.py @@ -12,6 +12,7 @@ def to_psql(conn: 
duckdb.DuckDBPyConnection, dsn: str) -> None: """ conn.execute("INSTALL postgres") conn.execute("LOAD postgres") + conn.execute("INSTALL spatial") conn.execute("LOAD spatial") safe_dsn = dsn.replace("'", "''") conn.execute(f"ATTACH '{safe_dsn}' AS pg_target (TYPE postgres)") diff --git a/tests/test_psql_export.py b/tests/test_psql_export.py index 7efae82..35b5cb8 100644 --- a/tests/test_psql_export.py +++ b/tests/test_psql_export.py @@ -53,6 +53,7 @@ def test_pg_schema_statements_each_parse_with_postgres_extension(): .replace("GEOMETRY(POLYGON)", "GEOMETRY") ) conn = duckdb.connect(":memory:") + conn.execute("INSTALL spatial") conn.execute("LOAD spatial") for stmt in [s.strip() for s in duckdb_clone.split(";") if s.strip()]: upper = stmt.upper() From 66f76309f459313f3faf31ba8ee30ed729684eb6 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Fri, 8 May 2026 00:00:01 +0200 Subject: [PATCH 33/49] fix(changeset): null bug on bbox when newer one appears open changeset stats --- api/db.py | 8 +- api/routers/v1.py | 20 ++-- osmsg/db/duckdb_schema.py | 7 +- osmsg/db/ingest.py | 20 ++++ osmsg/export/psql.py | 19 +++- tests/test_api.py | 206 ++++++++++++++++++++++++++++++++++++++ tests/test_psql_export.py | 197 +++++++++++++++++++++++++++++++++++- 7 files changed, 462 insertions(+), 15 deletions(-) diff --git a/api/db.py b/api/db.py index c50dee1..c23b170 100644 --- a/api/db.py +++ b/api/db.py @@ -1,3 +1,4 @@ +import json import os import asyncpg @@ -17,10 +18,15 @@ def get_database_url() -> str: return database_url +async def _init_connection(conn: asyncpg.Connection) -> None: + await conn.set_type_codec("jsonb", encoder=json.dumps, decoder=json.loads, schema="pg_catalog") + await conn.set_type_codec("json", encoder=json.dumps, decoder=json.loads, schema="pg_catalog") + + async def open_pool() -> None: global _pool if _pool is None: - _pool = await asyncpg.create_pool(dsn=get_database_url(), min_size=1, max_size=10) + _pool = await 
asyncpg.create_pool(dsn=get_database_url(), min_size=1, max_size=10, init=_init_connection) async def close_pool() -> None: diff --git a/api/routers/v1.py b/api/routers/v1.py index 15a1be2..bf91ede 100644 --- a/api/routers/v1.py +++ b/api/routers/v1.py @@ -1,4 +1,5 @@ from datetime import UTC, datetime +from typing import Annotated from litestar import Controller, Router, get from litestar.exceptions import HTTPException @@ -32,12 +33,19 @@ class StatsController(Controller): @get() async def get_user_stats( self, - start: datetime | None = None, - end: datetime | None = None, - hashtag: list[str] | None = None, - tags: bool = True, - limit: int = Parameter(default=100, ge=1, le=1000), - offset: int = Parameter(default=0, ge=0), + start: Annotated[ + datetime | None, Parameter(description="Inclusive UTC lower bound (ISO 8601). If omitted, no lower bound.") + ] = None, + end: Annotated[ + datetime | None, + Parameter(description="Exclusive UTC upper bound (ISO 8601). Defaults to now if start is set."), + ] = None, + hashtag: Annotated[ + list[str] | None, Parameter(description="Filter to changesets carrying any of these hashtags. Repeatable.") + ] = None, + tags: Annotated[bool, Parameter(description="Include per-user tag_stats breakdown in the response.")] = True, + limit: Annotated[int, Parameter(ge=1, le=1000, description="Page size (1–1000).")] = 100, + offset: Annotated[int, Parameter(ge=0, description="Page offset.")] = 0, ) -> UserStatsResponse: start = start or (datetime.min.replace(tzinfo=UTC) if end else None) end = end or (datetime.now(tz=UTC) if start else None) diff --git a/osmsg/db/duckdb_schema.py b/osmsg/db/duckdb_schema.py index b3d4321..ce7e2e5 100644 --- a/osmsg/db/duckdb_schema.py +++ b/osmsg/db/duckdb_schema.py @@ -1,3 +1,4 @@ +# No FKs: DuckDB rejects UPDATE on FK-referenced LIST/GEOMETRY columns, which would block changeset upgrades. 
DUCKDB_SCHEMA = """ CREATE TABLE IF NOT EXISTS users ( uid BIGINT PRIMARY KEY, @@ -5,7 +6,7 @@ ); CREATE TABLE IF NOT EXISTS changesets ( changeset_id BIGINT PRIMARY KEY, - uid BIGINT NOT NULL REFERENCES users(uid), + uid BIGINT NOT NULL, created_at TIMESTAMPTZ, hashtags VARCHAR[], editor VARCHAR, @@ -13,9 +14,9 @@ ); CREATE INDEX IF NOT EXISTS idx_changesets_created_at ON changesets(created_at); CREATE TABLE IF NOT EXISTS changeset_stats ( - changeset_id BIGINT NOT NULL REFERENCES changesets(changeset_id), + changeset_id BIGINT NOT NULL, seq_id BIGINT NOT NULL, - uid BIGINT NOT NULL REFERENCES users(uid), + uid BIGINT NOT NULL, nodes_created INTEGER DEFAULT 0, nodes_modified INTEGER DEFAULT 0, nodes_deleted INTEGER DEFAULT 0, diff --git a/osmsg/db/ingest.py b/osmsg/db/ingest.py index edd534b..abf7d85 100644 --- a/osmsg/db/ingest.py +++ b/osmsg/db/ingest.py @@ -126,6 +126,26 @@ def pattern(name: str) -> str: FROM read_parquet('{pattern("changesets")}') """ ) + # OSM re-emits changesets monotonically richer each time; newer non-NULL columns win, NULL never downgrades. 
+ conn.execute( + f""" + UPDATE changesets c + SET created_at = COALESCE(src.created_at, c.created_at), + hashtags = COALESCE(src.hashtags, c.hashtags), + editor = COALESCE(src.editor, c.editor), + geom = COALESCE(src.geom, c.geom) + FROM ( + SELECT changeset_id, created_at, hashtags, editor, + CASE WHEN min_lon IS NOT NULL + THEN ST_MakeEnvelope(min_lon, min_lat, max_lon, max_lat) + END AS geom + FROM read_parquet('{pattern("changesets")}') + ) src + WHERE c.changeset_id = src.changeset_id + AND (src.created_at IS NOT NULL OR src.hashtags IS NOT NULL + OR src.editor IS NOT NULL OR src.geom IS NOT NULL) + """ + ) if any(parquet_dir.glob("temp_*_changeset_stats_*.parquet")): conn.execute( f""" diff --git a/osmsg/export/psql.py b/osmsg/export/psql.py index f472a3e..aa19c72 100644 --- a/osmsg/export/psql.py +++ b/osmsg/export/psql.py @@ -22,9 +22,22 @@ def to_psql(conn: duckdb.DuckDBPyConnection, dsn: str) -> None: if stmt: conn.execute(f"CALL postgres_execute('pg_target', $${stmt}$$)") - _copy = "INSERT INTO pg_target.{t} SELECT * FROM {t} ON CONFLICT DO NOTHING" - for table in ("users", "changesets", "changeset_stats"): - conn.execute(_copy.format(t=table)) + conn.execute("INSERT INTO pg_target.users SELECT * FROM users ON CONFLICT DO NOTHING") + + # Mirrors the DuckDB-side merge: newer non-NULL wins, NULL never downgrades. 
+ conn.execute( + """ + INSERT INTO pg_target.changesets AS c (changeset_id, uid, created_at, hashtags, editor, geom) + SELECT changeset_id, uid, created_at, hashtags, editor, geom FROM changesets + ON CONFLICT (changeset_id) DO UPDATE SET + created_at = COALESCE(EXCLUDED.created_at, c.created_at), + hashtags = COALESCE(EXCLUDED.hashtags, c.hashtags), + editor = COALESCE(EXCLUDED.editor, c.editor), + geom = COALESCE(EXCLUDED.geom, c.geom) + """ + ) + + conn.execute("INSERT INTO pg_target.changeset_stats SELECT * FROM changeset_stats ON CONFLICT DO NOTHING") conn.execute( """ diff --git a/tests/test_api.py b/tests/test_api.py index 4dd04b3..0cf4d9d 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,12 +1,19 @@ +import json +import os +from contextlib import asynccontextmanager +from datetime import UTC, datetime, timedelta from importlib import import_module +import pytest from litestar import Litestar from litestar.testing import TestClient from api import app as api_app from api.app import health +from api.db import close_pool, ensure_schema, open_pool from api.pg_schema import PG_SCHEMA as API_PG_SCHEMA from api.routers.v1 import normalize_hashtags, v1_router +from osmsg.export.psql import to_psql from osmsg.pg_schema import PG_SCHEMA as CLI_PG_SCHEMA v1_module = import_module("api.routers.v1") @@ -184,3 +191,202 @@ def test_user_stats_sql_omits_tag_ctes_when_tags_false(): assert "tag_per_user" in sql_with assert "tag_per_user" not in sql_without assert "NULL::jsonb AS tag_stats" in sql_without + + +def _seed_pg_via_to_psql(fresh_db, populated_db_factory, dsn): + populated = populated_db_factory(fresh_db) + populated.execute( + "UPDATE changeset_stats SET tag_stats = ?::JSON WHERE changeset_id = 1", + [ + json.dumps( + { + "building": {"yes": {"c": 5, "m": 1}, "house": {"c": 2, "m": 0}}, + "highway": {"residential": {"c": 3, "m": 0, "len": 245.7}}, + } + ) + ], + ) + populated.execute( + "UPDATE changeset_stats SET tag_stats = ?::JSON WHERE changeset_id 
= 2", + [json.dumps({"natural": {"tree": {"c": 50, "m": 0}}})], + ) + safe_dsn = dsn.replace("'", "''") + import duckdb + + wiper = duckdb.connect(":memory:") + wiper.execute("INSTALL postgres") + wiper.execute("LOAD postgres") + wiper.execute(f"ATTACH '{safe_dsn}' AS pg_w (TYPE postgres)") + try: + for table in ("changeset_stats", "changesets", "users", "state"): + wiper.execute(f"CALL postgres_execute('pg_w', $$DELETE FROM {table}$$)") + finally: + wiper.execute("DETACH pg_w") + wiper.close() + to_psql(populated, dsn) + + +@asynccontextmanager +async def _api_lifespan(_app): + await open_pool() + await ensure_schema() + try: + yield + finally: + await close_pool() + + +def _live_api_app() -> Litestar: + return Litestar(route_handlers=[health, v1_router], lifespan=[_api_lifespan]) + + +@pytest.fixture +def live_api_client(monkeypatch, fresh_db, populated_db_factory): + dsn = os.environ.get("OSMSG_PG_DSN") + if not dsn: + pytest.skip("OSMSG_PG_DSN not set; live API integration not exercised") + pairs = [kv.strip() for kv in dsn.split() if "=" in kv] + parts = dict(kv.split("=", 1) for kv in pairs) + db_url = ( + f"postgresql://{parts.get('user', 'osmsg')}:{parts.get('password', 'osmsg')}" + f"@{parts.get('host', 'localhost')}:{parts.get('port', '5432')}/{parts.get('dbname', 'osmsg')}" + ) + monkeypatch.setenv("DATABASE_URL", db_url) + + _seed_pg_via_to_psql(fresh_db, populated_db_factory, dsn) + with TestClient(_live_api_app()) as client: + yield client + + +@pytest.mark.network +def test_live_api_stats_default_returns_dicts_not_strings(live_api_client): + r = live_api_client.get("/api/v1/stats") + assert r.status_code == 200, r.text + body = r.json() + assert body["tags"] is True + assert body["count"] == 2 + by_name = {u["name"]: u for u in body["users"]} + assert isinstance(by_name["alice"]["tag_stats"], dict) + assert by_name["alice"]["tag_stats"]["building"]["yes"]["c"] == 5 + assert by_name["alice"]["tag_stats"]["building"]["yes"]["m"] == 1 + assert 
by_name["alice"]["tag_stats"]["highway"]["residential"]["len"] == 245.7 + assert by_name["bob"]["tag_stats"]["natural"]["tree"]["c"] == 50 + + +@pytest.mark.network +def test_live_api_stats_tags_false_skips_tag_stats(live_api_client): + r = live_api_client.get("/api/v1/stats", params={"tags": "false"}) + assert r.status_code == 200, r.text + body = r.json() + assert body["tags"] is False + for u in body["users"]: + assert u["tag_stats"] is None + + +@pytest.mark.network +def test_live_api_stats_user_totals_match_seed(live_api_client): + r = live_api_client.get("/api/v1/stats") + by_name = {u["name"]: u for u in r.json()["users"]} + alice = by_name["alice"] + bob = by_name["bob"] + assert alice == { + **alice, + "changesets": 1, + "nodes_create": 30, + "ways_create": 8, + "poi_create": 5, + "map_changes": 44, + "rank": 2, + } + assert bob == { + **bob, + "changesets": 1, + "nodes_create": 50, + "ways_create": 0, + "poi_create": 50, + "map_changes": 50, + "rank": 1, + } + + +@pytest.mark.network +def test_live_api_stats_hashtag_filters_to_matching_changesets(live_api_client): + r = live_api_client.get("/api/v1/stats", params={"hashtag": "mapathon"}) + assert r.status_code == 200 + body = r.json() + assert body["hashtag"] == ["#mapathon"] + names = {u["name"] for u in body["users"]} + assert names == {"alice"} + + +@pytest.mark.network +def test_live_api_stats_date_range_filters_changesets(live_api_client): + r = live_api_client.get( + "/api/v1/stats", + params={"start": "2026-04-02T00:00:00Z", "end": "2026-04-03T00:00:00Z"}, + ) + assert r.status_code == 200 + body = r.json() + names = {u["name"] for u in body["users"]} + assert names == {"bob"} + + +@pytest.mark.network +def test_live_api_stats_pagination(live_api_client): + page1 = live_api_client.get("/api/v1/stats", params={"limit": 1, "offset": 0}).json() + page2 = live_api_client.get("/api/v1/stats", params={"limit": 1, "offset": 1}).json() + assert page1["limit"] == 1 and page1["offset"] == 0 + assert 
page2["limit"] == 1 and page2["offset"] == 1 + assert len(page1["users"]) == 1 + assert len(page2["users"]) == 1 + assert page1["users"][0]["name"] != page2["users"][0]["name"] + + +@pytest.mark.network +def test_live_api_stats_limit_validation_rejects_zero(live_api_client): + r = live_api_client.get("/api/v1/stats", params={"limit": 0}) + assert r.status_code == 400 + + +@pytest.mark.network +def test_live_api_stats_limit_validation_rejects_too_large(live_api_client): + r = live_api_client.get("/api/v1/stats", params={"limit": 1001}) + assert r.status_code == 400 + + +@pytest.mark.network +def test_live_api_stats_offset_validation_rejects_negative(live_api_client): + r = live_api_client.get("/api/v1/stats", params={"offset": -1}) + assert r.status_code == 400 + + +@pytest.mark.network +def test_live_api_stats_response_echoes_query(live_api_client): + start = datetime(2026, 4, 1, tzinfo=UTC) + end = start + timedelta(days=2) + r = live_api_client.get( + "/api/v1/stats", + params={ + "start": start.isoformat().replace("+00:00", "Z"), + "end": end.isoformat().replace("+00:00", "Z"), + "hashtag": "mapathon", + "tags": "true", + "limit": 10, + "offset": 0, + }, + ) + body = r.json() + assert body["start"] == "2026-04-01T00:00:00Z" + assert body["end"] == "2026-04-03T00:00:00Z" + assert body["hashtag"] == ["#mapathon"] + assert body["tags"] is True + assert body["limit"] == 10 + assert body["offset"] == 0 + + +@pytest.mark.network +def test_live_api_health_reports_seeded_state(live_api_client): + r = live_api_client.get("/health") + assert r.status_code == 200 + body = r.json() + assert body["status"] == "ok" diff --git a/tests/test_psql_export.py b/tests/test_psql_export.py index 35b5cb8..48b07fc 100644 --- a/tests/test_psql_export.py +++ b/tests/test_psql_export.py @@ -79,8 +79,7 @@ def _assert_user_stats_match(actual: list[dict], expected: dict[str, dict[str, i def test_duckdb_user_stats_match_seed_data(fresh_db, populated_db_factory): - """Anchors 
EXPECTED_USER_STATS against the seed fixture; if this drifts, every - other roundtrip in this file silently compares against wrong numbers.""" + """Anchor for EXPECTED_USER_STATS — if it drifts, every other roundtrip test silently lies.""" rows = user_stats(populated_db_factory(fresh_db)) _assert_user_stats_match(rows, EXPECTED_USER_STATS) @@ -144,3 +143,197 @@ def test_user_stats_roundtrip_through_postgres(fresh_db, populated_db_factory): cols = ("name", "changesets", "nodes_create", "ways_create", "poi_create", "map_changes") actual = [dict(zip(cols, r, strict=True)) for r in rows] _assert_user_stats_match(actual, EXPECTED_USER_STATS) + + +def test_merge_parquet_upgrades_empty_changeset_when_richer_data_arrives(fresh_db, tmp_path): + """Empty stub from tick 1 must be upgraded to richer data when tick 2 arrives.""" + import json as _json + + from osmsg.db.ingest import flush_rows_to_parquet, merge_parquet_files + + flush_rows_to_parquet( + parquet_dir=tmp_path / "round1", + pid=1, + batch_index=0, + users=[(99, "lexoa")], + changesets=[(182308935, 99, None, None, None, None, None, None, None)], + changeset_stats=[], + ) + merge_parquet_files(fresh_db, tmp_path / "round1", cleanup=True) + + row = fresh_db.execute( + "SELECT geom IS NULL, editor, hashtags FROM changesets WHERE changeset_id = 182308935" + ).fetchone() + assert row == (True, None, None), f"round 1 expected empty stub, got {row}" + + flush_rows_to_parquet( + parquet_dir=tmp_path / "round2", + pid=2, + batch_index=0, + users=[(99, "lexoa")], + changesets=[(182308935, 99, None, ["#mapathon"], "iD", 85.0, 27.0, 85.5, 27.5)], + changeset_stats=[ + ( + 182308935, + 5000, + 99, + 10, + 0, + 0, + 3, + 0, + 0, + 0, + 0, + 0, + 5, + 0, + _json.dumps({"building": {"yes": {"c": 3, "m": 0}}}), + ) + ], + ) + merge_parquet_files(fresh_db, tmp_path / "round2", cleanup=True) + + geom_wkt, editor, hashtags = fresh_db.execute( + "SELECT ST_AsText(geom), editor, hashtags FROM changesets WHERE changeset_id = 182308935" + 
).fetchone() + assert "POLYGON" in geom_wkt + assert editor == "iD" + assert hashtags == ["#mapathon"] + + stats = fresh_db.execute( + "SELECT COUNT(*), SUM(nodes_created) FROM changeset_stats WHERE changeset_id = 182308935" + ).fetchone() + assert stats == (1, 10) + + +def test_merge_parquet_keeps_existing_geom_when_new_row_has_null(fresh_db, tmp_path): + """A NULL src column must not overwrite existing non-NULL data.""" + from osmsg.db.ingest import flush_rows_to_parquet, merge_parquet_files + + flush_rows_to_parquet( + parquet_dir=tmp_path / "rich", + pid=1, + batch_index=0, + users=[(99, "lexoa")], + changesets=[(900, 99, None, ["#a"], "iD", 1.0, 2.0, 3.0, 4.0)], + changeset_stats=[], + ) + merge_parquet_files(fresh_db, tmp_path / "rich", cleanup=True) + + flush_rows_to_parquet( + parquet_dir=tmp_path / "stub", + pid=2, + batch_index=0, + users=[(99, "lexoa")], + changesets=[(900, 99, None, None, None, None, None, None, None)], + changeset_stats=[], + ) + merge_parquet_files(fresh_db, tmp_path / "stub", cleanup=True) + + geom_wkt, editor, hashtags = fresh_db.execute( + "SELECT ST_AsText(geom), editor, hashtags FROM changesets WHERE changeset_id = 900" + ).fetchone() + assert "POLYGON" in geom_wkt + assert editor == "iD" + assert hashtags == ["#a"] + + +def test_merge_parquet_replaces_partial_geom_when_richer_arrives(fresh_db, tmp_path): + """OSM bbox grows monotonically across re-emits; later tick must overwrite earlier partial bbox.""" + from osmsg.db.ingest import flush_rows_to_parquet, merge_parquet_files + + flush_rows_to_parquet( + parquet_dir=tmp_path / "partial", + pid=1, + batch_index=0, + users=[(99, "lexoa")], + changesets=[(901, 99, None, None, "iD", 10.0, 10.0, 10.5, 10.5)], + changeset_stats=[], + ) + merge_parquet_files(fresh_db, tmp_path / "partial", cleanup=True) + + flush_rows_to_parquet( + parquet_dir=tmp_path / "final", + pid=2, + batch_index=0, + users=[(99, "lexoa")], + changesets=[(901, 99, None, ["#mapathon"], "iD", 10.0, 10.0, 12.0, 
12.0)], + changeset_stats=[], + ) + merge_parquet_files(fresh_db, tmp_path / "final", cleanup=True) + + geom_wkt, hashtags = fresh_db.execute( + "SELECT ST_AsText(geom), hashtags FROM changesets WHERE changeset_id = 901" + ).fetchone() + assert "12 12" in geom_wkt, f"expected final bbox with 12,12 corner, got {geom_wkt}" + assert hashtags == ["#mapathon"] + + +@pytest.mark.network +@pytest.mark.skipif(not os.environ.get("OSMSG_PG_DSN"), reason="OSMSG_PG_DSN not set; live PG push not exercised") +def test_to_psql_upgrades_empty_changeset_when_pushed_again(fresh_db, tmp_path): + """Same empty-then-rich scenario across two to_psql() calls into PG.""" + import json as _json + + from osmsg.db.ingest import flush_rows_to_parquet, merge_parquet_files + + dsn = os.environ["OSMSG_PG_DSN"] + safe_dsn = dsn.replace("'", "''") + + fresh_db.execute("INSTALL postgres") + fresh_db.execute("LOAD postgres") + fresh_db.execute(f"ATTACH '{safe_dsn}' AS pg_w (TYPE postgres)") + try: + for stmt in PG_SCHEMA.strip().split(";"): + stmt = stmt.strip() + if stmt: + fresh_db.execute(f"CALL postgres_execute('pg_w', $${stmt}$$)") + for table in ("changeset_stats", "changesets", "users", "state"): + fresh_db.execute(f"CALL postgres_execute('pg_w', $$DELETE FROM {table}$$)") + finally: + fresh_db.execute("DETACH pg_w") + + flush_rows_to_parquet( + parquet_dir=tmp_path / "r1", + pid=1, + batch_index=0, + users=[(77, "carol")], + changesets=[(900900, 77, None, None, None, None, None, None, None)], + changeset_stats=[], + ) + merge_parquet_files(fresh_db, tmp_path / "r1", cleanup=True) + to_psql(fresh_db, dsn) + + flush_rows_to_parquet( + parquet_dir=tmp_path / "r2", + pid=2, + batch_index=0, + users=[(77, "carol")], + changesets=[(900900, 77, None, ["#x"], "JOSM", 10.0, 20.0, 11.0, 21.0)], + changeset_stats=[ + (900900, 6000, 77, 5, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, _json.dumps({"shop": {"bakery": {"c": 1, "m": 0}}})), + ], + ) + merge_parquet_files(fresh_db, tmp_path / "r2", cleanup=True) + 
to_psql(fresh_db, dsn) + + verifier = duckdb.connect(":memory:") + verifier.execute("INSTALL postgres") + verifier.execute("LOAD postgres") + verifier.execute(f"ATTACH '{safe_dsn}' AS pg_src (TYPE postgres, READ_ONLY)") + try: + editor, hashtags, has_geom = verifier.execute( + "SELECT editor, hashtags, geom IS NOT NULL FROM pg_src.changesets WHERE changeset_id = 900900" + ).fetchone() + n_stats = verifier.execute( + "SELECT COUNT(*) FROM pg_src.changeset_stats WHERE changeset_id = 900900" + ).fetchone()[0] + finally: + verifier.execute("DETACH pg_src") + verifier.close() + + assert editor == "JOSM" + assert hashtags == ["#x"] + assert has_geom is True + assert n_stats == 1 From 462a0b909b3707433075eb9c64e19033fdcfa0bc Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Fri, 8 May 2026 00:25:50 +0200 Subject: [PATCH 34/49] fix(url): respect url when it is passed for country use it as boundary --- osmsg/pipeline.py | 4 +++- tests/test_pipeline_smoke.py | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/osmsg/pipeline.py b/osmsg/pipeline.py index 4c6adcb..1992a1a 100644 --- a/osmsg/pipeline.py +++ b/osmsg/pipeline.py @@ -77,7 +77,9 @@ def _resolve_country_urls(countries: list[str]) -> list[str]: def _normalize_urls(cfg: RunConfig) -> None: - if cfg.countries: + # Explicit --url wins over --country's default Geofabrik URL; --country still + # contributes the boundary geometry filter downstream. + if cfg.countries and not cfg.url_explicit: cfg.urls = _resolve_country_urls(cfg.countries) return # Order-preserving dedupe: cfg.urls[0] is load-bearing for resume. 
diff --git a/tests/test_pipeline_smoke.py b/tests/test_pipeline_smoke.py index 8fc6360..e98223d 100644 --- a/tests/test_pipeline_smoke.py +++ b/tests/test_pipeline_smoke.py @@ -46,6 +46,21 @@ def test_normalize_urls_preserves_order(): assert cfg.urls == ["https://example.com/zebra", "https://example.com/alpha"] +def test_normalize_urls_country_alone_uses_geofabrik(): + """--country with no explicit --url falls through to the Geofabrik country feed.""" + cfg = RunConfig(countries=["nepal"], urls=["minute"], url_explicit=False) + _normalize_urls(cfg) + assert cfg.urls == ["https://download.geofabrik.de/asia/nepal-updates"] + + +def test_normalize_urls_explicit_url_overrides_country(): + """--url is explicit user intent; it must beat --country's default Geofabrik URL. + --country still applies the boundary geometry filter downstream (handled elsewhere).""" + cfg = RunConfig(countries=["nepal"], urls=["minute"], url_explicit=True) + _normalize_urls(cfg) + assert cfg.urls == ["https://planet.openstreetmap.org/replication/minute"] + + def test_run_config_defaults_to_parquet(): cfg = RunConfig() assert cfg.formats == ["parquet"] From b7a6ef3fb7566299414109529bc7e0fe3fe2865b Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Fri, 8 May 2026 01:46:22 +0200 Subject: [PATCH 35/49] fix(stats): fix stats inconsistency on null fix cors issue on api --- api/app.py | 2 + api/queries.py | 36 ++++++---------- osmsg/db/ingest.py | 10 ++++- osmsg/export/psql.py | 13 ++++++ osmsg/pipeline.py | 22 ++++++---- osmsg/replication.py | 39 +++++++++-------- tests/test_api.py | 78 +++++++++++++++++++++++++++++++++ tests/test_pipeline_smoke.py | 15 ++++++- tests/test_psql_export.py | 82 +++++++++++++++++++++++++++++++++++ tests/test_replication.py | 83 ++++++++++++++++++++++++++++++++++++ 10 files changed, 329 insertions(+), 51 deletions(-) diff --git a/api/app.py b/api/app.py index fe705d6..eab0f8a 100644 --- a/api/app.py +++ b/api/app.py @@ -2,6 +2,7 @@ from pathlib import Path from 
litestar import Litestar, get +from litestar.config.cors import CORSConfig from litestar.contrib.jinja import JinjaTemplateEngine from litestar.openapi.config import OpenAPIConfig from litestar.openapi.plugins import SwaggerRenderPlugin @@ -47,6 +48,7 @@ async def health() -> HealthResponse: app = Litestar( route_handlers=[home, health, v1_router], lifespan=[lifespan], + cors_config=CORSConfig(allow_origins=["*"]), openapi_config=OpenAPIConfig( title="OSMSG API", version="1.0.0", diff --git a/api/queries.py b/api/queries.py index c804bbe..ddddf98 100644 --- a/api/queries.py +++ b/api/queries.py @@ -51,15 +51,24 @@ def _user_stats_sql(*, filter_dates: bool, filter_hashtags: bool, include_tags: if filter_hashtags: changeset_filters.append(f"hashtags && ${n}::TEXT[]") n += 1 - enable_unfiltered_fallback = "FALSE" - else: - enable_unfiltered_fallback = "TRUE" limit_param = f"${n}" n += 1 offset_param = f"${n}" - changeset_where = f"WHERE {' AND '.join(changeset_filters)}" if changeset_filters else "" + # No filter -> all stats (orphans included); any filter -> JOIN through changesets. 
+ if changeset_filters: + scope_cte = f""" + WITH filtered_changesets AS ( + SELECT changeset_id FROM changesets WHERE {" AND ".join(changeset_filters)} + ), + stats_scope AS ( + SELECT st.* + FROM changeset_stats st + JOIN filtered_changesets fc ON st.changeset_id = fc.changeset_id + )""" + else: + scope_cte = "WITH stats_scope AS (SELECT * FROM changeset_stats)" tag_ctes = _TAG_CTES if include_tags else "" tag_select = "tpu.tag_stats" if include_tags else "NULL::jsonb AS tag_stats" @@ -67,24 +76,7 @@ def _user_stats_sql(*, filter_dates: bool, filter_hashtags: bool, include_tags: tag_group = ", tpu.tag_stats" if include_tags else "" return f""" - WITH filtered_changesets AS ( - SELECT changeset_id - FROM changesets - {changeset_where} - ), - matching_stats AS ( - SELECT st.* - FROM changeset_stats st - JOIN filtered_changesets fc ON st.changeset_id = fc.changeset_id - ), - stats_scope AS ( - SELECT * FROM matching_stats - UNION ALL - SELECT st.* - FROM changeset_stats st - WHERE {enable_unfiltered_fallback} - AND NOT EXISTS (SELECT 1 FROM matching_stats) - ){tag_ctes} + {scope_cte}{tag_ctes} SELECT u.uid, u.username AS name, diff --git a/osmsg/db/ingest.py b/osmsg/db/ingest.py index abf7d85..c2961e6 100644 --- a/osmsg/db/ingest.py +++ b/osmsg/db/ingest.py @@ -126,7 +126,7 @@ def pattern(name: str) -> str: FROM read_parquet('{pattern("changesets")}') """ ) - # OSM re-emits changesets monotonically richer each time; newer non-NULL columns win, NULL never downgrades. + # Newer non-NULL wins; dedupe src so multiple emits per window don't trip the PK on UPDATE. 
conn.execute( f""" UPDATE changesets c @@ -135,11 +135,17 @@ def pattern(name: str) -> str: editor = COALESCE(src.editor, c.editor), geom = COALESCE(src.geom, c.geom) FROM ( - SELECT changeset_id, created_at, hashtags, editor, + SELECT DISTINCT ON (changeset_id) + changeset_id, created_at, hashtags, editor, CASE WHEN min_lon IS NOT NULL THEN ST_MakeEnvelope(min_lon, min_lat, max_lon, max_lat) END AS geom FROM read_parquet('{pattern("changesets")}') + ORDER BY changeset_id, + (min_lon IS NOT NULL) DESC, + (editor IS NOT NULL) DESC, + (hashtags IS NOT NULL) DESC, + created_at DESC NULLS LAST ) src WHERE c.changeset_id = src.changeset_id AND (src.created_at IS NOT NULL OR src.hashtags IS NOT NULL diff --git a/osmsg/export/psql.py b/osmsg/export/psql.py index aa19c72..751fe95 100644 --- a/osmsg/export/psql.py +++ b/osmsg/export/psql.py @@ -2,6 +2,7 @@ import duckdb +from ..exceptions import OsmsgError from ..pg_schema import PG_SCHEMA @@ -22,6 +23,18 @@ def to_psql(conn: duckdb.DuckDBPyConnection, dsn: str) -> None: if stmt: conn.execute(f"CALL postgres_execute('pg_target', $${stmt}$$)") + # Refuse cross-source push: would double-count via the (seq_id, changeset_id) PK. + local_sources = {r[0] for r in conn.execute("SELECT source_url FROM state").fetchall()} + existing_sources = {r[0] for r in conn.execute("SELECT source_url FROM pg_target.state").fetchall()} + cross_source = existing_sources - local_sources + if cross_source and local_sources: + raise OsmsgError( + f"PG target already has data from source(s) {sorted(cross_source)} " + f"but this run pushes from {sorted(local_sources)}. Mixing sources " + f"double-counts via the (seq_id, changeset_id) key. Use a separate " + f"--psql-dsn, or wipe the existing PG tables first." + ) + conn.execute("INSERT INTO pg_target.users SELECT * FROM users ON CONFLICT DO NOTHING") # Mirrors the DuckDB-side merge: newer non-NULL wins, NULL never downgrades. 
diff --git a/osmsg/pipeline.py b/osmsg/pipeline.py index 1992a1a..681920e 100644 --- a/osmsg/pipeline.py +++ b/osmsg/pipeline.py @@ -119,11 +119,12 @@ def _canonical_hashtags(hashtags: list[str]) -> list[str]: return ["#" + h.lstrip("#") for h in hashtags] -def _resolve_url_starts(conn, cfg: RunConfig) -> dict[str, dt.datetime]: +def _resolve_url_starts(conn, cfg: RunConfig) -> dict[str, tuple[dt.datetime, int | None]]: + """Per-URL (start_ts, resume_seq); resume_seq is set only on --update.""" if cfg.update: if not cfg.urls: raise OsmsgError("--update requires at least one source URL.") - starts: dict[str, dt.datetime] = {} + starts: dict[str, tuple[dt.datetime, int | None]] = {} for url in cfg.urls: last = get_state(conn, url) if not last: @@ -139,11 +140,11 @@ def _resolve_url_starts(conn, cfg: RunConfig) -> dict[str, dt.datetime]: "(Replaying the same window through a different granularity would double-count " "via the changeset_stats (seq_id, changeset_id) key.)" ) - starts[url] = last["last_ts"] + starts[url] = (last["last_ts"], last["last_seq"] + 1) return starts if cfg.start_date is None: raise OsmsgError("start_date is required. Pass --start, --last, --days, or --update with a prior run.") - return {url: cfg.start_date for url in cfg.urls} + return {url: (cfg.start_date, None) for url in cfg.urls} def _ensure_credentials(cfg: RunConfig) -> str | None: @@ -275,7 +276,7 @@ def run(cfg: RunConfig) -> dict[str, Any]: url_starts = _resolve_url_starts(conn, cfg) if cfg.update: # Changeset-replication reads one planet-wide source; widest window covers every URL. - cfg.start_date = min(url_starts.values()) + cfg.start_date = min(ts for ts, _seq in url_starts.values()) info(f"--update: resuming each source from its own state row (earliest: {cfg.start_date.isoformat()})") # _resolve_url_starts guarantees start_date is set (or raised); narrow for ty. 
@@ -349,8 +350,10 @@ def run(cfg: RunConfig) -> dict[str, Any]: for url in cfg.urls: info(f"Changefiles ← {url}") - url_start = url_starts[url] - urls, server_ts, src_start_seq, src_end_seq, _, _ = changefile_download_urls(url_start, cfg.end_date, url) + url_start, resume_seq = url_starts[url] + urls, server_ts, src_start_seq, src_end_seq, _, _ = changefile_download_urls( + url_start, cfg.end_date, url, resume_seq=resume_seq + ) if start_seq is None: start_seq = src_start_seq end_seq = src_end_seq @@ -406,7 +409,10 @@ def run(cfg: RunConfig) -> dict[str, Any]: if sub.exists(): shutil.rmtree(sub, ignore_errors=True) - start_date_utc = min(url_starts.values()).astimezone(UTC) if url_starts else cfg.start_date.astimezone(UTC) + if url_starts: + start_date_utc = min(ts for ts, _seq in url_starts.values()).astimezone(UTC) + else: + start_date_utc = cfg.start_date.astimezone(UTC) rows = user_stats(conn, top_n=None) if not rows: diff --git a/osmsg/replication.py b/osmsg/replication.py index 598977f..063de0f 100644 --- a/osmsg/replication.py +++ b/osmsg/replication.py @@ -35,26 +35,31 @@ def seq_to_timestamp(state_url: str) -> datetime: def changefile_download_urls( - start_date: datetime, end_date: datetime, base_url: str + start_date: datetime | None, + end_date: datetime, + base_url: str, + *, + resume_seq: int | None = None, ) -> tuple[list[str], datetime, int, int, str, str]: - """Return (urls, server_ts, start_seq, end_seq, start_seq_url, end_seq_url). - - For Geofabrik base URLs, public list-URLs are rewritten to the internal server - (which carries uid/changeset_id metadata; the OAuth 2.0 cookie is required at fetch time). 
- """ + """resume_seq starts exactly there (skipping the timestamp lookup + backward pad used on first runs).""" repl = ReplicationServer(base_url) - seq = repl.timestamp_to_sequence(start_date) - if seq is None: - raise OsmsgError(f"Cannot reach replication service '{base_url}'") - - start_seq_time = seq_to_timestamp(repl.get_state_url(seq)) - if start_date > start_seq_time: - # Pad backwards by one window so we never miss a diff straddling the boundary. - if "minute" in base_url: - seq = (seq + int((start_date - start_seq_time).total_seconds() / 60)) - 60 - elif "hour" in base_url: - seq = (seq + int((start_date - start_seq_time).total_seconds() / 3600)) - 1 + if resume_seq is not None: + seq = resume_seq + else: + if start_date is None: + raise OsmsgError("changefile_download_urls requires either start_date or resume_seq") + seq = repl.timestamp_to_sequence(start_date) + if seq is None: + raise OsmsgError(f"Cannot reach replication service '{base_url}'") + + start_seq_time = seq_to_timestamp(repl.get_state_url(seq)) + if start_date > start_seq_time: + # Pad backwards by one window so we never miss a diff straddling the boundary. 
+ if "minute" in base_url: + seq = (seq + int((start_date - start_seq_time).total_seconds() / 60)) - 60 + elif "hour" in base_url: + seq = (seq + int((start_date - start_seq_time).total_seconds() / 3600)) - 1 start_seq = seq start_seq_url = repl.get_state_url(start_seq) diff --git a/tests/test_api.py b/tests/test_api.py index 0cf4d9d..e82d95f 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -390,3 +390,81 @@ def test_live_api_health_reports_seeded_state(live_api_client): assert r.status_code == 200 body = r.json() assert body["status"] == "ok" + + +@pytest.mark.network +def test_live_api_returns_orphan_stats_when_no_filter(live_api_client): + """Orphan stats (no parent changesets row) must surface when no filter is applied.""" + dsn = os.environ["OSMSG_PG_DSN"] + safe_dsn = dsn.replace("'", "''") + import duckdb + + conn = duckdb.connect(":memory:") + conn.execute("INSTALL postgres") + conn.execute("LOAD postgres") + conn.execute(f"ATTACH '{safe_dsn}' AS pg (TYPE postgres)") + try: + conn.execute( + "CALL postgres_execute('pg', $$" + "ALTER TABLE changeset_stats " + "DROP CONSTRAINT IF EXISTS changeset_stats_changeset_id_fkey" + "$$)" + ) + conn.execute( + "CALL postgres_execute('pg', $$" + "INSERT INTO changeset_stats (changeset_id, seq_id, uid, nodes_created) " + "VALUES (9999, 9999, 10, 7) ON CONFLICT DO NOTHING" + "$$)" + ) + finally: + conn.execute("DETACH pg") + conn.close() + + r = live_api_client.get("/api/v1/stats") + assert r.status_code == 200 + by_name = {u["name"]: u for u in r.json()["users"]} + assert by_name["alice"]["nodes_create"] == 30 + 7 # original 30 + orphan stub + + +@pytest.mark.network +def test_live_api_date_filter_with_no_matches_returns_empty_not_all(live_api_client): + """Date filter with no matches must return empty, not silently fall back to all.""" + r = live_api_client.get( + "/api/v1/stats", + params={"start": "2099-01-01T00:00:00Z", "end": "2099-12-31T00:00:00Z"}, + ) + assert r.status_code == 200 + body = r.json() + 
assert body["count"] == 0 + assert body["users"] == [] + + +def test_user_stats_sql_no_filter_skips_changesets_join(): + """No-filter path must not JOIN the changesets table — orphan stats would be dropped.""" + from api.queries import _user_stats_sql + + sql = _user_stats_sql(filter_dates=False, filter_hashtags=False, include_tags=False) + assert "filtered_changesets" not in sql + assert "JOIN filtered_changesets" not in sql + assert "stats_scope AS (SELECT * FROM changeset_stats)" in sql + + +def test_user_stats_sql_filtered_uses_changesets_join(): + """Filtered path must scope through changesets so date/hashtag predicates apply.""" + from api.queries import _user_stats_sql + + sql_dates = _user_stats_sql(filter_dates=True, filter_hashtags=False, include_tags=False) + sql_tags = _user_stats_sql(filter_dates=False, filter_hashtags=True, include_tags=False) + for sql in (sql_dates, sql_tags): + assert "filtered_changesets" in sql + assert "JOIN filtered_changesets" in sql + + +def test_user_stats_sql_no_unfiltered_fallback_remains(): + """The buggy 'fallback to all stats when matching is empty' branch must be gone.""" + from api.queries import _user_stats_sql + + for combo in [(True, True), (True, False), (False, True), (False, False)]: + sql = _user_stats_sql(filter_dates=combo[0], filter_hashtags=combo[1], include_tags=False) + assert "NOT EXISTS (SELECT 1 FROM matching_stats)" not in sql + assert "enable_unfiltered_fallback" not in sql diff --git a/tests/test_pipeline_smoke.py b/tests/test_pipeline_smoke.py index e98223d..f9b9523 100644 --- a/tests/test_pipeline_smoke.py +++ b/tests/test_pipeline_smoke.py @@ -87,7 +87,7 @@ def test_resolve_url_starts_no_update_uses_cfg_start(tmp_path): start = dt.datetime(2026, 4, 1, tzinfo=dt.UTC) cfg = RunConfig(urls=["https://x", "https://y"], start_date=start) starts = _resolve_url_starts(conn, cfg) - assert starts == {"https://x": start, "https://y": start} + assert starts == {"https://x": (start, None), "https://y": 
(start, None)} def test_resolve_url_starts_no_update_no_start_raises(tmp_path): @@ -106,7 +106,18 @@ def test_resolve_url_starts_update_reads_each_url_state_row(tmp_path): upsert_state(conn, source_url="https://y", last_seq=2, last_ts=ts_y, updated_at=ts_y) cfg = RunConfig(urls=["https://x", "https://y"], update=True) starts = _resolve_url_starts(conn, cfg) - assert starts == {"https://x": ts_x, "https://y": ts_y} + assert starts == {"https://x": (ts_x, 2), "https://y": (ts_y, 3)} + + +def test_resolve_url_starts_update_resume_seq_is_last_seq_plus_one(tmp_path): + """--update must resume at last_seq + 1 — no overlap, no gap, no backward pad.""" + conn = _open_db(tmp_path) + ts = dt.datetime(2026, 5, 1, tzinfo=dt.UTC) + upsert_state(conn, source_url="https://planet", last_seq=12345, last_ts=ts, updated_at=ts) + cfg = RunConfig(urls=["https://planet"], update=True) + starts = _resolve_url_starts(conn, cfg) + _ts, resume_seq = starts["https://planet"] + assert resume_seq == 12346 def test_resolve_url_starts_update_missing_state_raises_per_url(tmp_path): diff --git a/tests/test_psql_export.py b/tests/test_psql_export.py index 48b07fc..d65d36d 100644 --- a/tests/test_psql_export.py +++ b/tests/test_psql_export.py @@ -337,3 +337,85 @@ def test_to_psql_upgrades_empty_changeset_when_pushed_again(fresh_db, tmp_path): assert hashtags == ["#x"] assert has_geom is True assert n_stats == 1 + + +@pytest.mark.network +@pytest.mark.skipif(not os.environ.get("OSMSG_PG_DSN"), reason="OSMSG_PG_DSN not set; live PG push not exercised") +def test_to_psql_refuses_when_pg_has_data_from_a_different_source(fresh_db, populated_db_factory): + """Pushing source B to a PG that already has source A's state must hard-error.""" + import datetime as _dt + + from osmsg.exceptions import OsmsgError + + dsn = os.environ["OSMSG_PG_DSN"] + safe_dsn = dsn.replace("'", "''") + + populated = populated_db_factory(fresh_db) + populated.execute("INSTALL postgres") + populated.execute("LOAD postgres") + 
populated.execute(f"ATTACH '{safe_dsn}' AS pg_w (TYPE postgres)") + try: + for stmt in PG_SCHEMA.strip().split(";"): + stmt = stmt.strip() + if stmt: + populated.execute(f"CALL postgres_execute('pg_w', $${stmt}$$)") + for table in ("changeset_stats", "changesets", "users", "state"): + populated.execute(f"CALL postgres_execute('pg_w', $$DELETE FROM {table}$$)") + finally: + populated.execute("DETACH pg_w") + + populated.execute( + "INSERT INTO state VALUES (?, ?, ?, ?)", + [ + "https://download.geofabrik.de/asia/nepal-updates", + 100, + _dt.datetime(2026, 5, 1, tzinfo=_dt.UTC), + _dt.datetime(2026, 5, 1, tzinfo=_dt.UTC), + ], + ) + to_psql(populated, dsn) + + populated.execute("DELETE FROM state") + populated.execute( + "INSERT INTO state VALUES (?, ?, ?, ?)", + [ + "https://planet.openstreetmap.org/replication/minute", + 7000000, + _dt.datetime(2026, 5, 7, tzinfo=_dt.UTC), + _dt.datetime(2026, 5, 7, tzinfo=_dt.UTC), + ], + ) + + with pytest.raises(OsmsgError, match="Mixing sources"): + to_psql(populated, dsn) + + +@pytest.mark.network +@pytest.mark.skipif(not os.environ.get("OSMSG_PG_DSN"), reason="OSMSG_PG_DSN not set; live PG push not exercised") +def test_to_psql_allows_repush_from_same_source(fresh_db, populated_db_factory): + """A second push from the SAME source URL is fine — common --update path.""" + import datetime as _dt + + dsn = os.environ["OSMSG_PG_DSN"] + safe_dsn = dsn.replace("'", "''") + + populated = populated_db_factory(fresh_db) + populated.execute("INSTALL postgres") + populated.execute("LOAD postgres") + populated.execute(f"ATTACH '{safe_dsn}' AS pg_w (TYPE postgres)") + try: + for stmt in PG_SCHEMA.strip().split(";"): + stmt = stmt.strip() + if stmt: + populated.execute(f"CALL postgres_execute('pg_w', $${stmt}$$)") + for table in ("changeset_stats", "changesets", "users", "state"): + populated.execute(f"CALL postgres_execute('pg_w', $$DELETE FROM {table}$$)") + finally: + populated.execute("DETACH pg_w") + + populated.execute( + "INSERT INTO 
state VALUES ('https://planet.openstreetmap.org/replication/minute', 1, ?, ?)", + [_dt.datetime(2026, 5, 1, tzinfo=_dt.UTC), _dt.datetime(2026, 5, 1, tzinfo=_dt.UTC)], + ) + to_psql(populated, dsn) + to_psql(populated, dsn) diff --git a/tests/test_replication.py b/tests/test_replication.py index 876bbd0..66d9bbb 100644 --- a/tests/test_replication.py +++ b/tests/test_replication.py @@ -61,3 +61,86 @@ def test_download_urls_caps_end_at_cur_seq(repl): _, _, end_seq = r.download_urls(start, end) assert end_seq <= cur_seq + + +@pytest.fixture +def changefile_repl(monkeypatch): + """Offline ReplicationServer stub; 1 sequence == 1 minute.""" + from osmsg import replication as _repl_mod + + cur_seq = 5_000_000 + cur_ts = dt.datetime(2026, 5, 7, 22, 0, tzinfo=dt.UTC) + + def fake_seq_to_ts(_state_url): + # The state_url encodes the seq; here we just bind to cur_ts/cur_seq via a closure + # over the call sequence. Simpler: read the seq from the URL pattern. + import re + + m = re.search(r"(\d{3})/(\d{3})/(\d{3})", _state_url) + if not m: + return cur_ts + seq = int(m.group(1)) * 1_000_000 + int(m.group(2)) * 1_000 + int(m.group(3)) + return cur_ts + dt.timedelta(minutes=(seq - cur_seq)) + + monkeypatch.setattr(_repl_mod, "seq_to_timestamp", fake_seq_to_ts) + + class FakeReplicationServer: + def __init__(self, _base_url): + pass + + def timestamp_to_sequence(self, ts): + # floor division to match osmium's "seq whose state timestamp <= ts" semantics + return cur_seq + int((ts - cur_ts).total_seconds() // 60) + + def get_state_url(self, seq): + a, b, c = seq // 1_000_000, (seq // 1_000) % 1_000, seq % 1_000 + return f"https://planet.openstreetmap.org/replication/minute/{a:03d}/{b:03d}/{c:03d}.state.txt" + + def get_diff_url(self, seq): + return self.get_state_url(seq).replace(".state.txt", ".osc.gz") + + def get_state_info(self): + return cur_seq, cur_ts + + monkeypatch.setattr(_repl_mod, "ReplicationServer", FakeReplicationServer) + return cur_seq, cur_ts + + +def 
test_changefile_download_urls_resume_seq_skips_backward_pad(changefile_repl): + """resume_seq must be used exactly — no 60-minute backward pad.""" + from osmsg.replication import changefile_download_urls + + cur_seq, cur_ts = changefile_repl + last_seq = cur_seq - 30 # tick processed up to 30 minutes ago + end = cur_ts + + urls, _server_ts, start_seq, end_seq, _, _ = changefile_download_urls( + start_date=cur_ts - dt.timedelta(minutes=30), + end_date=end, + base_url="https://planet.openstreetmap.org/replication/minute", + resume_seq=last_seq + 1, + ) + + assert start_seq == last_seq + 1 + # Without resume_seq, the backward pad would have produced ~60 extra URLs. + # With resume_seq, we get only the genuinely new diffs from last_seq+1 onward. + assert len(urls) <= 60 # very loose upper bound; the point is "no 60 backpad" + + +def test_changefile_download_urls_no_resume_seq_pads_backward(changefile_repl): + """First-run path (no --update) keeps the 60-minute backward pad on minute replication.""" + from osmsg.replication import changefile_download_urls + + cur_seq, cur_ts = changefile_repl + + # 30 seconds offset from a seq boundary so timestamp_to_sequence rounds down + # and the backward-pad branch actually runs (it requires start_date > seq_ts). 
+ _, _, start_seq, _, _, _ = changefile_download_urls( + start_date=cur_ts - dt.timedelta(minutes=10) + dt.timedelta(seconds=30), + end_date=cur_ts, + base_url="https://planet.openstreetmap.org/replication/minute", + ) + expected_unpadded = cur_seq - 10 + assert start_seq <= expected_unpadded - 50, ( + f"expected backward pad of ~60, got start_seq={start_seq} (unpadded would be {expected_unpadded})" + ) From c4838da17c78dd3c868dd498288d04e4f8929931 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Fri, 8 May 2026 10:50:20 +0200 Subject: [PATCH 36/49] fix(padding): fix changeset pad also verifies stats are filtered in changeset level on pipeline --- README.md | 23 +++++++ osmsg/cli.py | 56 +++++++++++++--- osmsg/pipeline.py | 103 +++++++++++++++++++++++++---- osmsg/replication.py | 44 +++++++----- tests/test_cli.py | 8 ++- tests/test_pipeline_smoke.py | 114 ++++++++++++++++++++++++++------ tests/test_replication.py | 82 +++++++++++++++-------- tests/test_stats_correctness.py | 96 +++++++++++++++++++++++++-- 8 files changed, 435 insertions(+), 91 deletions(-) diff --git a/README.md b/README.md index 155ec73..1deb9af 100644 --- a/README.md +++ b/README.md @@ -137,6 +137,29 @@ Any flag works as a YAML key. See [docs/Manual.md](./docs/Manual.md) for the ful Every run writes `stats.duckdb` (or `<--name>.duckdb`) plus the formats you ask for via `-f parquet|csv|json|markdown|psql`. Parquet is the default. Open it with duckdb, polars, pandas, anything. +## Configuration + +Every meaningful flag has a matching `OSMSG_*` env var so the CLI, a `.env` file, and a +docker-compose `environment:` block all reach the same setting. CLI flag wins over env var. + +| CLI flag | Env var | Default | Notes | +| --- | --- | --- | --- | +| `--name` | `OSMSG_NAME` | `stats` | Output basename; sets `.duckdb`. | +| `--country` | `OSMSG_COUNTRY` | unset | Geofabrik region id(s). Comma-separated when set via env. | +| `--boundary` | `OSMSG_BOUNDARY` | unset | GeoJSON path or inline GeoJSON. 
| +| `--url` | `OSMSG_URL` | `minute` | `minute`/`hour`/`day` shortcut or full URL. Comma-separated when set via env. | +| `--workers` | `OSMSG_WORKERS` | cpu count | Parallel workers. | +| `--cache-dir` | `OSMSG_CACHE_DIR` | platform cache | Where downloaded OSM files are kept across runs. | +| `--output-dir` | `OSMSG_OUTPUT_DIR` | `.` | Where `.duckdb` and exports are written. | +| `--format` / `-f` | `OSMSG_FORMAT` | `parquet` | Repeat for multiple. Comma-separated when set via env. | +| `--psql-dsn` | `OSMSG_PSQL_DSN` | unset | libpq DSN for `-f psql`. | +| `--changeset-pad-hours` | `OSMSG_CHANGESET_PAD_HOURS` | `1` | See below. | +| (auto-bootstrap on `--update`) | `OSMSG_BOOTSTRAP` | `hour` | `hour`, `day`, or `week`. Used when `--update` runs against an empty DB. | +| (auto-bootstrap on `--update`) | `OSMSG_BOOTSTRAP_DAYS` | unset | Integer N; overrides `OSMSG_BOOTSTRAP`. | +| OSM credentials (Geofabrik) | `OSM_USERNAME`, `OSM_PASSWORD` | unset | Required only when a Geofabrik URL is in use. | + +A `.env` file at the working directory is loaded automatically. + ## Documentation - [Installation](./docs/Installation.md) diff --git a/osmsg/cli.py b/osmsg/cli.py index 70044dc..9d4c087 100644 --- a/osmsg/cli.py +++ b/osmsg/cli.py @@ -111,18 +111,29 @@ def main( bool | None, typer.Option("--version", callback=_version_callback, is_eager=True, help="Print version and exit."), ] = None, - name: Annotated[str, typer.Option(help="Output basename. Writes .duckdb + selected formats.")] = "stats", + name: Annotated[ + str, + typer.Option(envvar="OSMSG_NAME", help="Output basename. Writes .duckdb + selected formats."), + ] = "stats", start: Annotated[str | None, typer.Option(help="ISO start (UTC). 'YYYY-MM-DD HH:MM:SS'.")] = None, end: Annotated[str | None, typer.Option(help="ISO end (UTC). 
Defaults to now.")] = None, last: Annotated[Period | None, typer.Option(help="Convenience: hour|day|week|month|year.")] = None, days: Annotated[int | None, typer.Option(help="Last N days (mutually exclusive with --last).")] = None, country: Annotated[ list[str] | None, - typer.Option("--country", help="Geofabrik region id(s); resolved live. Requires OSM credentials."), + typer.Option( + "--country", + envvar="OSMSG_COUNTRY", + help="Geofabrik region id(s); resolved live. Requires OSM credentials. Comma-separated when set via env.", + ), ] = None, url: Annotated[ list[str] | None, - typer.Option("--url", help="Replication URL(s). Shortcuts: minute, hour, day."), + typer.Option( + "--url", + envvar="OSMSG_URL", + help="Replication URL(s). Shortcuts: minute, hour, day. Comma-separated when set via env.", + ), ] = None, hashtags: Annotated[ list[str] | None, @@ -134,7 +145,10 @@ def main( list[str] | None, typer.Option("--users", help="Filter to OSM usernames (case-sensitive, exact match). Repeat for more."), ] = None, - workers: Annotated[int | None, typer.Option(help="Parallel workers (default: cpu count).")] = None, + workers: Annotated[ + int | None, + typer.Option(envvar="OSMSG_WORKERS", help="Parallel workers (default: cpu count)."), + ] = None, rows: Annotated[ int | None, typer.Option(help="Cap rows shown in the console table. Files always carry the full set."), @@ -142,10 +156,19 @@ def main( boundary: Annotated[ str | None, typer.Option( - help="Boundary filter: Geofabrik region name (e.g. 'nepal'), GeoJSON file path, or inline GeoJSON." + envvar="OSMSG_BOUNDARY", + help="Boundary filter: Geofabrik region name (e.g. 'nepal'), GeoJSON file path, or inline GeoJSON.", + ), + ] = None, + formats: Annotated[ + list[Format] | None, + typer.Option( + "--format", + "-f", + envvar="OSMSG_FORMAT", + help="One or more output formats. 
Comma-separated when set via env.", ), ] = None, - formats: Annotated[list[Format] | None, typer.Option("--format", "-f", help="One or more output formats.")] = None, summary: Annotated[bool, typer.Option(help="Also write _summary.parquet + summary.md.")] = False, changeset: Annotated[bool, typer.Option(hidden=True)] = False, all_stats: Annotated[ @@ -162,12 +185,14 @@ def main( tm_stats: Annotated[bool, typer.Option("--tm-stats", help="Attach Tasking Manager totals.")] = False, update: Annotated[bool, typer.Option(help="Append to existing .duckdb.")] = False, cache_dir: Annotated[ - Path, typer.Option("--cache-dir", help="Cache dir for downloaded OSM files.") + Path, + typer.Option("--cache-dir", envvar="OSMSG_CACHE_DIR", help="Cache dir for downloaded OSM files."), ] = DEFAULT_CACHE_DIR, output_dir: Annotated[ Path, typer.Option( "--output-dir", + envvar="OSMSG_OUTPUT_DIR", help="Where to write .duckdb + selected formats. Defaults to current directory.", ), ] = Path("."), @@ -186,7 +211,21 @@ def main( help="Read OSM password from stdin (one line). Else $OSM_PASSWORD, then prompt.", ), ] = False, - psql_dsn: Annotated[str | None, typer.Option("--psql-dsn", help="libpq DSN for --format psql.")] = None, + psql_dsn: Annotated[ + str | None, + typer.Option("--psql-dsn", envvar="OSMSG_PSQL_DSN", help="libpq DSN for --format psql."), + ] = None, + changeset_pad_hours: Annotated[ + int, + typer.Option( + "--changeset-pad-hours", + envvar="OSMSG_CHANGESET_PAD_HOURS", + help="Backward pad (hours) on first runs of changeset replication. " + "Set to 24 to capture long-running open changesets. 
--update runs skip the pad.", + min=0, + max=48, + ), + ] = 1, ) -> None: """Run osmsg.""" if formats is None: @@ -225,6 +264,7 @@ def main( osm_username=username, osm_password=_read_password_stdin() if password_stdin else None, psql_dsn=psql_dsn, + changeset_pad_hours=changeset_pad_hours, ) if last is not None: diff --git a/osmsg/pipeline.py b/osmsg/pipeline.py index 681920e..636805b 100644 --- a/osmsg/pipeline.py +++ b/osmsg/pipeline.py @@ -25,7 +25,13 @@ from .export import summary_markdown, to_csv, to_json, to_parquet, to_psql from .fetch import download_osm_file from .geofabrik import country_geometry, country_update_url -from .replication import SHORTCUTS, ChangesetReplication, changefile_download_urls, resolve_url +from .replication import ( + CHANGESETS_REPLICATION, + SHORTCUTS, + ChangesetReplication, + changefile_download_urls, + resolve_url, +) from .ui import info, progress_bar, warn UTC = dt.UTC @@ -70,6 +76,7 @@ class RunConfig: osm_username: str | None = None osm_password: str | None = None psql_dsn: str | None = None + changeset_pad_hours: int = ChangesetReplication.DEFAULT_PAD_HOURS def _resolve_country_urls(countries: list[str]) -> list[str]: @@ -119,20 +126,66 @@ def _canonical_hashtags(hashtags: list[str]) -> list[str]: return ["#" + h.lstrip("#") for h in hashtags] +def _needs_changefile_changeset_filter(cfg: RunConfig) -> bool: + # When any metadata-side filter is on, ChangefileHandler must drop edits whose + # changeset_id isn't in the allowlist; otherwise stub rows for global changesets + # pollute the changesets table. + return bool(cfg.hashtags or cfg.boundary or cfg.countries) + + +def _resolve_valid_changesets(conn, cfg: RunConfig) -> set[int] | None: + # None means "no allowlist, keep everything"; a set means "drop edits to changesets + # not in this set". The set is whatever ChangesetHandler already filtered into the + # changesets table earlier in the run. 
+ if not _needs_changefile_changeset_filter(cfg): + return None + return set(list_changesets(conn)) + + +_BOOTSTRAP_PRESETS = { + "hour": dt.timedelta(hours=1), + "day": dt.timedelta(days=1), + "week": dt.timedelta(days=7), +} + + +def _bootstrap_window_start(now: dt.datetime | None = None) -> dt.datetime: + """Resolve the auto-bootstrap start_date for a fresh --update. + + OSMSG_BOOTSTRAP_DAYS=N wins over OSMSG_BOOTSTRAP=hour|day|week. Defaults to one hour, + matching the worker tick in osmsg/_tick.py. + """ + now = now or dt.datetime.now(UTC) + days_env = os.environ.get("OSMSG_BOOTSTRAP_DAYS") + if days_env: + return now - dt.timedelta(days=int(days_env)) + preset = os.environ.get("OSMSG_BOOTSTRAP", "hour") + return now - _BOOTSTRAP_PRESETS.get(preset, _BOOTSTRAP_PRESETS["hour"]) + + def _resolve_url_starts(conn, cfg: RunConfig) -> dict[str, tuple[dt.datetime, int | None]]: """Per-URL (start_ts, resume_seq); resume_seq is set only on --update.""" if cfg.update: if not cfg.urls: raise OsmsgError("--update requires at least one source URL.") + + all_known = [r[0] for r in conn.execute("SELECT source_url FROM state").fetchall()] + known_user_sources = [u for u in all_known if u != CHANGESETS_REPLICATION] + per_url_state = {url: get_state(conn, url) for url in cfg.urls} + if not known_user_sources and all(s is None for s in per_url_state.values()): + bootstrap_start = _bootstrap_window_start() + info( + f"--update: no prior state, bootstrapping from {bootstrap_start.isoformat()} " + "(set OSMSG_BOOTSTRAP=hour|day|week or OSMSG_BOOTSTRAP_DAYS=N to change)." + ) + return {url: (bootstrap_start, None) for url in cfg.urls} starts: dict[str, tuple[dt.datetime, int | None]] = {} - for url in cfg.urls: - last = get_state(conn, url) + for url, last in per_url_state.items(): if not last: - known = [r[0] for r in conn.execute("SELECT source_url FROM state").fetchall()] hint = ( - f" Existing state in this DuckDB is for: {', '.join(known)}. 
" + f" Existing state in this DuckDB is for: {', '.join(known_user_sources)}. " "Re-run --update with one of those URLs, or start fresh under a different --name." - if known + if known_user_sources else " Run osmsg once without --update to seed state." ) raise OsmsgError( @@ -277,7 +330,10 @@ def run(cfg: RunConfig) -> dict[str, Any]: if cfg.update: # Changeset-replication reads one planet-wide source; widest window covers every URL. cfg.start_date = min(ts for ts, _seq in url_starts.values()) - info(f"--update: resuming each source from its own state row (earliest: {cfg.start_date.isoformat()})") + info( + "--update: resuming each source from its own state row " + f"(earliest: {cfg.start_date.astimezone(UTC).isoformat()})" + ) # _resolve_url_starts guarantees start_date is set (or raised); narrow for ty. assert cfg.start_date is not None @@ -285,7 +341,7 @@ def run(cfg: RunConfig) -> dict[str, Any]: raise OsmsgError("start_date >= end_date — nothing to do.") span = cfg.end_date - cfg.start_date - info(f"Range: {cfg.start_date.isoformat()} → {cfg.end_date.isoformat()} ({span})") + info(f"Range: {cfg.start_date.astimezone(UTC).isoformat()} → {cfg.end_date.astimezone(UTC).isoformat()} ({span})") span_hours = span.total_seconds() / 3600 # When auto-switch was suppressed (--url explicit, --update, --country, multi-URL), a long # span on minute replication still floods the network. Hint the user. 
@@ -317,9 +373,16 @@ def run(cfg: RunConfig) -> dict[str, Any]: end_seq: int | None = None if cfg.hashtags or cfg.changeset: - cs_repl = ChangesetReplication() - urls, cs_start, cs_end = cs_repl.download_urls(cfg.start_date, cfg.end_date) - info(f"Changesets: {len(urls)} files (seq {cs_start}–{cs_end})") + cs_repl = ChangesetReplication(pad_hours=cfg.changeset_pad_hours) + cs_state = get_state(conn, CHANGESETS_REPLICATION) if cfg.update else None + cs_resume_seq = (cs_state["last_seq"] + 1) if cs_state else None + urls, cs_start, cs_end = cs_repl.download_urls(cfg.start_date, cfg.end_date, resume_seq=cs_resume_seq) + pad_note = ( + f"incremental from prior state seq {cs_state['last_seq']} (no backward pad)" + if cs_state + else f"first run with {cfg.changeset_pad_hours}h backward pad" + ) + info(f"Changesets: {len(urls)} files (seq {cs_start}-{cs_end}), {pad_note}.") if urls: cs_dir.mkdir(parents=True, exist_ok=True) @@ -341,10 +404,16 @@ def run(cfg: RunConfig) -> dict[str, Any]: description="Processing changesets", ) dbmod.merge_parquet_files(conn, cs_dir, cleanup=True) + upsert_state( + conn, + source_url=CHANGESETS_REPLICATION, + last_seq=cs_end, + last_ts=cfg.end_date.astimezone(UTC), + updated_at=dt.datetime.now(UTC), + ) info("Changeset processing complete.") - if cfg.hashtags or cfg.boundary: - valid_changesets = set(list_changesets(conn)) + valid_changesets = _resolve_valid_changesets(conn, cfg) end_date_utc = cfg.end_date.astimezone(UTC) @@ -362,6 +431,12 @@ def run(cfg: RunConfig) -> dict[str, Any]: url_start_date_utc = url_start.astimezone(UTC) url_end_date_utc = url_end_date.astimezone(UTC) + gap = server_ts - url_start_date_utc + info( + f" DB current to: {url_start_date_utc.isoformat()} | " + f"server head: {server_ts.isoformat()} | gap: {gap} | files: {len(urls)}" + ) + if not urls: info(f" {url}: already up-to-date") continue @@ -401,6 +476,8 @@ def run(cfg: RunConfig) -> dict[str, Any]: last_ts=url_end_date, updated_at=dt.datetime.now(UTC), ) + 
lag = server_ts - url_end_date_utc + info(f" DB now current to: {url_end_date_utc.isoformat()} | lag from server: {lag}") info(f"Changefile processing complete: {url}") if cfg.delete_temp: diff --git a/osmsg/replication.py b/osmsg/replication.py index 063de0f..82ea48c 100644 --- a/osmsg/replication.py +++ b/osmsg/replication.py @@ -103,8 +103,13 @@ def changefile_download_urls( class ChangesetReplication: """Planet changeset replication URL helper.""" - def __init__(self, base_url: str = CHANGESETS_REPLICATION) -> None: + # OSM caps changeset open time at 24h, so 24 is the maximum useful pad. Default 1h + # keeps first-run bootstraps cheap; see README "Configuration" for when to raise it. + DEFAULT_PAD_HOURS = 1 + + def __init__(self, base_url: str = CHANGESETS_REPLICATION, *, pad_hours: int = DEFAULT_PAD_HOURS) -> None: self.base = base_url + self.pad_min = pad_hours * 60 def _state(self) -> tuple[int, datetime]: txt = session.get(self.base + "state.yaml").text @@ -131,21 +136,30 @@ def sequence_to_timestamp(self, seq: int) -> datetime: txt = session.get(self.state_url(seq)).text return datetime.strptime(txt.split("last_run: ")[1][:19], "%Y-%m-%d %H:%M:%S").replace(tzinfo=UTC) - # OSM caps changeset open time at 24h, so a still-open changeset created up to - # 24h before start_date can still have its first edits land in our window. - # Smaller pads silently lose those long-runners' metadata (and hence their edits, - # which get filtered out by valid_changesets). 
- BACKWARD_PAD_MIN = 24 * 60 - - def download_urls(self, start_date: datetime, end_date: datetime | None = None) -> tuple[list[str], int, int]: - start_seq = self.timestamp_to_sequence(start_date) - start_ts = self.sequence_to_timestamp(start_seq) - if start_ts > start_date: - start_seq -= int((start_ts - start_date).total_seconds() / 60) + def download_urls( + self, + start_date: datetime, + end_date: datetime | None = None, + *, + resume_seq: int | None = None, + ) -> tuple[list[str], int, int]: + """Resolve [start_seq, end_seq] for the requested window. + + When ``resume_seq`` is provided (the --update fast path), we trust prior state: + every changeset whose minute-diff sequence is < resume_seq has already been + captured in the changesets table, so we skip the backward pad entirely. + """ + if resume_seq is not None: + start_seq = resume_seq + else: + start_seq = self.timestamp_to_sequence(start_date) start_ts = self.sequence_to_timestamp(start_seq) - if start_date > start_ts and (start_date - start_ts).seconds != 15 * 60: - start_seq += int((start_date - start_ts).total_seconds() / 60) - start_seq -= self.BACKWARD_PAD_MIN + if start_ts > start_date: + start_seq -= int((start_ts - start_date).total_seconds() / 60) + start_ts = self.sequence_to_timestamp(start_seq) + if start_date > start_ts and (start_date - start_ts).seconds != 15 * 60: + start_seq += int((start_date - start_ts).total_seconds() / 60) + start_seq -= self.pad_min cur_seq, last_run = self._state() if end_date is None or end_date > last_run: diff --git a/tests/test_cli.py b/tests/test_cli.py index f6ec708..d270ea6 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -6,6 +6,7 @@ from __future__ import annotations +import re from pathlib import Path import click @@ -46,8 +47,13 @@ def test_time_range_flags_are_mutually_exclusive(args): def test_changeset_flag_is_hidden_in_help(): + """The bare --changeset toggle is internal (set automatically when needed). 
Sibling + flags that legitimately start with --changeset- (e.g. --changeset-pad-hours) are + user-facing and may appear; only the bare toggle must stay hidden.""" result = runner.invoke(app, ["--help"]) - assert "--changeset" not in click.unstyle(result.stdout) + plain = click.unstyle(result.stdout) + # Match the bare flag with a trailing space or end-of-arg, not its --changeset-* siblings. + assert not re.search(r"--changeset(\s|,|$)", plain), "bare --changeset toggle leaked into --help" def test_password_flag_no_longer_accepted(): diff --git a/tests/test_pipeline_smoke.py b/tests/test_pipeline_smoke.py index f9b9523..208f462 100644 --- a/tests/test_pipeline_smoke.py +++ b/tests/test_pipeline_smoke.py @@ -13,12 +13,16 @@ from osmsg.pipeline import ( RunConfig, _auto_switch_replication, + _bootstrap_window_start, _canonical_hashtags, + _needs_changefile_changeset_filter, _normalize_urls, _pick_replication_for_span, _resolve_url_starts, ) -from osmsg.replication import SHORTCUTS +from osmsg.replication import CHANGESETS_REPLICATION, SHORTCUTS, ChangesetReplication + +PLANET_MINUTE = "https://planet.openstreetmap.org/replication/minute" def test_normalize_urls_expands_minute_shortcut(): @@ -121,38 +125,104 @@ def test_resolve_url_starts_update_resume_seq_is_last_seq_plus_one(tmp_path): def test_resolve_url_starts_update_missing_state_raises_per_url(tmp_path): + """Partial state must raise: auto-bootstrap is for empty DBs only, never for a mix.""" conn = _open_db(tmp_path) - upsert_state( - conn, - source_url="https://x", - last_seq=1, - last_ts=dt.datetime(2026, 4, 25, tzinfo=dt.UTC), - updated_at=dt.datetime(2026, 4, 25, tzinfo=dt.UTC), - ) + ts = dt.datetime(2026, 4, 25, tzinfo=dt.UTC) + upsert_state(conn, source_url="https://x", last_seq=1, last_ts=ts, updated_at=ts) cfg = RunConfig(urls=["https://x", "https://y"], update=True) with pytest.raises(OsmsgError, match="no prior state for https://y"): _resolve_url_starts(conn, cfg) -def 
test_resolve_url_starts_update_error_lists_known_urls_and_invariant(tmp_path): - """The error must surface (a) which URLs are seeded and (b) the seq_id double-count rationale — - so the user knows their two recovery options without spelunking the source.""" +def test_resolve_url_starts_update_different_url_raises_with_recovery_hint(tmp_path): + """State for a different URL must raise: switching granularity would double-count + via (seq_id, changeset_id). The error message names the known URL so the user can recover.""" conn = _open_db(tmp_path) - upsert_state( - conn, - source_url="https://planet.openstreetmap.org/replication/minute", - last_seq=1, - last_ts=dt.datetime(2026, 4, 25, tzinfo=dt.UTC), - updated_at=dt.datetime(2026, 4, 25, tzinfo=dt.UTC), - ) + ts = dt.datetime(2026, 4, 25, tzinfo=dt.UTC) + upsert_state(conn, source_url=PLANET_MINUTE, last_seq=1, last_ts=ts, updated_at=ts) cfg = RunConfig(urls=["https://planet.openstreetmap.org/replication/day"], update=True) with pytest.raises(OsmsgError) as exc: _resolve_url_starts(conn, cfg) msg = str(exc.value) - assert "Existing state in this DuckDB is for" in msg - assert "minute" in msg # known URL surfaced - assert "different --name" in msg # recovery hint - assert "seq_id" in msg # invariant referenced + for fragment in ("Existing state in this DuckDB is for", "minute", "different --name", "seq_id"): + assert fragment in msg + + +def test_resolve_url_starts_update_empty_db_bootstraps_one_hour(tmp_path, capsys): + """Fresh DB plus --update auto-seeds start = now - 1h instead of erroring.""" + conn = _open_db(tmp_path) + cfg = RunConfig(urls=[PLANET_MINUTE], update=True) + before = dt.datetime.now(dt.UTC) + starts = _resolve_url_starts(conn, cfg) + after = dt.datetime.now(dt.UTC) + + ts, resume_seq = starts[PLANET_MINUTE] + assert resume_seq is None + assert before - dt.timedelta(hours=1, seconds=1) <= ts <= after - dt.timedelta(hours=1) + dt.timedelta(seconds=1) + assert "no prior state" in 
capsys.readouterr().out + + +def test_bootstrap_honors_osmsg_bootstrap_env(tmp_path, monkeypatch): + """OSMSG_BOOTSTRAP=day shifts the auto-bootstrap window from 1h to 24h.""" + monkeypatch.setenv("OSMSG_BOOTSTRAP", "day") + conn = _open_db(tmp_path) + starts = _resolve_url_starts(conn, RunConfig(urls=[PLANET_MINUTE], update=True)) + age = dt.datetime.now(dt.UTC) - starts[PLANET_MINUTE][0] + assert dt.timedelta(hours=23, minutes=59) < age < dt.timedelta(hours=24, minutes=1) + + +def test_bootstrap_honors_osmsg_bootstrap_days_env(tmp_path, monkeypatch): + """OSMSG_BOOTSTRAP_DAYS=N wins over the preset.""" + monkeypatch.setenv("OSMSG_BOOTSTRAP_DAYS", "3") + monkeypatch.setenv("OSMSG_BOOTSTRAP", "hour") + conn = _open_db(tmp_path) + starts = _resolve_url_starts(conn, RunConfig(urls=[PLANET_MINUTE], update=True)) + age = dt.datetime.now(dt.UTC) - starts[PLANET_MINUTE][0] + assert dt.timedelta(days=2, hours=23) < age < dt.timedelta(days=3, hours=1) + + +def test_bootstrap_ignores_changeset_replication_state_row(tmp_path): + """The changesets-replication state row is internal bookkeeping and must not block + the bootstrap path: we only refuse fresh starts when *user-facing* state exists.""" + conn = _open_db(tmp_path) + ts = dt.datetime(2026, 4, 25, tzinfo=dt.UTC) + upsert_state(conn, source_url=CHANGESETS_REPLICATION, last_seq=999_999, last_ts=ts, updated_at=ts) + starts = _resolve_url_starts(conn, RunConfig(urls=[PLANET_MINUTE], update=True)) + ts, resume_seq = starts[PLANET_MINUTE] + assert resume_seq is None + assert dt.datetime.now(dt.UTC) - ts < dt.timedelta(hours=2) + + +def test_bootstrap_window_start_default_is_one_hour(): + now = dt.datetime(2026, 5, 8, 12, 0, tzinfo=dt.UTC) + assert _bootstrap_window_start(now) == now - dt.timedelta(hours=1) + + +def test_runconfig_default_changeset_pad_is_one_hour(): + assert RunConfig().changeset_pad_hours == 1 + + +def test_runconfig_changeset_pad_hours_round_trips_to_replication(): + """The CLI/env value must reach 
ChangesetReplication unchanged.""" + cfg = RunConfig(changeset_pad_hours=24) + assert ChangesetReplication(pad_hours=cfg.changeset_pad_hours).pad_min == 24 * 60 + + +@pytest.mark.parametrize( + "kwargs,expected", + [ + ({"hashtags": ["#hotosm"]}, True), + ({"boundary": "/tmp/x.geojson"}, True), + ({"countries": ["nepal"]}, True), + ({"countries": ["nepal", "india"]}, True), + ({}, False), + ({"changeset": True}, False), + ], +) +def test_needs_changefile_changeset_filter(kwargs, expected): + """Predicate-level guard: hashtags OR boundary OR countries must trigger the metadata + allowlist. Omitting countries silently leaks global changefile stubs into a country DB.""" + assert _needs_changefile_changeset_filter(RunConfig(**kwargs)) is expected @pytest.mark.parametrize( diff --git a/tests/test_replication.py b/tests/test_replication.py index 66d9bbb..7b1ce09 100644 --- a/tests/test_replication.py +++ b/tests/test_replication.py @@ -1,10 +1,9 @@ -"""ChangesetReplication URL math — verifies the 24h backward pad invariant. +"""ChangesetReplication URL math: backward-pad behavior and the resume-seq fast path. -OSM caps changeset open time at 24 hours. A still-open changeset created near the -24h boundary before our window can still have its first edits land in the window; -without the 24h backward pad, its open=true metadata entry sits before our cached -range, valid_changesets misses it, and the changefile filter silently drops the -in-window edits. +The pad covers still-open changesets opened before window start whose first edits +land inside the window. OSM caps changeset open time at 24h, so 24h is the maximum +useful pad. Default is 1h to keep first bootstraps cheap; --update runs skip the +pad entirely once they have prior state. 
""" from __future__ import annotations @@ -16,12 +15,11 @@ from osmsg.replication import ChangesetReplication -@pytest.fixture -def repl(monkeypatch): +def _make_repl(monkeypatch, pad_hours: int | None = None): """Stub the network: 1 sequence == 1 minute, anchored at a fixed cur_seq/last_run.""" cur_seq = 1_000_000 last_run = dt.datetime(2026, 4, 27, 22, 0, tzinfo=dt.UTC) - r = ChangesetReplication() + r = ChangesetReplication() if pad_hours is None else ChangesetReplication(pad_hours=pad_hours) def fake_state(): return cur_seq, last_run @@ -34,35 +32,63 @@ def fake_seq_to_ts(seq): return r, cur_seq, last_run -def test_download_urls_pads_backward_24h(repl): - """The first downloaded sequence must be ≥ 24h before start_date so any - changeset created up to 24h before is reachable from cache.""" - r, cur_seq, last_run = repl - start = dt.datetime(2026, 4, 27, 21, 4, tzinfo=dt.UTC) - end = dt.datetime(2026, 4, 27, 21, 54, tzinfo=dt.UTC) +@pytest.fixture +def repl(monkeypatch): + return _make_repl(monkeypatch) + + +def _backward_pad(repl_tuple, start, end): + """Run download_urls and return how far back of `start` the first seq lands.""" + r, cur_seq, last_run = repl_tuple + _, start_seq, _ = r.download_urls(start, end) + return start - (last_run + dt.timedelta(minutes=(start_seq - cur_seq))) + + +def test_default_pad_is_one_hour(repl): + pad = _backward_pad( + repl, + dt.datetime(2026, 4, 27, 21, 4, tzinfo=dt.UTC), + dt.datetime(2026, 4, 27, 21, 54, tzinfo=dt.UTC), + ) + assert dt.timedelta(hours=1) <= pad < dt.timedelta(hours=2) - urls, start_seq, end_seq = r.download_urls(start, end) - start_seq_ts = last_run + dt.timedelta(minutes=(start_seq - cur_seq)) - backward = start - start_seq_ts - assert backward >= dt.timedelta(hours=24), ( - f"backward pad must be ≥ 24h to catch long-running changesets, got {backward}" +def test_pad_hours_24_extends_backward_to_full_24h(monkeypatch): + """Opt-in 24h pad for first runs that must capture every long-running open 
changeset.""" + repl_tuple = _make_repl(monkeypatch, pad_hours=24) + pad = _backward_pad( + repl_tuple, + dt.datetime(2026, 4, 27, 21, 4, tzinfo=dt.UTC), + dt.datetime(2026, 4, 27, 21, 54, tzinfo=dt.UTC), ) - # Forward end seq should land on or just past end_date. - end_seq_ts = last_run + dt.timedelta(minutes=(end_seq - cur_seq)) - assert end_seq_ts >= end + assert pad >= dt.timedelta(hours=24) def test_download_urls_caps_end_at_cur_seq(repl): - """Future end_date can't fetch beyond the server's current sequence.""" + """end_date past server head clamps to cur_seq instead of requesting non-existent files.""" r, cur_seq, _ = repl - start = dt.datetime(2026, 4, 27, 21, 0, tzinfo=dt.UTC) - end = dt.datetime(2099, 1, 1, tzinfo=dt.UTC) - - _, _, end_seq = r.download_urls(start, end) + _, _, end_seq = r.download_urls( + dt.datetime(2026, 4, 27, 21, 0, tzinfo=dt.UTC), + dt.datetime(2099, 1, 1, tzinfo=dt.UTC), + ) assert end_seq <= cur_seq +def test_resume_seq_skips_backward_pad(monkeypatch): + """--update fast path: prior state already covers history, so the pad is redundant + even when pad_hours=24 is configured.""" + r, cur_seq, _ = _make_repl(monkeypatch, pad_hours=24) + last_seq = cur_seq - 30 + urls, start_seq, end_seq = r.download_urls( + dt.datetime(2026, 4, 27, 21, 0, tzinfo=dt.UTC), + dt.datetime(2026, 4, 27, 21, 30, tzinfo=dt.UTC), + resume_seq=last_seq + 1, + ) + assert start_seq == last_seq + 1 + assert len(urls) == end_seq - start_seq + 1 + assert len(urls) < 60 + + @pytest.fixture def changefile_repl(monkeypatch): """Offline ReplicationServer stub; 1 sequence == 1 minute.""" diff --git a/tests/test_stats_correctness.py b/tests/test_stats_correctness.py index e5a8924..2177d15 100644 --- a/tests/test_stats_correctness.py +++ b/tests/test_stats_correctness.py @@ -10,23 +10,31 @@ from __future__ import annotations import duckdb +import pytest +from shapely.geometry import box from osmsg.db.ingest import flush_rows_to_parquet, merge_parquet_files from 
osmsg.db.queries import attach_metadata, list_changesets, user_stats from osmsg.db.schema import create_tables from osmsg.handlers import ChangefileHandler, ChangesetHandler +from osmsg.pipeline import RunConfig, _resolve_valid_changesets def _write_changeset_xml(tmp_path, name, changesets): + """Pass `bbox=(min_lon, min_lat, max_lon, max_lat)` to emit min_*/max_* attributes + that ChangesetHandler's geom filter can intersect-test against.""" parts = ['', ''] for cs in changesets: - parts.append( - f' ' + f'uid="{cs.get("uid", 10)}" comments_count="0"' ) + if "bbox" in cs: + min_lon, min_lat, max_lon, max_lat = cs["bbox"] + attrs += f' min_lon="{min_lon}" min_lat="{min_lat}" max_lon="{max_lon}" max_lat="{max_lat}"' + parts.append(f" ") for k, v in cs.get("tags", {}).items(): parts.append(f' ') parts.append(" ") @@ -572,3 +580,83 @@ def test_hashtag_filter_keeps_changeset_with_no_in_window_edits(tmp_path, change assert user_stats(db) == [] cs_count = db.execute("SELECT COUNT(*) FROM changesets").fetchone()[0] assert cs_count == 1 + + +def test_country_filter_drops_non_country_edits_end_to_end(tmp_path, osc_factory, changefile_config): + """Full data-flow test for the --country boundary wiring.""" + cs_xml = _write_changeset_xml( + tmp_path, + "cs_geo.osm", + [ + {"id": 1, "user": "binod", "uid": 100, "bbox": (84.21, 27.60, 84.30, 27.65)}, + {"id": 2, "user": "sita", "uid": 200, "bbox": (85.30, 27.70, 85.35, 27.72)}, + {"id": 3, "user": "tanaka", "uid": 300, "bbox": (139.69, 35.68, 139.77, 35.71)}, + {"id": 4, "user": "olivia", "uid": 400, "bbox": (-0.13, 51.49, -0.12, 51.51)}, + ], + ) + cs_h = ChangesetHandler( + { + "hashtags": None, + "exact_lookup": False, + "changeset_meta": True, + "whitelisted_users": [], + "geom_filter_wkt": box(80.0, 26.0, 89.0, 31.0).wkt, + } + ) + cs_h.apply_file(str(cs_xml)) + assert set(cs_h.changesets.keys()) == {1, 2} + + db = duckdb.connect(str(tmp_path / "country.duckdb")) + create_tables(db) + _flush_changesets(cs_h, tmp_path / 
"cs_parq") + merge_parquet_files(db, tmp_path / "cs_parq", cleanup=False) + + valid = _resolve_valid_changesets(db, RunConfig(countries=["nepal"])) + assert valid == {1, 2} + + osc = osc_factory( + "global.osc", + [ + ( + "node", + {"id": 10, "version": 1, "uid": 100, "user": "binod", "changeset": 1, "tags": {"amenity": "cafe"}}, + ), + ("node", {"id": 20, "version": 1, "uid": 200, "user": "sita", "changeset": 2, "tags": {"shop": "bakery"}}), + ( + "node", + {"id": 30, "version": 1, "uid": 300, "user": "tanaka", "changeset": 3, "tags": {"amenity": "cafe"}}, + ), + ( + "node", + {"id": 40, "version": 1, "uid": 400, "user": "olivia", "changeset": 4, "tags": {"amenity": "pub"}}, + ), + ], + ) + cf_h = ChangefileHandler(changefile_config, sequence_id=1, valid_changesets=valid) + cf_h.apply_file(str(osc)) + assert set(cf_h.stubs.keys()) == {1, 2} + assert set(cf_h.users.keys()) == {100, 200} + + _flush(cf_h, tmp_path / "cf_parq", pid=2) + merge_parquet_files(db, tmp_path / "cf_parq", cleanup=False) + + assert db.execute("SELECT COUNT(*) FROM changesets WHERE created_at IS NULL").fetchone()[0] == 0 + assert {r["name"] for r in user_stats(db)} == {"binod", "sita"} + + +@pytest.mark.parametrize( + "cfg,expected_ids", + [ + (RunConfig(), None), + (RunConfig(hashtags=["#hotosm"]), {1, 2}), + (RunConfig(boundary="/tmp/x.geojson"), {1, 2}), + (RunConfig(countries=["nepal"]), {1, 2}), + (RunConfig(countries=["nepal"], boundary="/tmp/x.geojson"), {1, 2}), + ], +) +def test_resolve_valid_changesets_wiring(tmp_path, populated_db_factory, cfg, expected_ids): + """No filter -> None (keep everything); any filter -> the seeded changeset_ids.""" + db = duckdb.connect(str(tmp_path / "wiring.duckdb")) + create_tables(db) + populated_db_factory(db) + assert _resolve_valid_changesets(db, cfg) == expected_ids From 1da7f00cec4ab678e5029352e94a2683e6dd950f Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Fri, 8 May 2026 12:05:48 +0200 Subject: [PATCH 37/49] chore : add osmsg scme email 
--- infra/Caddyfile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/infra/Caddyfile b/infra/Caddyfile index a301726..7a030cf 100644 --- a/infra/Caddyfile +++ b/infra/Caddyfile @@ -1,3 +1,7 @@ +{ + email {$OSMSG_ACME_EMAIL} +} + {$OSMSG_DOMAIN:localhost} { reverse_proxy api:8000 } From 1c0b649a2619d79e816680a86e7e68ed80ce379d Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Fri, 8 May 2026 18:45:36 +0200 Subject: [PATCH 38/49] fix(health): fix health endpoint to include last_ts and updated_at --- api/app.py | 3 ++- api/schemas.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/api/app.py b/api/app.py index eab0f8a..3940056 100644 --- a/api/app.py +++ b/api/app.py @@ -41,7 +41,8 @@ async def health() -> HealthResponse: return HealthResponse( status="ok", last_seq=state["last_seq"] if state else None, - last_updated=state["last_ts"] if state else None, + last_ts=state["last_ts"] if state else None, + updated_at=state["updated_at"] if state else None, ) diff --git a/api/schemas.py b/api/schemas.py index 631b2ec..01402af 100644 --- a/api/schemas.py +++ b/api/schemas.py @@ -43,4 +43,5 @@ class UserStatsResponse(BaseModel): class HealthResponse(BaseModel): status: str last_seq: int | None - last_updated: datetime | None + last_ts: datetime | None + updated_at: datetime | None From a3f91c884e85f330bd6cf8a107e0423556af06e2 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Fri, 8 May 2026 18:53:18 +0200 Subject: [PATCH 39/49] fix(test): fix test cases on api --- tests/test_api.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_api.py b/tests/test_api.py index e82d95f..0d9e6a1 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -40,7 +40,8 @@ def test_health_endpoint_returns_ok(): data = response.json() assert data["status"] == "ok" assert data["last_seq"] is None - assert data["last_updated"] is None + assert data["last_ts"] is None + assert data["updated_at"] is None def 
test_normalize_hashtags_accepts_bare_or_prefixed_values(): From da79b341c6ac427a0d51a853daed1bdcd8902c4c Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Fri, 8 May 2026 21:02:49 +0200 Subject: [PATCH 40/49] =?UTF-8?q?bump:=20version=201.0.3=20=E2=86=92=201.1?= =?UTF-8?q?.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 30 ++++++++++++++++++++++++++++++ osmsg/__version__.py | 2 +- pyproject.toml | 2 +- uv.lock | 2 +- 4 files changed, 33 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ce01a1..31aff5e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,33 @@ +## v1.1.0 (2026-05-08) + +### Feat + +- **infra**: adds infra docker compose for hosting osmsg + +### Fix + +- **test**: fix test cases on api +- **health**: fix health endpoint to include last_ts and updated_at +- **padding**: fix changeset pad +- **stats**: fix stats inconsistency on null +- **url**: respect url when it is passed for country +- **changeset**: null bug on bbox when newer one appears +- **ci**: fixes spatial extension loading bug +- **validation**: pydantic arg validation and docs with swagger +- **test**: don't wait for fetch state to be there +- **url**: api url arg default start end +- **health**: patch health endpoint to include the last sequence and updated at +- **docker**: caddy +- **docker**: resource limit in docker compose +- **docker**: docker compose prod cluster +- **caddy**: adds caddy server and fix for the api rendering on 80 port +- **schema**: fixes shcmea being in multiple pieces , added test case to catch the change +- **pipeline**: Replace hardcoded "processing" label with stage-specific descriptions + +### Refactor + +- **alltags**: refactors all tags and schema + ## v1.0.3 (2026-04-28) ### Perf diff --git a/osmsg/__version__.py b/osmsg/__version__.py index 976498a..6849410 100644 --- a/osmsg/__version__.py +++ b/osmsg/__version__.py @@ -1 +1 @@ -__version__ = "1.0.3" 
+__version__ = "1.1.0" diff --git a/pyproject.toml b/pyproject.toml index b961a2e..4debc77 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "osmsg" -version = "1.0.3" +version = "1.1.0" description = "OpenStreetMap Stats Generator: Commandline" readme = "README.md" authors = [ diff --git a/uv.lock b/uv.lock index 56e9883..7b76d07 100644 --- a/uv.lock +++ b/uv.lock @@ -1006,7 +1006,7 @@ wheels = [ [[package]] name = "osmsg" -version = "1.0.3" +version = "1.1.0" source = { editable = "." } dependencies = [ { name = "duckdb" }, From 47c7add424536c59117617ad2556020da9c09682 Mon Sep 17 00:00:00 2001 From: gauravbarall Date: Sun, 10 May 2026 19:05:44 +0545 Subject: [PATCH 41/49] fix(osmsg): resolved markdown stats bug there was a bug where stats with markdown produced the summary instead due to summary_markdown function call instead of table_markdown --- osmsg/export/markdown.py | 9 +++++---- osmsg/pipeline.py | 13 ++----------- tests/test_export.py | 11 ++++++++--- 3 files changed, 15 insertions(+), 18 deletions(-) diff --git a/osmsg/export/markdown.py b/osmsg/export/markdown.py index cfac160..3604146 100644 --- a/osmsg/export/markdown.py +++ b/osmsg/export/markdown.py @@ -17,15 +17,16 @@ def _stringify(v: Any) -> str: return str(v) -def table_markdown(rows: list[dict[str, Any]], headers: list[str] | None = None) -> str: +def table_markdown(rows: list[dict[str, Any]], output_path: Path, headers: list[str] | None = None) -> Path: """Return a GitHub-flavored markdown table for the given rows.""" - if not rows: - return "" headers = headers or list(rows[0].keys()) lines = ["| " + " | ".join(headers) + " |", "| " + " | ".join("---" for _ in headers) + " |"] for r in rows: lines.append("| " + " | ".join(_stringify(r.get(h)) for h in headers) + " |") - return "\n".join(lines) + output_path = Path(output_path) + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text("\n".join(lines), encoding="utf-8") + return 
output_path def _human(n: int) -> str: diff --git a/osmsg/pipeline.py b/osmsg/pipeline.py index 636805b..50d2543 100644 --- a/osmsg/pipeline.py +++ b/osmsg/pipeline.py @@ -22,7 +22,7 @@ from .db.queries import attach_metadata, attach_tag_stats, daily_summary, list_changesets, user_stats from .db.schema import get_state, upsert_state from .exceptions import CredentialsRequiredError, NoDataFoundError, OsmsgError -from .export import summary_markdown, to_csv, to_json, to_parquet, to_psql +from .export import summary_markdown, table_markdown, to_csv, to_json, to_parquet, to_psql from .fetch import download_osm_file from .geofabrik import country_geometry, country_update_url from .replication import ( @@ -521,19 +521,10 @@ def run(cfg: RunConfig) -> dict[str, Any]: written["json"] = str(to_json(rows, out / f"{cfg.name}.json")) if "markdown" in cfg.formats: - from .export.markdown import summary_markdown as render_md - md_path = out / f"{cfg.name}.md" - render_md( + table_markdown( rows, output_path=md_path, - start_date=start_date_utc, - end_date=end_date_utc, - additional_tags=cfg.additional_tags, - length_tags=cfg.length_tags, - tag_mode=cfg.tag_mode, - fname=cfg.name, - tm_stats=cfg.tm_stats, ) written["markdown"] = str(md_path) diff --git a/tests/test_export.py b/tests/test_export.py index 7faf389..e4ed25e 100644 --- a/tests/test_export.py +++ b/tests/test_export.py @@ -61,9 +61,14 @@ def test_json_writes_native_types(tmp_path: Path): assert payload[0]["tags_create"] == {"building": 5} -def test_table_markdown_renders_header_and_rows(): - md = table_markdown(SAMPLE_ROWS, headers=["rank", "name", "map_changes"]) - lines = md.splitlines() +def test_table_markdown_writes_header_and_rows(tmp_path: Path): + output = table_markdown( + SAMPLE_ROWS, + output_path=tmp_path / "stats.md", + headers=["rank", "name", "map_changes"], + ) + body = output.read_text(encoding="utf-8") + lines = body.splitlines() assert lines[0] == "| rank | name | map_changes |" assert lines[1] == "| 
--- | --- | --- |" assert "alice" in lines[2] From 06c86e31f2a0925d430d951b46508952269467dd Mon Sep 17 00:00:00 2001 From: Niruta Neupane Date: Sun, 17 May 2026 23:37:36 +0545 Subject: [PATCH 42/49] Add hashtags to stats API response --- api/queries.py | 18 ++++++++++++++++-- api/schemas.py | 3 ++- tests/test_api.py | 8 ++++++++ 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/api/queries.py b/api/queries.py index ddddf98..f146e97 100644 --- a/api/queries.py +++ b/api/queries.py @@ -37,6 +37,18 @@ GROUP BY uid )""" +_HASHTAG_CTE = """, + user_hashtags AS ( + SELECT + st.uid, + ARRAY_AGG(DISTINCT ht.hashtag ORDER BY ht.hashtag) AS hashtags + FROM stats_scope st + JOIN changesets cs ON cs.changeset_id = st.changeset_id + CROSS JOIN LATERAL UNNEST(cs.hashtags) AS ht(hashtag) + WHERE cs.hashtags IS NOT NULL + GROUP BY st.uid + )""" + def _user_stats_sql(*, filter_dates: bool, filter_hashtags: bool, include_tags: bool) -> str: n = 1 @@ -76,7 +88,7 @@ def _user_stats_sql(*, filter_dates: bool, filter_hashtags: bool, include_tags: tag_group = ", tpu.tag_stats" if include_tags else "" return f""" - {scope_cte}{tag_ctes} + {scope_cte}{_HASHTAG_CTE}{tag_ctes} SELECT u.uid, u.username AS name, @@ -112,11 +124,13 @@ def _user_stats_sql(*, filter_dates: bool, filter_hashtags: bool, include_tags: ) DESC, u.uid ASC ) AS rank, + COALESCE(uh.hashtags, ARRAY[]::TEXT[]) AS hashtags, {tag_select} FROM users u JOIN stats_scope st ON u.uid = st.uid + LEFT JOIN user_hashtags uh ON uh.uid = u.uid {tag_join} - GROUP BY u.uid, u.username{tag_group} + GROUP BY u.uid, u.username, uh.hashtags{tag_group} ORDER BY map_changes DESC, u.uid ASC LIMIT {limit_param} OFFSET {offset_param} """ diff --git a/api/schemas.py b/api/schemas.py index 01402af..8ec604f 100644 --- a/api/schemas.py +++ b/api/schemas.py @@ -1,6 +1,6 @@ from datetime import datetime -from pydantic import BaseModel +from pydantic import BaseModel, Field class TagValueStats(BaseModel): @@ -26,6 +26,7 @@ class 
UserStat(BaseModel): poi_modify: int map_changes: int rank: int + hashtags: list[str] = Field(default_factory=list) tag_stats: dict[str, dict[str, TagValueStats]] | None = None diff --git a/tests/test_api.py b/tests/test_api.py index 0d9e6a1..893d660 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -87,6 +87,7 @@ async def fake_fetch_user_stats(*, start, end, hashtag, tags, limit, offset): "poi_modify": 1, "map_changes": 58, "rank": 1, + "hashtags": ["#mapathon", "#roads"], "tag_stats": {"building": {"yes": {"c": 3, "m": 0}}}, } ] @@ -130,6 +131,7 @@ async def fake_fetch_user_stats(*, start, end, hashtag, tags, limit, offset): "poi_modify": 1, "map_changes": 58, "rank": 1, + "hashtags": ["#mapathon", "#roads"], "tag_stats": {"building": {"yes": {"c": 3, "m": 0, "len": None}}}, } ], @@ -171,6 +173,7 @@ async def fake_fetch_user_stats(*, tags, **_kwargs): "poi_modify": 0, "map_changes": 0, "rank": 1, + "hashtags": [], "tag_stats": None, } ] @@ -192,6 +195,7 @@ def test_user_stats_sql_omits_tag_ctes_when_tags_false(): assert "tag_per_user" in sql_with assert "tag_per_user" not in sql_without assert "NULL::jsonb AS tag_stats" in sql_without + assert "user_hashtags" in sql_without def _seed_pg_via_to_psql(fresh_db, populated_db_factory, dsn): @@ -268,6 +272,8 @@ def test_live_api_stats_default_returns_dicts_not_strings(live_api_client): assert body["count"] == 2 by_name = {u["name"]: u for u in body["users"]} assert isinstance(by_name["alice"]["tag_stats"], dict) + assert by_name["alice"]["hashtags"] == ["#mapathon"] + assert by_name["bob"]["hashtags"] == [] assert by_name["alice"]["tag_stats"]["building"]["yes"]["c"] == 5 assert by_name["alice"]["tag_stats"]["building"]["yes"]["m"] == 1 assert by_name["alice"]["tag_stats"]["highway"]["residential"]["len"] == 245.7 @@ -318,6 +324,7 @@ def test_live_api_stats_hashtag_filters_to_matching_changesets(live_api_client): assert body["hashtag"] == ["#mapathon"] names = {u["name"] for u in body["users"]} assert names == 
{"alice"} + assert body["users"][0]["hashtags"] == ["#mapathon"] @pytest.mark.network @@ -448,6 +455,7 @@ def test_user_stats_sql_no_filter_skips_changesets_join(): assert "filtered_changesets" not in sql assert "JOIN filtered_changesets" not in sql assert "stats_scope AS (SELECT * FROM changeset_stats)" in sql + assert "LEFT JOIN user_hashtags" in sql def test_user_stats_sql_filtered_uses_changesets_join(): From 3759bf4c77efd164a3d8c08cf801e7d90df7ac2a Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 21 May 2026 08:55:57 +0200 Subject: [PATCH 43/49] fix(osmsg): update service configuration for docker compose --- infra/osmsg.service | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/infra/osmsg.service b/infra/osmsg.service index 4a1d68c..c6a8172 100644 --- a/infra/osmsg.service +++ b/infra/osmsg.service @@ -1,16 +1,17 @@ [Unit] Description=osmsg stats stack Requires=docker.service +BindsTo=docker.service After=docker.service network-online.target Wants=network-online.target [Service] -Type=simple -Restart=on-failure -RestartSec=10 +Type=oneshot +RemainAfterExit=yes WorkingDirectory=/opt/osmsg/infra EnvironmentFile=/opt/osmsg/infra/.env -ExecStart=/usr/bin/docker compose up +ExecStartPre=/usr/bin/docker compose pull +ExecStart=/usr/bin/docker compose up -d --remove-orphans ExecStop=/usr/bin/docker compose down TimeoutStartSec=300 TimeoutStopSec=60 From a51859ea6aecc409f9aaedeff1e11862bd467633 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 21 May 2026 09:29:29 +0200 Subject: [PATCH 44/49] fix(bug): replication timestamp fixes bug on last_ts date when changeset is upserted --- api/queries.py | 11 ++++++++++- osmsg/pipeline.py | 11 ++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/api/queries.py b/api/queries.py index f146e97..4175c28 100644 --- a/api/queries.py +++ b/api/queries.py @@ -137,8 +137,17 @@ def _user_stats_sql(*, filter_dates: bool, filter_hashtags: bool, include_tags: async def 
fetch_state() -> dict[str, Any] | None: + # last_ts/last_seq come from the worst-lagging source (slowest source bounds real freshness); + # updated_at is the most recent heartbeat across all sources (any tick proves the worker is alive). async with get_pool().acquire() as conn: - row = await conn.fetchrow("SELECT last_seq, last_ts, updated_at FROM state ORDER BY updated_at DESC LIMIT 1") + row = await conn.fetchrow( + """ + SELECT last_seq, last_ts, (SELECT MAX(updated_at) FROM state) AS updated_at + FROM state + ORDER BY last_ts ASC + LIMIT 1 + """ + ) if row is None: return None return dict(row) diff --git a/osmsg/pipeline.py b/osmsg/pipeline.py index 50d2543..fcb3787 100644 --- a/osmsg/pipeline.py +++ b/osmsg/pipeline.py @@ -408,7 +408,7 @@ def run(cfg: RunConfig) -> dict[str, Any]: conn, source_url=CHANGESETS_REPLICATION, last_seq=cs_end, - last_ts=cfg.end_date.astimezone(UTC), + last_ts=cs_repl.sequence_to_timestamp(cs_end), updated_at=dt.datetime.now(UTC), ) info("Changeset processing complete.") @@ -439,6 +439,15 @@ def run(cfg: RunConfig) -> dict[str, Any]: if not urls: info(f" {url}: already up-to-date") + if resume_seq is not None: + # Heartbeat: bump updated_at so /health can tell "alive, idle" apart from "stuck". 
+ upsert_state( + conn, + source_url=url, + last_seq=resume_seq - 1, + last_ts=url_start, + updated_at=dt.datetime.now(UTC), + ) continue cf_dir.mkdir(parents=True, exist_ok=True) From d43462710233f90b0b459118263e4fe1117f0702 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 21 May 2026 09:37:15 +0200 Subject: [PATCH 45/49] chore(release): bump version to 1.1.1 and update changelog --- CHANGELOG.md | 8 ++++++++ docker-compose.yml | 2 ++ osmsg/__version__.py | 2 +- pyproject.toml | 2 +- uv.lock | 2 +- 5 files changed, 13 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 31aff5e..35c7d8a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +## v1.1.1 (2026-05-21) + +### Fix + +- **bug**: replication timestamp +- **osmsg**: update service configuration for docker compose +- **osmsg**: resolved markdown stats bug + ## v1.1.0 (2026-05-08) ### Feat diff --git a/docker-compose.yml b/docker-compose.yml index d70587d..f82f901 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -18,6 +18,7 @@ services: restart: unless-stopped api: + image: ghcr.io/osgeonepal/osmsg-api:latest build: context: . target: api @@ -34,6 +35,7 @@ services: restart: unless-stopped worker: + image: ghcr.io/osgeonepal/osmsg-api:latest build: context: . 
target: worker diff --git a/osmsg/__version__.py b/osmsg/__version__.py index 6849410..a82b376 100644 --- a/osmsg/__version__.py +++ b/osmsg/__version__.py @@ -1 +1 @@ -__version__ = "1.1.0" +__version__ = "1.1.1" diff --git a/pyproject.toml b/pyproject.toml index 4debc77..bf6b878 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "osmsg" -version = "1.1.0" +version = "1.1.1" description = "OpenStreetMap Stats Generator: Commandline" readme = "README.md" authors = [ diff --git a/uv.lock b/uv.lock index 7b76d07..63d482d 100644 --- a/uv.lock +++ b/uv.lock @@ -1006,7 +1006,7 @@ wheels = [ [[package]] name = "osmsg" -version = "1.1.0" +version = "1.1.1" source = { editable = "." } dependencies = [ { name = "duckdb" }, From 60b73c58b2ae26d477d300eba4b12e9cf1402792 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 21 May 2026 09:37:45 +0200 Subject: [PATCH 46/49] fix(worker): update worker service image to correct version --- docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index f82f901..d85975e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -35,7 +35,7 @@ services: restart: unless-stopped worker: - image: ghcr.io/osgeonepal/osmsg-api:latest + image: ghcr.io/osgeonepal/osmsg-worker:latest build: context: . 
target: worker From b8ef328974df58e740e62eafe799f9352aa56bbf Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 21 May 2026 10:16:22 +0200 Subject: [PATCH 47/49] fix(update): fix the update bug on tick mirror the country url if update is passed --- osmsg/_tick.py | 7 +++- tests/test_tick.py | 100 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 2 deletions(-) create mode 100644 tests/test_tick.py diff --git a/osmsg/_tick.py b/osmsg/_tick.py index 9372a0e..d02426e 100644 --- a/osmsg/_tick.py +++ b/osmsg/_tick.py @@ -36,7 +36,8 @@ def main() -> int: name = _parse_arg(extra_args, "--name") or "stats" out = Path(_parse_arg(extra_args, "--output-dir") or "/var/lib/osmsg") country = _parse_arg(extra_args, "--country") - url = _parse_arg(extra_args, "--url") or "minute" + explicit_url = _parse_arg(extra_args, "--url") + url = explicit_url or "minute" out.mkdir(parents=True, exist_ok=True) @@ -48,7 +49,9 @@ def main() -> int: print("[osmsg-tick] previous tick still running, skipping", flush=True) return 0 - source_url = country_update_url(country) if country else resolve_url(url) + # Mirror pipeline._normalize_urls: explicit --url wins over --country's geofabrik default, + # otherwise --update can't find the state row and the DuckDB gets wiped every tick.
+ source_url = country_update_url(country) if country and explicit_url is None else resolve_url(url) db_path = out / f"{name}.duckdb" extra_set = set(extra_args) diff --git a/tests/test_tick.py b/tests/test_tick.py new file mode 100644 index 0000000..bd8d10a --- /dev/null +++ b/tests/test_tick.py @@ -0,0 +1,100 @@ +"""Worker tick: command assembly + state-row lookup precedence.""" + +from __future__ import annotations + +import datetime as dt +from pathlib import Path +from typing import Any + +import pytest + +from osmsg import _tick +from osmsg.db import connect, create_tables +from osmsg.db.schema import upsert_state +from osmsg.geofabrik import country_update_url +from osmsg.replication import SHORTCUTS + + +@pytest.fixture +def captured_cmd(monkeypatch): + captured: dict[str, Any] = {} + + def fake_call(cmd, *args, **kwargs): + captured["cmd"] = list(cmd) + return 0 + + monkeypatch.setattr(_tick.subprocess, "call", fake_call) + return captured + + +@pytest.fixture +def clean_env(monkeypatch): + for key in ("OSMSG_EXTRA_ARGS", "OSMSG_BOOTSTRAP", "OSMSG_BOOTSTRAP_DAYS"): + monkeypatch.delenv(key, raising=False) + + +def _seed_state(out_dir: Path, name: str, source_url: str) -> None: + conn = connect(str(out_dir / f"{name}.duckdb")) + try: + create_tables(conn) + ts = dt.datetime(2026, 5, 21, 7, 0, tzinfo=dt.UTC) + upsert_state(conn, source_url=source_url, last_seq=100, last_ts=ts, updated_at=ts) + finally: + conn.close() + + +def test_explicit_url_with_country_resolves_state_under_explicit_url(tmp_path, monkeypatch, captured_cmd, clean_env): + """--country + explicit --url: state row is keyed by the explicit URL (pipeline rule). + + Regression guard: previously _tick looked up state under the country's geofabrik URL, + never found it, and re-bootstrapped every tick (wiping the DuckDB each time). 
+ """ + name = "nepal" + _seed_state(tmp_path, name, SHORTCUTS["minute"]) + + monkeypatch.setenv( + "OSMSG_EXTRA_ARGS", + f"--name {name} --output-dir {tmp_path} --country nepal --url minute", + ) + + assert _tick.main() == 0 + assert "--update" in captured_cmd["cmd"], ( + f"expected --update to be appended when state exists for the explicit URL; got {captured_cmd['cmd']}" + ) + assert "--last" not in captured_cmd["cmd"] + + +def test_country_only_resolves_state_under_geofabrik_url(tmp_path, monkeypatch, captured_cmd, clean_env): + """--country alone: state is keyed by geofabrik (pipeline derives URL from country).""" + name = "nepal" + _seed_state(tmp_path, name, country_update_url("nepal")) + + monkeypatch.setenv( + "OSMSG_EXTRA_ARGS", + f"--name {name} --output-dir {tmp_path} --country nepal", + ) + + assert _tick.main() == 0 + assert "--update" in captured_cmd["cmd"] + + +def test_no_state_appends_bootstrap_window(tmp_path, monkeypatch, captured_cmd, clean_env): + """First tick (no state row) → --last instead of --update.""" + name = "nepal" + monkeypatch.setenv("OSMSG_EXTRA_ARGS", f"--name {name} --output-dir {tmp_path} --url minute") + monkeypatch.setenv("OSMSG_BOOTSTRAP", "hour") + + assert _tick.main() == 0 + cmd = captured_cmd["cmd"] + assert "--update" not in cmd + assert cmd[-2:] == ["--last", "hour"] + + +def test_bootstrap_days_overrides_bootstrap_preset(tmp_path, monkeypatch, captured_cmd, clean_env): + name = "nepal" + monkeypatch.setenv("OSMSG_EXTRA_ARGS", f"--name {name} --output-dir {tmp_path}") + monkeypatch.setenv("OSMSG_BOOTSTRAP_DAYS", "3") + + assert _tick.main() == 0 + cmd = captured_cmd["cmd"] + assert cmd[-2:] == ["--days", "3"] From c6a7d68701c0b3954ec490bd0f829e9155f2bb40 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 21 May 2026 10:49:48 +0200 Subject: [PATCH 48/49] test(tick): add test cases for tick to capture the url update bug on future --- osmsg/_tick.py | 42 ++++++++++++++++--------------- tests/test_tick.py | 61 
++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 19 deletions(-) diff --git a/osmsg/_tick.py b/osmsg/_tick.py index d02426e..7c82af4 100644 --- a/osmsg/_tick.py +++ b/osmsg/_tick.py @@ -46,28 +46,32 @@ def main() -> int: try: fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB) except BlockingIOError: + os.close(lock_fd) print("[osmsg-tick] previous tick still running, skipping", flush=True) return 0 - # Mirror pipeline._normalize_urls: explicit --url wins over --country's geofabrik default, - # otherwise --update can't find the state row and the DuckDB gets wiped every tick. - source_url = country_update_url(country) if country and explicit_url is None else resolve_url(url) - db_path = out / f"{name}.duckdb" - - extra_set = set(extra_args) - cmd = ["osmsg"] + extra_args - if not (extra_set & {"--all", "--keys"}): - cmd.append("--all") - - if _has_state(db_path, source_url): - cmd.append("--update") - elif bootstrap_days: - cmd.extend(["--days", bootstrap_days]) - else: - cmd.extend(["--last", bootstrap]) - - print(f"[osmsg-tick] {' '.join(cmd)}", flush=True) - return subprocess.call(cmd) + try: + # Mirror pipeline._normalize_urls: explicit --url wins over --country's geofabrik default, + # otherwise --update can't find the state row and the DuckDB gets wiped every tick. 
+ source_url = country_update_url(country) if country and explicit_url is None else resolve_url(url) + db_path = out / f"{name}.duckdb" + + extra_set = set(extra_args) + cmd = ["osmsg"] + extra_args + if not (extra_set & {"--all", "--keys"}): + cmd.append("--all") + + if _has_state(db_path, source_url): + cmd.append("--update") + elif bootstrap_days: + cmd.extend(["--days", bootstrap_days]) + else: + cmd.extend(["--last", bootstrap]) + + print(f"[osmsg-tick] {' '.join(cmd)}", flush=True) + return subprocess.call(cmd) + finally: + os.close(lock_fd) if __name__ == "__main__": diff --git a/tests/test_tick.py b/tests/test_tick.py index bd8d10a..643ca96 100644 --- a/tests/test_tick.py +++ b/tests/test_tick.py @@ -3,6 +3,8 @@ from __future__ import annotations import datetime as dt +import fcntl +import os from pathlib import Path from typing import Any @@ -98,3 +100,62 @@ def test_bootstrap_days_overrides_bootstrap_preset(tmp_path, monkeypatch, captur assert _tick.main() == 0 cmd = captured_cmd["cmd"] assert cmd[-2:] == ["--days", "3"] + + +def test_tick_lifecycle_cold_then_warm(tmp_path, monkeypatch, clean_env): + """Cold tick bootstraps; the next tick (after state lands) must switch to --update. + + End-to-end guard for the bug: tick 0 bootstraps, the pipeline writes a state row + under the planet/minute URL, tick 1 must find that row instead of looking under + the geofabrik URL and re-bootstrapping forever. 
+ """ + calls: list[list[str]] = [] + + def fake_call(cmd, *args, **kwargs): + calls.append(list(cmd)) + return 0 + + monkeypatch.setattr(_tick.subprocess, "call", fake_call) + + name = "nepal" + monkeypatch.setenv( + "OSMSG_EXTRA_ARGS", + f"--name {name} --output-dir {tmp_path} --country nepal --url minute", + ) + monkeypatch.setenv("OSMSG_BOOTSTRAP", "hour") + + assert _tick.main() == 0 + assert calls[0][-2:] == ["--last", "hour"] + assert "--update" not in calls[0] + + _seed_state(tmp_path, name, SHORTCUTS["minute"]) + + assert _tick.main() == 0 + assert "--update" in calls[1] + assert "--last" not in calls[1] + + +def test_tick_skips_when_previous_tick_holds_lock(tmp_path, monkeypatch, clean_env): + """Concurrent-tick guard: flock is held → exit 0 immediately, never invoke subprocess.""" + name = "nepal" + monkeypatch.setenv("OSMSG_EXTRA_ARGS", f"--name {name} --output-dir {tmp_path}") + + call_count = 0 + + def fake_call(cmd, *args, **kwargs): + nonlocal call_count + call_count += 1 + return 0 + + monkeypatch.setattr(_tick.subprocess, "call", fake_call) + + lock_path = tmp_path / f"{name}.lock" + holder = os.open(str(lock_path), os.O_CREAT | os.O_RDWR, 0o644) + fcntl.flock(holder, fcntl.LOCK_EX) + try: + assert _tick.main() == 0 + finally: + fcntl.flock(holder, fcntl.LOCK_UN) + os.close(holder) + + assert call_count == 0 From 0642d32515900df866254f54e979b4d7559c1a82 Mon Sep 17 00:00:00 2001 From: gauravbarall Date: Thu, 28 May 2026 22:45:55 +0545 Subject: [PATCH 49/49] fix(osmsg): Fixed data-loss with update flag. Update now fully depends on last sequence updated and not timestamps. 
--- osmsg/handlers.py | 17 ++++-- osmsg/pipeline.py | 14 ++++- osmsg/replication.py | 6 +++ tests/conftest.py | 2 + tests/test_replication.py | 105 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 138 insertions(+), 6 deletions(-) diff --git a/osmsg/handlers.py b/osmsg/handlers.py index ff1b829..e0bd8cf 100644 --- a/osmsg/handlers.py +++ b/osmsg/handlers.py @@ -37,7 +37,11 @@ def changeset(self, c) -> None: # `c.open` gate is required: osmium uses 1970 as the closed_at sentinel. start = cfg.get("window_start_utc") end = cfg.get("window_end_utc") - if start is not None and end is not None: + resume_seq = cfg.get("cs_resume_seq") + update = cfg.get("update") + + is_update_resume = update and resume_seq is not None + if not is_update_resume and start is not None and end is not None: created = c.created_at if created.tzinfo is None: created = created.replace(tzinfo=dt.UTC) @@ -123,6 +127,8 @@ def __init__(self, config: dict[str, Any], sequence_id: int, valid_changesets: s self.seq_id = sequence_id # None == no filter; empty set == filter matched nothing (collect nothing). 
self.valid_changesets = valid_changesets + self.resume_seq = config["resume_seq_cf"] + self.update = config["update"] self.users: dict[int, User] = {} self.stubs: dict[int, Changeset] = {} @@ -189,14 +195,16 @@ def _accumulate(self, uid, uname, cs_id, version, tags, kind, way_nodes=None) -> tv.add_length(len_m) def node(self, n) -> None: - if not (self.start <= n.timestamp < self.end): + is_update_resume = self.update and self.resume_seq is not None + if not is_update_resume and not (self.start <= n.timestamp < self.end): return if not self._should_collect(n.user, n.changeset): return self._accumulate(n.uid, n.user, n.changeset, 0 if n.deleted else n.version, n.tags, "nodes") def way(self, w) -> None: - if not (self.start <= w.timestamp < self.end): + is_update_resume = self.update and self.resume_seq is not None + if not is_update_resume and not (self.start <= w.timestamp < self.end): return if not self._should_collect(w.user, w.changeset): return @@ -204,7 +212,8 @@ def way(self, w) -> None: self._accumulate(w.uid, w.user, w.changeset, 0 if w.deleted else w.version, w.tags, "ways", nodes) def relation(self, r) -> None: - if not (self.start <= r.timestamp < self.end): + is_update_resume = self.update and self.resume_seq is not None + if not is_update_resume and not (self.start <= r.timestamp < self.end): return if not self._should_collect(r.user, r.changeset): return diff --git a/osmsg/pipeline.py b/osmsg/pipeline.py index fcb3787..b1914f4 100644 --- a/osmsg/pipeline.py +++ b/osmsg/pipeline.py @@ -371,6 +371,7 @@ def run(cfg: RunConfig) -> dict[str, Any]: valid_changesets: set[int] | None = None start_seq: int | None = None end_seq: int | None = None + cs_resume_seq: int | None = None if cfg.hashtags or cfg.changeset: cs_repl = ChangesetReplication(pad_hours=cfg.changeset_pad_hours) @@ -382,6 +383,8 @@ def run(cfg: RunConfig) -> dict[str, Any]: if cs_state else f"first run with {cfg.changeset_pad_hours}h backward pad" ) + last_cs_ts = 
cs_repl.sequence_to_timestamp(cs_end) + info(f"Changesets: {len(urls)} files (seq {cs_start}-{cs_end}), {pad_note}.") if urls: @@ -389,6 +392,8 @@ def run(cfg: RunConfig) -> dict[str, Any]: cs_config = _processing_config(cfg, parquet_dir=cs_dir, geom_wkt=geom_wkt) cs_config["window_start_utc"] = cfg.start_date.astimezone(UTC) cs_config["window_end_utc"] = cfg.end_date.astimezone(UTC) + cs_config["cs_resume_seq"] = cs_resume_seq + cs_config["update"] = cfg.update _download_all( urls, "changeset", max_workers, None, cfg.cache_dir, "changesets", description="Downloading changesets" @@ -408,7 +413,7 @@ def run(cfg: RunConfig) -> dict[str, Any]: conn, source_url=CHANGESETS_REPLICATION, last_seq=cs_end, - last_ts=cs_repl.sequence_to_timestamp(cs_end), + last_ts=last_cs_ts, updated_at=dt.datetime.now(UTC), ) info("Changeset processing complete.") @@ -420,8 +425,11 @@ def run(cfg: RunConfig) -> dict[str, Any]: for url in cfg.urls: info(f"Changefiles ← {url}") url_start, resume_seq = url_starts[url] + + cs_ts = last_cs_ts if (cfg.hashtags or cfg.changeset) else None + urls, server_ts, src_start_seq, src_end_seq, _, _ = changefile_download_urls( - url_start, cfg.end_date, url, resume_seq=resume_seq + url_start, cfg.end_date, url, resume_seq=resume_seq, cs_ts=cs_ts, update=cfg.update ) if start_seq is None: start_seq = src_start_seq @@ -454,6 +462,8 @@ def run(cfg: RunConfig) -> dict[str, Any]: cf_config = _processing_config(cfg, parquet_dir=cf_dir, geom_wkt=None) cf_config["start_date_utc"] = url_start_date_utc cf_config["end_date_utc"] = url_end_date_utc + cf_config["update"] = cfg.update + cf_config["resume_seq_cf"] = resume_seq _download_all( urls, diff --git a/osmsg/replication.py b/osmsg/replication.py index 82ea48c..a92ad5b 100644 --- a/osmsg/replication.py +++ b/osmsg/replication.py @@ -40,6 +40,8 @@ def changefile_download_urls( base_url: str, *, resume_seq: int | None = None, + cs_ts: datetime | None = None, + update: bool, ) -> tuple[list[str], datetime, int, int, 
str, str]: """resume_seq starts exactly there (skipping the timestamp lookup + backward pad used on first runs).""" repl = ReplicationServer(base_url) @@ -71,6 +73,7 @@ def changefile_download_urls( server_ts = server_ts.astimezone(UTC) last_seq = server_seq + if end_date: end_seq = repl.timestamp_to_sequence(end_date) if end_seq is None: @@ -85,6 +88,9 @@ def changefile_download_urls( if last_seq >= server_seq: last_seq = server_seq + if update and cs_ts and ((end_date and (end_date > cs_ts)) or (not end_date and (server_ts > cs_ts))): + last_seq -= 1 + if seq >= last_seq: return [], server_ts, start_seq, last_seq, start_seq_url, repl.get_state_url(last_seq) diff --git a/tests/conftest.py b/tests/conftest.py index eb0b0c1..1be6889 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -99,4 +99,6 @@ def changefile_config(): "parquet_dir": "temp_parquet", "start_date_utc": dt.datetime(1969, 1, 1, tzinfo=dt.UTC), "end_date_utc": dt.datetime(2099, 12, 31, tzinfo=dt.UTC), + "resume_seq_cf": None, + "update": False, } diff --git a/tests/test_replication.py b/tests/test_replication.py index 7b1ce09..4c0f2f2 100644 --- a/tests/test_replication.py +++ b/tests/test_replication.py @@ -145,6 +145,7 @@ def test_changefile_download_urls_resume_seq_skips_backward_pad(changefile_repl) end_date=end, base_url="https://planet.openstreetmap.org/replication/minute", resume_seq=last_seq + 1, + update=False, ) assert start_seq == last_seq + 1 @@ -165,8 +166,112 @@ def test_changefile_download_urls_no_resume_seq_pads_backward(changefile_repl): start_date=cur_ts - dt.timedelta(minutes=10) + dt.timedelta(seconds=30), end_date=cur_ts, base_url="https://planet.openstreetmap.org/replication/minute", + update=False, ) expected_unpadded = cur_seq - 10 assert start_seq <= expected_unpadded - 50, ( f"expected backward pad of ~60, got start_seq={start_seq} (unpadded would be {expected_unpadded})" ) + + +def test_cs_ts_does_not_cap_last_seq_when_cs_ts_ahead_of_cf_server(changefile_repl): + 
"""cs_ts > server_ts: changeset repl is ahead of changefile server. + last_seq must not be capped — the changesets table already covers the window.""" + from osmsg.replication import changefile_download_urls + + cur_seq, cur_ts = changefile_repl + cs_ts = cur_ts + dt.timedelta(minutes=5) # changeset repl is AHEAD of server + + _, _, _, end_seq_with_cs_ts, _, _ = changefile_download_urls( + start_date=cur_ts - dt.timedelta(minutes=10), + end_date=cur_ts, + base_url="https://planet.openstreetmap.org/replication/minute", + update=True, + cs_ts=cs_ts, + ) + _, _, _, end_seq_without_cs_ts, _, _ = changefile_download_urls( + start_date=cur_ts - dt.timedelta(minutes=10), + end_date=cur_ts, + base_url="https://planet.openstreetmap.org/replication/minute", + update=True, + cs_ts=None, + ) + assert end_seq_with_cs_ts == end_seq_without_cs_ts + + +def test_cs_ts_does_not_cap_last_seq_when_cs_ts_ahead_of_end_date(changefile_repl): + """cs_ts > end_date: changeset repl has already covered the full requested window. + last_seq must not be capped""" + from osmsg.replication import changefile_download_urls + + cur_seq, cur_ts = changefile_repl + end_date = cur_ts - dt.timedelta(minutes=10) + cs_ts = cur_ts # cs_ts is AHEAD of end_date + + _, _, _, end_seq_with_cs_ts, _, _ = changefile_download_urls( + start_date=cur_ts - dt.timedelta(minutes=30), + end_date=end_date, + base_url="https://planet.openstreetmap.org/replication/minute", + update=True, + cs_ts=cs_ts, + ) + _, _, _, end_seq_without_cs_ts, _, _ = changefile_download_urls( + start_date=cur_ts - dt.timedelta(minutes=30), + end_date=end_date, + base_url="https://planet.openstreetmap.org/replication/minute", + update=True, + cs_ts=None, + ) + assert end_seq_with_cs_ts == end_seq_without_cs_ts + + +def test_cs_ts_caps_last_seq_when_end_date_ahead_of_cs_ts(changefile_repl): + """end_date > cs_ts: changefile window is ahead of changeset repl. 
+ last_seq must be capped by 1 to avoid processing diffs whose changesets + aren't yet in the changesets table.""" + from osmsg.replication import changefile_download_urls + + cur_seq, cur_ts = changefile_repl + cs_ts = cur_ts - dt.timedelta(minutes=5) # changeset repl is BEHIND + + _, _, _, end_seq_with_cs_ts, _, _ = changefile_download_urls( + start_date=cur_ts - dt.timedelta(minutes=10), + end_date=cur_ts, + base_url="https://planet.openstreetmap.org/replication/minute", + update=True, + cs_ts=cs_ts, + ) + _, _, _, end_seq_without_cs_ts, _, _ = changefile_download_urls( + start_date=cur_ts - dt.timedelta(minutes=10), + end_date=cur_ts, + base_url="https://planet.openstreetmap.org/replication/minute", + update=True, + cs_ts=None, + ) + assert end_seq_with_cs_ts == end_seq_without_cs_ts - 1 + + +def test_cs_ts_caps_last_seq_when_cf_server_ahead_of_cs_ts(changefile_repl): + """server_ts > cs_ts: changefile server is ahead of changeset repl. + last_seq must be capped by 1 so we don't process diffs whose changesets + aren't yet in the changesets table.""" + from osmsg.replication import changefile_download_urls + + cur_seq, cur_ts = changefile_repl + cs_ts = cur_ts - dt.timedelta(minutes=5) # changeset repl is BEHIND server + + _, _, _, end_seq_with_cs_ts, _, _ = changefile_download_urls( + start_date=cur_ts - dt.timedelta(minutes=10), + end_date=None, + base_url="https://planet.openstreetmap.org/replication/minute", + update=True, + cs_ts=cs_ts, + ) + _, _, _, end_seq_without_cs_ts, _, _ = changefile_download_urls( + start_date=cur_ts - dt.timedelta(minutes=10), + end_date=None, + base_url="https://planet.openstreetmap.org/replication/minute", + update=True, + cs_ts=None, + ) + assert end_seq_with_cs_ts == end_seq_without_cs_ts - 1