diff --git a/.gitignore b/.gitignore index 12d6b139..0ae5d8eb 100644 --- a/.gitignore +++ b/.gitignore @@ -121,3 +121,13 @@ infrastructure/aws/browser_config.js # imported docs docs/src/deployment/kubernetes/ eoapi-k8s/ + +# Maxar notebook downloads +demo/Maxar/collections.json +demo/Maxar/collections.json.* +demo/Maxar/items.json +demo/Maxar/items.json.zip + +# OAM notebook generated STAC files +demo/oam/oam_collection.json +demo/oam/oam_items.njson diff --git a/demo/Maxar/eoAPI_Maxar_demo.ipynb b/demo/Maxar/eoAPI_Maxar_demo.ipynb index 0d103c0b..8c375ef1 100644 --- a/demo/Maxar/eoAPI_Maxar_demo.ipynb +++ b/demo/Maxar/eoAPI_Maxar_demo.ipynb @@ -68,7 +68,8 @@ }, "outputs": [], "source": [ - "!python -m pip install \"pypgstac[psycopg]==0.9.2\"" + "# Dependencies are installed in the demo Docker image.\n", + "# Rebuild with `docker compose build demo-runner` after changing demo/requirements.txt.\n" ] }, { @@ -79,7 +80,7 @@ "outputs": [], "source": [ "# Download the collections file\n", - "!wget https://github.com/vincentsarago/MAXAR_opendata_to_pgstac/raw/main/Maxar/collections.json" + "!wget -q -O collections.json https://github.com/vincentsarago/MAXAR_opendata_to_pgstac/raw/main/Maxar/collections.json" ] }, { @@ -90,7 +91,7 @@ "outputs": [], "source": [ "# Download the items file\n", - "! wget https://github.com/vincentsarago/MAXAR_opendata_to_pgstac/raw/main/Maxar/items.json.zip && unzip items.json.zip && rm -rf items.json.zip" + "!wget -q -O items.json.zip https://github.com/vincentsarago/MAXAR_opendata_to_pgstac/raw/main/Maxar/items.json.zip && unzip -o items.json.zip && rm -f items.json.zip" ] }, { @@ -103,18 +104,18 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "af16cb95", "metadata": {}, "outputs": [], "source": [ "# Ingest the collections\n", - "!pypgstac load collections collections.json --dsn postgresql://username:password@0.0.0.0:5439/postgis --method insert_ignore" + "!pypgstac load collections collections.json --dsn postgresql://username:password@database:5432/postgis --method insert_ignore" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "f7ded3bc", "metadata": { "scrolled": true @@ -122,7 +123,7 @@ "outputs": [], "source": [ "# Ingest the items\n", - "!pypgstac load items items.json --dsn postgresql://username:password@0.0.0.0:5439/postgis --method insert_ignore" + "!pypgstac load items items.json --dsn postgresql://username:password@database:5432/postgis --method insert_ignore" ] }, { @@ -139,7 +140,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "dc3fa136", "metadata": {}, "outputs": [], @@ -148,7 +149,7 @@ "from psycopg import sql\n", "\n", "with psycopg.connect(\n", - " \"postgresql://username:password@0.0.0.0:5439/postgis\", \n", + " \"postgresql://username:password@database:5432/postgis\", \n", " autocommit=True,\n", " options=\"-c search_path=pgstac,public -c application_name=pgstac\",\n", ") as conn: \n", @@ -183,7 +184,7 @@ }, "outputs": [], "source": [ - "!python -m pip install httpx ipyleaflet" + "# httpx and ipyleaflet are installed in the demo Docker image.\n" ] }, { @@ -196,11 +197,12 @@ "from datetime import datetime\n", "\n", "import json\n", + "import os\n", "import httpx\n", "\n", "import ipyleaflet\n", "\n", - "stac_endpoint = \"http://127.0.0.1:8081\"" + "stac_endpoint = os.environ.get(\"STAC_API_URL\", \"http://127.0.0.1:8081\")\n" ] }, { @@ -339,9 +341,12 @@ "source": [ "items = httpx.get(f\"{stac_endpoint}/collections/{collection_id}/items\").json()\n", "\n", - "\n", - "print(f\"Nb Items in Db: {items['context']['matched']}\") # This is only available if CONTEXT=ON\n", - "print(f\"Returned {len(items['features'])} Items\")" + "matched = items.get(\"context\", {}).get(\"matched\")\n", + "if matched is None:\n", + " print(\"Nb Items in Db: unavailable; response has no STAC context\")\n", + "else:\n", + " print(f\"Nb Items in Db: {matched}\")\n", + "print(f\"Returned {len(items.get('features', []))} Items\")" ] }, { @@ -359,20 +364,27 @@ "metadata": {}, "outputs": [], "source": [ + "max_items = int(os.environ.get(\"MAXAR_MAX_ITEMS\", \"250\"))\n", + "page_size = min(max_items, 100)\n", "kahramanmaras_items = []\n", "\n", "url = f\"{stac_endpoint}/collections/{collection_id}/items\"\n", - "while True:\n", - " items = httpx.get(url, params={\"limit\": 100}).json()\n", - " \n", - " kahramanmaras_items.extend(items[\"features\"])\n", - " next_link = list(filter(lambda link: link[\"rel\"] == \"next\", items[\"links\"]))\n", + "while len(kahramanmaras_items) < max_items:\n", + " remaining = max_items - len(kahramanmaras_items)\n", + " items = httpx.get(url, params={\"limit\": min(page_size, remaining)}, timeout=30).json()\n", + " features = items.get(\"features\", [])\n", + " if not features:\n", + " break\n", + "\n", + " kahramanmaras_items.extend(features)\n", + " next_link = list(filter(lambda link: link[\"rel\"] == \"next\", items.get(\"links\", [])))\n", " if next_link:\n", " url = next_link[0][\"href\"]\n", " else:\n", " break\n", "\n", - "print(f\"Nb Items: {len(kahramanmaras_items)}\")" + "print(f\"Nb Items loaded for this demo run: {len(kahramanmaras_items)}\")\n", + "print(f\"MAXAR_MAX_ITEMS={max_items}\")" ] }, { @@ -424,7 +436,7 @@ "outputs": [], "source": [ "item = kahramanmaras_items[0]\n", - "print(\"Item example:\")\n", + "print(\"Initial item example:\")\n", "print(json.dumps(item, indent=4))" ] }, @@ -528,7 +540,11 @@ " )\n", ").json()\n", "\n", - "print(f\"Nb Items in Db: {pre_items_api['context']['matched']}\") # This is only available if CONTEXT=ON" + "matched = pre_items_api.get(\"context\", {}).get(\"matched\")\n", + "if matched is None:\n", + " print(\"Nb Items in Db: unavailable; response has no STAC context\")\n", + "else:\n", + " print(f\"Nb Items in Db: {matched}\")" ] }, { @@ -575,7 +591,33 @@ "metadata": {}, "outputs": [], "source": [ - "raster_endpoint = \"http://127.0.0.1:8082\"" + "raster_endpoint = os.environ.get(\"TITILER_URL\", \"http://127.0.0.1:8082\")\n", + "raster_public_endpoint = os.environ.get(\"TITILER_PUBLIC_URL\", \"http://127.0.0.1:8082\")\n", + "\n", + "def public_tilejson(tilejson):\n", + " if \"tiles\" not in tilejson:\n", + " raise KeyError(f\"TileJSON response has no tiles: {tilejson}\")\n", + " tilejson[\"tiles\"] = [tile.replace(raster_endpoint, raster_public_endpoint) for tile in tilejson[\"tiles\"]]\n", + " return tilejson\n", + "\n", + "def tilejson_urls(base_url):\n", + " # titiler-pgstac versions differ: newer OGC tile routes include WebMercatorQuad.\n", + " return (\n", + " f\"{base_url}/WebMercatorQuad/tilejson.json\",\n", + " f\"{base_url}/tilejson.json\",\n", + " )\n", + "\n", + "def get_tilejson(base_url, params):\n", + " errors = []\n", + " for url in tilejson_urls(base_url):\n", + " resp = httpx.get(url, params=params, timeout=60)\n", + " if resp.status_code == 200:\n", + " tilejson = resp.json()\n", + " if \"tiles\" not in tilejson:\n", + " raise RuntimeError(f\"TileJSON response has no tiles from {url}: {tilejson}\")\n", + " return public_tilejson(tilejson)\n", + " errors.append(f\"{url} -> HTTP {resp.status_code}: {resp.text[:200]}\")\n", + " raise RuntimeError(\"TileJSON request failed for all known routes: \" + \" | \".join(errors))\n" ] }, { @@ -585,18 +627,74 @@ "metadata": {}, "outputs": [], "source": [ - "# fetching Raster information for all the `raster` assets\n", - "item_id = item[\"id\"]\n", + "# Find the first sampled item that TiTiler can inspect and tile.\n", + "info = None\n", + "info_resp = None\n", + "asset_name = None\n", + "selected_tilejson = None\n", + "\n", + "for candidate in kahramanmaras_items:\n", + " candidate_id = candidate[\"id\"]\n", + " print(f\"Checking raster info for Item {candidate_id}\")\n", + " try:\n", + " candidate_resp = httpx.get(\n", + " f\"{raster_endpoint}/collections/{collection_id}/items/{candidate_id}/info\",\n", + " timeout=60,\n", + " )\n", + " except httpx.HTTPError as exc:\n", + " print(f\"Skipping {candidate_id}: {exc}\")\n", + " continue\n", + "\n", + " if candidate_resp.status_code != 200:\n", + " print(f\"Skipping {candidate_id}: HTTP {candidate_resp.status_code} {candidate_resp.text[:200]}\")\n", + " continue\n", + "\n", + " candidate_info = candidate_resp.json()\n", + " if not candidate_info:\n", + " print(f\"Skipping {candidate_id}: no raster assets returned\")\n", + " continue\n", + "\n", + " raster_asset_names = [name for name, asset in candidate_info.items() if \"bounds\" in asset or \"minzoom\" in asset or \"maxzoom\" in asset]\n", + " if \"visual\" in raster_asset_names:\n", + " raster_asset_names.remove(\"visual\")\n", + " raster_asset_names.insert(0, \"visual\")\n", + "\n", + " for candidate_asset_name in raster_asset_names:\n", + " try:\n", + " candidate_tilejson = get_tilejson(\n", + " f\"{raster_endpoint}/collections/{collection_id}/items/{candidate_id}\",\n", + " params=(\n", + " (\"assets\", candidate_asset_name),\n", + " (\"minzoom\", 12),\n", + " (\"maxzoom\", 19),\n", + " ),\n", + " )\n", + " except Exception as exc:\n", + " print(f\"Skipping {candidate_id}/{candidate_asset_name}: {exc}\")\n", + " continue\n", + "\n", + " item = candidate\n", + " item_id = candidate_id\n", + " info = candidate_info\n", + " info_resp = candidate_resp\n", + " asset_name = candidate_asset_name\n", + " selected_tilejson = candidate_tilejson\n", + " break\n", "\n", - "print(f\"Fetching Raster info for Item {item_id}\")\n", - "info = httpx.get(f\"{raster_endpoint}/collections/{collection_id}/items/{item_id}/info\").json()\n", + " if selected_tilejson is not None:\n", + " break\n", + "\n", + "if info is None:\n", + " raise RuntimeError(\"No sampled Maxar item returned usable raster tilejson. Increase MAXAR_MAX_ITEMS or inspect TiTiler logs.\")\n", "\n", + "print(f\"Using Item: {item_id}\")\n", "print(\"Returned metadata for Assets:\", list(info.keys()))\n", "print()\n", - "print(json.dumps(info[\"visual\"], indent=4))\n", + "print(f\"Using asset: {asset_name}\")\n", + "print(json.dumps(info[asset_name], indent=4))\n", "print()\n", "for name, asset in info.items():\n", - " print(name, asset[\"minzoom\"], asset[\"maxzoom\"])" + " print(name, asset.get(\"minzoom\", \"?\"), asset.get(\"maxzoom\", \"?\"))" ] }, { @@ -623,15 +721,8 @@ "metadata": {}, "outputs": [], "source": [ - "# `visual` Asset\n", - "tilejson = httpx.get(\n", - " f\"{raster_endpoint}/collections/{collection_id}/items/{item_id}/tilejson.json\",\n", - " params = (\n", - " (\"assets\", \"visual\"), # THIS PARAMETER IS MANDATORY\n", - " (\"minzoom\", 12), # By default the tiler will use 0\n", - " (\"maxzoom\", 19), # By default the tiler will use 24\n", - " )\n", - ").json()\n", + "# Selected raster asset\n", + "tilejson = selected_tilejson\n", "print(tilejson)\n", "\n", "bounds = tilejson[\"bounds\"]\n", @@ -849,16 +940,15 @@ "source": [ "search_id = pre_mosaic[\"id\"]\n", "\n", - "tilejson_pre = httpx.get(\n", - " f\"{raster_endpoint}/searches/{search_id}/tilejson.json\",\n", - " params = (\n", - " (\"assets\", \"visual\"), # THIS IS MANDATORY\n", + "tilejson_pre = get_tilejson(\n", + " f\"{raster_endpoint}/searches/{search_id}\",\n", + " params=(\n", + " (\"assets\", asset_name), # THIS IS MANDATORY\n", " (\"minzoom\", 12),\n", " (\"maxzoom\", 19), \n", - " )\n", - ").json()\n", + " ),\n", + ")\n", "print(tilejson_pre)\n", - "\n", "bounds = tilejson_pre[\"bounds\"]\n", "m = ipyleaflet.leaflet.Map(\n", " center=((bounds[1] + bounds[3]) / 2,(bounds[0] + bounds[2]) / 2),\n", @@ -898,16 +988,15 @@ "source": [ "search_id = post_mosaic[\"id\"]\n", "\n", - "tilejson_post = httpx.get(\n", - " f\"{raster_endpoint}/searches/{search_id}/tilejson.json\",\n", - " params = (\n", - " (\"assets\", \"visual\"), # THIS IS MANDATORY\n", + "tilejson_post = get_tilejson(\n", + " f\"{raster_endpoint}/searches/{search_id}\",\n", + " params=(\n", + " (\"assets\", asset_name), # THIS IS MANDATORY\n", " (\"minzoom\", 12),\n", " (\"maxzoom\", 19), \n", - " )\n", - ").json()\n", + " ),\n", + ")\n", "print(tilejson_post)\n", - "\n", "bounds = tilejson_post[\"bounds\"]\n", "m = ipyleaflet.leaflet.Map(\n", " center=((bounds[1] + bounds[3]) / 2,(bounds[0] + bounds[2]) / 2),\n", @@ -1005,6 +1094,28 @@ "\n", "Thank you for taking the time to go through this notebook." ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "364ca12f-993a-4657-961a-e6835caf677d", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6fc5746-03ff-4ba0-8cc6-76ff77a99d69", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [] } ], "metadata": { @@ -1023,7 +1134,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.2" + "version": "3.11.15" } }, "nbformat": 4, diff --git a/demo/README.md b/demo/README.md new file mode 100644 index 00000000..12b0d7c5 --- /dev/null +++ b/demo/README.md @@ -0,0 +1,63 @@ +# Demo Runner + +The demos can be run from a Docker Compose service instead of from your host +Python environment. The container includes Jupyter, `pypgstac`, `rio-stac`, +AWS/S3 clients, and the Python libraries used by the notebooks. + +Start the main stack and the demo notebook server: + +```bash +docker compose --profile demo up -d --build demo-runner +``` + +Open Jupyter at: + +```text +http://127.0.0.1:8888 +``` + +The repository's `demo/` directory is bind-mounted at `/workspace/demo` in the +demo containers, so notebook edits and generated demo files are visible without +rebuilding the image. + +Load demo data into the local pgSTAC database with the named Compose targets: + +```bash +docker compose run --rm demo-noaa +docker compose run --rm demo-facebook +docker compose run --rm demo-cmip6 +docker compose run --rm demo-oam +``` + +Load every available demo dataset: + +```bash +docker compose run --rm demo-all +``` + +You can also call the loader directly from the notebook image: + +```bash +docker compose run --rm demo-runner load-demos noaa facebook +``` + +`all` loads the static NOAA and Facebook data and also loads generated CMIP6 +or OAM data if the corresponding item files already exist. Generate those files +from their notebooks first, or use the existing checked-out generated files when +available. + +Inside the demo container, use Compose service URLs: + +```text +DATABASE_URL=postgresql://username:password@database:5432/postgis +STAC_API_URL=http://stac-fastapi:8080 +TITILER_URL=http://titiler-pgstac +``` + +From your host browser, keep using the published ports: + +```text +STAC API: http://127.0.0.1:8081 +TiTiler: http://127.0.0.1:8082 +Browser: http://127.0.0.1:8085 +``` diff --git a/demo/cmip6/generate_cmip6_items.ipynb b/demo/cmip6/generate_cmip6_items.ipynb index 4b091898..c0380ee8 100644 --- a/demo/cmip6/generate_cmip6_items.ipynb +++ b/demo/cmip6/generate_cmip6_items.ipynb @@ -14,25 +14,30 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "6f788363", "metadata": {}, "outputs": [], "source": [ "import boto3\n", + "import os\n", "import fsspec\n", "import json\n", "from pystac import Catalog, Collection, Item, Asset, MediaType\n", "from datetime import datetime\n", "import rio_stac\n", + "import rasterio\n", "from pprint import pprint\n", "import concurrent.futures\n", - "import threading" + "import threading\n", + "# Use unsigned requests for public S3 buckets when rasterio/GDAL opens s3:// URLs.\n", + "os.environ.setdefault(\"AWS_NO_SIGN_REQUEST\", \"YES\")\n", + "os.environ.setdefault(\"CPL_VSIL_CURL_ALLOWED_EXTENSIONS\", \".tif,.tiff,.vrt\")\n" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "8e4cfbb8", "metadata": { "tags": [ @@ -56,7 +61,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "cdaccd78", "metadata": {}, "outputs": [], @@ -67,7 +72,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "a7caab29", "metadata": {}, "outputs": [], @@ -77,18 +82,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "4936f757", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "23725 discovered from s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/\n" - ] - } - ], + "outputs": [], "source": [ "file_paths = fs_read.glob(f\"{s3_path}*\")\n", "print(f\"{len(file_paths)} discovered from {s3_path}\")" @@ -106,7 +103,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "999b0670", "metadata": {}, "outputs": [], @@ -125,18 +122,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "6af269dc", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Subseted data to files for 1950 and 1951. 730 files to process.\n" - ] - } - ], + "outputs": [], "source": [ "if len(subset_files) == 0:\n", " raise Exception(f\"No files to process. Do COGs for the {model} model exist?\")\n", @@ -144,6 +133,23 @@ " print(f\"Subseted data to files for 1950 and 1951. {len(subset_files)} files to process.\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "958cc7d5-4849-44b7-80c2-ff3ed71bea2e", + "metadata": {}, + "outputs": [], + "source": [ + "sizes = [fs_read.info(path)[\"size\"] for path in subset_files]\n", + "total = sum(sizes)\n", + "\n", + "print(f\"{len(sizes)} files\")\n", + "print(f\"Total: {total / 1024**3:.2f} GiB\")\n", + "print(f\"Average: {total / len(sizes) / 1024**2:.2f} MiB\")\n", + "print(f\"Min: {min(sizes) / 1024**2:.2f} MiB\")\n", + "print(f\"Max: {max(sizes) / 1024**2:.2f} MiB\")" + ] + }, { "cell_type": "markdown", "id": "ea59aceb-b80a-4166-a684-74de4230ac4a", @@ -156,7 +162,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "57dc1b5f", "metadata": {}, "outputs": [], @@ -169,7 +175,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "45771e88", "metadata": { "lines_to_next_cell": 1 @@ -183,7 +189,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "ececf9d5", "metadata": {}, "outputs": [], @@ -195,17 +201,19 @@ " day = day.replace('.tif', '')\n", " datetime_ = datetime.strptime(f'{year}{month}{day}', '%Y%m%d') \n", " # Create a new Item\n", - " item = rio_stac.create_stac_item(\n", - " id=filename,\n", - " source=s3_file,\n", - " collection=collection.id,\n", - " input_datetime=datetime_,\n", - " with_proj=True,\n", - " with_raster=True,\n", - " asset_name=\"data\",\n", - " asset_roles=[\"data\"],\n", - " asset_media_type=\"image/tiff; application=geotiff; profile=cloud-optimized\"\n", - " )\n", + " with rasterio.Env(AWS_NO_SIGN_REQUEST=\"YES\"):\n", + "\n", + " item = rio_stac.create_stac_item(\n", + " id=filename,\n", + " source=s3_file,\n", + " collection=collection.id,\n", + " input_datetime=datetime_,\n", + " with_proj=True,\n", + " with_raster=True,\n", + " asset_name=\"data\",\n", + " asset_roles=[\"data\"],\n", + " asset_media_type=\"image/tiff; application=geotiff; profile=cloud-optimized\"\n", + " )\n", " tiling_asset = Asset(\n", " href=s3_file,\n", " roles=['virtual', 'tiling'],\n", @@ -231,19 +239,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "645d3ccb", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_01.tif\n", - "Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_02.tif\n" - ] - } - ], + "outputs": [], "source": [ "lock = threading.Lock()\n", "file = open(stac_items_file, 'a')\n", @@ -263,23 +262,21 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "6965e650-f89a-4c7d-9f41-11774a905b81", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "postgresql://postgres:password@localhost:5432/postgres\n", - "Inserting collection from CMIP6_daily_GISS-E2-1-G_tas_collection.json\n", - "Inserting items from CMIP6_daily_GISS-E2-1-G_tas_stac_items.ndjson\n" - ] - } - ], + "outputs": [], "source": [ "!./seed-db.sh {model} {variable}" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36ec6a31-c5b6-4d4a-8dbf-d314a5132e37", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -303,7 +300,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.11.15" } }, "nbformat": 4, diff --git a/demo/cmip6/seed-db.sh b/demo/cmip6/seed-db.sh index 5f6166d7..bdc2267f 100755 --- a/demo/cmip6/seed-db.sh +++ b/demo/cmip6/seed-db.sh @@ -6,10 +6,10 @@ collection_json_file="CMIP6_daily_${model}_${variable}_collection.json" items_json_file="CMIP6_daily_${model}_${variable}_stac_items.ndjson" if [ -z "$DATABASE_URL" ]; then - username=postgres + username=username password=password - host=localhost - dbname=postgres + host=database + dbname=postgis port=5432 DATABASE_URL="postgresql://$username:$password@$host:$port/$dbname" fi diff --git a/demo/facebook/README.md b/demo/facebook/README.md index d18fb467..aa5e46ed 100644 --- a/demo/facebook/README.md +++ b/demo/facebook/README.md @@ -39,7 +39,7 @@ Note: We also recommend to use simpler item ID than the basename. "interval": [ [ "2016-01-01T00:00:00Z", - "null" + null ] ] } diff --git a/demo/facebook/demo.ipynb b/demo/facebook/demo.ipynb index 569c6477..a563f00f 100644 --- a/demo/facebook/demo.ipynb +++ b/demo/facebook/demo.ipynb @@ -5,10 +5,12 @@ "execution_count": null, "source": [ "import json\n", + "import os\n", "import requests\n", "from folium import Map, TileLayer, GeoJson\n", "\n", - "endpoint = \"\"" + "endpoint = os.environ.get(\"TITILER_URL\", \"http://127.0.0.1:8082\")\n", + "public_endpoint = os.environ.get(\"TITILER_PUBLIC_URL\", \"http://127.0.0.1:8082\")" ], "outputs": [], "metadata": {} @@ -29,11 +31,13 @@ " \"collections\": [\"facebook-population-density\"],\n", "}\n", "\n", - "response = requests.post(\n", - " f\"{endpoint}/mosaic/register\",\n", + "register_resp = requests.post(\n", + " f\"{endpoint}/searches/register\",\n", " json=body,\n", - ").json()\n", - "print(r)" + ")\n", + "register_resp.raise_for_status()\n", + "response = register_resp.json()\n", + "print(response)" ], "outputs": [], "metadata": {} @@ -48,8 +52,15 @@ ")\n", "\n", "# Fetch Tilejson (we HAVE TO add the asset name)\n", - "tj_resp = requests.get(\n", - " response['url'],\n", + "search_id = response.get(\"id\")\n", + "tilejson_url = response.get(\"url\")\n", + "if tilejson_url is None:\n", + " if search_id is None:\n", + " raise KeyError(f\"Search registration response has no id or url: {response}\")\n", + " tilejson_url = f\"{endpoint}/searches/{search_id}/WebMercatorQuad/tilejson.json\"\n", + "\n", + "tilejson_resp = requests.get(\n", + " tilejson_url,\n", " params={\n", " # Info to add to the tilejson response\n", " \"minzoom\": 4,\n", @@ -59,7 +70,22 @@ " \"rescale\": \"0,100\",\n", " \"colormap_name\": \"viridis\",\n", " }\n", - ").json()\n", + ")\n", + "if tilejson_resp.status_code == 404 and search_id is not None:\n", + " tilejson_url = f\"{endpoint}/searches/{search_id}/tilejson.json\"\n", + " tilejson_resp = requests.get(\n", + " tilejson_url,\n", + " params={\n", + " \"minzoom\": 4,\n", + " \"maxzoom\": 12,\n", + " \"assets\": \"cog\",\n", + " \"rescale\": \"0,100\",\n", + " \"colormap_name\": \"viridis\",\n", + " }\n", + " )\n", + "tilejson_resp.raise_for_status()\n", + "tj_resp = tilejson_resp.json()\n", + "tj_resp[\"tiles\"] = [tile.replace(endpoint, public_endpoint) for tile in tj_resp[\"tiles\"]]\n", "print(tj_resp)\n", "\n", "aod_layer = TileLayer(\n", @@ -106,4 +132,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/demo/facebook/facebook_collection.json b/demo/facebook/facebook_collection.json index 7d758a6a..6ae74375 100644 --- a/demo/facebook/facebook_collection.json +++ b/demo/facebook/facebook_collection.json @@ -1 +1 @@ -{"id":"facebook-population-density","title":"High Resolution Population Density Maps","description":"Population data for a selection of countries, allocated to 1 arcsecond blocks and provided in a combination of CSV and Cloud-optimized GeoTIFF files. This refines CIESIN’s Gridded Population of the World using machine learning models on high-resolution worldwide Digital Globe satellite imagery. CIESIN population counts aggregated from worldwide census data are allocated to blocks where imagery appears to contain buildings.","stac_version":"1.0.0","license":"public-domain","links":[],"extent":{"spatial":{"bbox":[[-180,-90,180,90]]},"temporal":{"interval":[["2016-01-01T00:00:00Z","null"]]}}} +{"id":"facebook-population-density","title":"High Resolution Population Density Maps","description":"Population data for a selection of countries, allocated to 1 arcsecond blocks and provided in a combination of CSV and Cloud-optimized GeoTIFF files. This refines CIESIN’s Gridded Population of the World using machine learning models on high-resolution worldwide Digital Globe satellite imagery. CIESIN population counts aggregated from worldwide census data are allocated to blocks where imagery appears to contain buildings.","stac_version":"1.0.0","license":"public-domain","links":[],"extent":{"spatial":{"bbox":[[-180,-90,180,90]]},"temporal":{"interval":[["2016-01-01T00:00:00Z",null]]}}} diff --git a/demo/load-demos.sh b/demo/load-demos.sh new file mode 100755 index 00000000..3a4ad5e2 --- /dev/null +++ b/demo/load-demos.sh @@ -0,0 +1,154 @@ +#!/bin/sh +set -eu + +DEMO_ROOT="${DEMO_ROOT:-/workspace/demo}" +DATABASE_URL="${DATABASE_URL:-postgresql://username:password@database:5432/postgis}" + +load_pair() { + name="$1" + collection_file="$2" + items_file="$3" + + if [ ! -f "$collection_file" ]; then + echo "Skipping $name: missing collection file $collection_file" + return 0 + fi + + if [ ! -f "$items_file" ]; then + echo "Skipping $name: missing items file $items_file" + return 0 + fi + + echo "Loading $name collection from $collection_file" + pypgstac load collections "$collection_file" --dsn "$DATABASE_URL" --method insert_ignore + + echo "Loading $name items from $items_file" + pypgstac load items "$items_file" --dsn "$DATABASE_URL" --method insert_ignore +} + +load_noaa() { + load_pair \ + "noaa" \ + "$DEMO_ROOT/noaa/noaa-emergency-response.json" \ + "$DEMO_ROOT/noaa/noaa-eri-nashville2020.json" +} + +load_facebook() { + load_pair \ + "facebook" \ + "$DEMO_ROOT/facebook/facebook_collection.json" \ + "$DEMO_ROOT/facebook/facebook_items.json" +} + +load_cmip6() { + model="${CMIP6_MODEL:-GISS-E2-1-G}" + variable="${CMIP6_VARIABLE:-tas}" + prefix="$DEMO_ROOT/cmip6/CMIP6_daily_${model}_${variable}" + + load_pair \ + "cmip6" \ + "${prefix}_collection.json" \ + "${prefix}_stac_items.ndjson" +} + +normalize_ndjson() { + input_file="$1" + output_file="$2" + + python - "$input_file" "$output_file" <<'PY_NDJSON' +import json +import re +import sys +from pathlib import Path + +import orjson + +input_path = Path(sys.argv[1]) +output_path = Path(sys.argv[2]) +invalid_escape = re.compile(r'\\(?!["\\/bfnrtu])') + + +def loads_line(line, lineno): + raw = line.encode("utf-8") + try: + return orjson.loads(raw) + except orjson.JSONDecodeError: + pass + + try: + return json.loads(line) + except json.JSONDecodeError: + repaired = invalid_escape.sub(r'\\\\', line) + try: + return json.loads(repaired) + except json.JSONDecodeError as exc: + raise SystemExit(f"Invalid JSON in {input_path} at line {lineno}: {exc}") from exc + + +items = [] +with input_path.open("r", encoding="utf-8") as src: + for lineno, line in enumerate(src, 1): + if not line.strip(): + continue + item = loads_line(line, lineno) + if "collection" not in item: + raise SystemExit(f"Normalized item from {input_path} at line {lineno} has no collection field") + items.append(item) + +output_path.write_bytes(orjson.dumps(items)) +parsed = orjson.loads(output_path.read_bytes()) +if not isinstance(parsed, list): + raise SystemExit(f"Normalized OAM output should be a JSON array, got {type(parsed).__name__}") +if parsed and "collection" not in parsed[0]: + raise SystemExit("First normalized OAM item has no collection field") +PY_NDJSON +} +load_oam() { + collection_file="$DEMO_ROOT/oam/oam_collection.json" + items_file="$DEMO_ROOT/oam/oam_items.njson" + normalized_items_file="/tmp/oam_items.normalized.json" + + if [ ! -f "$collection_file" ]; then + echo "Skipping oam: missing collection file $collection_file" + return 0 + fi + + if [ ! -f "$items_file" ]; then + echo "Skipping oam: missing items file $items_file" + return 0 + fi + + normalize_ndjson "$items_file" "$normalized_items_file" + load_pair "oam" "$collection_file" "$normalized_items_file" +} + +if [ "$#" -eq 0 ]; then + set -- all +fi + +for demo_name in "$@"; do + case "$demo_name" in + all) + load_noaa + load_facebook + load_cmip6 + load_oam + ;; + noaa) + load_noaa + ;; + facebook) + load_facebook + ;; + cmip6) + load_cmip6 + ;; + oam) + load_oam + ;; + *) + echo "Unknown demo '$demo_name'. Expected one of: all, noaa, facebook, cmip6, oam" >&2 + exit 2 + ;; + esac +done diff --git a/demo/noaa/README.md b/demo/noaa/README.md index 9b746bbd..10e036e2 100644 --- a/demo/noaa/README.md +++ b/demo/noaa/README.md @@ -34,7 +34,7 @@ $ aws s3 ls noaa-eri-pds/2020_Nashville_Tornado/20200307a_RGB/ \ "interval": [ [ "2005-01-01T00:00:00Z", - "null" + null ] ] } diff --git a/demo/noaa/demo.ipynb b/demo/noaa/demo.ipynb index bd792815..aa8d81fd 100644 --- a/demo/noaa/demo.ipynb +++ b/demo/noaa/demo.ipynb @@ -3,19 +3,23 @@ { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "import json\n", + "import os\n", "import requests\n", "from folium import Map, TileLayer, GeoJson\n", "\n", - "endpoint = \"\"" - ], - "outputs": [], - "metadata": {} + "endpoint = os.environ.get(\"TITILER_URL\", \"http://127.0.0.1:8082\")\n", + "public_endpoint = os.environ.get(\"TITILER_PUBLIC_URL\", \"http://127.0.0.1:8082\")" + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "geojson = {\"type\": \"Feature\", \"geometry\": {\"coordinates\": [[[-87.0251, 36.2251], [-87.0251, 36.0999], [-85.4249, 36.0999], [-85.4249, 36.2251], [-87.0251, 36.2251]]], \"type\": \"Polygon\"}}\n", "bounds = (-87.0251, 36.0999, -85.4249, 36.2251)\n", @@ -34,20 +38,20 @@ ")\n", "geo_json.add_to(m)\n", "m" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### Create Mosaic for the whole collection" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# Register Search Query\n", "body = {\n", @@ -56,18 +60,20 @@ " \"bbox\": bounds,\n", "}\n", "\n", - "response = requests.post(\n", - " f\"{endpoint}/mosaic/register\",\n", + "register_resp = requests.post(\n", + " f\"{endpoint}/searches/register\",\n", " json=body,\n", - ").json()\n", + ")\n", + "register_resp.raise_for_status()\n", + "response = register_resp.json()\n", "print(response)" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "m = Map(\n", " location=((bounds[1] + bounds[3]) / 2,(bounds[0] + bounds[2]) / 2),\n", @@ -75,8 +81,15 @@ ")\n", "\n", "# Fetch Tilejson (we HAVE TO add the asset name)\n", - "tj_resp = requests.get(\n", - " response['url'],\n", + "search_id = response.get(\"id\")\n", + "tilejson_url = response.get(\"url\")\n", + "if tilejson_url is None:\n", + " if search_id is None:\n", + " raise KeyError(f\"Search registration response has no id or url: {response}\")\n", + " tilejson_url = f\"{endpoint}/searches/{search_id}/WebMercatorQuad/tilejson.json\"\n", + "\n", + "tilejson_resp = requests.get(\n", + " tilejson_url,\n", " params={\n", " # Info to add to the tilejson response\n", " \"minzoom\": 13,\n", @@ -84,7 +97,20 @@ " # query parameter to add to the tile URL\n", " \"assets\": \"cog\",\n", " }\n", - ").json()\n", + ")\n", + "if tilejson_resp.status_code == 404 and search_id is not None:\n", + " tilejson_url = f\"{endpoint}/searches/{search_id}/tilejson.json\"\n", + " tilejson_resp = requests.get(\n", + " tilejson_url,\n", + " params={\n", + " \"minzoom\": 13,\n", + " \"maxzoom\": 20,\n", + " \"assets\": \"cog\",\n", + " }\n", + " )\n", + "tilejson_resp.raise_for_status()\n", + "tj_resp = tilejson_resp.json()\n", + "tj_resp[\"tiles\"] = [tile.replace(endpoint, public_endpoint) for tile in tj_resp[\"tiles\"]]\n", "print(tj_resp)\n", "\n", "geo_json = GeoJson(\n", @@ -104,16 +130,21 @@ ")\n", "aod_layer.add_to(m)\n", "m" - ], + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "metadata": {} + "source": [] }, { "cell_type": "code", "execution_count": null, - "source": [], + "metadata": {}, "outputs": [], - "metadata": {} + "source": [] } ], "metadata": { @@ -121,8 +152,8 @@ "hash": "e0a12c78cd70db9ff05ed68287a27ffcdd32788e19bdb884235a47fc6f52d8ad" }, "kernelspec": { - "name": "python3", - "display_name": "Python 3.8.2 64-bit ('py38': venv)" + "display_name": "Python 3.8.2 64-bit ('py38': venv)", + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -139,4 +170,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/demo/noaa/noaa-emergency-response.json b/demo/noaa/noaa-emergency-response.json index 5c7c1b87..41ed1776 100644 --- a/demo/noaa/noaa-emergency-response.json +++ b/demo/noaa/noaa-emergency-response.json @@ -1 +1 @@ -{"id":"noaa-emergency-response", "title": "NOAA Emergency Response Imagery", "description":"NOAA Emergency Response Imagery hosted on AWS Public Dataset.","stac_version":"1.0.0","license":"public-domain","links":[],"extent":{"spatial":{"bbox":[[-180,-90,180,90]]},"temporal":{"interval":[["2005-01-01T00:00:00Z","null"]]}}} +{"id":"noaa-emergency-response", "title": "NOAA Emergency Response Imagery", "description":"NOAA Emergency Response Imagery hosted on AWS Public Dataset.","stac_version":"1.0.0","license":"public-domain","links":[],"extent":{"spatial":{"bbox":[[-180,-90,180,90]]},"temporal":{"interval":[["2005-01-01T00:00:00Z",null]]}}} diff --git a/demo/oam/OpenAerialMap_demo.ipynb b/demo/oam/OpenAerialMap_demo.ipynb index f03648b4..09ebbcfe 100644 --- a/demo/oam/OpenAerialMap_demo.ipynb +++ b/demo/oam/OpenAerialMap_demo.ipynb @@ -42,17 +42,9 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'provided_by': 'OpenAerialMap', 'license': 'CC-BY 4.0', 'website': 'http://beta.openaerialmap.org', 'page': 1, 'limit': 1, 'found': 16194}\n" - ] - } - ], + "outputs": [], "source": [ "import httpx\n", "\n", @@ -79,7 +71,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -96,7 +88,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -169,7 +161,7 @@ " fout.write(\n", " json.dumps(\n", " item.to_dict(), ensure_ascii=False\n", - " ).encode(\"ascii\", \"ignore\").decode(\"utf-8\").replace('\\\\\"', \"\") + \"\\n\"\n", + " ).encode(\"ascii\", \"ignore\").decode(\"utf-8\") + \"\\n\"\n", " )\n", "\n", "init_data = min(\n", @@ -229,15 +221,54 @@ ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "```bash\n", - "pypgstac load collections oam_collection.json --dsn postgresql://{db-user}:{db-password}@{db-host}:{db-port}/{db-name} --method insert\n", + "import json\n", + "import os\n", + "import re\n", + "from pathlib import Path\n", + "\n", + "import orjson\n", + "\n", + "database_url = os.environ.get(\"DATABASE_URL\", \"postgresql://username:password@database:5432/postgis\")\n", + "items_file = Path(\"oam_items.njson\")\n", + "normalized_items_file = Path(\"/tmp/oam_items.normalized.json\")\n", + "invalid_escape = re.compile(r'\\\\(?![\"\\\\/bfnrtu])')\n", + "\n", + "def loads_line(line, lineno):\n", + " raw = line.encode(\"utf-8\")\n", + " try:\n", + " return orjson.loads(raw)\n", + " except orjson.JSONDecodeError:\n", + " pass\n", + "\n", + " try:\n", + " return json.loads(line)\n", + " except json.JSONDecodeError:\n", + " repaired = invalid_escape.sub(r'\\\\\\\\', line)\n", + " try:\n", + " return json.loads(repaired)\n", + " except json.JSONDecodeError as exc:\n", + " raise ValueError(f\"Invalid JSON in {items_file} at line {lineno}: {exc}\") from exc\n", + "\n", + "items = []\n", + "with items_file.open(\"r\", encoding=\"utf-8\") as src:\n", + " for lineno, line in enumerate(src, 1):\n", + " if not line.strip():\n", + " continue\n", + " item = loads_line(line, lineno)\n", + " if \"collection\" not in item:\n", + " raise ValueError(f\"Normalized item from {items_file} at line {lineno} has no collection field\")\n", + " items.append(item)\n", "\n", - "# NOTE: we need to set `--method ignore` because some items are duplicated in the OAM database\n", - "pypgstac load items oam_items.njson --dsn postgresql://{db-user}:{db-password}@{db-host}:{db-port}/{db-name} --method ignore\n", - "```" + "normalized_items_file.write_bytes(orjson.dumps(items))\n", + "\n", + "!pypgstac load collections oam_collection.json --dsn {database_url} --method insert_ignore\n", + "# OAM has duplicate item records, so use insert_ignore for repeatable local loads.\n", + "!pypgstac load items {normalized_items_file} --dsn {database_url} --method insert_ignore" ] }, { @@ -249,13 +280,14 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ + "import os\n", "import httpx\n", "\n", - "stac_endpoint = \"https://stac.eoapi.dev\"" + "stac_endpoint = os.environ.get(\"STAC_API_URL\", \"http://127.0.0.1:8081\")" ] }, { @@ -269,19 +301,11 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'limit': 100, 'matched': 6, 'returned': 6}\n" - ] - } - ], + "outputs": [], "source": [ - "# use /search endpoint with some `filter` parameter\n", + "# use /search endpoint with some filter parameter\n", "response = httpx.get(\n", " f\"{stac_endpoint}/search\",\n", " params={\n", @@ -290,118 +314,16 @@ " \"limit\": 100,\n", " },\n", ")\n", - "print(response.json()[\"context\"])\n", - "\n", - "feature_collection = response.json()" + "response.raise_for_status()\n", + "feature_collection = response.json()\n", + "print(feature_collection.get(\"context\", {\"returned\": len(feature_collection.get(\"features\", []))}))" ] }, { "cell_type": "code", - "execution_count": 64, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "