From 543525ee36a6cb30021b218bd4e96502700ac486 Mon Sep 17 00:00:00 2001 From: jankovicgd Date: Wed, 25 Jun 2025 10:18:22 +0200 Subject: [PATCH 1/3] feat: ogcapi harvesting --- CONTRIBUTING.md | 1 + poetry.lock | 19 +++++++++++- pyproject.toml | 1 + src/eodm/extract.py | 58 ++++++++++++++++++++++++++++++++++-- tests/test_ogcapi_records.py | 16 ++++++++++ 5 files changed, 91 insertions(+), 4 deletions(-) create mode 100644 tests/test_ogcapi_records.py diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a84a08e..3664864 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -337,6 +337,7 @@ git push && git push --tags - [Creodias OpenSearch](https://finder.creodias.eu/resto/api/collections/describe.xml) - [Creodias Sentinel1](https://finder.creodias.eu/resto/api/collections/Sentinel1/describe.xml) - [Creodias Sentinel2](https://finder.creodias.eu/resto/api/collections/Sentinel2/describe.xml) +- [EOEPCA+ develop resource catalogue](https://resource-catalogue.apx.develop.eoepca.org/collections) ### Inspiration diff --git a/poetry.lock b/poetry.lock index 76776ac..f825da9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1596,6 +1596,23 @@ files = [ {file = "numpy-2.2.2.tar.gz", hash = "sha256:ed6906f61834d687738d25988ae117683705636936cc605be0bb208b23df4d8f"}, ] +[[package]] +name = "owslib" +version = "0.34.1" +description = "OGC Web Service utility library" +optional = false +python-versions = ">=3.10" +files = [ + {file = "owslib-0.34.1-py3-none-any.whl", hash = "sha256:54ccde946f7732ac3be1408a35f9123ea9ffe794db2ef42d0d45294a5aa9e2f3"}, + {file = "owslib-0.34.1.tar.gz", hash = "sha256:9c46d59dc03c753912fc3ef3136dbc843dad7572feb1af2cdf0fc5d1a0959028"}, +] + +[package.dependencies] +lxml = "*" +python-dateutil = "*" +pyyaml = "*" +requests = "*" + [[package]] name = "packaging" version = "24.0" @@ -2870,4 +2887,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "0ec354d169f403955a3c354d7a6c89255b25fe0a3e1001946e6d0994934aeaca" +content-hash = "f45f84331ae269450388e97f362f8a730a29ed0e4aaa1767ee57e1cd70d59d34" diff --git a/pyproject.toml b/pyproject.toml index df053b9..b3e8bb5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ lxml = "^5.3.0" geojson-pydantic = "^1.1.2" rio-stac = "^0.10.1" rasterio = "^1.4.3" +owslib = "^0.34.1" [tool.poetry.group.dev.dependencies] diff --git a/src/eodm/extract.py b/src/eodm/extract.py index ce2add9..83723b5 100644 --- a/src/eodm/extract.py +++ b/src/eodm/extract.py @@ -1,6 +1,7 @@ from typing import Iterator, Optional import pystac_client +from owslib.ogcapi.records import Records from pystac import Collection, Item from .opensearch import OpenSearchClient, OpenSearchFeature @@ -59,9 +60,7 @@ def extract_stac_api_collections(url: str) -> Iterator[Collection]: def extract_opensearch_features( - url: str, - product_types: list[str], - limit: int = 0, + url: str, product_types: list[str], limit: int = 0 ) -> Iterator[OpenSearchFeature]: """Extracts OpenSearch Features from an OpenSearch API @@ -83,3 +82,56 @@ def extract_opensearch_features( if limit and i >= limit: break yield feature + + +def extract_ogcapi_records_catalogs(url: str) -> Iterator[dict]: + """Extracts OGC API Records from an OGC API Records endpoint + + Args: + url (str): Link to OGC API Records endpoint + + Yields: + Iterator[Item]: OGC API Records Catalogs(collections) + """ + + records = Records(url) + for record in records.collections()["collections"]: + yield record + + +def extract_ogcapi_records( + url: str, + catalog_ids: list[str], + datetime_interval: str | None = None, + bbox: list[float] | None = None, + filter: str | None = None, + cql: dict | None = None, + limit: int | None = None, +) -> Iterator[dict]: + """Extracts OGC API Records from an OGC API Records endpoint + + Args: + url (str): Link to OGC API Records endpoint + catalog_ids (list[str]): List of catalog/collection IDs to search for + datetime_interval (str | None, optional): Datetime interval to search. ISO8601 + datetime or interval Defaults to None. + bbox (list[float, float, float, float] | None, optional): Bounding box to search. + filter (str, optional): CQL filter to apply. Defaults to None. + cql (dict, optional): CQL JSON payload to apply. Defaults to None. + limit (int | None, optional): Limit query to given number. Defaults to None. + + Yields: + Iterator[Item]: OGC API Records Items + """ + + records = Records(url) + for catalog_id in catalog_ids: + for record in records.collection_items( + catalog_id, + bbox=bbox, + datetime_=datetime_interval, + filter=filter, + cql=cql, + limit=limit, + )["features"]: + yield record diff --git a/tests/test_ogcapi_records.py b/tests/test_ogcapi_records.py new file mode 100644 index 0000000..360efd0 --- /dev/null +++ b/tests/test_ogcapi_records.py @@ -0,0 +1,16 @@ +from eodm.extract import extract_ogcapi_records, extract_ogcapi_records_catalogs + + +def test_extract_ogcapi_records_catalogs(): + url = "https://resource-catalogue.apx.develop.eoepca.org" + for i in extract_ogcapi_records_catalogs(url): + assert i["type"] == "catalog" + + +def test_extract_ogcapi_records(): + url = "https://resource-catalogue.apx.develop.eoepca.org" + catalog_ids = ["S2MSI1C"] + datetime_interval = "2018-01-01/2020-01-01" + bbox = [15.7308, 47.4577, 17.2709, 48.2459] + for i in extract_ogcapi_records(url, catalog_ids, datetime_interval, bbox): + assert i["type"] == "Feature" From df519541e041ed679a6d46a1228101f5a2c57d1c Mon Sep 17 00:00:00 2001 From: jankovicgd Date: Wed, 25 Jun 2025 10:21:25 +0200 Subject: [PATCH 2/3] fix: removed cql for now --- src/eodm/extract.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/eodm/extract.py b/src/eodm/extract.py index 83723b5..0991c7d 100644 --- a/src/eodm/extract.py +++ b/src/eodm/extract.py @@ -105,7 +105,6 @@ def extract_ogcapi_records( datetime_interval: str | None = None, bbox: list[float] | None = None, filter: str | None = None, - cql: dict | None = None, limit: int | None = None, ) -> Iterator[dict]: """Extracts OGC API Records from an OGC API Records endpoint @@ -117,7 +116,6 @@ def extract_ogcapi_records( datetime or interval Defaults to None. bbox (list[float, float, float, float] | None, optional): Bounding box to search. filter (str, optional): CQL filter to apply. Defaults to None. - cql (dict, optional): CQL JSON payload to apply. Defaults to None. limit (int | None, optional): Limit query to given number. Defaults to None. Yields: @@ -131,7 +129,6 @@ def extract_ogcapi_records( bbox=bbox, datetime_=datetime_interval, filter=filter, - cql=cql, limit=limit, )["features"]: yield record From c952754cd8f9d566c6bbd099ed4d6e203f787051 Mon Sep 17 00:00:00 2001 From: jankovicgd Date: Wed, 25 Jun 2025 10:28:05 +0200 Subject: [PATCH 3/3] ci: update ubuntu image --- .github/workflows/docs.yml | 2 +- .github/workflows/main.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index a4021cc..4be1bba 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -26,7 +26,7 @@ permissions: jobs: build: - runs-on: ubuntu-20.04 + runs-on: ubuntu-24.04 steps: - name: Check out repository uses: actions/checkout@v4 diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 3f45b06..6a07938 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -19,7 +19,7 @@ on: jobs: test: - runs-on: ubuntu-20.04 + runs-on: ubuntu-24.04 strategy: matrix: python-version: ['3.10']