diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b0ee11d..4d13b5b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -4,25 +4,26 @@ on: [ push, pull_request ] jobs: main: - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.12"] steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 + - uses: actions/checkout@master + - uses: actions/setup-python@v6 name: Setup Python ${{ matrix.python-version }} with: python-version: ${{ matrix.python-version }} - name: Install requirements 📦 run: | python3 -m pip install --upgrade pip + pip3 install setuptools pip3 install -r requirements.txt pip3 install -r requirements-dev.txt - name: Install package 📦 - run: python3 setup.py install + run: pip3 install . - name: run tests ⚙️ - run: python3 setup.py test + run: python3 tests/run_tests.py - name: run flake8 ⚙️ run: | find . -type f -name "*.py" | xargs flake8 diff --git a/README.md b/README.md index 8433db8..40c1f5c 100644 --- a/README.md +++ b/README.md @@ -27,8 +27,7 @@ cd pywiscat git clone https://github.com/wmo-im/pywiscat.git cd pywiscat pip3 install -r requirements.txt -python3 setup.py build -python3 setup.py install +pip3 install . 
``` ## Running @@ -62,6 +61,25 @@ pywiscat search --bbox -142,42,-52,84 # get more information about a WIS2 GDC record pywiscat get urn:x-wmo:md:can:eccc-msc:c7c9d726-c48a-49e3-98ab-78a1ab87cda8 + +## Archive utilities + +# download and extract a WIS2 GDC metadata archive zipfile to a specific directory +pywiscat archive get /path/to/archive + +## Metrics analyzers + +# analyze core records by centre identifier +pywiscat metrics core /path/to/archive + +# analyze recommended records by centre identifier +pywiscat metrics recommended /path/to/archive + +# analyze Earth system disciplines by centre identifier +pywiscat metrics earth-system-discipline /path/to/archive + +# analyze Key Performance Indicators (KPIs) by centre identifier +pywiscat metrics kpi ca-eccc-msc /path/to/archive ``` ## Using the API @@ -94,9 +112,6 @@ python3 setup.py install ### Running tests ```bash -# via setuptools -python3 setup.py test -# manually python3 tests/run_tests.py ``` diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..205a289 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools>=46.4", "wheel"] +build-backend = "setuptools.build_meta" diff --git a/pywiscat/__init__.py b/pywiscat/__init__.py index 8cc28be..f815bf7 100644 --- a/pywiscat/__init__.py +++ b/pywiscat/__init__.py @@ -2,7 +2,7 @@ # # Authors: Tom Kralidis # -# Copyright (c) 2024 Tom Kralidis +# Copyright (c) 2026 Tom Kralidis # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -29,7 +29,9 @@ import click +from pywiscat.wis2.archive import archive from pywiscat.wis2.catalogue import get_gdc_record, search_gdc +from pywiscat.wis2.metrics import metrics __version__ = '0.3.dev2' @@ -42,3 +44,5 @@ def cli(): cli.add_command(search_gdc) cli.add_command(get_gdc_record) +cli.add_command(archive) +cli.add_command(metrics) diff --git a/pywiscat/env.py b/pywiscat/env.py new file mode 
100644 index 0000000..0b5dc30 --- /dev/null +++ b/pywiscat/env.py @@ -0,0 +1,33 @@ +# ================================================================= +# +# Authors: Tom Kralidis +# +# Copyright (c) 2026 Tom Kralidis +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
# Root URL of the WIS2 GDC.  Deployments can point pywiscat at another GDC
# by setting the PYWISCAT_GDC_URL environment variable.
GDC_URL = os.environ.get('PYWISCAT_GDC_URL', 'https://wis2-gdc.weather.gc.ca')

# URL of the discovery metadata collection on that GDC.  Trailing slashes on
# the root are dropped first so that a value such as "https://host/" does not
# yield "https://host//collections/...".
GDC_URL = f"{GDC_URL.rstrip('/')}/collections/wis2-discovery-metadata"
LOGGER = logging.getLogger(__name__)

# Seconds to wait on the GDC.  requests applies no timeout unless one is
# passed, so a stalled server would otherwise block the command forever.
_REQUEST_TIMEOUT = 60


def download_and_extract_archive(gdc_url: str, output_dir: str) -> bool:
    """
    Download and extract a metadata archive zipfile from a WIS2 GDC

    The collection document at `gdc_url` is fetched and its `links` are
    searched for the first entry with `rel` set to `archives`; the zipfile
    behind that link is downloaded into memory and extracted.

    :param gdc_url: URL of the WIS2 GDC collection advertising the archive
    :param output_dir: output directory

    :raises requests.exceptions.RequestException: on HTTP/network failure
    :raises zipfile.BadZipFile: if the downloaded payload is not a zipfile

    :returns: `bool` of result (`False` if no archive link is advertised)
    """

    archive_link = None

    # use the URL the caller asked for, not the module-level default
    LOGGER.debug(f'Fetching GDC collection information from {gdc_url}')
    response = requests.get(gdc_url, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()

    collection = response.json()

    # tolerate collections without links, and links without an href
    for link in collection.get('links', []):
        if link.get('rel') == 'archives' and link.get('href'):
            archive_link = link['href']
            LOGGER.debug(f'Archive link found: {archive_link}')
            break

    if archive_link is None:
        LOGGER.warning('Archive link not found')
        return False

    LOGGER.debug(f'Fetching metadata archive zipfile from {archive_link}')
    response = requests.get(archive_link, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()

    LOGGER.debug(f'Extracting zipfile to {output_dir}')
    with zipfile.ZipFile(BytesIO(response.content)) as fh:
        fh.extractall(output_dir)

    return True


@click.group()
def archive():
    """Run archive utilities against a WIS2 GDC"""

    pass


@click.command()
@click.pass_context
@cli_option_verbosity
@click.argument('output_dir')
def get(ctx, output_dir, verbosity='NOTSET'):
    """Download and extract archive"""

    click.echo(f'Downloading and extracting zipfile from {GDC_URL} to {output_dir}')  # noqa
    if not download_and_extract_archive(GDC_URL, output_dir):
        # fail the command (non-zero exit) instead of reporting "Done"
        raise click.ClickException('Download and extract failed. Set -v DEBUG for more information')  # noqa

    click.echo('Done')


archive.add_command(get)
================================================================= +# +# Authors: Tom Kralidis +# Ján Osuský +# +# Copyright (c) 2026 Tom Kralidis +# Copyright (c) 2026 IBL Software Engineering Portugal Lda. +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
LOGGER = logging.getLogger(__name__)


class DataPolicy(Enum):
    """Data policies a record can declare in `wmo:dataPolicy`"""

    # members must be assigned: bare annotations (`core: 'core'`) create
    # no Enum members at all
    core = 'core'
    recommended = 'recommended'


def get_centre_id(identifier: str) -> str:
    """
    Derive centre identifier from a WCMP2 id

    The identifier is split on `:` and the fourth token is returned, e.g.
    `urn:wmo:md:zm-zmd:core.synop` gives `zm-zmd`.

    :param identifier: `str` of WCMP2 id

    :raises IndexError: if the id has fewer than four `:`-separated tokens

    :returns: `str` of centre identifier
    """

    return identifier.split(':')[3]


def _iter_records(archive_dir: str, subject: str):
    """Yield each parsed `*.json` record of an archive directory"""

    file_list = glob(f'{archive_dir}/*.json')
    if not file_list:
        LOGGER.error(f'No files found in the archive folder "{archive_dir}".')
        return

    LOGGER.debug(f'Analyzing {archive_dir} for {subject}')
    for filename in file_list:
        # JSON is UTF-8; do not depend on the platform's locale encoding
        with open(filename, encoding='utf-8') as fh:
            yield json.load(fh)


def analyze_data_policy(data_policy: DataPolicy, archive_dir: str) -> dict:
    """
    Analyze archive for data policy

    Counts, per centre identifier, the records whose `wmo:dataPolicy`
    property equals the requested policy.  Records that do not declare a
    data policy are skipped.

    :param data_policy: data policy, as a `DataPolicy` member or its `str`
                        value (core or recommended)
    :param archive_dir: `str` of archive directory path

    :returns: `dict` of analysis, by centre identifier (sorted by key)
    """

    # records store the policy as a plain string, so compare on the value
    if isinstance(data_policy, DataPolicy):
        wanted = data_policy.value
    else:
        wanted = data_policy

    report = {}

    for wcmp2 in _iter_records(archive_dir, f'{wanted} records'):
        if wcmp2.get('properties', {}).get('wmo:dataPolicy') != wanted:
            continue

        centre_id = get_centre_id(wcmp2['id'])
        report[centre_id] = report.get(centre_id, 0) + 1

    return dict(sorted(report.items()))
def analyze_earth_system_discipline(archive_dir: str) -> dict:
    """
    Analyze archive for Earth system discipline

    For every `*.json` record in the archive, the concepts of each theme
    using the earth-system-discipline scheme are counted per centre
    identifier.  Records without themes contribute nothing.

    :param archive_dir: `str` of archive directory path

    :returns: `dict` of analysis, by centre identifier (sorted by key);
              each value is a `dict` of concept id to number of occurrences
    """

    scheme = 'https://codes.wmo.int/wis/topic-hierarchy/earth-system-discipline'  # noqa
    report = {}

    file_list = glob(f'{archive_dir}/*.json')
    if not file_list:
        LOGGER.error(f'No files found in the archive folder "{archive_dir}".')
        return report

    LOGGER.debug(f'Analyzing {archive_dir} for Earth system disciplines')
    # iterate the listing obtained above rather than globbing a second time
    for filename in file_list:
        # JSON is UTF-8; do not depend on the platform's locale encoding
        with open(filename, encoding='utf-8') as fh:
            wcmp2 = json.load(fh)
        centre_id = get_centre_id(wcmp2['id'])

        for theme in wcmp2.get('properties', {}).get('themes') or []:
            if theme.get('scheme') != scheme:
                continue

            for concept in theme.get('concepts', []):
                id_ = concept.get('id')
                LOGGER.debug(f'concept: {id_}')

                counts = report.setdefault(centre_id, {})
                counts[id_] = counts.get(id_, 0) + 1

    return dict(sorted(report.items()))


def analyze_kpi(centre_id: str, archive_dir: str) -> dict:
    """
    Analyze archive for Key Performance Indicators (KPI)

    Every archive file whose name contains the centre identifier is
    evaluated with `WMOCoreMetadataProfileKeyPerformanceIndicators`, and
    the `summary.percentage` of each evaluation is recorded per record id.

    :param centre_id: `str` of centre identifier
    :param archive_dir: `str` of archive directory path

    :returns: `dict` of analysis, by centre identifier, holding the average
              percentage, the number of records scoring over 80 and the
              per-record scoring (zeros and no scoring if no file matched)
    """

    LOGGER.debug(f'Analyzing KPIs for {centre_id}')
    scoring = {}

    for filename in glob(f'{archive_dir}/*{centre_id}*.json'):
        # JSON is UTF-8; do not depend on the platform's locale encoding
        with open(filename, encoding='utf-8') as fh:
            wcmp2 = json.load(fh)

        kpis = WMOCoreMetadataProfileKeyPerformanceIndicators(wcmp2)
        results = kpis.evaluate()
        scoring[wcmp2['id']] = results['summary']['percentage']

    summary = {
        'kpi_percentage_average': 0,
        'kpi_percentage_over80_total': 0,
        'scoring': scoring
    }

    if not scoring:
        LOGGER.warning(f'no files for "{centre_id}"')
    else:
        kpi_values = list(scoring.values())
        summary['kpi_percentage_average'] = sum(kpi_values) / len(kpi_values)
        summary['kpi_percentage_over80_total'] = sum(
            1 for value in kpi_values if value > 80)

    return {centre_id: summary}


@click.group()
def metrics():
    """Run metrics against a WIS2 GDC"""

    pass


@click.command()
@click.pass_context
@click.argument('archive_dir')
@cli_option_verbosity
def core(ctx, archive_dir, verbosity):
    """Analyze core records"""

    report = analyze_data_policy('core', archive_dir)
    click.echo(json.dumps(report, indent=4))


@click.command()
@click.pass_context
@click.argument('archive_dir')
@cli_option_verbosity
def recommended(ctx, archive_dir, verbosity):
    """Analyze recommended records"""

    report = analyze_data_policy('recommended', archive_dir)
    click.echo(json.dumps(report, indent=4))


@click.command()
@click.pass_context
@click.argument('archive_dir')
@cli_option_verbosity
def earth_system_discipline(ctx, archive_dir, verbosity):
    """Analyze Earth system disciplines"""

    report = analyze_earth_system_discipline(archive_dir)
    click.echo(json.dumps(report, indent=4))


@click.command()
@click.pass_context
@click.argument('centre_id')
@click.argument('archive_dir')
@cli_option_verbosity
def kpi(ctx, centre_id, archive_dir, verbosity):
    """Analyze Key Performance Indicators (KPIs)"""

    report = analyze_kpi(centre_id, archive_dir)
    click.echo(json.dumps(report, indent=4))


# expose the analyzers as subcommands of the `metrics` group
metrics.add_command(core)
metrics.add_command(recommended)
metrics.add_command(earth_system_discipline)
metrics.add_command(kpi)
subprocess.call([sys.executable, os.path.join('tests', - 'run_tests.py')]) - raise SystemExit(errno) +from setuptools import find_packages, setup def read(filename, encoding='utf-8'): @@ -73,7 +56,7 @@ def get_package_version(): 'wis' ] -DESCRIPTION = 'WMO WIS Catalogue analysis tool' +DESCRIPTION = 'WMO WIS Catalogue Python client' # ensure a fresh MANIFEST file is generated if (os.path.exists('MANIFEST')): @@ -113,6 +96,5 @@ def get_package_version(): 'Topic :: Scientific/Engineering :: GIS', 'Topic :: Scientific/Engineering :: Information Analysis' ], - cmdclass={'test': PyTest}, test_suite='tests.run_tests' ) diff --git a/tests/run_tests.py b/tests/run_tests.py index db33ee7..8fd2cc2 100644 --- a/tests/run_tests.py +++ b/tests/run_tests.py @@ -3,7 +3,7 @@ # Authors: Tom Kralidis # Ján Osuský # -# Copyright (c) 2023 Tom Kralidis +# Copyright (c) 2026 Tom Kralidis # Copyright (c) 2021, IBL Software Engineering spol. s r. o. # # Permission is hereby granted, free of charge, to any person @@ -31,6 +31,10 @@ import unittest +from pywiscat.wis2.catalogue import ( + get_country_and_centre_id, get_country_prettified) +from pywiscat.wis2.metrics import get_centre_id + class WISCatalogueUtilTest(unittest.TestCase): """WIS Catalogue tests""" @@ -43,6 +47,27 @@ def tearDown(self): """return to pristine state""" pass + def test_get_centre_id(self): + """test for centre-id detection from a WCMP2 ID""" + + wcmp2_id = 'urn:wmo:md:zm-zmd:core.surface-based-observations.synop' + self.assertEqual(get_centre_id(wcmp2_id), 'zm-zmd') + + def test_get_country_and_centre_id(self): + """test for country and centre-id detection from a WCMP2 ID""" + + wcmp2_id = 'urn:wmo:md:zm-zmd:core.surface-based-observations.synop' + country, centre_id = get_country_and_centre_id(wcmp2_id) + self.assertEqual(country, 'zm') + self.assertEqual(centre_id, 'zm-zmd') + + def test_get_prettified_country(self): + """test for country and centre-id detection from a WCMP2 ID""" + + country_prettified = 
get_country_prettified('zm') + + self.assertEqual(country_prettified, 'Zambia 🇿🇲') + if __name__ == '__main__': unittest.main()