From bd0924c8e54c1b50961e3c7336a717554d2f1d83 Mon Sep 17 00:00:00 2001 From: Tom Kralidis Date: Tue, 6 Jan 2026 20:58:20 -0500 Subject: [PATCH 1/7] implement WIS2 GDC metrics --- README.md | 19 ++++ pywiscat/__init__.py | 6 +- pywiscat/env.py | 33 ++++++ pywiscat/wis2/archive.py | 104 ++++++++++++++++++ pywiscat/wis2/catalogue.py | 6 +- pywiscat/wis2/metrics.py | 217 +++++++++++++++++++++++++++++++++++++ 6 files changed, 380 insertions(+), 5 deletions(-) create mode 100644 pywiscat/env.py create mode 100644 pywiscat/wis2/archive.py create mode 100644 pywiscat/wis2/metrics.py diff --git a/README.md b/README.md index 8433db8..5a336bf 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,25 @@ pywiscat search --bbox -142,42,-52,84 # get more information about a WIS2 GDC record pywiscat get urn:x-wmo:md:can:eccc-msc:c7c9d726-c48a-49e3-98ab-78a1ab87cda8 + +## Archive utilities + +# download and extract a WIS2 GDC metadata archive zipfile to a specific directory +pywiscat archive get /path/to/archive + +## Metrics analyzers + +# analyze core records by centre identifier +pywiscat metrics core /path/to/archive + +# analyze recommended records by centre identifier +pywiscat metrics recommended /path/to/archive + +# analyze Earth system disciplines by centre identifier +pywiscat metrics earth-system-discipline /path/to/archive + +# analyze Key Performance Indicators (KPIs) by centre identifier +pywiscat metrics kpi ca-eccc-msc /path/to/archive ``` ## Using the API diff --git a/pywiscat/__init__.py b/pywiscat/__init__.py index 8cc28be..bd274aa 100644 --- a/pywiscat/__init__.py +++ b/pywiscat/__init__.py @@ -2,7 +2,7 @@ # # Authors: Tom Kralidis # -# Copyright (c) 2024 Tom Kralidis +# Copyright (c) 2025 Tom Kralidis # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -29,7 +29,9 @@ import click +from pywiscat.wis2.archive import archive from pywiscat.wis2.catalogue import get_gdc_record, search_gdc 
+from pywiscat.wis2.metrics import metrics __version__ = '0.3.dev2' @@ -42,3 +44,5 @@ def cli(): cli.add_command(search_gdc) cli.add_command(get_gdc_record) +cli.add_command(archive) +cli.add_command(metrics) diff --git a/pywiscat/env.py b/pywiscat/env.py new file mode 100644 index 0000000..66d52e2 --- /dev/null +++ b/pywiscat/env.py @@ -0,0 +1,33 @@ +# ================================================================= +# +# Authors: Tom Kralidis +# +# Copyright (c) 2025 Tom Kralidis +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+# +# ================================================================= + +import os + +GDC_URL = os.environ.get('PYWISCAT_GDC_URL', 'https://wis2-gdc.weather.gc.ca') +GDC_URL = f'{GDC_URL}/collections/wis2-discovery-metadata' diff --git a/pywiscat/wis2/archive.py b/pywiscat/wis2/archive.py new file mode 100644 index 0000000..5561e30 --- /dev/null +++ b/pywiscat/wis2/archive.py @@ -0,0 +1,104 @@ +# ================================================================= +# +# Authors: Tom Kralidis +# +# Copyright (c) 2025 Tom Kralidis +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+# +# ================================================================= + +import logging + +from io import BytesIO +import zipfile + +import click +import requests + +from pywiscat.cli_helpers import cli_option_verbosity +from pywiscat.env import GDC_URL + +LOGGER = logging.getLogger(__name__) + + +def download_and_extract_archive(gdc_url: str, output_dir: str) -> bool: + """ + Download and extract a metadata archive zipfile from a WIS2 GDC + + :param gdc_url: URL of WIS2 GDC + :param output_dir: output directory + + :returns: `bool` of result + """ + + archive_link = None + + LOGGER.debug(f'Fetching GDC collection information from {GDC_URL}') + response = requests.get(GDC_URL) + response.raise_for_status() + + response = response.json() + + for link in response['links']: + if link.get('rel') == 'archives': + archive_link = link['href'] + LOGGER.debug(f'Archive link found: {archive_link}') + break + + if archive_link is None: + LOGGER.warning('Archive link not found') + return False + + LOGGER.debug(f'Fetching metadata archive zipfile from {archive_link}') + response = requests.get(archive_link) + response.raise_for_status() + + LOGGER.debug(f'Extracting zipfile to {output_dir}') + with zipfile.ZipFile(BytesIO(response.content)) as fh: + fh.extractall(output_dir) + + return True + + +@click.group() +def archive(): + """Run archive utilities against a WIS2 GDC""" + + pass + + +@click.command() +@click.pass_context +@cli_option_verbosity +@click.argument('output_dir') +def get(ctx, output_dir, verbosity='NOTSET'): + """Download and extract archive""" + + click.echo(f'Downloading and extracting zipfile from {GDC_URL} to {output_dir}') # noqa + if not download_and_extract_archive(GDC_URL, output_dir): + click.echo('Download and extract failed. 
Set -v DEBUG for more information') # noqa + + click.echo('Done') + + +archive.add_command(get) diff --git a/pywiscat/wis2/catalogue.py b/pywiscat/wis2/catalogue.py index c5581fc..8da00be 100644 --- a/pywiscat/wis2/catalogue.py +++ b/pywiscat/wis2/catalogue.py @@ -28,7 +28,6 @@ # ================================================================= import logging -import os from textwrap import indent, wrap import click @@ -38,10 +37,9 @@ import requests from pywiscat.cli_helpers import cli_option_verbosity -LOGGER = logging.getLogger(__name__) +from pywiscat.env import GDC_URL -GDC_URL = os.environ.get('PYWISCAT_GDC_URL', 'https://wis2-gdc.weather.gc.ca') -GDC_URL = f'{GDC_URL}/collections/wis2-discovery-metadata' +LOGGER = logging.getLogger(__name__) def get_country_and_centre(identifier): diff --git a/pywiscat/wis2/metrics.py b/pywiscat/wis2/metrics.py new file mode 100644 index 0000000..b3851a2 --- /dev/null +++ b/pywiscat/wis2/metrics.py @@ -0,0 +1,217 @@ +# ================================================================= +# +# Authors: Tom Kralidis +# +# Copyright (c) 2025 Tom Kralidis +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# ================================================================= + +from enum import Enum +from glob import glob +import json +import logging + +import click +from pywcmp.wcmp2.kpi import WMOCoreMetadataProfileKeyPerformanceIndicators + +from pywiscat.cli_helpers import cli_option_verbosity + +LOGGER = logging.getLogger(__name__) + + +class DataPolicy(Enum): + core: 'core' + recommended: 'recommended' + + +def get_centre_id(identifier: str) -> str: + """ + Derive centre identifier from a WCMP2 id + + :param identifier: `str` of WCMP2 id + + :returns: `str` of centre identifier + """ + + return identifier.split(':')[3] + + +def analyze_data_policy(data_policy: DataPolicy, archive_dir: str) -> dict: + """ + Analyze archive for data policy + + :param data_policy: `str` of data policy (core or recommended) + :param archive_dir: `str` of archive directory path + + :returns: `dict` of analysis, by centre identifier + """ + + report = {} + + LOGGER.debug(f'Analyzing {archive_dir} for {data_policy} records') + for f in glob(f'{archive_dir}/*.json'): + with open(f) as fh: + wcmp2 = json.load(fh) + centre_id = get_centre_id(wcmp2['id']) + + data_policy2 = wcmp2['properties']['wmo:dataPolicy'] + if data_policy2 == data_policy: + if centre_id not in report: + report[centre_id] = 1 + else: + report[centre_id] += 1 + + return dict(sorted(report.items())) + + +def analyze_earth_system_discipline(archive_dir: str) -> dict: + """ + Analyze archive for Earth system discipline + + :param archive_dir: `str` of archive directory path + + :returns: `dict` of analysis, by centre identifier + """ + + report = {} + + LOGGER.debug(f'Analyzing {archive_dir} for Earth system disciplines') + for f in 
glob(f'{archive_dir}/*.json'): + with open(f) as fh: + wcmp2 = json.load(fh) + centre_id = get_centre_id(wcmp2['id']) + + for theme in wcmp2['properties']['themes']: + if theme.get('scheme') == 'https://codes.wmo.int/wis/topic-hierarchy/earth-system-discipline': # noqa + for concept in theme.get('concepts', []): + id_ = concept.get('id') + LOGGER.debug(f'concept: {id_}') + + if centre_id not in report: + report[centre_id] = { + id_: 1 + } + else: + if id_ not in report[centre_id]: + report[centre_id][id_] = 1 + else: + report[centre_id][id_] += 1 + + return dict(sorted(report.items())) + + +def analyze_kpi(centre_id: str, archive_dir: str) -> dict: + """ + Analyze archive for Key Performance Indicators (KPI) + + :param centre_id: `str` of centre identifier + :param archive_dir: `str` of archive directory path + + :returns: `dict` of analysis, by centre identifier + """ + + LOGGER.debug(f'Analyzing KPIs for {centre_id}') + report = { + centre_id: { + 'kpi_percentage_average': 0, + 'kpi_percentage_over80_total': 0, + 'scoring': {} + } + } + + for f in glob(f'{archive_dir}/*{centre_id}*.json'): + with open(f) as fh: + wcmp2 = json.load(fh) + + kpis = WMOCoreMetadataProfileKeyPerformanceIndicators(wcmp2) + results = kpis.evaluate() + report[centre_id]['scoring'][wcmp2['id']] = results['summary']['percentage'] # noqa + + kpi_values = report[centre_id]['scoring'].values() + total = sum(kpi_values) + average = total / len(report[centre_id]['scoring']) + report[centre_id]['kpi_percentage_average'] = average + + over80_total = sum(1 for value in kpi_values if value > 80) + report[centre_id]['kpi_percentage_over80_total'] = over80_total + + return dict(sorted(report.items())) + + +@click.group() +def metrics(): + """Run metrics against a WIS2 GDC""" + + pass + + +@click.command() +@click.pass_context +@click.argument('archive_dir') +@cli_option_verbosity +def core(ctx, archive_dir, verbosity): + """Analyze core records""" + + report = analyze_data_policy('core', 
archive_dir) + click.echo(json.dumps(report, indent=4)) + + +@click.command() +@click.pass_context +@click.argument('archive_dir') +@cli_option_verbosity +def recommended(ctx, archive_dir, verbosity): + """Analyze recommended records""" + + report = analyze_data_policy('recommended', archive_dir) + click.echo(json.dumps(report, indent=4)) + + +@click.command() +@click.pass_context +@click.argument('archive_dir') +@cli_option_verbosity +def earth_system_discipline(ctx, archive_dir, verbosity): + """Analyze Earth system disciplines""" + + report = analyze_earth_system_discipline(archive_dir) + click.echo(json.dumps(report, indent=4)) + + +@click.command() +@click.pass_context +@click.argument('centre_id') +@click.argument('archive_dir') +@cli_option_verbosity +def kpi(ctx, centre_id, archive_dir, verbosity): + """Analyze Key Performance Indicators (KPIs)""" + + report = analyze_kpi(centre_id, archive_dir) + click.echo(json.dumps(report, indent=4)) + + +metrics.add_command(core) +metrics.add_command(recommended) +metrics.add_command(earth_system_discipline) +metrics.add_command(kpi) From 5d9572c2f1afefe48417c98e49be758c43388330 Mon Sep 17 00:00:00 2001 From: Tom Kralidis Date: Tue, 6 Jan 2026 21:03:53 -0500 Subject: [PATCH 2/7] update to Python 3.12 and Ubuntu 24.04 --- .github/workflows/main.yml | 12 ++++++------ README.md | 6 +----- pyproject.toml | 3 +++ setup.py | 22 ++-------------------- 4 files changed, 12 insertions(+), 31 deletions(-) create mode 100644 pyproject.toml diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b0ee11d..27706cc 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -4,13 +4,13 @@ on: [ push, pull_request ] jobs: main: - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.12"] steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 + - uses: actions/checkout@master + - uses: 
actions/setup-python@v6 name: Setup Python ${{ matrix.python-version }} with: python-version: ${{ matrix.python-version }} @@ -20,9 +20,9 @@ jobs: pip3 install -r requirements.txt pip3 install -r requirements-dev.txt - name: Install package 📦 - run: python3 setup.py install + run: pip3 install . - name: run tests ⚙️ - run: python3 setup.py test + run: python3 tests/run_tests.py - name: run flake8 ⚙️ run: | find . -type f -name "*.py" | xargs flake8 diff --git a/README.md b/README.md index 5a336bf..40c1f5c 100644 --- a/README.md +++ b/README.md @@ -27,8 +27,7 @@ cd pywiscat git clone https://github.com/wmo-im/pywiscat.git cd pywiscat pip3 install -r requirements.txt -python3 setup.py build -python3 setup.py install +pip3 install . ``` ## Running @@ -113,9 +112,6 @@ python3 setup.py install ### Running tests ```bash -# via setuptools -python3 setup.py test -# manually python3 tests/run_tests.py ``` diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..205a289 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools>=46.4", "wheel"] +build-backend = "setuptools.build_meta" diff --git a/setup.py b/setup.py index 36a8798..2683c1e 100644 --- a/setup.py +++ b/setup.py @@ -30,24 +30,7 @@ import io import os import re -from setuptools import Command, find_packages, setup -import sys - - -class PyTest(Command): - user_options = [] - - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - import subprocess - errno = subprocess.call([sys.executable, os.path.join('tests', - 'run_tests.py')]) - raise SystemExit(errno) +from setuptools import find_packages, setup def read(filename, encoding='utf-8'): @@ -73,7 +56,7 @@ def get_package_version(): 'wis' ] -DESCRIPTION = 'WMO WIS Catalogue analysis tool' +DESCRIPTION = 'WMO WIS Catalogue Python client' # ensure a fresh MANIFEST file is generated if (os.path.exists('MANIFEST')): @@ -113,6 +96,5 @@ def get_package_version(): 'Topic 
:: Scientific/Engineering :: GIS', 'Topic :: Scientific/Engineering :: Information Analysis' ], - cmdclass={'test': PyTest}, test_suite='tests.run_tests' ) From 3a2e1b982e5d7e3df6a1f10141a72ee6391817e7 Mon Sep 17 00:00:00 2001 From: Tom Kralidis Date: Tue, 6 Jan 2026 21:25:14 -0500 Subject: [PATCH 3/7] add tests --- pywiscat/wis2/catalogue.py | 6 +++--- tests/run_tests.py | 25 +++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/pywiscat/wis2/catalogue.py b/pywiscat/wis2/catalogue.py index 8da00be..db5ac29 100644 --- a/pywiscat/wis2/catalogue.py +++ b/pywiscat/wis2/catalogue.py @@ -42,7 +42,7 @@ LOGGER = logging.getLogger(__name__) -def get_country_and_centre(identifier): +def get_country_and_centre_id(identifier): """ Get country and centre id from a WCMP2 identifier @@ -183,7 +183,7 @@ def search(**kwargs: dict) -> dict: LOGGER.debug('Building up results') for item in response_json['features']: - country, centre_id = get_country_and_centre(item['id']) + country, centre_id = get_country_and_centre_id(item['id']) output['fields'] = [ 'id', @@ -322,7 +322,7 @@ def get_gdc_record(ctx, identifier, verbosity): if 'description' in result: raise click.ClickException(f'Record identifier {identifier} not found') - country, centre_id = get_country_and_centre(result['id']) + country, centre_id = get_country_and_centre_id(result['id']) country = get_country_prettified(country) click.echo(f"Record: {result['properties']['title']}\n") diff --git a/tests/run_tests.py b/tests/run_tests.py index db33ee7..91019d6 100644 --- a/tests/run_tests.py +++ b/tests/run_tests.py @@ -31,6 +31,10 @@ import unittest +from pywiscat.wis2.catalogue import ( + get_country_and_centre_id, get_country_prettified) +from pywiscat.wis2.metrics import get_centre_id + class WISCatalogueUtilTest(unittest.TestCase): """WIS Catalogue tests""" @@ -43,6 +47,27 @@ def tearDown(self): """return to pristine state""" pass + def test_get_centre_id(self): + """test for centre-id 
detection from a WCMP2 ID""" + + wcmp2_id = 'urn:wmo:md:zm-zmd:core.surface-based-observations.synop' + self.assertEqual(get_centre_id(wcmp2_id), 'zm-zmd') + + def test_get_country_and_centre_id(self): + """test for country and centre-id detection from a WCMP2 ID""" + + wcmp2_id = 'urn:wmo:md:zm-zmd:core.surface-based-observations.synop' + country, centre_id = get_country_and_centre_id(wcmp2_id) + self.assertEqual(country, 'zm') + self.assertEqual(centre_id, 'zm-zmd') + + def test_get_prettified_country(self): + """test for prettified country name from a country code""" + + country_prettified = get_country_prettified('zm') + + self.assertEqual(country_prettified, 'Zambia 🇿🇲') + if __name__ == '__main__': unittest.main() From f4b0ba5d9464fdbd8f88aea30833ec6a46f50d1d Mon Sep 17 00:00:00 2001 From: Tom Kralidis Date: Tue, 6 Jan 2026 21:26:26 -0500 Subject: [PATCH 4/7] fix requirements --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 05ee2e8..887a50b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,5 @@ click emoji-country-flag iso3166 prettytable +pywcmp requests From 2b8e4d4d5707fff93e4e807b7da2a6ece5d52653 Mon Sep 17 00:00:00 2001 From: Tom Kralidis Date: Tue, 6 Jan 2026 21:28:00 -0500 Subject: [PATCH 5/7] update CI --- .github/workflows/main.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 27706cc..4d13b5b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -17,6 +17,7 @@ jobs: - name: Install requirements 📦 run: | python3 -m pip install --upgrade pip + pip3 install setuptools pip3 install -r requirements.txt pip3 install -r requirements-dev.txt - name: Install package 📦 From 4c43a7736d0a3f6d57481d256b1eb5ff56b55b69 Mon Sep 17 00:00:00 2001 From: Tom Kralidis Date: Wed, 7 Jan 2026 20:14:30 -0500 Subject: [PATCH 6/7] update copyright year on touched files --- pywiscat/__init__.py | 2 +- 
pywiscat/env.py | 2 +- pywiscat/wis2/archive.py | 2 +- pywiscat/wis2/catalogue.py | 2 +- pywiscat/wis2/metrics.py | 2 +- setup.py | 2 +- tests/run_tests.py | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pywiscat/__init__.py b/pywiscat/__init__.py index bd274aa..f815bf7 100644 --- a/pywiscat/__init__.py +++ b/pywiscat/__init__.py @@ -2,7 +2,7 @@ # # Authors: Tom Kralidis # -# Copyright (c) 2025 Tom Kralidis +# Copyright (c) 2026 Tom Kralidis # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation diff --git a/pywiscat/env.py b/pywiscat/env.py index 66d52e2..0b5dc30 100644 --- a/pywiscat/env.py +++ b/pywiscat/env.py @@ -2,7 +2,7 @@ # # Authors: Tom Kralidis # -# Copyright (c) 2025 Tom Kralidis +# Copyright (c) 2026 Tom Kralidis # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation diff --git a/pywiscat/wis2/archive.py b/pywiscat/wis2/archive.py index 5561e30..abbbab1 100644 --- a/pywiscat/wis2/archive.py +++ b/pywiscat/wis2/archive.py @@ -2,7 +2,7 @@ # # Authors: Tom Kralidis # -# Copyright (c) 2025 Tom Kralidis +# Copyright (c) 2026 Tom Kralidis # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation diff --git a/pywiscat/wis2/catalogue.py b/pywiscat/wis2/catalogue.py index db5ac29..ece77ec 100644 --- a/pywiscat/wis2/catalogue.py +++ b/pywiscat/wis2/catalogue.py @@ -2,7 +2,7 @@ # # Authors: Tom Kralidis # -# Copyright (c) 2025 Tom Kralidis +# Copyright (c) 2026 Tom Kralidis # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation diff --git a/pywiscat/wis2/metrics.py b/pywiscat/wis2/metrics.py index b3851a2..01ea7a8 100644 --- a/pywiscat/wis2/metrics.py +++ b/pywiscat/wis2/metrics.py @@ -2,7 +2,7 @@ # # Authors: Tom Kralidis # -# Copyright (c) 2025 Tom 
Kralidis +# Copyright (c) 2026 Tom Kralidis # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation diff --git a/setup.py b/setup.py index 2683c1e..b38ed7d 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ # # Authors: Tom Kralidis # -# Copyright (c) 2021 Tom Kralidis +# Copyright (c) 2026 Tom Kralidis # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation diff --git a/tests/run_tests.py b/tests/run_tests.py index 91019d6..8fd2cc2 100644 --- a/tests/run_tests.py +++ b/tests/run_tests.py @@ -3,7 +3,7 @@ # Authors: Tom Kralidis # Ján Osuský # -# Copyright (c) 2023 Tom Kralidis +# Copyright (c) 2026 Tom Kralidis # Copyright (c) 2021, IBL Software Engineering spol. s r. o. # # Permission is hereby granted, free of charge, to any person From a25a5a6535d88101e12a43a2eaebed61630f06d8 Mon Sep 17 00:00:00 2001 From: Jan Osusky Date: Thu, 15 Jan 2026 14:58:06 +0000 Subject: [PATCH 7/7] handle edge cases in metrics calculations --- pywiscat/wis2/metrics.py | 88 +++++++++++++++++++++++----------------- 1 file changed, 51 insertions(+), 37 deletions(-) diff --git a/pywiscat/wis2/metrics.py b/pywiscat/wis2/metrics.py index 01ea7a8..411b531 100644 --- a/pywiscat/wis2/metrics.py +++ b/pywiscat/wis2/metrics.py @@ -1,8 +1,10 @@ # ================================================================= # # Authors: Tom Kralidis +# Ján Osuský # # Copyright (c) 2026 Tom Kralidis +# Copyright (c) 2026 IBL Software Engineering Portugal Lda. 
# # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -69,18 +71,22 @@ def analyze_data_policy(data_policy: DataPolicy, archive_dir: str) -> dict: report = {} - LOGGER.debug(f'Analyzing {archive_dir} for {data_policy} records') - for f in glob(f'{archive_dir}/*.json'): - with open(f) as fh: - wcmp2 = json.load(fh) - centre_id = get_centre_id(wcmp2['id']) - - data_policy2 = wcmp2['properties']['wmo:dataPolicy'] - if data_policy2 == data_policy: - if centre_id not in report: - report[centre_id] = 1 - else: - report[centre_id] += 1 + file_list = glob(f'{archive_dir}/*.json') + if not file_list: + LOGGER.error(f'No files found in the archive folder "{archive_dir}".') + else: + LOGGER.debug(f'Analyzing {archive_dir} for {data_policy} records') + for f in file_list: + with open(f) as fh: + wcmp2 = json.load(fh) + centre_id = get_centre_id(wcmp2['id']) + + data_policy2 = wcmp2['properties']['wmo:dataPolicy'] + if data_policy2 == data_policy: + if centre_id not in report: + report[centre_id] = 1 + else: + report[centre_id] += 1 return dict(sorted(report.items())) @@ -96,27 +102,31 @@ def analyze_earth_system_discipline(archive_dir: str) -> dict: report = {} - LOGGER.debug(f'Analyzing {archive_dir} for Earth system disciplines') - for f in glob(f'{archive_dir}/*.json'): - with open(f) as fh: - wcmp2 = json.load(fh) - centre_id = get_centre_id(wcmp2['id']) - - for theme in wcmp2['properties']['themes']: - if theme.get('scheme') == 'https://codes.wmo.int/wis/topic-hierarchy/earth-system-discipline': # noqa - for concept in theme.get('concepts', []): - id_ = concept.get('id') - LOGGER.debug(f'concept: {id_}') - - if centre_id not in report: - report[centre_id] = { - id_: 1 - } - else: - if id_ not in report[centre_id]: - report[centre_id][id_] = 1 + file_list = glob(f'{archive_dir}/*.json') + if not file_list: + LOGGER.error(f'No files found in the archive folder "{archive_dir}".') + else: + 
LOGGER.debug(f'Analyzing {archive_dir} for Earth system disciplines') + for f in glob(f'{archive_dir}/*.json'): + with open(f) as fh: + wcmp2 = json.load(fh) + centre_id = get_centre_id(wcmp2['id']) + + for theme in wcmp2['properties']['themes']: + if theme.get('scheme') == 'https://codes.wmo.int/wis/topic-hierarchy/earth-system-discipline': # noqa + for concept in theme.get('concepts', []): + id_ = concept.get('id') + LOGGER.debug(f'concept: {id_}') + + if centre_id not in report: + report[centre_id] = { + id_: 1 + } else: - report[centre_id][id_] += 1 + if id_ not in report[centre_id]: + report[centre_id][id_] = 1 + else: + report[centre_id][id_] += 1 return dict(sorted(report.items())) @@ -149,12 +159,16 @@ def analyze_kpi(centre_id: str, archive_dir: str) -> dict: report[centre_id]['scoring'][wcmp2['id']] = results['summary']['percentage'] # noqa kpi_values = report[centre_id]['scoring'].values() - total = sum(kpi_values) - average = total / len(report[centre_id]['scoring']) - report[centre_id]['kpi_percentage_average'] = average - over80_total = sum(1 for value in kpi_values if value > 80) - report[centre_id]['kpi_percentage_over80_total'] = over80_total + if not kpi_values: + LOGGER.warning(f'no files for "{centre_id}"') + else: + total = sum(kpi_values) + average = total / len(report[centre_id]['scoring']) + report[centre_id]['kpi_percentage_average'] = average + + over80_total = sum(1 for value in kpi_values if value > 80) + report[centre_id]['kpi_percentage_over80_total'] = over80_total return dict(sorted(report.items()))