From ade2c6d5ae243c04a3c954338810095a64eff1bc Mon Sep 17 00:00:00 2001 From: Simon K <6615834+simon-20@users.noreply.github.com> Date: Wed, 22 Apr 2026 11:54:24 +0100 Subject: [PATCH 1/5] feat: add --run-for-single-reporting-org flag This commit adds a feature to limit processing to a single reporting org, which helps with more targeted debugging when there are issues with real data that need investigating. --- src/config/bds_context.py | 5 +++++ .../iati_registry_suitecrm.py | 19 ++++++++++++++++++- src/iati_bulk_data_service.py | 6 ++++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/src/config/bds_context.py b/src/config/bds_context.py index 8763e9c..34a03c5 100644 --- a/src/config/bds_context.py +++ b/src/config/bds_context.py @@ -20,6 +20,7 @@ def __init__(self, environment: dict, logger: logging.Logger, service_factory: I self._RUN_FOR_N_DATASETS = ( int(self["run_for_n_datasets"]) if self.get("run_for_n_datasets") is not None else None ) + self._RUN_FOR_SINGLE_REPORTING_ORG = self.get("run_for_single_reporting_org", None) self._SEND_DATASET_CHECK_MESSAGES = self["SEND_DATASET_CHECK_RESULT_MESSAGES"] == "yes" self._SKIP_SAFETY = self.get("skip_safety", False) @@ -51,6 +52,10 @@ def REDOWNLOAD_FROM_NON_HEAD_SERVERS_AFTER_HOURS(self) -> int: def RUN_FOR_N_DATASETS(self) -> int | None: return self._RUN_FOR_N_DATASETS + @property + def RUN_FOR_SINGLE_REPORTING_ORG(self) -> str | None: + return self._RUN_FOR_SINGLE_REPORTING_ORG + @property def SEND_DATASET_CHECK_MESSAGES(self) -> bool: return self._SEND_DATASET_CHECK_MESSAGES diff --git a/src/dataset_registration/iati_registry_suitecrm.py b/src/dataset_registration/iati_registry_suitecrm.py index 9bfcad2..513264a 100644 --- a/src/dataset_registration/iati_registry_suitecrm.py +++ b/src/dataset_registration/iati_registry_suitecrm.py @@ -21,6 +21,8 @@ def fetch_datasets_metadata( crm.fetch_access_token() + context.logger.info("Fetching all dataset metadata using the libsuitecrm library...") + 
filters = Filter().equal("iati_visibility", "public") suitecrm_dataset_records = [r for r in crm.get_all_records("IATI_Datasets", filters=filters)] @@ -51,6 +53,8 @@ def fetch_datasets_metadata( continue owning_org = reporting_orgs.get(uuid.UUID(record["attributes"]["iati_dataset_owner_org_id"]), None) + if context.RUN_FOR_SINGLE_REPORTING_ORG is not None and owning_org is None: + continue if owning_org is None: context.logger.error( f"SuiteCRM dataset id: {record['id']} has reporting org id: " @@ -63,6 +67,8 @@ def fetch_datasets_metadata( record, owning_org, refresh_timestamp ) + context.logger.info("Fetched metadata for {} datasets".format(len(results))) + return results @@ -72,11 +78,22 @@ def fetch_reporting_orgs_metadata(context: BDSContext, refresh_timestamp: dateti crm.fetch_access_token() - context.logger.info("Fetching all reporting orgs using the libsuitecrm library...") + context.logger.info("Fetching all reporting org metadata using the libsuitecrm library...") filters = Filter().equal("iati_registry_discoverable", "1") suitecrm_reporting_org_records = [r for r in crm.get_all_records("Accounts", filters=filters)] + if context.RUN_FOR_SINGLE_REPORTING_ORG is not None: + suitecrm_reporting_org_records = [ + o + for o in suitecrm_reporting_org_records + if o.get("attributes", {}).get("iati_short_name", "") == context.RUN_FOR_SINGLE_REPORTING_ORG + ] + context.logger.info( + "--run-for-single-reporting-org is set so only " + f"processing reporting org '{context.RUN_FOR_SINGLE_REPORTING_ORG}'." 
+ ) + crm.logout() results = {} diff --git a/src/iati_bulk_data_service.py b/src/iati_bulk_data_service.py index 41f4144..a0e7388 100644 --- a/src/iati_bulk_data_service.py +++ b/src/iati_bulk_data_service.py @@ -20,6 +20,7 @@ def main(args: argparse.Namespace): config = config | { "single_run": args.single_run, "run_for_n_datasets": args.run_for_n_datasets, + "run_for_single_reporting_org": args.run_for_single_reporting_org, "skip_safety": args.skip_safety, } @@ -63,6 +64,11 @@ def main(args: argparse.Namespace): type=int, help="Run on the first N datasets from registration service (useful for testing)", ) + parser.add_argument( + "--run-for-single-reporting-org", + type=str, + help="Run only for the datasets belonging to the specified reporting org (useful for testing)", + ) parser.add_argument( "--skip-safety", action="store_true", From b7895a1c036e735849b8069b76e2eb2bd6420e0d Mon Sep 17 00:00:00 2001 From: Simon K <6615834+simon-20@users.noreply.github.com> Date: Wed, 22 Apr 2026 12:16:08 +0100 Subject: [PATCH 2/5] chore: fixes flake8 warnings and lints test files Housekeeping commit which fixes most of the outstanding flake8 warnings. 
--- .../reporting-orgs-01-four-orgs.json | 4 +- tests/helpers/azure_service_bus_helpers.py | 1 - tests/helpers/data_helpers.py | 7 +- tests/integration/test_dataset_add.py | 5 +- tests/integration/test_dataset_expiry.py | 5 +- tests/integration/test_dataset_indexing.py | 2 +- .../integration/test_dataset_registration.py | 4 +- tests/integration/test_dataset_update.py | 2 +- tests/integration/test_db.py | 4 +- .../test_mq_registry_dataset_changes.py | 1 - tests/integration/test_zip_creation.py | 95 ++++++++++++++----- 11 files changed, 92 insertions(+), 38 deletions(-) diff --git a/tests/artifacts/libsuitecrm-responses/reporting-orgs-01-four-orgs.json b/tests/artifacts/libsuitecrm-responses/reporting-orgs-01-four-orgs.json index e3fb524..07a8652 100644 --- a/tests/artifacts/libsuitecrm-responses/reporting-orgs-01-four-orgs.json +++ b/tests/artifacts/libsuitecrm-responses/reporting-orgs-01-four-orgs.json @@ -82,7 +82,7 @@ "iati_org_type": "10", "iati_hq_country": "GB", "iati_region": "89", - "iati_registry_approved": "0", + "iati_registry_approved": "1", "iati_first_publish_date": "", "iati_data_portal_url": "https://www.example.org/data-portal", "iati_exclusions_policy_url": "https://www.example.org/exclusions-policy", @@ -488,7 +488,7 @@ "iati_org_type": "15", "iati_hq_country": "GB", "iati_region": "489", - "iati_registry_approved": "0", + "iati_registry_approved": "1", "iati_first_publish_date": "", "iati_data_portal_url": "https://www.example.org/data-portal", "iati_exclusions_policy_url": "https://www.example.org/exclusions-policy", diff --git a/tests/helpers/azure_service_bus_helpers.py b/tests/helpers/azure_service_bus_helpers.py index d528c16..cff5177 100644 --- a/tests/helpers/azure_service_bus_helpers.py +++ b/tests/helpers/azure_service_bus_helpers.py @@ -1,5 +1,4 @@ import json -from datetime import datetime from uuid import UUID import pytest diff --git a/tests/helpers/data_helpers.py b/tests/helpers/data_helpers.py index a3f6d77..4ed6be4 100644 --- 
a/tests/helpers/data_helpers.py +++ b/tests/helpers/data_helpers.py @@ -3,8 +3,7 @@ import uuid from config.bds_context import BDSContext -from utilities.azure import get_azure_blob_public_url -from utilities.misc import dataset_has_iati_xml_download, get_object_from_json_str, get_timestamp +from utilities.misc import get_object_from_json_str, get_timestamp def check_most_recent_get_attempt_http_error(dataset: dict): @@ -220,4 +219,6 @@ def expected_values_for_dataset_registration_fields(source_url: str) -> list: def check_registration_service_refreshed_datetime(data_record: dict): assert data_record["registration_service_metadata_refreshed_datetime"] is not None - assert data_record["registration_service_metadata_refreshed_datetime"] > (get_timestamp() - datetime.timedelta(minutes=1)) + assert data_record["registration_service_metadata_refreshed_datetime"] > ( + get_timestamp() - datetime.timedelta(minutes=1) + ) diff --git a/tests/integration/test_dataset_add.py b/tests/integration/test_dataset_add.py index b21a95f..c17226f 100644 --- a/tests/integration/test_dataset_add.py +++ b/tests/integration/test_dataset_add.py @@ -52,7 +52,10 @@ def test_add_new_undownloadable_dataset(get_and_clear_up_context, source_url, ex @pytest.mark.parametrize( - "dataset_url,last_known_good_dataset_hash,last_known_good_dataset_hash_excluding_generated_timestamp,last_known_good_dataset_content_length", + ( + "dataset_url,last_known_good_dataset_hash,last_known_good_dataset_hash_excluding_generated_timestamp," + "last_known_good_dataset_content_length" + ), [ ( "http://localhost:3000/data/test_foundation_a-dataset-001.xml", diff --git a/tests/integration/test_dataset_expiry.py b/tests/integration/test_dataset_expiry.py index b6c9aa5..d90bb8c 100644 --- a/tests/integration/test_dataset_expiry.py +++ b/tests/integration/test_dataset_expiry.py @@ -25,8 +25,9 @@ def test_dataset_expiry_after_72_hours_failed_downloads(get_and_clear_up_context assert 
get_number_xml_files_in_working_dir(context) == 1 dataset = datasets_in_bds[uuid.UUID("c8a40aa5-9f31-4bcf-a36f-51c1fc2cc159")] - dataset["last_known_good_dataset_downloaded"] = (dataset["last_known_good_dataset_downloaded"] - - timedelta(hours=max_hours + 2)) + dataset["last_known_good_dataset_downloaded"] = dataset["last_known_good_dataset_downloaded"] - timedelta( + hours=max_hours + 2 + ) context["DATA_REGISTRY_BASE_URL"] = "http://localhost:3000/ckan-registration/datasets-03-1-dataset-404" checker_run(context, datasets_in_bds) diff --git a/tests/integration/test_dataset_indexing.py b/tests/integration/test_dataset_indexing.py index 3820e3f..2e39b3d 100644 --- a/tests/integration/test_dataset_indexing.py +++ b/tests/integration/test_dataset_indexing.py @@ -15,7 +15,7 @@ from helpers.helpers import download_index_from_azure, get_and_clear_up_context # noqa: F401 from utilities.azure import get_azure_container_name from utilities.db import get_reporting_orgs_in_bds -from utilities.misc import find_object_by_key, format_timestamp_as_utc_str +from utilities.misc import find_object_by_key def test_indices_uploaded_to_blob_storage(get_and_clear_up_context): # noqa: F811 diff --git a/tests/integration/test_dataset_registration.py b/tests/integration/test_dataset_registration.py index 0038c3c..8cf719d 100644 --- a/tests/integration/test_dataset_registration.py +++ b/tests/integration/test_dataset_registration.py @@ -9,7 +9,7 @@ from dataset_registration.iati_registry_ckan import get_publisher_metadata_as_str from dataset_registration.registration_proxy import fetch_datasets_metadata, fetch_reporting_orgs_metadata from helpers.helpers import get_and_clear_up_context # noqa: F401 -from utilities.misc import find_object_by_key, get_timestamp +from utilities.misc import get_timestamp @pytest.mark.parametrize("http_status_code", ["400", "404", "500"]) @@ -139,7 +139,7 @@ def test_suitecrm_registry_conversion_of_registry_reporting_orgs(get_and_clear_u assert 
ro_1["default_licence_id"] == "gpl-3.0" assert ro_1["description"] == "Eaque eaque nostrum quia illum ipsum." assert ro_1["exclusions_policy_url"] == "https://www.example.org/exclusions-policy" - assert ro_1["first_publication_date"] == None + assert ro_1["first_publication_date"] is None assert ro_1["hq_country"] == "GB" assert ro_1["human_readable_name"] == "Gov Agency 1234" assert ro_1["organisation_identifier"] == "GOV-AGENCY-AID-1234" diff --git a/tests/integration/test_dataset_update.py b/tests/integration/test_dataset_update.py index a96e903..feb93ae 100644 --- a/tests/integration/test_dataset_update.py +++ b/tests/integration/test_dataset_update.py @@ -362,4 +362,4 @@ def test_dataset_successful_twice_after_url_change(get_and_clear_up_context): # # run again checker_run(context, datasets_in_bds) - check_last_known_good_dataset_values_are_set(datasets_in_bds[dataset_id]) \ No newline at end of file + check_last_known_good_dataset_values_are_set(datasets_in_bds[dataset_id]) diff --git a/tests/integration/test_db.py b/tests/integration/test_db.py index 5b601ea..487cf34 100644 --- a/tests/integration/test_db.py +++ b/tests/integration/test_db.py @@ -12,7 +12,7 @@ ) -def test_save_reporting_org_db_record(get_and_clear_up_context): +def test_save_reporting_org_db_record(get_and_clear_up_context): # noqa: F811 context = get_and_clear_up_context @@ -51,7 +51,7 @@ def test_save_reporting_org_db_record(get_and_clear_up_context): assert reporting_org_from_db == reporting_org -def test_save_dataset_db_record(get_and_clear_up_context): +def test_save_dataset_db_record(get_and_clear_up_context): # noqa: F811 context = get_and_clear_up_context diff --git a/tests/integration/test_mq_registry_dataset_changes.py b/tests/integration/test_mq_registry_dataset_changes.py index 73bb1bf..b6ed6f6 100644 --- a/tests/integration/test_mq_registry_dataset_changes.py +++ b/tests/integration/test_mq_registry_dataset_changes.py @@ -48,7 +48,6 @@ async def 
test_dataset_created_message_01_success(get_and_clear_up_context, serv check_registration_service_refreshed_datetime(datasets_in_bds[dataset_id]) - @pytest.mark.asyncio async def test_dataset_created_message_02_error_dataset_already_exists( get_and_clear_up_context, service_bus_context # noqa: F811 diff --git a/tests/integration/test_zip_creation.py b/tests/integration/test_zip_creation.py index a84c48d..02217b7 100644 --- a/tests/integration/test_zip_creation.py +++ b/tests/integration/test_zip_creation.py @@ -2,7 +2,6 @@ import os import zipfile -import pytest import requests from bulk_data_service.checker import checker_run @@ -18,9 +17,14 @@ def test_dataset_saved_for_download_success(get_and_clear_up_context): # noqa: run_checker_then_zipper_once(context) assert get_number_xml_files_in_working_dir(context) == 1 - assert os.path.exists("{}{}".format( - context["ZIP_WORKING_DIR"], - "/iati-data/datasets/test_foundation_a/test_foundation_a-dataset-001.xml")) is True + assert ( + os.path.exists( + "{}{}".format( + context["ZIP_WORKING_DIR"], "/iati-data/datasets/test_foundation_a/test_foundation_a-dataset-001.xml" + ) + ) + is True + ) def test_dataset_not_saved_for_download_fail_and_no_cache(get_and_clear_up_context): # noqa: F811 @@ -39,9 +43,14 @@ def test_dataset_saved_for_download_fail_but_cached(get_and_clear_up_context): run_checker_then_zipper_download_fail_but_cached(context) assert get_number_xml_files_in_working_dir(context) == 1 - assert os.path.exists("{}{}".format( - context["ZIP_WORKING_DIR"], - "/iati-data/datasets/test_foundation_a/test_foundation_a-dataset-001.xml")) is True + assert ( + os.path.exists( + "{}{}".format( + context["ZIP_WORKING_DIR"], "/iati-data/datasets/test_foundation_a/test_foundation_a-dataset-001.xml" + ) + ) + is True + ) def test_publisher_metadata_saved_for_failed_metadata_dl(get_and_clear_up_context): # noqa: F811 @@ -106,14 +115,21 @@ def test_dataset_metadata_content_for_successful_metadata_dl(get_and_clear_up_co 
download_and_unpack_zip_to_tmp_unpack_folder(context, "code-for-iati-data-download.zip") - with open(context["TEST_TMP_ZIP_UNPACK"] + "/iati-data-main/metadata/test_foundation_a/test_foundation_a-dataset-001-newname.json", "r") as f: + with open( + context["TEST_TMP_ZIP_UNPACK"] + + "/iati-data-main/metadata/test_foundation_a/test_foundation_a-dataset-001-newname.json", + "r", + ) as f: assert f.read() == json.dumps( { "id": "c8a40aa5-9f31-4bcf-a36f-51c1fc2cc159", "license_id": "uk-ogl", "license_title": "UK Open Government Licence (OGL)", "name": "test_foundation_a-dataset-001-newname", - "organization": {"id": "ea055d99-f7e9-456f-9f99-963e95493c1b", "name": "test_foundation_a", }, + "organization": { + "id": "ea055d99-f7e9-456f-9f99-963e95493c1b", + "name": "test_foundation_a", + }, "resources": [{"url": "http://localhost:3000/not_found"}], "extras": [], "tags": [], @@ -134,7 +150,9 @@ def test_bds_zip_content_for_download_success(get_and_clear_up_context): # noqa assert file_found_in_extracted_zip(context, "iati-data/datasets-minimal.json") assert file_found_in_extracted_zip(context, "iati-data/datasets-full.json") assert file_found_in_extracted_zip(context, "iati-data/reporting-orgs.json") - assert file_found_in_extracted_zip(context, "iati-data/datasets/test_foundation_a/test_foundation_a-dataset-001.xml") + assert file_found_in_extracted_zip( + context, "iati-data/datasets/test_foundation_a/test_foundation_a-dataset-001.xml" + ) def test_bds_zip_content_for_download_success_dataset_updated_meta(get_and_clear_up_context, tmp_path): # noqa: F811 @@ -161,10 +179,14 @@ def test_bds_zip_content_for_download_success_dataset_updated_meta(get_and_clear assert file_found_in_extracted_zip(context, "iati-data/datasets-minimal.json") assert file_found_in_extracted_zip(context, "iati-data/datasets-full.json") assert file_found_in_extracted_zip(context, "iati-data/reporting-orgs.json") - assert file_found_in_extracted_zip(context, 
"iati-data/datasets/test_foundation_a/test_foundation_a-dataset-001-newname.xml") + assert file_found_in_extracted_zip( + context, "iati-data/datasets/test_foundation_a/test_foundation_a-dataset-001-newname.xml" + ) # The dataset as it was originally named should not be found - assert not file_found_in_extracted_zip(context, "iati-data/datasets/test_foundation_a/test_foundation_a-dataset-001.xml") + assert not file_found_in_extracted_zip( + context, "iati-data/datasets/test_foundation_a/test_foundation_a-dataset-001.xml" + ) def test_bds_zip_content_for_download_success_dataset_updated_content(get_and_clear_up_context): # noqa: F811 @@ -193,7 +215,12 @@ def test_bds_zip_content_for_download_success_dataset_updated_content(get_and_cl assert file_found_in_extracted_zip(context, "iati-data/datasets-minimal.json") - with open(os.path.join(context["TEST_TMP_ZIP_UNPACK"], "iati-data/datasets/test_foundation_a/test_foundation_a-dataset-001.xml"), "rb") as f: + with open( + os.path.join( + context["TEST_TMP_ZIP_UNPACK"], "iati-data/datasets/test_foundation_a/test_foundation_a-dataset-001.xml" + ), + "rb", + ) as f: contents_from_zip = f.read() with open("tests/artifacts/iati-xml-files/test_foundation_a-dataset-001-updated.xml", "rb") as f: @@ -213,7 +240,9 @@ def test_bds_zip_content_for_download_fail_but_cached(get_and_clear_up_context): assert file_found_in_extracted_zip(context, "iati-data/datasets-minimal.json") assert file_found_in_extracted_zip(context, "iati-data/datasets-full.json") assert file_found_in_extracted_zip(context, "iati-data/reporting-orgs.json") - assert file_found_in_extracted_zip(context, "iati-data/datasets/test_foundation_a/test_foundation_a-dataset-001.xml") + assert file_found_in_extracted_zip( + context, "iati-data/datasets/test_foundation_a/test_foundation_a-dataset-001.xml" + ) def test_bds_zip_content_for_download_fail_no_cached(get_and_clear_up_context): # noqa: F811 @@ -227,7 +256,9 @@ def 
test_bds_zip_content_for_download_fail_no_cached(get_and_clear_up_context): assert file_found_in_extracted_zip(context, "iati-data/datasets-minimal.json") assert file_found_in_extracted_zip(context, "iati-data/datasets-full.json") assert file_found_in_extracted_zip(context, "iati-data/reporting-orgs.json") - assert not file_found_in_extracted_zip(context, "iati-data/datasets/test_foundation_a/test_foundation_a-dataset-001.xml") + assert not file_found_in_extracted_zip( + context, "iati-data/datasets/test_foundation_a/test_foundation_a-dataset-001.xml" + ) def test_codeforiati_zip_content_for_download_success(get_and_clear_up_context): # noqa: F811 @@ -242,9 +273,13 @@ def test_codeforiati_zip_content_for_download_success(get_and_clear_up_context): assert not file_found_in_extracted_zip(context, "iati-data/datasets-full.json") assert not file_found_in_extracted_zip(context, "iati-data/reporting-orgs.json") assert file_found_in_extracted_zip(context, "iati-data-main/metadata.json") - assert file_found_in_extracted_zip(context, "iati-data-main/data/test_foundation_a/test_foundation_a-dataset-001.xml") + assert file_found_in_extracted_zip( + context, "iati-data-main/data/test_foundation_a/test_foundation_a-dataset-001.xml" + ) assert file_found_in_extracted_zip(context, "iati-data-main/metadata/test_foundation_a.json") - assert file_found_in_extracted_zip(context, "iati-data-main/metadata/test_foundation_a/test_foundation_a-dataset-001.json") + assert file_found_in_extracted_zip( + context, "iati-data-main/metadata/test_foundation_a/test_foundation_a-dataset-001.json" + ) def test_codeforiati_zip_content_for_download_fail_but_cached(get_and_clear_up_context): # noqa: F811 @@ -259,9 +294,13 @@ def test_codeforiati_zip_content_for_download_fail_but_cached(get_and_clear_up_c assert not file_found_in_extracted_zip(context, "iati-data/datasets-full.json") assert not file_found_in_extracted_zip(context, "iati-data/reporting-orgs.json") assert 
file_found_in_extracted_zip(context, "iati-data-main/metadata.json") - assert file_found_in_extracted_zip(context, "iati-data-main/data/test_foundation_a/test_foundation_a-dataset-001.xml") + assert file_found_in_extracted_zip( + context, "iati-data-main/data/test_foundation_a/test_foundation_a-dataset-001.xml" + ) assert file_found_in_extracted_zip(context, "iati-data-main/metadata/test_foundation_a.json") - assert file_found_in_extracted_zip(context, "iati-data-main/metadata/test_foundation_a/test_foundation_a-dataset-001.json") + assert file_found_in_extracted_zip( + context, "iati-data-main/metadata/test_foundation_a/test_foundation_a-dataset-001.json" + ) def test_codeforiati_zip_content_for_download_fail_no_cached(get_and_clear_up_context): # noqa: F811 @@ -276,11 +315,23 @@ def test_codeforiati_zip_content_for_download_fail_no_cached(get_and_clear_up_co assert not file_found_in_extracted_zip(context, "iati-data/datasets-full.json") assert not file_found_in_extracted_zip(context, "iati-data/reporting-orgs.json") assert file_found_in_extracted_zip(context, "iati-data-main/metadata.json") - assert file_found_in_extracted_zip(context, "iati-data-main/data/test_foundation_a/test_foundation_a-dataset-001.xml") + assert file_found_in_extracted_zip( + context, "iati-data-main/data/test_foundation_a/test_foundation_a-dataset-001.xml" + ) assert file_found_in_extracted_zip(context, "iati-data-main/metadata/test_foundation_a.json") - assert file_found_in_extracted_zip(context, "iati-data-main/metadata/test_foundation_a/test_foundation_a-dataset-001.json") + assert file_found_in_extracted_zip( + context, "iati-data-main/metadata/test_foundation_a/test_foundation_a-dataset-001.json" + ) - assert os.path.getsize(os.path.join(context["TEST_TMP_ZIP_UNPACK"], "iati-data-main/data/test_foundation_a/test_foundation_a-dataset-001.xml")) == 0 + assert ( + os.path.getsize( + os.path.join( + context["TEST_TMP_ZIP_UNPACK"], + 
"iati-data-main/data/test_foundation_a/test_foundation_a-dataset-001.xml", + ) + ) + == 0 + ) def run_checker_then_zipper(context, registry_url: str, datasets_in_bds: dict, datasets_in_zip: dict): From 9e3353d464f352babeaeb0de6c0788e2de0d862c Mon Sep 17 00:00:00 2001 From: Simon K <6615834+simon-20@users.noreply.github.com> Date: Wed, 22 Apr 2026 12:18:13 +0100 Subject: [PATCH 3/5] fix: stop processing unapproved reporting orgs This commit stops unapproved orgs from being downloaded, which in turn stops datasets from those orgs from being included, because any dataset which belongs to an unknown org is rejected. This resolves #136. --- src/dataset_registration/iati_registry_suitecrm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dataset_registration/iati_registry_suitecrm.py b/src/dataset_registration/iati_registry_suitecrm.py index 513264a..1b9f13c 100644 --- a/src/dataset_registration/iati_registry_suitecrm.py +++ b/src/dataset_registration/iati_registry_suitecrm.py @@ -80,7 +80,7 @@ def fetch_reporting_orgs_metadata(context: BDSContext, refresh_timestamp: dateti context.logger.info("Fetching all reporting org metadata using the libsuitecrm library...") - filters = Filter().equal("iati_registry_discoverable", "1") + filters = Filter().equal("iati_registry_discoverable", "1").equal("iati_registry_approved", 1) suitecrm_reporting_org_records = [r for r in crm.get_all_records("Accounts", filters=filters)] if context.RUN_FOR_SINGLE_REPORTING_ORG is not None: From 814ee7ac4258408fb780c0bd335c28232864aaaf Mon Sep 17 00:00:00 2001 From: Simon K <6615834+simon-20@users.noreply.github.com> Date: Wed, 22 Apr 2026 12:19:28 +0100 Subject: [PATCH 4/5] docs: update CHANGELOG --- CHANGELOG.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 64f40b8..6a464c7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,16 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
### Removed +## [1.4.4] - 2026-04-22 + +### Added + +- Command line flag to enable processing of a single reporting org. + +### Fixed + +- Stop processing datasets from unapproved reporting orgs. + ## [1.4.3] - 2026-03-10 ### Changed From 5b0ed1b390809e5df53314a7cb6b39094d8508f8 Mon Sep 17 00:00:00 2001 From: Simon K <6615834+simon-20@users.noreply.github.com> Date: Wed, 22 Apr 2026 12:19:36 +0100 Subject: [PATCH 5/5] build: bump version number --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 4e8e0cf..b7ab0e5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "bulk-data-service" -version = "1.4.3" +version = "1.4.4" requires-python = ">= 3.12.6" readme = "README.md" dependencies = [