From ade2c6d5ae243c04a3c954338810095a64eff1bc Mon Sep 17 00:00:00 2001
From: Simon K <6615834+simon-20@users.noreply.github.com>
Date: Wed, 22 Apr 2026 11:54:24 +0100
Subject: [PATCH 1/5] feat: add --run-for-single-reporting-org flag

This commit adds a feature to limit processing to a single
reporting org, which helps with more targeted debugging
when there are issues with real data that need investigating.
---
 src/config/bds_context.py                     |  5 +++++
 .../iati_registry_suitecrm.py                 | 19 ++++++++++++++++++-
 src/iati_bulk_data_service.py                 |  6 ++++++
 3 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/src/config/bds_context.py b/src/config/bds_context.py
index 8763e9c..34a03c5 100644
--- a/src/config/bds_context.py
+++ b/src/config/bds_context.py
@@ -20,6 +20,7 @@ def __init__(self, environment: dict, logger: logging.Logger, service_factory: I
         self._RUN_FOR_N_DATASETS = (
             int(self["run_for_n_datasets"]) if self.get("run_for_n_datasets") is not None else None
         )
+        self._RUN_FOR_SINGLE_REPORTING_ORG = self.get("run_for_single_reporting_org", None)
         self._SEND_DATASET_CHECK_MESSAGES = self["SEND_DATASET_CHECK_RESULT_MESSAGES"] == "yes"
         self._SKIP_SAFETY = self.get("skip_safety", False)
 
@@ -51,6 +52,10 @@ def REDOWNLOAD_FROM_NON_HEAD_SERVERS_AFTER_HOURS(self) -> int:
     def RUN_FOR_N_DATASETS(self) -> int | None:
         return self._RUN_FOR_N_DATASETS
 
+    @property
+    def RUN_FOR_SINGLE_REPORTING_ORG(self) -> str | None:
+        return self._RUN_FOR_SINGLE_REPORTING_ORG
+
     @property
     def SEND_DATASET_CHECK_MESSAGES(self) -> bool:
         return self._SEND_DATASET_CHECK_MESSAGES
diff --git a/src/dataset_registration/iati_registry_suitecrm.py b/src/dataset_registration/iati_registry_suitecrm.py
index 9bfcad2..513264a 100644
--- a/src/dataset_registration/iati_registry_suitecrm.py
+++ b/src/dataset_registration/iati_registry_suitecrm.py
@@ -21,6 +21,8 @@ def fetch_datasets_metadata(
 
     crm.fetch_access_token()
 
+    context.logger.info("Fetching all dataset metadata using the libsuitecrm library...")
+
     filters = Filter().equal("iati_visibility", "public")
 
     suitecrm_dataset_records = [r for r in crm.get_all_records("IATI_Datasets", filters=filters)]
@@ -51,6 +53,8 @@ def fetch_datasets_metadata(
             continue
 
         owning_org = reporting_orgs.get(uuid.UUID(record["attributes"]["iati_dataset_owner_org_id"]), None)
+        if context.RUN_FOR_SINGLE_REPORTING_ORG is not None and owning_org is None:
+            continue
         if owning_org is None:
             context.logger.error(
                 f"SuiteCRM dataset id: {record['id']} has reporting org id: "
@@ -63,6 +67,8 @@ def fetch_datasets_metadata(
             record, owning_org, refresh_timestamp
         )
 
+    context.logger.info("Fetched metadata for {} datasets".format(len(results)))
+
     return results
 
 
@@ -72,11 +78,22 @@ def fetch_reporting_orgs_metadata(context: BDSContext, refresh_timestamp: dateti
 
     crm.fetch_access_token()
 
-    context.logger.info("Fetching all reporting orgs using the libsuitecrm library...")
+    context.logger.info("Fetching all reporting org metadata using the libsuitecrm library...")
 
     filters = Filter().equal("iati_registry_discoverable", "1")
     suitecrm_reporting_org_records = [r for r in crm.get_all_records("Accounts", filters=filters)]
 
+    if context.RUN_FOR_SINGLE_REPORTING_ORG is not None:
+        suitecrm_reporting_org_records = [
+            o
+            for o in suitecrm_reporting_org_records
+            if o.get("attributes", {}).get("iati_short_name", "") == context.RUN_FOR_SINGLE_REPORTING_ORG
+        ]
+        context.logger.info(
+            "--run-for-single-reporting-org is set so only "
+            f"processing reporting org '{context.RUN_FOR_SINGLE_REPORTING_ORG}'."
+        )
+
     crm.logout()
 
     results = {}
diff --git a/src/iati_bulk_data_service.py b/src/iati_bulk_data_service.py
index 41f4144..a0e7388 100644
--- a/src/iati_bulk_data_service.py
+++ b/src/iati_bulk_data_service.py
@@ -20,6 +20,7 @@ def main(args: argparse.Namespace):
     config = config | {
         "single_run": args.single_run,
         "run_for_n_datasets": args.run_for_n_datasets,
+        "run_for_single_reporting_org": args.run_for_single_reporting_org,
         "skip_safety": args.skip_safety,
     }
 
@@ -63,6 +64,11 @@ def main(args: argparse.Namespace):
         type=int,
         help="Run on the first N datasets from registration service (useful for testing)",
     )
+    parser.add_argument(
+        "--run-for-single-reporting-org",
+        type=str,
+        help="Run only for the datasets belonging to the specified reporting org (useful for testing)",
+    )
     parser.add_argument(
         "--skip-safety",
         action="store_true",

From b7895a1c036e735849b8069b76e2eb2bd6420e0d Mon Sep 17 00:00:00 2001
From: Simon K <6615834+simon-20@users.noreply.github.com>
Date: Wed, 22 Apr 2026 12:16:08 +0100
Subject: [PATCH 2/5] chore: fixes flake8 warnings and lints test files

Housekeeping commit which fixes most of the outstanding
flake8 warnings.
---
 .../reporting-orgs-01-four-orgs.json          |  4 +-
 tests/helpers/azure_service_bus_helpers.py    |  1 -
 tests/helpers/data_helpers.py                 |  7 +-
 tests/integration/test_dataset_add.py         |  5 +-
 tests/integration/test_dataset_expiry.py      |  5 +-
 tests/integration/test_dataset_indexing.py    |  2 +-
 .../integration/test_dataset_registration.py  |  4 +-
 tests/integration/test_dataset_update.py      |  2 +-
 tests/integration/test_db.py                  |  4 +-
 .../test_mq_registry_dataset_changes.py       |  1 -
 tests/integration/test_zip_creation.py        | 95 ++++++++++++++-----
 11 files changed, 92 insertions(+), 38 deletions(-)

diff --git a/tests/artifacts/libsuitecrm-responses/reporting-orgs-01-four-orgs.json b/tests/artifacts/libsuitecrm-responses/reporting-orgs-01-four-orgs.json
index e3fb524..07a8652 100644
--- a/tests/artifacts/libsuitecrm-responses/reporting-orgs-01-four-orgs.json
+++ b/tests/artifacts/libsuitecrm-responses/reporting-orgs-01-four-orgs.json
@@ -82,7 +82,7 @@
             "iati_org_type": "10",
             "iati_hq_country": "GB",
             "iati_region": "89",
-            "iati_registry_approved": "0",
+            "iati_registry_approved": "1",
             "iati_first_publish_date": "",
             "iati_data_portal_url": "https://www.example.org/data-portal",
             "iati_exclusions_policy_url": "https://www.example.org/exclusions-policy",
@@ -488,7 +488,7 @@
             "iati_org_type": "15",
             "iati_hq_country": "GB",
             "iati_region": "489",
-            "iati_registry_approved": "0",
+            "iati_registry_approved": "1",
             "iati_first_publish_date": "",
             "iati_data_portal_url": "https://www.example.org/data-portal",
             "iati_exclusions_policy_url": "https://www.example.org/exclusions-policy",
diff --git a/tests/helpers/azure_service_bus_helpers.py b/tests/helpers/azure_service_bus_helpers.py
index d528c16..cff5177 100644
--- a/tests/helpers/azure_service_bus_helpers.py
+++ b/tests/helpers/azure_service_bus_helpers.py
@@ -1,5 +1,4 @@
 import json
-from datetime import datetime
 from uuid import UUID
 
 import pytest
diff --git a/tests/helpers/data_helpers.py b/tests/helpers/data_helpers.py
index a3f6d77..4ed6be4 100644
--- a/tests/helpers/data_helpers.py
+++ b/tests/helpers/data_helpers.py
@@ -3,8 +3,7 @@
 import uuid
 
 from config.bds_context import BDSContext
-from utilities.azure import get_azure_blob_public_url
-from utilities.misc import dataset_has_iati_xml_download, get_object_from_json_str, get_timestamp
+from utilities.misc import get_object_from_json_str, get_timestamp
 
 
 def check_most_recent_get_attempt_http_error(dataset: dict):
@@ -220,4 +219,6 @@ def expected_values_for_dataset_registration_fields(source_url: str) -> list:
 
 def check_registration_service_refreshed_datetime(data_record: dict):
     assert data_record["registration_service_metadata_refreshed_datetime"] is not None
-    assert data_record["registration_service_metadata_refreshed_datetime"] > (get_timestamp() - datetime.timedelta(minutes=1))
+    assert data_record["registration_service_metadata_refreshed_datetime"] > (
+        get_timestamp() - datetime.timedelta(minutes=1)
+    )
diff --git a/tests/integration/test_dataset_add.py b/tests/integration/test_dataset_add.py
index b21a95f..c17226f 100644
--- a/tests/integration/test_dataset_add.py
+++ b/tests/integration/test_dataset_add.py
@@ -52,7 +52,10 @@ def test_add_new_undownloadable_dataset(get_and_clear_up_context, source_url, ex
 
 
 @pytest.mark.parametrize(
-    "dataset_url,last_known_good_dataset_hash,last_known_good_dataset_hash_excluding_generated_timestamp,last_known_good_dataset_content_length",
+    (
+        "dataset_url,last_known_good_dataset_hash,last_known_good_dataset_hash_excluding_generated_timestamp,"
+        "last_known_good_dataset_content_length"
+    ),
     [
         (
             "http://localhost:3000/data/test_foundation_a-dataset-001.xml",
diff --git a/tests/integration/test_dataset_expiry.py b/tests/integration/test_dataset_expiry.py
index b6c9aa5..d90bb8c 100644
--- a/tests/integration/test_dataset_expiry.py
+++ b/tests/integration/test_dataset_expiry.py
@@ -25,8 +25,9 @@ def test_dataset_expiry_after_72_hours_failed_downloads(get_and_clear_up_context
     assert get_number_xml_files_in_working_dir(context) == 1
 
     dataset = datasets_in_bds[uuid.UUID("c8a40aa5-9f31-4bcf-a36f-51c1fc2cc159")]
-    dataset["last_known_good_dataset_downloaded"] = (dataset["last_known_good_dataset_downloaded"]
-                                           - timedelta(hours=max_hours + 2))
+    dataset["last_known_good_dataset_downloaded"] = dataset["last_known_good_dataset_downloaded"] - timedelta(
+        hours=max_hours + 2
+    )
 
     context["DATA_REGISTRY_BASE_URL"] = "http://localhost:3000/ckan-registration/datasets-03-1-dataset-404"
     checker_run(context, datasets_in_bds)
diff --git a/tests/integration/test_dataset_indexing.py b/tests/integration/test_dataset_indexing.py
index 3820e3f..2e39b3d 100644
--- a/tests/integration/test_dataset_indexing.py
+++ b/tests/integration/test_dataset_indexing.py
@@ -15,7 +15,7 @@
 from helpers.helpers import download_index_from_azure, get_and_clear_up_context  # noqa: F401
 from utilities.azure import get_azure_container_name
 from utilities.db import get_reporting_orgs_in_bds
-from utilities.misc import find_object_by_key, format_timestamp_as_utc_str
+from utilities.misc import find_object_by_key
 
 
 def test_indices_uploaded_to_blob_storage(get_and_clear_up_context):  # noqa: F811
diff --git a/tests/integration/test_dataset_registration.py b/tests/integration/test_dataset_registration.py
index 0038c3c..8cf719d 100644
--- a/tests/integration/test_dataset_registration.py
+++ b/tests/integration/test_dataset_registration.py
@@ -9,7 +9,7 @@
 from dataset_registration.iati_registry_ckan import get_publisher_metadata_as_str
 from dataset_registration.registration_proxy import fetch_datasets_metadata, fetch_reporting_orgs_metadata
 from helpers.helpers import get_and_clear_up_context  # noqa: F401
-from utilities.misc import find_object_by_key, get_timestamp
+from utilities.misc import get_timestamp
 
 
 @pytest.mark.parametrize("http_status_code", ["400", "404", "500"])
@@ -139,7 +139,7 @@ def test_suitecrm_registry_conversion_of_registry_reporting_orgs(get_and_clear_u
     assert ro_1["default_licence_id"] == "gpl-3.0"
     assert ro_1["description"] == "Eaque eaque nostrum quia illum ipsum."
     assert ro_1["exclusions_policy_url"] == "https://www.example.org/exclusions-policy"
-    assert ro_1["first_publication_date"] == None
+    assert ro_1["first_publication_date"] is None
     assert ro_1["hq_country"] == "GB"
     assert ro_1["human_readable_name"] == "Gov Agency 1234"
     assert ro_1["organisation_identifier"] == "GOV-AGENCY-AID-1234"
diff --git a/tests/integration/test_dataset_update.py b/tests/integration/test_dataset_update.py
index a96e903..feb93ae 100644
--- a/tests/integration/test_dataset_update.py
+++ b/tests/integration/test_dataset_update.py
@@ -362,4 +362,4 @@ def test_dataset_successful_twice_after_url_change(get_and_clear_up_context):  #
     # run again
     checker_run(context, datasets_in_bds)
 
-    check_last_known_good_dataset_values_are_set(datasets_in_bds[dataset_id])
\ No newline at end of file
+    check_last_known_good_dataset_values_are_set(datasets_in_bds[dataset_id])
diff --git a/tests/integration/test_db.py b/tests/integration/test_db.py
index 5b601ea..487cf34 100644
--- a/tests/integration/test_db.py
+++ b/tests/integration/test_db.py
@@ -12,7 +12,7 @@
 )
 
 
-def test_save_reporting_org_db_record(get_and_clear_up_context):
+def test_save_reporting_org_db_record(get_and_clear_up_context):  # noqa: F811
 
     context = get_and_clear_up_context
 
@@ -51,7 +51,7 @@ def test_save_reporting_org_db_record(get_and_clear_up_context):
     assert reporting_org_from_db == reporting_org
 
 
-def test_save_dataset_db_record(get_and_clear_up_context):
+def test_save_dataset_db_record(get_and_clear_up_context):  # noqa: F811
 
     context = get_and_clear_up_context
 
diff --git a/tests/integration/test_mq_registry_dataset_changes.py b/tests/integration/test_mq_registry_dataset_changes.py
index 73bb1bf..b6ed6f6 100644
--- a/tests/integration/test_mq_registry_dataset_changes.py
+++ b/tests/integration/test_mq_registry_dataset_changes.py
@@ -48,7 +48,6 @@ async def test_dataset_created_message_01_success(get_and_clear_up_context, serv
     check_registration_service_refreshed_datetime(datasets_in_bds[dataset_id])
 
 
-
 @pytest.mark.asyncio
 async def test_dataset_created_message_02_error_dataset_already_exists(
     get_and_clear_up_context, service_bus_context  # noqa: F811
diff --git a/tests/integration/test_zip_creation.py b/tests/integration/test_zip_creation.py
index a84c48d..02217b7 100644
--- a/tests/integration/test_zip_creation.py
+++ b/tests/integration/test_zip_creation.py
@@ -2,7 +2,6 @@
 import os
 import zipfile
 
-import pytest
 import requests
 
 from bulk_data_service.checker import checker_run
@@ -18,9 +17,14 @@ def test_dataset_saved_for_download_success(get_and_clear_up_context):  # noqa:
     run_checker_then_zipper_once(context)
 
     assert get_number_xml_files_in_working_dir(context) == 1
-    assert os.path.exists("{}{}".format(
-        context["ZIP_WORKING_DIR"],
-        "/iati-data/datasets/test_foundation_a/test_foundation_a-dataset-001.xml")) is True
+    assert (
+        os.path.exists(
+            "{}{}".format(
+                context["ZIP_WORKING_DIR"], "/iati-data/datasets/test_foundation_a/test_foundation_a-dataset-001.xml"
+            )
+        )
+        is True
+    )
 
 
 def test_dataset_not_saved_for_download_fail_and_no_cache(get_and_clear_up_context):  # noqa: F811
@@ -39,9 +43,14 @@ def test_dataset_saved_for_download_fail_but_cached(get_and_clear_up_context):
     run_checker_then_zipper_download_fail_but_cached(context)
 
     assert get_number_xml_files_in_working_dir(context) == 1
-    assert os.path.exists("{}{}".format(
-        context["ZIP_WORKING_DIR"],
-        "/iati-data/datasets/test_foundation_a/test_foundation_a-dataset-001.xml")) is True
+    assert (
+        os.path.exists(
+            "{}{}".format(
+                context["ZIP_WORKING_DIR"], "/iati-data/datasets/test_foundation_a/test_foundation_a-dataset-001.xml"
+            )
+        )
+        is True
+    )
 
 
 def test_publisher_metadata_saved_for_failed_metadata_dl(get_and_clear_up_context):  # noqa: F811
@@ -106,14 +115,21 @@ def test_dataset_metadata_content_for_successful_metadata_dl(get_and_clear_up_co
 
     download_and_unpack_zip_to_tmp_unpack_folder(context, "code-for-iati-data-download.zip")
 
-    with open(context["TEST_TMP_ZIP_UNPACK"] + "/iati-data-main/metadata/test_foundation_a/test_foundation_a-dataset-001-newname.json", "r") as f:
+    with open(
+        context["TEST_TMP_ZIP_UNPACK"]
+        + "/iati-data-main/metadata/test_foundation_a/test_foundation_a-dataset-001-newname.json",
+        "r",
+    ) as f:
         assert f.read() == json.dumps(
             {
                 "id": "c8a40aa5-9f31-4bcf-a36f-51c1fc2cc159",
                 "license_id": "uk-ogl",
                 "license_title": "UK Open Government Licence (OGL)",
                 "name": "test_foundation_a-dataset-001-newname",
-                "organization": {"id": "ea055d99-f7e9-456f-9f99-963e95493c1b", "name": "test_foundation_a", },
+                "organization": {
+                    "id": "ea055d99-f7e9-456f-9f99-963e95493c1b",
+                    "name": "test_foundation_a",
+                },
                 "resources": [{"url": "http://localhost:3000/not_found"}],
                 "extras": [],
                 "tags": [],
@@ -134,7 +150,9 @@ def test_bds_zip_content_for_download_success(get_and_clear_up_context):  # noqa
     assert file_found_in_extracted_zip(context, "iati-data/datasets-minimal.json")
     assert file_found_in_extracted_zip(context, "iati-data/datasets-full.json")
     assert file_found_in_extracted_zip(context, "iati-data/reporting-orgs.json")
-    assert file_found_in_extracted_zip(context, "iati-data/datasets/test_foundation_a/test_foundation_a-dataset-001.xml")
+    assert file_found_in_extracted_zip(
+        context, "iati-data/datasets/test_foundation_a/test_foundation_a-dataset-001.xml"
+    )
 
 
 def test_bds_zip_content_for_download_success_dataset_updated_meta(get_and_clear_up_context, tmp_path):  # noqa: F811
@@ -161,10 +179,14 @@ def test_bds_zip_content_for_download_success_dataset_updated_meta(get_and_clear
     assert file_found_in_extracted_zip(context, "iati-data/datasets-minimal.json")
     assert file_found_in_extracted_zip(context, "iati-data/datasets-full.json")
     assert file_found_in_extracted_zip(context, "iati-data/reporting-orgs.json")
-    assert file_found_in_extracted_zip(context, "iati-data/datasets/test_foundation_a/test_foundation_a-dataset-001-newname.xml")
+    assert file_found_in_extracted_zip(
+        context, "iati-data/datasets/test_foundation_a/test_foundation_a-dataset-001-newname.xml"
+    )
 
     # The dataset as it was originally named should not be found
-    assert not file_found_in_extracted_zip(context, "iati-data/datasets/test_foundation_a/test_foundation_a-dataset-001.xml")
+    assert not file_found_in_extracted_zip(
+        context, "iati-data/datasets/test_foundation_a/test_foundation_a-dataset-001.xml"
+    )
 
 
 def test_bds_zip_content_for_download_success_dataset_updated_content(get_and_clear_up_context):  # noqa: F811
@@ -193,7 +215,12 @@ def test_bds_zip_content_for_download_success_dataset_updated_content(get_and_cl
 
     assert file_found_in_extracted_zip(context, "iati-data/datasets-minimal.json")
 
-    with open(os.path.join(context["TEST_TMP_ZIP_UNPACK"], "iati-data/datasets/test_foundation_a/test_foundation_a-dataset-001.xml"), "rb") as f:
+    with open(
+        os.path.join(
+            context["TEST_TMP_ZIP_UNPACK"], "iati-data/datasets/test_foundation_a/test_foundation_a-dataset-001.xml"
+        ),
+        "rb",
+    ) as f:
         contents_from_zip = f.read()
 
     with open("tests/artifacts/iati-xml-files/test_foundation_a-dataset-001-updated.xml", "rb") as f:
@@ -213,7 +240,9 @@ def test_bds_zip_content_for_download_fail_but_cached(get_and_clear_up_context):
     assert file_found_in_extracted_zip(context, "iati-data/datasets-minimal.json")
     assert file_found_in_extracted_zip(context, "iati-data/datasets-full.json")
     assert file_found_in_extracted_zip(context, "iati-data/reporting-orgs.json")
-    assert file_found_in_extracted_zip(context, "iati-data/datasets/test_foundation_a/test_foundation_a-dataset-001.xml")
+    assert file_found_in_extracted_zip(
+        context, "iati-data/datasets/test_foundation_a/test_foundation_a-dataset-001.xml"
+    )
 
 
 def test_bds_zip_content_for_download_fail_no_cached(get_and_clear_up_context):  # noqa: F811
@@ -227,7 +256,9 @@ def test_bds_zip_content_for_download_fail_no_cached(get_and_clear_up_context):
     assert file_found_in_extracted_zip(context, "iati-data/datasets-minimal.json")
     assert file_found_in_extracted_zip(context, "iati-data/datasets-full.json")
     assert file_found_in_extracted_zip(context, "iati-data/reporting-orgs.json")
-    assert not file_found_in_extracted_zip(context, "iati-data/datasets/test_foundation_a/test_foundation_a-dataset-001.xml")
+    assert not file_found_in_extracted_zip(
+        context, "iati-data/datasets/test_foundation_a/test_foundation_a-dataset-001.xml"
+    )
 
 
 def test_codeforiati_zip_content_for_download_success(get_and_clear_up_context):  # noqa: F811
@@ -242,9 +273,13 @@ def test_codeforiati_zip_content_for_download_success(get_and_clear_up_context):
     assert not file_found_in_extracted_zip(context, "iati-data/datasets-full.json")
     assert not file_found_in_extracted_zip(context, "iati-data/reporting-orgs.json")
     assert file_found_in_extracted_zip(context, "iati-data-main/metadata.json")
-    assert file_found_in_extracted_zip(context, "iati-data-main/data/test_foundation_a/test_foundation_a-dataset-001.xml")
+    assert file_found_in_extracted_zip(
+        context, "iati-data-main/data/test_foundation_a/test_foundation_a-dataset-001.xml"
+    )
     assert file_found_in_extracted_zip(context, "iati-data-main/metadata/test_foundation_a.json")
-    assert file_found_in_extracted_zip(context, "iati-data-main/metadata/test_foundation_a/test_foundation_a-dataset-001.json")
+    assert file_found_in_extracted_zip(
+        context, "iati-data-main/metadata/test_foundation_a/test_foundation_a-dataset-001.json"
+    )
 
 
 def test_codeforiati_zip_content_for_download_fail_but_cached(get_and_clear_up_context):  # noqa: F811
@@ -259,9 +294,13 @@ def test_codeforiati_zip_content_for_download_fail_but_cached(get_and_clear_up_c
     assert not file_found_in_extracted_zip(context, "iati-data/datasets-full.json")
     assert not file_found_in_extracted_zip(context, "iati-data/reporting-orgs.json")
     assert file_found_in_extracted_zip(context, "iati-data-main/metadata.json")
-    assert file_found_in_extracted_zip(context, "iati-data-main/data/test_foundation_a/test_foundation_a-dataset-001.xml")
+    assert file_found_in_extracted_zip(
+        context, "iati-data-main/data/test_foundation_a/test_foundation_a-dataset-001.xml"
+    )
     assert file_found_in_extracted_zip(context, "iati-data-main/metadata/test_foundation_a.json")
-    assert file_found_in_extracted_zip(context, "iati-data-main/metadata/test_foundation_a/test_foundation_a-dataset-001.json")
+    assert file_found_in_extracted_zip(
+        context, "iati-data-main/metadata/test_foundation_a/test_foundation_a-dataset-001.json"
+    )
 
 
 def test_codeforiati_zip_content_for_download_fail_no_cached(get_and_clear_up_context):  # noqa: F811
@@ -276,11 +315,23 @@ def test_codeforiati_zip_content_for_download_fail_no_cached(get_and_clear_up_co
     assert not file_found_in_extracted_zip(context, "iati-data/datasets-full.json")
     assert not file_found_in_extracted_zip(context, "iati-data/reporting-orgs.json")
     assert file_found_in_extracted_zip(context, "iati-data-main/metadata.json")
-    assert file_found_in_extracted_zip(context, "iati-data-main/data/test_foundation_a/test_foundation_a-dataset-001.xml")
+    assert file_found_in_extracted_zip(
+        context, "iati-data-main/data/test_foundation_a/test_foundation_a-dataset-001.xml"
+    )
     assert file_found_in_extracted_zip(context, "iati-data-main/metadata/test_foundation_a.json")
-    assert file_found_in_extracted_zip(context, "iati-data-main/metadata/test_foundation_a/test_foundation_a-dataset-001.json")
+    assert file_found_in_extracted_zip(
+        context, "iati-data-main/metadata/test_foundation_a/test_foundation_a-dataset-001.json"
+    )
 
-    assert os.path.getsize(os.path.join(context["TEST_TMP_ZIP_UNPACK"], "iati-data-main/data/test_foundation_a/test_foundation_a-dataset-001.xml")) == 0
+    assert (
+        os.path.getsize(
+            os.path.join(
+                context["TEST_TMP_ZIP_UNPACK"],
+                "iati-data-main/data/test_foundation_a/test_foundation_a-dataset-001.xml",
+            )
+        )
+        == 0
+    )
 
 
 def run_checker_then_zipper(context, registry_url: str, datasets_in_bds: dict, datasets_in_zip: dict):

From 9e3353d464f352babeaeb0de6c0788e2de0d862c Mon Sep 17 00:00:00 2001
From: Simon K <6615834+simon-20@users.noreply.github.com>
Date: Wed, 22 Apr 2026 12:18:13 +0100
Subject: [PATCH 3/5] fix: stop processing unapproved reporting orgs

This commit stops unapproved orgs from being downloaded, which in turn
stops datasets from those orgs from being included, because any dataset
which belongs to an unknown org is rejected. This resolves #136.
---
 src/dataset_registration/iati_registry_suitecrm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/dataset_registration/iati_registry_suitecrm.py b/src/dataset_registration/iati_registry_suitecrm.py
index 513264a..1b9f13c 100644
--- a/src/dataset_registration/iati_registry_suitecrm.py
+++ b/src/dataset_registration/iati_registry_suitecrm.py
@@ -80,7 +80,7 @@ def fetch_reporting_orgs_metadata(context: BDSContext, refresh_timestamp: dateti
 
     context.logger.info("Fetching all reporting org metadata using the libsuitecrm library...")
 
-    filters = Filter().equal("iati_registry_discoverable", "1")
+    filters = Filter().equal("iati_registry_discoverable", "1").equal("iati_registry_approved", 1)
     suitecrm_reporting_org_records = [r for r in crm.get_all_records("Accounts", filters=filters)]
 
     if context.RUN_FOR_SINGLE_REPORTING_ORG is not None:

From 814ee7ac4258408fb780c0bd335c28232864aaaf Mon Sep 17 00:00:00 2001
From: Simon K <6615834+simon-20@users.noreply.github.com>
Date: Wed, 22 Apr 2026 12:19:28 +0100
Subject: [PATCH 4/5] docs: update CHANGELOG

---
 CHANGELOG.md | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 64f40b8..6a464c7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,16 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 
 ### Removed
 
+## [1.4.4] - 2026-04-22
+
+### Added
+
+- Command line flag to enable processing of a single reporting org.
+
+### Fixed
+
+- Stop datasets from unapproved reporting orgs from being processed.
+
 ## [1.4.3] - 2026-03-10
 
 ### Changed

From 5b0ed1b390809e5df53314a7cb6b39094d8508f8 Mon Sep 17 00:00:00 2001
From: Simon K <6615834+simon-20@users.noreply.github.com>
Date: Wed, 22 Apr 2026 12:19:36 +0100
Subject: [PATCH 5/5] build: bump version number

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 4e8e0cf..b7ab0e5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "bulk-data-service"
-version = "1.4.3"
+version = "1.4.4"
 requires-python = ">= 3.12.6"
 readme = "README.md"
 dependencies = [