diff --git a/pyproject.toml b/pyproject.toml index 6cc8d2b..5c2f8f6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "bulk-data-service" -version = "1.3.1" +version = "1.3.2" requires-python = ">= 3.12.6" readme = "README.md" dependencies = [ diff --git a/src/bulk_data_service/dataset_updater.py b/src/bulk_data_service/dataset_updater.py index 3f01fcf..ee2fb06 100644 --- a/src/bulk_data_service/dataset_updater.py +++ b/src/bulk_data_service/dataset_updater.py @@ -333,11 +333,6 @@ def download_and_save_dataset( bds_dataset: dict, attempt_datetime: datetime, ): - cached_xml_url = None - cached_xml_etag = None - cached_zip_url = None - cached_zip_etag = None - download_response = http_download_dataset(session, bds_dataset["source_url"], timeout=context.DATASET_GET_TIMEOUT) last_modified_header = get_last_modified_header_if_exists(download_response) @@ -394,15 +389,20 @@ def download_and_save_dataset( "application/zip", ) + bds_dataset.update( + { + "last_known_good_dataset_cached_dataset_xml_etag": cached_xml_etag, + "last_known_good_dataset_cached_dataset_xml_url": cached_xml_url, + "last_known_good_dataset_cached_dataset_zip_etag": cached_zip_etag, + "last_known_good_dataset_cached_dataset_zip_url": cached_zip_url, + } + ) + update_dataset_http_attempt_fields_as_success(bds_dataset, attempt_datetime, "get", download_response.status_code) bds_dataset.update( { "last_update_check": attempt_datetime, - "last_known_good_dataset_cached_dataset_xml_etag": cached_xml_etag, - "last_known_good_dataset_cached_dataset_xml_url": cached_xml_url, - "last_known_good_dataset_cached_dataset_zip_etag": cached_zip_etag, - "last_known_good_dataset_cached_dataset_zip_url": cached_zip_url, "last_known_good_dataset_hash": hash, "last_known_good_dataset_hash_excluding_generated_timestamp": hash_excluding_generated, "last_known_good_dataset_downloaded": attempt_datetime, diff --git a/src/utilities/db.py b/src/utilities/db.py index ba73d94..5030667 100644 --- a/src/utilities/db.py +++ b/src/utilities/db.py @@ -103,6 +103,11 @@ def insert_or_update_dataset(connection: psycopg.Connection, data): last_update_check = %(last_update_check)s, + last_known_good_dataset_cached_dataset_xml_etag = %(last_known_good_dataset_cached_dataset_xml_etag)s, + last_known_good_dataset_cached_dataset_xml_url = %(last_known_good_dataset_cached_dataset_xml_url)s, + last_known_good_dataset_cached_dataset_zip_etag = %(last_known_good_dataset_cached_dataset_zip_etag)s, + last_known_good_dataset_cached_dataset_zip_url = %(last_known_good_dataset_cached_dataset_zip_url)s, + last_known_good_dataset_hash = %(last_known_good_dataset_hash)s, last_known_good_dataset_hash_excluding_generated_timestamp = %(last_known_good_dataset_hash_excluding_generated_timestamp)s, diff --git a/tests/artifacts/iati-xml-files/test_foundation_a-dataset-001-copy.xml b/tests/artifacts/iati-xml-files/test_foundation_a-dataset-001-copy.xml new file mode 100644 index 0000000..e165253 --- /dev/null +++ b/tests/artifacts/iati-xml-files/test_foundation_a-dataset-001-copy.xml @@ -0,0 +1,21 @@ + + + + AA-AAA-123456789-ABC123 + + + + + <narrative>An activity with a title</narrative> + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/helpers/data_helpers.py b/tests/helpers/data_helpers.py index 8719270..a3f6d77 100644 --- a/tests/helpers/data_helpers.py +++ b/tests/helpers/data_helpers.py @@ -54,7 +54,6 @@ def check_last_known_good_dataset_values_are_set(dataset: dict): assert dataset["last_known_good_dataset_cached_dataset_zip_url"] is not None assert dataset["last_known_good_dataset_cached_dataset_zip_etag"] is not None assert dataset["last_known_good_dataset_downloaded"] is not None - assert dataset["last_known_good_dataset_downloaded"] == dataset["last_known_good_dataset_verified_on_server"] assert dataset["last_known_good_dataset_hash"] is not None assert dataset["last_known_good_dataset_hash_excluding_generated_timestamp"] is not None assert dataset["last_known_good_dataset_content_length"] > 0 @@ -62,6 +61,7 @@ def check_last_known_good_dataset_values_are_set(dataset: dict): assert dataset["last_known_good_dataset_server_header_last_modified"] is not None assert dataset["last_known_good_dataset_server_header_etag"] is not None assert dataset["last_known_good_dataset_source_url"] is not None + assert dataset["last_known_good_dataset_verified_on_server"] is not None def check_last_known_good_dataset_values_are_unset(dataset: dict): diff --git a/tests/integration/test_dataset_update.py b/tests/integration/test_dataset_update.py index 41473ef..a96e903 100644 --- a/tests/integration/test_dataset_update.py +++ b/tests/integration/test_dataset_update.py @@ -312,3 +312,54 @@ def test_dataset_successful_xml_download_then_empty(get_and_clear_up_context): check_most_recent_get_attempt_downloaded_but_non_iati(datasets_in_bds[dataset_id]) check_last_known_good_dataset_values_are_set(datasets_in_bds[dataset_id]) + + +def test_dataset_successful_twice_nothing_change(get_and_clear_up_context): # noqa: F811 + + context = get_and_clear_up_context + + dataset_id = uuid.UUID("c8a40aa5-9f31-4bcf-a36f-51c1fc2cc159") + + # dataset c8a40aa5-9f31-... with XML + context["DATA_REGISTRY_BASE_URL"] = "http://localhost:3000/ckan-registration/datasets-01-1-dataset" + datasets_in_bds = {} + checker_run(context, datasets_in_bds) + + check_last_known_good_dataset_values_are_set(datasets_in_bds[dataset_id]) + assert datasets_in_bds[dataset_id]["last_known_good_dataset_initial_contents"] == ( + '