diff --git a/pyproject.toml b/pyproject.toml
index 6cc8d2b..5c2f8f6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "bulk-data-service"
-version = "1.3.1"
+version = "1.3.2"
requires-python = ">= 3.12.6"
readme = "README.md"
dependencies = [
diff --git a/src/bulk_data_service/dataset_updater.py b/src/bulk_data_service/dataset_updater.py
index 3f01fcf..ee2fb06 100644
--- a/src/bulk_data_service/dataset_updater.py
+++ b/src/bulk_data_service/dataset_updater.py
@@ -333,11 +333,6 @@ def download_and_save_dataset(
bds_dataset: dict,
attempt_datetime: datetime,
):
- cached_xml_url = None
- cached_xml_etag = None
- cached_zip_url = None
- cached_zip_etag = None
-
download_response = http_download_dataset(session, bds_dataset["source_url"], timeout=context.DATASET_GET_TIMEOUT)
last_modified_header = get_last_modified_header_if_exists(download_response)
@@ -394,15 +389,20 @@ def download_and_save_dataset(
"application/zip",
)
+ bds_dataset.update(
+ {
+ "last_known_good_dataset_cached_dataset_xml_etag": cached_xml_etag,
+ "last_known_good_dataset_cached_dataset_xml_url": cached_xml_url,
+ "last_known_good_dataset_cached_dataset_zip_etag": cached_zip_etag,
+ "last_known_good_dataset_cached_dataset_zip_url": cached_zip_url,
+ }
+ )
+
update_dataset_http_attempt_fields_as_success(bds_dataset, attempt_datetime, "get", download_response.status_code)
bds_dataset.update(
{
"last_update_check": attempt_datetime,
- "last_known_good_dataset_cached_dataset_xml_etag": cached_xml_etag,
- "last_known_good_dataset_cached_dataset_xml_url": cached_xml_url,
- "last_known_good_dataset_cached_dataset_zip_etag": cached_zip_etag,
- "last_known_good_dataset_cached_dataset_zip_url": cached_zip_url,
"last_known_good_dataset_hash": hash,
"last_known_good_dataset_hash_excluding_generated_timestamp": hash_excluding_generated,
"last_known_good_dataset_downloaded": attempt_datetime,
diff --git a/src/utilities/db.py b/src/utilities/db.py
index ba73d94..5030667 100644
--- a/src/utilities/db.py
+++ b/src/utilities/db.py
@@ -103,6 +103,11 @@ def insert_or_update_dataset(connection: psycopg.Connection, data):
last_update_check = %(last_update_check)s,
+ last_known_good_dataset_cached_dataset_xml_etag = %(last_known_good_dataset_cached_dataset_xml_etag)s,
+ last_known_good_dataset_cached_dataset_xml_url = %(last_known_good_dataset_cached_dataset_xml_url)s,
+ last_known_good_dataset_cached_dataset_zip_etag = %(last_known_good_dataset_cached_dataset_zip_etag)s,
+ last_known_good_dataset_cached_dataset_zip_url = %(last_known_good_dataset_cached_dataset_zip_url)s,
+
last_known_good_dataset_hash = %(last_known_good_dataset_hash)s,
last_known_good_dataset_hash_excluding_generated_timestamp =
%(last_known_good_dataset_hash_excluding_generated_timestamp)s,
diff --git a/tests/artifacts/iati-xml-files/test_foundation_a-dataset-001-copy.xml b/tests/artifacts/iati-xml-files/test_foundation_a-dataset-001-copy.xml
new file mode 100644
index 0000000..e165253
--- /dev/null
+++ b/tests/artifacts/iati-xml-files/test_foundation_a-dataset-001-copy.xml
@@ -0,0 +1,21 @@
+
+
+
+ AA-AAA-123456789-ABC123
+
+
+
+
+ An activity with a title
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/helpers/data_helpers.py b/tests/helpers/data_helpers.py
index 8719270..a3f6d77 100644
--- a/tests/helpers/data_helpers.py
+++ b/tests/helpers/data_helpers.py
@@ -54,7 +54,6 @@ def check_last_known_good_dataset_values_are_set(dataset: dict):
assert dataset["last_known_good_dataset_cached_dataset_zip_url"] is not None
assert dataset["last_known_good_dataset_cached_dataset_zip_etag"] is not None
assert dataset["last_known_good_dataset_downloaded"] is not None
- assert dataset["last_known_good_dataset_downloaded"] == dataset["last_known_good_dataset_verified_on_server"]
assert dataset["last_known_good_dataset_hash"] is not None
assert dataset["last_known_good_dataset_hash_excluding_generated_timestamp"] is not None
assert dataset["last_known_good_dataset_content_length"] > 0
@@ -62,6 +61,7 @@ def check_last_known_good_dataset_values_are_set(dataset: dict):
assert dataset["last_known_good_dataset_server_header_last_modified"] is not None
assert dataset["last_known_good_dataset_server_header_etag"] is not None
assert dataset["last_known_good_dataset_source_url"] is not None
+ assert dataset["last_known_good_dataset_verified_on_server"] is not None
def check_last_known_good_dataset_values_are_unset(dataset: dict):
diff --git a/tests/integration/test_dataset_update.py b/tests/integration/test_dataset_update.py
index 41473ef..a96e903 100644
--- a/tests/integration/test_dataset_update.py
+++ b/tests/integration/test_dataset_update.py
@@ -312,3 +312,54 @@ def test_dataset_successful_xml_download_then_empty(get_and_clear_up_context):
check_most_recent_get_attempt_downloaded_but_non_iati(datasets_in_bds[dataset_id])
check_last_known_good_dataset_values_are_set(datasets_in_bds[dataset_id])
+
+
+def test_dataset_successful_twice_nothing_change(get_and_clear_up_context): # noqa: F811
+
+ context = get_and_clear_up_context
+
+ dataset_id = uuid.UUID("c8a40aa5-9f31-4bcf-a36f-51c1fc2cc159")
+
+ # dataset c8a40aa5-9f31-... with XML
+ context["DATA_REGISTRY_BASE_URL"] = "http://localhost:3000/ckan-registration/datasets-01-1-dataset"
+ datasets_in_bds = {}
+ checker_run(context, datasets_in_bds)
+
+ check_last_known_good_dataset_values_are_set(datasets_in_bds[dataset_id])
+ assert datasets_in_bds[dataset_id]["last_known_good_dataset_initial_contents"] == (
+ '