Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "bulk-data-service"
version = "1.3.1"
version = "1.3.2"
requires-python = ">= 3.12.6"
readme = "README.md"
dependencies = [
Expand Down
18 changes: 9 additions & 9 deletions src/bulk_data_service/dataset_updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,11 +333,6 @@ def download_and_save_dataset(
bds_dataset: dict,
attempt_datetime: datetime,
):
cached_xml_url = None
cached_xml_etag = None
cached_zip_url = None
cached_zip_etag = None

download_response = http_download_dataset(session, bds_dataset["source_url"], timeout=context.DATASET_GET_TIMEOUT)

last_modified_header = get_last_modified_header_if_exists(download_response)
Expand Down Expand Up @@ -394,15 +389,20 @@ def download_and_save_dataset(
"application/zip",
)

bds_dataset.update(
{
"last_known_good_dataset_cached_dataset_xml_etag": cached_xml_etag,
"last_known_good_dataset_cached_dataset_xml_url": cached_xml_url,
"last_known_good_dataset_cached_dataset_zip_etag": cached_zip_etag,
"last_known_good_dataset_cached_dataset_zip_url": cached_zip_url,
}
)

update_dataset_http_attempt_fields_as_success(bds_dataset, attempt_datetime, "get", download_response.status_code)

bds_dataset.update(
{
"last_update_check": attempt_datetime,
"last_known_good_dataset_cached_dataset_xml_etag": cached_xml_etag,
"last_known_good_dataset_cached_dataset_xml_url": cached_xml_url,
"last_known_good_dataset_cached_dataset_zip_etag": cached_zip_etag,
"last_known_good_dataset_cached_dataset_zip_url": cached_zip_url,
"last_known_good_dataset_hash": hash,
"last_known_good_dataset_hash_excluding_generated_timestamp": hash_excluding_generated,
"last_known_good_dataset_downloaded": attempt_datetime,
Expand Down
5 changes: 5 additions & 0 deletions src/utilities/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,11 @@ def insert_or_update_dataset(connection: psycopg.Connection, data):

last_update_check = %(last_update_check)s,

last_known_good_dataset_cached_dataset_xml_etag = %(last_known_good_dataset_cached_dataset_xml_etag)s,
last_known_good_dataset_cached_dataset_xml_url = %(last_known_good_dataset_cached_dataset_xml_url)s,
last_known_good_dataset_cached_dataset_zip_etag = %(last_known_good_dataset_cached_dataset_zip_etag)s,
last_known_good_dataset_cached_dataset_zip_url = %(last_known_good_dataset_cached_dataset_zip_url)s,

last_known_good_dataset_hash = %(last_known_good_dataset_hash)s,
last_known_good_dataset_hash_excluding_generated_timestamp =
%(last_known_good_dataset_hash_excluding_generated_timestamp)s,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8"?>
<iati-activities version="2.03" generated-datetime="2024-05-03T08:47:49+00:00">
<iati-activity>
<iati-identifier>AA-AAA-123456789-ABC123</iati-identifier>
<reporting-org ref="" type="">
<narrative></narrative>
</reporting-org>
<title>
<narrative>An activity with a title</narrative>
</title>
<description>
<narrative></narrative>
</description>
<participating-org role="">
</participating-org>
<activity-status code="">
</activity-status>
<activity-date type="" iso-date="2018-01-01">
</activity-date>
</iati-activity>
</iati-activities>
2 changes: 1 addition & 1 deletion tests/helpers/data_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,14 @@ def check_last_known_good_dataset_values_are_set(dataset: dict):
assert dataset["last_known_good_dataset_cached_dataset_zip_url"] is not None
assert dataset["last_known_good_dataset_cached_dataset_zip_etag"] is not None
assert dataset["last_known_good_dataset_downloaded"] is not None
assert dataset["last_known_good_dataset_downloaded"] == dataset["last_known_good_dataset_verified_on_server"]
assert dataset["last_known_good_dataset_hash"] is not None
assert dataset["last_known_good_dataset_hash_excluding_generated_timestamp"] is not None
assert dataset["last_known_good_dataset_content_length"] > 0
assert dataset["last_known_good_dataset_initial_contents"] is not None
assert dataset["last_known_good_dataset_server_header_last_modified"] is not None
assert dataset["last_known_good_dataset_server_header_etag"] is not None
assert dataset["last_known_good_dataset_source_url"] is not None
assert dataset["last_known_good_dataset_verified_on_server"] is not None


def check_last_known_good_dataset_values_are_unset(dataset: dict):
Expand Down
51 changes: 51 additions & 0 deletions tests/integration/test_dataset_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,3 +312,54 @@ def test_dataset_successful_xml_download_then_empty(get_and_clear_up_context):
check_most_recent_get_attempt_downloaded_but_non_iati(datasets_in_bds[dataset_id])

check_last_known_good_dataset_values_are_set(datasets_in_bds[dataset_id])


def test_dataset_successful_twice_nothing_change(get_and_clear_up_context):  # noqa: F811
    """Run the checker twice against the same registry URL.

    After each run the dataset's last-known-good values must be set. After
    the first run the stored initial contents must also match the expected
    start of the XML document. Nothing in the context is altered between
    the two runs.
    """
    registry_url = "http://localhost:3000/ckan-registration/datasets-01-1-dataset"
    expected_initial_contents = (
        '<?xml version="1.0" encoding="UTF-8"?><iati-activities version="2.03" '
        'generated-datetime="2024-05-03T08:47:49+00:00"> <iati-activity> <iati-identi'
    )
    # dataset c8a40aa5-9f31-... with XML
    target_id = uuid.UUID("c8a40aa5-9f31-4bcf-a36f-51c1fc2cc159")

    ctx = get_and_clear_up_context
    ctx["DATA_REGISTRY_BASE_URL"] = registry_url
    bds_datasets = {}

    # First pass: starts from an empty in-memory dataset collection.
    checker_run(ctx, bds_datasets)
    after_first_run = bds_datasets[target_id]
    check_last_known_good_dataset_values_are_set(after_first_run)
    assert after_first_run["last_known_good_dataset_initial_contents"] == expected_initial_contents

    # Second pass: same context, same dataset collection.
    checker_run(ctx, bds_datasets)
    check_last_known_good_dataset_values_are_set(bds_datasets[target_id])


def test_dataset_successful_twice_after_url_change(get_and_clear_up_context):  # noqa: F811
    """Run the checker twice, switching the registry base URL in between.

    After each run the dataset's last-known-good values must be set. After
    the first run the stored initial contents must also match the expected
    start of the XML document.
    """
    first_registry_url = "http://localhost:3000/ckan-registration/datasets-01-1-dataset"
    # NOTE(review): the second URL appends a percent-encoded address of
    # "test_foundation_a-dataset-001-copy.xml"; presumably the mock registry
    # then reports that address as the dataset's source URL - confirm.
    second_registry_url = (
        "http://localhost:3000/ckan-registration/datasets-01-1-dataset/"
        "http%3A%2F%2Flocalhost%3A3000%2Fdata%2Ftest_foundation_a-dataset-001-copy.xml"
    )
    expected_initial_contents = (
        '<?xml version="1.0" encoding="UTF-8"?><iati-activities version="2.03" '
        'generated-datetime="2024-05-03T08:47:49+00:00"> <iati-activity> <iati-identi'
    )
    # dataset c8a40aa5-9f31-... with XML
    target_id = uuid.UUID("c8a40aa5-9f31-4bcf-a36f-51c1fc2cc159")

    ctx = get_and_clear_up_context
    ctx["DATA_REGISTRY_BASE_URL"] = first_registry_url
    bds_datasets = {}

    # First pass: starts from an empty in-memory dataset collection.
    checker_run(ctx, bds_datasets)
    after_first_run = bds_datasets[target_id]
    check_last_known_good_dataset_values_are_set(after_first_run)
    assert after_first_run["last_known_good_dataset_initial_contents"] == expected_initial_contents

    # Second pass: same dataset collection, different registry base URL.
    ctx["DATA_REGISTRY_BASE_URL"] = second_registry_url
    checker_run(ctx, bds_datasets)
    check_last_known_good_dataset_values_are_set(bds_datasets[target_id])