diff --git a/docs/source/usage.md b/docs/source/usage.md index cbedbfd..cb036a3 100644 --- a/docs/source/usage.md +++ b/docs/source/usage.md @@ -33,6 +33,11 @@ Prepares a dataset for release by uploading it to the staging area and updating uv run datamanager prepare ``` +When preparing a dataset, you will be prompted for an optional **Temoa Repository Hash** (git commit hash). This helps track which version of the temoa repository this database works against. You can: + +- Enter a valid git commit hash (e.g., `abc1234` or `a1b2c3d4e5f6...`) +- Press Enter to skip (optional field) + After running `prepare`, follow the on-screen instructions: 1. `git add manifest.json` @@ -44,12 +49,20 @@ After running `prepare`, follow the on-screen instructions: ### `list-datasets` -Lists all datasets currently tracked in `manifest.json`. +Lists all datasets currently tracked in `manifest.json`, including the latest version, update time, SHA256 hash, and Temoa repository hash (if available). ```bash uv run datamanager list-datasets ``` +The output includes: + +- **Dataset Name**: The logical name of the dataset +- **Latest Version**: The most recent version tag +- **Last Updated**: When the latest version was created (relative time and absolute timestamp) +- **SHA256**: First 12 characters of the file hash +- **Temoa Hash**: First 12 characters of the temoa repository commit hash (or "N/A" if not specified) + ![list_datasets](../../assets/list_datasets.png) ### `pull` diff --git a/docs/source/workflow.md b/docs/source/workflow.md index ffc7c6a..453dee0 100644 --- a/docs/source/workflow.md +++ b/docs/source/workflow.md @@ -21,7 +21,7 @@ Use the `datamanager` tool to stage your changes. The `prepare` command handles uv run datamanager prepare energy-data.sqlite ./local-files/new-energy.sqlite ``` -The tool will guide you through the process. For other maintenance tasks like `rollback` or `delete`, use the corresponding command. 
+The tool will guide you through the process, including an optional prompt for the **Temoa Repository Hash** (git commit hash) to track which version of the temoa repository this database works against. For other maintenance tasks like `rollback` or `delete`, use the corresponding command. ## Step 3: Commit and Push diff --git a/manifest.json b/manifest.json index 47d689c..b8d56be 100644 --- a/manifest.json +++ b/manifest.json @@ -10,6 +10,7 @@ "r2_object_key": "test_database/v4-6d60f0035a80de92c3f3df433212699e0584a09a7d4943693ae0889d98640641.sqlite", "diffFromPrevious": "diffs/test_database.sqlite/diff-v3-to-v4.diff", "commit": "5803a97", + "temoaRepoHash": null, "description": "testing the autogenerating data docs script" }, { @@ -19,6 +20,7 @@ "r2_object_key": "test_database/v3-6c37e0744a6f49f8b3e5b24b74080c2ae845b925633ccefa81193201639bee12.sqlite", "diffFromPrevious": "diffs/test_database.sqlite/diff-v2-to-v3.diff", "commit": "ecc49b5", + "temoaRepoHash": null, "description": "testing sql diffing with summary" }, { @@ -28,6 +30,7 @@ "r2_object_key": "test_database/v2-e287b00772296e3ae8d65699570662ff316d8dae50deef4041fde65ca73202a5.sqlite", "diffFromPrevious": "diffs/test_database.sqlite/diff-v1-to-v2.diff", "commit": "a621125", + "temoaRepoHash": null, "description": "updating test_database to get multiple versions" } ] diff --git a/src/datamanager/__main__.py b/src/datamanager/__main__.py index 5f75948..23ea590 100644 --- a/src/datamanager/__main__.py +++ b/src/datamanager/__main__.py @@ -2,6 +2,7 @@ import subprocess from datetime import datetime, timezone import tempfile +import re from dateutil.parser import isoparse from pathlib import Path @@ -41,6 +42,22 @@ def _rel(iso: str) -> str: return f"{hours} h ago" +def _validate_temoa_hash(temoa_hash: str) -> bool: + """ + Validates that a temoa repo hash looks like a valid git commit hash. + accepts 4-40 hexadecimal characters (case-insensitive). 
+ """ + if not temoa_hash or not temoa_hash.strip(): + return True # Empty is allowed (optional field) + + temoa_hash = temoa_hash.strip() + # Git commit hashes are hexadecimal and can be 4-40 characters + if re.match(r"^[a-fA-F0-9]{4,40}$", temoa_hash): + return True + + return False + + # Initialize Typer app and Rich console app = typer.Typer( name="datamanager", @@ -97,15 +114,26 @@ def verify(ctx: typer.Context) -> None: def list_datasets(ctx: typer.Context) -> None: """Lists all datasets tracked in the manifest.""" data = manifest.read_manifest() - table = Table("Dataset Name", "Latest Version", "Last Updated", "SHA256") + table = Table( + "Dataset Name", "Latest Version", "Last Updated", "SHA256", "Temoa Hash" + ) for item in data: latest = item["history"][0] + temoa_hash_display = "N/A" + if latest.get("temoaRepoHash"): + temoa_hash_display = ( + f"{latest['temoaRepoHash'][:12]}..." + if len(str(latest["temoaRepoHash"])) > 12 + else str(latest["temoaRepoHash"]) + ) + table.add_row( item["fileName"], latest["version"], # latest["timestamp"], f"{_rel(latest['timestamp'])} ({latest['timestamp']})", f"{latest['sha256'][:12]}...", + temoa_hash_display, ) console.print(table) @@ -128,8 +156,12 @@ def _run_pull_logic(name: str, version: str, output: Optional[Path]) -> None: else: final_path = output + temoa_hash_info = "" + if version_entry.get("temoaRepoHash"): + temoa_hash_info = f", temoa: {version_entry['temoaRepoHash']}" + console.print( - f"Pulling version [magenta]{version_entry['version']}[/] (commit: {version_entry['commit']}) to [cyan]{final_path}[/]" + f"Pulling version [magenta]{version_entry['version']}[/] (commit: {version_entry['commit']}{temoa_hash_info}) to [cyan]{final_path}[/]" ) success = core.pull_and_verify( @@ -190,10 +222,12 @@ def _pull_interactive(ctx: typer.Context) -> None: console.print(f"[red]Error: No version history found for {selected_name}.[/]") return - version_choices = [ - f"{entry['version']} (commit: {entry['commit']}, 
# Build one menu label per history entry for the interactive pull prompt.
# `or 'N/A'` is used instead of a `.get()` default because the default only
# applies when the key is absent; manifest entries that store an explicit
# null for "temoaRepoHash" would otherwise be rendered as "temoa: None".
version_choices = [
    (
        f"{entry['version']} (commit: {entry['commit']}, "
        f"{_rel(entry['timestamp'])}, temoa: {entry.get('temoaRepoHash') or 'N/A'})"
    )
    for entry in dataset["history"]
]
selected_version_str = questionary.select(
    "Which version would you like to pull?", choices=version_choices
).ask()
+ ) + # Check for changes BEFORE doing any uploads. if dataset: latest_version = dataset["history"][0] @@ -287,6 +363,7 @@ def _run_prepare_logic(ctx: typer.Context, name: str, file: Path) -> None: if diff_git_path else None, # Add path to entry "commit": "pending-merge", + "temoaRepoHash": temoa_hash, "description": "pending-merge", } manifest.add_history_entry(name, new_entry) @@ -307,6 +384,7 @@ def _run_prepare_logic(ctx: typer.Context, name: str, file: Path) -> None: "staging_key": staging_key, "diffFromPrevious": None, # Explicitly None for new datasets "commit": "pending-merge", + "temoaRepoHash": temoa_hash, "description": "pending-merge", } ], @@ -419,6 +497,7 @@ def _run_rollback_logic(ctx: typer.Context, name: str, to_version: str) -> None: "r2_object_key": target_entry["r2_object_key"], "diffFromPrevious": None, "commit": "pending-merge", + "temoaRepoHash": target_entry.get("temoaRepoHash"), "description": f"Rollback to version {target_entry['version']}", } @@ -481,10 +560,12 @@ def _rollback_interactive(ctx: typer.Context) -> None: return # Exclude the latest version from the choices, as you can't roll back to it. 
# Offer every history entry except index 0 (the latest version), since you
# can't roll back to the version that is already current.
# `or 'N/A'` is used instead of a `.get()` default because the default only
# applies when the key is absent; manifest entries that store an explicit
# null for "temoaRepoHash" would otherwise be rendered as "temoa: None".
version_choices = [
    (
        f"{entry['version']} (commit: {entry['commit']}, "
        f"{_rel(entry['timestamp'])}, temoa: {entry.get('temoaRepoHash') or 'N/A'})"
    )
    for entry in dataset["history"][1:]
]
selected_version_str = questionary.select(
    "Which version do you want to restore?", choices=version_choices
).ask()
def test_validate_temoa_hash() -> None:
    """Test the temoa hash validation function.

    Inputs are grouped by the expected verdict so that every value listed
    under a heading really belongs there; blank values are accepted because
    the hash is an optional field.
    """
    accepted = [
        "abc123",  # valid short hash
        "ABCDEF",  # hex digits are case-insensitive
        "a" * 40,  # valid long hash (upper length bound)
        "1234567890abcdef" * 2,  # 32 mixed hex characters
        "  abc123  ",  # surrounding whitespace is stripped before matching
        "",  # empty is allowed (optional)
        " ",  # whitespace only is allowed (optional)
    ]
    rejected = [
        "gggggg",  # 'g' is not hex
        "abc123g",  # contains 'g'
        "abc12345-",  # contains dash
        "abc",  # too short: less than 4 chars
        "a" * 41,  # too long: more than 40 chars
    ]

    for value in accepted:
        assert _validate_temoa_hash(value), f"expected {value!r} to be accepted"

    for value in rejected:
        assert not _validate_temoa_hash(value), f"expected {value!r} to be rejected"