From 0e1917a70b10c5796952b74dac76c8f09b5bb13d Mon Sep 17 00:00:00 2001
From: Alexander Minges
Date: Wed, 4 Mar 2026 09:39:39 +0100
Subject: [PATCH 1/2] fix(output): preserve explicit empty string fields during TSV conversion

---
Review note: the new integration test writes its TSV output into a
tempfile.TemporaryDirectory() instead of the fixed path
/tmp/y2b_explicit_empty_string_watermark.tsv, so it is portable, cannot
collide with parallel test runs, and leaves no file behind.

 tests/integration_tests.py                    | 35 +++++++++++++++++++
 .../valid/explicit_empty_string_watermark.yml | 20 +++++++++++
 yml2block/output.py                           |  4 +++
 3 files changed, 59 insertions(+)
 create mode 100644 tests/valid/explicit_empty_string_watermark.yml

diff --git a/tests/integration_tests.py b/tests/integration_tests.py
index 1545978..6001a76 100644
--- a/tests/integration_tests.py
+++ b/tests/integration_tests.py
@@ -212,3 +212,38 @@ def test_error_flag():
         ["check", "--error", "e004", "tests/invalid/whitespace_in_key.yml"],
     )
     assert result.exit_code == 1, result.output
+
+
+def test_convert_preserves_explicit_empty_string_fields():
+    """Ensure quoted empty strings do not shift TSV columns during conversion."""
+    import os
+    import tempfile
+
+    runner = CliRunner()
+    input_file = "tests/valid/explicit_empty_string_watermark.yml"
+
+    # Use a private temporary directory rather than a fixed /tmp path: it is
+    # portable, safe under parallel runs, and removed when the block exits.
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        output_file = os.path.join(tmp_dir, "explicit_empty_string_watermark.tsv")
+        result = runner.invoke(
+            yml2block.__main__.main,
+            ["convert", input_file, "-o", output_file],
+        )
+
+        assert result.exit_code == 0, result.output
+        assert "Invalid entry ''" not in result.output
+
+        with open(output_file, "r", encoding="utf-8") as tsv_file:
+            lines = tsv_file.readlines()
+
+    header_idx = next(
+        idx for idx, line in enumerate(lines) if line.startswith("#datasetField")
+    )
+    header = lines[header_idx].rstrip("\n").split("\t")
+    row = lines[header_idx + 1].rstrip("\n").split("\t")
+
+    assert len(header) == len(row)
+    assert row[header.index("watermark")] == ""
+    assert row[header.index("fieldType")] == "text"
+    assert row[header.index("displayOrder")] == "1"
diff --git a/tests/valid/explicit_empty_string_watermark.yml b/tests/valid/explicit_empty_string_watermark.yml
new file mode 100644
index 0000000..39c4341
--- /dev/null
+++ b/tests/valid/explicit_empty_string_watermark.yml
@@ -0,0 +1,20
@@ +metadataBlock: + - name: Example + dataverseAlias: + displayName: Example +datasetField: + - name: Foo + title: Foo + description: Some field + watermark: "" + fieldType: text + displayOrder: 1 + displayFormat: + advancedSearchField: true + allowControlledVocabulary: false + allowmultiples: false + facetable: false + displayoncreate: true + required: true + parent: + metadatablock_id: Example diff --git a/yml2block/output.py b/yml2block/output.py index a202b74..b552f26 100644 --- a/yml2block/output.py +++ b/yml2block/output.py @@ -36,6 +36,10 @@ def write_metadata_block(yml_metadata, output_path, longest_line, verbose): # This catches empty values, which yaml reports # as a Python None new_line.append("") + elif value == "": + # Quoted empty strings in YAML are valid values and + # must produce an empty TSV cell without shifting columns. + new_line.append("") elif str(value): # This could be more specific using int() and float() # The conversion of `value` to a string happens to From 2415202da71aadbf4463d9de6982b1560940d0e2 Mon Sep 17 00:00:00 2001 From: Alexander Minges Date: Wed, 4 Mar 2026 09:54:21 +0100 Subject: [PATCH 2/2] style(main): format module with black --- yml2block/__main__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yml2block/__main__.py b/yml2block/__main__.py index f8642b4..358fec5 100644 --- a/yml2block/__main__.py +++ b/yml2block/__main__.py @@ -3,6 +3,7 @@ https://guides.dataverse.org/en/latest/admin/metadatacustomization.html """ + import os import sys import click