diff --git a/lib/format_normalization.py b/lib/format_normalization.py index a115097..08165a9 100644 --- a/lib/format_normalization.py +++ b/lib/format_normalization.py @@ -17,9 +17,12 @@ import re -# Quantity prefix pattern: "2x", "3x", etc. +# Quantity prefix pattern: "2x", "3x", etc. (Discogs format strings) _QUANTITY_RE = re.compile(r"^\d+x", re.IGNORECASE) +# Quantity suffix pattern: " x 2", " x 3 box", etc. (WXYC library format strings); matched case-insensitively and only at the end of the string +_LIB_QUANTITY_SUFFIX_RE = re.compile(r"\s+x\s+\d+(\s+box)?$", re.IGNORECASE) + # Mapping from lowercase format string to category. _FORMAT_MAP: dict[str, str] = { "vinyl": "Vinyl", @@ -82,7 +85,18 @@ def normalize_library_format(raw: str | None) -> str | None: if not fmt: return None - return _FORMAT_MAP.get(fmt.lower()) + # Strip quantity suffix: " x 2", " x 3 box", etc. + fmt = _LIB_QUANTITY_SUFFIX_RE.sub("", fmt) + lowered = fmt.lower() + + # Handle "vinyl - SIZE" compound format: a SIZE that has its own _FORMAT_MAP entry yields that entry's category, any other SIZE falls back to "Vinyl" + if lowered.startswith("vinyl - "): + size = fmt[len("vinyl - ") :].strip() + if size: + return _FORMAT_MAP.get(size.lower(), "Vinyl") + return "Vinyl" + + return _FORMAT_MAP.get(lowered) def format_matches(release_format: str | None, library_formats: set[str | None]) -> bool: diff --git a/tests/unit/test_format_normalization.py b/tests/unit/test_format_normalization.py index 7e5f8dd..43b52e2 100644 --- a/tests/unit/test_format_normalization.py +++ b/tests/unit/test_format_normalization.py @@ -121,8 +121,43 @@ class TestNormalizeLibraryFormat: ("Vinyl", "Vinyl"), (None, None), ("", None), + # Quantity suffix + ("cd x 2", "CD"), + ("cd x 3", "CD"), + ("cd x 4", "CD"), + ("cd x 2 box", "CD"), + # Vinyl with size + ('vinyl - 12"', "Vinyl"), + ('vinyl - 7"', '7"'), + ("vinyl - LP", "Vinyl"), + ('vinyl - 10"', "Vinyl"), + # Vinyl with size + quantity + ("vinyl - LP x 2", "Vinyl"), + ('vinyl - 7" x 2', '7"'), + ('vinyl - 12" x 2', "Vinyl"), + ('vinyl - 10" x 2', "Vinyl"), + ], + ids=[ + "lp", + "cd", + "cassette", + "7_inch", + "vinyl", + "none", 
"empty", + "cd_x_2", + "cd_x_3", + "cd_x_4", + "cd_x_2_box", + "vinyl_12_inch", + "vinyl_7_inch", + "vinyl_lp", + "vinyl_10_inch", + "vinyl_lp_x_2", + "vinyl_7_inch_x_2", + "vinyl_12_inch_x_2", + "vinyl_10_inch_x_2", ], - ids=["lp", "cd", "cassette", "7_inch", "vinyl", "none", "empty"], ) def test_normalize_library_format(self, raw, expected): assert normalize_library_format(raw) == expected