Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions lib/format_normalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,12 @@

import re

# Quantity prefix pattern: "2x", "3x", etc.
# Quantity prefix pattern: "2x", "3x", etc. (Discogs format strings)
# Anchored at the start of the string (`^`): one or more digits followed
# immediately by "x". IGNORECASE means an upper-case "X" matches as well.
_QUANTITY_RE = re.compile(r"^\d+x", re.IGNORECASE)

# Quantity suffix pattern: " x 2", " x 3 box", etc. (WXYC library format strings)
# Anchored at the end of the string (`$`): whitespace, "x", whitespace, one or
# more digits, then an optional whitespace-separated "box". Whitespace is
# required on both sides of the "x", so a bare "x2" is not treated as a suffix.
# IGNORECASE means "X" and "BOX" match as well.
_LIB_QUANTITY_SUFFIX_RE = re.compile(r"\s+x\s+\d+(\s+box)?$", re.IGNORECASE)

# Mapping from lowercase format string to category.
_FORMAT_MAP: dict[str, str] = {
"vinyl": "Vinyl",
Expand Down Expand Up @@ -82,7 +85,18 @@ def normalize_library_format(raw: str | None) -> str | None:
if not fmt:
return None

return _FORMAT_MAP.get(fmt.lower())
# Strip quantity suffix: " x 2", " x 3 box", etc.
fmt = _LIB_QUANTITY_SUFFIX_RE.sub("", fmt)
lowered = fmt.lower()

# Handle "vinyl - SIZE" compound format
if lowered.startswith("vinyl - "):
size = fmt[len("vinyl - ") :].strip()
if size:
return _FORMAT_MAP.get(size.lower(), "Vinyl")
return "Vinyl"

return _FORMAT_MAP.get(lowered)


def format_matches(release_format: str | None, library_formats: set[str | None]) -> bool:
Expand Down
37 changes: 36 additions & 1 deletion tests/unit/test_format_normalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,43 @@ class TestNormalizeLibraryFormat:
("Vinyl", "Vinyl"),
(None, None),
("", None),
# Quantity suffix
("cd x 2", "CD"),
("cd x 3", "CD"),
("cd x 4", "CD"),
("cd x 2 box", "CD"),
# Vinyl with size
('vinyl - 12"', "Vinyl"),
('vinyl - 7"', '7"'),
("vinyl - LP", "Vinyl"),
('vinyl - 10"', "Vinyl"),
# Vinyl with size + quantity
("vinyl - LP x 2", "Vinyl"),
('vinyl - 7" x 2', '7"'),
('vinyl - 12" x 2', "Vinyl"),
('vinyl - 10" x 2', "Vinyl"),
],
ids=[
"lp",
"cd",
"cassette",
"7_inch",
"vinyl",
"none",
"empty",
"cd_x_2",
"cd_x_3",
"cd_x_4",
"cd_x_2_box",
"vinyl_12_inch",
"vinyl_7_inch",
"vinyl_lp",
"vinyl_10_inch",
"vinyl_lp_x_2",
"vinyl_7_inch_x_2",
"vinyl_12_inch_x_2",
"vinyl_10_inch_x_2",
],
ids=["lp", "cd", "cassette", "7_inch", "vinyl", "none", "empty"],
)
def test_normalize_library_format(self, raw, expected):
assert normalize_library_format(raw) == expected
Expand Down
Loading