Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
190 changes: 158 additions & 32 deletions scripts/l10n/generate_po_files.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#!/usr/bin/env python3


# holidays
# --------
# A fast, efficient Python library for generating country, province and state
Expand All @@ -17,37 +16,75 @@
import inspect
import sys
from concurrent.futures import ProcessPoolExecutor
from datetime import datetime
from pathlib import Path
from time import perf_counter

from lingva.extract import extract as create_pot_file
from lingva.extract import _location_sort_key
from polib import pofile

sys.path.insert(0, str(Path.cwd())) # Make holidays visible.
sys.path.insert(0, str(Path.cwd()))
from holidays import __version__ as package_version
from holidays.holiday_base import HolidayBase

WRAP_WIDTH = 99
HEADER_PATH = Path("docs/file_header.txt")


class POGenerator:
"""Generates .po files for supported country/market entities."""

@staticmethod
def _get_license_header() -> str:
    """Return the license header from ``HEADER_PATH`` as a block of ``#`` comments.

    Every line of the header file is right-stripped and then emitted as
    ``#`` when blank, unchanged when it already starts with ``#``, and
    prefixed with ``# `` otherwise. Leading indentation is kept on every
    line, including the first one.

    Returns:
        The commented header terminated by a newline, or an empty string
        when the header file is missing or holds only whitespace.
    """
    if not HEADER_PATH.exists():
        return ""

    # Right-strip line by line instead of calling ``str.strip()`` on the whole
    # text: the latter also eats the first line's leading indentation, which
    # leaves that line misaligned with the rest of the header.
    text = HEADER_PATH.read_text(encoding="utf-8")
    lines = [line.rstrip() for line in text.splitlines()]

    # Trim blank lines around the header so the output starts and ends with text.
    while lines and not lines[0]:
        lines.pop(0)
    while lines and not lines[-1]:
        lines.pop()
    if not lines:
        return ""

    commented = []
    for line in lines:
        if not line:
            commented.append("#")
        elif line.startswith("#"):
            # Already a comment line; keep it verbatim.
            commented.append(line)
        else:
            commented.append(f"# {line}")

    return "\n".join(commented) + "\n"
Comment on lines +44 to +58
Copy link
Collaborator

@PPsyrius PPsyrius Jan 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fix the first line not getting 2 spaces & simplify this a bit
image

Suggested change
content = HEADER_PATH.read_text(encoding="utf-8").strip()
if not content:
return ""
lines = []
for line in content.splitlines():
line = line.rstrip()
if not line:
lines.append("#")
elif line.startswith("#"):
lines.append(line)
else:
lines.append(f"# {line}")
return "\n".join(lines) + "\n"
content = HEADER_PATH.read_text(encoding="utf-8").rstrip("\n")
if not content:
return ""
return "\n".join(
"#" if not line.rstrip() else f"# {line.rstrip()}"
for line in content.splitlines()
) + "\n"


@staticmethod
def _get_standard_metadata(default_language: str = "en_US") -> dict:
"""Returns the standard metadata required for gettext."""
return {
"Report-Msgid-Bugs-To": "dr-prodigy@users.noreply.github.com",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"Report-Msgid-Bugs-To": "dr-prodigy@users.noreply.github.com",

This wasn't included in any existing l10n files AFAIK, let's remove it for now

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO: Reminder for ME
"Report-Msgid-Bugs-To: l10n@vacanza.dev\n"

"POT-Creation-Date": datetime.now().strftime("%Y-%m-%d %H:%M%z"),
"Last-Translator": "Vacanza Team <dr.prodigy.github@gmail.com>",
Comment on lines +65 to +66
Copy link

Copilot AI Jan 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The timestamp format string '%z' may produce an empty string on some platforms where timezone information is not available. Consider using a fixed timezone or handling the case where timezone offset is not available to ensure consistent output across different environments.

Suggested change
"POT-Creation-Date": datetime.now().strftime("%Y-%m-%d %H:%M%z"),
"Last-Translator": "Vacanza Team <dr.prodigy.github@gmail.com>",
"POT-Creation-Date": datetime.now().astimezone().strftime("%Y-%m-%d %H:%M%z"),
"Last-Translator": "Vacanza Team <dr-prodigy.github@gmail.com>",

Copilot uses AI. Check for mistakes.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did double check and the bot's astimezone() inclusion is correct - but let's revert to the placeholder name instead

Suggested change
"POT-Creation-Date": datetime.now().strftime("%Y-%m-%d %H:%M%z"),
"Last-Translator": "Vacanza Team <dr.prodigy.github@gmail.com>",
"POT-Creation-Date": datetime.now().astimezone().strftime("%Y-%m-%d %H:%M%z"),
"Last-Translator": "FULL NAME <EMAIL@ADDRESS>",

Alternatively, since we're doing this, may as well make the example email RFC 2606-compliant

Suggested change
"POT-Creation-Date": datetime.now().strftime("%Y-%m-%d %H:%M%z"),
"Last-Translator": "Vacanza Team <dr.prodigy.github@gmail.com>",
"POT-Creation-Date": datetime.now().astimezone().strftime("%Y-%m-%d %H:%M%z"),
"Last-Translator": "FULL NAME <EMAIL@EXAMPLE.COM>",

"Language-Team": "Holidays Localization Team",
"MIME-Version": "1.0",
"Content-Type": "text/plain; charset=UTF-8",
"Content-Transfer-Encoding": "8bit",
"Generated-By": "Lingva 5.0.5",
Copy link

Copilot AI Jan 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The hardcoded version string "Lingva 5.0.5" in the metadata is a maintainability issue. This value will become outdated if the lingva library is updated. Consider dynamically retrieving the lingva version from the package or removing this field if not essential.

Copilot uses AI. Check for mistakes.
Copy link
Collaborator

@PPsyrius PPsyrius Jan 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pareshjoshij Let's remove "Generated-By": "Lingva 5.0.5", line, it should be automatically added by Lingva anyway

"X-Source-Language": default_language,
}

@staticmethod
def _process_entity_worker(
entity_code_info: tuple[str, tuple[str, Path]],
) -> list[tuple[str, str]]:
entity_code_info: tuple[str, tuple[str, Path, str]],
) -> list[tuple[str, str, str, str]]:
"""Process a single entity: create .pot, default .po, and return update tasks."""
entity_code, (default_language, class_file_path) = entity_code_info
entity_code, (default_language, class_file_path, class_docstring) = entity_code_info

locale_path = Path("holidays/locale")
pot_path = locale_path / "pot"
pot_path.mkdir(exist_ok=True)

pot_file_path = pot_path / f"{entity_code}.pot"

# Create .pot file.
create_pot_file(
sources=[class_file_path],
keywords=["tr"],
Expand All @@ -58,50 +95,107 @@ def _process_entity_worker(
allow_empty=True,
Copy link
Collaborator

@PPsyrius PPsyrius Jan 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should disable l10n location inclusion in the .po file

Suggested change
allow_empty=True,
location=False,
allow_empty=True,

With this (and my other comments), it should now at least work as a proof-of-concept, though I can't get it to generate for non-default languages yet

)

# Update .pot file metadata.
pot_file = pofile(pot_file_path, wrapwidth=WRAP_WIDTH)
pot_file.metadata.update(
{
"Language": default_language,
"Language-Team": "Holidays Localization Team",
"X-Source-Language": default_language,
}
)
pot_file.metadata.update(POGenerator._get_standard_metadata(default_language))
pot_file.metadata["Project-Id-Version"] = f"Holidays {package_version}"
pot_file.save(newline="\n")

# Create entity default .po file from the .pot file.
po_directory = locale_path / default_language / "LC_MESSAGES"
po_directory.mkdir(parents=True, exist_ok=True)
default_po_path = po_directory / f"{entity_code}.po"

if not default_po_path.exists():
pot_file.metadata["PO-Revision-Date"] = pot_file.metadata["POT-Creation-Date"]
pot_file.metadata["Language"] = default_language
pot_file.save(str(default_po_path), newline="\n")

# Collect .po update tasks.
return [
(str(po_file_path), str(pot_file_path))
(str(po_file_path), str(pot_file_path), class_docstring, default_language)
for po_file_path in locale_path.rglob(f"{entity_code}.po")
]

@staticmethod
def _update_po_file(args: tuple[str, str]) -> None:
"""Merge .po file with .pot"""
po_path, pot_path = args
po_file = pofile(po_path, wrapwidth=WRAP_WIDTH)
def _update_po_file(args: tuple[str, str, str, str]) -> None:
"""Merge .po file with .pot using strict no-change policies."""
po_path_str, pot_path_str, entity_docstring, default_language = args
po_path = Path(po_path_str)
pot_path = Path(pot_path_str)

po_file = pofile(str(po_path), wrapwidth=WRAP_WIDTH)
po_file_initial = po_file.copy()

pot_file = pofile(pot_path)
pot_file = pofile(str(pot_path))

po_file.merge(pot_file)
po_file.sort(key=_location_sort_key)
for entry in po_file:
entry.occurrences.clear()

# Only update the project version if po file translation entries has changed.
if po_file != po_file_initial:
po_file.metadata["Project-Id-Version"] = f"Holidays {package_version}"
po_file.metadata["PO-Revision-Date"] = pot_file.metadata["PO-Revision-Date"]
has_content_changed = po_file != po_file_initial

license_header = POGenerator._get_license_header()
current_lang = po_path.parent.parent.name
is_default_lang = current_lang == default_language

clean_name = ""
if entity_docstring:
first_line = entity_docstring.strip().split("\n")[0].strip().rstrip(".")
if first_line.endswith(" holidays"):
clean_name = first_line[:-9].strip()
else:
clean_name = first_line
Comment on lines +142 to +146
Copy link

Copilot AI Jan 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The description line generation has a specific hardcoded pattern where it strips " holidays" from the end of the first line (line 143). This assumes a specific docstring format. If the docstring doesn't follow this format exactly, the logic may produce unexpected results. Consider documenting this expectation or making the parsing more flexible.

Suggested change
first_line = entity_docstring.strip().split("\n")[0].strip().rstrip(".")
if first_line.endswith(" holidays"):
clean_name = first_line[:-9].strip()
else:
clean_name = first_line
# Use the first non-empty line of the docstring as a short description.
# Commonly, entity docstrings follow the "<entity> holidays." pattern.
# In that case we strip the trailing "holidays"/"holiday" keyword here
# to avoid duplicating it when building `desc_line` below. If the
# docstring uses a different format, we fall back to the first
# sentence unchanged.
first_line = entity_docstring.strip().split("\n", 1)[0].strip()
# Only consider the first sentence to keep the description concise.
first_sentence = first_line.split(".", 1)[0].strip()
lowered = first_sentence.lower()
if lowered.endswith(" holidays"):
clean_name = first_sentence[: -len(" holidays")].strip()
elif lowered.endswith(" holiday"):
clean_name = first_sentence[: -len(" holiday")].strip()
else:
clean_name = first_sentence

Copilot uses AI. Check for mistakes.

desc_line = ""
if not is_default_lang and clean_name:
desc_line = f"# {clean_name} holidays {current_lang} localization."
elif clean_name:
desc_line = f"# {clean_name} holidays."

if not has_content_changed:
if po_path.exists():
content = po_path.read_text(encoding="utf-8")
if "Authors: Vacanza Team" not in content:
new_parts = []
if license_header:
new_parts.append(license_header)
if desc_line and desc_line not in content:
new_parts.append(desc_line)

if new_parts:
new_parts.append("#")
final_content = "\n".join(new_parts) + "\n" + content
if final_content.strip() != content.strip():
po_path.write_text(final_content, encoding="utf-8")
return
Comment on lines +154 to +169
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if not has_content_changed:
if po_path.exists():
content = po_path.read_text(encoding="utf-8")
if "Authors: Vacanza Team" not in content:
new_parts = []
if license_header:
new_parts.append(license_header)
if desc_line and desc_line not in content:
new_parts.append(desc_line)
if new_parts:
new_parts.append("#")
final_content = "\n".join(new_parts) + "\n" + content
if final_content.strip() != content.strip():
po_path.write_text(final_content, encoding="utf-8")
return
if not has_content_changed:
if po_path.exists():
content = po_path.read_text(encoding="utf-8")
content = POGenerator._strip_gettext_boilerplate(content)
if "Authors: Vacanza Team" not in content:
new_parts = []
if license_header:
new_parts.extend(license_header.rstrip("\n").splitlines())
new_parts.append("#")
if desc_line and desc_line not in content:
new_parts.append(desc_line)
if new_parts:
new_parts.append("#")
final_content = "\n".join(new_parts) + "\n" + content
if final_content.strip() != content.strip():
po_path.write_text(final_content, encoding="utf-8")
return

This, plus a stripper for Lingva's boilerplate:

    @staticmethod
    def _strip_gettext_boilerplate(content: str) -> str:
        if content.startswith("# SOME DESCRIPTIVE TITLE"):
            return content.split("#, fuzzy", 1)[1].lstrip()
        return content.lstrip()


timestamp = datetime.now().strftime("%Y-%m-%d %H:%M%z")
Copy link

Copilot AI Jan 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The same timestamp format issue exists here. The '%z' format specifier may produce an empty string on some platforms where timezone information is not available. This could lead to inconsistent or invalid POT-Creation-Date values.

Copilot uses AI. Check for mistakes.
po_file.metadata["Project-Id-Version"] = f"Holidays {package_version}"
po_file.metadata["PO-Revision-Date"] = timestamp

std_meta = POGenerator._get_standard_metadata(default_language)
std_meta["Language"] = current_lang

# Save the file each time in order to capture all other changes properly.
po_file.save(po_path, newline="\n")
for k, v in std_meta.items():
if k not in po_file.metadata:
po_file.metadata[k] = v
Comment on lines +179 to +180
Copy link

Copilot AI Jan 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The metadata update only adds missing fields but never updates existing ones with potentially stale values. When content changes, fields like 'POT-Creation-Date', 'Generated-By', or 'Report-Msgid-Bugs-To' should be updated even if they already exist in the metadata. This could lead to inconsistent or outdated metadata across PO files.

Suggested change
if k not in po_file.metadata:
po_file.metadata[k] = v
po_file.metadata[k] = v

Copilot uses AI. Check for mistakes.

po_file.save(str(po_path), newline="\n")

content = po_path.read_text(encoding="utf-8")
new_parts = []

if license_header and "Authors: Vacanza Team" not in content:
Copy link

Copilot AI Jan 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The check 'if "Authors: Vacanza Team" not in content' is used as a proxy to determine whether the license header already exists. This is fragile because it only checks for one specific string within the header. If the header format changes or if this string appears elsewhere in the file, the logic will break. Consider using a more robust marker or checking for the complete header structure.

Copilot uses AI. Check for mistakes.
new_parts.append(license_header)

if desc_line and desc_line not in content:
new_parts.append(desc_line)

if new_parts:
if not content.startswith("#"):
new_parts.append("#")

Comment on lines +194 to +196
Copy link

Copilot AI Jan 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The logic for determining whether to add headers is inconsistent. In the no-change path (lines 154-169), both license header and desc_line are checked before adding. In the changed path (lines 193-198), an extra condition checks if content doesn't start with "#". This inconsistency could lead to different header formatting between files that have changed and those that haven't.

Suggested change
if not content.startswith("#"):
new_parts.append("#")
new_parts.append("#")

Copilot uses AI. Check for mistakes.
final_content = "\n".join(new_parts) + "\n" + content
po_path.write_text(final_content, encoding="utf-8")

def process_entities(self):
"""Processes entities in specified directory."""
Expand All @@ -111,17 +205,49 @@ def process_entities(self):
if path.stem == "__init__":
continue
module = f"holidays.{entity_type}.{path.stem}"
for _, cls in inspect.getmembers(importlib.import_module(module), inspect.isclass):

try:
mod = importlib.import_module(module)
except ImportError:
continue
Comment on lines +209 to +212
Copy link

Copilot AI Jan 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ImportError is caught but silently ignored without any logging. This could hide genuine import problems and make debugging difficult. Consider adding at least a debug log message when a module fails to import so that issues can be identified during development.

Copilot uses AI. Check for mistakes.

candidates = []
for _, cls in inspect.getmembers(mod, inspect.isclass):
if (
issubclass(cls, HolidayBase)
and cls.__module__ == module
and getattr(cls, "default_language") is not None
):
name = getattr(cls, "country", getattr(cls, "market", None))
entity_code_info_mapping[name.upper()] = (cls.default_language, path)
candidates.append(cls)

if not candidates:
continue

chosen_cls = None
target_name = path.stem.replace("_", "").lower()

for cls in candidates:
if cls.__name__.lower() == target_name:
Comment on lines +227 to +230
Copy link

Copilot AI Jan 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The class name matching algorithm is case-insensitive and removes underscores from the filename, but doesn't account for potential edge cases. For a file named "united_states.py", target_name becomes "unitedstates", but if the class is named "UnitedStates", this will match. However, if there are naming variations or special characters, this logic may fail. Consider more robust matching strategies.

Copilot uses AI. Check for mistakes.
chosen_cls = cls
break

all_po_update_tasks: list[tuple[str, str]] = []
if not chosen_cls:
candidates.sort(key=lambda c: len(c.__doc__ or ""), reverse=True)
Copy link

Copilot AI Jan 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fallback logic for choosing the "best" class is flawed. Sorting by docstring length may select a less relevant class over the primary one. For example, if a file contains a main holiday class with a short docstring and helper classes with longer docstrings, the wrong class could be selected. Consider using additional heuristics such as class inheritance depth or checking if the class directly implements certain methods.

Suggested change
candidates.sort(key=lambda c: len(c.__doc__ or ""), reverse=True)
def _class_selection_key(cls) -> tuple[int, int]:
"""
Heuristic for selecting the most relevant HolidayBase subclass
when multiple candidates exist in a module.
Higher score is better; we invert it for use in sort().
"""
score = 0
# Prefer classes that implement the core population logic themselves.
if "_populate" in cls.__dict__:
score += 2
# Prefer classes that directly declare identifying attributes.
if "country" in cls.__dict__ or "market" in cls.__dict__:
score += 1
doc_len = len(cls.__doc__ or "")
# sort() is ascending, so use negatives to put best candidates first.
return (-score, -doc_len)
candidates.sort(key=_class_selection_key)

Copilot uses AI. Check for mistakes.
chosen_cls = candidates[0]

name = getattr(chosen_cls, "country", getattr(chosen_cls, "market", None))
if not name:
continue

doc_text = chosen_cls.__doc__ if chosen_cls.__doc__ else ""

entity_code_info_mapping[name.upper()] = (
chosen_cls.default_language,
path,
doc_text,
)

all_po_update_tasks: list[tuple[str, str, str, str]] = []
with ProcessPoolExecutor() as executor:
for po_tasks in executor.map(
self._process_entity_worker, entity_code_info_mapping.items()
Expand Down