-
Notifications
You must be signed in to change notification settings - Fork 42
Replacing the metadata parser from packaging.metadata #607
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,8 +6,8 @@ | |
| import pathlib | ||
| import tempfile | ||
| import typing | ||
| import zipfile | ||
|
|
||
| import pkginfo | ||
| import pyproject_hooks | ||
| import tomlkit | ||
| from packaging.metadata import Metadata | ||
|
|
@@ -344,14 +344,23 @@ def default_get_install_dependencies_of_sdist( | |
| return set(metadata.requires_dist) | ||
|
|
||
|
|
||
| def parse_metadata(metadata_file: pathlib.Path, *, validate: bool = True) -> Metadata: | ||
| """Parse a dist-info/METADATA file | ||
| def parse_metadata( | ||
| metadata_source: pathlib.Path | bytes, *, validate: bool = True | ||
| ) -> Metadata: | ||
| """Parse metadata from a file path or bytes. | ||
|
|
||
| Args: | ||
| metadata_source: Path to METADATA file or bytes containing metadata | ||
| validate: Whether to validate metadata (default: True) | ||
|
|
||
| The default parse mode is 'strict'. It even fails for a mismatch of field | ||
| and core metadata version, e.g. a package with metadata 2.2 and | ||
| license-expression field (added in 2.4). | ||
| Returns: | ||
| Parsed Metadata object | ||
| """ | ||
| return Metadata.from_email(metadata_file.read_bytes(), validate=validate) | ||
| if isinstance(metadata_source, pathlib.Path): | ||
| metadata_bytes = metadata_source.read_bytes() | ||
| else: | ||
| metadata_bytes = metadata_source | ||
| return Metadata.from_email(metadata_bytes, validate=validate) | ||
|
|
||
|
|
||
| def pep517_metadata_of_sdist( | ||
|
|
@@ -418,16 +427,74 @@ def validate_dist_name_version( | |
| def get_install_dependencies_of_wheel( | ||
| req: Requirement, wheel_filename: pathlib.Path, requirements_file_dir: pathlib.Path | ||
| ) -> set[Requirement]: | ||
| """Get install dependencies from a wheel file. | ||
|
|
||
| Extracts and parses the METADATA file from the wheel to get the | ||
| Requires-Dist entries. | ||
|
|
||
| Args: | ||
| req: The requirement being processed | ||
| wheel_filename: Path to the wheel file | ||
| requirements_file_dir: Directory to write the requirements file | ||
|
|
||
| Returns: | ||
| Set of requirements from the wheel's metadata | ||
| """ | ||
| logger.info(f"getting installation dependencies from {wheel_filename}") | ||
| wheel = pkginfo.Wheel(str(wheel_filename)) | ||
| deps = _filter_requirements(req, wheel.requires_dist) | ||
| # Disable validation because many third-party packages have metadata version | ||
| # mismatches (e.g., setuptools declares Metadata-Version: 2.2 but uses | ||
| # license-file which was introduced in 2.4). The old pkginfo library | ||
| # didn't validate this, so we maintain backward compatibility. | ||
| metadata = _get_metadata_from_wheel(wheel_filename, validate=False) | ||
| requires_dist = metadata.requires_dist or [] | ||
| deps = _filter_requirements(req, requires_dist) | ||
| _write_requirements_file( | ||
| deps, | ||
| requirements_file_dir / INSTALL_REQ_FILE_NAME, | ||
| ) | ||
| return deps | ||
|
|
||
|
|
||
| def _get_metadata_from_wheel( | ||
| wheel_filename: pathlib.Path, *, validate: bool = True | ||
| ) -> Metadata: | ||
| """Extract and parse METADATA from a wheel file. | ||
|
|
||
| Args: | ||
| wheel_filename: Path to the wheel file | ||
| validate: Whether to validate metadata (default: True) | ||
|
|
||
| Returns: | ||
| Parsed Metadata object | ||
|
|
||
| Raises: | ||
| ValueError: If no METADATA file is found in the wheel | ||
| """ | ||
| # Predict the dist-info directory name from the wheel filename | ||
| # Wheel format: {distribution}-{version}(-{build})?-{python}-{abi}-{platform}.whl | ||
| # Dist-info format: {distribution}-{version}.dist-info | ||
| # Note: We extract from the filename directly rather than using parse_wheel_filename | ||
| # because the dist-info directory uses the original (non-normalized) name | ||
| wheel_name_parts = wheel_filename.stem.split("-") | ||
| dist_name = wheel_name_parts[0] | ||
| dist_version = wheel_name_parts[1] | ||
| predicted_dist_info = f"{dist_name}-{dist_version}.dist-info/METADATA" | ||
|
Comment on lines
+478
to
+481
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's not invent our own wheel parsing algorithm. The function |
||
|
|
||
| with zipfile.ZipFile(wheel_filename) as whl: | ||
| # Try predicted path first for efficiency | ||
| if predicted_dist_info in whl.namelist(): | ||
| metadata_content = whl.read(predicted_dist_info) | ||
| return parse_metadata(metadata_content, validate=validate) | ||
|
Comment on lines
+485
to
+487
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This constructs a list of all files in a wheel and then searches through the list of files. It's inefficient. |
||
|
|
||
| # Fallback to iterating if prediction fails (e.g., non-standard naming) | ||
| for entry in whl.namelist(): | ||
| if entry.endswith(".dist-info/METADATA"): | ||
| metadata_content = whl.read(entry) | ||
| return parse_metadata(metadata_content, validate=validate) | ||
|
Comment on lines
+489
to
+493
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This check is not necessary. A wheel file MUST contain a correct dist-info directory. If the dist-info directory does not match the wheel file name, then pip and uv will refuse to install the file. |
||
|
|
||
| raise ValueError(f"Could not find METADATA file in wheel: {wheel_filename}") | ||
|
|
||
|
|
||
| def get_pyproject_contents(sdist_root_dir: pathlib.Path) -> dict[str, typing.Any]: | ||
| pyproject_toml_filename = sdist_root_dir / "pyproject.toml" | ||
| if not os.path.exists(pyproject_toml_filename): | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`parse_metadata` validates the metadata by default. This will raise an exception if the metadata version does not match the metadata content, e.g. a `license-file` field in Metadata < 2.4.