Skip to content
Merged

dev #109

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions .github/workflows/tox_matrix.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Runs every tox environment on Linux, macOS and Windows for each push and pull request.
on: [push, pull_request]
name: Tox Multi-platform Compatibility Test

# One run per workflow and ref: a newer run cancels the one still in progress.
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true


jobs:
# Lists the tox environments and exposes them as a JSON array (output "tox_matrix")
# that the build job expands into its matrix.
setup:
name: Setup
runs-on: ubuntu-latest
outputs:
tox_matrix: ${{ steps.dataStep.outputs.myoutput }}
steps:
- uses: actions/checkout@v6
- uses: actions/setup-python@v6
name: Get tox environments
with:
python-version: '3.13'
cache: 'pip' # caching pip dependencies
# jq turns each line of the tox environment listing into a JSON string, then
# slurps all of them into one compact array written to the step output "myoutput".
- id: dataStep
run: |
pip install uv
echo "myoutput=$(uv run --quiet --only-group=tox tox --listenvs -a | jq --raw-input . | jq -c --slurp )" >> $GITHUB_OUTPUT

# Runs one job per (os, tox-environment) combination produced by the setup job.
build:
needs: setup
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
tox-environment: ${{ fromJson(needs.setup.outputs.tox_matrix) }}
# Keep the remaining matrix jobs running when one combination fails.
fail-fast: false
name: "${{ matrix.os }}: ${{ matrix.tox-environment }} "
steps:
- uses: actions/checkout@v6
- uses: actions/setup-python@v6
with:
cache: 'pip' # caching pip dependencies
- name: Install requirements
run: |
pip install uv
# The environment name is passed both through TOXENV and explicitly with -e.
- name: Run tox
env:
TOXENV: ${{ matrix.tox-environment }}
run: uv run --only-group tox-uv tox run -e ${{matrix.tox-environment}}
25 changes: 18 additions & 7 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
name = "HathiValidate"
version = "0.3.9.dev0"
dependencies = [
"lxml<6.0; sys_platform != 'darwin' or python_version != '3.8' or platform_machine != 'arm64'",
"lxml",
"PyYAML",
]
requires-python = ">=3.10"
Expand All @@ -29,24 +29,35 @@ Documentation = "https://www.library.illinois.edu/dccdocs/hathi_validate"
Download = "https://github.com/UIUCLibrary/HathiValidate"

[dependency-groups]
audit-dependencies = ["uv-secure"]
publish = ["twine"]
tox = ["tox"]
tox-uv = [
{include-group = "tox"},
"tox-uv-bare"
]
docs = ["sphinx"]
test =["pytest", "behave"]
type-checking = ["mypy", "types-PyYAML"]
code-style = ["pydocstyle", "flake8"]
lint = [
{include-group = "code-style"},
"pylint"
]
security = ["bandit"]
dev = [
"coverage",
"flake8",
"lxml",
"mypy",
"paramiko",
"pep517",
"pydocstyle",
"pylint",
"PyYAML",
"setuptools>=30.3.0",
{include-group = "test"},
{include-group = "code-style"},
{include-group = "docs"},
{include-group = "lint"},
{include-group = "test"},
{include-group = "tox"},
{include-group = "type-checking"},
{include-group = "publish"},
"types-PyYAML"
]
Expand All @@ -61,7 +72,7 @@ zip-safe = false
package-dir = {"" = "src"}

[tool.setuptools.package-data]
hathi_validate = ["xsd/*.xsd", "py.typed"]
hathi_validate = ["xsd/*.xsd", "py.typed", "catalog.xml"]

[project.scripts]
hathivalidate = "hathi_validate.cli:main"
Expand Down
7 changes: 7 additions & 0 deletions src/hathi_validate/catalog.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<?xml version="1.0"?>
<!DOCTYPE catalog PUBLIC "-//OASIS//DTD XML Catalogs V1.0//EN"
"http://www.oasis-open.org/committees/entity/release/1.0/catalog.dtd">
<catalog xmlns="urn:oasis:names:tc:entity:xmlns:xml:catalog">
<!-- Map the remote XLink schema URL to the relative path xsd/xlink.xsd so the
schema is resolved from a local copy instead of being fetched. -->
<uri name="http://www.loc.gov/standards/xlink/xlink.xsd"
uri="xsd/xlink.xsd"/>
</catalog>
38 changes: 28 additions & 10 deletions src/hathi_validate/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@
import yaml
from lxml import etree

from importlib.resources import files
from importlib.resources import files, as_file

import hathi_validate
from hathi_validate import result
from hathi_validate import xsd as hathi_xsd
from . import validator
Expand Down Expand Up @@ -102,7 +103,7 @@

def calculate_md5(filename: str, chunk_size: int = 8192) -> str:
"""Calculate the md5 hash value of a file."""
md5 = hashlib.md5()
md5 = hashlib.md5(usedforsecurity=False)

with open(filename, "rb") as file_handle:
while True:
Expand Down Expand Up @@ -416,7 +417,7 @@
return finder.find_errors()


def find_errors_ocr(path: str) -> result.ResultSummary:

Check failure on line 420 in src/hathi_validate/process.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Refactor this function to reduce its Cognitive Complexity from 19 to the 15 allowed.

See more on https://sonarcloud.io/project/issues?id=UIUCLibrary_HathiValidate&issues=AZ0CU0nFLWZ_v3XcHG6-&open=AZ0CU0nFLWZ_v3XcHG6-&pullRequest=109
"""Validate all xml files located in the given path.

Make sure they are valid to the alto scheme
Expand All @@ -442,11 +443,25 @@
return True

logger = logging.getLogger(__name__)
alto_scheme = etree.XMLSchema(
etree.XML(
files(hathi_xsd).joinpath("alto.xsd").read_bytes()
)
)
# Remember the caller's XML_CATALOG_FILES (None when unset) so the cleanup
# code can put it back instead of unconditionally removing the variable.
existing_xml_catalog_file: Optional[str] = os.environ.get('XML_CATALOG_FILES')

try:
# libxml2 no longer downloads remote xsd files automatically, so point it at
# the bundled XML catalog, which redirects the xlink schema to the included copy.
with as_file(
files(hathi_validate).joinpath('catalog.xml')
) as catalog_file:
os.environ['XML_CATALOG_FILES'] = str(catalog_file)
alto_scheme = etree.XMLSchema(
etree.XML(
files(hathi_xsd).joinpath("alto.xsd").read_bytes()
)
)
finally:
if existing_xml_catalog_file is None:

Check failure on line 461 in src/hathi_validate/process.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Remove this identity check; it will always be True.

See more on https://sonarcloud.io/project/issues?id=UIUCLibrary_HathiValidate&issues=AZ0CU0nFLWZ_v3XcHG6_&open=AZ0CU0nFLWZ_v3XcHG6_&pullRequest=109
os.environ.pop('XML_CATALOG_FILES', None)
else:
os.environ['XML_CATALOG_FILES'] = existing_xml_catalog_file

summary_builder = result.SummaryDirector(source=path)
for xml_file in filter(ocr_filter, os.scandir(path)):
Expand All @@ -455,9 +470,12 @@
doc = etree.fromstring(file_handle.read().encode("utf-8"))

if not alto_scheme.validate(doc):
summary_builder.add_error(
"{} does not validate to ALTO scheme".format(xml_file.name)
)
for error in alto_scheme.error_log:

Check warning on line 473 in src/hathi_validate/process.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Non-iterable value alto_scheme.error_log is used in an iterating context

See more on https://sonarcloud.io/project/issues?id=UIUCLibrary_HathiValidate&issues=AZ0CU0nFLWZ_v3XcHG7A&open=AZ0CU0nFLWZ_v3XcHG7A&pullRequest=109
summary_builder.add_error(
f"{xml_file.name} does not validate to ALTO scheme. "
f"Line: {error.line}, Column: {error.column}, "
f"Reason: {error.message}"
)
else:
logger.info(
"%s validates to the ALTO XML scheme", xml_file.name
Expand Down
41 changes: 19 additions & 22 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
; pytest_args = "--junitxml=reports/junit-{env:OS:UNKNOWN_OS}-{envname}.xml --junit-prefix={env:OS:UNKNOWN_OS} --basetemp={envtmpdir}"
; sphinx_args = "-W -b html -d {envtmpdir}/doctrees source {envtmpdir}/html"
[tox]
envlist = py310, py311, py312, py313
envlist = py310, py311, py312, py313, py314, py314t
isolated_build = true
min_version = 4.20

[tool:pytest]
testpath = tests
addopts = --verbose

[testenv]
dependency_groups = test
commands=
Expand All @@ -19,44 +20,40 @@ commands=

[testenv:docs]
changedir=docs
deps=
sphinx
dependency_groups = docs
commands=
sphinx-build {env:sphinx_args:-W -b html -d {envtmpdir}/doctrees source {distdir}/html}

sphinx-build {env:sphinx_args:-W -b html -d {envtmpdir}/doctrees source {temp_dir}/docs/html}

[testenv:mypy]
deps =
mypy
dependency_groups = type-checking
skip_install=True
ignore_outcome = true
setenv = MYPY_CACHE_DIR = {temp_dir}/.mypy_cache
commands = mypy {posargs: -p hathi_validate}
commands = mypy {posargs: src/hathi_validate}

[testenv:flake8]
description = check the code style
deps =
flake8
dependency_groups = code-style
skip_install=True
commands = flake8 {posargs: hathi_validate}
commands = flake8 {posargs: src/hathi_validate}


[testenv:pylint]
description = check the code style
deps =
pylint
skip_install=True
commands = pylint {posargs: hathi_validate} --disable import-error

dependency_groups = lint
skip_install = true
ignore_outcome = true
commands = pylint {posargs: src/hathi_validate} --disable import-error

[testenv:pydocstyle]
skip_install = true
deps=
pydocstyle
ignore_outcome = true
dependency_groups = code-style
commands =
pydocstyle {posargs: {toxinidir}/hathi_validate}
pydocstyle {posargs: {toxinidir}/src/hathi_validate}

[testenv:bandit]
skip_install = true
deps=
bandit
dependency_groups = security
commands =
bandit {posargs: --recursive {toxinidir}/hathi_validate}
bandit {posargs: --recursive {toxinidir}/src/hathi_validate}
Loading
Loading