Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions pyjelly/integrations/generic/generic_sink.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,14 +185,18 @@ def is_triples_sink(self) -> bool:
return bool(self._store) and len(self._store[0]) == TRIPLE_ARITY

def parse(self, input_file: IO[bytes]) -> None:
from pyjelly.integrations.generic.parse import parse_jelly_to_graph
from pyjelly.integrations.generic.parse import (
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please don't do irrelevant changes in PRs. Please revert this and other changes like this.

parse_jelly_to_graph,
)

parsed_result = parse_jelly_to_graph(input_file)
self._store = parsed_result._store
self._namespaces = parsed_result._namespaces
self._identifier = parsed_result._identifier

def serialize(self, output_file: IO[bytes]) -> None:
from pyjelly.integrations.generic.serialize import grouped_stream_to_file
from pyjelly.integrations.generic.serialize import (
grouped_stream_to_file,
)

grouped_stream_to_file((sink for sink in [self]), output_file)
4 changes: 3 additions & 1 deletion pyjelly/serialize/streams.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,9 @@ def for_rdflib(cls, options: SerializerOptions | None = None) -> Stream:
if cls is Stream:
msg = "Stream is an abstract base class, use a subclass instead"
raise TypeError(msg)
from pyjelly.integrations.rdflib.serialize import RDFLibTermEncoder
from pyjelly.integrations.rdflib.serialize import (
RDFLibTermEncoder,
)

lookup_preset: LookupPreset | None = None
if options is not None:
Expand Down
82 changes: 42 additions & 40 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
[build-system]
requires = [
"hatchling>=1.24",
"hatch-mypyc; platform_python_implementation == 'CPython'",
"mypy>=1.8; platform_python_implementation == 'CPython'",
"hatchling>=1.24",
"hatch-mypyc; platform_python_implementation == 'CPython'",
"mypy>=1.8; platform_python_implementation == 'CPython'",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same

]
build-backend = "hatchling.build"

[tool.hatch.build.targets.wheel]
only-include = ["pyjelly"]

artifacts = [
"*__mypyc*.*",
"**/*__mypyc*.*",
"**/*.so",
"**/*.pyd",
"**/*.dylib"
"*__mypyc*.*",
"**/*__mypyc*.*",
"**/*.so",
"**/*.pyd",
"**/*.dylib",
]


Expand All @@ -27,29 +27,31 @@ require-cpython = true
modules = ["pyjelly"]

include = [
"pyjelly/serialize/ioutils.py",
"pyjelly/integrations/generic/serialize.py",
"pyjelly/parse/lookup.py",
"pyjelly/parse/ioutils.py",
"pyjelly/serialize/streams.py",
"pyjelly/parse/decode.py",
"pyjelly/serialize/encode.py",
"pyjelly/serialize/lookup.py",
"pyjelly/serialize/ioutils.py",
"pyjelly/integrations/generic/serialize.py",
"pyjelly/parse/lookup.py",
"pyjelly/parse/ioutils.py",
"pyjelly/serialize/streams.py",
"pyjelly/parse/decode.py",
"pyjelly/serialize/encode.py",
"pyjelly/serialize/lookup.py",
]

mypy-args = [
"--ignore-missing-imports",
"--no-warn-no-return"
]
mypy-args = ["--ignore-missing-imports", "--no-warn-no-return"]

[tool.cibuildwheel]
build = ["cp310-*", "cp311-*", "cp312-*", "cp313-*", "cp314-*"]
skip = ["*-win32"]
skip = ["*-win32"]
test-extras = ["rdflib"]
test-requires = [
"mypy-extensions",
"pytest", "hypothesis", "inline-snapshot", "pytest-mock",
"pytest-subtests", "pytest-accept", "detect-test-pollution"
"mypy-extensions",
"pytest",
"hypothesis",
"inline-snapshot",
"pytest-mock",
"pytest-subtests",
"pytest-accept",
"detect-test-pollution",
]
test-command = "cp -r {project}/pyjelly /tmp/pyjelly && cp -r {project}/tests /tmp/tests && cd /tmp && PYTHONPATH=/tmp python -m pytest -q --import-mode=importlib tests"

Expand All @@ -67,9 +69,7 @@ name = "pyjelly"
version = "0.0.0"
readme = "README.md"
description = "Jelly-RDF implementation for Python"
authors = [
{ name = "NeverBlink et al.", email = "contact@neverblink.eu" }
]
authors = [{ name = "NeverBlink et al.", email = "contact@neverblink.eu" }]
requires-python = ">=3.10, <3.15"
classifiers = [
"Development Status :: 4 - Beta",
Expand Down Expand Up @@ -146,9 +146,9 @@ docs = [
"markupsafe<4.0.0",
"linkchecker~=10.6.0",
]
# version 3.1 required for python 3.14 support
ci = ["cibuildwheel>=3.1.0,<4 ; python_version >= '3.11'"]

# version 3.1 required for python 3.14 support
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what?

ci = ['cibuildwheel>=3.1.0,<4 ; python_version >= "3.11"']
bench = ["pytest-benchmark>=5.2.1", "rdflib>=7.1.4"]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please rename to "benchmark" to make it clearer what this is.


[tool.uv]
required-version = "~=0.9.0"
Expand All @@ -157,12 +157,7 @@ default-groups = ["dev", "extras", "test", "types"]
[tool.mypy]
strict = true
explicit_package_bases = true
exclude = [
"build/",
"docs/",
".*_pb2(_grpc)?\\.pyi?",
"examples/"
]
exclude = ["build/", "docs/", ".*_pb2(_grpc)?\\.pyi?", "examples/"]

[[tool.mypy.overrides]]
module = "google.protobuf.proto"
Expand All @@ -174,7 +169,16 @@ extra-standard-library = ["typing_extensions"]

[tool.pytest.ini_options]
pythonpath = "."
addopts = ["--import-mode=importlib", "--doctest-modules", "--ignore-glob=docs/examples/**", "--ignore-glob=examples/**"]
addopts = [
"--import-mode=importlib",
"--doctest-modules",
"--ignore-glob=docs/examples/**",
"--ignore-glob=examples/**",
"--ignore-glob=tests/utils/benchmark_*",
"--ignore=tests/benchmark_tests",
"-m",
"not benchmark",
]

[tool.ruff]
extend-exclude = ["*{_pb2,_pb2_grpc}.{py,pyi}"]
Expand All @@ -184,9 +188,7 @@ packages = ["pyjelly"]

[tool.ruff.lint]
extend-per-file-ignores = { "tests/unit_tests/**" = ["PLR2004"] }
exclude = [
"**examples/**",
]
exclude = ["**examples/**"]
select = ["ALL"]
ignore = [
# Description: Dynamically typed expressions (typing.Any) are disallowed
Expand Down
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe just "benchmarks" instead of "benchmark_tests"?

Empty file.
170 changes: 170 additions & 0 deletions tests/benchmark_tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
from __future__ import annotations

import io
from itertools import islice
from pathlib import Path

import pytest
from rdflib import Dataset, Graph

pytest.importorskip(
"pytest_benchmark",
reason="Install bench dependency group and run with -m benchmark",
)


# Register the benchmark command-line options in the "benchmark" option group.
def pytest_addoption(parser: pytest.Parser) -> None:
g = parser.getgroup("benchmark")
# Text inputs: these options have no default, so they read as unset unless passed.
g.addoption("--in-nt", type=str, help="path to N-Triples file.")
g.addoption("--in-nq", type=str, help="path to N-Quads file.")
# Optional pre-serialized Jelly inputs; both default to None.
g.addoption(
"--in-jelly-triples",
type=str,
default=None,
help="optional Jelly triples file; if none, generated in-memory from nt file.",
)
g.addoption(
"--in-jelly-quads",
type=str,
default=None,
help="optional Jelly quads file; if none, generated in-memory from nq slice.",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is an "nq slice"?

)

# Upper bound on how many statements are taken from the input (default 5,000,000).
g.addoption(
"--limit-statements",
type=int,
default=5_000_000,
help="first N statements from input.",
)
# Round/iteration counts for the measurements (defaults: 5 warmup, 10 rounds, 1 iteration).
g.addoption(
"--warmup-rounds",
type=int,
default=5,
help="warmup rounds, not counted to evaluation.",
)
g.addoption("--rounds", type=int, default=10, help="measured rounds.")
g.addoption("--iterations", type=int, default=1, help="iterations per round.")


# Return the first `limit` lines of the file at `path`, concatenated into one bytes object.
def _slice_lines_to_bytes(path: Path, limit: int) -> bytes:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are no comments here or anywhere else, again. This makes the code rather hard to review.

Please make this code readable, and then I will review it again.

# In-memory buffer that accumulates the selected lines.
buf = io.BytesIO()
# Binary mode: lines are copied as raw bytes, with no decoding step.
with path.open("rb") as f:
# islice stops the line iteration after `limit` lines (fewer if the file is shorter).
buf.writelines(islice(f, limit))
return buf.getvalue()


@pytest.fixture(scope="session")
def limit_statements(request: pytest.FixtureRequest) -> int:
    """Return the ``--limit-statements`` command-line value coerced to int."""
    raw_limit = request.config.getoption("--limit-statements")
    return int(raw_limit)


@pytest.fixture(scope="session")
def pedantic_cfg(request: pytest.FixtureRequest) -> dict[str, int]:
    """Gather the round/iteration command-line options into one mapping.

    The result has the keys ``warmup_rounds``, ``rounds`` and ``iterations``;
    each value is the matching command-line option coerced to int.
    """
    # (result key, command-line flag) pairs, in the order they are read.
    key_and_flag = (
        ("warmup_rounds", "--warmup-rounds"),
        ("rounds", "--rounds"),
        ("iterations", "--iterations"),
    )
    read_option = request.config.getoption
    return {key: int(read_option(flag)) for key, flag in key_and_flag}


@pytest.fixture(scope="session")
def nt_path(request: pytest.FixtureRequest) -> Path:
    """Return the ``--in-nt`` option as a ``Path``.

    Asserts that the option was given and that the path exists.
    """
    raw_value = request.config.getoption("--in-nt")
    assert raw_value, "--in-nt is required"
    candidate = Path(raw_value)
    assert candidate.exists(), f"--in-nt not found: {candidate}"
    return candidate


@pytest.fixture(scope="session")
def nq_path(request: pytest.FixtureRequest) -> Path:
    """Return the ``--in-nq`` option as a ``Path``.

    Asserts that the option was given and that the path exists.
    """
    raw_value = request.config.getoption("--in-nq")
    assert raw_value, "--in-nq is required"
    candidate = Path(raw_value)
    assert candidate.exists(), f"--in-nq not found: {candidate}"
    return candidate


@pytest.fixture(scope="session")
def jelly_triples_path(request: pytest.FixtureRequest) -> Path | None:
    """Return ``--in-jelly-triples`` as a ``Path``, or None when unset or empty."""
    raw_value = request.config.getoption("--in-jelly-triples")
    if not raw_value:
        return None
    return Path(raw_value)


@pytest.fixture(scope="session")
def jelly_quads_path(request: pytest.FixtureRequest) -> Path | None:
    """Return ``--in-jelly-quads`` as a ``Path``, or None when unset or empty."""
    raw_value = request.config.getoption("--in-jelly-quads")
    if not raw_value:
        return None
    return Path(raw_value)


@pytest.fixture(scope="session")
def nt_bytes_sliced(nt_path: Path, limit_statements: int) -> bytes:
    """Return the leading lines of the N-Triples input file as raw bytes.

    At most ``limit_statements`` lines are taken from ``nt_path``.
    """
    leading_lines = _slice_lines_to_bytes(nt_path, limit_statements)
    return leading_lines


@pytest.fixture(scope="session")
def nq_bytes_sliced(nq_path: Path, limit_statements: int) -> bytes:
    """Return the leading lines of the N-Quads input file as raw bytes.

    At most ``limit_statements`` lines are taken from ``nq_path``.
    """
    leading_lines = _slice_lines_to_bytes(nq_path, limit_statements)
    return leading_lines


# Session-scoped fixture: parse the sliced N-Triples bytes into a new rdflib Graph.
@pytest.fixture(scope="session")
def nt_graph(nt_bytes_sliced: bytes) -> Graph:
g = Graph()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you even use Graph? For buffering in-memory you must use an array of statements, otherwise you will get nonsensical results. Same with Dataset, of course.

# Load the in-memory bytes with the "nt" parser; the populated graph is returned.
g.parse(data=nt_bytes_sliced, format="nt")
return g


@pytest.fixture(scope="session")
def nq_dataset(nq_bytes_sliced: bytes) -> Dataset:
    """Parse the sliced N-Quads bytes into a new rdflib ``Dataset``."""
    dataset = Dataset()
    # The return value of parse() is deliberately ignored; the dataset itself
    # is what callers receive.
    dataset.parse(data=nq_bytes_sliced, format="nquads")
    return dataset


@pytest.fixture(scope="session")
def jelly_triples_bytes(jelly_triples_path: Path | None, nt_graph: Graph) -> bytes:
    """Return Jelly-encoded triples as bytes.

    The contents of ``jelly_triples_path`` are used when a path was supplied
    and exists; otherwise ``nt_graph`` is serialized in memory with the
    ``jelly`` format.
    """
    if not jelly_triples_path or not jelly_triples_path.exists():
        # No usable file: serialize the parsed graph instead.
        return nt_graph.serialize(destination=None, format="jelly", encoding="utf-8")
    return jelly_triples_path.read_bytes()


@pytest.fixture(scope="session")
def jelly_quads_bytes(jelly_quads_path: Path | None, nq_dataset: Dataset) -> bytes:
    """Return Jelly-encoded quads as bytes.

    The contents of ``jelly_quads_path`` are used when a path was supplied
    and exists; otherwise ``nq_dataset`` is serialized in memory with the
    ``jelly`` format.
    """
    if not jelly_quads_path or not jelly_quads_path.exists():
        # No usable file: serialize the parsed dataset instead.
        return nq_dataset.serialize(destination=None, format="jelly", encoding="utf-8")
    return jelly_quads_path.read_bytes()


def pytest_configure(config: pytest.Config) -> None:
    """Register the ``benchmark``, ``triples`` and ``quads`` markers."""
    marker_lines = (
        "benchmark: flat ser/des benchmarks",
        "triples: triples-only benchmarks (NT/Jelly-triples)",
        "quads: quads-only benchmarks (NQ/Jelly-quads)",
    )
    for marker_line in marker_lines:
        config.addinivalue_line("markers", marker_line)


def pytest_collection_modifyitems(
    config: pytest.Config, items: list[pytest.Item]
) -> None:
    """Deselect benchmark items whose required input option was not supplied.

    Items marked ``triples`` are dropped when ``--in-nt`` is unset or empty,
    and items marked ``quads`` are dropped when ``--in-nq`` is unset or empty.
    Dropped items are reported through the ``pytest_deselected`` hook and
    ``items`` is updated in place to hold only the remaining ones.
    """
    nt_given = bool(config.getoption("--in-nt"))
    nq_given = bool(config.getoption("--in-nq"))

    kept: list[pytest.Item] = []
    dropped: list[pytest.Item] = []

    for item in items:
        wants_triples = item.get_closest_marker("triples") is not None
        wants_quads = item.get_closest_marker("quads") is not None
        input_missing = (wants_triples and not nt_given) or (
            wants_quads and not nq_given
        )
        (dropped if input_missing else kept).append(item)

    if dropped:
        config.hook.pytest_deselected(items=dropped)
        # Replace the contents in place so the caller's list object is updated.
        items[:] = kept
Loading