Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
[![rtemis.a3 status badge](https://rtemis-org.r-universe.dev/rtemis.a3/badges/version)](https://rtemis-org.r-universe.dev/rtemis.a3)
[![npm version](https://img.shields.io/npm/v/@rtemis/a3.svg)](https://www.npmjs.com/package/@rtemis/a3)
[![Crates.io Version](https://img.shields.io/crates/v/rtemis_a3)](https://crates.io/crates/rtemis_a3)

[![r-ci](https://github.com/rtemis-org/a3/actions/workflows/r-ci.yml/badge.svg)](https://github.com/rtemis-org/a3/actions/workflows/r-ci.yml)
[![python-ci](https://github.com/rtemis-org/a3/actions/workflows/python-ci.yml/badge.svg)](https://github.com/rtemis-org/a3/actions/workflows/python-ci.yml)
Expand Down
373 changes: 373 additions & 0 deletions python/rtemis_a3/LICENSE

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion python/rtemis_a3/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,9 @@ json_string = a3_to_json(a3, indent=2) # pretty-printed

```json
{
"sequence": "MKTAYIAKQR",
"$schema": "https://schema.rtemis.org/a3/v1/schema.json",
"a3_version": "1.0.0",
"sequence": "MKTAYIAKQR",
"annotations": {
"site": { "Active site": { "index": [3, 5], "type": "activeSite" } },
"region": { "Repeat 1": { "index": [[1, 4]], "type": "" } },
Expand Down
18 changes: 16 additions & 2 deletions python/rtemis_a3/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,16 +1,30 @@
[project]
name = "rtemis-a3"
version = "0.1.0"
description = "Add your description here"
description = "Python implementation of the Amino Acid Annotation (A3) format"
readme = "README.md"
license = "MPL-2.0"
license-files = ["LICENSE"]
authors = [
{ name = "Stathis Gennatas", email = "gennatas@gmail.com" }
{ name = "E.D. Gennatas", email = "gennatas@gmail.com" }
]
keywords = ["bioinformatics", "proteomics", "amino acid", "annotation", "protein"]
classifiers = [
"Intended Audience :: Science/Research",
"Programming Language :: Python :: 3.13",
"Topic :: Scientific/Engineering :: Bio-Informatics",
"Typing :: Typed",
]
requires-python = ">=3.13"
dependencies = [
"pydantic>=2.12.5",
]

[project.urls]
Homepage = "https://github.com/rtemis-org/a3"
Repository = "https://github.com/rtemis-org/a3"
Issues = "https://github.com/rtemis-org/a3/issues"

[build-system]
requires = ["uv_build>=0.11.2,<0.12.0"]
build-backend = "uv_build"
Expand Down
30 changes: 26 additions & 4 deletions python/rtemis_a3/src/rtemis/a3/_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
field_validator,
model_validator,
)
from pydantic.functional_validators import BeforeValidator

from ._normalize import (
check_no_overlap,
Expand All @@ -24,11 +25,26 @@
sort_ranges,
)


class _BoundsErrors(Exception):
"""Internal: carries individual bounds-check messages out of the model validator."""

def __init__(self, messages: list[str]) -> None:
self.messages = messages


# ---------------------------------------------------------------------------
# Constrained types
# ---------------------------------------------------------------------------

Position = Annotated[int, Field(gt=0)]

def _reject_bool(v: Any) -> Any:
if isinstance(v, bool):
raise ValueError("boolean values are not valid positions")
return v


Position = Annotated[int, BeforeValidator(_reject_bool), Field(gt=0)]

# ---------------------------------------------------------------------------
# Annotation entry models
Expand Down Expand Up @@ -71,6 +87,8 @@ def _normalize_ranges(cls, v: Any) -> list[tuple[int, int]]:
for item in v:
if isinstance(item, (list, tuple)) and len(item) == 2:
s, e = item
if isinstance(s, bool) or isinstance(e, bool):
raise ValueError("boolean values are not valid positions")
Comment on lines +90 to +91
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The check for boolean values here is redundant. The RegionEntry model defines the index field as list[tuple[Position, Position]], and the Position type alias already includes a BeforeValidator(_reject_bool) that handles this check. Since _normalize_ranges is a before validator, Pydantic will subsequently validate its output against the field's type annotation, making this explicit check unnecessary. Relying on the Position type for this validation will make the code more concise and less repetitive.

if not (isinstance(s, int) and isinstance(e, int)):
raise ValueError(
f"range elements must be integers, got [{type(s).__name__}, "
Expand Down Expand Up @@ -108,12 +126,16 @@ def _normalize_flex_index(cls, v: Any) -> list[int] | list[tuple[int, int]]:
return []
# Determine geometry from first element
first = v[0]
if isinstance(first, bool):
raise ValueError("boolean values are not valid positions")
if isinstance(first, (list, tuple)):
# Ranges path
coerced: list[tuple[int, int]] = []
for item in v:
if isinstance(item, (list, tuple)) and len(item) == 2:
s, e = item
if isinstance(s, bool) or isinstance(e, bool):
raise ValueError("boolean values are not valid positions")
if not (isinstance(s, int) and isinstance(e, int)):
raise ValueError(
f"range elements must be integers, got "
Expand All @@ -132,10 +154,10 @@ def _normalize_flex_index(cls, v: Any) -> list[int] | list[tuple[int, int]]:
sorted_ranges = sort_ranges(coerced)
check_no_overlap(sorted_ranges)
return sorted_ranges
elif isinstance(first, int):
elif isinstance(first, int) and not isinstance(first, bool):
# Positions path
for item in v:
if not isinstance(item, int):
if isinstance(item, bool) or not isinstance(item, int):
raise ValueError(
"cannot mix integers and non-integers in index"
)
Expand Down Expand Up @@ -283,7 +305,7 @@ def _bounds_check(self) -> A3:
)

if errors:
raise ValueError("\n".join(errors))
raise _BoundsErrors(errors)

return self

Expand Down
39 changes: 22 additions & 17 deletions python/rtemis_a3/src/rtemis/a3/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from pydantic import ValidationError

from ._models import A3, VariantRecord
from ._models import A3, VariantRecord, _BoundsErrors
from .errors import A3ParseError, A3ValidationError

_A3_SCHEMA_URI = "https://schema.rtemis.org/a3/v1/schema.json"
Expand Down Expand Up @@ -79,6 +79,9 @@ def create_a3(

try:
return A3.model_validate(data)
except _BoundsErrors as exc:
errors = [{"loc": (), "msg": msg, "type": "value_error"} for msg in exc.messages]
raise A3ValidationError("\n".join(exc.messages), errors) from exc
Comment on lines +82 to +84
Copy link

Copilot AI Apr 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_BoundsErrors are converted into A3ValidationError.errors, but the generated entries all use loc=() and omit the offending field path. This makes the structured errors list much less useful for programmatic consumers and also conflicts with the docstring claim that loc contains field-path components. Consider having _BoundsErrors carry structured error objects (with loc tuples), or parse the path prefix in each message into a loc tuple when building errors.

Copilot uses AI. Check for mistakes.
except ValidationError as exc:
raise A3ValidationError(str(exc), cast(list[dict[str, Any]], exc.errors())) from exc

Expand Down Expand Up @@ -109,33 +112,35 @@ def a3_from_json(text: str) -> A3:
raise A3ParseError(f"invalid JSON: {exc}") from exc

if not isinstance(data, dict):
raise A3ParseError("JSON root must be an object")
raise A3ValidationError(
"JSON root must be an object",
[{"loc": (), "msg": "JSON root must be an object", "type": "value_error", "input": data}],
)
envelope_errors: list[dict[str, Any]] = []
schema_val = data.get("$schema")
if schema_val is None:
raise A3ParseError("missing required field '$schema'")
if schema_val != _A3_SCHEMA_URI:
raise A3ParseError(
f"'$schema' must be '{_A3_SCHEMA_URI}', got '{schema_val}'"
)
envelope_errors.append({"loc": ("$schema",), "msg": "missing required field '$schema'", "type": "missing", "input": data})
elif schema_val != _A3_SCHEMA_URI:
envelope_errors.append({"loc": ("$schema",), "msg": f"'$schema' must be '{_A3_SCHEMA_URI}', got '{schema_val}'", "type": "value_error", "input": schema_val})
version_val = data.get("a3_version")
if version_val is None:
raise A3ParseError("missing required field 'a3_version'")
if version_val != _A3_VERSION:
raise A3ParseError(
f"'a3_version' must be '{_A3_VERSION}', got '{version_val}'"
envelope_errors.append({"loc": ("a3_version",), "msg": "missing required field 'a3_version'", "type": "missing", "input": data})
elif version_val != _A3_VERSION:
envelope_errors.append({"loc": ("a3_version",), "msg": f"'a3_version' must be '{_A3_VERSION}', got '{version_val}'", "type": "value_error", "input": version_val})
if envelope_errors:
raise A3ValidationError(
"; ".join(e["msg"] for e in envelope_errors),
envelope_errors,
)
if "sequence" not in data:
raise A3ParseError("missing required field 'sequence'")
if "annotations" not in data:
raise A3ParseError("missing required field 'annotations'")
if "metadata" not in data:
raise A3ParseError("missing required field 'metadata'")
# Strip envelope keys before passing to the data model
for key in _ENVELOPE_KEYS:
data.pop(key, None)

try:
return A3.model_validate(data)
except _BoundsErrors as exc:
errors = [{"loc": (), "msg": msg, "type": "value_error"} for msg in exc.messages]
raise A3ValidationError("\n".join(exc.messages), errors) from exc
Comment on lines +141 to +143
Copy link

Copilot AI Apr 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same issue as above: bounds-check violations are mapped to errors entries with loc=(), losing the field path and limiting how callers can present/associate errors. Prefer emitting structured loc tuples for each bounds violation (either by carrying them in _BoundsErrors or deriving them here).

Copilot uses AI. Check for mistakes.
except ValidationError as exc:
raise A3ValidationError(str(exc), cast(list[dict[str, Any]], exc.errors())) from exc

Expand Down
11 changes: 10 additions & 1 deletion python/rtemis_a3/src/rtemis/a3/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,22 @@ class A3ValidationError(Exception):
message : str
Human-readable summary.
errors : list[dict[str, Any]]
Structured error list (from Pydantic's ``ValidationError.errors()``).
Structured error list. Each entry has at minimum ``loc`` (tuple of
field path components), ``msg`` (human-readable message), and
``type`` (error kind string). Pydantic structural errors follow
Pydantic's native format; bounds and envelope errors follow the same
shape for consistency.
Comment on lines +20 to +24
Copy link

Copilot AI Apr 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The docstring says bounds errors “follow the same shape” as Pydantic errors with loc as a tuple of field-path components, but bounds violations are currently surfaced with loc=() in api.py. Either adjust the documentation here to reflect the actual structure, or (preferably) emit per-violation loc tuples for bounds errors to match the documented contract.

Suggested change
Structured error list. Each entry has at minimum ``loc`` (tuple of
field path components), ``msg`` (human-readable message), and
``type`` (error kind string). Pydantic structural errors follow
Pydantic's native format; bounds and envelope errors follow the same
shape for consistency.
Structured error list. Each entry has at minimum ``loc`` (a tuple of
field path components, or ``()`` for root-level/non-field-specific
violations), ``msg`` (human-readable message), and ``type`` (error
kind string). Pydantic structural errors follow Pydantic's native
format; contextual errors such as bounds or envelope violations may
use an empty ``loc`` when no specific field path applies.

Copilot uses AI. Check for mistakes.
"""

def __init__(self, message: str, errors: list[dict[str, Any]] | None = None):
super().__init__(message)
self.errors: list[dict[str, Any]] = errors or []

@property
def messages(self) -> list[str]:
"""Individual error messages, one per violation."""
return [e["msg"] for e in self.errors]


class A3ParseError(Exception):
    """Raised when JSON parsing or file I/O fails.

    For example, ``a3_from_json`` raises it with an ``"invalid JSON: ..."``
    message when the input text cannot be decoded as JSON.
    """
34 changes: 22 additions & 12 deletions python/rtemis_a3/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,26 +107,36 @@ def test_valid_json_invalid_a3(self):
'{"$schema": "https://schema.rtemis.org/a3/v1/schema.json", "a3_version": "1.0.0", "sequence": "M", "annotations": {}, "metadata": {}}'
) # too short

def test_missing_annotations_field(self):
with pytest.raises(A3ParseError, match="annotations"):
a3_from_json(
'{"$schema": "https://schema.rtemis.org/a3/v1/schema.json", "a3_version": "1.0.0", "sequence": "MAEPRQ", "metadata": {}}'
)
def test_missing_annotations_field_defaults(self):
# annotations has a model default — omitting it from JSON is valid
a3 = a3_from_json(
'{"$schema": "https://schema.rtemis.org/a3/v1/schema.json", "a3_version": "1.0.0", "sequence": "MAEPRQ", "metadata": {}}'
)
assert a3.annotations.site == {}

def test_missing_metadata_field(self):
with pytest.raises(A3ParseError, match="metadata"):
a3_from_json(
'{"$schema": "https://schema.rtemis.org/a3/v1/schema.json", "a3_version": "1.0.0", "sequence": "MAEPRQ", "annotations": {}}'
)
def test_missing_metadata_field_defaults(self):
# metadata has a model default — omitting it from JSON is valid
a3 = a3_from_json(
'{"$schema": "https://schema.rtemis.org/a3/v1/schema.json", "a3_version": "1.0.0", "sequence": "MAEPRQ", "annotations": {}}'
)
assert a3.metadata.uniprot_id == ""

def test_missing_schema_field(self):
with pytest.raises(A3ParseError, match=r"\$schema"):
with pytest.raises(A3ValidationError, match=r"\$schema"):
a3_from_json('{"a3_version": "1.0.0", "sequence": "MAEPRQ"}')

def test_missing_version_field(self):
with pytest.raises(A3ParseError, match="a3_version"):
with pytest.raises(A3ValidationError, match="a3_version"):
a3_from_json('{"$schema": "https://schema.rtemis.org/a3/v1/schema.json", "sequence": "MAEPRQ"}')

def test_wrong_schema_uri(self):
with pytest.raises(A3ValidationError, match=r"\$schema"):
a3_from_json('{"$schema": "https://wrong.example.com/schema.json", "a3_version": "1.0.0", "sequence": "MAEPRQ"}')

def test_wrong_version(self):
with pytest.raises(A3ValidationError, match="a3_version"):
a3_from_json('{"$schema": "https://schema.rtemis.org/a3/v1/schema.json", "a3_version": "9.9.9", "sequence": "MAEPRQ"}')


class TestA3ToJson:
def test_roundtrip_minimal(self):
Expand Down
Loading