diff --git a/Makefile b/Makefile
index e0e6062..c31f719 100644
--- a/Makefile
+++ b/Makefile
@@ -2,6 +2,45 @@
 	test test-r test-python test-typescript test-julia test-rust \
 	docs docs-r docs-python docs-typescript docs-julia docs-rust
 
+# ── Document ─────────────────────────────────────────────────────────────────
+
+docs-r:
+	@echo "==> R"
+	cd r && Rscript -e "devtools::document()"
+
+docs-python:
+	@echo "==> Python"
+	cd python && bash build-docs.sh
+
+docs-julia:
+	@echo "==> Julia"
+	cd julia && bash build-docs.sh
+
+docs-typescript:
+	@echo "==> TypeScript"
+	cd typescript && pnpm build
+
+docs-rust:
+	@echo "==> Rust"
+	cd rust && bash build-docs.sh
+
+docs:
+	@r=0; p=0; ts=0; jl=0; rs=0; \
+	$(MAKE) docs-r || r=1; \
+	$(MAKE) docs-python || p=1; \
+	$(MAKE) docs-julia || jl=1; \
+	$(MAKE) docs-typescript || ts=1; \
+	$(MAKE) docs-rust || rs=1; \
+	echo ""; \
+	echo "── Docs Summary ──────────────────────────────────────"; \
+	[ $$r -eq 0 ] && echo " R: done" || echo " R: FAILED"; \
+	[ $$p -eq 0 ] && echo " Python: done" || echo " Python: FAILED"; \
+	[ $$jl -eq 0 ] && echo " Julia: done" || echo " Julia: FAILED"; \
+	[ $$ts -eq 0 ] && echo " TypeScript: done" || echo " TypeScript: FAILED"; \
+	[ $$rs -eq 0 ] && echo " Rust: done" || echo " Rust: FAILED"; \
+	echo "─────────────────────────────────────────────────────"; \
+	[ $$((r+p+ts+jl+rs)) -eq 0 ]
+
 # ── Install ──────────────────────────────────────────────────────────────────
 
 install-r:
@@ -46,42 +85,3 @@ test:
 	[ $$rs -eq 0 ] && echo " Rust: passed" || echo " Rust: FAILED"; \
 	echo "─────────────────────────────────────────────────────"; \
 	[ $$((r+p+ts+jl+rs)) -eq 0 ]
-
-# ── Document ─────────────────────────────────────────────────────────────────
-
-docs-r:
-	@echo "==> R"
-	cd r && Rscript -e "devtools::document()"
-
-docs-python:
-	@echo "==> Python"
-	cd python && bash build-docs.sh
-
-docs-julia:
-	@echo "==> Julia"
-	cd julia && bash build-docs.sh
-
-docs-typescript:
-	@echo "==> TypeScript"
-	cd typescript && pnpm build
-
-docs-rust:
-	@echo "==> Rust"
-	cd rust && bash build-docs.sh
-
-docs:
-	@r=0; p=0; ts=0; jl=0; rs=0; \
-	$(MAKE) docs-r || r=1; \
-	$(MAKE) docs-python || p=1; \
-	$(MAKE) docs-julia || jl=1; \
-	$(MAKE) docs-typescript || ts=1; \
-	$(MAKE) docs-rust || rs=1; \
-	echo ""; \
-	echo "── Docs Summary ──────────────────────────────────────"; \
-	[ $$r -eq 0 ] && echo " R: done" || echo " R: FAILED"; \
-	[ $$p -eq 0 ] && echo " Python: done" || echo " Python: FAILED"; \
-	[ $$jl -eq 0 ] && echo " Julia: done" || echo " Julia: FAILED"; \
-	[ $$ts -eq 0 ] && echo " TypeScript: done" || echo " TypeScript: FAILED"; \
-	[ $$rs -eq 0 ] && echo " Rust: done" || echo " Rust: FAILED"; \
-	echo "─────────────────────────────────────────────────────"; \
-	[ $$((r+p+ts+jl+rs)) -eq 0 ]
diff --git a/julia/RtemisA3/src/validate.jl b/julia/RtemisA3/src/validate.jl
index 68fcbe5..2e4155a 100644
--- a/julia/RtemisA3/src/validate.jl
+++ b/julia/RtemisA3/src/validate.jl
@@ -24,7 +24,12 @@ function validate_positions(raw::AbstractVector, path::String)::Vector{Int}
             throw(A3ValidationError("$path[$i]: position $v must be >= 1"))
         push!(positions, Int(v))
     end
-    return sort_dedup(positions)
+    sorted = sort(positions)
+    for (prev, curr) in zip(sorted, @view sorted[2:end])
+        prev == curr &&
+            throw(A3ValidationError("$path: duplicate position $curr"))
+    end
+    return sorted
 end
 
 function validate_ranges(raw::AbstractVector, path::String)::Vector{Tuple{Int,Int}}
diff --git a/julia/RtemisA3/test/runtests.jl b/julia/RtemisA3/test/runtests.jl
index a647b57..273db8b 100644
--- a/julia/RtemisA3/test/runtests.jl
+++ b/julia/RtemisA3/test/runtests.jl
@@ -11,6 +11,7 @@ parse_err(f) = begin e = err(f); @test e isa A3ParseError; e end
 # ─── Normalization ────────────────────────────────────────────────────────────
 
 @testset "sort_dedup" begin
+    # sort_dedup is a normalization utility (future clean API), not a validator
     @test sort_dedup([3, 1, 2, 2, 1]) == [1, 2, 3]
     @test sort_dedup(Int[]) == Int[]
     @test sort_dedup([5]) == [5]
@@ -64,9 +65,14 @@ end
 
 @testset "site entries" begin
     a = create_a3("MAEPRQ";
-        site = Dict("test" => Dict("index" => [3, 1, 2, 2], "type" => ""))
+        site = Dict("test" => Dict("index" => [3, 1, 2], "type" => ""))
     )
-    @test a.annotations.site["test"].index == [1, 2, 3] # sorted + deduped
+    @test a.annotations.site["test"].index == [1, 2, 3] # sorted
+
+    # duplicate positions rejected
+    val_err(() -> create_a3("MAEPRQ";
+        site = Dict("test" => Dict("index" => [3, 1, 2, 2], "type" => ""))
+    ))
 
     # out of bounds
     val_err(() -> create_a3("MAEPRQ";
diff --git a/python/rtemis_a3/src/rtemis/a3/_models.py b/python/rtemis_a3/src/rtemis/a3/_models.py
index 237da75..cd5290e 100644
--- a/python/rtemis_a3/src/rtemis/a3/_models.py
+++ b/python/rtemis_a3/src/rtemis/a3/_models.py
@@ -19,9 +19,9 @@
 from pydantic.functional_validators import BeforeValidator
 
 from ._normalize import (
+    check_no_duplicate_positions,
     check_no_overlap,
     is_json_compatible,
-    sort_dedup,
     sort_ranges,
 )
 
@@ -66,7 +66,7 @@ class A3Position(BaseModel):
     def _normalize_positions(cls, v: Any) -> list[int]:
         if not isinstance(v, list):
             raise ValueError("index must be a list of positive integers")
-        return sort_dedup(v)
+        return check_no_duplicate_positions(v)
 
 
 class A3Range(BaseModel):
@@ -161,7 +161,7 @@ def _normalize_flex_index(cls, v: Any) -> list[int] | list[tuple[int, int]]:
                 raise ValueError(
                     "cannot mix integers and non-integers in index"
                 )
-            return sort_dedup(v)
+            return check_no_duplicate_positions(v)
         else:
             raise ValueError(
                 f"index elements must be integers or [start, end] pairs, "
diff --git a/python/rtemis_a3/src/rtemis/a3/_normalize.py b/python/rtemis_a3/src/rtemis/a3/_normalize.py
index eeb3f0c..6706ae5 100644
--- a/python/rtemis_a3/src/rtemis/a3/_normalize.py
+++ b/python/rtemis_a3/src/rtemis/a3/_normalize.py
@@ -10,6 +10,9 @@
 def sort_dedup(values: list[int]) -> list[int]:
     """Deduplicate and sort ascending.
 
+    Intended for use by a future ``clean``/``normalize`` API.
+    Strict parsers should use :func:`check_no_duplicate_positions` instead.
+
     Parameters
     ----------
     values : list[int]
@@ -23,6 +26,38 @@ def sort_dedup(values: list[int]) -> list[int]:
     return sorted(set(values))
 
 
+def check_no_duplicate_positions(values: list[int]) -> list[int]:
+    """Sort positions ascending and raise if any value appears more than once.
+
+    Parameters
+    ----------
+    values : list[int]
+        List of integers (expected positive).
+
+    Returns
+    -------
+    list[int]
+        Sorted list, guaranteed unique.
+
+    Raises
+    ------
+    ValueError
+        If an element is a bool or not an int, or a position is repeated.
+    """
+    for item in values:
+        if isinstance(item, bool):
+            raise ValueError("boolean values are not valid positions")
+        if not isinstance(item, int):
+            raise ValueError(
+                f"index must be a list of positive integers, got {type(item).__name__!r}"
+            )
+    sorted_v = sorted(values)
+    for i in range(1, len(sorted_v)):
+        if sorted_v[i] == sorted_v[i - 1]:
+            raise ValueError(f"duplicate position: {sorted_v[i]}")
+    return sorted_v
+
+
 def sort_ranges(ranges: list[tuple[int, int]]) -> list[tuple[int, int]]:
     """Sort ranges by start position, then end position for ties.
 
diff --git a/python/rtemis_a3/tests/test_models.py b/python/rtemis_a3/tests/test_models.py
index 9380931..b093323 100644
--- a/python/rtemis_a3/tests/test_models.py
+++ b/python/rtemis_a3/tests/test_models.py
@@ -27,9 +27,9 @@ def test_basic(self):
         assert entry.index == [1, 3, 5] # sorted
         assert entry.type == "activeSite"
 
-    def test_dedup(self):
-        entry = A3Position(index=[3, 3, 1])
-        assert entry.index == [1, 3]
+    def test_duplicate_positions_rejected(self):
+        with pytest.raises(ValidationError, match="duplicate position"):
+            A3Position(index=[3, 3, 1])
 
     def test_default_type(self):
         entry = A3Position(index=[1, 2])
diff --git a/r/man/read_A3json.Rd b/r/man/read_A3json.Rd
index 0fb57c9..fdcfe42 100644
--- a/r/man/read_A3json.Rd
+++ b/r/man/read_A3json.Rd
@@ -4,7 +4,7 @@
 \alias{read_A3json}
 \title{Read \code{A3} object from JSON file}
 \usage{
-read_A3json(filepath, verbosity = 0L)
+read_A3json(filepath, verbosity = 1L)
 }
 \arguments{
 \item{filepath}{Character: Path to JSON file.}
diff --git a/rust/src/normalization.rs b/rust/src/normalization.rs
index c757872..151546c 100644
--- a/rust/src/normalization.rs
+++ b/rust/src/normalization.rs
@@ -13,38 +13,27 @@
 /// Steps, in order:
 /// 1. Reject any position that is zero (positions are 1-based).
 /// 2. Sort ascending.
-/// 3. Remove duplicates.
+/// 3. Reject duplicate positions.
 ///
 /// Returns `Ok(Vec)` on success, or `Err(String)` describing the problem.
 ///
-/// In Rust, `Result` is the standard return type for fallible operations.
-/// `Ok(value)` means success; `Err(message)` means failure. The caller decides
-/// what to do — there are no exceptions.
-///
 /// The `field` parameter is the dot-separated JSON path (e.g.
 /// `"annotations.site.catalytic"`) used in error messages so the caller
 /// knows exactly where the problem is.
 pub fn normalize_positions(positions: Vec, field: &str) -> Result, String> {
-    // Check for zero values before sorting so we can report them clearly.
-    // `.any()` short-circuits on the first match and allocates nothing —
-    // more idiomatic and efficient than collecting into a Vec just to check
-    // `.is_empty()`.
     if positions.contains(&0) {
         return Err(format!(
             "{field}: positions must be ≥ 1 (1-based); found zero"
         ));
     }
 
-    // `mut` makes the binding mutable — Rust variables are immutable by default.
     let mut sorted = positions;
-
-    // Sort in-place. `.sort_unstable()` is slightly faster than `.sort()` and
-    // fine here because we deduplicate immediately after.
     sorted.sort_unstable();
 
-    // Remove consecutive duplicates. `dedup()` only removes *adjacent* equal
-    // values, which is why we sort first.
-    sorted.dedup();
+    // Reject duplicates — adjacent after sorting.
+    if let Some(dup) = sorted.windows(2).find(|w| w[0] == w[1]) {
+        return Err(format!("{field}: duplicate position: {}", dup[0]));
+    }
 
     Ok(sorted)
 }
@@ -157,11 +146,16 @@ mod tests {
     use super::*;
 
     #[test]
-    fn positions_sorted_and_deduped() {
-        let result = normalize_positions(vec![3, 1, 2, 1], "test").unwrap();
+    fn positions_sorted() {
+        let result = normalize_positions(vec![3, 1, 2], "test").unwrap();
         assert_eq!(result, vec![1, 2, 3]);
     }
 
+    #[test]
+    fn positions_rejects_duplicates() {
+        assert!(normalize_positions(vec![3, 1, 2, 1], "test").is_err());
+    }
+
     #[test]
     fn positions_rejects_zero() {
         assert!(normalize_positions(vec![0, 1, 2], "test").is_err());
diff --git a/typescript/jsr.json b/typescript/jsr.json
new file mode 100644
index 0000000..403b390
--- /dev/null
+++ b/typescript/jsr.json
@@ -0,0 +1,16 @@
+{
+  "$schema": "https://jsr.io/schema/config-file.v1.json",
+  "name": "@rtemis/a3",
+  "version": "0.1.3",
+  "license": "MPL-2.0",
+  "imports": {
+    "zod": "npm:zod@^4.3.6"
+  },
+  "exports": {
+    ".": "./src/index.ts",
+    "./browser": "./src/index-browser.ts"
+  },
+  "publish": {
+    "include": ["jsr.json", "package.json", "README.md", "LICENSE.txt", "src/**/*.ts"]
+  }
+}
diff --git a/typescript/package.json b/typescript/package.json
index 5448d60..af66f7d 100644
--- a/typescript/package.json
+++ b/typescript/package.json
@@ -42,7 +42,9 @@
     "format": "biome format src tests",
     "format:write": "biome format --write src tests",
     "lint:fix": "biome lint --write src tests",
-    "fix": "biome check --write src tests"
+    "fix": "biome check --write src tests",
+    "publish:jsr": "deno publish --allow-slow-types",
+    "publish:jsr:dry": "deno publish --allow-slow-types --dry-run"
   },
   "dependencies": {
     "zod": "^4.3.6"
diff --git a/typescript/src/schemas.ts b/typescript/src/schemas.ts
index 9055ac7..ada9463 100644
--- a/typescript/src/schemas.ts
+++ b/typescript/src/schemas.ts
@@ -1,13 +1,26 @@
 import { z } from "zod";
-import { isJsonCompatible, sortDedup, sortRanges } from "./normalize";
+import { isJsonCompatible, sortRanges } from "./normalize";
 
 // ── Primitives ────────────────────────────────────────────────────────────────
 
 // 1-based positive integer position
 const PositionSchema = z.number().int().min(1);
 
-// Sorted, deduplicated array of positions
-const PositionsSchema = z.array(PositionSchema).transform(sortDedup);
+// Sorted array of positions; duplicate positions are rejected
+const PositionsSchema = z
+  .array(PositionSchema)
+  .transform((arr) => [...arr].sort((a, b) => a - b))
+  .superRefine((sorted, ctx) => {
+    for (let i = 1; i < sorted.length; i++) {
+      // Report each duplicated value once, even when it occurs 3+ times.
+      if (sorted[i] === sorted[i - 1] && sorted[i] !== sorted[i - 2]) {
+        ctx.addIssue({
+          code: "custom",
+          message: `duplicate position: ${sorted[i]}`,
+        });
+      }
+    }
+  });
 
 // Inclusive [start, end] range tuple, start < end
 const RangeTupleSchema = z
diff --git a/typescript/tests/schemas.test.ts b/typescript/tests/schemas.test.ts
index 3e5d10e..f98bfdf 100644
--- a/typescript/tests/schemas.test.ts
+++ b/typescript/tests/schemas.test.ts
@@ -239,12 +239,12 @@ describe("annotation validation", () => {
     expect(result.success).toBe(false);
   });
 
-  it("deduplicates and sorts positions", () => {
+  it("sorts positions", () => {
     const result = A3InputSchema.safeParse({
       ...MINIMAL_VALID,
       annotations: {
         ...MINIMAL_VALID.annotations,
-        site: { A: { index: [3, 1, 3, 2], type: "" } },
+        site: { A: { index: [3, 1, 2], type: "" } },
       },
     });
     expect(result.success).toBe(true);
@@ -252,6 +252,17 @@ describe("annotation validation", () => {
       expect(result.data.annotations.site.A?.index).toEqual([1, 2, 3]);
     }
   });
+
+  it("rejects duplicate positions", () => {
+    const result = A3InputSchema.safeParse({
+      ...MINIMAL_VALID,
+      annotations: {
+        ...MINIMAL_VALID.annotations,
+        site: { A: { index: [3, 1, 3, 2], type: "" } },
+      },
+    });
+    expect(result.success).toBe(false);
+  });
 });
 
 describe("variant validation", () => {