From b65c1793df6678a9c2c3a55b764e77fbd681d127 Mon Sep 17 00:00:00 2001 From: Conrad Date: Wed, 10 Jun 2026 11:36:53 -0400 Subject: [PATCH 1/2] refactor: Port DISTANCE emitter to resolver pass metadata Resolve DISTANCE's two column operands in the ResolveOperatorRefs pass and read them from the attached metadata at emission time. A new ResolvedColumn dataclass carries the alias-qualified physical chrom, start, end, and strand fragments plus the backing Table config, stored in a columns channel on OperatorResolution alongside the existing slot and deferral channels. Alias-to-table derivation reuses the shared enclosing-alias-map helper introduced by the NEAREST port, so one implementation serves both operators, and the validation boundary gains a column arm. Coordinate canonicalization stays in the emitter, wrapping the raw endpoint fragments with the Table config the metadata carries, until the CanonicalizeCoordinates migration retires it. The emitter falls back to the legacy string-level resolution when no metadata is attached, because the DISTANCE test suites invoke the generator directly without the pipeline, and when the pass left an operand unresolved (a literal range or an unqualified column), so the historical diagnostics are still raised; the shared legacy helpers remain live for the spatial predicate paths. 
--- src/giql/generators/base.py | 109 +++++++++++------- src/giql/resolver.py | 214 +++++++++++++++++++++++++++++++++--- 2 files changed, 266 insertions(+), 57 deletions(-) diff --git a/src/giql/generators/base.py b/src/giql/generators/base.py index 631d8dc..b7b5e58 100644 --- a/src/giql/generators/base.py +++ b/src/giql/generators/base.py @@ -20,6 +20,7 @@ from giql.range_parser import RangeParser from giql.resolver import META_KEY from giql.resolver import OperatorResolution +from giql.resolver import ResolvedColumn from giql.resolver import ResolvedInterval from giql.resolver import ResolvedRef from giql.resolver import resolve_operator_refs @@ -374,64 +375,45 @@ def giqldisjoin_sql(self, expression: GIQLDisjoin) -> str: def giqldistance_sql(self, expression: GIQLDistance) -> str: """Generate SQL CASE expression for DISTANCE function. + Reads the :class:`~giql.resolver.ResolvedColumn` metadata that + ``ResolveOperatorRefs`` (pass 1) attaches to each interval operand. When + the pass deferred an operand (a literal range, an unqualified column, or + a tree the pass never reached) the emitter falls back to its historical + string-level resolution and raises the same diagnostics as before. + + Coordinate canonicalization stays here (epic #114 step 8 / issue #123): + the resolved metadata carries each operand's :class:`~giql.table.Table` + so the endpoints are wrapped identically. 
+ :param expression: GIQLDistance expression node :return: SQL CASE expression string calculating genomic distance """ - # Extract the two interval arguments - interval_a = expression.this - interval_b = expression.args.get("expression") - stranded = self._extract_bool_param(expression.args.get("stranded")) signed = self._extract_bool_param(expression.args.get("signed")) - # Get SQL representations - interval_a_sql = self.sql(interval_a) - interval_b_sql = self.sql(interval_b) + col_a = self._distance_operand(expression, "this", "first") + col_b = self._distance_operand(expression, "expression", "second") - # Check if we're dealing with column-to-column or column-to-literal - if "." in interval_a_sql and not interval_a_sql.startswith("'"): - # Column reference for interval_a - if stranded: - chrom_a, start_a, end_a, strand_a = self._get_column_refs( - interval_a_sql, None, include_strand=True - ) - else: - chrom_a, start_a, end_a = self._get_column_refs(interval_a_sql, None) - strand_a = None - else: - # Literal range - not implemented yet for interval_a - raise ValueError("Literal range as first argument not yet supported") - - if "." in interval_b_sql and not interval_b_sql.startswith("'"): - # Column reference for interval_b - if stranded: - chrom_b, start_b, end_b, strand_b = self._get_column_refs( - interval_b_sql, None, include_strand=True - ) - else: - chrom_b, start_b, end_b = self._get_column_refs(interval_b_sql, None) - strand_b = None - else: - # Literal range - not implemented yet - raise ValueError("Literal range as second argument not yet supported") + # Strand columns are consumed only in stranded mode (matching the + # historical 3-tuple vs 4-tuple branching in the legacy emitter). + strand_a = col_a.strand if stranded else None + strand_b = col_b.strand if stranded else None # Distance math below assumes 0-based half-open. 
- table_a = self._resolve_table(interval_a_sql) - table_b = self._resolve_table(interval_b_sql) - start_a = canonical_start(start_a, table_a) - end_a = canonical_end(end_a, table_a) - start_b = canonical_start(start_b, table_b) - end_b = canonical_end(end_b, table_b) + start_a = canonical_start(col_a.start, col_a.table) + end_a = canonical_end(col_a.end, col_a.table) + start_b = canonical_start(col_b.start, col_b.table) + end_b = canonical_end(col_b.end, col_b.table) # Generate CASE expression return self._generate_distance_case( - chrom_a, + col_a.chrom, start_a, end_a, strand_a, - chrom_b, + col_b.chrom, start_b, end_b, strand_b, @@ -439,6 +421,51 @@ def giqldistance_sql(self, expression: GIQLDistance) -> str: signed=signed, ) + def _distance_operand( + self, expression: GIQLDistance, arg: str, position: str + ) -> ResolvedColumn: + """Resolve one DISTANCE interval operand to a :class:`ResolvedColumn`. + + Prefers the metadata attached by ``ResolveOperatorRefs`` (pass 1). When + the pass deferred the operand — it could not resolve a literal range or + an unqualified column, or never ran (the generator was invoked directly + without the pass) — this falls back to the legacy ``_get_column_refs`` / + ``_resolve_table`` path, raising the historical literal-range error for + a non-column operand. + + :param expression: + GIQLDistance expression node + :param arg: + The operand arg key (``"this"`` or ``"expression"``) + :param position: + Human-readable operand position for the error message (``"first"`` + or ``"second"``) + :return: + The resolved column operand + :raises ValueError: + If the operand is a literal range rather than a column reference + """ + resolution = expression.meta.get(META_KEY) + if isinstance(resolution, OperatorResolution): + resolved = resolution.column(arg) + if resolved is not None: + return resolved + + # Deferred: fall back to string-level resolution. + operand_sql = self.sql(expression.args.get(arg)) + if "." 
in operand_sql and not operand_sql.startswith("'"): + chrom, start, end, strand = self._get_column_refs( + operand_sql, None, include_strand=True + ) + return ResolvedColumn( + chrom=chrom, + start=start, + end=end, + strand=strand, + table=self._resolve_table(operand_sql), + ) + raise ValueError(f"Literal range as {position} argument not yet supported") + def _generate_distance_case( self, chrom_a: str, diff --git a/src/giql/resolver.py b/src/giql/resolver.py index c2fac29..f7fffbc 100644 --- a/src/giql/resolver.py +++ b/src/giql/resolver.py @@ -47,9 +47,11 @@ ``this``) resolve to a :class:`ResolvedRef`. NEAREST's ``reference`` slot — whose accepted shapes are the non-table literal-range / column / implicit-outer forms — resolves to a :class:`ResolvedInterval` (epic #114, - step 3). DISTANCE and the spatial predicates still declare their column / - literal slots but defer resolution to their port issues (#119, #120), which - reuse :class:`ResolvedInterval` and :class:`SlotDeferral`. + step 3). DISTANCE's two *column* operands (``this`` / ``expression``) are + resolved to a :class:`ResolvedColumn` and attached through the separate + :attr:`OperatorResolution.columns` channel (epic #114, step 4). The spatial + predicates still declare their column / literal slots but defer resolution to + their port issue (#120), which reuses these resolved-metadata types. """ from __future__ import annotations @@ -85,6 +87,7 @@ "IntervalKind", "ResolvedRef", "ResolvedInterval", + "ResolvedColumn", "SlotDeferral", "OperatorResolution", "ResolutionError", @@ -234,6 +237,52 @@ class ResolvedInterval: table: Table | None +@dataclass(frozen=True, slots=True) +class ResolvedColumn: + """Resolved metadata for one column-shaped interval operand. + + Models the resolution of a DISTANCE operand (``a.interval``) — an + ``exp.Column`` qualified by a table alias — into the physical genomic + columns it references, qualified by that alias. 
Unlike a + :class:`ResolvedRef` (which names a whole relation), a column operand + resolves to concrete SQL column expressions ready to drop into the + emitter's distance arithmetic. + + Coordinate canonicalization stays in the emitter (epic #114 defers its + removal to step 8 / issue #123); this metadata therefore carries the + backing :class:`~giql.table.Table` so the emitter keeps wrapping the + endpoints with :func:`giql.canonical.canonical_start` / + :func:`giql.canonical.canonical_end` exactly as before. + + Attributes + ---------- + chrom : str + The chromosome column qualified by the operand's alias, e.g. + ``a."chrom"``. + start : str + The start column qualified by the operand's alias, e.g. ``a."start"``. + end : str + The end column qualified by the operand's alias, e.g. ``a."end"``. + strand : str | None + The strand column qualified by the operand's alias, e.g. + ``a."strand"``. Always resolved when the operand is a column (mirroring + the generator's ``_get_column_refs(..., include_strand=True)``); the + emitter consumes it only in stranded mode. + table : Table | None + The :class:`~giql.table.Table` config backing the operand's relation + (carrying its coordinate system), or ``None`` when the alias does not + resolve to a registered table (an unregistered relation is assumed + canonical, exactly as the generator's ``_resolve_table`` returns + ``None``). + """ + + chrom: str + start: str + end: str + strand: str | None + table: Table | None + + @dataclass(frozen=True, slots=True) class SlotDeferral: """Why a slot was left unresolved, so the emitter raises the right error. @@ -286,11 +335,17 @@ class OperatorResolution: slot was deferred, when the emitter needs that context to raise the historical diagnostic verbatim. Empty for slots that resolved or whose deferral the emitter can reclassify unaided. + columns : dict[str, ResolvedColumn] + Mapping from a *column* operand's arg key to its resolved + :class:`ResolvedColumn`. 
Carries DISTANCE's two interval operands; an + operand the pass could not resolve (a literal range, or an unqualified + column) is left out and the generator raises its existing error. """ operator: str slots: dict[str, ResolvedRef | ResolvedInterval] deferrals: dict[str, SlotDeferral] = field(default_factory=dict) + columns: dict[str, ResolvedColumn] = field(default_factory=dict) def slot(self, arg: str) -> ResolvedRef | ResolvedInterval | None: """Return the resolved metadata for slot *arg*, or ``None``.""" @@ -300,6 +355,10 @@ def deferral(self, arg: str) -> SlotDeferral | None: """Return the deferral recorded for slot *arg*, or ``None``.""" return self.deferrals.get(arg) + def column(self, arg: str) -> ResolvedColumn | None: + """Return the resolved column operand for *arg*, or ``None``.""" + return self.columns.get(arg) + def resolve_operator_refs(expression: exp.Expression, tables: Tables) -> exp.Expression: """Attach resolution metadata to every GIQL operator in *expression*. @@ -335,8 +394,8 @@ def resolve_operator_refs(expression: exp.Expression, tables: Tables) -> exp.Exp # Fallback for any operator a scope walk did not reach (e.g. if scope # construction failed). Resolving with no visible CTE names keeps the pass - # behavior-preserving: a missed CTE reference simply stays unresolved and - # the generator handles it on its existing path. + # behavior-preserving: a missed CTE reference or column operand simply stays + # unresolved and the generator handles it on its existing path. 
for node in expression.walk(): if isinstance(node, _OPERATORS) and id(node) not in seen: seen.add(id(node)) @@ -363,9 +422,10 @@ def _safe_traverse_scope(expression: exp.Expression) -> list[Scope]: def _resolve_operator( node: exp.Expression, tables: Tables, cte_names: frozenset[str] ) -> None: - """Resolve *node*'s reference slots and attach an :class:`OperatorResolution`.""" + """Resolve *node*'s slots and attach an :class:`OperatorResolution`.""" slots: dict[str, ResolvedRef | ResolvedInterval] = {} deferrals: dict[str, SlotDeferral] = {} + columns: dict[str, ResolvedColumn] = {} if isinstance(node, GIQLDisjoin): target_ref = _resolve_target(node.this, tables) @@ -388,11 +448,15 @@ def _resolve_operator( slots["reference"] = interval if deferral is not None: deferrals["reference"] = deferral - # DISTANCE and the spatial predicates declare only column / literal slots, - # whose resolution metadata is designed by their port issues; the pass + elif isinstance(node, GIQLDistance): + columns = _resolve_distance_columns(node, tables) + # The spatial predicates declare only column / literal slots, whose + # resolution metadata is designed by their port issue (#120); the pass # attaches an (empty-slot) resolution so every operator carries metadata. - node.meta[META_KEY] = OperatorResolution(type(node).__name__, slots, deferrals) + node.meta[META_KEY] = OperatorResolution( + type(node).__name__, slots, deferrals, columns + ) def _target_name(target: exp.Expression) -> str: @@ -426,6 +490,92 @@ def _resolve_target(target: exp.Expression, tables: Tables) -> ResolvedRef | Non ) +def _resolve_distance_columns( + node: GIQLDistance, tables: Tables +) -> dict[str, ResolvedColumn]: + """Resolve DISTANCE's two interval operands to :class:`ResolvedColumn`\\s. + + DISTANCE's ``this`` and ``expression`` operands are both column refs (per + its :attr:`~giql.expressions.GIQLDistance.GIQL_SLOTS`). 
Each is resolved + against the operand's table alias, mirroring the generator's historical + ``_get_column_refs`` / ``_resolve_table`` path: the alias's physical + genomic columns (from the registered :class:`~giql.table.Table` config, or + the canonical defaults) qualified by that alias, plus the backing table + config for the emitter's canonicalization wrapping. + + An operand the pass cannot resolve — a literal range, or an unqualified + column — is omitted; the generator then raises its historical + "Literal range as ... argument not yet supported" error. + """ + alias_map, current_table = _enclosing_alias_map(node) + columns: dict[str, ResolvedColumn] = {} + for arg in ("this", "expression"): + operand = node.args.get(arg) + resolved = _resolve_column_operand(operand, tables, alias_map, current_table) + if resolved is not None: + columns[arg] = resolved + return columns + + +def _resolve_column_operand( + operand: exp.Expression | None, + tables: Tables, + alias_map: dict[str, str], + current_table: str | None, +) -> ResolvedColumn | None: + """Resolve a single column operand, or ``None`` if it is not a column ref. + + Mirrors the generator's ``_get_column_refs`` / ``_resolve_table`` exactly: + the operand's alias is resolved to an underlying table name via the alias + map (with the current FROM table as fallback), the physical column names + come from that table's config (or the canonical defaults), and every column + is qualified by the operand's alias. + + Returns ``None`` for an operand that is not a qualified column (a literal + range or an unaliased column), deferring the diagnostic to the generator. + """ + if not isinstance(operand, exp.Column): + return None + alias = operand.table + if not alias: + # An unqualified column has no alias to qualify by; the generator + # treats it as a literal range and raises its existing error. 
+ return None + + table_name = alias_map.get(alias, current_table) + table = tables.get(table_name) if table_name else None + chrom_col, start_col, end_col, strand_col = _physical_cols(table) + return ResolvedColumn( + chrom=f'{alias}."{chrom_col}"', + start=f'{alias}."{start_col}"', + end=f'{alias}."{end_col}"', + strand=f'{alias}."{strand_col}"', + table=table, + ) + + +def _physical_cols(table: Table | None) -> tuple[str, str, str, str]: + """Return the ``(chrom, start, end, strand)`` physical column names. + + Mirrors ``_get_column_refs``: the canonical defaults unless the registered + :class:`~giql.table.Table` overrides them, with the strand column falling + back to the default when the table declares none. + """ + if table is None: + return ( + DEFAULT_CHROM_COL, + DEFAULT_START_COL, + DEFAULT_END_COL, + DEFAULT_STRAND_COL, + ) + return ( + table.chrom_col, + table.start_col, + table.end_col, + table.strand_col or DEFAULT_STRAND_COL, + ) + + def _resolve_disjoin_reference( reference: exp.Expression | None, target_ref: ResolvedRef, @@ -769,14 +919,19 @@ def validate_operator_refs(expression: exp.Expression) -> None: specs: tuple[SlotSpec, ...] = getattr(node, "GIQL_SLOTS", ()) for spec in specs: resolved = resolution.slots.get(spec.arg) - if resolved is None: - # Deferred: an unresolved slot is handled by the generator on - # its existing path (and may carry a SlotDeferral). + if resolved is not None: + if spec.is_ref_slot: + _validate_ref(resolved, spec, type(node).__name__) + else: + _validate_interval(resolved, spec, type(node).__name__) continue - if spec.is_ref_slot: - _validate_ref(resolved, spec, type(node).__name__) - else: - _validate_interval(resolved, spec, type(node).__name__) + # A column operand (DISTANCE) resolves through the separate columns + # channel rather than the slots map; validate it there. 
+ column = resolution.columns.get(spec.arg) + if column is not None: + _validate_column(column, spec, type(node).__name__) + # Otherwise deferred: an unresolved slot is handled by the generator + # on its existing path (and may carry a SlotDeferral). def _validate_interval(interval: object, spec: SlotSpec, operator: str) -> None: @@ -805,6 +960,33 @@ def _validate_interval(interval: object, spec: SlotSpec, operator: str) -> None: ) +def _validate_column(column: object, spec: SlotSpec, operator: str) -> None: + """Assert a single resolved column operand is well-formed against its slot. + + DISTANCE's two interval operands resolve to a :class:`ResolvedColumn` in the + :attr:`OperatorResolution.columns` channel. A column operand is only ever + attached for a slot whose declared shapes include ``"column"``; the endpoint + fragments must be SQL strings ready to drop into the emitter's arithmetic. + """ + if not isinstance(column, ResolvedColumn): + raise ResolutionError( + f"{operator} slot {spec.arg!r} carries {type(column).__name__}, " + "expected ResolvedColumn." + ) + if "column" not in spec.accepts: + raise ResolutionError( + f"{operator} slot {spec.arg!r} resolved to a column operand, which is " + f"not accepted by the slot (accepts {sorted(spec.accepts)})." + ) + if not all( + isinstance(part, str) for part in (column.chrom, column.start, column.end) + ): + raise ResolutionError( + f"{operator} slot {spec.arg!r} has malformed column endpoints; " + "expected SQL fragment strings for chrom/start/end." 
+ ) + + def _validate_ref(ref: object, spec: SlotSpec, operator: str) -> None: """Assert a single resolved reference is well-formed against its slot spec.""" if not isinstance(ref, ResolvedRef): From 716e2ac44cb574f3846a9de1672327974043d2cb Mon Sep 17 00:00:00 2001 From: Conrad Date: Wed, 10 Jun 2026 11:36:54 -0400 Subject: [PATCH 2/2] test: Cover DISTANCE operand resolution and emitter equivalence Pin column-operand resolution across aliased default columns, custom column names, custom and missing strand columns, unregistered aliases, literal-range deferral, and unqualified-column deferral, plus the new column validation arm. Add a transpilation regression asserting the resolver-driven path emits byte-identical SQL to direct generation. --- tests/test_distance_transpilation.py | 20 +++ tests/test_resolver.py | 237 +++++++++++++++++++++++++++ 2 files changed, 257 insertions(+) diff --git a/tests/test_distance_transpilation.py b/tests/test_distance_transpilation.py index 69b0b0c..380aa19 100644 --- a/tests/test_distance_transpilation.py +++ b/tests/test_distance_transpilation.py @@ -5,6 +5,7 @@ from sqlglot import parse_one +from giql import transpile from giql.dialect import GIQLDialect from giql.generators import BaseGIQLGenerator @@ -69,6 +70,25 @@ def test_distance_transpilation_postgres(self): assert output == expected, f"Expected:\n{expected}\n\nGot:\n{output}" + def test_distance_resolver_path_matches_direct_generation(self): + """ + GIVEN a DISTANCE query over registered default-convention tables + WHEN transpiling through the full pipeline (the resolver pass) versus + generating directly from the parsed AST + THEN both paths should emit byte-identical SQL, proving the + ResolvedColumn metadata path reproduces the legacy string path + """ + query = ( + "SELECT DISTANCE(a.interval, b.interval) AS dist " + "FROM features_a a, features_b b" + ) + + via_transpile = transpile(query, tables=["features_a", "features_b"]) + ast = parse_one(query, 
dialect=GIQLDialect) + via_generate = BaseGIQLGenerator().generate(ast) + + assert via_transpile == via_generate + def test_distance_transpilation_signed_duckdb(self): """ GIVEN a GIQL query with DISTANCE(..., signed := true) diff --git a/tests/test_resolver.py b/tests/test_resolver.py index ab50a28..9f7b85a 100644 --- a/tests/test_resolver.py +++ b/tests/test_resolver.py @@ -18,6 +18,7 @@ from giql.resolver import META_KEY from giql.resolver import OperatorResolution from giql.resolver import ResolutionError +from giql.resolver import ResolvedColumn from giql.resolver import ResolvedInterval from giql.resolver import ResolvedRef from giql.resolver import resolve_operator_refs @@ -53,6 +54,11 @@ def _nearest_node(ast: exp.Expression) -> GIQLNearest: return next(n for n in ast.walk() if isinstance(n, GIQLNearest)) +def _distance_node(ast: exp.Expression) -> GIQLDistance: + """Return the single GIQLDistance node reachable from an annotated AST.""" + return next(n for n in ast.walk() if isinstance(n, GIQLDistance)) + + class TestResolveOperatorRefs: """Tests for the resolve_operator_refs pass.""" @@ -656,6 +662,237 @@ def test_resolve_operator_refs_with_arbitrary_table_config(self, names): validate_operator_refs(ast) +class TestResolveDistanceColumns: + """Tests for DISTANCE interval-operand (column) resolution.""" + + def test_resolve_distance_columns_resolves_aliased_operands(self): + """Test that DISTANCE operands resolve to alias-qualified default columns. + + Given: + A DISTANCE over two aliased registered tables with default columns. + When: + Running the resolve pass. + Then: + It should attach a ResolvedColumn per operand, qualified by the + operand alias and backed by the registered Table config. 
+ """ + # Arrange + tables = _tables("intervals_a", "intervals_b") + ast = parse_one( + "SELECT DISTANCE(a.interval, b.interval) AS dist " + "FROM intervals_a a, intervals_b b", + dialect=GIQLDialect, + ) + + # Act + resolve_operator_refs(ast, tables) + + # Assert + resolution = _distance_node(ast).meta[META_KEY] + assert resolution.column("this") == ResolvedColumn( + chrom='a."chrom"', + start='a."start"', + end='a."end"', + strand='a."strand"', + table=tables.get("intervals_a"), + ) + assert resolution.column("expression") == ResolvedColumn( + chrom='b."chrom"', + start='b."start"', + end='b."end"', + strand='b."strand"', + table=tables.get("intervals_b"), + ) + + def test_resolve_distance_columns_honors_custom_column_names(self): + """Test that DISTANCE operands pick up a table's custom column names. + + Given: + A DISTANCE whose operand's registered table declares custom + chrom/start/end column names. + When: + Running the resolve pass. + Then: + The resolved column should carry the custom physical names, + qualified by the operand alias. + """ + # Arrange + tables = Tables() + tables.register( + "features_a", + Table("features_a", chrom_col="chr", start_col="lo", end_col="hi"), + ) + tables.register("features_b", Table("features_b")) + ast = parse_one( + "SELECT DISTANCE(a.interval, b.interval) FROM features_a a, features_b b", + dialect=GIQLDialect, + ) + + # Act + resolve_operator_refs(ast, tables) + + # Assert + col = _distance_node(ast).meta[META_KEY].column("this") + assert col.chrom == 'a."chr"' + assert col.start == 'a."lo"' + assert col.end == 'a."hi"' + assert col.table is tables.get("features_a") + + def test_resolve_distance_columns_resolves_custom_strand_column(self): + """Test that a DISTANCE operand resolves a table's custom strand column. + + Given: + A DISTANCE whose operand's table declares a custom strand column. + When: + Running the resolve pass. 
+ Then: + The resolved column's strand member should name that custom column, + qualified by the operand alias. + """ + # Arrange + tables = Tables() + tables.register( + "features_a", + Table("features_a", strand_col="dir"), + ) + tables.register("features_b", Table("features_b")) + ast = parse_one( + "SELECT DISTANCE(a.interval, b.interval) FROM features_a a, features_b b", + dialect=GIQLDialect, + ) + + # Act + resolve_operator_refs(ast, tables) + + # Assert + col = _distance_node(ast).meta[META_KEY].column("this") + assert col.strand == 'a."dir"' + + def test_resolve_distance_columns_defaults_when_table_strandless(self): + """Test that a strandless table still yields a default strand column. + + Given: + A DISTANCE operand whose table declares no strand column. + When: + Running the resolve pass. + Then: + The resolved column's strand should fall back to the default + strand name, mirroring the generator's _get_column_refs. + """ + # Arrange + tables = Tables() + tables.register("features_a", Table("features_a", strand_col=None)) + tables.register("features_b", Table("features_b")) + ast = parse_one( + "SELECT DISTANCE(a.interval, b.interval) FROM features_a a, features_b b", + dialect=GIQLDialect, + ) + + # Act + resolve_operator_refs(ast, tables) + + # Assert + col = _distance_node(ast).meta[META_KEY].column("this") + assert col.strand == 'a."strand"' + + def test_resolve_distance_columns_unregistered_alias_has_no_table(self): + """Test that an operand over an unregistered relation carries no Table. + + Given: + A DISTANCE over subquery-derived relations that are not registered + tables. + When: + Running the resolve pass. + Then: + The resolved column should carry default column names and a None + Table config, so the emitter applies no canonicalization. 
+ """ + # Arrange + ast = parse_one( + "SELECT DISTANCE(a.interval, b.interval) FROM " + "(SELECT 1) AS a, (SELECT 1) AS b", + dialect=GIQLDialect, + ) + + # Act + resolve_operator_refs(ast, Tables()) + + # Assert + col = _distance_node(ast).meta[META_KEY].column("this") + assert col.chrom == 'a."chrom"' + assert col.table is None + + def test_resolve_distance_columns_defers_literal_range_operand(self): + """Test that a literal-range operand is left unresolved. + + Given: + A DISTANCE whose second operand is a literal genomic range string. + When: + Running the resolve pass. + Then: + It should resolve the column operand but leave the literal operand + unresolved, deferring the diagnostic to the generator. + """ + # Arrange + ast = parse_one( + "SELECT DISTANCE(a.interval, 'chr1:1000-2000') FROM features_a a", + dialect=GIQLDialect, + ) + + # Act + resolve_operator_refs(ast, _tables("features_a")) + + # Assert + resolution = _distance_node(ast).meta[META_KEY] + assert resolution.column("this") is not None + assert resolution.column("expression") is None + + def test_resolve_distance_columns_defers_unqualified_column(self): + """Test that an unqualified column operand is left unresolved. + + Given: + A DISTANCE whose first operand is a column with no table qualifier. + When: + Running the resolve pass. + Then: + It should leave that operand unresolved, deferring to the generator + (which treats an unqualified operand as a literal range). + """ + # Arrange + ast = parse_one( + "SELECT DISTANCE(interval, b.interval) FROM features_a a, features_b b", + dialect=GIQLDialect, + ) + + # Act + resolve_operator_refs(ast, _tables("features_a", "features_b")) + + # Assert + resolution = _distance_node(ast).meta[META_KEY] + assert resolution.column("this") is None + assert resolution.column("expression") is not None + + def test_resolve_distance_columns_pass_validates(self): + """Test that a DISTANCE-annotated tree passes the validation boundary. 
+ + Given: + A DISTANCE annotated with resolved column metadata. + When: + Running the validation boundary. + Then: + It should not raise (column operands are not ref slots). + """ + # Arrange + ast = parse_one( + "SELECT DISTANCE(a.interval, b.interval) FROM features_a a, features_b b", + dialect=GIQLDialect, + ) + resolve_operator_refs(ast, _tables("features_a", "features_b")) + + # Act & assert + validate_operator_refs(ast) + + class TestValidateOperatorRefs: """Tests for the validate_operator_refs validation boundary."""