diff --git a/src/giql/generators/base.py b/src/giql/generators/base.py index b7b5e58..189a14e 100644 --- a/src/giql/generators/base.py +++ b/src/giql/generators/base.py @@ -557,6 +557,42 @@ def _generate_distance_case( f"ELSE ({start_a} - {end_b}) END END" ) + def _predicate_operand( + self, expression: exp.Expression, arg: str, ctx_table: str | None + ) -> ResolvedColumn: + """Return the :class:`ResolvedColumn` for a spatial predicate operand. + + Reads the column resolution attached to *expression* by the + ``ResolveOperatorRefs`` pass (the metadata-driven path used by the full + transpile pipeline). When the pass did not annotate the node — e.g. a + generator invoked on a bare AST without running pass 1 — it falls back to + the generator's historical ``_current_table`` / alias-map resolution so + direct ``generate()`` callers keep their existing behavior. Both paths + format physical column references identically, so the emitted SQL is the + same regardless of which produced the :class:`ResolvedColumn`. + + :param expression: + The spatial predicate node carrying the resolution metadata. + :param arg: + The operand slot key (``"this"`` or ``"expression"``). + :param ctx_table: + The current-table resolution context for the fallback path — + ``self._current_table`` for a literal-range operand, ``None`` for a + column-to-column operand. + :return: + The resolved column metadata. + """ + resolution = expression.meta.get(META_KEY) + if isinstance(resolution, OperatorResolution): + resolved = resolution.column(arg) + if resolved is not None: + return resolved + + column_ref = self.sql(expression, arg) + chrom, start, end = self._get_column_refs(column_ref, ctx_table) + table = self._resolve_table(column_ref, ctx_table) + return ResolvedColumn(chrom=chrom, start=start, end=end, strand="", table=table) + def _generate_spatial_op(self, expression: exp.Binary, op_type: str) -> str: """Generate SQL for a spatial operation. @@ -567,18 +603,20 @@ def _generate_spatial_op(self, expression: exp.Binary, op_type: str) -> str: :return: SQL predicate string """ - left = self.sql(expression, "this") right_raw = self.sql(expression, "expression") # Check if right side is a column reference or a literal range string if "." in right_raw and not right_raw.startswith("'"): # Column-to-column join (e.g., a.interval INTERSECTS b.interval) - return self._generate_column_join(left, right_raw, op_type) + left = self._predicate_operand(expression, "this", None) + right = self._predicate_operand(expression, "expression", None) + return self._generate_column_join(left, right, op_type) else: # Literal range string (e.g., interval INTERSECTS 'chr1:1000-2000') try: range_str = right_raw.strip("'\"") parsed_range = RangeParser.parse(range_str).to_zero_based_half_open() + left = self._predicate_operand(expression, "this", self._current_table) return self._generate_range_predicate(left, parsed_range, op_type) except Exception as e: raise ValueError( @@ -587,14 +625,15 @@ def _generate_spatial_op(self, expression: exp.Binary, op_type: str) -> str: def _generate_range_predicate( self, - column_ref: str, + column: ResolvedColumn, parsed_range: ParsedRange, op_type: str, ) -> str: """Generate SQL predicate for a range operation. - :param column_ref: - Column reference (e.g., 'v.interval' or 'interval') + :param column: + Resolved column operand (physical chrom/start/end fragments plus the + backing :class:`~giql.table.Table` config for canonicalization). :param parsed_range: Parsed genomic range :param op_type: @@ -602,13 +641,12 @@ def _generate_range_predicate( :return: SQL predicate string """ - # Get column references - chrom_col, raw_start_col, raw_end_col = self._get_column_refs( - column_ref, self._current_table - ) - table = self._resolve_table(column_ref, self._current_table) - start_col = canonical_start(raw_start_col, table) - end_col = canonical_end(raw_end_col, table) + # Canonicalize the raw physical endpoints to 0-based half-open. The + # alias-qualified column fragments come pre-resolved on the + # ResolvedColumn; canonicalization stays here (epic #114 step #123). + chrom_col = column.chrom + start_col = canonical_start(column.start, column.table) + end_col = canonical_end(column.end, column.table) chrom = parsed_range.chromosome start = parsed_range.start @@ -648,28 +686,28 @@ def _generate_range_predicate( raise ValueError(f"Unknown operation: {op_type}") - def _generate_column_join(self, left_col: str, right_col: str, op_type: str) -> str: + def _generate_column_join( + self, left: ResolvedColumn, right: ResolvedColumn, op_type: str + ) -> str: """Generate SQL for column-to-column spatial joins. - :param left_col: - Left column reference (e.g., 'a.interval') - :param right_col: - Right column reference (e.g., 'b.interval') + :param left: + Resolved left column operand (e.g., for 'a.interval'). + :param right: + Resolved right column operand (e.g., for 'b.interval'). :param op_type: 'intersects', 'contains', or 'within' :return: SQL predicate string """ - # Get column references for both sides - # Pass None to let _get_column_refs extract and resolve table from column ref - l_chrom, raw_l_start, raw_l_end = self._get_column_refs(left_col, None) - r_chrom, raw_r_start, raw_r_end = self._get_column_refs(right_col, None) - l_table = self._resolve_table(left_col) - r_table = self._resolve_table(right_col) - l_start = canonical_start(raw_l_start, l_table) - l_end = canonical_end(raw_l_end, l_table) - r_start = canonical_start(raw_r_start, r_table) - r_end = canonical_end(raw_r_end, r_table) + # Canonicalize each side's raw physical endpoints; the alias-qualified + # chrom/start/end fragments come pre-resolved on the ResolvedColumns. + l_chrom = left.chrom + r_chrom = right.chrom + l_start = canonical_start(left.start, left.table) + l_end = canonical_end(left.end, left.table) + r_start = canonical_start(right.start, right.table) + r_end = canonical_end(right.end, right.table) if op_type == "intersects": # Ranges overlap if: chrom1 = chrom2 AND start1 < end2 AND end1 > start2 @@ -709,11 +747,15 @@ def _generate_spatial_set(self, expression: SpatialSetPredicate) -> str: :return: SQL predicate string """ - column_ref = self.sql(expression, "this") operator = expression.args["operator"] quantifier = expression.args["quantifier"] ranges = expression.args["ranges"] + # Resolve the (single) left column operand once; every range condition + # compares against the same column. The set predicate's ranges are + # always literals, so only this operand needs resolution. + column = self._predicate_operand(expression, "this", self._current_table) + # Parse all ranges parsed_ranges = [] for range_expr in ranges: @@ -726,7 +768,7 @@ def _generate_spatial_set(self, expression: SpatialSetPredicate) -> str: # Generate conditions for each range conditions = [] for parsed_range in parsed_ranges: - condition = self._generate_range_predicate(column_ref, parsed_range, op_type) + condition = self._generate_range_predicate(column, parsed_range, op_type) conditions.append(condition) # Combine with AND (for ALL) or OR (for ANY) diff --git a/src/giql/resolver.py b/src/giql/resolver.py index f7fffbc..2c6889a 100644 --- a/src/giql/resolver.py +++ b/src/giql/resolver.py @@ -50,8 +50,12 @@ step 3). DISTANCE's two *column* operands (``this`` / ``expression``) are resolved to a :class:`ResolvedColumn` and attached through the separate :attr:`OperatorResolution.columns` channel (epic #114, step 4). The spatial - predicates still declare their column / literal slots but defer resolution to - their port issue (#120), which reuses these resolved-metadata types. + predicates' column operands (:class:`~giql.expressions.Intersects` / + ``Contains`` / ``Within`` ``this`` and column-shaped ``expression``, and + ``SpatialSetPredicate`` ``this``) resolve onto the same + :class:`ResolvedColumn` type through that columns channel (epic #114, + step #120); a literal-range ``expression`` slot stays deferred and the emitter + parses it on its existing path. """ from __future__ import annotations @@ -337,9 +341,11 @@ class OperatorResolution: deferral the emitter can reclassify unaided. columns : dict[str, ResolvedColumn] Mapping from a *column* operand's arg key to its resolved - :class:`ResolvedColumn`. Carries DISTANCE's two interval operands; an - operand the pass could not resolve (a literal range, or an unqualified - column) is left out and the generator raises its existing error. + :class:`ResolvedColumn`. Carries DISTANCE's two interval operands and + the spatial predicates' column operands; an operand the pass could not + resolve (a literal range, or an unqualified column outside a + current-table context) is left out and the generator raises its existing + error. """ operator: str @@ -450,9 +456,8 @@ def _resolve_operator( deferrals["reference"] = deferral elif isinstance(node, GIQLDistance): columns = _resolve_distance_columns(node, tables) - # The spatial predicates declare only column / literal slots, whose - # resolution metadata is designed by their port issue (#120); the pass - # attaches an (empty-slot) resolution so every operator carries metadata. + elif isinstance(node, (Intersects, Contains, Within, SpatialSetPredicate)): + columns = _resolve_predicate_columns(node, tables) node.meta[META_KEY] = OperatorResolution( type(node).__name__, slots, deferrals, columns @@ -517,6 +522,129 @@ def _resolve_distance_columns( return columns +def _resolve_predicate_columns( + node: exp.Expression, tables: Tables +) -> dict[str, ResolvedColumn]: + """Resolve a spatial predicate's column operands to :class:`ResolvedColumn`. + + Mirrors the generator's historical ``_generate_spatial_op`` operand handling + exactly so the emitted SQL is byte-identical: + + * A literal-range predicate (``column INTERSECTS 'chr1:...'``) and every + ``SpatialSetPredicate`` resolve only their left ``this`` operand, against + the FROM-clause current table — matching ``_generate_range_predicate``, + which passes ``self._current_table`` as the resolution context. + * A column-to-column predicate (``a.interval CONTAINS b.interval``) resolves + both ``this`` and ``expression`` with *no* current-table context, so each + operand resolves through the alias map — matching ``_generate_column_join``, + which passes ``None`` as the context for both sides. + + Most column-to-column ``INTERSECTS`` joins never reach here: the + :class:`~giql.transformer.IntersectsBinnedJoinTransformer` rewrites them into + binned equi-joins before this pass runs, deleting the ``Intersects`` node. + Column-to-column ``CONTAINS`` / ``WITHIN`` (which that transformer leaves + untouched) and non-join ``INTERSECTS`` shapes still reach the emitter, so the + column-join branch is exercised. + + Reuses the shared ``_enclosing_alias_map`` (FROM/JOIN alias derivation) and + ``_physical_cols`` helpers; the predicate-specific bit lives in + :func:`_resolve_predicate_column`, whose current-table-vs-alias-only + precedence differs from DISTANCE's ``_resolve_column_operand``. + """ + alias_map, current_table = _enclosing_alias_map(node) + this_node = node.this + + if isinstance(node, SpatialSetPredicate): + if not isinstance(this_node, exp.Column): + return {} + return { + "this": _resolve_predicate_column( + this_node, current_table, alias_map, current_table, tables + ) + } + + right = node.args.get("expression") + if isinstance(right, exp.Column) and right.table: + # Column-to-column: resolve both operands with no current-table context, + # so each resolves through the alias map. + columns: dict[str, ResolvedColumn] = {} + if isinstance(this_node, exp.Column): + columns["this"] = _resolve_predicate_column( + this_node, None, alias_map, current_table, tables + ) + columns["expression"] = _resolve_predicate_column( + right, None, alias_map, current_table, tables + ) + return columns + + # Literal-range predicate: resolve only the left operand, anchored to the + # FROM-clause current table. + if not isinstance(this_node, exp.Column): + return {} + return { + "this": _resolve_predicate_column( + this_node, current_table, alias_map, current_table, tables + ) + } + + +def _resolve_predicate_column( + column: exp.Column, + ctx_table: str | None, + alias_map: dict[str, str], + current_table: str | None, + tables: Tables, +) -> ResolvedColumn: + """Resolve one spatial-predicate column operand to a :class:`ResolvedColumn`. + + Replicates the generator's ``_get_column_refs`` / ``_resolve_table`` (and the + ``_resolve_table_name`` precedence underneath them) exactly: + + * the operand's table qualifier is kept verbatim for output formatting; + * the *config* table name is resolved by precedence — an explicit + ``ctx_table`` (the caller-supplied current table) wins; otherwise a + qualified operand resolves through the alias map (falling back to + ``current_table``), and an unqualified operand resolves to no table; + * physical column names come from the resolved :class:`~giql.table.Table` + config via the shared ``_physical_cols`` helper, or the canonical defaults + when no table backs the operand. + + This is the minimal predicate-specific layer DISTANCE's + ``_resolve_column_operand`` cannot serve: it formats unqualified operands + (bare ``interval``) and lets the literal-range path anchor to ``ctx_table`` + directly, where ``_resolve_column_operand`` requires a qualifier and always + routes through the alias map. + """ + alias = column.table or "" + qualified = bool(alias) + + if ctx_table: + table_name: str | None = ctx_table + elif qualified: + table_name = alias_map.get(alias, current_table) + else: + table_name = None + + table = tables.get(table_name) if table_name else None + chrom_col, start_col, end_col, strand_col = _physical_cols(table) + + if qualified: + return ResolvedColumn( + chrom=f'{alias}."{chrom_col}"', + start=f'{alias}."{start_col}"', + end=f'{alias}."{end_col}"', + strand=f'{alias}."{strand_col}"', + table=table, + ) + return ResolvedColumn( + chrom=f'"{chrom_col}"', + start=f'"{start_col}"', + end=f'"{end_col}"', + strand=f'"{strand_col}"', + table=table, + ) + + def _resolve_column_operand( operand: exp.Expression | None, tables: Tables, diff --git a/tests/test_resolver.py b/tests/test_resolver.py index 9f7b85a..c92afae 100644 --- a/tests/test_resolver.py +++ b/tests/test_resolver.py @@ -11,10 +11,13 @@ from sqlglot import parse_one from giql.dialect import GIQLDialect +from giql.expressions import Contains from giql.expressions import GIQLDisjoin from giql.expressions import GIQLDistance from giql.expressions import GIQLNearest +from giql.expressions import Intersects from giql.expressions import SpatialSetPredicate +from giql.expressions import Within from giql.resolver import META_KEY from giql.resolver import OperatorResolution from giql.resolver import ResolutionError @@ -59,6 +62,11 @@ def _distance_node(ast: exp.Expression) -> GIQLDistance: return next(n for n in ast.walk() if isinstance(n, GIQLDistance)) +def _predicate_node(ast: exp.Expression, node_type) -> exp.Expression: + """Return the single predicate node of *node_type* reachable from the AST.""" + return next(n for n in ast.walk() if isinstance(n, node_type)) + + class TestResolveOperatorRefs: """Tests for the resolve_operator_refs pass.""" @@ -893,6 +901,223 @@ def test_resolve_distance_columns_pass_validates(self): validate_operator_refs(ast) +class TestResolvePredicateColumns: + """Tests for spatial-predicate column-operand resolution.""" + + def test_resolve_aliased_literal_predicate_qualifies_columns(self): + """Test that an aliased literal-range predicate qualifies its column. + + Given: + A literal-range INTERSECTS over an aliased FROM table. + When: + Running the resolve pass. + Then: + It should resolve the left operand to alias-qualified physical + columns carrying the table config, and attach no expression column. + """ + # Arrange + ast = parse_one( + "SELECT * FROM variants AS v WHERE v.interval INTERSECTS 'chr1:1000-2000'", + dialect=GIQLDialect, + ) + + # Act + resolve_operator_refs(ast, _tables("variants")) + + # Assert + node = _predicate_node(ast, Intersects) + column = node.meta[META_KEY].column("this") + assert column == ResolvedColumn( + chrom='v."chrom"', + start='v."start"', + end='v."end"', + strand='v."strand"', + table=_tables("variants").get("variants"), + ) + assert node.meta[META_KEY].column("expression") is None + + def test_resolve_undotted_predicate_uses_current_table(self): + """Test that an undotted predicate operand resolves to the FROM table. + + Given: + A literal-range CONTAINS whose left operand is the bare genomic + pseudo-column over a single-table FROM clause. + When: + Running the resolve pass. + Then: + It should resolve the operand to unqualified physical columns backed + by the FROM-clause table config. + """ + # Arrange + ast = parse_one( + "SELECT * FROM peaks WHERE interval CONTAINS 'chr1:1500'", + dialect=GIQLDialect, + ) + + # Act + resolve_operator_refs(ast, _tables("peaks")) + + # Assert + column = _predicate_node(ast, Contains).meta[META_KEY].column("this") + assert column.chrom == '"chrom"' + assert column.start == '"start"' + assert column.end == '"end"' + assert column.table.name == "peaks" + + def test_resolve_custom_columns_in_predicate(self): + """Test that a predicate operand carries the table's custom column names. + + Given: + A literal-range WITHIN over a table configured with custom genomic + column names. + When: + Running the resolve pass. + Then: + It should resolve the operand fragments to the custom physical + columns. + """ + # Arrange + tables = Tables() + tables.register( + "peaks", + Table( + "peaks", + chrom_col="chromosome", + start_col="start_pos", + end_col="end_pos", + strand_col="sense", + ), + ) + ast = parse_one( + "SELECT * FROM peaks WHERE interval WITHIN 'chr1:1000-2000'", + dialect=GIQLDialect, + ) + + # Act + resolve_operator_refs(ast, tables) + + # Assert + column = _predicate_node(ast, Within).meta[META_KEY].column("this") + assert column.chrom == '"chromosome"' + assert column.start == '"start_pos"' + assert column.end == '"end_pos"' + assert column.strand == '"sense"' + + def test_resolve_column_to_column_predicate_resolves_both_operands(self): + """Test that a column-to-column predicate resolves both operands. + + Given: + A column-to-column WITHIN joining two aliased tables (a shape the + binned-join transformer leaves untouched, so it reaches the pass). + When: + Running the resolve pass. + Then: + It should resolve both operands through the alias map to their + respective table configs. + """ + # Arrange + ast = parse_one( + "SELECT a.name, b.name FROM intervals_a a, intervals_b b " + "WHERE a.interval WITHIN b.interval", + dialect=GIQLDialect, + ) + + # Act + resolve_operator_refs(ast, _tables("intervals_a", "intervals_b")) + + # Assert + resolution = _predicate_node(ast, Within).meta[META_KEY] + this_col = resolution.column("this") + expr_col = resolution.column("expression") + assert this_col.chrom == 'a."chrom"' + assert this_col.table.name == "intervals_a" + assert expr_col.chrom == 'b."chrom"' + assert expr_col.table.name == "intervals_b" + + def test_resolve_set_predicate_resolves_left_operand(self): + """Test that a set predicate resolves only its left column operand. + + Given: + An INTERSECTS ANY set predicate over a single FROM table. + When: + Running the resolve pass. + Then: + It should resolve the left operand to the FROM table's columns and + attach no per-range column metadata. + """ + # Arrange + ast = parse_one( + "SELECT * FROM peaks " + "WHERE interval INTERSECTS ANY('chr1:1000-2000', 'chr2:500-1000')", + dialect=GIQLDialect, + ) + + # Act + resolve_operator_refs(ast, _tables("peaks")) + + # Assert + resolution = _predicate_node(ast, SpatialSetPredicate).meta[META_KEY] + assert resolution.column("this").chrom == '"chrom"' + assert set(resolution.columns) == {"this"} + + def test_resolve_predicate_over_unregistered_table_uses_defaults(self): + """Test that a predicate over an unregistered table defers to defaults. + + Given: + A literal-range INTERSECTS over a FROM table that is not registered. + When: + Running the resolve pass with no table configs. + Then: + It should resolve the operand to canonical default columns with no + backing table config. + """ + # Arrange + ast = parse_one( + "SELECT * FROM peaks WHERE interval INTERSECTS 'chr1:1000-2000'", + dialect=GIQLDialect, + ) + + # Act + resolve_operator_refs(ast, Tables()) + + # Assert + column = _predicate_node(ast, Intersects).meta[META_KEY].column("this") + assert column == ResolvedColumn( + chrom='"chrom"', + start='"start"', + end='"end"', + strand='"strand"', + table=None, + ) + + def test_resolve_predicate_without_base_from_table_defers(self): + """Test that an undotted predicate with no base FROM table resolves to defaults. + + Given: + A literal-range INTERSECTS in a SELECT whose FROM clause is a derived + table, so the enclosing scope exposes no base FROM table. + When: + Running the resolve pass. + Then: + It should resolve the undotted operand to unqualified default + columns with no backing table config. + """ + # Arrange + ast = parse_one( + "SELECT * FROM (SELECT * FROM peaks) AS d " + "WHERE interval INTERSECTS 'chr1:1000-2000'", + dialect=GIQLDialect, + ) + + # Act + resolve_operator_refs(ast, _tables("peaks")) + + # Assert + column = _predicate_node(ast, Intersects).meta[META_KEY].column("this") + assert column.chrom == '"chrom"' + assert column.table is None + + class TestValidateOperatorRefs: """Tests for the validate_operator_refs validation boundary."""