Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 71 additions & 29 deletions src/giql/generators/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,42 @@ def _generate_distance_case(
f"ELSE ({start_a} - {end_b}) END END"
)

def _predicate_operand(
    self, expression: exp.Expression, arg: str, ctx_table: str | None
) -> ResolvedColumn:
    """Return the :class:`ResolvedColumn` for one spatial predicate operand.

    Looks first at the metadata stored on *expression* under ``META_KEY``.
    If that entry is an :class:`OperatorResolution` and it holds a column
    for *arg*, that column is returned unchanged. Otherwise the operand is
    rendered to SQL and resolved through the generator's own
    ``_get_column_refs`` / ``_resolve_table`` helpers, so a node without
    usable metadata still yields a :class:`ResolvedColumn`.

    :param expression:
        The spatial predicate node whose operand is being resolved.
    :param arg:
        The operand slot key (``"this"`` or ``"expression"``).
    :param ctx_table:
        Table context handed to the fallback helpers; callers in this
        file pass ``self._current_table`` for literal-range predicates
        and ``None`` for column-to-column predicates.
    :return:
        The resolved column metadata. The fallback path leaves ``strand``
        as an empty string.
    """
    annotation = expression.meta.get(META_KEY)
    annotated = (
        annotation.column(arg)
        if isinstance(annotation, OperatorResolution)
        else None
    )
    if annotated is not None:
        return annotated

    # No usable metadata: resolve from the rendered operand text instead.
    operand_sql = self.sql(expression, arg)
    chrom_ref, start_ref, end_ref = self._get_column_refs(operand_sql, ctx_table)
    return ResolvedColumn(
        chrom=chrom_ref,
        start=start_ref,
        end=end_ref,
        strand="",
        table=self._resolve_table(operand_sql, ctx_table),
    )

def _generate_spatial_op(self, expression: exp.Binary, op_type: str) -> str:
"""Generate SQL for a spatial operation.

Expand All @@ -567,18 +603,20 @@ def _generate_spatial_op(self, expression: exp.Binary, op_type: str) -> str:
:return:
SQL predicate string
"""
left = self.sql(expression, "this")
right_raw = self.sql(expression, "expression")

# Check if right side is a column reference or a literal range string
if "." in right_raw and not right_raw.startswith("'"):
# Column-to-column join (e.g., a.interval INTERSECTS b.interval)
return self._generate_column_join(left, right_raw, op_type)
left = self._predicate_operand(expression, "this", None)
right = self._predicate_operand(expression, "expression", None)
return self._generate_column_join(left, right, op_type)
else:
# Literal range string (e.g., interval INTERSECTS 'chr1:1000-2000')
try:
range_str = right_raw.strip("'\"")
parsed_range = RangeParser.parse(range_str).to_zero_based_half_open()
left = self._predicate_operand(expression, "this", self._current_table)
return self._generate_range_predicate(left, parsed_range, op_type)
except Exception as e:
raise ValueError(
Expand All @@ -587,28 +625,28 @@ def _generate_spatial_op(self, expression: exp.Binary, op_type: str) -> str:

def _generate_range_predicate(
self,
column_ref: str,
column: ResolvedColumn,
parsed_range: ParsedRange,
op_type: str,
) -> str:
"""Generate SQL predicate for a range operation.

:param column_ref:
Column reference (e.g., 'v.interval' or 'interval')
:param column:
Resolved column operand (physical chrom/start/end fragments plus the
backing :class:`~giql.table.Table` config for canonicalization).
:param parsed_range:
Parsed genomic range
:param op_type:
'intersects', 'contains', or 'within'
:return:
SQL predicate string
"""
# Get column references
chrom_col, raw_start_col, raw_end_col = self._get_column_refs(
column_ref, self._current_table
)
table = self._resolve_table(column_ref, self._current_table)
start_col = canonical_start(raw_start_col, table)
end_col = canonical_end(raw_end_col, table)
# Canonicalize the raw physical endpoints to 0-based half-open. The
# alias-qualified column fragments come pre-resolved on the
# ResolvedColumn; canonicalization stays here (epic #114 step #123).
chrom_col = column.chrom
start_col = canonical_start(column.start, column.table)
end_col = canonical_end(column.end, column.table)

chrom = parsed_range.chromosome
start = parsed_range.start
Expand Down Expand Up @@ -648,28 +686,28 @@ def _generate_range_predicate(

raise ValueError(f"Unknown operation: {op_type}")

def _generate_column_join(self, left_col: str, right_col: str, op_type: str) -> str:
def _generate_column_join(
self, left: ResolvedColumn, right: ResolvedColumn, op_type: str
) -> str:
"""Generate SQL for column-to-column spatial joins.

:param left_col:
Left column reference (e.g., 'a.interval')
:param right_col:
Right column reference (e.g., 'b.interval')
:param left:
Resolved left column operand (e.g., for 'a.interval').
:param right:
Resolved right column operand (e.g., for 'b.interval').
:param op_type:
'intersects', 'contains', or 'within'
:return:
SQL predicate string
"""
# Get column references for both sides
# Pass None to let _get_column_refs extract and resolve table from column ref
l_chrom, raw_l_start, raw_l_end = self._get_column_refs(left_col, None)
r_chrom, raw_r_start, raw_r_end = self._get_column_refs(right_col, None)
l_table = self._resolve_table(left_col)
r_table = self._resolve_table(right_col)
l_start = canonical_start(raw_l_start, l_table)
l_end = canonical_end(raw_l_end, l_table)
r_start = canonical_start(raw_r_start, r_table)
r_end = canonical_end(raw_r_end, r_table)
# Canonicalize each side's raw physical endpoints; the alias-qualified
# chrom/start/end fragments come pre-resolved on the ResolvedColumns.
l_chrom = left.chrom
r_chrom = right.chrom
l_start = canonical_start(left.start, left.table)
l_end = canonical_end(left.end, left.table)
r_start = canonical_start(right.start, right.table)
r_end = canonical_end(right.end, right.table)

if op_type == "intersects":
# Ranges overlap if: chrom1 = chrom2 AND start1 < end2 AND end1 > start2
Expand Down Expand Up @@ -709,11 +747,15 @@ def _generate_spatial_set(self, expression: SpatialSetPredicate) -> str:
:return:
SQL predicate string
"""
column_ref = self.sql(expression, "this")
operator = expression.args["operator"]
quantifier = expression.args["quantifier"]
ranges = expression.args["ranges"]

# Resolve the (single) left column operand once; every range condition
# compares against the same column. The set predicate's ranges are
# always literals, so only this operand needs resolution.
column = self._predicate_operand(expression, "this", self._current_table)

# Parse all ranges
parsed_ranges = []
for range_expr in ranges:
Expand All @@ -726,7 +768,7 @@ def _generate_spatial_set(self, expression: SpatialSetPredicate) -> str:
# Generate conditions for each range
conditions = []
for parsed_range in parsed_ranges:
condition = self._generate_range_predicate(column_ref, parsed_range, op_type)
condition = self._generate_range_predicate(column, parsed_range, op_type)
conditions.append(condition)

# Combine with AND (for ALL) or OR (for ANY)
Expand Down
144 changes: 136 additions & 8 deletions src/giql/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,12 @@
step 3). DISTANCE's two *column* operands (``this`` / ``expression``) are
resolved to a :class:`ResolvedColumn` and attached through the separate
:attr:`OperatorResolution.columns` channel (epic #114, step 4). The spatial
predicates still declare their column / literal slots but defer resolution to
their port issue (#120), which reuses these resolved-metadata types.
predicates' column operands (:class:`~giql.expressions.Intersects` /
``Contains`` / ``Within`` ``this`` and column-shaped ``expression``, and
``SpatialSetPredicate`` ``this``) resolve onto the same
:class:`ResolvedColumn` type through that columns channel (epic #114,
step #120); a literal-range ``expression`` slot stays deferred and the emitter
parses it on its existing path.
"""

from __future__ import annotations
Expand Down Expand Up @@ -337,9 +341,11 @@ class OperatorResolution:
deferral the emitter can reclassify unaided.
columns : dict[str, ResolvedColumn]
Mapping from a *column* operand's arg key to its resolved
:class:`ResolvedColumn`. Carries DISTANCE's two interval operands; an
operand the pass could not resolve (a literal range, or an unqualified
column) is left out and the generator raises its existing error.
:class:`ResolvedColumn`. Carries DISTANCE's two interval operands and
the spatial predicates' column operands; an operand the pass could not
resolve (a literal range, or an unqualified column outside a
current-table context) is left out and the generator raises its existing
error.
"""

operator: str
Expand Down Expand Up @@ -450,9 +456,8 @@ def _resolve_operator(
deferrals["reference"] = deferral
elif isinstance(node, GIQLDistance):
columns = _resolve_distance_columns(node, tables)
# The spatial predicates declare only column / literal slots, whose
# resolution metadata is designed by their port issue (#120); the pass
# attaches an (empty-slot) resolution so every operator carries metadata.
elif isinstance(node, (Intersects, Contains, Within, SpatialSetPredicate)):
columns = _resolve_predicate_columns(node, tables)

node.meta[META_KEY] = OperatorResolution(
type(node).__name__, slots, deferrals, columns
Expand Down Expand Up @@ -517,6 +522,129 @@ def _resolve_distance_columns(
return columns


def _resolve_predicate_columns(
    node: exp.Expression, tables: Tables
) -> dict[str, ResolvedColumn]:
    """Resolve a spatial predicate's column operands to :class:`ResolvedColumn`.

    Two shapes are distinguished:

    * **Column-to-column** — the node is not a ``SpatialSetPredicate`` and
      its ``expression`` arg is a table-qualified ``exp.Column``. The
      ``expression`` operand is resolved with no current-table context,
      and so is ``this`` when it is itself an ``exp.Column``.
    * **Everything else** (a ``SpatialSetPredicate``, or a predicate whose
      right side is not a qualified column, e.g. a literal range) — only
      ``this`` is resolved, using the enclosing current table as context.
      If ``this`` is not an ``exp.Column`` the result is empty.

    The alias map and current table come from ``_enclosing_alias_map``;
    the per-operand work is delegated to :func:`_resolve_predicate_column`.

    :param node:
        The spatial predicate node being annotated.
    :param tables:
        Table configuration registry passed through to the operand
        resolver.
    :return:
        Mapping from operand arg key (``"this"`` / ``"expression"``) to its
        resolved column; operands that were not resolved are absent.
    """
    alias_map, current_table = _enclosing_alias_map(node)
    left = node.this
    left_is_column = isinstance(left, exp.Column)

    if not isinstance(node, SpatialSetPredicate):
        right = node.args.get("expression")
        if isinstance(right, exp.Column) and right.table:
            # Column-to-column: neither side is anchored to the current
            # table, so each operand goes through the alias map.
            resolved: dict[str, ResolvedColumn] = {}
            if left_is_column:
                resolved["this"] = _resolve_predicate_column(
                    left, None, alias_map, current_table, tables
                )
            resolved["expression"] = _resolve_predicate_column(
                right, None, alias_map, current_table, tables
            )
            return resolved

    # Set predicate or literal-range predicate: only the left operand is
    # resolved, anchored to the current table.
    if not left_is_column:
        return {}
    return {
        "this": _resolve_predicate_column(
            left, current_table, alias_map, current_table, tables
        )
    }


def _resolve_predicate_column(
    column: exp.Column,
    ctx_table: str | None,
    alias_map: dict[str, str],
    current_table: str | None,
    tables: Tables,
) -> ResolvedColumn:
    """Resolve one spatial-predicate column operand to a :class:`ResolvedColumn`.

    The config table name is chosen by precedence:

    1. a truthy *ctx_table* wins outright;
    2. otherwise a qualified operand looks its qualifier up in
       *alias_map*, defaulting to *current_table*;
    3. otherwise (unqualified, no context) no table is used.

    The physical chrom/start/end/strand column names come from
    ``_physical_cols`` applied to the looked-up table (or to ``None`` when
    no table name was chosen). Each name is emitted double-quoted, and
    prefixed with the operand's own qualifier (verbatim, unquoted) when it
    has one.

    :param column:
        The column operand to resolve.
    :param ctx_table:
        Caller-supplied table name that overrides alias-based lookup.
    :param alias_map:
        Mapping from a table qualifier to a config table name.
    :param current_table:
        Default table name for a qualifier missing from *alias_map*.
    :param tables:
        Registry queried with ``tables.get(name)`` for the table config.
    :return:
        The resolved column fragments plus the backing table config (or
        ``None``).
    """
    qualifier = column.table or ""

    if ctx_table:
        config_name: str | None = ctx_table
    elif qualifier:
        config_name = alias_map.get(qualifier, current_table)
    else:
        config_name = None

    table = tables.get(config_name) if config_name else None

    # Qualified operands keep their qualifier verbatim; bare ones get none.
    prefix = f"{qualifier}." if qualifier else ""
    chrom_ref, start_ref, end_ref, strand_ref = (
        f'{prefix}"{physical}"' for physical in _physical_cols(table)
    )
    return ResolvedColumn(
        chrom=chrom_ref,
        start=start_ref,
        end=end_ref,
        strand=strand_ref,
        table=table,
    )


def _resolve_column_operand(
operand: exp.Expression | None,
tables: Tables,
Expand Down
Loading
Loading