Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions src/etchdb/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,9 +255,11 @@ async def iter_rows_keyset(

- It must be a single column (composite-PK keyset uses
`(a, b) > (last_a, last_b)` and isn't supported here).
- It must be monotonic-ordered and unique enough that no two
rows tie. Primary keys usually qualify; created_at columns
can if the resolution is high enough.
- It must be NOT NULL and unique enough that no two rows
tie. Primary keys usually qualify; created_at columns can
if the resolution is high enough. A NULL at a page
boundary stalls the cursor (WHERE by > NULL is false),
so we raise rather than loop forever.

Defaults to `model.__pk__[0]`. Filters are AND'd with the
cursor. Ordering is ascending; descending is not supported.
Expand Down Expand Up @@ -287,6 +289,12 @@ async def iter_rows_keyset(
rows = await self._adapter.fetch(q.sql, *q.params)
if not rows:
return
if len(rows) == batch_size and rows[-1][by] is None:
raise ValueError(
f"iter_rows_keyset: {by!r} is NULL at the page boundary; "
f"the cursor cannot advance. Pick a non-nullable column "
f"for `by`, or filter NULLs out via raw SQL."
)
for row in rows:
yield model(**row)
if len(rows) < batch_size:
Expand Down
17 changes: 17 additions & 0 deletions tests/integration/test_iter_rows_keyset.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,20 @@ async def test_iter_rows_keyset_rejects_non_db_column(db: DB):
with pytest.raises(ValueError, match="not a DB column"):
async for _ in db.iter_rows_keyset(User, by="nonexistent_col"):
pass


async def test_iter_rows_keyset_raises_on_null_page_boundary(db: DB):
    """A NULL `by` value ending a full page must raise, not loop forever.

    The keyset cursor advances with `WHERE by > <last value>`; when the
    last value of a full page is NULL that predicate never matches, so
    the same page would be fetched again and again. The iterator is
    expected to raise ValueError instead.

    Fixture: three rows with a NULL email and one with a non-NULL
    email, paged with batch_size=2. This reproduces on both SQLite
    (NULLs sort first) and Postgres (NULLs sort last): in either
    ordering a NULL row ends up as the last row of a full, non-final
    page.
    """
    seed_users = (
        User(id=1, name="u1", email=None),
        User(id=2, name="u2", email=None),
        User(id=3, name="u3", email=None),
        User(id=4, name="u4", email="a@x"),
    )
    for user in seed_users:
        await db.insert(user)

    # Drain the iterator; the error must surface during iteration.
    with pytest.raises(ValueError, match="NULL at the page boundary"):
        async for _row in db.iter_rows_keyset(User, by="email", batch_size=2):
            pass
Loading