diff --git a/src/etchdb/db.py b/src/etchdb/db.py index 43fd16b..02a957a 100644 --- a/src/etchdb/db.py +++ b/src/etchdb/db.py @@ -255,9 +255,11 @@ async def iter_rows_keyset( - It must be a single column (composite-PK keyset uses `(a, b) > (last_a, last_b)` and isn't supported here). - - It must be monotonic-ordered and unique enough that no two - rows tie. Primary keys usually qualify; created_at columns - can if the resolution is high enough. + - It must be NOT NULL and unique enough that no two rows + tie. Primary keys usually qualify; created_at columns can + if the resolution is high enough. A NULL at a page + boundary stalls the cursor (WHERE by > NULL is never true), + so we raise rather than loop forever. Defaults to `model.__pk__[0]`. Filters are AND'd with the cursor. Ordering is ascending; descending is not supported. @@ -287,6 +289,12 @@ async def iter_rows_keyset( rows = await self._adapter.fetch(q.sql, *q.params) if not rows: return + if len(rows) == batch_size and rows[-1][by] is None: + raise ValueError( + f"iter_rows_keyset: {by!r} is NULL at the page boundary; " + f"the cursor cannot advance. Pick a non-nullable column " + f"for `by`, or filter NULLs out via raw SQL." + ) for row in rows: yield model(**row) if len(rows) < batch_size: diff --git a/tests/integration/test_iter_rows_keyset.py b/tests/integration/test_iter_rows_keyset.py index 67f9f7f..6901430 100644 --- a/tests/integration/test_iter_rows_keyset.py +++ b/tests/integration/test_iter_rows_keyset.py @@ -91,3 +91,20 @@ async def test_iter_rows_keyset_rejects_non_db_column(db: DB): with pytest.raises(ValueError, match="not a DB column"): async for _ in db.iter_rows_keyset(User, by="nonexistent_col"): pass + + +async def test_iter_rows_keyset_raises_on_null_page_boundary(db: DB): + """NULL at a full-page boundary stalls the cursor (WHERE by > + NULL is never true), so the loop would re-fetch the same page forever. + Raise instead. 
Three NULL emails + one non-NULL + batch_size=2 + reproduces on both SQLite (NULLs first) and Postgres (NULLs + last), since either way a NULL row lands at the end of a full + non-final page.""" + await db.insert(User(id=1, name="u1", email=None)) + await db.insert(User(id=2, name="u2", email=None)) + await db.insert(User(id=3, name="u3", email=None)) + await db.insert(User(id=4, name="u4", email="a@x")) + + with pytest.raises(ValueError, match="NULL at the page boundary"): + async for _ in db.iter_rows_keyset(User, by="email", batch_size=2): + pass