From 3ebb291b6243ae9a672691e0e688e7bc9c50a276 Mon Sep 17 00:00:00 2001
From: Rafael Benchimol Klausner <rafa.bench@gmail.com>
Date: Sat, 2 May 2026 17:04:26 -0300
Subject: [PATCH] fix: ORDER BY id in _get_scalar_relation_map (#23)

Without ORDER BY, SQLite was free to pick an index-driven scan when a
covering index existed on the FK column, traversing rows in FK-value
order and returning a relation vector that no longer lined up with the
source rows in id order. Every other read in this file already orders
by id; this one was the outlier.

Closes #23

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/read.jl | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/read.jl b/src/read.jl
index 9b9a594..28966f5 100644
--- a/src/read.jl
+++ b/src/read.jl
@@ -591,7 +591,11 @@ function _get_scalar_relation_map(
     )
     attribute = _get_attribute(db, collection_from, attribute_on_collection_from)
 
-    query = "SELECT $(attribute.id) FROM $(attribute.table_where_is_located)"
+    # ORDER BY id so the result aligns with source rows in id order. Without
+    # it, SQLite is free to pick an index-driven scan when a covering index
+    # exists on the FK column, which traverses rows in FK-value order and
+    # shuffles the returned vector. See psrenergy/PSRDatabase.jl#23.
+    query = "SELECT $(attribute.id) FROM $(attribute.table_where_is_located) ORDER BY id"
     df = DBInterface.execute(db.sqlite_db, query) |> DataFrame
     results = df[!, 1]
     num_results = length(results)