From 85e699c8cfa5e999e59709c87ec8bca1109b2b8a Mon Sep 17 00:00:00 2001 From: chon3806 <93464148+chon3806@users.noreply.github.com> Date: Sat, 25 Apr 2026 12:57:54 -0400 Subject: [PATCH 1/2] fix(agents): return tables from all databases in get_usable_table_names The result_tables list was being reset on every iteration of the databases loop, so when an agent was configured with tables from multiple data sources (e.g. data.tables = ['db1.*', 'db2.*']), only the tables from the last iterated database were returned. Remove the reset from inside the loop so all matching tables across all databases are accumulated. --- .../interfaces/agents/utils/sql_toolkit.py | 1 - .../interfaces/agents/test_sql_toolkit.py | 80 +++++++++++++++++++ 2 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 tests/unit/interfaces/agents/test_sql_toolkit.py diff --git a/mindsdb/interfaces/agents/utils/sql_toolkit.py b/mindsdb/interfaces/agents/utils/sql_toolkit.py index 9588ad6ecba..f1c09ea795c 100644 --- a/mindsdb/interfaces/agents/utils/sql_toolkit.py +++ b/mindsdb/interfaces/agents/utils/sql_toolkit.py @@ -318,7 +318,6 @@ def get_usable_table_names(self, lazy=True): if self._cache: self._cache.set(cache_key, list_tables) - result_tables = [] for row in list_tables: if row.get("schema") is not None: parts = [db_name, row["schema"], row["name"]] diff --git a/tests/unit/interfaces/agents/test_sql_toolkit.py b/tests/unit/interfaces/agents/test_sql_toolkit.py new file mode 100644 index 00000000000..d3bfb69ca68 --- /dev/null +++ b/tests/unit/interfaces/agents/test_sql_toolkit.py @@ -0,0 +1,80 @@ +from unittest.mock import Mock + +import pandas as pd + +from mindsdb.interfaces.agents.utils.sql_toolkit import ( + MindsDBQuery, + TablesCollection, +) + + +def _make_query(tables): + """Build a MindsDBQuery without invoking SessionController/DB.""" + query = MindsDBQuery.__new__(MindsDBQuery) + query.tables = TablesCollection(tables) + query.knowledge_bases = TablesCollection([]) + 
query.command_executor = Mock() + query._cache = None + return query + + +def _make_handler(table_names): + """Mock data handler whose `get_tables` returns a TableResponse-like object.""" + handler = Mock() + df = pd.DataFrame({"table_name": table_names}) + response = Mock() + response.data_frame = df + handler.get_tables.return_value = response + return handler + + +class TestGetUsableTableNames: + def test_single_database_wildcard(self): + query = _make_query(["db1.*"]) + handler = _make_handler(["t1", "t2"]) + query.command_executor.session.integration_controller.get_data_handler.return_value = handler + + result = query.get_usable_table_names(lazy=False) + + names = [tuple(ident.parts) for ident in result] + assert ("db1", "t1") in names + assert ("db1", "t2") in names + assert len(names) == 2 + + def test_multi_database_wildcard_returns_all(self): + """Regression test: tables from all databases should be returned, + not just the last one iterated.""" + query = _make_query(["db1.*", "db2.*"]) + + handlers = { + "db1": _make_handler(["a", "b"]), + "db2": _make_handler(["c", "d", "e"]), + } + + def get_handler(name): + return handlers[name] + + query.command_executor.session.integration_controller.get_data_handler.side_effect = get_handler + + result = query.get_usable_table_names(lazy=False) + + names = {tuple(ident.parts) for ident in result} + assert names == { + ("db1", "a"), + ("db1", "b"), + ("db2", "c"), + ("db2", "d"), + ("db2", "e"), + } + + def test_lazy_returns_items_as_is(self): + query = _make_query(["db1.users", "db2.orders"]) + + result = query.get_usable_table_names(lazy=True) + + names = {tuple(ident.parts) for ident in result} + assert names == {("db1", "users"), ("db2", "orders")} + + def test_no_tables_returns_empty(self): + query = _make_query([]) + assert query.get_usable_table_names(lazy=False) == [] From 387cc203543aa056c13d05f008a89255e759c91c Mon Sep 17 00:00:00 2001 From: chon3806 <93464148+chon3806@users.noreply.github.com> Date: Sat, 
25 Apr 2026 13:01:44 -0400 Subject: [PATCH 2/2] test(agents): cover mixed wildcard + specific table case Adds an additional regression test proving that with tables=['db1.users', 'db2.*'] the match() filter still correctly restricts db1 to the listed table while expanding db2 fully. --- .../interfaces/agents/test_sql_toolkit.py | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/unit/interfaces/agents/test_sql_toolkit.py b/tests/unit/interfaces/agents/test_sql_toolkit.py index d3bfb69ca68..4f0caff7035 100644 --- a/tests/unit/interfaces/agents/test_sql_toolkit.py +++ b/tests/unit/interfaces/agents/test_sql_toolkit.py @@ -78,3 +78,26 @@ def test_lazy_returns_items_as_is(self): def test_no_tables_returns_empty(self): query = _make_query([]) assert query.get_usable_table_names(lazy=False) == [] + + def test_mixed_wildcard_and_specific_table(self): + """Wildcard databases should expand fully, while specific entries + should still be filtered down to the exact table that matches.""" + query = _make_query(["db1.users", "db2.*"]) + + handlers = { + "db1": _make_handler(["users", "orders", "invoices"]), + "db2": _make_handler(["a", "b"]), + } + query.command_executor.session.integration_controller.get_data_handler.side_effect = lambda name: handlers[name] + + result = query.get_usable_table_names(lazy=False) + + names = {tuple(ident.parts) for ident in result} + assert names == { + ("db1", "users"), + ("db2", "a"), + ("db2", "b"), + } + # Tables present in db1 but not listed in `data.tables` must NOT leak through. + assert ("db1", "orders") not in names + assert ("db1", "invoices") not in names