From 8f0fe23c3a99090ef32154fbdb17c58269184868 Mon Sep 17 00:00:00 2001 From: Brandon Zarnitz Date: Wed, 6 May 2026 09:02:37 -0400 Subject: [PATCH 1/2] fix(mssql): expose real schemas in INFORMATION_SCHEMA.TABLES and fix preview query (#12242) MindsDB's information_schema layer always overwrites TABLE_SCHEMA with the datasource name, which previously caused the real SQL schema names (dbo, app, usr, etc.) to be completely invisible when querying INFORMATION_SCHEMA.TABLES against a MSSQL datasource. The UI table-preview tooltip generated an invalid two-part query (<datasource>.<table>) instead of the required three-part form (<datasource>.<schema>.<table>
). Changes: * mssql_handler.get_tables() now accepts an `all` flag (matching the postgres / databricks pattern used by tree.py for the Explorer UI): - all=True (Explorer mode): returns raw table_schema + table_name columns so the UI can group tables under their schema nodes. - all=False (default, used by INFORMATION_SCHEMA.TABLES): qualifies table_name as "<schema>.<table>" so the full three-part name is preserved even after the system-level TABLE_SCHEMA override. Non-user schemas (sys, guest, fixed database roles) are filtered out in both modes. - self.schema configured: always filters to that single schema with plain unqualified names (original behavior preserved). * mssql_handler.get_columns() now accepts an optional schema_name parameter (consistent with the postgres handler) and automatically extracts the schema from a qualified "<schema>.<table>" table_name produced by get_tables(), so column lookups remain precise when no explicit schema is configured. * Tests: the existing test_get_tables is updated and four new test methods are added; together they cover get_tables(all=False), get_tables(all=True), get_tables with a configured schema, get_columns with a qualified table name, and get_columns with an explicit schema_name argument. Fixes #12242 Co-Authored-By: Claude Sonnet 4.6 --- .../handlers/mssql_handler/mssql_handler.py | 92 ++++++++++++++++--- tests/unit/handlers/test_mssql.py | 90 +++++++++++++++++- 2 files changed, 169 insertions(+), 13 deletions(-) diff --git a/mindsdb/integrations/handlers/mssql_handler/mssql_handler.py b/mindsdb/integrations/handlers/mssql_handler/mssql_handler.py index 7b6e42fff34..0ef36fb4e85 100644 --- a/mindsdb/integrations/handlers/mssql_handler/mssql_handler.py +++ b/mindsdb/integrations/handlers/mssql_handler/mssql_handler.py @@ -435,40 +435,108 @@ def query(self, query: ASTNode) -> Response: resp.error_message += f"\nThe problem with render: {render_error}" return resp - def get_tables(self) -> Response: + def get_tables(self, all: bool = False) -> Response: """ - Retrieves a list of all non-system tables and views in the current schema of the Microsoft SQL Server database. + Retrieves a list of all non-system tables and views in the Microsoft SQL Server database. + + When a specific schema is configured (``self.schema``), only tables belonging to that + schema are returned regardless of the ``all`` flag. When no schema is configured: + + * ``all=False`` (default, used by ``INFORMATION_SCHEMA.TABLES``) – returns tables from + every non-system schema and exposes the real SQL schema name in *table_schema*. + ``table_name`` is qualified as ``<schema>.<table>
`` so that the three-part name + ``<datasource>.<schema>.<table>
`` works transparently in MindsDB SQL. + * ``all=True`` (used by the Explorer UI / tree endpoint) – same query but + ``table_name`` is **not** prefixed; the raw ``table_schema`` column is kept so the + UI can group tables under their schema nodes. + + Args: + all (bool): When *True* the unqualified ``table_name`` is returned (Explorer mode). + When *False* (default) ``table_name`` is prefixed with the schema so + that ``INFORMATION_SCHEMA.TABLES`` exposes the full three-part table + reference. Returns: - Response: A response object containing the list of tables and views, formatted as per the `Response` class. + Response: A response object containing the list of tables and views, formatted as + per the ``Response`` class. """ + if self.schema: + # Single-schema mode: schema filter is always applied; names are unambiguous. + query = f""" + SELECT + table_schema, + table_name, + table_type + FROM {self.database}.INFORMATION_SCHEMA.TABLES + WHERE TABLE_TYPE in ('BASE TABLE', 'VIEW') + AND table_schema = '{self.schema}' + """ + return self.native_query(query) + + if all: + # Explorer / tree mode: return raw schema + table columns so the UI can + # group tables under their schema nodes. + query = f""" + SELECT + table_schema, + table_name, + table_type + FROM {self.database}.INFORMATION_SCHEMA.TABLES + WHERE TABLE_TYPE in ('BASE TABLE', 'VIEW') + AND table_schema NOT IN ('sys', 'INFORMATION_SCHEMA', 'guest', 'db_owner', + 'db_accessadmin', 'db_securityadmin', 'db_ddladmin', 'db_backupoperator', + 'db_datareader', 'db_datawriter', 'db_denydatareader', 'db_denydatawriter') + ORDER BY table_schema, table_name + """ + return self.native_query(query) + + # INFORMATION_SCHEMA.TABLES mode (all=False, no explicit schema): qualify + # table_name as "<schema>.<table>" so the datasource-level TABLE_SCHEMA + # override in MindsDB's system_tables layer does not hide the real schema. query = f""" SELECT table_schema, - table_name, + table_schema + '.' + table_name AS table_name, table_type FROM {self.database}.INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE in ('BASE TABLE', 'VIEW') + AND table_schema NOT IN ('sys', 'INFORMATION_SCHEMA', 'guest', 'db_owner', + 'db_accessadmin', 'db_securityadmin', 'db_ddladmin', 'db_backupoperator', + 'db_datareader', 'db_datawriter', 'db_denydatareader', 'db_denydatawriter') + ORDER BY table_schema, table_name """ - if self.schema: - query += f" AND table_schema = '{self.schema}'" - return self.native_query(query) - def get_columns(self, table_name) -> Response: + def get_columns(self, table_name: str, schema_name: str | None = None) -> Response: """ Retrieves column details for a specified table in the Microsoft SQL Server database. + ``table_name`` may arrive as a qualified ``<schema>.<table>
`` string (produced by + :meth:`get_tables` when no explicit schema is configured). In that case the schema + part is extracted automatically and used as the ``table_schema`` filter. + Args: - table_name (str): The name of the table for which to retrieve column information. + table_name (str): The name of the table, optionally qualified as + ``<schema>.<table>
``. + schema_name (str | None): Explicit schema override. When provided it takes + precedence over any schema embedded in ``table_name`` and over + ``self.schema``. Returns: - Response: A response object containing the column details, formatted as per the `Response` class. + Response: A response object containing the column details, formatted as per the + ``Response`` class. Raises: ValueError: If the 'table_name' is not a valid string. """ + # Resolve schema: explicit arg > handler-level self.schema > embedded in table_name. + effective_schema = schema_name or self.schema + if effective_schema is None and "." in table_name: + # table_name was qualified by get_tables() as "<schema>.<table>" + parts = table_name.split(".", 1) + effective_schema, table_name = parts[0], parts[1] + query = f""" SELECT COLUMN_NAME, @@ -489,8 +557,8 @@ def get_columns(self, table_name) -> Response: table_name = '{table_name}' """ - if self.schema: - query += f" AND table_schema = '{self.schema}'" + if effective_schema: + query += f" AND table_schema = '{effective_schema}'" result = self.native_query(query) result.to_columns_table_response(map_type_fn=_map_type) diff --git a/tests/unit/handlers/test_mssql.py b/tests/unit/handlers/test_mssql.py index d7024d51359..bc02b5fbd75 100644 --- a/tests/unit/handlers/test_mssql.py +++ b/tests/unit/handlers/test_mssql.py @@ -182,7 +182,12 @@ def test_query_method(self): def test_get_tables(self): """ - Tests that get_tables calls native_query with the correct SQL + Tests that get_tables calls native_query with the correct SQL. + + Default (all=False, no self.schema): table_name should be qualified as + ``<schema>.<table>
`` so that the real SQL schema is not lost when + MindsDB's information_schema layer overwrites TABLE_SCHEMA with the + datasource name. """ expected_response = OkResponse() self.handler.native_query = MagicMock(return_value=expected_response) @@ -197,8 +202,53 @@ def test_get_tables(self): self.assertIn("table_schema", call_args) self.assertIn("table_name", call_args) self.assertIn("table_type", call_args) + # Qualified name: table_name must be "schema + '.' + table_name" + self.assertIn("table_schema + '.' + table_name", call_args) + self.assertEqual(response, expected_response) + + def test_get_tables_all_mode(self): + """ + Tests that get_tables(all=True) returns raw (unqualified) table_name so + the Explorer UI can group tables under their schema nodes. + """ + expected_response = OkResponse() + self.handler.native_query = MagicMock(return_value=expected_response) + + response = self.handler.get_tables(all=True) + + self.handler.native_query.assert_called_once() + call_args = self.handler.native_query.call_args[0][0] + database = self.handler.connection_args["database"] + + self.assertIn(f"{database}.INFORMATION_SCHEMA.TABLES", call_args) + self.assertIn("table_schema", call_args) + self.assertIn("table_name", call_args) + # In explorer mode the name must NOT be qualified + self.assertNotIn("table_schema + '.' + table_name", call_args) self.assertEqual(response, expected_response) + def test_get_tables_with_schema(self): + """ + Tests that get_tables with a configured schema always filters by that + schema and returns plain (unqualified) table names. 
+ """ + self.handler.schema = "dbo" + expected_response = OkResponse() + self.handler.native_query = MagicMock(return_value=expected_response) + + response = self.handler.get_tables() + + self.handler.native_query.assert_called_once() + call_args = self.handler.native_query.call_args[0][0] + + self.assertIn("table_schema = 'dbo'", call_args) + # Single-schema mode: no schema qualification needed in table_name + self.assertNotIn("table_schema + '.' + table_name", call_args) + self.assertEqual(response, expected_response) + + # Reset handler schema + self.handler.schema = None + def test_get_columns(self): """ Tests that get_columns calls native_query with the correct SQL @@ -234,6 +284,44 @@ def test_get_columns(self): self.assertEqual(call_args, expected_sql) self.assertEqual(response, expected_response) + def test_get_columns_with_qualified_table_name(self): + """ + Tests that get_columns correctly handles a qualified ``<schema>.<table>
`` name + produced by get_tables(all=False) when no explicit schema is configured. + The schema part is extracted and used as a table_schema filter so the query + returns the correct columns. + """ + expected_response = TableResponse(data=DataFrame([], columns=list(INF_SCHEMA_COLUMNS_NAMES_SET))) + self.handler.native_query = MagicMock(return_value=expected_response) + + qualified_name = "dbo.Customers" + response = self.handler.get_columns(qualified_name) + + assert response.type == RESPONSE_TYPE.COLUMNS_TABLE + self.handler.native_query.assert_called_once() + call_args = self.handler.native_query.call_args[0][0] + + # The query should filter by the bare table name AND the extracted schema + self.assertIn("table_name = 'Customers'", call_args) + self.assertIn("table_schema = 'dbo'", call_args) + + def test_get_columns_with_explicit_schema_name(self): + """ + Tests that an explicit schema_name argument takes precedence over any + schema embedded in table_name and over self.schema. + """ + expected_response = TableResponse(data=DataFrame([], columns=list(INF_SCHEMA_COLUMNS_NAMES_SET))) + self.handler.native_query = MagicMock(return_value=expected_response) + + response = self.handler.get_columns("Orders", schema_name="app") + + assert response.type == RESPONSE_TYPE.COLUMNS_TABLE + self.handler.native_query.assert_called_once() + call_args = self.handler.native_query.call_args[0][0] + + self.assertIn("table_name = 'Orders'", call_args) + self.assertIn("table_schema = 'app'", call_args) + def test_meta_get_tables_returns_response(self): # realistic names df = DataFrame( From 818d4c0bf4ee7492c82462f4d19218c75a0436df Mon Sep 17 00:00:00 2001 From: Brandon Zarnitz Date: Wed, 6 May 2026 10:04:33 -0400 Subject: [PATCH 2/2] fix(mssql): escape single-quotes in get_columns SQL string literals Addresses SQL injection risk flagged in review: effective_schema and table_name were interpolated directly into the WHERE clause without sanitization. 
Apply SQL string-literal escaping (doubling single-quotes) to safe_table_name and safe_schema before interpolation. Co-Authored-By: Claude Sonnet 4.6 --- .../handlers/mssql_handler/mssql_handler.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/mindsdb/integrations/handlers/mssql_handler/mssql_handler.py b/mindsdb/integrations/handlers/mssql_handler/mssql_handler.py index 0ef36fb4e85..6d58a9dc5a8 100644 --- a/mindsdb/integrations/handlers/mssql_handler/mssql_handler.py +++ b/mindsdb/integrations/handlers/mssql_handler/mssql_handler.py @@ -537,6 +537,10 @@ def get_columns(self, table_name: str, schema_name: str | None = None) -> Respon parts = table_name.split(".", 1) effective_schema, table_name = parts[0], parts[1] + # Escape single-quotes in identifier values used in SQL string literals. + safe_table_name = table_name.replace("'", "''") + safe_schema = effective_schema.replace("'", "''") if effective_schema else None + query = f""" SELECT COLUMN_NAME, @@ -554,11 +558,11 @@ def get_columns(self, table_name: str, schema_name: str | None = None) -> Respon FROM information_schema.columns WHERE - table_name = '{table_name}' + table_name = '{safe_table_name}' """ - if effective_schema: - query += f" AND table_schema = '{effective_schema}'" + if safe_schema: + query += f" AND table_schema = '{safe_schema}'" result = self.native_query(query) result.to_columns_table_response(map_type_fn=_map_type)