Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 85 additions & 13 deletions mindsdb/integrations/handlers/mssql_handler/mssql_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,40 +435,112 @@ def query(self, query: ASTNode) -> Response:
resp.error_message += f"\nThe problem with render: {render_error}"
return resp

def get_tables(self, all: bool = False) -> Response:
    """
    Retrieves a list of all non-system tables and views in the Microsoft SQL Server database.

    When a specific schema is configured (``self.schema``), only tables belonging to that
    schema are returned regardless of the ``all`` flag, and ``table_name`` is left
    unqualified. When no schema is configured:

    * ``all=False`` (default, used by ``INFORMATION_SCHEMA.TABLES``) - returns tables from
      every non-excluded schema and qualifies ``table_name`` as ``<schema>.<table>`` so that
      the three-part name ``<datasource>.<schema>.<table>`` works in MindsDB SQL.
    * ``all=True`` (used by the Explorer UI / tree endpoint) - same filter, but
      ``table_name`` is **not** prefixed; the raw ``table_schema`` column is kept so the
      UI can group tables under their schema nodes.

    Args:
        all (bool): When *True* the unqualified ``table_name`` is returned (Explorer mode).
            When *False* (default) ``table_name`` is prefixed with the schema. The
            parameter name shadows the builtin but is kept for caller compatibility.

    Returns:
        Response: Whatever ``self.native_query`` returns for the generated query; the
            query selects ``table_schema``, ``table_name`` and ``table_type``.
    """
    # Schemas excluded from the listing when no explicit schema is configured.
    excluded_schemas = (
        "sys",
        "INFORMATION_SCHEMA",
        "guest",
        "db_owner",
        "db_accessadmin",
        "db_securityadmin",
        "db_ddladmin",
        "db_backupoperator",
        "db_datareader",
        "db_datawriter",
        "db_denydatareader",
        "db_denydatawriter",
    )

    if self.schema:
        # Single-schema mode: names are unambiguous, so no qualification is needed.
        # Double any single quotes so the value stays inside the SQL string literal
        # (same escaping that get_columns applies to its schema filter).
        safe_schema = self.schema.replace("'", "''")
        name_column = "table_name"
        schema_filter = f"AND table_schema = '{safe_schema}'"
        order_clause = ""
    else:
        # Explorer mode keeps the raw name; INFORMATION_SCHEMA mode qualifies it so the
        # real SQL schema survives MindsDB overriding TABLE_SCHEMA with the datasource name.
        name_column = "table_name" if all else "table_schema + '.' + table_name AS table_name"
        excluded_literals = ", ".join(f"'{name}'" for name in excluded_schemas)
        schema_filter = f"AND table_schema NOT IN ({excluded_literals})"
        order_clause = "ORDER BY table_schema, table_name"

    # One template for every mode; filters are part of the WHERE clause and the
    # optional ORDER BY always comes last, so the statement is valid in each branch.
    query = f"""
        SELECT
            table_schema,
            {name_column},
            table_type
        FROM {self.database}.INFORMATION_SCHEMA.TABLES
        WHERE TABLE_TYPE in ('BASE TABLE', 'VIEW')
        {schema_filter}
        {order_clause}
    """
    return self.native_query(query)

def get_columns(self, table_name) -> Response:
def get_columns(self, table_name: str, schema_name: str | None = None) -> Response:
"""
Retrieves column details for a specified table in the Microsoft SQL Server database.

``table_name`` may arrive as a qualified ``<schema>.<table>`` string (produced by
:meth:`get_tables` when no explicit schema is configured). In that case the schema
part is extracted automatically and used as the ``table_schema`` filter.

Args:
table_name (str): The name of the table for which to retrieve column information.
table_name (str): The name of the table, optionally qualified as
``<schema>.<table>``.
schema_name (str | None): Explicit schema override. When provided it takes
precedence over any schema embedded in ``table_name`` and over
``self.schema``.

Returns:
Response: A response object containing the column details, formatted as per the `Response` class.
Response: A response object containing the column details, formatted as per the
``Response`` class.
Raises:
ValueError: If the 'table_name' is not a valid string.
"""

# Resolve schema: explicit arg > handler-level self.schema > schema embedded in table_name
# (a qualified "<schema>.<table>" name is only split when neither of the first two is set).
effective_schema = schema_name or self.schema
if effective_schema is None and "." in table_name:
# table_name was qualified by get_tables() as "<schema>.<table>"
parts = table_name.split(".", 1)
effective_schema, table_name = parts[0], parts[1]

# Escape single-quotes in identifier values used in SQL string literals.
safe_table_name = table_name.replace("'", "''")
safe_schema = effective_schema.replace("'", "''") if effective_schema else None

query = f"""
SELECT
COLUMN_NAME,
Comment on lines +533 to 546
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correctness: The new schema_name parameter is directly interpolated into the SQL query (f" AND table_schema = '{effective_schema}'") without any sanitization, creating a SQL injection vector that didn't exist before — a caller passing schema_name = "'; DROP TABLE foo; --" would execute arbitrary SQL.

🤖 AI Agent Prompt for Cursor/Windsurf

📋 Copy this prompt to your AI coding assistant (Cursor, Windsurf, etc.) to get help fixing this issue

In file `mindsdb/integrations/handlers/mssql_handler/mssql_handler.py`, the `get_columns` method (around line 533-542) builds SQL by directly interpolating `effective_schema` (which comes from the new `schema_name` parameter or from splitting `table_name`) into the query string. Add input validation/sanitization for `effective_schema` before using it in the f-string, e.g. validate it matches `^[a-zA-Z0-9_]+$` and raise ValueError otherwise, to prevent SQL injection through the new `schema_name` parameter.

Expand All @@ -486,11 +558,11 @@ def get_columns(self, table_name) -> Response:
FROM
information_schema.columns
WHERE
table_name = '{table_name}'
table_name = '{safe_table_name}'
"""

if self.schema:
query += f" AND table_schema = '{self.schema}'"
if safe_schema:
query += f" AND table_schema = '{safe_schema}'"

result = self.native_query(query)
result.to_columns_table_response(map_type_fn=_map_type)
Expand Down
90 changes: 89 additions & 1 deletion tests/unit/handlers/test_mssql.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,12 @@ def test_query_method(self):

def test_get_tables(self):
"""
Tests that get_tables calls native_query with the correct SQL
Tests that get_tables calls native_query with the correct SQL.

Default (all=False, no self.schema): table_name should be qualified as
``<schema>.<table>`` so that the real SQL schema is not lost when
MindsDB's information_schema layer overwrites TABLE_SCHEMA with the
datasource name.
"""
expected_response = OkResponse()
self.handler.native_query = MagicMock(return_value=expected_response)
Expand All @@ -197,8 +202,53 @@ def test_get_tables(self):
self.assertIn("table_schema", call_args)
self.assertIn("table_name", call_args)
self.assertIn("table_type", call_args)
# Qualified name: table_name must be "schema + '.' + table_name"
self.assertIn("table_schema + '.' + table_name", call_args)
self.assertEqual(response, expected_response)

def test_get_tables_all_mode(self):
    """
    Tests that get_tables(all=True) builds a query that selects the raw
    (unqualified) table_name, which is what the Explorer UI relies on to
    group tables under their schema nodes.
    """
    stub_response = OkResponse()
    native_query_mock = MagicMock(return_value=stub_response)
    self.handler.native_query = native_query_mock

    result = self.handler.get_tables(all=True)

    # The handler must delegate to native_query exactly once; inspect the SQL it sent.
    native_query_mock.assert_called_once()
    sql = native_query_mock.call_args[0][0]
    db_name = self.handler.connection_args["database"]

    for fragment in (f"{db_name}.INFORMATION_SCHEMA.TABLES", "table_schema", "table_name"):
        self.assertIn(fragment, sql)
    # In explorer mode the name must NOT be qualified
    self.assertNotIn("table_schema + '.' + table_name", sql)
    self.assertEqual(result, stub_response)

def test_get_tables_with_schema(self):
    """
    Tests that get_tables with a configured schema always filters by that
    schema and returns plain (unqualified) table names.
    """
    # Restore the previous schema even when an assertion below fails, so the
    # override cannot leak into later tests if the handler instance is reused.
    self.addCleanup(setattr, self.handler, "schema", self.handler.schema)
    self.handler.schema = "dbo"

    expected_response = OkResponse()
    self.handler.native_query = MagicMock(return_value=expected_response)

    response = self.handler.get_tables()

    self.handler.native_query.assert_called_once()
    call_args = self.handler.native_query.call_args[0][0]

    self.assertIn("table_schema = 'dbo'", call_args)
    # Single-schema mode: no schema qualification needed in table_name
    self.assertNotIn("table_schema + '.' + table_name", call_args)
    self.assertEqual(response, expected_response)

def test_get_columns(self):
"""
Tests that get_columns calls native_query with the correct SQL
Expand Down Expand Up @@ -234,6 +284,44 @@ def test_get_columns(self):
self.assertEqual(call_args, expected_sql)
self.assertEqual(response, expected_response)

def test_get_columns_with_qualified_table_name(self):
    """
    Tests get_columns with a qualified ``<schema>.<table>`` name, the form that
    get_tables(all=False) produces when no explicit schema is configured.

    The schema prefix must be split off and applied as a table_schema filter,
    while the bare table name is used for the table_name filter.
    """
    empty_columns = DataFrame([], columns=list(INF_SCHEMA_COLUMNS_NAMES_SET))
    stub_response = TableResponse(data=empty_columns)
    self.handler.native_query = MagicMock(return_value=stub_response)

    response = self.handler.get_columns("dbo.Customers")

    assert response.type == RESPONSE_TYPE.COLUMNS_TABLE
    self.handler.native_query.assert_called_once()
    sql = self.handler.native_query.call_args[0][0]

    # Bare table name and extracted schema must both appear as filters.
    for expected_filter in ("table_name = 'Customers'", "table_schema = 'dbo'"):
        self.assertIn(expected_filter, sql)

def test_get_columns_with_explicit_schema_name(self):
    """
    Tests that an explicit ``schema_name`` argument is applied as the
    table_schema filter when get_columns is given an unqualified table name.
    """
    empty_columns = DataFrame([], columns=list(INF_SCHEMA_COLUMNS_NAMES_SET))
    stub_response = TableResponse(data=empty_columns)
    self.handler.native_query = MagicMock(return_value=stub_response)

    response = self.handler.get_columns("Orders", schema_name="app")

    assert response.type == RESPONSE_TYPE.COLUMNS_TABLE
    self.handler.native_query.assert_called_once()
    sql = self.handler.native_query.call_args[0][0]

    # Both the table name and the explicitly supplied schema must be filtered on.
    for expected_filter in ("table_name = 'Orders'", "table_schema = 'app'"):
        self.assertIn(expected_filter, sql)

def test_meta_get_tables_returns_response(self):
# realistic names
df = DataFrame(
Expand Down
Loading