diff --git a/.github/workflows/install-dependencies-and-run-tests.yml b/.github/workflows/install-dependencies-and-run-tests.yml
new file mode 100644
index 0000000..673a091
--- /dev/null
+++ b/.github/workflows/install-dependencies-and-run-tests.yml
@@ -0,0 +1,51 @@
+name: Test Python package new version
+on:
+ push:
+ branches: [ "main" ]
+ pull_request:
+ branches: [ "main" ]
+jobs:
+ build:
+ runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false
+ matrix:
+ python-version: ["3.9", "3.10", "3.11", "3.12"]
+ steps:
+ - uses: actions/checkout@v4
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v3
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Install Linux dependencies
+ run: |
+ sudo apt-get update
+ sudo apt-get install -y \
+ gcc \
+ libkrb5-dev \
+ libsasl2-dev \
+ python3-dev \
+ python3-all-dev
+ - name: Update pip version
+ run: |
+ python -m pip install --upgrade pip
+ - name: Install dependencies
+ run: |
+ pip install -r test/requirements.txt
+ - name: Build new version of the package
+ run: |
+ python -m build
+ - name: Install the new version of the package
+ run: |
+ pip install dist/*.tar.gz
+ - name: Test with pytest
+ env:
+ HOSTNAME: ${{ secrets.HOSTNAME }}
+ PORT: ${{ secrets.PORT }}
+ PROTOCOL: ${{ secrets.PROTOCOL }}
+ ONTOLOGY: ${{ secrets.ONTOLOGY }}
+ USERNAME: ${{ secrets.USERNAME }}
+ PASSWORD: ${{ secrets.PASSWORD }}
+ CONNECT_ARGS: ${{ secrets.CONNECT_ARGS }}
+ run: |
+ pytest
diff --git a/.gitignore b/.gitignore
index b453995..b6441d4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -51,8 +51,9 @@ coverage.xml
*.py,cover
.hypothesis/
.pytest_cache/
-test.py
-test/
+test/env*
+test/.python-version
+test/.env
# Translations
*.mo
diff --git a/MANIFEST.in b/MANIFEST.in
index 5ded86d..e393ebd 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1 +1,2 @@
-include examples/*
\ No newline at end of file
+include examples/*
+include thrift/transport/THttpClient.py
\ No newline at end of file
diff --git a/README.md b/README.md
index a2c526c..4608476 100644
--- a/README.md
+++ b/README.md
@@ -1,11 +1,12 @@
-
+
[](https://app.fossa.com/projects/custom%2B50508%2Fgithub.com%2FWPSemantix%2Ftimbr_python_SQLAlchemy?ref=badge_shield&issueType=license)
[](https://app.fossa.com/projects/custom%2B50508%2Fgithub.com%2FWPSemantix%2Ftimbr_python_SQLAlchemy?ref=badge_shield&issueType=security)
-[](https://www.python.org/downloads/release/python-3713/)
-[](https://www.python.org/downloads/release/python-3820/)
[](https://www.python.org/downloads/release/python-3921/)
+[](https://www.python.org/downloads/release/python-31017/)
+[](https://www.python.org/downloads/release/python-31112/)
+[](https://www.python.org/downloads/release/python-3129/)
[](https://badge.fury.io/py/pytimbr-sqla)
@@ -14,7 +15,7 @@ This project is a python connector to timbr using SQLAlchemy.
## Dependencies
- Access to a timbr-server
-- Python from 3.7.13 or newer
+- Python from 3.9.13 or newer
- Support SQLAlchemy 1.4.36 or newer but not version 2.x yet.
- For Linux based machines only install those dependencies first:
- gcc
@@ -27,10 +28,9 @@ This project is a python connector to timbr using SQLAlchemy.
- Ubuntu example:
- apt install gcc, heimdal-dev, krb5, python-devel, python-dev, python-all-dev, libsasl2-dev
-
## Installation
- Install as clone repository:
- - Install Python: https://www.python.org/downloads/release/python-3713/
+ - Install Python: https://www.python.org/downloads/release/python-3913/
- Run the following command to install the Python dependencies: `pip install -r requirements.txt`
- Install using pip and git:
@@ -39,15 +39,6 @@ This project is a python connector to timbr using SQLAlchemy.
- Install using pip:
- `pip install pytimbr-sqla`
-## Known issues
-If you encounter a problem installing `PyHive` with sasl dependencies on windows, install the following wheel (for 64bit Windows) by running:
-
-`pip install https://download.lfd.uci.edu/pythonlibs/archived/cp37/sasl-0.3.1-cp37-cp37m-win_amd64.whl`
-
-For Python 3.9:
-
-`pip install https://download.lfd.uci.edu/pythonlibs/archived/sasl-0.3.1-cp39-cp39-win_amd64.whl`
-
## Sample usage
- For an example of how to use the Python SQLAlchemy connector for timbr, follow this [example file](examples/example.py)
- For an example of how to use the Python SQLAlchemy connector with 'PyHive' as async query for timbr, follow this [example file](examples/pyhive_async_example.py)
diff --git a/requirements.txt b/requirements.txt
index d949c06..a9f4076 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,8 @@
-future==0.18.3
-python-dateutil==2.8.2
-sasl>=0.2.1
-thrift>=0.13.0
-thrift_sasl>=0.1.0
+future==1.0.0
+python-dateutil==2.9.0
+ldap3
+thrift==0.21.0
+thrift_sasl==0.4.3
pure-sasl>=0.6.2
sqlalchemy>=1.4.36,<2.0.0
-requests_kerberos>=0.12.0
+requests_kerberos==0.15.0
diff --git a/setup.py b/setup.py
index c5dd254..b4d802a 100644
--- a/setup.py
+++ b/setup.py
@@ -5,28 +5,28 @@
setuptools.setup(
name='pytimbr_sqla',
- version='1.0.7',
+ version='2.0.0',
author='timbr',
author_email='contact@timbr.ai',
description='Timbr Python SQLAlchemy connector',
long_description=long_description,
long_description_content_type="text/markdown",
url='https://github.com/WPSemantix/timbr_python_SQLAlchemy',
- download_url = 'https://github.com/WPSemantix/timbr_python_SQLAlchemy/archive/refs/tags/v1.0.7.tar.gz',
+ download_url = 'https://github.com/WPSemantix/timbr_python_SQLAlchemy/archive/refs/tags/v2.0.0.tar.gz',
project_urls={
"Bug Tracker": "https://github.com/WPSemantix/timbr_python_SQLAlchemy/issues"
},
license='MIT',
- packages=['pytimbr_sqla', 'TCLIService'],
+ packages=['pytimbr_sqla', 'TCLIService', 'thrift', 'thrift.transport'],
install_requires=[
- 'future',
- 'python-dateutil',
- 'sasl>=0.2.1',
- 'thrift>=0.13.0',
- 'thrift_sasl>=0.1.0',
+ 'future==1.0.0',
+ 'python-dateutil==2.9.0',
+ 'ldap3',
+ 'thrift_sasl==0.4.3',
'pure-sasl>=0.6.2',
'sqlalchemy>=1.4.36,<2.0.0',
- 'requests_kerberos>=0.12.0',
+ 'requests_kerberos==0.15.0',
+ 'pyhive==0.7.0',
],
extras_require={},
package_data={},
@@ -64,9 +64,10 @@
'Topic :: Software Development :: Build Tools',
'License :: OSI Approved :: MIT License',
'Programming Language :: Python :: 3',
- 'Programming Language :: Python :: 3.7',
- 'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
+ 'Programming Language :: Python :: 3.10',
+ 'Programming Language :: Python :: 3.11',
+ 'Programming Language :: Python :: 3.12',
],
entry_points={
'sqlalchemy.dialects': [
diff --git a/test/conftest.py b/test/conftest.py
new file mode 100644
index 0000000..10a143a
--- /dev/null
+++ b/test/conftest.py
@@ -0,0 +1,20 @@
+import os
+import json
+import pytest
+from dotenv import load_dotenv
+
+# Load .env file if it exists
+load_dotenv(override=True)
+
+# Global fixture to load config values
+@pytest.fixture(scope="session")
+def test_config():
+ return {
+ "hostname": os.getenv("HOSTNAME"),
+ "port": os.getenv("PORT"),
+ "protocol": os.getenv("PROTOCOL"),
+ "ontology": os.getenv("ONTOLOGY"),
+ "username": os.getenv("USERNAME"),
+ "password": os.getenv("PASSWORD"),
+ "connect_args": json.loads(os.getenv("CONNECT_ARGS", "{}"))
+ }
diff --git a/test/requirements.txt b/test/requirements.txt
new file mode 100644
index 0000000..3cb219e
Binary files /dev/null and b/test/requirements.txt differ
diff --git a/test/test_hive_dialect.py b/test/test_hive_dialect.py
new file mode 100644
index 0000000..00f542a
--- /dev/null
+++ b/test/test_hive_dialect.py
@@ -0,0 +1,41 @@
+import pytest
+from utils import get_connection_uri_using_hive_dialect, run_query_using_hive_dialect
+
+def create_engine_and_run_query(config, is_async=False):
+ """
+ Creates a SQLAlchemy engine and runs a query using the Hive dialect.
+ """
+ uri = get_connection_uri_using_hive_dialect(
+ hostname=config['hostname'],
+ port=config['port'],
+ protocol=config['protocol'],
+ ontology=config['ontology'],
+ username=config['username'],
+ password=config['password']
+ )
+ return run_query_using_hive_dialect(
+ uri,
+ "SHOW CONCEPTS",
+ config['connect_args'],
+ is_async,
+ )
+
+def test_run_sync_query(test_config):
+ results_obj = create_engine_and_run_query(test_config, is_async=False)
+ results_data = results_obj["results"]
+ results_headers = results_obj["headers"]
+
+ assert results_obj is not None, "Query did not return any results"
+ assert len(results_data) > 0, "Query returned no rows"
+ assert len(results_headers) > 0, "Query returned no columns"
+ assert all(len(row) == len(results_headers) for row in results_data), "Row length does not match header length"
+
+def test_run_async_query(test_config):
+ results_obj = create_engine_and_run_query(test_config, is_async=True)
+ results_data = results_obj["results"]
+ results_headers = results_obj["headers"]
+
+ assert results_obj is not None, "Query did not return any results"
+ assert len(results_data) > 0, "Query returned no rows"
+ assert len(results_headers) > 0, "Query returned no columns"
+ assert all(len(row) == len(results_headers) for row in results_data), "Row length does not match header length"
\ No newline at end of file
diff --git a/test/test_timbr_dialect.py b/test/test_timbr_dialect.py
new file mode 100644
index 0000000..8a6145e
--- /dev/null
+++ b/test/test_timbr_dialect.py
@@ -0,0 +1,20 @@
+import pytest
+from utils import run_query_using_timbr_dialect, get_connection_uri_using_timbr_dialect
+
+def test_run_query(test_config):
+ uri = get_connection_uri_using_timbr_dialect(
+ hostname=test_config['hostname'],
+ port=test_config['port'],
+ protocol=test_config['protocol'],
+ ontology=test_config['ontology'],
+ username=test_config['username'],
+ password=test_config['password']
+ )
+ results_obj = run_query_using_timbr_dialect(uri, "SHOW CONCEPTS", connect_args=test_config['connect_args'])
+ results_data = results_obj["results"]
+ results_headers = results_obj["headers"]
+
+ assert results_obj is not None, "Query did not return any results"
+ assert len(results_data) > 0, "Query returned no rows"
+ assert len(results_headers) > 0, "Query returned no columns"
+ assert all(len(row) == len(results_headers) for row in results_data), "Row length does not match header length"
\ No newline at end of file
diff --git a/test/utils.py b/test/utils.py
new file mode 100644
index 0000000..091ed17
--- /dev/null
+++ b/test/utils.py
@@ -0,0 +1,118 @@
+from sqlalchemy import create_engine
+from TCLIService.ttypes import TOperationState
+
+def set_dialect_for_new_connection_uri(dialect: str, uri: str) -> str:
+ """
+ Sets the dialect for a new connection URI.
+
+ :param dialect: The database dialect to use (e.g., 'timbr', 'hive').
+ :param connection_uri: The original connection URI.
+
+ :return: A new connection URI with the specified dialect.
+ """
+ if not uri.startswith(f"{dialect}+"):
+ return f"{dialect}+{uri}"
+ return uri
+
+def get_connection_uri_using_timbr_dialect(hostname: str, port: int, protocol: str, ontology: str, username: str, password: str) -> str:
+ """
+ Constructs a connection URI for the database using the provided parameters.
+
+ :param hostname: The hostname of the database server.
+ :param port: The port number on which the database server is listening.
+ :param protocol: The protocol to use (e.g., 'http', 'https').
+ :param ontology: The ontology or database name.
+ :param username: The username for authentication.
+ :param password: The password for authentication.
+
+ :return: A formatted connection URI string.
+ """
+ return f"timbr+{protocol}://{username}@{ontology}:{password}@{hostname}:{port}"
+
+def get_connection_uri_using_hive_dialect(hostname: str, port: int, protocol: str, ontology: str, username: str, password: str) -> str:
+ """
+ Constructs a connection URI for the Hive database using the provided parameters.
+
+ :param hostname: The hostname of the Hive server.
+ :param port: The port number on which the Hive server is listening.
+ :param protocol: The protocol to use (e.g., 'http', 'https').
+ :param ontology: The ontology or database name.
+ :param username: The username for authentication.
+ :param password: The password for authentication.
+
+ :return: A formatted connection URI string for Hive.
+ """
+ return f"hive+{protocol}://{username}@{ontology}:{password}@{hostname}:{port}"
+
+def run_query_using_timbr_dialect(uri: str, query: str, connect_args={}) -> object:
+ """
+ Connects to a database using the given URI,
+ executes the provided SQL query,
+ and returns the result object.
+ """
+ # Create new sqlalchemy connection
+ engine = create_engine(uri, connect_args=connect_args)
+
+ # Connect to the created engine
+ conn = engine.connect()
+
+ # Execute a query
+ res_obj = conn.execute(query)
+
+ results_headers = res_obj.keys()
+ results = res_obj.fetchall()
+ connect_args = connect_args or {}
+
+ # Display the results of the execution formatted as a table
+ # Print the columns name
+ print(f"index | {' | '.join(results_headers)}")
+ # Print a separator line
+ print("-" * ((len(results_headers)+1) * 10))
+ # Print the results
+ for res_index, result in enumerate(results, start=1):
+ print(f"{res_index} | {' | '.join(map(str, result))}")
+
+ return dict(results=results, headers=results_headers)
+
+def run_query_using_hive_dialect(uri: str, query: str, connect_args={}, is_async=False) -> object:
+ """
+ Connects to a Hive database using the given URI,
+ executes the provided SQL query,
+ and returns the result object.
+ """
+ # Create new sqlalchemy connection
+ engine = create_engine(uri, connect_args=connect_args)
+
+ # Connect to the created engine
+ conn = engine.connect()
+
+ if is_async:
+ dbapi_conn = engine.raw_connection()
+ cursor = dbapi_conn.cursor()
+
+ # Use the connection to execute a query
+ cursor.execute(query)
+
+ # Check the status of this execution
+ status = cursor.poll().operationState
+ while status in (TOperationState.INITIALIZED_STATE, TOperationState.RUNNING_STATE):
+ status = cursor.poll().operationState
+ # Get the results of the execution
+ results_headers = [(desc[0], desc[1]) for desc in cursor.description]
+ results = cursor.fetchall()
+
+ else:
+ # Use the connection to execute a query
+ res_obj = conn.execute(query)
+ results_headers = [(desc[0], desc[1]) for desc in res_obj.cursor.description]
+ results = res_obj.fetchall()
+
+ # Print the columns name
+ for name, col_type in results_headers:
+ print(f"{name} - {col_type}")
+
+ # Print the results
+ for result in results:
+ print(result)
+
+ return dict(results=results, headers=results_headers)
\ No newline at end of file
diff --git a/thrift/__init__.py b/thrift/__init__.py
new file mode 100644
index 0000000..32113d1
--- /dev/null
+++ b/thrift/__init__.py
@@ -0,0 +1 @@
+# This file makes Python treat the directory as a package.
diff --git a/thrift/transport/THttpClient.py b/thrift/transport/THttpClient.py
new file mode 100644
index 0000000..14cff77
--- /dev/null
+++ b/thrift/transport/THttpClient.py
@@ -0,0 +1,223 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from io import BytesIO
+import os
+import ssl
+import sys
+import warnings
+import base64
+import socket # Ensure socket is imported for socket.error
+import logging # Added for logging
+
+from six.moves import urllib
+from six.moves import http_client
+
+from .TTransport import TTransportBase, TTransportException
+import six
+
+_LOGGER = logging.getLogger(__name__)
+
+
+class THttpClient(TTransportBase):
+ """Http implementation of TTransport base."""
+
+ def __init__(self, uri_or_host, port=None, path=None, cafile=None, cert_file=None, key_file=None, ssl_context=None):
+ """THttpClient supports two different types of construction:
+
+ THttpClient(host, port, path) - deprecated
+ THttpClient(uri, [port=, path=, cafile=, cert_file=, key_file=, ssl_context=])
+
+ Only the second supports https. To properly authenticate against the server,
+ provide the client's identity by specifying cert_file and key_file. To properly
+ authenticate the server, specify either cafile or ssl_context with a CA defined.
+ NOTE: if both cafile and ssl_context are defined, ssl_context will override cafile.
+ """
+ if port is not None:
+ warnings.warn(
+ "Please use the THttpClient('http{s}://host:port/path') constructor",
+ DeprecationWarning,
+ stacklevel=2)
+ self.host = uri_or_host
+ self.port = port
+ assert path
+ self.path = path
+ self.scheme = 'http'
+ else:
+ parsed = urllib.parse.urlparse(uri_or_host)
+ self.scheme = parsed.scheme
+ assert self.scheme in ('http', 'https')
+ if self.scheme == 'http':
+ self.port = parsed.port or http_client.HTTP_PORT
+ elif self.scheme == 'https':
+ self.port = parsed.port or http_client.HTTPS_PORT
+ self.certfile = cert_file
+ self.keyfile = key_file
+ self.context = ssl.create_default_context(cafile=cafile) if (cafile and not ssl_context) else ssl_context
+ self.host = parsed.hostname
+ self.path = parsed.path
+ if parsed.query:
+ self.path += '?%s' % parsed.query
+ try:
+ proxy = urllib.request.getproxies()[self.scheme]
+ except KeyError:
+ proxy = None
+ else:
+ if urllib.request.proxy_bypass(self.host):
+ proxy = None
+ if proxy:
+ parsed = urllib.parse.urlparse(proxy)
+ self.realhost = self.host
+ self.realport = self.port
+ self.host = parsed.hostname
+ self.port = parsed.port
+ self.proxy_auth = self.basic_proxy_auth_header(parsed)
+ else:
+ self.realhost = self.realport = self.proxy_auth = None
+ self.__wbuf = BytesIO()
+ self.__http = None
+ self.__http_response = None
+ self.__timeout = None
+ self.__custom_headers = None
+ self.headers = None
+
+ @staticmethod
+ def basic_proxy_auth_header(proxy):
+ if proxy is None or not proxy.username:
+ return None
+ ap = "%s:%s" % (urllib.parse.unquote(proxy.username),
+ urllib.parse.unquote(proxy.password))
+ cr = base64.b64encode(ap.encode()).strip()
+ return "Basic " + cr
+
+ def using_proxy(self):
+ return self.realhost is not None
+
+ def open(self):
+ """Open the HTTP transport."""
+ try:
+ if self.scheme == 'https':
+ try:
+ # Create SSL context
+ context = ssl.create_default_context()
+ if self.context and self.context.verify_mode is not None:
+ context = self.context
+ if self.certfile:
+ # key_file can be None if the private key is embedded in the cert_file
+ context.load_cert_chain(self.certfile, keyfile=self.keyfile)
+
+ self.__http = http_client.HTTPSConnection(
+ self.host, self.port,
+ timeout=self.__timeout,
+ context=context
+ )
+ except Exception as e:
+ _LOGGER.warning('SSL setup failed: %s', str(e), exc_info=True)
+ self.__http = None # Ensure __http is None on SSL setup failure
+ raise TTransportException(
+ type=TTransportException.SSL_ERROR,
+ message='SSL setup failed: %s' % e
+ )
+ else: # 'http'
+ self.__http = http_client.HTTPConnection(
+ self.host, self.port,
+ timeout=self.__timeout
+ )
+
+ self.__http.connect()
+
+ except (http_client.HTTPException, socket.error, socket.gaierror) as e:
+ # This block catches errors from self.__http.connect() or HTTPConnection/HTTPSConnection instantiation
+ _LOGGER.warning('Connect failed: %s', str(e), exc_info=True)
+ self.__http = None # Ensure __http is None on connect failure
+ raise TTransportException(type=TTransportException.NOT_OPEN, message=str(e))
+
+ def close(self):
+ self.__http.close()
+ self.__http = None
+ self.__http_response = None
+
+ def isOpen(self):
+ return self.__http is not None
+
+ def setTimeout(self, ms):
+ if ms is None:
+ self.__timeout = None
+ else:
+ self.__timeout = ms / 1000.0
+
+ def setCustomHeaders(self, headers):
+ self.__custom_headers = headers
+
+ def read(self, sz):
+ return self.__http_response.read(sz)
+
+ def write(self, buf):
+ self.__wbuf.write(buf)
+
+ def flush(self):
+ if self.isOpen():
+ self.close()
+ self.open()
+
+ # Pull data out of buffer
+ data = self.__wbuf.getvalue()
+ self.__wbuf = BytesIO()
+
+ # HTTP request
+ if self.using_proxy() and self.scheme == "http":
+ # need full URL of real host for HTTP proxy here (HTTPS uses CONNECT tunnel)
+ self.__http.putrequest('POST', "http://%s:%s%s" %
+ (self.realhost, self.realport, self.path))
+ else:
+ self.__http.putrequest('POST', self.path)
+
+ # Write headers
+ self.__http.putheader('Content-Type', 'application/x-thrift')
+ self.__http.putheader('Content-Length', str(len(data)))
+ if self.using_proxy() and self.scheme == "http" and self.proxy_auth is not None:
+ self.__http.putheader("Proxy-Authorization", self.proxy_auth)
+
+ if not self.__custom_headers or 'User-Agent' not in self.__custom_headers:
+ user_agent = 'Python/THttpClient'
+ script = os.path.basename(sys.argv[0])
+ if script:
+ user_agent = '%s (%s)' % (user_agent, urllib.parse.quote(script))
+ self.__http.putheader('User-Agent', user_agent)
+
+ if self.__custom_headers:
+ for key, val in six.iteritems(self.__custom_headers):
+ self.__http.putheader(key, val)
+
+ # Saves the cookie sent by the server in the previous response.
+ # HTTPConnection.putheader can only be called after a request has been
+ # started, and before it's been sent.
+ if self.headers and 'Set-Cookie' in self.headers:
+ self.__http.putheader('Cookie', self.headers['Set-Cookie'])
+
+ self.__http.endheaders()
+
+ # Write payload
+ self.__http.send(data)
+
+ # Get reply to flush the request
+ self.__http_response = self.__http.getresponse()
+ self.code = self.__http_response.status
+ self.message = self.__http_response.reason
+ self.headers = self.__http_response.msg
diff --git a/thrift/transport/__init__.py b/thrift/transport/__init__.py
new file mode 100644
index 0000000..32113d1
--- /dev/null
+++ b/thrift/transport/__init__.py
@@ -0,0 +1 @@
+# This file makes Python treat the directory as a package.