diff --git a/.github/workflows/install-dependencies-and-run-tests.yml b/.github/workflows/install-dependencies-and-run-tests.yml new file mode 100644 index 0000000..673a091 --- /dev/null +++ b/.github/workflows/install-dependencies-and-run-tests.yml @@ -0,0 +1,51 @@ +name: Test Python package new version +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] +jobs: + build: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.9", "3.10", "3.11", "3.12"] + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install Linux dependencies + run: | + sudo apt-get update + sudo apt-get install -y \ + gcc \ + libkrb5-dev \ + libsasl2-dev \ + python3-dev \ + python3-all-dev + - name: Update pip version + run: | + python -m pip install --upgrade pip + - name: Install dependencies + run: | + pip install -r test/requirements.txt + - name: Build new version of the package + run: | + python -m build + - name: Install the new version of the package + run: | + pip install dist/*.tar.gz + - name: Test with pytest + env: + HOSTNAME: ${{ secrets.HOSTNAME }} + PORT: ${{ secrets.PORT }} + PROTOCOL: ${{ secrets.PROTOCOL }} + ONTOLOGY: ${{ secrets.ONTOLOGY }} + USERNAME: ${{ secrets.USERNAME }} + PASSWORD: ${{ secrets.PASSWORD }} + CONNECT_ARGS: ${{ secrets.CONNECT_ARGS }} + run: | + pytest diff --git a/.gitignore b/.gitignore index b453995..b6441d4 100644 --- a/.gitignore +++ b/.gitignore @@ -51,8 +51,9 @@ coverage.xml *.py,cover .hypothesis/ .pytest_cache/ -test.py -test/ +test/env* +test/.python-version +test/.env # Translations *.mo diff --git a/MANIFEST.in b/MANIFEST.in index 5ded86d..e393ebd 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1,2 @@ -include examples/* \ No newline at end of file +include examples/* +include thrift/transport/THttpClient.py \ No newline at end of file diff --git a/README.md b/README.md index a2c526c..4608476 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,12 @@ -![Timbr logo](https://timbr.ai/wp-content/uploads/2023/06/timbr-ai-l-5-226x60-1.png) +![Timbr logo](https://timbr.ai/wp-content/uploads/2025/01/logotimbrai230125.png) [![FOSSA Status](https://app.fossa.com/api/projects/custom%2B50508%2Fgithub.com%2FWPSemantix%2Ftimbr_python_SQLAlchemy.svg?type=shield&issueType=license)](https://app.fossa.com/projects/custom%2B50508%2Fgithub.com%2FWPSemantix%2Ftimbr_python_SQLAlchemy?ref=badge_shield&issueType=license) [![FOSSA Status](https://app.fossa.com/api/projects/custom%2B50508%2Fgithub.com%2FWPSemantix%2Ftimbr_python_SQLAlchemy.svg?type=shield&issueType=security)](https://app.fossa.com/projects/custom%2B50508%2Fgithub.com%2FWPSemantix%2Ftimbr_python_SQLAlchemy?ref=badge_shield&issueType=security) -[![Python 3.7.13](https://img.shields.io/badge/python-3.7.13+-blue.svg)](https://www.python.org/downloads/release/python-3713/) -[![Python 3.8](https://img.shields.io/badge/python-3.8-blue.svg)](https://www.python.org/downloads/release/python-3820/) [![Python 3.9](https://img.shields.io/badge/python-3.9-blue.svg)](https://www.python.org/downloads/release/python-3921/) +[![Python 3.10](https://img.shields.io/badge/python-3.10-blue.svg)](https://www.python.org/downloads/release/python-31017/) +[![Python 3.11](https://img.shields.io/badge/python-3.11-blue.svg)](https://www.python.org/downloads/release/python-31112/) +[![Python 3.12](https://img.shields.io/badge/python-3.12-blue.svg)](https://www.python.org/downloads/release/python-3129/) [![PypiVersion](https://img.shields.io/pypi/v/pytimbr-sqla.svg)](https://badge.fury.io/py/pytimbr-sqla) @@ -14,7 +15,7 @@ This project is a python connector to timbr using SQLAlchemy. ## Dependencies - Access to a timbr-server -- Python from 3.7.13 or newer +- Python from 3.9.13 or newer - Support SQLAlchemy 1.4.36 or newer but not version 2.x yet. - For Linux based machines only install those dependencies first: - gcc @@ -27,10 +28,9 @@ This project is a python connector to timbr using SQLAlchemy. - Ubuntu example: - apt install gcc, heimdal-dev, krb5, python-devel, python-dev, python-all-dev, libsasl2-dev - ## Installation - Install as clone repository: - - Install Python: https://www.python.org/downloads/release/python-3713/ + - Install Python: https://www.python.org/downloads/release/python-3913/ - Run the following command to install the Python dependencies: `pip install -r requirements.txt` - Install using pip and git: @@ -39,15 +39,6 @@ This project is a python connector to timbr using SQLAlchemy. - Install using pip: - `pip install pytimbr-sqla` -## Known issues -If you encounter a problem installing `PyHive` with sasl dependencies on windows, install the following wheel (for 64bit Windows) by running: - -`pip install https://download.lfd.uci.edu/pythonlibs/archived/cp37/sasl-0.3.1-cp37-cp37m-win_amd64.whl` - -For Python 3.9: - -`pip install https://download.lfd.uci.edu/pythonlibs/archived/sasl-0.3.1-cp39-cp39-win_amd64.whl` - ## Sample usage - For an example of how to use the Python SQLAlchemy connector for timbr, follow this [example file](examples/example.py) - For an example of how to use the Python SQLAlchemy connector with 'PyHive' as async query for timbr, follow this [example file](examples/pyhive_async_example.py) diff --git a/requirements.txt b/requirements.txt index d949c06..a9f4076 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ -future==0.18.3 -python-dateutil==2.8.2 -sasl>=0.2.1 -thrift>=0.13.0 -thrift_sasl>=0.1.0 +future==1.0.0 +python-dateutil==2.9.0 +ldap3 +thrift==0.21.0 +thrift_sasl==0.4.3 pure-sasl>=0.6.2 sqlalchemy>=1.4.36,<2.0.0 -requests_kerberos>=0.12.0 +requests_kerberos==0.15.0 diff --git a/setup.py b/setup.py index c5dd254..b4d802a 100644 --- a/setup.py +++ b/setup.py @@ -5,28 +5,28 @@ setuptools.setup( name='pytimbr_sqla', - version='1.0.7', + version='2.0.0', author='timbr', author_email='contact@timbr.ai', description='Timbr Python SQLAlchemy connector', long_description=long_description, long_description_content_type="text/markdown", url='https://github.com/WPSemantix/timbr_python_SQLAlchemy', - download_url = 'https://github.com/WPSemantix/timbr_python_SQLAlchemy/archive/refs/tags/v1.0.7.tar.gz', + download_url = 'https://github.com/WPSemantix/timbr_python_SQLAlchemy/archive/refs/tags/v2.0.0.tar.gz', project_urls={ "Bug Tracker": "https://github.com/WPSemantix/timbr_python_SQLAlchemy/issues" }, license='MIT', - packages=['pytimbr_sqla', 'TCLIService'], + packages=['pytimbr_sqla', 'TCLIService', 'thrift', 'thrift.transport'], install_requires=[ - 'future', - 'python-dateutil', - 'sasl>=0.2.1', - 'thrift>=0.13.0', - 'thrift_sasl>=0.1.0', + 'future==1.0.0', + 'python-dateutil==2.9.0', + 'ldap3', + 'thrift_sasl==0.4.3', 'pure-sasl>=0.6.2', 'sqlalchemy>=1.4.36,<2.0.0', - 'requests_kerberos>=0.12.0', + 'requests_kerberos==0.15.0', + 'pyhive==0.7.0', ], extras_require={}, package_data={}, @@ -64,9 +64,10 @@ 'Topic :: Software Development :: Build Tools', 'License :: OSI Approved :: MIT License', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', ], entry_points={ 'sqlalchemy.dialects': [ diff --git a/test/conftest.py b/test/conftest.py new file mode 100644 index 0000000..10a143a --- /dev/null +++ b/test/conftest.py @@ -0,0 +1,20 @@ +import os +import json +import pytest +from dotenv import load_dotenv + +# Load .env file if it exists +load_dotenv(override=True) + +# Global fixture to load config values +@pytest.fixture(scope="session") +def test_config(): + return { + "hostname": os.getenv("HOSTNAME"), + "port": os.getenv("PORT"), + "protocol": os.getenv("PROTOCOL"), + "ontology": os.getenv("ONTOLOGY"), + "username": os.getenv("USERNAME"), + "password": os.getenv("PASSWORD"), + "connect_args": json.loads(os.getenv("CONNECT_ARGS", "{}")) + } diff --git a/test/requirements.txt b/test/requirements.txt new file mode 100644 index 0000000..3cb219e Binary files /dev/null and b/test/requirements.txt differ diff --git a/test/test_hive_dialect.py b/test/test_hive_dialect.py new file mode 100644 index 0000000..00f542a --- /dev/null +++ b/test/test_hive_dialect.py @@ -0,0 +1,41 @@ +import pytest +from utils import get_connection_uri_using_hive_dialect, run_query_using_hive_dialect + +def create_engine_and_run_query(config, is_async=False): + """ + Creates a SQLAlchemy engine and runs a query using the Hive dialect. + """ + uri = get_connection_uri_using_hive_dialect( + hostname=config['hostname'], + port=config['port'], + protocol=config['protocol'], + ontology=config['ontology'], + username=config['username'], + password=config['password'] + ) + return run_query_using_hive_dialect( + uri, + "SHOW CONCEPTS", + config['connect_args'], + is_async, + ) + +def test_run_sync_query(test_config): + results_obj = create_engine_and_run_query(test_config, is_async=False) + results_data = results_obj["results"] + results_headers = results_obj["headers"] + + assert results_obj is not None, "Query did not return any results" + assert len(results_data) > 0, "Query returned no rows" + assert len(results_headers) > 0, "Query returned no columns" + assert all(len(row) == len(results_headers) for row in results_data), "Row length does not match header length" + +def test_run_async_query(test_config): + results_obj = create_engine_and_run_query(test_config, is_async=True) + results_data = results_obj["results"] + results_headers = results_obj["headers"] + + assert results_obj is not None, "Query did not return any results" + assert len(results_data) > 0, "Query returned no rows" + assert len(results_headers) > 0, "Query returned no columns" + assert all(len(row) == len(results_headers) for row in results_data), "Row length does not match header length" \ No newline at end of file diff --git a/test/test_timbr_dialect.py b/test/test_timbr_dialect.py new file mode 100644 index 0000000..8a6145e --- /dev/null +++ b/test/test_timbr_dialect.py @@ -0,0 +1,20 @@ +import pytest +from utils import run_query_using_timbr_dialect, get_connection_uri_using_timbr_dialect + +def test_run_query(test_config): + uri = get_connection_uri_using_timbr_dialect( + hostname=test_config['hostname'], + port=test_config['port'], + protocol=test_config['protocol'], + ontology=test_config['ontology'], + username=test_config['username'], + password=test_config['password'] + ) + results_obj = run_query_using_timbr_dialect(uri, "SHOW CONCEPTS", connect_args=test_config['connect_args']) + results_data = results_obj["results"] + results_headers = results_obj["headers"] + + assert results_obj is not None, "Query did not return any results" + assert len(results_data) > 0, "Query returned no rows" + assert len(results_headers) > 0, "Query returned no columns" + assert all(len(row) == len(results_headers) for row in results_data), "Row length does not match header length" \ No newline at end of file diff --git a/test/utils.py b/test/utils.py new file mode 100644 index 0000000..091ed17 --- /dev/null +++ b/test/utils.py @@ -0,0 +1,118 @@ +from sqlalchemy import create_engine +from TCLIService.ttypes import TOperationState + +def set_dialect_for_new_connection_uri(dialect: str, uri: str) -> str: + """ + Sets the dialect for a new connection URI. + + :param dialect: The database dialect to use (e.g., 'timbr', 'hive'). + :param connection_uri: The original connection URI. + + :return: A new connection URI with the specified dialect. + """ + if not uri.startswith(f"{dialect}+"): + return f"{dialect}+{uri}" + return uri + +def get_connection_uri_using_timbr_dialect(hostname: str, port: int, protocol: str, ontology: str, username: str, password: str) -> str: + """ + Constructs a connection URI for the database using the provided parameters. + + :param hostname: The hostname of the database server. + :param port: The port number on which the database server is listening. + :param protocol: The protocol to use (e.g., 'http', 'https'). + :param ontology: The ontology or database name. + :param username: The username for authentication. + :param password: The password for authentication. + + :return: A formatted connection URI string. + """ + return f"timbr+{protocol}://{username}@{ontology}:{password}@{hostname}:{port}" + +def get_connection_uri_using_hive_dialect(hostname: str, port: int, protocol: str, ontology: str, username: str, password: str) -> str: + """ + Constructs a connection URI for the Hive database using the provided parameters. + + :param hostname: The hostname of the Hive server. + :param port: The port number on which the Hive server is listening. + :param protocol: The protocol to use (e.g., 'http', 'https'). + :param ontology: The ontology or database name. + :param username: The username for authentication. + :param password: The password for authentication. + + :return: A formatted connection URI string for Hive. + """ + return f"hive+{protocol}://{username}@{ontology}:{password}@{hostname}:{port}" + +def run_query_using_timbr_dialect(uri: str, query: str, connect_args={}) -> object: + """ + Connects to a database using the given URI, + executes the provided SQL query, + and returns the result object. + """ + # Create new sqlalchemy connection + engine = create_engine(uri, connect_args=connect_args) + + # Connect to the created engine + conn = engine.connect() + + # Execute a query + res_obj = conn.execute(query) + + results_headers = res_obj.keys() + results = res_obj.fetchall() + connect_args = connect_args or {} + + # Display the results of the execution formatted as a table + # Print the columns name + print(f"index | {' | '.join(results_headers)}") + # Print a separator line + print("-" * ((len(results_headers)+1) * 10)) + # Print the results + for res_index, result in enumerate(results, start=1): + print(f"{res_index} | {' | '.join(map(str, result))}") + + return dict(results=results, headers=results_headers) + +def run_query_using_hive_dialect(uri: str, query: str, connect_args={}, is_async=False) -> object: + """ + Connects to a Hive database using the given URI, + executes the provided SQL query, + and returns the result object. + """ + # Create new sqlalchemy connection + engine = create_engine(uri, connect_args=connect_args) + + # Connect to the created engine + conn = engine.connect() + + if is_async: + dbapi_conn = engine.raw_connection() + cursor = dbapi_conn.cursor() + + # Use the connection to execute a query + cursor.execute(query) + + # Check the status of this execution + status = cursor.poll().operationState + while status in (TOperationState.INITIALIZED_STATE, TOperationState.RUNNING_STATE): + status = cursor.poll().operationState + # Get the results of the execution + results_headers = [(desc[0], desc[1]) for desc in cursor.description] + results = cursor.fetchall() + + else: + # Use the connection to execute a query + res_obj = conn.execute(query) + results_headers = [(desc[0], desc[1]) for desc in res_obj.cursor.description] + results = res_obj.fetchall() + + # Print the columns name + for name, col_type in results_headers: + print(f"{name} - {col_type}") + + # Print the results + for result in results: + print(result) + + return dict(results=results, headers=results_headers) \ No newline at end of file diff --git a/thrift/__init__.py b/thrift/__init__.py new file mode 100644 index 0000000..32113d1 --- /dev/null +++ b/thrift/__init__.py @@ -0,0 +1 @@ +# This file makes Python treat the directory as a package. diff --git a/thrift/transport/THttpClient.py b/thrift/transport/THttpClient.py new file mode 100644 index 0000000..14cff77 --- /dev/null +++ b/thrift/transport/THttpClient.py @@ -0,0 +1,223 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +from io import BytesIO +import os +import ssl +import sys +import warnings +import base64 +import socket # Ensure socket is imported for socket.error +import logging # Added for logging + +from six.moves import urllib +from six.moves import http_client + +from .TTransport import TTransportBase, TTransportException +import six + +_LOGGER = logging.getLogger(__name__) + + +class THttpClient(TTransportBase): + """Http implementation of TTransport base.""" + + def __init__(self, uri_or_host, port=None, path=None, cafile=None, cert_file=None, key_file=None, ssl_context=None): + """THttpClient supports two different types of construction: + + THttpClient(host, port, path) - deprecated + THttpClient(uri, [port=, path=, cafile=, cert_file=, key_file=, ssl_context=]) + + Only the second supports https. To properly authenticate against the server, + provide the client's identity by specifying cert_file and key_file. To properly + authenticate the server, specify either cafile or ssl_context with a CA defined. + NOTE: if both cafile and ssl_context are defined, ssl_context will override cafile. + """ + if port is not None: + warnings.warn( + "Please use the THttpClient('http{s}://host:port/path') constructor", + DeprecationWarning, + stacklevel=2) + self.host = uri_or_host + self.port = port + assert path + self.path = path + self.scheme = 'http' + else: + parsed = urllib.parse.urlparse(uri_or_host) + self.scheme = parsed.scheme + assert self.scheme in ('http', 'https') + if self.scheme == 'http': + self.port = parsed.port or http_client.HTTP_PORT + elif self.scheme == 'https': + self.port = parsed.port or http_client.HTTPS_PORT + self.certfile = cert_file + self.keyfile = key_file + self.context = ssl.create_default_context(cafile=cafile) if (cafile and not ssl_context) else ssl_context + self.host = parsed.hostname + self.path = parsed.path + if parsed.query: + self.path += '?%s' % parsed.query + try: + proxy = urllib.request.getproxies()[self.scheme] + except KeyError: + proxy = None + else: + if urllib.request.proxy_bypass(self.host): + proxy = None + if proxy: + parsed = urllib.parse.urlparse(proxy) + self.realhost = self.host + self.realport = self.port + self.host = parsed.hostname + self.port = parsed.port + self.proxy_auth = self.basic_proxy_auth_header(parsed) + else: + self.realhost = self.realport = self.proxy_auth = None + self.__wbuf = BytesIO() + self.__http = None + self.__http_response = None + self.__timeout = None + self.__custom_headers = None + self.headers = None + + @staticmethod + def basic_proxy_auth_header(proxy): + if proxy is None or not proxy.username: + return None + ap = "%s:%s" % (urllib.parse.unquote(proxy.username), + urllib.parse.unquote(proxy.password)) + cr = base64.b64encode(ap.encode()).strip() + return "Basic " + cr + + def using_proxy(self): + return self.realhost is not None + + def open(self): + """Open the HTTP transport.""" + try: + if self.scheme == 'https': + try: + # Create SSL context + context = ssl.create_default_context() + if self.context and self.context.verify_mode is not None: + context = self.context + if self.certfile: + # key_file can be None if the private key is embedded in the cert_file + context.load_cert_chain(self.certfile, keyfile=self.keyfile) + + self.__http = http_client.HTTPSConnection( + self.host, self.port, + timeout=self.__timeout, + context=context + ) + except Exception as e: + _LOGGER.warning('SSL setup failed: %s', str(e), exc_info=True) + self.__http = None # Ensure __http is None on SSL setup failure + raise TTransportException( + type=TTransportException.SSL_ERROR, + message='SSL setup failed: %s' % e + ) + else: # 'http' + self.__http = http_client.HTTPConnection( + self.host, self.port, + timeout=self.__timeout + ) + + self.__http.connect() + + except (http_client.HTTPException, socket.error, socket.gaierror) as e: + # This block catches errors from self.__http.connect() or HTTPConnection/HTTPSConnection instantiation + _LOGGER.warning('Connect failed: %s', str(e), exc_info=True) + self.__http = None # Ensure __http is None on connect failure + raise TTransportException(type=TTransportException.NOT_OPEN, message=str(e)) + + def close(self): + self.__http.close() + self.__http = None + self.__http_response = None + + def isOpen(self): + return self.__http is not None + + def setTimeout(self, ms): + if ms is None: + self.__timeout = None + else: + self.__timeout = ms / 1000.0 + + def setCustomHeaders(self, headers): + self.__custom_headers = headers + + def read(self, sz): + return self.__http_response.read(sz) + + def write(self, buf): + self.__wbuf.write(buf) + + def flush(self): + if self.isOpen(): + self.close() + self.open() + + # Pull data out of buffer + data = self.__wbuf.getvalue() + self.__wbuf = BytesIO() + + # HTTP request + if self.using_proxy() and self.scheme == "http": + # need full URL of real host for HTTP proxy here (HTTPS uses CONNECT tunnel) + self.__http.putrequest('POST', "http://%s:%s%s" % + (self.realhost, self.realport, self.path)) + else: + self.__http.putrequest('POST', self.path) + + # Write headers + self.__http.putheader('Content-Type', 'application/x-thrift') + self.__http.putheader('Content-Length', str(len(data))) + if self.using_proxy() and self.scheme == "http" and self.proxy_auth is not None: + self.__http.putheader("Proxy-Authorization", self.proxy_auth) + + if not self.__custom_headers or 'User-Agent' not in self.__custom_headers: + user_agent = 'Python/THttpClient' + script = os.path.basename(sys.argv[0]) + if script: + user_agent = '%s (%s)' % (user_agent, urllib.parse.quote(script)) + self.__http.putheader('User-Agent', user_agent) + + if self.__custom_headers: + for key, val in six.iteritems(self.__custom_headers): + self.__http.putheader(key, val) + + # Saves the cookie sent by the server in the previous response. + # HTTPConnection.putheader can only be called after a request has been + # started, and before it's been sent. + if self.headers and 'Set-Cookie' in self.headers: + self.__http.putheader('Cookie', self.headers['Set-Cookie']) + + self.__http.endheaders() + + # Write payload + self.__http.send(data) + + # Get reply to flush the request + self.__http_response = self.__http.getresponse() + self.code = self.__http_response.status + self.message = self.__http_response.reason + self.headers = self.__http_response.msg diff --git a/thrift/transport/__init__.py b/thrift/transport/__init__.py new file mode 100644 index 0000000..32113d1 --- /dev/null +++ b/thrift/transport/__init__.py @@ -0,0 +1 @@ +# This file makes Python treat the directory as a package.