Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,10 @@ jobs:
steps:
- uses: actions/checkout@v3

- name: Set up Python 3.8
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.8
python-version: "3.11"

- name: Install dependencies
run: |
Expand Down Expand Up @@ -112,17 +112,22 @@ jobs:
package-dir: bindings/python
output-dir: wheelhouse
env:
CIBW_ENVIRONMENT_WINDOWS: TOKENIZER_ROOT='${{ github.workspace }}\install'
CIBW_BEFORE_ALL: bindings/python/tools/prepare_build_environment.sh
CIBW_ENVIRONMENT_LINUX: TOKENIZER_ROOT=/project/build/install ICU_ROOT=/project/icu
CIBW_ENVIRONMENT_MACOS: TOKENIZER_ROOT=${GITHUB_WORKSPACE}/build/install
CIBW_ENVIRONMENT_WINDOWS: TOKENIZER_ROOT=${GITHUB_WORKSPACE}/build/install
CIBW_BEFORE_ALL_LINUX: bindings/python/tools/prepare_build_environment_linux.sh
CIBW_BEFORE_ALL_MACOS: bindings/python/tools/prepare_build_environment_macos.sh
CIBW_BEFORE_ALL_WINDOWS: bash bindings/python/tools/prepare_build_environment_windows.sh
CIBW_BEFORE_BUILD: pip install pybind11==2.10.1
CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014
CIBW_MANYLINUX_AARCH64_IMAGE: manylinux2014
CIBW_BUILD: "cp310-* cp311-* cp312-*"
CIBW_TEST_COMMAND: pytest {project}/bindings/python/test/test.py
CIBW_TEST_REQUIRES: pytest
CIBW_ARCHS: ${{ matrix.arch }}
CIBW_SKIP: pp* *-musllinux_*
CIBW_TEST_SKIP: "*-macosx_arm64"
CIBW_REPAIR_WHEEL_COMMAND_MACOS: ""

- name: Upload Python wheels
uses: actions/upload-artifact@v4
Expand Down
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,15 @@ The project follows [semantic versioning 2.0.0](https://semver.org/). The API co

### Fixes and improvements

## [v1.38.0](https://github.com/OpenNMT/Tokenizer/releases/tag/v1.38.0) (2025-12-30)

### Fixes and improvements

* Drop support for Python 3.9 and earlier
* Add support for Python 3.12

### Fixes and improvements

## [v1.37.1](https://github.com/OpenNMT/Tokenizer/releases/tag/v1.37.1) (2023-03-01)

### Fixes and improvements
Expand Down
2 changes: 1 addition & 1 deletion bindings/python/pyonmttok/version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""Version information."""

__version__ = "1.37.1"
__version__ = "1.38.0"
7 changes: 2 additions & 5 deletions bindings/python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,9 @@ def _maybe_add_library_root(lib_name, header_only=False):
"License :: OSI Approved :: MIT License",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Topic :: Text Processing :: Linguistic",
"Topic :: Software Development :: Libraries :: Python Modules",
],
Expand All @@ -94,7 +91,7 @@ def _maybe_add_library_root(lib_name, header_only=False):
keywords="tokenization opennmt unicode bpe sentencepiece subword",
packages=find_packages(),
package_data=package_data,
python_requires=">=3.6",
python_requires=">=3.10",
setup_requires=["pytest-runner"],
tests_require=["pytest"],
ext_modules=[tokenizer_module],
Expand Down
48 changes: 0 additions & 48 deletions bindings/python/tools/prepare_build_environment.sh

This file was deleted.

29 changes: 29 additions & 0 deletions bindings/python/tools/prepare_build_environment_linux.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#! /bin/bash
#
# Prepare the Linux build environment for the Python wheels:
#   1. download ICU sources and build them as static, position-independent
#      libraries installed under ./icu,
#   2. install cmake through pip,
#   3. configure, build and install the Tokenizer library (LIB_ONLY) from a
#      fresh ./build directory.
#
# Must be started from the directory that contains the top-level CMake project,
# because the build directory is created under $PWD and configured with "..".
#
# Environment:
#   ICU_VERSION  ICU release to download, in "major.minor" form (default: 73.2).

# Abort on the first failing command and echo every command for the CI log.
set -e
set -x

ROOT_DIR="$PWD"
ICU_ROOT="$ROOT_DIR/icu"
# Extra cmake arguments. Kept as a plain string that is expanded unquoted at the
# cmake call so it may hold several whitespace-separated options.
CMAKE_EXTRA_ARGS=""

# Download and compile ICU from sources.
ICU_VERSION="${ICU_VERSION:-73.2}"
# The release tag spells the version with "-" and the archive name with "_".
ICU_ARCHIVE="icu4c-${ICU_VERSION/./_}-src.tgz"
# --fail makes curl exit non-zero on an HTTP error instead of saving the error
# page under the archive name, which would only show up later as a tar failure.
curl --fail -L -O "https://github.com/unicode-org/icu/releases/download/release-${ICU_VERSION/./-}/${ICU_ARCHIVE}"
# Extract exactly the archive that was just downloaded rather than any match.
tar xf "$ICU_ARCHIVE"
cd icu/source
# Static libraries built with -fPIC so they can be linked into a shared object.
CFLAGS="-O3 -fPIC" CXXFLAGS="-O3 -fPIC" ./configure --disable-shared --enable-static --prefix="$ICU_ROOT"
make -j2 install

cd "$ROOT_DIR"

# Install cmake.
pip install cmake

# Build Tokenizer.
rm -rf build
mkdir build
cd build
# shellcheck disable=SC2086  # CMAKE_EXTRA_ARGS is split into words on purpose.
cmake -DLIB_ONLY=ON -DICU_ROOT="$ICU_ROOT" $CMAKE_EXTRA_ARGS ..
VERBOSE=1 make -j2 install
cd "$ROOT_DIR"
43 changes: 43 additions & 0 deletions bindings/python/tools/prepare_build_environment_macos.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#! /bin/bash
#
# Prepare the macOS build environment for the Python wheels:
#   - install ICU with Homebrew and mirror it into ./icu without its dynamic
#     libraries,
#   - install cmake through pip,
#   - configure, build and install the Tokenizer library (LIB_ONLY) into
#     ./build/install.
#
# Must be started from the directory that contains the top-level CMake project.

# Stop at the first error and trace every command.
set -ex

ROOT_DIR="$PWD"
ICU_ROOT="$ROOT_DIR/icu"
CMAKE_EXTRA_ARGS=""

mkdir -p "$ICU_ROOT"

# ICU comes from Homebrew; ask brew where the formula lives.
brew install icu4c
brew_icu_dir="$(brew --prefix icu4c)"

# Mirror the Homebrew tree into the local ICU prefix.
rsync -a "$brew_icu_dir/" "$ICU_ROOT/"

# Drop the .dylib files from the copy so that only static libraries remain.
rm -f "$ICU_ROOT/lib/"*.dylib || true

# On an arm64 host, request an arm64 build from CMake explicitly.
case "$(uname -m)" in
    arm64)
        CMAKE_EXTRA_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64"
        ;;
esac

# cmake is installed from PyPI.
pip install cmake

# Configure and build Tokenizer from a clean build directory.
rm -rf build
mkdir build
cd build
# CMAKE_EXTRA_ARGS is deliberately left unquoted so an empty value adds no argument.
cmake -DLIB_ONLY=ON -DICU_ROOT="$ICU_ROOT" -DCMAKE_INSTALL_PREFIX="$ROOT_DIR/build/install" $CMAKE_EXTRA_ARGS ..

VERBOSE=1 make -j2 install
cd "$ROOT_DIR"

Loading