Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 14 additions & 134 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -1,143 +1,23 @@
# CI — thin wrapper that calls the reusable test workflow.

name: CI

on:
pull_request:
branches: [main]
types: [opened, synchronize, reopened, ready_for_review]
workflow_dispatch:
workflow_call:

env:
CARGO_TERM_COLOR: always

jobs:
# Lint and format check (single job)
lint:
name: Lint & Format
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

- name: Install Rust
uses: dtolnay/rust-toolchain@stable
with:
components: rustfmt, clippy
concurrency:
group: ci-${{ github.ref }}
cancel-in-progress: true

- name: Install PCRE2
run: sudo apt-get update && sudo apt-get install -y libpcre2-dev
permissions:
contents: read

- name: Check formatting
run: cargo fmt --all --check

- name: Run clippy
run: cargo clippy --all-targets -- -D warnings

# Rust tests on multiple platforms
jobs:
test:
name: Test (${{ matrix.os }})
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]

steps:
- uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.12'

- name: Install Rust
uses: dtolnay/rust-toolchain@stable

- name: Install PCRE2 (Ubuntu)
if: matrix.os == 'ubuntu-latest'
run: sudo apt-get update && sudo apt-get install -y libpcre2-dev

- name: Configure Python for PyO3 (Ubuntu)
if: matrix.os == 'ubuntu-latest'
run: echo "PYO3_PYTHON=$(which python3)" >> $GITHUB_ENV

- name: Install PCRE2 (macOS)
if: matrix.os == 'macos-latest'
run: brew install pcre2

- name: Configure Python for PyO3 (macOS)
if: matrix.os == 'macos-latest'
run: |
echo "PYO3_PYTHON=$(which python3)" >> $GITHUB_ENV
# Get Python library directory and set for linker
PYTHON_PREFIX=$(python3 -c "import sys; print(sys.prefix)")
echo "LIBRARY_PATH=${PYTHON_PREFIX}/lib" >> $GITHUB_ENV
echo "DYLD_LIBRARY_PATH=${PYTHON_PREFIX}/lib" >> $GITHUB_ENV
# Tell Cargo to link the Python framework
echo "CARGO_BUILD_RUSTFLAGS=-C link-arg=-undefined -C link-arg=dynamic_lookup" >> $GITHUB_ENV

- name: Install PCRE2 (Windows)
if: matrix.os == 'windows-latest'
run: |
vcpkg install pcre2:x64-windows
echo "PCRE2_SYS_STATIC=1" >> $env:GITHUB_ENV

- name: Configure Python for PyO3 (Windows)
if: matrix.os == 'windows-latest'
run: |
$pythonPath = (Get-Command python).Source
echo "PYO3_PYTHON=$pythonPath" >> $env:GITHUB_ENV

- name: Run tests
run: cargo test

# Python bindings tests
python:
name: Python tests
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.12'

- name: Install PCRE2
run: sudo apt-get update && sudo apt-get install -y libpcre2-dev

- name: Install dependencies and build
run: |
python -m venv .venv
. .venv/bin/activate
python -m pip install --upgrade pip
pip install maturin tiktoken
maturin develop --release

- name: Test Python bindings
run: |
.venv/bin/python -c "
import splintr
import tiktoken

# Test cl100k_base
tok = splintr.Tokenizer.from_pretrained('cl100k_base')
tik = tiktoken.get_encoding('cl100k_base')

text = 'Hello, world!'
assert tok.encode(text) == list(tik.encode(text)), 'cl100k_base mismatch'

# Test o200k_base
tok2 = splintr.Tokenizer.from_pretrained('o200k_base')
tik2 = tiktoken.get_encoding('o200k_base')
assert tok2.encode(text) == list(tik2.encode(text)), 'o200k_base mismatch'

# Test streaming decoder
decoder = tok.streaming_decoder()
tokens = tok.encode('Hello')
result = []
for t in tokens:
chunk = decoder.add_token(t)
if chunk:
result.append(chunk)
result.append(decoder.flush())
assert ''.join(result) == 'Hello', 'Streaming decoder failed'

print('All Python tests passed!')
"
if: github.event.pull_request.draft == false
name: Test Suite
uses: ./.github/workflows/test.yml
19 changes: 10 additions & 9 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
name: Release
run-name: Release ${{ github.ref_name }}

on:
push:
tags:
- 'v*'
- "v*"

permissions:
contents: read
Expand All @@ -18,7 +19,7 @@ jobs:
pypi_version: ${{ steps.version.outputs.pypi_version }}
base_version: ${{ steps.version.outputs.base_version }}
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5

- name: Validate and extract version
id: version
Expand Down Expand Up @@ -78,7 +79,7 @@ jobs:
needs: validate-version
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5

- name: Install Rust
uses: dtolnay/rust-toolchain@stable
Expand All @@ -102,7 +103,7 @@ jobs:
run: cargo publish --allow-dirty --token ${{ secrets.CARGO_REGISTRY_TOKEN }}
env:
# Enable PCRE2 JIT compilation
PCRE2_SYS_JIT: '1'
PCRE2_SYS_JIT: "1"

# Build Python wheels for multiple platforms
build-wheels:
Expand All @@ -115,12 +116,12 @@ jobs:
os: [ubuntu-latest, macos-15-intel, macos-14, windows-latest]

steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.12'
python-version: "3.12"

- name: Update version in pyproject.toml
shell: bash
Expand Down Expand Up @@ -156,11 +157,11 @@ jobs:
# Build with python feature (PyO3 bindings) + pcre2 (PCRE2 backend with JIT)
# Note: regexr's SIMD uses runtime detection, JIT is compiled at build time
args: --release --out dist --features python,pcre2
sccache: 'true'
sccache: "true"
manylinux: auto
env:
# Enable PCRE2 JIT compilation
PCRE2_SYS_JIT: '1'
PCRE2_SYS_JIT: "1"

- name: Upload wheels
uses: actions/upload-artifact@v4
Expand All @@ -174,7 +175,7 @@ jobs:
needs: validate-version
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5

- name: Update version in pyproject.toml
run: |
Expand Down
163 changes: 163 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
# Reusable test workflow: lint, check, and test.
#
# Called by:
#   - ci.yml (PR checks)
#   - release.yml (pre-publish gate)
#
# This workflow only has a `workflow_call` trigger, so it never runs on its
# own; it runs when another workflow references it via `uses:`.

name: Test

on:
  workflow_call:

# Every job here only reads the repository, so the token is kept read-only.
permissions:
  contents: read

env:
  CARGO_TERM_COLOR: always

jobs:
  # Formatting and clippy checks on a single Linux runner.
  lint:
    name: Lint & Format
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5

      - name: Install Rust
        uses: dtolnay/rust-toolchain@stable
        with:
          components: rustfmt, clippy

      - name: Install PCRE2
        run: sudo apt-get update && sudo apt-get install -y libpcre2-dev

      # Each job uses its own prefix-key so the caches are kept separate.
      - uses: Swatinem/rust-cache@v2
        with:
          prefix-key: lint

      - name: Check formatting
        run: cargo fmt --all --check

      # -D warnings turns every clippy warning into a failure.
      - name: Run clippy
        run: cargo clippy --all-targets -- -D warnings

  # `cargo test` on Linux, macOS and Windows.
  test:
    name: Test (${{ matrix.target }})
    runs-on: ${{ matrix.runs-on }}
    strategy:
      # Let the remaining platforms finish when one of them fails.
      fail-fast: false
      matrix:
        # `target` is only used for the job name and the cache key below;
        # `cargo test` is invoked without --target.
        include:
          - runs-on: ubuntu-latest
            target: x86_64-unknown-linux-gnu
          - runs-on: macos-latest
            target: aarch64-apple-darwin
          - runs-on: windows-latest
            target: x86_64-pc-windows-msvc
    steps:
      - uses: actions/checkout@v5

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install Rust
        uses: dtolnay/rust-toolchain@stable

      - uses: Swatinem/rust-cache@v2
        with:
          prefix-key: test-${{ matrix.target }}

      - name: Install PCRE2 (Ubuntu)
        if: runner.os == 'Linux'
        run: sudo apt-get update && sudo apt-get install -y libpcre2-dev

      # Values appended to $GITHUB_ENV become environment variables for the
      # steps that follow in this job.
      - name: Configure Python for PyO3 (Ubuntu)
        if: runner.os == 'Linux'
        run: echo "PYO3_PYTHON=$(which python3)" >> "$GITHUB_ENV"

      - name: Install PCRE2 (macOS)
        if: runner.os == 'macOS'
        run: brew install pcre2

      - name: Configure Python for PyO3 (macOS)
        if: runner.os == 'macOS'
        run: |
          echo "PYO3_PYTHON=$(which python3)" >> "$GITHUB_ENV"
          # Expose the interpreter's lib directory to the linker and loader.
          PYTHON_PREFIX=$(python3 -c "import sys; print(sys.prefix)")
          echo "LIBRARY_PATH=${PYTHON_PREFIX}/lib" >> "$GITHUB_ENV"
          echo "DYLD_LIBRARY_PATH=${PYTHON_PREFIX}/lib" >> "$GITHUB_ENV"
          # Pass `-undefined dynamic_lookup` to the linker for all Cargo builds.
          echo "CARGO_BUILD_RUSTFLAGS=-C link-arg=-undefined -C link-arg=dynamic_lookup" >> "$GITHUB_ENV"

      # The two Windows steps use PowerShell syntax ($env:GITHUB_ENV).
      - name: Install PCRE2 (Windows)
        if: runner.os == 'Windows'
        run: |
          vcpkg install pcre2:x64-windows
          echo "PCRE2_SYS_STATIC=1" >> $env:GITHUB_ENV

      - name: Configure Python for PyO3 (Windows)
        if: runner.os == 'Windows'
        run: |
          $pythonPath = (Get-Command python).Source
          echo "PYO3_PYTHON=$pythonPath" >> $env:GITHUB_ENV

      - name: Run tests
        run: cargo test

  # Builds the Python extension with maturin and compares it with tiktoken.
  python:
    name: Python bindings
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      # Select the toolchain explicitly, as the lint and test jobs do, and do
      # it before rust-cache runs.
      - name: Install Rust
        uses: dtolnay/rust-toolchain@stable

      - name: Install PCRE2
        run: sudo apt-get update && sudo apt-get install -y libpcre2-dev

      - uses: Swatinem/rust-cache@v2
        with:
          prefix-key: python

      - name: Install dependencies and build
        run: |
          python -m venv .venv
          . .venv/bin/activate
          python -m pip install --upgrade pip
          pip install maturin tiktoken
          maturin develop --release

      # The script is fed through a quoted heredoc so the shell passes it to
      # Python verbatim (no quote or `$` processing).
      - name: Test Python bindings
        run: |
          .venv/bin/python - <<'PY'
          import splintr
          import tiktoken

          # Test cl100k_base
          tok = splintr.Tokenizer.from_pretrained('cl100k_base')
          tik = tiktoken.get_encoding('cl100k_base')

          text = 'Hello, world!'
          assert tok.encode(text) == list(tik.encode(text)), 'cl100k_base mismatch'

          # Test o200k_base
          tok2 = splintr.Tokenizer.from_pretrained('o200k_base')
          tik2 = tiktoken.get_encoding('o200k_base')
          assert tok2.encode(text) == list(tik2.encode(text)), 'o200k_base mismatch'

          # Test streaming decoder
          decoder = tok.streaming_decoder()
          tokens = tok.encode('Hello')
          result = []
          for t in tokens:
              chunk = decoder.add_token(t)
              if chunk:
                  result.append(chunk)
          result.append(decoder.flush())
          assert ''.join(result) == 'Hello', 'Streaming decoder failed'

          print('All Python tests passed!')
          PY
Loading
Loading