From 68a3ab4f40cb1b70be7f1256f67d54d60edb9f8e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 8 Apr 2026 06:47:40 +0000 Subject: [PATCH 01/12] Initial plan From 73083656ffce0dd46ed1894edbcc2454480e7022 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 8 Apr 2026 07:14:06 +0000 Subject: [PATCH 02/12] Convert to monorepo with GPU provider packages and device/provider API Agent-Logs-Url: https://github.com/jsilvanus/embedeer/sessions/898517f6-b6e8-43fa-98b5-5a16e9745dc5 Co-authored-by: jsilvanus <22452468+jsilvanus@users.noreply.github.com> --- .gitignore | 2 + README.md | 176 ++++------- package-lock.json | 66 ++++- package.json | 30 +- packages/embedeer/README.md | 224 ++++++++++++++ packages/embedeer/package.json | 37 +++ .../embedeer/src}/child-process-worker.js | 0 {src => packages/embedeer/src}/cli.js | 13 + {src => packages/embedeer/src}/embedder.js | 4 + {src => packages/embedeer/src}/index.js | 0 {src => packages/embedeer/src}/model-cache.js | 0 packages/embedeer/src/provider-loader.js | 150 ++++++++++ .../embedeer/src}/thread-worker-script.js | 11 +- .../embedeer/src}/thread-worker.js | 0 {src => packages/embedeer/src}/worker-pool.js | 10 +- {src => packages/embedeer/src}/worker.js | 9 +- .../test}/child-process-worker.test.js | 0 .../embedeer/test}/cli-format.test.js | 0 .../embedeer/test}/embedder-options.test.js | 0 .../embedeer/test}/embedder.test.js | 0 .../embedeer/test}/helpers/crash-worker.js | 0 .../test}/helpers/echo-thread-worker.js | 0 .../embedeer/test}/helpers/echo-worker.js | 0 .../embedeer/test/provider-loader.test.js | 279 ++++++++++++++++++ .../embedeer/test}/thread-worker.test.js | 0 .../test}/worker-pool-options.test.js | 0 .../embedeer/test}/worker-pool.test.js | 0 packages/ort-linux-x64-cuda/README.md | 113 +++++++ packages/ort-linux-x64-cuda/index.js | 66 +++++ packages/ort-linux-x64-cuda/install.js | 94 ++++++ 
packages/ort-linux-x64-cuda/package.json | 27 ++ packages/ort-win32-x64-cuda/README.md | 26 ++ packages/ort-win32-x64-cuda/index.js | 37 +++ packages/ort-win32-x64-cuda/install.js | 37 +++ packages/ort-win32-x64-cuda/package.json | 27 ++ packages/ort-win32-x64-dml/README.md | 30 ++ packages/ort-win32-x64-dml/index.js | 40 +++ packages/ort-win32-x64-dml/install.js | 44 +++ packages/ort-win32-x64-dml/package.json | 28 ++ 39 files changed, 1425 insertions(+), 155 deletions(-) create mode 100644 packages/embedeer/README.md create mode 100644 packages/embedeer/package.json rename {src => packages/embedeer/src}/child-process-worker.js (100%) rename {src => packages/embedeer/src}/cli.js (94%) mode change 100644 => 100755 rename {src => packages/embedeer/src}/embedder.js (94%) rename {src => packages/embedeer/src}/index.js (100%) rename {src => packages/embedeer/src}/model-cache.js (100%) create mode 100644 packages/embedeer/src/provider-loader.js rename {src => packages/embedeer/src}/thread-worker-script.js (76%) rename {src => packages/embedeer/src}/thread-worker.js (100%) rename {src => packages/embedeer/src}/worker-pool.js (94%) rename {src => packages/embedeer/src}/worker.js (82%) rename {test => packages/embedeer/test}/child-process-worker.test.js (100%) rename {test => packages/embedeer/test}/cli-format.test.js (100%) rename {test => packages/embedeer/test}/embedder-options.test.js (100%) rename {test => packages/embedeer/test}/embedder.test.js (100%) rename {test => packages/embedeer/test}/helpers/crash-worker.js (100%) rename {test => packages/embedeer/test}/helpers/echo-thread-worker.js (100%) rename {test => packages/embedeer/test}/helpers/echo-worker.js (100%) create mode 100644 packages/embedeer/test/provider-loader.test.js rename {test => packages/embedeer/test}/thread-worker.test.js (100%) rename {test => packages/embedeer/test}/worker-pool-options.test.js (100%) rename {test => packages/embedeer/test}/worker-pool.test.js (100%) create mode 100644 
packages/ort-linux-x64-cuda/README.md create mode 100644 packages/ort-linux-x64-cuda/index.js create mode 100644 packages/ort-linux-x64-cuda/install.js create mode 100644 packages/ort-linux-x64-cuda/package.json create mode 100644 packages/ort-win32-x64-cuda/README.md create mode 100644 packages/ort-win32-x64-cuda/index.js create mode 100644 packages/ort-win32-x64-cuda/install.js create mode 100644 packages/ort-win32-x64-cuda/package.json create mode 100644 packages/ort-win32-x64-dml/README.md create mode 100644 packages/ort-win32-x64-dml/index.js create mode 100644 packages/ort-win32-x64-dml/install.js create mode 100644 packages/ort-win32-x64-dml/package.json diff --git a/.gitignore b/.gitignore index 2e8157a..05f6e26 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ node_modules/ +packages/*/node_modules/ .env *.log +packages/*/vendor/ diff --git a/README.md b/README.md index c6fa75b..5201c93 100644 --- a/README.md +++ b/README.md @@ -1,162 +1,92 @@ # embedeer A Node.js tool for generating text embeddings using models from [Hugging Face](https://huggingface.co/models). -Supports **batched** input, **parallel** execution, isolated **child-process** workers (default) or **in-process threads**, quantization, and Hugging Face auth. +Supports **batched** input, **parallel** execution, optional **GPU acceleration** (CUDA / DirectML), quantization, and Hugging Face auth. 
---- - -## Features - -- Downloads any Hugging Face feature-extraction model on first use (cached in `~/.embedeer/models`) -- **Isolated processes** (default) — a worker crash cannot bring down the caller -- **In-process threads** — opt-in via `mode: 'thread'` for lower overhead -- **Sequential** execution when `concurrency: 1` -- Configurable batch size and concurrency -- Hugging Face API token support (`--token` / `HF_TOKEN` env var) -- Quantization via `dtype` (`fp32` · `fp16` · `q8` · `q4` · `q4f16` · `auto`) -- Rich CLI: pull model, embed from file, dump output as JSON / TXT / SQL +This repository is a **monorepo** managed with npm workspaces. --- -## Installation +## Packages -```bash -npm install -``` +| Package | Description | +|---------|-------------| +| [`embedeer`](packages/embedeer) | Main embeddings package (CPU + optional GPU) | +| [`@embedeer/ort-linux-x64-cuda`](packages/ort-linux-x64-cuda) | CUDA provider for Linux x64 | +| [`@embedeer/ort-win32-x64-cuda`](packages/ort-win32-x64-cuda) | CUDA provider for Windows x64 | +| [`@embedeer/ort-win32-x64-dml`](packages/ort-win32-x64-dml) | DirectML provider for Windows x64 | --- -## Programmatic API - -### Embed texts +## Quick Start -```js -import { Embedder } from 'embedeer'; +### CPU (default) -const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { - batchSize: 32, // texts per worker task (default: 32) - concurrency: 2, // parallel workers (default: 2) - mode: 'process', // 'process' | 'thread' (default: 'process') - pooling: 'mean', // 'mean' | 'cls' | 'none' (default: 'mean') - normalize: true, // L2-normalise vectors (default: true) - token: 'hf_...', // HF API token (optional; also reads HF_TOKEN env) - dtype: 'q8', // quantization dtype (optional) - cacheDir: '/my/cache', // override model cache (default: ~/.embedeer/models) -}); +```bash +# Install +npm install embedeer -const vectors = await embedder.embed(['Hello world', 'Foo bar baz']); -// → number[][] (one 384-dim vector per 
text for all-MiniLM-L6-v2) +# CLI +npx embedeer --model Xenova/all-MiniLM-L6-v2 --data "Hello world" -await embedder.destroy(); // shut down worker processes +# API +import { Embedder } from 'embedeer'; +const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2'); +const vectors = await embedder.embed(['Hello', 'World']); +await embedder.destroy(); ``` -### Pull (pre-cache) a model - -Like `ollama pull` — downloads the model once so workers start instantly: - -```js -import { loadModel } from 'embedeer'; +### GPU (two-step install) -const { modelName, cacheDir } = await loadModel('Xenova/all-MiniLM-L6-v2', { - token: 'hf_...', // optional - dtype: 'q8', // optional -}); -``` +```bash +# Step 1 - install embedeer +npm install embedeer -### Sequential execution +# Step 2 - install GPU provider for your platform +npm install @embedeer/ort-linux-x64-cuda # Linux x64 NVIDIA CUDA +npm install @embedeer/ort-win32-x64-cuda # Windows x64 NVIDIA CUDA +npm install @embedeer/ort-win32-x64-dml # Windows x64 DirectML (any GPU) -```js -const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { concurrency: 1 }); -``` +# CLI - auto-detect GPU, fall back to CPU +npx embedeer --model Xenova/all-MiniLM-L6-v2 --device auto --data "Hello GPU" -### In-process threads (same process, lower overhead) +# CLI - require GPU +npx embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" -```js -const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { mode: 'thread' }); +# API +import { Embedder } from 'embedeer'; +const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { + device: 'auto', // 'auto' | 'cpu' | 'gpu' + // provider: 'cuda', // explicit override: 'cuda' | 'dml' +}); ``` --- -## CLI - -``` -npx embedeer [options] - -Model management (pull / cache model): - npx embedeer --model - -Embed texts: - npx embedeer --model --data "text1" "text2" ... 
- npx embedeer --model --data '["text1","text2"]' - npx embedeer --model --file texts.txt - echo '["t1","t2"]' | npx embedeer --model - -Options: - -m, --model Hugging Face model (default: Xenova/all-MiniLM-L6-v2) - -d, --data Text(s) or JSON array to embed - --file Input file: JSON array or one text per line - --dump Write output to file instead of stdout - --output json|txt|sql Output format (default: json) - -b, --batch-size Texts per worker batch (default: 32) - -c, --concurrency Parallel workers (default: 2) - --mode process|thread Worker mode (default: process) - -p, --pooling mean|cls|none (default: mean) - --no-normalize Disable L2 normalisation - --dtype Quantization: fp32|fp16|q8|q4|q4f16|auto - --token Hugging Face API token (or set HF_TOKEN env) - --cache-dir Model cache directory (default: ~/.embedeer/models) - -h, --help Show this help -``` - -### Examples +## Monorepo Development ```bash -# Pull a model (like ollama pull) -npx embedeer --model Xenova/all-MiniLM-L6-v2 - -# Embed a few strings, output JSON -npx embedeer --model Xenova/all-MiniLM-L6-v2 --data "Hello" "World" +# Install all workspace packages +npm install -# Embed from a file, dump SQL to disk -npx embedeer --model Xenova/all-MiniLM-L6-v2 \ - --file texts.txt --output sql --dump out.sql +# Run tests (packages/embedeer) +npm test -# Use quantized model, in-process threads, private model with token -npx embedeer --model my-org/private-model \ - --token hf_xxx --dtype q8 --mode thread \ - --data "embed me" +# Run tests in a specific package +npm test --workspace=packages/embedeer ``` --- -## How it works - -``` -embed(texts) - │ - ├─ split into batches of batchSize - │ - └─ Promise.all(batches) ──► WorkerPool - │ - ├─ [process mode] ChildProcessWorker 0 → batch A - ├─ [process mode] ChildProcessWorker 1 → batch B - │ (OS-level isolation; crash → reject only that task) - │ - ├─ [thread mode] ThreadWorker 0 → batch A - └─ [thread mode] ThreadWorker 1 → batch B -``` +## GPU Provider Status 
-Workers load the model **once** at startup and reuse it for all batches, avoiding -repeated download overhead. Models are cached in `~/.embedeer/models` so -subsequent runs start instantly. +> The native binary download in GPU provider packages is currently **stubbed**. +> The JS API structure, dynamic loading hooks, and runtime selection logic are fully implemented. +> Actual CUDA/DirectML binaries will be added in a future release. +> See each provider package's `install.js` for the full TODO list. --- -## Testing - -```bash -npm test -``` +## Documentation -Tests use Node's built-in `node:test` runner. Worker behaviour is tested with -lightweight fake/echo workers — no real model download required. +Full API documentation, CLI reference, and options are in [`packages/embedeer/README.md`](packages/embedeer/README.md). diff --git a/package-lock.json b/package-lock.json index 12e891f..aa9d55c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,19 +1,28 @@ { - "name": "embedeer", + "name": "embedeer-monorepo", "version": "1.0.0", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "embedeer", + "name": "embedeer-monorepo", "version": "1.0.0", "license": "ISC", - "dependencies": { - "@huggingface/transformers": "^4.0.1" - }, - "bin": { - "embedeer": "src/cli.js" - } + "workspaces": [ + "packages/*" + ] + }, + "node_modules/@embedeer/ort-linux-x64-cuda": { + "resolved": "packages/ort-linux-x64-cuda", + "link": true + }, + "node_modules/@embedeer/ort-win32-x64-cuda": { + "resolved": "packages/ort-win32-x64-cuda", + "link": true + }, + "node_modules/@embedeer/ort-win32-x64-dml": { + "resolved": "packages/ort-win32-x64-dml", + "link": true }, "node_modules/@emnapi/runtime": { "version": "1.9.2", @@ -704,6 +713,10 @@ "integrity": "sha512-T0NIuQpnTvFDATNuHN5roPwSBG83rFsuO+MXXH9/3N1eFbn4wcPjttvjMLEPWJ0RGUYgQE7cGgS3tNxbqCGM7g==", "license": "MIT" }, + "node_modules/embedeer": { + "resolved": "packages/embedeer", + "link": true + }, 
"node_modules/es-define-property": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", @@ -1039,6 +1052,43 @@ "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.18.2.tgz", "integrity": "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==", "license": "MIT" + }, + "packages/embedeer": { + "version": "1.0.0", + "license": "ISC", + "dependencies": { + "@huggingface/transformers": "^4.0.1" + }, + "bin": { + "embedeer": "src/cli.js" + } + }, + "packages/ort-linux-x64-cuda": { + "name": "@embedeer/ort-linux-x64-cuda", + "version": "1.0.0", + "hasInstallScript": true, + "license": "ISC", + "engines": { + "node": ">=18" + } + }, + "packages/ort-win32-x64-cuda": { + "name": "@embedeer/ort-win32-x64-cuda", + "version": "1.0.0", + "hasInstallScript": true, + "license": "ISC", + "engines": { + "node": ">=18" + } + }, + "packages/ort-win32-x64-dml": { + "name": "@embedeer/ort-win32-x64-dml", + "version": "1.0.0", + "hasInstallScript": true, + "license": "ISC", + "engines": { + "node": ">=18" + } } } } diff --git a/package.json b/package.json index 7193420..fb9c239 100644 --- a/package.json +++ b/package.json @@ -1,34 +1,24 @@ { - "name": "embedeer", + "name": "embedeer-monorepo", "version": "1.0.0", - "description": "A node.js embedding tool", - "main": "src/index.js", - "bin": { - "embedeer": "src/cli.js" - }, + "private": true, + "description": "Monorepo for embedeer and its optional GPU provider packages", + "workspaces": [ + "packages/*" + ], "scripts": { - "test": "node --test test/*.test.js", - "start": "node src/cli.js" + "test": "npm run test --workspace=packages/embedeer", + "test:embedeer": "npm run test --workspace=packages/embedeer" }, "repository": { "type": "git", "url": "git+https://github.com/jsilvanus/embedeer.git" }, - "keywords": [ - "embeddings", - "huggingface", - "nlp", - "transformers", - "parallel" - ], "author": "", "license": 
"ISC", - "type": "module", "bugs": { "url": "https://github.com/jsilvanus/embedeer/issues" }, - "homepage": "https://github.com/jsilvanus/embedeer#readme", - "dependencies": { - "@huggingface/transformers": "^4.0.1" - } + "homepage": "https://github.com/jsilvanus/embedeer#readme" } + diff --git a/packages/embedeer/README.md b/packages/embedeer/README.md new file mode 100644 index 0000000..bd7020f --- /dev/null +++ b/packages/embedeer/README.md @@ -0,0 +1,224 @@ +# embedeer + +A Node.js tool for generating text embeddings using models from [Hugging Face](https://huggingface.co/models). +Supports **batched** input, **parallel** execution, isolated **child-process** workers (default) or **in-process threads**, quantization, optional GPU acceleration, and Hugging Face auth. + +--- + +## Features + +- Downloads any Hugging Face feature-extraction model on first use (cached in `~/.embedeer/models`) +- **Isolated processes** (default) — a worker crash cannot bring down the caller +- **In-process threads** — opt-in via `mode: 'thread'` for lower overhead +- **Sequential** execution when `concurrency: 1` +- Configurable batch size and concurrency +- **GPU acceleration** — optional via separate provider packages (see below) +- Hugging Face API token support (`--token` / `HF_TOKEN` env var) +- Quantization via `dtype` (`fp32` · `fp16` · `q8` · `q4` · `q4f16` · `auto`) +- Rich CLI: pull model, embed from file, dump output as JSON / TXT / SQL + +--- + +## Installation + +```bash +# CPU (default, works everywhere) +npm install embedeer + +# GPU — Linux x64 + NVIDIA CUDA +npm install @embedeer/ort-linux-x64-cuda + +# GPU — Windows x64 + NVIDIA CUDA +npm install @embedeer/ort-win32-x64-cuda + +# GPU — Windows x64 + DirectML (any GPU: NVIDIA / AMD / Intel) +npm install @embedeer/ort-win32-x64-dml +``` + +--- + +## Programmatic API + +### Embed texts (CPU — default) + +```js +import { Embedder } from 'embedeer'; + +const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { 
+ batchSize: 32, // texts per worker task (default: 32) + concurrency: 2, // parallel workers (default: 2) + mode: 'process', // 'process' | 'thread' (default: 'process') + pooling: 'mean', // 'mean' | 'cls' | 'none' (default: 'mean') + normalize: true, // L2-normalise vectors (default: true) + token: 'hf_...', // HF API token (optional; also reads HF_TOKEN env) + dtype: 'q8', // quantization dtype (optional) + cacheDir: '/my/cache', // override model cache (default: ~/.embedeer/models) +}); + +const vectors = await embedder.embed(['Hello world', 'Foo bar baz']); +// → number[][] (one 384-dim vector per text for all-MiniLM-L6-v2) + +await embedder.destroy(); // shut down worker processes +``` + +### Embed texts with GPU + +```js +import { Embedder } from 'embedeer'; + +// Auto-detect GPU (falls back to CPU if no provider is installed) +const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { + device: 'auto', +}); + +// Require GPU (throws if no provider is available) +const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { + device: 'gpu', +}); + +// Explicitly select an execution provider +const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { + provider: 'cuda', // 'cuda' | 'dml' +}); +``` + +### Pull (pre-cache) a model + +Like `ollama pull` — downloads the model once so workers start instantly: + +```js +import { loadModel } from 'embedeer'; + +const { modelName, cacheDir } = await loadModel('Xenova/all-MiniLM-L6-v2', { + token: 'hf_...', // optional + dtype: 'q8', // optional +}); +``` + +--- + +## CLI + +``` +npx embedeer [options] + +Model management (pull / cache model): + npx embedeer --model + +Embed texts: + npx embedeer --model --data "text1" "text2" ... 
+ npx embedeer --model --data '["text1","text2"]' + npx embedeer --model --file texts.txt + echo '["t1","t2"]' | npx embedeer --model + +Options: + -m, --model Hugging Face model (default: Xenova/all-MiniLM-L6-v2) + -d, --data Text(s) or JSON array to embed + --file Input file: JSON array or one text per line + --dump Write output to file instead of stdout + --output json|txt|sql Output format (default: json) + -b, --batch-size Texts per worker batch (default: 32) + -c, --concurrency Parallel workers (default: 2) + --mode process|thread Worker mode (default: process) + -p, --pooling mean|cls|none (default: mean) + --no-normalize Disable L2 normalisation + --dtype Quantization: fp32|fp16|q8|q4|q4f16|auto + --token Hugging Face API token (or set HF_TOKEN env) + --cache-dir Model cache directory (default: ~/.embedeer/models) + --device Compute device: auto|cpu|gpu (default: cpu) + --provider Execution provider override: cpu|cuda|dml + -h, --help Show this help +``` + +### CLI Examples + +```bash +# Pull a model (like ollama pull) +npx embedeer --model Xenova/all-MiniLM-L6-v2 + +# Embed a few strings, output JSON (CPU) +npx embedeer --model Xenova/all-MiniLM-L6-v2 --data "Hello" "World" + +# Auto-detect GPU, fall back to CPU if unavailable +npx embedeer --model Xenova/all-MiniLM-L6-v2 --device auto --data "Hello GPU" + +# Require GPU (error if no provider installed) +npx embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" + +# Use CUDA explicitly (requires @embedeer/ort-linux-x64-cuda or ort-win32-x64-cuda) +npx embedeer --model Xenova/all-MiniLM-L6-v2 --provider cuda --data "Hello CUDA" + +# Use DirectML on Windows (requires @embedeer/ort-win32-x64-dml) +npx embedeer --model Xenova/all-MiniLM-L6-v2 --provider dml --data "Hello DML" + +# Embed from a file, dump SQL to disk +npx embedeer --model Xenova/all-MiniLM-L6-v2 \ + --file texts.txt --output sql --dump out.sql + +# Use quantized model, in-process threads, private model with token +npx 
embedeer --model my-org/private-model \ + --token hf_xxx --dtype q8 --mode thread \ + --data "embed me" +``` + +--- + +## GPU Provider Packages + +GPU support requires an additional provider package that ships a CUDA-enabled (or DirectML-enabled) ONNX Runtime binary. + +| Platform | Provider | Package | +|----------------|-----------|----------------------------------| +| Linux x64 | CUDA | `@embedeer/ort-linux-x64-cuda` | +| Windows x64 | CUDA | `@embedeer/ort-win32-x64-cuda` | +| Windows x64 | DirectML | `@embedeer/ort-win32-x64-dml` | + +### Provider selection logic + +| `device` | `provider` | Behavior | +|----------|-----------|----------| +| `cpu` (default) | — | Always CPU | +| `auto` | — | Try GPU providers for the platform in order; silent CPU fallback | +| `gpu` | — | Try GPU providers; **throw** if none available | +| any | `cuda` | Load CUDA provider; **throw** if not available or not supported | +| any | `dml` | Load DirectML provider; **throw** if not available or not supported | +| any | `cpu` | Always CPU | + +On Linux x64: GPU order is `cuda`. +On Windows x64: GPU order is `cuda → dml`. + +--- + +## How it works + +``` +embed(texts) + │ + ├─ split into batches of batchSize + │ + └─ Promise.all(batches) ──► WorkerPool + │ + ├─ [process mode] ChildProcessWorker 0 + │ resolveProvider(device, provider) + │ → pipeline('feature-extraction', model, { device: 'cuda' }) + │ → embed batch A + │ + └─ [process mode] ChildProcessWorker 1 + resolveProvider(device, provider) + → pipeline(...) → embed batch B +``` + +Workers load the model **once** at startup and reuse it for all batches. +Provider activation happens per-worker before the pipeline is created. + +--- + +## Testing + +```bash +cd packages/embedeer && npm test +# or from the monorepo root: +npm test +``` + +Tests use Node's built-in `node:test` runner. No real model download required. 
diff --git a/packages/embedeer/package.json b/packages/embedeer/package.json new file mode 100644 index 0000000..2b68966 --- /dev/null +++ b/packages/embedeer/package.json @@ -0,0 +1,37 @@ +{ + "name": "embedeer", + "version": "1.0.0", + "description": "A node.js embedding tool with optional GPU acceleration", + "main": "src/index.js", + "bin": { + "embedeer": "src/cli.js" + }, + "scripts": { + "test": "node --test test/*.test.js" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/jsilvanus/embedeer.git", + "directory": "packages/embedeer" + }, + "keywords": [ + "embeddings", + "huggingface", + "nlp", + "transformers", + "parallel", + "gpu", + "cuda", + "onnxruntime" + ], + "author": "", + "license": "ISC", + "type": "module", + "bugs": { + "url": "https://github.com/jsilvanus/embedeer/issues" + }, + "homepage": "https://github.com/jsilvanus/embedeer/tree/main/packages/embedeer#readme", + "dependencies": { + "@huggingface/transformers": "^4.0.1" + } +} diff --git a/src/child-process-worker.js b/packages/embedeer/src/child-process-worker.js similarity index 100% rename from src/child-process-worker.js rename to packages/embedeer/src/child-process-worker.js diff --git a/src/cli.js b/packages/embedeer/src/cli.js old mode 100644 new mode 100755 similarity index 94% rename from src/cli.js rename to packages/embedeer/src/cli.js index 62d8fce..10ee2be --- a/src/cli.js +++ b/packages/embedeer/src/cli.js @@ -25,6 +25,8 @@ * --dtype Quantization: fp32|fp16|q8|q4|q4f16|auto * --token Hugging Face API token (overrides HF_TOKEN env var) * --cache-dir Custom model cache directory (default: ~/.embedeer/models) + * --device Compute device: auto|cpu|gpu (default: cpu) + * --provider Execution provider override: cpu|cuda|dml * -h, --help Show this help */ @@ -62,6 +64,8 @@ Options: --dtype Quantization: fp32|fp16|q8|q4|q4f16|auto --token Hugging Face API token --cache-dir Model cache directory (default: ${DEFAULT_CACHE_DIR}) + --device Compute device: 
auto|cpu|gpu (default: cpu) + --provider Execution provider override: cpu|cuda|dml -h, --help Show this help `.trim()); } @@ -72,6 +76,7 @@ const KNOWN_FLAGS = new Set([ '--help', '-h', '--model', '-m', '--data', '-d', '--file', '--dump', '--output', '--batch-size', '-b', '--concurrency', '-c', '--mode', '--pooling', '-p', '--no-normalize', '--dtype', '--token', '--cache-dir', + '--device', '--provider', ]); model: 'Xenova/all-MiniLM-L6-v2', data: null, // --data texts (array) @@ -86,6 +91,8 @@ const KNOWN_FLAGS = new Set([ dtype: undefined, token: undefined, cacheDir: undefined, + device: undefined, + provider: undefined, }; const positional = []; @@ -126,6 +133,10 @@ for (let i = 0; i < args.length; i++) { options.token = args[++i]; } else if (arg === '--cache-dir') { options.cacheDir = args[++i]; + } else if (arg === '--device') { + options.device = args[++i]; + } else if (arg === '--provider') { + options.provider = args[++i]; } else { positional.push(arg); } @@ -250,6 +261,8 @@ async function runEmbedding(texts, cacheDir) { dtype: options.dtype, token: options.token, cacheDir, + device: options.device, + provider: options.provider, }); try { diff --git a/src/embedder.js b/packages/embedeer/src/embedder.js similarity index 94% rename from src/embedder.js rename to packages/embedeer/src/embedder.js index 2310829..e01ceac 100644 --- a/src/embedder.js +++ b/packages/embedeer/src/embedder.js @@ -29,6 +29,8 @@ export class Embedder { * @param {string} [options.token] Hugging Face API token (overrides HF_TOKEN env) * @param {string} [options.dtype] Quantization dtype ('fp32'|'fp16'|'q8'|'q4'|'q4f16'|'auto') * @param {string} [options.cacheDir] Custom model cache directory + * @param {string} [options.device] Compute device: 'auto'|'cpu'|'gpu' (default: 'cpu') + * @param {string} [options.provider] Execution provider override: 'cpu'|'cuda'|'dml' */ constructor(modelName = 'Xenova/all-MiniLM-L6-v2', options = {}) { this.modelName = modelName; @@ -41,6 +43,8 @@ export 
class Embedder { token: options.token, dtype: options.dtype, cacheDir: options.cacheDir ?? getCacheDir(), + device: options.device, + provider: options.provider, }); } diff --git a/src/index.js b/packages/embedeer/src/index.js similarity index 100% rename from src/index.js rename to packages/embedeer/src/index.js diff --git a/src/model-cache.js b/packages/embedeer/src/model-cache.js similarity index 100% rename from src/model-cache.js rename to packages/embedeer/src/model-cache.js diff --git a/packages/embedeer/src/provider-loader.js b/packages/embedeer/src/provider-loader.js new file mode 100644 index 0000000..5c4b0ee --- /dev/null +++ b/packages/embedeer/src/provider-loader.js @@ -0,0 +1,150 @@ +/** + * Provider loader — dynamically selects and activates an ONNX Runtime + * execution-provider package before @huggingface/transformers creates its + * pipeline. + * + * Provider packages are published as separate optional npm packages: + * @embedeer/ort-linux-x64-cuda — CUDA on Linux x64 + * @embedeer/ort-win32-x64-cuda — CUDA on Windows x64 + * @embedeer/ort-win32-x64-dml — DirectML on Windows x64 + * + * Each provider package exports: + * activate(): Promise<void> — runs any setup needed before pipeline() + * getDevice(): string — the device string to pass to pipeline() + * e.g. 'cuda', 'dml' + * + * Usage: + * import { resolveProvider } from './provider-loader.js'; + * const deviceStr = await resolveProvider(device, provider); + * // pass deviceStr to pipeline() if truthy + */ + +/** + * Map of "<platform>-<arch>-<provider>" to package name. + * @type {Record<string, string>} + */ +export const PROVIDER_PACKAGES = { + 'linux-x64-cuda': '@embedeer/ort-linux-x64-cuda', + 'win32-x64-cuda': '@embedeer/ort-win32-x64-cuda', + 'win32-x64-dml': '@embedeer/ort-win32-x64-dml', +}; + +/** + * Returns the ordered list of preferred GPU providers for the current platform. 
/**
 * Returns the ordered list of preferred GPU providers for the current platform.
 *
 * Linux x64 offers CUDA only; Windows x64 prefers CUDA and then DirectML.
 * Every other platform/arch pair yields an empty list.
 *
 * @returns {string[]} Provider names, most preferred first.
 */
export function getPlatformDefaultProviders() {
  const { platform, arch } = process;
  if (platform === 'linux' && arch === 'x64') return ['cuda'];
  if (platform === 'win32' && arch === 'x64') return ['cuda', 'dml'];
  return [];
}

/**
 * Attempt to import and activate a specific provider package.
 *
 * The result never throws; callers inspect `error` to tell the cases apart:
 *   - provider has no package for this platform → `error` is null
 *   - package not installed                     → `error.code === 'ERR_MODULE_NOT_FOUND'`
 *   - package installed but activation failed   → any other error
 *   - package loaded successfully               → `loaded` is true
 *
 * @param {string} provider  e.g. 'cuda' or 'dml'
 * @returns {Promise<{loaded: boolean, deviceStr: string|null, error: Error|null}>}
 */
export async function tryLoadProvider(provider) {
  const key = `${process.platform}-${process.arch}-${provider}`;
  const packageName = PROVIDER_PACKAGES[key];
  if (!packageName) {
    return { loaded: false, deviceStr: null, error: null };
  }
  try {
    const mod = await import(packageName);
    if (typeof mod.activate === 'function') {
      await mod.activate();
    }
    // Packages without getDevice() fall back to the provider name itself.
    const deviceStr = typeof mod.getDevice === 'function' ? mod.getDevice() : provider;
    return { loaded: true, deviceStr, error: null };
  } catch (err) {
    // Any error (package not installed, binary missing, etc.) → not loaded.
    // The error is handed back so the caller decides whether to surface it.
    return { loaded: false, deviceStr: null, error: err };
  }
}

/**
 * Resolve and activate the appropriate execution provider, returning the
 * device string to pass to `@huggingface/transformers` pipeline().
 *
 * Resolution order:
 *   1. `provider === 'cpu'`, or `device` is 'cpu'/omitted with no provider → CPU.
 *   2. An explicit non-CPU `provider` must load, otherwise this throws.
 *   3. Otherwise the platform defaults are tried in order; the first that
 *      loads wins. If none load, `device === 'gpu'` throws and any other
 *      device value falls back to CPU silently.
 *
 * @param {'auto'|'cpu'|'gpu'|undefined} device
 * @param {'cpu'|'cuda'|'dml'|undefined} provider  Optional explicit override
 * @returns {Promise<string|undefined>} Device string, or undefined for the CPU default
 *
 * @throws {Error} When an explicit provider is requested but not available.
 * @throws {Error} When device='gpu' and no GPU provider is available.
 */
export async function resolveProvider(device, provider) {
  // Normalise to lower-case strings for consistent comparison.
  const dev = (device ?? 'cpu').toLowerCase();
  const prov = provider ? provider.toLowerCase() : undefined;

  // --- Explicit CPU ---
  if (dev === 'cpu' && !prov) return undefined;
  if (prov === 'cpu') return undefined;

  // --- Explicit provider ---
  if (prov) {
    const suffix = `-${prov}`;
    const key = `${process.platform}-${process.arch}${suffix}`;
    const packageName = PROVIDER_PACKAGES[key];

    if (!packageName) {
      // Strip the trailing "-<provider>" to list the platforms that do ship it.
      const supportedPlatforms = Object.keys(PROVIDER_PACKAGES)
        .filter((k) => k.endsWith(suffix))
        .map((k) => k.slice(0, -suffix.length));
      throw new Error(
        `Provider '${prov}' is not supported on ${process.platform}/${process.arch}. ` +
        `Supported platforms: ${supportedPlatforms.join(', ') || 'none'}.`,
      );
    }

    const { loaded, deviceStr, error } = await tryLoadProvider(prov);
    if (!loaded) {
      // An installed-but-broken package carries the useful diagnostic
      // (e.g. binary missing), so re-throw it unchanged.
      if (error && error.code !== 'ERR_MODULE_NOT_FOUND') {
        throw error;
      }
      throw new Error(
        `Provider '${prov}' was requested but its package '${packageName}' is not installed. ` +
        `Run: npm install ${packageName}`,
      );
    }
    return deviceStr ?? undefined;
  }

  // --- device='gpu' or device='auto': try platform defaults in order ---
  const candidates = getPlatformDefaultProviders();
  // Tracked separately from "not installed" errors: a later candidate that is
  // simply missing must not mask an earlier candidate's activation failure.
  let activationError = null;

  for (const candidate of candidates) {
    const { loaded, deviceStr, error } = await tryLoadProvider(candidate);
    if (loaded) return deviceStr ?? candidate;
    if (error && error.code !== 'ERR_MODULE_NOT_FOUND') activationError = error;
  }

  if (dev === 'gpu') {
    // A package was found but failed to activate: its error explains why the
    // GPU is unavailable, which "not installed" would misreport.
    if (activationError) {
      throw activationError;
    }
    const packageNames = candidates
      .map((p) => PROVIDER_PACKAGES[`${process.platform}-${process.arch}-${p}`])
      .filter(Boolean);
    throw new Error(
      `device='gpu' was requested but no GPU provider packages are installed ` +
      `for ${process.platform}/${process.arch}. ` +
      `Install one of: ${packageNames.join(', ') || '(none available for this platform)'}.`,
    );
  }

  // device='auto' and no GPU provider found → silently fall back to CPU
  return undefined;
}
{ device: deviceStr } : {}), + }; + extractor = await pipeline('feature-extraction', modelName, pipelineOpts); parentPort.postMessage({ type: 'ready' }); } diff --git a/src/thread-worker.js b/packages/embedeer/src/thread-worker.js similarity index 100% rename from src/thread-worker.js rename to packages/embedeer/src/thread-worker.js diff --git a/src/worker-pool.js b/packages/embedeer/src/worker-pool.js similarity index 94% rename from src/worker-pool.js rename to packages/embedeer/src/worker-pool.js index a6ac0b5..ed3b25f 100644 --- a/src/worker-pool.js +++ b/packages/embedeer/src/worker-pool.js @@ -26,13 +26,15 @@ export class WorkerPool { /** * @param {string} modelName Hugging Face model identifier * @param {object} [options] - * @param {number} [options.poolSize=2] Number of parallel workers + * @param {number} [options.poolSize=2] Number of parallel workers * @param {string} [options.mode='process'] 'process' (isolated) or 'thread' (same process) * @param {string} [options.pooling='mean'] Pooling strategy ('mean'|'cls'|'none') * @param {boolean} [options.normalize=true] Whether to L2-normalise embeddings * @param {string} [options.token] Hugging Face API token (overrides HF_TOKEN env var) * @param {string} [options.dtype] Quantization dtype ('fp32'|'fp16'|'q8'|'q4'|'q4f16'|'auto') * @param {string} [options.cacheDir] Custom model cache directory + * @param {string} [options.device] Compute device: 'auto'|'cpu'|'gpu' (default: 'cpu') + * @param {string} [options.provider] Execution provider override: 'cpu'|'cuda'|'dml' * @param {Function} [options._WorkerClass] Override worker class (for testing) */ constructor(modelName, { @@ -43,6 +45,8 @@ export class WorkerPool { token, dtype, cacheDir, + device, + provider, _WorkerClass, } = {}) { this.modelName = modelName; @@ -53,6 +57,8 @@ export class WorkerPool { this.token = token; this.dtype = dtype; this.cacheDir = cacheDir; + this.device = device; + this.provider = provider; // Pick defaults based on mode; can be 
overridden for testing. if (_WorkerClass) { @@ -148,6 +154,8 @@ export class WorkerPool { token: this.token, dtype: this.dtype, cacheDir: this.cacheDir, + device: this.device, + provider: this.provider, }, }); diff --git a/src/worker.js b/packages/embedeer/src/worker.js similarity index 82% rename from src/worker.js rename to packages/embedeer/src/worker.js index 3bd65da..6e308f1 100644 --- a/src/worker.js +++ b/packages/embedeer/src/worker.js @@ -18,6 +18,7 @@ import { pipeline, env } from '@huggingface/transformers'; import { buildPipelineOptions } from './model-cache.js'; +import { resolveProvider } from './provider-loader.js'; let extractor; let pooling; @@ -30,7 +31,13 @@ process.on('message', async (msg) => { // Apply auth and cache config before loading the model. if (msg.token) process.env.HF_TOKEN = msg.token; if (msg.cacheDir) env.cacheDir = msg.cacheDir; - extractor = await pipeline('feature-extraction', msg.modelName, buildPipelineOptions(msg.dtype)); + // Activate GPU provider (if requested) before creating the pipeline. + const deviceStr = await resolveProvider(msg.device, msg.provider); + const pipelineOpts = { + ...buildPipelineOptions(msg.dtype), + ...(deviceStr ? 
{ device: deviceStr } : {}), + }; + extractor = await pipeline('feature-extraction', msg.modelName, pipelineOpts); process.send({ type: 'ready' }); } catch (err) { process.send({ type: 'error', id: null, error: err.message }); diff --git a/test/child-process-worker.test.js b/packages/embedeer/test/child-process-worker.test.js similarity index 100% rename from test/child-process-worker.test.js rename to packages/embedeer/test/child-process-worker.test.js diff --git a/test/cli-format.test.js b/packages/embedeer/test/cli-format.test.js similarity index 100% rename from test/cli-format.test.js rename to packages/embedeer/test/cli-format.test.js diff --git a/test/embedder-options.test.js b/packages/embedeer/test/embedder-options.test.js similarity index 100% rename from test/embedder-options.test.js rename to packages/embedeer/test/embedder-options.test.js diff --git a/test/embedder.test.js b/packages/embedeer/test/embedder.test.js similarity index 100% rename from test/embedder.test.js rename to packages/embedeer/test/embedder.test.js diff --git a/test/helpers/crash-worker.js b/packages/embedeer/test/helpers/crash-worker.js similarity index 100% rename from test/helpers/crash-worker.js rename to packages/embedeer/test/helpers/crash-worker.js diff --git a/test/helpers/echo-thread-worker.js b/packages/embedeer/test/helpers/echo-thread-worker.js similarity index 100% rename from test/helpers/echo-thread-worker.js rename to packages/embedeer/test/helpers/echo-thread-worker.js diff --git a/test/helpers/echo-worker.js b/packages/embedeer/test/helpers/echo-worker.js similarity index 100% rename from test/helpers/echo-worker.js rename to packages/embedeer/test/helpers/echo-worker.js diff --git a/packages/embedeer/test/provider-loader.test.js b/packages/embedeer/test/provider-loader.test.js new file mode 100644 index 0000000..fa9c024 --- /dev/null +++ b/packages/embedeer/test/provider-loader.test.js @@ -0,0 +1,279 @@ +/** + * Unit tests for provider-loader.js + * + * Tests 
/**
 * Run `fn` while process.platform and process.arch report the given values,
 * then put the original property descriptors back — even if `fn` throws.
 *
 * @param {string} platform  Value to expose as process.platform
 * @param {string} arch      Value to expose as process.arch
 * @param {Function} fn      Sync or async callback executed under the override
 * @returns {Promise<void>}
 */
async function withPlatform(platform, arch, fn) {
  const overrides = { platform, arch };
  const keys = Object.keys(overrides);

  // Capture both descriptors before touching either property.
  const saved = {};
  for (const key of keys) {
    saved[key] = Object.getOwnPropertyDescriptor(process, key);
  }

  for (const key of keys) {
    Object.defineProperty(process, key, { value: overrides[key], configurable: true });
  }

  try {
    await fn();
  } finally {
    for (const key of keys) {
      const descriptor = saved[key];
      if (descriptor) Object.defineProperty(process, key, descriptor);
    }
  }
}
}); + }); + + test('returns [cuda, dml] on win32/x64 (CUDA preferred over DML)', async () => { + await withPlatform('win32', 'x64', () => { + assert.deepEqual(getPlatformDefaultProviders(), ['cuda', 'dml']); + }); + }); + + test('returns [] on unsupported platforms (e.g. darwin/arm64)', async () => { + await withPlatform('darwin', 'arm64', () => { + assert.deepEqual(getPlatformDefaultProviders(), []); + }); + }); +}); + +// ── tryLoadProvider() ──────────────────────────────────────────────────────── + +describe('tryLoadProvider()', () => { + test('returns { loaded: false } when provider is not supported on platform', async () => { + await withPlatform('darwin', 'arm64', async () => { + const result = await tryLoadProvider('cuda'); + assert.equal(result.loaded, false); + assert.equal(result.deviceStr, null); + }); + }); + + test('returns { loaded: false } when provider package is not installed or binary is missing', async () => { + // In the workspace, @embedeer/ort-linux-x64-cuda is linked but the native + // binary does not exist (install.js was not run), so activate() throws. + // tryLoadProvider must return { loaded: false } in either case. 
+ await withPlatform('linux', 'x64', async () => { + const result = await tryLoadProvider('cuda'); + assert.equal(result.loaded, false); + assert.equal(result.deviceStr, null); + // error may be set (binary not found) or null (package not installed) + }); + }); +}); + +// ── resolveProvider() ──────────────────────────────────────────────────────── + +describe('resolveProvider()', () => { + // ── CPU paths ───────────────────────────────────────────────────────────── + + test('returns undefined when device=cpu', async () => { + const result = await resolveProvider('cpu', undefined); + assert.equal(result, undefined); + }); + + test('returns undefined when provider=cpu', async () => { + const result = await resolveProvider('auto', 'cpu'); + assert.equal(result, undefined); + }); + + test('returns undefined when device and provider are both undefined', async () => { + const result = await resolveProvider(undefined, undefined); + assert.equal(result, undefined); + }); + + // ── device=auto with no packages installed ──────────────────────────────── + + test('device=auto returns undefined (CPU fallback) when GPU provider fails to activate', async () => { + await withPlatform('linux', 'x64', async () => { + // @embedeer/ort-linux-x64-cuda is linked in the workspace but binary is + // missing. device='auto' must silently fall back to CPU (return undefined). 
+ const result = await resolveProvider('auto', undefined); + assert.equal(result, undefined); + }); + }); + + test('device=auto returns undefined on unsupported platform (no GPU providers)', async () => { + await withPlatform('darwin', 'arm64', async () => { + const result = await resolveProvider('auto', undefined); + assert.equal(result, undefined); + }); + }); + + // ── device=gpu with no packages installed ───────────────────────────────── + + test('device=gpu throws when no GPU provider is available (linux/x64)', async () => { + await withPlatform('linux', 'x64', async () => { + // In the workspace, ort-linux-x64-cuda is linked but binary is missing. + // resolveProvider should throw (either the activate error or a "not installed" error). + // The error must reference the @embedeer package name to guide the user. + await assert.rejects( + () => resolveProvider('gpu', undefined), + (err) => { + assert.ok( + err.message.includes('@embedeer/ort-linux-x64-cuda'), + `Expected package name in error, got: ${err.message}`, + ); + return true; + }, + ); + }); + }); + + test('device=gpu throws on unsupported platform with informative message', async () => { + await withPlatform('darwin', 'arm64', async () => { + await assert.rejects( + () => resolveProvider('gpu', undefined), + (err) => { + assert.ok( + err.message.includes('device=\'gpu\'') || err.message.includes("device='gpu'"), + `Expected GPU error message, got: ${err.message}`, + ); + return true; + }, + ); + }); + }); + + // ── explicit provider not installed ────────────────────────────────────── + + test('explicit provider=cuda throws with npm install hint when not installed', async () => { + await withPlatform('linux', 'x64', async () => { + await assert.rejects( + () => resolveProvider('cpu', 'cuda'), + (err) => { + assert.ok( + err.message.includes('@embedeer/ort-linux-x64-cuda'), + `Expected package name in error, got: ${err.message}`, + ); + assert.ok( + err.message.toLowerCase().includes('npm install'), + 
`Expected npm install hint in error, got: ${err.message}`, + ); + return true; + }, + ); + }); + }); + + test('explicit provider=dml throws with npm install hint on windows when not installed', async () => { + await withPlatform('win32', 'x64', async () => { + await assert.rejects( + () => resolveProvider('cpu', 'dml'), + (err) => { + assert.ok( + err.message.includes('@embedeer/ort-win32-x64-dml'), + `Expected package name in error, got: ${err.message}`, + ); + assert.ok( + err.message.toLowerCase().includes('npm install'), + `Expected npm install hint in error, got: ${err.message}`, + ); + return true; + }, + ); + }); + }); + + // ── unsupported provider on platform ────────────────────────────────────── + + test('explicit provider=dml throws "not supported" on linux', async () => { + await withPlatform('linux', 'x64', async () => { + await assert.rejects( + () => resolveProvider('gpu', 'dml'), + (err) => { + assert.ok( + err.message.toLowerCase().includes('not supported'), + `Expected "not supported" in error, got: ${err.message}`, + ); + return true; + }, + ); + }); + }); + + test('explicit provider=cuda throws "not supported" on darwin/arm64', async () => { + await withPlatform('darwin', 'arm64', async () => { + await assert.rejects( + () => resolveProvider('gpu', 'cuda'), + (err) => { + assert.ok( + err.message.toLowerCase().includes('not supported'), + `Expected "not supported" in error, got: ${err.message}`, + ); + return true; + }, + ); + }); + }); +}); + +// ── WorkerPool device/provider options ─────────────────────────────────────── + +describe('WorkerPool — device and provider options', async () => { + const { WorkerPool } = await import('../src/worker-pool.js'); + const { EventEmitter } = await import('events'); + + class SpyWorker extends EventEmitter { + constructor(scriptPath, opts) { + super(); + SpyWorker.lastOpts = opts; + setImmediate(() => this.emit('message', { type: 'ready' })); + } + postMessage() {} + async terminate() { setImmediate(() => 
this.emit('exit', 0)); } + } + + test('device and provider are stored in WorkerPool', () => { + const pool = new WorkerPool('model', { + _WorkerClass: SpyWorker, + device: 'gpu', + provider: 'cuda', + }); + assert.equal(pool.device, 'gpu'); + assert.equal(pool.provider, 'cuda'); + }); + + test('workerData includes device and provider', async () => { + const pool = new WorkerPool('model', { + _WorkerClass: SpyWorker, + poolSize: 1, + device: 'auto', + provider: 'cuda', + }); + await pool.initialize(); + const wd = SpyWorker.lastOpts.workerData; + assert.equal(wd.device, 'auto'); + assert.equal(wd.provider, 'cuda'); + await pool.destroy(); + }); +}); diff --git a/test/thread-worker.test.js b/packages/embedeer/test/thread-worker.test.js similarity index 100% rename from test/thread-worker.test.js rename to packages/embedeer/test/thread-worker.test.js diff --git a/test/worker-pool-options.test.js b/packages/embedeer/test/worker-pool-options.test.js similarity index 100% rename from test/worker-pool-options.test.js rename to packages/embedeer/test/worker-pool-options.test.js diff --git a/test/worker-pool.test.js b/packages/embedeer/test/worker-pool.test.js similarity index 100% rename from test/worker-pool.test.js rename to packages/embedeer/test/worker-pool.test.js diff --git a/packages/ort-linux-x64-cuda/README.md b/packages/ort-linux-x64-cuda/README.md new file mode 100644 index 0000000..fb0448f --- /dev/null +++ b/packages/ort-linux-x64-cuda/README.md @@ -0,0 +1,113 @@ +# @embedeer/ort-linux-x64-cuda + +CUDA execution provider for [embedeer](https://github.com/jsilvanus/embedeer) on **Linux x64**. + +Install this package alongside `embedeer` to enable GPU-accelerated embeddings using NVIDIA CUDA. 
+ +--- + +## Installation (two-step) + +```bash +# Step 1 — install embedeer +npm install embedeer + +# Step 2 — install the CUDA provider for Linux x64 +npm install @embedeer/ort-linux-x64-cuda +``` + +> **Requirements** +> - Linux x86_64 +> - NVIDIA GPU with CUDA drivers installed (CUDA 12.x recommended) +> - NVIDIA CUDA Toolkit matching the binary version + +--- + +## Usage + +Once installed, embedeer automatically detects and uses this provider: + +```js +import { Embedder } from 'embedeer'; + +// Auto-detect GPU (falls back to CPU if no provider is installed) +const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { + device: 'auto', +}); + +// Require GPU (throws if not available) +const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { + device: 'gpu', +}); + +// Explicitly request CUDA +const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { + provider: 'cuda', +}); +``` + +CLI: + +```bash +# Auto GPU (falls back to CPU) +npx embedeer --model Xenova/all-MiniLM-L6-v2 --device auto --data "Hello GPU" + +# Require GPU +npx embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" + +# Explicit CUDA provider +npx embedeer --model Xenova/all-MiniLM-L6-v2 --provider cuda --data "Hello GPU" +``` + +--- + +## How it works + +This package provides: + +1. **`install.js`** — runs on `npm install` to download (or build) a CUDA-enabled + ONNX Runtime Node.js binding into `vendor/`. + +2. **`index.js`** — exports `activate()` and `getDevice()`. The `activate()` function + verifies that the native binary is present and configures ONNX Runtime to use the + CUDA execution provider. `getDevice()` returns `'cuda'` so embedeer passes the + correct device string to `@huggingface/transformers` `pipeline()`. + +--- + +## Current status (stub) + +> ⚠️ The binary download in `install.js` is currently **stubbed** — no real CUDA +> binary is downloaded yet. 
GPU execution is not functional until the TODO in +> `install.js` is implemented. +> +> See `install.js` for the full TODO list and skeleton download code. + +### What needs to be done + +1. Build a CUDA-enabled `onnxruntime-node` binding: + ```bash + # Clone ORT + git clone --recursive https://github.com/microsoft/onnxruntime + cd onnxruntime + # Build with CUDA + ./build.sh --config Release --build_nodejs --use_cuda \ + --cuda_home /usr/local/cuda \ + --cudnn_home /usr/local/cuda + ``` + +2. Upload the resulting `.node` file as a GitHub Release asset. + +3. Update `install.js` to download and verify the binary. + +4. Update `index.js` to wire the binary into ONNX Runtime's module resolution. + +--- + +## Platform + +| Platform | Architecture | Provider | Package | +|----------|-------------|----------|---------| +| Linux | x64 | CUDA | `@embedeer/ort-linux-x64-cuda` ← **this package** | +| Windows | x64 | CUDA | `@embedeer/ort-win32-x64-cuda` | +| Windows | x64 | DirectML | `@embedeer/ort-win32-x64-dml` | diff --git a/packages/ort-linux-x64-cuda/index.js b/packages/ort-linux-x64-cuda/index.js new file mode 100644 index 0000000..e7c68e9 --- /dev/null +++ b/packages/ort-linux-x64-cuda/index.js @@ -0,0 +1,66 @@ +/** + * @embedeer/ort-linux-x64-cuda + * + * CUDA execution provider for embedeer on Linux x64. + * + * This package activates a CUDA-enabled ONNX Runtime build so that + * @huggingface/transformers pipeline() runs inference on the GPU. 
import { existsSync } from 'fs';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';

const __dirname = dirname(fileURLToPath(import.meta.url));

/**
 * Location under this package's vendor/ directory where activate() looks for
 * the CUDA-enabled ORT native binding.
 * TODO: update this path once actual binary distribution is implemented.
 */
const BINARY_PATH = join(__dirname, 'vendor', 'onnxruntime_binding.node');

/**
 * Activate the CUDA provider.
 *
 * Currently this only checks that the native binding file exists at
 * BINARY_PATH; nothing is loaded or configured yet.
 *
 * TODO: wire up the custom ORT binary to onnxruntime-node resolution.
 * This requires either:
 *   (a) patching the onnxruntime-node module resolution to load from BINARY_PATH, or
 *   (b) using ORT's env.ortModuleUrl / similar API once the JS library exposes it.
 *
 * @returns {Promise<void>}
 * @throws {Error} If the native binary is not present (install.js was not run).
 */
export async function activate() {
  const binaryPresent = existsSync(BINARY_PATH);
  if (binaryPresent) {
    // Presence of the binary is the only installation check for now.
    return;
  }

  const problem = `@embedeer/ort-linux-x64-cuda: native CUDA binary not found at ${BINARY_PATH}. `;
  const remedy = `Re-run: npm install @embedeer/ort-linux-x64-cuda`;
  throw new Error(problem + remedy);
}

/**
 * Device string that @huggingface/transformers pipeline() should receive
 * when this provider is active.
 *
 * @returns {string} Always 'cuda'.
 */
export function getDevice() {
  return 'cuda';
}
Consider publishing multiple binaries: + * ort-linux-x64-cuda12, ort-linux-x64-cuda11, etc. + * + * onnxruntime version: + * Must match the version that @huggingface/transformers depends on. + * Check: node -e "require('onnxruntime-node/package.json').version" + */ + +import { mkdirSync, writeFileSync } from 'fs'; +import { join, dirname } from 'path'; +import { fileURLToPath } from 'url'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const VENDOR_DIR = join(__dirname, 'vendor'); + +// Only run on Linux x64; other platforms should not have installed this package, +// but guard anyway. +if (process.platform !== 'linux' || process.arch !== 'x64') { + console.warn( + `[embedeer] @embedeer/ort-linux-x64-cuda: skipping install on ${process.platform}/${process.arch}`, + ); + process.exit(0); +} + +console.log('[embedeer] @embedeer/ort-linux-x64-cuda: running install script...'); + +mkdirSync(VENDOR_DIR, { recursive: true }); + +// ── TODO: replace this stub with real binary download ───────────────────── +// +// Example skeleton for a real download (requires Node 18+ built-in fetch): +// +// const VERSION = '1.0.0'; +// const BASE_URL = `https://github.com/jsilvanus/embedeer/releases/download/ort-linux-x64-cuda-${VERSION}`; +// const BINARY_NAME = 'onnxruntime_binding.node'; +// const CHECKSUM_NAME = 'onnxruntime_binding.node.sha256'; +// +// const res = await fetch(`${BASE_URL}/${BINARY_NAME}`); +// if (!res.ok) throw new Error(`Download failed: ${res.status} ${res.statusText}`); +// const buf = Buffer.from(await res.arrayBuffer()); +// +// // TODO: verify SHA-256 checksum here +// +// writeFileSync(join(VENDOR_DIR, BINARY_NAME), buf); +// console.log(`[embedeer] Installed CUDA ORT binding → ${join(VENDOR_DIR, BINARY_NAME)}`); +// ────────────────────────────────────────────────────────────────────────── + +// For now write a placeholder so the package directory is not empty. 
+writeFileSync( + join(VENDOR_DIR, 'README.txt'), + 'This directory will contain the CUDA-enabled ONNX Runtime native binding.\n' + + 'See packages/ort-linux-x64-cuda/install.js for the download TODO.\n', +); + +console.warn( + '[embedeer] @embedeer/ort-linux-x64-cuda: STUB install complete. ' + + 'No real CUDA binary was downloaded yet — GPU execution is not available. ' + + 'See packages/ort-linux-x64-cuda/install.js for the implementation TODO.', +); diff --git a/packages/ort-linux-x64-cuda/package.json b/packages/ort-linux-x64-cuda/package.json new file mode 100644 index 0000000..ede34e4 --- /dev/null +++ b/packages/ort-linux-x64-cuda/package.json @@ -0,0 +1,27 @@ +{ + "name": "@embedeer/ort-linux-x64-cuda", + "version": "1.0.0", + "description": "CUDA execution provider for embedeer on Linux x64", + "type": "module", + "main": "index.js", + "scripts": { + "install": "node install.js" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/jsilvanus/embedeer.git", + "directory": "packages/ort-linux-x64-cuda" + }, + "keywords": [ + "embedeer", + "onnxruntime", + "cuda", + "gpu", + "linux" + ], + "author": "", + "license": "ISC", + "engines": { + "node": ">=18" + } +} diff --git a/packages/ort-win32-x64-cuda/README.md b/packages/ort-win32-x64-cuda/README.md new file mode 100644 index 0000000..1f47715 --- /dev/null +++ b/packages/ort-win32-x64-cuda/README.md @@ -0,0 +1,26 @@ +# @embedeer/ort-win32-x64-cuda + +CUDA execution provider for [embedeer](https://github.com/jsilvanus/embedeer) on **Windows x64**. + +Install this package alongside `embedeer` to enable GPU-accelerated embeddings using NVIDIA CUDA on Windows. 
+ +## Installation + +```bash +npm install embedeer +npm install @embedeer/ort-win32-x64-cuda +``` + +## Usage + +```js +const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { device: 'gpu' }); +``` + +```bash +npx embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello" +``` + +> ⚠️ **Stub** — binary download not yet implemented. See `install.js` for TODO. + +See [packages/ort-linux-x64-cuda/README.md](../ort-linux-x64-cuda/README.md) for full documentation. diff --git a/packages/ort-win32-x64-cuda/index.js b/packages/ort-win32-x64-cuda/index.js new file mode 100644 index 0000000..75bdd60 --- /dev/null +++ b/packages/ort-win32-x64-cuda/index.js @@ -0,0 +1,37 @@ +/** + * @embedeer/ort-win32-x64-cuda + * + * CUDA execution provider for embedeer on Windows x64. + * + * @see packages/ort-linux-x64-cuda/index.js for full documentation. + */ + +import { existsSync } from 'fs'; +import { join, dirname } from 'path'; +import { fileURLToPath } from 'url'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); + +const BINARY_PATH = join(__dirname, 'vendor', 'onnxruntime_binding.node'); + +/** + * Activate the CUDA provider on Windows x64. + * @returns {Promise} + * @throws {Error} If the native binary is not present. + */ +export async function activate() { + if (!existsSync(BINARY_PATH)) { + throw new Error( + `@embedeer/ort-win32-x64-cuda: native CUDA binary not found at ${BINARY_PATH}. ` + + `Re-run: npm install @embedeer/ort-win32-x64-cuda`, + ); + } + // TODO: wire up the custom ORT binary to onnxruntime-node resolution. 
+} + +/** + * @returns {string} + */ +export function getDevice() { + return 'cuda'; +} diff --git a/packages/ort-win32-x64-cuda/install.js b/packages/ort-win32-x64-cuda/install.js new file mode 100644 index 0000000..a719253 --- /dev/null +++ b/packages/ort-win32-x64-cuda/install.js @@ -0,0 +1,37 @@ +/** + * Install script for @embedeer/ort-win32-x64-cuda + * + * Downloads a CUDA-enabled ONNX Runtime Node.js binding for Windows x64. + * + * STUB — see packages/ort-linux-x64-cuda/install.js for full documentation + * and TODO list. This file follows the same pattern. + */ + +import { mkdirSync, writeFileSync } from 'fs'; +import { join, dirname } from 'path'; +import { fileURLToPath } from 'url'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const VENDOR_DIR = join(__dirname, 'vendor'); + +if (process.platform !== 'win32' || process.arch !== 'x64') { + console.warn( + `[embedeer] @embedeer/ort-win32-x64-cuda: skipping install on ${process.platform}/${process.arch}`, + ); + process.exit(0); +} + +console.log('[embedeer] @embedeer/ort-win32-x64-cuda: running install script...'); + +mkdirSync(VENDOR_DIR, { recursive: true }); + +// TODO: replace with real binary download (see ort-linux-x64-cuda/install.js) +writeFileSync( + join(VENDOR_DIR, 'README.txt'), + 'This directory will contain the CUDA-enabled ONNX Runtime native binding for Windows x64.\n', +); + +console.warn( + '[embedeer] @embedeer/ort-win32-x64-cuda: STUB install complete. 
' + + 'No real CUDA binary was downloaded yet.', +); diff --git a/packages/ort-win32-x64-cuda/package.json b/packages/ort-win32-x64-cuda/package.json new file mode 100644 index 0000000..f9d5ede --- /dev/null +++ b/packages/ort-win32-x64-cuda/package.json @@ -0,0 +1,27 @@ +{ + "name": "@embedeer/ort-win32-x64-cuda", + "version": "1.0.0", + "description": "CUDA execution provider for embedeer on Windows x64", + "type": "module", + "main": "index.js", + "scripts": { + "install": "node install.js" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/jsilvanus/embedeer.git", + "directory": "packages/ort-win32-x64-cuda" + }, + "keywords": [ + "embedeer", + "onnxruntime", + "cuda", + "gpu", + "windows" + ], + "author": "", + "license": "ISC", + "engines": { + "node": ">=18" + } +} diff --git a/packages/ort-win32-x64-dml/README.md b/packages/ort-win32-x64-dml/README.md new file mode 100644 index 0000000..2106ecd --- /dev/null +++ b/packages/ort-win32-x64-dml/README.md @@ -0,0 +1,30 @@ +# @embedeer/ort-win32-x64-dml + +DirectML execution provider for [embedeer](https://github.com/jsilvanus/embedeer) on **Windows x64**. + +Install this package alongside `embedeer` to enable GPU-accelerated embeddings using DirectML on Windows (supports NVIDIA, AMD, and Intel GPUs — no CUDA required). + +## Installation + +```bash +npm install embedeer +npm install @embedeer/ort-win32-x64-dml +``` + +## Usage + +```js +// On Windows, device='gpu' prefers CUDA first, then DirectML +const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { device: 'gpu' }); + +// Explicitly use DirectML +const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { provider: 'dml' }); +``` + +```bash +npx embedeer --model Xenova/all-MiniLM-L6-v2 --provider dml --data "Hello" +``` + +> ⚠️ **Stub** — binary download not yet implemented. See `install.js` for TODO. + +See [packages/ort-linux-x64-cuda/README.md](../ort-linux-x64-cuda/README.md) for full documentation. 
diff --git a/packages/ort-win32-x64-dml/index.js b/packages/ort-win32-x64-dml/index.js new file mode 100644 index 0000000..cbee47f --- /dev/null +++ b/packages/ort-win32-x64-dml/index.js @@ -0,0 +1,40 @@ +/** + * @embedeer/ort-win32-x64-dml + * + * DirectML execution provider for embedeer on Windows x64. + * + * DirectML supports NVIDIA, AMD, and Intel GPUs on Windows via the + * Direct3D 12 API — no CUDA installation required. + * + * @see packages/ort-linux-x64-cuda/index.js for full documentation. + */ + +import { existsSync } from 'fs'; +import { join, dirname } from 'path'; +import { fileURLToPath } from 'url'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); + +const BINARY_PATH = join(__dirname, 'vendor', 'onnxruntime_binding.node'); + +/** + * Activate the DirectML provider on Windows x64. + * @returns {Promise} + * @throws {Error} If the native binary is not present. + */ +export async function activate() { + if (!existsSync(BINARY_PATH)) { + throw new Error( + `@embedeer/ort-win32-x64-dml: native DirectML binary not found at ${BINARY_PATH}. ` + + `Re-run: npm install @embedeer/ort-win32-x64-dml`, + ); + } + // TODO: wire up the custom ORT binary to onnxruntime-node resolution. +} + +/** + * @returns {string} + */ +export function getDevice() { + return 'dml'; +} diff --git a/packages/ort-win32-x64-dml/install.js b/packages/ort-win32-x64-dml/install.js new file mode 100644 index 0000000..7d48fed --- /dev/null +++ b/packages/ort-win32-x64-dml/install.js @@ -0,0 +1,44 @@ +/** + * Install script for @embedeer/ort-win32-x64-dml + * + * Downloads a DirectML-enabled ONNX Runtime Node.js binding for Windows x64. + * + * STUB — see packages/ort-linux-x64-cuda/install.js for full documentation. + * + * DirectML note: onnxruntime already ships a DirectML provider on Windows. + * This package may only need to configure the execution provider order rather + * than download a full custom binary. See the TODO below. 
+ */ + +import { mkdirSync, writeFileSync } from 'fs'; +import { join, dirname } from 'path'; +import { fileURLToPath } from 'url'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const VENDOR_DIR = join(__dirname, 'vendor'); + +if (process.platform !== 'win32' || process.arch !== 'x64') { + console.warn( + `[embedeer] @embedeer/ort-win32-x64-dml: skipping install on ${process.platform}/${process.arch}`, + ); + process.exit(0); +} + +console.log('[embedeer] @embedeer/ort-win32-x64-dml: running install script...'); + +mkdirSync(VENDOR_DIR, { recursive: true }); + +// TODO: +// - Verify onnxruntime-node ships with DirectML support on Windows. +// If it does, activate() may only need to set the execution provider preference. +// - If a separate DML-enabled binary is needed, download it here. +// See ort-linux-x64-cuda/install.js for the download skeleton. +writeFileSync( + join(VENDOR_DIR, 'README.txt'), + 'This directory will contain the DirectML-enabled ONNX Runtime native binding for Windows x64.\n', +); + +console.warn( + '[embedeer] @embedeer/ort-win32-x64-dml: STUB install complete. 
' + + 'No real DirectML binary was downloaded yet.', +); diff --git a/packages/ort-win32-x64-dml/package.json b/packages/ort-win32-x64-dml/package.json new file mode 100644 index 0000000..3ef12e2 --- /dev/null +++ b/packages/ort-win32-x64-dml/package.json @@ -0,0 +1,28 @@ +{ + "name": "@embedeer/ort-win32-x64-dml", + "version": "1.0.0", + "description": "DirectML execution provider for embedeer on Windows x64", + "type": "module", + "main": "index.js", + "scripts": { + "install": "node install.js" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/jsilvanus/embedeer.git", + "directory": "packages/ort-win32-x64-dml" + }, + "keywords": [ + "embedeer", + "onnxruntime", + "directml", + "dml", + "gpu", + "windows" + ], + "author": "", + "license": "ISC", + "engines": { + "node": ">=18" + } +} From 535fb95f0584c066b7e204004a05f32806e987a8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 8 Apr 2026 07:16:02 +0000 Subject: [PATCH 03/12] Fix JSDoc type annotation and missing options declaration in cli.js Agent-Logs-Url: https://github.com/jsilvanus/embedeer/sessions/898517f6-b6e8-43fa-98b5-5a16e9745dc5 Co-authored-by: jsilvanus <22452468+jsilvanus@users.noreply.github.com> --- packages/embedeer/src/cli.js | 1 + packages/embedeer/src/worker-pool.js | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/embedeer/src/cli.js b/packages/embedeer/src/cli.js index 10ee2be..a833b2f 100755 --- a/packages/embedeer/src/cli.js +++ b/packages/embedeer/src/cli.js @@ -78,6 +78,7 @@ const KNOWN_FLAGS = new Set([ '--pooling', '-p', '--no-normalize', '--dtype', '--token', '--cache-dir', '--device', '--provider', ]); +const options = { model: 'Xenova/all-MiniLM-L6-v2', data: null, // --data texts (array) file: null, // --file path diff --git a/packages/embedeer/src/worker-pool.js b/packages/embedeer/src/worker-pool.js index ed3b25f..951eb26 100644 --- 
a/packages/embedeer/src/worker-pool.js +++ b/packages/embedeer/src/worker-pool.js @@ -26,7 +26,7 @@ export class WorkerPool { /** * @param {string} modelName Hugging Face model identifier * @param {object} [options] - * @param {string} [options.poolSize=2] Number of parallel workers + * @param {number} [options.poolSize=2] Number of parallel workers * @param {string} [options.mode='process'] 'process' (isolated) or 'thread' (same process) * @param {string} [options.pooling='mean'] Pooling strategy ('mean'|'cls'|'none') * @param {boolean} [options.normalize=true] Whether to L2-normalise embeddings From 7bc259ef90d6aa706d66716e1ef66f721c92ae73 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 8 Apr 2026 07:44:08 +0000 Subject: [PATCH 04/12] Implement working CUDA MVP: use bundled ORT providers, real system lib checks Agent-Logs-Url: https://github.com/jsilvanus/embedeer/sessions/7227830d-3d7c-4707-be90-545f53b4bc77 Co-authored-by: jsilvanus <22452468+jsilvanus@users.noreply.github.com> --- README.md | 100 +++++++---- .../embedeer/test/provider-loader.test.js | 32 ++-- packages/ort-linux-x64-cuda/README.md | 140 +++++++-------- packages/ort-linux-x64-cuda/index.js | 144 +++++++++++---- packages/ort-linux-x64-cuda/install.js | 164 ++++++++++-------- packages/ort-win32-x64-cuda/README.md | 27 ++- packages/ort-win32-x64-cuda/index.js | 42 +++-- packages/ort-win32-x64-cuda/install.js | 40 ++--- packages/ort-win32-x64-dml/README.md | 40 ++++- packages/ort-win32-x64-dml/index.js | 40 +++-- packages/ort-win32-x64-dml/install.js | 46 ++--- 11 files changed, 451 insertions(+), 364 deletions(-) diff --git a/README.md b/README.md index 5201c93..8501b9d 100644 --- a/README.md +++ b/README.md @@ -13,80 +13,108 @@ This repository is a **monorepo** managed with npm workspaces. 
|---------|-------------| | [`embedeer`](packages/embedeer) | Main embeddings package (CPU + optional GPU) | | [`@embedeer/ort-linux-x64-cuda`](packages/ort-linux-x64-cuda) | CUDA provider for Linux x64 | -| [`@embedeer/ort-win32-x64-cuda`](packages/ort-win32-x64-cuda) | CUDA provider for Windows x64 | | [`@embedeer/ort-win32-x64-dml`](packages/ort-win32-x64-dml) | DirectML provider for Windows x64 | +| [`@embedeer/ort-win32-x64-cuda`](packages/ort-win32-x64-cuda) | Windows CUDA (placeholder — use DML on Windows) | --- ## Quick Start -### CPU (default) +### CPU (default, works everywhere) ```bash -# Install npm install embedeer - -# CLI npx embedeer --model Xenova/all-MiniLM-L6-v2 --data "Hello world" +``` -# API +```js import { Embedder } from 'embedeer'; const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2'); const vectors = await embedder.embed(['Hello', 'World']); await embedder.destroy(); ``` -### GPU (two-step install) +--- + +## GPU — Two-Step Install + +### Linux x64 + NVIDIA CUDA (GPU MVP) + +**System requirements:** NVIDIA GPU + driver ≥ 525, CUDA 12, cuDNN 9 + +`onnxruntime-node` v1.14+ ships `libonnxruntime_providers_cuda.so` on Linux x64. 
No custom binary needed — just install CUDA 12 + cuDNN 9 system libraries and the npm package: ```bash -# Step 1 - install embedeer +# Install CUDA 12 + cuDNN 9 (Ubuntu/Debian) +sudo apt install cuda-toolkit-12-6 libcudnn9-cuda-12 + +# Install embedeer and the CUDA provider package npm install embedeer +npm install @embedeer/ort-linux-x64-cuda -# Step 2 - install GPU provider for your platform -npm install @embedeer/ort-linux-x64-cuda # Linux x64 NVIDIA CUDA -npm install @embedeer/ort-win32-x64-cuda # Windows x64 NVIDIA CUDA -npm install @embedeer/ort-win32-x64-dml # Windows x64 DirectML (any GPU) +# Run with GPU +npx embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" +``` -# CLI - auto-detect GPU, fall back to CPU -npx embedeer --model Xenova/all-MiniLM-L6-v2 --device auto --data "Hello GPU" +### Windows x64 + DirectML (any GPU) + +**System requirements:** Windows 10 (1903+) or 11, any DirectX 12 GPU, up-to-date drivers + +```bash +npm install embedeer +npm install @embedeer/ort-win32-x64-dml -# CLI - require GPU npx embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" +``` -# API +### GPU API options + +```js import { Embedder } from 'embedeer'; -const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { - device: 'auto', // 'auto' | 'cpu' | 'gpu' - // provider: 'cuda', // explicit override: 'cuda' | 'dml' -}); -``` ---- +// Auto-detect GPU, silent CPU fallback if unavailable +const e1 = await Embedder.create(model, { device: 'auto' }); -## Monorepo Development +// Require GPU — throws if no GPU provider is available +const e2 = await Embedder.create(model, { device: 'gpu' }); + +// Explicit provider +const e3 = await Embedder.create(model, { provider: 'cuda' }); // Linux CUDA +const e4 = await Embedder.create(model, { provider: 'dml' }); // Windows DirectML +``` ```bash -# Install all workspace packages -npm install +npx embedeer --device auto # try GPU, fall back to CPU +npx embedeer --device gpu # require GPU 
+npx embedeer --provider cuda # explicit CUDA (Linux) +npx embedeer --provider dml # explicit DirectML (Windows) +``` -# Run tests (packages/embedeer) -npm test +--- -# Run tests in a specific package -npm test --workspace=packages/embedeer -``` +## Provider Selection Logic + +| Platform | `device='auto'` or `device='gpu'` order | +|----------|-----------------------------------------| +| Linux x64 | CUDA → (CPU fallback) | +| Windows x64 | CUDA → DirectML → (CPU fallback) | +| Other | CPU only | + +For `device='auto'`: silently falls back to CPU if no GPU provider is available. +For `device='gpu'`: throws with a clear error and install instructions. +For explicit `--provider cuda/dml`: throws if libraries are missing, with install instructions. --- -## GPU Provider Status +## Monorepo Development -> The native binary download in GPU provider packages is currently **stubbed**. -> The JS API structure, dynamic loading hooks, and runtime selection logic are fully implemented. -> Actual CUDA/DirectML binaries will be added in a future release. -> See each provider package's `install.js` for the full TODO list. +```bash +npm install # install all workspace packages +npm test # run tests (packages/embedeer) +``` --- ## Documentation -Full API documentation, CLI reference, and options are in [`packages/embedeer/README.md`](packages/embedeer/README.md). 
+Full API documentation, CLI reference, and all options: [`packages/embedeer/README.md`](packages/embedeer/README.md) diff --git a/packages/embedeer/test/provider-loader.test.js b/packages/embedeer/test/provider-loader.test.js index fa9c024..d97af4b 100644 --- a/packages/embedeer/test/provider-loader.test.js +++ b/packages/embedeer/test/provider-loader.test.js @@ -166,8 +166,11 @@ describe('resolveProvider()', () => { // ── explicit provider not installed ────────────────────────────────────── - test('explicit provider=cuda throws with npm install hint when not installed', async () => { + test('explicit provider=cuda re-throws activate error when CUDA libraries are missing', async () => { await withPlatform('linux', 'x64', async () => { + // In this environment @embedeer/ort-linux-x64-cuda is installed (workspace link) + // but there is no NVIDIA GPU. activate() throws the GPU-not-found error which + // is re-thrown by resolveProvider so the user gets a clear diagnostic. await assert.rejects( () => resolveProvider('cpu', 'cuda'), (err) => { @@ -175,9 +178,12 @@ describe('resolveProvider()', () => { err.message.includes('@embedeer/ort-linux-x64-cuda'), `Expected package name in error, got: ${err.message}`, ); + // The error is the activate() diagnostic, not a generic "not installed" msg assert.ok( - err.message.toLowerCase().includes('npm install'), - `Expected npm install hint in error, got: ${err.message}`, + err.message.toLowerCase().includes('nvidia') || + err.message.toLowerCase().includes('cuda') || + err.message.toLowerCase().includes('gpu'), + `Expected GPU-related context in error, got: ${err.message}`, ); return true; }, @@ -185,22 +191,12 @@ describe('resolveProvider()', () => { }); }); - test('explicit provider=dml throws with npm install hint on windows when not installed', async () => { + test('explicit provider=dml succeeds on win32 when package is installed', async () => { await withPlatform('win32', 'x64', async () => { - await assert.rejects( - () 
=> resolveProvider('cpu', 'dml'), - (err) => { - assert.ok( - err.message.includes('@embedeer/ort-win32-x64-dml'), - `Expected package name in error, got: ${err.message}`, - ); - assert.ok( - err.message.toLowerCase().includes('npm install'), - `Expected npm install hint in error, got: ${err.message}`, - ); - return true; - }, - ); + // DML is bundled with onnxruntime-node on Windows; activate() just checks + // the platform (mocked to win32 here) and succeeds. + const result = await resolveProvider('cpu', 'dml'); + assert.equal(result, 'dml'); }); }); diff --git a/packages/ort-linux-x64-cuda/README.md b/packages/ort-linux-x64-cuda/README.md index fb0448f..3ed5df7 100644 --- a/packages/ort-linux-x64-cuda/README.md +++ b/packages/ort-linux-x64-cuda/README.md @@ -2,112 +2,92 @@ CUDA execution provider for [embedeer](https://github.com/jsilvanus/embedeer) on **Linux x64**. -Install this package alongside `embedeer` to enable GPU-accelerated embeddings using NVIDIA CUDA. +Install this package alongside `embedeer` to enable GPU-accelerated embeddings using NVIDIA CUDA on Linux. ---- - -## Installation (two-step) - -```bash -# Step 1 — install embedeer -npm install embedeer +## How it works -# Step 2 — install the CUDA provider for Linux x64 -npm install @embedeer/ort-linux-x64-cuda -``` +`onnxruntime-node` v1.14+ ships `libonnxruntime_providers_cuda.so` on Linux x64 as part of its standard npm package — **no additional binary download is required**. -> **Requirements** -> - Linux x86_64 -> - NVIDIA GPU with CUDA drivers installed (CUDA 12.x recommended) -> - NVIDIA CUDA Toolkit matching the binary version +This package verifies that the required CUDA 12 system libraries are present, then returns `device='cuda'` so that `@huggingface/transformers` pipeline runs on the GPU. 
---- +## System Requirements -## Usage +| Requirement | Version | +|-------------|---------| +| NVIDIA GPU Driver | ≥ 525 (CUDA 12 compatible) | +| CUDA Toolkit | 12.x (`libcudart.so.12`, `libcublas.so.12`, `libcublasLt.so.12`, `libcurand.so.10`, `libcufft.so.11`) | +| cuDNN | 9.x (`libcudnn.so.9`) | +| OS | Linux x64 | -Once installed, embedeer automatically detects and uses this provider: +### Installing CUDA 12 + cuDNN 9 -```js -import { Embedder } from 'embedeer'; - -// Auto-detect GPU (falls back to CPU if no provider is installed) -const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { - device: 'auto', -}); +**Ubuntu/Debian (recommended):** +```bash +# Add NVIDIA package repository +wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb +sudo dpkg -i cuda-keyring_1.1-1_all.deb +sudo apt update -// Require GPU (throws if not available) -const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { - device: 'gpu', -}); +# Install CUDA 12 and cuDNN 9 +sudo apt install cuda-toolkit-12-6 libcudnn9-cuda-12 -// Explicitly request CUDA -const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { - provider: 'cuda', -}); +# Add to PATH / LD_LIBRARY_PATH +echo 'export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH' >> ~/.bashrc +source ~/.bashrc ``` -CLI: +**CUDA Toolkit installer:** https://developer.nvidia.com/cuda-downloads +**cuDNN download:** https://developer.nvidia.com/cudnn-downloads +Verify installation: ```bash -# Auto GPU (falls back to CPU) -npx embedeer --model Xenova/all-MiniLM-L6-v2 --device auto --data "Hello GPU" - -# Require GPU -npx embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" - -# Explicit CUDA provider -npx embedeer --model Xenova/all-MiniLM-L6-v2 --provider cuda --data "Hello GPU" +nvidia-smi # confirm GPU is detected +nvcc --version # confirm CUDA toolkit is installed ``` ---- +## Installation -## How it works - -This package 
provides: +```bash +# Step 1 — main package +npm install embedeer -1. **`install.js`** — runs on `npm install` to download (or build) a CUDA-enabled - ONNX Runtime Node.js binding into `vendor/`. +# Step 2 — CUDA provider +npm install @embedeer/ort-linux-x64-cuda +``` -2. **`index.js`** — exports `activate()` and `getDevice()`. The `activate()` function - verifies that the native binary is present and configures ONNX Runtime to use the - CUDA execution provider. `getDevice()` returns `'cuda'` so embedeer passes the - correct device string to `@huggingface/transformers` `pipeline()`. +## Usage ---- +```js +import { Embedder } from 'embedeer'; -## Current status (stub) +// Auto-detect GPU (falls back to CPU if CUDA unavailable) +const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { device: 'auto' }); -> ⚠️ The binary download in `install.js` is currently **stubbed** — no real CUDA -> binary is downloaded yet. GPU execution is not functional until the TODO in -> `install.js` is implemented. -> -> See `install.js` for the full TODO list and skeleton download code. +// Require GPU (throws with clear error if CUDA unavailable) +const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { device: 'gpu' }); -### What needs to be done +// Explicit CUDA +const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { provider: 'cuda' }); +``` -1. Build a CUDA-enabled `onnxruntime-node` binding: - ```bash - # Clone ORT - git clone --recursive https://github.com/microsoft/onnxruntime - cd onnxruntime - # Build with CUDA - ./build.sh --config Release --build_nodejs --use_cuda \ - --cuda_home /usr/local/cuda \ - --cudnn_home /usr/local/cuda - ``` +```bash +# CLI — auto GPU +npx embedeer --model Xenova/all-MiniLM-L6-v2 --device auto --data "Hello GPU" -2. Upload the resulting `.node` file as a GitHub Release asset. +# CLI — explicit CUDA +npx embedeer --model Xenova/all-MiniLM-L6-v2 --provider cuda --data "Hello CUDA" +``` -3. 
Update `install.js` to download and verify the binary. +## Error messages -4. Update `index.js` to wire the binary into ONNX Runtime's module resolution. +If CUDA libraries are missing, you'll see: ---- +``` +@embedeer/ort-linux-x64-cuda: Missing CUDA system libraries: libcudart.so.12, libcudnn.so.9 -## Platform +onnxruntime-node CUDA requires CUDA 12 + cuDNN 9. Install them: -| Platform | Architecture | Provider | Package | -|----------|-------------|----------|---------| -| Linux | x64 | CUDA | `@embedeer/ort-linux-x64-cuda` ← **this package** | -| Windows | x64 | CUDA | `@embedeer/ort-win32-x64-cuda` | -| Windows | x64 | DirectML | `@embedeer/ort-win32-x64-dml` | + # Option A — CUDA 12 + cuDNN 9 via apt (Ubuntu/Debian) + sudo apt install cuda-toolkit-12-6 libcudnn9-cuda-12 + ... +``` diff --git a/packages/ort-linux-x64-cuda/index.js b/packages/ort-linux-x64-cuda/index.js index e7c68e9..af4eb94 100644 --- a/packages/ort-linux-x64-cuda/index.js +++ b/packages/ort-linux-x64-cuda/index.js @@ -3,62 +3,140 @@ * * CUDA execution provider for embedeer on Linux x64. * - * This package activates a CUDA-enabled ONNX Runtime build so that - * @huggingface/transformers pipeline() runs inference on the GPU. + * How it works: + * onnxruntime-node v1.14+ ships libonnxruntime_providers_cuda.so on Linux x64. + * No additional binary download is required — the CUDA execution provider is + * already bundled with the standard onnxruntime-node package. * - * Usage (automatic via embedeer): - * // Install this package and embedeer will use it when device='gpu' or 'auto' - * // npm install @embedeer/ort-linux-x64-cuda + * This package verifies that the required CUDA 12 system libraries are + * available before attempting to use the CUDA execution provider. 
* - * Manual usage: - * import { activate, getDevice } from '@embedeer/ort-linux-x64-cuda'; - * await activate(); - * // then pass getDevice() as the device option to pipeline() + * System requirements: + * - NVIDIA GPU with driver ≥ 525 (CUDA 12 compatible) + * - CUDA 12 Toolkit: libcudart.so.12, libcublas.so.12, libcublasLt.so.12, + * libcurand.so.10, libcufft.so.11 + * - cuDNN 9: libcudnn.so.9 + * + * Install CUDA 12: https://developer.nvidia.com/cuda-downloads + * Install cuDNN 9: https://developer.nvidia.com/cudnn-downloads + * Or via apt (Ubuntu/Debian): + * sudo apt install cuda-toolkit-12-x libcudnn9-cuda-12 */ +import { execSync } from 'child_process'; import { existsSync } from 'fs'; -import { join, dirname } from 'path'; -import { fileURLToPath } from 'url'; -const __dirname = dirname(fileURLToPath(import.meta.url)); +/** + * Shared libraries required by libonnxruntime_providers_cuda.so (CUDA 12 / cuDNN 9). + * These are system-installed libraries; they are NOT bundled with onnxruntime-node. + */ +const REQUIRED_LIBS = [ + 'libcudart.so.12', + 'libcublas.so.12', + 'libcublasLt.so.12', + 'libcurand.so.10', + 'libcufft.so.11', + 'libcudnn.so.9', +]; /** - * Path where the install script places the CUDA-enabled ORT native binding. - * TODO: update this path once actual binary distribution is implemented. + * Common directories where CUDA libraries may be installed. + * Includes entries from LD_LIBRARY_PATH so custom installs are detected. */ -const BINARY_PATH = join(__dirname, 'vendor', 'onnxruntime_binding.node'); +function cudaSearchDirs() { + const extra = (process.env.LD_LIBRARY_PATH ?? 
'').split(':').filter(Boolean); + return [ + '/usr/local/cuda/lib64', + '/usr/local/cuda-12/lib64', + '/usr/local/cuda-12.0/lib64', + '/usr/local/cuda-12.1/lib64', + '/usr/local/cuda-12.2/lib64', + '/usr/local/cuda-12.3/lib64', + '/usr/local/cuda-12.4/lib64', + '/usr/local/cuda-12.5/lib64', + '/usr/local/cuda-12.6/lib64', + '/usr/lib/x86_64-linux-gnu', + '/usr/lib64', + ...extra, + ]; +} /** - * Activate the CUDA provider. + * Find a shared library by name. Checks common CUDA paths then falls back to + * `ldconfig -p` for libraries registered in the dynamic linker cache. * - * Verifies that the native CUDA-enabled ONNX Runtime binary is present. - * In the future this hook can also set environment variables or call into - * the ORT C API to configure the CUDA execution provider. + * @param {string} libName e.g. 'libcudart.so.12' + * @returns {string|null} Path to the library, or null if not found. + */ +function findLib(libName) { + for (const dir of cudaSearchDirs()) { + const fullPath = `${dir}/${libName}`; + if (existsSync(fullPath)) return fullPath; + } + + // Use ldconfig cache as a fallback + try { + const output = execSync('ldconfig -p', { + stdio: ['ignore', 'pipe', 'ignore'], + encoding: 'utf8', + timeout: 3000, + }); + for (const line of output.split('\n')) { + if (line.includes(libName) && line.includes('=>')) { + const match = line.match(/=>\s*(.+)/); + if (match) return match[1].trim(); + } + } + } catch { + // ldconfig not available in all environments; that's ok + } + + return null; +} + +/** + * Activate the CUDA execution provider. * - * TODO: When distributing real binaries, also configure the ORT env to - * point at the custom binary path so that onnxruntime-node loads it. + * Checks that all required CUDA 12 / cuDNN 9 system libraries are present. + * onnxruntime-node v1.14+ bundles libonnxruntime_providers_cuda.so on Linux x64, + * so no additional binary download is needed — only system CUDA libraries are required. 
* * @returns {Promise} - * @throws {Error} If the native binary is not present (install.js was not run). + * @throws {Error} If NVIDIA GPU is not detected or required CUDA libraries are missing. */ export async function activate() { - if (!existsSync(BINARY_PATH)) { + // 1. Check for NVIDIA GPU / driver + if (!existsSync('/dev/nvidiactl')) { throw new Error( - `@embedeer/ort-linux-x64-cuda: native CUDA binary not found at ${BINARY_PATH}. ` + - `Re-run: npm install @embedeer/ort-linux-x64-cuda`, + '@embedeer/ort-linux-x64-cuda: No NVIDIA GPU detected (/dev/nvidiactl not found).\n' + + 'Ensure NVIDIA drivers are installed.\n' + + 'Verify with: nvidia-smi', ); } - // TODO: wire up the custom ORT binary to onnxruntime-node resolution. - // This requires either: - // (a) patching the onnxruntime-node module resolution to load from BINARY_PATH, or - // (b) using ORT's env.ortModuleUrl / similar API once the JS library exposes it. - // For now the binary presence check above is sufficient to confirm installation. + + // 2. Check required CUDA / cuDNN system libraries + const missing = REQUIRED_LIBS.filter((lib) => findLib(lib) === null); + + if (missing.length > 0) { + throw new Error( + `@embedeer/ort-linux-x64-cuda: Missing CUDA system libraries: ${missing.join(', ')}\n\n` + + 'onnxruntime-node CUDA requires CUDA 12 + cuDNN 9. Install them:\n\n' + + ' # Option A — CUDA 12 + cuDNN 9 via apt (Ubuntu/Debian)\n' + + ' sudo apt install cuda-toolkit-12-6 libcudnn9-cuda-12\n\n' + + ' # Option B — CUDA Toolkit installer from NVIDIA\n' + + ' https://developer.nvidia.com/cuda-downloads\n' + + ' https://developer.nvidia.com/cudnn-downloads\n\n' + + ' # After installing, make sure libraries are on LD_LIBRARY_PATH if non-standard:\n' + + ' export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH', + ); + } + + // onnxruntime-node will dynamically load libonnxruntime_providers_cuda.so at + // runtime when device='cuda' is passed to pipeline(). No further action needed here. 
} /** - * Returns the device string that @huggingface/transformers pipeline() should - * use with this provider. - * + * Returns the device string passed to @huggingface/transformers pipeline(). * @returns {string} */ export function getDevice() { diff --git a/packages/ort-linux-x64-cuda/install.js b/packages/ort-linux-x64-cuda/install.js index 9d3ae70..e978060 100644 --- a/packages/ort-linux-x64-cuda/install.js +++ b/packages/ort-linux-x64-cuda/install.js @@ -1,94 +1,104 @@ /** - * Install script for @embedeer/ort-linux-x64-cuda + * Install / post-install check for @embedeer/ort-linux-x64-cuda * - * Downloads (or builds) a CUDA-enabled ONNX Runtime Node.js binding for - * Linux x64 and places it under vendor/ so the package can activate it at - * runtime. + * onnxruntime-node v1.14+ ships libonnxruntime_providers_cuda.so on Linux x64. + * No additional binary download is required. This script just verifies that + * the necessary CUDA 12 system libraries are present, and prints actionable + * install instructions if they are not. * - * This script runs automatically via the "install" lifecycle hook: - * npm install @embedeer/ort-linux-x64-cuda - * - * ── Current status ──────────────────────────────────────────────────────── - * STUB — the actual binary download / build is not yet implemented. - * The structure and hooks are in place; see the TODOs below. - * ───────────────────────────────────────────────────────────────────────── - * - * Expected artifact layout after install: - * packages/ort-linux-x64-cuda/ - * └── vendor/ - * ├── onnxruntime_binding.node ← CUDA-enabled ORT Node binding - * └── libonnxruntime_providers_cuda.so ← shared lib (may be bundled in .node) - * - * TODO: - * 1. Build or obtain a CUDA-enabled onnxruntime-node binding. - * Options: - * (a) Build from source: https://onnxruntime.ai/docs/build/inferencing.html - * cmake flags: --use_cuda --cuda_home /usr/local/cuda - * (b) Download a prebuilt binary from a GitHub Release in this repo. 
- * See: https://github.com/jsilvanus/embedeer/releases - * 2. Upload the binary as a GitHub Release asset tagged by version + platform. - * 3. Replace the stub below with actual download logic using the fetch API - * (or the 'node-fetch' package for older Node versions). - * 4. Verify the binary checksum (SHA-256) before using it. - * - * CUDA compatibility: - * The binary must be compiled against the same CUDA major version as the - * host system (e.g. CUDA 12.x). Consider publishing multiple binaries: - * ort-linux-x64-cuda12, ort-linux-x64-cuda11, etc. - * - * onnxruntime version: - * Must match the version that @huggingface/transformers depends on. - * Check: node -e "require('onnxruntime-node/package.json').version" + * System requirements verified here: + * - NVIDIA GPU with CUDA 12-compatible driver (≥ 525) + * - CUDA 12 Toolkit: libcudart.so.12, libcublas.so.12, libcublasLt.so.12, + * libcurand.so.10, libcufft.so.11 + * - cuDNN 9: libcudnn.so.9 */ -import { mkdirSync, writeFileSync } from 'fs'; -import { join, dirname } from 'path'; -import { fileURLToPath } from 'url'; +import { execSync } from 'child_process'; +import { existsSync } from 'fs'; -const __dirname = dirname(fileURLToPath(import.meta.url)); -const VENDOR_DIR = join(__dirname, 'vendor'); - -// Only run on Linux x64; other platforms should not have installed this package, -// but guard anyway. 
if (process.platform !== 'linux' || process.arch !== 'x64') { console.warn( - `[embedeer] @embedeer/ort-linux-x64-cuda: skipping install on ${process.platform}/${process.arch}`, + `[embedeer] @embedeer/ort-linux-x64-cuda: skipping checks on ${process.platform}/${process.arch} (this package is for Linux x64 only)`, ); process.exit(0); } -console.log('[embedeer] @embedeer/ort-linux-x64-cuda: running install script...'); +console.log('[embedeer] @embedeer/ort-linux-x64-cuda: checking system CUDA requirements...'); -mkdirSync(VENDOR_DIR, { recursive: true }); +const REQUIRED_LIBS = [ + 'libcudart.so.12', + 'libcublas.so.12', + 'libcublasLt.so.12', + 'libcurand.so.10', + 'libcufft.so.11', + 'libcudnn.so.9', +]; -// ── TODO: replace this stub with real binary download ───────────────────── -// -// Example skeleton for a real download (requires Node 18+ built-in fetch): -// -// const VERSION = '1.0.0'; -// const BASE_URL = `https://github.com/jsilvanus/embedeer/releases/download/ort-linux-x64-cuda-${VERSION}`; -// const BINARY_NAME = 'onnxruntime_binding.node'; -// const CHECKSUM_NAME = 'onnxruntime_binding.node.sha256'; -// -// const res = await fetch(`${BASE_URL}/${BINARY_NAME}`); -// if (!res.ok) throw new Error(`Download failed: ${res.status} ${res.statusText}`); -// const buf = Buffer.from(await res.arrayBuffer()); -// -// // TODO: verify SHA-256 checksum here -// -// writeFileSync(join(VENDOR_DIR, BINARY_NAME), buf); -// console.log(`[embedeer] Installed CUDA ORT binding → ${join(VENDOR_DIR, BINARY_NAME)}`); -// ────────────────────────────────────────────────────────────────────────── +const CUDA_SEARCH_DIRS = [ + '/usr/local/cuda/lib64', + '/usr/local/cuda-12/lib64', + '/usr/lib/x86_64-linux-gnu', + '/usr/lib64', + ...(process.env.LD_LIBRARY_PATH ?? '').split(':').filter(Boolean), +]; -// For now write a placeholder so the package directory is not empty. 
-writeFileSync( - join(VENDOR_DIR, 'README.txt'), - 'This directory will contain the CUDA-enabled ONNX Runtime native binding.\n' + - 'See packages/ort-linux-x64-cuda/install.js for the download TODO.\n', -); +function findLib(libName) { + for (const dir of CUDA_SEARCH_DIRS) { + if (existsSync(`${dir}/${libName}`)) return `${dir}/${libName}`; + } + try { + const output = execSync('ldconfig -p', { + stdio: ['ignore', 'pipe', 'ignore'], + encoding: 'utf8', + timeout: 3000, + }); + for (const line of output.split('\n')) { + if (line.includes(libName) && line.includes('=>')) { + const match = line.match(/=>\s*(.+)/); + if (match) return match[1].trim(); + } + } + } catch { /* ldconfig not available */ } + return null; +} + +// Check NVIDIA GPU / driver +const hasGpu = existsSync('/dev/nvidiactl'); +if (!hasGpu) { + console.warn( + '\n[embedeer] WARNING: No NVIDIA GPU detected (/dev/nvidiactl not found).\n' + + ' @embedeer/ort-linux-x64-cuda requires an NVIDIA GPU with CUDA 12 drivers.\n' + + ' GPU acceleration will not be available until drivers are installed.\n', + ); +} else { + console.log('[embedeer] ✓ NVIDIA GPU detected'); +} + +// Check CUDA libraries +const missing = REQUIRED_LIBS.filter((lib) => findLib(lib) === null); +const found = REQUIRED_LIBS.filter((lib) => findLib(lib) !== null); + +for (const lib of found) { + console.log(`[embedeer] ✓ ${lib}`); +} + +if (missing.length > 0) { + console.warn( + `\n[embedeer] WARNING: Missing CUDA system libraries: ${missing.join(', ')}\n\n` + + ' onnxruntime-node CUDA EP requires CUDA 12 + cuDNN 9.\n\n' + + ' Install on Ubuntu/Debian:\n' + + ' sudo apt install cuda-toolkit-12-6 libcudnn9-cuda-12\n\n' + + ' Or download from NVIDIA:\n' + + ' https://developer.nvidia.com/cuda-downloads\n' + + ' https://developer.nvidia.com/cudnn-downloads\n\n' + + ' After installing, if libraries are not on the default path:\n' + + ' export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH\n', + ); + // Exit 0 so npm install doesn't 
fail — the user may install CUDA later. + process.exit(0); +} -console.warn( - '[embedeer] @embedeer/ort-linux-x64-cuda: STUB install complete. ' + - 'No real CUDA binary was downloaded yet — GPU execution is not available. ' + - 'See packages/ort-linux-x64-cuda/install.js for the implementation TODO.', +console.log( + '\n[embedeer] @embedeer/ort-linux-x64-cuda: all CUDA requirements satisfied.\n' + + ' GPU acceleration is available. Use device="gpu" or device="auto" in embedeer.\n', ); diff --git a/packages/ort-win32-x64-cuda/README.md b/packages/ort-win32-x64-cuda/README.md index 1f47715..b333f87 100644 --- a/packages/ort-win32-x64-cuda/README.md +++ b/packages/ort-win32-x64-cuda/README.md @@ -1,26 +1,23 @@ # @embedeer/ort-win32-x64-cuda -CUDA execution provider for [embedeer](https://github.com/jsilvanus/embedeer) on **Windows x64**. +> ⚠️ **Not yet available** — CUDA on Windows is not included in `onnxruntime-node` prebuilt binaries. -Install this package alongside `embedeer` to enable GPU-accelerated embeddings using NVIDIA CUDA on Windows. +This package is a placeholder for future Windows CUDA support. -## Installation +## Use DirectML instead + +For GPU acceleration on Windows, use DirectML — it supports NVIDIA, AMD, and Intel GPUs without CUDA: ```bash -npm install embedeer -npm install @embedeer/ort-win32-x64-cuda +npm install @embedeer/ort-win32-x64-dml ``` -## Usage - -```js -const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { device: 'gpu' }); -``` +See [`@embedeer/ort-win32-x64-dml`](../ort-win32-x64-dml/README.md) for full documentation. -```bash -npx embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello" -``` +## Why CUDA isn't available on Windows -> ⚠️ **Stub** — binary download not yet implemented. See `install.js` for TODO. +`onnxruntime-node` prebuilt binaries include CUDA support on **Linux x64** only (CUDA 12 + cuDNN 9). 
Windows CUDA support would require either: +- A future official ONNX Runtime release with Windows CUDA prebuilts +- A custom `onnxruntime-node` build against CUDA on Windows -See [packages/ort-linux-x64-cuda/README.md](../ort-linux-x64-cuda/README.md) for full documentation. +See [ONNX Runtime build docs](https://onnxruntime.ai/docs/build/inferencing.html) if you need Windows CUDA. diff --git a/packages/ort-win32-x64-cuda/index.js b/packages/ort-win32-x64-cuda/index.js index 75bdd60..f69e586 100644 --- a/packages/ort-win32-x64-cuda/index.js +++ b/packages/ort-win32-x64-cuda/index.js @@ -3,30 +3,36 @@ * * CUDA execution provider for embedeer on Windows x64. * - * @see packages/ort-linux-x64-cuda/index.js for full documentation. + * IMPORTANT: onnxruntime-node does not currently ship CUDA support for Windows + * in its prebuilt binaries. CUDA on Windows requires a custom ORT build or a + * future official release that includes Windows CUDA prebuilts. + * + * Use @embedeer/ort-win32-x64-dml for GPU acceleration on Windows instead — + * DirectML supports NVIDIA, AMD, and Intel GPUs on Windows 10/11 without + * requiring a CUDA installation. + * + * @see packages/ort-win32-x64-dml + * @see https://github.com/microsoft/onnxruntime/releases for CUDA Windows builds */ -import { existsSync } from 'fs'; -import { join, dirname } from 'path'; -import { fileURLToPath } from 'url'; - -const __dirname = dirname(fileURLToPath(import.meta.url)); - -const BINARY_PATH = join(__dirname, 'vendor', 'onnxruntime_binding.node'); - /** - * Activate the CUDA provider on Windows x64. + * Activate the CUDA execution provider on Windows x64. + * * @returns {Promise} - * @throws {Error} If the native binary is not present. + * @throws {Error} Always — CUDA is not currently supported via standard onnxruntime-node + * prebuilts on Windows. Use @embedeer/ort-win32-x64-dml for DirectML GPU acceleration. 
*/ export async function activate() { - if (!existsSync(BINARY_PATH)) { - throw new Error( - `@embedeer/ort-win32-x64-cuda: native CUDA binary not found at ${BINARY_PATH}. ` + - `Re-run: npm install @embedeer/ort-win32-x64-cuda`, - ); - } - // TODO: wire up the custom ORT binary to onnxruntime-node resolution. + throw new Error( + '@embedeer/ort-win32-x64-cuda: CUDA is not currently available in onnxruntime-node\n' + + 'prebuilt binaries for Windows.\n\n' + + 'For GPU acceleration on Windows, use DirectML instead:\n' + + ' npm install @embedeer/ort-win32-x64-dml\n' + + ' npx embedeer --provider dml --data "Hello"\n\n' + + 'DirectML supports NVIDIA, AMD, and Intel GPUs on Windows 10/11 without CUDA.\n\n' + + 'For Windows CUDA support, a custom onnxruntime build is required.\n' + + 'See: https://onnxruntime.ai/docs/build/inferencing.html', + ); } /** diff --git a/packages/ort-win32-x64-cuda/install.js b/packages/ort-win32-x64-cuda/install.js index a719253..be60555 100644 --- a/packages/ort-win32-x64-cuda/install.js +++ b/packages/ort-win32-x64-cuda/install.js @@ -1,37 +1,17 @@ /** - * Install script for @embedeer/ort-win32-x64-cuda + * Install / post-install for @embedeer/ort-win32-x64-cuda * - * Downloads a CUDA-enabled ONNX Runtime Node.js binding for Windows x64. + * IMPORTANT: onnxruntime-node does not currently ship CUDA prebuilts for Windows. + * This package is a placeholder for future Windows CUDA support. * - * STUB — see packages/ort-linux-x64-cuda/install.js for full documentation - * and TODO list. This file follows the same pattern. 
+ * For GPU acceleration on Windows, use DirectML instead: + * npm install @embedeer/ort-win32-x64-dml */ -import { mkdirSync, writeFileSync } from 'fs'; -import { join, dirname } from 'path'; -import { fileURLToPath } from 'url'; - -const __dirname = dirname(fileURLToPath(import.meta.url)); -const VENDOR_DIR = join(__dirname, 'vendor'); - -if (process.platform !== 'win32' || process.arch !== 'x64') { - console.warn( - `[embedeer] @embedeer/ort-win32-x64-cuda: skipping install on ${process.platform}/${process.arch}`, - ); - process.exit(0); -} - -console.log('[embedeer] @embedeer/ort-win32-x64-cuda: running install script...'); - -mkdirSync(VENDOR_DIR, { recursive: true }); - -// TODO: replace with real binary download (see ort-linux-x64-cuda/install.js) -writeFileSync( - join(VENDOR_DIR, 'README.txt'), - 'This directory will contain the CUDA-enabled ONNX Runtime native binding for Windows x64.\n', -); - console.warn( - '[embedeer] @embedeer/ort-win32-x64-cuda: STUB install complete. ' + - 'No real CUDA binary was downloaded yet.', + '\n[embedeer] WARNING: @embedeer/ort-win32-x64-cuda — CUDA is not currently available\n' + + ' in onnxruntime-node prebuilt binaries for Windows.\n\n' + + ' For GPU acceleration on Windows, use DirectML instead:\n' + + ' npm install @embedeer/ort-win32-x64-dml\n\n' + + ' DirectML supports NVIDIA, AMD, and Intel GPUs on Windows 10/11.\n', ); diff --git a/packages/ort-win32-x64-dml/README.md b/packages/ort-win32-x64-dml/README.md index 2106ecd..9f52657 100644 --- a/packages/ort-win32-x64-dml/README.md +++ b/packages/ort-win32-x64-dml/README.md @@ -2,29 +2,53 @@ DirectML execution provider for [embedeer](https://github.com/jsilvanus/embedeer) on **Windows x64**. -Install this package alongside `embedeer` to enable GPU-accelerated embeddings using DirectML on Windows (supports NVIDIA, AMD, and Intel GPUs — no CUDA required). +Install this package alongside `embedeer` to enable GPU-accelerated embeddings using DirectML on Windows. 
Supports **NVIDIA, AMD, and Intel GPUs** — no CUDA installation required. + +## How it works + +`onnxruntime-node` ships DirectML support bundled on Windows x64 — **no additional binary download is required**. + +DirectML is a Microsoft API built into Windows 10 (1903+) and Windows 11 that accelerates machine learning inference across all DirectX 12-capable GPUs. + +## System Requirements + +| Requirement | Version | +|-------------|---------| +| Windows | 10 (1903+) or Windows 11 | +| GPU | Any DirectX 12-capable GPU (NVIDIA, AMD, Intel — most GPUs from 2016+) | +| GPU Driver | Up-to-date drivers from your GPU vendor | + +No CUDA installation needed. ## Installation ```bash +# Step 1 — main package npm install embedeer + +# Step 2 — DirectML provider npm install @embedeer/ort-win32-x64-dml ``` ## Usage ```js -// On Windows, device='gpu' prefers CUDA first, then DirectML +import { Embedder } from 'embedeer'; + +// Auto-detect GPU (DirectML is tried first on Windows) +const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { device: 'auto' }); + +// Require GPU const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { device: 'gpu' }); -// Explicitly use DirectML +// Explicit DirectML const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { provider: 'dml' }); ``` ```bash -npx embedeer --model Xenova/all-MiniLM-L6-v2 --provider dml --data "Hello" -``` - -> ⚠️ **Stub** — binary download not yet implemented. See `install.js` for TODO. +# CLI — auto GPU (uses DirectML on Windows) +npx embedeer --model Xenova/all-MiniLM-L6-v2 --device auto --data "Hello GPU" -See [packages/ort-linux-x64-cuda/README.md](../ort-linux-x64-cuda/README.md) for full documentation. 
+# CLI — explicit DirectML +npx embedeer --model Xenova/all-MiniLM-L6-v2 --provider dml --data "Hello DML" +``` diff --git a/packages/ort-win32-x64-dml/index.js b/packages/ort-win32-x64-dml/index.js index cbee47f..e2a6945 100644 --- a/packages/ort-win32-x64-dml/index.js +++ b/packages/ort-win32-x64-dml/index.js @@ -3,36 +3,42 @@ * * DirectML execution provider for embedeer on Windows x64. * - * DirectML supports NVIDIA, AMD, and Intel GPUs on Windows via the - * Direct3D 12 API — no CUDA installation required. + * How it works: + * onnxruntime-node ships DirectML support on Windows x64 out of the box. + * No additional binary download is required — DirectML is bundled with + * the standard onnxruntime-node package and comes with Windows 10/11. * - * @see packages/ort-linux-x64-cuda/index.js for full documentation. + * Hardware: + * Supports NVIDIA, AMD, Intel, and Qualcomm GPUs via Direct3D 12. + * No CUDA installation required. + * + * System requirements: + * - Windows 10 (1903+) or Windows 11 + * - Any DirectX 12-capable GPU (most GPUs from 2016+) + * - Up-to-date GPU drivers (from your GPU vendor) */ -import { existsSync } from 'fs'; -import { join, dirname } from 'path'; -import { fileURLToPath } from 'url'; - -const __dirname = dirname(fileURLToPath(import.meta.url)); - -const BINARY_PATH = join(__dirname, 'vendor', 'onnxruntime_binding.node'); - /** - * Activate the DirectML provider on Windows x64. + * Activate the DirectML execution provider. + * + * DirectML is bundled with onnxruntime-node on Windows and available natively + * on Windows 10/11. No system library installation is required. + * * @returns {Promise} - * @throws {Error} If the native binary is not present. + * @throws {Error} If not running on Windows. */ export async function activate() { - if (!existsSync(BINARY_PATH)) { + if (process.platform !== 'win32') { throw new Error( - `@embedeer/ort-win32-x64-dml: native DirectML binary not found at ${BINARY_PATH}. 
` + - `Re-run: npm install @embedeer/ort-win32-x64-dml`, + `@embedeer/ort-win32-x64-dml: DirectML is only available on Windows (current platform: ${process.platform}).`, ); } - // TODO: wire up the custom ORT binary to onnxruntime-node resolution. + // DirectML is natively available via onnxruntime-node on Windows 10/11. + // onnxruntime will load the DirectML EP automatically when device='dml' is requested. } /** + * Returns the device string passed to @huggingface/transformers pipeline(). * @returns {string} */ export function getDevice() { diff --git a/packages/ort-win32-x64-dml/install.js b/packages/ort-win32-x64-dml/install.js index 7d48fed..0392bb8 100644 --- a/packages/ort-win32-x64-dml/install.js +++ b/packages/ort-win32-x64-dml/install.js @@ -1,44 +1,26 @@ /** - * Install script for @embedeer/ort-win32-x64-dml + * Install / post-install check for @embedeer/ort-win32-x64-dml * - * Downloads a DirectML-enabled ONNX Runtime Node.js binding for Windows x64. + * onnxruntime-node ships DirectML support bundled on Windows x64. + * No additional binary download is required. * - * STUB — see packages/ort-linux-x64-cuda/install.js for full documentation. + * DirectML is part of Windows 10/11 and supports all DirectX 12 GPUs: + * NVIDIA, AMD, Intel, Qualcomm, etc. No CUDA installation needed. * - * DirectML note: onnxruntime already ships a DirectML provider on Windows. - * This package may only need to configure the execution provider order rather - * than download a full custom binary. See the TODO below. + * This script just confirms the environment is suitable. 
*/ -import { mkdirSync, writeFileSync } from 'fs'; -import { join, dirname } from 'path'; -import { fileURLToPath } from 'url'; - -const __dirname = dirname(fileURLToPath(import.meta.url)); -const VENDOR_DIR = join(__dirname, 'vendor'); - -if (process.platform !== 'win32' || process.arch !== 'x64') { +if (process.platform !== 'win32') { console.warn( - `[embedeer] @embedeer/ort-win32-x64-dml: skipping install on ${process.platform}/${process.arch}`, + `[embedeer] @embedeer/ort-win32-x64-dml: skipping checks on ${process.platform}/${process.arch} (this package is for Windows x64 only)`, ); process.exit(0); } -console.log('[embedeer] @embedeer/ort-win32-x64-dml: running install script...'); - -mkdirSync(VENDOR_DIR, { recursive: true }); - -// TODO: -// - Verify onnxruntime-node ships with DirectML support on Windows. -// If it does, activate() may only need to set the execution provider preference. -// - If a separate DML-enabled binary is needed, download it here. -// See ort-linux-x64-cuda/install.js for the download skeleton. -writeFileSync( - join(VENDOR_DIR, 'README.txt'), - 'This directory will contain the DirectML-enabled ONNX Runtime native binding for Windows x64.\n', -); - -console.warn( - '[embedeer] @embedeer/ort-win32-x64-dml: STUB install complete. 
' + - 'No real DirectML binary was downloaded yet.', +console.log( + '[embedeer] @embedeer/ort-win32-x64-dml: DirectML is bundled with onnxruntime-node on Windows.\n' + + ' No additional binary download is required.\n' + + ' GPU acceleration via DirectML is available on Windows 10 (1903+) / Windows 11\n' + + ' with any DirectX 12-capable GPU.\n' + + ' Make sure your GPU drivers are up to date.\n', ); From 5cc9f0515de604da5feb030e41c4c2db6d845543 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 8 Apr 2026 07:46:12 +0000 Subject: [PATCH 05/12] Fix version inconsistency in CUDA provider comments (v1.20+/v1.24.x) Agent-Logs-Url: https://github.com/jsilvanus/embedeer/sessions/7227830d-3d7c-4707-be90-545f53b4bc77 Co-authored-by: jsilvanus <22452468+jsilvanus@users.noreply.github.com> --- packages/ort-linux-x64-cuda/index.js | 6 ++++-- packages/ort-linux-x64-cuda/install.js | 3 ++- packages/ort-win32-x64-cuda/README.md | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/packages/ort-linux-x64-cuda/index.js b/packages/ort-linux-x64-cuda/index.js index af4eb94..5cefbb0 100644 --- a/packages/ort-linux-x64-cuda/index.js +++ b/packages/ort-linux-x64-cuda/index.js @@ -4,9 +4,10 @@ * CUDA execution provider for embedeer on Linux x64. * * How it works: - * onnxruntime-node v1.14+ ships libonnxruntime_providers_cuda.so on Linux x64. + * onnxruntime-node v1.20+ ships libonnxruntime_providers_cuda.so on Linux x64. * No additional binary download is required — the CUDA execution provider is * already bundled with the standard onnxruntime-node package. + * (@huggingface/transformers@4.x requires onnxruntime-node@1.24.x which ships CUDA.) * * This package verifies that the required CUDA 12 system libraries are * available before attempting to use the CUDA execution provider. @@ -98,7 +99,8 @@ function findLib(libName) { * Activate the CUDA execution provider. 
* * Checks that all required CUDA 12 / cuDNN 9 system libraries are present. - * onnxruntime-node v1.14+ bundles libonnxruntime_providers_cuda.so on Linux x64, + * onnxruntime-node v1.20+ bundles libonnxruntime_providers_cuda.so on Linux x64 + * (@huggingface/transformers@4.x requires onnxruntime-node@1.24.x which ships CUDA), * so no additional binary download is needed — only system CUDA libraries are required. * * @returns {Promise} diff --git a/packages/ort-linux-x64-cuda/install.js b/packages/ort-linux-x64-cuda/install.js index e978060..cacf635 100644 --- a/packages/ort-linux-x64-cuda/install.js +++ b/packages/ort-linux-x64-cuda/install.js @@ -1,7 +1,8 @@ /** * Install / post-install check for @embedeer/ort-linux-x64-cuda * - * onnxruntime-node v1.14+ ships libonnxruntime_providers_cuda.so on Linux x64. + * onnxruntime-node v1.20+ ships libonnxruntime_providers_cuda.so on Linux x64. + * (@huggingface/transformers@4.x requires onnxruntime-node@1.24.x which ships CUDA.) * No additional binary download is required. This script just verifies that * the necessary CUDA 12 system libraries are present, and prints actionable * install instructions if they are not. diff --git a/packages/ort-win32-x64-cuda/README.md b/packages/ort-win32-x64-cuda/README.md index b333f87..63b9409 100644 --- a/packages/ort-win32-x64-cuda/README.md +++ b/packages/ort-win32-x64-cuda/README.md @@ -16,7 +16,7 @@ See [`@embedeer/ort-win32-x64-dml`](../ort-win32-x64-dml/README.md) for full doc ## Why CUDA isn't available on Windows -`onnxruntime-node` prebuilt binaries include CUDA support on **Linux x64** only (CUDA 12 + cuDNN 9). Windows CUDA support would require either: +`onnxruntime-node` prebuilt binaries include CUDA support on **Linux x64** only (CUDA 12 + cuDNN 9, onnxruntime-node v1.24.x as required by @huggingface/transformers@4.x). 
Windows CUDA support would require either: - A future official ONNX Runtime release with Windows CUDA prebuilts - A custom `onnxruntime-node` build against CUDA on Windows From d5ad2ea2a2de536cc48787c4741cc95eddd00645 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 8 Apr 2026 07:54:39 +0000 Subject: [PATCH 06/12] Add Docker + CUDA documentation (Dockerfile, docker-compose, Container Toolkit guide) Agent-Logs-Url: https://github.com/jsilvanus/embedeer/sessions/e005d328-62aa-468f-90b9-8982c069d6ff Co-authored-by: jsilvanus <22452468+jsilvanus@users.noreply.github.com> --- README.md | 61 ++++++++++++++++++++++++++- packages/ort-linux-x64-cuda/README.md | 60 ++++++++++++++++++++++++++ 2 files changed, 120 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8501b9d..3fe8dc8 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ await embedder.destroy(); **System requirements:** NVIDIA GPU + driver ≥ 525, CUDA 12, cuDNN 9 -`onnxruntime-node` v1.14+ ships `libonnxruntime_providers_cuda.so` on Linux x64. No custom binary needed — just install CUDA 12 + cuDNN 9 system libraries and the npm package: +`onnxruntime-node` v1.24.x ships `libonnxruntime_providers_cuda.so` on Linux x64. No custom binary needed — just install CUDA 12 + cuDNN 9 system libraries and the npm package: ```bash # Install CUDA 12 + cuDNN 9 (Ubuntu/Debian) @@ -56,6 +56,65 @@ npm install @embedeer/ort-linux-x64-cuda npx embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" ``` +### Docker + NVIDIA CUDA + +Use an [NVIDIA CUDA Docker image](https://hub.docker.com/r/nvidia/cuda) as your base — it ships all required CUDA 12 + cuDNN 9 libraries, so no manual `apt install` is needed in your Dockerfile. 
+ +**Requirements on the host:** +- NVIDIA GPU driver ≥ 525 +- [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) installed + +**Example `Dockerfile`:** + +```dockerfile +# CUDA 12 + cuDNN 9 runtime — all required libs are pre-installed +FROM nvidia/cuda:12.6.3-cudnn9-runtime-ubuntu24.04 + +WORKDIR /app + +# Install Node.js (e.g. via NodeSource) +RUN apt-get update && apt-get install -y curl && \ + curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \ + apt-get install -y nodejs && \ + rm -rf /var/lib/apt/lists/* + +# Install embedeer + CUDA provider +COPY package.json ./ +RUN npm install embedeer && \ + npm install @embedeer/ort-linux-x64-cuda + +COPY . . +``` + +**Build and run:** + +```bash +docker build -t my-embedeer-app . + +# --gpus all enables NVIDIA GPU access inside the container +docker run --rm --gpus all my-embedeer-app \ + npx embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" +``` + +**docker-compose:** + +```yaml +services: + embedeer: + build: . + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + command: > + npx embedeer --model Xenova/all-MiniLM-L6-v2 + --device gpu + --data "Hello GPU" +``` + ### Windows x64 + DirectML (any GPU) **System requirements:** Windows 10 (1903+) or 11, any DirectX 12 GPU, up-to-date drivers diff --git a/packages/ort-linux-x64-cuda/README.md b/packages/ort-linux-x64-cuda/README.md index 3ed5df7..5ba46d0 100644 --- a/packages/ort-linux-x64-cuda/README.md +++ b/packages/ort-linux-x64-cuda/README.md @@ -55,6 +55,66 @@ npm install embedeer npm install @embedeer/ort-linux-x64-cuda ``` +## Docker + +The easiest way to get CUDA working is with an [NVIDIA CUDA Docker image](https://hub.docker.com/r/nvidia/cuda) — all required CUDA 12 + cuDNN 9 libraries are pre-installed. 
+ +**Host requirements:** +- NVIDIA GPU driver ≥ 525 +- [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) + +**Example `Dockerfile`:** + +```dockerfile +# CUDA 12 + cuDNN 9 runtime — all required libs are pre-installed +FROM nvidia/cuda:12.6.3-cudnn9-runtime-ubuntu24.04 + +WORKDIR /app + +# Install Node.js (via NodeSource) +RUN apt-get update && apt-get install -y curl && \ + curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \ + apt-get install -y nodejs && \ + rm -rf /var/lib/apt/lists/* + +# Install embedeer + CUDA provider +COPY package.json ./ +RUN npm install embedeer && \ + npm install @embedeer/ort-linux-x64-cuda + +COPY . . +``` + +**Run with `--gpus all`:** + +```bash +docker build -t my-embedeer-app . +docker run --rm --gpus all my-embedeer-app \ + npx embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" +``` + +**docker-compose:** + +```yaml +services: + embedeer: + build: . + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + command: > + npx embedeer --model Xenova/all-MiniLM-L6-v2 + --device gpu + --data "Hello GPU" +``` + +> **Why does this just work in Docker?** +> The NVIDIA Container Toolkit mounts `/dev/nvidiactl` into the container (our GPU presence check) and NVIDIA CUDA images place all libraries under `/usr/local/cuda/lib64` (one of our default search paths), so `activate()` finds everything automatically. 
+ ## Usage ```js From 104cb9e8468a51f2c7739f7eedb276b08e270129 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 8 Apr 2026 08:19:16 +0000 Subject: [PATCH 07/12] =?UTF-8?q?Remove=20ort-win32-x64-cuda=20placeholder?= =?UTF-8?q?;=20rename=20scope=20@embedeer=20=E2=86=92=20@jsilvanus=20on=20?= =?UTF-8?q?all=20packages?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Agent-Logs-Url: https://github.com/jsilvanus/embedeer/sessions/aba1be69-ab33-480c-9457-d0579962b5a4 Co-authored-by: jsilvanus <22452468+jsilvanus@users.noreply.github.com> --- README.md | 25 ++++++----- package-lock.json | 34 +++++++-------- packages/embedeer/README.md | 24 +++++------ packages/embedeer/package.json | 2 +- packages/embedeer/src/provider-loader.js | 10 ++--- .../embedeer/test/provider-loader.test.js | 17 ++++---- packages/ort-linux-x64-cuda/README.md | 12 +++--- packages/ort-linux-x64-cuda/index.js | 6 +-- packages/ort-linux-x64-cuda/install.js | 10 ++--- packages/ort-linux-x64-cuda/package.json | 2 +- packages/ort-win32-x64-cuda/README.md | 23 ---------- packages/ort-win32-x64-cuda/index.js | 43 ------------------- packages/ort-win32-x64-cuda/install.js | 17 -------- packages/ort-win32-x64-cuda/package.json | 27 ------------ packages/ort-win32-x64-dml/README.md | 6 +-- packages/ort-win32-x64-dml/index.js | 4 +- packages/ort-win32-x64-dml/install.js | 6 +-- packages/ort-win32-x64-dml/package.json | 2 +- 18 files changed, 77 insertions(+), 193 deletions(-) delete mode 100644 packages/ort-win32-x64-cuda/README.md delete mode 100644 packages/ort-win32-x64-cuda/index.js delete mode 100644 packages/ort-win32-x64-cuda/install.js delete mode 100644 packages/ort-win32-x64-cuda/package.json diff --git a/README.md b/README.md index 3fe8dc8..0f0a169 100644 --- a/README.md +++ b/README.md @@ -11,10 +11,9 @@ This repository is a **monorepo** managed with npm workspaces. 
| Package | Description | |---------|-------------| -| [`embedeer`](packages/embedeer) | Main embeddings package (CPU + optional GPU) | -| [`@embedeer/ort-linux-x64-cuda`](packages/ort-linux-x64-cuda) | CUDA provider for Linux x64 | -| [`@embedeer/ort-win32-x64-dml`](packages/ort-win32-x64-dml) | DirectML provider for Windows x64 | -| [`@embedeer/ort-win32-x64-cuda`](packages/ort-win32-x64-cuda) | Windows CUDA (placeholder — use DML on Windows) | +| [`@jsilvanus/embedeer`](packages/embedeer) | Main embeddings package (CPU + optional GPU) | +| [`@jsilvanus/ort-linux-x64-cuda`](packages/ort-linux-x64-cuda) | CUDA provider for Linux x64 | +| [`@jsilvanus/ort-win32-x64-dml`](packages/ort-win32-x64-dml) | DirectML provider for Windows x64 | --- @@ -23,12 +22,12 @@ This repository is a **monorepo** managed with npm workspaces. ### CPU (default, works everywhere) ```bash -npm install embedeer +npm install @jsilvanus/embedeer npx embedeer --model Xenova/all-MiniLM-L6-v2 --data "Hello world" ``` ```js -import { Embedder } from 'embedeer'; +import { Embedder } from '@jsilvanus/embedeer'; const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2'); const vectors = await embedder.embed(['Hello', 'World']); await embedder.destroy(); @@ -49,8 +48,8 @@ await embedder.destroy(); sudo apt install cuda-toolkit-12-6 libcudnn9-cuda-12 # Install embedeer and the CUDA provider package -npm install embedeer -npm install @embedeer/ort-linux-x64-cuda +npm install @jsilvanus/embedeer +npm install @jsilvanus/ort-linux-x64-cuda # Run with GPU npx embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" @@ -80,8 +79,8 @@ RUN apt-get update && apt-get install -y curl && \ # Install embedeer + CUDA provider COPY package.json ./ -RUN npm install embedeer && \ - npm install @embedeer/ort-linux-x64-cuda +RUN npm install @jsilvanus/embedeer && \ + npm install @jsilvanus/ort-linux-x64-cuda COPY . . 
``` @@ -120,8 +119,8 @@ services: **System requirements:** Windows 10 (1903+) or 11, any DirectX 12 GPU, up-to-date drivers ```bash -npm install embedeer -npm install @embedeer/ort-win32-x64-dml +npm install @jsilvanus/embedeer +npm install @jsilvanus/ort-win32-x64-dml npx embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" ``` @@ -129,7 +128,7 @@ npx embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" ### GPU API options ```js -import { Embedder } from 'embedeer'; +import { Embedder } from '@jsilvanus/embedeer'; // Auto-detect GPU, silent CPU fallback if unavailable const e1 = await Embedder.create(model, { device: 'auto' }); diff --git a/package-lock.json b/package-lock.json index aa9d55c..9006e85 100644 --- a/package-lock.json +++ b/package-lock.json @@ -12,18 +12,6 @@ "packages/*" ] }, - "node_modules/@embedeer/ort-linux-x64-cuda": { - "resolved": "packages/ort-linux-x64-cuda", - "link": true - }, - "node_modules/@embedeer/ort-win32-x64-cuda": { - "resolved": "packages/ort-win32-x64-cuda", - "link": true - }, - "node_modules/@embedeer/ort-win32-x64-dml": { - "resolved": "packages/ort-win32-x64-dml", - "link": true - }, "node_modules/@emnapi/runtime": { "version": "1.9.2", "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.9.2.tgz", @@ -575,6 +563,18 @@ "url": "https://opencollective.com/libvips" } }, + "node_modules/@jsilvanus/embedeer": { + "resolved": "packages/embedeer", + "link": true + }, + "node_modules/@jsilvanus/ort-linux-x64-cuda": { + "resolved": "packages/ort-linux-x64-cuda", + "link": true + }, + "node_modules/@jsilvanus/ort-win32-x64-dml": { + "resolved": "packages/ort-win32-x64-dml", + "link": true + }, "node_modules/@protobufjs/aspromise": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz", @@ -713,10 +713,6 @@ "integrity": "sha512-T0NIuQpnTvFDATNuHN5roPwSBG83rFsuO+MXXH9/3N1eFbn4wcPjttvjMLEPWJ0RGUYgQE7cGgS3tNxbqCGM7g==", "license": 
"MIT" }, - "node_modules/embedeer": { - "resolved": "packages/embedeer", - "link": true - }, "node_modules/es-define-property": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", @@ -1054,6 +1050,7 @@ "license": "MIT" }, "packages/embedeer": { + "name": "@jsilvanus/embedeer", "version": "1.0.0", "license": "ISC", "dependencies": { @@ -1064,7 +1061,7 @@ } }, "packages/ort-linux-x64-cuda": { - "name": "@embedeer/ort-linux-x64-cuda", + "name": "@jsilvanus/ort-linux-x64-cuda", "version": "1.0.0", "hasInstallScript": true, "license": "ISC", @@ -1075,6 +1072,7 @@ "packages/ort-win32-x64-cuda": { "name": "@embedeer/ort-win32-x64-cuda", "version": "1.0.0", + "extraneous": true, "hasInstallScript": true, "license": "ISC", "engines": { @@ -1082,7 +1080,7 @@ } }, "packages/ort-win32-x64-dml": { - "name": "@embedeer/ort-win32-x64-dml", + "name": "@jsilvanus/ort-win32-x64-dml", "version": "1.0.0", "hasInstallScript": true, "license": "ISC", diff --git a/packages/embedeer/README.md b/packages/embedeer/README.md index bd7020f..7c913b8 100644 --- a/packages/embedeer/README.md +++ b/packages/embedeer/README.md @@ -23,16 +23,16 @@ Supports **batched** input, **parallel** execution, isolated **child-process** w ```bash # CPU (default, works everywhere) -npm install embedeer +npm install @jsilvanus/embedeer # GPU — Linux x64 + NVIDIA CUDA -npm install @embedeer/ort-linux-x64-cuda +npm install @jsilvanus/ort-linux-x64-cuda # GPU — Windows x64 + NVIDIA CUDA -npm install @embedeer/ort-win32-x64-cuda +npm install @jsilvanus/ort-win32-x64-cuda # GPU — Windows x64 + DirectML (any GPU: NVIDIA / AMD / Intel) -npm install @embedeer/ort-win32-x64-dml +npm install @jsilvanus/ort-win32-x64-dml ``` --- @@ -42,7 +42,7 @@ npm install @embedeer/ort-win32-x64-dml ### Embed texts (CPU — default) ```js -import { Embedder } from 'embedeer'; +import { Embedder } from '@jsilvanus/embedeer'; const embedder = await 
Embedder.create('Xenova/all-MiniLM-L6-v2', { batchSize: 32, // texts per worker task (default: 32) @@ -64,7 +64,7 @@ await embedder.destroy(); // shut down worker processes ### Embed texts with GPU ```js -import { Embedder } from 'embedeer'; +import { Embedder } from '@jsilvanus/embedeer'; // Auto-detect GPU (falls back to CPU if no provider is installed) const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { @@ -87,7 +87,7 @@ const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { Like `ollama pull` — downloads the model once so workers start instantly: ```js -import { loadModel } from 'embedeer'; +import { loadModel } from '@jsilvanus/embedeer'; const { modelName, cacheDir } = await loadModel('Xenova/all-MiniLM-L6-v2', { token: 'hf_...', // optional @@ -145,10 +145,10 @@ npx embedeer --model Xenova/all-MiniLM-L6-v2 --device auto --data "Hello GPU" # Require GPU (error if no provider installed) npx embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" -# Use CUDA explicitly (requires @embedeer/ort-linux-x64-cuda or ort-win32-x64-cuda) +# Use CUDA explicitly (requires @jsilvanus/ort-linux-x64-cuda or ort-win32-x64-cuda) npx embedeer --model Xenova/all-MiniLM-L6-v2 --provider cuda --data "Hello CUDA" -# Use DirectML on Windows (requires @embedeer/ort-win32-x64-dml) +# Use DirectML on Windows (requires @jsilvanus/ort-win32-x64-dml) npx embedeer --model Xenova/all-MiniLM-L6-v2 --provider dml --data "Hello DML" # Embed from a file, dump SQL to disk @@ -169,9 +169,9 @@ GPU support requires an additional provider package that ships a CUDA-enabled (o | Platform | Provider | Package | |----------------|-----------|----------------------------------| -| Linux x64 | CUDA | `@embedeer/ort-linux-x64-cuda` | -| Windows x64 | CUDA | `@embedeer/ort-win32-x64-cuda` | -| Windows x64 | DirectML | `@embedeer/ort-win32-x64-dml` | +| Linux x64 | CUDA | `@jsilvanus/ort-linux-x64-cuda` | +| Windows x64 | CUDA | `@jsilvanus/ort-win32-x64-cuda` | 
+| Windows x64 | DirectML | `@jsilvanus/ort-win32-x64-dml` | ### Provider selection logic diff --git a/packages/embedeer/package.json b/packages/embedeer/package.json index 2b68966..11d55f3 100644 --- a/packages/embedeer/package.json +++ b/packages/embedeer/package.json @@ -1,5 +1,5 @@ { - "name": "embedeer", + "name": "@jsilvanus/embedeer", "version": "1.0.0", "description": "A node.js embedding tool with optional GPU acceleration", "main": "src/index.js", diff --git a/packages/embedeer/src/provider-loader.js b/packages/embedeer/src/provider-loader.js index 5c4b0ee..f8f9af3 100644 --- a/packages/embedeer/src/provider-loader.js +++ b/packages/embedeer/src/provider-loader.js @@ -4,9 +4,8 @@ * pipeline. * * Provider packages are published as separate optional npm packages: - * @embedeer/ort-linux-x64-cuda — CUDA on Linux x64 - * @embedeer/ort-win32-x64-cuda — CUDA on Windows x64 - * @embedeer/ort-win32-x64-dml — DirectML on Windows x64 + * @jsilvanus/ort-linux-x64-cuda — CUDA on Linux x64 + * @jsilvanus/ort-win32-x64-dml — DirectML on Windows x64 * * Each provider package exports: * activate(): Promise — runs any setup needed before pipeline() @@ -24,9 +23,8 @@ * @type {Record} */ export const PROVIDER_PACKAGES = { - 'linux-x64-cuda': '@embedeer/ort-linux-x64-cuda', - 'win32-x64-cuda': '@embedeer/ort-win32-x64-cuda', - 'win32-x64-dml': '@embedeer/ort-win32-x64-dml', + 'linux-x64-cuda': '@jsilvanus/ort-linux-x64-cuda', + 'win32-x64-dml': '@jsilvanus/ort-win32-x64-dml', }; /** diff --git a/packages/embedeer/test/provider-loader.test.js b/packages/embedeer/test/provider-loader.test.js index d97af4b..1e3d11b 100644 --- a/packages/embedeer/test/provider-loader.test.js +++ b/packages/embedeer/test/provider-loader.test.js @@ -39,9 +39,8 @@ async function withPlatform(platform, arch, fn) { describe('PROVIDER_PACKAGES', () => { test('contains entries for all supported platform+provider combinations', () => { - assert.equal(PROVIDER_PACKAGES['linux-x64-cuda'], 
'@embedeer/ort-linux-x64-cuda'); - assert.equal(PROVIDER_PACKAGES['win32-x64-cuda'], '@embedeer/ort-win32-x64-cuda'); - assert.equal(PROVIDER_PACKAGES['win32-x64-dml'], '@embedeer/ort-win32-x64-dml'); + assert.equal(PROVIDER_PACKAGES['linux-x64-cuda'], '@jsilvanus/ort-linux-x64-cuda'); + assert.equal(PROVIDER_PACKAGES['win32-x64-dml'], '@jsilvanus/ort-win32-x64-dml'); }); }); @@ -79,7 +78,7 @@ describe('tryLoadProvider()', () => { }); test('returns { loaded: false } when provider package is not installed or binary is missing', async () => { - // In the workspace, @embedeer/ort-linux-x64-cuda is linked but the native + // In the workspace, @jsilvanus/ort-linux-x64-cuda is linked but the native // binary does not exist (install.js was not run), so activate() throws. // tryLoadProvider must return { loaded: false } in either case. await withPlatform('linux', 'x64', async () => { @@ -115,7 +114,7 @@ describe('resolveProvider()', () => { test('device=auto returns undefined (CPU fallback) when GPU provider fails to activate', async () => { await withPlatform('linux', 'x64', async () => { - // @embedeer/ort-linux-x64-cuda is linked in the workspace but binary is + // @jsilvanus/ort-linux-x64-cuda is linked in the workspace but binary is // missing. device='auto' must silently fall back to CPU (return undefined). const result = await resolveProvider('auto', undefined); assert.equal(result, undefined); @@ -135,12 +134,12 @@ describe('resolveProvider()', () => { await withPlatform('linux', 'x64', async () => { // In the workspace, ort-linux-x64-cuda is linked but binary is missing. // resolveProvider should throw (either the activate error or a "not installed" error). - // The error must reference the @embedeer package name to guide the user. + // The error must reference the @jsilvanus package name to guide the user. 
await assert.rejects( () => resolveProvider('gpu', undefined), (err) => { assert.ok( - err.message.includes('@embedeer/ort-linux-x64-cuda'), + err.message.includes('@jsilvanus/ort-linux-x64-cuda'), `Expected package name in error, got: ${err.message}`, ); return true; @@ -168,14 +167,14 @@ describe('resolveProvider()', () => { test('explicit provider=cuda re-throws activate error when CUDA libraries are missing', async () => { await withPlatform('linux', 'x64', async () => { - // In this environment @embedeer/ort-linux-x64-cuda is installed (workspace link) + // In this environment @jsilvanus/ort-linux-x64-cuda is installed (workspace link) // but there is no NVIDIA GPU. activate() throws the GPU-not-found error which // is re-thrown by resolveProvider so the user gets a clear diagnostic. await assert.rejects( () => resolveProvider('cpu', 'cuda'), (err) => { assert.ok( - err.message.includes('@embedeer/ort-linux-x64-cuda'), + err.message.includes('@jsilvanus/ort-linux-x64-cuda'), `Expected package name in error, got: ${err.message}`, ); // The error is the activate() diagnostic, not a generic "not installed" msg diff --git a/packages/ort-linux-x64-cuda/README.md b/packages/ort-linux-x64-cuda/README.md index 5ba46d0..ec0047e 100644 --- a/packages/ort-linux-x64-cuda/README.md +++ b/packages/ort-linux-x64-cuda/README.md @@ -1,4 +1,4 @@ -# @embedeer/ort-linux-x64-cuda +# @jsilvanus/ort-linux-x64-cuda CUDA execution provider for [embedeer](https://github.com/jsilvanus/embedeer) on **Linux x64**. 
@@ -49,10 +49,10 @@ nvcc --version # confirm CUDA toolkit is installed ```bash # Step 1 — main package -npm install embedeer +npm install @jsilvanus/embedeer # Step 2 — CUDA provider -npm install @embedeer/ort-linux-x64-cuda +npm install @jsilvanus/ort-linux-x64-cuda ``` ## Docker @@ -79,8 +79,8 @@ RUN apt-get update && apt-get install -y curl && \ # Install embedeer + CUDA provider COPY package.json ./ -RUN npm install embedeer && \ - npm install @embedeer/ort-linux-x64-cuda +RUN npm install @jsilvanus/embedeer && \ + npm install @jsilvanus/ort-linux-x64-cuda COPY . . ``` @@ -143,7 +143,7 @@ npx embedeer --model Xenova/all-MiniLM-L6-v2 --provider cuda --data "Hello CUDA" If CUDA libraries are missing, you'll see: ``` -@embedeer/ort-linux-x64-cuda: Missing CUDA system libraries: libcudart.so.12, libcudnn.so.9 +@jsilvanus/ort-linux-x64-cuda: Missing CUDA system libraries: libcudart.so.12, libcudnn.so.9 onnxruntime-node CUDA requires CUDA 12 + cuDNN 9. Install them: diff --git a/packages/ort-linux-x64-cuda/index.js b/packages/ort-linux-x64-cuda/index.js index 5cefbb0..632da69 100644 --- a/packages/ort-linux-x64-cuda/index.js +++ b/packages/ort-linux-x64-cuda/index.js @@ -1,5 +1,5 @@ /** - * @embedeer/ort-linux-x64-cuda + * @jsilvanus/ort-linux-x64-cuda * * CUDA execution provider for embedeer on Linux x64. * @@ -110,7 +110,7 @@ export async function activate() { // 1. 
Check for NVIDIA GPU / driver if (!existsSync('/dev/nvidiactl')) { throw new Error( - '@embedeer/ort-linux-x64-cuda: No NVIDIA GPU detected (/dev/nvidiactl not found).\n' + + '@jsilvanus/ort-linux-x64-cuda: No NVIDIA GPU detected (/dev/nvidiactl not found).\n' + 'Ensure NVIDIA drivers are installed.\n' + 'Verify with: nvidia-smi', ); @@ -121,7 +121,7 @@ export async function activate() { if (missing.length > 0) { throw new Error( - `@embedeer/ort-linux-x64-cuda: Missing CUDA system libraries: ${missing.join(', ')}\n\n` + + `@jsilvanus/ort-linux-x64-cuda: Missing CUDA system libraries: ${missing.join(', ')}\n\n` + 'onnxruntime-node CUDA requires CUDA 12 + cuDNN 9. Install them:\n\n' + ' # Option A — CUDA 12 + cuDNN 9 via apt (Ubuntu/Debian)\n' + ' sudo apt install cuda-toolkit-12-6 libcudnn9-cuda-12\n\n' + diff --git a/packages/ort-linux-x64-cuda/install.js b/packages/ort-linux-x64-cuda/install.js index cacf635..0e2b5a7 100644 --- a/packages/ort-linux-x64-cuda/install.js +++ b/packages/ort-linux-x64-cuda/install.js @@ -1,5 +1,5 @@ /** - * Install / post-install check for @embedeer/ort-linux-x64-cuda + * Install / post-install check for @jsilvanus/ort-linux-x64-cuda * * onnxruntime-node v1.20+ ships libonnxruntime_providers_cuda.so on Linux x64. * (@huggingface/transformers@4.x requires onnxruntime-node@1.24.x which ships CUDA.) 
@@ -19,12 +19,12 @@ import { existsSync } from 'fs'; if (process.platform !== 'linux' || process.arch !== 'x64') { console.warn( - `[embedeer] @embedeer/ort-linux-x64-cuda: skipping checks on ${process.platform}/${process.arch} (this package is for Linux x64 only)`, + `[embedeer] @jsilvanus/ort-linux-x64-cuda: skipping checks on ${process.platform}/${process.arch} (this package is for Linux x64 only)`, ); process.exit(0); } -console.log('[embedeer] @embedeer/ort-linux-x64-cuda: checking system CUDA requirements...'); +console.log('[embedeer] @jsilvanus/ort-linux-x64-cuda: checking system CUDA requirements...'); const REQUIRED_LIBS = [ 'libcudart.so.12', @@ -68,7 +68,7 @@ const hasGpu = existsSync('/dev/nvidiactl'); if (!hasGpu) { console.warn( '\n[embedeer] WARNING: No NVIDIA GPU detected (/dev/nvidiactl not found).\n' + - ' @embedeer/ort-linux-x64-cuda requires an NVIDIA GPU with CUDA 12 drivers.\n' + + ' @jsilvanus/ort-linux-x64-cuda requires an NVIDIA GPU with CUDA 12 drivers.\n' + ' GPU acceleration will not be available until drivers are installed.\n', ); } else { @@ -100,6 +100,6 @@ if (missing.length > 0) { } console.log( - '\n[embedeer] @embedeer/ort-linux-x64-cuda: all CUDA requirements satisfied.\n' + + '\n[embedeer] @jsilvanus/ort-linux-x64-cuda: all CUDA requirements satisfied.\n' + ' GPU acceleration is available. 
Use device="gpu" or device="auto" in embedeer.\n', ); diff --git a/packages/ort-linux-x64-cuda/package.json b/packages/ort-linux-x64-cuda/package.json index ede34e4..f578fe5 100644 --- a/packages/ort-linux-x64-cuda/package.json +++ b/packages/ort-linux-x64-cuda/package.json @@ -1,5 +1,5 @@ { - "name": "@embedeer/ort-linux-x64-cuda", + "name": "@jsilvanus/ort-linux-x64-cuda", "version": "1.0.0", "description": "CUDA execution provider for embedeer on Linux x64", "type": "module", diff --git a/packages/ort-win32-x64-cuda/README.md b/packages/ort-win32-x64-cuda/README.md deleted file mode 100644 index 63b9409..0000000 --- a/packages/ort-win32-x64-cuda/README.md +++ /dev/null @@ -1,23 +0,0 @@ -# @embedeer/ort-win32-x64-cuda - -> ⚠️ **Not yet available** — CUDA on Windows is not included in `onnxruntime-node` prebuilt binaries. - -This package is a placeholder for future Windows CUDA support. - -## Use DirectML instead - -For GPU acceleration on Windows, use DirectML — it supports NVIDIA, AMD, and Intel GPUs without CUDA: - -```bash -npm install @embedeer/ort-win32-x64-dml -``` - -See [`@embedeer/ort-win32-x64-dml`](../ort-win32-x64-dml/README.md) for full documentation. - -## Why CUDA isn't available on Windows - -`onnxruntime-node` prebuilt binaries include CUDA support on **Linux x64** only (CUDA 12 + cuDNN 9, onnxruntime-node v1.24.x as required by @huggingface/transformers@4.x). Windows CUDA support would require either: -- A future official ONNX Runtime release with Windows CUDA prebuilts -- A custom `onnxruntime-node` build against CUDA on Windows - -See [ONNX Runtime build docs](https://onnxruntime.ai/docs/build/inferencing.html) if you need Windows CUDA. diff --git a/packages/ort-win32-x64-cuda/index.js b/packages/ort-win32-x64-cuda/index.js deleted file mode 100644 index f69e586..0000000 --- a/packages/ort-win32-x64-cuda/index.js +++ /dev/null @@ -1,43 +0,0 @@ -/** - * @embedeer/ort-win32-x64-cuda - * - * CUDA execution provider for embedeer on Windows x64. 
- * - * IMPORTANT: onnxruntime-node does not currently ship CUDA support for Windows - * in its prebuilt binaries. CUDA on Windows requires a custom ORT build or a - * future official release that includes Windows CUDA prebuilts. - * - * Use @embedeer/ort-win32-x64-dml for GPU acceleration on Windows instead — - * DirectML supports NVIDIA, AMD, and Intel GPUs on Windows 10/11 without - * requiring a CUDA installation. - * - * @see packages/ort-win32-x64-dml - * @see https://github.com/microsoft/onnxruntime/releases for CUDA Windows builds - */ - -/** - * Activate the CUDA execution provider on Windows x64. - * - * @returns {Promise} - * @throws {Error} Always — CUDA is not currently supported via standard onnxruntime-node - * prebuilts on Windows. Use @embedeer/ort-win32-x64-dml for DirectML GPU acceleration. - */ -export async function activate() { - throw new Error( - '@embedeer/ort-win32-x64-cuda: CUDA is not currently available in onnxruntime-node\n' + - 'prebuilt binaries for Windows.\n\n' + - 'For GPU acceleration on Windows, use DirectML instead:\n' + - ' npm install @embedeer/ort-win32-x64-dml\n' + - ' npx embedeer --provider dml --data "Hello"\n\n' + - 'DirectML supports NVIDIA, AMD, and Intel GPUs on Windows 10/11 without CUDA.\n\n' + - 'For Windows CUDA support, a custom onnxruntime build is required.\n' + - 'See: https://onnxruntime.ai/docs/build/inferencing.html', - ); -} - -/** - * @returns {string} - */ -export function getDevice() { - return 'cuda'; -} diff --git a/packages/ort-win32-x64-cuda/install.js b/packages/ort-win32-x64-cuda/install.js deleted file mode 100644 index be60555..0000000 --- a/packages/ort-win32-x64-cuda/install.js +++ /dev/null @@ -1,17 +0,0 @@ -/** - * Install / post-install for @embedeer/ort-win32-x64-cuda - * - * IMPORTANT: onnxruntime-node does not currently ship CUDA prebuilts for Windows. - * This package is a placeholder for future Windows CUDA support. 
- * - * For GPU acceleration on Windows, use DirectML instead: - * npm install @embedeer/ort-win32-x64-dml - */ - -console.warn( - '\n[embedeer] WARNING: @embedeer/ort-win32-x64-cuda — CUDA is not currently available\n' + - ' in onnxruntime-node prebuilt binaries for Windows.\n\n' + - ' For GPU acceleration on Windows, use DirectML instead:\n' + - ' npm install @embedeer/ort-win32-x64-dml\n\n' + - ' DirectML supports NVIDIA, AMD, and Intel GPUs on Windows 10/11.\n', -); diff --git a/packages/ort-win32-x64-cuda/package.json b/packages/ort-win32-x64-cuda/package.json deleted file mode 100644 index f9d5ede..0000000 --- a/packages/ort-win32-x64-cuda/package.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "name": "@embedeer/ort-win32-x64-cuda", - "version": "1.0.0", - "description": "CUDA execution provider for embedeer on Windows x64", - "type": "module", - "main": "index.js", - "scripts": { - "install": "node install.js" - }, - "repository": { - "type": "git", - "url": "git+https://github.com/jsilvanus/embedeer.git", - "directory": "packages/ort-win32-x64-cuda" - }, - "keywords": [ - "embedeer", - "onnxruntime", - "cuda", - "gpu", - "windows" - ], - "author": "", - "license": "ISC", - "engines": { - "node": ">=18" - } -} diff --git a/packages/ort-win32-x64-dml/README.md b/packages/ort-win32-x64-dml/README.md index 9f52657..e1262e3 100644 --- a/packages/ort-win32-x64-dml/README.md +++ b/packages/ort-win32-x64-dml/README.md @@ -1,4 +1,4 @@ -# @embedeer/ort-win32-x64-dml +# @jsilvanus/ort-win32-x64-dml DirectML execution provider for [embedeer](https://github.com/jsilvanus/embedeer) on **Windows x64**. @@ -24,10 +24,10 @@ No CUDA installation needed. 
```bash # Step 1 — main package -npm install embedeer +npm install @jsilvanus/embedeer # Step 2 — DirectML provider -npm install @embedeer/ort-win32-x64-dml +npm install @jsilvanus/ort-win32-x64-dml ``` ## Usage diff --git a/packages/ort-win32-x64-dml/index.js b/packages/ort-win32-x64-dml/index.js index e2a6945..a727493 100644 --- a/packages/ort-win32-x64-dml/index.js +++ b/packages/ort-win32-x64-dml/index.js @@ -1,5 +1,5 @@ /** - * @embedeer/ort-win32-x64-dml + * @jsilvanus/ort-win32-x64-dml * * DirectML execution provider for embedeer on Windows x64. * @@ -30,7 +30,7 @@ export async function activate() { if (process.platform !== 'win32') { throw new Error( - `@embedeer/ort-win32-x64-dml: DirectML is only available on Windows (current platform: ${process.platform}).`, + `@jsilvanus/ort-win32-x64-dml: DirectML is only available on Windows (current platform: ${process.platform}).`, ); } // DirectML is natively available via onnxruntime-node on Windows 10/11. diff --git a/packages/ort-win32-x64-dml/install.js b/packages/ort-win32-x64-dml/install.js index 0392bb8..14fd50a 100644 --- a/packages/ort-win32-x64-dml/install.js +++ b/packages/ort-win32-x64-dml/install.js @@ -1,5 +1,5 @@ /** - * Install / post-install check for @embedeer/ort-win32-x64-dml + * Install / post-install check for @jsilvanus/ort-win32-x64-dml * * onnxruntime-node ships DirectML support bundled on Windows x64. * No additional binary download is required. 
@@ -12,13 +12,13 @@ if (process.platform !== 'win32') { console.warn( - `[embedeer] @embedeer/ort-win32-x64-dml: skipping checks on ${process.platform}/${process.arch} (this package is for Windows x64 only)`, + `[embedeer] @jsilvanus/ort-win32-x64-dml: skipping checks on ${process.platform}/${process.arch} (this package is for Windows x64 only)`, ); process.exit(0); } console.log( - '[embedeer] @embedeer/ort-win32-x64-dml: DirectML is bundled with onnxruntime-node on Windows.\n' + + '[embedeer] @jsilvanus/ort-win32-x64-dml: DirectML is bundled with onnxruntime-node on Windows.\n' + ' No additional binary download is required.\n' + ' GPU acceleration via DirectML is available on Windows 10 (1903+) / Windows 11\n' + ' with any DirectX 12-capable GPU.\n' + diff --git a/packages/ort-win32-x64-dml/package.json b/packages/ort-win32-x64-dml/package.json index 3ef12e2..c4333ba 100644 --- a/packages/ort-win32-x64-dml/package.json +++ b/packages/ort-win32-x64-dml/package.json @@ -1,5 +1,5 @@ { - "name": "@embedeer/ort-win32-x64-dml", + "name": "@jsilvanus/ort-win32-x64-dml", "version": "1.0.0", "description": "DirectML execution provider for embedeer on Windows x64", "type": "module", From f721a068764cf542f56c8f1546a1c189d815b83f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 8 Apr 2026 08:51:48 +0000 Subject: [PATCH 08/12] Prefix ort packages with embedeer-; prepare all packages for npm publishing; show npx --device auto/gpu in READMEs Agent-Logs-Url: https://github.com/jsilvanus/embedeer/sessions/c6cb957d-1635-42ac-9914-88507015c38b Co-authored-by: jsilvanus <22452468+jsilvanus@users.noreply.github.com> --- README.md | 59 +++++++++--- package-lock.json | 11 ++- packages/embedeer/README.md | 89 +++++++++++++------ packages/embedeer/package.json | 12 ++- packages/embedeer/src/provider-loader.js | 8 +- .../embedeer/test/provider-loader.test.js | 14 +-- packages/ort-linux-x64-cuda/README.md | 16 ++-- 
packages/ort-linux-x64-cuda/index.js | 6 +- packages/ort-linux-x64-cuda/install.js | 10 +-- packages/ort-linux-x64-cuda/package.json | 12 ++- packages/ort-win32-x64-dml/README.md | 8 +- packages/ort-win32-x64-dml/index.js | 4 +- packages/ort-win32-x64-dml/install.js | 6 +- packages/ort-win32-x64-dml/package.json | 12 ++- 14 files changed, 181 insertions(+), 86 deletions(-) diff --git a/README.md b/README.md index 0f0a169..f2b36f9 100644 --- a/README.md +++ b/README.md @@ -12,8 +12,8 @@ This repository is a **monorepo** managed with npm workspaces. | Package | Description | |---------|-------------| | [`@jsilvanus/embedeer`](packages/embedeer) | Main embeddings package (CPU + optional GPU) | -| [`@jsilvanus/ort-linux-x64-cuda`](packages/ort-linux-x64-cuda) | CUDA provider for Linux x64 | -| [`@jsilvanus/ort-win32-x64-dml`](packages/ort-win32-x64-dml) | DirectML provider for Windows x64 | +| [`@jsilvanus/embedeer-ort-linux-x64-cuda`](packages/ort-linux-x64-cuda) | CUDA provider for Linux x64 | +| [`@jsilvanus/embedeer-ort-win32-x64-dml`](packages/ort-win32-x64-dml) | DirectML provider for Windows x64 | --- @@ -23,7 +23,7 @@ This repository is a **monorepo** managed with npm workspaces. ```bash npm install @jsilvanus/embedeer -npx embedeer --model Xenova/all-MiniLM-L6-v2 --data "Hello world" +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --data "Hello world" ``` ```js @@ -33,6 +33,37 @@ const vectors = await embedder.embed(['Hello', 'World']); await embedder.destroy(); ``` +### GPU — use CUDA where available (auto-detect) + +Add the provider package for your platform, then pass `--device auto`. +`auto` tries CUDA on Linux and DirectML on Windows; silently falls back to CPU if no GPU is found. 
+ +**Linux x64 (NVIDIA CUDA):** + +```bash +# Install CUDA 12 + cuDNN 9 system libraries (Ubuntu/Debian) +sudo apt install cuda-toolkit-12-6 libcudnn9-cuda-12 + +npm install @jsilvanus/embedeer +npm install @jsilvanus/embedeer-ort-linux-x64-cuda + +# Auto-detect: uses CUDA on this system, CPU on any other +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device auto --data "Hello" + +# Or require GPU (throws if CUDA is unavailable): +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" +``` + +**Windows x64 (DirectML — any GPU: NVIDIA / AMD / Intel):** + +```bash +npm install @jsilvanus/embedeer +npm install @jsilvanus/embedeer-ort-win32-x64-dml + +# Auto-detect: uses DirectML on Windows, CPU elsewhere +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device auto --data "Hello" +``` + --- ## GPU — Two-Step Install @@ -49,10 +80,10 @@ sudo apt install cuda-toolkit-12-6 libcudnn9-cuda-12 # Install embedeer and the CUDA provider package npm install @jsilvanus/embedeer -npm install @jsilvanus/ort-linux-x64-cuda +npm install @jsilvanus/embedeer-ort-linux-x64-cuda # Run with GPU -npx embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" ``` ### Docker + NVIDIA CUDA @@ -80,7 +111,7 @@ RUN apt-get update && apt-get install -y curl && \ # Install embedeer + CUDA provider COPY package.json ./ RUN npm install @jsilvanus/embedeer && \ - npm install @jsilvanus/ort-linux-x64-cuda + npm install @jsilvanus/embedeer-ort-linux-x64-cuda COPY . . ``` @@ -92,7 +123,7 @@ docker build -t my-embedeer-app . 
# --gpus all enables NVIDIA GPU access inside the container docker run --rm --gpus all my-embedeer-app \ - npx embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" + npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" ``` **docker-compose:** @@ -109,7 +140,7 @@ services: count: all capabilities: [gpu] command: > - npx embedeer --model Xenova/all-MiniLM-L6-v2 + npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" ``` @@ -120,9 +151,9 @@ services: ```bash npm install @jsilvanus/embedeer -npm install @jsilvanus/ort-win32-x64-dml +npm install @jsilvanus/embedeer-ort-win32-x64-dml -npx embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" ``` ### GPU API options @@ -142,10 +173,10 @@ const e4 = await Embedder.create(model, { provider: 'dml' }); // Windows Direct ``` ```bash -npx embedeer --device auto # try GPU, fall back to CPU -npx embedeer --device gpu # require GPU -npx embedeer --provider cuda # explicit CUDA (Linux) -npx embedeer --provider dml # explicit DirectML (Windows) +npx @jsilvanus/embedeer --device auto # try GPU, fall back to CPU +npx @jsilvanus/embedeer --device gpu # require GPU +npx @jsilvanus/embedeer --provider cuda # explicit CUDA (Linux) +npx @jsilvanus/embedeer --provider dml # explicit DirectML (Windows) ``` --- diff --git a/package-lock.json b/package-lock.json index 9006e85..59e82e8 100644 --- a/package-lock.json +++ b/package-lock.json @@ -567,11 +567,11 @@ "resolved": "packages/embedeer", "link": true }, - "node_modules/@jsilvanus/ort-linux-x64-cuda": { + "node_modules/@jsilvanus/embedeer-ort-linux-x64-cuda": { "resolved": "packages/ort-linux-x64-cuda", "link": true }, - "node_modules/@jsilvanus/ort-win32-x64-dml": { + "node_modules/@jsilvanus/embedeer-ort-win32-x64-dml": { "resolved": "packages/ort-win32-x64-dml", "link": true }, @@ -1058,10 
+1058,13 @@ }, "bin": { "embedeer": "src/cli.js" + }, + "engines": { + "node": ">=18" } }, "packages/ort-linux-x64-cuda": { - "name": "@jsilvanus/ort-linux-x64-cuda", + "name": "@jsilvanus/embedeer-ort-linux-x64-cuda", "version": "1.0.0", "hasInstallScript": true, "license": "ISC", @@ -1080,7 +1083,7 @@ } }, "packages/ort-win32-x64-dml": { - "name": "@jsilvanus/ort-win32-x64-dml", + "name": "@jsilvanus/embedeer-ort-win32-x64-dml", "version": "1.0.0", "hasInstallScript": true, "license": "ISC", diff --git a/packages/embedeer/README.md b/packages/embedeer/README.md index 7c913b8..702174e 100644 --- a/packages/embedeer/README.md +++ b/packages/embedeer/README.md @@ -26,13 +26,10 @@ Supports **batched** input, **parallel** execution, isolated **child-process** w npm install @jsilvanus/embedeer # GPU — Linux x64 + NVIDIA CUDA -npm install @jsilvanus/ort-linux-x64-cuda - -# GPU — Windows x64 + NVIDIA CUDA -npm install @jsilvanus/ort-win32-x64-cuda +npm install @jsilvanus/embedeer-ort-linux-x64-cuda # GPU — Windows x64 + DirectML (any GPU: NVIDIA / AMD / Intel) -npm install @jsilvanus/ort-win32-x64-dml +npm install @jsilvanus/embedeer-ort-win32-x64-dml ``` --- @@ -100,16 +97,16 @@ const { modelName, cacheDir } = await loadModel('Xenova/all-MiniLM-L6-v2', { ## CLI ``` -npx embedeer [options] +npx @jsilvanus/embedeer [options] Model management (pull / cache model): - npx embedeer --model + npx @jsilvanus/embedeer --model Embed texts: - npx embedeer --model --data "text1" "text2" ... - npx embedeer --model --data '["text1","text2"]' - npx embedeer --model --file texts.txt - echo '["t1","t2"]' | npx embedeer --model + npx @jsilvanus/embedeer --model --data "text1" "text2" ... 
+ npx @jsilvanus/embedeer --model --data '["text1","text2"]' + npx @jsilvanus/embedeer --model --file texts.txt + echo '["t1","t2"]' | npx @jsilvanus/embedeer --model Options: -m, --model Hugging Face model (default: Xenova/all-MiniLM-L6-v2) @@ -134,44 +131,82 @@ Options: ```bash # Pull a model (like ollama pull) -npx embedeer --model Xenova/all-MiniLM-L6-v2 +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 # Embed a few strings, output JSON (CPU) -npx embedeer --model Xenova/all-MiniLM-L6-v2 --data "Hello" "World" +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --data "Hello" "World" # Auto-detect GPU, fall back to CPU if unavailable -npx embedeer --model Xenova/all-MiniLM-L6-v2 --device auto --data "Hello GPU" +# (uses CUDA on Linux, DirectML on Windows, CPU everywhere else) +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device auto --data "Hello" -# Require GPU (error if no provider installed) -npx embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" +# Require GPU (throws with install instructions if no provider found) +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" -# Use CUDA explicitly (requires @jsilvanus/ort-linux-x64-cuda or ort-win32-x64-cuda) -npx embedeer --model Xenova/all-MiniLM-L6-v2 --provider cuda --data "Hello CUDA" +# Explicit CUDA (Linux — requires @jsilvanus/embedeer-ort-linux-x64-cuda) +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --provider cuda --data "Hello CUDA" -# Use DirectML on Windows (requires @jsilvanus/ort-win32-x64-dml) -npx embedeer --model Xenova/all-MiniLM-L6-v2 --provider dml --data "Hello DML" +# Explicit DirectML (Windows — requires @jsilvanus/embedeer-ort-win32-x64-dml) +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --provider dml --data "Hello DML" # Embed from a file, dump SQL to disk -npx embedeer --model Xenova/all-MiniLM-L6-v2 \ +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 \ --file texts.txt 
--output sql --dump out.sql # Use quantized model, in-process threads, private model with token -npx embedeer --model my-org/private-model \ +npx @jsilvanus/embedeer --model my-org/private-model \ --token hf_xxx --dtype q8 --mode thread \ --data "embed me" ``` --- +### Using GPU with npx + +Install the provider package for your platform, then pass `--device auto` to use the GPU +wherever available, with silent CPU fallback. + +**Linux x64 — NVIDIA CUDA:** + +```bash +# One-time: install CUDA 12 system libraries (Ubuntu/Debian) +sudo apt install cuda-toolkit-12-6 libcudnn9-cuda-12 + +# Install both packages +npm install @jsilvanus/embedeer +npm install @jsilvanus/embedeer-ort-linux-x64-cuda + +# Auto-detect: uses CUDA here, CPU fallback on any other machine +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device auto --data "Hello" + +# Hard-require CUDA (error + install hint if unavailable): +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" + +# Explicit CUDA provider: +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --provider cuda --data "Hello CUDA" +``` + +**Windows x64 — DirectML (any GPU: NVIDIA / AMD / Intel):** + +```bash +npm install @jsilvanus/embedeer +npm install @jsilvanus/embedeer-ort-win32-x64-dml + +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device auto --data "Hello" +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --provider dml --data "Hello DML" +``` + +--- + ## GPU Provider Packages GPU support requires an additional provider package that ships a CUDA-enabled (or DirectML-enabled) ONNX Runtime binary. 
-| Platform | Provider | Package | -|----------------|-----------|----------------------------------| -| Linux x64 | CUDA | `@jsilvanus/ort-linux-x64-cuda` | -| Windows x64 | CUDA | `@jsilvanus/ort-win32-x64-cuda` | -| Windows x64 | DirectML | `@jsilvanus/ort-win32-x64-dml` | +| Platform | Provider | Package | +|----------------|-----------|-----------------------------------------------| +| Linux x64 | CUDA | `@jsilvanus/embedeer-ort-linux-x64-cuda` | +| Windows x64 | DirectML | `@jsilvanus/embedeer-ort-win32-x64-dml` | ### Provider selection logic diff --git a/packages/embedeer/package.json b/packages/embedeer/package.json index 11d55f3..dc5ccfb 100644 --- a/packages/embedeer/package.json +++ b/packages/embedeer/package.json @@ -6,6 +6,10 @@ "bin": { "embedeer": "src/cli.js" }, + "files": [ + "src", + "README.md" + ], "scripts": { "test": "node --test test/*.test.js" }, @@ -24,13 +28,19 @@ "cuda", "onnxruntime" ], - "author": "", + "author": "jsilvanus", "license": "ISC", "type": "module", + "engines": { + "node": ">=18" + }, "bugs": { "url": "https://github.com/jsilvanus/embedeer/issues" }, "homepage": "https://github.com/jsilvanus/embedeer/tree/main/packages/embedeer#readme", + "publishConfig": { + "access": "public" + }, "dependencies": { "@huggingface/transformers": "^4.0.1" } diff --git a/packages/embedeer/src/provider-loader.js b/packages/embedeer/src/provider-loader.js index f8f9af3..905a494 100644 --- a/packages/embedeer/src/provider-loader.js +++ b/packages/embedeer/src/provider-loader.js @@ -4,8 +4,8 @@ * pipeline. 
* * Provider packages are published as separate optional npm packages: - * @jsilvanus/ort-linux-x64-cuda — CUDA on Linux x64 - * @jsilvanus/ort-win32-x64-dml — DirectML on Windows x64 + * @jsilvanus/embedeer-ort-linux-x64-cuda — CUDA on Linux x64 + * @jsilvanus/embedeer-ort-win32-x64-dml — DirectML on Windows x64 * * Each provider package exports: * activate(): Promise — runs any setup needed before pipeline() @@ -23,8 +23,8 @@ * @type {Record} */ export const PROVIDER_PACKAGES = { - 'linux-x64-cuda': '@jsilvanus/ort-linux-x64-cuda', - 'win32-x64-dml': '@jsilvanus/ort-win32-x64-dml', + 'linux-x64-cuda': '@jsilvanus/embedeer-ort-linux-x64-cuda', + 'win32-x64-dml': '@jsilvanus/embedeer-ort-win32-x64-dml', }; /** diff --git a/packages/embedeer/test/provider-loader.test.js b/packages/embedeer/test/provider-loader.test.js index 1e3d11b..5e92829 100644 --- a/packages/embedeer/test/provider-loader.test.js +++ b/packages/embedeer/test/provider-loader.test.js @@ -39,8 +39,8 @@ async function withPlatform(platform, arch, fn) { describe('PROVIDER_PACKAGES', () => { test('contains entries for all supported platform+provider combinations', () => { - assert.equal(PROVIDER_PACKAGES['linux-x64-cuda'], '@jsilvanus/ort-linux-x64-cuda'); - assert.equal(PROVIDER_PACKAGES['win32-x64-dml'], '@jsilvanus/ort-win32-x64-dml'); + assert.equal(PROVIDER_PACKAGES['linux-x64-cuda'], '@jsilvanus/embedeer-ort-linux-x64-cuda'); + assert.equal(PROVIDER_PACKAGES['win32-x64-dml'], '@jsilvanus/embedeer-ort-win32-x64-dml'); }); }); @@ -78,7 +78,7 @@ describe('tryLoadProvider()', () => { }); test('returns { loaded: false } when provider package is not installed or binary is missing', async () => { - // In the workspace, @jsilvanus/ort-linux-x64-cuda is linked but the native + // In the workspace, @jsilvanus/embedeer-ort-linux-x64-cuda is linked but the native // binary does not exist (install.js was not run), so activate() throws. // tryLoadProvider must return { loaded: false } in either case. 
await withPlatform('linux', 'x64', async () => { @@ -114,7 +114,7 @@ describe('resolveProvider()', () => { test('device=auto returns undefined (CPU fallback) when GPU provider fails to activate', async () => { await withPlatform('linux', 'x64', async () => { - // @jsilvanus/ort-linux-x64-cuda is linked in the workspace but binary is + // @jsilvanus/embedeer-ort-linux-x64-cuda is linked in the workspace but binary is // missing. device='auto' must silently fall back to CPU (return undefined). const result = await resolveProvider('auto', undefined); assert.equal(result, undefined); @@ -139,7 +139,7 @@ describe('resolveProvider()', () => { () => resolveProvider('gpu', undefined), (err) => { assert.ok( - err.message.includes('@jsilvanus/ort-linux-x64-cuda'), + err.message.includes('@jsilvanus/embedeer-ort-linux-x64-cuda'), `Expected package name in error, got: ${err.message}`, ); return true; @@ -167,14 +167,14 @@ describe('resolveProvider()', () => { test('explicit provider=cuda re-throws activate error when CUDA libraries are missing', async () => { await withPlatform('linux', 'x64', async () => { - // In this environment @jsilvanus/ort-linux-x64-cuda is installed (workspace link) + // In this environment @jsilvanus/embedeer-ort-linux-x64-cuda is installed (workspace link) // but there is no NVIDIA GPU. activate() throws the GPU-not-found error which // is re-thrown by resolveProvider so the user gets a clear diagnostic. 
await assert.rejects( () => resolveProvider('cpu', 'cuda'), (err) => { assert.ok( - err.message.includes('@jsilvanus/ort-linux-x64-cuda'), + err.message.includes('@jsilvanus/embedeer-ort-linux-x64-cuda'), `Expected package name in error, got: ${err.message}`, ); // The error is the activate() diagnostic, not a generic "not installed" msg diff --git a/packages/ort-linux-x64-cuda/README.md b/packages/ort-linux-x64-cuda/README.md index ec0047e..91fbfcd 100644 --- a/packages/ort-linux-x64-cuda/README.md +++ b/packages/ort-linux-x64-cuda/README.md @@ -1,4 +1,4 @@ -# @jsilvanus/ort-linux-x64-cuda +# @jsilvanus/embedeer-ort-linux-x64-cuda CUDA execution provider for [embedeer](https://github.com/jsilvanus/embedeer) on **Linux x64**. @@ -52,7 +52,7 @@ nvcc --version # confirm CUDA toolkit is installed npm install @jsilvanus/embedeer # Step 2 — CUDA provider -npm install @jsilvanus/ort-linux-x64-cuda +npm install @jsilvanus/embedeer-ort-linux-x64-cuda ``` ## Docker @@ -80,7 +80,7 @@ RUN apt-get update && apt-get install -y curl && \ # Install embedeer + CUDA provider COPY package.json ./ RUN npm install @jsilvanus/embedeer && \ - npm install @jsilvanus/ort-linux-x64-cuda + npm install @jsilvanus/embedeer-ort-linux-x64-cuda COPY . . ``` @@ -90,7 +90,7 @@ COPY . . ```bash docker build -t my-embedeer-app . 
docker run --rm --gpus all my-embedeer-app \ - npx embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" + npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" ``` **docker-compose:** @@ -107,7 +107,7 @@ services: count: all capabilities: [gpu] command: > - npx embedeer --model Xenova/all-MiniLM-L6-v2 + npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" ``` @@ -132,10 +132,10 @@ const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { provider: 'c ```bash # CLI — auto GPU -npx embedeer --model Xenova/all-MiniLM-L6-v2 --device auto --data "Hello GPU" +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device auto --data "Hello GPU" # CLI — explicit CUDA -npx embedeer --model Xenova/all-MiniLM-L6-v2 --provider cuda --data "Hello CUDA" +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --provider cuda --data "Hello CUDA" ``` ## Error messages @@ -143,7 +143,7 @@ npx embedeer --model Xenova/all-MiniLM-L6-v2 --provider cuda --data "Hello CUDA" If CUDA libraries are missing, you'll see: ``` -@jsilvanus/ort-linux-x64-cuda: Missing CUDA system libraries: libcudart.so.12, libcudnn.so.9 +@jsilvanus/embedeer-ort-linux-x64-cuda: Missing CUDA system libraries: libcudart.so.12, libcudnn.so.9 onnxruntime-node CUDA requires CUDA 12 + cuDNN 9. Install them: diff --git a/packages/ort-linux-x64-cuda/index.js b/packages/ort-linux-x64-cuda/index.js index 632da69..46eaf61 100644 --- a/packages/ort-linux-x64-cuda/index.js +++ b/packages/ort-linux-x64-cuda/index.js @@ -1,5 +1,5 @@ /** - * @jsilvanus/ort-linux-x64-cuda + * @jsilvanus/embedeer-ort-linux-x64-cuda * * CUDA execution provider for embedeer on Linux x64. * @@ -110,7 +110,7 @@ export async function activate() { // 1. 
Check for NVIDIA GPU / driver if (!existsSync('/dev/nvidiactl')) { throw new Error( - '@jsilvanus/ort-linux-x64-cuda: No NVIDIA GPU detected (/dev/nvidiactl not found).\n' + + '@jsilvanus/embedeer-ort-linux-x64-cuda: No NVIDIA GPU detected (/dev/nvidiactl not found).\n' + 'Ensure NVIDIA drivers are installed.\n' + 'Verify with: nvidia-smi', ); @@ -121,7 +121,7 @@ export async function activate() { if (missing.length > 0) { throw new Error( - `@jsilvanus/ort-linux-x64-cuda: Missing CUDA system libraries: ${missing.join(', ')}\n\n` + + `@jsilvanus/embedeer-ort-linux-x64-cuda: Missing CUDA system libraries: ${missing.join(', ')}\n\n` + 'onnxruntime-node CUDA requires CUDA 12 + cuDNN 9. Install them:\n\n' + ' # Option A — CUDA 12 + cuDNN 9 via apt (Ubuntu/Debian)\n' + ' sudo apt install cuda-toolkit-12-6 libcudnn9-cuda-12\n\n' + diff --git a/packages/ort-linux-x64-cuda/install.js b/packages/ort-linux-x64-cuda/install.js index 0e2b5a7..7f0d9bb 100644 --- a/packages/ort-linux-x64-cuda/install.js +++ b/packages/ort-linux-x64-cuda/install.js @@ -1,5 +1,5 @@ /** - * Install / post-install check for @jsilvanus/ort-linux-x64-cuda + * Install / post-install check for @jsilvanus/embedeer-ort-linux-x64-cuda * * onnxruntime-node v1.20+ ships libonnxruntime_providers_cuda.so on Linux x64. * (@huggingface/transformers@4.x requires onnxruntime-node@1.24.x which ships CUDA.) 
@@ -19,12 +19,12 @@ import { existsSync } from 'fs'; if (process.platform !== 'linux' || process.arch !== 'x64') { console.warn( - `[embedeer] @jsilvanus/ort-linux-x64-cuda: skipping checks on ${process.platform}/${process.arch} (this package is for Linux x64 only)`, + `[embedeer] @jsilvanus/embedeer-ort-linux-x64-cuda: skipping checks on ${process.platform}/${process.arch} (this package is for Linux x64 only)`, ); process.exit(0); } -console.log('[embedeer] @jsilvanus/ort-linux-x64-cuda: checking system CUDA requirements...'); +console.log('[embedeer] @jsilvanus/embedeer-ort-linux-x64-cuda: checking system CUDA requirements...'); const REQUIRED_LIBS = [ 'libcudart.so.12', @@ -68,7 +68,7 @@ const hasGpu = existsSync('/dev/nvidiactl'); if (!hasGpu) { console.warn( '\n[embedeer] WARNING: No NVIDIA GPU detected (/dev/nvidiactl not found).\n' + - ' @jsilvanus/ort-linux-x64-cuda requires an NVIDIA GPU with CUDA 12 drivers.\n' + + ' @jsilvanus/embedeer-ort-linux-x64-cuda requires an NVIDIA GPU with CUDA 12 drivers.\n' + ' GPU acceleration will not be available until drivers are installed.\n', ); } else { @@ -100,6 +100,6 @@ if (missing.length > 0) { } console.log( - '\n[embedeer] @jsilvanus/ort-linux-x64-cuda: all CUDA requirements satisfied.\n' + + '\n[embedeer] @jsilvanus/embedeer-ort-linux-x64-cuda: all CUDA requirements satisfied.\n' + ' GPU acceleration is available. 
Use device="gpu" or device="auto" in embedeer.\n', ); diff --git a/packages/ort-linux-x64-cuda/package.json b/packages/ort-linux-x64-cuda/package.json index f578fe5..8671ada 100644 --- a/packages/ort-linux-x64-cuda/package.json +++ b/packages/ort-linux-x64-cuda/package.json @@ -1,9 +1,14 @@ { - "name": "@jsilvanus/ort-linux-x64-cuda", + "name": "@jsilvanus/embedeer-ort-linux-x64-cuda", "version": "1.0.0", "description": "CUDA execution provider for embedeer on Linux x64", "type": "module", "main": "index.js", + "files": [ + "index.js", + "install.js", + "README.md" + ], "scripts": { "install": "node install.js" }, @@ -19,9 +24,12 @@ "gpu", "linux" ], - "author": "", + "author": "jsilvanus", "license": "ISC", "engines": { "node": ">=18" + }, + "publishConfig": { + "access": "public" } } diff --git a/packages/ort-win32-x64-dml/README.md b/packages/ort-win32-x64-dml/README.md index e1262e3..bbddd70 100644 --- a/packages/ort-win32-x64-dml/README.md +++ b/packages/ort-win32-x64-dml/README.md @@ -1,4 +1,4 @@ -# @jsilvanus/ort-win32-x64-dml +# @jsilvanus/embedeer-ort-win32-x64-dml DirectML execution provider for [embedeer](https://github.com/jsilvanus/embedeer) on **Windows x64**. @@ -27,7 +27,7 @@ No CUDA installation needed. 
npm install @jsilvanus/embedeer # Step 2 — DirectML provider -npm install @jsilvanus/ort-win32-x64-dml +npm install @jsilvanus/embedeer-ort-win32-x64-dml ``` ## Usage @@ -47,8 +47,8 @@ const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { provider: 'd ```bash # CLI — auto GPU (uses DirectML on Windows) -npx embedeer --model Xenova/all-MiniLM-L6-v2 --device auto --data "Hello GPU" +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device auto --data "Hello GPU" # CLI — explicit DirectML -npx embedeer --model Xenova/all-MiniLM-L6-v2 --provider dml --data "Hello DML" +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --provider dml --data "Hello DML" ``` diff --git a/packages/ort-win32-x64-dml/index.js b/packages/ort-win32-x64-dml/index.js index a727493..43483f0 100644 --- a/packages/ort-win32-x64-dml/index.js +++ b/packages/ort-win32-x64-dml/index.js @@ -1,5 +1,5 @@ /** - * @jsilvanus/ort-win32-x64-dml + * @jsilvanus/embedeer-ort-win32-x64-dml * * DirectML execution provider for embedeer on Windows x64. * @@ -30,7 +30,7 @@ export async function activate() { if (process.platform !== 'win32') { throw new Error( - `@jsilvanus/ort-win32-x64-dml: DirectML is only available on Windows (current platform: ${process.platform}).`, + `@jsilvanus/embedeer-ort-win32-x64-dml: DirectML is only available on Windows (current platform: ${process.platform}).`, ); } // DirectML is natively available via onnxruntime-node on Windows 10/11. diff --git a/packages/ort-win32-x64-dml/install.js b/packages/ort-win32-x64-dml/install.js index 14fd50a..56e6ff0 100644 --- a/packages/ort-win32-x64-dml/install.js +++ b/packages/ort-win32-x64-dml/install.js @@ -1,5 +1,5 @@ /** - * Install / post-install check for @jsilvanus/ort-win32-x64-dml + * Install / post-install check for @jsilvanus/embedeer-ort-win32-x64-dml * * onnxruntime-node ships DirectML support bundled on Windows x64. * No additional binary download is required. 
@@ -12,13 +12,13 @@ if (process.platform !== 'win32') { console.warn( - `[embedeer] @jsilvanus/ort-win32-x64-dml: skipping checks on ${process.platform}/${process.arch} (this package is for Windows x64 only)`, + `[embedeer] @jsilvanus/embedeer-ort-win32-x64-dml: skipping checks on ${process.platform}/${process.arch} (this package is for Windows x64 only)`, ); process.exit(0); } console.log( - '[embedeer] @jsilvanus/ort-win32-x64-dml: DirectML is bundled with onnxruntime-node on Windows.\n' + + '[embedeer] @jsilvanus/embedeer-ort-win32-x64-dml: DirectML is bundled with onnxruntime-node on Windows.\n' + ' No additional binary download is required.\n' + ' GPU acceleration via DirectML is available on Windows 10 (1903+) / Windows 11\n' + ' with any DirectX 12-capable GPU.\n' + diff --git a/packages/ort-win32-x64-dml/package.json b/packages/ort-win32-x64-dml/package.json index c4333ba..7805d05 100644 --- a/packages/ort-win32-x64-dml/package.json +++ b/packages/ort-win32-x64-dml/package.json @@ -1,9 +1,14 @@ { - "name": "@jsilvanus/ort-win32-x64-dml", + "name": "@jsilvanus/embedeer-ort-win32-x64-dml", "version": "1.0.0", "description": "DirectML execution provider for embedeer on Windows x64", "type": "module", "main": "index.js", + "files": [ + "index.js", + "install.js", + "README.md" + ], "scripts": { "install": "node install.js" }, @@ -20,9 +25,12 @@ "gpu", "windows" ], - "author": "", + "author": "jsilvanus", "license": "ISC", "engines": { "node": ">=18" + }, + "publishConfig": { + "access": "public" } } From 05176ecee01a37a78935f7f71ba4ae8280b670ad Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 8 Apr 2026 09:15:02 +0000 Subject: [PATCH 09/12] Add configurable delimiter, jsonl/csv/--with-text output, comprehensive I/O docs Agent-Logs-Url: https://github.com/jsilvanus/embedeer/sessions/be63b249-589e-433f-bf10-b78ef1668bc1 Co-authored-by: jsilvanus <22452468+jsilvanus@users.noreply.github.com> --- 
README.md | 36 ++++ packages/embedeer/README.md | 159 +++++++++++++++-- packages/embedeer/src/cli.js | 138 ++++++++++----- packages/embedeer/test/cli-format.test.js | 205 +++++++++++++++++++--- 4 files changed, 457 insertions(+), 81 deletions(-) diff --git a/README.md b/README.md index f2b36f9..b0123fc 100644 --- a/README.md +++ b/README.md @@ -181,6 +181,42 @@ npx @jsilvanus/embedeer --provider dml # explicit DirectML (Windows) --- +## Input & Output + +Full reference: [`packages/embedeer/README.md`](packages/embedeer/README.md#input-sources) + +### Quick piping examples + +```bash +MODEL=Xenova/all-MiniLM-L6-v2 + +# Pipe any text — newline-delimited by default +printf 'Hello\nWorld\n' | npx @jsilvanus/embedeer --model $MODEL + +# JSON array on stdin +echo '["cat","dog","fish"]' | npx @jsilvanus/embedeer --model $MODEL + +# Null-byte delimiter (safe with text containing newlines) +printf 'Hello\0World\0' | npx @jsilvanus/embedeer --model $MODEL --delimiter '\0' + +# JSONL output — one {"text":...,"embedding":[...]} per line, great for jq / grep +npx @jsilvanus/embedeer --model $MODEL --output jsonl --data "foo" "bar" + +# Include source text in JSON output +npx @jsilvanus/embedeer --model $MODEL --output json --with-text --data "foo" "bar" + +# CSV output for pandas / Excel +npx @jsilvanus/embedeer --model $MODEL --file texts.txt --output csv --dump vectors.csv + +# SQL INSERT statements +npx @jsilvanus/embedeer --model $MODEL --file texts.txt --output sql --dump inserts.sql + +# Tab-separated floats (txt), with original text prepended +npx @jsilvanus/embedeer --model $MODEL --output txt --with-text --data "hello" "world" +``` + +--- + ## Provider Selection Logic | Platform | `device='auto'` or `device='gpu'` order | diff --git a/packages/embedeer/README.md b/packages/embedeer/README.md index 702174e..552edb9 100644 --- a/packages/embedeer/README.md +++ b/packages/embedeer/README.md @@ -107,26 +107,153 @@ Embed texts: npx @jsilvanus/embedeer --model --data 
'["text1","text2"]' npx @jsilvanus/embedeer --model --file texts.txt echo '["t1","t2"]' | npx @jsilvanus/embedeer --model + printf 'a\0b\0c' | npx @jsilvanus/embedeer --model --delimiter '\0' Options: - -m, --model Hugging Face model (default: Xenova/all-MiniLM-L6-v2) - -d, --data Text(s) or JSON array to embed - --file Input file: JSON array or one text per line - --dump Write output to file instead of stdout - --output json|txt|sql Output format (default: json) - -b, --batch-size Texts per worker batch (default: 32) - -c, --concurrency Parallel workers (default: 2) - --mode process|thread Worker mode (default: process) - -p, --pooling mean|cls|none (default: mean) - --no-normalize Disable L2 normalisation - --dtype Quantization: fp32|fp16|q8|q4|q4f16|auto - --token Hugging Face API token (or set HF_TOKEN env) - --cache-dir Model cache directory (default: ~/.embedeer/models) - --device Compute device: auto|cpu|gpu (default: cpu) - --provider Execution provider override: cpu|cuda|dml - -h, --help Show this help + -m, --model Hugging Face model (default: Xenova/all-MiniLM-L6-v2) + -d, --data Text(s) or JSON array to embed + --file Input file: JSON array or delimited texts + -D, --delimiter Record separator for stdin/file (default: \n) + Escape sequences supported: \0 \n \t \r + --dump Write output to file instead of stdout + --output Output: json|jsonl|csv|txt|sql (default: json) + --with-text Include source text alongside each embedding + -b, --batch-size Texts per worker batch (default: 32) + -c, --concurrency Parallel workers (default: 2) + --mode process|thread Worker mode (default: process) + -p, --pooling mean|cls|none (default: mean) + --no-normalize Disable L2 normalisation + --dtype Quantization: fp32|fp16|q8|q4|q4f16|auto + --token Hugging Face API token (or set HF_TOKEN env) + --cache-dir Model cache directory (default: ~/.embedeer/models) + --device Compute device: auto|cpu|gpu (default: cpu) + --provider Execution provider override: cpu|cuda|dml + -h, 
--help Show this help ``` +--- + +## Input Sources + +Texts can be provided in any of these ways (checked in order): + +| Source | How | +|--------|-----| +| Inline args | `--data "text1" "text2" "text3"` | +| Inline JSON | `--data '["text1","text2"]'` | +| File | `--file texts.txt` (JSON array or one record per line) | +| Stdin | Pipe or redirect — auto-detected; TTY is skipped | + +**Stdin auto-detection:** when `stdin` is not a TTY (i.e. data is piped or redirected), embedeer reads it before deciding what to do. JSON arrays are accepted directly; otherwise records are split on the delimiter. + +### Configurable delimiter (`-D` / `--delimiter`) + +By default records in stdin and files are split on newline (`\n`). Use `--delimiter` to change it: + +```bash +# Newline-delimited (default) +printf 'Hello\nWorld\n' | npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 + +# Null-byte delimited — safe with filenames/texts that contain newlines +printf 'Hello\0World\0' | npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --delimiter '\0' + +# Tab-delimited +printf 'Hello\tWorld' | npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --delimiter '\t' + +# Custom multi-character delimiter +printf 'Hello|||World|||Foo' | npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --delimiter '|||' + +# File with null-byte delimiter +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --file records.bin --delimiter '\0' + +# Integrate with find -print0 (handles filenames with spaces / newlines) +find ./docs -name '*.txt' -print0 | \ + xargs -0 cat | \ + npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --delimiter '\0' +``` + +Supported escape sequences in `--delimiter`: + +| Sequence | Character | +|----------|-----------| +| `\0` | Null byte (U+0000) | +| `\n` | Newline (U+000A) | +| `\t` | Tab (U+0009) | +| `\r` | Carriage return (U+000D) | + +--- + +## Output Formats + +| Format | Description | +|--------|-------------| +| `json` (default) | JSON array 
of float arrays: `[[0.1,0.2,...],[...]]` | +| `json --with-text` | JSON array of objects: `[{"text":"...","embedding":[...]}]` | +| `jsonl` | Newline-delimited JSON, one object per line: `{"text":"...","embedding":[...]}` | +| `csv` | CSV with header: `text,dim_0,dim_1,...,dim_N` | +| `txt` | Space-separated floats, one vector per line | +| `txt --with-text` | Tab-separated: `<text>\t<floats>` | +| `sql` | `INSERT INTO embeddings (text, vector) VALUES ...;` | + +Use `--dump <path>` to write the output to a file instead of stdout. Progress messages always go to stderr so they never interfere with piped output. + +### Piping examples + +```bash +MODEL=Xenova/all-MiniLM-L6-v2 + +# --- json (default) --- +# Embed and print the vector dimension with jq +echo '["Hello","World"]' | npx @jsilvanus/embedeer --model $MODEL | jq '.[0] | length' + +# --- jsonl --- +# One object per line — pipe to jq, grep, awk, etc. +npx @jsilvanus/embedeer --model $MODEL --data "foo" "bar" --output jsonl + +# Extract only the embedding array for downstream processing +npx @jsilvanus/embedeer --model $MODEL --data "query text" --output jsonl \ + | jq -c '.embedding' + +# Stream a large file and store as JSONL +npx @jsilvanus/embedeer --model $MODEL --file big.txt --output jsonl --dump out.jsonl + +# --- json --with-text --- +# Keep the source text next to each vector (useful for building a search index) +npx @jsilvanus/embedeer --model $MODEL --output json --with-text \ + --data "cat" "dog" "fish" \ + | jq '.[] | {text, dims: (.embedding | length)}' + +# --- csv --- +# Embed then open in Python/pandas +npx @jsilvanus/embedeer --model $MODEL --file texts.txt --output csv --dump vectors.csv +python3 -c "import pandas as pd; df = pd.read_csv('vectors.csv'); print(df.shape)" + +# --- txt --- +# Raw floats — useful for awk/paste/numpy text loading +npx @jsilvanus/embedeer --model $MODEL --data "Hello" "World" --output txt \ + | awk '{print NF, "dimensions"}' + +# txt --with-text: original text + tab + floats, easy to parse 
+npx @jsilvanus/embedeer --model $MODEL --file texts.txt --output txt --with-text \ + | while IFS=$'\t' read -r text vec; do echo "TEXT: $text"; done + +# --- sql --- +# Generate INSERT statements for a vector DB or SQLite +npx @jsilvanus/embedeer --model $MODEL --file texts.txt --output sql --dump inserts.sql +sqlite3 mydb.sqlite < inserts.sql + +# --- Chaining with other tools --- +# Embed stdin from another command +cat docs/*.txt | npx @jsilvanus/embedeer --model $MODEL --output jsonl > embeddings.jsonl + +# Null-byte input from find (handles any filename or text with newlines) +find ./corpus -name '*.txt' -print0 \ + | xargs -0 cat \ + | npx @jsilvanus/embedeer --model $MODEL --delimiter '\0' --output jsonl +``` + +--- + ### CLI Examples ```bash diff --git a/packages/embedeer/src/cli.js b/packages/embedeer/src/cli.js index a833b2f..a9ec00a 100755 --- a/packages/embedeer/src/cli.js +++ b/packages/embedeer/src/cli.js @@ -10,24 +10,28 @@ * embedeer --model --data '["text1","text2"]' * embedeer --model --file texts.txt * echo '["t1","t2"]' | embedeer --model + * printf 'a\0b\0c' | embedeer --model --delimiter '\0' * * Options: - * -m, --model Hugging Face model (default: Xenova/all-MiniLM-L6-v2) - * -d, --data Text(s) to embed (JSON array or individual strings) - * --file File of texts (JSON array or one text per line) - * --dump Write output to file instead of stdout - * --output json|txt|sql Output format (default: json) - * -b, --batch-size Texts per worker batch (default: 32) - * -c, --concurrency Parallel worker processes/threads (default: 2) - * --mode process|thread Worker mode: isolated processes or in-process threads (default: process) - * -p, --pooling Pooling: mean|cls|none (default: mean) - * --no-normalize Disable L2 normalisation - * --dtype Quantization: fp32|fp16|q8|q4|q4f16|auto - * --token Hugging Face API token (overrides HF_TOKEN env var) - * --cache-dir Custom model cache directory (default: ~/.embedeer/models) - * --device Compute device: 
auto|cpu|gpu (default: cpu) - * --provider Execution provider override: cpu|cuda|dml - * -h, --help Show this help + * -m, --model Hugging Face model (default: Xenova/all-MiniLM-L6-v2) + * -d, --data Text(s) to embed (JSON array or individual strings) + * --file File of texts (JSON array or one text per line) + * -D, --delimiter Record separator for stdin/file input (default: \n) + * Escape sequences: \0 (null byte), \n, \t, \r + * --dump Write output to file instead of stdout + * --output Output format: json|jsonl|csv|txt|sql (default: json) + * --with-text Include source text in json/txt output + * -b, --batch-size Texts per worker batch (default: 32) + * -c, --concurrency Parallel worker processes/threads (default: 2) + * --mode process|thread Worker mode (default: process) + * -p, --pooling Pooling: mean|cls|none (default: mean) + * --no-normalize Disable L2 normalisation + * --dtype Quantization: fp32|fp16|q8|q4|q4f16|auto + * --token Hugging Face API token (overrides HF_TOKEN env var) + * --cache-dir Custom model cache directory (default: ~/.embedeer/models) + * --device Compute device: auto|cpu|gpu (default: cpu) + * --provider Execution provider override: cpu|cuda|dml + * -h, --help Show this help */ import { Embedder } from './embedder.js'; @@ -49,24 +53,28 @@ Embedding: embedeer --model [--data "text1" "text2" ...] 
embedeer --model --file texts.txt echo '["t1","t2"]' | embedeer --model + printf 'a\\0b\\0c' | embedeer --model --delimiter '\\0' Options: - -m, --model Hugging Face model (default: Xenova/all-MiniLM-L6-v2) - -d, --data Text(s) or JSON array to embed - --file Input file: JSON array or one text per line - --dump Write output to file instead of stdout - --output json|txt|sql Output format (default: json) - -b, --batch-size Texts per worker batch (default: 32) - -c, --concurrency Parallel workers (default: 2) - --mode process|thread Worker mode (default: process) - -p, --pooling mean|cls|none (default: mean) - --no-normalize Disable L2 normalisation - --dtype Quantization: fp32|fp16|q8|q4|q4f16|auto - --token Hugging Face API token - --cache-dir Model cache directory (default: ${DEFAULT_CACHE_DIR}) - --device Compute device: auto|cpu|gpu (default: cpu) - --provider Execution provider override: cpu|cuda|dml - -h, --help Show this help + -m, --model Hugging Face model (default: Xenova/all-MiniLM-L6-v2) + -d, --data Text(s) or JSON array to embed + --file Input file: JSON array or delimited texts + -D, --delimiter Record separator for stdin/file (default: \\n) + Escape sequences supported: \\0 \\n \\t \\r + --dump Write output to file instead of stdout + --output Output: json|jsonl|csv|txt|sql (default: json) + --with-text Include source text alongside each embedding + -b, --batch-size Texts per worker batch (default: 32) + -c, --concurrency Parallel workers (default: 2) + --mode process|thread Worker mode (default: process) + -p, --pooling mean|cls|none (default: mean) + --no-normalize Disable L2 normalisation + --dtype Quantization: fp32|fp16|q8|q4|q4f16|auto + --token Hugging Face API token + --cache-dir Model cache directory (default: ${DEFAULT_CACHE_DIR}) + --device Compute device: auto|cpu|gpu (default: cpu) + --provider Execution provider override: cpu|cuda|dml + -h, --help Show this help `.trim()); } @@ -74,16 +82,18 @@ Options: // --data so that negative numbers 
or hyphen-prefixed strings work correctly. const KNOWN_FLAGS = new Set([ '--help', '-h', '--model', '-m', '--data', '-d', '--file', '--dump', - '--output', '--batch-size', '-b', '--concurrency', '-c', '--mode', - '--pooling', '-p', '--no-normalize', '--dtype', '--token', '--cache-dir', - '--device', '--provider', + '--output', '--with-text', '--batch-size', '-b', '--concurrency', '-c', + '--mode', '--pooling', '-p', '--no-normalize', '--dtype', '--token', + '--cache-dir', '--device', '--provider', '--delimiter', '-D', ]); const options = { model: 'Xenova/all-MiniLM-L6-v2', data: null, // --data texts (array) file: null, // --file path + delimiter: '\n', // --delimiter record separator for stdin/file dump: null, // --dump path - output: 'json', // json | txt | sql + output: 'json', // json | jsonl | csv | txt | sql + withText: false, // --with-text: include source text in output batchSize: 32, concurrency: 2, mode: 'process', @@ -114,10 +124,14 @@ for (let i = 0; i < args.length; i++) { } } else if (arg === '--file') { options.file = args[++i]; + } else if (arg === '--delimiter' || arg === '-D') { + options.delimiter = parseDelimiter(args[++i]); } else if (arg === '--dump') { options.dump = args[++i]; } else if (arg === '--output') { options.output = args[++i]; + } else if (arg === '--with-text') { + options.withText = true; } else if (arg === '--batch-size' || arg === '-b') { options.batchSize = parseInt(args[++i], 10); } else if (arg === '--concurrency' || arg === '-c') { @@ -145,9 +159,28 @@ for (let i = 0; i < args.length; i++) { // ── Output formatting ─────────────────────────────────────────────────────── -function formatOutput(texts, embeddings, format) { +function formatOutput(texts, embeddings, format, withText) { switch (format) { + case 'jsonl': + return texts + .map((text, i) => JSON.stringify({ text, embedding: embeddings[i] })) + .join('\n'); + + case 'csv': { + if (embeddings.length === 0) return ''; + const dims = embeddings[0].length; + const 
header = ['text', ...Array.from({ length: dims }, (_, k) => `dim_${k}`)].join(','); + const rows = texts.map((text, i) => { + const safeText = '"' + text.replace(/"/g, '""') + '"'; + return [safeText, ...embeddings[i]].join(','); + }); + return [header, ...rows].join('\n'); + } + case 'txt': + if (withText) { + return texts.map((text, i) => `${text}\t${embeddings[i].join(' ')}`).join('\n'); + } return embeddings.map((vec) => vec.join(' ')).join('\n'); case 'sql': { @@ -164,6 +197,11 @@ function formatOutput(texts, embeddings, format) { } default: // json + if (withText) { + return JSON.stringify( + texts.map((text, i) => ({ text, embedding: embeddings[i] })) + ); + } return JSON.stringify(embeddings); } } @@ -179,13 +217,29 @@ function writeOutput(content, dumpPath) { // ── Input reading ─────────────────────────────────────────────────────────── -function parseTexts(raw) { +/** + * Convert a user-supplied delimiter string, resolving common escape sequences. + * Supports: \0 (null byte), \n (newline), \t (tab), \r (carriage return). + */ +export function parseDelimiter(str) { + return str + .replace(/\\0/g, '\0') + .replace(/\\n/g, '\n') + .replace(/\\t/g, '\t') + .replace(/\\r/g, '\r'); +} + +/** + * Parse a block of text into an array of strings. + * First tries to parse as a JSON array; if that fails, splits on `delimiter`. + */ +export function parseTexts(raw, delimiter = '\n') { try { const parsed = JSON.parse(raw); if (!Array.isArray(parsed)) throw new Error('Expected a JSON array'); return parsed; } catch { - return raw.split('\n').filter(Boolean); + return raw.split(delimiter).filter(Boolean); } } @@ -223,7 +277,7 @@ async function main() { return; } // Stdin provided — treat as text input. 
- const texts = parseTexts(stdinRaw); + const texts = parseTexts(stdinRaw, options.delimiter); return runEmbedding(texts, resolvedCacheDir); } @@ -232,7 +286,7 @@ async function main() { if (options.file) { const raw = readFileSync(options.file, 'utf8').trim(); - texts = parseTexts(raw); + texts = parseTexts(raw, options.delimiter); } else if (options.data && options.data.length > 0) { // --data may be a JSON array in a single arg or multiple plain strings if (options.data.length === 1) { @@ -268,7 +322,7 @@ async function runEmbedding(texts, cacheDir) { try { const embeddings = await embedder.embed(texts); - const content = formatOutput(texts, embeddings, options.output); + const content = formatOutput(texts, embeddings, options.output, options.withText); writeOutput(content, options.dump); } finally { await embedder.destroy(); diff --git a/packages/embedeer/test/cli-format.test.js b/packages/embedeer/test/cli-format.test.js index 818d29d..06fcb1b 100644 --- a/packages/embedeer/test/cli-format.test.js +++ b/packages/embedeer/test/cli-format.test.js @@ -1,5 +1,5 @@ /** - * Tests for CLI output formatting and model-cache helpers. + * Tests for CLI output formatting, input parsing, and model-cache helpers. * These are pure unit tests — no workers, no network. */ @@ -30,40 +30,192 @@ describe('model-cache', async () => { }); }); -describe('CLI output formatting', async () => { - // We test the formatting logic by importing the private helpers. - // Since cli.js is a script, we extract the formatting to test it directly. 
- - function formatOutput(texts, embeddings, format) { - switch (format) { - case 'txt': - return embeddings.map((vec) => vec.join(' ')).join('\n'); - case 'sql': { - const rows = texts.map((text, i) => { - const safeText = text.replace(/'/g, "''"); - const vector = JSON.stringify(embeddings[i]); - return ` ('${safeText}', '${vector}')`; - }); - return ( - 'INSERT INTO embeddings (text, vector) VALUES\n' + - rows.join(',\n') + - ';' - ); +// ── Inline helpers mirroring cli.js (cli.js runs main() on import) ────────── + +function parseDelimiter(str) { + return str + .replace(/\\0/g, '\0') + .replace(/\\n/g, '\n') + .replace(/\\t/g, '\t') + .replace(/\\r/g, '\r'); +} + +function parseTexts(raw, delimiter = '\n') { + try { + const parsed = JSON.parse(raw); + if (!Array.isArray(parsed)) throw new Error('Expected a JSON array'); + return parsed; + } catch { + return raw.split(delimiter).filter(Boolean); + } +} + +function formatOutput(texts, embeddings, format, withText = false) { + switch (format) { + case 'jsonl': + return texts + .map((text, i) => JSON.stringify({ text, embedding: embeddings[i] })) + .join('\n'); + + case 'csv': { + if (embeddings.length === 0) return ''; + const dims = embeddings[0].length; + const header = ['text', ...Array.from({ length: dims }, (_, k) => `dim_${k}`)].join(','); + const rows = texts.map((text, i) => { + const safeText = '"' + text.replace(/"/g, '""') + '"'; + return [safeText, ...embeddings[i]].join(','); + }); + return [header, ...rows].join('\n'); + } + + case 'txt': + if (withText) { + return texts.map((text, i) => `${text}\t${embeddings[i].join(' ')}`).join('\n'); } - default: - return JSON.stringify(embeddings); + return embeddings.map((vec) => vec.join(' ')).join('\n'); + + case 'sql': { + const rows = texts.map((text, i) => { + const safeText = text.replace(/'/g, "''"); + const vector = JSON.stringify(embeddings[i]); + return ` ('${safeText}', '${vector}')`; + }); + return ( + 'INSERT INTO embeddings (text, vector) 
VALUES\n' + + rows.join(',\n') + + ';' + ); } + + default: // json + if (withText) { + return JSON.stringify( + texts.map((text, i) => ({ text, embedding: embeddings[i] })) + ); + } + return JSON.stringify(embeddings); } +} + +// ── parseDelimiter ─────────────────────────────────────────────────────────── + +describe('parseDelimiter', () => { + test('leaves plain string unchanged', () => { + assert.equal(parseDelimiter('|||'), '|||'); + }); + + test('\\0 becomes null byte', () => { + assert.equal(parseDelimiter('\\0'), '\0'); + }); + + test('\\n becomes newline', () => { + assert.equal(parseDelimiter('\\n'), '\n'); + }); + + test('\\t becomes tab', () => { + assert.equal(parseDelimiter('\\t'), '\t'); + }); + + test('\\r becomes carriage return', () => { + assert.equal(parseDelimiter('\\r'), '\r'); + }); + + test('multiple escape sequences in one string', () => { + assert.equal(parseDelimiter('\\r\\n'), '\r\n'); + }); +}); + +// ── parseTexts ─────────────────────────────────────────────────────────────── + +describe('parseTexts', () => { + test('JSON array is parsed directly', () => { + const result = parseTexts('["a","b","c"]'); + assert.deepEqual(result, ['a', 'b', 'c']); + }); + + test('defaults to newline delimiter', () => { + const result = parseTexts('foo\nbar\nbaz'); + assert.deepEqual(result, ['foo', 'bar', 'baz']); + }); + test('custom delimiter (pipe)', () => { + const result = parseTexts('foo|bar|baz', '|'); + assert.deepEqual(result, ['foo', 'bar', 'baz']); + }); + + test('custom delimiter (null byte)', () => { + const result = parseTexts('foo\0bar\0baz', '\0'); + assert.deepEqual(result, ['foo', 'bar', 'baz']); + }); + + test('custom delimiter (tab)', () => { + const result = parseTexts('foo\tbar\tbaz', '\t'); + assert.deepEqual(result, ['foo', 'bar', 'baz']); + }); + + test('filters empty strings after split', () => { + const result = parseTexts('\nfoo\n\nbar\n', '\n'); + assert.deepEqual(result, ['foo', 'bar']); + }); + + test('JSON array takes 
precedence over delimiter parsing', () => { + const result = parseTexts('["x","y"]', '|'); + assert.deepEqual(result, ['x', 'y']); + }); +}); + +// ── CLI output formatting ──────────────────────────────────────────────────── + +describe('CLI output formatting', () => { const texts = ['Hello world', "It's a test"]; const embeddings = [[0.1, 0.2], [0.3, 0.4]]; - test('json output is valid JSON array', () => { + test('json output is valid JSON array of vectors', () => { const out = formatOutput(texts, embeddings, 'json'); const parsed = JSON.parse(out); assert.deepEqual(parsed, embeddings); }); + test('json --with-text wraps each item with text field', () => { + const out = formatOutput(texts, embeddings, 'json', true); + const parsed = JSON.parse(out); + assert.equal(parsed.length, 2); + assert.equal(parsed[0].text, 'Hello world'); + assert.deepEqual(parsed[0].embedding, [0.1, 0.2]); + assert.equal(parsed[1].text, "It's a test"); + assert.deepEqual(parsed[1].embedding, [0.3, 0.4]); + }); + + test('jsonl produces one JSON object per line', () => { + const out = formatOutput(texts, embeddings, 'jsonl'); + const lines = out.split('\n'); + assert.equal(lines.length, 2); + const first = JSON.parse(lines[0]); + assert.equal(first.text, 'Hello world'); + assert.deepEqual(first.embedding, [0.1, 0.2]); + const second = JSON.parse(lines[1]); + assert.equal(second.text, "It's a test"); + assert.deepEqual(second.embedding, [0.3, 0.4]); + }); + + test('csv produces header row and data rows', () => { + const out = formatOutput(texts, embeddings, 'csv'); + const lines = out.split('\n'); + assert.equal(lines[0], 'text,dim_0,dim_1'); + assert.equal(lines[1], '"Hello world",0.1,0.2'); + assert.equal(lines[2], '"It\'s a test",0.3,0.4'); + }); + + test('csv escapes double-quotes in text', () => { + const out = formatOutput(['say "hi"'], [[1, 2]], 'csv'); + const lines = out.split('\n'); + assert.equal(lines[1], '"say ""hi""",1,2'); + }); + + test('csv returns empty string for zero 
embeddings', () => { + assert.equal(formatOutput([], [], 'csv'), ''); + }); + test('txt output is one space-separated line per embedding', () => { const out = formatOutput(texts, embeddings, 'txt'); const lines = out.split('\n'); @@ -72,6 +224,13 @@ describe('CLI output formatting', async () => { assert.equal(lines[1], '0.3 0.4'); }); + test('txt --with-text prefixes each line with text and tab', () => { + const out = formatOutput(texts, embeddings, 'txt', true); + const lines = out.split('\n'); + assert.equal(lines[0], 'Hello world\t0.1 0.2'); + assert.equal(lines[1], "It's a test\t0.3 0.4"); + }); + test('sql output starts with INSERT and contains both rows', () => { const out = formatOutput(texts, embeddings, 'sql'); assert.ok(out.startsWith('INSERT INTO embeddings')); From 7bf2828502a56bfd5fcc3bd8503277c694aff4c3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 8 Apr 2026 09:45:08 +0000 Subject: [PATCH 10/12] Add --interactive/-i line-reader mode: per-line stdin, batch flush, streaming output Agent-Logs-Url: https://github.com/jsilvanus/embedeer/sessions/b3d9aa41-2047-43a4-be9a-96e4015529eb Co-authored-by: jsilvanus <22452468+jsilvanus@users.noreply.github.com> --- README.md | 22 ++++ packages/embedeer/README.md | 46 +++++++- packages/embedeer/src/cli.js | 201 +++++++++++++++++++++++++++++++++-- 3 files changed, 258 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index b0123fc..960b87e 100644 --- a/README.md +++ b/README.md @@ -215,6 +215,28 @@ npx @jsilvanus/embedeer --model $MODEL --file texts.txt --output sql --dump inse npx @jsilvanus/embedeer --model $MODEL --output txt --with-text --data "hello" "world" ``` +### Interactive / streaming line-reader (`-i` / `--interactive`) + +Paste records one per line and get embeddings as soon as each batch fills (or when you press Enter on an empty line to flush manually). 
Ideal for interactive use or streaming large datasets through a pipeline. + +```bash +MODEL=Xenova/all-MiniLM-L6-v2 + +# Interactive terminal session — paste lines, Ctrl+D when done +npx @jsilvanus/embedeer --model $MODEL --interactive --dump embeddings.jsonl + +# Stream a large file in batches (auto-flushes every 32 lines) +cat corpus.txt | npx @jsilvanus/embedeer --model $MODEL -i --output csv --dump out.csv + +# GPU-accelerated interactive mode +npx @jsilvanus/embedeer --model $MODEL --interactive --device auto \ + --batch-size 64 --output jsonl --dump out.jsonl +``` + +**Flushing:** batch fills to `--batch-size` (auto) or empty line (manual). Ctrl+D finishes. Ctrl+C aborts. +**Output:** progress messages go to stderr; embeddings go to `--dump` file or stdout. +**csv** writes the header on the first batch only. **json**/**sql** are promoted to **jsonl** automatically. + --- ## Provider Selection Logic diff --git a/packages/embedeer/README.md b/packages/embedeer/README.md index 552edb9..7ac1f13 100644 --- a/packages/embedeer/README.md +++ b/packages/embedeer/README.md @@ -102,19 +102,24 @@ npx @jsilvanus/embedeer [options] Model management (pull / cache model): npx @jsilvanus/embedeer --model -Embed texts: +Embed texts (batch): npx @jsilvanus/embedeer --model --data "text1" "text2" ... 
npx @jsilvanus/embedeer --model --data '["text1","text2"]' npx @jsilvanus/embedeer --model --file texts.txt echo '["t1","t2"]' | npx @jsilvanus/embedeer --model printf 'a\0b\0c' | npx @jsilvanus/embedeer --model --delimiter '\0' +Interactive / streaming line-reader: + npx @jsilvanus/embedeer --model --interactive --dump out.jsonl + cat big.txt | npx @jsilvanus/embedeer --model -i --output csv --dump out.csv + Options: -m, --model Hugging Face model (default: Xenova/all-MiniLM-L6-v2) -d, --data Text(s) or JSON array to embed --file Input file: JSON array or delimited texts -D, --delimiter Record separator for stdin/file (default: \n) Escape sequences supported: \0 \n \t \r + -i, --interactive Interactive line-reader (see below) --dump Write output to file instead of stdout --output Output: json|jsonl|csv|txt|sql (default: json) --with-text Include source text alongside each embedding @@ -143,9 +148,48 @@ Texts can be provided in any of these ways (checked in order): | Inline JSON | `--data '["text1","text2"]'` | | File | `--file texts.txt` (JSON array or one record per line) | | Stdin | Pipe or redirect — auto-detected; TTY is skipped | +| Interactive | `--interactive` / `-i` — line-reader, embeds as you type | **Stdin auto-detection:** when `stdin` is not a TTY (i.e. data is piped or redirected), embedeer reads it before deciding what to do. JSON arrays are accepted directly; otherwise records are split on the delimiter. +--- + +## Interactive Line-Reader Mode (`-i` / `--interactive`) + +The interactive mode opens a line-by-line reader that starts embedding as records arrive — ideal for pasting large datasets into a terminal or streaming data from another process. 
+ +```bash +# Open an interactive session (paste lines, Ctrl+D when done) +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --interactive --dump embeddings.jsonl + +# Stream a large file through interactive mode with CSV output +cat big.txt | npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 \ + --interactive --output csv --dump embeddings.csv + +# Interactive with GPU, custom batch size, txt output +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 \ + --interactive --device auto --batch-size 16 --output txt --dump vecs.txt +``` + +**How it works:** + +| Event | What happens | +|-------|-------------| +| Type a line, press Enter | Record is buffered | +| Buffer reaches `--batch-size` | Auto-flush: embed + append to output | +| Type an empty line | Manual flush: embed whatever is buffered | +| Ctrl+D (EOF) | Flush remaining records and exit | +| Ctrl+C | Flush remaining records and exit | + +**Behaviour notes:** + +- Progress messages (`Batch N: M record(s) → file`) always go to **stderr** — they never pollute piped output. +- When stdin is a TTY, a `> ` prompt is shown on stderr. +- Output defaults to **stdout** if `--dump` is omitted; a tip is printed when running in TTY mode. +- `--output json` and `--output sql` are automatically promoted to `jsonl` since they produce complete documents that cannot be appended to incrementally. +- `--output csv` writes the dimension header (`text,dim_0,dim_1,...`) on the first batch only; subsequent batches append data rows. +- Each interactive session **clears** the `--dump` file on start so you always get a fresh output file. + ### Configurable delimiter (`-D` / `--delimiter`) By default records in stdin and files are split on newline (`\n`). 
Use `--delimiter` to change it: diff --git a/packages/embedeer/src/cli.js b/packages/embedeer/src/cli.js index a9ec00a..cd415c9 100755 --- a/packages/embedeer/src/cli.js +++ b/packages/embedeer/src/cli.js @@ -5,19 +5,24 @@ * Model management: * embedeer --model Pull / cache a model * - * Embedding: + * Embedding (batch): * embedeer --model --data "text1" "text2" ... * embedeer --model --data '["text1","text2"]' * embedeer --model --file texts.txt * echo '["t1","t2"]' | embedeer --model * printf 'a\0b\0c' | embedeer --model --delimiter '\0' * + * Interactive / streaming line-reader: + * embedeer --model --interactive --dump out.jsonl + * cat big.txt | embedeer --model --interactive --output csv --dump out.csv + * * Options: * -m, --model Hugging Face model (default: Xenova/all-MiniLM-L6-v2) * -d, --data Text(s) to embed (JSON array or individual strings) * --file File of texts (JSON array or one text per line) * -D, --delimiter Record separator for stdin/file input (default: \n) * Escape sequences: \0 (null byte), \n, \t, \r + * -i, --interactive Interactive line-reader: embed as lines arrive * --dump Write output to file instead of stdout * --output Output format: json|jsonl|csv|txt|sql (default: json) * --with-text Include source text in json/txt output @@ -36,7 +41,8 @@ import { Embedder } from './embedder.js'; import { getCacheDir, DEFAULT_CACHE_DIR } from './model-cache.js'; -import { readFileSync, writeFileSync } from 'fs'; +import { readFileSync, writeFileSync, appendFileSync } from 'fs'; +import readline from 'readline'; // ── Argument parsing ──────────────────────────────────────────────────────── @@ -49,18 +55,24 @@ embedeer — parallel batched embeddings from Hugging Face Model management (pull / cache): embedeer --model -Embedding: +Embedding (batch): embedeer --model [--data "text1" "text2" ...] 
embedeer --model --file texts.txt echo '["t1","t2"]' | embedeer --model printf 'a\\0b\\0c' | embedeer --model --delimiter '\\0' +Interactive / streaming line-reader: + embedeer --model --interactive --dump out.jsonl + cat big.txt | embedeer --model -i --output csv --dump out.csv + Options: -m, --model Hugging Face model (default: Xenova/all-MiniLM-L6-v2) -d, --data Text(s) or JSON array to embed --file Input file: JSON array or delimited texts -D, --delimiter Record separator for stdin/file (default: \\n) Escape sequences supported: \\0 \\n \\t \\r + -i, --interactive Interactive line-reader: embed as lines arrive + (empty line or full batch triggers immediate flush) --dump Write output to file instead of stdout --output Output: json|jsonl|csv|txt|sql (default: json) --with-text Include source text alongside each embedding @@ -85,15 +97,17 @@ const KNOWN_FLAGS = new Set([ '--output', '--with-text', '--batch-size', '-b', '--concurrency', '-c', '--mode', '--pooling', '-p', '--no-normalize', '--dtype', '--token', '--cache-dir', '--device', '--provider', '--delimiter', '-D', + '--interactive', '-i', ]); const options = { model: 'Xenova/all-MiniLM-L6-v2', - data: null, // --data texts (array) - file: null, // --file path - delimiter: '\n', // --delimiter record separator for stdin/file - dump: null, // --dump path - output: 'json', // json | jsonl | csv | txt | sql - withText: false, // --with-text: include source text in output + data: null, // --data texts (array) + file: null, // --file path + delimiter: '\n', // --delimiter record separator for stdin/file + interactive: false, // --interactive / -i: line-reader mode + dump: null, // --dump path + output: 'json', // json | jsonl | csv | txt | sql + withText: false, // --with-text: include source text in output batchSize: 32, concurrency: 2, mode: 'process', @@ -126,6 +140,8 @@ for (let i = 0; i < args.length; i++) { options.file = args[++i]; } else if (arg === '--delimiter' || arg === '-D') { options.delimiter = 
parseDelimiter(args[++i]); + } else if (arg === '--interactive' || arg === '-i') { + options.interactive = true; } else if (arg === '--dump') { options.dump = args[++i]; } else if (arg === '--output') { @@ -255,11 +271,176 @@ async function readStdin() { }); } -// ── Main ──────────────────────────────────────────────────────────────────── +// ── Interactive / streaming line-reader mode ──────────────────────────────── + +/** + * Interactive mode: read one text record per line from stdin, embed in + * configurable batches, and stream results to a file (or stdout). + * + * Flushing triggers: + * • Batch reaches --batch-size lines (auto-flush) + * • User types an empty line (manual flush) + * • EOF / Ctrl+D (flush remaining records and exit) + * • Ctrl+C (flush remaining records and exit) + * + * Output: + * • Formats json and sql are not appendable — they are promoted to jsonl. + * • csv writes the dimension header once (on the first batch). + * • All other formats append each batch as independent lines. + * • Progress/prompt messages always go to stderr. + */ +async function runInteractive(cacheDir) { + // json and sql produce complete documents that can't be appended to + // incrementally; switch to jsonl so each batch emits self-contained lines. + if (options.output === 'json' || options.output === 'sql') { + console.error( + `Warning: --output ${options.output} is not suitable for interactive mode. Switching to jsonl.` + ); + options.output = 'jsonl'; + } + + const isTTY = process.stdin.isTTY; + const outputFile = options.dump; + + if (isTTY && !outputFile) { + console.error( + 'Tip: use --dump to write output to a file so it does not mix with input.' + ); + } + + // Load the model before opening the reader so we are ready to embed immediately. 
+ console.error(`Loading model: ${options.model}…`); + const embedder = await Embedder.create(options.model, { + batchSize: options.batchSize, + concurrency: options.concurrency, + mode: options.mode, + pooling: options.pooling, + normalize: options.normalize, + dtype: options.dtype, + token: options.token, + cacheDir, + device: options.device, + provider: options.provider, + }); + + if (isTTY) { + console.error('Model ready. Paste records below, one per line.'); + console.error(`Batch size: ${options.batchSize}. Empty line = flush now. Ctrl+D = done. Ctrl+C = abort.`); + } + + // Initialise / clear the output file so each interactive session starts fresh. + if (outputFile) { + writeFileSync(outputFile, '', 'utf8'); + } + + let csvHeaderWritten = false; + let batch = []; + let batchNumber = 0; + let flushing = false; + + /** + * Embed the current batch and write its output. + * The readline interface must be paused before calling this. + */ + async function flushBatch() { + if (batch.length === 0) return; + const texts = [...batch]; + batch = []; + batchNumber++; + + const embeddings = await embedder.embed(texts); + let content; + + if (options.output === 'csv') { + const full = formatOutput(texts, embeddings, 'csv', options.withText); + if (!csvHeaderWritten) { + content = full; // includes header + csvHeaderWritten = true; + } else { + content = full.split('\n').slice(1).join('\n'); // data rows only + } + } else { + content = formatOutput(texts, embeddings, options.output, options.withText); + } + + if (outputFile) { + appendFileSync(outputFile, content + '\n', 'utf8'); + console.error(`Batch ${batchNumber}: ${texts.length} record(s) → ${outputFile}`); + } else { + process.stdout.write(content + '\n'); + } + } + + const rl = readline.createInterface({ + input: process.stdin, + // Route the prompt to stderr so it never pollutes stdout embeddings. + output: isTTY ? 
process.stderr : null, + terminal: isTTY, + }); + + if (isTTY) rl.prompt(); + + rl.on('line', (line) => { + const text = line.trim(); + + if (text !== '') { + batch.push(text); + } + + const shouldFlush = text === '' || batch.length >= options.batchSize; + + if (shouldFlush && !flushing && batch.length > 0) { + flushing = true; + rl.pause(); + flushBatch() + .then(() => { + flushing = false; + rl.resume(); + if (isTTY) rl.prompt(); + }) + .catch((err) => { + console.error('Error embedding batch:', err.message); + flushing = false; + rl.resume(); + if (isTTY) rl.prompt(); + }); + } else if (isTTY) { + rl.prompt(); + } + }); + + await new Promise((resolve) => { + rl.on('close', async () => { + try { + await flushBatch(); + } catch (err) { + console.error('Error embedding final batch:', err.message); + } + await embedder.destroy(); + if (outputFile) { + console.error(`Done. ${batchNumber} batch(es) written to ${outputFile}`); + } + resolve(); + }); + + // Handle Ctrl+C — flush remaining records then exit cleanly. 
+ rl.on('SIGINT', () => { + console.error('\nInterrupted — flushing remaining records…'); + rl.close(); // triggers 'close' event above + }); + }); +} + + async function main() { const resolvedCacheDir = getCacheDir(options.cacheDir); + // ── Interactive line-reader mode ───────────────────────────────────────── + if (options.interactive) { + return runInteractive(resolvedCacheDir); + } + // ── Model-only mode (pull / cache) ────────────────────────────────────── const hasDataSource = options.data || options.file || positional.length > 0; if (!hasDataSource) { From ed066832d427a86a919c6860c0862ff2f61101a0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 8 Apr 2026 09:53:46 +0000 Subject: [PATCH 11/12] Add .github/copilot-setup-steps.yml: run npm install before agent sessions Agent-Logs-Url: https://github.com/jsilvanus/embedeer/sessions/2e60c137-0653-4608-ba14-5926c7456201 Co-authored-by: jsilvanus <22452468+jsilvanus@users.noreply.github.com> --- .github/copilot-setup-steps.yml | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .github/copilot-setup-steps.yml diff --git a/.github/copilot-setup-steps.yml b/.github/copilot-setup-steps.yml new file mode 100644 index 0000000..245bc2b --- /dev/null +++ b/.github/copilot-setup-steps.yml @@ -0,0 +1,3 @@ +steps: + - name: Install dependencies + run: npm install From f5804b60fecb376e714502d489fb12a9c419eff1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 8 Apr 2026 10:21:23 +0000 Subject: [PATCH 12/12] Delete ort packages, inline GPU checks, flatten monorepo to single repo Agent-Logs-Url: https://github.com/jsilvanus/embedeer/sessions/6286b261-52f2-4560-ad02-795169949161 Co-authored-by: jsilvanus <22452468+jsilvanus@users.noreply.github.com> --- .gitignore | 2 - README.md | 485 ++++++++++++------ package-lock.json | 31 +- package.json | 42 +- packages/embedeer/README.md | 430 
---------------- packages/embedeer/package.json | 47 -- packages/embedeer/src/provider-loader.js | 148 ------ packages/ort-linux-x64-cuda/README.md | 153 ------ packages/ort-linux-x64-cuda/index.js | 146 ------ packages/ort-linux-x64-cuda/install.js | 105 ---- packages/ort-linux-x64-cuda/package.json | 35 -- packages/ort-win32-x64-dml/README.md | 54 -- packages/ort-win32-x64-dml/index.js | 46 -- packages/ort-win32-x64-dml/install.js | 26 - packages/ort-win32-x64-dml/package.json | 36 -- .../src => src}/child-process-worker.js | 0 {packages/embedeer/src => src}/cli.js | 0 {packages/embedeer/src => src}/embedder.js | 0 {packages/embedeer/src => src}/index.js | 0 {packages/embedeer/src => src}/model-cache.js | 0 src/provider-loader.js | 249 +++++++++ .../src => src}/thread-worker-script.js | 0 .../embedeer/src => src}/thread-worker.js | 0 {packages/embedeer/src => src}/worker-pool.js | 0 {packages/embedeer/src => src}/worker.js | 0 .../child-process-worker.test.js | 0 .../embedeer/test => test}/cli-format.test.js | 0 .../test => test}/embedder-options.test.js | 0 .../embedeer/test => test}/embedder.test.js | 0 .../test => test}/helpers/crash-worker.js | 0 .../helpers/echo-thread-worker.js | 0 .../test => test}/helpers/echo-worker.js | 0 .../test => test}/provider-loader.test.js | 71 +-- .../test => test}/thread-worker.test.js | 0 .../test => test}/worker-pool-options.test.js | 0 .../test => test}/worker-pool.test.js | 0 36 files changed, 643 insertions(+), 1463 deletions(-) delete mode 100644 packages/embedeer/README.md delete mode 100644 packages/embedeer/package.json delete mode 100644 packages/embedeer/src/provider-loader.js delete mode 100644 packages/ort-linux-x64-cuda/README.md delete mode 100644 packages/ort-linux-x64-cuda/index.js delete mode 100644 packages/ort-linux-x64-cuda/install.js delete mode 100644 packages/ort-linux-x64-cuda/package.json delete mode 100644 packages/ort-win32-x64-dml/README.md delete mode 100644 packages/ort-win32-x64-dml/index.js 
delete mode 100644 packages/ort-win32-x64-dml/install.js delete mode 100644 packages/ort-win32-x64-dml/package.json rename {packages/embedeer/src => src}/child-process-worker.js (100%) rename {packages/embedeer/src => src}/cli.js (100%) rename {packages/embedeer/src => src}/embedder.js (100%) rename {packages/embedeer/src => src}/index.js (100%) rename {packages/embedeer/src => src}/model-cache.js (100%) create mode 100644 src/provider-loader.js rename {packages/embedeer/src => src}/thread-worker-script.js (100%) rename {packages/embedeer/src => src}/thread-worker.js (100%) rename {packages/embedeer/src => src}/worker-pool.js (100%) rename {packages/embedeer/src => src}/worker.js (100%) rename {packages/embedeer/test => test}/child-process-worker.test.js (100%) rename {packages/embedeer/test => test}/cli-format.test.js (100%) rename {packages/embedeer/test => test}/embedder-options.test.js (100%) rename {packages/embedeer/test => test}/embedder.test.js (100%) rename {packages/embedeer/test => test}/helpers/crash-worker.js (100%) rename {packages/embedeer/test => test}/helpers/echo-thread-worker.js (100%) rename {packages/embedeer/test => test}/helpers/echo-worker.js (100%) rename {packages/embedeer/test => test}/provider-loader.test.js (72%) rename {packages/embedeer/test => test}/thread-worker.test.js (100%) rename {packages/embedeer/test => test}/worker-pool-options.test.js (100%) rename {packages/embedeer/test => test}/worker-pool.test.js (100%) diff --git a/.gitignore b/.gitignore index 05f6e26..2e8157a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,3 @@ node_modules/ -packages/*/node_modules/ .env *.log -packages/*/vendor/ diff --git a/README.md b/README.md index 960b87e..e3562c7 100644 --- a/README.md +++ b/README.md @@ -1,267 +1,424 @@ # embedeer A Node.js tool for generating text embeddings using models from [Hugging Face](https://huggingface.co/models). 
-Supports **batched** input, **parallel** execution, optional **GPU acceleration** (CUDA / DirectML), quantization, and Hugging Face auth. - -This repository is a **monorepo** managed with npm workspaces. +Supports **batched** input, **parallel** execution, isolated **child-process** workers (default) or **in-process threads**, quantization, optional GPU acceleration, and Hugging Face auth. --- -## Packages +## Features -| Package | Description | -|---------|-------------| -| [`@jsilvanus/embedeer`](packages/embedeer) | Main embeddings package (CPU + optional GPU) | -| [`@jsilvanus/embedeer-ort-linux-x64-cuda`](packages/ort-linux-x64-cuda) | CUDA provider for Linux x64 | -| [`@jsilvanus/embedeer-ort-win32-x64-dml`](packages/ort-win32-x64-dml) | DirectML provider for Windows x64 | +- Downloads any Hugging Face feature-extraction model on first use (cached in `~/.embedeer/models`) +- **Isolated processes** (default) — a worker crash cannot bring down the caller +- **In-process threads** — opt-in via `mode: 'thread'` for lower overhead +- **Sequential** execution when `concurrency: 1` +- Configurable batch size and concurrency +- **GPU acceleration** — optional CUDA (Linux x64) and DirectML (Windows x64), no extra packages needed +- Hugging Face API token support (`--token` / `HF_TOKEN` env var) +- Quantization via `dtype` (`fp32` · `fp16` · `q8` · `q4` · `q4f16` · `auto`) +- Rich CLI: pull model, embed from file, dump output as JSON / TXT / SQL --- -## Quick Start - -### CPU (default, works everywhere) +## Installation ```bash npm install @jsilvanus/embedeer -npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --data "Hello world" ``` +GPU acceleration (CUDA on Linux x64, DirectML on Windows x64) is built into `onnxruntime-node` +which ships as a transitive dependency. No additional packages are required. 
+ +**For CUDA on Linux x64** you also need the CUDA 12 system libraries: + +```bash +# Ubuntu / Debian +sudo apt install cuda-toolkit-12-6 libcudnn9-cuda-12 +``` + +--- + +## Programmatic API + +### Embed texts (CPU — default) + ```js import { Embedder } from '@jsilvanus/embedeer'; -const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2'); -const vectors = await embedder.embed(['Hello', 'World']); -await embedder.destroy(); + +const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { + batchSize: 32, // texts per worker task (default: 32) + concurrency: 2, // parallel workers (default: 2) + mode: 'process', // 'process' | 'thread' (default: 'process') + pooling: 'mean', // 'mean' | 'cls' | 'none' (default: 'mean') + normalize: true, // L2-normalise vectors (default: true) + token: 'hf_...', // HF API token (optional; also reads HF_TOKEN env) + dtype: 'q8', // quantization dtype (optional) + cacheDir: '/my/cache', // override model cache (default: ~/.embedeer/models) +}); + +const vectors = await embedder.embed(['Hello world', 'Foo bar baz']); +// → number[][] (one 384-dim vector per text for all-MiniLM-L6-v2) + +await embedder.destroy(); // shut down worker processes ``` -### GPU — use CUDA where available (auto-detect) +### Embed texts with GPU -Add the provider package for your platform, then pass `--device auto`. -`auto` tries CUDA on Linux and DirectML on Windows; silently falls back to CPU if no GPU is found. 
+```js +import { Embedder } from '@jsilvanus/embedeer'; -**Linux x64 (NVIDIA CUDA):** +// Auto-detect GPU (falls back to CPU if no provider is installed) +const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { + device: 'auto', +}); -```bash -# Install CUDA 12 + cuDNN 9 system libraries (Ubuntu/Debian) -sudo apt install cuda-toolkit-12-6 libcudnn9-cuda-12 +// Require GPU (throws if no provider is available) +const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { + device: 'gpu', +}); -npm install @jsilvanus/embedeer -npm install @jsilvanus/embedeer-ort-linux-x64-cuda +// Explicitly select an execution provider +const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { + provider: 'cuda', // 'cuda' | 'dml' +}); +``` -# Auto-detect: uses CUDA on this system, CPU on any other -npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device auto --data "Hello" +### Pull (pre-cache) a model -# Or require GPU (throws if CUDA is unavailable): -npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" +Like `ollama pull` — downloads the model once so workers start instantly: + +```js +import { loadModel } from '@jsilvanus/embedeer'; + +const { modelName, cacheDir } = await loadModel('Xenova/all-MiniLM-L6-v2', { + token: 'hf_...', // optional + dtype: 'q8', // optional +}); ``` -**Windows x64 (DirectML — any GPU: NVIDIA / AMD / Intel):** +--- -```bash -npm install @jsilvanus/embedeer -npm install @jsilvanus/embedeer-ort-win32-x64-dml +## CLI -# Auto-detect: uses DirectML on Windows, CPU elsewhere -npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device auto --data "Hello" +``` +npx @jsilvanus/embedeer [options] + +Model management (pull / cache model): + npx @jsilvanus/embedeer --model + +Embed texts (batch): + npx @jsilvanus/embedeer --model --data "text1" "text2" ... 
+ npx @jsilvanus/embedeer --model --data '["text1","text2"]' + npx @jsilvanus/embedeer --model --file texts.txt + echo '["t1","t2"]' | npx @jsilvanus/embedeer --model + printf 'a\0b\0c' | npx @jsilvanus/embedeer --model --delimiter '\0' + +Interactive / streaming line-reader: + npx @jsilvanus/embedeer --model --interactive --dump out.jsonl + cat big.txt | npx @jsilvanus/embedeer --model -i --output csv --dump out.csv + +Options: + -m, --model Hugging Face model (default: Xenova/all-MiniLM-L6-v2) + -d, --data Text(s) or JSON array to embed + --file Input file: JSON array or delimited texts + -D, --delimiter Record separator for stdin/file (default: \n) + Escape sequences supported: \0 \n \t \r + -i, --interactive Interactive line-reader (see below) + --dump Write output to file instead of stdout + --output Output: json|jsonl|csv|txt|sql (default: json) + --with-text Include source text alongside each embedding + -b, --batch-size Texts per worker batch (default: 32) + -c, --concurrency Parallel workers (default: 2) + --mode process|thread Worker mode (default: process) + -p, --pooling mean|cls|none (default: mean) + --no-normalize Disable L2 normalisation + --dtype Quantization: fp32|fp16|q8|q4|q4f16|auto + --token Hugging Face API token (or set HF_TOKEN env) + --cache-dir Model cache directory (default: ~/.embedeer/models) + --device Compute device: auto|cpu|gpu (default: cpu) + --provider Execution provider override: cpu|cuda|dml + -h, --help Show this help ``` --- -## GPU — Two-Step Install +## Input Sources + +Texts can be provided in any of these ways (checked in order): + +| Source | How | +|--------|-----| +| Inline args | `--data "text1" "text2" "text3"` | +| Inline JSON | `--data '["text1","text2"]'` | +| File | `--file texts.txt` (JSON array or one record per line) | +| Stdin | Pipe or redirect — auto-detected; TTY is skipped | +| Interactive | `--interactive` / `-i` — line-reader, embeds as you type | -### Linux x64 + NVIDIA CUDA (GPU MVP) +**Stdin 
auto-detection:** when `stdin` is not a TTY (i.e. data is piped or redirected), embedeer reads it before deciding what to do. JSON arrays are accepted directly; otherwise records are split on the delimiter. + +--- -**System requirements:** NVIDIA GPU + driver ≥ 525, CUDA 12, cuDNN 9 +## Interactive Line-Reader Mode (`-i` / `--interactive`) -`onnxruntime-node` v1.24.x ships `libonnxruntime_providers_cuda.so` on Linux x64. No custom binary needed — just install CUDA 12 + cuDNN 9 system libraries and the npm package: +The interactive mode opens a line-by-line reader that starts embedding as records arrive — ideal for pasting large datasets into a terminal or streaming data from another process. ```bash -# Install CUDA 12 + cuDNN 9 (Ubuntu/Debian) -sudo apt install cuda-toolkit-12-6 libcudnn9-cuda-12 +# Open an interactive session (paste lines, Ctrl+D when done) +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --interactive --dump embeddings.jsonl -# Install embedeer and the CUDA provider package -npm install @jsilvanus/embedeer -npm install @jsilvanus/embedeer-ort-linux-x64-cuda +# Stream a large file through interactive mode with CSV output +cat big.txt | npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 \ + --interactive --output csv --dump embeddings.csv -# Run with GPU -npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" +# Interactive with GPU, custom batch size, txt output +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 \ + --interactive --device auto --batch-size 16 --output txt --dump vecs.txt ``` -### Docker + NVIDIA CUDA +**How it works:** -Use an [NVIDIA CUDA Docker image](https://hub.docker.com/r/nvidia/cuda) as your base — it ships all required CUDA 12 + cuDNN 9 libraries, so no manual `apt install` is needed in your Dockerfile. 
+| Event | What happens | +|-------|-------------| +| Type a line, press Enter | Record is buffered | +| Buffer reaches `--batch-size` | Auto-flush: embed + append to output | +| Type an empty line | Manual flush: embed whatever is buffered | +| Ctrl+D (EOF) | Flush remaining records and exit | +| Ctrl+C | Flush remaining records and exit | -**Requirements on the host:** -- NVIDIA GPU driver ≥ 525 -- [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) installed +**Behaviour notes:** -**Example `Dockerfile`:** +- Progress messages (`Batch N: M record(s) → file`) always go to **stderr** — they never pollute piped output. +- When stdin is a TTY, a `> ` prompt is shown on stderr. +- Output defaults to **stdout** if `--dump` is omitted; a tip is printed when running in TTY mode. +- `--output json` and `--output sql` are automatically promoted to `jsonl` since they produce complete documents that cannot be appended to incrementally. +- `--output csv` writes the dimension header (`text,dim_0,dim_1,...`) on the first batch only; subsequent batches append data rows. +- Each interactive session **clears** the `--dump` file on start so you always get a fresh output file. -```dockerfile -# CUDA 12 + cuDNN 9 runtime — all required libs are pre-installed -FROM nvidia/cuda:12.6.3-cudnn9-runtime-ubuntu24.04 +### Configurable delimiter (`-D` / `--delimiter`) -WORKDIR /app +By default records in stdin and files are split on newline (`\n`). Use `--delimiter` to change it: -# Install Node.js (e.g. 
via NodeSource) -RUN apt-get update && apt-get install -y curl && \ - curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \ - apt-get install -y nodejs && \ - rm -rf /var/lib/apt/lists/* +```bash +# Newline-delimited (default) +printf 'Hello\nWorld\n' | npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 -# Install embedeer + CUDA provider -COPY package.json ./ -RUN npm install @jsilvanus/embedeer && \ - npm install @jsilvanus/embedeer-ort-linux-x64-cuda +# Null-byte delimited — safe with filenames/texts that contain newlines +printf 'Hello\0World\0' | npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --delimiter '\0' -COPY . . -``` +# Tab-delimited +printf 'Hello\tWorld' | npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --delimiter '\t' -**Build and run:** +# Custom multi-character delimiter +printf 'Hello|||World|||Foo' | npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --delimiter '|||' -```bash -docker build -t my-embedeer-app . +# File with null-byte delimiter +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --file records.bin --delimiter '\0' -# --gpus all enables NVIDIA GPU access inside the container -docker run --rm --gpus all my-embedeer-app \ - npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" +# Integrate with find -print0 (handles filenames with spaces / newlines) +find ./docs -name '*.txt' -print0 | \ + xargs -0 cat | \ + npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --delimiter '\0' ``` -**docker-compose:** - -```yaml -services: - embedeer: - build: . 
- deploy: - resources: - reservations: - devices: - - driver: nvidia - count: all - capabilities: [gpu] - command: > - npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 - --device gpu - --data "Hello GPU" -``` +Supported escape sequences in `--delimiter`: + +| Sequence | Character | +|----------|-----------| +| `\0` | Null byte (U+0000) | +| `\n` | Newline (U+000A) | +| `\t` | Tab (U+0009) | +| `\r` | Carriage return (U+000D) | + +--- + +## Output Formats -### Windows x64 + DirectML (any GPU) +| Format | Description | +|--------|-------------| +| `json` (default) | JSON array of float arrays: `[[0.1,0.2,...],[...]]` | +| `json --with-text` | JSON array of objects: `[{"text":"...","embedding":[...]}]` | +| `jsonl` | Newline-delimited JSON, one object per line: `{"text":"...","embedding":[...]}` | +| `csv` | CSV with header: `text,dim_0,dim_1,...,dim_N` | +| `txt` | Space-separated floats, one vector per line | +| `txt --with-text` | Tab-separated: `\t` | +| `sql` | `INSERT INTO embeddings (text, vector) VALUES ...;` | -**System requirements:** Windows 10 (1903+) or 11, any DirectX 12 GPU, up-to-date drivers +Use `--dump ` to write the output to a file instead of stdout. Progress messages always go to stderr so they never interfere with piped output. + +### Piping examples ```bash -npm install @jsilvanus/embedeer -npm install @jsilvanus/embedeer-ort-win32-x64-dml +MODEL=Xenova/all-MiniLM-L6-v2 -npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" -``` +# --- json (default) --- +# Embed and pretty-print with jq +echo '["Hello","World"]' | npx @jsilvanus/embedeer --model $MODEL | jq '.[0] | length' -### GPU API options +# --- jsonl --- +# One object per line — pipe to jq, grep, awk, etc. 
+npx @jsilvanus/embedeer --model $MODEL --data "foo" "bar" --output jsonl -```js -import { Embedder } from '@jsilvanus/embedeer'; +# Filter by similarity: extract embedding for downstream processing +npx @jsilvanus/embedeer --model $MODEL --data "query text" --output jsonl \ + | jq -c '.embedding' -// Auto-detect GPU, silent CPU fallback if unavailable -const e1 = await Embedder.create(model, { device: 'auto' }); +# Stream a large file and store as JSONL +npx @jsilvanus/embedeer --model $MODEL --file big.txt --output jsonl --dump out.jsonl -// Require GPU — throws if no GPU provider is available -const e2 = await Embedder.create(model, { device: 'gpu' }); +# --- json --with-text --- +# Keep the source text next to each vector (useful for building a search index) +npx @jsilvanus/embedeer --model $MODEL --output json --with-text \ + --data "cat" "dog" "fish" \ + | jq '.[] | {text, dims: (.embedding | length)}' -// Explicit provider -const e3 = await Embedder.create(model, { provider: 'cuda' }); // Linux CUDA -const e4 = await Embedder.create(model, { provider: 'dml' }); // Windows DirectML -``` +# --- csv --- +# Embed then open in Python/pandas +npx @jsilvanus/embedeer --model $MODEL --file texts.txt --output csv --dump vectors.csv +python3 -c "import pandas as pd; df = pd.read_csv('vectors.csv'); print(df.shape)" -```bash -npx @jsilvanus/embedeer --device auto # try GPU, fall back to CPU -npx @jsilvanus/embedeer --device gpu # require GPU -npx @jsilvanus/embedeer --provider cuda # explicit CUDA (Linux) -npx @jsilvanus/embedeer --provider dml # explicit DirectML (Windows) -``` +# --- txt --- +# Raw floats — useful for awk/paste/numpy text loading +npx @jsilvanus/embedeer --model $MODEL --data "Hello" "World" --output txt \ + | awk '{print NF, "dimensions"}' ---- +# txt --with-text: original text + tab + floats, easy to parse +npx @jsilvanus/embedeer --model $MODEL --file texts.txt --output txt --with-text \ + | while IFS=$'\t' read -r text vec; do echo "TEXT: $text"; 
done -## Input & Output +# --- sql --- +# Generate INSERT statements for a vector DB or SQLite +npx @jsilvanus/embedeer --model $MODEL --file texts.txt --output sql --dump inserts.sql +sqlite3 mydb.sqlite < inserts.sql -Full reference: [`packages/embedeer/README.md`](packages/embedeer/README.md#input-sources) +# --- Chaining with other tools --- +# Embed stdin from another command +cat docs/*.txt | npx @jsilvanus/embedeer --model $MODEL --output jsonl > embeddings.jsonl -### Quick piping examples +# Null-byte input from find (handles any filename or text with newlines) +find ./corpus -name '*.txt' -print0 \ + | xargs -0 cat \ + | npx @jsilvanus/embedeer --model $MODEL --delimiter '\0' --output jsonl +``` -```bash -MODEL=Xenova/all-MiniLM-L6-v2 +--- -# Pipe any text — newline-delimited by default -printf 'Hello\nWorld\n' | npx @jsilvanus/embedeer --model $MODEL +### CLI Examples -# JSON array on stdin -echo '["cat","dog","fish"]' | npx @jsilvanus/embedeer --model $MODEL +```bash +# Pull a model (like ollama pull) +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 -# Null-byte delimiter (safe with text containing newlines) -printf 'Hello\0World\0' | npx @jsilvanus/embedeer --model $MODEL --delimiter '\0' +# Embed a few strings, output JSON (CPU) +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --data "Hello" "World" -# JSONL output — one {"text":...,"embedding":[...]} per line, great for jq / grep -npx @jsilvanus/embedeer --model $MODEL --output jsonl --data "foo" "bar" +# Auto-detect GPU, fall back to CPU if unavailable +# (uses CUDA on Linux, DirectML on Windows, CPU everywhere else) +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device auto --data "Hello" -# Include source text in JSON output -npx @jsilvanus/embedeer --model $MODEL --output json --with-text --data "foo" "bar" +# Require GPU (throws with install instructions if no provider found) +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" -# 
CSV output for pandas / Excel -npx @jsilvanus/embedeer --model $MODEL --file texts.txt --output csv --dump vectors.csv +# Explicit CUDA (Linux x64 — requires CUDA 12 system libraries) +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --provider cuda --data "Hello CUDA" -# SQL INSERT statements -npx @jsilvanus/embedeer --model $MODEL --file texts.txt --output sql --dump inserts.sql +# Explicit DirectML (Windows x64) +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --provider dml --data "Hello DML" -# Tab-separated floats (txt), with original text prepended -npx @jsilvanus/embedeer --model $MODEL --output txt --with-text --data "hello" "world" +# Embed from a file, dump SQL to disk +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 \ + --file texts.txt --output sql --dump out.sql + +# Use quantized model, in-process threads, private model with token +npx @jsilvanus/embedeer --model my-org/private-model \ + --token hf_xxx --dtype q8 --mode thread \ + --data "embed me" ``` -### Interactive / streaming line-reader (`-i` / `--interactive`) +--- + +### Using GPU -Paste records one per line and get embeddings as soon as each batch fills (or when you press Enter on an empty line to flush manually). Ideal for interactive use or streaming large datasets through a pipeline. +No additional packages are needed — `onnxruntime-node` (installed with `@jsilvanus/embedeer`) already +bundles the CUDA provider on Linux x64 and DirectML on Windows x64. 
+ +**Linux x64 — NVIDIA CUDA:** ```bash -MODEL=Xenova/all-MiniLM-L6-v2 +# One-time: install CUDA 12 system libraries (Ubuntu/Debian) +sudo apt install cuda-toolkit-12-6 libcudnn9-cuda-12 -# Interactive terminal session — paste lines, Ctrl+D when done -npx @jsilvanus/embedeer --model $MODEL --interactive --dump embeddings.jsonl +# Auto-detect: uses CUDA here, CPU fallback on any other machine +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device auto --data "Hello" -# Stream a large file in batches (auto-flushes every 32 lines) -cat corpus.txt | npx @jsilvanus/embedeer --model $MODEL -i --output csv --dump out.csv +# Hard-require CUDA (throws with diagnostic error if unavailable): +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" -# GPU-accelerated interactive mode -npx @jsilvanus/embedeer --model $MODEL --interactive --device auto \ - --batch-size 64 --output jsonl --dump out.jsonl +# Explicit CUDA provider: +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --provider cuda --data "Hello CUDA" ``` -**Flushing:** batch fills to `--batch-size` (auto) or empty line (manual). Ctrl+D finishes. Ctrl+C aborts. -**Output:** progress messages go to stderr; embeddings go to `--dump` file or stdout. -**csv** writes the header on the first batch only. **json**/**sql** are promoted to **jsonl** automatically. 
+**Windows x64 — DirectML (any GPU: NVIDIA / AMD / Intel):** + +```bash +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device auto --data "Hello" +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" +npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --provider dml --data "Hello DML" +``` --- -## Provider Selection Logic +## GPU Acceleration + +GPU support is built into `onnxruntime-node` (a dependency of `@huggingface/transformers`): + +| Platform | Provider | Requirement | +|----------------|-----------|--------------------------------------------------------| +| Linux x64 | CUDA | NVIDIA GPU + driver ≥ 525, CUDA 12 toolkit, cuDNN 9 | +| Windows x64 | DirectML | Any DirectX 12 GPU (most GPUs since 2016), Windows 10+ | -| Platform | `device='auto'` or `device='gpu'` order | -|----------|-----------------------------------------| -| Linux x64 | CUDA → (CPU fallback) | -| Windows x64 | CUDA → DirectML → (CPU fallback) | -| Other | CPU only | +### Provider selection logic -For `device='auto'`: silently falls back to CPU if no GPU provider is available. -For `device='gpu'`: throws with a clear error and install instructions. -For explicit `--provider cuda/dml`: throws if libraries are missing, with install instructions. +| `device` | `provider` | Behavior | +|----------|-----------|----------| +| `cpu` (default) | — | Always CPU | +| `auto` | — | Try GPU providers for the platform in order; silent CPU fallback | +| `gpu` | — | Try GPU providers; **throw** if none available | +| any | `cuda` | Load CUDA provider; **throw** if not available or not supported | +| any | `dml` | Load DirectML provider; **throw** if not available or not supported | +| any | `cpu` | Always CPU | + +On Linux x64: GPU order is `cuda`. +On Windows x64: GPU order is `cuda → dml`. 
--- -## Monorepo Development +## How it works -```bash -npm install # install all workspace packages -npm test # run tests (packages/embedeer) ``` +embed(texts) + │ + ├─ split into batches of batchSize + │ + └─ Promise.all(batches) ──► WorkerPool + │ + ├─ [process mode] ChildProcessWorker 0 + │ resolveProvider(device, provider) + │ → pipeline('feature-extraction', model, { device: 'cuda' }) + │ → embed batch A + │ + └─ [process mode] ChildProcessWorker 1 + resolveProvider(device, provider) + → pipeline(...) → embed batch B +``` + +Workers load the model **once** at startup and reuse it for all batches. +Provider activation happens per-worker before the pipeline is created. --- -## Documentation +## Testing + +```bash +npm test +``` -Full API documentation, CLI reference, and all options: [`packages/embedeer/README.md`](packages/embedeer/README.md) +Tests use Node's built-in `node:test` runner. No real model download required. diff --git a/package-lock.json b/package-lock.json index 59e82e8..3efd3e2 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,16 +1,22 @@ { - "name": "embedeer-monorepo", + "name": "@jsilvanus/embedeer", "version": "1.0.0", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "embedeer-monorepo", + "name": "@jsilvanus/embedeer", "version": "1.0.0", "license": "ISC", - "workspaces": [ - "packages/*" - ] + "dependencies": { + "@huggingface/transformers": "^4.0.1" + }, + "bin": { + "embedeer": "src/cli.js" + }, + "engines": { + "node": ">=18" + } }, "node_modules/@emnapi/runtime": { "version": "1.9.2", @@ -563,18 +569,6 @@ "url": "https://opencollective.com/libvips" } }, - "node_modules/@jsilvanus/embedeer": { - "resolved": "packages/embedeer", - "link": true - }, - "node_modules/@jsilvanus/embedeer-ort-linux-x64-cuda": { - "resolved": "packages/ort-linux-x64-cuda", - "link": true - }, - "node_modules/@jsilvanus/embedeer-ort-win32-x64-dml": { - "resolved": "packages/ort-win32-x64-dml", - "link": true - }, 
"node_modules/@protobufjs/aspromise": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz", @@ -1052,6 +1046,7 @@ "packages/embedeer": { "name": "@jsilvanus/embedeer", "version": "1.0.0", + "extraneous": true, "license": "ISC", "dependencies": { "@huggingface/transformers": "^4.0.1" @@ -1066,6 +1061,7 @@ "packages/ort-linux-x64-cuda": { "name": "@jsilvanus/embedeer-ort-linux-x64-cuda", "version": "1.0.0", + "extraneous": true, "hasInstallScript": true, "license": "ISC", "engines": { @@ -1085,6 +1081,7 @@ "packages/ort-win32-x64-dml": { "name": "@jsilvanus/embedeer-ort-win32-x64-dml", "version": "1.0.0", + "extraneous": true, "hasInstallScript": true, "license": "ISC", "engines": { diff --git a/package.json b/package.json index fb9c239..bc7829e 100644 --- a/package.json +++ b/package.json @@ -1,24 +1,46 @@ { - "name": "embedeer-monorepo", + "name": "@jsilvanus/embedeer", "version": "1.0.0", - "private": true, - "description": "Monorepo for embedeer and its optional GPU provider packages", - "workspaces": [ - "packages/*" + "description": "A node.js embedding tool with optional GPU acceleration", + "main": "src/index.js", + "bin": { + "embedeer": "src/cli.js" + }, + "files": [ + "src", + "README.md" ], "scripts": { - "test": "npm run test --workspace=packages/embedeer", - "test:embedeer": "npm run test --workspace=packages/embedeer" + "test": "node --test test/*.test.js" }, "repository": { "type": "git", "url": "git+https://github.com/jsilvanus/embedeer.git" }, - "author": "", + "keywords": [ + "embeddings", + "huggingface", + "nlp", + "transformers", + "parallel", + "gpu", + "cuda", + "onnxruntime" + ], + "author": "jsilvanus", "license": "ISC", + "type": "module", + "engines": { + "node": ">=18" + }, "bugs": { "url": "https://github.com/jsilvanus/embedeer/issues" }, - "homepage": "https://github.com/jsilvanus/embedeer#readme" + "homepage": "https://github.com/jsilvanus/embedeer#readme", + "publishConfig": { + 
"access": "public" + }, + "dependencies": { + "@huggingface/transformers": "^4.0.1" + } } - diff --git a/packages/embedeer/README.md b/packages/embedeer/README.md deleted file mode 100644 index 7ac1f13..0000000 --- a/packages/embedeer/README.md +++ /dev/null @@ -1,430 +0,0 @@ -# embedeer - -A Node.js tool for generating text embeddings using models from [Hugging Face](https://huggingface.co/models). -Supports **batched** input, **parallel** execution, isolated **child-process** workers (default) or **in-process threads**, quantization, optional GPU acceleration, and Hugging Face auth. - ---- - -## Features - -- Downloads any Hugging Face feature-extraction model on first use (cached in `~/.embedeer/models`) -- **Isolated processes** (default) — a worker crash cannot bring down the caller -- **In-process threads** — opt-in via `mode: 'thread'` for lower overhead -- **Sequential** execution when `concurrency: 1` -- Configurable batch size and concurrency -- **GPU acceleration** — optional via separate provider packages (see below) -- Hugging Face API token support (`--token` / `HF_TOKEN` env var) -- Quantization via `dtype` (`fp32` · `fp16` · `q8` · `q4` · `q4f16` · `auto`) -- Rich CLI: pull model, embed from file, dump output as JSON / TXT / SQL - ---- - -## Installation - -```bash -# CPU (default, works everywhere) -npm install @jsilvanus/embedeer - -# GPU — Linux x64 + NVIDIA CUDA -npm install @jsilvanus/embedeer-ort-linux-x64-cuda - -# GPU — Windows x64 + DirectML (any GPU: NVIDIA / AMD / Intel) -npm install @jsilvanus/embedeer-ort-win32-x64-dml -``` - ---- - -## Programmatic API - -### Embed texts (CPU — default) - -```js -import { Embedder } from '@jsilvanus/embedeer'; - -const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { - batchSize: 32, // texts per worker task (default: 32) - concurrency: 2, // parallel workers (default: 2) - mode: 'process', // 'process' | 'thread' (default: 'process') - pooling: 'mean', // 'mean' | 'cls' | 'none' (default: 
'mean') - normalize: true, // L2-normalise vectors (default: true) - token: 'hf_...', // HF API token (optional; also reads HF_TOKEN env) - dtype: 'q8', // quantization dtype (optional) - cacheDir: '/my/cache', // override model cache (default: ~/.embedeer/models) -}); - -const vectors = await embedder.embed(['Hello world', 'Foo bar baz']); -// → number[][] (one 384-dim vector per text for all-MiniLM-L6-v2) - -await embedder.destroy(); // shut down worker processes -``` - -### Embed texts with GPU - -```js -import { Embedder } from '@jsilvanus/embedeer'; - -// Auto-detect GPU (falls back to CPU if no provider is installed) -const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { - device: 'auto', -}); - -// Require GPU (throws if no provider is available) -const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { - device: 'gpu', -}); - -// Explicitly select an execution provider -const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { - provider: 'cuda', // 'cuda' | 'dml' -}); -``` - -### Pull (pre-cache) a model - -Like `ollama pull` — downloads the model once so workers start instantly: - -```js -import { loadModel } from '@jsilvanus/embedeer'; - -const { modelName, cacheDir } = await loadModel('Xenova/all-MiniLM-L6-v2', { - token: 'hf_...', // optional - dtype: 'q8', // optional -}); -``` - ---- - -## CLI - -``` -npx @jsilvanus/embedeer [options] - -Model management (pull / cache model): - npx @jsilvanus/embedeer --model - -Embed texts (batch): - npx @jsilvanus/embedeer --model --data "text1" "text2" ... 
- npx @jsilvanus/embedeer --model --data '["text1","text2"]' - npx @jsilvanus/embedeer --model --file texts.txt - echo '["t1","t2"]' | npx @jsilvanus/embedeer --model - printf 'a\0b\0c' | npx @jsilvanus/embedeer --model --delimiter '\0' - -Interactive / streaming line-reader: - npx @jsilvanus/embedeer --model --interactive --dump out.jsonl - cat big.txt | npx @jsilvanus/embedeer --model -i --output csv --dump out.csv - -Options: - -m, --model Hugging Face model (default: Xenova/all-MiniLM-L6-v2) - -d, --data Text(s) or JSON array to embed - --file Input file: JSON array or delimited texts - -D, --delimiter Record separator for stdin/file (default: \n) - Escape sequences supported: \0 \n \t \r - -i, --interactive Interactive line-reader (see below) - --dump Write output to file instead of stdout - --output Output: json|jsonl|csv|txt|sql (default: json) - --with-text Include source text alongside each embedding - -b, --batch-size Texts per worker batch (default: 32) - -c, --concurrency Parallel workers (default: 2) - --mode process|thread Worker mode (default: process) - -p, --pooling mean|cls|none (default: mean) - --no-normalize Disable L2 normalisation - --dtype Quantization: fp32|fp16|q8|q4|q4f16|auto - --token Hugging Face API token (or set HF_TOKEN env) - --cache-dir Model cache directory (default: ~/.embedeer/models) - --device Compute device: auto|cpu|gpu (default: cpu) - --provider Execution provider override: cpu|cuda|dml - -h, --help Show this help -``` - ---- - -## Input Sources - -Texts can be provided in any of these ways (checked in order): - -| Source | How | -|--------|-----| -| Inline args | `--data "text1" "text2" "text3"` | -| Inline JSON | `--data '["text1","text2"]'` | -| File | `--file texts.txt` (JSON array or one record per line) | -| Stdin | Pipe or redirect — auto-detected; TTY is skipped | -| Interactive | `--interactive` / `-i` — line-reader, embeds as you type | - -**Stdin auto-detection:** when `stdin` is not a TTY (i.e. 
data is piped or redirected), embedeer reads it before deciding what to do. JSON arrays are accepted directly; otherwise records are split on the delimiter. - ---- - -## Interactive Line-Reader Mode (`-i` / `--interactive`) - -The interactive mode opens a line-by-line reader that starts embedding as records arrive — ideal for pasting large datasets into a terminal or streaming data from another process. - -```bash -# Open an interactive session (paste lines, Ctrl+D when done) -npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --interactive --dump embeddings.jsonl - -# Stream a large file through interactive mode with CSV output -cat big.txt | npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 \ - --interactive --output csv --dump embeddings.csv - -# Interactive with GPU, custom batch size, txt output -npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 \ - --interactive --device auto --batch-size 16 --output txt --dump vecs.txt -``` - -**How it works:** - -| Event | What happens | -|-------|-------------| -| Type a line, press Enter | Record is buffered | -| Buffer reaches `--batch-size` | Auto-flush: embed + append to output | -| Type an empty line | Manual flush: embed whatever is buffered | -| Ctrl+D (EOF) | Flush remaining records and exit | -| Ctrl+C | Flush remaining records and exit | - -**Behaviour notes:** - -- Progress messages (`Batch N: M record(s) → file`) always go to **stderr** — they never pollute piped output. -- When stdin is a TTY, a `> ` prompt is shown on stderr. -- Output defaults to **stdout** if `--dump` is omitted; a tip is printed when running in TTY mode. -- `--output json` and `--output sql` are automatically promoted to `jsonl` since they produce complete documents that cannot be appended to incrementally. -- `--output csv` writes the dimension header (`text,dim_0,dim_1,...`) on the first batch only; subsequent batches append data rows. 
-- Each interactive session **clears** the `--dump` file on start so you always get a fresh output file. - -### Configurable delimiter (`-D` / `--delimiter`) - -By default records in stdin and files are split on newline (`\n`). Use `--delimiter` to change it: - -```bash -# Newline-delimited (default) -printf 'Hello\nWorld\n' | npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 - -# Null-byte delimited — safe with filenames/texts that contain newlines -printf 'Hello\0World\0' | npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --delimiter '\0' - -# Tab-delimited -printf 'Hello\tWorld' | npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --delimiter '\t' - -# Custom multi-character delimiter -printf 'Hello|||World|||Foo' | npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --delimiter '|||' - -# File with null-byte delimiter -npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --file records.bin --delimiter '\0' - -# Integrate with find -print0 (handles filenames with spaces / newlines) -find ./docs -name '*.txt' -print0 | \ - xargs -0 cat | \ - npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --delimiter '\0' -``` - -Supported escape sequences in `--delimiter`: - -| Sequence | Character | -|----------|-----------| -| `\0` | Null byte (U+0000) | -| `\n` | Newline (U+000A) | -| `\t` | Tab (U+0009) | -| `\r` | Carriage return (U+000D) | - ---- - -## Output Formats - -| Format | Description | -|--------|-------------| -| `json` (default) | JSON array of float arrays: `[[0.1,0.2,...],[...]]` | -| `json --with-text` | JSON array of objects: `[{"text":"...","embedding":[...]}]` | -| `jsonl` | Newline-delimited JSON, one object per line: `{"text":"...","embedding":[...]}` | -| `csv` | CSV with header: `text,dim_0,dim_1,...,dim_N` | -| `txt` | Space-separated floats, one vector per line | -| `txt --with-text` | Tab-separated: `\t` | -| `sql` | `INSERT INTO embeddings (text, vector) VALUES ...;` | - -Use `--dump ` to write the output to a file 
instead of stdout. Progress messages always go to stderr so they never interfere with piped output. - -### Piping examples - -```bash -MODEL=Xenova/all-MiniLM-L6-v2 - -# --- json (default) --- -# Embed and pretty-print with jq -echo '["Hello","World"]' | npx @jsilvanus/embedeer --model $MODEL | jq '.[0] | length' - -# --- jsonl --- -# One object per line — pipe to jq, grep, awk, etc. -npx @jsilvanus/embedeer --model $MODEL --data "foo" "bar" --output jsonl - -# Filter by similarity: extract embedding for downstream processing -npx @jsilvanus/embedeer --model $MODEL --data "query text" --output jsonl \ - | jq -c '.embedding' - -# Stream a large file and store as JSONL -npx @jsilvanus/embedeer --model $MODEL --file big.txt --output jsonl --dump out.jsonl - -# --- json --with-text --- -# Keep the source text next to each vector (useful for building a search index) -npx @jsilvanus/embedeer --model $MODEL --output json --with-text \ - --data "cat" "dog" "fish" \ - | jq '.[] | {text, dims: (.embedding | length)}' - -# --- csv --- -# Embed then open in Python/pandas -npx @jsilvanus/embedeer --model $MODEL --file texts.txt --output csv --dump vectors.csv -python3 -c "import pandas as pd; df = pd.read_csv('vectors.csv'); print(df.shape)" - -# --- txt --- -# Raw floats — useful for awk/paste/numpy text loading -npx @jsilvanus/embedeer --model $MODEL --data "Hello" "World" --output txt \ - | awk '{print NF, "dimensions"}' - -# txt --with-text: original text + tab + floats, easy to parse -npx @jsilvanus/embedeer --model $MODEL --file texts.txt --output txt --with-text \ - | while IFS=$'\t' read -r text vec; do echo "TEXT: $text"; done - -# --- sql --- -# Generate INSERT statements for a vector DB or SQLite -npx @jsilvanus/embedeer --model $MODEL --file texts.txt --output sql --dump inserts.sql -sqlite3 mydb.sqlite < inserts.sql - -# --- Chaining with other tools --- -# Embed stdin from another command -cat docs/*.txt | npx @jsilvanus/embedeer --model $MODEL --output jsonl > 
embeddings.jsonl - -# Null-byte input from find (handles any filename or text with newlines) -find ./corpus -name '*.txt' -print0 \ - | xargs -0 cat \ - | npx @jsilvanus/embedeer --model $MODEL --delimiter '\0' --output jsonl -``` - ---- - -### CLI Examples - -```bash -# Pull a model (like ollama pull) -npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 - -# Embed a few strings, output JSON (CPU) -npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --data "Hello" "World" - -# Auto-detect GPU, fall back to CPU if unavailable -# (uses CUDA on Linux, DirectML on Windows, CPU everywhere else) -npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device auto --data "Hello" - -# Require GPU (throws with install instructions if no provider found) -npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" - -# Explicit CUDA (Linux — requires @jsilvanus/embedeer-ort-linux-x64-cuda) -npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --provider cuda --data "Hello CUDA" - -# Explicit DirectML (Windows — requires @jsilvanus/embedeer-ort-win32-x64-dml) -npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --provider dml --data "Hello DML" - -# Embed from a file, dump SQL to disk -npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 \ - --file texts.txt --output sql --dump out.sql - -# Use quantized model, in-process threads, private model with token -npx @jsilvanus/embedeer --model my-org/private-model \ - --token hf_xxx --dtype q8 --mode thread \ - --data "embed me" -``` - ---- - -### Using GPU with npx - -Install the provider package for your platform, then pass `--device auto` to use the GPU -wherever available, with silent CPU fallback. 
- -**Linux x64 — NVIDIA CUDA:** - -```bash -# One-time: install CUDA 12 system libraries (Ubuntu/Debian) -sudo apt install cuda-toolkit-12-6 libcudnn9-cuda-12 - -# Install both packages -npm install @jsilvanus/embedeer -npm install @jsilvanus/embedeer-ort-linux-x64-cuda - -# Auto-detect: uses CUDA here, CPU fallback on any other machine -npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device auto --data "Hello" - -# Hard-require CUDA (error + install hint if unavailable): -npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" - -# Explicit CUDA provider: -npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --provider cuda --data "Hello CUDA" -``` - -**Windows x64 — DirectML (any GPU: NVIDIA / AMD / Intel):** - -```bash -npm install @jsilvanus/embedeer -npm install @jsilvanus/embedeer-ort-win32-x64-dml - -npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device auto --data "Hello" -npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" -npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --provider dml --data "Hello DML" -``` - ---- - -## GPU Provider Packages - -GPU support requires an additional provider package that ships a CUDA-enabled (or DirectML-enabled) ONNX Runtime binary. 
- -| Platform | Provider | Package | -|----------------|-----------|-----------------------------------------------| -| Linux x64 | CUDA | `@jsilvanus/embedeer-ort-linux-x64-cuda` | -| Windows x64 | DirectML | `@jsilvanus/embedeer-ort-win32-x64-dml` | - -### Provider selection logic - -| `device` | `provider` | Behavior | -|----------|-----------|----------| -| `cpu` (default) | — | Always CPU | -| `auto` | — | Try GPU providers for the platform in order; silent CPU fallback | -| `gpu` | — | Try GPU providers; **throw** if none available | -| any | `cuda` | Load CUDA provider; **throw** if not available or not supported | -| any | `dml` | Load DirectML provider; **throw** if not available or not supported | -| any | `cpu` | Always CPU | - -On Linux x64: GPU order is `cuda`. -On Windows x64: GPU order is `cuda → dml`. - ---- - -## How it works - -``` -embed(texts) - │ - ├─ split into batches of batchSize - │ - └─ Promise.all(batches) ──► WorkerPool - │ - ├─ [process mode] ChildProcessWorker 0 - │ resolveProvider(device, provider) - │ → pipeline('feature-extraction', model, { device: 'cuda' }) - │ → embed batch A - │ - └─ [process mode] ChildProcessWorker 1 - resolveProvider(device, provider) - → pipeline(...) → embed batch B -``` - -Workers load the model **once** at startup and reuse it for all batches. -Provider activation happens per-worker before the pipeline is created. - ---- - -## Testing - -```bash -cd packages/embedeer && npm test -# or from the monorepo root: -npm test -``` - -Tests use Node's built-in `node:test` runner. No real model download required. 
diff --git a/packages/embedeer/package.json b/packages/embedeer/package.json deleted file mode 100644 index dc5ccfb..0000000 --- a/packages/embedeer/package.json +++ /dev/null @@ -1,47 +0,0 @@ -{ - "name": "@jsilvanus/embedeer", - "version": "1.0.0", - "description": "A node.js embedding tool with optional GPU acceleration", - "main": "src/index.js", - "bin": { - "embedeer": "src/cli.js" - }, - "files": [ - "src", - "README.md" - ], - "scripts": { - "test": "node --test test/*.test.js" - }, - "repository": { - "type": "git", - "url": "git+https://github.com/jsilvanus/embedeer.git", - "directory": "packages/embedeer" - }, - "keywords": [ - "embeddings", - "huggingface", - "nlp", - "transformers", - "parallel", - "gpu", - "cuda", - "onnxruntime" - ], - "author": "jsilvanus", - "license": "ISC", - "type": "module", - "engines": { - "node": ">=18" - }, - "bugs": { - "url": "https://github.com/jsilvanus/embedeer/issues" - }, - "homepage": "https://github.com/jsilvanus/embedeer/tree/main/packages/embedeer#readme", - "publishConfig": { - "access": "public" - }, - "dependencies": { - "@huggingface/transformers": "^4.0.1" - } -} diff --git a/packages/embedeer/src/provider-loader.js b/packages/embedeer/src/provider-loader.js deleted file mode 100644 index 905a494..0000000 --- a/packages/embedeer/src/provider-loader.js +++ /dev/null @@ -1,148 +0,0 @@ -/** - * Provider loader — dynamically selects and activates an ONNX Runtime - * execution-provider package before @huggingface/transformers creates its - * pipeline. - * - * Provider packages are published as separate optional npm packages: - * @jsilvanus/embedeer-ort-linux-x64-cuda — CUDA on Linux x64 - * @jsilvanus/embedeer-ort-win32-x64-dml — DirectML on Windows x64 - * - * Each provider package exports: - * activate(): Promise — runs any setup needed before pipeline() - * getDevice(): string — the device string to pass to pipeline() - * e.g. 
'cuda', 'dml' - * - * Usage: - * import { resolveProvider } from './provider-loader.js'; - * const deviceStr = await resolveProvider(device, provider); - * // pass deviceStr to pipeline() if truthy - */ - -/** - * Map of "--" to package name. - * @type {Record} - */ -export const PROVIDER_PACKAGES = { - 'linux-x64-cuda': '@jsilvanus/embedeer-ort-linux-x64-cuda', - 'win32-x64-dml': '@jsilvanus/embedeer-ort-win32-x64-dml', -}; - -/** - * Returns the ordered list of preferred GPU providers for the current platform. - * @returns {string[]} - */ -export function getPlatformDefaultProviders() { - const platform = process.platform; - const arch = process.arch; - if (platform === 'linux' && arch === 'x64') return ['cuda']; - if (platform === 'win32' && arch === 'x64') return ['cuda', 'dml']; - return []; -} - -/** - * Attempt to load a specific provider package. Returns a result object - * that distinguishes between: - * - package not installed (ERR_MODULE_NOT_FOUND) - * - package installed but activation failed (e.g. native binary missing) - * - package loaded successfully - * - * @param {string} provider e.g. 'cuda' or 'dml' - * @returns {Promise<{loaded: boolean, deviceStr: string|null, error: Error|null}>} - */ -export async function tryLoadProvider(provider) { - const key = `${process.platform}-${process.arch}-${provider}`; - const packageName = PROVIDER_PACKAGES[key]; - if (!packageName) { - return { loaded: false, deviceStr: null, error: null }; - } - try { - const mod = await import(packageName); - if (typeof mod.activate === 'function') { - await mod.activate(); - } - const deviceStr = typeof mod.getDevice === 'function' ? mod.getDevice() : provider; - return { loaded: true, deviceStr, error: null }; - } catch (err) { - // Any error (package not installed, binary missing, etc.) 
→ not loaded - return { loaded: false, deviceStr: null, error: err }; - } -} - -/** - * Resolve and activate the appropriate execution provider, returning the - * device string to pass to `@huggingface/transformers` pipeline(). - * - * @param {'auto'|'cpu'|'gpu'|undefined} device - * @param {'cpu'|'cuda'|'dml'|undefined} provider Optional explicit override - * @returns {Promise} Device string or undefined (CPU default) - * - * @throws {Error} When an explicit provider is requested but not available. - * @throws {Error} When device='gpu' and no GPU provider is available. - */ -export async function resolveProvider(device, provider) { - // Normalise to lower-case strings for consistent comparison - const dev = (device ?? 'cpu').toLowerCase(); - const prov = provider ? provider.toLowerCase() : undefined; - - // --- Explicit CPU --- - if (dev === 'cpu' && !prov) return undefined; - if (prov === 'cpu') return undefined; - - // --- Explicit provider --- - if (prov && prov !== 'cpu') { - const key = `${process.platform}-${process.arch}-${prov}`; - const packageName = PROVIDER_PACKAGES[key]; - - if (!packageName) { - const supportedPlatforms = Object.entries(PROVIDER_PACKAGES) - .filter(([k]) => k.endsWith(`-${prov}`)) - .map(([k]) => k.replace(`-${prov}`, '')); - throw new Error( - `Provider '${prov}' is not supported on ${process.platform}/${process.arch}. ` + - `Supported platforms: ${supportedPlatforms.join(', ') || 'none'}.`, - ); - } - - const { loaded, deviceStr, error } = await tryLoadProvider(prov); - if (!loaded) { - // If error is NOT a "package not found" error, re-throw original (e.g. binary missing) - if (error && error.code !== 'ERR_MODULE_NOT_FOUND') { - throw error; - } - throw new Error( - `Provider '${prov}' was requested but its package '${packageName}' is not installed. ` + - `Run: npm install ${packageName}`, - ); - } - return deviceStr ?? 
undefined; - } - - // --- device='gpu' or device='auto': try platform defaults in order --- - const candidates = getPlatformDefaultProviders(); - let lastError = null; - - for (const candidate of candidates) { - const { loaded, deviceStr, error } = await tryLoadProvider(candidate); - if (loaded) return deviceStr ?? candidate; - if (error) lastError = error; - } - - if (dev === 'gpu') { - // If a package was found but activate() failed with a non-not-found error, - // re-throw that error as it contains useful diagnostic information. - if (lastError && lastError.code !== 'ERR_MODULE_NOT_FOUND') { - throw lastError; - } - const packageNames = candidates - .map((p) => PROVIDER_PACKAGES[`${process.platform}-${process.arch}-${p}`]) - .filter(Boolean); - throw new Error( - `device='gpu' was requested but no GPU provider packages are installed ` + - `for ${process.platform}/${process.arch}. ` + - `Install one of: ${packageNames.join(', ') || '(none available for this platform)'}.`, - ); - } - - // device='auto' and no GPU provider found → silently fall back to CPU - return undefined; -} diff --git a/packages/ort-linux-x64-cuda/README.md b/packages/ort-linux-x64-cuda/README.md deleted file mode 100644 index 91fbfcd..0000000 --- a/packages/ort-linux-x64-cuda/README.md +++ /dev/null @@ -1,153 +0,0 @@ -# @jsilvanus/embedeer-ort-linux-x64-cuda - -CUDA execution provider for [embedeer](https://github.com/jsilvanus/embedeer) on **Linux x64**. - -Install this package alongside `embedeer` to enable GPU-accelerated embeddings using NVIDIA CUDA on Linux. - -## How it works - -`onnxruntime-node` v1.14+ ships `libonnxruntime_providers_cuda.so` on Linux x64 as part of its standard npm package — **no additional binary download is required**. - -This package verifies that the required CUDA 12 system libraries are present, then returns `device='cuda'` so that `@huggingface/transformers` pipeline runs on the GPU. 
- -## System Requirements - -| Requirement | Version | -|-------------|---------| -| NVIDIA GPU Driver | ≥ 525 (CUDA 12 compatible) | -| CUDA Toolkit | 12.x (`libcudart.so.12`, `libcublas.so.12`, `libcublasLt.so.12`, `libcurand.so.10`, `libcufft.so.11`) | -| cuDNN | 9.x (`libcudnn.so.9`) | -| OS | Linux x64 | - -### Installing CUDA 12 + cuDNN 9 - -**Ubuntu/Debian (recommended):** -```bash -# Add NVIDIA package repository -wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb -sudo dpkg -i cuda-keyring_1.1-1_all.deb -sudo apt update - -# Install CUDA 12 and cuDNN 9 -sudo apt install cuda-toolkit-12-6 libcudnn9-cuda-12 - -# Add to PATH / LD_LIBRARY_PATH -echo 'export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH' >> ~/.bashrc -source ~/.bashrc -``` - -**CUDA Toolkit installer:** https://developer.nvidia.com/cuda-downloads -**cuDNN download:** https://developer.nvidia.com/cudnn-downloads - -Verify installation: -```bash -nvidia-smi # confirm GPU is detected -nvcc --version # confirm CUDA toolkit is installed -``` - -## Installation - -```bash -# Step 1 — main package -npm install @jsilvanus/embedeer - -# Step 2 — CUDA provider -npm install @jsilvanus/embedeer-ort-linux-x64-cuda -``` - -## Docker - -The easiest way to get CUDA working is with an [NVIDIA CUDA Docker image](https://hub.docker.com/r/nvidia/cuda) — all required CUDA 12 + cuDNN 9 libraries are pre-installed. 
- -**Host requirements:** -- NVIDIA GPU driver ≥ 525 -- [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) - -**Example `Dockerfile`:** - -```dockerfile -# CUDA 12 + cuDNN 9 runtime — all required libs are pre-installed -FROM nvidia/cuda:12.6.3-cudnn9-runtime-ubuntu24.04 - -WORKDIR /app - -# Install Node.js (via NodeSource) -RUN apt-get update && apt-get install -y curl && \ - curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \ - apt-get install -y nodejs && \ - rm -rf /var/lib/apt/lists/* - -# Install embedeer + CUDA provider -COPY package.json ./ -RUN npm install @jsilvanus/embedeer && \ - npm install @jsilvanus/embedeer-ort-linux-x64-cuda - -COPY . . -``` - -**Run with `--gpus all`:** - -```bash -docker build -t my-embedeer-app . -docker run --rm --gpus all my-embedeer-app \ - npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device gpu --data "Hello GPU" -``` - -**docker-compose:** - -```yaml -services: - embedeer: - build: . - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: all - capabilities: [gpu] - command: > - npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 - --device gpu - --data "Hello GPU" -``` - -> **Why does this just work in Docker?** -> The NVIDIA Container Toolkit mounts `/dev/nvidiactl` into the container (our GPU presence check) and NVIDIA CUDA images place all libraries under `/usr/local/cuda/lib64` (one of our default search paths), so `activate()` finds everything automatically. 
- -## Usage - -```js -import { Embedder } from 'embedeer'; - -// Auto-detect GPU (falls back to CPU if CUDA unavailable) -const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { device: 'auto' }); - -// Require GPU (throws with clear error if CUDA unavailable) -const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { device: 'gpu' }); - -// Explicit CUDA -const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { provider: 'cuda' }); -``` - -```bash -# CLI — auto GPU -npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device auto --data "Hello GPU" - -# CLI — explicit CUDA -npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --provider cuda --data "Hello CUDA" -``` - -## Error messages - -If CUDA libraries are missing, you'll see: - -``` -@jsilvanus/embedeer-ort-linux-x64-cuda: Missing CUDA system libraries: libcudart.so.12, libcudnn.so.9 - -onnxruntime-node CUDA requires CUDA 12 + cuDNN 9. Install them: - - # Option A — CUDA 12 + cuDNN 9 via apt (Ubuntu/Debian) - sudo apt install cuda-toolkit-12-6 libcudnn9-cuda-12 - ... -``` diff --git a/packages/ort-linux-x64-cuda/index.js b/packages/ort-linux-x64-cuda/index.js deleted file mode 100644 index 46eaf61..0000000 --- a/packages/ort-linux-x64-cuda/index.js +++ /dev/null @@ -1,146 +0,0 @@ -/** - * @jsilvanus/embedeer-ort-linux-x64-cuda - * - * CUDA execution provider for embedeer on Linux x64. - * - * How it works: - * onnxruntime-node v1.20+ ships libonnxruntime_providers_cuda.so on Linux x64. - * No additional binary download is required — the CUDA execution provider is - * already bundled with the standard onnxruntime-node package. - * (@huggingface/transformers@4.x requires onnxruntime-node@1.24.x which ships CUDA.) - * - * This package verifies that the required CUDA 12 system libraries are - * available before attempting to use the CUDA execution provider. 
- * - * System requirements: - * - NVIDIA GPU with driver ≥ 525 (CUDA 12 compatible) - * - CUDA 12 Toolkit: libcudart.so.12, libcublas.so.12, libcublasLt.so.12, - * libcurand.so.10, libcufft.so.11 - * - cuDNN 9: libcudnn.so.9 - * - * Install CUDA 12: https://developer.nvidia.com/cuda-downloads - * Install cuDNN 9: https://developer.nvidia.com/cudnn-downloads - * Or via apt (Ubuntu/Debian): - * sudo apt install cuda-toolkit-12-x libcudnn9-cuda-12 - */ - -import { execSync } from 'child_process'; -import { existsSync } from 'fs'; - -/** - * Shared libraries required by libonnxruntime_providers_cuda.so (CUDA 12 / cuDNN 9). - * These are system-installed libraries; they are NOT bundled with onnxruntime-node. - */ -const REQUIRED_LIBS = [ - 'libcudart.so.12', - 'libcublas.so.12', - 'libcublasLt.so.12', - 'libcurand.so.10', - 'libcufft.so.11', - 'libcudnn.so.9', -]; - -/** - * Common directories where CUDA libraries may be installed. - * Includes entries from LD_LIBRARY_PATH so custom installs are detected. - */ -function cudaSearchDirs() { - const extra = (process.env.LD_LIBRARY_PATH ?? '').split(':').filter(Boolean); - return [ - '/usr/local/cuda/lib64', - '/usr/local/cuda-12/lib64', - '/usr/local/cuda-12.0/lib64', - '/usr/local/cuda-12.1/lib64', - '/usr/local/cuda-12.2/lib64', - '/usr/local/cuda-12.3/lib64', - '/usr/local/cuda-12.4/lib64', - '/usr/local/cuda-12.5/lib64', - '/usr/local/cuda-12.6/lib64', - '/usr/lib/x86_64-linux-gnu', - '/usr/lib64', - ...extra, - ]; -} - -/** - * Find a shared library by name. Checks common CUDA paths then falls back to - * `ldconfig -p` for libraries registered in the dynamic linker cache. - * - * @param {string} libName e.g. 'libcudart.so.12' - * @returns {string|null} Path to the library, or null if not found. 
- */ -function findLib(libName) { - for (const dir of cudaSearchDirs()) { - const fullPath = `${dir}/${libName}`; - if (existsSync(fullPath)) return fullPath; - } - - // Use ldconfig cache as a fallback - try { - const output = execSync('ldconfig -p', { - stdio: ['ignore', 'pipe', 'ignore'], - encoding: 'utf8', - timeout: 3000, - }); - for (const line of output.split('\n')) { - if (line.includes(libName) && line.includes('=>')) { - const match = line.match(/=>\s*(.+)/); - if (match) return match[1].trim(); - } - } - } catch { - // ldconfig not available in all environments; that's ok - } - - return null; -} - -/** - * Activate the CUDA execution provider. - * - * Checks that all required CUDA 12 / cuDNN 9 system libraries are present. - * onnxruntime-node v1.20+ bundles libonnxruntime_providers_cuda.so on Linux x64 - * (@huggingface/transformers@4.x requires onnxruntime-node@1.24.x which ships CUDA), - * so no additional binary download is needed — only system CUDA libraries are required. - * - * @returns {Promise} - * @throws {Error} If NVIDIA GPU is not detected or required CUDA libraries are missing. - */ -export async function activate() { - // 1. Check for NVIDIA GPU / driver - if (!existsSync('/dev/nvidiactl')) { - throw new Error( - '@jsilvanus/embedeer-ort-linux-x64-cuda: No NVIDIA GPU detected (/dev/nvidiactl not found).\n' + - 'Ensure NVIDIA drivers are installed.\n' + - 'Verify with: nvidia-smi', - ); - } - - // 2. Check required CUDA / cuDNN system libraries - const missing = REQUIRED_LIBS.filter((lib) => findLib(lib) === null); - - if (missing.length > 0) { - throw new Error( - `@jsilvanus/embedeer-ort-linux-x64-cuda: Missing CUDA system libraries: ${missing.join(', ')}\n\n` + - 'onnxruntime-node CUDA requires CUDA 12 + cuDNN 9. 
Install them:\n\n' + - ' # Option A — CUDA 12 + cuDNN 9 via apt (Ubuntu/Debian)\n' + - ' sudo apt install cuda-toolkit-12-6 libcudnn9-cuda-12\n\n' + - ' # Option B — CUDA Toolkit installer from NVIDIA\n' + - ' https://developer.nvidia.com/cuda-downloads\n' + - ' https://developer.nvidia.com/cudnn-downloads\n\n' + - ' # After installing, make sure libraries are on LD_LIBRARY_PATH if non-standard:\n' + - ' export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH', - ); - } - - // onnxruntime-node will dynamically load libonnxruntime_providers_cuda.so at - // runtime when device='cuda' is passed to pipeline(). No further action needed here. -} - -/** - * Returns the device string passed to @huggingface/transformers pipeline(). - * @returns {string} - */ -export function getDevice() { - return 'cuda'; -} diff --git a/packages/ort-linux-x64-cuda/install.js b/packages/ort-linux-x64-cuda/install.js deleted file mode 100644 index 7f0d9bb..0000000 --- a/packages/ort-linux-x64-cuda/install.js +++ /dev/null @@ -1,105 +0,0 @@ -/** - * Install / post-install check for @jsilvanus/embedeer-ort-linux-x64-cuda - * - * onnxruntime-node v1.20+ ships libonnxruntime_providers_cuda.so on Linux x64. - * (@huggingface/transformers@4.x requires onnxruntime-node@1.24.x which ships CUDA.) - * No additional binary download is required. This script just verifies that - * the necessary CUDA 12 system libraries are present, and prints actionable - * install instructions if they are not. 
- * - * System requirements verified here: - * - NVIDIA GPU with CUDA 12-compatible driver (≥ 525) - * - CUDA 12 Toolkit: libcudart.so.12, libcublas.so.12, libcublasLt.so.12, - * libcurand.so.10, libcufft.so.11 - * - cuDNN 9: libcudnn.so.9 - */ - -import { execSync } from 'child_process'; -import { existsSync } from 'fs'; - -if (process.platform !== 'linux' || process.arch !== 'x64') { - console.warn( - `[embedeer] @jsilvanus/embedeer-ort-linux-x64-cuda: skipping checks on ${process.platform}/${process.arch} (this package is for Linux x64 only)`, - ); - process.exit(0); -} - -console.log('[embedeer] @jsilvanus/embedeer-ort-linux-x64-cuda: checking system CUDA requirements...'); - -const REQUIRED_LIBS = [ - 'libcudart.so.12', - 'libcublas.so.12', - 'libcublasLt.so.12', - 'libcurand.so.10', - 'libcufft.so.11', - 'libcudnn.so.9', -]; - -const CUDA_SEARCH_DIRS = [ - '/usr/local/cuda/lib64', - '/usr/local/cuda-12/lib64', - '/usr/lib/x86_64-linux-gnu', - '/usr/lib64', - ...(process.env.LD_LIBRARY_PATH ?? 
'').split(':').filter(Boolean), -]; - -function findLib(libName) { - for (const dir of CUDA_SEARCH_DIRS) { - if (existsSync(`${dir}/${libName}`)) return `${dir}/${libName}`; - } - try { - const output = execSync('ldconfig -p', { - stdio: ['ignore', 'pipe', 'ignore'], - encoding: 'utf8', - timeout: 3000, - }); - for (const line of output.split('\n')) { - if (line.includes(libName) && line.includes('=>')) { - const match = line.match(/=>\s*(.+)/); - if (match) return match[1].trim(); - } - } - } catch { /* ldconfig not available */ } - return null; -} - -// Check NVIDIA GPU / driver -const hasGpu = existsSync('/dev/nvidiactl'); -if (!hasGpu) { - console.warn( - '\n[embedeer] WARNING: No NVIDIA GPU detected (/dev/nvidiactl not found).\n' + - ' @jsilvanus/embedeer-ort-linux-x64-cuda requires an NVIDIA GPU with CUDA 12 drivers.\n' + - ' GPU acceleration will not be available until drivers are installed.\n', - ); -} else { - console.log('[embedeer] ✓ NVIDIA GPU detected'); -} - -// Check CUDA libraries -const missing = REQUIRED_LIBS.filter((lib) => findLib(lib) === null); -const found = REQUIRED_LIBS.filter((lib) => findLib(lib) !== null); - -for (const lib of found) { - console.log(`[embedeer] ✓ ${lib}`); -} - -if (missing.length > 0) { - console.warn( - `\n[embedeer] WARNING: Missing CUDA system libraries: ${missing.join(', ')}\n\n` + - ' onnxruntime-node CUDA EP requires CUDA 12 + cuDNN 9.\n\n' + - ' Install on Ubuntu/Debian:\n' + - ' sudo apt install cuda-toolkit-12-6 libcudnn9-cuda-12\n\n' + - ' Or download from NVIDIA:\n' + - ' https://developer.nvidia.com/cuda-downloads\n' + - ' https://developer.nvidia.com/cudnn-downloads\n\n' + - ' After installing, if libraries are not on the default path:\n' + - ' export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH\n', - ); - // Exit 0 so npm install doesn't fail — the user may install CUDA later. 
- process.exit(0); -} - -console.log( - '\n[embedeer] @jsilvanus/embedeer-ort-linux-x64-cuda: all CUDA requirements satisfied.\n' + - ' GPU acceleration is available. Use device="gpu" or device="auto" in embedeer.\n', -); diff --git a/packages/ort-linux-x64-cuda/package.json b/packages/ort-linux-x64-cuda/package.json deleted file mode 100644 index 8671ada..0000000 --- a/packages/ort-linux-x64-cuda/package.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "name": "@jsilvanus/embedeer-ort-linux-x64-cuda", - "version": "1.0.0", - "description": "CUDA execution provider for embedeer on Linux x64", - "type": "module", - "main": "index.js", - "files": [ - "index.js", - "install.js", - "README.md" - ], - "scripts": { - "install": "node install.js" - }, - "repository": { - "type": "git", - "url": "git+https://github.com/jsilvanus/embedeer.git", - "directory": "packages/ort-linux-x64-cuda" - }, - "keywords": [ - "embedeer", - "onnxruntime", - "cuda", - "gpu", - "linux" - ], - "author": "jsilvanus", - "license": "ISC", - "engines": { - "node": ">=18" - }, - "publishConfig": { - "access": "public" - } -} diff --git a/packages/ort-win32-x64-dml/README.md b/packages/ort-win32-x64-dml/README.md deleted file mode 100644 index bbddd70..0000000 --- a/packages/ort-win32-x64-dml/README.md +++ /dev/null @@ -1,54 +0,0 @@ -# @jsilvanus/embedeer-ort-win32-x64-dml - -DirectML execution provider for [embedeer](https://github.com/jsilvanus/embedeer) on **Windows x64**. - -Install this package alongside `embedeer` to enable GPU-accelerated embeddings using DirectML on Windows. Supports **NVIDIA, AMD, and Intel GPUs** — no CUDA installation required. - -## How it works - -`onnxruntime-node` ships DirectML support bundled on Windows x64 — **no additional binary download is required**. - -DirectML is a Microsoft API built into Windows 10 (1903+) and Windows 11 that accelerates machine learning inference across all DirectX 12-capable GPUs. 
- -## System Requirements - -| Requirement | Version | -|-------------|---------| -| Windows | 10 (1903+) or Windows 11 | -| GPU | Any DirectX 12-capable GPU (NVIDIA, AMD, Intel — most GPUs from 2016+) | -| GPU Driver | Up-to-date drivers from your GPU vendor | - -No CUDA installation needed. - -## Installation - -```bash -# Step 1 — main package -npm install @jsilvanus/embedeer - -# Step 2 — DirectML provider -npm install @jsilvanus/embedeer-ort-win32-x64-dml -``` - -## Usage - -```js -import { Embedder } from 'embedeer'; - -// Auto-detect GPU (DirectML is tried first on Windows) -const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { device: 'auto' }); - -// Require GPU -const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { device: 'gpu' }); - -// Explicit DirectML -const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { provider: 'dml' }); -``` - -```bash -# CLI — auto GPU (uses DirectML on Windows) -npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --device auto --data "Hello GPU" - -# CLI — explicit DirectML -npx @jsilvanus/embedeer --model Xenova/all-MiniLM-L6-v2 --provider dml --data "Hello DML" -``` diff --git a/packages/ort-win32-x64-dml/index.js b/packages/ort-win32-x64-dml/index.js deleted file mode 100644 index 43483f0..0000000 --- a/packages/ort-win32-x64-dml/index.js +++ /dev/null @@ -1,46 +0,0 @@ -/** - * @jsilvanus/embedeer-ort-win32-x64-dml - * - * DirectML execution provider for embedeer on Windows x64. - * - * How it works: - * onnxruntime-node ships DirectML support on Windows x64 out of the box. - * No additional binary download is required — DirectML is bundled with - * the standard onnxruntime-node package and comes with Windows 10/11. - * - * Hardware: - * Supports NVIDIA, AMD, Intel, and Qualcomm GPUs via Direct3D 12. - * No CUDA installation required. 
- * - * System requirements: - * - Windows 10 (1903+) or Windows 11 - * - Any DirectX 12-capable GPU (most GPUs from 2016+) - * - Up-to-date GPU drivers (from your GPU vendor) - */ - -/** - * Activate the DirectML execution provider. - * - * DirectML is bundled with onnxruntime-node on Windows and available natively - * on Windows 10/11. No system library installation is required. - * - * @returns {Promise} - * @throws {Error} If not running on Windows. - */ -export async function activate() { - if (process.platform !== 'win32') { - throw new Error( - `@jsilvanus/embedeer-ort-win32-x64-dml: DirectML is only available on Windows (current platform: ${process.platform}).`, - ); - } - // DirectML is natively available via onnxruntime-node on Windows 10/11. - // onnxruntime will load the DirectML EP automatically when device='dml' is requested. -} - -/** - * Returns the device string passed to @huggingface/transformers pipeline(). - * @returns {string} - */ -export function getDevice() { - return 'dml'; -} diff --git a/packages/ort-win32-x64-dml/install.js b/packages/ort-win32-x64-dml/install.js deleted file mode 100644 index 56e6ff0..0000000 --- a/packages/ort-win32-x64-dml/install.js +++ /dev/null @@ -1,26 +0,0 @@ -/** - * Install / post-install check for @jsilvanus/embedeer-ort-win32-x64-dml - * - * onnxruntime-node ships DirectML support bundled on Windows x64. - * No additional binary download is required. - * - * DirectML is part of Windows 10/11 and supports all DirectX 12 GPUs: - * NVIDIA, AMD, Intel, Qualcomm, etc. No CUDA installation needed. - * - * This script just confirms the environment is suitable. 
- */ - -if (process.platform !== 'win32') { - console.warn( - `[embedeer] @jsilvanus/embedeer-ort-win32-x64-dml: skipping checks on ${process.platform}/${process.arch} (this package is for Windows x64 only)`, - ); - process.exit(0); -} - -console.log( - '[embedeer] @jsilvanus/embedeer-ort-win32-x64-dml: DirectML is bundled with onnxruntime-node on Windows.\n' + - ' No additional binary download is required.\n' + - ' GPU acceleration via DirectML is available on Windows 10 (1903+) / Windows 11\n' + - ' with any DirectX 12-capable GPU.\n' + - ' Make sure your GPU drivers are up to date.\n', -); diff --git a/packages/ort-win32-x64-dml/package.json b/packages/ort-win32-x64-dml/package.json deleted file mode 100644 index 7805d05..0000000 --- a/packages/ort-win32-x64-dml/package.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "name": "@jsilvanus/embedeer-ort-win32-x64-dml", - "version": "1.0.0", - "description": "DirectML execution provider for embedeer on Windows x64", - "type": "module", - "main": "index.js", - "files": [ - "index.js", - "install.js", - "README.md" - ], - "scripts": { - "install": "node install.js" - }, - "repository": { - "type": "git", - "url": "git+https://github.com/jsilvanus/embedeer.git", - "directory": "packages/ort-win32-x64-dml" - }, - "keywords": [ - "embedeer", - "onnxruntime", - "directml", - "dml", - "gpu", - "windows" - ], - "author": "jsilvanus", - "license": "ISC", - "engines": { - "node": ">=18" - }, - "publishConfig": { - "access": "public" - } -} diff --git a/packages/embedeer/src/child-process-worker.js b/src/child-process-worker.js similarity index 100% rename from packages/embedeer/src/child-process-worker.js rename to src/child-process-worker.js diff --git a/packages/embedeer/src/cli.js b/src/cli.js similarity index 100% rename from packages/embedeer/src/cli.js rename to src/cli.js diff --git a/packages/embedeer/src/embedder.js b/src/embedder.js similarity index 100% rename from packages/embedeer/src/embedder.js rename to 
src/embedder.js diff --git a/packages/embedeer/src/index.js b/src/index.js similarity index 100% rename from packages/embedeer/src/index.js rename to src/index.js diff --git a/packages/embedeer/src/model-cache.js b/src/model-cache.js similarity index 100% rename from packages/embedeer/src/model-cache.js rename to src/model-cache.js diff --git a/src/provider-loader.js b/src/provider-loader.js new file mode 100644 index 0000000..e4c80ba --- /dev/null +++ b/src/provider-loader.js @@ -0,0 +1,249 @@ +/** + * Provider loader — selects and activates an ONNX Runtime execution provider + * before @huggingface/transformers creates its pipeline. + * + * onnxruntime-node (a transitive dependency of @huggingface/transformers@4.x) + * already ships the CUDA provider on Linux x64 and DirectML on Windows x64 with + * no additional packages needed. This module performs the necessary system checks + * (NVIDIA driver, CUDA libraries) and returns the device string to pass to + * pipeline(). + * + * Usage: + * import { resolveProvider } from './provider-loader.js'; + * const deviceStr = await resolveProvider(device, provider); + * // pass deviceStr to pipeline() if truthy + */ + +import { execSync } from 'child_process'; +import { existsSync } from 'fs'; + +// ── CUDA (linux/x64) ───────────────────────────────────────────────────────── + +/** + * Shared libraries required by libonnxruntime_providers_cuda.so (CUDA 12 / cuDNN 9). + * These are system-installed libraries; they are NOT bundled with onnxruntime-node. + */ +const REQUIRED_CUDA_LIBS = [ + 'libcudart.so.12', + 'libcublas.so.12', + 'libcublasLt.so.12', + 'libcurand.so.10', + 'libcufft.so.11', + 'libcudnn.so.9', +]; + +/** + * Common directories where CUDA libraries may be installed. + * Includes entries from LD_LIBRARY_PATH so custom installs are detected. + * @returns {string[]} + */ +function cudaSearchDirs() { + const extra = (process.env.LD_LIBRARY_PATH ?? 
'').split(':').filter(Boolean); + return [ + '/usr/local/cuda/lib64', + '/usr/local/cuda-12/lib64', + '/usr/local/cuda-12.0/lib64', + '/usr/local/cuda-12.1/lib64', + '/usr/local/cuda-12.2/lib64', + '/usr/local/cuda-12.3/lib64', + '/usr/local/cuda-12.4/lib64', + '/usr/local/cuda-12.5/lib64', + '/usr/local/cuda-12.6/lib64', + '/usr/lib/x86_64-linux-gnu', + '/usr/lib64', + ...extra, + ]; +} + +/** + * Find a shared library by name. Checks common CUDA paths then falls back to + * `ldconfig -p` for libraries registered in the dynamic linker cache. + * + * @param {string} libName e.g. 'libcudart.so.12' + * @returns {string|null} Path to the library, or null if not found. + */ +function findLib(libName) { + for (const dir of cudaSearchDirs()) { + if (existsSync(`${dir}/${libName}`)) return `${dir}/${libName}`; + } + try { + const output = execSync('ldconfig -p', { + stdio: ['ignore', 'pipe', 'ignore'], + encoding: 'utf8', + timeout: 3000, + }); + for (const line of output.split('\n')) { + if (line.includes(libName) && line.includes('=>')) { + const match = line.match(/=>\s*(.+)/); + if (match) return match[1].trim(); + } + } + } catch { + // ldconfig not available in all environments + } + return null; +} + +/** + * Activate the CUDA execution provider. + * Checks for NVIDIA GPU driver and required CUDA 12 / cuDNN 9 system libraries. + * + * @returns {Promise} + * @throws {Error} If NVIDIA GPU is not detected or required CUDA libraries are missing. + */ +async function activateCuda() { + if (!existsSync('/dev/nvidiactl')) { + throw new Error( + 'No NVIDIA GPU detected (/dev/nvidiactl not found).\n' + + 'Ensure NVIDIA drivers are installed. Verify with: nvidia-smi', + ); + } + + const missing = REQUIRED_CUDA_LIBS.filter((lib) => findLib(lib) === null); + if (missing.length > 0) { + throw new Error( + `Missing CUDA system libraries: ${missing.join(', ')}\n\n` + + 'onnxruntime-node CUDA requires CUDA 12 + cuDNN 9. 
Install them:\n\n' + + ' # Option A — CUDA 12 + cuDNN 9 via apt (Ubuntu/Debian)\n' + + ' sudo apt install cuda-toolkit-12-6 libcudnn9-cuda-12\n\n' + + ' # Option B — CUDA Toolkit installer from NVIDIA\n' + + ' https://developer.nvidia.com/cuda-downloads\n' + + ' https://developer.nvidia.com/cudnn-downloads\n\n' + + ' # After installing, make sure libraries are on LD_LIBRARY_PATH if non-standard:\n' + + ' export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH', + ); + } +} + +// ── DirectML (win32/x64) ───────────────────────────────────────────────────── + +/** + * Activate the DirectML execution provider. + * DirectML is bundled with onnxruntime-node on Windows. Just verifies platform. + * + * @returns {Promise<void>} + * @throws {Error} If not running on Windows. + */ +async function activateDml() { + if (process.platform !== 'win32') { + throw new Error( + `DirectML is only available on Windows (current platform: ${process.platform}).`, + ); + } +} + +// ── Internal provider map ──────────────────────────────────────────────────── + +/** + * Internal map of "<platform>-<arch>-<provider>" to inline activation logic. + * Replacing the old external-package-per-provider pattern since onnxruntime-node + * already bundles the CUDA and DirectML providers. + * + * @type {Record<string, { activate: () => Promise<void>, getDevice: () => string }>} + */ +const PROVIDER_IMPLS = { + 'linux-x64-cuda': { activate: activateCuda, getDevice: () => 'cuda' }, + 'win32-x64-dml': { activate: activateDml, getDevice: () => 'dml' }, +}; + +// ── Public API ─────────────────────────────────────────────────────────────── + +/** + * Returns the ordered list of preferred GPU providers for the current platform. 
+ * @returns {string[]} + */ +export function getPlatformDefaultProviders() { + const platform = process.platform; + const arch = process.arch; + if (platform === 'linux' && arch === 'x64') return ['cuda']; + if (platform === 'win32' && arch === 'x64') return ['cuda', 'dml']; + return []; +} + +/** + * Attempt to activate a specific provider. Returns a result object: + * - { loaded: true, deviceStr, error: null } — provider ready + * - { loaded: false, deviceStr: null, error } — provider unavailable + * + * @param {string} provider e.g. 'cuda' or 'dml' + * @returns {Promise<{loaded: boolean, deviceStr: string|null, error: Error|null}>} + */ +export async function tryLoadProvider(provider) { + const key = `${process.platform}-${process.arch}-${provider}`; + const impl = PROVIDER_IMPLS[key]; + if (!impl) { + return { loaded: false, deviceStr: null, error: null }; + } + try { + await impl.activate(); + const deviceStr = impl.getDevice(); + return { loaded: true, deviceStr, error: null }; + } catch (err) { + return { loaded: false, deviceStr: null, error: err }; + } +} + +/** + * Resolve and activate the appropriate execution provider, returning the + * device string to pass to `@huggingface/transformers` pipeline(). + * + * @param {'auto'|'cpu'|'gpu'|undefined} device + * @param {'cpu'|'cuda'|'dml'|undefined} provider Optional explicit override + * @returns {Promise} Device string or undefined (CPU default) + * + * @throws {Error} When an explicit provider is requested but not available. + * @throws {Error} When device='gpu' and no GPU provider is available. + */ +export async function resolveProvider(device, provider) { + const dev = (device ?? 'cpu').toLowerCase(); + const prov = provider ? 
provider.toLowerCase() : undefined; + + // --- Explicit CPU --- + if (dev === 'cpu' && !prov) return undefined; + if (prov === 'cpu') return undefined; + + // --- Explicit provider --- + if (prov && prov !== 'cpu') { + const key = `${process.platform}-${process.arch}-${prov}`; + if (!PROVIDER_IMPLS[key]) { + const supportedPlatforms = Object.keys(PROVIDER_IMPLS) + .filter((k) => k.endsWith(`-${prov}`)) + .map((k) => k.replace(`-${prov}`, '')); + throw new Error( + `Provider '${prov}' is not supported on ${process.platform}/${process.arch}. ` + + `Supported platforms: ${supportedPlatforms.join(', ') || 'none'}.`, + ); + } + + const { loaded, deviceStr, error } = await tryLoadProvider(prov); + if (!loaded) { + if (error) throw error; + throw new Error( + `Provider '${prov}' is not available on ${process.platform}/${process.arch}.`, + ); + } + return deviceStr ?? undefined; + } + + // --- device='gpu' or device='auto': try platform defaults in order --- + const candidates = getPlatformDefaultProviders(); + let lastError = null; + + for (const candidate of candidates) { + const { loaded, deviceStr, error } = await tryLoadProvider(candidate); + if (loaded) return deviceStr ?? candidate; + if (error) lastError = error; + } + + if (dev === 'gpu') { + if (lastError) throw lastError; + throw new Error( + `device='gpu' was requested but no GPU provider is available ` + + `for ${process.platform}/${process.arch}. 
` + + `Supported: linux/x64 (CUDA 12 + cuDNN 9), win32/x64 (DirectML).`, + ); + } + + // device='auto' and no GPU available → silently fall back to CPU + return undefined; +} diff --git a/packages/embedeer/src/thread-worker-script.js b/src/thread-worker-script.js similarity index 100% rename from packages/embedeer/src/thread-worker-script.js rename to src/thread-worker-script.js diff --git a/packages/embedeer/src/thread-worker.js b/src/thread-worker.js similarity index 100% rename from packages/embedeer/src/thread-worker.js rename to src/thread-worker.js diff --git a/packages/embedeer/src/worker-pool.js b/src/worker-pool.js similarity index 100% rename from packages/embedeer/src/worker-pool.js rename to src/worker-pool.js diff --git a/packages/embedeer/src/worker.js b/src/worker.js similarity index 100% rename from packages/embedeer/src/worker.js rename to src/worker.js diff --git a/packages/embedeer/test/child-process-worker.test.js b/test/child-process-worker.test.js similarity index 100% rename from packages/embedeer/test/child-process-worker.test.js rename to test/child-process-worker.test.js diff --git a/packages/embedeer/test/cli-format.test.js b/test/cli-format.test.js similarity index 100% rename from packages/embedeer/test/cli-format.test.js rename to test/cli-format.test.js diff --git a/packages/embedeer/test/embedder-options.test.js b/test/embedder-options.test.js similarity index 100% rename from packages/embedeer/test/embedder-options.test.js rename to test/embedder-options.test.js diff --git a/packages/embedeer/test/embedder.test.js b/test/embedder.test.js similarity index 100% rename from packages/embedeer/test/embedder.test.js rename to test/embedder.test.js diff --git a/packages/embedeer/test/helpers/crash-worker.js b/test/helpers/crash-worker.js similarity index 100% rename from packages/embedeer/test/helpers/crash-worker.js rename to test/helpers/crash-worker.js diff --git a/packages/embedeer/test/helpers/echo-thread-worker.js 
b/test/helpers/echo-thread-worker.js similarity index 100% rename from packages/embedeer/test/helpers/echo-thread-worker.js rename to test/helpers/echo-thread-worker.js diff --git a/packages/embedeer/test/helpers/echo-worker.js b/test/helpers/echo-worker.js similarity index 100% rename from packages/embedeer/test/helpers/echo-worker.js rename to test/helpers/echo-worker.js diff --git a/packages/embedeer/test/provider-loader.test.js b/test/provider-loader.test.js similarity index 72% rename from packages/embedeer/test/provider-loader.test.js rename to test/provider-loader.test.js index 5e92829..589085f 100644 --- a/packages/embedeer/test/provider-loader.test.js +++ b/test/provider-loader.test.js @@ -1,17 +1,16 @@ /** * Unit tests for provider-loader.js * - * Tests verify provider selection logic and error messages when provider - * packages are missing or unsupported. + * Tests verify provider selection logic and error messages when a GPU provider + * is unavailable or unsupported on the current platform. * - * All tests use module mocking to avoid any real network or native binary - * access — the provider-loader is tested purely for its logic. + * All tests use process.platform/arch overrides to isolate platform logic + * without requiring real GPU hardware. 
*/ -import { test, describe, mock, before, after } from 'node:test'; +import { test, describe } from 'node:test'; import assert from 'node:assert/strict'; import { - PROVIDER_PACKAGES, getPlatformDefaultProviders, tryLoadProvider, resolveProvider, @@ -35,15 +34,6 @@ async function withPlatform(platform, arch, fn) { } } -// ── PROVIDER_PACKAGES map ──────────────────────────────────────────────────── - -describe('PROVIDER_PACKAGES', () => { - test('contains entries for all supported platform+provider combinations', () => { - assert.equal(PROVIDER_PACKAGES['linux-x64-cuda'], '@jsilvanus/embedeer-ort-linux-x64-cuda'); - assert.equal(PROVIDER_PACKAGES['win32-x64-dml'], '@jsilvanus/embedeer-ort-win32-x64-dml'); - }); -}); - // ── getPlatformDefaultProviders() ──────────────────────────────────────────── describe('getPlatformDefaultProviders()', () => { @@ -77,15 +67,15 @@ describe('tryLoadProvider()', () => { }); }); - test('returns { loaded: false } when provider package is not installed or binary is missing', async () => { - // In the workspace, @jsilvanus/embedeer-ort-linux-x64-cuda is linked but the native - // binary does not exist (install.js was not run), so activate() throws. - // tryLoadProvider must return { loaded: false } in either case. + test('returns { loaded: false } when GPU hardware or system libs are missing', async () => { + // In a typical CI environment there is no NVIDIA GPU, so activateCuda() + // throws when /dev/nvidiactl is missing. tryLoadProvider must catch it + // and return { loaded: false }. 
await withPlatform('linux', 'x64', async () => { const result = await tryLoadProvider('cuda'); assert.equal(result.loaded, false); assert.equal(result.deviceStr, null); - // error may be set (binary not found) or null (package not installed) + // error may be set (GPU not found) or null (provider not implemented) }); }); }); @@ -110,12 +100,11 @@ describe('resolveProvider()', () => { assert.equal(result, undefined); }); - // ── device=auto with no packages installed ──────────────────────────────── + // ── device=auto with no GPU available ──────────────────────────────────── test('device=auto returns undefined (CPU fallback) when GPU provider fails to activate', async () => { await withPlatform('linux', 'x64', async () => { - // @jsilvanus/embedeer-ort-linux-x64-cuda is linked in the workspace but binary is - // missing. device='auto' must silently fall back to CPU (return undefined). + // No NVIDIA GPU in CI; device='auto' must silently fall back to CPU. const result = await resolveProvider('auto', undefined); assert.equal(result, undefined); }); @@ -128,19 +117,20 @@ describe('resolveProvider()', () => { }); }); - // ── device=gpu with no packages installed ───────────────────────────────── + // ── device=gpu with no GPU available ───────────────────────────────────── - test('device=gpu throws when no GPU provider is available (linux/x64)', async () => { + test('device=gpu throws with GPU-related error when no GPU available (linux/x64)', async () => { await withPlatform('linux', 'x64', async () => { - // In the workspace, ort-linux-x64-cuda is linked but binary is missing. - // resolveProvider should throw (either the activate error or a "not installed" error). - // The error must reference the @jsilvanus package name to guide the user. + // No NVIDIA GPU in CI; resolveProvider should throw with a diagnostic + // message about the GPU or CUDA requirements. 
await assert.rejects( () => resolveProvider('gpu', undefined), (err) => { assert.ok( - err.message.includes('@jsilvanus/embedeer-ort-linux-x64-cuda'), - `Expected package name in error, got: ${err.message}`, + err.message.toLowerCase().includes('nvidia') || + err.message.toLowerCase().includes('cuda') || + err.message.toLowerCase().includes('gpu'), + `Expected GPU-related context in error, got: ${err.message}`, ); return true; }, @@ -154,7 +144,7 @@ describe('resolveProvider()', () => { () => resolveProvider('gpu', undefined), (err) => { assert.ok( - err.message.includes('device=\'gpu\'') || err.message.includes("device='gpu'"), + err.message.includes("device='gpu'"), `Expected GPU error message, got: ${err.message}`, ); return true; @@ -163,21 +153,15 @@ describe('resolveProvider()', () => { }); }); - // ── explicit provider not installed ────────────────────────────────────── + // ── explicit provider not available ────────────────────────────────────── - test('explicit provider=cuda re-throws activate error when CUDA libraries are missing', async () => { + test('explicit provider=cuda throws with diagnostic error when GPU hardware is missing', async () => { await withPlatform('linux', 'x64', async () => { - // In this environment @jsilvanus/embedeer-ort-linux-x64-cuda is installed (workspace link) - // but there is no NVIDIA GPU. activate() throws the GPU-not-found error which - // is re-thrown by resolveProvider so the user gets a clear diagnostic. + // No NVIDIA GPU in CI; activate() throws a diagnostic error about the + // missing hardware or CUDA libraries. resolveProvider re-throws it. 
await assert.rejects( () => resolveProvider('cpu', 'cuda'), (err) => { - assert.ok( - err.message.includes('@jsilvanus/embedeer-ort-linux-x64-cuda'), - `Expected package name in error, got: ${err.message}`, - ); - // The error is the activate() diagnostic, not a generic "not installed" msg assert.ok( err.message.toLowerCase().includes('nvidia') || err.message.toLowerCase().includes('cuda') || @@ -190,10 +174,9 @@ describe('resolveProvider()', () => { }); }); - test('explicit provider=dml succeeds on win32 when package is installed', async () => { + test('explicit provider=dml succeeds on win32 when platform is Windows', async () => { await withPlatform('win32', 'x64', async () => { - // DML is bundled with onnxruntime-node on Windows; activate() just checks - // the platform (mocked to win32 here) and succeeds. + // activateDml() checks process.platform === 'win32' (mocked here) and succeeds. const result = await resolveProvider('cpu', 'dml'); assert.equal(result, 'dml'); }); diff --git a/packages/embedeer/test/thread-worker.test.js b/test/thread-worker.test.js similarity index 100% rename from packages/embedeer/test/thread-worker.test.js rename to test/thread-worker.test.js diff --git a/packages/embedeer/test/worker-pool-options.test.js b/test/worker-pool-options.test.js similarity index 100% rename from packages/embedeer/test/worker-pool-options.test.js rename to test/worker-pool-options.test.js diff --git a/packages/embedeer/test/worker-pool.test.js b/test/worker-pool.test.js similarity index 100% rename from packages/embedeer/test/worker-pool.test.js rename to test/worker-pool.test.js