From b62ce9f505b912c9f522a24fc8a4facfec3bc22c Mon Sep 17 00:00:00 2001 From: Kevin Jahns Date: Wed, 13 May 2026 15:58:58 +0200 Subject: [PATCH] yn: experiment with merging updates using YN not all tests pass, there are differences in how yjs merges updates vs y-crdt --- README.md | 61 +++++++++++++++++++++++++++++++++++++++++++ package-lock.json | 12 ++++++--- package.json | 4 ++- scripts/build-yn.sh | 56 +++++++++++++++++++++++++++++++++++++++ src/compute-worker.js | 6 ++++- src/compute.js | 6 ++++- src/server.js | 6 ++++- src/yn.js | 16 ++++++++++++ 8 files changed, 160 insertions(+), 7 deletions(-) create mode 100755 scripts/build-yn.sh create mode 100644 src/yn.js diff --git a/README.md b/README.md index a9b5908..c1b4ec4 100644 --- a/README.md +++ b/README.md @@ -368,6 +368,67 @@ I'm looking for sponsors that want to sponsor the following work: If you are interested in sponsoring some of this work, please send a mail to . +## Experimental: native merge via yrs (y-crdt/yn) + +> :warning: **Highly experimental.** Off by default. Do not enable in production. + +y/hub can optionally use [y-crdt/yn](https://github.com/y-crdt/yn) — a thin +Node.js binding (via [neon](https://neon-rs.dev/)) over [yrs](https://github.com/y-crdt/y-crdt), +the Rust port of Yjs — to perform `mergeUpdates` natively instead of in +JavaScript. This is intended for benchmarking the merge hot path; everything +else (sync protocol, attribution metadata, delta/changeset computation, +awareness, snapshots, undo) continues to run on `@y/y`. + +**Scope.** Only the three `Y.mergeUpdates` call sites are affected: + +- the inline fast path on the main thread (`src/compute.js`) +- the worker-thread merge task (`src/compute-worker.js`) +- the WebSocket sync fan-out (`src/server.js`) + +When the flag is off, behavior is unchanged — the `yn` module is not even +loaded. 
+ +**Caveats.** + +- Upstream y-crdt/yn has no npm release, no prebuilt binaries, and exposes a + single function (`applyUpdates(gc, updates)`). v2 update encoding is not + supported. +- Protocol compatibility between yrs and `@y/y` 14's attribution-laden updates + is **not verified**. Updates may round-trip incorrectly. Test against your + workload before drawing any conclusions. +- The native binary must be rebuilt after every `npm install` (npm wipes + `node_modules/yn/` and upstream has no `prepare` script). + +### Build the native binding + +Requires [Rust](https://rustup.rs/) ≥ 1.85 (edition 2024) and `git`: + +```bash +npm run build:yn +``` + +This clones `y-crdt/yn`, runs `cargo build --release` + `neon dist`, and +installs the resulting `index.node` into `node_modules/yn/`. Override the +upstream ref with `YN_REF=<ref> npm run build:yn`. + +### Run with native merge enabled + +After the standard setup (see the **Integration Guide** above), set +`USE_Y_CRDT=1` in your environment (or pass `--use-y-crdt` on the CLI): + +```bash +# one-off +USE_Y_CRDT=1 node --env-file .env ./bin/yhub.js + +# or in your .env (or .env.testing) +echo 'USE_Y_CRDT=1' >> .env +npm run start:server +``` + +The flag is read via `lib0/environment.hasConf`, so both `USE_Y_CRDT=…` and +`--use-y-crdt` work. Server and worker each evaluate the flag independently; +set it for both processes if you want native merges everywhere. + # Quick Start (standalone Docker) The fastest way to try y/hub. 
A single container runs PostgreSQL, Valkey diff --git a/package-lock.json b/package-lock.json index 7bcae59..80999d5 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@y/hub", - "version": "0.2.16", + "version": "0.2.18", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@y/hub", - "version": "0.2.16", + "version": "0.2.18", "license": "AGPL-3.0 OR PROPRIETARY", "dependencies": { "@y/protocols": "^1.0.6-rc.1", @@ -16,7 +16,8 @@ "pino": "^10.3.1", "postgres": "^3.4.3", "redis": "^5.10.0", - "uws": "github:uNetworking/uWebSockets.js#v20.57.0" + "uws": "github:uNetworking/uWebSockets.js#v20.57.0", + "yn": "github:y-crdt/yn" }, "bin": { "yhub": "bin/yhub.js" @@ -5567,6 +5568,11 @@ "url": "https://github.com/sponsors/dmonad" } }, + "node_modules/yn": { + "version": "0.1.0", + "resolved": "git+ssh://git@github.com/y-crdt/yn.git#39671535ab2d2b54bc2b7e2a1cfd61053d548e53", + "license": "ISC" + }, "node_modules/yocto-queue": { "version": "0.1.0", "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz", diff --git a/package.json b/package.json index c06a280..f36ae73 100644 --- a/package.json +++ b/package.json @@ -18,6 +18,7 @@ "dist": "npm run types", "types": "tsc --skipLibCheck", "lint": "standard && tsc --skipLibCheck", + "build:yn": "scripts/build-yn.sh", "test": "sh -c 'FORCE_COLOR=true node --expose-gc --max-old-space-size=8192 --env-file .env $(test -f .env.testing && echo --env-file .env.testing) tests/index.js \"$@\" | pino-pretty -S -L debug' --", "debug:test": "sh -c 'FORCE_COLOR=true node --expose-gc --max-old-space-size=8192 --env-file .env $(test -f .env.testing && echo --env-file .env.testing) --inspect-brk tests/index.js \"$@\" | pino-pretty -S' --", "preversion": "npm run lint && npm run dist", @@ -71,7 +72,8 @@ "pino": "^10.3.1", "postgres": "^3.4.3", "redis": "^5.10.0", - "uws": "github:uNetworking/uWebSockets.js#v20.57.0" + "uws": "github:uNetworking/uWebSockets.js#v20.57.0", + 
"yn": "github:y-crdt/yn" }, "engines": { "npm": ">=8.0.0", diff --git a/scripts/build-yn.sh b/scripts/build-yn.sh new file mode 100755 index 0000000..13e0ad2 --- /dev/null +++ b/scripts/build-yn.sh @@ -0,0 +1,56 @@ +#!/bin/sh +# Builds the y-crdt/yn native binding from source and installs the compiled +# index.node into node_modules/yn/. Re-run after every `npm install` — npm +# wipes node_modules/yn, and the upstream repo has no prepare script or +# prebuilt binaries. +# +# Requires: cargo (rustup.rs), git, npm. +# Override the ref with YN_REF=. + +set -eu + +YN_REPO="https://github.com/y-crdt/yn.git" +YN_REF="${YN_REF:-main}" +YHUB_DIR="$(cd "$(dirname "$0")/.." && pwd)" +YN_DIR="$YHUB_DIR/node_modules/yn" +BUILD_DIR="$(mktemp -d)" +trap 'rm -rf "$BUILD_DIR"' EXIT + +command -v cargo >/dev/null 2>&1 || { + echo "error: cargo not found. Install Rust via https://rustup.rs" >&2 + exit 1 +} + +# yn / yrs 0.25 require Rust edition 2024 (stable since 1.85). +RUSTC_MIN_MAJOR=1 +RUSTC_MIN_MINOR=85 +RUSTC_VER="$(rustc --version | awk '{print $2}')" +RUSTC_MAJOR="$(echo "$RUSTC_VER" | cut -d. -f1)" +RUSTC_MINOR="$(echo "$RUSTC_VER" | cut -d. -f2)" +if [ "$RUSTC_MAJOR" -lt "$RUSTC_MIN_MAJOR" ] || + { [ "$RUSTC_MAJOR" -eq "$RUSTC_MIN_MAJOR" ] && [ "$RUSTC_MINOR" -lt "$RUSTC_MIN_MINOR" ]; }; then + echo "error: rustc $RUSTC_VER is too old. yn requires >= ${RUSTC_MIN_MAJOR}.${RUSTC_MIN_MINOR} (edition 2024)." 
>&2 + echo " Run: rustup update stable" >&2 + exit 1 +fi + +echo "==> cloning $YN_REPO ($YN_REF) into $BUILD_DIR" +git clone --depth 1 --branch "$YN_REF" "$YN_REPO" "$BUILD_DIR" + +echo "==> installing yn build dependencies" +(cd "$BUILD_DIR" && npm install) + +echo "==> building yn (release)" +(cd "$BUILD_DIR" && npm run build) + +[ -f "$BUILD_DIR/index.node" ] || { + echo "error: build did not produce index.node" >&2 + exit 1 +} + +echo "==> installing into $YN_DIR" +mkdir -p "$YN_DIR" +cp "$BUILD_DIR/index.node" "$YN_DIR/index.node" +cp "$BUILD_DIR/package.json" "$YN_DIR/package.json" + +echo "==> done. yn ready at $YN_DIR/index.node" diff --git a/src/compute-worker.js b/src/compute-worker.js index a376300..88ec9af 100644 --- a/src/compute-worker.js +++ b/src/compute-worker.js @@ -1,11 +1,15 @@ import { parentPort } from 'node:worker_threads' import * as Y from '@y/y' +import * as env from 'lib0/environment' import * as time from 'lib0/time' import * as encoding from 'lib0/encoding' +import { ynMergeUpdates } from './yn.js' import { logger } from './logger.js' const log = logger.child({ module: 'compute-worker' }) +const mergeUpdates = env.hasConf('use-y-crdt') ? 
ynMergeUpdates : Y.mergeUpdates + if (parentPort == null) { throw new Error('Unable to run node worker!') } @@ -80,7 +84,7 @@ port.on('message', /** @param {import('./compute.js').ComputeTask} msg */ msg => break } case 'mergeUpdates': { - const result = Y.mergeUpdates(msg.updates) + const result = mergeUpdates(msg.updates) port.postMessage(result, [result.buffer]) break } diff --git a/src/compute.js b/src/compute.js index 260de0c..9b1e4b2 100644 --- a/src/compute.js +++ b/src/compute.js @@ -4,11 +4,15 @@ import * as time from 'lib0/time' import * as s from 'lib0/schema' import * as promise from 'lib0/promise' import * as Y from '@y/y' +import * as env from 'lib0/environment' import * as math from 'lib0/math' +import { ynMergeUpdates } from './yn.js' import { logger } from './logger.js' const log = logger.child({ module: 'compute' }) +const mergeUpdates = env.hasConf('use-y-crdt') ? ynMergeUpdates : Y.mergeUpdates + const workerUrl = new URL('./compute-worker.js', import.meta.url) const $computeTask = s.$union( @@ -268,7 +272,7 @@ class ComputePool { totalSize += updates[i].byteLength } if (totalSize <= 5120 || updates.length <= 1) { - return promise.resolveWith(Y.mergeUpdates(updates)) + return promise.resolveWith(mergeUpdates(updates)) } return this.run({ type: 'mergeUpdates', updates }, [], logContext) } diff --git a/src/server.js b/src/server.js index 8132ed9..161a94d 100644 --- a/src/server.js +++ b/src/server.js @@ -4,6 +4,7 @@ import * as encoding from 'lib0/encoding' import * as decoding from 'lib0/decoding' import * as promise from 'lib0/promise' import * as Y from '@y/y' +import * as env from 'lib0/environment' import * as s from 'lib0/schema' import * as time from 'lib0/time' import * as number from 'lib0/number' @@ -11,10 +12,13 @@ import * as t from './types.js' import * as protocol from './protocol.js' import * as math from 'lib0/math' import * as buffer from 'lib0/buffer' +import { ynMergeUpdates } from './yn.js' import { logger } from './logger.js' 
const log = logger.child({ module: 'ws' }) +const mergeUpdates = env.hasConf('use-y-crdt') ? ynMergeUpdates : Y.mergeUpdates + /** * @param {Y.ContentIds} contentids * @param {string} userid @@ -537,7 +541,7 @@ class WSUser { }) // @todo send this as a single update message if (ydocUpdates.length > 0) { - this.sendData(protocol.encodeSyncUpdate(Y.mergeUpdates(ydocUpdates))) + this.sendData(protocol.encodeSyncUpdate(mergeUpdates(ydocUpdates))) } if (awarenessUpdates.length > 0) { this.sendData(protocol.mergeAwarenessUpdates(awarenessUpdates)) diff --git a/src/yn.js b/src/yn.js new file mode 100644 index 0000000..285a044 --- /dev/null +++ b/src/yn.js @@ -0,0 +1,16 @@ +import { createRequire } from 'node:module' +import * as env from 'lib0/environment' + +// yn ships only a compiled `index.node` with no JS wrapper; Node's ESM loader +// can't import `.node` files directly, so route through CJS createRequire. +const require = createRequire(import.meta.url) +const YN = env.hasConf('use-y-crdt') ? require('yn') : /** @type {any} */ (null) + +/** + * Merge updates with the yrs (Rust) binding. Creates a yrs Doc, applies all + * updates in a single transaction, and returns the v1-encoded merged state. + * + * @param {Array<Uint8Array>} updates + * @returns {Uint8Array} + */ +export const ynMergeUpdates = (updates) => YN.applyUpdates(false, updates)