Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 72 additions & 1 deletion src/opentype/shapers/DefaultShaper.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import {isDigit} from 'unicode-properties';
import {isDigit, isMark} from 'unicode-properties';
import GlyphInfo from '../GlyphInfo';

const VARIATION_FEATURES = ['rvrn'];
const COMMON_FEATURES = ['ccmp', 'locl', 'rlig', 'mark', 'mkmk'];
Expand Down Expand Up @@ -42,6 +43,13 @@ export default class DefaultShaper {
}

static assignFeatures(plan, glyphs) {
// Apply Unicode canonical composition (NFC) before GSUB, matching what
// HarfBuzz does for non-complex scripts: when the font has a precomposed
// glyph for a base + combining-mark sequence, use it. Decomposed input
// (e.g. "i" + U+0300) otherwise shapes as separate glyphs ([i, gravecomb])
// instead of the precomposed glyph the font intends ([igrave]).
composeGlyphs(plan.font, glyphs);

// Enable contextual fractions
for (let i = 0; i < glyphs.length; i++) {
let glyph = glyphs[i];
Expand Down Expand Up @@ -70,3 +78,66 @@ export default class DefaultShaper {
}
}
}

// Canonically compose (NFC) every "base + combining marks" cluster in `glyphs`,
// in place, so that decomposed input can reach the font's precomposed glyph
// (e.g. "i" + U+0300 -> [igrave] rather than [i, gravecomb]; Arabic alef +
// fathatan + hamza-above -> [alef-with-hamza, fathatan]).
//
// A cluster is one glyph holding a single non-mark code point, followed by the
// run of glyphs that each hold a single mark code point. Glyphs carrying zero
// or several code points never start or extend a cluster.
//
// font:   used to look up glyphs for the composed code points.
// glyphs: GlyphInfo array; mutated via splice when a cluster is rebuilt.
function composeGlyphs(font, glyphs) {
  const isSingleMark = glyph =>
    glyph.codePoints.length === 1 && isMark(glyph.codePoints[0]);

  for (let start = 0; start < glyphs.length; ) {
    const base = glyphs[start];

    // Anything that is not a lone, non-mark code point cannot open a cluster.
    if (base.codePoints.length !== 1 || isMark(base.codePoints[0])) {
      start += 1;
      continue;
    }

    // Grow the cluster over the marks trailing the base.
    let stop = start + 1;
    while (stop < glyphs.length && isSingleMark(glyphs[stop])) {
      stop += 1;
    }

    const cluster = glyphs.slice(start, stop).map(glyph => glyph.codePoints[0]);
    const composed = composeCodePoints(font, cluster);

    // A result of the same length is at most a canonical reorder of the marks.
    // That is deliberately NOT applied: it is not needed to reach a precomposed
    // glyph, and it would change the mark order that later GSUB lookups see
    // (e.g. Arabic shadda + vowel calt lookups).
    if (composed.length === cluster.length) {
      start = stop;
      continue;
    }

    // Features are still global at this stage, so every glyph of the rebuilt
    // cluster (precomposed base plus leftover marks) takes the base's features.
    const { features } = base;
    const rebuilt = composed.map(
      cp => new GlyphInfo(font, font.glyphForCodePoint(cp).id, [cp], features)
    );
    glyphs.splice(start, stop - start, ...rebuilt);
    start += rebuilt.length;
  }
}

// Font-aware Unicode canonical composition for one cluster's code points.
//
// The cluster is normalized to NFC (which canonically reorders the combining
// marks and composes them). Each resulting code point the font has a glyph for
// is kept. One the font cannot render is broken up again, but only as far as
// necessary: the longest canonically composed prefix the font does support is
// kept and the remaining marks are left separate for GPOS mark positioning.
// E.g. with a font that has U+00EA but not U+1EBF, "e" + U+0302 + U+0301
// yields [U+00EA, U+0301] rather than three separate glyphs, and an input of
// [U+00EA, U+0301] is returned unchanged instead of being decomposed further.
//
// font:       object exposing hasGlyphForCodePoint(codePoint) -> boolean.
// codePoints: array of code points (numbers) making up one cluster.
// Returns a new array of code points; the input array is not modified.
function composeCodePoints(font, codePoints) {
  const result = [];
  for (const char of String.fromCodePoint(...codePoints).normalize('NFC')) {
    const cp = char.codePointAt(0);
    if (font.hasGlyphForCodePoint(cp)) {
      result.push(cp);
    } else {
      result.push(...decomposeForFont(font, char));
    }
  }
  return result;
}

// Decompose a single character the font has no glyph for, keeping the longest
// prefix of its NFD form that recomposes to one code point the font supports.
// Falls back to the full NFD sequence when no such prefix exists (this also
// covers characters without a canonical decomposition, which come back as-is).
function decomposeForFont(font, char) {
  const parts = [...char.normalize('NFD')];
  // keep === parts.length would be `char` itself (known to be missing) and
  // keep === 1 is just the full decomposition, so only the range between
  // those needs probing.
  for (let keep = parts.length - 1; keep >= 2; keep--) {
    const head = [...parts.slice(0, keep).join('').normalize('NFC')];
    if (head.length !== 1) continue;
    const headCp = head[0].codePointAt(0);
    if (font.hasGlyphForCodePoint(headCp)) {
      // NFC(prefix) followed by the untouched tail is canonically equivalent
      // to the full sequence, and the tail marks keep their relative order.
      return [headCp, ...parts.slice(keep).map(part => part.codePointAt(0))];
    }
  }
  return parts.map(part => part.codePointAt(0));
}
50 changes: 50 additions & 0 deletions test/shaping.js
Original file line number Diff line number Diff line change
Expand Up @@ -582,4 +582,54 @@ describe('shaping', function () {
test('SHBALI-2/12', 'NotoSans/NotoSansBalinese-Regular.ttf', "ᬓ᭄ᭅᬸ", '23+2275|162+0|60@0,-1000+0');
});
});

describe('canonical composition (NFC)', function () {
// HarfBuzz composes a base + combining-mark sequence into the font's
// precomposed glyph before GSUB/GPOS when the font has one. The default
// shaper does the same: decomposed input must shape identically to the
// precomposed character, not as a separate base + floating mark.
let font = fontkit.openSync(new URL('data/FiraSans/FiraSans-Regular.ttf', import.meta.url));
let shape = (...cps) => font.layout(String.fromCodePoint(...cps)).glyphs.map(g => g.id);

it('composes base + combining mark into the precomposed glyph', function () {
// "i" + U+0300 (combining grave) === precomposed U+00EC "ì" (igrave)
assert.deepEqual(shape(0x69, 0x300), shape(0x00EC));
});

it('composes a multi-mark sequence greedily', function () {
// "e" + U+0302 (circumflex) + U+0301 (acute) === precomposed U+1EBF "ế"
assert.deepEqual(shape(0x65, 0x302, 0x301), shape(0x1EBF));
});

it('leaves a sequence decomposed when the font has no precomposed glyph', function () {
// No precomposed "b-grave" exists, so the mark must stay separate.
assert.equal(shape(0x62, 0x300).length, 2);
});

it('does not interfere with GSUB ligatures', function () {
// "office": the fi ligature must still form (composition is mark-only).
assert.ok(shape(0x6F, 0x66, 0x66, 0x69, 0x63, 0x65).length < 6);
});

it('reorders and composes Arabic marks across combining classes', function () {
let amiri = fontkit.openSync(new URL('data/amiri/amiri-regular.ttf', import.meta.url));
let shapeAr = (...cps) => amiri.layout(String.fromCodePoint(...cps)).glyphs.map(g => g.id);
// alef + combining hamza-above === precomposed U+0623 (alef with hamza).
assert.deepEqual(shapeAr(0x627, 0x654), shapeAr(0x623));
// alef + fathatan + hamza-above: the hamza (ccc 230) composes onto the
// alef across the lower-class fathatan (ccc 27) per canonical reordering,
// leaving the fathatan — i.e. identical to precomposed U+0623 + fathatan.
assert.deepEqual(shapeAr(0x627, 0x64b, 0x654), shapeAr(0x623, 0x64b));
});

it('leaves a non-composing mark cluster unreordered for GSUB', function () {
// alef + shadda + fathatan don't compose, so the cluster must stay in its
// original order: canonically reordering shadda (ccc 33) after fathatan
// (ccc 27) would break Amiri's calt, which keys on shadda-before-vowel and
// yields fathatan's small variant. Applying a pure NFC reorder regresses it.
let amiri = fontkit.openSync(new URL('data/amiri/amiri-regular.ttf', import.meta.url));
let { glyphs } = amiri.layout(String.fromCodePoint(0x627, 0x651, 0x64b), { calt: true }, undefined, 'ARA ');
assert.deepEqual(glyphs.map(g => g.name), ['uni064B.small', 'uni0651', 'uni0627']);
});
});
});