Skip to content

Commit fe22efd

Browse files
committed
Add source span mapping to rich layout APIs
1 parent 89cd7f6 commit fe22efd

4 files changed

Lines changed: 276 additions & 25 deletions

File tree

README.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ const { lines } = layoutWithLines(prepared, 320, 26) // 320px max width, 26px li
6363
for (let i = 0; i < lines.length; i++) ctx.fillText(lines[i].text, 0, i * 26)
6464
```
6565

66+
Each rich line also carries `sourceOffset` / `sourceLength`, so callers can map a rendered line back to the original input without having to redo the whitespace normalization themselves.
67+
6668
- `walkLineRanges()` gives you line widths and cursors without building the text strings:
6769

6870
```ts
@@ -88,6 +90,16 @@ while (true) {
8890
}
8991
```
9092

93+
- `cursorToSourceOffset()` / `cursorRangeToSourceSpan()` convert rich cursors back into original-source offsets:
94+
95+
```ts
96+
const line = layoutNextLine(prepared, cursor, width)
97+
if (line) {
98+
const start = cursorToSourceOffset(prepared, line.start)
99+
const { sourceOffset, sourceLength } = cursorRangeToSourceSpan(prepared, line.start, line.end)
100+
}
101+
```
102+
91103
This usage allows rendering to canvas, SVG, WebGL and (eventually) server-side.
92104

93105
### API Glossary
@@ -109,11 +121,15 @@ type LayoutLine = {
109121
width: number // Measured width of this line, e.g. 87.5
110122
start: LayoutCursor // Inclusive start cursor in prepared segments/graphemes
111123
end: LayoutCursor // Exclusive end cursor in prepared segments/graphemes
124+
sourceOffset: number // Start offset, in the original input, of the span covered by this line
125+
sourceLength: number // Length, in the original input, of the span covered by this line
112126
}
113127
type LayoutLineRange = {
114128
width: number // Measured width of this line, e.g. 87.5
115129
start: LayoutCursor // Inclusive start cursor in prepared segments/graphemes
116130
end: LayoutCursor // Exclusive end cursor in prepared segments/graphemes
131+
sourceOffset: number // Start offset, in the original input, of the span covered by this line
132+
sourceLength: number // Length, in the original input, of the span covered by this line
117133
}
118134
type LayoutCursor = {
119135
segmentIndex: number // Segment index in prepareWithSegments' prepared rich segment stream
@@ -125,6 +141,8 @@ Other helpers:
125141
```ts
126142
clearCache(): void // clears Pretext's shared internal caches used by prepare() and prepareWithSegments(). Useful if your app cycles through many different fonts or text variants and you want to release the accumulated cache
127143
setLocale(locale?: string): void // optional (by default we use the current locale). Sets locale for future prepare() and prepareWithSegments(). Internally, it also calls clearCache(). Setting a new locale doesn't affect existing prepare() and prepareWithSegments() states (no mutations to them)
144+
cursorToSourceOffset(prepared: PreparedTextWithSegments, cursor: LayoutCursor): number // converts a rich cursor back into an offset in the original input text
145+
cursorRangeToSourceSpan(prepared: PreparedTextWithSegments, start: LayoutCursor, end: LayoutCursor): { sourceOffset: number, sourceLength: number } // converts a rich cursor range back into the original input span
128146
```
129147
130148
## Caveats

src/analysis.ts

Lines changed: 91 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,17 @@ export type AnalysisChunk = {
3131
consumedEndSegmentIndex: number
3232
}
3333

34-
export type TextAnalysis = { normalized: string, chunks: AnalysisChunk[] } & MergedSegmentation
34+
export type TextAnalysis = { normalized: string, chunks: AnalysisChunk[], sourceBoundaries?: number[] } & MergedSegmentation
35+
36+
type NormalizedTextWithSourceBoundaries = {
37+
text: string
38+
sourceBoundaries: number[]
39+
}
3540

3641
export type AnalysisProfile = {
3742
carryCJKAfterClosingQuote: boolean
3843
}
3944

40-
const collapsibleWhitespaceRunRe = /[ \t\n\r\f]+/g
4145
const needsWhitespaceNormalizationRe = /[\t\n\r\f]| {2,}|^ | $/
4246

4347
type WhiteSpaceProfile = {
@@ -54,23 +58,89 @@ function getWhiteSpaceProfile(whiteSpace?: WhiteSpaceMode): WhiteSpaceProfile {
5458
}
5559

5660
export function normalizeWhitespaceNormal(text: string): string {
57-
if (!needsWhitespaceNormalizationRe.test(text)) return text
61+
return normalizeWhitespaceNormalWithSourceBoundaries(text).text
62+
}
63+
64+
function normalizeWhitespaceNormalWithSourceBoundaries(text: string): NormalizedTextWithSourceBoundaries {
65+
if (!needsWhitespaceNormalizationRe.test(text)) {
66+
const sourceBoundaries = new Array<number>(text.length + 1)
67+
for (let i = 0; i <= text.length; i++) sourceBoundaries[i] = i
68+
return { text, sourceBoundaries }
69+
}
70+
71+
const pieces: string[] = []
72+
const sourceBoundaries: number[] = []
73+
let i = 0
74+
75+
while (i < text.length) {
76+
const ch = text[i]!
77+
const isWhitespace = ch === ' ' || ch === '\t' || ch === '\n' || ch === '\r' || ch === '\f'
78+
if (!isWhitespace) break
79+
i++
80+
}
81+
82+
sourceBoundaries.push(i)
83+
84+
while (i < text.length) {
85+
const ch = text[i]!
86+
const isWhitespace = ch === ' ' || ch === '\t' || ch === '\n' || ch === '\r' || ch === '\f'
87+
if (isWhitespace) {
88+
while (i < text.length) {
89+
const next = text[i]!
90+
if (next !== ' ' && next !== '\t' && next !== '\n' && next !== '\r' && next !== '\f') break
91+
i++
92+
}
93+
if (i >= text.length) break
94+
pieces.push(' ')
95+
sourceBoundaries.push(i)
96+
continue
97+
}
5898

59-
let normalized = text.replace(collapsibleWhitespaceRunRe, ' ')
60-
if (normalized.charCodeAt(0) === 0x20) {
61-
normalized = normalized.slice(1)
99+
pieces.push(ch)
100+
i++
101+
sourceBoundaries.push(i)
62102
}
63-
if (normalized.length > 0 && normalized.charCodeAt(normalized.length - 1) === 0x20) {
64-
normalized = normalized.slice(0, -1)
103+
104+
return { text: pieces.join(''), sourceBoundaries }
105+
}
106+
107+
function normalizeWhitespacePreWrapWithSourceBoundaries(text: string): NormalizedTextWithSourceBoundaries {
108+
if (!/[\r\f]/.test(text)) {
109+
return {
110+
text,
111+
sourceBoundaries: buildPreWrapSourceBoundaries(text),
112+
}
113+
}
114+
115+
return {
116+
text: text
117+
.replace(/\r\n/g, '\n')
118+
.replace(/[\r\f]/g, '\n'),
119+
sourceBoundaries: buildPreWrapSourceBoundaries(text),
65120
}
66-
return normalized
67121
}
68122

69-
function normalizeWhitespacePreWrap(text: string): string {
70-
if (!/[\r\f]/.test(text)) return text.replace(/\r\n/g, '\n')
71-
return text
72-
.replace(/\r\n/g, '\n')
73-
.replace(/[\r\f]/g, '\n')
123+
function buildPreWrapSourceBoundaries(text: string): number[] {
124+
const sourceBoundaries = [0]
125+
let i = 0
126+
127+
while (i < text.length) {
128+
const ch = text[i]!
129+
if (ch === '\r' && i + 1 < text.length && text[i + 1] === '\n') {
130+
i += 2
131+
sourceBoundaries.push(i)
132+
continue
133+
}
134+
if (ch === '\r' || ch === '\f') {
135+
i += 1
136+
sourceBoundaries.push(i)
137+
continue
138+
}
139+
i += 1
140+
sourceBoundaries.push(i)
141+
}
142+
143+
return sourceBoundaries
74144
}
75145

76146
let sharedWordSegmenter: Intl.Segmenter | null = null
@@ -982,11 +1052,13 @@ export function analyzeText(
9821052
text: string,
9831053
profile: AnalysisProfile,
9841054
whiteSpace: WhiteSpaceMode = 'normal',
1055+
includeSourceBoundaries = false,
9851056
): TextAnalysis {
9861057
const whiteSpaceProfile = getWhiteSpaceProfile(whiteSpace)
987-
const normalized = whiteSpaceProfile.mode === 'pre-wrap'
988-
? normalizeWhitespacePreWrap(text)
989-
: normalizeWhitespaceNormal(text)
1058+
const normalizedResult = whiteSpaceProfile.mode === 'pre-wrap'
1059+
? normalizeWhitespacePreWrapWithSourceBoundaries(text)
1060+
: normalizeWhitespaceNormalWithSourceBoundaries(text)
1061+
const normalized = normalizedResult.text
9901062
if (normalized.length === 0) {
9911063
return {
9921064
normalized,
@@ -996,12 +1068,14 @@ export function analyzeText(
9961068
isWordLike: [],
9971069
kinds: [],
9981070
starts: [],
1071+
...(includeSourceBoundaries ? { sourceBoundaries: normalizedResult.sourceBoundaries } : {}),
9991072
}
10001073
}
10011074
const segmentation = buildMergedSegmentation(normalized, profile, whiteSpaceProfile)
10021075
return {
10031076
normalized,
10041077
chunks: compileAnalysisChunks(segmentation, whiteSpaceProfile),
1078+
...(includeSourceBoundaries ? { sourceBoundaries: normalizedResult.sourceBoundaries } : {}),
10051079
...segmentation,
10061080
}
10071081
}

src/layout.test.ts

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ let layout: LayoutModule['layout']
1717
let layoutWithLines: LayoutModule['layoutWithLines']
1818
let layoutNextLine: LayoutModule['layoutNextLine']
1919
let walkLineRanges: LayoutModule['walkLineRanges']
20+
let cursorToSourceOffset: LayoutModule['cursorToSourceOffset']
21+
let cursorRangeToSourceSpan: LayoutModule['cursorRangeToSourceSpan']
2022
let clearCache: LayoutModule['clearCache']
2123
let setLocale: LayoutModule['setLocale']
2224
let countPreparedLines: LineBreakModule['countPreparedLines']
@@ -107,6 +109,8 @@ beforeAll(async () => {
107109
layoutWithLines,
108110
layoutNextLine,
109111
walkLineRanges,
112+
cursorToSourceOffset,
113+
cursorRangeToSourceSpan,
110114
clearCache,
111115
setLocale,
112116
} = mod)
@@ -127,6 +131,8 @@ describe('prepare invariants', () => {
127131
test('collapses ordinary whitespace runs and trims the edges', () => {
128132
const prepared = prepareWithSegments(' Hello\t \n World ', FONT)
129133
expect(prepared.segments).toEqual(['Hello', ' ', 'World'])
134+
expect(prepared.segmentSourceOffsets).toEqual([2, 7, 12])
135+
expect(prepared.segmentSourceLengths).toEqual([5, 5, 5])
130136
})
131137

132138
test('pre-wrap mode keeps ordinary spaces instead of collapsing them', () => {
@@ -413,6 +419,8 @@ describe('layout invariants', () => {
413419
width: widthOfHello,
414420
start: { segmentIndex: 0, graphemeIndex: 0 },
415421
end: { segmentIndex: 1, graphemeIndex: 0 },
422+
sourceOffset: 0,
423+
sourceLength: 5,
416424
}])
417425
})
418426

@@ -430,6 +438,22 @@ describe('layout invariants', () => {
430438
expect(rich.lines.map(line => line.text).join('')).toBe('Superlongword')
431439
expect(rich.lines[0]!.start).toEqual({ segmentIndex: 0, graphemeIndex: 0 })
432440
expect(rich.lines.at(-1)!.end).toEqual({ segmentIndex: 1, graphemeIndex: 0 })
441+
expect(cursorToSourceOffset(prepared, rich.lines[0]!.end)).toBe(rich.lines[0]!.sourceOffset + rich.lines[0]!.sourceLength)
442+
})
443+
444+
test('rich lines preserve source spans through collapsed whitespace normalization', () => {
445+
const source = ' foo bar '
446+
const prepared = prepareWithSegments(source, FONT)
447+
const lines = layoutWithLines(prepared, 200, LINE_HEIGHT)
448+
expect(lines.lines).toHaveLength(1)
449+
expect(lines.lines[0]!.text).toBe('foo bar')
450+
expect(lines.lines[0]!.sourceOffset).toBe(2)
451+
expect(lines.lines[0]!.sourceLength).toBe(9)
452+
expect(source.slice(lines.lines[0]!.sourceOffset, lines.lines[0]!.sourceOffset + lines.lines[0]!.sourceLength)).toBe('foo bar')
453+
expect(cursorRangeToSourceSpan(prepared, lines.lines[0]!.start, lines.lines[0]!.end)).toEqual({
454+
sourceOffset: 2,
455+
sourceLength: 9,
456+
})
433457
})
434458

435459
test('mixed-direction text is a stable smoke test', () => {
@@ -586,13 +610,17 @@ describe('layout invariants', () => {
586610
width: number
587611
start: { segmentIndex: number, graphemeIndex: number }
588612
end: { segmentIndex: number, graphemeIndex: number }
613+
sourceOffset: number
614+
sourceLength: number
589615
}> = []
590616

591617
const lineCount = walkLineRanges(prepared, width, line => {
592618
actual.push({
593619
width: line.width,
594620
start: { ...line.start },
595621
end: { ...line.end },
622+
sourceOffset: line.sourceOffset,
623+
sourceLength: line.sourceLength,
596624
})
597625
})
598626

@@ -601,6 +629,8 @@ describe('layout invariants', () => {
601629
width: line.width,
602630
start: line.start,
603631
end: line.end,
632+
sourceOffset: line.sourceOffset,
633+
sourceLength: line.sourceLength,
604634
})))
605635
})
606636

0 commit comments

Comments
 (0)