diff --git a/core/src/exchanges/kalshi/normalizer.ts b/core/src/exchanges/kalshi/normalizer.ts index c128d2c1..6a2af8eb 100644 --- a/core/src/exchanges/kalshi/normalizer.ts +++ b/core/src/exchanges/kalshi/normalizer.ts @@ -35,10 +35,7 @@ export class KalshiNormalizer implements IExchangeNormalizer(); + for (const market of markets) { + const rawRule = typeof market?.rules_primary === 'string' ? market.rules_primary : ''; + if (!rawRule) continue; + + const candidate = this.deriveOutcomeLabel(market); + const templated = this.templateRule(rawRule, candidate); + templates.set(templated, (templates.get(templated) ?? 0) + 1); } - const suffixCandidates = texts.map((t) => t.slice(prefix.length)); - let suffix = suffixCandidates[0]; - for (const t of suffixCandidates) { - while (!t.endsWith(suffix)) suffix = suffix.slice(1); - if (!suffix) break; + // Only consider templates that actually contain the {x} placeholder so + // that a rule we failed to template (e.g. candidate name missing) can + // never win the vote and leak a specific name into the event description. + if (templates.size > 0) { + let bestTemplate: string | null = null; + let bestCount = 0; + for (const [template, count] of templates.entries()) { + if (!template.includes('{x}')) continue; + if (count > bestCount) { + bestTemplate = template; + bestCount = count; + } + } + if (bestTemplate) return bestTemplate; } - if (prefix.length + suffix.length < 20) return texts[0]; + return texts[0]; + } - const variables = texts.map((t) => t.slice(prefix.length, suffix.length ? t.length - suffix.length : undefined)); - if (new Set(variables).size === 1) return texts[0]; + private deriveOutcomeLabel(market: KalshiRawMarket): string | null { + const yesSubtitle = this.cleanLabel(market.yes_sub_title); + if (yesSubtitle) return yesSubtitle; + + const subtitle = this.cleanLabel(market.subtitle); + if (subtitle) return subtitle; + + return null; + } + + private cleanLabel(value: unknown): string | null { + if (typeof value !== 'string') return null; + const trimmed = value.trim(); + if (!trimmed) return null; + // Some Kalshi markets use structural subtitles like ":: Democratic". + if (trimmed.startsWith('::')) return null; + return trimmed; + } - return prefix + '{x}' + suffix; + private templateRule(rule: string, candidateName: string | null): string { + if (!candidateName) return rule; + const escaped = candidateName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + // Unicode-aware word boundaries so non-ASCII candidate names (Jose, + // Muller, O'Brien, etc.) still template correctly. JavaScript's \b is + // ASCII-only and would silently fail on such names. + const matcher = new RegExp(`(? { + if (typeof value !== "string") return null; + const trimmed = value.trim(); + if (!trimmed || trimmed.startsWith("::")) return null; + return trimmed; + }; + const candidateName: string | null = + cleanLabel(market.yes_sub_title) ?? cleanLabel(market.subtitle); // Calculate 24h change let priceChange = 0; diff --git a/core/test/unit/normalizers/kalshi.test.ts b/core/test/unit/normalizers/kalshi.test.ts new file mode 100644 index 00000000..898d41a3 --- /dev/null +++ b/core/test/unit/normalizers/kalshi.test.ts @@ -0,0 +1,120 @@ +import { describe, expect, test } from '@jest/globals'; +import { KalshiNormalizer } from '../../../src/exchanges/kalshi/normalizer'; +import { KalshiRawEvent, KalshiRawMarket } from '../../../src/exchanges/kalshi/fetcher'; + +const normalizer = new KalshiNormalizer(); + +function makeMarket(overrides: Partial): KalshiRawMarket { + return { + ticker: 'KALSHI-MKT', + expiration_time: '2029-01-20T15:00:00Z', + ...overrides, + }; +} + +describe('KalshiNormalizer outcome labels', () => { + test('prefers yes_sub_title over structural subtitle values', () => { + const event: KalshiRawEvent = { + event_ticker: 'KXGOVCA-26', + title: 'California Governor winner? (Person)', + markets: [ + makeMarket({ + ticker: 'KXGOVCA-26-TATK', + subtitle: ':: Democratic', + yes_sub_title: 'Toni Atkins', + rules_primary: 'If Toni Atkins is elected, then the market resolves to Yes.', + }), + ], + }; + + const market = normalizer.normalizeMarketsFromEvent(event)[0]; + expect(market.outcomes[0].label).toBe('Toni Atkins'); + expect(market.outcomes[1].label).toBe('Not Toni Atkins'); + }); +}); + +describe('KalshiNormalizer event description', () => { + test('uses dominant template and avoids malformed suffix truncation', () => { + const event: KalshiRawEvent = { + event_ticker: 'KXCABOUT-26MAR', + title: "Who will leave Trump's Cabinet next?", + markets: [ + makeMarket({ + ticker: 'KXCABOUT-26MAR-MRUB', + yes_sub_title: 'Marco Rubio', + rules_primary: 'If Marco Rubio is the first member of the Cabinet of Donald Trump to leave or announce they will leave (such as by quitting, being fired, or being impeached) after Mar 10, 2026, then the market resolves to Yes.', + }), + makeMarket({ + ticker: 'KXCABOUT-26MAR-SBES', + yes_sub_title: 'Scott Bessent', + rules_primary: 'If Scott Bessent is the first member of the Cabinet of Donald Trump to leave or announce they will leave (such as by quitting, being fired, or being impeached) after Mar 10, 2026, then the market resolves to Yes.', + }), + makeMarket({ + ticker: 'KXCABOUT-26MAR-MMUL', + yes_sub_title: 'Markwayne Mullin', + rules_primary: 'If Markwayne Mullin is the first member of the Cabinet of Donald Trump to leave or announce they will leave (such as by quitting, being fired, or being impeached) after Mar 30, 2026, then the market resolves to Yes.', + }), + ], + }; + + const unifiedEvent = normalizer.normalizeEvent(event)!; + expect(unifiedEvent.description).toBe( + 'If {x} is the first member of the Cabinet of Donald Trump to leave or announce they will leave (such as by quitting, being fired, or being impeached) after Mar 10, 2026, then the market resolves to Yes.', + ); + expect(unifiedEvent.description).not.toContain('{x}0, 2026'); + }); + + test('never leaks a candidate name when every market has a distinct template', () => { + const event: KalshiRawEvent = { + event_ticker: 'KXDISTINCT', + title: 'Distinct dates per market', + markets: [ + makeMarket({ + ticker: 'KXDISTINCT-A', + yes_sub_title: 'Alice', + rules_primary: 'If Alice wins by Jan 1, 2026, then the market resolves to Yes.', + }), + makeMarket({ + ticker: 'KXDISTINCT-B', + yes_sub_title: 'Bob', + rules_primary: 'If Bob wins by Feb 1, 2026, then the market resolves to Yes.', + }), + makeMarket({ + ticker: 'KXDISTINCT-C', + yes_sub_title: 'Carol', + rules_primary: 'If Carol wins by Mar 1, 2026, then the market resolves to Yes.', + }), + ], + }; + + const unifiedEvent = normalizer.normalizeEvent(event)!; + expect(unifiedEvent.description).toContain('{x}'); + expect(unifiedEvent.description).not.toContain('Alice'); + expect(unifiedEvent.description).not.toContain('Bob'); + expect(unifiedEvent.description).not.toContain('Carol'); + }); + + test('templates non-ASCII candidate names', () => { + const event: KalshiRawEvent = { + event_ticker: 'KXUNICODE', + title: 'Unicode candidate names', + markets: [ + makeMarket({ + ticker: 'KXUNICODE-J', + yes_sub_title: 'Jose Munoz', + rules_primary: 'If Jose Munoz is elected, then the market resolves to Yes.', + }), + makeMarket({ + ticker: 'KXUNICODE-M', + yes_sub_title: 'Muller', + rules_primary: 'If Muller is elected, then the market resolves to Yes.', + }), + ], + }; + + const unifiedEvent = normalizer.normalizeEvent(event)!; + expect(unifiedEvent.description).toContain('{x}'); + expect(unifiedEvent.description).not.toContain('Jose'); + expect(unifiedEvent.description).not.toContain('Muller'); + }); +});