From f4b6968469a36e4bbd5072d3b504c0894366d2fd Mon Sep 17 00:00:00 2001 From: Ido Shamun <1993245+idoshamun@users.noreply.github.com> Date: Mon, 23 Mar 2026 09:54:16 +0200 Subject: [PATCH] Ignore channel digest sources in digest and highlight generation --- __tests__/workers/generateChannelDigest.ts | 77 ++++++++ __tests__/workers/generateChannelHighlight.ts | 175 +++++++++++++++++- src/common/channelDigest/definitions.ts | 22 +++ src/common/channelDigest/generate.ts | 17 +- src/common/channelHighlight/generate.ts | 32 +++- src/common/channelHighlight/publish.ts | 38 +++- src/common/channelHighlight/queries.ts | 35 +++- 7 files changed, 381 insertions(+), 15 deletions(-) diff --git a/__tests__/workers/generateChannelDigest.ts b/__tests__/workers/generateChannelDigest.ts index 2644e78f8b..632282899d 100644 --- a/__tests__/workers/generateChannelDigest.ts +++ b/__tests__/workers/generateChannelDigest.ts @@ -91,8 +91,21 @@ const getLockKey = (digestKey: string, scheduledAt: string) => describe('generateChannelDigest worker', () => { afterEach(async () => { + jest.restoreAllMocks(); nock.cleanAll(); await deleteKeysByPattern('channel-digest:*'); + await con.getRepository(ChannelDigest).clear(); + await con + .createQueryBuilder() + .delete() + .from('post') + .where('"sourceId" IN (:...sourceIds)', { + sourceIds: ['content-source', AGENTS_DIGEST_SOURCE, 'weekly-source'], + }) + .execute(); + await con + .getRepository(Source) + .delete(['content-source', AGENTS_DIGEST_SOURCE, 'weekly-source']); }); it('should be registered', () => { @@ -270,6 +283,70 @@ describe('generateChannelDigest worker', () => { ).toBeGreaterThan(ONE_DAY_IN_SECONDS); }); + it('should ignore posts from all channel digest sources when generating digests', async () => { + const scheduledAt = '2026-03-03T10:00:00.000Z'; + await con + .getRepository(Source) + .save([ + createSource( + AGENTS_DIGEST_SOURCE, + 'Agents Digest', + 'https://daily.dev/agents.png', + ), + createSource( + 'weekly-source', + 'Weekly Digest', + 'https://daily.dev/weekly.png', + ), + ]); + await saveDefinition({ + key: 'agentic', + sourceId: AGENTS_DIGEST_SOURCE, + channel: 'vibes', + }); + await saveDefinition({ + key: 'weekly-test', + sourceId: 'weekly-source', + channel: 'weekly', + frequency: 'weekly', + }); + await savePost({ + id: 'agents-post', + sourceId: AGENTS_DIGEST_SOURCE, + title: 'Agents digest post', + content: 'Agents digest body', + createdAt: new Date('2026-03-03T09:10:00.000Z'), + channel: 'vibes', + }); + await savePost({ + id: 'weekly-post', + sourceId: 'weekly-source', + title: 'Weekly digest post', + content: 'Weekly digest body', + createdAt: new Date('2026-03-03T09:20:00.000Z'), + channel: 'vibes', + }); + const digestCountBefore = await con.getRepository(FreeformPost).countBy({ + sourceId: AGENTS_DIGEST_SOURCE, + }); + + await expectSuccessfulTypedBackground<'api.v1.generate-channel-digest'>( + worker, + { + digestKey: 'agentic', + scheduledAt, + }, + ); + + const digests = await con.getRepository(FreeformPost).findBy({ + sourceId: AGENTS_DIGEST_SOURCE, + }); + expect(digests).toHaveLength(digestCountBefore); + expect( + digests.find((digest) => digest.title === 'Mock sentiment digest'), + ).toBeUndefined(); + }); + it('should use a weekly done ttl for weekly digests', async () => { const scheduledAt = '2026-03-02T10:00:00.000Z'; await con diff --git a/__tests__/workers/generateChannelHighlight.ts b/__tests__/workers/generateChannelHighlight.ts index 4a35850c98..4d1032eed0 100644 --- a/__tests__/workers/generateChannelHighlight.ts +++ b/__tests__/workers/generateChannelHighlight.ts @@ -1,7 +1,9 @@ import { IsNull, type DataSource } from 'typeorm'; import createOrGetConnection from '../../src/db'; +import { ChannelDigest } from '../../src/entity/ChannelDigest'; import { ChannelHighlightDefinition } from '../../src/entity/ChannelHighlightDefinition'; import { ChannelHighlightRun } from '../../src/entity/ChannelHighlightRun'; +import { AGENTS_DIGEST_SOURCE } from '../../src/entity/Source'; import { PostHighlight, PostHighlightSignificance, @@ -123,6 +125,7 @@ describe('generateChannelHighlight worker', () => { await deleteKeysByPattern('channel-highlight:*'); await con.getRepository(ChannelHighlightRun).clear(); await con.getRepository(ChannelHighlightDefinition).clear(); + await con.getRepository(ChannelDigest).clear(); await con.getRepository(PostHighlight).clear(); await con.getRepository(PostRelation).clear(); await con @@ -130,12 +133,12 @@ describe('generateChannelHighlight worker', () => { .delete() .from('post') .where('"sourceId" IN (:...sourceIds)', { - sourceIds: ['content-source', 'secondary-source'], + sourceIds: ['content-source', 'secondary-source', AGENTS_DIGEST_SOURCE], }) .execute(); await con .getRepository(Source) - .delete(['content-source', 'secondary-source']); + .delete(['content-source', 'secondary-source', AGENTS_DIGEST_SOURCE]); }); it('should be registered', () => { @@ -379,6 +382,174 @@ describe('generateChannelHighlight worker', () => { expect(retiredHighlight?.retiredAt).toBeInstanceOf(Date); }); + it('should exclude retired highlights from candidates and keep them retired', async () => { + const now = new Date('2026-03-03T11:45:00.000Z'); + await con.getRepository(ChannelHighlightDefinition).save({ + channel: 'vibes', + mode: 'publish', + candidateHorizonHours: 72, + maxItems: 3, + }); + await saveArticle({ + id: 'retired-1', + title: 'Previously highlighted story', + createdAt: new Date('2026-03-03T11:15:00.000Z'), + }); + await saveArticle({ + id: 'fresh-1', + title: 'Fresh candidate', + createdAt: new Date('2026-03-03T11:20:00.000Z'), + }); + await con.getRepository(PostHighlight).save({ + channel: 'vibes', + postId: 'retired-1', + highlightedAt: new Date('2026-03-03T11:00:00.000Z'), + headline: 'Previously highlighted headline', + significance: PostHighlightSignificance.Major, + reason: 'previous run', + retiredAt: new Date('2026-03-03T11:10:00.000Z'), + }); + + const evaluatorSpy = jest + .spyOn(evaluator, 'evaluateChannelHighlights') + .mockResolvedValue({ + items: [ + { + postId: 'fresh-1', + headline: 'Fresh headline', + significanceLabel: 'breaking', + reason: 'test', + }, + ], + }); + + await expectSuccessfulTypedBackground<'api.v1.generate-channel-highlight'>( + worker, + { + channel: 'vibes', + scheduledAt: now.toISOString(), + }, + ); + + expect(evaluatorSpy).toHaveBeenCalledTimes(1); + expect(evaluatorSpy.mock.calls[0][0].newCandidates).toEqual([ + expect.objectContaining({ + postId: 'fresh-1', + title: 'Fresh candidate', + }), + ]); + + const liveHighlights = await con.getRepository(PostHighlight).find({ + where: { channel: 'vibes', retiredAt: IsNull() }, + }); + expect(liveHighlights).toEqual([ + expect.objectContaining({ + postId: 'fresh-1', + headline: 'Fresh headline', + }), + ]); + + const retiredHighlights = await con.getRepository(PostHighlight).find({ + where: { channel: 'vibes', postId: 'retired-1' }, + }); + expect(retiredHighlights).toHaveLength(1); + expect(retiredHighlights[0].retiredAt).toBeInstanceOf(Date); + }); + + it('should ignore posts from channel digest sources for highlights', async () => { + const now = new Date('2026-03-03T11:50:00.000Z'); + await con + .getRepository(Source) + .save( + createSource( + AGENTS_DIGEST_SOURCE, + 'Agents Digest', + 'https://daily.dev/agents.png', + ), + ); + await con.getRepository(ChannelDigest).save({ + key: 'agentic', + sourceId: AGENTS_DIGEST_SOURCE, + channel: 'vibes', + targetAudience: 'Digest readers', + frequency: 'daily', + includeSentiment: false, + sentimentGroupIds: [], + enabled: true, + }); + await con.getRepository(ChannelHighlightDefinition).save({ + channel: 'vibes', + mode: 'publish', + candidateHorizonHours: 72, + maxItems: 3, + }); + await saveArticle({ + id: 'digest-post', + sourceId: AGENTS_DIGEST_SOURCE, + title: 'Digest source post', + createdAt: new Date('2026-03-03T11:20:00.000Z'), + }); + await saveArticle({ + id: 'fresh-1', + title: 'Fresh candidate', + createdAt: new Date('2026-03-03T11:25:00.000Z'), + }); + await con.getRepository(PostHighlight).save({ + channel: 'vibes', + postId: 'digest-post', + highlightedAt: new Date('2026-03-03T11:10:00.000Z'), + headline: 'Digest highlight', + significance: PostHighlightSignificance.Major, + reason: 'existing', + }); + + const evaluatorSpy = jest + .spyOn(evaluator, 'evaluateChannelHighlights') + .mockResolvedValue({ + items: [ + { + postId: 'fresh-1', + headline: 'Fresh headline', + significanceLabel: 'major', + reason: 'test', + }, + ], + }); + + await expectSuccessfulTypedBackground<'api.v1.generate-channel-highlight'>( + worker, + { + channel: 'vibes', + scheduledAt: now.toISOString(), + }, + ); + + expect(evaluatorSpy).toHaveBeenCalledTimes(1); + expect(evaluatorSpy.mock.calls[0][0].newCandidates).toEqual([ + expect.objectContaining({ + postId: 'fresh-1', + }), + ]); + + const liveHighlights = await con.getRepository(PostHighlight).find({ + where: { channel: 'vibes', retiredAt: IsNull() }, + }); + expect(liveHighlights).toEqual([ + expect.objectContaining({ + postId: 'fresh-1', + headline: 'Fresh headline', + }), + ]); + + const retiredDigestHighlight = await con + .getRepository(PostHighlight) + .findOneByOrFail({ + channel: 'vibes', + postId: 'digest-post', + }); + expect(retiredDigestHighlight.retiredAt).toBeInstanceOf(Date); + }); + it('should remove highlights that aged past the configured horizon', async () => { const now = new Date('2026-03-03T12:00:00.000Z'); await con.getRepository(ChannelHighlightDefinition).save({ diff --git a/src/common/channelDigest/definitions.ts b/src/common/channelDigest/definitions.ts index df39a06134..ce083b76b2 100644 --- a/src/common/channelDigest/definitions.ts +++ b/src/common/channelDigest/definitions.ts @@ -35,6 +35,28 @@ export const getChannelDigestDefinitionByKey = async ({ }), ); +export const getChannelDigestSourceIds = async ({ + con, +}: { + con: DataSource; +}): Promise => { + const definitions = await queryReadReplica(con, ({ queryRunner }) => + queryRunner.manager.getRepository(ChannelDigest).find({ + select: { + sourceId: true, + }, + where: { + enabled: true, + }, + order: { + sourceId: 'ASC', + }, + }), + ); + + return definitions.map((definition) => definition.sourceId); +}; + export const isChannelDigestScheduledForDate = ({ definition, now, diff --git a/src/common/channelDigest/generate.ts b/src/common/channelDigest/generate.ts index bcfe5d7f3e..9befcb2282 100644 --- a/src/common/channelDigest/generate.ts +++ b/src/common/channelDigest/generate.ts @@ -15,7 +15,10 @@ import { PostRelation, PostRelationType, } from '../../entity/posts/PostRelation'; -import { getChannelDigestLookbackSeconds } from './definitions'; +import { + getChannelDigestLookbackSeconds, + getChannelDigestSourceIds, +} from './definitions'; type DigestPostRow = { title: string | null; @@ -76,10 +79,12 @@ const findDigestPosts = async ({ con, from, channel, + excludedSourceIds, }: { con: DataSource; from: Date; channel: string; + excludedSourceIds: string[]; }): Promise => { if (!channel) { return []; @@ -104,6 +109,12 @@ const findDigestPosts = async ({ .andWhere(`(post."contentMeta"->'channels') ? :channel`, { channel, }) + .andWhere( + excludedSourceIds.length + ? 'post."sourceId" NOT IN (:...excludedSourceIds)' + : '1=1', + { excludedSourceIds }, + ) .andWhere('relation."relatedPostId" IS NULL') .orderBy('post.createdAt', 'DESC') .getRawMany(); @@ -204,6 +215,9 @@ export const generateChannelDigest = async ({ now, definition, }); + const excludedSourceIds = await getChannelDigestSourceIds({ + con, + }); const [sentimentItems, posts] = await Promise.all([ findSentimentItems({ definition, @@ -214,6 +228,7 @@ export const generateChannelDigest = async ({ con, from, channel: definition.channel, + excludedSourceIds, }), ]); diff --git a/src/common/channelHighlight/generate.ts b/src/common/channelHighlight/generate.ts index 65a4b71f78..d33b2cd9f6 100644 --- a/src/common/channelHighlight/generate.ts +++ b/src/common/channelHighlight/generate.ts @@ -2,6 +2,7 @@ import type { DataSource } from 'typeorm'; import { logger as baseLogger } from '../../logger'; import { ChannelHighlightDefinition } from '../../entity/ChannelHighlightDefinition'; import { ChannelHighlightRun } from '../../entity/ChannelHighlightRun'; +import { getChannelDigestSourceIds } from '../channelDigest/definitions'; import { compareSnapshots } from './decisions'; import { evaluateChannelHighlights } from './evaluate'; import { replaceHighlightsForChannel } from './publish'; @@ -9,6 +10,7 @@ import { fetchCurrentHighlights, fetchIncrementalPosts, fetchPostsByIds, + fetchRetiredHighlightPostIds, fetchRelations, getFetchStart, getHorizonStart, @@ -66,10 +68,20 @@ export const generateChannelHighlight = async ({ ); try { - const currentHighlights = await fetchCurrentHighlights({ - con, - channel: definition.channel, - }); + const [currentHighlights, retiredHighlightPostIds, excludedSourceIds] = + await Promise.all([ + fetchCurrentHighlights({ + con, + channel: definition.channel, + }), + fetchRetiredHighlightPostIds({ + con, + channel: definition.channel, + }), + getChannelDigestSourceIds({ + con, + }), + ]); const horizonStart = getHorizonStart({ now, definition, @@ -91,10 +103,12 @@ export const generateChannelHighlight = async ({ channel: definition.channel, fetchStart, horizonStart, + excludedSourceIds, }), fetchPostsByIds({ con, ids: highlightedPostIds, + excludedSourceIds, }), ]); const basePosts = mergePosts([incrementalPosts, highlightedPosts]); @@ -112,6 +126,7 @@ export const generateChannelHighlight = async ({ ]), ), ], + excludedSourceIds, }); const availablePosts = mergePosts([basePosts, relationPosts]); const liveHighlights = canonicalizeCurrentHighlights({ @@ -123,11 +138,16 @@ export const generateChannelHighlight = async ({ const currentHighlightPostIds = new Set( liveHighlights.map((item) => item.postId), ); + const retiredHighlightPostIdSet = new Set(retiredHighlightPostIds); const newCandidates = buildCandidates({ posts: availablePosts, relations, horizonStart, - }).filter((candidate) => !currentHighlightPostIds.has(candidate.postId)); + }).filter( + (candidate) => + !currentHighlightPostIds.has(candidate.postId) && + !retiredHighlightPostIdSet.has(candidate.postId), + ); const admittedHighlights = newCandidates.length === 0 @@ -185,7 +205,9 @@ export const generateChannelHighlight = async ({ inputSummary: { fetchStart: fetchStart.toISOString(), horizonStart: horizonStart.toISOString(), + excludedSourceIds, currentHighlightPostIds: liveHighlights.map((item) => item.postId), + retiredHighlightPostIds, candidatePostIds: newCandidates.map( (candidate) => candidate.postId, ), diff --git a/src/common/channelHighlight/publish.ts b/src/common/channelHighlight/publish.ts index 8d4ddf29e2..dbaf24da6f 100644 --- a/src/common/channelHighlight/publish.ts +++ b/src/common/channelHighlight/publish.ts @@ -1,10 +1,30 @@ -import { IsNull, type EntityManager } from 'typeorm'; +import type { EntityManager } from 'typeorm'; import { PostHighlight, toPostHighlightSignificance, } from '../../entity/PostHighlight'; import type { HighlightItem } from './types'; +const normalizeHighlightItems = ({ + items, + retiredPostIds, +}: { + items: HighlightItem[]; + retiredPostIds: Set; +}): HighlightItem[] => { + const dedupedItems = new Map(); + + for (const item of items) { + if (retiredPostIds.has(item.postId) || dedupedItems.has(item.postId)) { + continue; + } + + dedupedItems.set(item.postId, item); + } + + return [...dedupedItems.values()]; +}; + export const replaceHighlightsForChannel = async ({ manager, channel, @@ -15,16 +35,22 @@ export const replaceHighlightsForChannel = async ({ items: HighlightItem[]; }): Promise => { const repo = manager.getRepository(PostHighlight); - const currentHighlights = await repo.find({ + const highlights = await repo.find({ where: { channel, - retiredAt: IsNull(), }, }); + const currentHighlights = highlights.filter((item) => !item.retiredAt); + const nextItems = normalizeHighlightItems({ + items, + retiredPostIds: new Set( + highlights.filter((item) => item.retiredAt).map((item) => item.postId), + ), + }); const currentByPostId = new Map( currentHighlights.map((item) => [item.postId, item]), ); - const nextPostIds = new Set(items.map((item) => item.postId)); + const nextPostIds = new Set(nextItems.map((item) => item.postId)); const retiredPostIds = currentHighlights .filter((item) => !nextPostIds.has(item.postId)) .map((item) => item.postId); @@ -40,12 +66,12 @@ export const replaceHighlightsForChannel = async ({ .execute(); } - if (!items.length) { + if (!nextItems.length) { return; } await repo.save( - items.map((item) => { + nextItems.map((item) => { const currentHighlight = currentByPostId.get(item.postId); return repo.create({ diff --git a/src/common/channelHighlight/queries.ts b/src/common/channelHighlight/queries.ts index 970b809a30..629df2a7fb 100644 --- a/src/common/channelHighlight/queries.ts +++ b/src/common/channelHighlight/queries.ts @@ -1,4 +1,4 @@ -import { Brackets, In, IsNull, type DataSource } from 'typeorm'; +import { Brackets, In, IsNull, Not, type DataSource } from 'typeorm'; import { ONE_HOUR_IN_SECONDS } from '../constants'; import { PostHighlight } from '../../entity/PostHighlight'; import { Post } from '../../entity/posts/Post'; @@ -76,12 +76,34 @@ export const fetchCurrentHighlights = async ({ }, }); +export const fetchRetiredHighlightPostIds = async ({ + con, + channel, +}: { + con: DataSource; + channel: string; +}): Promise => { + const highlights = await con.getRepository(PostHighlight).find({ + select: { + postId: true, + }, + where: { + channel, + retiredAt: Not(IsNull()), + }, + }); + + return highlights.map((highlight) => highlight.postId); +}; + export const fetchPostsByIds = async ({ con, ids, + excludedSourceIds = [], }: { con: DataSource; ids: string[]; + excludedSourceIds?: string[]; }): Promise => { if (!ids.length) { return []; @@ -90,6 +112,9 @@ export const fetchPostsByIds = async ({ return con.getRepository(Post).find({ where: { id: In(ids), + sourceId: excludedSourceIds.length + ? Not(In(excludedSourceIds)) + : undefined, visible: true, deleted: false, banned: false, @@ -102,11 +127,13 @@ export const fetchIncrementalPosts = async ({ channel, fetchStart, horizonStart, + excludedSourceIds = [], }: { con: DataSource; channel: string; fetchStart: Date; horizonStart: Date; + excludedSourceIds?: string[]; }): Promise => con .getRepository(Post) @@ -121,6 +148,12 @@ export const fetchIncrementalPosts = async ({ .andWhere(`NOT (post."contentCuration" && :rejectedCurations)`, { rejectedCurations: REJECTED_CONTENT_CURATIONS, }) + .andWhere( + excludedSourceIds.length + ? 'post."sourceId" NOT IN (:...excludedSourceIds)' + : '1=1', + { excludedSourceIds }, + ) .andWhere( new Brackets((builder) => { builder