Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions backend/migrations/069_news_source_url_unique.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
-- Migration 069: close the concurrent-insert duplicate-URL race on poi_news.
--
-- saveNewsItems dedups with a SELECT-then-INSERT, which has no DB-level guard:
-- two concurrent collection branches can both read "not present" and both
-- insert the same URL (observed repeatedly on the historicbridges POI, e.g.
-- ids 1523/1524 and 2622/2623 inserted ~24ms apart). A unique index on the
-- normalized source_url turns that race into a clean no-op when paired with
-- ON CONFLICT DO NOTHING in the insert path.
--
-- Events are intentionally excluded: distinct events legitimately share a
-- generic source URL (a venue homepage or calendar page), so source_url is not
-- unique for poi_events.

-- Safety net: collapse any pre-existing normalized-URL duplicates before adding
-- the constraint, keeping the best row per URL (published > pending > rejected,
-- then lowest id). Idempotent -- a no-op once the table holds one row per URL.
DELETE FROM poi_news a
USING poi_news b
WHERE a.source_url IS NOT NULL
AND b.source_url IS NOT NULL
AND LOWER(REGEXP_REPLACE(a.source_url, '/+$', '')) = LOWER(REGEXP_REPLACE(b.source_url, '/+$', ''))
AND (CASE a.moderation_status WHEN 'published' THEN 0 WHEN 'pending' THEN 1 ELSE 2 END, a.id)
> (CASE b.moderation_status WHEN 'published' THEN 0 WHEN 'pending' THEN 1 ELSE 2 END, b.id);
Comment on lines +17 to +23
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Using a self-join with USING to delete duplicates can be highly inefficient on large tables because it requires an $O(N^2)$ comparison of all rows matching the join condition.

A more performant and standard PostgreSQL approach is to use a subquery with ROW_NUMBER(). This scans the table once, partitions and sorts the rows, and deletes all but the first row in each partition.

Suggested change
DELETE FROM poi_news a
USING poi_news b
WHERE a.source_url IS NOT NULL
AND b.source_url IS NOT NULL
AND LOWER(REGEXP_REPLACE(a.source_url, '/+$', '')) = LOWER(REGEXP_REPLACE(b.source_url, '/+$', ''))
AND (CASE a.moderation_status WHEN 'published' THEN 0 WHEN 'pending' THEN 1 ELSE 2 END, a.id)
> (CASE b.moderation_status WHEN 'published' THEN 0 WHEN 'pending' THEN 1 ELSE 2 END, b.id);
DELETE FROM poi_news
WHERE id IN (
SELECT id
FROM (
SELECT id,
ROW_NUMBER() OVER (
PARTITION BY LOWER(REGEXP_REPLACE(source_url, '/+$', ''))
ORDER BY CASE moderation_status WHEN 'published' THEN 0 WHEN 'pending' THEN 1 ELSE 2 END, id
) as rn
FROM poi_news
WHERE source_url IS NOT NULL
) t
WHERE rn > 1
);


CREATE UNIQUE INDEX IF NOT EXISTS poi_news_source_url_norm_uniq
ON poi_news (LOWER(REGEXP_REPLACE(source_url, '/+$', '')))
WHERE source_url IS NOT NULL;
8 changes: 7 additions & 1 deletion backend/services/newsService.js
Original file line number Diff line number Diff line change
Expand Up @@ -1251,9 +1251,10 @@ export async function saveNewsItems(pool, poiId, newsItems, options = {}) {
}

const dateScore = item.date_consensus_score || 0;
await pool.query(`
const inserted = await pool.query(`
INSERT INTO poi_news (poi_id, title, summary, source_url, source_name, news_type, publication_date, date_consensus_score, moderation_status, rendered_content, date_signals, image_url, content_source)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)
ON CONFLICT DO NOTHING
`, [
effectivePoiId,
item.title,
Expand All @@ -1269,6 +1270,11 @@ export async function saveNewsItems(pool, poiId, newsItems, options = {}) {
item.image_url || null,
contentSource
]);
if (inserted.rowCount === 0) {
duplicateCount++;
if (log) log(`[Save] Skip duplicate (concurrent insert race): "${item.title}" → ${resolvedUrl}`);
continue;
}
savedCount++;
if (log) log(`[Save] Saved (pending): "${item.title}" (${item.published_date || 'no date'}, score=${dateScore}) → ${resolvedUrl}`);
} catch (error) {
Expand Down
12 changes: 10 additions & 2 deletions backend/tests/ogShareImages.integration.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,10 @@ describe('Open Graph share images', () => {
expect(ogImages[0].endsWith(FALLBACK_IMAGE_PATH)).toBe(true);
}, 15000);

it('uses the associated POI photo for a news permalink', async () => {
it('emits a real share image for a news permalink (source image or POI photo, never brand)', async () => {
// Priority is source article image -> POI primary photo -> brand fallback
// (server.js). For a POI that has a photo, the permalink must resolve to one
// of the first two, never the brand card.
let target = null;
for (const poi of (poiWithPhoto ? [poiWithPhoto, ...pois] : pois).slice(0, 80)) {
const news = await request(BASE_URL).get(`/api/pois/${poi.id}/news`);
Expand All @@ -117,7 +120,12 @@ describe('Open Graph share images', () => {
const res = await request(BASE_URL).get(url).expect(200);

const ogImages = metaContent(res.text, 'property', 'og:image');
const [twitterImage] = metaContent(res.text, 'name', 'twitter:image');
expect(ogImages.length).toBe(1);
expect(ogImages[0]).toMatch(new RegExp(`/api/pois/${target.poi.id}/thumbnail\\?size=large$`));
expect(ogImages[0].endsWith(FALLBACK_IMAGE_PATH)).toBe(false);
const isPoiPhoto = new RegExp(`/api/pois/${target.poi.id}/thumbnail\\?size=large$`).test(ogImages[0]);
const isSourceImage = /^https?:\/\//i.test(ogImages[0]);
expect(isPoiPhoto || isSourceImage).toBe(true);
expect(twitterImage).toBe(ogImages[0]);
}, 60000);
});
64 changes: 37 additions & 27 deletions backend/tests/ui.integration.test.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,30 @@
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import { chromium } from 'playwright';

// Open a POI that has a More Info link via its bare-path permalink (/<slug>),
// which reliably opens the sidebar on both mobile and desktop. .more-info-link
// only renders when more_info_link is set, so clicking an arbitrary marker is
// non-deterministic now that amenity POIs without links exist. A given slug may
// not resolve (e.g. POI not in the loaded set), so try candidates until the
// sidebar opens with the link. Returns the POI, or null if none resolve.
async function openPoiWithMoreInfo(page, baseUrl) {
const res = await fetch(`${baseUrl}/api/destinations`);
const body = await res.json();
const list = (Array.isArray(body) ? body : (body.destinations || body.pois || []))
.filter(d => /^https?:\/\//i.test(d.more_info_link || ''));
for (const poi of list.slice(0, 15)) {
// Slug must match frontend/src/App.jsx generateSlug so the permalink resolves.
const slug = (poi.name || '').toLowerCase()
.replace(/[^a-z0-9\s-]/g, '').replace(/\s+/g, '-').replace(/-+/g, '-').replace(/^-|-$/g, '');
await page.goto(`${baseUrl}/${slug}`, { waitUntil: 'networkidle' });
try {
await page.waitForSelector('.sidebar.open', { timeout: 5000 });
} catch { continue; }
if (await page.locator('.more-info-link').count() > 0) return poi;
}
return null;
}

describe('UI Integration Tests', () => {
let browser;
let page;
Expand Down Expand Up @@ -353,22 +377,14 @@ describe('UI Integration Tests', () => {
// Set viewport to mobile size
await page.setViewportSize({ width: 375, height: 667 });

// Load page
await page.goto(baseUrl, { waitUntil: 'networkidle' });

// Wait for map markers to load
await page.waitForSelector('.leaflet-marker-icon', { timeout: 10000 });
await page.waitForTimeout(1000);

// Click a marker to open sidebar
const firstMarker = await page.locator('.leaflet-marker-icon').first();
await firstMarker.click();

// Wait for sidebar to open
await page.waitForSelector('.sidebar.open', { timeout: 10000 });
const poi = await openPoiWithMoreInfo(page, baseUrl);
if (!poi) {
console.warn('[ui] No POI with a More Info link in seed — skipping');
await page.setViewportSize({ width: 1280, height: 720 });
return;
}

// Wait for More Info link to appear (should be on Info tab by default)
// Scroll to bottom of content to make link visible
// Scroll to bottom of Info tab content to make the link visible
const tabContent = await page.locator('.sidebar-tab-content');
await tabContent.evaluate(el => el.scrollTop = el.scrollHeight);
await page.waitForTimeout(300);
Expand All @@ -382,8 +398,6 @@ describe('UI Integration Tests', () => {
let isVisible = await moreInfoLink.isVisible();
expect(isVisible).toBe(true);

// Test passes - link exists at bottom of Info tab content

// Reset viewport
await page.setViewportSize({ width: 1280, height: 720 });
}, 40000);
Expand All @@ -392,16 +406,12 @@ describe('UI Integration Tests', () => {
// Set viewport to mobile size
await page.setViewportSize({ width: 375, height: 667 });

// Load page
await page.goto(baseUrl, { waitUntil: 'networkidle' });

// Wait for map markers and click one to open sidebar
await page.waitForSelector('.leaflet-marker-icon', { timeout: 10000 });
await page.waitForTimeout(1000);
await page.locator('.leaflet-marker-icon').first().click();

// Wait for sidebar to open (increased timeout for CI environment)
await page.waitForSelector('.sidebar.open', { timeout: 10000 });
const poi = await openPoiWithMoreInfo(page, baseUrl);
if (!poi) {
console.warn('[ui] No POI with a More Info link in seed — skipping');
await page.setViewportSize({ width: 1280, height: 720 });
return;
}

// More Info link is at bottom of scrollable content, so scroll down to see it
const tabContent = await page.locator('.sidebar-tab-content');
Expand Down
Loading