Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ history stays consistent across GitHub and npm.
- Added `CMS-ROUTE-DUPLICATE` (error): flags two or more published documents
that resolve to the same route path. Drafts are ignored; the first document
in scan order is treated as the winner.
- Added opt-in JSON-LD structured-data validation via
`checks.routes.structuredData`. On 2xx routes it parses
`application/ld+json` blocks and flags malformed JSON (`SEO-JSONLD-INVALID`,
warning) or a route with no structured data (`SEO-JSONLD-MISSING`, info).
- Added opt-in canonical validation via `checks.routes.canonical`. On 2xx
routes it parses `<link rel="canonical">` and flags a missing canonical
(`SEO-CANONICAL-MISSING`, warning), a canonical on a different origin
Expand Down
8 changes: 8 additions & 0 deletions apps/site/app/docs/configuration/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,14 @@ export default defineConfig({
disagrees with the route is a warning. It is off by default because it
requires reading response bodies.
</div>
<div className="callout">
<strong>Structured data is opt-in</strong>
<code>{`checks: { routes: { structuredData: true } }`}</code> parses
each 2xx route&apos;s <code>application/ld+json</code> blocks: a
malformed block is a warning (<code>SEO-JSONLD-INVALID</code>) and a
route with no structured data is info (<code>SEO-JSONLD-MISSING</code>).
Off by default because it reads response bodies.
</div>
<h2 id="relationships">Relationships</h2>
<p>
Relationship checks compare one field on a source document with one
Expand Down
12 changes: 12 additions & 0 deletions apps/site/app/docs/diagnostics/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,18 @@ export default function DiagnosticsPage() {
Canonical path disagrees with the probed path beyond trailing slash
and case.
</DiagnosticCode>
<DiagnosticCode
code="SEO-JSONLD-INVALID"
severity="warning"
group="seo"
>
A route has a malformed <code>application/ld+json</code> block.
Opt-in via <code>checks.routes.structuredData</code>.
</DiagnosticCode>
<DiagnosticCode code="SEO-JSONLD-MISSING" severity="info" group="seo">
A route renders no JSON-LD structured data. Informational; enabled
via <code>checks.routes.structuredData</code>.
</DiagnosticCode>
<DiagnosticCode
code="SEO-OG-IMAGE-MISSING"
severity="warning"
Expand Down
1 change: 1 addition & 0 deletions packages/core/src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ const checksSchema = z
.strict()
.optional(),
canonical: z.boolean().optional(),
structuredData: z.boolean().optional(),
})
.strict(),
])
Expand Down
16 changes: 16 additions & 0 deletions packages/core/src/diagnostics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,22 @@ const explanations: DiagnosticExplanation[] = [
"The canonical path differs from the probed path beyond trailing slash and case. A canonical pointing at a stale slug after a rename quietly removes the real page from search.",
fix: "Point the canonical at the route's own path, or confirm the redirect target is intentional.",
},
{
code: "SEO-JSONLD-INVALID",
severity: "warning",
title: "JSON-LD structured data is malformed",
meaning:
"Structured-data validation is enabled (checks.routes.structuredData) and a route has an application/ld+json block that is not valid JSON. Search engines skip malformed structured data, so rich results break silently.",
fix: "Fix the JSON-LD output (often an unescaped value or template interpolation bug) so the block parses as valid JSON.",
},
{
code: "SEO-JSONLD-MISSING",
severity: "info",
title: "No JSON-LD structured data",
meaning:
"Structured-data validation is enabled and a route renders no application/ld+json block. This is informational: many pages do not need structured data.",
fix: "Add JSON-LD (for example Article, Product, or BreadcrumbList) if this page should produce rich results, or ignore this code for routes that do not need it.",
},
{
code: "SEO-OG-IMAGE-MISSING",
severity: "warning",
Expand Down
49 changes: 49 additions & 0 deletions packages/core/src/scan.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2184,3 +2184,52 @@ test("reports MISSING (not UNPUBLISHED) when there are no related records", asyn
expect(codes).toContain("CMS-RELATIONSHIP-MISSING");
expect(codes).not.toContain("CMS-RELATIONSHIP-UNPUBLISHED");
});

/**
 * Test helper: scans a single published "home" page with
 * `checks.routes.structuredData` enabled and the seo, a11y, images and
 * fields checks disabled. Every fetch issued by the scan is answered with a
 * 200 response whose body is `body`.
 */
async function scanForStructuredData(body: string) {
  // Stubbed fetch: always succeeds and serves the HTML under test.
  const respondWithBody = async () => new Response(body, { status: 200 });

  return scanDocuments({
    config: {
      ...baseConfig,
      checks: {
        routes: { structuredData: true },
        seo: false,
        a11y: false,
        images: false,
        fields: false,
      },
    },
    project: {
      framework: "next",
      router: "app",
      rootDir: "/site",
      appDir: "/site/app",
    },
    documents: [
      { id: "doc-1", type: "page", uid: "home", status: "published", data: {} },
    ],
    fetch: respondWithBody,
  });
}

test("structured-data validation flags malformed JSON-LD", async () => {
  // The trailing comma after "Article" makes this block invalid JSON.
  const malformedHtml =
    '<script type="application/ld+json">{ "@type": "Article", }</script>';

  const { diagnostics } = await scanForStructuredData(malformedHtml);
  const reportedCodes = diagnostics.map(({ code }) => code);

  // A block exists, so it must be reported as invalid rather than missing.
  expect(reportedCodes).toContain("SEO-JSONLD-INVALID");
  expect(reportedCodes).not.toContain("SEO-JSONLD-MISSING");
});

test("structured-data validation passes valid JSON-LD", async () => {
  const validHtml =
    '<script type="application/ld+json">{"@context":"https://schema.org","@type":"Article"}</script>';

  const { diagnostics } = await scanForStructuredData(validHtml);

  // Neither SEO-JSONLD-* code may be emitted for a well-formed block.
  const jsonLdDiagnostics = diagnostics.filter(({ code }) =>
    code.startsWith("SEO-JSONLD"),
  );
  expect(jsonLdDiagnostics).toEqual([]);
});

test("structured-data validation reports routes with no JSON-LD as info", async () => {
  // The page contains no <script type="application/ld+json"> block at all.
  const { diagnostics } = await scanForStructuredData(
    "<html><head></head></html>",
  );

  const missing = diagnostics.find(
    ({ code }) => code === "SEO-JSONLD-MISSING",
  );
  expect(missing?.severity).toBe("info");
});
94 changes: 92 additions & 2 deletions packages/core/src/scan.ts
Original file line number Diff line number Diff line change
Expand Up @@ -351,12 +351,18 @@ async function checkRouteReachability(
}

// Body-level checks on 2xx responses. Only run (and only read the body)
// when explicitly enabled, since both require the response body:
// when explicitly enabled, since each requires the response body:
// - soft-404 detection via checks.routes.soft404
// - canonical validation via checks.routes.canonical
// - structured-data validation via checks.routes.structuredData
const soft404 = soft404Options(config);
const canonicalEnabled = canonicalCheckEnabled(config);
if ((soft404 || canonicalEnabled) && status >= 200 && status < 300) {
const structuredDataEnabled = structuredDataCheckEnabled(config);
if (
(soft404 || canonicalEnabled || structuredDataEnabled) &&
status >= 200 &&
status < 300
) {
try {
const body = await response.text();

Expand All @@ -377,6 +383,12 @@ async function checkRouteReachability(
...checkCanonical(config, candidate, diagnosticPath, url, body),
);
}

if (structuredDataEnabled) {
diagnostics.push(
...checkStructuredData(config, candidate, diagnosticPath, body),
);
}
} catch {
// Body read errors are not actionable here; leave as silent.
}
Expand Down Expand Up @@ -535,6 +547,84 @@ function normalizeCanonicalPath(pathname: string): string {
return (pathname.replace(/\/+$/, "") || "/").toLowerCase();
}

/**
 * Reports whether structured-data validation is switched on.
 *
 * Returns true only when `config.checks.routes` is an object whose
 * `structuredData` property is exactly `true`; any other shape of `routes`
 * (absent, null, or a non-object value) yields false.
 */
function structuredDataCheckEnabled(config: CmsLabConfig): boolean {
  const routeChecks = config.checks?.routes;

  // Anything other than an options object cannot carry the opt-in flag.
  if (routeChecks === null || typeof routeChecks !== "object") {
    return false;
  }

  return routeChecks.structuredData === true;
}

/**
 * Inspect the JSON-LD blocks found in a route's response body.
 *
 * - No `application/ld+json` block at all: one `SEO-JSONLD-MISSING`
 *   diagnostic (info).
 * - One or more blocks that fail to parse as JSON (empty blocks included):
 *   one `SEO-JSONLD-INVALID` diagnostic (warning) whose message carries the
 *   number of malformed blocks.
 * - Otherwise: no diagnostics.
 *
 * @param config - Scan configuration, used to resolve the diagnostic source.
 * @param candidate - Route candidate whose document the diagnostic refers to.
 * @param diagnosticPath - Path used in the message and as the diagnostic path.
 * @param body - Response body text to search for JSON-LD blocks.
 * @returns Zero or one diagnostic for the route.
 */
function checkStructuredData(
  config: CmsLabConfig,
  candidate: RouteCandidate,
  diagnosticPath: string,
  body: string,
): Diagnostic[] {
  const jsonLdBlocks = extractJsonLdBlocks(body);
  const source = sourceFor(config, candidate.document);
  const found: Diagnostic[] = [];

  if (jsonLdBlocks.length === 0) {
    found.push(
      createDiagnostic({
        severity: "info",
        code: "SEO-JSONLD-MISSING",
        message: `Route ${diagnosticPath} has no JSON-LD structured data`,
        path: diagnosticPath,
        source,
      }),
    );
    return found;
  }

  // Count the blocks that do not parse; one diagnostic summarizes them all.
  let malformedCount = 0;
  for (const block of jsonLdBlocks) {
    if (!isParseableJson(block)) {
      malformedCount += 1;
    }
  }

  if (malformedCount > 0) {
    found.push(
      createDiagnostic({
        severity: "warning",
        code: "SEO-JSONLD-INVALID",
        message: `Route ${diagnosticPath} has ${malformedCount} malformed JSON-LD block(s)`,
        path: diagnosticPath,
        source,
      }),
    );
  }

  return found;
}

/**
 * Collect the trimmed text content of every
 * `<script type="application/ld+json">` element in an HTML string.
 *
 * The `type` attribute is located as a standalone attribute (preceded by
 * whitespace), so look-alike attributes such as `data-type` are ignored. Its
 * value may be double-quoted, single-quoted, or unquoted, and is compared
 * case-insensitively after trimming surrounding whitespace.
 *
 * This is a regex scan, not an HTML parser: an attribute value containing
 * `>` will end the opening tag early.
 *
 * @param body - HTML text to search.
 * @returns Block contents in document order; empty blocks are returned as "".
 */
function extractJsonLdBlocks(body: string): string[] {
  // Group 1: raw attribute text of the opening tag. Group 2: element content.
  const scriptPattern = /<script\b([^>]*)>([\s\S]*?)<\/script\s*>/gi;
  // Requires whitespace (or start) before `type` so `data-type=` cannot match.
  const typePattern =
    /(?:^|\s)type\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+))/i;
  const blocks: string[] = [];
  let match: RegExpExecArray | null;

  while ((match = scriptPattern.exec(body)) !== null) {
    const typeMatch = typePattern.exec(match[1] ?? "");
    const typeValue = (typeMatch?.[1] ?? typeMatch?.[2] ?? typeMatch?.[3] ?? "")
      .trim()
      .toLowerCase();

    if (typeValue === "application/ld+json") {
      blocks.push((match[2] ?? "").trim());
    }
  }

  return blocks;
}

/**
 * Tells whether `value` is non-empty text that `JSON.parse` accepts.
 *
 * @param value - Candidate JSON text.
 * @returns false for the empty string or when parsing throws; true otherwise.
 */
function isParseableJson(value: string): boolean {
  // An empty block is treated as unparseable without attempting a parse.
  let parseable = value.length > 0;

  if (parseable) {
    try {
      JSON.parse(value);
    } catch {
      parseable = false;
    }
  }

  return parseable;
}

export function resolveSiteHealthUrl(site: CmsLabConfig["site"]): URL {
if (site.healthUrl) {
return new URL(site.healthUrl);
Expand Down
6 changes: 6 additions & 0 deletions packages/core/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ export type RouteChecksOptions = {
* hostname), and a canonical whose path disagrees with the probed path.
*/
canonical?: boolean;
/**
* Validate JSON-LD structured data on each 2xx route. Opt-in, since it
* requires reading the response body. Flags malformed `application/ld+json`
* blocks and reports routes with no structured data.
*/
structuredData?: boolean;
};

export type RelationshipRule = {
Expand Down