Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions skills/frontmatter-guard/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,53 @@ JSON envelope (when `--json` is passed):

`gbrain frontmatter validate <path> --json` returns a similar envelope keyed on per-file results instead of per-source.

## Prevention — Writing Valid Frontmatter

**This is the most important section.** Fixing broken frontmatter is good. Not breaking it is better.

### YAML arrays (the #1 error source)

```yaml
# ✅ CORRECT — single-quoted YAML flow
tags: ['yc', 'w2025', 'ai']

# ✅ CORRECT — unquoted (if values have no special chars)
tags: [yc, w2025, ai]

# ✅ CORRECT — block style
tags:
- yc
- w2025

# ❌ WRONG — JSON-style double quotes (causes NESTED_QUOTES)
tags: ["yc", "w2025"]

# ❌ WRONG — mixed JSON objects and strings
tags: [{"name": "sports"}, "posterous"]
```

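**Why this happens:** `JSON.stringify()` wraps strings in double quotes. When code does `tags: [${items.map(t => JSON.stringify(t)).join(', ')}]`, it produces the broken pattern. Use single quotes instead, doubling any apostrophe inside a value (`''` is how a YAML single-quoted string escapes `'`): `tags: [${items.map(t => "'" + t.replace(/'/g, "''") + "'").join(', ')}]`. Avoid falling back to double quotes inside a flow array — the validator's JSON-array check flags array values that contain two or more double-quote characters.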

### Quoted scalars

```yaml
# ✅ CORRECT — single quotes for values with special chars
title: 'My "Quoted" Title'

# ✅ CORRECT — double quotes when value has apostrophes
title: "Men's Fashion Guide"

# ❌ WRONG — double quotes wrapping inner double quotes
title: "My "Quoted" Title"
```

### When to quote at all

- **Unquoted** is fine for simple values: `type: person`, `batch: w2025`
- **Quote** when the value contains `: " ' # [ ] { } | > & * ! ? ,` or starts with `@`
- **Single quotes** are the default safe choice
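- **Double quotes** only when the value itself contains apostrophes and no double quotes; if it contains both, use single quotes and double each apostrophe: `title: 'Men''s "Best" Guide'`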

## Anti-Patterns

**Don't auto-fix `MISSING_OPEN` or `EMPTY_FRONTMATTER` without user input.** These usually mean a human author started a page and didn't finish — silently inserting `---` markers around an unfinished draft is wrong.
Expand Down
67 changes: 61 additions & 6 deletions src/core/brain-writer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,67 @@ export function autoFixFrontmatter(
}
}

// 3. NESTED_QUOTES — rewrite `key: "...inner..."` lines that have 3+ unescaped
// 3a. JSON_ARRAY_IN_YAML — rewrite `key: ["x", "y"]` to `key: ['x', 'y']`.
//     This is the #1 source of NESTED_QUOTES errors. LLMs and ingestion
//     scripts serialize YAML arrays with JSON.stringify, producing double-
//     quoted items. Fix: convert every double-quoted item into a single-
//     quoted YAML scalar, escaping embedded apostrophes by doubling them
//     (`''`), the same escape step 3b uses for scalars.
//
//     The rewrite never emits double quotes. A double-quoted fallback would
//     leave `"` characters on the line, so a later pass would re-parse the
//     line and re-wrap items that were already single-quoted.
//
//     The rewrite is conservative: a line is left untouched when it cannot
//     be converted losslessly (nested `{}`/`[]` items, unterminated quotes,
//     backslash escapes other than `\"` and `\\`, or stray text after a
//     closing quote). Unquoted and already single-quoted items are kept
//     verbatim so numbers/booleans keep their type.
{
  const isBlank = (c: string): boolean => c === ' ' || c === '\t';

  /**
   * Re-serializes the inside of a one-line flow sequence.
   *
   * @param inner Text between the outer `[` and `]`.
   * @returns The items joined with `, `, with double-quoted items converted
   *          to single-quoted ones; `null` when nothing was converted or the
   *          text cannot be converted safely.
   */
  const requoteFlowItems = (inner: string): string | null => {
    const items: string[] = [];
    const len = inner.length;
    let converted = 0;
    let pos = 0;
    while (pos < len) {
      while (pos < len && isBlank(inner.charAt(pos))) pos++;
      if (pos >= len) break; // trailing whitespace or trailing comma

      const opener = inner.charAt(pos);
      let item: string;
      if (opener === '"') {
        // Double-quoted item: decode `\"` and `\\`, then single-quote it.
        let value = '';
        let closed = false;
        pos++;
        while (pos < len) {
          const c = inner.charAt(pos);
          if (c === '\\') {
            const escaped = inner.charAt(pos + 1);
            // Escapes such as \n or \u00e9 have no single-quoted equivalent.
            if (escaped !== '"' && escaped !== '\\') return null;
            value += escaped;
            pos += 2;
          } else if (c === '"') {
            closed = true;
            pos++;
            break;
          } else {
            value += c;
            pos++;
          }
        }
        if (!closed) return null;
        item = `'${value.replace(/'/g, "''")}'`;
        converted++;
      } else if (opener === "'") {
        // Already single-quoted: copy verbatim, honouring the `''` escape.
        let end = pos + 1;
        let closed = false;
        while (end < len) {
          if (inner.charAt(end) !== "'") { end++; continue; }
          if (inner.charAt(end + 1) === "'") { end += 2; continue; }
          closed = true;
          break;
        }
        if (!closed) return null;
        item = inner.slice(pos, end + 1);
        pos = end + 1;
      } else {
        // Nested collections would be corrupted by a flat comma split.
        if (opener === '{' || opener === '[') return null;
        let end = pos;
        while (end < len && inner.charAt(end) !== ',') end++;
        item = inner.slice(pos, end).trim();
        if (item.length === 0 || item.includes('"')) return null;
        pos = end;
      }

      // Only whitespace may separate an item from the next comma.
      while (pos < len && isBlank(inner.charAt(pos))) pos++;
      if (pos < len) {
        if (inner.charAt(pos) !== ',') return null;
        pos++;
      }
      items.push(item);
    }
    return converted > 0 ? items.join(', ') : null;
  };

  const lines = working.split('\n');
  const firstNonEmpty = lines.findIndex(l => l.trim().length > 0);
  if (firstNonEmpty >= 0 && lines[firstNonEmpty].trim() === '---') {
    // The frontmatter ends at the next `---`; if it is missing, scan to EOF.
    let closeIdx = lines.length;
    for (let i = firstNonEmpty + 1; i < lines.length; i++) {
      if (lines[i].trim() === '---') { closeIdx = i; break; }
    }
    let fixedAny = false;
    for (let i = firstNonEmpty + 1; i < closeIdx; i++) {
      // Detect JSON-style arrays: key: ["val1", "val2"]
      const arrMatch = lines[i].match(/^(\s*[A-Za-z_][\w-]*\s*:\s*)\[(.*)\]\s*$/);
      if (!arrMatch || !arrMatch[2].includes('"')) continue;
      const [, prefix, inner] = arrMatch;
      const rewritten = requoteFlowItems(inner);
      if (rewritten === null) continue;
      lines[i] = `${prefix}[${rewritten}]`;
      fixedAny = true;
    }
    if (fixedAny) {
      working = lines.join('\n');
      fixes.push({
        code: 'NESTED_QUOTES',
        description: 'Rewrote JSON-style double-quoted arrays to single-quoted YAML',
      });
    }
  }
}

// 3b. NESTED_QUOTES — rewrite `key: "...inner..."` lines that have 3+ unescaped
// double-quotes by switching the outer wrapper to single quotes and
// leaving inner quotes alone.
{
Expand All @@ -175,12 +235,7 @@ export function autoFixFrontmatter(
for (let j = 0; j < inner.length; j++) {
if (inner[j] === '"' && (j === 0 || inner[j - 1] !== '\\')) count++;
}
// Total " on the line includes the two outer quotes the regex
// captured, plus whatever's in inner. We need 3+ to trigger.
if (count >= 1) {
// Inner already has unescaped " — outer wrap is causing the YAML
// parse failure. Rewrite to 'single-quoted'. YAML escapes `'` inside
// a single-quoted string by doubling it.
const escapedInner = inner.replace(/'/g, "''");
lines[i] = `${prefix}'${escapedInner}'${trailing ? ' ' + trailing : ''}`.replace(/\s+$/, '');
fixedAny = true;
Expand Down
20 changes: 17 additions & 3 deletions src/core/markdown.ts
Original file line number Diff line number Diff line change
Expand Up @@ -216,14 +216,28 @@ function collectValidationErrors(
});
}

// 5. NESTED_QUOTES — common breakage pattern: `title: "Name "Nick" Last"`.
// Detect any frontmatter `key: ...` line whose value contains 3 or more
// unescaped double-quote characters. A clean quoted value has 2.
// 5. NESTED_QUOTES — two sub-patterns:
// 5a. JSON-style arrays: `tags: ["yc", "w2025"]` — the #1 source.
// LLMs and ingestion scripts use JSON.stringify for array items.
// 5b. Nested scalar quotes: `title: "Name "Nick" Last"` — 3+ unescaped
// double-quote characters in a scalar value.
for (let i = firstNonEmpty + 1; i < closeLine; i++) {
const line = lines[i];
const m = line.match(/^\s*[A-Za-z_][\w-]*\s*:\s*(.*)$/);
if (!m) continue;
const value = m[1];

// 5a. JSON-style array: ["...", "..."]
if (/^\[.*".*".*\]$/.test(value.trim())) {
errors.push({
code: 'NESTED_QUOTES',
message: 'JSON-style double-quoted array in YAML (use single quotes: [\'val1\', \'val2\'])',
line: i + 1,
});
continue;
}

// 5b. Nested scalar quotes: 3+ unescaped double-quote chars.
let count = 0;
for (let j = 0; j < value.length; j++) {
if (value[j] === '"' && (j === 0 || value[j - 1] !== '\\')) count++;
Expand Down
14 changes: 13 additions & 1 deletion src/core/operations.ts
Original file line number Diff line number Diff line change
Expand Up @@ -585,7 +585,19 @@ const put_page: Operation = {
// default-source clobber path. importFromContent already accepts
// opts.sourceId (PR #707/#757 engine work); previously the op handler
// just didn't pass it.
const result = await importFromContent(ctx.engine, slug, p.content as string, {
// Pre-write normalization: auto-fix mechanical frontmatter issues
// (JSON-style arrays, nested quotes) before import. Non-blocking —
// if autoFixFrontmatter throws, fall through with original content.
let normalizedContent = p.content as string;
try {
const { autoFixFrontmatter } = await import('./brain-writer.ts');
const { content: fixed } = autoFixFrontmatter(normalizedContent);
normalizedContent = fixed;
} catch {
// Non-fatal; proceed with original content.
}

const result = await importFromContent(ctx.engine, slug, normalizedContent, {
noEmbed,
...(ctx.sourceId ? { sourceId: ctx.sourceId } : {}),
});
Expand Down
Loading