From a255e6bf4da684e68af199f4b424dfc6b2b66429 Mon Sep 17 00:00:00 2001
From: Tirth Kanani <tirthkanani18@gmail.com>
Date: Fri, 5 Jun 2026 17:06:54 +0100
Subject: [PATCH] fix(parse-knowledge-base): extract CommonMark [](page.md)
 links in Karpathy code path (#361)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The deterministic parser only extracted links via `[[wikilink]]` syntax. A
Karpathy-pattern wiki (has index.md + multiple cross-linked .md files +
schema) that uses CommonMark `[label](page.md)` links — common on
GitHub/GitLab where `[[wikilinks]]` aren't rendered — was detected as
karpathy but produced zero deterministic edges, leaving the graph to be
inferred entirely from prose by the LLM phase.

Inside the existing Karpathy code path, also extract `[label](page.md)`
links and resolve them by normalised relative path. Both `parse_index` and
the per-article extraction loop now scan both link styles, so category
membership and inter-article edges are recovered for mixed and pure
CommonMark Karpathy wikis. Pure-wikilink wikis produce the same nodes and
edges as before (no regression); the only manifest change for them is a new
`stats.mdLinks` counter, which is 0.

Resolution handles `pages/x.md`, `./pages/x.md`, and `/pages/x.md`
identically; query/fragment suffixes are stripped; image links, external
URLs, and fenced code blocks are filtered.

Distinct from #342 (still wikilink-only) and #312 (separate doctrine
format gated on `index.md` being absent).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../test_parse_knowledge_base.py              | 494 ++++++++++++++++++
 .../parse-knowledge-base.py                   | 291 ++++++++++-
 2 files changed, 769 insertions(+), 16 deletions(-)
 create mode 100644 tests/skill/understand-knowledge/test_parse_knowledge_base.py

diff --git a/tests/skill/understand-knowledge/test_parse_knowledge_base.py b/tests/skill/understand-knowledge/test_parse_knowledge_base.py
new file mode 100644
index 00000000..6cc443ce
--- /dev/null
+++ b/tests/skill/understand-knowledge/test_parse_knowledge_base.py
@@ -0,0 +1,494 @@
+#!/usr/bin/env python3
+"""
+test_parse_knowledge_base.py — Tests for the Karpathy-pattern wiki parser.
+
+Focus: regression coverage for issue #361 — Karpathy wikis using CommonMark
+`[label](page.md)` links yield 0 deterministic edges.
+
+The fix extracts CommonMark `[](page.md)` links inside the Karpathy code path
+alongside the existing `[[wikilink]]` handling. The tests below cover:
+
+  - pure CommonMark wikis (no `[[ ]]` anywhere) — must produce real edges.
+  - mixed `[[ ]]` + `[](page.md)` wikis — both styles must contribute edges.
+  - pure-wikilink wikis — regression: nodes and edges must be unchanged.
+  - md-link helpers — filter external URLs, anchors, image links, fenced
+    code blocks; resolve relative, `./relative`, and `/absolute` targets.
+
+Run from the repo root:
+    python3 -m unittest tests.skill.understand-knowledge.test_parse_knowledge_base -v
+
+Or directly:
+    python3 tests/skill/understand-knowledge/test_parse_knowledge_base.py
+"""
+
+from __future__ import annotations
+
+import importlib.util
+import shutil
+import sys
+import tempfile
+import unittest
+from pathlib import Path
+from typing import Any
+
+
+# ── Module loader ─────────────────────────────────────────────────────────
+# `parse-knowledge-base.py` has a hyphen in its name, so we cannot `import`
+# it directly. Load it via importlib so we can call its module-level helpers.
+
+_HERE = Path(__file__).resolve().parent
+_REPO_ROOT = _HERE.parent.parent.parent
+_MODULE_PATH = (
+    _REPO_ROOT
+    / "understand-anything-plugin"
+    / "skills"
+    / "understand-knowledge"
+    / "parse-knowledge-base.py"
+)
+
+
+def _load_module() -> Any:
+    spec = importlib.util.spec_from_file_location(
+        "parse_knowledge_base", _MODULE_PATH
+    )
+    if spec is None or spec.loader is None:
+        raise RuntimeError(f"Could not load module from {_MODULE_PATH}")
+    module = importlib.util.module_from_spec(spec)
+    sys.modules["parse_knowledge_base"] = module
+    spec.loader.exec_module(module)
+    return module
+
+
+pkb = _load_module()
+
+
+# ── Fixture builder ───────────────────────────────────────────────────────
+
+
+class _WikiFixture:
+    """Build a temp Karpathy-pattern wiki on disk for parse_wiki()."""
+
+    def __init__(self) -> None:
+        self.tmp = Path(tempfile.mkdtemp(prefix="ua-pkb-"))
+
+    def write(self, rel_path: str, content: str) -> Path:
+        p = self.tmp / rel_path
+        p.parent.mkdir(parents=True, exist_ok=True)
+        p.write_text(content, encoding="utf-8")
+        return p
+
+    def cleanup(self) -> None:
+        shutil.rmtree(self.tmp, ignore_errors=True)
+
+
+def _edge_pairs(manifest: dict, edge_type: str | None = None) -> set[tuple[str, str]]:
+    """Return {(source, target)} for edges in manifest, optionally filtered by type."""
+    return {
+        (e["source"], e["target"])
+        for e in manifest["edges"]
+        if edge_type is None or e["type"] == edge_type
+    }
+
+
+# ── is_internal_md_target ─────────────────────────────────────────────────
+
+
+class IsInternalMdTargetTests(unittest.TestCase):
+    """Filter logic for raw markdown-link targets."""
+
+    def test_accepts_relative_md_paths(self) -> None:
+        for href in ["page.md", "pages/alpha.md", "./pages/alpha.md", "/pages/alpha.md"]:
+            with self.subTest(href=href):
+                self.assertTrue(pkb.is_internal_md_target(href))
+
+    def test_rejects_external_urls(self) -> None:
+        for href in [
+            "https://example.com/page.md",
+            "http://example.com",
+            "mailto:foo@example.com",
+            "ftp://example.com/file.md",
+        ]:
+            with self.subTest(href=href):
+                self.assertFalse(pkb.is_internal_md_target(href))
+
+    def test_rejects_bare_anchors(self) -> None:
+        self.assertFalse(pkb.is_internal_md_target("#section"))
+        self.assertFalse(pkb.is_internal_md_target("#"))
+
+    def test_rejects_non_md_assets(self) -> None:
+        for href in ["image.png", "data.json", "script.js", "page", "pages/"]:
+            with self.subTest(href=href):
+                self.assertFalse(pkb.is_internal_md_target(href))
+
+    def test_accepts_md_with_anchor_or_query(self) -> None:
+        # Path-part ends in .md once query/fragment are stripped.
+        self.assertTrue(pkb.is_internal_md_target("page.md#section"))
+        self.assertTrue(pkb.is_internal_md_target("page.md?v=1"))
+
+    def test_rejects_empty(self) -> None:
+        self.assertFalse(pkb.is_internal_md_target(""))
+        self.assertFalse(pkb.is_internal_md_target("   "))
+
+
+# ── extract_md_links ──────────────────────────────────────────────────────
+
+
+class ExtractMdLinksTests(unittest.TestCase):
+    """`[label](page.md)` extraction with image / code-block / URL filters."""
+
+    def test_extracts_basic_md_link(self) -> None:
+        links = pkb.extract_md_links("See [Alpha](pages/alpha.md) for details.")
+        self.assertEqual(len(links), 1)
+        self.assertEqual(links[0]["target"], "pages/alpha.md")
+        self.assertEqual(links[0]["display"], "Alpha")
+
+    def test_skips_image_links(self) -> None:
+        # `![alt](src)` is an image embed, not a page link — never an edge.
+        text = "![diagram](pages/diagram.md)\n[Alpha](pages/alpha.md)"
+        links = pkb.extract_md_links(text)
+        self.assertEqual([l["target"] for l in links], ["pages/alpha.md"])
+
+    def test_skips_external_urls(self) -> None:
+        text = "[GitHub](https://github.com/foo/bar) and [Alpha](pages/alpha.md)"
+        links = pkb.extract_md_links(text)
+        self.assertEqual([l["target"] for l in links], ["pages/alpha.md"])
+
+    def test_skips_links_in_fenced_code_blocks(self) -> None:
+        text = (
+            "Live link: [Alpha](pages/alpha.md)\n"
+            "\n"
+            "```markdown\n"
+            "Example: [NotARealEdge](pages/example.md)\n"
+            "```\n"
+        )
+        links = pkb.extract_md_links(text)
+        self.assertEqual([l["target"] for l in links], ["pages/alpha.md"])
+
+    def test_skips_anchors_and_non_md(self) -> None:
+        text = "[anchor](#section) and [json](data.json) and [Alpha](alpha.md)"
+        links = pkb.extract_md_links(text)
+        self.assertEqual([l["target"] for l in links], ["alpha.md"])
+
+    def test_returns_empty_for_text_without_links(self) -> None:
+        self.assertEqual(pkb.extract_md_links("plain text, no links"), [])
+        self.assertEqual(pkb.extract_md_links(""), [])
+
+    def test_preserves_wikilinks_untouched_in_extract_wikilinks(self) -> None:
+        # Backward-compat sanity: extract_wikilinks is unchanged.
+        text = "See [[Alpha]] and [Alpha](pages/alpha.md)."
+        wls = pkb.extract_wikilinks(text)
+        self.assertEqual([w["target"] for w in wls], ["Alpha"])
+
+
+# ── _normalise_md_target ──────────────────────────────────────────────────
+
+
+class NormaliseMdTargetTests(unittest.TestCase):
+    """Path normalisation for md-link resolution."""
+
+    def test_bare_relative_resolves_against_base_dir(self) -> None:
+        # File at `pages/alpha.md` links to `beta.md` → resolves to
+        # `pages/beta.md` relative to wiki_root.
+        norm = pkb._normalise_md_target(
+            "beta.md", Path("pages"), Path("/wiki")
+        )
+        self.assertEqual(norm, "pages/beta.md")
+
+    def test_dot_slash_prefix_normalised(self) -> None:
+        norm = pkb._normalise_md_target(
+            "./beta.md", Path("pages"), Path("/wiki")
+        )
+        self.assertEqual(norm, "pages/beta.md")
+
+    def test_absolute_path_treated_as_wiki_root_relative(self) -> None:
+        norm = pkb._normalise_md_target(
+            "/pages/alpha.md", Path("anywhere"), Path("/wiki")
+        )
+        self.assertEqual(norm, "pages/alpha.md")
+
+    def test_parent_dir_traversal(self) -> None:
+        # `pages/sub/file.md` links to `../alpha.md` → `pages/alpha.md`.
+        norm = pkb._normalise_md_target(
+            "../alpha.md", Path("pages/sub"), Path("/wiki")
+        )
+        self.assertEqual(norm, "pages/alpha.md")
+
+    def test_escape_above_wiki_root_returns_none(self) -> None:
+        # `pages/alpha.md` links to `../../escape.md` (would escape wiki_root).
+        norm = pkb._normalise_md_target(
+            "../../escape.md", Path("pages"), Path("/wiki")
+        )
+        self.assertIsNone(norm)
+
+    def test_query_and_fragment_stripped(self) -> None:
+        norm = pkb._normalise_md_target(
+            "pages/alpha.md#section", Path("."), Path("/wiki")
+        )
+        self.assertEqual(norm, "pages/alpha.md")
+        norm2 = pkb._normalise_md_target(
+            "pages/alpha.md?v=1", Path("."), Path("/wiki")
+        )
+        self.assertEqual(norm2, "pages/alpha.md")
+
+    def test_normalised_lowercase(self) -> None:
+        # `path_map` uses lower-cased keys for case-insensitive resolution.
+        norm = pkb._normalise_md_target(
+            "Pages/Alpha.MD", Path("."), Path("/wiki")
+        )
+        self.assertEqual(norm, "pages/alpha.md")
+
+
+# ── parse_wiki end-to-end ─────────────────────────────────────────────────
+
+
+class ParseWikiCommonMarkOnlyTests(unittest.TestCase):
+    """Regression for issue #361: a Karpathy-detected wiki using only
+    CommonMark `[](page.md)` links must produce deterministic edges.
+
+    Pre-fix behaviour: 0 edges, 0 category memberships → silent degradation.
+    """
+
+    def setUp(self) -> None:
+        self.fix = _WikiFixture()
+        self.addCleanup(self.fix.cleanup)
+        # A minimal Karpathy-shaped wiki (has index.md, multiple .md files,
+        # ≥3 markdown files) but using only CommonMark links.
+        self.fix.write(
+            "index.md",
+            "# Wiki Index\n\n"
+            "## Topic\n\n"
+            "- [Alpha](pages/alpha.md)\n"
+            "- [Beta](pages/beta.md)\n",
+        )
+        self.fix.write(
+            "pages/alpha.md",
+            "# Alpha\n\nAlpha relates to [Beta](beta.md) and back to "
+            "[the index](../index.md).\n",
+        )
+        self.fix.write(
+            "pages/beta.md",
+            "# Beta\n\nBeta references [Alpha](alpha.md).\n",
+        )
+
+    def test_detected_as_karpathy(self) -> None:
+        det = pkb.detect_format(self.fix.tmp)
+        self.assertTrue(det["detected"])
+        self.assertEqual(det["format"], "karpathy")
+
+    def test_md_link_edges_resolved(self) -> None:
+        manifest = pkb.parse_wiki(self.fix.tmp)
+        related_pairs = _edge_pairs(manifest, "related")
+        # Alpha → Beta and Beta → Alpha (both via [](beta.md) and [](alpha.md))
+        self.assertIn(
+            ("article:pages/alpha", "article:pages/beta"), related_pairs,
+            f"Expected alpha→beta edge; got: {related_pairs}",
+        )
+        self.assertIn(
+            ("article:pages/beta", "article:pages/alpha"), related_pairs,
+        )
+
+    def test_categorized_under_edges_from_md_links_in_index(self) -> None:
+        manifest = pkb.parse_wiki(self.fix.tmp)
+        cat_pairs = _edge_pairs(manifest, "categorized_under")
+        # Both alpha and beta should be categorised under "Topic".
+        self.assertIn(("article:pages/alpha", "topic:topic"), cat_pairs)
+        self.assertIn(("article:pages/beta", "topic:topic"), cat_pairs)
+
+    def test_category_present_on_article_nodes(self) -> None:
+        manifest = pkb.parse_wiki(self.fix.tmp)
+        article_nodes = {
+            n["id"]: n for n in manifest["nodes"] if n["type"] == "article"
+        }
+        self.assertEqual(
+            article_nodes["article:pages/alpha"]["knowledgeMeta"]["category"],
+            "Topic",
+        )
+        self.assertEqual(
+            article_nodes["article:pages/beta"]["knowledgeMeta"]["category"],
+            "Topic",
+        )
+
+    def test_topic_count_includes_md_links(self) -> None:
+        manifest = pkb.parse_wiki(self.fix.tmp)
+        topic_node = next(n for n in manifest["nodes"] if n["type"] == "topic")
+        self.assertEqual(topic_node["name"], "Topic")
+        # Summary mentions "(2 articles)" — both md-link entries counted.
+        self.assertIn("(2 articles)", topic_node["summary"])
+
+    def test_stats_reports_md_links(self) -> None:
+        manifest = pkb.parse_wiki(self.fix.tmp)
+        stats = manifest["stats"]
+        # 3 md-links in body (alpha→beta + alpha→index + beta→alpha). The
+        # alpha→index link resolves to an infra page (not an article) and is
+        # counted as unresolved. We assert the floor (>= 2 successful) rather
+        # than the exact total to keep the test resilient to additions.
+        self.assertGreaterEqual(stats["mdLinks"], 2)
+        # The deterministic parser produced edges — the core regression check.
+        self.assertGreaterEqual(len(manifest["edges"]), 2)
+
+
+class ParseWikiMixedSyntaxTests(unittest.TestCase):
+    """Mixed Karpathy wiki: some pages use `[[ ]]`, others use `[](page.md)`.
+    Both styles must contribute edges; neither path may regress."""
+
+    def setUp(self) -> None:
+        self.fix = _WikiFixture()
+        self.addCleanup(self.fix.cleanup)
+        # Index uses both syntaxes side-by-side under a single category.
+        self.fix.write(
+            "index.md",
+            "# Wiki Index\n\n"
+            "## Topic\n\n"
+            "- [[alpha]]\n"
+            "- [Beta](pages/beta.md)\n",
+        )
+        self.fix.write(
+            "alpha.md",
+            "# Alpha\n\nAlpha links via wikilink to [[beta]] and via "
+            "md-link to [Gamma](pages/gamma.md).\n",
+        )
+        self.fix.write(
+            "pages/beta.md",
+            "# Beta\n\nBeta references [Alpha](../alpha.md).\n",
+        )
+        self.fix.write(
+            "pages/gamma.md",
+            "# Gamma\n\nGamma links back via [[alpha]].\n",
+        )
+
+    def test_wikilink_edges_preserved(self) -> None:
+        manifest = pkb.parse_wiki(self.fix.tmp)
+        related = _edge_pairs(manifest, "related")
+        # alpha → beta via [[beta]]; gamma → alpha via [[alpha]]
+        self.assertIn(("article:alpha", "article:pages/beta"), related)
+        self.assertIn(("article:pages/gamma", "article:alpha"), related)
+
+    def test_md_link_edges_added(self) -> None:
+        manifest = pkb.parse_wiki(self.fix.tmp)
+        related = _edge_pairs(manifest, "related")
+        # alpha → gamma via [Gamma](pages/gamma.md)
+        self.assertIn(("article:alpha", "article:pages/gamma"), related)
+        # beta → alpha via [Alpha](../alpha.md)
+        self.assertIn(("article:pages/beta", "article:alpha"), related)
+
+    def test_mixed_category_lookups(self) -> None:
+        manifest = pkb.parse_wiki(self.fix.tmp)
+        article_nodes = {
+            n["id"]: n for n in manifest["nodes"] if n["type"] == "article"
+        }
+        # alpha categorised via [[alpha]] wikilink in index.
+        self.assertEqual(
+            article_nodes["article:alpha"]["knowledgeMeta"]["category"],
+            "Topic",
+        )
+        # beta categorised via [Beta](pages/beta.md) md-link in index.
+        self.assertEqual(
+            article_nodes["article:pages/beta"]["knowledgeMeta"]["category"],
+            "Topic",
+        )
+
+    def test_categorized_under_mix(self) -> None:
+        manifest = pkb.parse_wiki(self.fix.tmp)
+        cat_pairs = _edge_pairs(manifest, "categorized_under")
+        self.assertIn(("article:alpha", "topic:topic"), cat_pairs)
+        self.assertIn(("article:pages/beta", "topic:topic"), cat_pairs)
+
+
+class ParseWikiPureWikilinkRegressionTests(unittest.TestCase):
+    """Existing pure-wikilink Karpathy wikis must produce the same edges as
+    before — no regression from the md-link extraction additions."""
+
+    def setUp(self) -> None:
+        self.fix = _WikiFixture()
+        self.addCleanup(self.fix.cleanup)
+        self.fix.write(
+            "index.md",
+            "# Wiki Index\n\n## Topic\n\n- [[alpha]]\n- [[beta]]\n",
+        )
+        self.fix.write(
+            "alpha.md",
+            "# Alpha\n\nAlpha relates to [[beta]].\n",
+        )
+        self.fix.write(
+            "beta.md",
+            "# Beta\n\nBeta relates to [[alpha]].\n",
+        )
+
+    def test_no_md_link_stats_when_pure_wikilink(self) -> None:
+        manifest = pkb.parse_wiki(self.fix.tmp)
+        # mdLinks key exists but is 0 — no regression in counter behaviour.
+        self.assertEqual(manifest["stats"]["mdLinks"], 0)
+        # alpha.md and beta.md each carry one wikilink in their bodies
+        # (`[[beta]]` and `[[alpha]]` respectively). Wikilinks inside
+        # index.md are tallied by `parse_index`, not by `stats["wikilinks"]`,
+        # which only counts links inside article bodies.
+        self.assertEqual(manifest["stats"]["wikilinks"], 2)
+
+    def test_wikilink_edges_match_expected(self) -> None:
+        manifest = pkb.parse_wiki(self.fix.tmp)
+        related = _edge_pairs(manifest, "related")
+        self.assertEqual(
+            related,
+            {("article:alpha", "article:beta"), ("article:beta", "article:alpha")},
+        )
+
+    def test_categorized_under_unchanged(self) -> None:
+        manifest = pkb.parse_wiki(self.fix.tmp)
+        cat_pairs = _edge_pairs(manifest, "categorized_under")
+        self.assertEqual(
+            cat_pairs,
+            {("article:alpha", "topic:topic"), ("article:beta", "topic:topic")},
+        )
+
+    def test_no_mdlinks_key_in_knowledge_meta(self) -> None:
+        # Articles without md-links shouldn't carry an empty `mdLinks` key —
+        # keeps the manifest output identical to pre-fix for pure wikilink
+        # wikis.
+        manifest = pkb.parse_wiki(self.fix.tmp)
+        for node in manifest["nodes"]:
+            if node["type"] == "article":
+                self.assertNotIn(
+                    "mdLinks", node.get("knowledgeMeta", {}),
+                    f"node {node['id']} unexpectedly has mdLinks key",
+                )
+
+
+# ── resolve_md_link ───────────────────────────────────────────────────────
+
+
+class ResolveMdLinkTests(unittest.TestCase):
+    """`resolve_md_link` direct-call tests against a synthetic path_map."""
+
+    def test_resolves_relative(self) -> None:
+        path_map = {"pages/alpha.md": "pages/alpha"}
+        article_ids = {"article:pages/alpha"}
+        resolved = pkb.resolve_md_link(
+            "alpha.md", Path("pages"), Path("/wiki"), path_map, article_ids,
+        )
+        self.assertEqual(resolved, "article:pages/alpha")
+
+    def test_resolves_absolute(self) -> None:
+        path_map = {"pages/alpha.md": "pages/alpha"}
+        article_ids = {"article:pages/alpha"}
+        resolved = pkb.resolve_md_link(
+            "/pages/alpha.md", Path("other"), Path("/wiki"), path_map, article_ids,
+        )
+        self.assertEqual(resolved, "article:pages/alpha")
+
+    def test_returns_none_for_unresolved(self) -> None:
+        resolved = pkb.resolve_md_link(
+            "missing.md", Path("."), Path("/wiki"), {}, set(),
+        )
+        self.assertIsNone(resolved)
+
+    def test_returns_none_when_not_in_node_set(self) -> None:
+        path_map = {"pages/alpha.md": "pages/alpha"}
+        # node_ids deliberately empty — article is in path_map but not nodes.
+        resolved = pkb.resolve_md_link(
+            "alpha.md", Path("pages"), Path("/wiki"), path_map, set(),
+        )
+        self.assertIsNone(resolved)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/understand-anything-plugin/skills/understand-knowledge/parse-knowledge-base.py b/understand-anything-plugin/skills/understand-knowledge/parse-knowledge-base.py
index d6070512..df68dd1d 100644
--- a/understand-anything-plugin/skills/understand-knowledge/parse-knowledge-base.py
+++ b/understand-anything-plugin/skills/understand-knowledge/parse-knowledge-base.py
@@ -23,10 +23,21 @@
 # Regex patterns
 # ---------------------------------------------------------------------------
 WIKILINK_RE = re.compile(r"\[\[([^\]|]+)(?:\|([^\]]+))?\]\]")
+# CommonMark inline link: [label](target) or [label](target "title").
+#   - `(?<!\!)` skips image links `![alt](src)`.
+#   - The label `[label]` may not contain `]`.
+#   - The target may not contain whitespace or `)` — covers the
+#     overwhelming majority of links found in wiki markdown. An optional
+#     quoted title is matched but not captured; only the target is used.
+MD_LINK_RE = re.compile(r"""(?<!\!)\[([^\]]+)\]\(([^)\s]+)(?:\s+(?:"[^"]*"|'[^']*'))?\)""")
 FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n", re.DOTALL)
 CODE_BLOCK_RE = re.compile(r"```(\w*)")
 HEADING_RE = re.compile(r"^(#{1,6})\s+(.+)$", re.MULTILINE)
 INDEX_SECTION_RE = re.compile(r"^##\s+(.+)$", re.MULTILINE)
+# Matches a leading URI scheme (`http:`, `https:`, `mailto:`, `ftp:`, …).
+# `is_internal_md_target` uses it to reject external URLs; bare anchors
+# and non-`.md` targets are rejected there by separate checks.
+_URL_SCHEME_RE = re.compile(r"^[a-zA-Z][a-zA-Z0-9+\-.]*:")
 
 # Files that are part of wiki infrastructure, not content articles
 INFRA_FILES = {"index.md", "log.md", "claude.md", "agents.md", "soul.md"}
@@ -97,6 +108,61 @@ def extract_wikilinks(text: str) -> list[dict]:
     return links
 
 
+def is_internal_md_target(target: str) -> bool:
+    """Return True if a markdown-link target points at an internal .md page.
+
+    Filters out external URLs (http://, mailto:, protocol-relative `//host`),
+    bare anchors (`#section`), and non-markdown asset paths. Targets without
+    a `.md` extension are rejected — this parser only links between pages.
+    """
+    t = target.strip() if target else ""
+    if not t:
+        return False
+    # Pure anchor inside the current document — not a page link.
+    if t.startswith("#"):
+        return False
+    # External / scheme-prefixed URLs (http://, https://, mailto:, ftp:, …).
+    if _URL_SCHEME_RE.match(t):
+        return False
+    # Protocol-relative URLs (`//host/page.md`) are external too; without
+    # this check they would pass as wiki-root-absolute paths.
+    if t.startswith("//"):
+        return False
+    # Strip query / fragment for the extension check.
+    path_part = t.split("#", 1)[0].split("?", 1)[0]
+    # Only resolve targets that point at a markdown file.
+    return path_part.lower().endswith(".md")
+
+
+def extract_md_links(text: str) -> list[dict]:
+    """Extract CommonMark `[label](page.md)` links pointing at internal .md
+    pages.
+
+    Skips image links (`![]()`), external URLs, anchors, and non-markdown
+    assets. Returned targets are raw (path-relative as written) — call
+    `resolve_md_link` to map them to article IDs.
+
+    Fenced code blocks are stripped before scanning so that a syntax-coloured
+    example link inside ```` ```md ```` does not get treated as a real edge.
+    """
+    if not text:
+        return []
+    # Strip fenced code blocks before scanning. We can't reliably tell which
+    # links inside a code fence are intentional, so we exclude them all —
+    # mirrors how renderers display them as inert text.
+    stripped = re.sub(r"```[\s\S]*?```", "", text)
+    links = []
+    for m in MD_LINK_RE.finditer(stripped):
+        target = m.group(2).strip()
+        if not is_internal_md_target(target):
+            continue
+        links.append({
+            "target": target,
+            "display": m.group(1).strip() or None,
+        })
+    return links
+
+
 def extract_headings(text: str) -> list[dict]:
     """Extract all markdown headings with level and text."""
     return [
@@ -168,7 +234,20 @@ def extract_h1(text: str) -> str:
 # ---------------------------------------------------------------------------
 
 def parse_index(index_path: Path) -> list[dict]:
-    """Parse index.md to extract categories from ## headings and their wikilinks."""
+    """Parse index.md to extract categories from ## headings and their links.
+
+    Recognises both `[[wikilink]]` and CommonMark `[label](page.md)` styles
+    under each `## Section` heading. Returns categories with two parallel
+    target lists:
+
+      - `articles`  — raw wikilink targets (stems or filenames), kept as
+        strings for backward compatibility with existing call sites.
+      - `md_links`  — raw CommonMark link targets (relative paths) that need
+        path-based resolution.
+
+    The two lists are populated independently so a wiki that uses only one
+    syntax (or both) keeps working.
+    """
     if not index_path.is_file():
         return []
     text = index_path.read_text(encoding="utf-8", errors="replace")
@@ -182,6 +261,7 @@ def parse_index(index_path: Path) -> list[dict]:
             current_category = {
                 "name": sec_match.group(1).strip(),
                 "articles": [],
+                "md_links": [],
             }
             categories.append(current_category)
             continue
@@ -190,6 +270,15 @@ def parse_index(index_path: Path) -> list[dict]:
         if current_category:
             for wl in WIKILINK_RE.finditer(line):
                 current_category["articles"].append(wl.group(1).strip())
+            # Also collect CommonMark `[label](page.md)` links so a Karpathy
+            # wiki rendered on GitHub/GitLab (which doesn't render `[[ ]]`)
+            # still produces deterministic category membership. Each link is
+            # filtered through `is_internal_md_target` so external URLs and
+            # image links are ignored.
+            for ml in MD_LINK_RE.finditer(line):
+                target = ml.group(2).strip()
+                if is_internal_md_target(target):
+                    current_category["md_links"].append(target)
 
     return categories
 
@@ -275,6 +364,94 @@ def resolve_wikilink(target: str, name_map: dict[str, str], node_ids: set[str] |
     return None
 
 
+def build_path_to_stem_map(wiki_root: Path) -> dict[str, str]:
+    """Build a case-insensitive map from `posix-style-relative-path.md` to
+    article stem (relative to wiki_root, no extension).
+
+    Used by `resolve_md_link` so CommonMark `[label](page.md)` targets resolve
+    by relative path even when the basename collides with another file (where
+    `name_map` deliberately drops the ambiguous bare-basename entry).
+    """
+    path_map: dict[str, str] = {}
+    for md_file in wiki_root.rglob("*.md"):
+        rel = md_file.relative_to(wiki_root)
+        stem = rel.with_suffix("").as_posix()
+        path_map[rel.as_posix().lower()] = stem
+    return path_map
+
+
+def _normalise_md_target(target: str, base_dir: Path, wiki_root: Path) -> str | None:
+    """Normalise a CommonMark link `target` to a posix path relative to
+    `wiki_root`.
+
+    `target` is the raw href as written in the markdown source. `base_dir` is
+    the directory of the file containing the link (relative to `wiki_root` —
+    use `Path('.')` for files at the wiki root). Behaviour:
+
+    - strips a trailing `#anchor` and `?query`, then percent-decodes the path;
+    - resolves `./`, `../`, and bare relative paths against `base_dir`;
+    - treats absolute paths (`/pages/x.md`) as relative to `wiki_root`;
+    - rejects paths that escape `wiki_root` (returns None).
+
+    Returns the lower-cased posix relative path (e.g. `"pages/alpha.md"`) or
+    None if the target is unresolvable.
+    """
+    from urllib.parse import unquote
+    # Strip query/fragment first, then decode `%20`-style escapes so links to
+    # files with spaces or non-ASCII names match the on-disk path.
+    href = unquote((target or "").split("#", 1)[0].split("?", 1)[0]).strip()
+    if not href:
+        return None
+    # Absolute paths in the wiki are treated as relative to the wiki root —
+    # mirrors how GitHub renders `/pages/x.md` in repo-rooted markdown.
+    if href.startswith("/"):
+        candidate = Path(href.lstrip("/"))
+    else:
+        candidate = base_dir / href
+    # Manual normalisation of `.` and `..` segments without touching the
+    # filesystem (Path.resolve would follow symlinks and require existence).
+    parts: list[str] = []
+    for part in candidate.as_posix().split("/"):
+        if part in ("", "."):
+            continue
+        if part == "..":
+            if not parts:
+                # Escapes wiki_root — unresolvable.
+                return None
+            parts.pop()
+        else:
+            parts.append(part)
+    if not parts:
+        return None
+    return "/".join(parts).lower()
+
+
+def resolve_md_link(
+    target: str,
+    base_dir: Path,
+    wiki_root: Path,
+    path_map: dict[str, str],
+    node_ids: set[str] | None = None,
+) -> str | None:
+    """Resolve a CommonMark `[label](path.md)` target to an article node ID.
+
+    Resolution is by normalised relative path (`pages/alpha.md`,
+    `./pages/alpha.md`, and `/pages/alpha.md` all map to the same key).
+    Returns None when the target cannot be matched against `path_map` or when
+    `node_ids` is provided and the resolved candidate is not in it.
+    """
+    norm = _normalise_md_target(target, base_dir, wiki_root)
+    if not norm:
+        return None
+    stem = path_map.get(norm)
+    if not stem:
+        return None
+    candidate = f"article:{stem}"
+    if node_ids is not None and candidate not in node_ids:
+        return None
+    return candidate
+
+
 def parse_wiki(root: Path) -> dict:
     """Parse a Karpathy-pattern wiki and produce the scan manifest."""
     detection = detect_format(root)
@@ -286,8 +463,10 @@ def parse_wiki(root: Path) -> dict:
     wiki_root = Path(detection["wiki_root"])
     raw_root = root / "raw"
 
-    # Build name resolution map
+    # Build name resolution map (wikilinks: by stem/basename)
     name_map = build_name_to_stem_map(wiki_root)
+    # Build path resolution map (md-links: by full relative path)
+    path_map = build_path_to_stem_map(wiki_root)
 
     # Find index.md and log.md
     index_path = wiki_root / "index.md"
@@ -301,11 +480,33 @@ def parse_wiki(root: Path) -> dict:
     categories = parse_index(index_path)
     log_entries = parse_log(log_path)
 
-    # Build category lookup: wikilink target → category name
+    # Resolve the index file's directory relative to wiki_root. This is the
+    # base against which md-link targets inside index.md are resolved. When
+    # the index lives outside wiki_root (e.g. repo-root index.md while
+    # wiki_root is root/wiki), `_normalise_md_target` will reject targets
+    # that escape via `..` — those won't have matching article IDs anyway.
+    try:
+        index_base = index_path.parent.relative_to(wiki_root)
+    except ValueError:
+        index_base = Path(".")
+
+    # Build category lookups:
+    #  - by wikilink target (lower-cased stem/basename) — existing behaviour
+    #  - by md-link relative-stem (resolved against the index file's directory)
+    # The md_category_lookup is keyed by the resolved `article:<stem>` ID so
+    # the per-article lookup below is a single dict access.
     category_lookup: dict[str, str] = {}
+    md_category_lookup: dict[str, str] = {}
     for cat in categories:
         for article_target in cat["articles"]:
             category_lookup[article_target.lower()] = cat["name"]
+        for md_target in cat.get("md_links", []):
+            norm = _normalise_md_target(md_target, index_base, wiki_root)
+            if not norm:
+                continue
+            stem = path_map.get(norm)
+            if stem:
+                md_category_lookup[f"article:{stem}"] = cat["name"]
 
     # --- Pre-compute article IDs (for edge resolution validation) ---
     # Only skip infra files at the wiki root level, not in subdirectories
@@ -323,7 +524,14 @@ def parse_wiki(root: Path) -> dict:
     nodes = []
     edges = []
     warnings = []
-    stats = {"articles": 0, "sources": 0, "topics": 0, "wikilinks": 0, "unresolved": 0}
+    stats = {
+        "articles": 0,
+        "sources": 0,
+        "topics": 0,
+        "wikilinks": 0,
+        "mdLinks": 0,
+        "unresolved": 0,
+    }
 
     for md_file in sorted(wiki_root.rglob("*.md")):
         rel = md_file.relative_to(wiki_root)
@@ -338,17 +546,22 @@ def parse_wiki(root: Path) -> dict:
         h1 = extract_h1(text)
         frontmatter = extract_frontmatter(text)
         wikilinks = extract_wikilinks(text)
+        md_links = extract_md_links(text)
         headings = extract_headings(text)
         code_langs = extract_code_blocks(text)
         summary = extract_first_paragraph(text)
         line_count = text.count("\n") + 1
         word_count = len(text.split())
 
-        # Derive category from index.md lookup
+        node_id = f"article:{stem}"
+
+        # Derive category from index.md lookup.
+        # Order: wikilink basename → wikilink stem → md-link by article ID.
         category = category_lookup.get(basename.lower(), "")
         if not category:
-            # Try stem match
             category = category_lookup.get(stem.lower(), "")
+        if not category:
+            category = md_category_lookup.get(node_id, "")
 
         # Derive tags (deduplicated)
         tag_set: set[str] = set()
@@ -361,16 +574,15 @@ def parse_wiki(root: Path) -> dict:
             tag_set.update(t.strip() for t in fm_tags.split(",") if t.strip())
         tags = sorted(tag_set)
 
-        # Complexity from wikilink density
-        wl_count = len(wikilinks)
-        if wl_count > 15:
+        # Complexity from total link density (wikilinks + md-links).
+        link_count = len(wikilinks) + len(md_links)
+        if link_count > 15:
             complexity = "complex"
-        elif wl_count > 5:
+        elif link_count > 5:
             complexity = "moderate"
         else:
             complexity = "simple"
 
-        node_id = f"article:{stem}"
         nodes.append({
             "id": node_id,
             "type": "article",
@@ -381,12 +593,14 @@ def parse_wiki(root: Path) -> dict:
             "complexity": complexity,
             "knowledgeMeta": {
                 "wikilinks": [wl["target"] for wl in wikilinks],
+                **({"mdLinks": [ml["target"] for ml in md_links]} if md_links else {}),
                 **({"category": category} if category else {}),
                 "content": text[:3000],  # First 3000 chars for LLM analysis
             },
         })
         stats["articles"] += 1
-        stats["wikilinks"] += wl_count
+        stats["wikilinks"] += len(wikilinks)
+        stats["mdLinks"] += len(md_links)
 
         # Build edges from wikilinks (resolve against known article IDs)
         for wl in wikilinks:
@@ -403,20 +617,43 @@ def parse_wiki(root: Path) -> dict:
                 warnings.append(f"Unresolved wikilink: [[{wl['target']}]] in {rel}")
                 stats["unresolved"] += 1
 
+        # Build edges from CommonMark md-links (resolved relative to this
+        # file's directory). Same edge shape as wikilinks so downstream
+        # consumers stay unchanged.
+        for ml in md_links:
+            target_id = resolve_md_link(
+                ml["target"], rel.parent, wiki_root, path_map, article_ids
+            )
+            if target_id and target_id != node_id:
+                edges.append({
+                    "source": node_id,
+                    "target": target_id,
+                    "type": "related",
+                    "direction": "forward",
+                    "weight": 0.7,
+                })
+            elif not target_id:
+                warnings.append(f"Unresolved md-link: [{ml['display']}]({ml['target']}) in {rel}")
+                stats["unresolved"] += 1
+
     # --- Build topic nodes from index.md categories ---
     for cat in categories:
         topic_id = f"topic:{cat['name'].lower().replace(' ', '-')}"
+        md_link_count = len(cat.get("md_links", []))
+        article_count = len(cat["articles"]) + md_link_count
         nodes.append({
             "id": topic_id,
             "type": "topic",
             "name": cat["name"],
-            "summary": f"Category from index: {cat['name']} ({len(cat['articles'])} articles)",
+            "summary": f"Category from index: {cat['name']} ({article_count} articles)",
             "tags": ["category"],
             "complexity": "simple",
         })
         stats["topics"] += 1
 
-        # categorized_under edges (only resolve to known article nodes)
+        # categorized_under edges (only resolve to known article nodes).
+        # Wikilink targets resolve via name_map; CommonMark md-link targets
+        # resolve by relative path via path_map.
         for article_target in cat["articles"]:
             article_id = resolve_wikilink(article_target, name_map, article_ids)
             if article_id:
@@ -427,6 +664,18 @@ def parse_wiki(root: Path) -> dict:
                     "direction": "forward",
                     "weight": 0.6,
                 })
+        for md_target in cat.get("md_links", []):
+            article_id = resolve_md_link(
+                md_target, index_base, wiki_root, path_map, article_ids
+            )
+            if article_id:
+                edges.append({
+                    "source": article_id,
+                    "target": topic_id,
+                    "type": "categorized_under",
+                    "direction": "forward",
+                    "weight": 0.6,
+                })
 
     # --- Build source nodes from raw/ ---
     if raw_root.is_dir():
@@ -471,7 +720,13 @@ def parse_wiki(root: Path) -> dict:
     return {
         "format": "karpathy",
         "stats": stats,
-        "categories": [{"name": c["name"], "count": len(c["articles"])} for c in categories],
+        "categories": [
+            {
+                "name": c["name"],
+                "count": len(c["articles"]) + len(c.get("md_links", [])),
+            }
+            for c in categories
+        ],
         "logEntries": len(log_entries),
         "nodes": nodes,
         "edges": deduped_edges,
@@ -499,8 +754,12 @@ def main():
 
     # Report to stderr
     s = manifest["stats"]
+    md_links = s.get("mdLinks", 0)
+    link_summary = f"{s['wikilinks']} wikilinks"
+    if md_links:
+        link_summary += f", {md_links} md-links"
     print(f"[parse] Karpathy wiki: {s['articles']} articles, {s['sources']} sources, "
-          f"{s['topics']} topics, {s['wikilinks']} wikilinks "
+          f"{s['topics']} topics, {link_summary} "
           f"({s['unresolved']} unresolved)", file=sys.stderr)
     print(f"[parse] Output: {out_path}", file=sys.stderr)