Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions openspec/changes/rhess-enterprise-skills-server/tasks.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@

## 4. Skill Ingestion Engine

- [ ] 4.1 Implement `clone(url: string, dest: string): Promise<void>` using `simple-git` with `--depth 1`
- [ ] 4.2 Implement `discoverSkills(repoPath: string): SkillCandidate[]` walking all Agent Skills spec discovery paths
- [ ] 4.3 Implement YAML frontmatter parser: validates `name` and `description` are present; returns structured metadata
- [ ] 4.4 Implement archive bundler: tar.gz multi-file skills, compute SHA256 digest; single-file skills served as-is with digest
- [x] 4.1 Implement `clone(url: string, dest: string): Promise<void>` using `simple-git` with `--depth 1`
- [x] 4.2 Implement `discoverSkills(repoPath: string): SkillCandidate[]` walking all Agent Skills spec discovery paths
- [x] 4.3 Implement YAML frontmatter parser: validates `name` and `description` are present; returns structured metadata
- [x] 4.4 Implement archive bundler: tar.gz multi-file skills, compute SHA256 digest; single-file skills served as-is with digest
- [ ] 4.5 Implement `ingestSource(sourceId, url): SyncReport` — clone → discover → parse → classify → stage
- [ ] 4.6 Implement atomic swap: single SQLite transaction deletes old source skills and inserts new ones
- [ ] 4.7 Implement bundled example skills loader: seeds catalog on first boot if no sources registered
Expand Down
67 changes: 67 additions & 0 deletions src/server/ingestion/bundle.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import crypto from "node:crypto";
import fs from "node:fs";
import path from "node:path";
import { create } from "tar";
import type { SkillCandidate } from "./discover.js";

/**
 * Result of bundling a single skill: the artifact to serve plus its digest.
 */
export interface BundleResult {
/**
 * "skill-md" when the skill has no supporting files and the artifact is the
 * SKILL.md text itself; "archive" when the skill's files were packed into a tar.gz.
 */
artifactType: "skill-md" | "archive";
/** SHA-256 hex digest of the artifact (content for skill-md, tar.gz bytes for archive) */
digest: string;
/** For skill-md: raw SKILL.md content. For archive: base64-encoded tar.gz */
artifact: string;
}

/**
 * Bundles a discovered skill into its servable artifact.
 *
 * A skill with at least one supporting file is packed as an archive; a skill
 * that consists of SKILL.md alone is returned as plain markdown.
 *
 * @param candidate - The discovered skill to bundle.
 * @returns The artifact together with its type and digest.
 */
export async function bundleSkill(candidate: SkillCandidate): Promise<BundleResult> {
  const hasSupportingFiles = candidate.supportingFiles.length > 0;
  return hasSupportingFiles ? bundleArchive(candidate) : bundleSkillMd(candidate);
}

/**
 * Bundles a single-file skill: the artifact is the SKILL.md text, served as-is.
 *
 * @param candidate - Skill whose `skillMdPath` points at the file to read.
 * @returns A "skill-md" result whose digest is the SHA-256 of the UTF-8 text.
 */
async function bundleSkillMd(candidate: SkillCandidate): Promise<BundleResult> {
  const markdown = fs.readFileSync(candidate.skillMdPath, "utf-8");

  const hasher = crypto.createHash("sha256");
  hasher.update(markdown, "utf-8");

  const result: BundleResult = {
    artifactType: "skill-md",
    digest: hasher.digest("hex"),
    artifact: markdown,
  };
  return result;
}

/**
 * Bundles a multi-file skill into a gzipped tar archive held in memory.
 *
 * The archive is built relative to `candidate.skillDir` from the file list
 * produced by `getAllRelativeFiles`, collected chunk by chunk from the tar
 * stream, hashed, and returned base64-encoded.
 *
 * @param candidate - Skill whose SKILL.md and supporting files are archived.
 * @returns An "archive" result whose digest is the SHA-256 of the tar.gz bytes.
 * @throws Rejects with the tar stream's error if packing fails.
 */
async function bundleArchive(candidate: SkillCandidate): Promise<BundleResult> {
  const chunks: Buffer[] = [];

  await new Promise<void>((resolve, reject) => {
    const pack = create(
      {
        gzip: true,
        cwd: candidate.skillDir,
        portable: true,
        // `portable` drops uid/gid/ctime/atime but still writes each file's
        // mtime into the tar header. A fresh clone stamps files with the clone
        // time, so without this the digest would change on every sync even
        // when the skill's content is identical.
        noMtime: true,
      },
      getAllRelativeFiles(candidate),
    );

    pack.on("data", (chunk: Buffer) => chunks.push(chunk));
    pack.on("end", resolve);
    pack.on("error", reject);
  });

  const buffer = Buffer.concat(chunks);
  const digest = crypto.createHash("sha256").update(buffer).digest("hex");

  return {
    artifactType: "archive",
    digest,
    artifact: buffer.toString("base64"),
  };
}

/**
 * Builds the ordered file list handed to the tar packer.
 *
 * @param candidate - Skill whose files are listed; its arrays are not mutated.
 * @returns Paths relative to `candidate.skillDir`: SKILL.md first, then the
 *   supporting files in lexicographic order.
 */
function getAllRelativeFiles(candidate: SkillCandidate): string[] {
  // SKILL.md first, then supporting files sorted lexicographically.
  // Sorting is done here (not in discover.ts) so digest determinism is
  // enforced at the archiving boundary regardless of how the candidate was built.
  const skillMdRel = path.relative(candidate.skillDir, candidate.skillMdPath);
  const sorted = [...candidate.supportingFiles].sort();

  // node-tar interprets a file-list entry that starts with "@" as an existing
  // tar archive whose entries should be merged in. File names come from cloned
  // repositories, so such names are prefixed with "./" to force tar to treat
  // them as ordinary files.
  const tarSafe = sorted.map((file) => (file.startsWith("@") ? `./${file}` : file));

  return [skillMdRel, ...tarSafe];
}
20 changes: 20 additions & 0 deletions src/server/ingestion/clone.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import { simpleGit } from "simple-git";

// Accepted forms: https://, http://, ssh://, or SCP-style git@host:path.
// This is a prefix check only: the pattern is anchored at the start of the
// string and does not validate anything after the scheme / "git@" marker.
const VALID_GIT_URL = /^(https?:\/\/|ssh:\/\/|git@)/;

/**
 * Shallow-clones (`--depth 1`) a remote Git repository into `dest`.
 *
 * @param url - Remote URL; must pass the `VALID_GIT_URL` prefix check.
 * @param dest - Directory path handed to git as the clone destination.
 * @throws Error whose message starts with `CLONE_FAILED:` when the URL is
 *   rejected or when the underlying git clone fails.
 */
export async function clone(url: string, dest: string): Promise<void> {
  const accepted = VALID_GIT_URL.test(url);
  if (!accepted) {
    const rejection = `CLONE_FAILED: invalid URL — only HTTPS and SSH Git URLs are accepted (got: ${url})`;
    throw new Error(rejection);
  }

  const shallowCloneArgs = ["--depth", "1"];
  const repo = simpleGit();

  try {
    await repo.clone(url, dest, shallowCloneArgs);
  } catch (failure) {
    // Normalise whatever was thrown into text so callers always see the
    // CLONE_FAILED prefix followed by a readable reason.
    let detail: string;
    if (failure instanceof Error) {
      detail = failure.message;
    } else {
      detail = String(failure);
    }
    throw new Error(`CLONE_FAILED: ${detail}`);
  }
}
141 changes: 141 additions & 0 deletions src/server/ingestion/discover.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import fs from "node:fs";
import path from "node:path";

/**
 * A skill found on disk by `discoverSkills`, before its frontmatter is parsed.
 */
export interface SkillCandidate {
/** Name of the directory that contains the SKILL.md file. */
slug: string;
/** Path to the SKILL.md file, built by joining onto the `repoPath` given to `discoverSkills`. */
skillMdPath: string;
/** Directory containing the SKILL.md file (`path.dirname(skillMdPath)`). */
skillDir: string;
/** The `DISCOVERY_DIRS` entry (relative to the repo root) under which the skill was found. */
discoveryPath: string;
/** Non-SKILL.md files belonging to the skill, as paths relative to `skillDir`. */
supportingFiles: string[];
}

// Directories, relative to the repository root, that discoverSkills searches
// for SKILL.md files. They are visited in this order, which is also the order
// in which candidates from different directories appear in the result.
const DISCOVERY_DIRS = [
"skills",
".claude/skills",
".cursor/skills",
".github/copilot/skills",
".windsurf/skills",
".gemini/skills",
];

/**
 * Tells whether a file name is the skill manifest, ignoring letter case.
 *
 * @param name - A bare file name (no directory part).
 * @returns True when the lower-cased name is exactly "skill.md".
 */
function isSkillMd(name: string): boolean {
  const lowered = name.toLowerCase();
  return lowered === "skill.md";
}

/**
 * Depth-first search of `dir` for SKILL.md files (matched case-insensitively).
 *
 * Entries are visited in the order the file system lists them; a directory
 * that cannot be read contributes nothing instead of raising.
 *
 * @param dir - Directory to search.
 * @returns The path of every SKILL.md found, each built by joining onto `dir`.
 */
function walkForSkillMd(dir: string): string[] {
  let dirents: fs.Dirent[];
  try {
    dirents = fs.readdirSync(dir, { withFileTypes: true });
  } catch {
    return [];
  }

  return dirents.flatMap((dirent) => {
    const entryPath = path.join(dir, dirent.name);
    if (dirent.isDirectory()) {
      return walkForSkillMd(entryPath);
    }
    const isManifest = dirent.isFile() && isSkillMd(dirent.name);
    return isManifest ? [entryPath] : [];
  });
}

/**
 * Finds every skill in a cloned repository.
 *
 * Each directory in `DISCOVERY_DIRS` that exists under `repoPath` is searched
 * recursively for SKILL.md files; every SKILL.md found yields one candidate.
 *
 * @param repoPath - Root of the repository checkout.
 * @returns One candidate per SKILL.md, in discovery-directory order.
 */
export function discoverSkills(repoPath: string): SkillCandidate[] {
  const candidates: SkillCandidate[] = [];

  for (const discoveryRelPath of DISCOVERY_DIRS) {
    const discoveryAbsPath = path.join(repoPath, discoveryRelPath);

    if (!fs.existsSync(discoveryAbsPath)) {
      continue;
    }

    for (const skillMdPath of walkForSkillMd(discoveryAbsPath)) {
      const skillDir = path.dirname(skillMdPath);

      candidates.push({
        // The slug is the name of the directory holding SKILL.md. When SKILL.md
        // sits directly in the discovery dir, that is the discovery dir's own
        // basename, so a single expression covers both cases.
        slug: path.basename(skillDir),
        skillMdPath,
        skillDir,
        discoveryPath: discoveryRelPath,
        supportingFiles: collectSupportingFiles(skillDir),
      });
    }
  }

  return candidates;
}

/**
 * Lists the non-SKILL.md files that belong to the skill rooted at `skillDir`,
 * as paths relative to `skillDir`.
 *
 * A subdirectory that contains its own SKILL.md is a separate skill root, at
 * any depth, and everything beneath it is left out. Read errors are ignored:
 * an unreadable directory simply contributes no files.
 */
function collectSupportingFiles(skillDir: string): string[] {
  const supportingFiles: string[] = [];

  let entries: fs.Dirent[];
  try {
    entries = fs.readdirSync(skillDir, { withFileTypes: true });
  } catch {
    // ignore read errors for supporting files
    return supportingFiles;
  }

  for (const entry of entries) {
    if (entry.isFile()) {
      if (!isSkillMd(entry.name)) {
        supportingFiles.push(entry.name);
      }
      continue;
    }
    if (!entry.isDirectory()) {
      continue;
    }

    const subDir = path.join(skillDir, entry.name);
    if (directoryContainsSkillMd(subDir)) {
      // Separate skill root directly below this skill: skip it without walking.
      continue;
    }

    const files = walkAllFiles(subDir);

    // Skill roots nested deeper than one level are only visible after the
    // walk. Each root is stored with a trailing separator so the prefix test
    // below cannot match a sibling directory that merely shares a name prefix.
    const nestedSkillRoots = files
      .filter((file) => isSkillMd(path.basename(file)))
      .map((file) => path.dirname(file) + path.sep);

    for (const file of files) {
      const belongsToNestedSkill = nestedSkillRoots.some((root) => file.startsWith(root));
      if (!belongsToNestedSkill) {
        supportingFiles.push(path.relative(skillDir, file));
      }
    }
  }

  return supportingFiles;
}

/**
 * Checks whether `dir` has a SKILL.md file (case-insensitive) as a direct child.
 * Subdirectories are not inspected, and an unreadable directory counts as "no".
 */
function directoryContainsSkillMd(dir: string): boolean {
  let listing: fs.Dirent[];
  try {
    listing = fs.readdirSync(dir, { withFileTypes: true });
  } catch {
    return false;
  }

  for (const item of listing) {
    if (item.isFile() && isSkillMd(item.name)) {
      return true;
    }
  }
  return false;
}

/**
 * Recursively lists every regular file beneath `dir`.
 *
 * Entries are visited in the order the file system lists them. Anything that
 * is neither a directory nor a regular file is skipped, and a directory that
 * cannot be read contributes nothing instead of raising.
 *
 * @param dir - Directory to walk.
 * @returns File paths, each built by joining onto `dir`.
 */
function walkAllFiles(dir: string): string[] {
  let dirents: fs.Dirent[];
  try {
    dirents = fs.readdirSync(dir, { withFileTypes: true });
  } catch {
    return [];
  }

  return dirents.flatMap((dirent) => {
    const entryPath = path.join(dir, dirent.name);
    if (dirent.isDirectory()) {
      return walkAllFiles(entryPath);
    }
    return dirent.isFile() ? [entryPath] : [];
  });
}
61 changes: 61 additions & 0 deletions src/server/ingestion/frontmatter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import yaml from "js-yaml";

/**
 * Validated metadata extracted from a SKILL.md frontmatter block.
 */
export interface ParsedFrontmatter {
/** The frontmatter `name` value; guaranteed to be a string that is not blank. */
name: string;
/** The frontmatter `description` value; guaranteed to be a string that is not blank. */
description: string;
/** Tool names read from the `allowed-tools` frontmatter key; empty when none could be read. */
allowedTools: string[];
/** The complete input text that was parsed, frontmatter included. */
rawContent: string;
}

/**
 * Outcome of parsing frontmatter, discriminated on `ok`: the parsed data on
 * success, or a human-readable `reason` explaining why the content was rejected.
 */
export type FrontmatterResult =
| { ok: true; data: ParsedFrontmatter }
| { ok: false; reason: string };

// Matches a frontmatter block at the very start of the text: an opening `---`
// line, the YAML body (capture group 1, matched lazily up to the first closing
// delimiter), and a closing `---` followed by a line break or the end of the
// input. Both LF and CRLF line endings are accepted.
const FRONTMATTER_RE = /^---\r?\n([\s\S]*?)\r?\n---(?:\r?\n|$)/;

/**
 * Normalises the `allowed-tools` frontmatter value into a list of tool names.
 *
 * An array keeps its string items unchanged. A string is split on commas and
 * whitespace, except inside parentheses, so an entry such as
 * `Bash(git add:*)` stays in one piece. Any other value yields an empty list.
 */
function parseAllowedTools(raw: unknown): string[] {
  if (Array.isArray(raw)) {
    return raw.filter((t): t is string => typeof t === "string");
  }
  if (typeof raw !== "string") {
    return [];
  }

  const tools: string[] = [];
  let current = "";
  let depth = 0;
  for (const ch of raw) {
    if (ch === "(") {
      depth += 1;
    } else if (ch === ")" && depth > 0) {
      depth -= 1;
    }

    const isSeparator = depth === 0 && (ch === "," || /\s/.test(ch));
    if (!isSeparator) {
      current += ch;
    } else if (current !== "") {
      tools.push(current);
      current = "";
    }
  }
  if (current !== "") {
    tools.push(current);
  }
  return tools;
}

/**
 * Extracts and validates the YAML frontmatter of a SKILL.md document.
 *
 * @param content - Full text of the SKILL.md file.
 * @returns `{ ok: true, data }` when a frontmatter mapping with non-blank
 *   string `name` and `description` is present; otherwise `{ ok: false, reason }`.
 *   This function reports problems through the result and does not throw for
 *   malformed input.
 */
export function parseFrontmatter(content: string): FrontmatterResult {
  // A leading byte-order mark (common in files saved by Windows editors) would
  // sit in front of the opening `---` and defeat the start-anchored pattern.
  // It is skipped for matching only; `rawContent` still carries the input as given.
  const text = content.charCodeAt(0) === 0xfeff ? content.slice(1) : content;

  const match = FRONTMATTER_RE.exec(text);
  if (!match) {
    return { ok: false, reason: "No YAML frontmatter delimiters found" };
  }

  const yamlBlock = match[1] ?? "";

  let parsed: unknown;
  try {
    // NOTE(review): this parses text from cloned, untrusted repositories.
    // Confirm the installed js-yaml version's `load` uses a safe default
    // schema (no executable/JS-specific tags), or pin a schema explicitly.
    parsed = yaml.load(yamlBlock);
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    return { ok: false, reason: `Malformed YAML frontmatter: ${message}` };
  }

  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    return { ok: false, reason: "Frontmatter must be a YAML mapping" };
  }

  const fm = parsed as Record<string, unknown>;

  const name = fm["name"];
  if (typeof name !== "string" || name.trim() === "") {
    return { ok: false, reason: "Missing or empty 'name' field in frontmatter" };
  }

  const description = fm["description"];
  if (typeof description !== "string" || description.trim() === "") {
    return { ok: false, reason: "Missing or empty 'description' field in frontmatter" };
  }

  return {
    ok: true,
    data: {
      name,
      description,
      // Accepts both a YAML sequence and the delimited-string form.
      allowedTools: parseAllowedTools(fm["allowed-tools"]),
      rawContent: content,
    },
  };
}
7 changes: 7 additions & 0 deletions src/server/ingestion/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
export { clone } from "./clone.js";
export { discoverSkills } from "./discover.js";
export type { SkillCandidate } from "./discover.js";
export { parseFrontmatter } from "./frontmatter.js";
export type { ParsedFrontmatter, FrontmatterResult } from "./frontmatter.js";
export { bundleSkill } from "./bundle.js";
export type { BundleResult } from "./bundle.js";
Loading
Loading