diff --git a/.cursor/skills/add-katalyst-check-type/SKILL.md b/.cursor/skills/add-katalyst-check-type/SKILL.md index 10da2950..767eaee0 100644 --- a/.cursor/skills/add-katalyst-check-type/SKILL.md +++ b/.cursor/skills/add-katalyst-check-type/SKILL.md @@ -33,7 +33,7 @@ Copy this checklist and keep it updated: ```text Check Type Task Progress: - [ ] 1) Kind constant added (internal/checks/kinds.go) -- [ ] 2) Check file added (struct + Run + args + Descriptor + registerParsed) +- [ ] 2) Check file added (struct + Run + args + Descriptor configurableIn/document-needs + registerParsed) - [ ] 3) Tests added/updated - [ ] 4) Fixtures/readmes updated - [ ] 5) Reference regenerated @@ -62,6 +62,11 @@ Add one file in the check type's family package — `internal/checks/structuredo the family's `common.go`) rather than re-deriving. - An `init()` calling the family's `registerParsed(descriptor, parse, build, buildColl)`: + - Set `descriptor.ConfigurableIn`. Use `checks.ConfigCollection` for collection + `checks:` and add `checks.ConfigFilesystem` only when the same + implementation works under `filesystemChecks`. + - Set `descriptor.NeedsDocument` when the check reads frontmatter, markdown + body text, or source line maps. Path-only checks leave it false. - `parse func(*yaml.Node) (any, error)` decodes the node into the args struct and validates it. 
Use `internal/checks/argcheck` helpers (`RequireString`, `OneOf`, …) for uniform, test-stable error phrasing, plus any family-local diff --git a/cmd/check.go b/cmd/check.go index 38261990..e52fba7e 100644 --- a/cmd/check.go +++ b/cmd/check.go @@ -47,14 +47,23 @@ reported as unmatched references (errors).`, if err != nil { return err } + anyInvalid := false + out, errOut := cmd.OutOrStdout(), cmd.ErrOrStderr() + + if len(args) == 0 { + bad, err := runFilesystemChecks(errOut, e) + if err != nil { + return err + } + if bad { + anyInvalid = true + } + } res, err := resolveSelectors(e.proj, args) if err != nil { return err } - anyInvalid := false - out, errOut := cmd.OutOrStdout(), cmd.ErrOrStderr() - for _, item := range res.Items { ok, err := checkItem(out, errOut, e, item) if err != nil { diff --git a/cmd/check_test.go b/cmd/check_test.go index f0d14fff..6b5aae13 100644 --- a/cmd/check_test.go +++ b/cmd/check_test.go @@ -116,6 +116,159 @@ func TestCheck_wholeProjectWhenNoSelector(t *testing.T) { } } +func TestCheck_filesystemChecks_runWithoutCollections(t *testing.T) { + dir := t.TempDir() + writeProject(t, dir, map[string]string{ + "bases/local.yaml": `type: filesystem +root: . +filesystemChecks: + - name: docs + path: docs + include: ["**/*.md"] + checks: + - kind: filesystem_name_case + style: kebab +collections: {} +`, + }) + chdir(t, dir) + mustWrite(t, filepath.Join(dir, "docs/BadName.md"), "---\ntitle: Bad\n---\n# Bad\n") + + _, stderr, err := runRoot(t, "check") + if err == nil { + t.Fatalf("expected filesystem check failure") + } + if !strings.Contains(stderr, "filesystem docs: BadName.md") || !strings.Contains(stderr, "must be kebab-case") { + t.Errorf("expected filesystem name-case diagnostic, got: %q", stderr) + } +} + +func TestCheck_selectorDoesNotRunFilesystemChecks(t *testing.T) { + dir := t.TempDir() + writeProject(t, dir, map[string]string{ + "bases/local.yaml": `type: filesystem +root: . 
+filesystemChecks: + - name: docs + path: docs + include: ["**/*.md"] + checks: + - kind: filesystem_name_case + style: kebab +collections: + notes: + path: notes + checks: + - kind: markdown_requires_h1 +`, + }) + chdir(t, dir) + mustWrite(t, filepath.Join(dir, "docs/BadName.md"), "---\ntitle: Bad\n---\n# Bad\n") + mustWrite(t, filepath.Join(dir, "notes/good.md"), "---\ntitle: Good\n---\n# Good\n") + + stdout, stderr, err := runRoot(t, "check", "notes") + if err != nil { + t.Fatalf("selector check should ignore filesystem scopes: %v\nstderr: %s", err, stderr) + } + if !strings.Contains(stdout, "good.md: OK") { + t.Errorf("expected collection item OK, got: %q", stdout) + } + if strings.Contains(stderr, "BadName") { + t.Errorf("selector run should not report filesystem scope diagnostics, got: %q", stderr) + } +} + +func TestCheck_filesystemParseFailuresDefaultToError(t *testing.T) { + dir := t.TempDir() + writeProject(t, dir, map[string]string{ + "bases/local.yaml": `type: filesystem +root: . +filesystemChecks: + - name: docs + path: docs + include: ["**/*.md"] + checks: + - kind: filesystem_name_matches_field + field: title +collections: {} +`, + }) + chdir(t, dir) + mustWrite(t, filepath.Join(dir, "docs/bad.md"), "---\n: bad\n---\n# Bad\n") + + _, stderr, err := runRoot(t, "check") + if err == nil { + t.Fatalf("expected parse failure to fail by default") + } + var coded interface{ Code() int } + if !errors.As(err, &coded) || coded.Code() != 1 { + t.Errorf("expected exit code 1, got: %v", err) + } + if !strings.Contains(stderr, "filesystem docs: bad.md") || !strings.Contains(stderr, "parse document") { + t.Errorf("expected parse diagnostic, got: %q", stderr) + } +} + +func TestCheck_filesystemParseFailuresCanWarn(t *testing.T) { + dir := t.TempDir() + writeProject(t, dir, map[string]string{ + "bases/local.yaml": `type: filesystem +root: . 
+filesystemChecks: + - name: docs + path: docs + include: ["**/*.md"] + parseFailures: warning + checks: + - kind: filesystem_name_matches_field + field: title +collections: {} +`, + }) + chdir(t, dir) + mustWrite(t, filepath.Join(dir, "docs/bad.md"), "---\n: bad\n---\n# Bad\n") + + _, stderr, err := runRoot(t, "check") + if err != nil { + t.Fatalf("warning parse failure should not fail the run: %v\nstderr: %s", err, stderr) + } + if !strings.Contains(stderr, "warning: /: parse document") { + t.Errorf("expected warning parse diagnostic, got: %q", stderr) + } +} + +func TestCheck_filesystemUnmatchedFiles(t *testing.T) { + dir := t.TempDir() + writeProject(t, dir, map[string]string{ + "bases/local.yaml": `type: filesystem +root: . +filesystemChecks: + - name: docs + path: docs + include: ["**/*.md"] + exclude: ["ignored/**"] + checks: + - kind: filesystem_unmatched_files +collections: {} +`, + }) + chdir(t, dir) + mustWrite(t, filepath.Join(dir, "docs/page.md"), "---\ntitle: Page\n---\n# Page\n") + mustWrite(t, filepath.Join(dir, "docs/raw.txt"), "raw\n") + mustWrite(t, filepath.Join(dir, "docs/ignored/raw.txt"), "ignored\n") + + _, stderr, err := runRoot(t, "check") + if err == nil { + t.Fatalf("expected unmatched filesystem file failure") + } + if !strings.Contains(stderr, "filesystem docs: raw.txt") || !strings.Contains(stderr, "unmatched file") { + t.Errorf("expected unmatched-file diagnostic, got: %q", stderr) + } + if strings.Contains(stderr, "ignored/raw.txt") { + t.Errorf("excluded files should not be reported, got: %q", stderr) + } +} + func TestCheck_unmatchedFileInCollectionDir_isError(t *testing.T) { dir := setupNotesRepo(t, objectNotesConfig) mustWrite(t, filepath.Join(dir, "notes/ok.md"), "---\ntitle: Ok\nyear: 1\n---\n# Ok\n") diff --git a/cmd/check_types.go b/cmd/check_types.go index e6af48f4..eb96a58b 100644 --- a/cmd/check_types.go +++ b/cmd/check_types.go @@ -119,6 +119,7 @@ func runCheckTypesDetail(cmd *cobra.Command, checkType string, asJSON 
bool) erro printSectionHeader(out, fmt.Sprintf("%s › %s", fam.Title, d.Title)) fmt.Fprintf(out, "- kind: %s\n", d.CheckType) fmt.Fprintf(out, "- family: %s\n", d.Family) + fmt.Fprintf(out, "- configurableIn: %s\n", strings.Join(checks.DescriptorConfigurableIn(d), ", ")) scope := d.Scope if scope == "" { scope = "item" @@ -256,6 +257,7 @@ func jsonDescriptor(d checks.Descriptor) checks.Descriptor { if d.Fields == nil { d.Fields = []checks.Field{} } + d.ConfigurableIn = checks.DescriptorConfigurableIn(d) return d } diff --git a/cmd/check_types_test.go b/cmd/check_types_test.go index a78dc45e..f70492ea 100644 --- a/cmd/check_types_test.go +++ b/cmd/check_types_test.go @@ -185,9 +185,10 @@ func TestCheckTypesList_jsonArrayShape(t *testing.T) { } var got []struct { - CheckType string `json:"check_type"` - Family string `json:"family"` - Fields []struct { + CheckType string `json:"check_type"` + Family string `json:"family"` + ConfigurableIn []string `json:"configurableIn"` + Fields []struct { Name string `json:"name"` } `json:"fields"` ConfigExample string `json:"config_example"` @@ -199,8 +200,10 @@ func TestCheckTypesList_jsonArrayShape(t *testing.T) { t.Fatal("expected at least one descriptor") } seen := map[string]bool{} + configurableIn := map[string][]string{} for i, d := range got { seen[d.CheckType] = true + configurableIn[d.CheckType] = d.ConfigurableIn if got[i].ConfigExample == "" { t.Errorf("entry %d (%s): empty config_example", i, d.CheckType) } @@ -227,6 +230,12 @@ func TestCheckTypesList_jsonArrayShape(t *testing.T) { if strings.Contains(stdout, `"default": ""`) { t.Errorf("empty default should be omitted, not emitted") } + if strings.Join(configurableIn["filesystem_name_case"], ",") != "collection,filesystem" { + t.Errorf("filesystem_name_case configurableIn = %v, want collection+filesystem", configurableIn["filesystem_name_case"]) + } + if strings.Join(configurableIn["markdown_requires_h1"], ",") != "collection" { + t.Errorf("markdown_requires_h1 
configurableIn = %v, want collection", configurableIn["markdown_requires_h1"]) + } } func TestCheckTypesShow_jsonObject(t *testing.T) { @@ -237,8 +246,9 @@ func TestCheckTypesShow_jsonObject(t *testing.T) { } var got struct { - CheckType string `json:"check_type"` - Fields []struct { + CheckType string `json:"check_type"` + ConfigurableIn []string `json:"configurableIn"` + Fields []struct { Name string `json:"name"` Required bool `json:"required"` } `json:"fields"` @@ -249,6 +259,9 @@ func TestCheckTypesShow_jsonObject(t *testing.T) { if got.CheckType != "object_number_range" { t.Errorf("got check type %q, want object_number_range", got.CheckType) } + if strings.Join(got.ConfigurableIn, ",") != "collection" { + t.Errorf("configurableIn = %v, want collection", got.ConfigurableIn) + } if len(got.Fields) != 3 { t.Fatalf("got %d fields, want 3", len(got.Fields)) } diff --git a/cmd/engine.go b/cmd/engine.go index 486d061f..5f556a3a 100644 --- a/cmd/engine.go +++ b/cmd/engine.go @@ -134,10 +134,6 @@ func (e *engine) checksFor(c project.Collection, meta map[string]any) ([]checks. effective = append(effective, matched.Checks...) } - if err := ensureLibrariesAvailable(effective); err != nil { - return nil, err - } - checkList := make([]checks.Check, 0, len(effective)) inlineSchema := "" @@ -168,14 +164,11 @@ func (e *engine) checksFor(c project.Collection, meta map[string]any) ([]checks. // Every non-object, per-item check is built from its registry entry. The // object check is handled above (it needs a compiled schema); collection- // scoped checks have no per-item builder, so Build skips them here. - for _, cc := range effective { - if cc.Kind == checks.CheckObject { - continue - } - if chk, ok := checks.Build(cc.Kind, cc.Args); ok { - checkList = append(checkList, chk) - } + fileChecks, err := e.fileChecksFor(effective) + if err != nil { + return nil, err } + checkList = append(checkList, fileChecks...) 
// An item that matched no variant under useExhaustiveVariants fails. The // verdict rides through RunAll like any other check (so `check` and @@ -228,8 +221,31 @@ func (unroutedCheck) Run(checks.Context) []checks.Violation { // collectionChecksFor builds the collection-scoped checks configured for a // collection. These run once per collection, after the per-item pass. func (e *engine) collectionChecksFor(c project.Collection) ([]checks.CollectionCheck, error) { + return e.fileSetChecksFor(c.Checks) +} + +func (e *engine) fileChecksFor(configured []checks.ConfiguredCheck) ([]checks.Check, error) { + if err := ensureLibrariesAvailable(configured); err != nil { + return nil, err + } + var out []checks.Check + for _, cc := range configured { + if cc.Kind == checks.CheckObject { + continue + } + if chk, ok := checks.Build(cc.Kind, cc.Args); ok { + out = append(out, chk) + } + } + return out, nil +} + +func (e *engine) fileSetChecksFor(configured []checks.ConfiguredCheck) ([]checks.CollectionCheck, error) { + if err := ensureLibrariesAvailable(configured); err != nil { + return nil, err + } var out []checks.CollectionCheck - for _, cc := range c.Checks { + for _, cc := range configured { if col, ok := checks.BuildCollection(cc.Kind, cc.Args); ok { out = append(out, col) } diff --git a/cmd/filesystem_check.go b/cmd/filesystem_check.go new file mode 100644 index 00000000..dafc07eb --- /dev/null +++ b/cmd/filesystem_check.go @@ -0,0 +1,164 @@ +package cmd + +import ( + "fmt" + "io" + "os" + + "github.com/abegong/katalyst/internal/checks" + "github.com/abegong/katalyst/internal/codec/markdownbodytext" + "github.com/abegong/katalyst/internal/storage/filesystemcheck" +) + +type runtimeFileCheck struct { + kind checks.CheckType + check checks.Check + needsDoc bool +} + +func runFilesystemChecks(errOut io.Writer, e *engine) (bool, error) { + bad := false + for _, scope := range e.proj.FilesystemCheckScopes() { + scopeBad, err := runFilesystemScope(errOut, e, scope) + if err != 
nil { + return false, err + } + if scopeBad { + bad = true + } + } + return bad, nil +} + +func runFilesystemScope(errOut io.Writer, e *engine, scope filesystemcheck.Scope) (bool, error) { + expanded, err := filesystemcheck.Expand(scope) + if err != nil { + return false, asUsageErr(err) + } + fileChecks, err := runtimeFileChecks(scope.Checks) + if err != nil { + return false, err + } + setChecks, err := e.fileSetChecksFor(scope.Checks) + if err != nil { + return false, err + } + + needsDoc := scopeNeedsDocument(scope.Checks) + bad := false + setCtx := checks.FileSetContext{ + Root: scope.Root, + Items: make([]checks.ItemContext, 0, len(expanded.Selected)), + Unmatched: rels(expanded.Unmatched), + Include: scope.Include, + Exclude: scope.Exclude, + } + for _, file := range expanded.Selected { + var doc *markdownbodytext.Document + meta := map[string]any{} + parseOK := true + if needsDoc { + src, err := os.ReadFile(file.Path) + if err != nil { + return false, asUsageErr(err) + } + doc, err = markdownbodytext.Parse(src) + if err != nil { + parseOK = false + severity := checks.SeverityError + if scope.ParseFailures == filesystemcheck.ParseFailuresWarning { + severity = checks.SeverityWarning + } + printFilesystemViolation(errOut, scope, file.Rel, checks.Violation{ + Path: "/", + Message: fmt.Sprintf("parse document: %v", err), + Severity: severity, + }) + if severity != checks.SeverityWarning { + bad = true + } + } else { + meta = dropKey(doc.Meta, "schema") + } + } + setCtx.Items = append(setCtx.Items, checks.ItemContext{FilePath: file.Path, Meta: meta}) + ctx := checks.FileContext{ + FilePath: file.Path, + CollectionRoot: scope.Root, + Doc: doc, + Meta: meta, + } + for _, rc := range fileChecks { + if rc.needsDoc && !parseOK { + continue + } + for _, v := range rc.check.Run(ctx) { + printFilesystemViolation(errOut, scope, file.Rel, v) + if v.Severity != checks.SeverityWarning { + bad = true + } + } + } + } + for _, v := range checks.RunFileSetAll(setCtx, setChecks) { 
+ path := v.File + if path == "" { + path = scope.Name + } + printFilesystemViolation(errOut, scope, path, v) + if v.Severity != checks.SeverityWarning { + bad = true + } + } + return bad, nil +} + +func runtimeFileChecks(configured []checks.ConfiguredCheck) ([]runtimeFileCheck, error) { + if err := ensureLibrariesAvailable(configured); err != nil { + return nil, err + } + var out []runtimeFileCheck + for _, cc := range configured { + if chk, ok := checks.Build(cc.Kind, cc.Args); ok { + out = append(out, runtimeFileCheck{ + kind: cc.Kind, + check: chk, + needsDoc: checks.NeedsDocument(cc.Kind), + }) + } + } + return out, nil +} + +func scopeNeedsDocument(configured []checks.ConfiguredCheck) bool { + for _, cc := range configured { + if checks.NeedsDocument(cc.Kind) { + return true + } + } + return false +} + +func rels(files []filesystemcheck.File) []string { + out := make([]string, len(files)) + for i, file := range files { + out[i] = file.Rel + } + return out +} + +func printFilesystemViolation(w io.Writer, scope filesystemcheck.Scope, path string, v checks.Violation) { + loc := v.Path + if loc == "" { + loc = "/" + } + marker := "" + if v.Severity == checks.SeverityWarning { + marker = "warning: " + } + if v.Line > 0 { + fmt.Fprintf(w, "filesystem %s: %s:%d: %s%s: %s\n", scope.Name, path, v.Line, marker, loc, v.Message) + return + } + fmt.Fprintf(w, "filesystem %s: %s: %s%s: %s\n", scope.Name, path, marker, loc, v.Message) +} diff --git a/cmd/gendocs/main.go b/cmd/gendocs/main.go index f963f369..d1d5fa5d 100644 --- a/cmd/gendocs/main.go +++ b/cmd/gendocs/main.go @@ -314,6 +314,9 @@ func checkTypePage(d checks.Descriptor, fam checks.Family, weight int) string { if d.Scope == "collection" { fmt.Fprint(&b, "**Scope:** collection, runs once per collection over all its items.\n\n") } + if sites := checks.DescriptorConfigurableIn(d); !collectionOnly(sites) { + fmt.Fprintf(&b, "**Can be configured in:** %s.\n\n", configurationSites(sites)) + } if d.Severity == 
"warning" { fmt.Fprint(&b, "**Severity:** warning, reported for review; never fails a run.\n\n") } @@ -341,6 +344,25 @@ func checkTypePage(d checks.Descriptor, fam checks.Family, weight int) string { return b.String() } +func collectionOnly(sites []string) bool { + return len(sites) == 1 && sites[0] == checks.ConfigCollection +} + +func configurationSites(sites []string) string { + labels := make([]string, len(sites)) + for i, site := range sites { + switch site { + case checks.ConfigCollection: + labels[i] = "collection checks" + case checks.ConfigFilesystem: + labels[i] = "filesystem checks" + default: + labels[i] = site + } + } + return strings.Join(labels, ", ") +} + // plain strips inline-code backticks from a summary so it reads cleanly in a // link list. func plain(s string) string { diff --git a/cmd/testdata/snapshots/check-types/list.txt b/cmd/testdata/snapshots/check-types/list.txt index ea4b3436..4dc5a84e 100644 --- a/cmd/testdata/snapshots/check-types/list.txt +++ b/cmd/testdata/snapshots/check-types/list.txt @@ -64,7 +64,7 @@ Markdown body text (7) required: - optional: - -File system (13) +File system (14) ---------------- - filesystem_extension_in purpose: Allow only specific file extensions. @@ -118,6 +118,10 @@ File system (13) purpose: Require that no two items in the collection share a basename. required: - optional: - +- filesystem_unmatched_files + purpose: Report regular files under a filesystem scope that match neither include nor exclude patterns. 
+ required: - + optional: - Plain text (4) -------------- diff --git a/cmd/testdata/snapshots/check-types/show-markdown_single_h1.txt b/cmd/testdata/snapshots/check-types/show-markdown_single_h1.txt index ff74a52d..63a879e1 100644 --- a/cmd/testdata/snapshots/check-types/show-markdown_single_h1.txt +++ b/cmd/testdata/snapshots/check-types/show-markdown_single_h1.txt @@ -2,6 +2,7 @@ Markdown body text › Single H1 -------------------------------- - kind: markdown_single_h1 - family: markdownBodyText +- configurableIn: collection - scope: item - severity: error - purpose: Require that the markdown body contains at most one H1 heading. diff --git a/cmd/testdata/snapshots/check-types/show-object_field_enum.txt b/cmd/testdata/snapshots/check-types/show-object_field_enum.txt index ef4263db..415c19c9 100644 --- a/cmd/testdata/snapshots/check-types/show-object_field_enum.txt +++ b/cmd/testdata/snapshots/check-types/show-object_field_enum.txt @@ -2,6 +2,7 @@ Structured object › Field enum -------------------------------- - kind: object_field_enum - family: structuredObject +- configurableIn: collection - scope: item - severity: error - purpose: Require that a field is one of a fixed set of values. diff --git a/cmd/testdata/snapshots/check-types/show-object_required_field.txt b/cmd/testdata/snapshots/check-types/show-object_required_field.txt index db0f40c2..bd567f4e 100644 --- a/cmd/testdata/snapshots/check-types/show-object_required_field.txt +++ b/cmd/testdata/snapshots/check-types/show-object_required_field.txt @@ -2,6 +2,7 @@ Structured object › Required field ------------------------------------ - kind: object_required_field - family: structuredObject +- configurableIn: collection - scope: item - severity: error - purpose: Require that a frontmatter field exists. 
diff --git a/docs/content/deep-dives/domain-model/checks.md b/docs/content/deep-dives/domain-model/checks.md index 9ca47335..388f5678 100644 --- a/docs/content/deep-dives/domain-model/checks.md +++ b/docs/content/deep-dives/domain-model/checks.md @@ -21,23 +21,47 @@ model]({{< relref "_index.md" >}}). |---|---| | **Check** | Shorthand for a check instance when context is unambiguous. A check asserts one condition and reports a violation when the condition fails. | | **Check type** | The reusable definition of a constraint: `object_required_field`, `markdown_single_h1`, and so on. A check type is selected by its `kind:` id and appears in the generated check types reference. | -| **Check instance** | One configured check attached to a collection: a check type plus its arguments, written as one YAML object under `checks:`. The type is the rule; the instance is the rule applied here. | +| **Check instance** | One configured check attached to a collection or filesystem scope: a check type plus its arguments, written as one YAML object under `checks:`. The type is the rule; the instance is the rule applied here. | | **Family** | The kind of source data a check type reads: `structuredObject` (frontmatter), `markdownBodyText` (the body), `fileSystem` (names and paths), or `plainText` (raw body text). | +| **Attachment target** | The config site where a check instance is valid: `collection` for collection `checks:`, or `filesystem` for base-level `filesystemChecks`. | | **Check library** | The provider that supplies and runs a check type. Native libraries wrap hand-written checks; schema-backed libraries delegate to an external validation engine. | -| **Scope** | The level where a check runs. Most checks are item-scoped; a few are collection-scoped and reason across every item in the collection. | +| **Runtime granularity** | The level where a check runs. Most checks are file-scoped; a few are file-set-scoped and reason across every selected file. 
| **Severity** | The consequence of a violation. `error` fails the run; `warning` is advisory and does not change the exit code. | | **Violation** | One failed check result, with a message, source location, JSON pointer when applicable, severity, and sometimes a sibling file for collection-scoped findings. | -Family and library are separate axes. Family answers *what data does this check -read?* Library answers *who runs it?* A single family can span libraries: +Family, library, attachment target, and runtime granularity are separate axes. +Family answers *what data does this check read?* Library answers *who runs it?* +Attachment target answers *where can a user configure it?* Runtime granularity +answers *does it run once per file or once per selected file set?* A single +family can span libraries: `structuredObject` includes both `object` from the JSON Schema library and `object_required_field` from the native structured-object library. The registry is the single source of truth for check types. Each check type -self-registers a `Descriptor` (its id, family, docs metadata) and a constructor. -`cmd/engine` builds the runnable list by registry lookup; the docs generator and -`katalyst check-types list` read the same descriptors. A parity test fails if a -configured kind has no descriptor, so a check type cannot ship undocumented. +self-registers a `Descriptor` (its id, family, `configurableIn`, docs metadata) and a +constructor. `cmd/engine` builds the runnable list by registry lookup; the docs +generator and `katalyst check-types list` read the same descriptors. A parity +test fails if a configured kind has no descriptor, so a check type cannot ship +undocumented. + +## Configuration Sites + +Collection-attached checks live under a collection's `checks:` list. They run +after selector resolution and can use schema precedence, variants, and the +collection's full sibling set.
This is the historical model and remains the +right place for rules that depend on collection identity. + +Filesystem-attached checks live under a filesystem base's `filesystemChecks` +list. Each scope selects raw files with `include` and `exclude` globs. A +no-selector `katalyst check` runs filesystem scopes before collection checks, +so a project can enforce path policy before any collection exists. Explicit +collection selectors stay collection-only. + +The descriptor's `ConfigurableIn` list controls where a check type may be configured. +During migration, an empty list means `collection`. File-system-family checks +that only read paths can usually support both configuration sites. Checks that read +frontmatter or body text declare that they need a parsed document, so a filesystem +scope parses files only when one of its checks needs one; path-only scopes skip parsing. ## Check libraries @@ -97,11 +121,13 @@ model]({{< relref "_index.md" >}}) for the precedence table and the full per-item lifecycle. Before any schema compiles, the engine confirms the owning libraries are available. -Running is uniform: every per-item check returns a list of violations, which the -engine concatenates; collection-scoped checks run in a second pass over the -whole collection. A violation carries a JSON-pointer `Path`, a `Message`, a -source `Line`, an optional `File` (for collection-scoped findings that name a -sibling), and a `Severity`. An item with no violations prints `path: OK`. +Running is uniform: every file check returns a list of violations, which the +engine concatenates; file-set checks run in a second pass over the selected +files. For collections, that file set is the whole collection. For filesystem +checks, it is the scope's selected file set plus its unmatched files. A +violation carries a JSON-pointer `Path`, a `Message`, a source `Line`, an +optional `File` (for file-set findings that name a sibling), and a `Severity`. +An item with no violations prints `path: OK`.
## Design rationale @@ -126,7 +152,7 @@ gives every library the same shape, so the engine compiles, caches, and gates them identically regardless of whether the work happens in-process or in a subprocess. -**Collection-scoped checks re-scan the whole collection.** A uniqueness or +**File-set checks re-scan the whole collection when attached to a collection.** A uniqueness or required-index verdict is only correct against every item, so these checks run a second pass over the full collection even under a single-item selector. The trade is that `katalyst check notes/one.md` does more work than its name @@ -156,8 +182,9 @@ real out-of-process library exists. descriptor, and generated docs read the same registry as the engine. 2. **Family and library stay separate.** Family describes the data a check reads; library describes the provider that runs it. -3. **Collection-scoped checks see the whole collection.** A selector may narrow - output, but a collection-level verdict still needs the full sibling set. +3. **Collection-attached file-set checks see the whole collection.** A selector + may narrow output, but a collection-level verdict still needs the full + sibling set. ## See also diff --git a/docs/content/getting-started.md b/docs/content/getting-started.md index fe011a3e..4adbda4a 100644 --- a/docs/content/getting-started.md +++ b/docs/content/getting-started.md @@ -59,13 +59,13 @@ katalyst check - `.katalyst/config.yaml`, commented project settings - `.katalyst/schemas/`, one schema per file (empty to start) -- `.katalyst/bases/local.yaml`, the default base (the local - filesystem), where you declare collections +- `.katalyst/bases/local.yaml`, the default filesystem base, where you declare + collections It writes no example content. Add a schema under `.katalyst/schemas/` and declare a collection inside `.katalyst/bases/local.yaml`, then run `katalyst check`. 
Next: -- [Configs]({{< relref "reference/configs/_index.md" >}}) +- [Configuration]({{< relref "reference/configs/_index.md" >}}) - [Check types reference]({{< relref "reference/check-types/_index.md" >}}) diff --git a/docs/content/how-to/configure-rules.md b/docs/content/how-to/configure-rules.md index 30f9499c..21c66edb 100644 --- a/docs/content/how-to/configure-rules.md +++ b/docs/content/how-to/configure-rules.md @@ -8,6 +8,32 @@ weight = 10 You have a directory of markdown files and want Katalyst to enforce checks on them. This guide adds a collection and attaches checks to it. +## Start before collections exist + +For early cleanup, attach checks directly to a filesystem base. These checks +run with `katalyst check` even when `collections: {}` is still empty: + +```yaml +# .katalyst/bases/local.yaml +type: filesystem +root: . +filesystemChecks: + - name: docs + path: docs/content + include: ["**/*.md"] + parseFailures: warning + checks: + - kind: filesystem_name_case + style: kebab + - kind: filesystem_unmatched_files +collections: {} +``` + +Use `parseFailures: warning` while onboarding a messy tree. Switch back to the +default `error` once malformed frontmatter should fail CI. Filesystem checks are +project-wide for now: no-selector `katalyst check` runs them, while +`katalyst check posts` runs only collection-attached checks. + ## 1. Point a collection at the directory Collections are declared inside a base. In a fresh project that is diff --git a/docs/content/reference/check-types/_index.md b/docs/content/reference/check-types/_index.md index 76c54c2d..b0bfe2f5 100644 --- a/docs/content/reference/check-types/_index.md +++ b/docs/content/reference/check-types/_index.md @@ -53,6 +53,7 @@ File-system check types validate filename and path conventions for items. - [Path depth]({{< relref "file-system/path-depth.md" >}}): Bound directory nesting relative to the collection root. 
- [Referenced files exist]({{< relref "file-system/referenced-files-exist.md" >}}): Require path-valued frontmatter fields to resolve to real files. - [Unique filename]({{< relref "file-system/unique-filename.md" >}}): Require that no two items in the collection share a basename. +- [Unmatched files]({{< relref "file-system/unmatched-files.md" >}}): Report regular files under a filesystem scope that match neither include nor exclude patterns. ## Plain text diff --git a/docs/content/reference/check-types/file-system/_index.md b/docs/content/reference/check-types/file-system/_index.md index cea037de..f57e22ad 100644 --- a/docs/content/reference/check-types/file-system/_index.md +++ b/docs/content/reference/check-types/file-system/_index.md @@ -24,3 +24,4 @@ Check types in this family: - [Path depth]({{< relref "path-depth.md" >}}): Bound directory nesting relative to the collection root. - [Referenced files exist]({{< relref "referenced-files-exist.md" >}}): Require path-valued frontmatter fields to resolve to real files. - [Unique filename]({{< relref "unique-filename.md" >}}): Require that no two items in the collection share a basename. +- [Unmatched files]({{< relref "unmatched-files.md" >}}): Report regular files under a filesystem scope that match neither include nor exclude patterns. diff --git a/docs/content/reference/check-types/file-system/extension-in.md b/docs/content/reference/check-types/file-system/extension-in.md index 7237996f..e1b624d2 100644 --- a/docs/content/reference/check-types/file-system/extension-in.md +++ b/docs/content/reference/check-types/file-system/extension-in.md @@ -10,6 +10,8 @@ aliases = ["/reference/rules/file-system/extension-in/"] `kind: filesystem_extension_in` +**Can be configured in:** collection checks, filesystem checks. + ## Purpose Allow only specific file extensions. 
diff --git a/docs/content/reference/check-types/file-system/index-file-required.md b/docs/content/reference/check-types/file-system/index-file-required.md index 1bab2e98..8f114dbf 100644 --- a/docs/content/reference/check-types/file-system/index-file-required.md +++ b/docs/content/reference/check-types/file-system/index-file-required.md @@ -12,6 +12,8 @@ aliases = ["/reference/rules/file-system/index-file-required/"] **Scope:** collection, runs once per collection over all its items. +**Can be configured in:** collection checks, filesystem checks. + ## Purpose Require that every directory containing items has an index file. diff --git a/docs/content/reference/check-types/file-system/name-affix.md b/docs/content/reference/check-types/file-system/name-affix.md index bb78de67..0393c3b4 100644 --- a/docs/content/reference/check-types/file-system/name-affix.md +++ b/docs/content/reference/check-types/file-system/name-affix.md @@ -10,6 +10,8 @@ aliases = ["/reference/rules/file-system/name-affix/"] `kind: filesystem_name_affix` +**Can be configured in:** collection checks, filesystem checks. + ## Purpose Require a name to start with a prefix and/or end with a suffix. diff --git a/docs/content/reference/check-types/file-system/name-case.md b/docs/content/reference/check-types/file-system/name-case.md index 13a8b010..826435ae 100644 --- a/docs/content/reference/check-types/file-system/name-case.md +++ b/docs/content/reference/check-types/file-system/name-case.md @@ -10,6 +10,8 @@ aliases = ["/reference/rules/file-system/name-case/"] `kind: filesystem_name_case` +**Can be configured in:** collection checks, filesystem checks. + ## Purpose Require a name (or path segments) to follow a case style. 
diff --git a/docs/content/reference/check-types/file-system/name-length.md b/docs/content/reference/check-types/file-system/name-length.md index 8d1e802c..49fa6536 100644 --- a/docs/content/reference/check-types/file-system/name-length.md +++ b/docs/content/reference/check-types/file-system/name-length.md @@ -10,6 +10,8 @@ aliases = ["/reference/rules/file-system/name-length/"] `kind: filesystem_name_length` +**Can be configured in:** collection checks, filesystem checks. + ## Purpose Bound the character length of a name. diff --git a/docs/content/reference/check-types/file-system/name-matches-field.md b/docs/content/reference/check-types/file-system/name-matches-field.md index 882f4bb9..55d6aba8 100644 --- a/docs/content/reference/check-types/file-system/name-matches-field.md +++ b/docs/content/reference/check-types/file-system/name-matches-field.md @@ -10,6 +10,8 @@ aliases = ["/reference/rules/file-system/name-matches-field/"] `kind: filesystem_name_matches_field` +**Can be configured in:** collection checks, filesystem checks. + ## Purpose Require a name to equal a frontmatter field, optionally slugified. diff --git a/docs/content/reference/check-types/file-system/name-regex.md b/docs/content/reference/check-types/file-system/name-regex.md index 25c18429..f686ac54 100644 --- a/docs/content/reference/check-types/file-system/name-regex.md +++ b/docs/content/reference/check-types/file-system/name-regex.md @@ -10,6 +10,8 @@ aliases = ["/reference/rules/file-system/name-regex/"] `kind: filesystem_name_regex` +**Can be configured in:** collection checks, filesystem checks. + ## Purpose Require a name to match a regular expression (anchored). 
diff --git a/docs/content/reference/check-types/file-system/parent-dir-in.md b/docs/content/reference/check-types/file-system/parent-dir-in.md index be4172ba..95d18cb4 100644 --- a/docs/content/reference/check-types/file-system/parent-dir-in.md +++ b/docs/content/reference/check-types/file-system/parent-dir-in.md @@ -10,6 +10,8 @@ aliases = ["/reference/rules/file-system/parent-dir-in/"] `kind: filesystem_parent_dir_in` +**Can be configured in:** collection checks, filesystem checks. + ## Purpose Require that the file's parent directory name is in an allowed set. diff --git a/docs/content/reference/check-types/file-system/parent-dir-matches-field.md b/docs/content/reference/check-types/file-system/parent-dir-matches-field.md index 91c3d70b..a0c3f9ad 100644 --- a/docs/content/reference/check-types/file-system/parent-dir-matches-field.md +++ b/docs/content/reference/check-types/file-system/parent-dir-matches-field.md @@ -10,6 +10,8 @@ aliases = ["/reference/rules/file-system/parent-dir-matches-field/"] `kind: filesystem_parent_dir_matches_field` +**Can be configured in:** collection checks, filesystem checks. + ## Purpose Require the parent directory name to equal a frontmatter field. diff --git a/docs/content/reference/check-types/file-system/path-charset.md b/docs/content/reference/check-types/file-system/path-charset.md index 918ae683..633af15f 100644 --- a/docs/content/reference/check-types/file-system/path-charset.md +++ b/docs/content/reference/check-types/file-system/path-charset.md @@ -10,6 +10,8 @@ aliases = ["/reference/rules/file-system/path-charset/"] `kind: filesystem_path_charset` +**Can be configured in:** collection checks, filesystem checks. + ## Purpose Constrain the characters allowed in the item's path. 
diff --git a/docs/content/reference/check-types/file-system/path-depth.md b/docs/content/reference/check-types/file-system/path-depth.md index 5503006e..5bac7406 100644 --- a/docs/content/reference/check-types/file-system/path-depth.md +++ b/docs/content/reference/check-types/file-system/path-depth.md @@ -10,6 +10,8 @@ aliases = ["/reference/rules/file-system/path-depth/"] `kind: filesystem_path_depth` +**Can be configured in:** collection checks, filesystem checks. + ## Purpose Bound directory nesting relative to the collection root. diff --git a/docs/content/reference/check-types/file-system/referenced-files-exist.md b/docs/content/reference/check-types/file-system/referenced-files-exist.md index 7d4dd9fa..a1878ec6 100644 --- a/docs/content/reference/check-types/file-system/referenced-files-exist.md +++ b/docs/content/reference/check-types/file-system/referenced-files-exist.md @@ -10,6 +10,8 @@ aliases = ["/reference/rules/file-system/referenced-files-exist/"] `kind: filesystem_referenced_files_exist` +**Can be configured in:** collection checks, filesystem checks. + ## Purpose Require path-valued frontmatter fields to resolve to real files. diff --git a/docs/content/reference/check-types/file-system/unique-filename.md b/docs/content/reference/check-types/file-system/unique-filename.md index 9f68e39e..ff4a8e09 100644 --- a/docs/content/reference/check-types/file-system/unique-filename.md +++ b/docs/content/reference/check-types/file-system/unique-filename.md @@ -12,6 +12,8 @@ aliases = ["/reference/rules/file-system/unique-filename/"] **Scope:** collection, runs once per collection over all its items. +**Can be configured in:** collection checks, filesystem checks. + ## Purpose Require that no two items in the collection share a basename. 
diff --git a/docs/content/reference/check-types/file-system/unmatched-files.md b/docs/content/reference/check-types/file-system/unmatched-files.md new file mode 100644 index 00000000..7cfae3ec --- /dev/null +++ b/docs/content/reference/check-types/file-system/unmatched-files.md @@ -0,0 +1,30 @@ ++++ +title = "Unmatched files" +weight = 140 +aliases = ["/reference/rules/file-system/unmatched-files/"] ++++ + + + +## Check type ID + +`kind: filesystem_unmatched_files` + +**Scope:** collection, runs once per collection over all its items. + +**Can be configured in:** filesystem checks. + +## Purpose + +Report regular files under a filesystem scope that match neither include nor exclude patterns. + +## Example + +```yaml +filesystemChecks: + - path: docs + include: ["**/*.md"] + exclude: ["**/_generated/**"] + checks: + - kind: filesystem_unmatched_files +``` diff --git a/docs/content/reference/check-types/structured-object/unique-field.md b/docs/content/reference/check-types/structured-object/unique-field.md index 0f423ba2..03df56c8 100644 --- a/docs/content/reference/check-types/structured-object/unique-field.md +++ b/docs/content/reference/check-types/structured-object/unique-field.md @@ -12,6 +12,8 @@ aliases = ["/reference/rules/structured-object/unique-field/"] **Scope:** collection, runs once per collection over all its items. +**Can be configured in:** collection checks, filesystem checks. + ## Purpose Require that no two items share a value for a frontmatter field. diff --git a/docs/content/reference/configs/bases.md b/docs/content/reference/configs/bases.md index 1dcbf40b..d7d64afe 100644 --- a/docs/content/reference/configs/bases.md +++ b/docs/content/reference/configs/bases.md @@ -15,6 +15,7 @@ filename stem. There is no implicit base; `katalyst init` writes a default | `type` | no | `filesystem` | Backend kind: `filesystem` or `sqlite`. | | `root` | no | `.` | Base root directory, relative to the repo root. Collection paths resolve against it. 
| | `path` | for `sqlite` | - | SQLite database path, relative to the repo root. Alias for `root` on SQLite bases. | +| `filesystemChecks` | no | - | Filesystem-attached checks for raw files under a filesystem base. Valid only when `type: filesystem`. | | `collections` | no | - | Map of collection name -> definition. See [Collections]({{< relref "collections.md" >}}). | ```yaml @@ -32,6 +33,36 @@ collections: Collection names are unique across the whole project (selectors are `<collection>/<item>`, with no base qualifier). +Filesystem bases may also declare `filesystemChecks` for rules that run before +or beside collections. Each entry selects files under the base root, then runs +checks whose descriptors support the `filesystem` target: + +```yaml +# .katalyst/bases/local.yaml +type: filesystem +root: . +filesystemChecks: + - name: docs + path: docs/content + include: ["**/*.md"] + exclude: ["**/_generated/**"] + parseFailures: error + checks: + - kind: filesystem_name_case + style: kebab + - kind: filesystem_unmatched_files +collections: {} +``` + +| Key | Required | Default | Meaning | +|---|---|---|---| +| `name` | no | `path` | Diagnostic label and future selector handle. | +| `path` | no | `.` | Scope root, relative to the base root. | +| `include` | yes | - | Glob patterns relative to `path`; selected files match at least one. | +| `exclude` | no | `[]` | Glob patterns removed from the selected set and from unmatched-file reporting. | +| `parseFailures` | no | `error` | `error` fails the run; `warning` reports advisory parse failures when a check needs document data. | +| `checks` | yes | - | Check instances to run against selected files or the selected file set. | + SQLite bases use one table per collection.
Each row is one item: ```yaml diff --git a/docs/content/reference/configs/checks.md b/docs/content/reference/configs/checks.md index a788c606..c763ef18 100644 --- a/docs/content/reference/configs/checks.md +++ b/docs/content/reference/configs/checks.md @@ -5,8 +5,11 @@ weight = 50 # Checks -Each entry in a collection's `checks:` list has a `kind` and the keys that -check type requires. Every check type is documented one per page in the +Each check instance has a `kind` and the keys that check type requires. A check +instance can be attached to a collection under `collections.<name>.checks`, or +to a raw filesystem scope under `filesystemChecks[].checks` when the check +type supports the `filesystem` target. Every check type is documented one per +page in the [check types reference]({{< relref "../check-types/_index.md" >}}): ```yaml @@ -20,6 +23,48 @@ checks: - kind: filesystem_name_matches_field ``` +## Configuration Sites + +Collection-attached checks run after a file belongs to a collection. They can +use collection schemas, variants, item selectors, and collection-wide sibling +sets: + +```yaml +collections: + posts: + path: content/posts + checks: + - kind: markdown_requires_h1 + - kind: filesystem_name_case + style: kebab +``` + +Filesystem-attached checks run from filesystem base config. They select files +with `include` and `exclude` globs and do not require collections to exist: + +```yaml +filesystemChecks: + - name: docs + path: docs/content + include: ["**/*.md"] + parseFailures: warning + checks: + - kind: filesystem_name_case + style: kebab + - kind: filesystem_name_matches_field + field: title +``` + +`katalyst check` with no selector runs filesystem scopes first, then collection +checks. `katalyst check <collection>` and `katalyst check <collection>/<item>` +run collection checks only. Filesystem scopes reject check types that do not +list `filesystem` in `configurableIn`. + +Document-aware filesystem checks parse selected files only when needed.
+`parseFailures: error` is the default and fails the run on parse errors. +`parseFailures: warning` reports the parse error as advisory and skips +document-aware checks for that file. + ## Text rules The `text_*` check types lint the item **body** as raw text, independent of diff --git a/docs/content/reference/glossary.md b/docs/content/reference/glossary.md index 485fb28d..051b7f88 100644 --- a/docs/content/reference/glossary.md +++ b/docs/content/reference/glossary.md @@ -20,10 +20,11 @@ how each term maps onto today's code is documented in the per-package | **BaseType** | A known backend kind capable of holding content Katalyst can operate on (`filesystem` and `sqlite` today; `postgresql`, `mongodb`, and others later). | | **Body** | Everything after the closing frontmatter fence. Preserved verbatim except by `fix`. | | **Check** | Shorthand for a check instance when context is unambiguous. | -| **Check instance** | One configured check attached to a collection: a check type plus its arguments (one YAML object under `checks:`). It runs against each item (object, markdown, or filesystem family). | -| **Check type** | The reusable definition of a constraint: one entry in katalyst's check registry (`object_required_field`, `markdown_single_h1`, ...), selected by its `kind:` id. `katalyst check-types list` lists them. | +| **Check instance** | One configured check: a check type plus its arguments. It is attached either to a collection under `checks:` or to a filesystem scope under `filesystemChecks[].checks`. | +| **Check type** | The reusable definition of a constraint: one entry in katalyst's check registry (`object_required_field`, `markdown_single_h1`, ...), selected by its `kind:` id. `katalyst check-types list` lists them, including where they can be configured. | | **CheckLibrary** | The provider behind a check type. 
Native libraries (`filesystem`, `plaintext`, `markdownbodytext`, `structuredobject`) wrap hand-written checks; schema-backed libraries (`json-schema`, Vale next) compile a named schema and run items against it, and report their own availability. A library is provenance, orthogonal to the source-data family (`structuredObject`, `markdownBodyText`, `fileSystem`, `plainText`) the check reads. | | **Collection** | A group of items that share structure: a directory of similar files, a relational table, a Mongo collection, or a family of API resources. Collections own checks and are addressed by name. | +| **CollectionCheck** | A check instance attached to a collection definition. CollectionChecks can run per item or once over the collection's full item set. | | **Collection layer** | Inspectors that profile a configured collection's items, addressed by domain identity (collection + item id) and probing through the same substrate the checks use. | | **Collection-scoped check** | A check type that runs once per collection over all its items (e.g. `filesystem_unique_filename`), rather than per item. It re-scans the full collection even under a single-item selector. | | **Collection mapping** | The two-way mapping from a base instance's contents to collections and items. Yields one or more collections; filesystem and SQLite mappings are implemented today. Implemented by `CollectionDefinition` in code. | @@ -32,6 +33,9 @@ how each term maps onto today's code is documented in the per-package | **Document** | The markdown file-form of an **Item**: a parsed markdown file (frontmatter metadata + body + a line map). Use it where parsing or the on-disk file is the subject; elsewhere prefer **Item**. | | **Evidence** | The structured result of one inspector: counts and distributions with the unit count `n` as denominator. Never a recommendation or verdict. | | **Field** | A key in an item's structured object (its frontmatter map). 
A field is an **Attribute**; a filename is an attribute but not a field. The term used wherever object or frontmatter keys are meant (`object_field_type`, `name_matches_field`). | +| **FileCheck** | A runtime check that runs once per file. Collection-attached item checks and filesystem-attached per-file checks both use this shape. | +| **FileSetCheck** | A runtime check that runs once over a selected set of files, such as unique filename or unmatched-file checks. | +| **FilesystemCheck** | A check instance attached to a filesystem scope under a filesystem base's `filesystemChecks` list. It can run before collections exist. | | **Frontmatter** | The on-disk metadata block at the top of a markdown file, in YAML (`---`), TOML (`+++`), or JSON (`{ … }`). | | **Inspector** | A read-only operation that measures content and returns evidence. The descriptive dual of a check: a check asserts a predicate, an inspector reports the distribution. Inspectors come in two layers. | | **Item** | The unit of data in a collection, addressed by a selector and operated on by `check`, `fix`, and the `item` subcommands. In the filesystem backend an item is one file matching the collection's pattern, its id the filename stem; its markdown file-form is a **Document**. | @@ -49,7 +53,7 @@ how each term maps onto today's code is documented in the per-package | **Selector** | How a command names what to operate on: nothing (whole project), `<collection>`, or `<collection>/<item>`. | | **Scope** | The level an operation or backend mapping applies to: item, collection, project, or across collections. In a base, scope answers whether one matched source unit becomes an item or a collection. | | **Span** | The slice of body text a text rule is evaluated against, chosen by its `target`: the whole `body`, each `line`, the `first-line`, or `matched-lines` (lines matching a `select` regex).
| -| **Target** | The slice of a path a filesystem name/path check type tests: `filename`, `filename-ext`, `parent-dir`, or `path-segments` (every directory segment plus the basename). For a text rule, the slice of body it tests, see Span. | +| **Target** | The slice a rule tests in a specific family. For a filesystem name/path check, `target` can be `filename`, `filename-ext`, `parent-dir`, or `path-segments`; for a text rule, see Span. | | **Text rule** | A `text_*` check (`text_requires`, `text_forbids`, `text_denylist`) that tests the body as raw text, a regex or a literal denylist, independent of markdown structure. Applies to plain-text items too. | | **Validation result** | The product of running an item's checks: either `path: OK`, or a flat list of violations. | | **Variant** | A discriminated check group inside a collection (one entry of `variants:`): a `when` discriminator plus the schema/checks added for items that match it. An item runs the base checks plus the first matching variant's. | @@ -58,8 +62,8 @@ how each term maps onto today's code is documented in the per-package ## Usage notes - A **check type** is the definition; a **check instance** is that check type - configured in a collection and run against a specific item, and a - **violation** is a check that failed. The [check types + configured at a configuration site, and a **violation** is a check that + failed. The [check types reference]({{< relref "check-types/_index.md" >}}) and `katalyst check-types list` enumerate check types. - Prefer **schema** for what users author. The runtime check is the `object` diff --git a/internal/checks/AGENTS.md b/internal/checks/AGENTS.md index 0b2a61b6..b5c96f8d 100644 --- a/internal/checks/AGENTS.md +++ b/internal/checks/AGENTS.md @@ -4,8 +4,8 @@ The check engine: the check types Katalyst ships, the libraries that run them, and the violations they produce. **Architecture and design rationale** - the model (check type vs. instance, -family vs. 
library, scope), check libraries, how a check runs, and the -trade-offs - live in the +family vs. library vs. configuration site vs. runtime granularity), check +libraries, how a check runs, and the trade-offs - live in the [How checks work](../../docs/content/deep-dives/domain-model/checks.md) deep-dive, which is the source of truth. The per-type catalog is the generated [check-types reference](../../docs/content/reference/check-types/), and the @@ -18,6 +18,13 @@ local code conventions. it through the package's `register` helper (in `library.go`). To add one, see the [add-katalyst-check-type](../../.cursor/skills/add-katalyst-check-type/SKILL.md) skill. +- Set descriptor `ConfigurableIn` for every check that can attach outside a + collection. Empty `ConfigurableIn` means collection-only during migration. Use + `checks.ConfigCollection` and `checks.ConfigFilesystem` rather than string + literals. +- Set descriptor `NeedsDocument` when a check reads frontmatter, markdown body + text, or source line maps. Filesystem-attached checks use it to parse lazily + and to apply `parseFailures`. - Family packages (`structuredobject/`, `markdownbodytext/`, `filesystem/`, `plaintext/`) import the core `checks` package, never the reverse. Callers wire every family in by blank-importing `internal/checks/all`. diff --git a/internal/checks/checks.go b/internal/checks/checks.go index 951f0e62..d5c48753 100644 --- a/internal/checks/checks.go +++ b/internal/checks/checks.go @@ -11,8 +11,8 @@ package checks import "github.com/abegong/katalyst/internal/codec/markdownbodytext" -// Context carries all data a check may need. -type Context struct { +// FileContext carries all data a file check may need. +type FileContext struct { FilePath string // CollectionRoot is the absolute directory of the item's collection. 
// Path/filename targets that span directories (path-segments, path @@ -23,6 +23,9 @@ type Context struct { Meta map[string]any } +// Context is the historical name for FileContext. +type Context = FileContext + // Severity classifies how serious a violation is. The zero value is // SeverityError, so any check that does not set it keeps failing the run; // SeverityWarning is advisory, it is reported but never changes the exit diff --git a/internal/checks/collection.go b/internal/checks/collection.go index 7c3ec333..feeccd92 100644 --- a/internal/checks/collection.go +++ b/internal/checks/collection.go @@ -12,19 +12,28 @@ type ItemContext struct { Meta map[string]any } -// CollectionContext carries every item in a collection, for checks that -// reason across siblings (uniqueness, required index files). -type CollectionContext struct { - Root string - Items []ItemContext +// FileSetContext carries every selected file in a set, for checks that reason +// across siblings (uniqueness, required index files, unmatched files). +type FileSetContext struct { + Root string + Items []ItemContext + Unmatched []string + Include []string + Exclude []string } +// CollectionContext is the historical name for FileSetContext. +type CollectionContext = FileSetContext + // CollectionCheck validates a concern across all items in a collection. It // runs once per collection, after the per-item pass. type CollectionCheck interface { RunCollection(ctx CollectionContext) []Violation } +// FileSetCheck is the product-facing name for a set-level runtime check. +type FileSetCheck = CollectionCheck + // CollisionViolations emits one violation per group of two or more paths, // naming all colliding files. Groups and paths are sorted for determinism. 
It // is the shared helper behind the uniqueness checks (unique_filename in the @@ -58,3 +67,8 @@ func RunCollectionAll(ctx CollectionContext, list []CollectionCheck) []Violation } return out } + +// RunFileSetAll runs every file-set check and flattens the violations. +func RunFileSetAll(ctx FileSetContext, list []FileSetCheck) []Violation { + return RunCollectionAll(ctx, list) +} diff --git a/internal/checks/config.go b/internal/checks/config.go new file mode 100644 index 00000000..19dfdb7d --- /dev/null +++ b/internal/checks/config.go @@ -0,0 +1,130 @@ +package checks + +import ( + "fmt" + "strings" + + "gopkg.in/yaml.v3" +) + +// RawCheck mirrors one `checks:` entry. The struct fields exist so a misspelled +// key fails YAML's known-field validation; the retained node is what a check +// type's own parser decodes for its real args. +type RawCheck struct { + Kind string `yaml:"kind"` + Schema string `yaml:"schema"` + Field string `yaml:"field"` + Type string `yaml:"type"` + Value string `yaml:"value"` + Values []string `yaml:"values"` + Min *float64 `yaml:"min"` + Max *float64 `yaml:"max"` + MinLength int `yaml:"min_length"` + MaxLength int `yaml:"max_length"` + Heading string `yaml:"heading"` + Style string `yaml:"style"` + Target string `yaml:"target"` + Transform string `yaml:"transform"` + Prefix string `yaml:"prefix"` + Suffix string `yaml:"suffix"` + Allow []string `yaml:"allow"` + Deny []string `yaml:"deny"` + Pattern string `yaml:"pattern"` + Fields []string `yaml:"fields"` + Name string `yaml:"name"` + Match string `yaml:"match"` + Select string `yaml:"select"` + Fix string `yaml:"fix"` + + node *yaml.Node +} + +var rawCheckKeys = map[string]bool{ + "kind": true, "schema": true, "field": true, "type": true, + "value": true, "values": true, "min": true, "max": true, + "min_length": true, "max_length": true, "heading": true, + "style": true, "target": true, "transform": true, + "prefix": true, "suffix": true, "allow": true, "deny": true, + "pattern": true, 
"fields": true, "name": true, "match": true, + "select": true, "fix": true, +} + +// UnmarshalYAML decodes the entry's fields and stashes the raw node, so the +// node can travel to a check type's own parser. +func (rc *RawCheck) UnmarshalYAML(value *yaml.Node) error { + if value.Kind != yaml.MappingNode { + return fmt.Errorf("invalid check: expected a mapping") + } + for i := 0; i < len(value.Content); i += 2 { + key := value.Content[i].Value + if !rawCheckKeys[key] { + return fmt.Errorf("unknown check key %q", key) + } + } + type plain RawCheck + var p plain + if err := value.Decode(&p); err != nil { + return err + } + *rc = RawCheck(p) + rc.node = value + return nil +} + +// BuildConfiguredInput carries the shared pieces needed to turn raw config +// into validated configured checks. +type BuildConfiguredInput struct { + ErrorContext string + Schema string + Raw []RawCheck + SchemaKnown func(string) bool + ConfigurableIn string + AllowObject bool +} + +// BuildConfigured folds an optional schema name into a leading object check +// and parses all raw checks through the registry. 
+func BuildConfigured(in BuildConfiguredInput) ([]ConfiguredCheck, error) { + configurableIn := in.ConfigurableIn + if configurableIn == "" { + configurableIn = ConfigCollection + } + out := make([]ConfiguredCheck, 0, len(in.Raw)+1) + if in.Schema != "" { + if !in.AllowObject { + return nil, fmt.Errorf("%s: schema is not supported for %s checks", in.ErrorContext, configurableIn) + } + if in.SchemaKnown != nil && !in.SchemaKnown(in.Schema) { + return nil, fmt.Errorf("%s: unknown schema %q", in.ErrorContext, in.Schema) + } + out = append(out, ConfiguredCheck{Kind: CheckObject, Schema: in.Schema}) + } + for j, raw := range in.Raw { + kind := CheckType(strings.TrimSpace(raw.Kind)) + if kind == CheckObject { + if !in.AllowObject { + return nil, fmt.Errorf("%s: checks[%d]: object check is not supported for %s checks", in.ErrorContext, j, configurableIn) + } + if raw.Schema == "" { + return nil, fmt.Errorf("%s: checks[%d]: object check requires \"schema\"", in.ErrorContext, j) + } + if in.SchemaKnown != nil && !in.SchemaKnown(raw.Schema) { + return nil, fmt.Errorf("%s: checks[%d]: unknown schema %q", in.ErrorContext, j, raw.Schema) + } + if raw.Field != "" { + return nil, fmt.Errorf("%s: checks[%d]: object check does not support \"field\"", in.ErrorContext, j) + } + out = append(out, ConfiguredCheck{Kind: CheckObject, Schema: raw.Schema}) + continue + } + args, err := Parse(kind, raw.node) + if err != nil { + return nil, fmt.Errorf("%s: checks[%d]: %w", in.ErrorContext, j, err) + } + if !SupportsConfiguration(kind, configurableIn) { + return nil, fmt.Errorf("%s: checks[%d]: check type %q does not support %s checks", in.ErrorContext, j, kind, configurableIn) + } + out = append(out, ConfiguredCheck{Kind: kind, Args: args}) + } + return out, nil +} diff --git a/internal/checks/filesystem/extension_in.go b/internal/checks/filesystem/extension_in.go index 27eaee1a..0450ee34 100644 --- a/internal/checks/filesystem/extension_in.go +++ b/internal/checks/filesystem/extension_in.go 
@@ -37,11 +37,12 @@ func (f FilesystemExtensionIn) Run(ctx checks.Context) []checks.Violation { func init() { registerParsed(checks.Descriptor{ - CheckType: checks.CheckFilesystemExtensionIn, - Family: "fileSystem", - Slug: "extension-in", - Title: "Extension in", - Summary: "Allow only specific file extensions.", + CheckType: checks.CheckFilesystemExtensionIn, + Family: "fileSystem", + ConfigurableIn: []string{checks.ConfigCollection, checks.ConfigFilesystem}, + Slug: "extension-in", + Title: "Extension in", + Summary: "Allow only specific file extensions.", Fields: []checks.Field{ {Name: "values", Required: true, Desc: "Allowed extensions, including the leading dot."}, }, diff --git a/internal/checks/filesystem/index_file_required.go b/internal/checks/filesystem/index_file_required.go index cc6e8fc4..90a7fa7a 100644 --- a/internal/checks/filesystem/index_file_required.go +++ b/internal/checks/filesystem/index_file_required.go @@ -49,12 +49,13 @@ type indexFileArgs struct { func init() { registerParsed(checks.Descriptor{ - CheckType: checks.CheckFilesystemIndexFileRequired, - Family: "fileSystem", - Slug: "index-file-required", - Title: "Index file required", - Summary: "Require that every directory containing items has an index file.", - Scope: "collection", + CheckType: checks.CheckFilesystemIndexFileRequired, + Family: "fileSystem", + ConfigurableIn: []string{checks.ConfigCollection, checks.ConfigFilesystem}, + Slug: "index-file-required", + Title: "Index file required", + Summary: "Require that every directory containing items has an index file.", + Scope: "collection", Fields: []checks.Field{ {Name: "name", Required: false, Default: "_index.md", Desc: "Index filename that must be present in each item directory."}, }, diff --git a/internal/checks/filesystem/name_affix.go b/internal/checks/filesystem/name_affix.go index 525c7836..ca6e3ce0 100644 --- a/internal/checks/filesystem/name_affix.go +++ b/internal/checks/filesystem/name_affix.go @@ -42,11 +42,12 @@ 
type nameAffixArgs struct { func init() { registerParsed(checks.Descriptor{ - CheckType: checks.CheckFilesystemNameAffix, - Family: "fileSystem", - Slug: "name-affix", - Title: "Name affix", - Summary: "Require a name to start with a prefix and/or end with a suffix.", + CheckType: checks.CheckFilesystemNameAffix, + Family: "fileSystem", + ConfigurableIn: []string{checks.ConfigCollection, checks.ConfigFilesystem}, + Slug: "name-affix", + Title: "Name affix", + Summary: "Require a name to start with a prefix and/or end with a suffix.", Fields: []checks.Field{ {Name: "prefix", Required: false, Desc: "Required name prefix (at least one of prefix/suffix)."}, {Name: "suffix", Required: false, Desc: "Required name suffix (at least one of prefix/suffix)."}, diff --git a/internal/checks/filesystem/name_case.go b/internal/checks/filesystem/name_case.go index 6215a773..f4e7b65a 100644 --- a/internal/checks/filesystem/name_case.go +++ b/internal/checks/filesystem/name_case.go @@ -59,11 +59,12 @@ type nameCaseArgs struct { func init() { registerParsed(checks.Descriptor{ - CheckType: checks.CheckFilesystemNameCase, - Family: "fileSystem", - Slug: "name-case", - Title: "Name case", - Summary: "Require a name (or path segments) to follow a case style.", + CheckType: checks.CheckFilesystemNameCase, + Family: "fileSystem", + ConfigurableIn: []string{checks.ConfigCollection, checks.ConfigFilesystem}, + Slug: "name-case", + Title: "Name case", + Summary: "Require a name (or path segments) to follow a case style.", Fields: []checks.Field{ {Name: "style", Required: true, Desc: "One of `kebab`, `snake`, `screaming-snake`, `camel`, `pascal`, `point`, `lower`."}, {Name: "target", Required: false, Default: "filename", Desc: "What to test: `filename`, `filename-ext`, `parent-dir`, or `path-segments`."}, diff --git a/internal/checks/filesystem/name_length.go b/internal/checks/filesystem/name_length.go index 91daa984..832d0c3f 100644 --- a/internal/checks/filesystem/name_length.go +++ 
b/internal/checks/filesystem/name_length.go @@ -45,11 +45,12 @@ type nameLengthArgs struct { func init() { registerParsed(checks.Descriptor{ - CheckType: checks.CheckFilesystemNameLength, - Family: "fileSystem", - Slug: "name-length", - Title: "Name length", - Summary: "Bound the character length of a name.", + CheckType: checks.CheckFilesystemNameLength, + Family: "fileSystem", + ConfigurableIn: []string{checks.ConfigCollection, checks.ConfigFilesystem}, + Slug: "name-length", + Title: "Name length", + Summary: "Bound the character length of a name.", Fields: []checks.Field{ {Name: "min", Required: false, Desc: "Minimum length (at least one of min/max)."}, {Name: "max", Required: false, Desc: "Maximum length (at least one of min/max)."}, diff --git a/internal/checks/filesystem/name_matches_field.go b/internal/checks/filesystem/name_matches_field.go index a289c2d7..ef4ea761 100644 --- a/internal/checks/filesystem/name_matches_field.go +++ b/internal/checks/filesystem/name_matches_field.go @@ -67,11 +67,13 @@ type nameMatchesFieldArgs struct { func init() { registerParsed(checks.Descriptor{ - CheckType: checks.CheckFilesystemNameMatchesField, - Family: "fileSystem", - Slug: "name-matches-field", - Title: "Name matches field", - Summary: "Require a name to equal a frontmatter field, optionally slugified.", + CheckType: checks.CheckFilesystemNameMatchesField, + Family: "fileSystem", + ConfigurableIn: []string{checks.ConfigCollection, checks.ConfigFilesystem}, + NeedsDocument: true, + Slug: "name-matches-field", + Title: "Name matches field", + Summary: "Require a name to equal a frontmatter field, optionally slugified.", Fields: []checks.Field{ {Name: "field", Required: false, Default: "slug", Desc: "Frontmatter key compared to the name."}, {Name: "transform", Required: false, Default: "none", Desc: "`none` or `slugify` (applied to the field value before comparison)."}, diff --git a/internal/checks/filesystem/name_regex.go b/internal/checks/filesystem/name_regex.go 
index 25793db2..8a9c4d9b 100644 --- a/internal/checks/filesystem/name_regex.go +++ b/internal/checks/filesystem/name_regex.go @@ -39,11 +39,12 @@ type nameRegexArgs struct { func init() { registerParsed(checks.Descriptor{ - CheckType: checks.CheckFilesystemNameRegex, - Family: "fileSystem", - Slug: "name-regex", - Title: "Name regex", - Summary: "Require a name to match a regular expression (anchored).", + CheckType: checks.CheckFilesystemNameRegex, + Family: "fileSystem", + ConfigurableIn: []string{checks.ConfigCollection, checks.ConfigFilesystem}, + Slug: "name-regex", + Title: "Name regex", + Summary: "Require a name to match a regular expression (anchored).", Fields: []checks.Field{ {Name: "pattern", Required: true, Desc: "Regular expression; matched anchored (`^pattern$`)."}, {Name: "target", Required: false, Default: "filename", Desc: "What to test: `filename`, `filename-ext`, `parent-dir`, or `path-segments`."}, diff --git a/internal/checks/filesystem/parent_dir_in.go b/internal/checks/filesystem/parent_dir_in.go index 449b070d..a9d12249 100644 --- a/internal/checks/filesystem/parent_dir_in.go +++ b/internal/checks/filesystem/parent_dir_in.go @@ -32,11 +32,12 @@ type parentDirInArgs struct { func init() { registerParsed(checks.Descriptor{ - CheckType: checks.CheckFilesystemParentDirIn, - Family: "fileSystem", - Slug: "parent-dir-in", - Title: "Parent directory in", - Summary: "Require that the file's parent directory name is in an allowed set.", + CheckType: checks.CheckFilesystemParentDirIn, + Family: "fileSystem", + ConfigurableIn: []string{checks.ConfigCollection, checks.ConfigFilesystem}, + Slug: "parent-dir-in", + Title: "Parent directory in", + Summary: "Require that the file's parent directory name is in an allowed set.", Fields: []checks.Field{ {Name: "values", Required: true, Desc: "Allowed parent directory names."}, }, diff --git a/internal/checks/filesystem/parent_dir_matches_field.go b/internal/checks/filesystem/parent_dir_matches_field.go index 
4ef294af..280abf9f 100644 --- a/internal/checks/filesystem/parent_dir_matches_field.go +++ b/internal/checks/filesystem/parent_dir_matches_field.go @@ -49,11 +49,13 @@ type parentDirMatchesArgs struct { func init() { registerParsed(checks.Descriptor{ - CheckType: checks.CheckFilesystemParentDirMatchesFld, - Family: "fileSystem", - Slug: "parent-dir-matches-field", - Title: "Parent directory matches field", - Summary: "Require the parent directory name to equal a frontmatter field.", + CheckType: checks.CheckFilesystemParentDirMatchesFld, + Family: "fileSystem", + ConfigurableIn: []string{checks.ConfigCollection, checks.ConfigFilesystem}, + NeedsDocument: true, + Slug: "parent-dir-matches-field", + Title: "Parent directory matches field", + Summary: "Require the parent directory name to equal a frontmatter field.", Fields: []checks.Field{ {Name: "field", Required: true, Desc: "Frontmatter key compared to the parent directory name."}, }, diff --git a/internal/checks/filesystem/path_charset.go b/internal/checks/filesystem/path_charset.go index 0a3bcf1d..99e58452 100644 --- a/internal/checks/filesystem/path_charset.go +++ b/internal/checks/filesystem/path_charset.go @@ -61,11 +61,12 @@ type pathCharsetArgs struct { func init() { registerParsed(checks.Descriptor{ - CheckType: checks.CheckFilesystemPathCharset, - Family: "fileSystem", - Slug: "path-charset", - Title: "Path charset", - Summary: "Constrain the characters allowed in the item's path.", + CheckType: checks.CheckFilesystemPathCharset, + Family: "fileSystem", + ConfigurableIn: []string{checks.ConfigCollection, checks.ConfigFilesystem}, + Slug: "path-charset", + Title: "Path charset", + Summary: "Constrain the characters allowed in the item's path.", Fields: []checks.Field{ {Name: "deny", Required: false, Desc: "Forbidden substrings (e.g. a space). 
Use `deny` or `allow`, not both."}, {Name: "allow", Required: false, Desc: "The only permitted characters; the path separator is always allowed."}, diff --git a/internal/checks/filesystem/path_depth.go b/internal/checks/filesystem/path_depth.go index 62fcf0bb..1b29dd93 100644 --- a/internal/checks/filesystem/path_depth.go +++ b/internal/checks/filesystem/path_depth.go @@ -58,11 +58,12 @@ type pathDepthArgs struct { func init() { registerParsed(checks.Descriptor{ - CheckType: checks.CheckFilesystemPathDepth, - Family: "fileSystem", - Slug: "path-depth", - Title: "Path depth", - Summary: "Bound directory nesting relative to the collection root.", + CheckType: checks.CheckFilesystemPathDepth, + Family: "fileSystem", + ConfigurableIn: []string{checks.ConfigCollection, checks.ConfigFilesystem}, + Slug: "path-depth", + Title: "Path depth", + Summary: "Bound directory nesting relative to the collection root.", Fields: []checks.Field{ {Name: "min", Required: false, Desc: "Minimum depth (at least one of min/max)."}, {Name: "max", Required: false, Desc: "Maximum depth; `0` means a flat collection (at least one of min/max)."}, diff --git a/internal/checks/filesystem/referenced_files.go b/internal/checks/filesystem/referenced_files.go index ea1e24b0..45aa647e 100644 --- a/internal/checks/filesystem/referenced_files.go +++ b/internal/checks/filesystem/referenced_files.go @@ -76,11 +76,13 @@ type referencedFilesArgs struct { func init() { registerParsed(checks.Descriptor{ - CheckType: checks.CheckFilesystemReferencedFiles, - Family: "fileSystem", - Slug: "referenced-files-exist", - Title: "Referenced files exist", - Summary: "Require path-valued frontmatter fields to resolve to real files.", + CheckType: checks.CheckFilesystemReferencedFiles, + Family: "fileSystem", + ConfigurableIn: []string{checks.ConfigCollection, checks.ConfigFilesystem}, + NeedsDocument: true, + Slug: "referenced-files-exist", + Title: "Referenced files exist", + Summary: "Require path-valued frontmatter 
fields to resolve to real files.", Fields: []checks.Field{ {Name: "fields", Required: true, Desc: "Frontmatter keys holding a path (string) or list of paths, resolved relative to the item."}, }, diff --git a/internal/checks/filesystem/unique_filename.go b/internal/checks/filesystem/unique_filename.go index 650fcfac..316d97f4 100644 --- a/internal/checks/filesystem/unique_filename.go +++ b/internal/checks/filesystem/unique_filename.go @@ -24,12 +24,13 @@ func (UniqueFilename) RunCollection(ctx checks.CollectionContext) []checks.Viola func init() { registerParsed(checks.Descriptor{ - CheckType: checks.CheckFilesystemUniqueFilename, - Family: "fileSystem", - Slug: "unique-filename", - Title: "Unique filename", - Summary: "Require that no two items in the collection share a basename.", - Scope: "collection", + CheckType: checks.CheckFilesystemUniqueFilename, + Family: "fileSystem", + ConfigurableIn: []string{checks.ConfigCollection, checks.ConfigFilesystem}, + Slug: "unique-filename", + Title: "Unique filename", + Summary: "Require that no two items in the collection share a basename.", + Scope: "collection", ConfigExample: `collections: notes: path: notes diff --git a/internal/checks/filesystem/unmatched_files.go b/internal/checks/filesystem/unmatched_files.go new file mode 100644 index 00000000..6f32f310 --- /dev/null +++ b/internal/checks/filesystem/unmatched_files.go @@ -0,0 +1,50 @@ +package filesystem + +import ( + "fmt" + "strings" + + "github.com/abegong/katalyst/internal/checks" +) + +// UnmatchedFiles reports files in a filesystem scope that match neither +// include nor exclude patterns. 
+type UnmatchedFiles struct{} + +func (UnmatchedFiles) RunCollection(ctx checks.CollectionContext) []checks.Violation { + out := make([]checks.Violation, 0, len(ctx.Unmatched)) + for _, rel := range ctx.Unmatched { + out = append(out, checks.Violation{ + File: rel, + Message: fmt.Sprintf("unmatched file (matches no include pattern %s and no exclude pattern %s)", patternList(ctx.Include), patternList(ctx.Exclude)), + }) + } + return out +} + +func patternList(patterns []string) string { + if len(patterns) == 0 { + return "[]" + } + return "[" + strings.Join(patterns, ", ") + "]" +} + +func init() { + registerParsed(checks.Descriptor{ + CheckType: checks.CheckFilesystemUnmatchedFiles, + Family: "fileSystem", + ConfigurableIn: []string{checks.ConfigFilesystem}, + Slug: "unmatched-files", + Title: "Unmatched files", + Summary: "Report regular files under a filesystem scope that match neither include nor exclude patterns.", + Scope: "collection", + ConfigExample: `filesystemChecks: + - path: docs + include: ["**/*.md"] + exclude: ["**/_generated/**"] + checks: + - kind: filesystem_unmatched_files`, + }, checks.NoArgs, nil, func(any) checks.CollectionCheck { + return UnmatchedFiles{} + }) +} diff --git a/internal/checks/kinds.go b/internal/checks/kinds.go index d852e95c..c4d61e5d 100644 --- a/internal/checks/kinds.go +++ b/internal/checks/kinds.go @@ -34,6 +34,7 @@ const ( CheckFilesystemUniqueFilename CheckType = "filesystem_unique_filename" CheckFilesystemUniqueField CheckType = "filesystem_unique_field" CheckFilesystemIndexFileRequired CheckType = "filesystem_index_file_required" + CheckFilesystemUnmatchedFiles CheckType = "filesystem_unmatched_files" CheckTextRequires CheckType = "text_requires" CheckTextForbids CheckType = "text_forbids" CheckTextDenylist CheckType = "text_denylist" diff --git a/internal/checks/registry.go b/internal/checks/registry.go index 09c32016..05297c61 100644 --- a/internal/checks/registry.go +++ b/internal/checks/registry.go @@ -59,11 
+59,24 @@ type Descriptor struct { // Scope is "collection" for checks that run once per collection over all // its items; empty means an ordinary per-item check. Scope string `json:"scope,omitempty"` + // ConfigurableIn names the config attachment points that accept this check + // type: "collection", "filesystem". Empty means collection during + // migration. + ConfigurableIn []string `json:"configurableIn,omitempty"` + // NeedsDocument reports whether this check needs parsed document metadata + // or body text. Filesystem scopes use it to avoid parsing for path-only + // checks. + NeedsDocument bool `json:"needs_document,omitempty"` // Severity is "warning" for checks that emit advisory findings (never // failing the run); empty means the default, "error". Severity string `json:"severity,omitempty"` } +const ( + ConfigCollection = "collection" + ConfigFilesystem = "filesystem" +) + // Family identifies a check-type family: its id (used in Descriptor.Family and // `--family`), its docs-directory slug, and its intro copy. type Family struct { @@ -222,6 +235,38 @@ func DescriptorFor(kind CheckType) (Descriptor, bool) { return registrations[i].desc, true } +// DescriptorConfigurableIn returns the explicit or migration-default +// configuration sites for d. +func DescriptorConfigurableIn(d Descriptor) []string { + if len(d.ConfigurableIn) == 0 { + return []string{ConfigCollection} + } + out := make([]string, len(d.ConfigurableIn)) + copy(out, d.ConfigurableIn) + return out +} + +// SupportsConfiguration reports whether kind accepts the named configuration +// site. +func SupportsConfiguration(kind CheckType, site string) bool { + desc, ok := DescriptorFor(kind) + if !ok { + return false + } + for _, t := range DescriptorConfigurableIn(desc) { + if t == site { + return true + } + } + return false +} + +// NeedsDocument reports whether a check needs parsed document data. 
+func NeedsDocument(kind CheckType) bool { + desc, ok := DescriptorFor(kind) + return ok && desc.NeedsDocument +} + // CollectionScoped reports whether kind runs once per collection (vs. per item). func CollectionScoped(kind CheckType) bool { i, ok := byKind[kind] diff --git a/internal/checks/registry_test.go b/internal/checks/registry_test.go index 51512b9f..e95fcffb 100644 --- a/internal/checks/registry_test.go +++ b/internal/checks/registry_test.go @@ -55,3 +55,33 @@ func TestDescriptorLibrary(t *testing.T) { } } } + +func TestDescriptorConfigurableIn(t *testing.T) { + if !checks.SupportsConfiguration(checks.CheckFilesystemNameCase, checks.ConfigCollection) { + t.Fatalf("filesystem_name_case should support collection checks") + } + if !checks.SupportsConfiguration(checks.CheckFilesystemNameCase, checks.ConfigFilesystem) { + t.Fatalf("filesystem_name_case should support filesystem checks") + } + if checks.SupportsConfiguration(checks.CheckMarkdownRequiresH1, checks.ConfigFilesystem) { + t.Fatalf("markdown_requires_h1 should remain collection-only") + } + if !checks.SupportsConfiguration(checks.CheckFilesystemUnmatchedFiles, checks.ConfigFilesystem) { + t.Fatalf("filesystem_unmatched_files should support filesystem checks") + } + if checks.SupportsConfiguration(checks.CheckFilesystemUnmatchedFiles, checks.ConfigCollection) { + t.Fatalf("filesystem_unmatched_files should not support collection checks") + } +} + +func TestDescriptorNeedsDocument(t *testing.T) { + if checks.NeedsDocument(checks.CheckFilesystemNameCase) { + t.Fatalf("filesystem_name_case should stay path-only") + } + if !checks.NeedsDocument(checks.CheckFilesystemNameMatchesField) { + t.Fatalf("filesystem_name_matches_field should need document metadata") + } + if !checks.NeedsDocument(checks.CheckFilesystemUniqueField) { + t.Fatalf("filesystem_unique_field should need document metadata") + } +} diff --git a/internal/checks/structuredobject/unique_field.go 
b/internal/checks/structuredobject/unique_field.go index 39a9c08d..ae7167be 100644 --- a/internal/checks/structuredobject/unique_field.go +++ b/internal/checks/structuredobject/unique_field.go @@ -38,12 +38,14 @@ func (c UniqueField) RunCollection(ctx checks.CollectionContext) []checks.Violat func init() { registerParsed(checks.Descriptor{ - CheckType: checks.CheckFilesystemUniqueField, - Family: "structuredObject", - Slug: "unique-field", - Title: "Unique field", - Summary: "Require that no two items share a value for a frontmatter field.", - Scope: "collection", + CheckType: checks.CheckFilesystemUniqueField, + Family: "structuredObject", + ConfigurableIn: []string{checks.ConfigCollection, checks.ConfigFilesystem}, + NeedsDocument: true, + Slug: "unique-field", + Title: "Unique field", + Summary: "Require that no two items share a value for a frontmatter field.", + Scope: "collection", Fields: []checks.Field{ {Name: "field", Required: true, Desc: "Frontmatter key whose value must be unique across the collection."}, }, diff --git a/internal/project/loader.go b/internal/project/loader.go index db71ef1c..fbeb7dac 100644 --- a/internal/project/loader.go +++ b/internal/project/loader.go @@ -25,6 +25,7 @@ import ( "github.com/abegong/katalyst/internal/storage" "github.com/abegong/katalyst/internal/storage/collection" + "github.com/abegong/katalyst/internal/storage/filesystemcheck" "gopkg.in/yaml.v3" ) @@ -86,6 +87,9 @@ type BaseInstance struct { Root string // Collections this base declares, in name order. Collections []Collection + // FilesystemChecks are raw filesystem policy scopes attached to this + // filesystem base. + FilesystemChecks []filesystemcheck.Scope } // Collection, CollectionVariant, and ListingDefaults live in @@ -130,10 +134,11 @@ type rawBaseKind struct { // collections it declares (name → definition). The collection mirror lives with // the Collection type in internal/storage/collection. 
type rawBaseInstance struct { - Type string `yaml:"type"` - Root string `yaml:"root"` - Path string `yaml:"path"` - Collections map[string]collection.RawCollection `yaml:"collections"` + Type string `yaml:"type"` + Root string `yaml:"root"` + Path string `yaml:"path"` + FilesystemChecks []filesystemcheck.RawScope `yaml:"filesystemChecks"` + Collections map[string]collection.RawCollection `yaml:"collections"` } // Load finds the project root (nearest ancestor with a .katalyst/ dir), @@ -353,6 +358,24 @@ func (c *Config) buildInstance(name string, ri rawBaseInstance, exts []string, p rootRel = "." } instRoot := resolve(c.Root, rootRel) + var filesystemChecks []filesystemcheck.Scope + if len(ri.FilesystemChecks) > 0 { + if storage.BaseType(typ) != storage.Filesystem { + return BaseInstance{}, fmt.Errorf("%s %q: filesystemChecks requires type %q", label, name, storage.Filesystem) + } + filesystemChecks = make([]filesystemcheck.Scope, 0, len(ri.FilesystemChecks)) + for i, raw := range ri.FilesystemChecks { + scope, err := filesystemcheck.Build(filesystemcheck.BuildInput{ + ErrorContext: fmt.Sprintf("%s %q: filesystemChecks[%d]", label, name, i), + Raw: raw, + BaseRoot: instRoot, + }) + if err != nil { + return BaseInstance{}, err + } + filesystemChecks = append(filesystemChecks, scope) + } + } // Start with the inline collections, then fold in any per-collection files. 
raws := make(map[string]collection.RawCollection, len(ri.Collections)) @@ -401,7 +424,7 @@ func (c *Config) buildInstance(name string, ri rawBaseInstance, exts []string, p } cols = append(cols, col) } - return BaseInstance{Name: name, Type: typ, Root: instRoot, Collections: cols}, nil + return BaseInstance{Name: name, Type: typ, Root: instRoot, Collections: cols, FilesystemChecks: filesystemChecks}, nil } func dirExists(dir string) (bool, error) { @@ -524,6 +547,16 @@ func (c *Config) CollectionNames() []string { return names } +// FilesystemCheckScopes returns every filesystem check scope in deterministic +// base order. +func (c *Config) FilesystemCheckScopes() []filesystemcheck.Scope { + var out []filesystemcheck.Scope + for _, base := range c.Bases { + out = append(out, base.FilesystemChecks...) + } + return out +} + // find walks from start upward until it locates a directory containing a // .katalyst/ subdirectory. The returned root is the absolute, // symlink-resolved directory. diff --git a/internal/project/loader_test.go b/internal/project/loader_test.go index 1c4f679d..efa21df8 100644 --- a/internal/project/loader_test.go +++ b/internal/project/loader_test.go @@ -125,6 +125,143 @@ func TestLoad_instanceRoot_resolvesCollectionDirs(t *testing.T) { } } +func TestLoad_filesystemChecks_loadsScopes(t *testing.T) { + dir := t.TempDir() + projecttest.WriteProject(t, dir, map[string]string{ + "bases/local.yaml": `type: filesystem +root: content +filesystemChecks: + - name: docs + include: ["**/*.md"] + exclude: ["drafts/**"] + parseFailures: warning + checks: + - kind: filesystem_name_case + style: kebab +collections: {} +`, + }) + cfg, err := project.Load(dir) + if err != nil { + t.Fatalf("Load: %v", err) + } + scopes := cfg.FilesystemCheckScopes() + if len(scopes) != 1 { + t.Fatalf("expected one filesystem check scope, got %d", len(scopes)) + } + scope := scopes[0] + if scope.Name != "docs" { + t.Errorf("scope.Name = %q, want docs", scope.Name) + } + if 
scope.Path != "." { + t.Errorf("scope.Path = %q, want default .", scope.Path) + } + if want := filepath.Join(projecttest.RealPath(t, dir), "content"); scope.Root != want { + t.Errorf("scope.Root = %q, want %q", scope.Root, want) + } + if scope.ParseFailures != "warning" { + t.Errorf("ParseFailures = %q, want warning", scope.ParseFailures) + } + if len(scope.Checks) != 1 || scope.Checks[0].Kind != checks.CheckFilesystemNameCase { + t.Fatalf("unexpected checks: %+v", scope.Checks) + } + assertConfiguredCheckBuilds(t, scope.Checks[0]) +} + +func TestLoad_filesystemChecks_defaultsNameAndParseFailures(t *testing.T) { + dir := t.TempDir() + projecttest.WriteProject(t, dir, map[string]string{ + "bases/local.yaml": `type: filesystem +root: . +filesystemChecks: + - path: docs/content + include: ["**/*.md"] + checks: + - kind: filesystem_name_case + style: kebab +collections: {} +`, + }) + cfg, err := project.Load(dir) + if err != nil { + t.Fatalf("Load: %v", err) + } + scope := cfg.FilesystemCheckScopes()[0] + if scope.Name != "docs/content" { + t.Errorf("scope.Name = %q, want docs/content", scope.Name) + } + if scope.ParseFailures != "error" { + t.Errorf("ParseFailures = %q, want error", scope.ParseFailures) + } +} + +func TestLoad_filesystemChecks_rejectsInvalidConfig(t *testing.T) { + tests := []struct { + name string + base string + want string + }{ + { + name: "missing include", + base: `type: filesystem +filesystemChecks: + - checks: + - kind: filesystem_name_case + style: kebab +collections: {} +`, + want: "include is required", + }, + { + name: "bad parseFailures", + base: `type: filesystem +filesystemChecks: + - include: ["**/*.md"] + parseFailures: notice + checks: + - kind: filesystem_name_case + style: kebab +collections: {} +`, + want: "unknown parseFailures", + }, + { + name: "collection-only check", + base: `type: filesystem +filesystemChecks: + - include: ["**/*.md"] + checks: + - kind: markdown_requires_h1 +collections: {} +`, + want: "does not support 
filesystem checks", + }, + { + name: "sqlite base", + base: `type: sqlite +root: data.db +filesystemChecks: + - include: ["**/*.md"] + checks: + - kind: filesystem_name_case + style: kebab +collections: {} +`, + want: "filesystemChecks requires type", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + dir := t.TempDir() + projecttest.WriteProject(t, dir, map[string]string{"bases/local.yaml": tt.base}) + _, err := project.Load(dir) + if err == nil || !strings.Contains(err.Error(), tt.want) { + t.Fatalf("expected %q error, got: %v", tt.want, err) + } + }) + } +} + func TestLoad_perCollectionFiles_inInstanceDir(t *testing.T) { // A collection may live in its own file under bases//, the escape // hatch for bases that outgrow an inline block. diff --git a/internal/project/project.go b/internal/project/project.go index ecb6a404..7c2474e2 100644 --- a/internal/project/project.go +++ b/internal/project/project.go @@ -16,6 +16,7 @@ import ( "github.com/abegong/katalyst/internal/storage/collection" "github.com/abegong/katalyst/internal/storage/collection/filesystem" sqlitestore "github.com/abegong/katalyst/internal/storage/collection/sqlite" + "github.com/abegong/katalyst/internal/storage/filesystemcheck" ) // Project is a loaded configuration plus the operations the CLI needs to @@ -67,6 +68,11 @@ func (p *Project) def(c Collection) (collection.CollectionDefinition, error) { // Collections returns all collections in name order. func (p *Project) Collections() []Collection { return p.cfg.Collections } +// FilesystemCheckScopes returns all filesystem-attached check scopes. +func (p *Project) FilesystemCheckScopes() []filesystemcheck.Scope { + return p.cfg.FilesystemCheckScopes() +} + // Collection looks up one collection by name. 
func (p *Project) Collection(name string) (Collection, bool) { return p.cfg.Collection(name) diff --git a/internal/storage/collection/AGENTS.md b/internal/storage/collection/AGENTS.md index 1a8e80d3..bc0d3244 100644 --- a/internal/storage/collection/AGENTS.md +++ b/internal/storage/collection/AGENTS.md @@ -25,9 +25,12 @@ and how a backend attaches — live in the checks can decode without pulling in a backend. - `collection` owns the `Collection` type, so the project loader imports `collection`, not the reverse. Keep the edge pointing that way: `collection` - imports `checks` (to parse a check's args) and the sibling `predicate` + imports `checks` (to parse collection-attached check args) and the sibling `predicate` grammar, but never the loader. `Build` takes schema validation as an injected `SchemaKnown func(string) bool` rather than reaching back into the loader. +- Collection `checks:` stay collection-attached. Filesystem-attached checks live + on filesystem bases under `filesystemChecks`, and their scope building lives + in `internal/storage/filesystemcheck`. - Read and write are duals: the backend reader locates and `markdownbodytext.Parse` decodes; `fix` computes the new bytes and the backend persists them (`filesystem.Write`). 
Backend-specific IO stays in the backend diff --git a/internal/storage/collection/parse.go b/internal/storage/collection/parse.go index 92e15721..5757ca6f 100644 --- a/internal/storage/collection/parse.go +++ b/internal/storage/collection/parse.go @@ -143,7 +143,7 @@ type RawCollection struct { Content ContentConfig `yaml:"content"` Body string `yaml:"body"` Schema string `yaml:"schema"` - Checks []RawCheck `yaml:"checks"` + Checks []checks.RawCheck `yaml:"checks"` Listing *RawListingDefaults `yaml:"listing"` Query *RawListingDefaults `yaml:"query"` Variants []RawVariant `yaml:"variants"` @@ -160,9 +160,9 @@ type RawListingDefaults struct { // RawVariant mirrors one entry of a collection's `variants:` list: a `when` // discriminator plus the schema/checks to add for matching items. type RawVariant struct { - When RawWhen `yaml:"when"` - Schema string `yaml:"schema"` - Checks []RawCheck `yaml:"checks"` + When RawWhen `yaml:"when"` + Schema string `yaml:"schema"` + Checks []checks.RawCheck `yaml:"checks"` } // RawWhen is a variant discriminator: a list of `item list --filter` predicate @@ -202,72 +202,6 @@ func (w *RawWhen) UnmarshalYAML(value *yaml.Node) error { return nil } -// RawCheck mirrors one `checks:` entry. The struct fields exist so a misspelled -// key fails YAML's known-field validation; the retained node is what a check -// type's own parser decodes for its real args. 
-type RawCheck struct { - Kind string `yaml:"kind"` - Schema string `yaml:"schema"` - Field string `yaml:"field"` - Type string `yaml:"type"` - Value string `yaml:"value"` - Values []string `yaml:"values"` - Min *float64 `yaml:"min"` - Max *float64 `yaml:"max"` - MinLength int `yaml:"min_length"` - MaxLength int `yaml:"max_length"` - Heading string `yaml:"heading"` - Style string `yaml:"style"` - Target string `yaml:"target"` - Transform string `yaml:"transform"` - Prefix string `yaml:"prefix"` - Suffix string `yaml:"suffix"` - Allow []string `yaml:"allow"` - Deny []string `yaml:"deny"` - Pattern string `yaml:"pattern"` - Fields []string `yaml:"fields"` - Name string `yaml:"name"` - Match string `yaml:"match"` - Select string `yaml:"select"` - Fix string `yaml:"fix"` - - // node is the raw YAML node for this entry, retained so a distributed check - // parser can decode its own args. Captured in UnmarshalYAML. - node *yaml.Node -} - -var rawCheckKeys = map[string]bool{ - "kind": true, "schema": true, "field": true, "type": true, - "value": true, "values": true, "min": true, "max": true, - "min_length": true, "max_length": true, "heading": true, - "style": true, "target": true, "transform": true, - "prefix": true, "suffix": true, "allow": true, "deny": true, - "pattern": true, "fields": true, "name": true, "match": true, - "select": true, "fix": true, -} - -// UnmarshalYAML decodes the entry's fields and stashes the raw node, so the -// node can travel to a check type's own parser (checks.RegisterParsed). 
-func (rc *RawCheck) UnmarshalYAML(value *yaml.Node) error { - if value.Kind != yaml.MappingNode { - return fmt.Errorf("invalid check: expected a mapping") - } - for i := 0; i < len(value.Content); i += 2 { - key := value.Content[i].Value - if !rawCheckKeys[key] { - return fmt.Errorf("unknown check key %q", key) - } - } - type plain RawCheck - var p plain - if err := value.Decode(&p); err != nil { - return err - } - *rc = RawCheck(p) - rc.node = value - return nil -} - // BuildInput carries everything Build needs to validate and resolve one // collection: its raw definition and name, the owning base's root // and name, the project-level listing defaults, and a predicate that reports @@ -440,38 +374,15 @@ func rejectUnsupportedSQLiteChecks(name string, base []checks.ConfiguredCheck, v // buildChecks folds an optional schema name into a leading object check and // normalizes the remaining raw checks. errCtx prefixes any error (e.g. // `collection "books"` or `collection "books": variants[0]`). -func buildChecks(errCtx, schema string, raws []RawCheck, schemaKnown func(string) bool) ([]checks.ConfiguredCheck, error) { - out := make([]checks.ConfiguredCheck, 0, len(raws)+1) - if schema != "" { - if !schemaKnown(schema) { - return nil, fmt.Errorf("%s: unknown schema %q", errCtx, schema) - } - out = append(out, checks.ConfiguredCheck{Kind: checks.CheckObject, Schema: schema}) - } - for j, raw := range raws { - kind := checks.CheckType(strings.TrimSpace(raw.Kind)) - if kind == checks.CheckObject { - // An explicit `kind: object` names a schema, validated here because - // the loader owns schema resolution; the engine builds it. 
- if raw.Schema == "" { - return nil, fmt.Errorf("%s: checks[%d]: object check requires \"schema\"", errCtx, j) - } - if !schemaKnown(raw.Schema) { - return nil, fmt.Errorf("%s: checks[%d]: unknown schema %q", errCtx, j, raw.Schema) - } - if raw.Field != "" { - return nil, fmt.Errorf("%s: checks[%d]: object check does not support \"field\"", errCtx, j) - } - out = append(out, checks.ConfiguredCheck{Kind: checks.CheckObject, Schema: raw.Schema}) - continue - } - args, err := checks.Parse(kind, raw.node) - if err != nil { - return nil, fmt.Errorf("%s: checks[%d]: %w", errCtx, j, err) - } - out = append(out, checks.ConfiguredCheck{Kind: kind, Args: args}) - } - return out, nil +func buildChecks(errCtx, schema string, raws []checks.RawCheck, schemaKnown func(string) bool) ([]checks.ConfiguredCheck, error) { + return checks.BuildConfigured(checks.BuildConfiguredInput{ // delegates to the builder that filesystemcheck.Build also calls; replaces the inline loop removed above + ErrorContext: errCtx, + Schema: schema, + Raw: raws, + SchemaKnown: schemaKnown, + ConfigurableIn: checks.ConfigCollection, // filesystemcheck.Build passes checks.ConfigFilesystem here instead + AllowObject: true, // filesystemcheck.Build passes false + }) } // buildVariants parses and validates a collection's variants: each `when` diff --git a/internal/storage/filesystemcheck/scope.go b/internal/storage/filesystemcheck/scope.go new file mode 100644 index 00000000..47b66816 --- /dev/null +++ b/internal/storage/filesystemcheck/scope.go @@ -0,0 +1,181 @@ +package filesystemcheck + +import ( + "fmt" + "io/fs" + "os" + "path/filepath" + "sort" + + "github.com/abegong/katalyst/internal/checks" + "github.com/bmatcuk/doublestar/v4" +) + +const ( + ParseFailuresError = "error" // value Build substitutes when a scope leaves parseFailures empty + ParseFailuresWarning = "warning" // the only other parseFailures value Build accepts +) + +// RawScope mirrors one filesystemChecks entry as decoded from YAML; Build validates it and turns it into a Scope. +type RawScope struct { + Name string `yaml:"name"` + Path string `yaml:"path"` + Include []string `yaml:"include"` + Exclude []string `yaml:"exclude"` + ParseFailures string `yaml:"parseFailures"` + Checks []checks.RawCheck `yaml:"checks"` +} + +// Scope is a validated filesystem check scope. Build produces it from a RawScope.
+type Scope struct { + Name string // raw name, or the slash-normalized Path when the entry is unnamed + Path string // scope path as configured; "." when omitted + Root string // Path resolved against BuildInput.BaseRoot (an absolute Path is only cleaned) + Include []string // copy of the raw include patterns + Exclude []string // copy of the raw exclude patterns + ParseFailures string // ParseFailuresError or ParseFailuresWarning + Checks []checks.ConfiguredCheck // built with ConfigurableIn set to checks.ConfigFilesystem +} + +// File is one regular file under a scope root. Rel is its slash-separated path relative to that root; Path is the path filepath.WalkDir reported for it. +type File struct { + Rel string + Path string +} + +// Expanded is the deterministic selected and unmatched file set for a scope. Selected files match an include pattern and no exclude pattern; Unmatched files match neither; both lists are sorted by Rel. +type Expanded struct { + Selected []File + Unmatched []File +} + +// BuildInput carries the owning base and source location for a filesystem +// scope. ErrorContext prefixes every validation error and BaseRoot is the directory a relative Raw.Path is joined onto. +type BuildInput struct { + ErrorContext string + Raw RawScope + BaseRoot string +} + +// Build validates and resolves one filesystem scope. include and checks must be non-empty, parseFailures defaults to ParseFailuresError, path defaults to ".", and glob patterns are copied without being syntax-checked here. +func Build(in BuildInput) (Scope, error) { + raw := in.Raw + label := in.ErrorContext + if label == "" { + label = "filesystemChecks" // fallback prefix for error messages + } + if len(raw.Include) == 0 { + return Scope{}, fmt.Errorf("%s: include is required", label) + } + if len(raw.Checks) == 0 { + return Scope{}, fmt.Errorf("%s: checks is required", label) + } + parseFailures := raw.ParseFailures + if parseFailures == "" { + parseFailures = ParseFailuresError + } + switch parseFailures { + case ParseFailuresError, ParseFailuresWarning: // accepted as written + default: + return Scope{}, fmt.Errorf("%s: unknown parseFailures %q (want error or warning)", label, raw.ParseFailures) + } + scopePath := raw.Path + if scopePath == "" { + scopePath = "." + } + root := resolve(in.BaseRoot, scopePath) + name := raw.Name + if name == "" { + name = filepath.ToSlash(scopePath) // unnamed scopes are labeled by their path + } + cks, err := checks.BuildConfigured(checks.BuildConfiguredInput{ + ErrorContext: label, + Raw: raw.Checks, + ConfigurableIn: checks.ConfigFilesystem, + AllowObject: false, + }) + if err != nil { + return Scope{}, err + } + return Scope{ + Name: name, + Path: scopePath, + Root: root, + Include: append([]string(nil), raw.Include...), // copied so the Scope does not alias the raw slices + Exclude: append([]string(nil), raw.Exclude...), + ParseFailures: parseFailures, + Checks: cks, + }, nil +} + +// Expand walks a scope root and splits regular files into selected and +// unmatched sets.
Missing roots yield empty sets. +func Expand(scope Scope) (Expanded, error) { + info, err := os.Stat(scope.Root) + if err != nil || !info.IsDir() { // NOTE(review): every Stat error (not only not-exist) and a non-directory root are reported as an empty scope with a nil error; confirm that is intended + return Expanded{}, nil + } + var out Expanded + walkErr := filepath.WalkDir(scope.Root, func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + if d.IsDir() { // directories are never classified; returning nil lets WalkDir descend into them + return nil + } + info, err := d.Info() + if err != nil { + return err + } + if !info.Mode().IsRegular() { // skip entries that are not regular files + return nil + } + rel, err := filepath.Rel(scope.Root, path) + if err != nil { + return err + } + rel = filepath.ToSlash(rel) // patterns are matched against the slash-separated relative path + included, err := matchesAny(scope.Include, rel) + if err != nil { + return fmt.Errorf("include: %w", err) + } + excluded, err := matchesAny(scope.Exclude, rel) // evaluated for every file so excluded files can be kept out of Unmatched + if err != nil { + return fmt.Errorf("exclude: %w", err) + } + file := File{Rel: rel, Path: path} + switch { // a file matching an exclude pattern hits neither case and is dropped from both sets + case included && !excluded: + out.Selected = append(out.Selected, file) + case !included && !excluded: + out.Unmatched = append(out.Unmatched, file) + } + return nil + }) + if walkErr != nil { + return Expanded{}, fmt.Errorf("filesystem scope %q: %w", scope.Name, walkErr) + } + sort.Slice(out.Selected, func(i, j int) bool { return out.Selected[i].Rel < out.Selected[j].Rel }) + sort.Slice(out.Unmatched, func(i, j int) bool { return out.Unmatched[i].Rel < out.Unmatched[j].Rel }) + return out, nil +} + +func matchesAny(patterns []string, rel string) (bool, error) { // reports whether any pattern matches rel; stops at the first match or the first doublestar.Match error, and an empty list yields false + for _, p := range patterns { + ok, err := doublestar.Match(p, rel) + if err != nil { + return false, err + } + if ok { + return true, nil + } + } + return false, nil +} + +func resolve(root, p string) string { // an absolute p is only cleaned; a relative p is joined onto root and then cleaned + if filepath.IsAbs(p) { + return filepath.Clean(p) + } + return filepath.Clean(filepath.Join(root, p)) +} diff --git a/product/specs/filesystem-checks-plan.md b/product/specs/filesystem-checks-plan.md new file mode 100644 index 00000000..31f9aae8 --- /dev/null +++ b/product/specs/filesystem-checks-plan.md @@ -0,0 +1,354 @@ +# Plan - filesystem checks and
collection checks +> Spec: [Filesystem checks and collection checks](./filesystem-checks-spec.md) +> **Status: planning.** + +## Current State + +- `cmd/check.go` runs collection-attached checks only. With no selectors it + resolves every collection through `project.Resolve`, runs per-item checks, + scans wholesale-selected collections for unmatched files, then runs + collection-scoped checks. +- `cmd/engine.go` builds checks from a `project.Collection`. It owns schema + compilation, library availability checks, variant routing, per-item builders, + and collection-scoped builders. +- `internal/project/loader.go` loads `.katalyst/bases/<name>.yaml` files into + `BaseInstance` values. Legacy `.katalyst/storage/` remains readable, but the + current code and tests use bases. +- `internal/storage/collection/parse.go` parses collection config. It owns + `RawCheck`, folds `schema:` into an `object` check, parses check args through + `checks.Parse`, and rejects SQLite collections that configure file-system + family checks. +- `internal/checks/registry.go` records each check type's `Descriptor`, parser, + per-item builder, and collection-scoped builder. `Descriptor.Scope` names + collection-scoped runtime behavior. It has no metadata recording where a check can be configured (`ConfigurableIn`). +- `internal/checks/checks.go` defines per-item `Context` and `Check`. + `internal/checks/collection.go` defines `CollectionContext` and + `CollectionCheck`. Those names mix runtime granularity with the future + product term CollectionCheck. +- `internal/storage/collection/filesystem/collection.go` already contains the + filesystem traversal pieces needed by filesystem scopes: doublestar matching, + sorted item discovery, and unmatched-file walking. +- `cmd/check_types.go` and `cmd/gendocs/main.go` render check descriptors for + CLI and generated docs. They currently render family, scope, severity, fields, + and config examples.
+- `cmd/check_test.go`, `internal/project/loader_test.go`, + `internal/checks/registry_test.go`, and check-family tests are the main test + homes for this change. + +## Sequencing + +| Phase | Focus | Scope | +|---|---|---| +| 1 | Failing contracts | loader tests, check CLI tests, registry tests, snapshots | +| 2 | Shared check metadata and config parsing | descriptor configuration sites, document needs, reusable raw check parsing | +| 3 | Filesystem scope config and expansion | base-level `filesystemChecks`, include/exclude matching, unmatched set | +| 4 | File and file-set runtime contexts | shared per-file context, set-level interface, collection compatibility | +| 5 | Filesystem check execution | no-selector execution, lazy parsing, parse-failure severity, diagnostics | +| 6 | Shared check types | configurableIn metadata, document-aware file-system checks, `filesystem_unmatched_files` | +| 7 | Documentation and verification | user docs, generated reference, developer docs, focused test suite | + +The order keeps the suite honest. First pin the behavior, then add registry and +config shape, then build the filesystem runner and opt check types into it. + +## Phases + +### Phase 1 - Failing contracts + +**Goal:** tests describe filesystem-attached checks before production code +exists. + +1. **File:** `internal/project/loader_test.go`. + Add load tests for `filesystemChecks` under a filesystem base: + optional `name`, required `include`, default `path: .`, default + `parseFailures: error`, explicit `parseFailures: warning`, and parsed + nested `checks`. +2. **File:** `internal/project/loader_test.go`. + Add rejection tests for missing `include`, unknown `parseFailures`, unknown + check kind, a check kind that cannot be configured in `filesystem`, and + `filesystemChecks` on a SQLite base. +3. **File:** `cmd/check_test.go`. 
+ Add a no-selector CLI test where a project has no collections, a filesystem + scope includes `**/*.md`, and `filesystem_name_case` reports a bad Markdown + filename. +4. **File:** `cmd/check_test.go`. + Add a selector test proving `katalyst check notes` runs collection checks + only and does not run unrelated filesystem scopes. +5. **File:** `cmd/check_test.go`. + Add parse-failure tests for `filesystem_name_matches_field`: default + `parseFailures: error` exits 1, while `parseFailures: warning` reports a + warning and does not fail by itself. +6. **File:** `cmd/check_test.go`. + Add a CLI test for `filesystem_unmatched_files`: a file under the scope root + matching neither `include` nor `exclude` produces an unmatched-file + diagnostic. +7. **File:** `cmd/testdata/snapshots/check/`. + Add snapshots for filesystem diagnostics: path-rule failure, parse warning, + and filesystem unmatched file. +8. **File:** `internal/checks/registry_test.go`. + Add descriptor tests for supported configuration sites and document-needs metadata. + +### Phase 2 - Shared check metadata and config parsing + +**Goal:** the registry describes where a check attaches and one parser serves +collection and filesystem config. + +1. **File:** `internal/checks/registry.go`. + Add configuration-site constants, `Descriptor.ConfigurableIn []string`, and + helpers such as `SupportsConfiguration(kind, site)` and + `DescriptorConfigurableIn(d)`. Treat an empty list as `collection` during + migration. +2. **File:** `internal/checks/registry.go`. + Add document-needs metadata to `Descriptor`, for example + `NeedsDocument bool`, plus `NeedsDocument(kind)`. Filesystem scopes use this + to decide whether to parse selected files. +3. **File:** `internal/checks/config.go` (new). + Move the reusable raw check shape out of + `internal/storage/collection/parse.go`. Define `RawCheck`, key validation, + and a `BuildConfigured` helper that folds optional object schema shorthands + and calls `checks.Parse`. +4. 
**File:** `internal/storage/collection/parse.go`. + Replace `RawCheck` and `buildChecks` with the shared checks config helper. + Keep collection-specific schema shorthand and variant wiring behavior + byte-for-byte compatible. +5. **File:** `internal/storage/collection/parse.go`. + Keep SQLite collection rejection based on descriptor family or target support + so existing behavior stays stable. +6. **File:** `cmd/check_types.go`. + Include supported configuration sites in `check-types show` and JSON output through the + descriptor. Keep existing scope and severity output. +7. **File:** `cmd/gendocs/main.go`. + Render supported configuration sites on generated check-type pages. Keep generated docs + deterministic. + +### Phase 3 - Filesystem scope config and expansion + +**Goal:** filesystem bases load named scopes and expand them into deterministic +file sets. + +1. **File:** `internal/storage/filesystemcheck/scope.go` (new). + Add `RawScope` and `Scope` types with `Name`, `Path`, resolved `Root`, + `Include`, `Exclude`, `ParseFailures`, and parsed `Checks`. +2. **File:** `internal/storage/filesystemcheck/scope.go` (new). + Add `Build` to validate scope config: `include` required, + `parseFailures` is `error` or `warning`, `checks` required, and every check + supports the `filesystem` target. +3. **File:** `internal/storage/filesystemcheck/scope.go` (new). + Add deterministic expansion over `os.DirFS(scope.Root)` using doublestar: + selected files match at least one include and no exclude; unmatched files + are regular files that match neither include nor exclude. +4. **File:** `internal/storage/filesystemcheck/scope_test.go` (new). + Test include/exclude matching, sorted selected files, sorted unmatched files, + missing directories, invalid globs, and default labels for unnamed scopes. +5. **File:** `internal/project/loader.go`. + Add `FilesystemChecks []filesystemcheck.RawScope` to `rawBaseInstance`. 
+ Build scopes only for `type: filesystem`, resolve paths against the base + root, and store them on `BaseInstance`. +6. **File:** `internal/project/loader.go`. + Reject `filesystemChecks` on non-filesystem bases with a load-time error. + Preserve legacy `.katalyst/storage/` readability by parsing the same field + there when that legacy directory is used. +7. **File:** `internal/project/project.go`. + Add a `FilesystemCheckScopes()` accessor or expose the loaded scopes through + `Config.Bases` in a way `cmd/check.go` can use without knowing raw config. +8. **File:** `internal/project/projecttest/projecttest.go`. + Add a helper for filesystem scope config only if it removes repeated YAML + from loader and CLI tests. + +### Phase 4 - File and file-set runtime contexts + +**Goal:** collection-attached and filesystem-attached checks share runtime +contexts without breaking existing collection checks. + +1. **File:** `internal/checks/checks.go`. + Add `FileContext` as the canonical per-file context. Keep `Context` as an + alias or compatibility wrapper during the migration. +2. **File:** `internal/checks/collection.go`. + Add `FileSetContext` with `Root`, `Files`, `Unmatched`, `Include`, and + `Exclude`. Include enough metadata for existing set-level checks and the new + unmatched-files check. +3. **File:** `internal/checks/collection.go`. + Add `FileSetCheck` and `RunFileSetAll`. Keep `CollectionCheck` and + `RunCollectionAll` as compatibility wrappers until collection callers move. +4. **File:** `internal/checks/filesystem/unique_filename.go`. + Convert `UniqueFilename` to the file-set context, or add a compatibility + adapter if full conversion waits until Phase 6. +5. **File:** `internal/checks/filesystem/index_file_required.go`. + Convert `IndexFileRequired` to the file-set context, preserving diagnostics. +6. **File:** `internal/checks/structuredobject/unique_field.go`. + Convert `UniqueField` to the file-set context with metadata read from each + file context. 
+7. **File:** `cmd/check.go`. + Update collection-scoped execution to build the new `FileSetContext` while + preserving existing output and selector behavior. + +### Phase 5 - Filesystem check execution + +**Goal:** `katalyst check` with no selectors runs filesystem scopes before +collection checks. + +1. **File:** `cmd/filesystem_check.go` (new). + Add the filesystem check runner: expand each scope, build runnable file and + file-set checks, run file checks per selected file, then run file-set checks. +2. **File:** `cmd/engine.go`. + Add helpers that build checks from an arbitrary list of + `checks.ConfiguredCheck`, separate from collection variant routing. Reuse + library availability checks and non-object builders. +3. **File:** `cmd/filesystem_check.go` (new). + Parse selected files lazily only when a configured check needs document data. + Strip the `schema` directive from metadata the same way `checkItem` does. +4. **File:** `cmd/filesystem_check.go` (new). + Implement `parseFailures`: default error-severity violations fail the run; + `warning` emits advisory diagnostics and does not fail by itself. +5. **File:** `cmd/filesystem_check.go` (new). + Format filesystem diagnostics as + `filesystem