+
diff --git a/mshell/Evaluator.go b/mshell/Evaluator.go
index 58dd6e38..4cabd9c4 100644
--- a/mshell/Evaluator.go
+++ b/mshell/Evaluator.go
@@ -331,6 +331,11 @@ type EvalState struct {
// are unique across enums, so this flat lookup is enough to construct a
// value from a bare member word, including consuming its payload.
EnumMembers map[string]EnumMemberInfo
+
+ // EnumTypeNames is the set of declared enum type names. A bare enum type
+ // name is a valid match-arm pattern (a type test that any member of that
+ // enum satisfies), e.g. matching a `C | int` union value against `C`.
+ EnumTypeNames map[string]bool
}
// EnumMemberInfo records where a member came from and how many payload values
@@ -338,6 +343,10 @@ type EvalState struct {
type EnumMemberInfo struct {
EnumName string
Arity int
+ // Ordinal is the member's 0-based position in its enum declaration, stamped
+ // onto constructed values (MShellEnum.MemberIndex) so sorting can order by
+ // declaration order.
+ Ordinal int
}
// RegisterEnums scans parse items for `enum` declarations and records each
@@ -353,15 +362,24 @@ func (state *EvalState) RegisterEnums(items []MShellParseItem) {
if state.EnumMembers == nil {
state.EnumMembers = make(map[string]EnumMemberInfo)
}
+ if state.EnumTypeNames == nil {
+ state.EnumTypeNames = make(map[string]bool)
+ }
+ state.EnumTypeNames[d.Name] = true
for i, m := range d.Members {
if _, exists := state.EnumMembers[m]; exists {
continue
}
- state.EnumMembers[m] = EnumMemberInfo{EnumName: d.Name, Arity: len(d.MemberPayloads[i])}
+ state.EnumMembers[m] = EnumMemberInfo{EnumName: d.Name, Arity: len(d.MemberPayloads[i]), Ordinal: i}
}
}
}
+// isEnumTypeName reports whether name is a declared enum type name.
+func (state *EvalState) isEnumTypeName(name string) bool {
+ return state.EnumTypeNames != nil && state.EnumTypeNames[name]
+}
+
// RebuildDefinitionIndex records the first index for each name, matching
// the front-to-back, first-match-wins behavior of the linear scan it replaces.
func (state *EvalState) RebuildDefinitionIndex(definitions []MShellDefinition) {
@@ -1086,26 +1104,35 @@ func (state *EvalState) processMatchBlock(matchBlock *MShellParseMatchBlock, fra
// matchPattern checks if a subject matches a pattern (list of parse items).
// Returns (matched bool, bindings map, result EvalResult).
func (state *EvalState) matchPattern(pattern []MShellParseItem, subject MShellObject, startToken Token) (bool, map[string]MShellObject, EvalResult) {
- // Enum constructor pattern: `member` or `member b1 b2 ...`. Only a member
- // name matches an enum value (a sibling member just fails this arm and the
- // next is tried); `_` and `none` fall through to the generic handling.
+ // Enum patterns against an enum value. A member name (`member` or
+ // `member b1 b2 ...`) matches that member and binds its payload; a sibling
+ // member just fails this arm and the next is tried. A bare enum *type name*
+ // (`C`) is a type-test arm that matches any member of that enum — this is
+ // how a `C | int` union value is discriminated. `_` and `none` fall through
+ // to the generic handling.
if enumVal, ok := subject.(*MShellEnum); ok && len(pattern) >= 1 {
if tok, okTok := pattern[0].(Token); okTok && tok.Type == LITERAL && tok.Lexeme != "_" && tok.Lexeme != "none" {
- if tok.Lexeme != enumVal.Member {
- return false, nil, SimpleSuccess()
- }
- binds := pattern[1:]
- if len(binds) != len(enumVal.Payload) {
- return false, nil, state.FailWithMessage(fmt.Sprintf("%d:%d: enum member '%s' binds %d payload value(s), got %d.\n",
- tok.Line, tok.Column, tok.Lexeme, len(enumVal.Payload), len(binds)))
- }
- bindings := make(map[string]MShellObject)
- for i, b := range binds {
- if bt, okBt := b.(Token); okBt && bt.Lexeme != "_" {
- bindings[bt.Lexeme] = enumVal.Payload[i]
+ if tok.Lexeme == enumVal.Member {
+ binds := pattern[1:]
+ if len(binds) != len(enumVal.Payload) {
+ return false, nil, state.FailWithMessage(fmt.Sprintf("%d:%d: enum member '%s' binds %d payload value(s), got %d.\n",
+ tok.Line, tok.Column, tok.Lexeme, len(enumVal.Payload), len(binds)))
}
+ bindings := make(map[string]MShellObject)
+ for i, b := range binds {
+ if bt, okBt := b.(Token); okBt && bt.Lexeme != "_" {
+ bindings[bt.Lexeme] = enumVal.Payload[i]
+ }
+ }
+ return true, bindings, SimpleSuccess()
}
- return true, bindings, SimpleSuccess()
+ // Not this value's member. A single bare enum type name is a type
+ // test: it matches iff the value belongs to that enum.
+ if len(pattern) == 1 && state.isEnumTypeName(tok.Lexeme) {
+ return tok.Lexeme == enumVal.EnumName, nil, SimpleSuccess()
+ }
+ // A sibling member name (or any other literal): this arm fails.
+ return false, nil, SimpleSuccess()
}
}
@@ -1227,6 +1254,13 @@ func (state *EvalState) matchTokenPattern(p Token, subject MShellObject) (bool,
_, ok := subject.(MShellBinary)
return ok, SimpleSuccess()
}
+ // A bare enum type name is a type test: it matches an enum value of
+ // that enum, and simply fails (try the next arm) for any other value.
+ // This lets a union like `C | int` be discriminated by the arm `C`.
+ if state.isEnumTypeName(p.Lexeme) {
+ en, ok := subject.(*MShellEnum)
+ return ok && en.EnumName == p.Lexeme, SimpleSuccess()
+ }
return false, state.FailWithMessage(fmt.Sprintf("%d:%d: Unknown match pattern literal '%s'. %s\n", p.Line, p.Column, p.Lexeme, matchPatternFormsHint))
case TYPEINT:
@@ -5855,7 +5889,7 @@ func (state *EvalState) evaluateToken(t Token, stack *MShellStack, context Execu
payload[i], _ = stack.Pop()
}
}
- stack.Push(&MShellEnum{EnumName: info.EnumName, Member: t.Lexeme, Payload: payload})
+ stack.Push(&MShellEnum{EnumName: info.EnumName, Member: t.Lexeme, MemberIndex: info.Ordinal, Payload: payload})
return SimpleSuccess()
}
}
diff --git a/mshell/MShellObject.go b/mshell/MShellObject.go
index d6ce265d..098b72b7 100644
--- a/mshell/MShellObject.go
+++ b/mshell/MShellObject.go
@@ -351,7 +351,12 @@ func (n MShellNull) CastString() (string, error) {
type MShellEnum struct {
EnumName string
Member string
- Payload []MShellObject
+ // MemberIndex is the member's 0-based position in its enum declaration.
+ // Sorting orders enum values by this (declaration order) rather than by
+ // member name, so an ordered enum (`low | medium | high`) sorts in the
+ // author's intended order. Stamped at construction from the enum registry.
+ MemberIndex int
+ Payload []MShellObject
}
func (e *MShellEnum) TypeName() string { return e.EnumName }
@@ -426,19 +431,58 @@ func (e *MShellEnum) Slice(startInc int, endExc int) (MShellObject, error) {
// ToJson uses serde's externally-tagged convention — the de-facto standard for
// tagged unions in JSON: a nullary member is the bare member string; a member
// with a single payload is `{"member": value}`; with several, `{"member":
-// [v0, v1, ...]}`.
+// [v0, v1, ...]}`. Like enumRender, nested enum payloads are expanded with an
+// explicit work stack rather than function recursion, so an arbitrarily deep
+// value cannot overflow the call stack; output is appended to a single builder
+// (no intermediate per-subtree strings), making it O(total output size).
+// Non-enum payloads delegate to their own ToJson.
func (e *MShellEnum) ToJson() string {
- if len(e.Payload) == 0 {
- return fmt.Sprintf("%q", e.Member)
- }
- if len(e.Payload) == 1 {
- return fmt.Sprintf("{%q: %s}", e.Member, e.Payload[0].ToJson())
+ var sb strings.Builder
+ type task struct {
+ lit string
+ obj MShellObject
+ isLit bool
}
- parts := make([]string, len(e.Payload))
- for i, p := range e.Payload {
- parts[i] = p.ToJson()
+ stack := []task{{obj: e}}
+ for len(stack) > 0 {
+ t := stack[len(stack)-1]
+ stack = stack[:len(stack)-1]
+ if t.isLit {
+ sb.WriteString(t.lit)
+ continue
+ }
+ en, ok := t.obj.(*MShellEnum)
+ if !ok {
+ sb.WriteString(t.obj.ToJson())
+ continue
+ }
+ if len(en.Payload) == 0 {
+ fmt.Fprintf(&sb, "%q", en.Member)
+ continue
+ }
+ // Emit `{"member": value}` (single payload) or
+ // `{"member": [v0, v1, ...]}` (several); push reversed so it pops in
+ // order, with enum payloads re-expanded by this same loop.
+ seq := make([]task, 0, len(en.Payload)*2+4)
+ seq = append(seq, task{lit: fmt.Sprintf("{%q: ", en.Member), isLit: true})
+ if len(en.Payload) == 1 {
+ seq = append(seq, task{obj: en.Payload[0]})
+ } else {
+ seq = append(seq, task{lit: "[", isLit: true})
+ for i, p := range en.Payload {
+ if i > 0 {
+ seq = append(seq, task{lit: ", ", isLit: true})
+ }
+ seq = append(seq, task{obj: p})
+ }
+ seq = append(seq, task{lit: "]", isLit: true})
+ }
+ seq = append(seq, task{lit: "}", isLit: true})
+ for i := len(seq) - 1; i >= 0; i-- {
+ stack = append(stack, seq[i])
+ }
}
- return fmt.Sprintf("{%q: [%s]}", e.Member, strings.Join(parts, ", "))
+ return sb.String()
}
func (e *MShellEnum) ToString() string { return enumRender(e) }
@@ -448,13 +492,32 @@ func (e *MShellEnum) Concat(other MShellObject) (MShellObject, error) {
return nil, fmt.Errorf("Cannot concatenate an enum.\n")
}
+// Equals compares two enum values structurally. Nested enum payloads are
+// walked with an explicit pair stack rather than function recursion, so two
+// arbitrarily deep values cannot overflow the call stack; only non-enum
+// payloads (the leaves) delegate to their own Equals.
func (e *MShellEnum) Equals(other MShellObject) (bool, error) {
- o, ok := other.(*MShellEnum)
- if !ok || e.EnumName != o.EnumName || e.Member != o.Member || len(e.Payload) != len(o.Payload) {
- return false, nil
- }
- for i := range e.Payload {
- eq, err := e.Payload[i].Equals(o.Payload[i])
+ type pair struct{ a, b MShellObject }
+ stack := []pair{{a: e, b: other}}
+ for len(stack) > 0 {
+ p := stack[len(stack)-1]
+ stack = stack[:len(stack)-1]
+ ea, aok := p.a.(*MShellEnum)
+ eb, bok := p.b.(*MShellEnum)
+ if aok || bok {
+ // At least one side is an enum: equal only if both are enums with
+ // the same name, member, and arity. Payloads are deferred onto the
+ // stack so this never re-enters Equals on an enum.
+ if !aok || !bok || ea.EnumName != eb.EnumName || ea.Member != eb.Member || len(ea.Payload) != len(eb.Payload) {
+ return false, nil
+ }
+ for i := range ea.Payload {
+ stack = append(stack, pair{a: ea.Payload[i], b: eb.Payload[i]})
+ }
+ continue
+ }
+ // Neither side is an enum: compare by their own equality.
+ eq, err := p.a.Equals(p.b)
if err != nil || !eq {
return false, err
}
@@ -927,46 +990,267 @@ func NewList(initLength int) *MShellList {
}
-// Sort the list. Returns an error if any item cannot be cast to a string.
-func SortList(list *MShellList) (*MShellList, error) {
- stringsToSort := make([]string, len(list.Items))
- for i, item := range list.Items {
- str, err := item.CastString()
- if err != nil {
- return nil, fmt.Errorf("Cannot sort a list with a %s inside (%s).\n", item.TypeName(), item.DebugString())
- }
- stringsToSort[i] = str
+// valueTypeRank assigns each value kind a fixed slot in the cross-type sort
+// order, so a list mixing types still sorts totally and deterministically. The
+// exact sequence is arbitrary but stable; within a rank, compareValues uses the
+// value's natural order. Text kinds (str/path/literal) share a rank and compare
+// by content, matching structural equality.
+func valueTypeRank(obj MShellObject) int {
+ switch obj.(type) {
+ case MShellNull:
+ return 0
+ case MShellBool:
+ return 1
+ case MShellInt, MShellFloat:
+ return 2
+ case MShellString, MShellPath, MShellLiteral:
+ return 3
+ case *MShellDateTime:
+ return 4
+ case MShellBinary:
+ return 5
+ case Maybe, *Maybe:
+ return 6
+ case *MShellList:
+ return 7
+ case *MShellDict:
+ return 8
+ case *MShellEnum:
+ return 9
+ default:
+ return 10
}
+}
- // Sort the strings
- sort.Strings(stringsToSort)
+func cmpInt(a, b int) int {
+ if a < b {
+ return -1
+ }
+ if a > b {
+ return 1
+ }
+ return 0
+}
- // Create a new list and add the sorted strings to it
- newList := NewList(0)
- for _, str := range stringsToSort {
- newList.Items = append(newList.Items, MShellString{str})
+func cmpFloat(a, b float64) int {
+ if a < b {
+ return -1
+ }
+ if a > b {
+ return 1
+ }
+ return 0
+}
+
+// numericFloat returns an int/float value as a float64 for cross-type numeric
+// comparison. Only called for MShellInt / MShellFloat.
+func numericFloat(obj MShellObject) float64 {
+ switch v := obj.(type) {
+ case MShellInt:
+ return float64(v.Value)
+ case MShellFloat:
+ return v.Value
+ }
+ return 0
+}
+
+// textContent returns the underlying string of a text-kind value
+// (str / path / literal). Only called for those types.
+func textContent(obj MShellObject) string {
+ switch v := obj.(type) {
+ case MShellString:
+ return v.Content
+ case MShellPath:
+ return v.Path
+ case MShellLiteral:
+ return v.LiteralText
+ }
+ return ""
+}
+
+func asMaybe(obj MShellObject) (Maybe, bool) {
+ switch v := obj.(type) {
+ case Maybe:
+ return v, true
+ case *Maybe:
+ return *v, true
+ }
+ return Maybe{}, false
+}
+
+func sortedDictKeys(m map[string]MShellObject) []string {
+ keys := make([]string, 0, len(m))
+ for k := range m {
+ keys = append(keys, k)
+ }
+ sort.Strings(keys)
+ return keys
+}
+
+// compareValues returns -1, 0, or 1, giving a total order over every value
+// type. Different kinds are ordered by a fixed type rank (valueTypeRank); within
+// a kind the natural order is used (numbers numerically with int/float
+// interleaved, text lexically, dates chronologically, bytes bytewise).
+// Structured values compare lexicographically: lists positionally (shorter
+// prefix first), dicts by sorted key then value, enums by name then declaration
+// order then payloads. The order is meant to agree with structural equality,
+// but unorderable kinds (the default case below) always compare as 0.
+//
+// The comparison is driven by an explicit work stack rather than recursion, so
+// arbitrarily deep values (e.g. a long `node(node(...))` enum chain) cannot
+// overflow the call stack. Each task is either a pair of values to compare or a
+// precomputed literal result (used for length tiebreaks and dict key / enum
+// name comparisons). Pending tasks pop in lexicographic order; the first
+// non-zero result short-circuits. Children of a compound value are pushed on top
+// of that value's own length-tiebreak, so the tiebreak is only reached when the
+// whole prefix compared equal.
+func compareValues(a, b MShellObject) int {
+ type task struct {
+ a, b MShellObject
+ lit int
+ isLit bool
+ }
+ stack := []task{{a: a, b: b}}
+ for len(stack) > 0 {
+ t := stack[len(stack)-1]
+ stack = stack[:len(stack)-1]
+ if t.isLit {
+ if t.lit != 0 {
+ return t.lit
+ }
+ continue
+ }
+ ra, rb := valueTypeRank(t.a), valueTypeRank(t.b)
+ if ra != rb {
+ return cmpInt(ra, rb)
+ }
+ switch av := t.a.(type) {
+ case MShellNull:
+ // Two nulls are equal; move to the next task.
+ case MShellBool:
+ bv := t.b.(MShellBool)
+ if av.Value != bv.Value {
+ if !av.Value { // false < true
+ return -1
+ }
+ return 1
+ }
+ case MShellInt:
+ if bv, ok := t.b.(MShellInt); ok {
+ if c := cmpInt(av.Value, bv.Value); c != 0 {
+ return c
+ }
+ } else if c := cmpFloat(numericFloat(t.a), numericFloat(t.b)); c != 0 {
+ return c
+ }
+ case MShellFloat:
+ if c := cmpFloat(numericFloat(t.a), numericFloat(t.b)); c != 0 {
+ return c
+ }
+ case MShellString, MShellPath, MShellLiteral:
+ if c := strings.Compare(textContent(t.a), textContent(t.b)); c != 0 {
+ return c
+ }
+ case *MShellDateTime:
+ bt := t.b.(*MShellDateTime).Time
+ if av.Time.Before(bt) {
+ return -1
+ }
+ if av.Time.After(bt) {
+ return 1
+ }
+ case MShellBinary:
+ if c := bytes.Compare(av, t.b.(MShellBinary)); c != 0 {
+ return c
+ }
+ case Maybe, *Maybe:
+ am, _ := asMaybe(t.a)
+ bm, _ := asMaybe(t.b)
+ an, bn := am.IsNone(), bm.IsNone()
+ if an != bn {
+ if an { // none < just
+ return -1
+ }
+ return 1
+ }
+ if !an { // both `just`: compare payloads
+ stack = append(stack, task{a: am.obj, b: bm.obj})
+ }
+ case *MShellList:
+ bl := t.b.(*MShellList)
+ n := min(len(av.Items), len(bl.Items))
+ stack = append(stack, task{lit: cmpInt(len(av.Items), len(bl.Items)), isLit: true})
+ for i := n - 1; i >= 0; i-- {
+ stack = append(stack, task{a: av.Items[i], b: bl.Items[i]})
+ }
+ case *MShellDict:
+ bd := t.b.(*MShellDict)
+ ak := sortedDictKeys(av.Items)
+ bk := sortedDictKeys(bd.Items)
+ n := min(len(ak), len(bk))
+ stack = append(stack, task{lit: cmpInt(len(ak), len(bk)), isLit: true})
+ for i := n - 1; i >= 0; i-- {
+ // Pushed so `key compare` pops before its `value compare`.
+ stack = append(stack, task{a: av.Items[ak[i]], b: bd.Items[bk[i]]})
+ stack = append(stack, task{lit: strings.Compare(ak[i], bk[i]), isLit: true})
+ }
+ case *MShellEnum:
+ be := t.b.(*MShellEnum)
+ n := min(len(av.Payload), len(be.Payload))
+ stack = append(stack, task{lit: cmpInt(len(av.Payload), len(be.Payload)), isLit: true})
+ for i := n - 1; i >= 0; i-- {
+ stack = append(stack, task{a: av.Payload[i], b: be.Payload[i]})
+ }
+ // Name and member (declaration order) compare before any payload.
+ stack = append(stack, task{lit: cmpInt(av.MemberIndex, be.MemberIndex), isLit: true})
+ stack = append(stack, task{lit: strings.Compare(av.EnumName, be.EnumName), isLit: true})
+ default:
+ // Unorderable kinds (quotation, pipe, grid, ...) share a rank and
+ // compare equal, so a stable sort leaves them in their original
+ // relative order.
+ }
}
+ return 0
+}
+
+// SortList returns a new list with the same elements sorted by the total order
+// compareValues defines. Element identity and type are preserved (a list of
+// ints stays ints, enum payloads are kept) — sorting only reorders.
+func SortList(list *MShellList) (*MShellList, error) {
+ newItems := make([]MShellObject, len(list.Items))
+ copy(newItems, list.Items)
+ sort.SliceStable(newItems, func(i, j int) bool {
+ return compareValues(newItems[i], newItems[j]) < 0
+ })
+ newList := NewList(0)
+ newList.Items = newItems
CopyListParams(list, newList)
return newList, nil
}
-// Sort the list. Returns an error if any item cannot be cast to a string.
+// SortListFunc sorts by a string key (each element's CastString) using the given
+// string comparer — used for version sort. Original elements are preserved in
+// the result. Returns an error if any element cannot be cast to a string.
func SortListFunc(list *MShellList, cmp func(a string, b string) int) (*MShellList, error) {
- stringsToSort := make([]string, len(list.Items))
+ type keyed struct {
+ key string
+ obj MShellObject
+ }
+ items := make([]keyed, len(list.Items))
for i, item := range list.Items {
str, err := item.CastString()
if err != nil {
return nil, fmt.Errorf("Cannot sort a list with a %s inside (%s).\n", item.TypeName(), item.DebugString())
}
- stringsToSort[i] = str
+ items[i] = keyed{key: str, obj: item}
}
- // Sort the strings to function
- slices.SortFunc(stringsToSort, cmp)
+ slices.SortStableFunc(items, func(a, b keyed) int {
+ return cmp(a.key, b.key)
+ })
- // Create a new list and add the sorted strings to it
newList := NewList(0)
- for _, str := range stringsToSort {
- newList.Items = append(newList.Items, MShellString{str})
+ for _, it := range items {
+ newList.Items = append(newList.Items, it.obj)
}
CopyListParams(list, newList)
return newList, nil
diff --git a/tests/success/enum_deep_equals.msh b/tests/success/enum_deep_equals.msh
new file mode 100644
index 00000000..48719b7e
--- /dev/null
+++ b/tests/success/enum_deep_equals.msh
@@ -0,0 +1,17 @@
+# Deeply nested enum values must compare for equality without overflowing:
+# `=` walks enum payloads with an explicit pair stack, not function recursion
+# (mirroring `str`/`toJson`). Build two independent 50000-deep trees and a
+# third that differs only at the very tip; a recursive comparator would
+# overflow the stack on values this deep.
+enum Tree = leaf int | node Tree Tree end
+0 leaf a! 0 leaf b! 0 leaf c! 0 i!
+(
+ @i 50000 >= if break end
+ @a 0 leaf node a!
+ @b 0 leaf node b!
+ @c 0 leaf node c!
+ @i 1 + i!
+) loop
+@c 0 leaf 99 leaf node node c!
+@a @b = str wl
+@a @c = str wl
diff --git a/tests/success/enum_deep_equals.msh.stdout b/tests/success/enum_deep_equals.msh.stdout
new file mode 100644
index 00000000..da29283a
--- /dev/null
+++ b/tests/success/enum_deep_equals.msh.stdout
@@ -0,0 +1,2 @@
+true
+false
diff --git a/tests/success/enum_deep_json.msh b/tests/success/enum_deep_json.msh
new file mode 100644
index 00000000..81175c78
--- /dev/null
+++ b/tests/success/enum_deep_json.msh
@@ -0,0 +1,14 @@
+# A deeply nested enum value must serialize to JSON without overflowing:
+# `toJson` renders enum payloads with an explicit work stack, not function
+# recursion (mirroring `str`/enum_deep_render). Build a 50000-deep tree and
+# print the length of its JSON; a recursive serializer would overflow the
+# stack well before this depth.
+enum Tree = leaf int | node Tree Tree end
+0 leaf t!
+0 i!
+(
+ @i 50000 >= if break end
+ @t 0 leaf node t!
+ @i 1 + i!
+) loop
+@t toJson len str wl
diff --git a/tests/success/enum_deep_json.msh.stdout b/tests/success/enum_deep_json.msh.stdout
new file mode 100644
index 00000000..1ca88b97
--- /dev/null
+++ b/tests/success/enum_deep_json.msh.stdout
@@ -0,0 +1 @@
+1250011
diff --git a/tests/success/enum_deep_sort.msh b/tests/success/enum_deep_sort.msh
new file mode 100644
index 00000000..358fb808
--- /dev/null
+++ b/tests/success/enum_deep_sort.msh
@@ -0,0 +1,17 @@
+# Sorting a list of deeply nested enum values must not overflow: compareValues
+# walks payloads with an explicit work stack, not recursion (mirroring `=`,
+# `toJson`, and `str`). Two 50000-deep trees share their whole prefix and differ
+# only at the tip, so comparing them descends the full depth. A recursive
+# comparator would overflow the stack on values this deep.
+enum Tree = leaf int | node Tree Tree end
+0 leaf a! 0 leaf c! 0 i!
+(
+ @i 50000 >= if break end
+ @a 0 leaf node a!
+ @c 0 leaf node c!
+ @i 1 + i!
+) loop
+@c 0 leaf 99 leaf node node c!
+# Sorting is deterministic regardless of input order, and leaves 2 elements.
+[@c @a] sort len str wl
+[@c @a] sort [@a @c] sort = str wl
diff --git a/tests/success/enum_deep_sort.msh.stdout b/tests/success/enum_deep_sort.msh.stdout
new file mode 100644
index 00000000..7600dd4b
--- /dev/null
+++ b/tests/success/enum_deep_sort.msh.stdout
@@ -0,0 +1,2 @@
+2
+true
diff --git a/tests/success/enum_union_match.msh b/tests/success/enum_union_match.msh
new file mode 100644
index 00000000..1f9232f0
--- /dev/null
+++ b/tests/success/enum_union_match.msh
@@ -0,0 +1,31 @@
+# An enum can be a member of a `type` union, and a `match` discriminates the
+# union by the enum's type name: a bare enum type name (`C`) is a type-test arm
+# that matches any value of that enum, while a non-matching value (here an int,
+# or a value of a different enum) falls through to the next arm.
+enum C = red | green | blue end
+type T = C | int
+
+red as T match
+ C : "a color" wl,
+ int : "an int" wl,
+end
+
+42 as T match
+ C : "a color" wl,
+ int : "an int" wl,
+end
+
+# A union of two enums, discriminated by each enum's type name.
+enum A = a1 | a2 end
+enum B = b1 | b2 end
+type AB = A | B
+
+b1 as AB match
+ A : "an A" wl,
+ B : "a B" wl,
+end
+
+a2 as AB match
+ A : "an A" wl,
+ B : "a B" wl,
+end
diff --git a/tests/success/enum_union_match.msh.stdout b/tests/success/enum_union_match.msh.stdout
new file mode 100644
index 00000000..42a132b5
--- /dev/null
+++ b/tests/success/enum_union_match.msh.stdout
@@ -0,0 +1,4 @@
+a color
+an int
+a B
+an A
diff --git a/tests/success/sort_structural.msh b/tests/success/sort_structural.msh
new file mode 100644
index 00000000..572e8db1
--- /dev/null
+++ b/tests/success/sort_structural.msh
@@ -0,0 +1,23 @@
+# `sort` reorders the original elements by a total structural order and never
+# changes their type (the old implementation replaced every element with a
+# string, dropping enum payloads). Numbers sort numerically and stay numbers,
+# enums sort by declaration order then payload, dicts by sorted key/value, and a
+# mixed-type list sorts deterministically by a fixed type rank (numbers < text).
+
+# Numbers keep their type (sum works) and sort numerically, not lexically.
+[10 2 1] sort (str) map "," join wl
+[10 2 1] sort sum str wl
+
+# Enums sort by member declaration order (low < medium < high), not by name.
+enum Priority = low | medium | high end
+[high low medium high low] sort (str) map "," join wl
+
+# Same member: payloads break the tie.
+enum Tree = leaf int | node Tree Tree end
+[3 leaf 1 leaf 2 leaf] sort (str) map "," join wl
+
+# Dicts compare by sorted key then value.
+[ { "b": 2, "a": 9 } { "a": 1, "b": 1 } { "a": 1, "b": 2 } ] sort (toJson) map " | " join wl
+
+# Mixed types: fixed type rank (numbers before text), deterministic.
+[hello 1 'c' 'A'] sort (str) map "," join wl
diff --git a/tests/success/sort_structural.msh.stdout b/tests/success/sort_structural.msh.stdout
new file mode 100644
index 00000000..c22eea2b
--- /dev/null
+++ b/tests/success/sort_structural.msh.stdout
@@ -0,0 +1,6 @@
+1,2,10
+13
+low,low,medium,high,high
+leaf(1),leaf(2),leaf(3)
+{"a": 1, "b": 1} | {"a": 1, "b": 2} | {"a": 9, "b": 2}
+1,A,c,hello
diff --git a/tests/success/sort_test.msh b/tests/success/sort_test.msh
index 61a38dbf..1d0f888f 100644
--- a/tests/success/sort_test.msh
+++ b/tests/success/sort_test.msh
@@ -1,5 +1,7 @@
"# Basic sort test" wl
-[hello 1 'c' 'A'] sort uw
+# sort preserves element types (the int stays an int), so stringify for display.
+# Across types the sort order is by a fixed type rank (numbers before text).
+[hello 1 'c' 'A'] sort (str) map uw
"# Unique sort test" wl
[z y 'x' y z] uniq sort uw
From 8abcaeecace36bce62551db5e8043014d2686add Mon Sep 17 00:00:00 2001
From: Mitchell Paulus
Date: Tue, 30 Jun 2026 20:55:15 -0500
Subject: [PATCH 13/32] Enum match: reject non-name payload bindings at runtime
The runtime enum payload-binding loop bound by token lexeme and silently
skipped non-token items, so it accepted match arms the type checker rejects:
an operator-token name like `ok x` (x lexes as INTERPRET) was bound, and a
malformed binding like `items [a b]` was ignored while the arm still matched.
Now each payload binding must be a plain name (LITERAL) or the `_` wildcard,
failing with a clear message otherwise. This mirrors the checker
(enumMemberPattern) and the `just`/type-test binding forms, whose runtime and
checker already agree, so all three binding forms are now consistent across
type-check and run.
Adds tests/fail/enum_bad_payload_binding.msh. All suites green: tests 214,
typecheck 197, go test.
Co-Authored-By: Claude Opus 4.8 (1M context)
Claude-Session: https://claude.ai/code/session_01E3aH9BBud5rjDoHUF8daaJ
---
mshell/Evaluator.go | 23 ++++++++++++++++++-
tests/fail/enum_bad_payload_binding.msh | 8 +++++++
.../fail/enum_bad_payload_binding.msh.stderr | 1 +
3 files changed, 31 insertions(+), 1 deletion(-)
create mode 100644 tests/fail/enum_bad_payload_binding.msh
create mode 100644 tests/fail/enum_bad_payload_binding.msh.stderr
diff --git a/mshell/Evaluator.go b/mshell/Evaluator.go
index 4cabd9c4..ef112e89 100644
--- a/mshell/Evaluator.go
+++ b/mshell/Evaluator.go
@@ -1101,6 +1101,15 @@ func (state *EvalState) processMatchBlock(matchBlock *MShellParseMatchBlock, fra
return state.FailWithMessage(fmt.Sprintf("%d:%d: No matching arm found in match block and no wildcard '_' arm provided.\n", startToken.Line, startToken.Column))
}
+// parseItemLexeme renders a parse item for a diagnostic: a token's lexeme, or a
+// non-token pattern's debug form.
+func parseItemLexeme(item MShellParseItem) string {
+ if tok, ok := item.(Token); ok {
+ return tok.Lexeme
+ }
+ return item.DebugString()
+}
+
// matchPattern checks if a subject matches a pattern (list of parse items).
// Returns (matched bool, bindings map, result EvalResult).
func (state *EvalState) matchPattern(pattern []MShellParseItem, subject MShellObject, startToken Token) (bool, map[string]MShellObject, EvalResult) {
@@ -1119,8 +1128,20 @@ func (state *EvalState) matchPattern(pattern []MShellParseItem, subject MShellOb
tok.Line, tok.Column, tok.Lexeme, len(enumVal.Payload), len(binds)))
}
bindings := make(map[string]MShellObject)
+ for _, b := range binds {
+ // A payload binding must be a plain name (a LITERAL) or the
+ // `_` wildcard — not a keyword/operator token (`end`, `x`,
+ // ...) or a nested pattern. This mirrors the type checker
+ // (enumMemberPattern) and the `just`/type-test binding forms,
+ // so the runtime never accepts an arm the checker rejects.
+ bt, okBt := b.(Token)
+ if !okBt || (bt.Type != LITERAL && bt.Type != UNDERSCORE) {
+ return false, nil, state.FailWithMessage(fmt.Sprintf("%d:%d: enum member '%s' payload bindings must be names, not '%s'.\n",
+ tok.Line, tok.Column, tok.Lexeme, parseItemLexeme(b)))
+ }
+ }
for i, b := range binds {
- if bt, okBt := b.(Token); okBt && bt.Lexeme != "_" {
+ if bt := b.(Token); bt.Lexeme != "_" {
bindings[bt.Lexeme] = enumVal.Payload[i]
}
}
diff --git a/tests/fail/enum_bad_payload_binding.msh b/tests/fail/enum_bad_payload_binding.msh
new file mode 100644
index 00000000..26ed8441
--- /dev/null
+++ b/tests/fail/enum_bad_payload_binding.msh
@@ -0,0 +1,8 @@
+# An enum payload binding must be a plain name (or `_`). A nested pattern (or a
+# keyword/operator token) is rejected at runtime, matching the type checker and
+# the `just`/type-test binding forms.
+enum Box = items [int] | z end
+[1 2 3] items match
+ items [a b] : "matched" wl,
+ z : "z" wl,
+end
diff --git a/tests/fail/enum_bad_payload_binding.msh.stderr b/tests/fail/enum_bad_payload_binding.msh.stderr
new file mode 100644
index 00000000..2682d61e
--- /dev/null
+++ b/tests/fail/enum_bad_payload_binding.msh.stderr
@@ -0,0 +1 @@
+6:3: enum member 'items' payload bindings must be names, not '['a', 'b']'.
From 99ac5b10dc3d06e9fc0c95353e2bfdd4a7423fa4 Mon Sep 17 00:00:00 2001
From: Mitchell Paulus
Date: Tue, 30 Jun 2026 21:37:34 -0500
Subject: [PATCH 14/32] Enum match: see through a `type` alias (brand) to the
enum
Naming an enum via `type Color2 = C` wraps it in a TKBrand, so the checker's
match logic (which tests Kind == TKEnum) no longer recognized the members: a
match on the aliased type was rejected as "unrecognized pattern", even though
brands are runtime-erased and the value matched correctly at run time.
This was inconsistent with a branded union: `type T = int | str` stays a
TKUnion node, so its arms remain matchable through the brand. Enums took the
opaque-wrapper path instead.
Now enumMemberPattern and CheckMatchExhaustive unwrap a TKBrand to its
underlying enum before dispatching, so a branded enum matches (and enforces
exhaustiveness over) its members exactly like a branded union matches its arms.
The brand stays nominal at value boundaries (an explicit `as` is still needed
to pass an enum where the alias is expected). Checker-only change; the runtime
already matched branded enums correctly.
Tests: tests/success/enum_branded_match.msh and
tests/typecheck_fail/enum_branded_nonexhaustive.msh. All suites green:
tests 215, typecheck 199, go test.
Co-Authored-By: Claude Opus 4.8 (1M context)
Claude-Session: https://claude.ai/code/session_01E3aH9BBud5rjDoHUF8daaJ
---
mshell/TypeBranch.go | 7 +++++
mshell/TypeCheckProgram.go | 5 ++++
tests/success/enum_branded_match.msh | 27 +++++++++++++++++++
tests/success/enum_branded_match.msh.stdout | 3 +++
.../enum_branded_nonexhaustive.msh | 10 +++++++
5 files changed, 52 insertions(+)
create mode 100644 tests/success/enum_branded_match.msh
create mode 100644 tests/success/enum_branded_match.msh.stdout
create mode 100644 tests/typecheck_fail/enum_branded_nonexhaustive.msh
diff --git a/mshell/TypeBranch.go b/mshell/TypeBranch.go
index c760cead..4542b667 100644
--- a/mshell/TypeBranch.go
+++ b/mshell/TypeBranch.go
@@ -117,7 +117,14 @@ func (c *Checker) CheckMatchExhaustive(matched TypeId, arms []MatchArmTag, callS
}
}
+ // Unwrap a `type X = Enum` brand so exhaustiveness dispatches on the
+ // underlying enum and checks coverage over its members — mirroring how a
+ // branded union (still a TKUnion node) is checked over its arms.
n := c.arena.Node(matched)
+ if n.Kind == TKBrand {
+ matched = c.underlying(matched)
+ n = c.arena.Node(matched)
+ }
switch n.Kind {
case TKMaybe:
hasJust, hasNone := false, false
diff --git a/mshell/TypeCheckProgram.go b/mshell/TypeCheckProgram.go
index b293623d..ba7c3683 100644
--- a/mshell/TypeCheckProgram.go
+++ b/mshell/TypeCheckProgram.go
@@ -1418,6 +1418,11 @@ func (c *Checker) enumMemberPattern(subject TypeId, pattern []MShellParseItem) (
return armPattern{}, false
}
resolved := c.subst.Apply(c.arena, subject)
+ // Unwrap a `type X = Enum` brand so a branded enum matches by its members,
+ // just as a branded union (`type T = int | str`) matches by its arms.
+ if c.arena.Node(resolved).Kind == TKBrand {
+ resolved = c.underlying(resolved)
+ }
sn := c.arena.Node(resolved)
if sn.Kind != TKEnum {
return armPattern{}, false
diff --git a/tests/success/enum_branded_match.msh b/tests/success/enum_branded_match.msh
new file mode 100644
index 00000000..a63a4a6a
--- /dev/null
+++ b/tests/success/enum_branded_match.msh
@@ -0,0 +1,27 @@
+# An enum named via a `type` alias is a distinct branded type, but it can still
+# be `match`ed by its members — just as a branded union (`type T = int | str`)
+# is matched by its arms. Exhaustiveness is enforced over the members, payload
+# binding works through the brand, and the brand stays nominal at call
+# boundaries (an explicit `as` is needed to pass an enum where the alias is).
+enum C = red | green | blue end
+type Color2 = C
+
+red as Color2 match
+ red : "r" wl,
+ green : "g" wl,
+ blue : "b" wl,
+end
+
+enum R = ok int | failed str | z end
+type R2 = R
+404 as int drop
+5 ok as R2 match
+ ok n : @n str wl,
+ failed m : @m wl,
+ z : "z" wl,
+end
+
+def paint (Color2 -- str)
+ match red: "is red", green: "is green", blue: "is blue", end
+end
+blue as Color2 paint wl
diff --git a/tests/success/enum_branded_match.msh.stdout b/tests/success/enum_branded_match.msh.stdout
new file mode 100644
index 00000000..c75b5130
--- /dev/null
+++ b/tests/success/enum_branded_match.msh.stdout
@@ -0,0 +1,3 @@
+r
+5
+is blue
diff --git a/tests/typecheck_fail/enum_branded_nonexhaustive.msh b/tests/typecheck_fail/enum_branded_nonexhaustive.msh
new file mode 100644
index 00000000..7be9ae0c
--- /dev/null
+++ b/tests/typecheck_fail/enum_branded_nonexhaustive.msh
@@ -0,0 +1,10 @@
+# Exhaustiveness is enforced through a `type` alias of an enum: matching a
+# branded enum must still cover every member (or use `_`). Here `blue` is
+# missing, so the match is rejected — the brand does not hide the members.
+enum C = red | green | blue end
+type Color2 = C
+
+red as Color2 match
+ red : "r" wl,
+ green : "g" wl,
+end
From 556b382cda3edcc7c455ab976ed2b8c490c5e22e Mon Sep 17 00:00:00 2001
From: Mitchell Paulus
Date: Wed, 1 Jul 2026 18:03:06 -0500
Subject: [PATCH 15/32] Enum: drop the EnumName prefix from DebugString
A list's ToString renders elements via DebugString, and the enum's
DebugString was the only value type to inject an `EnumName.` prefix. So an
enum inside a list printed as `[C.red C.green]`, inconsistent with its
standalone form (`red`), `map`-ed form (`red`), and dict/JSON form (`"red"`).
DebugString now returns the same member form as ToString, so an enum renders
identically in every context. The member name is globally unique, so the type
prefix added no disambiguation (unlike the quotes a string's DebugString adds).
Test: tests/success/enum_render_contexts.msh. Suites green: tests 217,
typecheck 202, go test.
Co-Authored-By: Claude Opus 4.8 (1M context)
Claude-Session: https://claude.ai/code/session_01E3aH9BBud5rjDoHUF8daaJ
---
mshell/MShellObject.go | 2 +-
tests/success/enum_render_contexts.msh | 12 ++++++++++++
tests/success/enum_render_contexts.msh.stdout | 6 ++++++
3 files changed, 19 insertions(+), 1 deletion(-)
create mode 100644 tests/success/enum_render_contexts.msh
create mode 100644 tests/success/enum_render_contexts.msh.stdout
diff --git a/mshell/MShellObject.go b/mshell/MShellObject.go
index 098b72b7..e628d06a 100644
--- a/mshell/MShellObject.go
+++ b/mshell/MShellObject.go
@@ -364,7 +364,7 @@ func (e *MShellEnum) IsCommandLineable() bool { return true }
func (e *MShellEnum) IsNumeric() bool { return false }
func (e *MShellEnum) FloatNumeric() float64 { return 0 }
func (e *MShellEnum) CommandLine() string { return enumRender(e) }
-func (e *MShellEnum) DebugString() string { return e.EnumName + "." + enumRender(e) }
+func (e *MShellEnum) DebugString() string { return enumRender(e) }
// enumRender renders an enum value as `member` (nullary) or
// `member(p0 p1 ...)`. Nested enum payloads are expanded with an explicit
diff --git a/tests/success/enum_render_contexts.msh b/tests/success/enum_render_contexts.msh
new file mode 100644
index 00000000..57c12293
--- /dev/null
+++ b/tests/success/enum_render_contexts.msh
@@ -0,0 +1,12 @@
+# An enum value renders the same way in every context: standalone, inside a
+# list, and via `map` all use the member form (`red`, `leaf(3)`) with no
+# `EnumName.` prefix. (A dict / toJson still use the JSON-tagged form.)
+enum C = red | green | blue end
+red str wl
+[red green blue] str wl
+[red green blue] (str) map "," join wl
+
+enum T = leaf int | node T T end
+3 leaf str wl
+[ 3 leaf 1 leaf ] str wl
+1 leaf 2 leaf node str wl
diff --git a/tests/success/enum_render_contexts.msh.stdout b/tests/success/enum_render_contexts.msh.stdout
new file mode 100644
index 00000000..cde9f53c
--- /dev/null
+++ b/tests/success/enum_render_contexts.msh.stdout
@@ -0,0 +1,6 @@
+red
+[red green blue]
+red,green,blue
+leaf(3)
+[leaf(3) leaf(1)]
+node(leaf(1) leaf(2))
From 622ce23e35b701dd2bbcd036a79682b1d60ecd01 Mon Sep 17 00:00:00 2001
From: Mitchell Paulus
Date: Wed, 1 Jul 2026 18:23:49 -0500
Subject: [PATCH 16/32] Fix Maybe equality: accept *Maybe, so None==None and
Just==Just work
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Maybe.Equals asserted other.(Maybe) (the value form), but the runtime
constructs Maybe values as *Maybe pointers everywhere (just/none and every
lookup/parse builtin push &Maybe{...}). So the assertion missed every operand
and the method bailed to false — making *all* Maybe-vs-Maybe comparisons
false, including `none none =`, `5 just 5 just =`, and `Maybe[enum]` equality.
By extension `uniq` could not dedupe Maybes, and any list/dict/enum containing
a Maybe compared unequal to an identical value.
Now it unwraps other via the existing asMaybe helper (value or pointer),
matching how the match code and compareValues already handle both forms. The
receiver side already worked via Go's value-method promotion on *Maybe.
The equality.msh test had baked the wrong answers into its expected output
(None==None and Just==Just recorded as false); corrected and expanded with
real Just/None and Maybe[enum] assertions.
Suites green: tests 217, typecheck 203, go test.
Co-Authored-By: Claude Opus 4.8 (1M context)
Claude-Session: https://claude.ai/code/session_01E3aH9BBud5rjDoHUF8daaJ
---
mshell/MShellObject.go | 5 ++++-
tests/success/equality.msh | 13 ++++++++++---
tests/success/equality.msh.stdout | 6 ++++++
3 files changed, 20 insertions(+), 4 deletions(-)
diff --git a/mshell/MShellObject.go b/mshell/MShellObject.go
index e628d06a..fb430c53 100644
--- a/mshell/MShellObject.go
+++ b/mshell/MShellObject.go
@@ -243,7 +243,10 @@ func (m Maybe) Concat(other MShellObject) (MShellObject, error) {
}
func (m Maybe) Equals(other MShellObject) (bool, error) {
- otherMaybe, ok := other.(Maybe)
+ // Maybe values are constructed as *Maybe at runtime, so accept either the
+ // value or pointer form — a plain other.(Maybe) misses every *Maybe and
+ // would make all Maybe-vs-Maybe comparisons (including None==None) false.
+ otherMaybe, ok := asMaybe(other)
if !ok {
return false, nil
}
diff --git a/tests/success/equality.msh b/tests/success/equality.msh
index 9259ebf1..6484e3e2 100644
--- a/tests/success/equality.msh
+++ b/tests/success/equality.msh
@@ -15,14 +15,21 @@
# Dicts compare structurally, independent of key order
{ "a": 1, "b": 2 } { "b": 2, "a": 1 } = str wl
-# Maybe
-"x" just "x" just = str wl
+# Maybe: None==None, Just==Just (equal / unequal payloads), Just != None, and
+# a Maybe nested in a list all compare structurally.
none none = str wl
+"x" just "x" just = str wl
+5 just 5 just = str wl
+5 just 6 just = str wl
+5 just none = str wl
+[5 just] [5 just] = str wl
-# Enums
+# Enums (including Maybe[enum], the common optional-enum case)
enum C = red | green end
red red = str wl
red green = str wl
+red just red just = str wl
+red just green just = str wl
# uniq now deduplicates any equatable value (lists, enums, ...)
[[1] [1] [2]] uniq len str wl
diff --git a/tests/success/equality.msh.stdout b/tests/success/equality.msh.stdout
index 13d6400c..23ac1554 100644
--- a/tests/success/equality.msh.stdout
+++ b/tests/success/equality.msh.stdout
@@ -4,9 +4,15 @@ true
false
true
true
+true
+true
+true
false
false
true
+true
+false
+true
false
2
2
From 3d6f3f94381b6315c5d43098c5d20db61ef441b7 Mon Sep 17 00:00:00 2001
From: Mitchell Paulus
Date: Wed, 1 Jul 2026 18:51:40 -0500
Subject: [PATCH 17/32] Match: unwrap a type-alias brand on the subject once,
for all pattern forms
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The branded-enum fix unwrapped the TKBrand in two per-site spots
(enumMemberPattern and CheckMatchExhaustive), so a branded enum matched its
members — but the `just`/`none` and `<type> name` binding paths in
armPatternOf still checked the raw subject kind. So matching a branded Maybe
(e.g. `type MC = Maybe[C]`, a named optional enum) by `just v`/`none` was
rejected by the checker (`@v` unbound / non-exhaustive) even though the runtime
matched it fine — brands are runtime-erased.
Unwrap the brand once where the match subject is established (checkMatchBlock),
so every arm form — enum member, `just v`, `<type> name`, list/dict — and the
exhaustiveness check see the underlying type uniformly. The two per-site unwraps
are removed (redundant now); enumMemberPattern and CheckMatchExhaustive get the
already-unwrapped subject. Brands stay nominal at value boundaries (an `as` is
still needed to pass an enum where the alias is expected).
Tests: tests/success/branded_maybe_match.msh and
tests/typecheck_fail/branded_maybe_nonexhaustive.msh. Suites green: tests 218,
typecheck 205, go test.
Co-Authored-By: Claude Opus 4.8 (1M context)
Claude-Session: https://claude.ai/code/session_01E3aH9BBud5rjDoHUF8daaJ
---
mshell/TypeBranch.go | 7 -----
mshell/TypeCheckProgram.go | 16 +++++++----
tests/success/branded_maybe_match.msh | 28 +++++++++++++++++++
tests/success/branded_maybe_match.msh.stdout | 4 +++
.../branded_maybe_nonexhaustive.msh | 7 +++++
5 files changed, 50 insertions(+), 12 deletions(-)
create mode 100644 tests/success/branded_maybe_match.msh
create mode 100644 tests/success/branded_maybe_match.msh.stdout
create mode 100644 tests/typecheck_fail/branded_maybe_nonexhaustive.msh
diff --git a/mshell/TypeBranch.go b/mshell/TypeBranch.go
index 4542b667..c760cead 100644
--- a/mshell/TypeBranch.go
+++ b/mshell/TypeBranch.go
@@ -117,14 +117,7 @@ func (c *Checker) CheckMatchExhaustive(matched TypeId, arms []MatchArmTag, callS
}
}
- // Unwrap a `type X = Enum` brand so exhaustiveness dispatches on the
- // underlying enum and checks coverage over its members — mirroring how a
- // branded union (still a TKUnion node) is checked over its arms.
n := c.arena.Node(matched)
- if n.Kind == TKBrand {
- matched = c.underlying(matched)
- n = c.arena.Node(matched)
- }
switch n.Kind {
case TKMaybe:
hasJust, hasNone := false, false
diff --git a/mshell/TypeCheckProgram.go b/mshell/TypeCheckProgram.go
index fc679a54..2a52c0b0 100644
--- a/mshell/TypeCheckProgram.go
+++ b/mshell/TypeCheckProgram.go
@@ -1206,6 +1206,15 @@ func (c *Checker) checkMatchBlock(matchBlock *MShellParseMatchBlock) {
// exhaustiveness check compare against `str` by type id, and the literal
// value carries no meaning for pattern matching.
subject := c.arena.WidenStrLit(c.stack.items[c.stack.Len()-1])
+ // See through a `type X = ...` brand once, here, so every arm form (enum
+ // member, `just`/`<type> name` binding, list/dict pattern) and the
+ // exhaustiveness check match against the underlying type. A brand is
+ // nominal for typing but has no runtime representation, so a branded enum
+ // matches its members, a branded Maybe its `just`/`none`, etc. — exactly
+ // as the unbranded types do, which is what the runtime already does.
+ if resolved := c.subst.Apply(c.arena, subject); c.arena.Node(resolved).Kind == TKBrand {
+ subject = c.underlying(resolved)
+ }
entry := c.captureBranch()
if len(matchBlock.Arms) == 0 {
@@ -1420,12 +1429,9 @@ func (c *Checker) enumMemberPattern(subject TypeId, pattern []MShellParseItem) (
if !ok || tok.Type != LITERAL {
return armPattern{}, false
}
+ // The subject is already brand-unwrapped by checkMatchBlock, so a branded
+ // enum (`type X = Enum`) arrives here as its underlying TKEnum.
resolved := c.subst.Apply(c.arena, subject)
- // Unwrap a `type X = Enum` brand so a branded enum matches by its members,
- // just as a branded union (`type T = int | str`) matches by its arms.
- if c.arena.Node(resolved).Kind == TKBrand {
- resolved = c.underlying(resolved)
- }
sn := c.arena.Node(resolved)
if sn.Kind != TKEnum {
return armPattern{}, false
diff --git a/tests/success/branded_maybe_match.msh b/tests/success/branded_maybe_match.msh
new file mode 100644
index 00000000..6f4cd0b4
--- /dev/null
+++ b/tests/success/branded_maybe_match.msh
@@ -0,0 +1,28 @@
+# A `Maybe` (or enum, or any type) named via a `type` alias is a distinct brand,
+# but `match` sees through the brand to the underlying type — so a branded
+# Maybe matches by `just v` / `none`, a branded enum by its members, and a
+# branded primitive by a type-keyword arm. The brand is nominal for typing but
+# has no runtime form, matching what the runtime already does.
+enum C = red | green | blue end
+type MC = Maybe[C]
+
+red just as MC match
+ just v : @v str wl,
+ none : "n" wl,
+end
+
+none as MC match
+ just v : @v str wl,
+ none : "n" wl,
+end
+
+# Branded Maybe of a primitive, with a type-keyword binding inside.
+type MI = Maybe[int]
+7 just as MI match
+ just n : @n 1 + str wl,
+ none : "n" wl,
+end
+
+# Branded primitive, matched with a type-keyword + binding.
+type MyInt = int
+5 as MyInt match int n : @n str wl, _ : "o" wl, end
diff --git a/tests/success/branded_maybe_match.msh.stdout b/tests/success/branded_maybe_match.msh.stdout
new file mode 100644
index 00000000..373f91b9
--- /dev/null
+++ b/tests/success/branded_maybe_match.msh.stdout
@@ -0,0 +1,4 @@
+red
+n
+8
+5
diff --git a/tests/typecheck_fail/branded_maybe_nonexhaustive.msh b/tests/typecheck_fail/branded_maybe_nonexhaustive.msh
new file mode 100644
index 00000000..d1fb35bb
--- /dev/null
+++ b/tests/typecheck_fail/branded_maybe_nonexhaustive.msh
@@ -0,0 +1,7 @@
+# Exhaustiveness is enforced through a `type` alias of a Maybe: a branded Maybe
+# match must still cover both `just` and `none` (or use `_`). Here `none` is
+# missing, so it is rejected — the brand does not hide the cases.
+type MI = Maybe[int]
+5 just as MI match
+ just v : @v str wl,
+end
From 3413d03ddded5a252a6e26872c5f68168de6a057 Mon Sep 17 00:00:00 2001
From: Mitchell Paulus
Date: Wed, 1 Jul 2026 19:52:56 -0500
Subject: [PATCH 18/32] Fix exponential blowup comparing enum values with
shared substructure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
A value built by reusing one subtree twice per level (`@t @t node` in a
loop) is a DAG: n heap nodes but 2^n paths when walked as a tree. Equality
and ordering walked it structurally with no notion of sharing, so `=`,
`uniq`, and `sort` on such values went exponential — depth 24 took 0.7s,
each +1 level doubled it, and depth 40 (41 actual nodes) would run for
hours. Measured, not theoretical.
Two layers of defense, both zero-cost for ordinary values:
- sameRef fast path: a pointer-identical pair is equal by definition, so
the enum Equals pair loop, compareValues, and itemsEqual skip it instead
of expanding. This alone collapses every same-reference case (self
compare, dup, a shared subtree meeting itself) from 2^n to n. Typed
per-kind pointer comparisons, so it can never hit Go's non-comparable
interface panic (e.g. MShellBinary).
- dagGuard threshold memo: two *independently built* DAGs share no pointers
across operands, so the fast path never fires. The walks count pops; past
2^19 steps they memoize already-expanded enum/list/dict pointer pairs and
skip repeats, making the comparison polynomial in actual nodes. Sound in
a LIFO walk: a duplicate only pops after the first occurrence's expansion
fully resolved, and any mismatch returns immediately. The memo is capped
(2^18 entries) so a huge linear value cannot balloon memory; below the
threshold the guard is one integer increment and never allocates.
Depth-40 self-compare: was ~13 hours extrapolated, now 0.035s. The
depth-64 regression test (tests/success/enum_dag_equality.msh) covers both
modes plus uniq/sort and an unequal tip. Deep linear values (50k suite
tests, 4M manual) are unaffected. Suites green: tests 219, typecheck 206,
go test.
Co-Authored-By: Claude Fable 5
Claude-Session: https://claude.ai/code/session_01E3aH9BBud5rjDoHUF8daaJ
---
mshell/MShellObject.go | 123 +++++++++++++++++++++
tests/success/enum_dag_equality.msh | 34 ++++++
tests/success/enum_dag_equality.msh.stdout | 8 ++
3 files changed, 165 insertions(+)
create mode 100644 tests/success/enum_dag_equality.msh
create mode 100644 tests/success/enum_dag_equality.msh.stdout
diff --git a/mshell/MShellObject.go b/mshell/MShellObject.go
index fb430c53..6abf3541 100644
--- a/mshell/MShellObject.go
+++ b/mshell/MShellObject.go
@@ -501,10 +501,18 @@ func (e *MShellEnum) Concat(other MShellObject) (MShellObject, error) {
// payloads (the leaves) delegate to their own Equals.
func (e *MShellEnum) Equals(other MShellObject) (bool, error) {
type pair struct{ a, b MShellObject }
+ var guard dagGuard
stack := []pair{{a: e, b: other}}
for len(stack) > 0 {
p := stack[len(stack)-1]
stack = stack[:len(stack)-1]
+ // Shared substructure: a pointer-identical pair is equal by
+ // definition, and a pair this walk already expanded compared equal
+ // (see dagGuard). Skipping both keeps DAG-shaped values (a subtree
+ // reused twice per level) linear instead of 2^n.
+ if sameRef(p.a, p.b) || guard.skip(p.a, p.b) {
+ continue
+ }
ea, aok := p.a.(*MShellEnum)
eb, bok := p.b.(*MShellEnum)
if aok || bok {
@@ -1090,6 +1098,108 @@ func sortedDictKeys(m map[string]MShellObject) []string {
return keys
}
+// sameRef reports whether a and b are the identical heap object, for the kinds
+// that can form shared substructure (a value built as `@t @t node` reuses one
+// subtree twice). A pointer-identical pair is equal by definition, so equality
+// and ordering walks skip it instead of expanding it — without this, walking a
+// value with n levels of sharing costs 2^n. Only pointer kinds are compared:
+// comparing interfaces holding non-comparable dynamic types (e.g. MShellBinary,
+// a []byte) panics at runtime.
+func sameRef(a, b MShellObject) bool {
+ switch av := a.(type) {
+ case *MShellEnum:
+ bv, ok := b.(*MShellEnum)
+ return ok && av == bv
+ case *MShellList:
+ bv, ok := b.(*MShellList)
+ return ok && av == bv
+ case *MShellDict:
+ bv, ok := b.(*MShellDict)
+ return ok && av == bv
+ case *Maybe:
+ bv, ok := b.(*Maybe)
+ return ok && av == bv
+ case *MShellDateTime:
+ bv, ok := b.(*MShellDateTime)
+ return ok && av == bv
+ case *MShellQuotation:
+ bv, ok := b.(*MShellQuotation)
+ return ok && av == bv
+ }
+ return false
+}
+
+// dagGuard bounds a comparison walk over values with shared substructure that
+// sameRef alone cannot catch: two *independently built* DAGs share no pointers
+// across operands, so every level re-expands and the walk goes exponential.
+// The guard counts pops; once a walk runs long enough to suggest blowup, it
+// memoizes the pointer pairs it has already expanded and skips repeats.
+//
+// Skipping a repeated pair is sound in a LIFO walk: the first occurrence's
+// entire expansion resolves before any later duplicate (which sat lower in the
+// stack) pops, and a mismatch anywhere returns from the walk immediately — so
+// if a duplicate pops at all, its subtree already compared equal.
+//
+// Ordinary comparisons never allocate: below the step threshold the guard is
+// one integer increment. The memo is capped so a legitimately huge linear
+// value (millions of distinct pairs, no repeats) cannot balloon memory; a
+// blowup DAG has few distinct pairs and fits far below the cap.
+type dagGuard struct {
+ steps int
+ memo map[refPair]bool
+}
+
+type refPair struct{ a, b MShellObject }
+
+const dagStepThreshold = 1 << 19
+const dagMemoCap = 1 << 18
+
+// skip reports whether this pair was already expanded earlier in the walk.
+// Call once per popped pair; it records the pair (past the threshold) so
+// later duplicates skip.
+func (g *dagGuard) skip(a, b MShellObject) bool {
+ g.steps++
+ if g.steps < dagStepThreshold {
+ return false
+ }
+ key, ok := refPairKey(a, b)
+ if !ok {
+ return false
+ }
+ if g.memo == nil {
+ g.memo = make(map[refPair]bool, 1024)
+ }
+ if g.memo[key] {
+ return true
+ }
+ if len(g.memo) < dagMemoCap {
+ g.memo[key] = true
+ }
+ return false
+}
+
+// refPairKey returns a comparable identity key when both values are the same
+// container pointer kind — the kinds whose repeated pairs cause blowup.
+// Interface keys are only safe when the dynamic values are comparable, which
+// pointers are; scalar kinds are cheap to compare directly and get no key.
+func refPairKey(a, b MShellObject) (refPair, bool) {
+ switch a.(type) {
+ case *MShellEnum:
+ if _, ok := b.(*MShellEnum); ok {
+ return refPair{a, b}, true
+ }
+ case *MShellList:
+ if _, ok := b.(*MShellList); ok {
+ return refPair{a, b}, true
+ }
+ case *MShellDict:
+ if _, ok := b.(*MShellDict); ok {
+ return refPair{a, b}, true
+ }
+ }
+ return refPair{}, false
+}
+
// compareValues returns -1, 0, or 1, giving a total order over every value
// type. Different kinds are ordered by a fixed type rank (valueTypeRank); within
// a kind the natural order is used (numbers numerically with int/float
@@ -1113,6 +1223,7 @@ func compareValues(a, b MShellObject) int {
lit int
isLit bool
}
+ var guard dagGuard
stack := []task{{a: a, b: b}}
for len(stack) > 0 {
t := stack[len(stack)-1]
@@ -1123,6 +1234,13 @@ func compareValues(a, b MShellObject) int {
}
continue
}
+ // Shared substructure: a pointer-identical pair compares 0 by
+ // definition, and a pair this walk already expanded proved 0 (any
+ // non-zero would have returned; see dagGuard). Skipping both keeps
+ // DAG-shaped values linear instead of 2^n.
+ if sameRef(t.a, t.b) || guard.skip(t.a, t.b) {
+ continue
+ }
ra, rb := valueTypeRank(t.a), valueTypeRank(t.b)
if ra != rb {
return cmpInt(ra, rb)
@@ -2380,6 +2498,11 @@ func itemsEqual(a, b []MShellObject) (bool, error) {
return false, nil
}
for i := range a {
+ // Pointer-identical elements are equal by definition; skipping them
+ // keeps lists with shared substructure from re-walking it.
+ if sameRef(a[i], b[i]) {
+ continue
+ }
eq, err := a[i].Equals(b[i])
if err != nil || !eq {
return eq, err
diff --git a/tests/success/enum_dag_equality.msh b/tests/success/enum_dag_equality.msh
new file mode 100644
index 00000000..08b677e2
--- /dev/null
+++ b/tests/success/enum_dag_equality.msh
@@ -0,0 +1,34 @@
+# Equality and ordering on enum values with shared substructure must not blow
+# up: `@t @t node` reuses one subtree twice per level, so after 64 levels the
+# value is a DAG with 65 nodes but 2^64 tree paths. The comparison walks skip
+# pointer-identical pairs (and, past a step threshold, memoize already-expanded
+# pairs), so these finish instantly; a naive structural walk would run for
+# centuries.
+enum T = leaf int | node T T end
+
+# One DAG, compared against itself / its own reference.
+0 leaf t!
+0 i!
+( @i 64 >= if break end @t @t node t! @i 1 + i! ) loop
+@t @t = str wl
+@t dup = str wl
+[ @t @t ] uniq len str wl
+[ @t @t ] sort len str wl
+
+# Two DAGs built independently: no pointers are shared across the operands, so
+# the pointer fast path never fires — this exercises the memoized mode.
+0 leaf a!
+0 i!
+( @i 64 >= if break end @a @a node a! @i 1 + i! ) loop
+0 leaf b!
+0 i!
+( @i 64 >= if break end @b @b node b! @i 1 + i! ) loop
+@a @b = str wl
+
+# A third DAG differing at the bottom leaf: unequal, found without blowup.
+1 leaf c!
+0 i!
+( @i 64 >= if break end @c @c node c! @i 1 + i! ) loop
+@a @c = str wl
+[ @a @c @b ] sort len str wl
+[ @a @c @b ] uniq len str wl
diff --git a/tests/success/enum_dag_equality.msh.stdout b/tests/success/enum_dag_equality.msh.stdout
new file mode 100644
index 00000000..e102c904
--- /dev/null
+++ b/tests/success/enum_dag_equality.msh.stdout
@@ -0,0 +1,8 @@
+true
+true
+1
+2
+true
+false
+3
+2
From 8087f6c0f9b500d5d03e5b37eb9a5c2aaf35f6d6 Mon Sep 17 00:00:00 2001
From: Mitchell Paulus
Date: Wed, 1 Jul 2026 20:06:19 -0500
Subject: [PATCH 19/32] Enum: reject a def whose name collides with an enum
member
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The spec'd invariant — enum members share the word namespace, collisions are
declaration errors — was only enforced in one direction. defineEnum (pre-pass
1b) checks nameBuiltins, which at that point holds Go builtins and stdlib
sigs, so a member colliding with those is caught. But user def signatures
register in pre-pass 2, after the enums, with no reverse check — so a member
colliding with a same-file def was silently accepted in either textual order.
That was a real soundness hole: the checker resolved the shared word to the
enum constructor (e.g. when the context demanded the enum type) while the
runtime resolves definitions before enum members and ran the def — so a
cleanly type-checked program failed at runtime ("Unknown match pattern
literal") or diverged in stack shape for payload constructors.
defineEnum now records registered member names (enumMemberToks), and def
registration rejects a def whose name is a member, mirroring the existing
member-vs-def error. Def-vs-def duplication is untouched (a separate,
deferred decision).
Test: tests/typecheck_fail/enum_member_def_collision.msh. Suites green:
tests 219, typecheck 207, go test.
Co-Authored-By: Claude Fable 5
Claude-Session: https://claude.ai/code/session_01E3aH9BBud5rjDoHUF8daaJ
---
mshell/TypeCheckProgram.go | 12 ++++++++++++
mshell/TypeChecker.go | 8 ++++++++
mshell/TypeEnum.go | 4 ++++
tests/typecheck_fail/enum_member_def_collision.msh | 8 ++++++++
4 files changed, 32 insertions(+)
create mode 100644 tests/typecheck_fail/enum_member_def_collision.msh
diff --git a/mshell/TypeCheckProgram.go b/mshell/TypeCheckProgram.go
index 2a52c0b0..facb13ac 100644
--- a/mshell/TypeCheckProgram.go
+++ b/mshell/TypeCheckProgram.go
@@ -127,6 +127,18 @@ func (c *Checker) CheckProgram(file *MShellFile) {
sig := c.ResolveDefSig(def.Inputs, def.Outputs)
defSigs[i] = sig
nameId := c.names.Intern(def.Name)
+ // Enum constructors share the word namespace and registered in
+ // pre-pass 1b; a def reusing a member name would resolve to the
+ // constructor in the checker but to the def at runtime, so reject it —
+ // the mirror of defineEnum rejecting a member that collides with an
+ // existing def or builtin.
+ if _, isMember := c.enumMemberToks[nameId]; isMember {
+ c.errors = append(c.errors, TypeError{
+ Kind: TErrTypeParse, Pos: def.NameToken,
+ Hint: "definition '" + def.Name + "' conflicts with an enum member of the same name",
+ })
+ continue
+ }
c.nameBuiltins[nameId] = append(c.nameBuiltins[nameId], sig)
}
// Pre-pass 3: type-check each def body against its declared sig.
diff --git a/mshell/TypeChecker.go b/mshell/TypeChecker.go
index e82da8ba..e856c400 100644
--- a/mshell/TypeChecker.go
+++ b/mshell/TypeChecker.go
@@ -101,6 +101,14 @@ type Checker struct {
// type names are NOT stored here — they are recognized directly.
typeEnv map[NameId]TypeId
+ // enumMemberToks records every registered enum member name (value: the
+ // member's declaration token). Enum constructors and user defs share the
+ // word namespace, and enums register before same-file defs — so def
+ // registration checks this to reject a def whose name collides with a
+ // member, mirroring defineEnum rejecting a member that collides with an
+ // existing def or builtin.
+ enumMemberToks map[NameId]Token
+
// Quote-body inference state (Phase 7). When inferring is true,
// applySig responds to stack underflow by synthesizing fresh type
// variables instead of reporting an error; those vars accumulate
diff --git a/mshell/TypeEnum.go b/mshell/TypeEnum.go
index 4a20ee81..9180ed96 100644
--- a/mshell/TypeEnum.go
+++ b/mshell/TypeEnum.go
@@ -80,5 +80,9 @@ func (c *Checker) defineEnum(d *MShellEnumDecl) {
continue
}
c.nameBuiltins[mid] = append(c.nameBuiltins[mid], QuoteSig{Inputs: u.payloads, Outputs: []TypeId{enumType}})
+ if c.enumMemberToks == nil {
+ c.enumMemberToks = make(map[NameId]Token, len(uniq))
+ }
+ c.enumMemberToks[mid] = u.tok
}
}
diff --git a/tests/typecheck_fail/enum_member_def_collision.msh b/tests/typecheck_fail/enum_member_def_collision.msh
new file mode 100644
index 00000000..7c3a68a3
--- /dev/null
+++ b/tests/typecheck_fail/enum_member_def_collision.msh
@@ -0,0 +1,8 @@
+# Enum constructors and defs share the word namespace, so a def reusing a
+# member name must be rejected (in either textual order — enums register
+# first regardless). Without this, the checker resolves the word to the
+# constructor while the runtime runs the def, and a type-checked program
+# fails at runtime.
+enum E = foo | z end
+def foo ( -- int) 42 end
+foo str wl
From c441027fa2ad569e000895a728e571dd3abb058a Mon Sep 17 00:00:00 2001
From: Mitchell Paulus
Date: Wed, 1 Jul 2026 20:55:27 -0500
Subject: [PATCH 20/32] Register startup-file type/enum declarations in both
runtime and checker
`enum` declarations only functioned in the main script: RegisterEnums had a
single call site on the script path, so an enum declared in the stdlib, the
user init file, or at the interactive prompt parsed and evaluated as a no-op
and its member words fell through to the bare-literal path ("Found literal
token"). `type` aliases in startup files were similarly invisible to the
checker, whose pre-passes walked only the main file's items.
Type-checking and running are separate today but should semantically match,
so both sides now read startup declarations:
- Runtime: loadStartupFile registers each startup file's enums on the
EvalState (covering script and interactive sessions), and the REPL line
executor registers enums declared at the prompt, next to its existing
def handling.
- Checker: loadStartupFile retains startup top-level items;
TypeCheckProgram passes them to the new Checker.RegisterStartupTypes,
which runs the same three-phase pre-pass order as CheckProgram (enum
names, type aliases, enum bodies + constructor words). Declaration
bodies are not type-checked, matching the stdlib-def treatment. The LSP
diagnostics pass registers the stdlib's items the same way.
Collision checks now span files in both directions: a startup enum member
rejects a colliding program def, and vice versa.
Tests: startup enum/type visible to checker + cross-file collision
(TypeEnum_test.go), startup-file enum registers constructors and constructs
at runtime (Startup_test.go). Suites green: tests 219, typecheck 207,
go test.
Co-Authored-By: Claude Fable 5
Claude-Session: https://claude.ai/code/session_01E3aH9BBud5rjDoHUF8daaJ
---
mshell/Main.go | 41 +++++++++++++++++-------
mshell/Startup_test.go | 56 +++++++++++++++++++++++++++++++--
mshell/TypeCheckProgram.go | 36 ++++++++++++++++++++-
mshell/TypeCheckProgram_test.go | 2 +-
mshell/TypeEnum_test.go | 47 +++++++++++++++++++++++++++
mshell/lsp.go | 15 +++++----
mshell/lsp_test.go | 2 +-
7 files changed, 175 insertions(+), 24 deletions(-)
diff --git a/mshell/Main.go b/mshell/Main.go
index 42d55c6f..ef9b9dca 100644
--- a/mshell/Main.go
+++ b/mshell/Main.go
@@ -166,7 +166,7 @@ func getStartupFileSpecs(options startupLoadOptions) (startupFileSpec, startupFi
return stdlibSpec, initSpec, nil
}
-func loadStartupFile(path string, description string, stack *MShellStack, context ExecuteContext, state *EvalState, definitions *[]MShellDefinition) error {
+func loadStartupFile(path string, description string, stack *MShellStack, context ExecuteContext, state *EvalState, definitions *[]MShellDefinition, items *[]MShellParseItem) error {
sourceBytes, err := os.ReadFile(path)
if err != nil {
if errors.Is(err, os.ErrNotExist) {
@@ -182,6 +182,13 @@ func loadStartupFile(path string, description string, stack *MShellStack, contex
*definitions = append(*definitions, parsedFile.Definitions...)
state.AddCompletionDefinitions(parsedFile.Definitions)
+ // Register enum constructors declared in this startup file, and retain the
+ // top-level items so the type checker can register the file's `type` and
+ // `enum` declarations — startup declarations behave like the main file's.
+ state.RegisterEnums(parsedFile.Items)
+ if items != nil {
+ *items = append(*items, parsedFile.Items...)
+ }
if len(parsedFile.Items) > 0 {
callStackItem := CallStackItem{
@@ -257,16 +264,17 @@ func preflightStartupFile(spec startupFileSpec) string {
return fmt.Sprintf("present at %s (parses ok; not evaluated because the other startup file failed first)", spec.path)
}
-func loadStartupDefinitions(options startupLoadOptions, stack *MShellStack, context ExecuteContext, state *EvalState) ([]MShellDefinition, error) {
+func loadStartupDefinitions(options startupLoadOptions, stack *MShellStack, context ExecuteContext, state *EvalState) ([]MShellDefinition, []MShellParseItem, error) {
stdlibSpec, initSpec, err := getStartupFileSpecs(options)
if err != nil {
- return nil, err
+ return nil, nil, err
}
definitions := make([]MShellDefinition, 0)
- if err := loadStartupFile(stdlibSpec.path, stdlibSpec.description, stack, context, state, &definitions); err != nil {
+ var items []MShellParseItem
+ if err := loadStartupFile(stdlibSpec.path, stdlibSpec.description, stack, context, state, &definitions, &items); err != nil {
initStatus := preflightStartupFile(initSpec)
- return nil, &startupLoadError{
+ return nil, nil, &startupLoadError{
which: "stdlib",
spec: stdlibSpec,
options: options,
@@ -276,14 +284,14 @@ func loadStartupDefinitions(options startupLoadOptions, stack *MShellStack, cont
}
}
- if err := loadStartupFile(initSpec.path, initSpec.description, stack, context, state, &definitions); err != nil {
+ if err := loadStartupFile(initSpec.path, initSpec.description, stack, context, state, &definitions, &items); err != nil {
if !initSpec.required && errors.Is(err, os.ErrNotExist) {
- return definitions, nil
+ return definitions, items, nil
}
- return nil, &startupLoadError{which: "init", spec: initSpec, options: options, cause: err}
+ return nil, nil, &startupLoadError{which: "init", spec: initSpec, options: options, cause: err}
}
- return definitions, nil
+ return definitions, items, nil
}
// formatStartupErrorMessage builds a multi-line explanation of how msh searches
@@ -821,7 +829,7 @@ func main() {
var allDefinitions []MShellDefinition
- startupDefinitions, err := loadStartupDefinitions(startupLoadOptions{
+ startupDefinitions, startupItems, err := loadStartupDefinitions(startupLoadOptions{
version: effectiveVersion,
allowEnvOverrides: allowStartupEnvOverrides,
requireInit: requireVersionedInit,
@@ -836,7 +844,7 @@ func main() {
state.AddCompletionDefinitions(file.Definitions)
if checkTypes {
- errs, ok := TypeCheckProgram(file, startupDefinitions)
+ errs, ok := TypeCheckProgram(file, startupDefinitions, startupItems)
if !ok {
for _, e := range errs {
fmt.Fprintln(os.Stderr, e)
@@ -2976,6 +2984,11 @@ func (state *TermState) ExecuteCurrentCommand() (bool, int) {
state.evalState.AddCompletionDefinitions(parsed.Definitions)
}
+ // Register enum constructors declared on this line, so an interactive
+ // `enum` declaration works like one in a script: its member words
+ // construct values on subsequent lines.
+ state.evalState.RegisterEnums(parsed.Items)
+
if len(parsed.Items) > 0 {
state.initCallStackItem.MShellParseItem = parsed.Items[0]
result := state.evalState.Evaluate(parsed.Items, &state.stack, state.context, state.stdLibDefs, state.initCallStackItem)
@@ -3162,11 +3175,15 @@ func (state *TermState) getCurrentPos() (int, int, error) {
}
func stdLibDefinitions(stack *MShellStack, context ExecuteContext, state *EvalState) ([]MShellDefinition, error) {
- return loadStartupDefinitions(startupLoadOptions{
+ // The interactive path has no whole-program type-check pass, so the
+ // startup items (already registered on the EvalState for runtime enum
+ // construction inside loadStartupFile) are not needed here.
+ defs, _, err := loadStartupDefinitions(startupLoadOptions{
version: mshellVersion,
allowEnvOverrides: true,
requireInit: false,
}, stack, context, state)
+ return defs, err
}
func registerTempFileForCleanup(tempFileName string) {
diff --git a/mshell/Startup_test.go b/mshell/Startup_test.go
index be7902dd..88c067df 100644
--- a/mshell/Startup_test.go
+++ b/mshell/Startup_test.go
@@ -151,7 +151,7 @@ func TestLoadStartupDefinitionsLoadsVersionedStdlibAndInit(t *testing.T) {
stack, context, state := newStartupTestContext()
- definitions, err := loadStartupDefinitions(startupLoadOptions{
+ definitions, _, err := loadStartupDefinitions(startupLoadOptions{
version: version,
allowEnvOverrides: false,
requireInit: true,
@@ -215,7 +215,7 @@ func TestLoadStartupDefinitionsRequiresInitForExplicitVersion(t *testing.T) {
stack, context, state := newStartupTestContext()
- _, err := loadStartupDefinitions(startupLoadOptions{
+ _, _, err := loadStartupDefinitions(startupLoadOptions{
version: version,
allowEnvOverrides: false,
requireInit: true,
@@ -251,7 +251,7 @@ func TestLoadStartupDefinitionsAllowsMissingInitForImplicitVersion(t *testing.T)
stack, context, state := newStartupTestContext()
- definitions, err := loadStartupDefinitions(startupLoadOptions{
+ definitions, _, err := loadStartupDefinitions(startupLoadOptions{
version: version,
allowEnvOverrides: true,
requireInit: false,
@@ -417,3 +417,53 @@ func TestEnvWithoutStartupOverridesRemovesOnlyStartupVars(t *testing.T) {
t.Fatalf("filtered env missing KEEP_ME: %q", filteredJoined)
}
}
+
+func TestStartupFileEnumRegistersConstructors(t *testing.T) {
+ // An `enum` declared in a startup file (stdlib / init) must register its
+ // constructors on the EvalState, so a member word in the main program (or
+ // at the interactive prompt) constructs a value instead of falling through
+ // to the bare-literal path.
+ dir := t.TempDir()
+ path := filepath.Join(dir, "init.msh")
+ if err := os.WriteFile(path, []byte("enum Status = active | inactive end\n"), 0644); err != nil {
+ t.Fatalf("WriteFile(init) error = %v", err)
+ }
+
+ stack, context, state := newStartupTestContext()
+ var defs []MShellDefinition
+ var items []MShellParseItem
+ if err := loadStartupFile(path, "test init", &stack, context, &state, &defs, &items); err != nil {
+ t.Fatalf("loadStartupFile() error = %v", err)
+ }
+
+ info, ok := state.EnumMembers["active"]
+ if !ok {
+ t.Fatalf("expected member 'active' registered from startup file")
+ }
+ if info.EnumName != "Status" || info.Arity != 0 {
+ t.Fatalf("EnumMembers[active] = %+v, want Status arity 0", info)
+ }
+ if len(items) == 0 {
+ t.Fatalf("expected startup items to be retained for the checker")
+ }
+
+ parsed, err := parseMShellInput("active", &TokenFile{"main"})
+ if err != nil {
+ t.Fatalf("parse error: %v", err)
+ }
+ callStackItem := CallStackItem{MShellParseItem: parsed.Items[0], Name: "main", CallStackType: CALLSTACKFILE}
+ result := state.Evaluate(parsed.Items, &stack, context, defs, callStackItem)
+ if !result.Success {
+ t.Fatalf("evaluating member word failed")
+ }
+ if len(stack) != 1 {
+ t.Fatalf("len(stack) = %d, want 1", len(stack))
+ }
+ en, ok := stack[0].(*MShellEnum)
+ if !ok {
+ t.Fatalf("stack top = %T (%s), want *MShellEnum", stack[0], stack[0].DebugString())
+ }
+ if en.EnumName != "Status" || en.Member != "active" {
+ t.Fatalf("enum value = %s.%s, want Status.active", en.EnumName, en.Member)
+ }
+}
diff --git a/mshell/TypeCheckProgram.go b/mshell/TypeCheckProgram.go
index facb13ac..2d01bf9a 100644
--- a/mshell/TypeCheckProgram.go
+++ b/mshell/TypeCheckProgram.go
@@ -43,12 +43,18 @@ import (
// type-checked here — std.msh exercises features (process lists,
// format strings, dynamic exec) the v1 checker does not yet model,
// and we trust the runtime tests catch breakage there.
-func TypeCheckProgram(file *MShellFile, stdlibDefs []MShellDefinition) (errors []string, ok bool) {
+//
+// startupItems is the startup files' top-level parse items; their `type`
+// and `enum` declarations are registered (bodies are not checked, matching
+// the def treatment) so the checker sees the same declarations the runtime
+// does.
+func TypeCheckProgram(file *MShellFile, stdlibDefs []MShellDefinition, startupItems []MShellParseItem) (errors []string, ok bool) {
arena := NewTypeArena()
names := NewNameTable()
checker := NewChecker(arena, names)
checker.RegisterStdlibSigs(stdlibDefs)
+ checker.RegisterStartupTypes(startupItems)
checker.CheckProgram(file)
out := make([]string, 0, len(checker.errors))
@@ -89,6 +95,34 @@ func (c *Checker) RegisterStdlibSigs(defs []MShellDefinition) {
}
}
+// RegisterStartupTypes registers the `type` and `enum` declarations found in
+// the startup files' top-level items (the stdlib, then the user init file),
+// so the checked program sees the same declarations the runtime does. It runs
+// the same three-phase order as CheckProgram's own pre-passes — enum names,
+// then type aliases, then enum payload bodies + constructor words — so
+// startup declarations may reference each other in any order. Call after
+// RegisterStdlibSigs (so a member colliding with a startup def is caught) and
+// before CheckProgram (whose def pre-pass catches the reverse collision).
+func (c *Checker) RegisterStartupTypes(items []MShellParseItem) {
+ var enumDecls []*MShellEnumDecl
+ for _, item := range items {
+ if d, ok := item.(*MShellEnumDecl); ok {
+ if c.predeclareEnum(d) {
+ enumDecls = append(enumDecls, d)
+ }
+ }
+ }
+ for _, item := range items {
+ if d, ok := item.(*MShellTypeDecl); ok {
+ body := c.resolveTypeExpr(d.Body, nil)
+ c.DeclareType(d.Name, body)
+ }
+ }
+ for _, d := range enumDecls {
+ c.defineEnum(d)
+ }
+}
+
// CheckProgram is the file-level type-check pass. It registers all
// type declarations and user-defined function sigs, then walks the
// parse tree driving the type stack. Error accumulation lives on the
diff --git a/mshell/TypeCheckProgram_test.go b/mshell/TypeCheckProgram_test.go
index 21d4a994..b7e646da 100644
--- a/mshell/TypeCheckProgram_test.go
+++ b/mshell/TypeCheckProgram_test.go
@@ -15,7 +15,7 @@ func parseAndCheck(t *testing.T, src string) ([]string, bool) {
if err != nil {
t.Fatalf("parse error: %v", err)
}
- return TypeCheckProgram(file, nil)
+ return TypeCheckProgram(file, nil, nil)
}
func TestTypeCheckProgramEmpty(t *testing.T) {
diff --git a/mshell/TypeEnum_test.go b/mshell/TypeEnum_test.go
index e5eda544..5cd57a7c 100644
--- a/mshell/TypeEnum_test.go
+++ b/mshell/TypeEnum_test.go
@@ -121,3 +121,50 @@ func TestEnumRecursivePayload(t *testing.T) {
t.Fatalf("self-referential enum payload should type-check; errs=%v ok=%v", errs, ok)
}
}
+
+// parseItemsForTest parses source and returns its top-level items, for tests
+// that feed startup-file declarations to the checker.
+func parseItemsForTest(t *testing.T, src string) []MShellParseItem {
+ t.Helper()
+ l := NewLexer(src, nil)
+ p := NewMShellParser(l)
+ file, err := p.ParseFile()
+ if err != nil {
+ t.Fatalf("parse error: %v", err)
+ }
+ return file.Items
+}
+
+func TestStartupEnumAndTypeVisibleToChecker(t *testing.T) {
+ // `enum` and `type` declarations in a startup file (stdlib / init) are
+ // registered before the main program is checked, so the program can
+ // construct members, match on them, and reference the alias — the same
+ // declarations the runtime registers.
+ startup := parseItemsForTest(t, "enum Status = active | inactive end\ntype Tagged = {name: str, s: Status}")
+ l := NewLexer("active match\n active : \"A\" wl,\n inactive : \"I\" wl,\nend\n{ \"name\": \"x\", \"s\": active } as Tagged drop", nil)
+ p := NewMShellParser(l)
+ file, err := p.ParseFile()
+ if err != nil {
+ t.Fatalf("parse error: %v", err)
+ }
+ errs, ok := TypeCheckProgram(file, nil, startup)
+ if !ok || len(errs) != 0 {
+ t.Fatalf("startup enum/type should be visible to the checker; errs=%v ok=%v", errs, ok)
+ }
+}
+
+func TestDefCollidingWithStartupEnumMemberRejected(t *testing.T) {
+ // The member/def collision check spans files: a program def reusing a
+ // startup enum's member name is rejected, same as a same-file collision.
+ startup := parseItemsForTest(t, "enum E = foo | zz end")
+ l := NewLexer("def foo ( -- int) 42 end\nfoo drop", nil)
+ p := NewMShellParser(l)
+ file, err := p.ParseFile()
+ if err != nil {
+ t.Fatalf("parse error: %v", err)
+ }
+ errs, ok := TypeCheckProgram(file, nil, startup)
+ if ok {
+ t.Fatalf("def colliding with startup enum member should fail; errs=%v", errs)
+ }
+}
diff --git a/mshell/lsp.go b/mshell/lsp.go
index a7a34377..b8d55288 100644
--- a/mshell/lsp.go
+++ b/mshell/lsp.go
@@ -41,6 +41,7 @@ type lspServer struct {
envNames map[string]struct{}
candsBuf []string
stdlibDefs []MShellDefinition
+ stdlibItems []MShellParseItem // stdlib top-level items; `type`/`enum` decls registered per diagnostics pass
builtinSigs map[string][]string // name -> formatted "(in -- out)" sigs from the type checker
stdlibHover map[string][]string // name -> formatted sigs for stdlib defs
}
@@ -134,10 +135,11 @@ func RunLSP(in io.Reader, out io.Writer) error {
envNames: make(map[string]struct{}),
}
- if defs, err := loadStdlibDefsForLSP(); err != nil {
+ if defs, items, err := loadStdlibDefsForLSP(); err != nil {
logLSP(fmt.Sprintf("type-check diagnostics: stdlib unavailable (%v); proceeding without stdlib sigs", err))
} else {
server.stdlibDefs = defs
+ server.stdlibItems = items
}
server.builtinSigs, server.stdlibHover = buildHoverIndex(server.stdlibDefs)
@@ -181,23 +183,23 @@ func buildHoverIndex(stdlibDefs []MShellDefinition) (map[string][]string, map[st
// MSHSTDLIB if set, else the version-keyed install path), parses it,
// and returns its definitions. The bodies are not evaluated; we only
// need the signatures to register as builtins for the type-checker.
-func loadStdlibDefsForLSP() ([]MShellDefinition, error) {
+func loadStdlibDefsForLSP() ([]MShellDefinition, []MShellParseItem, error) {
stdlibSpec, _, err := getStartupFileSpecs(startupLoadOptions{
version: mshellVersion,
allowEnvOverrides: true,
})
if err != nil {
- return nil, err
+ return nil, nil, err
}
source, err := os.ReadFile(stdlibSpec.path)
if err != nil {
- return nil, err
+ return nil, nil, err
}
parsed, err := parseMShellInput(string(source), &TokenFile{stdlibSpec.path})
if err != nil {
- return nil, err
+ return nil, nil, err
}
- return parsed.Definitions, nil
+ return parsed.Definitions, parsed.Items, nil
}
func (s *lspServer) run() error {
@@ -548,6 +550,7 @@ func (s *lspServer) computeDiagnostics(text string) []protocol.Diagnostic {
names := NewNameTable()
checker := NewChecker(arena, names)
checker.RegisterStdlibSigs(s.stdlibDefs)
+ checker.RegisterStartupTypes(s.stdlibItems)
checker.CheckProgram(file)
errs := checker.Errors()
diff --git a/mshell/lsp_test.go b/mshell/lsp_test.go
index 689d5a33..d92d4a72 100644
--- a/mshell/lsp_test.go
+++ b/mshell/lsp_test.go
@@ -1328,7 +1328,7 @@ func TestCompletionWordIncludesBuiltinAndStdlib(t *testing.T) {
}
func TestBuildHoverIndexCoversTypedBuiltinsAndStdlib(t *testing.T) {
- stdlibDefs, err := loadStdlibDefsForLSP()
+ stdlibDefs, _, err := loadStdlibDefsForLSP()
if err != nil {
t.Skipf("stdlib not available in test environment: %v", err)
}
From debe03e77c25bfcd378c6d08b61e249eb4e17562 Mon Sep 17 00:00:00 2001
From: Mitchell Paulus
Date: Wed, 1 Jul 2026 21:40:44 -0500
Subject: [PATCH 21/32] Unify render/JSON/equality into shared iterative
walkers over all containers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Deep values with ALTERNATING container kinds crashed with a fatal Go stack
overflow: `enum E = m Maybe[E] | z end` built ~4M deep (a linked list with
optional next — a natural shape) killed `str`, `toJson`, and `=`. The earlier
per-type stack-safety fixes only covered pure enum nesting — each iterative
walker delegated non-enum payloads to that child's own recursive method
(Maybe.ToString, Maybe.Equals, list/dict ToJson, ...), so every
enum→Maybe→enum cycle added Go frames and alternation restored O(depth)
recursion. compareValues was already immune because it expands every kind
inline; this applies the same design to the other two walks:
- renderValue(obj, flavor): one work-stack renderer for ToString /
DebugString / ToJson, expanding enum, Maybe, list, dict, and pipe inline
with each kind's exact existing format (flavor switches per child the way
the old methods did: list children render as DebugString, a dict's `str`
form is its JSON, enum payloads render as ToString). Scalars, grids, and
quotations stay leaves via their own methods.
- equalsIter(a, b): one pair-stack equality walk expanding the same kinds
inline, keeping the sameRef fast path and the dagGuard shared-substructure
memo that previously lived only on the enum walk (so lists/dicts/Maybes
now get DAG protection too).
All the per-type methods (enum, Maybe, list, dict, pipe) are one-line
routings into the shared walkers; enumRender, the enum ToJson walker, the
old recursive bodies, itemsEqual, and DebugStrs are deleted. Net -30 lines.
Output is byte-identical across the suites, with two deliberate changes:
multi-key dict DebugString now emits sorted keys (it iterated Go map order —
nondeterministic — before), and dict equality no longer short-circuits on a
TypeName mismatch, so a str and a literal with equal text compare equal
inside dicts exactly as they do at top level.
The 4M alternating chain now renders (18M chars), serializes (14M), and
compares cleanly; enum↔dict alternation at 500k likewise. Regression test:
tests/success/enum_alternating_deep.msh (50k, mirroring the deep-test
family). Suites green: tests 220, typecheck 208, go test.
Co-Authored-By: Claude Fable 5
Claude-Session: https://claude.ai/code/session_01E3aH9BBud5rjDoHUF8daaJ
---
mshell/MShellObject.go | 573 ++++++++----------
tests/success/enum_alternating_deep.msh | 25 +
.../success/enum_alternating_deep.msh.stdout | 4 +
3 files changed, 286 insertions(+), 316 deletions(-)
create mode 100644 tests/success/enum_alternating_deep.msh
create mode 100644 tests/success/enum_alternating_deep.msh.stdout
diff --git a/mshell/MShellObject.go b/mshell/MShellObject.go
index 6abf3541..27107aaa 100644
--- a/mshell/MShellObject.go
+++ b/mshell/MShellObject.go
@@ -199,10 +199,7 @@ func (m Maybe) CommandLine() string {
// This is meant for things like error messages, should be limited in length to 30 chars or so.
func (m Maybe) DebugString() string {
- if m.obj == nil {
- return "None"
- }
- return fmt.Sprintf("Maybe(%s)", m.obj.DebugString())
+ return renderValue(m, flavorDebug)
}
func (m Maybe) Index(index int) (MShellObject, error) {
return nil, fmt.Errorf("Cannot index into a Maybe.\n")
@@ -221,17 +218,11 @@ func (m Maybe) Slice(startInc int, endExc int) (MShellObject, error) {
}
func (m Maybe) ToJson() string {
- if m.obj == nil {
- return "null"
- }
- return m.obj.ToJson()
+ return renderValue(m, flavorJson)
}
func (m Maybe) ToString() string {
- if m.obj == nil {
- return "None"
- }
- return fmt.Sprintf("Just(%s)", m.obj.ToString())
+ return renderValue(m, flavorStr)
}
func (m Maybe) IndexErrStr() string {
@@ -243,24 +234,7 @@ func (m Maybe) Concat(other MShellObject) (MShellObject, error) {
}
func (m Maybe) Equals(other MShellObject) (bool, error) {
- // Maybe values are constructed as *Maybe at runtime, so accept either the
- // value or pointer form — a plain other.(Maybe) misses every *Maybe and
- // would make all Maybe-vs-Maybe comparisons (including None==None) false.
- otherMaybe, ok := asMaybe(other)
- if !ok {
- return false, nil
- }
-
- if m.obj == nil && otherMaybe.obj == nil {
- return true, nil
- }
-
- if m.obj == nil || otherMaybe.obj == nil {
- return false, nil
- }
-
- equal, err := m.obj.Equals(otherMaybe.obj)
- return equal, err
+ return equalsIter(m, other)
}
func (m Maybe) CastString() (string, error) {
@@ -366,54 +340,8 @@ func (e *MShellEnum) TypeName() string { return e.EnumName }
func (e *MShellEnum) IsCommandLineable() bool { return true }
func (e *MShellEnum) IsNumeric() bool { return false }
func (e *MShellEnum) FloatNumeric() float64 { return 0 }
-func (e *MShellEnum) CommandLine() string { return enumRender(e) }
-func (e *MShellEnum) DebugString() string { return enumRender(e) }
-
-// enumRender renders an enum value as `member` (nullary) or
-// `member(p0 p1 ...)`. Nested enum payloads are expanded with an explicit
-// work stack rather than function recursion, so an arbitrarily deep value
-// (e.g. a long `node(node(... ) ...)` chain) cannot overflow the call stack.
-// Non-enum payloads use their own ToString.
-func enumRender(top *MShellEnum) string {
- var sb strings.Builder
- type task struct {
- lit string
- obj MShellObject
- isLit bool
- }
- stack := []task{{obj: top}}
- for len(stack) > 0 {
- t := stack[len(stack)-1]
- stack = stack[:len(stack)-1]
- if t.isLit {
- sb.WriteString(t.lit)
- continue
- }
- en, ok := t.obj.(*MShellEnum)
- if !ok {
- sb.WriteString(t.obj.ToString())
- continue
- }
- if len(en.Payload) == 0 {
- sb.WriteString(en.Member)
- continue
- }
- // Emit `member ( p0 " " p1 ... )`; push reversed so it pops in order.
- seq := make([]task, 0, len(en.Payload)*2+3)
- seq = append(seq, task{lit: en.Member, isLit: true}, task{lit: "(", isLit: true})
- for i, p := range en.Payload {
- if i > 0 {
- seq = append(seq, task{lit: " ", isLit: true})
- }
- seq = append(seq, task{obj: p})
- }
- seq = append(seq, task{lit: ")", isLit: true})
- for i := len(seq) - 1; i >= 0; i-- {
- stack = append(stack, seq[i])
- }
- }
- return sb.String()
-}
+func (e *MShellEnum) CommandLine() string { return renderValue(e, flavorStr) }
+func (e *MShellEnum) DebugString() string { return renderValue(e, flavorStr) }
func (e *MShellEnum) Index(index int) (MShellObject, error) {
return nil, fmt.Errorf("Cannot index into an enum.\n")
@@ -434,106 +362,21 @@ func (e *MShellEnum) Slice(startInc int, endExc int) (MShellObject, error) {
// ToJson uses serde's externally-tagged convention — the de-facto standard for
// tagged unions in JSON: a nullary member is the bare member string; a member
// with a single payload is `{"member": value}`; with several, `{"member":
-// [v0, v1, ...]}`. Like enumRender, nested enum payloads are expanded with an
-// explicit work stack rather than function recursion, so an arbitrarily deep
-// value cannot overflow the call stack; output is appended to a single builder
-// (no intermediate per-subtree strings), making it O(total output size).
-// Non-enum payloads delegate to their own ToJson.
+// [v0, v1, ...]}`. Rendering runs on renderValue's shared work stack, so an
+// arbitrarily deep value cannot overflow the call stack.
func (e *MShellEnum) ToJson() string {
- var sb strings.Builder
- type task struct {
- lit string
- obj MShellObject
- isLit bool
- }
- stack := []task{{obj: e}}
- for len(stack) > 0 {
- t := stack[len(stack)-1]
- stack = stack[:len(stack)-1]
- if t.isLit {
- sb.WriteString(t.lit)
- continue
- }
- en, ok := t.obj.(*MShellEnum)
- if !ok {
- sb.WriteString(t.obj.ToJson())
- continue
- }
- if len(en.Payload) == 0 {
- fmt.Fprintf(&sb, "%q", en.Member)
- continue
- }
- // Emit `{"member": value}` (single payload) or
- // `{"member": [v0, v1, ...]}` (several); push reversed so it pops in
- // order, with enum payloads re-expanded by this same loop.
- seq := make([]task, 0, len(en.Payload)*2+4)
- seq = append(seq, task{lit: fmt.Sprintf("{%q: ", en.Member), isLit: true})
- if len(en.Payload) == 1 {
- seq = append(seq, task{obj: en.Payload[0]})
- } else {
- seq = append(seq, task{lit: "[", isLit: true})
- for i, p := range en.Payload {
- if i > 0 {
- seq = append(seq, task{lit: ", ", isLit: true})
- }
- seq = append(seq, task{obj: p})
- }
- seq = append(seq, task{lit: "]", isLit: true})
- }
- seq = append(seq, task{lit: "}", isLit: true})
- for i := len(seq) - 1; i >= 0; i-- {
- stack = append(stack, seq[i])
- }
- }
- return sb.String()
+ return renderValue(e, flavorJson)
}
-func (e *MShellEnum) ToString() string { return enumRender(e) }
+func (e *MShellEnum) ToString() string { return renderValue(e, flavorStr) }
func (e *MShellEnum) IndexErrStr() string { return "" }
func (e *MShellEnum) Concat(other MShellObject) (MShellObject, error) {
return nil, fmt.Errorf("Cannot concatenate an enum.\n")
}
-// Equals compares two enum values structurally. Nested enum payloads are
-// walked with an explicit pair stack rather than function recursion, so two
-// arbitrarily deep values cannot overflow the call stack; only non-enum
-// payloads (the leaves) delegate to their own Equals.
func (e *MShellEnum) Equals(other MShellObject) (bool, error) {
- type pair struct{ a, b MShellObject }
- var guard dagGuard
- stack := []pair{{a: e, b: other}}
- for len(stack) > 0 {
- p := stack[len(stack)-1]
- stack = stack[:len(stack)-1]
- // Shared substructure: a pointer-identical pair is equal by
- // definition, and a pair this walk already expanded compared equal
- // (see dagGuard). Skipping both keeps DAG-shaped values (a subtree
- // reused twice per level) linear instead of 2^n.
- if sameRef(p.a, p.b) || guard.skip(p.a, p.b) {
- continue
- }
- ea, aok := p.a.(*MShellEnum)
- eb, bok := p.b.(*MShellEnum)
- if aok || bok {
- // At least one side is an enum: equal only if both are enums with
- // the same name, member, and arity. Payloads are deferred onto the
- // stack so this never re-enters Equals on an enum.
- if !aok || !bok || ea.EnumName != eb.EnumName || ea.Member != eb.Member || len(ea.Payload) != len(eb.Payload) {
- return false, nil
- }
- for i := range ea.Payload {
- stack = append(stack, pair{a: ea.Payload[i], b: eb.Payload[i]})
- }
- continue
- }
- // Neither side is an enum: compare by their own equality.
- eq, err := p.a.Equals(p.b)
- if err != nil || !eq {
- return false, err
- }
- }
- return true, nil
+ return equalsIter(e, other)
}
func (e *MShellEnum) CastString() (string, error) { return e.Member, nil }
@@ -662,16 +505,7 @@ func (*MShellDict) CommandLine() string {
// This is meant for things like error messages, should be limited in length to 30 chars or so.
func (d *MShellDict) DebugString() string {
- // TODO: implement this
-
- sb := strings.Builder{}
- sb.WriteString("Dictionary{")
- for key, value := range d.Items {
- sb.WriteString(fmt.Sprintf("%s: %s, ", key, value.DebugString()))
- }
- sb.WriteString("}")
- return sb.String()
-
+ return renderValue(d, flavorDebug)
}
func (*MShellDict) Index(index int) (MShellObject, error) {
return nil, fmt.Errorf("Cannot index into a dictionary.\n")
@@ -687,43 +521,7 @@ func (*MShellDict) Slice(startInc int, endExc int) (MShellObject, error) {
return nil, fmt.Errorf("Cannot slice a dictionary.\n")
}
func (d *MShellDict) ToJson() string {
- var sb strings.Builder
-
- if len(d.Items) == 0 {
- return "{}"
- }
-
- if len(d.Items) == 1 {
- for key, value := range d.Items {
- keyEnc, _ := json.Marshal(key)
- return fmt.Sprintf("{%s: %s}", string(keyEnc), value.ToJson())
- }
- }
-
- keys := make([]string, 0, len(d.Items))
- for key := range d.Items {
- keys = append(keys, key)
- }
- sort.Strings(keys)
-
- sb.WriteString("{")
-
- // Write the first key-value pair
- firstKey := keys[0]
- firstValue := d.Items[firstKey]
-
- firstKeyEnc, _ := json.Marshal(firstKey)
- sb.WriteString(fmt.Sprintf("%s: %s", string(firstKeyEnc), firstValue.ToJson()))
-
- for _, key := range keys[1:] {
- value := d.Items[key]
- keyEnc, _ := json.Marshal(key)
- sb.WriteString(fmt.Sprintf(", %s: %s", string(keyEnc), value.ToJson()))
- }
-
- sb.WriteString("}")
-
- return sb.String()
+ return renderValue(d, flavorJson)
}
func (d *MShellDict) ToString() string { // This is what is used with 'str' command
@@ -739,51 +537,7 @@ func (*MShellDict) Concat(other MShellObject) (MShellObject, error) {
}
func (thisDict *MShellDict) Equals(other MShellObject) (bool, error) {
- thisKeys := make([]string, 0, len(thisDict.Items))
- for key := range thisDict.Items {
- thisKeys = append(thisKeys, key)
- }
- sort.Strings(thisKeys)
-
- otherDict, ok := other.(*MShellDict)
- if !ok {
- return false, nil
- }
-
- otherKeys := make([]string, 0, len(otherDict.Items))
- for key := range otherDict.Items {
- otherKeys = append(otherKeys, key)
- }
- sort.Strings(otherKeys)
-
- if len(thisKeys) != len(otherKeys) {
- return false, nil
- }
-
- for i, key := range thisKeys {
- if key != otherKeys[i] {
- return false, nil
- }
- }
-
- for _, key := range thisKeys {
- thisValue := thisDict.Items[key]
- otherValue := otherDict.Items[key]
-
- if thisValue.TypeName() != otherValue.TypeName() {
- return false, nil
- }
-
- equal, err := thisValue.Equals(otherValue)
- if err != nil {
- return false, err
- }
- if !equal {
- return false, nil
- }
- }
-
- return true, nil
+ return equalsIter(thisDict, other)
}
// This is meant for completely unambiougous conversion to a string value.
@@ -1200,6 +954,242 @@ func refPairKey(a, b MShellObject) (refPair, bool) {
return refPair{}, false
}
+// renderFlavor selects which of a value's three textual forms renderValue
+// emits: flavorStr is ToString (the `str` form), flavorDebug is DebugString
+// (stack dumps, list display), flavorJson is ToJson. Containers pick their
+// children's flavor the same way the per-type methods always did: a list
+// renders children as DebugString, a dict's `str` form is its JSON form, an
+// enum renders payloads with ToString, and Maybe keeps its own flavor.
+type renderFlavor uint8
+
+const (
+ flavorStr renderFlavor = iota
+ flavorDebug
+ flavorJson
+)
+
+type renderTask struct {
+ lit string
+ obj MShellObject
+ flavor renderFlavor
+ isLit bool
+}
+
+func renderLit(s string) renderTask { return renderTask{lit: s, isLit: true} }
+
+// renderJoin builds the task sequence `open item0 sep item1 sep ... close`,
+// rendering each item in the given flavor.
+func renderJoin(open, sep, close string, items []MShellObject, flavor renderFlavor) []renderTask {
+ seq := make([]renderTask, 0, len(items)*2+2)
+ if open != "" {
+ seq = append(seq, renderLit(open))
+ }
+ for i, it := range items {
+ if i > 0 {
+ seq = append(seq, renderLit(sep))
+ }
+ seq = append(seq, renderTask{obj: it, flavor: flavor})
+ }
+ if close != "" {
+ seq = append(seq, renderLit(close))
+ }
+ return seq
+}
+
+// renderValue renders a value in the requested flavor with one explicit work
+// stack instead of method recursion, expanding every container kind — enum,
+// Maybe, list, dict, pipe — inline. Arbitrarily deep values therefore cannot
+// overflow the call stack even when kinds alternate (enum→Maybe→enum, ...),
+// which per-type iterative renderers could not guarantee: each one delegated
+// other kinds to the child's own recursive method. Leaf kinds (scalars,
+// grids, quotations) still render via their own methods; their nesting depth
+// is bounded by their own structure.
+func renderValue(root MShellObject, flavor renderFlavor) string {
+ var sb strings.Builder
+ stack := []renderTask{{obj: root, flavor: flavor}}
+ // push schedules seq to pop in order (reversed onto the LIFO stack).
+ push := func(seq []renderTask) {
+ for i := len(seq) - 1; i >= 0; i-- {
+ stack = append(stack, seq[i])
+ }
+ }
+ for len(stack) > 0 {
+ t := stack[len(stack)-1]
+ stack = stack[:len(stack)-1]
+ if t.isLit {
+ sb.WriteString(t.lit)
+ continue
+ }
+ if m, ok := asMaybe(t.obj); ok {
+ switch {
+ case m.IsNone() && t.flavor == flavorJson:
+ sb.WriteString("null")
+ case m.IsNone():
+ sb.WriteString("None")
+ case t.flavor == flavorJson:
+ push([]renderTask{{obj: m.obj, flavor: flavorJson}})
+ case t.flavor == flavorDebug:
+ push([]renderTask{renderLit("Maybe("), {obj: m.obj, flavor: flavorDebug}, renderLit(")")})
+ default:
+ push([]renderTask{renderLit("Just("), {obj: m.obj, flavor: flavorStr}, renderLit(")")})
+ }
+ continue
+ }
+ switch v := t.obj.(type) {
+ case *MShellEnum:
+ if t.flavor == flavorJson {
+ // serde's externally-tagged convention: a nullary member is
+ // the bare member string, one payload is {"member": value},
+ // several are {"member": [v0, v1, ...]}.
+ if len(v.Payload) == 0 {
+ fmt.Fprintf(&sb, "%q", v.Member)
+ continue
+ }
+ seq := make([]renderTask, 0, len(v.Payload)*2+4)
+ seq = append(seq, renderLit(fmt.Sprintf("{%q: ", v.Member)))
+ if len(v.Payload) == 1 {
+ seq = append(seq, renderTask{obj: v.Payload[0], flavor: flavorJson})
+ } else {
+ seq = append(seq, renderJoin("[", ", ", "]", v.Payload, flavorJson)...)
+ }
+ seq = append(seq, renderLit("}"))
+ push(seq)
+ continue
+ }
+ // `member` (nullary) or `member(p0 p1 ...)`, payloads as ToString.
+ if len(v.Payload) == 0 {
+ sb.WriteString(v.Member)
+ continue
+ }
+ push(renderJoin(v.Member+"(", " ", ")", v.Payload, flavorStr))
+ case *MShellList:
+ if t.flavor == flavorJson {
+ push(renderJoin("[", ", ", "]", v.Items, flavorJson))
+ } else {
+ push(renderJoin("[", " ", "]", v.Items, flavorDebug))
+ }
+ case *MShellPipe:
+ if t.flavor == flavorJson {
+ push(renderJoin("[", ", ", "]", v.List.Items, flavorJson))
+ } else {
+ push(renderJoin("", " | ", "", v.List.Items, flavorDebug))
+ }
+ case *MShellDict:
+ keys := sortedDictKeys(v.Items)
+ if t.flavor == flavorDebug {
+ seq := make([]renderTask, 0, len(keys)*3+2)
+ seq = append(seq, renderLit("Dictionary{"))
+ for _, k := range keys {
+ seq = append(seq, renderLit(k+": "), renderTask{obj: v.Items[k], flavor: flavorDebug}, renderLit(", "))
+ }
+ seq = append(seq, renderLit("}"))
+ push(seq)
+ continue
+ }
+ // The `str` form of a dict is its JSON form.
+ if len(keys) == 0 {
+ sb.WriteString("{}")
+ continue
+ }
+ seq := make([]renderTask, 0, len(keys)*2+2)
+ seq = append(seq, renderLit("{"))
+ for i, k := range keys {
+ keyEnc, _ := json.Marshal(k)
+ if i > 0 {
+ seq = append(seq, renderLit(", "))
+ }
+ seq = append(seq, renderLit(string(keyEnc)+": "), renderTask{obj: v.Items[k], flavor: flavorJson})
+ }
+ seq = append(seq, renderLit("}"))
+ push(seq)
+ default:
+ switch t.flavor {
+ case flavorDebug:
+ sb.WriteString(t.obj.DebugString())
+ case flavorJson:
+ sb.WriteString(t.obj.ToJson())
+ default:
+ sb.WriteString(t.obj.ToString())
+ }
+ }
+ }
+ return sb.String()
+}
+
+// equalsIter is structural equality over any two values, walked with one
+// explicit pair stack that expands every container kind — enum, Maybe, list,
+// dict, pipe — inline, so deep values cannot overflow the call stack even
+// when kinds alternate. Pointer-identical pairs are skipped (equal by
+// definition), and past a step threshold already-expanded pairs are memoized
+// (see dagGuard), so shared substructure cannot blow up exponentially. Leaf
+// kinds compare via their own Equals.
+func equalsIter(a, b MShellObject) (bool, error) {
+ type pair struct{ a, b MShellObject }
+ var guard dagGuard
+ stack := []pair{{a: a, b: b}}
+ for len(stack) > 0 {
+ p := stack[len(stack)-1]
+ stack = stack[:len(stack)-1]
+ if sameRef(p.a, p.b) || guard.skip(p.a, p.b) {
+ continue
+ }
+ if am, aok := asMaybe(p.a); aok {
+ bm, bok := asMaybe(p.b)
+ if !bok || am.IsNone() != bm.IsNone() {
+ return false, nil
+ }
+ if !am.IsNone() {
+ stack = append(stack, pair{a: am.obj, b: bm.obj})
+ }
+ continue
+ }
+ switch av := p.a.(type) {
+ case *MShellEnum:
+ bv, ok := p.b.(*MShellEnum)
+ if !ok || av.EnumName != bv.EnumName || av.Member != bv.Member || len(av.Payload) != len(bv.Payload) {
+ return false, nil
+ }
+ for i := range av.Payload {
+ stack = append(stack, pair{a: av.Payload[i], b: bv.Payload[i]})
+ }
+ case *MShellList:
+ bv, ok := p.b.(*MShellList)
+ if !ok || len(av.Items) != len(bv.Items) {
+ return false, nil
+ }
+ for i := range av.Items {
+ stack = append(stack, pair{a: av.Items[i], b: bv.Items[i]})
+ }
+ case *MShellPipe:
+ bv, ok := p.b.(*MShellPipe)
+ if !ok || len(av.List.Items) != len(bv.List.Items) {
+ return false, nil
+ }
+ for i := range av.List.Items {
+ stack = append(stack, pair{a: av.List.Items[i], b: bv.List.Items[i]})
+ }
+ case *MShellDict:
+ bv, ok := p.b.(*MShellDict)
+ if !ok || len(av.Items) != len(bv.Items) {
+ return false, nil
+ }
+ for key, aval := range av.Items {
+ bval, ok := bv.Items[key]
+ if !ok {
+ return false, nil
+ }
+ stack = append(stack, pair{a: aval, b: bval})
+ }
+ default:
+ eq, err := p.a.Equals(p.b)
+ if err != nil || !eq {
+ return eq, err
+ }
+ }
+ }
+ return true, nil
+}
+
// compareValues returns -1, 0, or 1, giving a total order over every value
// type. Different kinds are ordered by a fixed type rank (valueTypeRank); within
// a kind the natural order is used (numbers numerically with int/float
@@ -1660,19 +1650,6 @@ func (obj MShellFloat) CommandLine() string {
return strconv.FormatFloat(obj.Value, 'f', -1, 64)
}
-// DebugString
-func DebugStrs(objs []MShellObject) []string {
- debugStrs := make([]string, len(objs))
- for i, obj := range objs {
- if obj == nil {
- debugStrs[i] = "nil"
- } else {
- debugStrs[i] = obj.DebugString()
- }
- }
- return debugStrs
-}
-
func (obj MShellLiteral) DebugString() string {
return obj.LiteralText
}
@@ -1706,8 +1683,8 @@ func (obj *MShellQuotation) DebugString() string {
}
func (obj *MShellList) DebugString() string {
- // Join the tokens with a space, surrounded by '[' and ']'
- return "[" + strings.Join(DebugStrs(obj.Items), " ") + "]"
+ // Elements joined with a space, surrounded by '[' and ']'
+ return renderValue(obj, flavorDebug)
}
func cleanStringForTerminal(input string) string {
@@ -1752,8 +1729,8 @@ func (obj MShellPath) DebugString() string {
}
func (obj *MShellPipe) DebugString() string {
- // Join each item with a ' | '
- return strings.Join(DebugStrs(obj.List.Items), " | ")
+ // Each item joined with ' | '
+ return renderValue(obj, flavorDebug)
}
func (obj MShellInt) DebugString() string {
@@ -2249,17 +2226,7 @@ func (obj *MShellQuotation) ToJson() string {
}
func (obj *MShellList) ToJson() string {
- builder := strings.Builder{}
- builder.WriteString("[")
- if len(obj.Items) > 0 {
- builder.WriteString(obj.Items[0].ToJson())
- for _, item := range obj.Items[1:] {
- builder.WriteString(", ")
- builder.WriteString(item.ToJson())
- }
- }
- builder.WriteString("]")
- return builder.String()
+ return renderValue(obj, flavorJson)
}
func (obj MShellString) ToJson() string {
@@ -2492,24 +2459,6 @@ func (obj MShellBool) Equals(other MShellObject) (bool, error) {
return obj.Value == asBool.Value, nil
}
-// itemsEqual compares two object slices element-wise by structural equality.
-func itemsEqual(a, b []MShellObject) (bool, error) {
- if len(a) != len(b) {
- return false, nil
- }
- for i := range a {
- // Pointer-identical elements are equal by definition; skipping them
- // keeps lists with shared substructure from re-walking it.
- if sameRef(a[i], b[i]) {
- continue
- }
- eq, err := a[i].Equals(b[i])
- if err != nil || !eq {
- return eq, err
- }
- }
- return true, nil
-}
func (obj *MShellQuotation) Equals(other MShellObject) (bool, error) {
// Quotations are code values; two are equal only when they are the same
@@ -2519,11 +2468,7 @@ func (obj *MShellQuotation) Equals(other MShellObject) (bool, error) {
}
func (obj *MShellList) Equals(other MShellObject) (bool, error) {
- o, ok := other.(*MShellList)
- if !ok {
- return false, nil
- }
- return itemsEqual(obj.Items, o.Items)
+ return equalsIter(obj, other)
}
func (obj MShellString) Equals(other MShellObject) (bool, error) {
@@ -2555,11 +2500,7 @@ func (obj MShellPath) Equals(other MShellObject) (bool, error) {
}
func (obj *MShellPipe) Equals(other MShellObject) (bool, error) {
- o, ok := other.(*MShellPipe)
- if !ok {
- return false, nil
- }
- return itemsEqual(obj.List.Items, o.List.Items)
+ return equalsIter(obj, other)
}
func (obj MShellInt) Equals(other MShellObject) (bool, error) {
diff --git a/tests/success/enum_alternating_deep.msh b/tests/success/enum_alternating_deep.msh
new file mode 100644
index 00000000..9ae0be88
--- /dev/null
+++ b/tests/success/enum_alternating_deep.msh
@@ -0,0 +1,25 @@
+# Deeply nested values must render, serialize, and compare without overflowing
+# even when container kinds alternate: rendering/JSON/equality all run on one
+# shared work-stack walker (renderValue / equalsIter) that expands enum, Maybe,
+# list, dict, and pipe inline. Per-type iterative walkers were not enough — an
+# enum→Maybe→enum chain re-entered each type's recursive method and overflowed
+# the Go stack well before this depth.
+enum E = m Maybe[E] | z end
+z e!
+0 i!
+(
+ @i 50000 >= if break end
+ @e just m e!
+ @i 1 + i!
+) loop
+@e str len str wl
+@e toJson len str wl
+z e2!
+0 i!
+(
+ @i 50000 >= if break end
+ @e2 just m e2!
+ @i 1 + i!
+) loop
+@e @e2 = str wl
+@e @e2 just m = str wl
diff --git a/tests/success/enum_alternating_deep.msh.stdout b/tests/success/enum_alternating_deep.msh.stdout
new file mode 100644
index 00000000..731afcab
--- /dev/null
+++ b/tests/success/enum_alternating_deep.msh.stdout
@@ -0,0 +1,4 @@
+450001
+350003
+true
+false
From 0d78c14689ab4affe3ed1c64cfafee6293b0961f Mon Sep 17 00:00:00 2001
From: Mitchell Paulus
Date: Thu, 2 Jul 2026 18:58:20 -0500
Subject: [PATCH 22/32] Error on rendering cyclic values instead of hanging
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
A list appended into itself (in-place `append`) is a genuinely cyclic value,
and an enum payload list can close a cycle through the enum
(`enum Box = wrap [Box] | z end`). Rendering one never terminated: the
work-stack renderer re-expanded the same pointer forever, growing output and
task stack without bound (the old recursive renderers at least died fast with
a stack overflow). Equality and sorting already terminate via the
pointer-identity fast path and pair memoization.
mshell is strict — a cycle is always the degenerate artifact of appending a
container into itself, never a meaningful value — so user-facing conversion
now errors: `str` and `toJson` on a cyclic value fail with "Cannot convert a
cyclic value (a container that contains itself) to a string/JSON".
renderValueDetect tracks the containers currently being expanded as an
on-path set (pointer kinds only; a DAG merely revisits a *finished* pointer
and still renders fine), unwinding via exit sentinels. Reaching an on-path
pointer emits a cycle marker and reports cycled=true. Internal rendering
(DebugString in error messages, stack dumps) stays total via the marker —
those paths cannot propagate errors and must never hang. The on-path lookup
is gated on the pointer-kind check, since hashing an interface holding an
unhashable dynamic type (MShellBinary, a []byte) panics even on a map read.
Test: tests/fail/cyclic_render.msh (cycle equality terminates, then `str`
errors). Suites green: tests 221, typecheck 208, go test.
Co-Authored-By: Claude Fable 5
Claude-Session: https://claude.ai/code/session_01E3aH9BBud5rjDoHUF8daaJ
---
mshell/Evaluator.go | 11 +++-
mshell/MShellObject.go | 98 +++++++++++++++++++++++------
tests/fail/cyclic_render.msh | 10 +++
tests/fail/cyclic_render.msh.stderr | 1 +
4 files changed, 98 insertions(+), 22 deletions(-)
create mode 100644 tests/fail/cyclic_render.msh
create mode 100644 tests/fail/cyclic_render.msh.stderr
diff --git a/mshell/Evaluator.go b/mshell/Evaluator.go
index 9161a63b..9c220940 100644
--- a/mshell/Evaluator.go
+++ b/mshell/Evaluator.go
@@ -9442,7 +9442,10 @@ func (state *EvalState) evaluateToken(t Token, stack *MShellStack, context Execu
return state.FailWithMessage(fmt.Sprintf("%d:%d: Cannot do 'toJson' operation on an empty stack.\n", t.Line, t.Column))
}
- jsonStr := obj1.ToJson()
+ jsonStr, cycled := renderValueDetect(obj1, flavorJson)
+ if cycled {
+ return state.FailWithMessage(fmt.Sprintf("%d:%d: Cannot convert a cyclic value (a container that contains itself) to JSON.\n", t.Line, t.Column))
+ }
stack.Push(MShellString{jsonStr})
} else if t.Lexeme == "typeof" {
obj1, err := stack.Pop()
@@ -12481,7 +12484,11 @@ func (state *EvalState) evaluateToken(t Token, stack *MShellStack, context Execu
return state.FailWithMessage(fmt.Sprintf("%d:%d: Cannot convert an empty stack to a string.\n", t.Line, t.Column))
}
- stack.Push(MShellString{obj.ToString()})
+ strVal, cycled := renderValueDetect(obj, flavorStr)
+ if cycled {
+ return state.FailWithMessage(fmt.Sprintf("%d:%d: Cannot convert a cyclic value (a container that contains itself) to a string.\n", t.Line, t.Column))
+ }
+ stack.Push(MShellString{strVal})
} else if t.Type == INDEXER { // Token Type
obj1, err := stack.Pop()
if err != nil {
diff --git a/mshell/MShellObject.go b/mshell/MShellObject.go
index 27107aaa..7b5fe552 100644
--- a/mshell/MShellObject.go
+++ b/mshell/MShellObject.go
@@ -973,6 +973,9 @@ type renderTask struct {
obj MShellObject
flavor renderFlavor
isLit bool
+ // isExit marks the sentinel popped after a container's children have
+ // rendered; it removes the container from the on-path cycle set.
+ isExit bool
}
func renderLit(s string) renderTask { return renderTask{lit: s, isLit: true} }
@@ -996,16 +999,47 @@ func renderJoin(open, sep, close string, items []MShellObject, flavor renderFlav
return seq
}
-// renderValue renders a value in the requested flavor with one explicit work
-// stack instead of method recursion, expanding every container kind — enum,
-// Maybe, list, dict, pipe — inline. Arbitrarily deep values therefore cannot
-// overflow the call stack even when kinds alternate (enum→Maybe→enum, ...),
-// which per-type iterative renderers could not guarantee: each one delegated
-// other kinds to the child's own recursive method. Leaf kinds (scalars,
-// grids, quotations) still render via their own methods; their nesting depth
-// is bounded by their own structure.
+// cycleTrackable reports whether obj is a heap container that could sit on a
+// reference cycle (built via in-place list/dict mutation, e.g. a list appended
+// to itself). Only pointer kinds qualify — value kinds are copied and cannot
+// be revisited by identity.
+func cycleTrackable(obj MShellObject) bool {
+ switch obj.(type) {
+ case *MShellEnum, *MShellList, *MShellDict, *Maybe, *MShellPipe:
+ return true
+ }
+ return false
+}
+
+// renderValue renders a value in the requested flavor. It is total: a cyclic
+// value renders with a cycle marker at the back-reference, which keeps
+// internal rendering (error messages, stack dumps) from hanging. User-facing
+// operations (`str`, `toJson`) call renderValueDetect instead and report a
+// cyclic value as an error — mshell is strict, so a cycle is always the
+// degenerate result of appending a container into itself, not a value with a
+// meaningful rendering.
func renderValue(root MShellObject, flavor renderFlavor) string {
+ s, _ := renderValueDetect(root, flavor)
+ return s
+}
+
+// renderValueDetect renders a value in the requested flavor with one explicit
+// work stack instead of method recursion, expanding every container kind —
+// enum, Maybe, list, dict, pipe — inline. Arbitrarily deep values therefore
+// cannot overflow the call stack even when kinds alternate (enum→Maybe→enum,
+// ...), which per-type iterative renderers could not guarantee: each one
+// delegated other kinds to the child's own recursive method. Leaf kinds
+// (scalars, grids, quotations) still render via their own methods; their
+// nesting depth is bounded by their own structure.
+//
+// Containers currently being expanded are tracked as an on-path set; reaching
+// one again is a true reference cycle (a DAG merely revisits a finished
+// pointer, which is fine), so the walk emits a cycle marker instead of descending
+// and reports cycled=true.
+func renderValueDetect(root MShellObject, flavor renderFlavor) (string, bool) {
var sb strings.Builder
+ cycled := false
+ var onPath map[MShellObject]bool
stack := []renderTask{{obj: root, flavor: flavor}}
// push schedules seq to pop in order (reversed onto the LIFO stack).
push := func(seq []renderTask) {
@@ -1013,6 +1047,18 @@ func renderValue(root MShellObject, flavor renderFlavor) string {
stack = append(stack, seq[i])
}
}
+ // enter marks obj as on the current path and schedules its removal
+ // after seq (the container's children) has fully rendered.
+ enter := func(obj MShellObject, seq []renderTask) []renderTask {
+ if !cycleTrackable(obj) {
+ return seq
+ }
+ if onPath == nil {
+ onPath = make(map[MShellObject]bool, 8)
+ }
+ onPath[obj] = true
+ return append(seq, renderTask{obj: obj, isExit: true})
+ }
for len(stack) > 0 {
t := stack[len(stack)-1]
stack = stack[:len(stack)-1]
@@ -1020,6 +1066,18 @@ func renderValue(root MShellObject, flavor renderFlavor) string {
sb.WriteString(t.lit)
continue
}
+ if t.isExit {
+ delete(onPath, t.obj)
+ continue
+ }
+ // Only pointer kinds are ever on the path; the guard also keeps
+ // unhashable dynamic types (MShellBinary, a []byte) away from the
+ // map lookup, which would panic even on a read.
+ if cycleTrackable(t.obj) && onPath[t.obj] {
+ sb.WriteString("")
+ cycled = true
+ continue
+ }
if m, ok := asMaybe(t.obj); ok {
switch {
case m.IsNone() && t.flavor == flavorJson:
@@ -1027,11 +1085,11 @@ func renderValue(root MShellObject, flavor renderFlavor) string {
case m.IsNone():
sb.WriteString("None")
case t.flavor == flavorJson:
- push([]renderTask{{obj: m.obj, flavor: flavorJson}})
+ push(enter(t.obj, []renderTask{{obj: m.obj, flavor: flavorJson}}))
case t.flavor == flavorDebug:
- push([]renderTask{renderLit("Maybe("), {obj: m.obj, flavor: flavorDebug}, renderLit(")")})
+ push(enter(t.obj, []renderTask{renderLit("Maybe("), {obj: m.obj, flavor: flavorDebug}, renderLit(")")}))
default:
- push([]renderTask{renderLit("Just("), {obj: m.obj, flavor: flavorStr}, renderLit(")")})
+ push(enter(t.obj, []renderTask{renderLit("Just("), {obj: m.obj, flavor: flavorStr}, renderLit(")")}))
}
continue
}
@@ -1053,7 +1111,7 @@ func renderValue(root MShellObject, flavor renderFlavor) string {
seq = append(seq, renderJoin("[", ", ", "]", v.Payload, flavorJson)...)
}
seq = append(seq, renderLit("}"))
- push(seq)
+ push(enter(t.obj, seq))
continue
}
// `member` (nullary) or `member(p0 p1 ...)`, payloads as ToString.
@@ -1061,18 +1119,18 @@ func renderValue(root MShellObject, flavor renderFlavor) string {
sb.WriteString(v.Member)
continue
}
- push(renderJoin(v.Member+"(", " ", ")", v.Payload, flavorStr))
+ push(enter(t.obj, renderJoin(v.Member+"(", " ", ")", v.Payload, flavorStr)))
case *MShellList:
if t.flavor == flavorJson {
- push(renderJoin("[", ", ", "]", v.Items, flavorJson))
+ push(enter(t.obj, renderJoin("[", ", ", "]", v.Items, flavorJson)))
} else {
- push(renderJoin("[", " ", "]", v.Items, flavorDebug))
+ push(enter(t.obj, renderJoin("[", " ", "]", v.Items, flavorDebug)))
}
case *MShellPipe:
if t.flavor == flavorJson {
- push(renderJoin("[", ", ", "]", v.List.Items, flavorJson))
+ push(enter(t.obj, renderJoin("[", ", ", "]", v.List.Items, flavorJson)))
} else {
- push(renderJoin("", " | ", "", v.List.Items, flavorDebug))
+ push(enter(t.obj, renderJoin("", " | ", "", v.List.Items, flavorDebug)))
}
case *MShellDict:
keys := sortedDictKeys(v.Items)
@@ -1083,7 +1141,7 @@ func renderValue(root MShellObject, flavor renderFlavor) string {
seq = append(seq, renderLit(k+": "), renderTask{obj: v.Items[k], flavor: flavorDebug}, renderLit(", "))
}
seq = append(seq, renderLit("}"))
- push(seq)
+ push(enter(t.obj, seq))
continue
}
// The `str` form of a dict is its JSON form.
@@ -1101,7 +1159,7 @@ func renderValue(root MShellObject, flavor renderFlavor) string {
seq = append(seq, renderLit(string(keyEnc)+": "), renderTask{obj: v.Items[k], flavor: flavorJson})
}
seq = append(seq, renderLit("}"))
- push(seq)
+ push(enter(t.obj, seq))
default:
switch t.flavor {
case flavorDebug:
@@ -1113,7 +1171,7 @@ func renderValue(root MShellObject, flavor renderFlavor) string {
}
}
}
- return sb.String()
+ return sb.String(), cycled
}
// equalsIter is structural equality over any two values, walked with one
diff --git a/tests/fail/cyclic_render.msh b/tests/fail/cyclic_render.msh
new file mode 100644
index 00000000..ba23816f
--- /dev/null
+++ b/tests/fail/cyclic_render.msh
@@ -0,0 +1,10 @@
+# mshell is strict: a cyclic value (a container appended into itself) is a
+# degenerate artifact of in-place mutation, so converting one to a string or
+# JSON is an error rather than a hang. Equality and sorting on cyclic values
+# still terminate (pointer-identity fast path + pair memoization).
+enum Box = wrap [Box] | z end
+[] x!
+@x wrap e!
+@x @e append drop
+@e dup = str wl
+@e str wl
diff --git a/tests/fail/cyclic_render.msh.stderr b/tests/fail/cyclic_render.msh.stderr
new file mode 100644
index 00000000..f45c549a
--- /dev/null
+++ b/tests/fail/cyclic_render.msh.stderr
@@ -0,0 +1 @@
+10:4: Cannot convert a cyclic value (a container that contains itself) to a string.
From da5f6a9ebac16758bf8d06a5fbf98e7efc150090 Mon Sep 17 00:00:00 2001
From: Mitchell Paulus
Date: Thu, 2 Jul 2026 19:38:50 -0500
Subject: [PATCH 23/32] Infer the match subject inside quotations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
A `match` as the body of an inferred quotation always failed the checker
with "stack underflow at 'match' (match subject)" even though it runs
fine — rejecting the canonical way to consume enums:
[ 1 leaf 2 leaf ] (match leaf n : @n, node a b : 0, end) map
checkMatchBlock errored on an empty stack unconditionally; other constructs
(operators, `if`) participate in quote-body inference, where applySig
responds to underflow by synthesizing fresh input vars. The failure predates
enums (Maybe matches in map had it too), but match is the enum eliminator,
so `(match ...) map/filter/each` never type-checking was a constant pain point.
Two pieces:
- Under inference, an empty stack at `match` synthesizes the subject exactly
as applySig's underflow path does (bottom of stack, front of inferInputs),
so the quotation infers a one-input signature.
- A subject that is still an unresolved var is pinned from the first arm
pattern that names a type — an enum member determines its enum (member
names are global), `just`/`none` determine Maybe[fresh]. Pinning happens
before the entry branch is captured, so every arm's analysis, payload
bindings, and the exhaustiveness check see the resolved subject (per-arm
substitution checkpoints would roll back a per-site pin). Value literals
and type keywords deliberately do not pin: a type-keyword match may be
discriminating a union, which pinning would wrongly narrow — those
matches still check via their wildcard arm.
Exhaustiveness now works inside quotations too: a match that omits a member
is rejected, and a pinned enum quotation applied to a list of a different
element type fails overload resolution as it should.
Tests: tests/success/enum_match_in_quote.msh (enum map/filter/each, Maybe,
value literals) and tests/typecheck_fail/enum_match_in_quote_nonexhaustive.msh.
Suites green: tests 222, typecheck 210, go test.
Co-Authored-By: Claude Fable 5
Claude-Session: https://claude.ai/code/session_01E3aH9BBud5rjDoHUF8daaJ
---
mshell/TypeCheckProgram.go | 61 +++++++++++++++++--
tests/success/enum_match_in_quote.msh | 16 +++++
tests/success/enum_match_in_quote.msh.stdout | 6 ++
.../enum_match_in_quote_nonexhaustive.msh | 4 ++
4 files changed, 81 insertions(+), 6 deletions(-)
create mode 100644 tests/success/enum_match_in_quote.msh
create mode 100644 tests/success/enum_match_in_quote.msh.stdout
create mode 100644 tests/typecheck_fail/enum_match_in_quote_nonexhaustive.msh
diff --git a/mshell/TypeCheckProgram.go b/mshell/TypeCheckProgram.go
index 2d01bf9a..d5750877 100644
--- a/mshell/TypeCheckProgram.go
+++ b/mshell/TypeCheckProgram.go
@@ -1241,12 +1241,21 @@ func formatPatternItem(it MShellParseItem) string {
func (c *Checker) checkMatchBlock(matchBlock *MShellParseMatchBlock) {
startTok := matchBlock.GetStartToken()
if c.stack.Len() == 0 {
- c.errors = append(c.errors, TypeError{
- Kind: TErrStackUnderflow,
- Pos: startTok,
- Hint: "match subject",
- })
- return
+ if !c.inferring {
+ c.errors = append(c.errors, TypeError{
+ Kind: TErrStackUnderflow,
+ Pos: startTok,
+ Hint: "match subject",
+ })
+ return
+ }
+ // Quote-body inference: the subject is the quote's own input.
+ // Synthesize a fresh var exactly as applySig's underflow path does —
+ // at the bottom of the stack and the front of inferInputs — so
+ // `(match ... end) map` infers a one-input quote instead of erroring.
+ v := c.subst.FreshVar(c.arena)
+ c.inferInputs = append([]TypeId{v}, c.inferInputs...)
+ c.stack.items = append([]TypeId{v}, c.stack.items...)
}
// Widen a string-literal subject to `str`: match arms and the
// exhaustiveness check compare against `str` by type id, and the literal
@@ -1261,6 +1270,16 @@ func (c *Checker) checkMatchBlock(matchBlock *MShellParseMatchBlock) {
if resolved := c.subst.Apply(c.arena, subject); c.arena.Node(resolved).Kind == TKBrand {
subject = c.underlying(resolved)
}
+ // An unresolved subject (a quote input under inference) is pinned from the
+ // first arm pattern that names a type: an enum member determines its enum
+ // (member names are global) and `just`/`none` determine Maybe. Pinning
+ // happens before the entry branch is captured so every arm and the
+ // exhaustiveness check see the resolved subject.
+ if c.arena.Node(c.subst.Apply(c.arena, subject)).Kind == TKVar {
+ if pin, ok := c.matchSubjectPin(matchBlock); ok {
+ c.unify(subject, pin)
+ }
+ }
entry := c.captureBranch()
if len(matchBlock.Arms) == 0 {
@@ -1317,6 +1336,36 @@ func (c *Checker) checkMatchBlock(matchBlock *MShellParseMatchBlock) {
c.reconcileArmBranches(armBranches, armLabels, entry, startTok)
}
+// matchSubjectPin returns the concrete type a match's arm patterns determine
+// for an as-yet-unresolved subject. An enum-member head names its enum (member
+// names are unique across enums), and a `just`/`none` head names Maybe[T] with
+// a fresh T. ok=false when no arm determines a type — value literals and type
+// keywords deliberately do not pin, since a type-keyword match may be
+// discriminating a union and pinning would wrongly narrow the input.
+func (c *Checker) matchSubjectPin(matchBlock *MShellParseMatchBlock) (TypeId, bool) {
+ for _, arm := range matchBlock.Arms {
+ if len(arm.Pattern) == 0 {
+ continue
+ }
+ tok, ok := arm.Pattern[0].(Token)
+ if !ok || tok.Type != LITERAL {
+ continue
+ }
+ if tok.Lexeme == "just" || tok.Lexeme == "none" {
+ return c.arena.MakeMaybe(c.subst.FreshVar(c.arena)), true
+ }
+ mid := c.names.Intern(tok.Lexeme)
+ if _, isMember := c.enumMemberToks[mid]; isMember {
+ // A member's constructor sig has the enum as its only output.
+ sigs := c.nameBuiltins[mid]
+ if len(sigs) > 0 && len(sigs[0].Outputs) == 1 && c.arena.Node(sigs[0].Outputs[0]).Kind == TKEnum {
+ return sigs[0].Outputs[0], true
+ }
+ }
+ }
+ return TidNothing, false
+}
+
// armPattern is the single interpretation of a match arm pattern. One
// analysis feeds all four consumers that used to re-pattern-match the
// arm independently: recognition diagnostics (Recognized), the
diff --git a/tests/success/enum_match_in_quote.msh b/tests/success/enum_match_in_quote.msh
new file mode 100644
index 00000000..ac91746f
--- /dev/null
+++ b/tests/success/enum_match_in_quote.msh
@@ -0,0 +1,16 @@
+# A `match` may be the body of an inferred quotation: the checker synthesizes
+# the quote's input as the match subject (like any other underflow under
+# inference) and pins it from the first arm that names a type — an enum member
+# determines its enum, `just`/`none` determine Maybe. This is the canonical
+# way to consume a list of enum values.
+enum T = leaf int | node T T end
+[ 1 leaf 2 leaf ] (match leaf n : @n, node a b : 0, end) map (str) map "," join wl
+
+enum C = red | green | blue end
+[ red green blue ] (match red : true, green : false, blue : true, end) filter len str wl
+
+[ 5 just none ] (match just v : @v, none : 0, end) map (str) map "," join wl
+
+[1 2 3] (match 1 : "one", _ : "other", end) map "," join wl
+
+[ red green ] (match red : "r" wl, green : "g" wl, blue : "b" wl, end) each
diff --git a/tests/success/enum_match_in_quote.msh.stdout b/tests/success/enum_match_in_quote.msh.stdout
new file mode 100644
index 00000000..8bd43448
--- /dev/null
+++ b/tests/success/enum_match_in_quote.msh.stdout
@@ -0,0 +1,6 @@
+1,2
+2
+5,0
+one,other,other
+r
+g
diff --git a/tests/typecheck_fail/enum_match_in_quote_nonexhaustive.msh b/tests/typecheck_fail/enum_match_in_quote_nonexhaustive.msh
new file mode 100644
index 00000000..0b96d55d
--- /dev/null
+++ b/tests/typecheck_fail/enum_match_in_quote_nonexhaustive.msh
@@ -0,0 +1,4 @@
+# Exhaustiveness is enforced inside inferred quotations too: the subject is
+# pinned to the member's enum, so a match that omits a member is rejected.
+enum C = red | green | blue end
+[ red ] (match red : 1, green : 2, end) map drop
From 88d8de32970a73b36b5f82c5ed127f1bab401dbb Mon Sep 17 00:00:00 2001
From: Mitchell Paulus
Date: Thu, 2 Jul 2026 20:02:33 -0500
Subject: [PATCH 24/32] Dedupe identical pairs at push time: fix exponential
cliff past the memo cap
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Comparing two independently built self-doubling DAGs deeper than dagMemoCap
(2^18 levels) hung: the cap's "stop inserting" overflow policy left every
level beyond 262144 un-memoized, and each un-memoized level doubles the
walk. Measured: depth 200k compared in 1s, depth 300k ran effectively
forever (2^38000 work).
A first attempt — clearing the memo generationally on overflow — did NOT
fix it (also measured): the pending duplicate for each upper level pops
only after the entire subtree beneath its first occurrence has been walked,
so the working set spans all levels and defeats any bounded memo regardless
of eviction policy.
The structural fix: deduplicate pointer-identical pairs when a container
pushes its children. A self-doubling value (`@t @t node`, `[ @x @x ]`)
expands to the SAME pair twice; pushing it once makes the whole family
linear at any depth with no memo involvement — equality at depth 300k/600k/
4M now runs in 1.1s/2.1s/14.4s (linear), and sort past the cap likewise.
Applied in equalsIter (pushPairsDedup for enum payloads, list and pipe
elements) and in compareValues' enum and list arms (skipping is sound in
both walks: a pair pointer-identical to its neighboring pair repeats that
neighbor's result, so it can only contribute equal/0). The generational
clear is kept — the memo still covers cross-parent duplicate pairs
(diamond-shaped sharing) up to the cap — and its comment now states
honestly what it does and does not defend against.
Extends tests/success/enum_dag_equality.msh with a 300k-deep (past-cap)
independent-DAG comparison. Suites green: tests 222, typecheck 210, go test.
Co-Authored-By: Claude Fable 5
Claude-Session: https://claude.ai/code/session_01E3aH9BBud5rjDoHUF8daaJ
---
mshell/MShellObject.go | 62 ++++++++++++++++------
tests/success/enum_dag_equality.msh | 12 +++++
tests/success/enum_dag_equality.msh.stdout | 1 +
3 files changed, 60 insertions(+), 15 deletions(-)
diff --git a/mshell/MShellObject.go b/mshell/MShellObject.go
index 7b5fe552..663f5da5 100644
--- a/mshell/MShellObject.go
+++ b/mshell/MShellObject.go
@@ -926,9 +926,18 @@ func (g *dagGuard) skip(a, b MShellObject) bool {
if g.memo[key] {
return true
}
- if len(g.memo) < dagMemoCap {
- g.memo[key] = true
+ // Generational overflow: when the memo is full, clear it and keep
+ // inserting rather than stopping (stopping would freeze the memo on the
+ // walk's earliest pairs). Note the memo is NOT the defense against
+ // self-doubling values — a walk whose pending-duplicate working set
+ // exceeds the cap defeats any bounded memo (measured, not theorized).
+ // That family is handled structurally by push-time pair dedup
+ // (pushPairsDedup); the memo covers cross-parent duplicate pairs
+ // (diamond-shaped sharing) up to the cap.
+ if len(g.memo) >= dagMemoCap {
+ g.memo = make(map[refPair]bool, 1024)
}
+ g.memo[key] = true
return false
}
@@ -1181,10 +1190,30 @@ func renderValueDetect(root MShellObject, flavor renderFlavor) (string, bool) {
// definition), and past a step threshold already-expanded pairs are memoized
// (see dagGuard), so shared substructure cannot blow up exponentially. Leaf
// kinds compare via their own Equals.
+type eqPair struct{ a, b MShellObject }
+
+// pushPairsDedup pushes element-wise comparison pairs, skipping a pair that is
+// pointer-identical to the one just pushed. A self-doubling value
+// (`@t @t node`, `[ @x @x ]`) expands to the SAME pair twice; pushing it once
+// makes that whole family linear at any depth, with no reliance on the
+// bounded dagGuard memo (whose eviction cannot cover a working set larger
+// than its cap).
+func pushPairsDedup(stack []eqPair, as, bs []MShellObject) []eqPair {
+ var lastA, lastB MShellObject
+ for i := range as {
+ ca, cb := as[i], bs[i]
+ if i > 0 && sameRef(ca, lastA) && sameRef(cb, lastB) {
+ continue
+ }
+ lastA, lastB = ca, cb
+ stack = append(stack, eqPair{a: ca, b: cb})
+ }
+ return stack
+}
+
func equalsIter(a, b MShellObject) (bool, error) {
- type pair struct{ a, b MShellObject }
var guard dagGuard
- stack := []pair{{a: a, b: b}}
+ stack := []eqPair{{a: a, b: b}}
for len(stack) > 0 {
p := stack[len(stack)-1]
stack = stack[:len(stack)-1]
@@ -1197,7 +1226,7 @@ func equalsIter(a, b MShellObject) (bool, error) {
return false, nil
}
if !am.IsNone() {
- stack = append(stack, pair{a: am.obj, b: bm.obj})
+ stack = append(stack, eqPair{a: am.obj, b: bm.obj})
}
continue
}
@@ -1207,25 +1236,19 @@ func equalsIter(a, b MShellObject) (bool, error) {
if !ok || av.EnumName != bv.EnumName || av.Member != bv.Member || len(av.Payload) != len(bv.Payload) {
return false, nil
}
- for i := range av.Payload {
- stack = append(stack, pair{a: av.Payload[i], b: bv.Payload[i]})
- }
+ stack = pushPairsDedup(stack, av.Payload, bv.Payload)
case *MShellList:
bv, ok := p.b.(*MShellList)
if !ok || len(av.Items) != len(bv.Items) {
return false, nil
}
- for i := range av.Items {
- stack = append(stack, pair{a: av.Items[i], b: bv.Items[i]})
- }
+ stack = pushPairsDedup(stack, av.Items, bv.Items)
case *MShellPipe:
bv, ok := p.b.(*MShellPipe)
if !ok || len(av.List.Items) != len(bv.List.Items) {
return false, nil
}
- for i := range av.List.Items {
- stack = append(stack, pair{a: av.List.Items[i], b: bv.List.Items[i]})
- }
+ stack = pushPairsDedup(stack, av.List.Items, bv.List.Items)
case *MShellDict:
bv, ok := p.b.(*MShellDict)
if !ok || len(av.Items) != len(bv.Items) {
@@ -1236,7 +1259,7 @@ func equalsIter(a, b MShellObject) (bool, error) {
if !ok {
return false, nil
}
- stack = append(stack, pair{a: aval, b: bval})
+ stack = append(stack, eqPair{a: aval, b: bval})
}
default:
eq, err := p.a.Equals(p.b)
@@ -1350,6 +1373,11 @@ func compareValues(a, b MShellObject) int {
n := min(len(av.Items), len(bl.Items))
stack = append(stack, task{lit: cmpInt(len(av.Items), len(bl.Items)), isLit: true})
for i := n - 1; i >= 0; i-- {
+ // Skip a pair pointer-identical to its neighbor: it compares 0
+ // and would double the walk on self-doubling values.
+ if i > 0 && sameRef(av.Items[i], av.Items[i-1]) && sameRef(bl.Items[i], bl.Items[i-1]) {
+ continue
+ }
stack = append(stack, task{a: av.Items[i], b: bl.Items[i]})
}
case *MShellDict:
@@ -1368,6 +1396,10 @@ func compareValues(a, b MShellObject) int {
n := min(len(av.Payload), len(be.Payload))
stack = append(stack, task{lit: cmpInt(len(av.Payload), len(be.Payload)), isLit: true})
for i := n - 1; i >= 0; i-- {
+ // Skip a pair pointer-identical to its neighbor (see list arm).
+ if i > 0 && sameRef(av.Payload[i], av.Payload[i-1]) && sameRef(be.Payload[i], be.Payload[i-1]) {
+ continue
+ }
stack = append(stack, task{a: av.Payload[i], b: be.Payload[i]})
}
// Name and member (declaration order) compare before any payload.
diff --git a/tests/success/enum_dag_equality.msh b/tests/success/enum_dag_equality.msh
index 08b677e2..ebb8d5e9 100644
--- a/tests/success/enum_dag_equality.msh
+++ b/tests/success/enum_dag_equality.msh
@@ -32,3 +32,15 @@ enum T = leaf int | node T T end
@a @c = str wl
[ @a @c @b ] sort len str wl
[ @a @c @b ] uniq len str wl
+
+# Past the dagGuard memo cap (2^18): self-doubling pairs are deduplicated at
+# push time, so independent DAGs deeper than the cap stay linear — a bounded
+# memo alone cannot cover a pending-duplicate working set larger than itself.
+0 leaf p! 0 leaf q! 0 i!
+(
+ @i 300000 >= if break end
+ @p @p node p!
+ @q @q node q!
+ @i 1 + i!
+) loop
+@p @q = str wl
diff --git a/tests/success/enum_dag_equality.msh.stdout b/tests/success/enum_dag_equality.msh.stdout
index e102c904..1654dd67 100644
--- a/tests/success/enum_dag_equality.msh.stdout
+++ b/tests/success/enum_dag_equality.msh.stdout
@@ -6,3 +6,4 @@ true
false
3
2
+true
From e7658a77a57ffd48adc713bcd41c7e6fef3ecf8b Mon Sep 17 00:00:00 2001
From: Mitchell Paulus
Date: Thu, 2 Jul 2026 20:11:11 -0500
Subject: [PATCH 25/32] Dedupe identical pairs in the dict comparison arms too
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The push-time pair dedup that closed the exponential cliff for self-doubling
values covered enum payloads, lists, and pipes — but not the two dict arms.
A dict-shaped doubling value ({ "l": @d, "r": @d } per level, or the same
structure through an enum's {str: E} payload) pushed the identical pointer
pair once per key and re-opened the cliff: equality at depth 200k ran in
1.4s, depth 300k (past the memo cap) hung. Measured, same boundary as the
list/enum case.
equalsIter's dict arm now skips a pair pointer-identical to the last one it
pushed, and compareValues' dict arm skips the identical *value* pair while
always keeping the key comparison. Dict-DAG equality at 300k/600k now runs
1.5s/3.3s (linear), sort works, and a deep unequal pair is still detected.
Extends tests/success/enum_dag_equality.msh with a 300k dict-payload DAG.
Suites green: tests 222, typecheck 210, go test.
Co-Authored-By: Claude Fable 5
Claude-Session: https://claude.ai/code/session_01E3aH9BBud5rjDoHUF8daaJ
---
mshell/MShellObject.go | 21 ++++++++++++++++++++-
tests/success/enum_dag_equality.msh | 13 +++++++++++++
tests/success/enum_dag_equality.msh.stdout | 1 +
3 files changed, 34 insertions(+), 1 deletion(-)
diff --git a/mshell/MShellObject.go b/mshell/MShellObject.go
index 663f5da5..8c8ffeb6 100644
--- a/mshell/MShellObject.go
+++ b/mshell/MShellObject.go
@@ -1254,11 +1254,21 @@ func equalsIter(a, b MShellObject) (bool, error) {
if !ok || len(av.Items) != len(bv.Items) {
return false, nil
}
+ // Track the last pushed pair to skip consecutive identical ones —
+ // the dict-shaped self-doubling value ({ "l": @d, "r": @d })
+ // pushes the same pointer pair once per key, and without dedup
+ // that doubles the walk per level (same cliff pushPairsDedup
+ // closes for lists and enum payloads).
+ var lastA, lastB MShellObject
for key, aval := range av.Items {
bval, ok := bv.Items[key]
if !ok {
return false, nil
}
+ if sameRef(aval, lastA) && sameRef(bval, lastB) {
+ continue
+ }
+ lastA, lastB = aval, bval
stack = append(stack, eqPair{a: aval, b: bval})
}
default:
@@ -1386,9 +1396,18 @@ func compareValues(a, b MShellObject) int {
bk := sortedDictKeys(bd.Items)
n := min(len(ak), len(bk))
stack = append(stack, task{lit: cmpInt(len(ak), len(bk)), isLit: true})
+ var lastVA, lastVB MShellObject
for i := n - 1; i >= 0; i-- {
// Pushed so `key compare` pops before its `value compare`.
- stack = append(stack, task{a: av.Items[ak[i]], b: bd.Items[bk[i]]})
+ // A value pair pointer-identical to the neighboring key's is
+ // skipped (it compares 0) — the dict-shaped self-doubling
+ // value would otherwise double the walk per level. The key
+ // comparison itself always stays.
+ va, vb := av.Items[ak[i]], bd.Items[bk[i]]
+ if !(sameRef(va, lastVA) && sameRef(vb, lastVB)) {
+ stack = append(stack, task{a: va, b: vb})
+ lastVA, lastVB = va, vb
+ }
stack = append(stack, task{lit: strings.Compare(ak[i], bk[i]), isLit: true})
}
case *MShellEnum:
diff --git a/tests/success/enum_dag_equality.msh b/tests/success/enum_dag_equality.msh
index ebb8d5e9..ab1ac4d7 100644
--- a/tests/success/enum_dag_equality.msh
+++ b/tests/success/enum_dag_equality.msh
@@ -44,3 +44,16 @@ enum T = leaf int | node T T end
@i 1 + i!
) loop
@p @q = str wl
+
+# Dict-shaped self-doubling past the cap: the dict arms dedupe consecutive
+# identical value pairs the same way (an enum with a dict payload closes the
+# same doubling structure through {str: E}).
+enum D = md {str: D} | zd end
+zd u! zd v! 0 i!
+(
+ @i 300000 >= if break end
+ { "l": @u, "r": @u } md u!
+ { "l": @v, "r": @v } md v!
+ @i 1 + i!
+) loop
+@u @v = str wl
diff --git a/tests/success/enum_dag_equality.msh.stdout b/tests/success/enum_dag_equality.msh.stdout
index 1654dd67..877a2259 100644
--- a/tests/success/enum_dag_equality.msh.stdout
+++ b/tests/success/enum_dag_equality.msh.stdout
@@ -7,3 +7,4 @@ false
3
2
true
+true
From 6341f99acf5aa655a73f4688397ab00db2746049 Mon Sep 17 00:00:00 2001
From: Mitchell Paulus
Date: Thu, 2 Jul 2026 20:21:34 -0500
Subject: [PATCH 26/32] Make the comparison pair-memo unbounded: end the DAG
blowup class globally
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Four successive fixes (memo cap, generational eviction, push-dedup for
enum/list/pipe, then dict arms) each closed one self-doubling pattern and
left the next container or sharing shape exponential past the cap — a
non-consecutive pattern like [x y x] per level still hung at depth 300k.
The chain of same-shaped patches was defending the wrong invariant: the
memo cap itself.
The memo is now unbounded. Every revisited pointer pair memo-hits, so ANY
sharing pattern — consecutive, alternating, cross-parent, any container
mix, any depth — is polynomial in actual heap nodes. There is no boundary
left to probe.
The memory trade is proportionate, not abstract: the memo only activates
past the step threshold (2^19), so ordinary comparisons never allocate,
and a comparison big enough to grow a large memo already holds operands
larger than the memo. Measured: the pathological 4M-deep linear compare
(every pair distinct — the case the cap was protecting) runs 21.9s at
2.6GB peak, of which the two operands are over half; the alternating
[x y x] DAG at 300k, which defeated every previous patch, compares in
3.0s. Push-time dedup stays as a constant-factor fast path (it also
avoids the pre-threshold spin for shallow doubling), but is no longer
load-bearing for termination.
Extends tests/success/enum_dag_equality.msh with the non-consecutive
alternating pattern at 300k. Suites green: tests 222, typecheck 210,
go test.
Co-Authored-By: Claude Fable 5
Claude-Session: https://claude.ai/code/session_01E3aH9BBud5rjDoHUF8daaJ
---
mshell/MShellObject.go | 29 +++++++++++-----------
tests/success/enum_dag_equality.msh | 13 ++++++++++
tests/success/enum_dag_equality.msh.stdout | 1 +
3 files changed, 28 insertions(+), 15 deletions(-)
diff --git a/mshell/MShellObject.go b/mshell/MShellObject.go
index 8c8ffeb6..3c66c248 100644
--- a/mshell/MShellObject.go
+++ b/mshell/MShellObject.go
@@ -895,9 +895,9 @@ func sameRef(a, b MShellObject) bool {
// if a duplicate pops at all, its subtree already compared equal.
//
// Ordinary comparisons never allocate: below the step threshold the guard is
-// one integer increment. The memo is capped so a legitimately huge linear
-// value (millions of distinct pairs, no repeats) cannot balloon memory; a
-// blowup DAG has few distinct pairs and fits far below the cap.
+// one integer increment. Past it, the memo grows without bound — see skip for
+// why an unbounded memo is the correct trade (a bounded one turns "assumption
+// exceeded" into exponential time).
type dagGuard struct {
steps int
memo map[refPair]bool
@@ -906,11 +906,21 @@ type dagGuard struct {
type refPair struct{ a, b MShellObject }
const dagStepThreshold = 1 << 19
-const dagMemoCap = 1 << 18
// skip reports whether this pair was already expanded earlier in the walk.
// Call once per popped pair; it records the pair (past the threshold) so
// later duplicates skip.
+//
+// The memo is deliberately UNBOUNDED. Every revisited pointer pair memo-hits,
+// which makes a comparison polynomial in actual heap nodes for any sharing
+// pattern — self-doubling, alternating, cross-parent diamonds, any container
+// mix, any depth. Earlier versions capped the memo to bound memory and then
+// patched the resulting exponential cliffs case by case (generational
+// eviction, per-container dedup); any bounded memo loses to a working set
+// larger than its bound (measured, not theorized), so no cap. Memory tracks
+// pairs actually walked: it activates only past the step threshold, so
+// ordinary comparisons never allocate, and a comparison large enough to build
+// a big memo already holds operands larger than the memo itself.
func (g *dagGuard) skip(a, b MShellObject) bool {
g.steps++
if g.steps < dagStepThreshold {
@@ -926,17 +936,6 @@ func (g *dagGuard) skip(a, b MShellObject) bool {
if g.memo[key] {
return true
}
- // Generational overflow: when the memo is full, clear it and keep
- // inserting rather than stopping (stopping would freeze the memo on the
- // walk's earliest pairs). Note the memo is NOT the defense against
- // self-doubling values — a walk whose pending-duplicate working set
- // exceeds the cap defeats any bounded memo (measured, not theorized).
- // That family is handled structurally by push-time pair dedup
- // (pushPairsDedup); the memo covers cross-parent duplicate pairs
- // (diamond-shaped sharing) up to the cap.
- if len(g.memo) >= dagMemoCap {
- g.memo = make(map[refPair]bool, 1024)
- }
g.memo[key] = true
return false
}
diff --git a/tests/success/enum_dag_equality.msh b/tests/success/enum_dag_equality.msh
index ab1ac4d7..064f00a6 100644
--- a/tests/success/enum_dag_equality.msh
+++ b/tests/success/enum_dag_equality.msh
@@ -57,3 +57,16 @@ zd u! zd v! 0 i!
@i 1 + i!
) loop
@u @v = str wl
+
+# Class closure: a NON-consecutive alternating sharing pattern ([x y x] per
+# level) defeats push-time dedup entirely and, past any bounded memo, every
+# such pattern re-explodes — so the pair memo is unbounded. Any sharing
+# pattern, any container mix, any depth is polynomial in actual nodes.
+[0] xa! [1] ya! [0] xb! [1] yb! 0 i!
+(
+ @i 300000 >= if break end
+ [ @xa @ya @xa ] t! [ @ya @xa @ya ] ya! @t xa!
+ [ @xb @yb @xb ] t! [ @yb @xb @yb ] yb! @t xb!
+ @i 1 + i!
+) loop
+@xa @xb = str wl
diff --git a/tests/success/enum_dag_equality.msh.stdout b/tests/success/enum_dag_equality.msh.stdout
index 877a2259..f7d26a30 100644
--- a/tests/success/enum_dag_equality.msh.stdout
+++ b/tests/success/enum_dag_equality.msh.stdout
@@ -8,3 +8,4 @@ false
2
true
true
+true
From 2a326e369c2ff5c4380a1ae96ad0e3238982a32d Mon Sep 17 00:00:00 2001
From: Mitchell Paulus
Date: Thu, 2 Jul 2026 21:50:42 -0500
Subject: [PATCH 27/32] Collapse the pointer-kind switches and drop the
superseded push-dedup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The comparison/render walkers carried three hand-maintained "which types
are pointers" switches (sameRef, refPairKey, cycleTrackable) that had
already drifted apart. Replace all three with one isRefKind predicate
that checks the dynamic kind via reflection: "is a pointer" is exactly
the property every site cares about (heap identity ⇒ can share
substructure, can cycle, safe to compare and use as a map key), and a
newly added pointer kind is covered with no list to keep in sync.
Also delete the push-time neighbor-dedup machinery (pushPairsDedup plus
three hand-rolled copies in the dict/list/enum arms). It was written to
compensate for the bounded dagGuard memo — its own comment still cited
the memo's eviction — but the memo is unbounded now, which subsumes the
whole trick: below the step threshold duplicate expansion is capped by
the threshold itself, past it every repeated pair memo-hits. Timing on
the DAG stress tests is unchanged.
Net -71 lines, no behavior change.
Co-Authored-By: Claude Fable 5
---
mshell/MShellObject.go | 163 ++++++++++++-----------------------------
1 file changed, 46 insertions(+), 117 deletions(-)
diff --git a/mshell/MShellObject.go b/mshell/MShellObject.go
index 3c66c248..aa060fad 100644
--- a/mshell/MShellObject.go
+++ b/mshell/MShellObject.go
@@ -7,6 +7,7 @@ import (
"fmt"
"golang.org/x/net/html"
"os"
+ "reflect"
"regexp"
"slices"
"sort"
@@ -852,42 +853,37 @@ func sortedDictKeys(m map[string]MShellObject) []string {
return keys
}
-// sameRef reports whether a and b are the identical heap object, for the kinds
-// that can form shared substructure (a value built as `@t @t node` reuses one
-// subtree twice). A pointer-identical pair is equal by definition, so equality
-// and ordering walks skip it instead of expanding it — without this, walking a
-// value with n levels of sharing costs 2^n. Only pointer kinds are compared:
-// comparing interfaces holding non-comparable dynamic types (e.g. MShellBinary,
-// a []byte) panics at runtime.
+// isRefKind reports whether obj's dynamic type is a pointer — a value with
+// heap identity. Only these kinds can form shared substructure or reference
+// cycles, and only these are safe in interface comparisons and as map keys: a
+// value kind may wrap a non-comparable type (MShellBinary, a []byte), which
+// panics at runtime. Checking the dynamic kind instead of enumerating types
+// means a newly added pointer kind is covered with no list to keep in sync.
+func isRefKind(obj MShellObject) bool {
+ return obj != nil && reflect.TypeOf(obj).Kind() == reflect.Pointer
+}
+
+// sameRef reports whether a and b are the identical heap object (a value built
+// as `@t @t node` reuses one subtree twice). A pointer-identical pair is equal
+// by definition, so equality and ordering walks skip it instead of expanding
+// it — without this, walking a value with n levels of sharing costs 2^n.
+// Interface equality is safe here: if b's dynamic type differs from a's the
+// comparison is false without inspecting values, and if it matches, isRefKind
+// guarantees it is a comparable pointer type.
func sameRef(a, b MShellObject) bool {
- switch av := a.(type) {
- case *MShellEnum:
- bv, ok := b.(*MShellEnum)
- return ok && av == bv
- case *MShellList:
- bv, ok := b.(*MShellList)
- return ok && av == bv
- case *MShellDict:
- bv, ok := b.(*MShellDict)
- return ok && av == bv
- case *Maybe:
- bv, ok := b.(*Maybe)
- return ok && av == bv
- case *MShellDateTime:
- bv, ok := b.(*MShellDateTime)
- return ok && av == bv
- case *MShellQuotation:
- bv, ok := b.(*MShellQuotation)
- return ok && av == bv
- }
- return false
+ return isRefKind(a) && a == b
}
// dagGuard bounds a comparison walk over values with shared substructure that
-// sameRef alone cannot catch: two *independently built* DAGs share no pointers
-// across operands, so every level re-expands and the walk goes exponential.
-// The guard counts pops; once a walk runs long enough to suggest blowup, it
-// memoizes the pointer pairs it has already expanded and skips repeats.
+// sameRef alone cannot catch: whenever the two operands are not the same
+// pointer (a value compared against an independently built copy, or two
+// distinct subtrees each shared internally), repeated substructure produces
+// repeated *pairs*, every one re-expands, and the walk goes exponential. The
+// guard counts pops; once a walk runs long enough to suggest blowup, it
+// memoizes the pointer pairs it has already expanded and skips repeats. This
+// is the single mechanism for the whole blowup class: duplicate expansion
+// below the threshold is capped by the threshold itself, and past it every
+// repeated pair memo-hits.
//
// Skipping a repeated pair is sound in a LIFO walk: the first occurrence's
// entire expansion resolves before any later duplicate (which sat lower in the
@@ -926,10 +922,12 @@ func (g *dagGuard) skip(a, b MShellObject) bool {
if g.steps < dagStepThreshold {
return false
}
- key, ok := refPairKey(a, b)
- if !ok {
+ // Only pointer kinds get keys: repeated pairs of anything else cannot
+ // cause blowup, and only pointers are guaranteed comparable as map keys.
+ if !isRefKind(a) || !isRefKind(b) {
return false
}
+ key := refPair{a, b}
if g.memo == nil {
g.memo = make(map[refPair]bool, 1024)
}
@@ -940,28 +938,6 @@ func (g *dagGuard) skip(a, b MShellObject) bool {
return false
}
-// refPairKey returns a comparable identity key when both values are the same
-// container pointer kind — the kinds whose repeated pairs cause blowup.
-// Interface keys are only safe when the dynamic values are comparable, which
-// pointers are; scalar kinds are cheap to compare directly and get no key.
-func refPairKey(a, b MShellObject) (refPair, bool) {
- switch a.(type) {
- case *MShellEnum:
- if _, ok := b.(*MShellEnum); ok {
- return refPair{a, b}, true
- }
- case *MShellList:
- if _, ok := b.(*MShellList); ok {
- return refPair{a, b}, true
- }
- case *MShellDict:
- if _, ok := b.(*MShellDict); ok {
- return refPair{a, b}, true
- }
- }
- return refPair{}, false
-}
-
// renderFlavor selects which of a value's three textual forms renderValue
// emits: flavorStr is ToString (the `str` form), flavorDebug is DebugString
// (stack dumps, list display), flavorJson is ToJson. Containers pick their
@@ -1007,18 +983,6 @@ func renderJoin(open, sep, close string, items []MShellObject, flavor renderFlav
return seq
}
-// cycleTrackable reports whether obj is a heap container that could sit on a
-// reference cycle (built via in-place list/dict mutation, e.g. a list appended
-// to itself). Only pointer kinds qualify — value kinds are copied and cannot
-// be revisited by identity.
-func cycleTrackable(obj MShellObject) bool {
- switch obj.(type) {
- case *MShellEnum, *MShellList, *MShellDict, *Maybe, *MShellPipe:
- return true
- }
- return false
-}
-
// renderValue renders a value in the requested flavor. It is total: a cyclic
// value renders with a `` marker at the back-reference, which keeps
// internal rendering (error messages, stack dumps) from hanging. User-facing
@@ -1056,9 +1020,11 @@ func renderValueDetect(root MShellObject, flavor renderFlavor) (string, bool) {
}
}
// enter marks t.obj as on the current path and schedules its removal
- // after seq (the container's children) has fully rendered.
+ // after seq (the container's children) has fully rendered. Only pointer
+ // kinds are tracked — value kinds are copied and cannot be revisited by
+ // identity, so they cannot sit on a reference cycle.
enter := func(obj MShellObject, seq []renderTask) []renderTask {
- if !cycleTrackable(obj) {
+ if !isRefKind(obj) {
return seq
}
if onPath == nil {
@@ -1081,7 +1047,7 @@ func renderValueDetect(root MShellObject, flavor renderFlavor) (string, bool) {
// Only pointer kinds are ever on the path; the guard also keeps
// unhashable dynamic types (MShellBinary, a []byte) away from the
// map lookup, which would panic even on a read.
- if cycleTrackable(t.obj) && onPath[t.obj] {
+ if isRefKind(t.obj) && onPath[t.obj] {
sb.WriteString("")
cycled = true
continue
@@ -1191,21 +1157,12 @@ func renderValueDetect(root MShellObject, flavor renderFlavor) (string, bool) {
// kinds compare via their own Equals.
type eqPair struct{ a, b MShellObject }
-// pushPairsDedup pushes element-wise comparison pairs, skipping a pair that is
-// pointer-identical to the one just pushed. A self-doubling value
-// (`@t @t node`, `[ @x @x ]`) expands to the SAME pair twice; pushing it once
-// makes that whole family linear at any depth, with no reliance on the
-// bounded dagGuard memo (whose eviction cannot cover a working set larger
-// than its cap).
-func pushPairsDedup(stack []eqPair, as, bs []MShellObject) []eqPair {
- var lastA, lastB MShellObject
+// pushPairs pushes element-wise comparison pairs onto the walk stack.
+// Duplicate pairs from shared substructure are not filtered here; the
+// dagGuard memo handles them (see dagGuard).
+func pushPairs(stack []eqPair, as, bs []MShellObject) []eqPair {
for i := range as {
- ca, cb := as[i], bs[i]
- if i > 0 && sameRef(ca, lastA) && sameRef(cb, lastB) {
- continue
- }
- lastA, lastB = ca, cb
- stack = append(stack, eqPair{a: ca, b: cb})
+ stack = append(stack, eqPair{a: as[i], b: bs[i]})
}
return stack
}
@@ -1235,39 +1192,29 @@ func equalsIter(a, b MShellObject) (bool, error) {
if !ok || av.EnumName != bv.EnumName || av.Member != bv.Member || len(av.Payload) != len(bv.Payload) {
return false, nil
}
- stack = pushPairsDedup(stack, av.Payload, bv.Payload)
+ stack = pushPairs(stack, av.Payload, bv.Payload)
case *MShellList:
bv, ok := p.b.(*MShellList)
if !ok || len(av.Items) != len(bv.Items) {
return false, nil
}
- stack = pushPairsDedup(stack, av.Items, bv.Items)
+ stack = pushPairs(stack, av.Items, bv.Items)
case *MShellPipe:
bv, ok := p.b.(*MShellPipe)
if !ok || len(av.List.Items) != len(bv.List.Items) {
return false, nil
}
- stack = pushPairsDedup(stack, av.List.Items, bv.List.Items)
+ stack = pushPairs(stack, av.List.Items, bv.List.Items)
case *MShellDict:
bv, ok := p.b.(*MShellDict)
if !ok || len(av.Items) != len(bv.Items) {
return false, nil
}
- // Track the last pushed pair to skip consecutive identical ones —
- // the dict-shaped self-doubling value ({ "l": @d, "r": @d })
- // pushes the same pointer pair once per key, and without dedup
- // that doubles the walk per level (same cliff pushPairsDedup
- // closes for lists and enum payloads).
- var lastA, lastB MShellObject
for key, aval := range av.Items {
bval, ok := bv.Items[key]
if !ok {
return false, nil
}
- if sameRef(aval, lastA) && sameRef(bval, lastB) {
- continue
- }
- lastA, lastB = aval, bval
stack = append(stack, eqPair{a: aval, b: bval})
}
default:
@@ -1382,11 +1329,6 @@ func compareValues(a, b MShellObject) int {
n := min(len(av.Items), len(bl.Items))
stack = append(stack, task{lit: cmpInt(len(av.Items), len(bl.Items)), isLit: true})
for i := n - 1; i >= 0; i-- {
- // Skip a pair pointer-identical to its neighbor: it compares 0
- // and would double the walk on self-doubling values.
- if i > 0 && sameRef(av.Items[i], av.Items[i-1]) && sameRef(bl.Items[i], bl.Items[i-1]) {
- continue
- }
stack = append(stack, task{a: av.Items[i], b: bl.Items[i]})
}
case *MShellDict:
@@ -1395,18 +1337,9 @@ func compareValues(a, b MShellObject) int {
bk := sortedDictKeys(bd.Items)
n := min(len(ak), len(bk))
stack = append(stack, task{lit: cmpInt(len(ak), len(bk)), isLit: true})
- var lastVA, lastVB MShellObject
for i := n - 1; i >= 0; i-- {
// Pushed so `key compare` pops before its `value compare`.
- // A value pair pointer-identical to the neighboring key's is
- // skipped (it compares 0) — the dict-shaped self-doubling
- // value would otherwise double the walk per level. The key
- // comparison itself always stays.
- va, vb := av.Items[ak[i]], bd.Items[bk[i]]
- if !(sameRef(va, lastVA) && sameRef(vb, lastVB)) {
- stack = append(stack, task{a: va, b: vb})
- lastVA, lastVB = va, vb
- }
+ stack = append(stack, task{a: av.Items[ak[i]], b: bd.Items[bk[i]]})
stack = append(stack, task{lit: strings.Compare(ak[i], bk[i]), isLit: true})
}
case *MShellEnum:
@@ -1414,10 +1347,6 @@ func compareValues(a, b MShellObject) int {
n := min(len(av.Payload), len(be.Payload))
stack = append(stack, task{lit: cmpInt(len(av.Payload), len(be.Payload)), isLit: true})
for i := n - 1; i >= 0; i-- {
- // Skip a pair pointer-identical to its neighbor (see list arm).
- if i > 0 && sameRef(av.Payload[i], av.Payload[i-1]) && sameRef(be.Payload[i], be.Payload[i-1]) {
- continue
- }
stack = append(stack, task{a: av.Payload[i], b: be.Payload[i]})
}
// Name and member (declaration order) compare before any payload.
From 2e2f4b6d2bd1ac3ef2eddeb44b5cf74e105c9ff5 Mon Sep 17 00:00:00 2001
From: Mitchell Paulus
Date: Thu, 2 Jul 2026 21:51:02 -0500
Subject: [PATCH 28/32] Error on duplicate definition names instead of silently
ignoring them
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Definition lookup is first-match-wins over [stdlib, init, script], so a
second def of a name never took effect — it was silently dead code while
the first definition kept running. A script "redefining" a stdlib word,
or an interactive redefinition, quietly did nothing; and since the type
checker registered the duplicate as an overload, a call could type-check
against the dead def's signature while the runtime executed the other
body. (Found via tests/success/enum_recursive_generic.msh, whose local
`def id` was shadowed by std.msh's id all along; renamed to `ident`.)
Reject duplicates everywhere instead:
- Runtime: FindDuplicateDefinition checks startup loading, script
startup+file assembly, and each interactive input line (which is
rejected with the session continuing). The error reports both
positions.
- Checker: def registration records every definition name (mirroring
the enum-member collision check) and rejects a repeat in both
RegisterStdlibSigs and CheckProgram, so --type-check-only and the
LSP report it too. A stdlib def records its name even when its sig
defers to a table builtin (the 2unpack case) — runtime lookup still
resolves to it. Def-shadowing-builtins stays legal; std.msh does
that on purpose.
- Lexer: makeToken now stamps Token.TokenFile, which was wired through
the lexer but never assigned. Cross-file collisions can then name the
other file: "already defined at lib/std.msh:62:5". No existing test
fixture is affected; stdin input still formats as bare line:col.
Co-Authored-By: Claude Fable 5
---
CHANGELOG.md | 11 +++++++
doc/mshell.md | 5 +++
mshell/Evaluator.go | 32 +++++++++++++++++++
mshell/Lexer.go | 1 +
mshell/Main.go | 17 ++++++++++
mshell/TypeCheckProgram.go | 28 ++++++++++++++++
mshell/TypeChecker.go | 8 +++++
tests/fail/duplicate_def.msh | 5 +++
tests/fail/duplicate_def.msh.stderr | 1 +
tests/success/enum_recursive_generic.msh | 4 +--
tests/typecheck_fail/duplicate_def.msh | 4 +++
tests/typecheck_fail/duplicate_def_stdlib.msh | 4 +++
12 files changed, 118 insertions(+), 2 deletions(-)
create mode 100644 tests/fail/duplicate_def.msh
create mode 100644 tests/fail/duplicate_def.msh.stderr
create mode 100644 tests/typecheck_fail/duplicate_def.msh
create mode 100644 tests/typecheck_fail/duplicate_def_stdlib.msh
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 13281322..92a51b19 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## Unreleased
+### Changed
+
+- Breaking: defining a name that is already defined is now an error, at runtime
+ and in the type checker. This covers a second `def` in the same file, a
+ script `def` whose name is already taken by the standard library or an init
+ file, and an interactive redefinition. Definition lookup is first-match-wins,
+ so a duplicate never took effect anyway — it was silently dead code while the
+ first definition kept running; the error makes that visible. The message
+ reports both positions: `script.msh:3:5: Duplicate definition 'id'; already
+ defined at lib/std.msh:62:5.`
+
### Fixed
- `gridSetCell` no longer silently drops a value whose type differs from the
diff --git a/doc/mshell.md b/doc/mshell.md
index c666c2f1..07f0b174 100644
--- a/doc/mshell.md
+++ b/doc/mshell.md
@@ -714,6 +714,11 @@ end
Metadata values must be static: strings (single or double quoted), integers, floats, booleans, or nested lists/dicts of the same. Interpolated strings are not allowed.
+Definition names must be unique.
+Defining a name that is already defined — by the same file, the standard library, or an init file — is an error,
+both at runtime and in the type checker.
+(Lookup is first-match-wins, so a duplicate would never take effect; the error makes that visible.)
+
### Tail-Call Optimization
Recursive definitions in tail position are optimized to avoid stack overflow.
diff --git a/mshell/Evaluator.go b/mshell/Evaluator.go
index 9c220940..eda38f95 100644
--- a/mshell/Evaluator.go
+++ b/mshell/Evaluator.go
@@ -475,6 +475,38 @@ func (state *EvalState) lookupDefinition(definitions []MShellDefinition, name st
return definitions[i], true
}
+// tokenPosStr formats a token's position as `path:line:col` (or `line:col`
+// when the source file is unknown, e.g. stdin) for use in error messages.
+func tokenPosStr(t Token) string {
+ if t.TokenFile != nil && t.TokenFile.Path != "" {
+ return fmt.Sprintf("%s:%d:%d", t.TokenFile.Path, t.Line, t.Column)
+ }
+ return fmt.Sprintf("%d:%d", t.Line, t.Column)
+}
+
+// FindDuplicateDefinition scans the given definition slices, in order, and
+// returns an error for the first name defined twice. Definition lookup is
+// first-match-wins, so a second definition of a name is never an override —
+// it would be silently dead code. Erroring keeps a script (or init file, or
+// interactive input) from redefining a name already taken by the standard
+// library, an earlier startup file, or itself.
+func FindDuplicateDefinition(defLists ...[]MShellDefinition) error {
+ seen := make(map[string]Token)
+ for _, defs := range defLists {
+ for i := range defs {
+ name := defs[i].Name
+ prev, exists := seen[name]
+ if !exists {
+ seen[name] = defs[i].NameToken
+ continue
+ }
+ return fmt.Errorf("%s: Duplicate definition '%s'; already defined at %s.\n",
+ tokenPosStr(defs[i].NameToken), name, tokenPosStr(prev))
+ }
+ }
+ return nil
+}
+
func (state *EvalState) AddCompletionDefinitions(definitions []MShellDefinition) {
if state.CompletionDefinitions == nil {
state.CompletionDefinitions = make(map[string][]MShellDefinition)
diff --git a/mshell/Lexer.go b/mshell/Lexer.go
index 7b6bf50c..66eee988 100644
--- a/mshell/Lexer.go
+++ b/mshell/Lexer.go
@@ -400,6 +400,7 @@ func (l *Lexer) makeToken(tokenType TokenType) Token {
Start: l.start,
Lexeme: lexeme,
Type: tokenType,
+ TokenFile: l.tokenFile,
}
}
diff --git a/mshell/Main.go b/mshell/Main.go
index ef9b9dca..d01ff600 100644
--- a/mshell/Main.go
+++ b/mshell/Main.go
@@ -181,6 +181,9 @@ func loadStartupFile(path string, description string, stack *MShellStack, contex
}
*definitions = append(*definitions, parsedFile.Definitions...)
+ if err := FindDuplicateDefinition(*definitions); err != nil {
+ return fmt.Errorf("error loading %s at %s: %w", description, path, err)
+ }
state.AddCompletionDefinitions(parsedFile.Definitions)
// Register enum constructors declared in this startup file, and retain the
// top-level items so the type checker can register the file's `type` and
@@ -861,6 +864,14 @@ func main() {
}
}
+ // Definition lookup is first-match-wins, so a script def whose name is
+ // already taken (by the stdlib, the init file, or the script itself)
+ // would be silently dead code. Reject it instead.
+ if err := FindDuplicateDefinition(allDefinitions); err != nil {
+ fmt.Fprint(os.Stderr, err.Error())
+ os.Exit(1)
+ }
+
if len(file.Items) == 0 {
os.Exit(0)
}
@@ -2980,6 +2991,12 @@ func (state *TermState) ExecuteCurrentCommand() (bool, int) {
term.Restore(state.stdInFd, &state.oldState)
if len(parsed.Definitions) > 0 {
+ // Definition lookup is first-match-wins, so a redefinition would be
+ // silently ignored rather than take effect; reject the input instead.
+ if err := FindDuplicateDefinition(state.stdLibDefs, parsed.Definitions); err != nil {
+ fmt.Fprint(os.Stderr, err.Error())
+ goto PromptPrint
+ }
state.stdLibDefs = append(state.stdLibDefs, parsed.Definitions...)
state.evalState.AddCompletionDefinitions(parsed.Definitions)
}
diff --git a/mshell/TypeCheckProgram.go b/mshell/TypeCheckProgram.go
index d5750877..2c0a0c3b 100644
--- a/mshell/TypeCheckProgram.go
+++ b/mshell/TypeCheckProgram.go
@@ -87,6 +87,12 @@ func (c *Checker) RegisterStdlibSigs(defs []MShellDefinition) {
for i := range defs {
def := &defs[i]
nameId := c.names.Intern(def.Name)
+ // Record the name even when the sig registration below is skipped:
+ // the runtime's first-match-wins lookup still resolves to this def,
+ // so a later def of the same name is a duplicate regardless.
+ if c.recordDefName(nameId, def) {
+ continue
+ }
if _, exists := c.nameBuiltins[nameId]; exists {
continue
}
@@ -95,6 +101,25 @@ func (c *Checker) RegisterStdlibSigs(defs []MShellDefinition) {
}
}
+// recordDefName registers a definition's name for duplicate detection. If the
+// name is already taken by an earlier definition, it records an error and
+// returns true (mirroring the runtime's FindDuplicateDefinition, where the
+// first definition wins and a duplicate would be silently dead code).
+func (c *Checker) recordDefName(nameId NameId, def *MShellDefinition) bool {
+ if prev, exists := c.defNameToks[nameId]; exists {
+ c.errors = append(c.errors, TypeError{
+ Kind: TErrTypeParse, Pos: def.NameToken,
+ Hint: "duplicate definition '" + def.Name + "'; already defined at " + tokenPosStr(prev),
+ })
+ return true
+ }
+ if c.defNameToks == nil {
+ c.defNameToks = make(map[NameId]Token)
+ }
+ c.defNameToks[nameId] = def.NameToken
+ return false
+}
+
// RegisterStartupTypes registers the `type` and `enum` declarations found in
// the startup files' top-level items (the stdlib, then the user init file),
// so the checked program sees the same declarations the runtime does. It runs
@@ -173,6 +198,9 @@ func (c *Checker) CheckProgram(file *MShellFile) {
})
continue
}
+ if c.recordDefName(nameId, def) {
+ continue
+ }
c.nameBuiltins[nameId] = append(c.nameBuiltins[nameId], sig)
}
// Pre-pass 3: type-check each def body against its declared sig.
diff --git a/mshell/TypeChecker.go b/mshell/TypeChecker.go
index e856c400..2bb712cd 100644
--- a/mshell/TypeChecker.go
+++ b/mshell/TypeChecker.go
@@ -109,6 +109,14 @@ type Checker struct {
// existing def or builtin.
enumMemberToks map[NameId]Token
+ // defNameToks records every registered definition name (value: the def's
+ // name token). Runtime definition lookup is first-match-wins, so a second
+ // def of a name is silently dead code, not an override; def registration
+ // checks this and rejects the duplicate, mirroring the runtime's
+ // FindDuplicateDefinition. Stdlib/init defs register before file defs,
+ // so a script redefining a stdlib name is caught too.
+ defNameToks map[NameId]Token
+
// Quote-body inference state (Phase 7). When inferring is true,
// applySig responds to stack underflow by synthesizing fresh type
// variables instead of reporting an error; those vars accumulate
diff --git a/tests/fail/duplicate_def.msh b/tests/fail/duplicate_def.msh
new file mode 100644
index 00000000..831b2e97
--- /dev/null
+++ b/tests/fail/duplicate_def.msh
@@ -0,0 +1,5 @@
+# Defining the same name twice is an error: definition lookup is
+# first-match-wins, so the second def would be silently dead code.
+def greet (-- str) "hi" end
+def greet (-- str) "yo" end
+greet wl
diff --git a/tests/fail/duplicate_def.msh.stderr b/tests/fail/duplicate_def.msh.stderr
new file mode 100644
index 00000000..74ff8249
--- /dev/null
+++ b/tests/fail/duplicate_def.msh.stderr
@@ -0,0 +1 @@
+4:5: Duplicate definition 'greet'; already defined at 3:5.
diff --git a/tests/success/enum_recursive_generic.msh b/tests/success/enum_recursive_generic.msh
index 1f2773dd..51fbf6c3 100644
--- a/tests/success/enum_recursive_generic.msh
+++ b/tests/success/enum_recursive_generic.msh
@@ -4,7 +4,7 @@
# payload (`node Tree Tree`) forever and overflows the stack.
enum Tree = leaf int | node Tree Tree end
-def id (q -- q) end
+def ident (q -- q) end
-3 leaf id drop
+3 leaf ident drop
"ok" wl
diff --git a/tests/typecheck_fail/duplicate_def.msh b/tests/typecheck_fail/duplicate_def.msh
new file mode 100644
index 00000000..0455839f
--- /dev/null
+++ b/tests/typecheck_fail/duplicate_def.msh
@@ -0,0 +1,4 @@
+# A name defined twice in one file is rejected by the checker.
+def greet (-- str) "hi" end
+def greet (-- str) "yo" end
+greet wl
diff --git a/tests/typecheck_fail/duplicate_def_stdlib.msh b/tests/typecheck_fail/duplicate_def_stdlib.msh
new file mode 100644
index 00000000..c8015faf
--- /dev/null
+++ b/tests/typecheck_fail/duplicate_def_stdlib.msh
@@ -0,0 +1,4 @@
+# Redefining a name already defined by the standard library is rejected:
+# lookup is first-match-wins, so this def could never take effect.
+def id (q -- q) end
+3 id drop
From 3b7686002dfc2d94d9663e3ca2470c149e56ca5e Mon Sep 17 00:00:00 2001
From: Mitchell Paulus
Date: Fri, 3 Jul 2026 10:01:53 -0500
Subject: [PATCH 29/32] PR polish: CHANGELOG gaps and a comment overclaim
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add the two user-facing branch changes missing from Unreleased: the
cyclic-value render error (str/toJson error instead of hanging) and
match type-checking inside inferred quotations ((match ...) map).
Also correct compareValues' doc comment: compare-0 coincides with
Equals only for orderable kinds — quotations and grids share a rank
and compare 0 while Equals still distinguishes them.
Co-Authored-By: Claude Fable 5
---
CHANGELOG.md | 8 ++++++++
mshell/MShellObject.go | 8 ++++++--
2 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 92a51b19..4bab1cdc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -31,6 +31,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
types yields `false` rather than an error (a genuinely incompatible
comparison is already a static type error), so the result no longer depends
on operand order and union members like `int | null` compare cleanly.
+- Converting a cyclic value (a container appended into itself) with `str` or
+ `toJson` now fails with a clear error instead of hanging forever. Internal
+ rendering (error messages, stack dumps) prints a cycle marker at the
+ back-reference instead.
+- A `match` used as the body of an inferred quotation (e.g.
+ `(match leaf n : @n, node a b : 0, end) map`) now type-checks; it previously
+ always failed with "stack underflow at 'match'", rejecting the canonical way
+ to consume enums and Maybe values inside `map`/`filter`/`each`.
- `uniq` now accepts a list of any value type (matching its `([t] -- [t])`
signature) and deduplicates by structural equality, instead of throwing at
runtime for non-primitive elements such as enums, dicts, and booleans.
diff --git a/mshell/MShellObject.go b/mshell/MShellObject.go
index aa060fad..54ed3d17 100644
--- a/mshell/MShellObject.go
+++ b/mshell/MShellObject.go
@@ -1233,8 +1233,12 @@ func equalsIter(a, b MShellObject) (bool, error) {
// interleaved, text lexically, dates chronologically, bytes bytewise).
// Structured values compare lexicographically: lists positionally (shorter
// prefix first), dicts by sorted key then value, enums by name then declaration
-// order then payloads. The order agrees with structural equality: compareValues
-// returns 0 exactly when the two values are Equals.
+// order then payloads. For those kinds the order agrees with structural
+// equality: compareValues returns 0 exactly when the two values are Equals.
+// Unorderable kinds (quotation, grid, ...) are the exception — they share a
+// rank and always compare 0, so a stable sort preserves their original order,
+// while Equals still distinguishes them (identity for quotations, cell-wise
+// for grids).
//
// The comparison is driven by an explicit work stack rather than recursion, so
// arbitrarily deep values (e.g. a long `node(node(...))` enum chain) cannot
From fc7ba60c50ecd09120001993014e87eeef7cdbb8 Mon Sep 17 00:00:00 2001
From: Mitchell Paulus
Date: Fri, 3 Jul 2026 10:08:42 -0500
Subject: [PATCH 30/32] Syntax highlighting: cover the enum keyword and new int
literals
- doc/base.html: style mshellENUM (new this branch) and mshellTYPE
(pre-existing gap) with the other declaration keywords, so
--html-highlighted enum/type snippets render like def/match/end.
- Sublime: add enum, type, and match to the keyword list, and 0o/0b
integer literal patterns alongside the existing hex one.
The VS Code grammar highlights no keywords at all (pre-existing
design), so it needs no enum entry to stay consistent.
Co-Authored-By: Claude Fable 5
---
doc/base.html | 2 +-
sublime/msh.sublime-syntax | 6 +++++-
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/doc/base.html b/doc/base.html
index 6e5491ee..2ea961f7 100644
--- a/doc/base.html
+++ b/doc/base.html
@@ -27,7 +27,7 @@
color: #0000FF;
}
- .mshellIF, .mshellELSE, .mshellELSESTAR, .mshellSTARIF, .mshellEND, .mshellDEF, .mshellMATCH {
+ .mshellIF, .mshellELSE, .mshellELSESTAR, .mshellSTARIF, .mshellEND, .mshellDEF, .mshellMATCH, .mshellENUM, .mshellTYPE {
color: #0F4C81;
font-weight: bold;
}
diff --git a/sublime/msh.sublime-syntax b/sublime/msh.sublime-syntax
index 965d4832..3f9d4a07 100644
--- a/sublime/msh.sublime-syntax
+++ b/sublime/msh.sublime-syntax
@@ -26,7 +26,7 @@ contexts:
scope: keyword.control.msh
- match: '\\*if'
scope: keyword.control.msh
- - match: '\\b(def|end|if|iff|loop|read|str|break|continue|else)\\b'
+ - match: '\\b(def|end|if|iff|loop|read|str|break|continue|else|match|enum|type)\\b'
scope: keyword.control.msh
- match: '\\b(and|or|not)\\b'
scope: keyword.operator.word.msh
@@ -48,6 +48,10 @@ contexts:
numbers:
- match: '\\b0[xX][0-9A-Fa-f]+\\b'
scope: constant.numeric.integer.hex.msh
+ - match: '\\b0[oO][0-7]+\\b'
+ scope: constant.numeric.integer.octal.msh
+ - match: '\\b0[bB][01]+\\b'
+ scope: constant.numeric.integer.binary.msh
- match: '\\b\\d+\\.\\d*(?:[eE][+-]?\\d+)?\\b'
scope: constant.numeric.float.msh
- match: '\\b\\d+(?:[eE][+-]?\\d+)?\\b'
From b28b00aa8758bbe5476ca090c823134823464156 Mon Sep 17 00:00:00 2001
From: Mitchell Paulus
Date: Fri, 3 Jul 2026 11:41:33 -0500
Subject: [PATCH 31/32] VS Code grammar: highlight keywords, word operators,
types, numbers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The grammar only highlighted comments, booleans, strings, and
variables — no keyword rules at all, so def/end/match and the new
enum/type rendered as plain text. Add the same rule set as the
Sublime grammar: control keywords (including enum and type and the
else*/*if forms), and/or/not, soe, the int/float/bool type names,
and numeric literals including the new 0x/0o/0b integer forms.
Rules are placed after the variable patterns so a same-position tie
like `str!` keeps resolving to the variable-store rule.
Co-Authored-By: Claude Fable 5
---
code/syntaxes/mshell.textmate.json | 40 ++++++++++++++++++++++++++++++
1 file changed, 40 insertions(+)
diff --git a/code/syntaxes/mshell.textmate.json b/code/syntaxes/mshell.textmate.json
index b7bfb655..678189e1 100644
--- a/code/syntaxes/mshell.textmate.json
+++ b/code/syntaxes/mshell.textmate.json
@@ -45,6 +45,46 @@
{
"name": "variable.other.set.mshell",
"match": "@[a-zA-Z0-9_]+"
+ },
+ {
+ "name": "keyword.control.mshell",
+ "match": "else\\*|\\*if"
+ },
+ {
+ "name": "keyword.control.mshell",
+ "match": "\\b(def|end|if|iff|loop|read|str|break|continue|else|match|enum|type)\\b"
+ },
+ {
+ "name": "keyword.operator.word.mshell",
+ "match": "\\b(and|or|not)\\b"
+ },
+ {
+ "name": "keyword.other.mshell",
+ "match": "\\bsoe\\b"
+ },
+ {
+ "name": "storage.type.mshell",
+ "match": "\\b(int|float|bool)\\b"
+ },
+ {
+ "name": "constant.numeric.integer.hex.mshell",
+ "match": "\\b0[xX][0-9A-Fa-f]+\\b"
+ },
+ {
+ "name": "constant.numeric.integer.octal.mshell",
+ "match": "\\b0[oO][0-7]+\\b"
+ },
+ {
+ "name": "constant.numeric.integer.binary.mshell",
+ "match": "\\b0[bB][01]+\\b"
+ },
+ {
+ "name": "constant.numeric.float.mshell",
+ "match": "\\b\\d+\\.\\d*(?:[eE][+-]?\\d+)?\\b"
+ },
+ {
+ "name": "constant.numeric.integer.mshell",
+ "match": "\\b\\d+(?:[eE][+-]?\\d+)?\\b"
}
],
"repository": {
From 7b7e03ea4935491a41cce133a62e33b584d97857 Mon Sep 17 00:00:00 2001
From: Mitchell Paulus
Date: Fri, 3 Jul 2026 20:15:21 -0500
Subject: [PATCH 32/32] Bring the enum design doc up to the shipped design
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The doc predated several final decisions and still read as a proposal.
Update it to be the accurate decision record:
- Status header: implemented (V1), with a map of what shipped vs. what
stayed future work.
- Fix examples that no longer parse: `read`/`write` are lexer keywords
and cannot be member names; the §9/§10.A examples were missing the
mandatory `end` terminator the doc itself decided on in §6.
- §8: the shipped name-resolution rule replaced the draft's
CmdResult.ok qualification — member names are globally unique and
member/def collisions are declaration errors in both directions.
- §9: record the shipped serialization (externally-tagged toJson,
member(payload) str form), structural equality, and declaration-order
sorting.
- §10: per-tier status (A shipped; shape payload types shipped, their
destructuring sugar not; B/D/E/F future).
- §11: rewrite the end-to-end example against the shipped feature — an
explicit parseMode boundary word instead of the unimplemented
Mode.decode/Mode.values string backing. Example verified to run and
type-check.
- §12: open questions annotated with how each resolved.
Design-dir edit explicitly requested (doc was AI-authored).
Co-Authored-By: Claude Fable 5
---
design/literal_or_enum_typing.html | 157 ++++++++++++++++++-----------
1 file changed, 99 insertions(+), 58 deletions(-)
diff --git a/design/literal_or_enum_typing.html b/design/literal_or_enum_typing.html
index d5d738ab..d9bc41e3 100644
--- a/design/literal_or_enum_typing.html
+++ b/design/literal_or_enum_typing.html
@@ -163,11 +163,14 @@
The motivating request was type ConfigOption = "string1" | "string2". Working
@@ -198,7 +201,7 @@
2. Where mshell stands today
Surface type Name = <expr>, | unions, as casts | have | TypeExpr.go, TypeParseIntegration.go |
One generative tagged sum type: Maybe[t] = just t | none | have | MShellObject.go:172; just/none |
| Match with constructor destructuring + exhaustiveness | have | TypeBranch.go; match |
- | A declaration that introduces new constructors | missing | — (this proposal) |
+ | A declaration that introduces new constructors | have | enum — TypeEnum.go, MShellEnum in MShellObject.go |
@@ -274,9 +277,10 @@ 4. Prior art: Haskell, Rust, TypeScript
5. Decision
Add a single generative tagged sum type declaration. Keep type exactly as
it is (the transparent / branded structural form). The colloquial "enum of constants" is the all-nullary
- special case of the one mechanism — not a second concept. This subsumes Maybe (which
- becomes "the built-in enum Maybe[t] = just t | none") and unlocks "make illegal states
- unrepresentable" for command results, parse results, and JSON.
+ special case of the one mechanism — not a second concept. This conceptually subsumes Maybe
+ ("the built-in enum Maybe[t] = just t | none"; it stays a distinct built-in until enums
+ grow type parameters — see §10.E) and unlocks "make illegal states unrepresentable" for command
+ results, parse results, and JSON.
6. Syntax
@@ -299,7 +303,7 @@ Declaration form — |-separated members, closed by e
terminator def, if, match, and loop already use. A
nullary member is just a bare name.