From faae1ccd55f8d6c307f2354198fedb20d1578b19 Mon Sep 17 00:00:00 2001 From: Mitchell Paulus Date: Wed, 1 Jul 2026 09:44:21 -0500 Subject: [PATCH 1/3] Typecheck a literal dict key --- CHANGELOG.md | 6 +++ doc/mshell.md | 6 ++- doc/type_system.inc.html | 2 + mshell/OptionalDictKeys_test.go | 29 +++++++++++++ mshell/TypeCheckProgram.go | 74 +++++++++++++++++++++++++++++++++ mshell/TypeChecker.go | 27 ++++++++++++ 6 files changed, 143 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8429057..4a3d764 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 `httpGet`/`httpPost`, grid `groupBy` aggregation specs, and the `zip*` option dicts now declare their required and optional keys. A required value satisfies an optional parameter, but an optional value does not satisfy a required one. +- The type checker now resolves a string-literal key to `get` against a shape + the same way the `:name` getter does: `resp "body" get` yields the declared + `body` field's type instead of the union of every field type, so + `httpGet? "body" get?` type-checks as `bytes`. The literal must directly + precede `get`; a key computed at runtime still returns the generic + `Maybe[value]`. - The language server now reports an informational diagnostic when a `?` unwrap is statically guaranteed to fail — unwrapping a getter (`:k?`) for a field a concrete shape does not declare, or unwrapping a bare `none`. The hint is diff --git a/doc/mshell.md b/doc/mshell.md index ee26cde..d97b616 100644 --- a/doc/mshell.md +++ b/doc/mshell.md @@ -594,7 +594,11 @@ always present with a possibly-`none` value. A required value satisfies an optional parameter, but an optional value does not satisfy a required one. Reading is unchanged (`:field` is `Maybe[T]`, `:field?` unwraps it); the language server flags `:field?` on a field a concrete shape does not declare, since that -unwrap always fails. 
+unwrap always fails. A string literal passed to `get` resolves the same way as +the getter: `resp "body" get` reads the declared `body` field's type, not the +union of every field type, so it is interchangeable with `resp :body`. This +applies only when the literal directly precedes `get`; a key computed at runtime +returns the generic `Maybe[value]`. ```mshell type Request = {url: str, timeout?: int} diff --git a/doc/type_system.inc.html b/doc/type_system.inc.html index 6aa2649..14838c0 100644 --- a/doc/type_system.inc.html +++ b/doc/type_system.inc.html @@ -244,6 +244,8 @@

Optional Fields :field returns Maybe[T] and :field? unwraps it (failing at runtime on a missing key), for required and optional fields alike. Because of that, the language server emits an informational diagnostic when :field? is used on a field a concrete shape does not declare — that unwrap is guaranteed to fail. Homogeneous dictionaries ({str: T}) return a genuine Maybe[T] for any key and are never flagged. +A string literal passed to the get built-in resolves the same way as the getter — resp "body" get reads the declared body field's type rather than the union of every field type, so it is interchangeable with resp :body. +This applies only when the literal directly precedes get; a key computed at runtime returns the generic Maybe[value].

Nested Dictionaries §

diff --git a/mshell/OptionalDictKeys_test.go b/mshell/OptionalDictKeys_test.go index 79ce2b9..d3e61c2 100644 --- a/mshell/OptionalDictKeys_test.go +++ b/mshell/OptionalDictKeys_test.go @@ -215,3 +215,32 @@ func TestHttpGetUrlOnlyAccepted(t *testing.T) { t.Fatalf("httpGet with only a url should type-check; got %d", n) } } + +// A string literal key to `get` resolves the shape field just like the +// `:name` getter: `"body" get?` yields the response body's `bytes`, which +// writeFile accepts — it must NOT collapse the shape to the union of all +// field value types. +func TestGetLiteralKeyResolvesShapeField(t *testing.T) { + src := `{ "url": "https://example.com" } httpGet? "body" get? "out.bin" writeFile` + if n := fatalErrorCount(allCheckerErrors(t, src)); n != 0 { + t.Fatalf("`\"body\" get?` should resolve to bytes and satisfy writeFile; got %d errors", n) + } +} + +// The literal-key path is precise, not permissive: `"status" get?` resolves +// to the field's `int`, so feeding it to writeFile (str | bytes) is rejected. +func TestGetLiteralKeyIsFieldPrecise(t *testing.T) { + src := `{ "url": "https://example.com" } httpGet? "status" get? "out.bin" writeFile` + if n := fatalErrorCount(allCheckerErrors(t, src)); n == 0 { + t.Fatal("`\"status\" get?` resolves to int, which writeFile must reject") + } +} + +// The literal must directly precede `get`; an intervening op breaks the +// association and `get` falls back to the generic dict overload. +func TestGetLiteralKeyRequiresAdjacency(t *testing.T) { + src := `{ "url": "https://example.com" } httpGet? "status" dup drop get? 
"out.bin" writeFile` + if n := fatalErrorCount(allCheckerErrors(t, src)); n == 0 { + t.Fatal("non-adjacent literal should fall back to the union-typed get and be rejected by writeFile") + } +} diff --git a/mshell/TypeCheckProgram.go b/mshell/TypeCheckProgram.go index 8771674..0c3b20e 100644 --- a/mshell/TypeCheckProgram.go +++ b/mshell/TypeCheckProgram.go @@ -469,6 +469,12 @@ func (c *Checker) checkParseItem(item MShellParseItem) { if c.diverged { return } + // A pending string literal is consumable only by a `get` token that + // directly follows it. The Token case routes through checkOne, which + // manages pendingStr itself; any non-Token item breaks the adjacency. + if _, isTok := item.(Token); !isTok { + c.pendingStr = nil + } switch it := item.(type) { case *MShellTypeDecl: @@ -1639,6 +1645,74 @@ func (c *Checker) checkFormatBlock(src string, callSite Token, baseLine, baseCol c.stack.items = outerStack } +// stringLiteralValue returns the parsed content of a STRING / +// SINGLEQUOTESTRING token — the same key value the runtime would compute — +// or ("", false) if the token is not a string literal or fails to parse. +func (c *Checker) stringLiteralValue(tok Token) (string, bool) { + switch tok.Type { + case STRING: + v, err := ParseRawString(tok.Lexeme) + if err != nil { + return "", false + } + return v, true + case SINGLEQUOTESTRING: + if len(tok.Lexeme) < 2 { + return "", false + } + return tok.Lexeme[1 : len(tok.Lexeme)-1], true + } + return "", false +} + +// tryGetLiteralKey resolves a `get` whose key is a string literal that +// directly precedes it, against a shape receiver — so `"body" get` reads the +// declared `body` field type exactly like the `:body` getter, instead of +// collapsing the shape to a dict and yielding the union of every field type. +// It only fires for a shape (or a brand over one); dicts, GridRows, and +// still-generic receivers fall through to the ordinary `get` overloads. 
+func (c *Checker) tryGetLiteralKey(lit *pendingStrLit) bool { + if lit == nil || c.inferring { + return false + } + // The literal must still be the top of stack, with a receiver beneath it. + if c.stack.Len() < 2 || lit.index != c.stack.Len()-1 { + return false + } + recv := c.subst.Apply(c.arena, c.stack.items[c.stack.Len()-2]) + fieldType, ok := c.shapeFieldType(recv, c.names.Intern(lit.value)) + if !ok { + return false + } + // Pop the key and the receiver; push Maybe[field]. An absent field + // resolves to Maybe[bottom] (see shapeFieldType), so a following `?` + // is flagged as an always-failing unwrap, matching the getter. + c.stack.items = c.stack.items[:c.stack.Len()-2] + c.stack.Push(c.arena.MakeMaybe(fieldType)) + return true +} + +// shapeFieldType resolves field `name` on a shape receiver, mirroring the +// TKShape/TKBrand handling in lookupGetterValueType: a declared field yields +// its type; an undeclared field on a concrete shape yields bottom (the key is +// provably absent, so the get can only be None). Returns ok=false for any +// receiver that is not a shape, so callers defer to generic dispatch. +func (c *Checker) shapeFieldType(recv TypeId, name NameId) (TypeId, bool) { + n := c.arena.Node(recv) + switch n.Kind { + case TKShape: + for _, field := range c.arena.shapeFields[n.Extra] { + if field.Name == name { + return field.Type, true + } + } + return TidBottom, true + case TKBrand: + return c.shapeFieldType(c.subst.Apply(c.arena, TypeId(n.B)), name) + } + return TidNothing, false +} + // lookupGetterValueType returns the value type produced by a `:name` // getter applied to a value of type t. The result is the inner V of the // returned Maybe[V]; callers wrap it. diff --git a/mshell/TypeChecker.go b/mshell/TypeChecker.go index e62263b..da986ee 100644 --- a/mshell/TypeChecker.go +++ b/mshell/TypeChecker.go @@ -125,9 +125,25 @@ type Checker struct { // being reported as an unknown identifier when listDepth > 0. 
listDepth int + // pendingStr records a string literal that was just pushed onto the + // stack, so an immediately following `get` can resolve a shape field by + // its literal key — matching what the `:name` getter already does. It is + // set only by a STRING/SINGLEQUOTESTRING token and cleared at the start + // of the next token (or any non-Token parse item), so it is valid only + // for a literal that directly precedes its consumer. + pendingStr *pendingStrLit + currentFn *FnContext } +// pendingStrLit is a string literal sitting on top of the type stack. index +// is the stack position it occupies; value is the parsed string content +// (escapes resolved, quotes stripped) — the same key the runtime would use. +type pendingStrLit struct { + index int + value string +} + // NewChecker constructs a fresh checker with the given arena and name table. // The builtin sig table is built once here. func NewChecker(arena *TypeArena, names *NameTable) *Checker { @@ -195,6 +211,11 @@ func (c *Checker) checkOne(tok Token) { if c.diverged { return } + // A pending string literal is only usable by a `get` that directly + // follows it. Capture it for this token, then clear it so it never + // leaks past the immediately following op. 
+ prevStr := c.pendingStr + c.pendingStr = nil switch tok.Type { case INTEGER: c.stack.Push(TidInt) @@ -204,6 +225,9 @@ func (c *Checker) checkOne(tok Token) { return case STRING, SINGLEQUOTESTRING: c.stack.Push(TidStr) + if v, ok := c.stringLiteralValue(tok); ok { + c.pendingStr = &pendingStrLit{index: c.stack.Len() - 1, value: v} + } return case TRUE, FALSE: c.stack.Push(TidBool) @@ -365,6 +389,9 @@ func (c *Checker) checkOne(tok Token) { if tok.Lexeme == "join" && c.tryGridJoin() { return } + if tok.Lexeme == "get" && c.tryGetLiteralKey(prevStr) { + return + } if tok.Lexeme == "pivot" && c.tryPivot(tok) { return } From 23e6921312ea3e1305a92e851c8b87195a06bf97 Mon Sep 17 00:00:00 2001 From: Mitchell Paulus Date: Wed, 1 Jul 2026 11:04:42 -0500 Subject: [PATCH 2/3] Resolve a literal `get` key against a shape Track a string literal's value as a `str` refinement (TKStrLit) so a `get` with a statically known key resolves a shape field the same way the `:name` getter does. `httpGet? "body" get?` now type-checks as `bytes` instead of the union of every response field. Because the value rides the stack as a type, it also resolves when the key reaches `get` through a variable, not only when written inline. The literal is a subtype of `str`: `unify` and every container constructor widen it back to `TidStr`, so it behaves as `str` everywhere else and never escapes into lists, dicts, shapes, unions, or match subjects. Programs without string literals are unaffected; string-heavy programs allocate less, since literal-key resolution avoids the shape-to-union collapse the generic `get` path performs. 
Co-Authored-By: Claude Opus 4.8 --- CHANGELOG.md | 13 ++++---- doc/mshell.md | 11 ++++--- doc/type_system.inc.html | 4 +-- mshell/OptionalDictKeys_test.go | 22 ++++++++++--- mshell/Type.go | 56 +++++++++++++++++++++++++++++++-- mshell/TypeCheckProgram.go | 37 +++++++++++----------- mshell/TypeChecker.go | 52 +++++++++++++++--------------- mshell/TypeChecker_test.go | 4 ++- mshell/TypeError.go | 4 +++ 9 files changed, 138 insertions(+), 65 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a3d764..4510fdb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,12 +25,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 `httpGet`/`httpPost`, grid `groupBy` aggregation specs, and the `zip*` option dicts now declare their required and optional keys. A required value satisfies an optional parameter, but an optional value does not satisfy a required one. -- The type checker now resolves a string-literal key to `get` against a shape - the same way the `:name` getter does: `resp "body" get` yields the declared - `body` field's type instead of the union of every field type, so - `httpGet? "body" get?` type-checks as `bytes`. The literal must directly - precede `get`; a key computed at runtime still returns the generic - `Maybe[value]`. +- The type checker now tracks the value of a string literal (as a `str` + refinement) so a `get` with a known key resolves a shape field the same way + the `:name` getter does: `resp "body" get` yields the declared `body` field's + type instead of the union of every field type, so `httpGet? "body" get?` + type-checks as `bytes`. Because the key rides the stack as a type, it resolves + even when the literal reaches `get` through a variable; a key computed at + runtime still returns the generic `Maybe[value]`. 
- The language server now reports an informational diagnostic when a `?` unwrap is statically guaranteed to fail — unwrapping a getter (`:k?`) for a field a concrete shape does not declare, or unwrapping a bare `none`. The hint is diff --git a/doc/mshell.md b/doc/mshell.md index d97b616..e2592b6 100644 --- a/doc/mshell.md +++ b/doc/mshell.md @@ -594,11 +594,12 @@ always present with a possibly-`none` value. A required value satisfies an optional parameter, but an optional value does not satisfy a required one. Reading is unchanged (`:field` is `Maybe[T]`, `:field?` unwraps it); the language server flags `:field?` on a field a concrete shape does not declare, since that -unwrap always fails. A string literal passed to `get` resolves the same way as -the getter: `resp "body" get` reads the declared `body` field's type, not the -union of every field type, so it is interchangeable with `resp :body`. This -applies only when the literal directly precedes `get`; a key computed at runtime -returns the generic `Maybe[value]`. +unwrap always fails. A string literal carries its value as a `str` refinement, +so a `get` with a known key resolves the same way as the getter: `resp "body" +get` reads the declared `body` field's type, not the union of every field type, +so it is interchangeable with `resp :body`. The key resolves even when it +reaches `get` through a variable (`"body" k! resp @k get`); a key computed at +runtime returns the generic `Maybe[value]`. ```mshell type Request = {url: str, timeout?: int} diff --git a/doc/type_system.inc.html b/doc/type_system.inc.html index 14838c0..bae627c 100644 --- a/doc/type_system.inc.html +++ b/doc/type_system.inc.html @@ -244,8 +244,8 @@

Optional Fields :field returns Maybe[T] and :field? unwraps it (failing at runtime on a missing key), for required and optional fields alike. Because of that, the language server emits an informational diagnostic when :field? is used on a field a concrete shape does not declare — that unwrap is guaranteed to fail. Homogeneous dictionaries ({str: T}) return a genuine Maybe[T] for any key and are never flagged. -A string literal passed to the get built-in resolves the same way as the getter — resp "body" get reads the declared body field's type rather than the union of every field type, so it is interchangeable with resp :body. -This applies only when the literal directly precedes get; a key computed at runtime returns the generic Maybe[value]. +A string literal carries its value as a str refinement, so a get with a known key resolves the same way as the getter — resp "body" get reads the declared body field's type rather than the union of every field type, so it is interchangeable with resp :body. +The key resolves even when it reaches get through a variable ("body" k! resp @k get); a key computed at runtime returns the generic Maybe[value].

Nested Dictionaries §

diff --git a/mshell/OptionalDictKeys_test.go b/mshell/OptionalDictKeys_test.go index d3e61c2..22c0786 100644 --- a/mshell/OptionalDictKeys_test.go +++ b/mshell/OptionalDictKeys_test.go @@ -236,11 +236,23 @@ func TestGetLiteralKeyIsFieldPrecise(t *testing.T) { } } -// The literal must directly precede `get`; an intervening op breaks the -// association and `get` falls back to the generic dict overload. -func TestGetLiteralKeyRequiresAdjacency(t *testing.T) { - src := `{ "url": "https://example.com" } httpGet? "status" dup drop get? "out.bin" writeFile` +// The key value rides the stack as a `str` refinement, so it resolves even +// when it reaches `get` through a variable rather than inline — the literal +// need not be adjacent to `get`. +func TestGetLiteralKeyThroughVariable(t *testing.T) { + src := `{ "url": "https://example.com" } httpGet? "body" k! @k get? "out.bin" writeFile` + if n := fatalErrorCount(allCheckerErrors(t, src)); n != 0 { + t.Fatalf("a literal key bound to a variable should still resolve `body` to bytes; got %d errors", n) + } +} + +// A key whose value is not statically known (here an env var, a plain `str`) +// cannot resolve a specific field, so `get` falls back to the generic dict +// overload and yields the union of every field value type — which writeFile +// rejects. +func TestGetDynamicKeyStaysGeneric(t *testing.T) { + src := `{ "url": "https://example.com" } httpGet? $KEY get? 
"out.bin" writeFile` if n := fatalErrorCount(allCheckerErrors(t, src)); n == 0 { - t.Fatal("non-adjacent literal should fall back to the union-typed get and be rejected by writeFile") + t.Fatal("a non-literal key should fall back to the union-typed get and be rejected by writeFile") } } diff --git a/mshell/Type.go b/mshell/Type.go index 08d725a..674a9e0 100644 --- a/mshell/Type.go +++ b/mshell/Type.go @@ -59,6 +59,14 @@ const ( TKGrid // Extra = index into gridSchemas (0 = unknown schema) TKGridView // Extra = index into gridSchemas (0 = unknown schema) TKGridRow // Extra = index into gridSchemas (0 = unknown schema) + + // TKStrLit is a `str` refined with a statically known value: A holds the + // interned NameId of the literal content. It is a subtype of `str` — + // unify and every container constructor widen it back to TidStr — so it + // behaves exactly like `str` everywhere except where a known key matters: + // `get` reads it off the stack to resolve a shape field by name, the same + // resolution the `:name` getter does from its token. + TKStrLit // A = NameId of the literal string content ) // String returns a debug name for a TypeKind. @@ -94,6 +102,8 @@ func (k TypeKind) String() string { return "GridView" case TKGridRow: return "GridRow" + case TKStrLit: + return "StrLit" } return "Unknown" } @@ -234,17 +244,44 @@ func (a *TypeArena) Kind(id TypeId) TypeKind { // MakeMaybe returns the canonical TypeId for Maybe[inner]. If a Maybe of // the same inner type was constructed before, the existing id is returned. func (a *TypeArena) MakeMaybe(inner TypeId) TypeId { - return a.intern(TKMaybe, uint32(inner), 0, 0) + return a.intern(TKMaybe, uint32(a.WidenStrLit(inner)), 0, 0) } // MakeList returns the canonical TypeId for [elem]. func (a *TypeArena) MakeList(elem TypeId) TypeId { - return a.intern(TKList, uint32(elem), 0, 0) + return a.intern(TKList, uint32(a.WidenStrLit(elem)), 0, 0) } // MakeDict returns the canonical TypeId for {key: value}. 
func (a *TypeArena) MakeDict(key, value TypeId) TypeId { - return a.intern(TKDict, uint32(key), uint32(value), 0) + return a.intern(TKDict, uint32(a.WidenStrLit(key)), uint32(a.WidenStrLit(value)), 0) +} + +// MakeStrLit returns the canonical TypeId for a `str` refined to the literal +// value named by `name`. It is a subtype of TidStr. +func (a *TypeArena) MakeStrLit(name NameId) TypeId { + return a.intern(TKStrLit, uint32(name), 0, 0) +} + +// StrLitName returns the interned literal value of a TKStrLit type, or +// (0, false) if id is not a string literal. +func (a *TypeArena) StrLitName(id TypeId) (NameId, bool) { + n := a.Node(id) + if n.Kind != TKStrLit { + return 0, false + } + return NameId(n.A), true +} + +// WidenStrLit widens a top-level string-literal refinement to plain `str`; +// any other type is returned unchanged. Container constructors call this (and +// unify applies the same widening inline) so a literal never escapes into a +// composite type — it stays observable only where a known key is read. +func (a *TypeArena) WidenStrLit(id TypeId) TypeId { + if a.Node(id).Kind == TKStrLit { + return TidStr + } + return id } // MakeVar returns the canonical TypeId for the generic type variable v. @@ -283,6 +320,18 @@ func (a *TypeArena) MakeCommand(argv TypeId, stdout, stderr CommandCaptureMode) // before lookup so two equivalent shapes always share a TypeId. A duplicate // field name is a programmer error and panics. func (a *TypeArena) MakeShape(fields []ShapeField) TypeId { + // A field never holds a string-literal refinement; widen so shapes stay + // keyed on plain value types (and hash-cons identically regardless of + // whether a field value arrived as a literal). + for i := range fields { + if w := a.WidenStrLit(fields[i].Type); w != fields[i].Type { + fields = append([]ShapeField(nil), fields...) 
+ for j := range fields { + fields[j].Type = a.WidenStrLit(fields[j].Type) + } + break + } + } normalized := normalizeShapeFields(fields) key := encodeShapeKey(normalized) if id, ok := a.cons[key]; ok { @@ -471,6 +520,7 @@ func (a *TypeArena) Len() int { func (a *TypeArena) flattenAndCanonicalizeUnion(arms []TypeId) []TypeId { out := make([]TypeId, 0, len(arms)) for _, arm := range arms { + arm = a.WidenStrLit(arm) n := a.Node(arm) if n.Kind == TKUnion && n.A == 0 { // Unbranded inner union: flatten its arms. diff --git a/mshell/TypeCheckProgram.go b/mshell/TypeCheckProgram.go index 0c3b20e..94038f0 100644 --- a/mshell/TypeCheckProgram.go +++ b/mshell/TypeCheckProgram.go @@ -469,12 +469,6 @@ func (c *Checker) checkParseItem(item MShellParseItem) { if c.diverged { return } - // A pending string literal is consumable only by a `get` token that - // directly follows it. The Token case routes through checkOne, which - // manages pendingStr itself; any non-Token item breaks the adjacency. - if _, isTok := item.(Token); !isTok { - c.pendingStr = nil - } switch it := item.(type) { case *MShellTypeDecl: @@ -1190,7 +1184,10 @@ func (c *Checker) checkMatchBlock(matchBlock *MShellParseMatchBlock) { }) return } - subject := c.stack.items[c.stack.Len()-1] + // Widen a string-literal subject to `str`: match arms and the + // exhaustiveness check compare against `str` by type id, and the literal + // value carries no meaning for pattern matching. + subject := c.arena.WidenStrLit(c.stack.items[c.stack.Len()-1]) entry := c.captureBranch() if len(matchBlock.Arms) == 0 { @@ -1665,22 +1662,26 @@ func (c *Checker) stringLiteralValue(tok Token) (string, bool) { return "", false } -// tryGetLiteralKey resolves a `get` whose key is a string literal that -// directly precedes it, against a shape receiver — so `"body" get` reads the -// declared `body` field type exactly like the `:body` getter, instead of -// collapsing the shape to a dict and yielding the union of every field type. 
-// It only fires for a shape (or a brand over one); dicts, GridRows, and -// still-generic receivers fall through to the ordinary `get` overloads. -func (c *Checker) tryGetLiteralKey(lit *pendingStrLit) bool { - if lit == nil || c.inferring { +// tryGetLiteralKey resolves a `get` whose key is a known string literal +// (a TKStrLit on top of the stack) against a shape receiver — so `"body" get` +// reads the declared `body` field type exactly like the `:body` getter, +// instead of collapsing the shape to a dict and yielding the union of every +// field type. Because the key's value rides the stack as a type, this also +// fires when the literal reached `get` through a variable (`k! ... @k get`), +// not only when it is written inline. It only fires for a shape (or a brand +// over one); dicts, GridRows, and still-generic receivers fall through to the +// ordinary `get` overloads. +func (c *Checker) tryGetLiteralKey() bool { + if c.inferring || c.stack.Len() < 2 { return false } - // The literal must still be the top of stack, with a receiver beneath it. - if c.stack.Len() < 2 || lit.index != c.stack.Len()-1 { + key := c.subst.Apply(c.arena, c.stack.items[c.stack.Len()-1]) + name, ok := c.arena.StrLitName(key) + if !ok { return false } recv := c.subst.Apply(c.arena, c.stack.items[c.stack.Len()-2]) - fieldType, ok := c.shapeFieldType(recv, c.names.Intern(lit.value)) + fieldType, ok := c.shapeFieldType(recv, name) if !ok { return false } diff --git a/mshell/TypeChecker.go b/mshell/TypeChecker.go index da986ee..2ac50e8 100644 --- a/mshell/TypeChecker.go +++ b/mshell/TypeChecker.go @@ -125,25 +125,9 @@ type Checker struct { // being reported as an unknown identifier when listDepth > 0. listDepth int - // pendingStr records a string literal that was just pushed onto the - // stack, so an immediately following `get` can resolve a shape field by - // its literal key — matching what the `:name` getter already does. 
It is - // set only by a STRING/SINGLEQUOTESTRING token and cleared at the start - // of the next token (or any non-Token parse item), so it is valid only - // for a literal that directly precedes its consumer. - pendingStr *pendingStrLit - currentFn *FnContext } -// pendingStrLit is a string literal sitting on top of the type stack. index -// is the stack position it occupies; value is the parsed string content -// (escapes resolved, quotes stripped) — the same key the runtime would use. -type pendingStrLit struct { - index int - value string -} - // NewChecker constructs a fresh checker with the given arena and name table. // The builtin sig table is built once here. func NewChecker(arena *TypeArena, names *NameTable) *Checker { @@ -211,11 +195,6 @@ func (c *Checker) checkOne(tok Token) { if c.diverged { return } - // A pending string literal is only usable by a `get` that directly - // follows it. Capture it for this token, then clear it so it never - // leaks past the immediately following op. - prevStr := c.pendingStr - c.pendingStr = nil switch tok.Type { case INTEGER: c.stack.Push(TidInt) @@ -224,9 +203,13 @@ func (c *Checker) checkOne(tok Token) { c.stack.Push(TidFloat) return case STRING, SINGLEQUOTESTRING: - c.stack.Push(TidStr) + // Carry the known value as a `str` refinement so a later `get` can resolve + // a shape field by key, as the `:name` getter does from its token. It behaves + // as `str` everywhere else — unify and every container constructor widen it. 
if v, ok := c.stringLiteralValue(tok); ok { - c.pendingStr = &pendingStrLit{index: c.stack.Len() - 1, value: v} + c.stack.Push(c.arena.MakeStrLit(c.names.Intern(v))) + } else { + c.stack.Push(TidStr) } return case TRUE, FALSE: @@ -389,7 +372,7 @@ func (c *Checker) checkOne(tok Token) { if tok.Lexeme == "join" && c.tryGridJoin() { return } - if tok.Lexeme == "get" && c.tryGetLiteralKey(prevStr) { + if tok.Lexeme == "get" && c.tryGetLiteralKey() { return } if tok.Lexeme == "pivot" && c.tryPivot(tok) { @@ -797,7 +780,7 @@ func (c *Checker) tryRedirect(tok Token) bool { if c.stack.Len() < 2 { return false } - target := c.subst.Apply(c.arena, c.stack.items[c.stack.Len()-1]) + target := c.arena.WidenStrLit(c.subst.Apply(c.arena, c.stack.items[c.stack.Len()-1])) if target != TidStr && target != TidPath && target != TidBytes { return false } @@ -906,6 +889,25 @@ func (c *Checker) unify(got, want TypeId) bool { gn := c.arena.Node(got) wn := c.arena.Node(want) + // A string literal is a subtype of `str`; widen both sides so a literal + // unifies exactly as `str` (including binding a free variable to `str`, + // not the narrower literal). Reusing the nodes loaded just above, the + // common case — neither side a literal — costs only two kind checks. + if gn.Kind == TKStrLit { + got = TidStr + if got == want { + return true + } + gn = c.arena.Node(TidStr) + } + if wn.Kind == TKStrLit { + want = TidStr + if got == want { + return true + } + wn = c.arena.Node(TidStr) + } + // Variable cases: bind whichever side is still a free variable. After // Apply above, a TKVar here is guaranteed unbound. 
if gn.Kind == TKVar { diff --git a/mshell/TypeChecker_test.go b/mshell/TypeChecker_test.go index da1f640..eb79177 100644 --- a/mshell/TypeChecker_test.go +++ b/mshell/TypeChecker_test.go @@ -118,7 +118,9 @@ func TestCheckerLiteralsPushPrimitives(t *testing.T) { t.Fatalf("stack len: want %d, got %d", len(want), len(got)) } for i := range want { - if got[i] != want[i] { + // A string literal pushes a `str` refined with its value (TKStrLit); + // widen it before comparing, since it behaves as `str` everywhere. + if c.arena.WidenStrLit(got[i]) != want[i] { t.Fatalf("stack[%d]: want %v, got %v", i, want[i], got[i]) } } diff --git a/mshell/TypeError.go b/mshell/TypeError.go index 66cc38c..9eabbf5 100644 --- a/mshell/TypeError.go +++ b/mshell/TypeError.go @@ -185,6 +185,10 @@ func FormatType(arena *TypeArena, names *NameTable, id TypeId) string { } n := arena.Node(id) switch n.Kind { + case TKStrLit: + // A literal is a `str` subtype; present it as `str` so diagnostics + // read the same whether a value arrived as a literal or not. + return "str" case TKMaybe: return "Maybe[" + FormatType(arena, names, TypeId(n.A)) + "]" case TKList: From e35969268f35468446d4c7cd8498265fba20ec66 Mon Sep 17 00:00:00 2001 From: Mitchell Paulus Date: Wed, 1 Jul 2026 11:14:52 -0500 Subject: [PATCH 3/3] Add runtime test for literal `get` key resolution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Exercises both the inline `"field" get?` and the variable-bound `"field" k! ... @k get?` forms on a heterogeneous shape in the shared success suite (type-checked and executed). Because the fields differ in type, `count` must resolve to `int` specifically for `2 +` to type-check — a dynamic key would type as `int | str` and be rejected — so the test guards against a resolution regression. 
Co-Authored-By: Claude Opus 4.8 --- tests/success/dicts.msh | 8 ++++++++ tests/success/dicts.msh.stdout | 3 +++ 2 files changed, 11 insertions(+) diff --git a/tests/success/dicts.msh b/tests/success/dicts.msh index 8b6137c..1ee7932 100644 --- a/tests/success/dicts.msh +++ b/tests/success/dicts.msh @@ -82,3 +82,11 @@ # Dict to List { "a" : 1, "b": 2 } keyValues (pair! @pair :k? key!, @pair :v? value! $"{@key} {@value 1 +}") map uw { "b" : 1, "a": 2 } keyValues (pair! @pair :k? key!, @pair :v? value! $"{@key} {@value 1 +}") map uw + +# A literal `get` key resolves the specific shape field type, not the union of +# every field value type. Both spellings resolve `count` to `int` (so `2 +` +# type-checks); a dynamic/unknown key would type as `int | str` and be rejected. +{ "count": 3, "label": "widgets" } rec! +@rec "count" get? 2 + str wl # inline literal key +"count" ckey! @rec @ckey get? 2 + str wl # key bound to a variable +@rec "label" get? wl # str field diff --git a/tests/success/dicts.msh.stdout b/tests/success/dicts.msh.stdout index eafd324..ec33996 100644 --- a/tests/success/dicts.msh.stdout +++ b/tests/success/dicts.msh.stdout @@ -29,3 +29,6 @@ a 2 b 3 a 3 b 2 +5 +5 +widgets