diff --git a/CHANGELOG.md b/CHANGELOG.md index 8429057..4510fdb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 `httpGet`/`httpPost`, grid `groupBy` aggregation specs, and the `zip*` option dicts now declare their required and optional keys. A required value satisfies an optional parameter, but an optional value does not satisfy a required one. +- The type checker now tracks the value of a string literal (as a `str` + refinement) so a `get` with a known key resolves a shape field the same way + the `:name` getter does: `resp "body" get` yields the declared `body` field's + type instead of the union of every field type, so `httpGet? "body" get?` + type-checks as `bytes`. Because the key rides the stack as a type, it resolves + even when the literal reaches `get` through a variable; a key computed at + runtime still returns the generic `Maybe[value]`. - The language server now reports an informational diagnostic when a `?` unwrap is statically guaranteed to fail — unwrapping a getter (`:k?`) for a field a concrete shape does not declare, or unwrapping a bare `none`. The hint is diff --git a/doc/mshell.md b/doc/mshell.md index ee26cde..e2592b6 100644 --- a/doc/mshell.md +++ b/doc/mshell.md @@ -594,7 +594,12 @@ always present with a possibly-`none` value. A required value satisfies an optional parameter, but an optional value does not satisfy a required one. Reading is unchanged (`:field` is `Maybe[T]`, `:field?` unwraps it); the language server flags `:field?` on a field a concrete shape does not declare, since that -unwrap always fails. +unwrap always fails. A string literal carries its value as a `str` refinement, +so a `get` with a known key resolves the same way as the getter: `resp "body" +get` reads the declared `body` field's type, not the union of every field type, +so it is interchangeable with `resp :body`. 
The key resolves even when it +reaches `get` through a variable (`"body" k! resp @k get`); a key computed at +runtime returns the generic `Maybe[value]`. ```mshell type Request = {url: str, timeout?: int} diff --git a/doc/type_system.inc.html b/doc/type_system.inc.html index 6aa2649..bae627c 100644 --- a/doc/type_system.inc.html +++ b/doc/type_system.inc.html @@ -244,6 +244,8 @@

Optional Fields :field returns Maybe[T] and :field? unwraps it (failing at runtime on a missing key), for required and optional fields alike. Because of that, the language server emits an informational diagnostic when :field? is used on a field a concrete shape does not declare — that unwrap is guaranteed to fail. Homogeneous dictionaries ({str: T}) return a genuine Maybe[T] for any key and are never flagged. +A string literal carries its value as a str refinement, so a get with a known key resolves the same way as the getter — resp "body" get reads the declared body field's type rather than the union of every field type, so it is interchangeable with resp :body. +The key resolves even when it reaches get through a variable ("body" k! resp @k get); a key computed at runtime returns the generic Maybe[value].

Nested Dictionaries §

diff --git a/mshell/OptionalDictKeys_test.go b/mshell/OptionalDictKeys_test.go index 79ce2b9..22c0786 100644 --- a/mshell/OptionalDictKeys_test.go +++ b/mshell/OptionalDictKeys_test.go @@ -215,3 +215,44 @@ func TestHttpGetUrlOnlyAccepted(t *testing.T) { t.Fatalf("httpGet with only a url should type-check; got %d", n) } } + +// A string literal key to `get` resolves the shape field just like the +// `:name` getter: `"body" get?` yields the response body's `bytes`, which +// writeFile accepts — it must NOT collapse the shape to the union of all +// field value types. +func TestGetLiteralKeyResolvesShapeField(t *testing.T) { + src := `{ "url": "https://example.com" } httpGet? "body" get? "out.bin" writeFile` + if n := fatalErrorCount(allCheckerErrors(t, src)); n != 0 { + t.Fatalf("`\"body\" get?` should resolve to bytes and satisfy writeFile; got %d errors", n) + } +} + +// The literal-key path is precise, not permissive: `"status" get?` resolves +// to the field's `int`, so feeding it to writeFile (str | bytes) is rejected. +func TestGetLiteralKeyIsFieldPrecise(t *testing.T) { + src := `{ "url": "https://example.com" } httpGet? "status" get? "out.bin" writeFile` + if n := fatalErrorCount(allCheckerErrors(t, src)); n == 0 { + t.Fatal("`\"status\" get?` resolves to int, which writeFile must reject") + } +} + +// The key value rides the stack as a `str` refinement, so it resolves even +// when it reaches `get` through a variable rather than inline — the literal +// need not be adjacent to `get`. +func TestGetLiteralKeyThroughVariable(t *testing.T) { + src := `{ "url": "https://example.com" } httpGet? "body" k! @k get? 
"out.bin" writeFile` + if n := fatalErrorCount(allCheckerErrors(t, src)); n != 0 { + t.Fatalf("a literal key bound to a variable should still resolve `body` to bytes; got %d errors", n) + } +} + +// A key whose value is not statically known (here an env var, a plain `str`) +// cannot resolve a specific field, so `get` falls back to the generic dict +// overload and yields the union of every field value type — which writeFile +// rejects. +func TestGetDynamicKeyStaysGeneric(t *testing.T) { + src := `{ "url": "https://example.com" } httpGet? $KEY get? "out.bin" writeFile` + if n := fatalErrorCount(allCheckerErrors(t, src)); n == 0 { + t.Fatal("a non-literal key should fall back to the union-typed get and be rejected by writeFile") + } +} diff --git a/mshell/Type.go b/mshell/Type.go index 08d725a..674a9e0 100644 --- a/mshell/Type.go +++ b/mshell/Type.go @@ -59,6 +59,14 @@ const ( TKGrid // Extra = index into gridSchemas (0 = unknown schema) TKGridView // Extra = index into gridSchemas (0 = unknown schema) TKGridRow // Extra = index into gridSchemas (0 = unknown schema) + + // TKStrLit is a `str` refined with a statically known value: A holds the + // interned NameId of the literal content. It is a subtype of `str` — + // unify and every container constructor widen it back to TidStr — so it + // behaves exactly like `str` everywhere except where a known key matters: + // `get` reads it off the stack to resolve a shape field by name, the same + // resolution the `:name` getter does from its token. + TKStrLit // A = NameId of the literal string content ) // String returns a debug name for a TypeKind. @@ -94,6 +102,8 @@ func (k TypeKind) String() string { return "GridView" case TKGridRow: return "GridRow" + case TKStrLit: + return "StrLit" } return "Unknown" } @@ -234,17 +244,44 @@ func (a *TypeArena) Kind(id TypeId) TypeKind { // MakeMaybe returns the canonical TypeId for Maybe[inner]. 
If a Maybe of // the same inner type was constructed before, the existing id is returned. func (a *TypeArena) MakeMaybe(inner TypeId) TypeId { - return a.intern(TKMaybe, uint32(inner), 0, 0) + return a.intern(TKMaybe, uint32(a.WidenStrLit(inner)), 0, 0) } // MakeList returns the canonical TypeId for [elem]. func (a *TypeArena) MakeList(elem TypeId) TypeId { - return a.intern(TKList, uint32(elem), 0, 0) + return a.intern(TKList, uint32(a.WidenStrLit(elem)), 0, 0) } // MakeDict returns the canonical TypeId for {key: value}. func (a *TypeArena) MakeDict(key, value TypeId) TypeId { - return a.intern(TKDict, uint32(key), uint32(value), 0) + return a.intern(TKDict, uint32(a.WidenStrLit(key)), uint32(a.WidenStrLit(value)), 0) +} + +// MakeStrLit returns the canonical TypeId for a `str` refined to the literal +// value named by `name`. It is a subtype of TidStr. +func (a *TypeArena) MakeStrLit(name NameId) TypeId { + return a.intern(TKStrLit, uint32(name), 0, 0) +} + +// StrLitName returns the interned literal value of a TKStrLit type, or +// (0, false) if id is not a string literal. +func (a *TypeArena) StrLitName(id TypeId) (NameId, bool) { + n := a.Node(id) + if n.Kind != TKStrLit { + return 0, false + } + return NameId(n.A), true +} + +// WidenStrLit widens a top-level string-literal refinement to plain `str`; +// any other type is returned unchanged. Container constructors funnel through +// this (unify applies the same widening inline) so a literal is never nested +// inside another type — it stays observable only as a top-level type, where +// a known key is read. +func (a *TypeArena) WidenStrLit(id TypeId) TypeId { + if a.Node(id).Kind == TKStrLit { + return TidStr + } + return id } // MakeVar returns the canonical TypeId for the generic type variable v. @@ -283,6 +320,18 @@ func (a *TypeArena) MakeCommand(argv TypeId, stdout, stderr CommandCaptureMode) // before lookup so two equivalent shapes always share a TypeId. A duplicate // field name is a programmer error and panics. 
func (a *TypeArena) MakeShape(fields []ShapeField) TypeId { + // A field never holds a string-literal refinement; widen so shapes stay + // keyed on plain value types (and hash-cons identically regardless of + // whether a field value arrived as a literal). + for i := range fields { + if w := a.WidenStrLit(fields[i].Type); w != fields[i].Type { + fields = append([]ShapeField(nil), fields...) + for j := range fields { + fields[j].Type = a.WidenStrLit(fields[j].Type) + } + break + } + } normalized := normalizeShapeFields(fields) key := encodeShapeKey(normalized) if id, ok := a.cons[key]; ok { @@ -471,6 +520,7 @@ func (a *TypeArena) Len() int { func (a *TypeArena) flattenAndCanonicalizeUnion(arms []TypeId) []TypeId { out := make([]TypeId, 0, len(arms)) for _, arm := range arms { + arm = a.WidenStrLit(arm) n := a.Node(arm) if n.Kind == TKUnion && n.A == 0 { // Unbranded inner union: flatten its arms. diff --git a/mshell/TypeCheckProgram.go b/mshell/TypeCheckProgram.go index 8771674..94038f0 100644 --- a/mshell/TypeCheckProgram.go +++ b/mshell/TypeCheckProgram.go @@ -1184,7 +1184,10 @@ func (c *Checker) checkMatchBlock(matchBlock *MShellParseMatchBlock) { }) return } - subject := c.stack.items[c.stack.Len()-1] + // Widen a string-literal subject to `str`: match arms and the + // exhaustiveness check compare against `str` by type id, and the literal + // value carries no meaning for pattern matching. + subject := c.arena.WidenStrLit(c.stack.items[c.stack.Len()-1]) entry := c.captureBranch() if len(matchBlock.Arms) == 0 { @@ -1639,6 +1642,78 @@ func (c *Checker) checkFormatBlock(src string, callSite Token, baseLine, baseCol c.stack.items = outerStack } +// stringLiteralValue returns the parsed content of a STRING / +// SINGLEQUOTESTRING token — the same key value the runtime would compute — +// or ("", false) if the token is not a string literal or fails to parse. 
+func (c *Checker) stringLiteralValue(tok Token) (string, bool) { + switch tok.Type { + case STRING: + v, err := ParseRawString(tok.Lexeme) + if err != nil { + return "", false + } + return v, true + case SINGLEQUOTESTRING: + if len(tok.Lexeme) < 2 { + return "", false + } + return tok.Lexeme[1 : len(tok.Lexeme)-1], true + } + return "", false +} + +// tryGetLiteralKey resolves a `get` whose key is a known string literal +// (a TKStrLit on top of the stack) against a shape receiver — so `"body" get` +// reads the declared `body` field type exactly like the `:body` getter, +// instead of collapsing the shape to a dict and yielding the union of every +// field type. Because the key's value rides the stack as a type, this also +// fires when the literal reached `get` through a variable (`k! ... @k get`), +// not only when it is written inline. It only fires for a shape (or a brand +// over one); dicts, GridRows, and still-generic receivers fall through to the +// ordinary `get` overloads. +func (c *Checker) tryGetLiteralKey() bool { + if c.inferring || c.stack.Len() < 2 { + return false + } + key := c.subst.Apply(c.arena, c.stack.items[c.stack.Len()-1]) + name, ok := c.arena.StrLitName(key) + if !ok { + return false + } + recv := c.subst.Apply(c.arena, c.stack.items[c.stack.Len()-2]) + fieldType, ok := c.shapeFieldType(recv, name) + if !ok { + return false + } + // Pop the key and the receiver; push Maybe[field]. An absent field + // resolves to Maybe[bottom] (see shapeFieldType), so a following `?` + // is flagged as an always-failing unwrap, matching the getter. + c.stack.items = c.stack.items[:c.stack.Len()-2] + c.stack.Push(c.arena.MakeMaybe(fieldType)) + return true +} + +// shapeFieldType resolves field `name` on a shape receiver, mirroring the +// TKShape/TKBrand handling in lookupGetterValueType: a declared field yields +// its type; an undeclared field on a concrete shape yields bottom (the key is +// provably absent, so the get can only be None). 
Returns ok=false for any +// receiver that is not a shape, so callers defer to generic dispatch. +func (c *Checker) shapeFieldType(recv TypeId, name NameId) (TypeId, bool) { + n := c.arena.Node(recv) + switch n.Kind { + case TKShape: + for _, field := range c.arena.shapeFields[n.Extra] { + if field.Name == name { + return field.Type, true + } + } + return TidBottom, true + case TKBrand: + return c.shapeFieldType(c.subst.Apply(c.arena, TypeId(n.B)), name) + } + return TidNothing, false +} + // lookupGetterValueType returns the value type produced by a `:name` // getter applied to a value of type t. The result is the inner V of the // returned Maybe[V]; callers wrap it. diff --git a/mshell/TypeChecker.go b/mshell/TypeChecker.go index e62263b..2ac50e8 100644 --- a/mshell/TypeChecker.go +++ b/mshell/TypeChecker.go @@ -203,7 +203,14 @@ func (c *Checker) checkOne(tok Token) { c.stack.Push(TidFloat) return case STRING, SINGLEQUOTESTRING: - c.stack.Push(TidStr) + // Carry the known value as a `str` refinement so a later `get` can resolve + // a shape field by key, as the `:name` getter does from its own token. It + // behaves as `str` elsewhere — unify and every container constructor widen it. 
+ if v, ok := c.stringLiteralValue(tok); ok { + c.stack.Push(c.arena.MakeStrLit(c.names.Intern(v))) + } else { + c.stack.Push(TidStr) + } return case TRUE, FALSE: c.stack.Push(TidBool) @@ -365,6 +372,9 @@ func (c *Checker) checkOne(tok Token) { if tok.Lexeme == "join" && c.tryGridJoin() { return } + if tok.Lexeme == "get" && c.tryGetLiteralKey() { + return + } if tok.Lexeme == "pivot" && c.tryPivot(tok) { return } @@ -770,7 +780,7 @@ func (c *Checker) tryRedirect(tok Token) bool { if c.stack.Len() < 2 { return false } - target := c.subst.Apply(c.arena, c.stack.items[c.stack.Len()-1]) + target := c.arena.WidenStrLit(c.subst.Apply(c.arena, c.stack.items[c.stack.Len()-1])) if target != TidStr && target != TidPath && target != TidBytes { return false } @@ -879,6 +889,25 @@ func (c *Checker) unify(got, want TypeId) bool { gn := c.arena.Node(got) wn := c.arena.Node(want) + // A string literal is a subtype of `str`; widen both sides so a literal + // unifies exactly as `str` (including binding a free variable to `str`, + // not the narrower literal). Reusing the nodes loaded just above, the + // common case — neither side a literal — costs only two kind checks. + if gn.Kind == TKStrLit { + got = TidStr + if got == want { + return true + } + gn = c.arena.Node(TidStr) + } + if wn.Kind == TKStrLit { + want = TidStr + if got == want { + return true + } + wn = c.arena.Node(TidStr) + } + // Variable cases: bind whichever side is still a free variable. After // Apply above, a TKVar here is guaranteed unbound. 
if gn.Kind == TKVar { diff --git a/mshell/TypeChecker_test.go b/mshell/TypeChecker_test.go index da1f640..eb79177 100644 --- a/mshell/TypeChecker_test.go +++ b/mshell/TypeChecker_test.go @@ -118,7 +118,9 @@ func TestCheckerLiteralsPushPrimitives(t *testing.T) { t.Fatalf("stack len: want %d, got %d", len(want), len(got)) } for i := range want { - if got[i] != want[i] { + // A string literal pushes a `str` refined with its value (TKStrLit); + // widen it before comparing, since it behaves as `str` everywhere. + if c.arena.WidenStrLit(got[i]) != want[i] { t.Fatalf("stack[%d]: want %v, got %v", i, want[i], got[i]) } } diff --git a/mshell/TypeError.go b/mshell/TypeError.go index 66cc38c..9eabbf5 100644 --- a/mshell/TypeError.go +++ b/mshell/TypeError.go @@ -185,6 +185,10 @@ func FormatType(arena *TypeArena, names *NameTable, id TypeId) string { } n := arena.Node(id) switch n.Kind { + case TKStrLit: + // A literal is a `str` subtype; present it as `str` so diagnostics + // read the same whether a value arrived as a literal or not. + return "str" case TKMaybe: return "Maybe[" + FormatType(arena, names, TypeId(n.A)) + "]" case TKList: diff --git a/tests/success/dicts.msh b/tests/success/dicts.msh index 8b6137c..1ee7932 100644 --- a/tests/success/dicts.msh +++ b/tests/success/dicts.msh @@ -82,3 +82,11 @@ # Dict to List { "a" : 1, "b": 2 } keyValues (pair! @pair :k? key!, @pair :v? value! $"{@key} {@value 1 +}") map uw { "b" : 1, "a": 2 } keyValues (pair! @pair :k? key!, @pair :v? value! $"{@key} {@value 1 +}") map uw + +# A literal `get` key resolves the specific shape field type, not the union of +# every field value type. Both spellings resolve `count` to `int` (so `2 +` +# type-checks); a dynamic/unknown key would type as `int | str` and be rejected. +{ "count": 3, "label": "widgets" } rec! +@rec "count" get? 2 + str wl # inline literal key +"count" ckey! @rec @ckey get? 2 + str wl # key bound to a variable +@rec "label" get? 
wl # str field diff --git a/tests/success/dicts.msh.stdout b/tests/success/dicts.msh.stdout index eafd324..ec33996 100644 --- a/tests/success/dicts.msh.stdout +++ b/tests/success/dicts.msh.stdout @@ -29,3 +29,6 @@ a 2 b 3 a 3 b 2 +5 +5 +widgets