From 0866f0855470ca851c5e9a5e939edff938202381 Mon Sep 17 00:00:00 2001 From: Mitchell Paulus Date: Tue, 30 Jun 2026 17:04:31 -0500 Subject: [PATCH] Add other integer literals --- CHANGELOG.md | 9 ++ doc/data-types.inc.html | 28 +++++ doc/functions.inc.html | 10 +- doc/mshell.md | 37 ++++++- lib/std.msh | 12 ++ mshell/BuiltInList.go | 2 + mshell/Evaluator.go | 122 ++++++++++++++++++++- mshell/Lexer.go | 48 ++++++++ mshell/Lexer_test.go | 64 +++++++++++ mshell/Parser.go | 4 +- mshell/TypeBuiltins.go | 4 + tests/success/int_base_literals.msh | 29 +++++ tests/success/int_base_literals.msh.stdout | 22 ++++ tests/typecheck_fail/tobase_wrong_arg.msh | 3 + 14 files changed, 388 insertions(+), 6 deletions(-) create mode 100644 tests/success/int_base_literals.msh create mode 100644 tests/success/int_base_literals.msh.stdout create mode 100644 tests/typecheck_fail/tobase_wrong_arg.msh diff --git a/CHANGELOG.md b/CHANGELOG.md index f95383f0..8429057c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- Octal, hexadecimal, and binary integer literals via `0o`, `0x`, and `0b` + prefixes (case-insensitive), e.g. `0o644`, `0xFF`, `0b101`. The base is purely + a way of writing the literal; the value is an ordinary integer and prints in + decimal. There are no digit separators. +- Functions + - `toBase` / `fromBase`: format an integer in / parse a string from an + arbitrary base (2–36). `fromBase` returns `Maybe[int]`. + - `toHex` / `toOctal` / `toBin` and `parseHex` / `parseOctal` / `parseBin`: + convenience wrappers over `toBase` / `fromBase` for the common bases. - Optional fields in dictionary shape types, written `name?: T` (and `"name"?: T` in `def` signatures). An optional field may be absent from a value; when present, its value is still type-checked. 
This lets option-style diff --git a/doc/data-types.inc.html b/doc/data-types.inc.html index 992860fe..2270dce5 100644 --- a/doc/data-types.inc.html +++ b/doc/data-types.inc.html @@ -241,6 +241,34 @@

Float and Integer 1.0 # This is a float literal +

+Integer literals may also be written in octal, hexadecimal, or binary using a +0o, 0x, or 0b prefix (the letters are +case-insensitive, so 0O/0X/0B work too). +The base is purely a way of writing the literal; the value on the stack is an +ordinary integer with no record of how it was entered, and it prints back in +decimal. There are no digit separators, so 0o6_44 is not a number. +

+ +
+0o644 # 420 (octal, e.g. a file mode)
+0xFF  # 255 (hexadecimal)
+0b101 # 5   (binary)
+
+ +

+To go the other way, format an integer to a base-N string with +toBase (or the toHex/toOctal/toBin +convenience words), and parse a string in a given base with fromBase +(or parseHex/parseOctal/parseBin), which +returns a Maybe[int]. Formatted output is bare digits with no prefix. +

+ +
+420 toOctal # "644"
+"644" parseOctal # Some 420
+
+

Division for integers will result in an integer without the remainder.

diff --git a/doc/functions.inc.html b/doc/functions.inc.html index a8e2095a..c57f6968 100644 --- a/doc/functions.inc.html +++ b/doc/functions.inc.html @@ -49,6 +49,14 @@

Built-ins quote -- ) toFloat Convert to a float. A string is parsed and returns a Maybe (none on parse failure); an int or float returns a plain float. (str -- Maybe[float]), (numeric -- float) toInt Convert to an int. A string is parsed and returns a Maybe (none on parse failure); an int or float returns a plain int (floats truncate toward zero). (str -- Maybe[int]), (numeric -- int) + toBase Format an integer in the given base (2–36) as bare digits (no 0o/0x/0b prefix). The named wrappers toHex, toOctal, and toBin cover bases 16/8/2. (int int -- str) + fromBase Parse a string as an integer in the given base (2–36), returning none on failure. Surrounding whitespace, an optional sign, and a redundant matching 0o/0x/0b prefix are all accepted. The named wrappers parseHex, parseOctal, and parseBin cover bases 16/8/2. (str int -- Maybe[int]) + toHex Format an integer as a bare hexadecimal string (16 toBase). (int -- str) + toOctal Format an integer as a bare octal string (8 toBase). (int -- str) + toBin Format an integer as a bare binary string (2 toBase). (int -- str) + parseHex Parse a hexadecimal string to Maybe[int] (16 fromBase); accepts an optional 0x prefix. (str -- Maybe[int]) + parseOctal Parse an octal string to Maybe[int] (8 fromBase); accepts an optional 0o prefix. (str -- Maybe[int]) + parseBin Parse a binary string to Maybe[int] (2 fromBase); accepts an optional 0b prefix. (str -- Maybe[int]) exit Exit the current script with the provided exit code. (int -- ) return Stop executing the current definition or quotation immediately, leaving the stack as-is for the caller. (--) read Read a line from stdin. Leaves the line and a success flag. (-- str bool) @@ -113,7 +121,7 @@

File and Directory mv Move a file or directory. (str:source str:dest -- ) zipDirInc Create/overwrite a .zip from a directory; the archive root contains the directory’s contents (no parent folder). (path:sourceDir path:zipPath -- ) zipDirExc Create/overwrite a .zip that includes the source directory itself at the archive root (entries are prefixed with the directory name). (path:sourceDir path:zipPath -- ) - zipPack Create/overwrite a .zip by packing a list of dictionaries. Each dictionary must contain path plus optional archivePath (override the in-archive name) and mode (os.FileMode as an int). ([dict] path:zipPath -- ) + zipPack Create/overwrite a .zip by packing a list of dictionaries. Each entry requires path (the file or directory to add); archivePath (override the in-archive name) and mode are optional. mode is a Go os.FileMode; write it with an octal literal, e.g. 0o644 (rw-r--r--), 0o755 (rwxr-xr-x), 0o600. On Linux/macOS these are the POSIX permission bits restored on extraction; on Windows file permissions are synthesized by Go and largely ignored (the executable bit is still preserved for Unix consumers). If mode is omitted, the entry keeps the source file’s own mode. ([dict] path:zipPath -- ) zipList List archive entries as dictionaries with columns: name (string, forward-slash paths, directories end with /), compressedSize (int bytes), uncompressedSize (int bytes), isDir (bool), perm (int POSIX permission bits), executable (bool), and modified (datetime from the archive entry). (path -- [dict]) zipExtract Extract an entire archive. Options dict is required; defaults: overwrite=false, skipExisting=false (mutually exclusive), stripComponents=0, pattern="" (glob matched before stripping), preservePermissions=true. Destination is created if missing. (path:zipPath path:destDir dict:options -- ) zipExtractEntry Extract a single entry (file or directory subtree) to a destination path. 
Options dict is required; defaults: overwrite=false, skipExisting=false (mutually exclusive), preservePermissions=true, mkdirs=true (create parent directories when needed). (path:zipPath str:entry path:dest dict:options -- ) diff --git a/doc/mshell.md b/doc/mshell.md index f694af66..ee26cde1 100644 --- a/doc/mshell.md +++ b/doc/mshell.md @@ -489,6 +489,28 @@ Dates can be subtracted from each other, and the result is a floating-point numb 2023-10-02 2023-10-01 - # 1.0 ``` +### Integers and Floats + +Any literal number containing a decimal point is a float; otherwise it is an integer. + +Integer literals may be written in octal, hexadecimal, or binary with a `0o`, `0x`, or `0b` prefix (case-insensitive). +The base is only a way of writing the literal — the stack value is a plain integer with no record of its base, and it prints in decimal. +There are no digit separators (`0o6_44` is not a number). + +```mshell +0o644 # 420 (octal, e.g. a file mode) +0xFF # 255 (hexadecimal) +0b101 # 5 (binary) +``` + +Format an integer to a base-N string with `toBase` / `toHex` / `toOctal` / `toBin` (bare digits, no prefix), +and parse a string in a given base with `fromBase` / `parseHex` / `parseOctal` / `parseBin` (returns `Maybe[int]`). + +```mshell +420 toOctal # "644" +"644" parseOctal # Some 420 +``` + ## Type System Use `msh --check-types script.msh` to run static type checking before script execution. @@ -994,6 +1016,10 @@ end wl # Output: 11 - `x`: Interpret/execute quotation `(quote -- )` - `toFloat`: Convert to float. A `str` is parsed and returns `Maybe[float]` (`none` on parse failure); an `int`/`float` returns a plain `float`. `(str -- Maybe[float])` / `(numeric -- float)` - `toInt`: Convert to int. A `str` is parsed and returns `Maybe[int]` (`none` on parse failure); an `int`/`float` returns a plain `int` (floats truncate toward zero). 
`(str -- Maybe[int])` / `(numeric -- int)` +- `toBase`: Format an integer in the given base (2–36) as bare digits (no `0o`/`0x`/`0b` prefix). `(int int -- str)` +- `fromBase`: Parse a string as an integer in the given base (2–36), returning `none` on failure. Whitespace, an optional sign, and a redundant matching `0o`/`0x`/`0b` prefix are accepted. `(str int -- Maybe[int])` +- `toHex` / `toOctal` / `toBin`: Format an int as a bare hex/octal/binary string (`16`/`8`/`2 toBase`). `(int -- str)` +- `parseHex` / `parseOctal` / `parseBin`: Parse a hex/octal/binary string to `Maybe[int]` (`16`/`8`/`2 fromBase`); an optional matching prefix is accepted. `(str -- Maybe[int])` - `exit`: Exit the current script with the provided exit code. `(int -- )` - `read`: Read a line from stdin. Puts a str and bool of whether the read was successful on the stack. `( -- str bool)` - `prompt`: Write a prompt string to the controlling TTY and read a line from the controlling TTY. Fails if no controlling TTY is available. `(str -- str)` @@ -1368,7 +1394,16 @@ See [Regexp.Expand](https://pkg.go.dev/regexp#Regexp.Expand) for replacement syn - `zipDirInc`: Create/overwrite a `.zip` from a directory; the archive root contains the directory's contents (no parent folder). `(path:sourceDir path:zipPath -- )` - `zipDirExc`: Create/overwrite a `.zip` that includes the source directory itself at the archive root (entries are prefixed with the directory name). `(path:sourceDir path:zipPath -- )` -- `zipPack`: Create/overwrite a `.zip` by packing a list of dictionaries. Each dictionary must contain `path` plus optional `archivePath` (override the in-archive name) and `mode` (os.FileMode as an int). `([dict] path:zipPath -- )` +- `zipPack`: Create/overwrite a `.zip` by packing a list of dictionaries. + Each entry requires `path` (the file or directory to add); + `archivePath` (override the in-archive name) and `mode` are optional. + `mode` is a Go `os.FileMode`; write it with an octal literal, + e.g. 
`0o644` (`rw-r--r--`), `0o755` (`rwxr-xr-x`), `0o600`. + On Linux/macOS these are the POSIX permission bits restored on extraction; + on Windows file permissions are synthesized by Go and largely ignored + (the executable bit is still preserved for Unix consumers). + If `mode` is omitted, the entry keeps the source file's own mode. + Type: `([{path: str | path, archivePath?: str, mode?: int}] str | path -- )` - `zipList`: List archive entries as dictionaries with keys: `name` (string, forward-slash paths, directories end with `/`), `compressedSize` (int bytes), `uncompressedSize` (int bytes), `isDir` (bool), `perm` (int POSIX permission bits), `executable` (bool), and `modified` (datetime from the archive entry). `(path -- [dict])` - `zipExtract`: Extract an entire archive. Options dict is required; defaults: `overwrite=false`, `skipExisting=false` (mutually exclusive), `stripComponents=0`, `pattern=""` (glob matched before stripping), `preservePermissions=true`. Destination is created if missing. `(path:zipPath path:destDir dict:options -- )` - `zipExtractEntry`: Extract a single entry (file or directory subtree) to a destination path. Options dict is required; defaults: `overwrite=false`, `skipExisting=false` (mutually exclusive), `preservePermissions=true`, `mkdirs=true`. `(path:zipPath str:entry path:dest dict:options -- )` diff --git a/lib/std.msh b/lib/std.msh index 0a3fdd86..12ebc903 100644 --- a/lib/std.msh +++ b/lib/std.msh @@ -454,6 +454,18 @@ def 2tuple (a b -- [a | b]) [] rot append swap append end +# Integer base formatting/parsing. The integer itself never records a base; +# these wrap the generic `toBase`/`fromBase` builtins for the common bases. +# Output is bare digits (no 0o/0x/0b prefix); parsing accepts an optional +# matching prefix, so both "0o644" and "644" work with parseOctal. 
+def toHex (int -- str) 16 toBase end
+def toOctal (int -- str) 8 toBase end
+def toBin (int -- str) 2 toBase end
+
+def parseHex (str -- Maybe[int]) 16 fromBase end
+def parseOctal (str -- Maybe[int]) 8 fromBase end
+def parseBin (str -- Maybe[int]) 2 fromBase end
+
 # COMPLETIONS {{{
 # msh {{{
 def __mshCompletion { 'complete': ['msh' 'mshell'] } ([str] -- [str])
diff --git a/mshell/BuiltInList.go b/mshell/BuiltInList.go
index e04060be..a73b0ff7 100644
--- a/mshell/BuiltInList.go
+++ b/mshell/BuiltInList.go
@@ -57,6 +57,7 @@ var BuiltInList = map[string]struct{}{
 	"intCmp": {},
 	"dateTimeCmp": {},
 	"floor": {},
+	"fromBase": {},
 	"fromOleDate": {},
 	"fromUnixTime": {},
 	"fromUnixTimeMicro": {},
@@ -183,6 +184,7 @@ var BuiltInList = map[string]struct{}{
 	"tempFile": {},
 	"tempFileExt": {},
 	"title": {},
+	"toBase": {},
 	"toDict": {},
 	"toDt": {},
 	"toFixed": {},
diff --git a/mshell/Evaluator.go b/mshell/Evaluator.go
index 315f805f..a2c35f48 100644
--- a/mshell/Evaluator.go
+++ b/mshell/Evaluator.go
@@ -76,6 +76,77 @@ func (objList *MShellStack) Pop1(t Token) (MShellObject, error) {
 	return obj1, nil
 }
 
+// parseIntLiteral converts an integer literal lexeme to its value, honoring the
+// 0o/0x/0b base prefixes the lexer accepts (in addition to plain decimal). The
+// resulting int carries no record of which base was written; the prefix is
+// purely a source-level convenience.
+func parseIntLiteral(s string) (int, error) {
+	sign, body := "", s
+	if strings.HasPrefix(body, "-") {
+		sign, body = "-", body[1:]
+	}
+	base := 10
+	if len(body) > 2 && body[0] == '0' {
+		switch body[1] {
+		case 'o', 'O':
+			base, body = 8, body[2:]
+		case 'x', 'X':
+			base, body = 16, body[2:]
+		case 'b', 'B':
+			base, body = 2, body[2:]
+		}
+	}
+	// Parse the sign together with the digits: negating afterwards cannot
+	// represent the most negative int (its magnitude overflows) and would
+	// let a second sign through ("--5"). bitSize 0 range-checks against the
+	// platform int, matching the strconv.Atoi call this replaces, instead
+	// of silently truncating an int64 on 32-bit builds.
+	v, err := strconv.ParseInt(sign+body, base, 0)
+	if err != nil {
+		return 0, err
+	}
+	return int(v), nil
+}
+
+// parseInBase parses a string as an integer in the given base (2-36), used by
+// the 'fromBase' builtin. Surrounding whitespace and one optional leading sign
+// are tolerated, as is a redundant 0o/0x/0b prefix when it matches the base
+// (so "0o644" and "644" both parse under base 8).
+func parseInBase(s string, base int) (int, error) {
+	body := strings.TrimSpace(s)
+	sign := ""
+	if strings.HasPrefix(body, "-") || strings.HasPrefix(body, "+") {
+		sign, body = body[:1], body[1:]
+	}
+	if len(body) > 2 && body[0] == '0' {
+		switch body[1] {
+		case 'o', 'O':
+			if base == 8 {
+				body = body[2:]
+			}
+		case 'x', 'X':
+			if base == 16 {
+				body = body[2:]
+			}
+		case 'b', 'B':
+			if base == 2 {
+				body = body[2:]
+			}
+		}
+	}
+	// strconv.ParseInt accepts a sign of its own, so a second sign left after
+	// stripping ours and the prefix ("--5", "0x-5") must be rejected here or
+	// it would parse silently instead of producing none.
+	if strings.HasPrefix(body, "-") || strings.HasPrefix(body, "+") {
+		return 0, strconv.ErrSyntax
+	}
+	v, err := strconv.ParseInt(sign+body, base, 0)
+	if err != nil {
+		return 0, err
+	}
+	return int(v), nil
+}
+
 // Returns two objects from the stack.
 // obj1, obj2 := stack.Pop2(t)
 // obj1 was on top of the stack, obj2 was below it.
@@ -1178,7 +1249,7 @@ func (state *EvalState) matchTokenPattern(p Token, subject MShellObject) (bool,
 		return ok, SimpleSuccess()
 
 	case INTEGER:
-		intVal, err := strconv.Atoi(p.Lexeme)
+		intVal, err := parseIntLiteral(p.Lexeme)
 		if err != nil {
 			return false, state.FailWithMessage(fmt.Sprintf("%d:%d: Error parsing integer in match pattern: %s\n", p.Line, p.Column, err.Error()))
 		}
@@ -6529,6 +6600,53 @@ func (state *EvalState) evaluateToken(t Token, stack *MShellStack, context Execu
 			default:
 				return state.FailWithMessage(fmt.Sprintf("%d:%d: Cannot convert a %s to an int.\n", t.Line, t.Column, obj.TypeName()))
 			}
+		} else if t.Lexeme == "toBase" {
+			// (int:value int:base -- str) Format an integer in the given
+			// base (2-36) as bare digits (no 0o/0x/0b prefix).
+			baseObj, valObj, err := stack.Pop2(t)
+			if err != nil {
+				return state.FailWithMessage(err.Error())
+			}
+			baseInt, ok := baseObj.(MShellInt)
+			if !ok {
+				return state.FailWithMessage(fmt.Sprintf("%d:%d: 'toBase' requires the base as an integer on top of the stack. 
Found %s.\n", t.Line, t.Column, baseObj.TypeName())) + } + valInt, ok := valObj.(MShellInt) + if !ok { + return state.FailWithMessage(fmt.Sprintf("%d:%d: 'toBase' requires an integer to format. Found %s.\n", t.Line, t.Column, valObj.TypeName())) + } + if baseInt.Value < 2 || baseInt.Value > 36 { + return state.FailWithMessage(fmt.Sprintf("%d:%d: 'toBase' base must be between 2 and 36. Found %d.\n", t.Line, t.Column, baseInt.Value)) + } + stack.Push(MShellString{strconv.FormatInt(int64(valInt.Value), baseInt.Value)}) + } else if t.Lexeme == "fromBase" { + // (str int:base -- Maybe[int]) Parse a string in the given base + // (2-36). Returns none when the string is not a valid number. + baseObj, strObj, err := stack.Pop2(t) + if err != nil { + return state.FailWithMessage(err.Error()) + } + baseInt, ok := baseObj.(MShellInt) + if !ok { + return state.FailWithMessage(fmt.Sprintf("%d:%d: 'fromBase' requires the base as an integer on top of the stack. Found %s.\n", t.Line, t.Column, baseObj.TypeName())) + } + if baseInt.Value < 2 || baseInt.Value > 36 { + return state.FailWithMessage(fmt.Sprintf("%d:%d: 'fromBase' base must be between 2 and 36. Found %d.\n", t.Line, t.Column, baseInt.Value)) + } + var s string + switch strTyped := strObj.(type) { + case MShellString: + s = strTyped.Content + case MShellLiteral: + s = strTyped.LiteralText + default: + return state.FailWithMessage(fmt.Sprintf("%d:%d: 'fromBase' requires a string to parse. 
Found %s.\n", t.Line, t.Column, strObj.TypeName())) + } + if intVal, perr := parseInBase(s, baseInt.Value); perr != nil { + stack.Push(&Maybe{obj: nil}) + } else { + stack.Push(&Maybe{obj: MShellInt{intVal}}) + } } else if t.Lexeme == "toDt" { dateStrObj, err := stack.Pop() if err != nil { @@ -11338,7 +11456,7 @@ func (state *EvalState) evaluateToken(t Token, stack *MShellStack, context Execu } else if t.Type == FALSE { // Token Type stack.Push(MShellBool{false}) } else if t.Type == INTEGER { // Token Type - intVal, err := strconv.Atoi(t.Lexeme) + intVal, err := parseIntLiteral(t.Lexeme) if err != nil { return state.FailWithMessage(fmt.Sprintf("%d:%d: Error parsing integer: %s\n", t.Line, t.Column, err.Error())) } diff --git a/mshell/Lexer.go b/mshell/Lexer.go index 0a6d43cd..7a641f4b 100644 --- a/mshell/Lexer.go +++ b/mshell/Lexer.go @@ -456,6 +456,32 @@ var notAllowedLiteralChars = map[rune]bool{ '^': true, // Used for stderr [command]^; } +// baseFromPrefix maps the letter following a leading "0" to its numeric base. +func baseFromPrefix(r rune) (int, bool) { + switch r { + case 'o', 'O': + return 8, true + case 'x', 'X': + return 16, true + case 'b', 'B': + return 2, true + } + return 0, false +} + +// isBaseDigit reports whether r is a valid digit in the given base (8, 16, 2). +func isBaseDigit(base int, r rune) bool { + switch base { + case 8: + return r >= '0' && r <= '7' + case 16: + return (r >= '0' && r <= '9') || (r >= 'a' && r <= 'f') || (r >= 'A' && r <= 'F') + case 2: + return r == '0' || r == '1' + } + return false +} + func isAllowedLiteral(r rune) bool { if unicode.IsSpace(r) { return false @@ -911,6 +937,28 @@ func (l *Lexer) parseNumberOrStartIndexer() Token { } peek := l.peek() + + // Base-prefixed integer literals: 0o... (octal), 0x... (hex), 0b... (binary). + // Only valid when the digits read so far are exactly "0" (optionally negated, + // i.e. "-0"), and at least one valid base digit follows the prefix letter. 
+ // The integer value itself carries no base information; the prefix is purely + // lexical sugar and the token is a normal INTEGER (see parseIntLiteral). + if base, ok := baseFromPrefix(peek); ok { + intPart := l.curLexeme() + if (intPart == "0" || intPart == "-0") && isBaseDigit(base, l.peekNext()) { + l.advance() // consume the prefix letter + for !l.atEnd() && isBaseDigit(base, l.peek()) { + l.advance() + } + // Mirror plain-integer behavior: a trailing literal char (e.g. + // "0xffg") makes the whole token a literal rather than an integer. + if isAllowedLiteral(l.peek()) { + return l.consumeLiteral() + } + return l.makeToken(INTEGER) + } + } + if peek == ':' { l.advance() diff --git a/mshell/Lexer_test.go b/mshell/Lexer_test.go index 42b7b5af..4b991541 100644 --- a/mshell/Lexer_test.go +++ b/mshell/Lexer_test.go @@ -39,6 +39,70 @@ func TestTypeCheckerKeywords(t *testing.T) { } } +// Base-prefixed integer literals (0o/0x/0b) tokenize as INTEGER, while +// malformed or separator-bearing forms fall back to LITERAL (mshell has no +// digit separators in numeric literals). +func TestBaseIntegerLiterals(t *testing.T) { + cases := []struct { + input string + want TokenType + }{ + {"0o644", INTEGER}, + {"0O17", INTEGER}, + {"0x1a4", INTEGER}, + {"0XFF", INTEGER}, + {"0b101", INTEGER}, + {"0B0", INTEGER}, + {"-0o10", INTEGER}, + {"42", INTEGER}, + // Malformed / not octal-hex-bin: stay literals. 
+ {"0o", LITERAL}, // no digits after prefix + {"0o8", LITERAL}, // 8 is not an octal digit + {"0xG", LITERAL}, // G is not a hex digit + {"0b2", LITERAL}, // 2 is not a binary digit + {"0o6_44", LITERAL}, // no digit separators + {"0o644g", LITERAL}, // trailing literal char + {"10o", LITERAL}, // prefix only valid right after a lone 0 + } + for _, tc := range cases { + l := NewLexer(tc.input, nil) + toks, _ := l.Tokenize() + if len(toks) < 1 { + t.Errorf("%q: no tokens produced", tc.input) + continue + } + if toks[0].Type != tc.want { + t.Errorf("%q: got %s, want %s", tc.input, toks[0].Type, tc.want) + } + } +} + +func TestParseIntLiteral(t *testing.T) { + cases := []struct { + input string + want int + }{ + {"0o644", 420}, + {"0x1a4", 420}, + {"0b110100100", 420}, + {"-0o10", -8}, + {"0xFF", 255}, + {"42", 42}, + {"-42", -42}, + {"0", 0}, + } + for _, tc := range cases { + got, err := parseIntLiteral(tc.input) + if err != nil { + t.Errorf("%q: unexpected error %v", tc.input, err) + continue + } + if got != tc.want { + t.Errorf("%q: got %d, want %d", tc.input, got, tc.want) + } + } +} + func TestUnterminatedString(t *testing.T) { input := `"Hello, world!` l := NewLexer(input, nil) diff --git a/mshell/Parser.go b/mshell/Parser.go index f510cd7d..a31692f0 100644 --- a/mshell/Parser.go +++ b/mshell/Parser.go @@ -876,7 +876,7 @@ func (parser *MShellParser) parseDictKeyValue() (MShellParseDictKeyValue, error) } else if keyToken == LITERAL { key = parser.curr.Lexeme } else if keyToken == INTEGER { - intVal, _ := strconv.Atoi(parser.curr.Lexeme) // This normalizes the integer to not have leading 0's etc. + intVal, _ := parseIntLiteral(parser.curr.Lexeme) // This normalizes the integer to not have leading 0's etc. 
key = strconv.Itoa(intVal) } else if keyToken == STARTINDEXER { indexStr := parser.curr.Lexeme[:len(parser.curr.Lexeme)-1] @@ -936,7 +936,7 @@ func (parser *MShellParser) parseStaticDictKeyValue() (MShellParseDictKeyValue, } else if keyToken == LITERAL { key = parser.curr.Lexeme } else if keyToken == INTEGER { - intVal, _ := strconv.Atoi(parser.curr.Lexeme) + intVal, _ := parseIntLiteral(parser.curr.Lexeme) key = strconv.Itoa(intVal) } else if keyToken == STARTINDEXER { indexStr := parser.curr.Lexeme[:len(parser.curr.Lexeme)-1] diff --git a/mshell/TypeBuiltins.go b/mshell/TypeBuiltins.go index 04ffd547..9ae6d6b0 100644 --- a/mshell/TypeBuiltins.go +++ b/mshell/TypeBuiltins.go @@ -245,6 +245,10 @@ func builtinSigsByName(arena *TypeArena, names *NameTable) map[NameId][]QuoteSig r.reg("toFixed", "(int int -- str)", "(float int -- str)") // str-first ordering for the same reason as toFloat above. r.reg("toInt", "(str -- Maybe[int])", "(float -- int)", "(int -- int)") + // Format an int in an arbitrary base (2-36) as bare digits; parse a string + // in a given base back to Maybe[int]. The int carries no base of its own. + r.reg("toBase", "(int int -- str)") + r.reg("fromBase", "(str int -- Maybe[int])") // ----- Path / DateTime / File ops ----- diff --git a/tests/success/int_base_literals.msh b/tests/success/int_base_literals.msh new file mode 100644 index 00000000..f2c26478 --- /dev/null +++ b/tests/success/int_base_literals.msh @@ -0,0 +1,29 @@ +# Base-prefixed integer literals all push plain ints. +0o644 wl +0x1a4 wl +0b110100100 wl +-0o10 wl +0xFF wl +0B101 wl +0O17 wl + +# Generic builtins. +420 16 toBase wl +420 8 toBase wl +420 2 toBase wl +-8 16 toBase wl +"1a4" 16 fromBase -1 maybe wl +"zzz" 16 fromBase -1 maybe wl + +# Named std-lib wrappers. +420 toHex wl +420 toOctal wl +420 toBin wl +"1a4" parseHex -1 maybe wl +"644" parseOctal -1 maybe wl +"0o644" parseOctal -1 maybe wl +"110100100" parseBin -1 maybe wl +"nope" parseOctal -1 maybe wl + +# Round trip. 
+255 toHex parseHex -1 maybe wl diff --git a/tests/success/int_base_literals.msh.stdout b/tests/success/int_base_literals.msh.stdout new file mode 100644 index 00000000..f500eed0 --- /dev/null +++ b/tests/success/int_base_literals.msh.stdout @@ -0,0 +1,22 @@ +420 +420 +420 +-8 +255 +5 +15 +1a4 +644 +110100100 +-8 +420 +-1 +1a4 +644 +110100100 +420 +420 +420 +420 +-1 +255 diff --git a/tests/typecheck_fail/tobase_wrong_arg.msh b/tests/typecheck_fail/tobase_wrong_arg.msh new file mode 100644 index 00000000..b2416a74 --- /dev/null +++ b/tests/typecheck_fail/tobase_wrong_arg.msh @@ -0,0 +1,3 @@ +# toBase formats an int; passing a string for the value must be rejected. +# (The base argument is the int on top; the value below must also be int.) +"hello" 16 toBase wl