diff --git a/benchmark_comprehensive_test.go b/benchmark_comprehensive_test.go new file mode 100644 index 0000000..ca424f9 --- /dev/null +++ b/benchmark_comprehensive_test.go @@ -0,0 +1,1263 @@ +package astjson + +import ( + "strings" + "testing" + + "github.com/wundergraph/go-arena" +) + +// Sink vars prevent dead-code elimination by the compiler. +var ( + sinkValue *Value + sinkBytes []byte + sinkString string + sinkInt int + sinkFloat64 float64 + sinkBool bool + sinkErr error +) + +// fixture pairs a name with its JSON data for table-driven benchmarks. +type fixture struct { + name string + data string +} + +// fixtures is the default set (excludes 20mb for speed). +var fixtures = []fixture{ + {"small", smallFixture}, + {"medium", mediumFixture}, + {"large", largeFixture}, + {"canada", canadaFixture}, + {"citm", citmFixture}, + {"twitter", twitterFixture}, +} + +// bunchFieldsFixture is an 871-key object for large-object benchmarks. +var bunchFieldsFixture = getFromFile("testdata/bunchFields.json") + +// --------------------------------------------------------------------------- +// Section 1: Parsing +// --------------------------------------------------------------------------- + +func BenchmarkSTParse(b *testing.B) { + for _, f := range fixtures { + b.Run(f.name, func(b *testing.B) { + benchmarkSTParse(b, f.data) + }) + } + b.Run("20mb", func(b *testing.B) { + benchmarkSTParse(b, huge20MbFixture) + }) +} + +func benchmarkSTParse(b *testing.B, data string) { + var p Parser + b.ReportAllocs() + b.SetBytes(int64(len(data))) + b.ResetTimer() + for b.Loop() { + v, err := p.Parse(data) + if err != nil { + b.Fatal(err) + } + sinkValue = v + } +} + +func BenchmarkSTParseArena(b *testing.B) { + for _, f := range fixtures { + b.Run(f.name, func(b *testing.B) { + benchmarkSTParseArena(b, f.data, 2*1024*1024) + }) + } + b.Run("20mb", func(b *testing.B) { + benchmarkSTParseArena(b, huge20MbFixture, 32*1024*1024) + }) +} + +func benchmarkSTParseArena(b *testing.B, data 
string, arenaSize int) { + var p Parser + a := arena.NewMonotonicArena(arena.WithMinBufferSize(arenaSize)) + b.ReportAllocs() + b.SetBytes(int64(len(data))) + b.ResetTimer() + for b.Loop() { + v, err := p.ParseWithArena(a, data) + if err != nil { + b.Fatal(err) + } + sinkValue = v + a.Reset() + } +} + +func BenchmarkSTParseBytes(b *testing.B) { + for _, name := range []string{"small", "medium", "large", "twitter"} { + var data string + for _, f := range fixtures { + if f.name == name { + data = f.data + break + } + } + b.Run(name, func(b *testing.B) { + var p Parser + bb := []byte(data) + b.ReportAllocs() + b.SetBytes(int64(len(bb))) + b.ResetTimer() + for b.Loop() { + v, err := p.ParseBytes(bb) + if err != nil { + b.Fatal(err) + } + sinkValue = v + } + }) + } +} + +func BenchmarkSTParseBytesArena(b *testing.B) { + for _, name := range []string{"small", "medium", "large", "twitter"} { + var data string + for _, f := range fixtures { + if f.name == name { + data = f.data + break + } + } + b.Run(name, func(b *testing.B) { + var p Parser + a := arena.NewMonotonicArena(arena.WithMinBufferSize(2 * 1024 * 1024)) + bb := []byte(data) + b.ReportAllocs() + b.SetBytes(int64(len(bb))) + b.ResetTimer() + for b.Loop() { + v, err := p.ParseBytesWithArena(a, bb) + if err != nil { + b.Fatal(err) + } + sinkValue = v + a.Reset() + } + }) + } +} + +func BenchmarkSTParseRawString(b *testing.B) { + cases := []struct { + name string + s string // includes opening quote already stripped + }{ + {"empty", `"`}, + {"short", `hello"`}, + {"medium", `abcdefghijklmnopqrstuvwxyz012345678901234567890123"`}, + {"with_escape", `hello\"world\\nfoo"`}, + {"unicode", `\u0048\u0065\u006C\u006C\u006F"`}, + } + for _, tc := range cases { + b.Run(tc.name, func(b *testing.B) { + b.ReportAllocs() + b.SetBytes(int64(len(tc.s))) + b.ResetTimer() + for b.Loop() { + rs, _, err := parseRawString(tc.s) + if err != nil { + b.Fatal(err) + } + sinkString = rs + } + }) + } +} + +func BenchmarkSTParseRawNumber(b 
*testing.B) { + cases := []struct { + name string + s string + }{ + {"int", "12345,"}, + {"float", "123.456,"}, + {"exp", "123.456e+78,"}, + {"negative", "-12345.6789,"}, + } + for _, tc := range cases { + b.Run(tc.name, func(b *testing.B) { + b.ReportAllocs() + b.SetBytes(int64(len(tc.s))) + b.ResetTimer() + for b.Loop() { + rn, _, err := parseRawNumber(tc.s) + if err != nil { + b.Fatal(err) + } + sinkString = rn + } + }) + } +} + +func BenchmarkSTParseRawKey(b *testing.B) { + cases := []struct { + name string + s string // after the opening quote + }{ + {"simple", `username"`}, + {"long", strings.Repeat("a", 100) + `"`}, + {"with_escape", `user\"name"`}, + } + for _, tc := range cases { + b.Run(tc.name, func(b *testing.B) { + b.ReportAllocs() + b.SetBytes(int64(len(tc.s))) + b.ResetTimer() + for b.Loop() { + k, _, err := parseRawKey(tc.s) + if err != nil { + b.Fatal(err) + } + sinkString = k + } + }) + } +} + +func BenchmarkSTSkipWS(b *testing.B) { + cases := []struct { + name string + s string + }{ + {"none", `{"key": 1}`}, + {"short", ` {"key": 1}`}, + {"long", strings.Repeat(" ", 256) + `{"key": 1}`}, + } + for _, tc := range cases { + b.Run(tc.name, func(b *testing.B) { + b.ReportAllocs() + b.SetBytes(int64(len(tc.s))) + b.ResetTimer() + for b.Loop() { + sinkString = skipWS(tc.s) + } + }) + } +} + +func BenchmarkSTUnescapeStringBestEffort(b *testing.B) { + cases := []struct { + name string + s string + }{ + {"no_escape", "hello world plain text"}, + {"simple_escape", `hello\nworld\ttab`}, + {"unicode_escape", `\u0048\u0065\u006C\u006C\u006F`}, + {"surrogate_pair", `\uD83D\uDE00 smile`}, + } + for _, tc := range cases { + b.Run("heap/"+tc.name, func(b *testing.B) { + b.ReportAllocs() + b.SetBytes(int64(len(tc.s))) + b.ResetTimer() + for b.Loop() { + sinkString = unescapeStringBestEffort(nil, tc.s) + } + }) + b.Run("arena/"+tc.name, func(b *testing.B) { + a := arena.NewMonotonicArena(arena.WithMinBufferSize(4096)) + b.ReportAllocs() + 
b.SetBytes(int64(len(tc.s))) + b.ResetTimer() + for b.Loop() { + sinkString = unescapeStringBestEffort(a, tc.s) + a.Reset() + } + }) + } +} + +func BenchmarkSTEscapeString(b *testing.B) { + cases := []struct { + name string + s string + }{ + {"no_special", "hello world plain text no special chars here"}, + {"with_quotes", `hello "world" said "foo"`}, + {"with_control", "hello\nworld\ttab\rreturn"}, + {"mixed", "he said \"hi\"\nbye\\done"}, + } + for _, tc := range cases { + b.Run(tc.name, func(b *testing.B) { + dst := make([]byte, 0, 256) + b.ReportAllocs() + b.SetBytes(int64(len(tc.s))) + b.ResetTimer() + for b.Loop() { + sinkBytes = escapeString(dst[:0], tc.s) + } + }) + } +} + +func BenchmarkSTHasSpecialChars(b *testing.B) { + plain100 := strings.Repeat("abcdefghij", 10) + cases := []struct { + name string + s string + }{ + {"none_short", "hello"}, + {"none_long", plain100}, + {"early_hit", `ab"cd`}, + {"late_hit", plain100[:99] + `"`}, + } + for _, tc := range cases { + b.Run(tc.name, func(b *testing.B) { + b.ReportAllocs() + b.SetBytes(int64(len(tc.s))) + b.ResetTimer() + for b.Loop() { + sinkBool = hasSpecialChars(tc.s) + } + }) + } +} + +func BenchmarkSTParseInlineSmall(b *testing.B) { + cases := []struct { + name string + s string + }{ + {"null", "null"}, + {"true", "true"}, + {"false", "false"}, + {"number", "12345"}, + {"string", `"hello world"`}, + {"empty_object", "{}"}, + {"empty_array", "[]"}, + {"small_object", `{"a":1,"b":"x","c":true}`}, + } + for _, tc := range cases { + b.Run(tc.name, func(b *testing.B) { + var p Parser + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + v, err := p.Parse(tc.s) + if err != nil { + b.Fatal(err) + } + sinkValue = v + } + }) + } +} + +func BenchmarkSTParseDeepNesting(b *testing.B) { + // Build 100-level nested object: {"a":{"a":{"a":...1...}}} + depth := 100 + s := "1" + for range depth { + s = `{"a":` + s + `}` + } + var p Parser + b.ReportAllocs() + b.SetBytes(int64(len(s))) + b.ResetTimer() + for b.Loop() { + 
v, err := p.Parse(s) + if err != nil { + b.Fatal(err) + } + sinkValue = v + } +} + +// --------------------------------------------------------------------------- +// Section 2: Value Access +// --------------------------------------------------------------------------- + +func BenchmarkSTValueGet(b *testing.B) { + var p Parser + v, err := p.Parse(twitterFixture) + if err != nil { + b.Fatal(err) + } + + cases := []struct { + name string + keys []string + }{ + {"shallow", []string{"statuses"}}, + {"deep_2", []string{"statuses", "0"}}, + {"deep_3", []string{"statuses", "0", "user"}}, + {"miss", []string{"nonexistent"}}, + } + for _, tc := range cases { + b.Run(tc.name, func(b *testing.B) { + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + sinkValue = v.Get(tc.keys...) + } + }) + } +} + +func BenchmarkSTObjectGet(b *testing.B) { + var p Parser + + // Small object: twitter statuses[0] (~25 keys) + tv, err := p.Parse(twitterFixture) + if err != nil { + b.Fatal(err) + } + smallObj := tv.Get("statuses", "0").GetObject() + + // Large object: bunchFields (871 keys) + bv, err := p.Parse(bunchFieldsFixture) + if err != nil { + b.Fatal(err) + } + largeObj := bv.GetObject() + + b.Run("small_hit", func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + sinkValue = smallObj.Get("user") + } + }) + b.Run("small_miss", func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + sinkValue = smallObj.Get("nonexistent_key_xyz") + } + }) + b.Run("large_first", func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + sinkValue = largeObj.Get("4") + } + }) + b.Run("large_last", func(b *testing.B) { + // Get the last key + var lastKey string + largeObj.Visit(func(key []byte, v *Value) { + lastKey = string(key) + }) + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + sinkValue = largeObj.Get(lastKey) + } + }) + b.Run("large_miss", func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + sinkValue = largeObj.Get("nonexistent_key_xyz") + } + }) +} + +func 
BenchmarkSTObjectVisit(b *testing.B) { + var p Parser + + tv, err := p.Parse(twitterFixture) + if err != nil { + b.Fatal(err) + } + obj := tv.Get("statuses", "0").GetObject() + + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + obj.Visit(func(key []byte, v *Value) { + sinkBytes = key + }) + } +} + +func BenchmarkSTGetStringBytes(b *testing.B) { + var p Parser + v, _ := p.Parse(`{"name":"hello world"}`) + sv := v.Get("name") + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + sb, err := sv.StringBytes() + if err != nil { + b.Fatal(err) + } + sinkBytes = sb + } +} + +func BenchmarkSTGetInt(b *testing.B) { + var p Parser + v, _ := p.Parse(`{"count":12345}`) + nv := v.Get("count") + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + n, err := nv.Int() + if err != nil { + b.Fatal(err) + } + sinkInt = n + } +} + +func BenchmarkSTGetFloat64(b *testing.B) { + var p Parser + v, _ := p.Parse(`{"price":123.456}`) + fv := v.Get("price") + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + f, err := fv.Float64() + if err != nil { + b.Fatal(err) + } + sinkFloat64 = f + } +} + +func BenchmarkSTGetBool(b *testing.B) { + var p Parser + v, _ := p.Parse(`{"active":true}`) + bv := v.Get("active") + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + sinkBool = bv.GetBool() + } +} + +// --------------------------------------------------------------------------- +// Section 3: Merging +// --------------------------------------------------------------------------- + +func BenchmarkSTMergeValuesObject(b *testing.B) { + b.Run("small", func(b *testing.B) { + a := arena.NewMonotonicArena(arena.WithMinBufferSize(4096)) + var p Parser + aVal, err := p.ParseWithArena(a, `{"x":1,"y":2,"z":3}`) + if err != nil { + b.Fatal(err) + } + bVal, err := p.ParseWithArena(a, `{"y":20,"w":4}`) + if err != nil { + b.Fatal(err) + } + aBytes := []byte(aVal.String()) + bBytes := []byte(bVal.String()) + a.Reset() + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + av, err := 
p.ParseBytesWithArena(a, aBytes) + if err != nil { + b.Fatal(err) + } + bv, err := p.ParseBytesWithArena(a, bBytes) + if err != nil { + b.Fatal(err) + } + v, _, err := MergeValues(a, av, bv) + if err != nil { + b.Fatal(err) + } + sinkValue = v + a.Reset() + } + }) + b.Run("medium", func(b *testing.B) { + // Build two 10-key objects with 3 overlapping keys + a := arena.NewMonotonicArena(arena.WithMinBufferSize(4096)) + var p Parser + aJSON := `{"a":1,"b":2,"c":3,"d":4,"e":5,"f":6,"g":7,"h":8,"i":9,"j":10}` + bJSON := `{"h":80,"i":90,"j":100,"k":11,"l":12}` + aBytes := []byte(aJSON) + bBytes := []byte(bJSON) + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + av, err := p.ParseBytesWithArena(a, aBytes) + if err != nil { + b.Fatal(err) + } + bv, err := p.ParseBytesWithArena(a, bBytes) + if err != nil { + b.Fatal(err) + } + v, _, err := MergeValues(a, av, bv) + if err != nil { + b.Fatal(err) + } + sinkValue = v + a.Reset() + } + }) + b.Run("large", func(b *testing.B) { + // Merge two copies of twitter statuses[0] (realistic large object merge) + a := arena.NewMonotonicArena(arena.WithMinBufferSize(2 * 1024 * 1024)) + var p Parser + tv, _ := p.Parse(twitterFixture) + obj := tv.Get("statuses", "0") + objJSON := obj.String() + objBytes := []byte(objJSON) + b.ReportAllocs() + b.SetBytes(int64(len(objBytes) * 2)) + b.ResetTimer() + for b.Loop() { + av, err := p.ParseBytesWithArena(a, objBytes) + if err != nil { + b.Fatal(err) + } + bv, err := p.ParseBytesWithArena(a, objBytes) + if err != nil { + b.Fatal(err) + } + v, _, err := MergeValues(a, av, bv) + if err != nil { + b.Fatal(err) + } + sinkValue = v + a.Reset() + } + }) +} + +func BenchmarkSTMergeValuesArray(b *testing.B) { + a := arena.NewMonotonicArena(arena.WithMinBufferSize(4096)) + var p Parser + aBytes := []byte(`[1,2,3,4,5,6,7,8,9,10]`) + bBytes := []byte(`[11,12,13,14,15,16,17,18,19,20]`) + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + av, _ := p.ParseBytesWithArena(a, aBytes) + bv, _ := 
p.ParseBytesWithArena(a, bBytes) + v, _, err := MergeValues(a, av, bv) + if err != nil { + b.Fatal(err) + } + sinkValue = v + a.Reset() + } +} + +func BenchmarkSTMergeValuesScalar(b *testing.B) { + b.Run("string", func(b *testing.B) { + var p Parser + aVal, _ := p.Parse(`"hello"`) + bVal, _ := p.Parse(`"world"`) + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + v, _, err := MergeValues(nil, aVal, bVal) + if err != nil { + b.Fatal(err) + } + sinkValue = v + } + }) + b.Run("number", func(b *testing.B) { + var p Parser + aVal, _ := p.Parse(`123`) + bVal, _ := p.Parse(`456`) + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + v, _, err := MergeValues(nil, aVal, bVal) + if err != nil { + b.Fatal(err) + } + sinkValue = v + } + }) + b.Run("bool", func(b *testing.B) { + var p Parser + aVal, _ := p.Parse(`true`) + bVal, _ := p.Parse(`false`) + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + v, _, err := MergeValues(nil, aVal, bVal) + if err != nil { + b.Fatal(err) + } + sinkValue = v + } + }) +} + +func BenchmarkSTMergeValuesWithPath(b *testing.B) { + b.Run("depth_1", func(b *testing.B) { + a := arena.NewMonotonicArena(arena.WithMinBufferSize(4096)) + var p Parser + aBytes := []byte(`{"data":"old_value"}`) + bBytes := []byte(`"new_value"`) + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + av, _ := p.ParseBytesWithArena(a, aBytes) + bv, _ := p.ParseBytesWithArena(a, bBytes) + v, _, err := MergeValuesWithPath(a, av, bv, "data") + if err != nil { + b.Fatal(err) + } + sinkValue = v + a.Reset() + } + }) + b.Run("depth_3", func(b *testing.B) { + a := arena.NewMonotonicArena(arena.WithMinBufferSize(4096)) + var p Parser + aBytes := []byte(`{"data":{"user":{"name":"old"}}}`) + bBytes := []byte(`"new_name"`) + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + av, _ := p.ParseBytesWithArena(a, aBytes) + bv, _ := p.ParseBytesWithArena(a, bBytes) + v, _, err := MergeValuesWithPath(a, av, bv, "data", "user", "name") + if err != nil { + b.Fatal(err) + } + sinkValue = 
v + a.Reset() + } + }) +} + +func BenchmarkSTMergeValuesNested(b *testing.B) { + // 5 levels of nesting with overlapping keys at each level + a := arena.NewMonotonicArena(arena.WithMinBufferSize(4096)) + var p Parser + aBytes := []byte(`{"l1":{"l2":{"l3":{"l4":{"l5":"a_val"},"x":1},"y":2},"z":3},"w":4}`) + bBytes := []byte(`{"l1":{"l2":{"l3":{"l4":{"l5":"b_val"},"x":10},"y":20},"z":30},"w":40}`) + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + av, _ := p.ParseBytesWithArena(a, aBytes) + bv, _ := p.ParseBytesWithArena(a, bBytes) + v, _, err := MergeValues(a, av, bv) + if err != nil { + b.Fatal(err) + } + sinkValue = v + a.Reset() + } +} + +// --------------------------------------------------------------------------- +// Section 4: Value Creation +// --------------------------------------------------------------------------- + +func BenchmarkSTValueCreationHeap(b *testing.B) { + b.Run("string", func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + sinkValue = StringValue(nil, "hello world") + } + }) + b.Run("int", func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + sinkValue = IntValue(nil, 12345) + } + }) + b.Run("float", func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + sinkValue = FloatValue(nil, 123.456) + } + }) + b.Run("true", func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + sinkValue = TrueValue(nil) + } + }) + b.Run("false", func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + sinkValue = FalseValue(nil) + } + }) + b.Run("object", func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + sinkValue = ObjectValue(nil) + } + }) + b.Run("array", func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + sinkValue = ArrayValue(nil) + } + }) +} + +func BenchmarkSTValueCreationArena(b *testing.B) { + a := arena.NewMonotonicArena(arena.WithMinBufferSize(1024 * 1024)) + b.Run("string", func(b *testing.B) { + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + sinkValue = StringValue(a, "hello world") + a.Reset() + } + }) + 
b.Run("int", func(b *testing.B) { + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + sinkValue = IntValue(a, 12345) + a.Reset() + } + }) + b.Run("float", func(b *testing.B) { + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + sinkValue = FloatValue(a, 123.456) + a.Reset() + } + }) + b.Run("true", func(b *testing.B) { + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + sinkValue = TrueValue(a) + a.Reset() + } + }) + b.Run("false", func(b *testing.B) { + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + sinkValue = FalseValue(a) + a.Reset() + } + }) + b.Run("object", func(b *testing.B) { + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + sinkValue = ObjectValue(a) + a.Reset() + } + }) + b.Run("array", func(b *testing.B) { + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + sinkValue = ArrayValue(a) + a.Reset() + } + }) +} + +// --------------------------------------------------------------------------- +// Section 5: Mutation +// --------------------------------------------------------------------------- + +func BenchmarkSTObjectSet(b *testing.B) { + b.Run("new_key", func(b *testing.B) { + a := arena.NewMonotonicArena(arena.WithMinBufferSize(4096)) + var p Parser + base := []byte(`{"a":1,"b":2,"c":3}`) + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + v, _ := p.ParseBytesWithArena(a, base) + v.Set(a, "d", IntValue(a, 4)) + sinkValue = v + a.Reset() + } + }) + b.Run("existing_key", func(b *testing.B) { + a := arena.NewMonotonicArena(arena.WithMinBufferSize(4096)) + var p Parser + base := []byte(`{"a":1,"b":2,"c":3}`) + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + v, _ := p.ParseBytesWithArena(a, base) + v.Set(a, "b", IntValue(a, 20)) + sinkValue = v + a.Reset() + } + }) +} + +func BenchmarkSTObjectDel(b *testing.B) { + b.Run("first_key", func(b *testing.B) { + var p Parser + base := `{"a":1,"b":2,"c":3,"d":4,"e":5}` + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + v, _ := p.Parse(base) + v.Del("a") + sinkValue = v + } + }) + 
b.Run("last_key", func(b *testing.B) { + var p Parser + base := `{"a":1,"b":2,"c":3,"d":4,"e":5}` + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + v, _ := p.Parse(base) + v.Del("e") + sinkValue = v + } + }) + b.Run("miss", func(b *testing.B) { + var p Parser + base := `{"a":1,"b":2,"c":3,"d":4,"e":5}` + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + v, _ := p.Parse(base) + v.Del("nonexistent") + sinkValue = v + } + }) +} + +func BenchmarkSTSetArrayItem(b *testing.B) { + b.Run("replace", func(b *testing.B) { + a := arena.NewMonotonicArena(arena.WithMinBufferSize(4096)) + var p Parser + base := []byte(`[1,2,3,4,5]`) + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + v, _ := p.ParseBytesWithArena(a, base) + v.SetArrayItem(a, 2, IntValue(a, 30)) + sinkValue = v + a.Reset() + } + }) + b.Run("append", func(b *testing.B) { + a := arena.NewMonotonicArena(arena.WithMinBufferSize(4096)) + var p Parser + base := []byte(`[1,2,3,4,5]`) + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + v, _ := p.ParseBytesWithArena(a, base) + v.SetArrayItem(a, 5, IntValue(a, 6)) + sinkValue = v + a.Reset() + } + }) +} + +func BenchmarkSTAppendToArray(b *testing.B) { + a := arena.NewMonotonicArena(arena.WithMinBufferSize(4096)) + var p Parser + base := []byte(`[1,2,3,4,5,6,7,8,9,10]`) + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + v, _ := p.ParseBytesWithArena(a, base) + AppendToArray(a, v, IntValue(a, 11)) + sinkValue = v + a.Reset() + } +} + +// --------------------------------------------------------------------------- +// Section 6: Serialization +// --------------------------------------------------------------------------- + +func BenchmarkSTMarshalTo(b *testing.B) { + var p Parser + for _, f := range fixtures { + b.Run(f.name, func(b *testing.B) { + v, err := p.Parse(f.data) + if err != nil { + b.Fatal(err) + } + dst := make([]byte, 0, len(f.data)) + b.ReportAllocs() + b.SetBytes(int64(len(f.data))) + b.ResetTimer() + for b.Loop() { + sinkBytes = 
v.MarshalTo(dst[:0]) + } + }) + } + b.Run("20mb", func(b *testing.B) { + v, err := p.Parse(huge20MbFixture) + if err != nil { + b.Fatal(err) + } + dst := make([]byte, 0, len(huge20MbFixture)) + b.ReportAllocs() + b.SetBytes(int64(len(huge20MbFixture))) + b.ResetTimer() + for b.Loop() { + sinkBytes = v.MarshalTo(dst[:0]) + } + }) +} + +func BenchmarkSTMarshalToArena(b *testing.B) { + for _, f := range fixtures { + b.Run(f.name, func(b *testing.B) { + var p Parser + a := arena.NewMonotonicArena(arena.WithMinBufferSize(2 * 1024 * 1024)) + dst := make([]byte, 0, len(f.data)) + b.ReportAllocs() + b.SetBytes(int64(len(f.data))) + b.ResetTimer() + for b.Loop() { + v, err := p.ParseWithArena(a, f.data) + if err != nil { + b.Fatal(err) + } + sinkBytes = v.MarshalTo(dst[:0]) + a.Reset() + } + }) + } +} + +// --------------------------------------------------------------------------- +// Section 7: Utilities +// --------------------------------------------------------------------------- + +func BenchmarkSTDeepCopy(b *testing.B) { + subsets := []struct { + name string + data string + }{ + {"small", smallFixture}, + {"medium", mediumFixture}, + {"twitter", twitterFixture}, + } + for _, s := range subsets { + b.Run(s.name, func(b *testing.B) { + var p Parser + v, err := p.Parse(s.data) + if err != nil { + b.Fatal(err) + } + a := arena.NewMonotonicArena(arena.WithMinBufferSize(2 * 1024 * 1024)) + b.ReportAllocs() + b.SetBytes(int64(len(s.data))) + b.ResetTimer() + for b.Loop() { + sinkValue = DeepCopy(a, v) + a.Reset() + } + }) + } +} + +func BenchmarkSTDeduplicateObjectKeys(b *testing.B) { + b.Run("small", func(b *testing.B) { + var p Parser + data := `{"a":1,"b":2,"a":3,"c":4,"b":5}` + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + v, _ := p.Parse(data) + DeduplicateObjectKeysRecursively(v) + sinkValue = v + } + }) + b.Run("large", func(b *testing.B) { + // Use bunchFields fixture (871 keys, all unique — tests the scan cost) + var p Parser + b.ReportAllocs() + 
b.ResetTimer() + for b.Loop() { + v, _ := p.Parse(bunchFieldsFixture) + DeduplicateObjectKeysRecursively(v) + sinkValue = v + } + }) +} + +func BenchmarkSTSetValue(b *testing.B) { + b.Run("existing_path", func(b *testing.B) { + a := arena.NewMonotonicArena(arena.WithMinBufferSize(4096)) + var p Parser + base := []byte(`{"data":{"user":{"name":"old"}}}`) + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + v, _ := p.ParseBytesWithArena(a, base) + SetValue(a, v, StringValue(a, "new"), "data", "user", "name") + sinkValue = v + a.Reset() + } + }) + b.Run("new_path_depth2", func(b *testing.B) { + a := arena.NewMonotonicArena(arena.WithMinBufferSize(4096)) + var p Parser + base := []byte(`{"data":{}}`) + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + v, _ := p.ParseBytesWithArena(a, base) + SetValue(a, v, StringValue(a, "value"), "data", "newkey") + sinkValue = v + a.Reset() + } + }) + b.Run("new_path_depth4", func(b *testing.B) { + a := arena.NewMonotonicArena(arena.WithMinBufferSize(4096)) + var p Parser + base := []byte(`{}`) + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + v, _ := p.ParseBytesWithArena(a, base) + SetValue(a, v, StringValue(a, "value"), "a", "b", "c", "d") + sinkValue = v + a.Reset() + } + }) +} + +func BenchmarkSTSetNull(b *testing.B) { + b.Run("depth_1", func(b *testing.B) { + a := arena.NewMonotonicArena(arena.WithMinBufferSize(4096)) + var p Parser + base := []byte(`{"data":"value"}`) + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + v, _ := p.ParseBytesWithArena(a, base) + SetNull(a, v, "data") + sinkValue = v + a.Reset() + } + }) + b.Run("depth_3", func(b *testing.B) { + a := arena.NewMonotonicArena(arena.WithMinBufferSize(4096)) + var p Parser + base := []byte(`{"a":{"b":{"c":"value"}}}`) + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + v, _ := p.ParseBytesWithArena(a, base) + SetNull(a, v, "a", "b", "c") + sinkValue = v + a.Reset() + } + }) +} + +// 
--------------------------------------------------------------------------- +// Section 8: Validation +// --------------------------------------------------------------------------- + +func BenchmarkSTValidate(b *testing.B) { + for _, f := range fixtures { + b.Run(f.name, func(b *testing.B) { + b.ReportAllocs() + b.SetBytes(int64(len(f.data))) + b.ResetTimer() + for b.Loop() { + sinkErr = Validate(f.data) + } + }) + } +} + +// --------------------------------------------------------------------------- +// Section 9: Scanner +// --------------------------------------------------------------------------- + +func BenchmarkSTScanner(b *testing.B) { + // Concatenate 100 copies of small.json separated by whitespace + parts := make([]string, 100) + for i := range parts { + parts[i] = smallFixture + } + data := strings.Join(parts, "\n") + + var sc Scanner + b.ReportAllocs() + b.SetBytes(int64(len(data))) + b.ResetTimer() + for b.Loop() { + sc.Init(data) + for sc.Next() { + sinkValue = sc.Value() + } + if sc.Error() != nil { + b.Fatal(sc.Error()) + } + } +} + +// --------------------------------------------------------------------------- +// Section 10: End-to-End +// --------------------------------------------------------------------------- + +func BenchmarkSTParseAndGetMultiple(b *testing.B) { + b.Run("heap", func(b *testing.B) { + var p Parser + b.ReportAllocs() + b.SetBytes(int64(len(twitterFixture))) + b.ResetTimer() + for b.Loop() { + v, err := p.Parse(twitterFixture) + if err != nil { + b.Fatal(err) + } + _ = v.Get("statuses") + _ = v.Get("statuses", "0", "user") + _ = v.Get("search_metadata") + _ = v.Get("statuses", "0", "text") + sinkValue = v + } + }) + b.Run("arena", func(b *testing.B) { + var p Parser + a := arena.NewMonotonicArena(arena.WithMinBufferSize(2 * 1024 * 1024)) + b.ReportAllocs() + b.SetBytes(int64(len(twitterFixture))) + b.ResetTimer() + for b.Loop() { + v, err := p.ParseWithArena(a, twitterFixture) + if err != nil { + b.Fatal(err) + } + _ = 
v.Get("statuses") + _ = v.Get("statuses", "0", "user") + _ = v.Get("search_metadata") + _ = v.Get("statuses", "0", "text") + sinkValue = v + a.Reset() + } + }) +} + +func BenchmarkSTParseModifyMarshal(b *testing.B) { + var p Parser + data := smallFixture + dst := make([]byte, 0, len(data)*2) + b.ReportAllocs() + b.SetBytes(int64(len(data))) + b.ResetTimer() + for b.Loop() { + v, err := p.Parse(data) + if err != nil { + b.Fatal(err) + } + v.Set(nil, "newkey", StringValue(nil, "newval")) + sinkBytes = v.MarshalTo(dst[:0]) + } +} diff --git a/chartable.go b/chartable.go new file mode 100644 index 0000000..0763cfe --- /dev/null +++ b/chartable.go @@ -0,0 +1,65 @@ +package astjson + +const ( + charWS uint8 = 1 << 0 // whitespace: space, tab, newline, CR + charNumChar uint8 = 1 << 1 // valid in number: digits, ., -, +, e, E + charEscape uint8 = 1 << 2 // needs escaping in JSON string: ", \, < 0x20 +) + +// charFlags is a 256-byte lookup table for character classification. +// Replaces multi-branch comparisons in hot loops with a single table lookup. +var charFlags [256]uint8 + +// hexDigit maps ASCII bytes to their hex digit value (0-15). +// Invalid hex chars are mapped to 0xFF. 
+var hexDigit [256]uint8 + +func init() { + // Whitespace + charFlags[0x20] |= charWS // space + charFlags[0x09] |= charWS // tab + charFlags[0x0A] |= charWS // newline + charFlags[0x0D] |= charWS // carriage return + + // Number characters + for c := byte('0'); c <= '9'; c++ { + charFlags[c] |= charNumChar + } + charFlags['.'] |= charNumChar + charFlags['-'] |= charNumChar + charFlags['+'] |= charNumChar + charFlags['e'] |= charNumChar + charFlags['E'] |= charNumChar + + // Characters that need escaping in JSON strings + charFlags['"'] |= charEscape + charFlags['\\'] |= charEscape + for c := range 0x20 { + charFlags[c] |= charEscape + } + + // Hex digit lookup (0xFF = invalid) + for i := range hexDigit { + hexDigit[i] = 0xFF + } + for c := byte('0'); c <= '9'; c++ { + hexDigit[c] = c - '0' + } + for c := byte('a'); c <= 'f'; c++ { + hexDigit[c] = c - 'a' + 10 + } + for c := byte('A'); c <= 'F'; c++ { + hexDigit[c] = c - 'A' + 10 + } +} + +// parseHex4 parses 4 hex digits from s into a uint16. +// Returns the value and true on success, or 0 and false on invalid input. +func parseHex4(s string) (uint16, bool) { + a, b, c, d := hexDigit[s[0]], hexDigit[s[1]], hexDigit[s[2]], hexDigit[s[3]] + // Valid hex digits are 0..15 (low nibble); invalid sentinel 0xFF has high bits set. + if (a|b|c|d)&0xF0 != 0 { + return 0, false + } + return uint16(a)<<12 | uint16(b)<<8 | uint16(c)<<4 | uint16(d), true +} diff --git a/fastfloat/parse.go b/fastfloat/parse.go index 9a562a2..a91f61f 100644 --- a/fastfloat/parse.go +++ b/fastfloat/parse.go @@ -1,12 +1,18 @@ package fastfloat import ( - "fmt" + "errors" "math" "strconv" "strings" ) +var ( + errParseUint64Empty = errors.New("cannot parse uint64 from empty string") + errParseInt64Empty = errors.New("cannot parse int64 from empty string") + errParseFloat64Empty = errors.New("cannot parse float64 from empty string") +) + // ParseUint64BestEffort parses uint64 number s. 
// // It is equivalent to strconv.ParseUint(s, 10, 64), but is faster. @@ -54,7 +60,7 @@ func ParseUint64BestEffort(s string) uint64 { // See also ParseUint64BestEffort. func ParseUint64(s string) (uint64, error) { if len(s) == 0 { - return 0, fmt.Errorf("cannot parse uint64 from empty string") + return 0, errParseUint64Empty } i := uint(0) d := uint64(0) @@ -77,11 +83,11 @@ func ParseUint64(s string) (uint64, error) { break } if i <= j { - return 0, fmt.Errorf("cannot parse uint64 from %q", s) + return 0, errors.New("cannot parse uint64 from " + strconv.Quote(s)) } if i < uint(len(s)) { // Unparsed tail left. - return 0, fmt.Errorf("unparsed tail left after parsing uint64 from %q: %q", s, s[i:]) + return 0, errors.New("unparsed tail left after parsing uint64 from " + strconv.Quote(s) + ": " + strconv.Quote(s[i:])) } return d, nil } @@ -144,14 +150,14 @@ func ParseInt64BestEffort(s string) int64 { // See also ParseInt64BestEffort. func ParseInt64(s string) (int64, error) { if len(s) == 0 { - return 0, fmt.Errorf("cannot parse int64 from empty string") + return 0, errParseInt64Empty } i := uint(0) minus := s[0] == '-' if minus { i++ if i >= uint(len(s)) { - return 0, fmt.Errorf("cannot parse int64 from %q", s) + return 0, errors.New("cannot parse int64 from " + strconv.Quote(s)) } } @@ -175,11 +181,11 @@ func ParseInt64(s string) (int64, error) { break } if i <= j { - return 0, fmt.Errorf("cannot parse int64 from %q", s) + return 0, errors.New("cannot parse int64 from " + strconv.Quote(s)) } if i < uint(len(s)) { // Unparsed tail left. - return 0, fmt.Errorf("unparsed tail left after parsing int64 form %q: %q", s, s[i:]) + return 0, errors.New("unparsed tail left after parsing int64 from " + strconv.Quote(s) + ": " + strconv.Quote(s[i:])) } if minus { d = -d @@ -355,21 +361,21 @@ func ParseBestEffort(s string) float64 { // See also ParseBestEffort. 
func Parse(s string) (float64, error) { if len(s) == 0 { - return 0, fmt.Errorf("cannot parse float64 from empty string") + return 0, errParseFloat64Empty } i := uint(0) minus := s[0] == '-' if minus { i++ if i >= uint(len(s)) { - return 0, fmt.Errorf("cannot parse float64 from %q", s) + return 0, errors.New("cannot parse float64 from " + strconv.Quote(s)) } } // the integer part might be elided to remain compliant // with https://go.dev/ref/spec#Floating-point_literals if s[i] == '.' && (i+1 >= uint(len(s)) || s[i+1] < '0' || s[i+1] > '9') { - return 0, fmt.Errorf("missing integer and fractional part in %q", s) + return 0, errors.New("missing integer and fractional part in " + strconv.Quote(s)) } d := uint64(0) @@ -405,7 +411,7 @@ func Parse(s string) (float64, error) { if strings.EqualFold(ss, "nan") { return nan, nil } - return 0, fmt.Errorf("unparsed tail left after parsing float64 from %q: %q", s, ss) + return 0, errors.New("unparsed tail left after parsing float64 from " + strconv.Quote(s) + ": " + strconv.Quote(ss)) } f := float64(d) if i >= uint(len(s)) { @@ -433,7 +439,7 @@ func Parse(s string) (float64, error) { // The mantissa is out of range. Fall back to standard parsing. f, err := strconv.ParseFloat(s, 64) if err != nil && !math.IsInf(f, 0) { - return 0, fmt.Errorf("cannot parse mantissa in %q: %s", s, err) + return 0, errors.New("cannot parse mantissa in " + strconv.Quote(s) + ": " + err.Error()) } return f, nil } @@ -455,14 +461,14 @@ func Parse(s string) (float64, error) { // Parse exponent part. 
i++ if i >= uint(len(s)) { - return 0, fmt.Errorf("cannot parse exponent in %q", s) + return 0, errors.New("cannot parse exponent in " + strconv.Quote(s)) } expMinus := false if s[i] == '+' || s[i] == '-' { expMinus = s[i] == '-' i++ if i >= uint(len(s)) { - return 0, fmt.Errorf("cannot parse exponent in %q", s) + return 0, errors.New("cannot parse exponent in " + strconv.Quote(s)) } } exp := int16(0) @@ -476,7 +482,7 @@ func Parse(s string) (float64, error) { // Fall back to standard parsing. f, err := strconv.ParseFloat(s, 64) if err != nil && !math.IsInf(f, 0) { - return 0, fmt.Errorf("cannot parse exponent in %q: %s", s, err) + return 0, errors.New("cannot parse exponent in " + strconv.Quote(s) + ": " + err.Error()) } return f, nil } @@ -485,7 +491,7 @@ func Parse(s string) (float64, error) { break } if i <= j { - return 0, fmt.Errorf("cannot parse exponent in %q", s) + return 0, errors.New("cannot parse exponent in " + strconv.Quote(s)) } if expMinus { exp = -exp @@ -498,7 +504,7 @@ func Parse(s string) (float64, error) { return f, nil } } - return 0, fmt.Errorf("cannot parse float64 from %q", s) + return 0, errors.New("cannot parse float64 from " + strconv.Quote(s)) } var inf = math.Inf(1) diff --git a/mergevalues.go b/mergevalues.go index fbfb5fd..eb830bd 100644 --- a/mergevalues.go +++ b/mergevalues.go @@ -1,7 +1,6 @@ package astjson import ( - "bytes" "errors" "github.com/wundergraph/go-arena" @@ -34,18 +33,24 @@ func MergeValues(ar arena.Arena, a, b *Value) (v *Value, changed bool, err error if b == nil { return a, false, nil } - if b.Type() == TypeNull && a.Type() == TypeObject { + at, bt := a.t, b.t + if bt == TypeNull && at == TypeObject { // we assume that null was returned in an error case for resolving a nested object field // as we've got an object on the left side, we don't override the whole object with null // instead, we keep the left object and discard the null on the right side return a, false, nil } - aBool, bBool := a.Type() == TypeTrue || 
a.Type() == TypeFalse, b.Type() == TypeTrue || b.Type() == TypeFalse - booleans := aBool && bBool - if a.Type() != b.Type() && !booleans { - return nil, false, ErrMergeDifferentTypes + if at != bt { + // Only compute boolean compatibility when types actually differ + aBool := at == TypeTrue || at == TypeFalse + bBool := bt == TypeTrue || bt == TypeFalse + if !aBool || !bBool { + return nil, false, ErrMergeDifferentTypes + } + // Types differ but both are booleans — b replaces a + return b, true, nil } - switch a.Type() { + switch at { case TypeObject: ao, _ := a.Object() bo, _ := b.Object() @@ -94,29 +99,25 @@ func MergeValues(ar arena.Arena, a, b *Value) (v *Value, changed bool, err error } } return a, false, nil - case TypeFalse: - if b.Type() == TypeTrue { - return b, true, nil - } - return a, false, nil - case TypeTrue: - if b.Type() == TypeFalse { - return b, true, nil - } - return a, false, nil - case TypeNull: + case TypeTrue, TypeFalse, TypeNull: + // at == bt guaranteed by the check above, no change needed return a, false, nil case TypeNumber: - af, _ := a.Float64() - bf, _ := b.Float64() - if af != bf { + // Fast path: if raw number strings are identical, values are equal. + // This avoids expensive float64 parsing in the common case. + if a.s == b.s { + return a, false, nil + } + // Slow path: parse as float64. If either parse fails or values differ, + // treat as changed (b replaces a). 
+ af, aErr := a.Float64() + bf, bErr := b.Float64() + if aErr != nil || bErr != nil || af != bf { return b, true, nil } return a, false, nil case TypeString: - as, _ := a.StringBytes() - bs, _ := b.StringBytes() - if !bytes.Equal(as, bs) { + if a.s != b.s { return b, true, nil } return a, false, nil diff --git a/mergevalues_test.go b/mergevalues_test.go index 088fa96..73c96dc 100644 --- a/mergevalues_test.go +++ b/mergevalues_test.go @@ -111,6 +111,15 @@ func TestMergeValues(t *testing.T) { out := merged.MarshalTo(nil) require.Equal(t, `1.1`, string(out)) }) + t.Run("floats equal different representation", func(t *testing.T) { + t.Parallel() + a, b := MustParse(`1.0`), MustParse(`1.00`) + merged, changed, err := MergeValues(nil, a, b) + require.NoError(t, err) + require.Equal(t, false, changed) + out := merged.MarshalTo(nil) + require.Equal(t, `1.0`, string(out)) + }) t.Run("arrays", func(t *testing.T) { t.Parallel() a, b := MustParse(`[1,2]`), MustParse(`[3,4]`) diff --git a/parser.go b/parser.go index bd7f1cf..11a7e0d 100644 --- a/parser.go +++ b/parser.go @@ -1,7 +1,7 @@ package astjson import ( - "fmt" + "errors" "strconv" "strings" "unicode/utf16" @@ -11,6 +11,87 @@ import ( "github.com/wundergraph/go-arena" ) +// Sentinel errors for static error messages. +// Using pre-allocated errors avoids fmt.Errorf allocations and removes the fmt +// import, which can improve inlining budgets for functions in this file. 
+var ( + errEmptyString = errors.New("cannot parse empty string") + errMaxDepth = errors.New("too big depth for the nested JSON; it exceeds " + strconv.Itoa(MaxDepth)) + errMissingClosingBracket = errors.New("missing ']'") + errMissingClosingBrace = errors.New("missing '}'") + errMissingCommaArray = errors.New("missing ',' after array value") + errMissingCommaObject = errors.New("missing ',' after object value") + errUnexpectedEndArray = errors.New("unexpected end of array") + errUnexpectedEndObject = errors.New("unexpected end of object") + errMissingOpenQuote = errors.New(`cannot find opening '"' for object key`) + errMissingColon = errors.New("missing ':' after object key") + errMissingClosingQuote = errors.New(`missing closing '"'`) +) + +// parseContext holds per-parse state including slab allocators that amortize +// arena allocation overhead by allocating Values and kvs in batches. +type parseContext struct { + a arena.Arena + vs valueSlab + ks kvSlab +} + +// valueSlab allocates Values in batches to amortize arena overhead. +// Starts with a small batch and doubles up to maxSlabSize. +type valueSlab struct { + values []Value + pos int +} + +const ( + minSlabSize = 8 + maxSlabSize = 64 +) + +func (s *valueSlab) get(a arena.Arena) *Value { + if a == nil { + return new(Value) + } + if s.pos >= len(s.values) { + size := len(s.values) * 2 + if size < minSlabSize { + size = minSlabSize + } else if size > maxSlabSize { + size = maxSlabSize + } + s.values = arena.AllocateSlice[Value](a, size, size) + s.pos = 0 + } + v := &s.values[s.pos] + s.pos++ + return v +} + +// kvSlab allocates kv structs in batches to amortize arena overhead. 
+type kvSlab struct { + kvs []kv + pos int +} + +func (s *kvSlab) get(a arena.Arena) *kv { + if a == nil { + return new(kv) + } + if s.pos >= len(s.kvs) { + size := len(s.kvs) * 2 + if size < minSlabSize { + size = minSlabSize + } else if size > maxSlabSize { + size = maxSlabSize + } + s.kvs = arena.AllocateSlice[kv](a, size, size) + s.pos = 0 + } + k := &s.kvs[s.pos] + s.pos++ + return k +} + // ParseError wraps a JSON parsing error. type ParseError struct { Err error @@ -106,15 +187,16 @@ func (p *Parser) ParseBytesWithArena(a arena.Arena, b []byte) (*Value, error) { } func (p *Parser) parse(a arena.Arena, s string) (*Value, error) { + ctx := parseContext{a: a} s = skipWS(s) - v, tail, err := parseValue(a, s, 0) + v, tail, err := parseValue(&ctx, s, 0) if err != nil { - return nil, NewParseError(fmt.Errorf("cannot parse JSON: %s; unparsed tail: %q", err, startEndString(tail))) + return nil, NewParseError(errors.New("cannot parse JSON: " + err.Error() + "; unparsed tail: " + strconv.Quote(startEndString(tail)))) } tail = skipWS(tail) if len(tail) > 0 { - return nil, NewParseError(fmt.Errorf("unexpected tail: %q", startEndString(tail))) + return nil, NewParseError(errors.New("unexpected tail: " + strconv.Quote(startEndString(tail)))) } return v, nil } @@ -131,15 +213,9 @@ func skipWSSlow(s string) string { if len(s) == 0 { return s } - - // Branch prediction optimization: check most common whitespace first - // Space (0x20) is most common, then newline, tab, carriage return for i := 0; i < len(s); i++ { - c := s[i] - if c != 0x20 { // Most common whitespace - if c != 0x0A && c != 0x09 && c != 0x0D { - return s[i:] - } + if charFlags[s[i]]&charWS == 0 { + return s[i:] } } return "" @@ -157,13 +233,13 @@ type kv struct { // MaxDepth is the maximum depth for nested JSON. 
const MaxDepth = 300 -func parseValue(a arena.Arena, s string, depth int) (*Value, string, error) { +func parseValue(ctx *parseContext, s string, depth int) (*Value, string, error) { if len(s) == 0 { - return nil, s, fmt.Errorf("cannot parse empty string") + return nil, s, errEmptyString } depth++ if depth > MaxDepth { - return nil, s, fmt.Errorf("too big depth for the nested JSON; it exceeds %d", MaxDepth) + return nil, s, errMaxDepth } // Branch prediction optimization: order by frequency @@ -173,99 +249,98 @@ func parseValue(a arena.Arena, s string, depth int) (*Value, string, error) { // String - most common in JSON ss, tail, err := parseRawString(s[1:]) if err != nil { - return nil, tail, fmt.Errorf("cannot parse string: %s", err) + return nil, tail, errors.New("cannot parse string: " + err.Error()) } - v := arena.Allocate[Value](a) + v := ctx.vs.get(ctx.a) v.t = TypeString - v.s = unescapeStringBestEffort(a, ss) + v.s = unescapeStringBestEffort(ctx.a, ss) return v, tail, nil case '{': // Object - very common - v, tail, err := parseObject(a, s[1:], depth) + v, tail, err := parseObject(ctx, s[1:], depth) if err != nil { - return nil, tail, fmt.Errorf("cannot parse object: %s", err) + return nil, tail, errors.New("cannot parse object: " + err.Error()) } return v, tail, nil case '[': // Array - common - v, tail, err := parseArray(a, s[1:], depth) + v, tail, err := parseArray(ctx, s[1:], depth) if err != nil { - return nil, tail, fmt.Errorf("cannot parse array: %s", err) + return nil, tail, errors.New("cannot parse array: " + err.Error()) } return v, tail, nil case 't': // true literal - less common if len(s) < len("true") || s[:len("true")] != "true" { - return nil, s, fmt.Errorf("unexpected value found: %q", s) + return nil, s, errors.New("unexpected value found: " + strconv.Quote(s)) } return valueTrue, s[len("true"):], nil case 'f': // false literal - less common if len(s) < len("false") || s[:len("false")] != "false" { - return nil, s, fmt.Errorf("unexpected 
value found: %q", s) + return nil, s, errors.New("unexpected value found: " + strconv.Quote(s)) } return valueFalse, s[len("false"):], nil case 'n': // null literal - less common if len(s) < len("null") || s[:len("null")] != "null" { // Try parsing NaN - if len(s) >= 3 && strings.EqualFold(s[:3], "nan") { - v := arena.Allocate[Value](a) + if len(s) >= 3 && (s[0]|0x20) == 'n' && (s[1]|0x20) == 'a' && (s[2]|0x20) == 'n' { + v := ctx.vs.get(ctx.a) v.t = TypeNumber v.s = s[:3] return v, s[3:], nil } - return nil, s, fmt.Errorf("unexpected value found: %q", s) + return nil, s, errors.New("unexpected value found: " + strconv.Quote(s)) } return valueNull, s[len("null"):], nil default: // Number - very common, but handled last due to complex parsing ns, tail, err := parseRawNumber(s) if err != nil { - return nil, tail, fmt.Errorf("cannot parse number: %s", err) + return nil, tail, errors.New("cannot parse number: " + err.Error()) } - v := arena.Allocate[Value](a) + v := ctx.vs.get(ctx.a) v.t = TypeNumber v.s = ns return v, tail, nil } } -func parseArray(a arena.Arena, s string, depth int) (*Value, string, error) { +func parseArray(ctx *parseContext, s string, depth int) (*Value, string, error) { s = skipWS(s) if len(s) == 0 { - return nil, s, fmt.Errorf("missing ']'") + return nil, s, errMissingClosingBracket } if s[0] == ']' { - v := arena.Allocate[Value](a) + v := ctx.vs.get(ctx.a) v.t = TypeArray v.a = v.a[:0] return v, s[1:], nil } - arr := arena.Allocate[Value](a) + arr := ctx.vs.get(ctx.a) arr.t = TypeArray - arr.a = arr.a[:0] + arr.a = arena.AllocateSlice[*Value](ctx.a, 0, 8) for { var v *Value var err error s = skipWS(s) - v, s, err = parseValue(a, s, depth) + v, s, err = parseValue(ctx, s, depth) if err != nil { - return nil, s, fmt.Errorf("cannot parse array value: %s", err) + return nil, s, errors.New("cannot parse array value: " + err.Error()) } - if arr.a == nil { - arr.a = arena.AllocateSlice[*Value](a, 1, 1) - arr.a[0] = v + if len(arr.a) < cap(arr.a) { + 
arr.a = append(arr.a, v) } else { - arr.a = arena.SliceAppend(a, arr.a, v) + arr.a = arena.SliceAppend(ctx.a, arr.a, v) } s = skipWS(s) if len(s) == 0 { - return nil, s, fmt.Errorf("unexpected end of array") + return nil, s, errUnexpectedEndArray } if s[0] == ',' { s = s[1:] @@ -275,56 +350,63 @@ func parseArray(a arena.Arena, s string, depth int) (*Value, string, error) { s = s[1:] return arr, s, nil } - return nil, s, fmt.Errorf("missing ',' after array value") + return nil, s, errMissingCommaArray } } -func parseObject(a arena.Arena, s string, depth int) (*Value, string, error) { +func parseObject(ctx *parseContext, s string, depth int) (*Value, string, error) { s = skipWS(s) if len(s) == 0 { - return nil, s, fmt.Errorf("missing '}'") + return nil, s, errMissingClosingBrace } if s[0] == '}' { - v := arena.Allocate[Value](a) + v := ctx.vs.get(ctx.a) v.t = TypeObject v.o.reset() return v, s[1:], nil } - o := arena.Allocate[Value](a) + o := ctx.vs.get(ctx.a) o.t = TypeObject - o.o.reset() + o.o.kvs = arena.AllocateSlice[*kv](ctx.a, 0, 8) for { var err error - kv := o.o.getKV(a) + // Inline kv allocation from slab instead of calling getKV + // (getKV is kept unchanged for Object.Set in update.go) + newKV := ctx.ks.get(ctx.a) + if len(o.o.kvs) < cap(o.o.kvs) { + o.o.kvs = append(o.o.kvs, newKV) + } else { + o.o.kvs = arena.SliceAppend(ctx.a, o.o.kvs, newKV) + } // Parse key. 
s = skipWS(s) if len(s) == 0 || s[0] != '"' { - return nil, s, fmt.Errorf(`cannot find opening '"" for object key`) + return nil, s, errMissingOpenQuote } - kv.k, s, err = parseRawKey(s[1:]) + newKV.k, s, err = parseRawKey(s[1:]) if err != nil { - return nil, s, fmt.Errorf("cannot parse object key: %s", err) + return nil, s, errors.New("cannot parse object key: " + err.Error()) } - kv.k = unescapeStringBestEffort(a, kv.k) - kv.keyUnescaped = true + newKV.k = unescapeStringBestEffort(ctx.a, newKV.k) + newKV.keyUnescaped = true s = skipWS(s) if len(s) == 0 || s[0] != ':' { - return nil, s, fmt.Errorf("missing ':' after object key") + return nil, s, errMissingColon } s = s[1:] // Parse value s = skipWS(s) - kv.v, s, err = parseValue(a, s, depth) + newKV.v, s, err = parseValue(ctx, s, depth) if err != nil { - return nil, s, fmt.Errorf("cannot parse object value: %s", err) + return nil, s, errors.New("cannot parse object value: " + err.Error()) } s = skipWS(s) if len(s) == 0 { - return nil, s, fmt.Errorf("unexpected end of object") + return nil, s, errUnexpectedEndObject } if s[0] == ',' { s = s[1:] @@ -333,7 +415,7 @@ func parseObject(a arena.Arena, s string, depth int) (*Value, string, error) { if s[0] == '}' { return o, s[1:], nil } - return nil, s, fmt.Errorf("missing ',' after object value") + return nil, s, errMissingCommaObject } } @@ -351,15 +433,8 @@ func escapeString(dst []byte, s string) []byte { } func hasSpecialChars(s string) bool { - // Branch prediction optimization: check most common cases first for i := 0; i < len(s); i++ { - c := s[i] - // Most common special chars first - if c == '"' || c == '\\' { - return true - } - // Control characters - less common - if c < 0x20 { + if charFlags[s[i]]&charEscape != 0 { return true } } @@ -411,12 +486,10 @@ func unescapeStringBestEffort(a arena.Arena, s string) string { return s } - // Estimate capacity to avoid frequent reallocations - estimatedCap := len(s) + 4 - b := arena.AllocateSlice[byte](a, 0, 
estimatedCap) - - // Add the initial part before the first escape - b = arena.SliceAppend(a, b, []byte(s[:n])...) + // Pre-allocate buffer to len(s) — unescaped is always <= escaped length. + // Use direct indexing instead of per-character SliceAppend. + b := arena.AllocateSlice[byte](a, len(s), len(s)) + w := copy(b, s[:n]) s = s[n+1:] for len(s) > 0 { @@ -424,95 +497,104 @@ func unescapeStringBestEffort(a arena.Arena, s string) string { s = s[1:] switch ch { case '"': - b = arena.SliceAppend(a, b, '"') + b[w] = '"' + w++ case '\\': - b = arena.SliceAppend(a, b, '\\') + b[w] = '\\' + w++ case '/': - b = arena.SliceAppend(a, b, '/') + b[w] = '/' + w++ case 'b': - b = arena.SliceAppend(a, b, '\b') + b[w] = '\b' + w++ case 'f': - b = arena.SliceAppend(a, b, '\f') + b[w] = '\f' + w++ case 'n': - b = arena.SliceAppend(a, b, '\n') + b[w] = '\n' + w++ case 'r': - b = arena.SliceAppend(a, b, '\r') + b[w] = '\r' + w++ case 't': - b = arena.SliceAppend(a, b, '\t') + b[w] = '\t' + w++ case 'u': if len(s) < 4 { - // Too short escape sequence. Just store it unchanged. - b = arena.SliceAppend(a, b, []byte("\\u")...) + b[w] = '\\' + b[w+1] = 'u' + w += 2 break } xs := s[:4] - x, err := strconv.ParseUint(xs, 16, 16) - if err != nil { - // Invalid escape sequence. Just store it unchanged. - b = arena.SliceAppend(a, b, []byte("\\u")...) + x, ok := parseHex4(xs) + if !ok { + b[w] = '\\' + b[w+1] = 'u' + w += 2 break } s = s[4:] if !utf16.IsSurrogate(rune(x)) { - var buf [utf8.UTFMax]byte - n := utf8.EncodeRune(buf[:], rune(x)) - b = arena.SliceAppend(a, b, buf[:n]...) + w += utf8.EncodeRune(b[w:], rune(x)) break } // Surrogate. // See https://en.wikipedia.org/wiki/Universal_Character_Set_characters#Surrogates if len(s) < 6 || s[0] != '\\' || s[1] != 'u' { - b = arena.SliceAppend(a, b, []byte("\\u")...) - b = arena.SliceAppend(a, b, []byte(xs)...) 
+ b[w] = '\\' + b[w+1] = 'u' + w += 2 + w += copy(b[w:], xs) break } - x1, err := strconv.ParseUint(s[2:6], 16, 16) - if err != nil { - b = arena.SliceAppend(a, b, []byte("\\u")...) - b = arena.SliceAppend(a, b, []byte(xs)...) + x1, ok := parseHex4(s[2:6]) + if !ok { + b[w] = '\\' + b[w+1] = 'u' + w += 2 + w += copy(b[w:], xs) break } r := utf16.DecodeRune(rune(x), rune(x1)) - var buf [utf8.UTFMax]byte - rn := utf8.EncodeRune(buf[:], r) - b = arena.SliceAppend(a, b, buf[:rn]...) + w += utf8.EncodeRune(b[w:], r) s = s[6:] default: - // Unknown escape sequence. Just store it unchanged. - b = arena.SliceAppend(a, b, '\\', ch) + b[w] = '\\' + b[w+1] = ch + w += 2 } n = strings.IndexByte(s, '\\') if n < 0 { - b = arena.SliceAppend(a, b, []byte(s)...) + w += copy(b[w:], s) break } - b = arena.SliceAppend(a, b, []byte(s[:n])...) + w += copy(b[w:], s[:n]) s = s[n+1:] } - return b2s(b) + return b2s(b[:w]) } // parseRawKey is similar to parseRawString, but is optimized // for small-sized keys without escape sequences. func parseRawKey(s string) (string, string, error) { - for i := 0; i < len(s); i++ { - if s[i] == '"' { - // Fast path. - return s[:i], s[i+1:], nil - } - if s[i] == '\\' { - // Slow path. - return parseRawString(s) - } + n := strings.IndexByte(s, '"') + if n < 0 { + return s, "", errMissingClosingQuote } - return s, "", fmt.Errorf(`missing closing '"'`) + // Check if the key portion contains an escape sequence. + if strings.IndexByte(s[:n], '\\') >= 0 { + return parseRawString(s) + } + return s[:n], s[n+1:], nil } func parseRawString(s string) (string, string, error) { n := strings.IndexByte(s, '"') if n < 0 { - return s, "", fmt.Errorf(`missing closing '"'`) + return s, "", errMissingClosingQuote } if n == 0 || s[n-1] != '\\' { // Fast path. No escaped ". 
@@ -533,7 +615,7 @@ func parseRawString(s string) (string, string, error) { n = strings.IndexByte(s, '"') if n < 0 { - return ss, "", fmt.Errorf(`missing closing '"'`) + return ss, "", errMissingClosingQuote } if n == 0 || s[n-1] != '\\' { return ss[:len(ss)-len(s)+n], s[n+1:], nil @@ -546,18 +628,18 @@ func parseRawNumber(s string) (string, string, error) { // Find the end of the number. for i := 0; i < len(s); i++ { - ch := s[i] - if (ch >= '0' && ch <= '9') || ch == '.' || ch == '-' || ch == 'e' || ch == 'E' || ch == '+' { + if charFlags[s[i]]&charNumChar != 0 { continue } if i == 0 || i == 1 && (s[0] == '-' || s[0] == '+') { if len(s[i:]) >= 3 { xs := s[i : i+3] - if strings.EqualFold(xs, "inf") || strings.EqualFold(xs, "nan") { + if ((xs[0]|0x20) == 'i' && (xs[1]|0x20) == 'n' && (xs[2]|0x20) == 'f') || + ((xs[0]|0x20) == 'n' && (xs[1]|0x20) == 'a' && (xs[2]|0x20) == 'n') { return s[:i+3], s[i+3:], nil } } - return "", s, fmt.Errorf("unexpected char: %q", s[:1]) + return "", s, errors.New("unexpected char: " + strconv.Quote(s[:1])) } ns := s[:i] s = s[i:] @@ -573,12 +655,13 @@ func parseRawNumber(s string) (string, string, error) { // // Cache-friendly layout: hot data first type Object struct { - kvs []*kv // HOT: frequently accessed - 24 bytes - // Total: 24 bytes - compact and cache-friendly + kvs []*kv // HOT: frequently accessed + kvIndex map[string]int // lazily-built reverse index for O(1) lookups on objects with >16 keys; invalidated on Del/Set } func (o *Object) reset() { o.kvs = o.kvs[:0] + o.kvIndex = nil } // MarshalTo appends marshaled o to dst and returns the result. 
@@ -615,9 +698,14 @@ func (o *Object) String() string { func (o *Object) getKV(a arena.Arena) *kv { if o.kvs == nil { - o.kvs = arena.AllocateSlice[*kv](a, 0, 1) + o.kvs = arena.AllocateSlice[*kv](a, 0, 4) + } + newKV := arena.Allocate[kv](a) + if len(o.kvs) < cap(o.kvs) { + o.kvs = append(o.kvs, newKV) + } else { + o.kvs = arena.SliceAppend(a, o.kvs, newKV) } - o.kvs = arena.SliceAppend(a, o.kvs, arena.Allocate[kv](a)) return o.kvs[len(o.kvs)-1] } @@ -642,6 +730,22 @@ func (o *Object) Get(key string) *Value { if o == nil { return nil } + // For large objects, use a lazily-built hash map for O(1) lookup. NOTE(review): the first Get on such an object writes o.kvIndex, so Get is no longer read-only; the map comes from make, not from the arena - confirm both are acceptable for shared and arena-allocated Objects. + if len(o.kvs) > 16 { + if o.kvIndex == nil { + o.kvIndex = make(map[string]int, len(o.kvs)) + for i, kv := range o.kvs { + // Store first occurrence to match linear scan semantics. + if _, exists := o.kvIndex[kv.k]; !exists { + o.kvIndex[kv.k] = i + } + } + } + if i, ok := o.kvIndex[key]; ok { + return o.kvs[i].v + } + return nil + } // Keys are always pre-unescaped during parsing and Object.Set, // so direct comparison is sufficient. for _, kv := range o.kvs { @@ -675,11 +779,10 @@ func (o *Object) Visit(f func(key []byte, v *Value)) { // // Cache-friendly layout: hot data first, compact structure type Value struct { - t Type // HOT: accessed on every operation - 8 bytes + t Type // HOT: accessed on every operation - 1 byte s string // HOT: frequently accessed for strings/numbers - 16 bytes a []*Value // HOT: frequently accessed for arrays - 24 bytes - o Object // COLD: less frequently accessed - 25 bytes - // Total: 73 bytes - compact and cache-friendly + o Object // COLD: less frequently accessed - 32 bytes on 64-bit (24-byte kvs slice header + 8-byte kvIndex map pointer) } // MarshalTo appends marshaled v to dst and returns the result. @@ -708,7 +811,7 @@ func (v *Value) MarshalTo(dst []byte) []byte { case TypeNull: return append(dst, "null"...) 
default: - panic(fmt.Errorf("BUG: unexpected Value type: %d", v.t)) + panic("BUG: unexpected Value type: " + strconv.Itoa(int(v.t))) } } @@ -727,7 +830,7 @@ func (v *Value) String() string { } // Type represents JSON type. -type Type int +type Type uint8 const ( // TypeNull is JSON null. @@ -773,7 +876,7 @@ func (t Type) String() string { // typeRawString is skipped intentionally, // since it shouldn't be visible to user. default: - panic(fmt.Errorf("BUG: unknown Value type: %d", t)) + panic("BUG: unknown Value type: " + strconv.Itoa(int(t))) } } @@ -953,7 +1056,7 @@ func (v *Value) GetBool(keys ...string) bool { // Use GetObject if you don't need error handling. func (v *Value) Object() (*Object, error) { if v.t != TypeObject { - return nil, fmt.Errorf("value doesn't contain object; it contains %s", v.Type()) + return nil, errors.New("value doesn't contain object; it contains " + v.Type().String()) } return &v.o, nil } @@ -965,7 +1068,7 @@ func (v *Value) Object() (*Object, error) { // Use GetArray if you don't need error handling. func (v *Value) Array() ([]*Value, error) { if v.t != TypeArray { - return nil, fmt.Errorf("value doesn't contain array; it contains %s", v.Type()) + return nil, errors.New("value doesn't contain array; it contains " + v.Type().String()) } return v.a, nil } @@ -977,7 +1080,7 @@ func (v *Value) Array() ([]*Value, error) { // Use GetStringBytes if you don't need error handling. func (v *Value) StringBytes() ([]byte, error) { if v.Type() != TypeString { - return nil, fmt.Errorf("value doesn't contain string; it contains %s", v.Type()) + return nil, errors.New("value doesn't contain string; it contains " + v.Type().String()) } return s2b(v.s), nil } @@ -987,7 +1090,7 @@ func (v *Value) StringBytes() ([]byte, error) { // Use GetFloat64 if you don't need error handling. 
func (v *Value) Float64() (float64, error) { if v.Type() != TypeNumber { - return 0, fmt.Errorf("value doesn't contain number; it contains %s", v.Type()) + return 0, errors.New("value doesn't contain number; it contains " + v.Type().String()) } return fastfloat.Parse(v.s) } @@ -997,7 +1100,7 @@ func (v *Value) Float64() (float64, error) { // Use GetInt if you don't need error handling. func (v *Value) Int() (int, error) { if v.Type() != TypeNumber { - return 0, fmt.Errorf("value doesn't contain number; it contains %s", v.Type()) + return 0, errors.New("value doesn't contain number; it contains " + v.Type().String()) } n, err := fastfloat.ParseInt64(v.s) if err != nil { @@ -1011,7 +1114,7 @@ func (v *Value) Int() (int, error) { // Use GetInt if you don't need error handling. func (v *Value) Uint() (uint, error) { if v.Type() != TypeNumber { - return 0, fmt.Errorf("value doesn't contain number; it contains %s", v.Type()) + return 0, errors.New("value doesn't contain number; it contains " + v.Type().String()) } n, err := fastfloat.ParseUint64(v.s) if err != nil { @@ -1025,7 +1128,7 @@ func (v *Value) Uint() (uint, error) { // Use GetInt64 if you don't need error handling. func (v *Value) Int64() (int64, error) { if v.Type() != TypeNumber { - return 0, fmt.Errorf("value doesn't contain number; it contains %s", v.Type()) + return 0, errors.New("value doesn't contain number; it contains " + v.Type().String()) } return fastfloat.ParseInt64(v.s) } @@ -1035,7 +1138,7 @@ func (v *Value) Int64() (int64, error) { // Use GetInt64 if you don't need error handling. 
func (v *Value) Uint64() (uint64, error) { if v.Type() != TypeNumber { - return 0, fmt.Errorf("value doesn't contain number; it contains %s", v.Type()) + return 0, errors.New("value doesn't contain number; it contains " + v.Type().String()) } return fastfloat.ParseUint64(v.s) } @@ -1050,7 +1153,7 @@ func (v *Value) Bool() (bool, error) { if v.t == TypeFalse { return false, nil } - return false, fmt.Errorf("value doesn't contain bool; it contains %s", v.Type()) + return false, errors.New("value doesn't contain bool; it contains " + v.Type().String()) } var ( diff --git a/parser_test.go b/parser_test.go index de67168..a1b2ebf 100644 --- a/parser_test.go +++ b/parser_test.go @@ -1672,7 +1672,7 @@ func TestObjectGetEdgeCases(t *testing.T) { // TestValueMarshalToEdgeCases tests edge cases in Value.MarshalTo func TestValueMarshalToEdgeCases(t *testing.T) { t.Run("unknown type", func(t *testing.T) { - v := &Value{t: Type(999)} // Invalid type + v := &Value{t: Type(255)} // Invalid type defer func() { if r := recover(); r == nil { t.Errorf("expected panic for unknown type") @@ -1685,7 +1685,7 @@ func TestValueMarshalToEdgeCases(t *testing.T) { // TestTypeStringEdgeCases tests edge cases in Type.String func TestTypeStringEdgeCases(t *testing.T) { t.Run("unknown type", func(t *testing.T) { - tp := Type(999) // Invalid type + tp := Type(255) // Invalid type defer func() { if r := recover(); r == nil { t.Errorf("expected panic for unknown type") diff --git a/scanner.go b/scanner.go index bd5474f..20d209e 100644 --- a/scanner.go +++ b/scanner.go @@ -61,7 +61,8 @@ func (sc *Scanner) Next() bool { return false } - v, tail, err := parseValue(nil, sc.s, 0) + ctx := parseContext{} // heap mode: a == nil + v, tail, err := parseValue(&ctx, sc.s, 0) if err != nil { sc.err = err return false diff --git a/update.go b/update.go index 30e04e3..19d891c 100644 --- a/update.go +++ b/update.go @@ -17,6 +17,7 @@ func (o *Object) Del(key string) { if kv.k == key { o.kvs = append(o.kvs[:i], 
o.kvs[i+1:]...) o.kvs[:len(o.kvs)+1][len(o.kvs)] = nil // clear hidden slot for GC + o.kvIndex = nil // invalidate index return } } @@ -75,6 +76,7 @@ func (o *Object) Set(a arena.Arena, key string, value *Value) { kv.k = arenaString(a, key) kv.v = value kv.keyUnescaped = true // New keys are already unescaped since they come from user input + o.kvIndex = nil // invalidate index } // Set sets (key, value) entry in the array or object v. diff --git a/validate.go b/validate.go index 5061062..0a5a0bf 100644 --- a/validate.go +++ b/validate.go @@ -1,22 +1,40 @@ package astjson import ( - "fmt" + "errors" "strconv" "strings" ) +var ( + errValEmptyString = errors.New("cannot parse empty string") + errValMissingBracket = errors.New("missing ']'") + errValMissingBrace = errors.New("missing '}'") + errValMissingCommaArr = errors.New("missing ',' after array value") + errValMissingCommaObj = errors.New("missing ',' after object value") + errValEndArray = errors.New("unexpected end of array") + errValEndObject = errors.New("unexpected end of object") + errValMissingOpenQuote = errors.New(`cannot find opening '"' for object key`) + errValMissingColon = errors.New("missing ':' after object key") + errValMissingCloseQuote = errors.New(`missing closing '"'`) + errValZeroLenNumber = errors.New("zero-length number") + errValMissingAfterMinus = errors.New("missing number after minus") + errValUnexpectedZero = errors.New("unexpected number starting from 0") + errValMissingFractional = errors.New("missing fractional part") + errValMissingExponent = errors.New("missing exponent part") +) + // Validate validates JSON s. 
func Validate(s string) error { s = skipWS(s) tail, err := validateValue(s) if err != nil { - return fmt.Errorf("cannot parse JSON: %s; unparsed tail: %q", err, startEndString(tail)) + return errors.New("cannot parse JSON: " + err.Error() + "; unparsed tail: " + strconv.Quote(startEndString(tail))) } tail = skipWS(tail) if len(tail) > 0 { - return fmt.Errorf("unexpected tail: %q", startEndString(tail)) + return errors.New("unexpected tail: " + strconv.Quote(startEndString(tail))) } return nil } @@ -28,58 +46,58 @@ func ValidateBytes(b []byte) error { func validateValue(s string) (string, error) { if len(s) == 0 { - return s, fmt.Errorf("cannot parse empty string") + return s, errValEmptyString } if s[0] == '{' { tail, err := validateObject(s[1:]) if err != nil { - return tail, fmt.Errorf("cannot parse object: %s", err) + return tail, errors.New("cannot parse object: " + err.Error()) } return tail, nil } if s[0] == '[' { tail, err := validateArray(s[1:]) if err != nil { - return tail, fmt.Errorf("cannot parse array: %s", err) + return tail, errors.New("cannot parse array: " + err.Error()) } return tail, nil } if s[0] == '"' { sv, tail, err := validateString(s[1:]) if err != nil { - return tail, fmt.Errorf("cannot parse string: %s", err) + return tail, errors.New("cannot parse string: " + err.Error()) } // Scan the string for control chars. 
for i := 0; i < len(sv); i++ { if sv[i] < 0x20 { - return tail, fmt.Errorf("string cannot contain control char 0x%02X", sv[i]) + return tail, errors.New("string cannot contain control char 0x" + strconv.FormatUint(uint64(sv[i]), 16)) } } return tail, nil } if s[0] == 't' { if len(s) < len("true") || s[:len("true")] != "true" { - return s, fmt.Errorf("unexpected value found: %q", s) + return s, errors.New("unexpected value found: " + strconv.Quote(s)) } return s[len("true"):], nil } if s[0] == 'f' { if len(s) < len("false") || s[:len("false")] != "false" { - return s, fmt.Errorf("unexpected value found: %q", s) + return s, errors.New("unexpected value found: " + strconv.Quote(s)) } return s[len("false"):], nil } if s[0] == 'n' { if len(s) < len("null") || s[:len("null")] != "null" { - return s, fmt.Errorf("unexpected value found: %q", s) + return s, errors.New("unexpected value found: " + strconv.Quote(s)) } return s[len("null"):], nil } tail, err := validateNumber(s) if err != nil { - return tail, fmt.Errorf("cannot parse number: %s", err) + return tail, errors.New("cannot parse number: " + err.Error()) } return tail, nil } @@ -87,7 +105,7 @@ func validateValue(s string) (string, error) { func validateArray(s string) (string, error) { s = skipWS(s) if len(s) == 0 { - return s, fmt.Errorf("missing ']'") + return s, errValMissingBracket } if s[0] == ']' { return s[1:], nil @@ -99,12 +117,12 @@ func validateArray(s string) (string, error) { s = skipWS(s) s, err = validateValue(s) if err != nil { - return s, fmt.Errorf("cannot parse array value: %s", err) + return s, errors.New("cannot parse array value: " + err.Error()) } s = skipWS(s) if len(s) == 0 { - return s, fmt.Errorf("unexpected end of array") + return s, errValEndArray } if s[0] == ',' { s = s[1:] @@ -114,14 +132,14 @@ func validateArray(s string) (string, error) { s = s[1:] return s, nil } - return s, fmt.Errorf("missing ',' after array value") + return s, errValMissingCommaArr } } func validateObject(s 
string) (string, error) { s = skipWS(s) if len(s) == 0 { - return s, fmt.Errorf("missing '}'") + return s, errValMissingBrace } if s[0] == '}' { return s[1:], nil @@ -133,23 +151,23 @@ func validateObject(s string) (string, error) { // Parse key. s = skipWS(s) if len(s) == 0 || s[0] != '"' { - return s, fmt.Errorf(`cannot find opening '"" for object key`) + return s, errValMissingOpenQuote } var key string key, s, err = validateKey(s[1:]) if err != nil { - return s, fmt.Errorf("cannot parse object key: %s", err) + return s, errors.New("cannot parse object key: " + err.Error()) } // Scan the key for control chars. for i := 0; i < len(key); i++ { if key[i] < 0x20 { - return s, fmt.Errorf("object key cannot contain control char 0x%02X", key[i]) + return s, errors.New("object key cannot contain control char 0x" + strconv.FormatUint(uint64(key[i]), 16)) } } s = skipWS(s) if len(s) == 0 || s[0] != ':' { - return s, fmt.Errorf("missing ':' after object key") + return s, errValMissingColon } s = s[1:] @@ -157,11 +175,11 @@ func validateObject(s string) (string, error) { s = skipWS(s) s, err = validateValue(s) if err != nil { - return s, fmt.Errorf("cannot parse object value: %s", err) + return s, errors.New("cannot parse object value: " + err.Error()) } s = skipWS(s) if len(s) == 0 { - return s, fmt.Errorf("unexpected end of object") + return s, errValEndObject } if s[0] == ',' { s = s[1:] @@ -170,7 +188,7 @@ func validateObject(s string) (string, error) { if s[0] == '}' { return s[1:], nil } - return s, fmt.Errorf("missing ',' after object value") + return s, errValMissingCommaObj } } @@ -187,7 +205,7 @@ func validateKey(s string) (string, string, error) { return validateString(s) } } - return "", s, fmt.Errorf(`missing closing '"'`) + return "", s, errValMissingCloseQuote } func validateString(s string) (string, string, error) { @@ -215,28 +233,28 @@ func validateString(s string) (string, string, error) { continue case 'u': if len(rs) < 4 { - return rs, tail, 
fmt.Errorf(`too short escape sequence: \u%s`, rs) + return rs, tail, errors.New(`too short escape sequence: \u` + rs) } xs := rs[:4] _, err := strconv.ParseUint(xs, 16, 16) if err != nil { - return rs, tail, fmt.Errorf(`invalid escape sequence \u%s: %s`, xs, err) + return rs, tail, errors.New(`invalid escape sequence \u` + xs + ": " + err.Error()) } rs = rs[4:] default: - return rs, tail, fmt.Errorf(`unknown escape sequence \%c`, ch) + return rs, tail, errors.New(`unknown escape sequence \` + string(ch)) } } } func validateNumber(s string) (string, error) { if len(s) == 0 { - return s, fmt.Errorf("zero-length number") + return s, errValZeroLenNumber } if s[0] == '-' { s = s[1:] if len(s) == 0 { - return s, fmt.Errorf("missing number after minus") + return s, errValMissingAfterMinus } } i := 0 @@ -247,10 +265,10 @@ func validateNumber(s string) (string, error) { i++ } if i <= 0 { - return s, fmt.Errorf("expecting 0..9 digit, got %c", s[0]) + return s, errors.New("expecting 0..9 digit, got " + string(s[0])) } if s[0] == '0' && i != 1 { - return s, fmt.Errorf("unexpected number starting from 0") + return s, errValUnexpectedZero } if i >= len(s) { return "", nil @@ -259,7 +277,7 @@ func validateNumber(s string) (string, error) { // Validate fractional part s = s[i+1:] if len(s) == 0 { - return s, fmt.Errorf("missing fractional part") + return s, errValMissingFractional } i = 0 for i < len(s) { @@ -269,7 +287,7 @@ func validateNumber(s string) (string, error) { i++ } if i == 0 { - return s, fmt.Errorf("expecting 0..9 digit in fractional part, got %c", s[0]) + return s, errors.New("expecting 0..9 digit in fractional part, got " + string(s[0])) } if i >= len(s) { return "", nil @@ -279,12 +297,12 @@ func validateNumber(s string) (string, error) { // Validate exponent part s = s[i+1:] if len(s) == 0 { - return s, fmt.Errorf("missing exponent part") + return s, errValMissingExponent } if s[0] == '-' || s[0] == '+' { s = s[1:] if len(s) == 0 { - return s, fmt.Errorf("missing 
exponent part") + return s, errValMissingExponent } } i = 0 @@ -295,7 +313,7 @@ func validateNumber(s string) (string, error) { i++ } if i == 0 { - return s, fmt.Errorf("expecting 0..9 digit in exponent part, got %c", s[0]) + return s, errors.New("expecting 0..9 digit in exponent part, got " + string(s[0])) } if i >= len(s) { return "", nil