Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions sjsonnet/src/sjsonnet/Evaluator.scala
Original file line number Diff line number Diff line change
Expand Up @@ -579,7 +579,7 @@ class Evaluator(resolver: CachedResolver,
newScope
}

val builder = new java.util.LinkedHashMap[String, Val.Obj.Member]
val builder = Util.preSizedJavaLinkedHashMap[String, Val.Obj.Member](fields.length)
fields.foreach {
case Member.Field(offset, fieldName, plus, null, sep, rhs) =>
val k = visitFieldName(fieldName, offset)
Expand All @@ -604,8 +604,14 @@ class Evaluator(resolver: CachedResolver,
builder.put(k, v)
}
case _ =>
Error.fail("This case should never be hit", objPos)
}
cachedObj = new Val.Obj(objPos, builder, false, if(asserts != null) assertions else null, sup)
val valueCache = if (sup == null) {
Val.Obj.getEmptyValueCacheForObjWithoutSuper(fields.length)
} else {
new java.util.HashMap[Any, Val]()
}
cachedObj = new Val.Obj(objPos, builder, false, if(asserts != null) assertions else null, sup, valueCache)
cachedObj
}

Expand Down Expand Up @@ -636,7 +642,12 @@ class Evaluator(resolver: CachedResolver,
case _ =>
}
}
new Val.Obj(e.pos, builder, false, null, sup)
val valueCache = if (sup == null) {
Val.Obj.getEmptyValueCacheForObjWithoutSuper(builder.size())
} else {
new java.util.HashMap[Any, Val]()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would be better with a size hint for the map's initial capacity.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See the PR description and the Scaladoc of getEmptyValueCacheForObjWithoutSuper for more details, but this is deliberate:

Because this object has a super object, computing an upper bound on the field count is not free. In this PR I'm aiming to be conservative and only down-size in cases where we have a cheap tight bound.

Also, an object might have a large number of fields but many of them might not end up being computed in a particular evaluation or materialization. In those cases, we want to avoid sizing the map larger than the previous default.

}
new Val.Obj(e.pos, builder, false, null, sup, valueCache)
}

newSelf
Expand Down
4 changes: 2 additions & 2 deletions sjsonnet/src/sjsonnet/Expr.scala
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ object Expr{
}
case class Field(pos: Position,
fieldName: FieldName,
plus: Boolean,
plus: Boolean, // see https://jsonnet.org/ref/language.html#nested-field-inheritance
args: Params,
sep: Visibility,
rhs: Expr) extends Member {
Expand Down Expand Up @@ -175,7 +175,7 @@ object Expr{
preLocals: Array[Bind],
key: Expr,
value: Expr,
plus: Boolean,
plus: Boolean, // see https://jsonnet.org/ref/language.html#nested-field-inheritance
postLocals: Array[Bind],
first: ForSpec,
rest: List[CompSpec]) extends ObjBody {
Expand Down
122 changes: 97 additions & 25 deletions sjsonnet/src/sjsonnet/Std.scala
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ class Std(private val additionalNativeFunctions: Map[String, Val.Builtin] = Map.
val func = _func.asFunc
val obj = _obj.asObj
val allKeys = obj.allKeyNames
val m = new util.LinkedHashMap[String, Val.Obj.Member]()
val m = Util.preSizedJavaLinkedHashMap[String, Val.Obj.Member](allKeys.length)
var i = 0
while(i < allKeys.length) {
val k = allKeys(i)
Expand All @@ -392,7 +392,8 @@ class Std(private val additionalNativeFunctions: Map[String, Val.Builtin] = Map.
m.put(k, v)
i += 1
}
new Val.Obj(pos, m, false, null, null)
val valueCache = Val.Obj.getEmptyValueCacheForObjWithoutSuper(allKeys.length)
new Val.Obj(pos, m, false, null, null, valueCache)
}
}

Expand Down Expand Up @@ -922,39 +923,110 @@ class Std(private val additionalNativeFunctions: Map[String, Val.Builtin] = Map.
builtin(Range),
builtin("mergePatch", "target", "patch"){ (pos, ev, target: Val, patch: Val) =>
val mergePosition = pos
def createMember(v: => Val) = new Val.Obj.Member(false, Visibility.Normal) {
def createLazyMember(v: => Val) = new Val.Obj.Member(false, Visibility.Normal) {
def invoke(self: Val.Obj, sup: Val.Obj, fs: FileScope, ev: EvalScope): Val = v
}
def recPair(l: Val, r: Val): Val = (l, r) match{
case (l: Val.Obj, r: Val.Obj) =>
val kvs = for {
k <- (l.visibleKeyNames ++ r.visibleKeyNames).distinct
lValue = if (l.containsVisibleKey(k)) Option(l.valueRaw(k, l, pos)(ev)) else None
rValue = if (r.containsVisibleKey(k)) Option(r.valueRaw(k, r, pos)(ev)) else None
if !rValue.exists(_.isInstanceOf[Val.Null])
} yield (lValue, rValue) match{
case (Some(lChild), None) => k -> createMember{lChild}
case (Some(lChild: Val.Obj), Some(rChild: Val.Obj)) => k -> createMember{recPair(lChild, rChild)}
case (_, Some(rChild)) => k -> createMember{recSingle(rChild)}
case (None, None) => Error.fail("std.mergePatch: This should never happen")
val keys: Array[String] = distinctKeys(l.visibleKeyNames, r.visibleKeyNames)
val kvs: Array[(String, Val.Obj.Member)] = new Array[(String, Val.Obj.Member)](keys.length)
var kvsIdx = 0
var i = 0
while (i < keys.length) {
val key = keys(i)
val lValue = if (l.containsVisibleKey(key)) l.valueRaw(key, l, pos)(ev) else null
val rValue = if (r.containsVisibleKey(key)) r.valueRaw(key, r, pos)(ev) else null
if (!rValue.isInstanceOf[Val.Null]) { // if we are not removing the key
if (lValue != null && rValue == null) {
// Preserve the LHS/target value:
kvs(kvsIdx) = (key, new Val.Obj.ConstMember(false, Visibility.Normal, lValue))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change to use ConstMember for the case where we have an already-computed value which doesn't need cleaning (which must be done lazily) ends up reducing allocations because we don't need to generate a lambda / thunk and capture state from the closure.

I'm considering adding a branch in the createMember(recSingle(rValue)) path (both here and in recSingle) to do a similar optimization for leaf non-Obj values that come in from the RHS.

} else if (lValue.isInstanceOf[Val.Obj] && rValue.isInstanceOf[Val.Obj]) {
// Recursively merge objects:
kvs(kvsIdx) = (key, createLazyMember(recPair(lValue, rValue)))
} else if (rValue != null) {
// Use the RHS/patch value and recursively remove Null or hidden fields:
kvs(kvsIdx) = (key, createLazyMember(recSingle(rValue)))
} else {
Error.fail("std.mergePatch: This should never happen")
}
kvsIdx += 1
}
i += 1
}

Val.Obj.mk(mergePosition, kvs:_*)
val trimmedKvs = if (kvsIdx == i) kvs else kvs.slice(0, kvsIdx)
Val.Obj.mk(mergePosition, trimmedKvs)

case (_, _) => recSingle(r)
}
def recSingle(v: Val): Val = v match{
case obj: Val.Obj =>
val kvs = for{
k <- obj.visibleKeyNames
value = obj.value(k, pos, obj)(ev)
if !value.isInstanceOf[Val.Null]
} yield (k, createMember{recSingle(value)})

Val.Obj.mk(obj.pos, kvs:_*)
val keys: Array[String] = obj.visibleKeyNames
val kvs: Array[(String, Val.Obj.Member)] = new Array[(String, Val.Obj.Member)](keys.length)
var kvsIdx = 0
var i = 0
while (i < keys.length) {
val key = keys(i)
val value = obj.value(key, pos, obj)(ev)
if (!value.isInstanceOf[Val.Null]) {
kvs(kvsIdx) = (key, createLazyMember(recSingle(value)))
kvsIdx += 1
}
i += 1
}
val trimmedKvs = if (kvsIdx == i) kvs else kvs.slice(0, kvsIdx)
Val.Obj.mk(obj.pos, trimmedKvs)

case _ => v
}
def distinctKeys(lKeys: Array[String], rKeys: Array[String]): Array[String] = {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I plan to add code comments here and throughout the PR before it merges, BTW. I'll revisit this with fresh eyes tomorrow.

// Fast path for small RHS size (the common case when merging a small
// patch into a large target object), avoiding the cost of constructing
// and probing a hash set: instead, perform a nested loop where the LHS
// is scanned and matching RHS entries are marked as null to be skipped.
// Via local microbenchmarks simulating a "worst-case" (RHS keys all new),
// the threshold of `8` was empirically determined to be a good tradeoff
// between allocation + hashing costs vs. nested loop array scans.
if (rKeys.length <= 8) {
val rKeysCopy = new Array[String](rKeys.length)
rKeys.copyToArray(rKeysCopy)
var i = 0
var numNewRKeys = rKeysCopy.length
while (i < lKeys.length) {
val lKey = lKeys(i)
var j = 0
while (j < rKeysCopy.length) {
// This LHS key is in the RHS, so mark it to be skipped in output:
if (lKey == rKeysCopy(j)) {
rKeysCopy(j) = null
numNewRKeys -= 1
}
j += 1
}
i += 1
}
// Combine lKeys with non-null elements of rKeysCopy:
if (numNewRKeys == 0) {
lKeys
} else {
val outArray = new Array[String](lKeys.length + numNewRKeys)
System.arraycopy(lKeys, 0, outArray, 0, lKeys.length)
var outIdx = lKeys.length
var j = 0
while (j < rKeysCopy.length) {
if (rKeysCopy(j) != null) {
outArray(outIdx) = rKeysCopy(j)
outIdx += 1
}
j += 1
}
outArray
}
} else {
// Fallback: Use hash-based deduplication for large RHS arrays:
(lKeys ++ rKeys).distinct
}
}
recPair(target, patch)
},
builtin("sqrt", "x"){ (pos, ev, x: Double) =>
Expand Down Expand Up @@ -1417,12 +1489,12 @@ class Std(private val additionalNativeFunctions: Map[String, Val.Builtin] = Map.
}
def rec(x: Val): Val = x match{
case o: Val.Obj =>
val bindings = for{
val bindings: Array[(String, Val.Obj.Member)] = for{
k <- o.visibleKeyNames
v = rec(o.value(k, pos.fileScope.noOffsetPos)(ev))
if filter(v)
}yield (k, new Val.Obj.ConstMember(false, Visibility.Normal, v))
Val.Obj.mk(pos, bindings: _*)
Val.Obj.mk(pos, bindings)
case a: Val.Arr =>
new Val.Arr(pos, a.asStrictArray.map(rec).filter(filter).map(identity))
case _ => x
Expand Down Expand Up @@ -1513,12 +1585,12 @@ class Std(private val additionalNativeFunctions: Map[String, Val.Builtin] = Map.
)))
},
builtin("objectRemoveKey", "obj", "key") { (pos, ev, o: Val.Obj, key: String) =>
val bindings = for{
val bindings: Array[(String, Val.Obj.Member)] = for{
k <- o.visibleKeyNames
v = o.value(k, pos.fileScope.noOffsetPos)(ev)
if k != key
}yield (k, new Val.Obj.ConstMember(false, Visibility.Normal, v))
Val.Obj.mk(pos, bindings: _*)
Val.Obj.mk(pos, bindings)
},
builtin(MinArray),
builtin(MaxArray),
Expand Down
17 changes: 16 additions & 1 deletion sjsonnet/src/sjsonnet/Util.scala
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,19 @@ object Util{
s"<$s>"
}
}
}

def preSizedJavaLinkedHashMap[K, V](expectedElems: Int): java.util.LinkedHashMap[K, V] = {
// Set the initial capacity to the number of elems divided by the default load factor + 1
// this ensures that we can fill up the map to the total number of fields without resizing.
// From JavaDoc - true for both Scala & Java HashMaps
val hashMapDefaultLoadFactor = 0.75f
val capacity = (expectedElems / hashMapDefaultLoadFactor).toInt + 1
new java.util.LinkedHashMap[K, V](capacity, hashMapDefaultLoadFactor)
}

  /** Creates a `java.util.HashMap` pre-sized so that inserting
    * `expectedElems` entries never triggers a rehash/resize.
    */
  def preSizedJavaHashMap[K, V](expectedElems: Int): java.util.HashMap[K, V] = {
    // A HashMap resizes once size exceeds capacity * loadFactor (0.75 by
    // default), so dividing by the load factor and adding 1 guarantees
    // headroom for exactly `expectedElems` entries without resizing.
    val hashMapDefaultLoadFactor = 0.75f
    val capacity = (expectedElems / hashMapDefaultLoadFactor).toInt + 1
    new java.util.HashMap[K, V](capacity, hashMapDefaultLoadFactor)
  }
}
Loading
Loading