Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions sjsonnet/src/sjsonnet/Evaluator.scala
Original file line number Diff line number Diff line change
Expand Up @@ -579,7 +579,7 @@ class Evaluator(resolver: CachedResolver,
newScope
}

val builder = new java.util.LinkedHashMap[String, Val.Obj.Member]
val builder = Util.preSizedJavaLinkedHashMap[String, Val.Obj.Member](fields.length)
fields.foreach {
case Member.Field(offset, fieldName, plus, null, sep, rhs) =>
val k = visitFieldName(fieldName, offset)
Expand All @@ -604,8 +604,14 @@ class Evaluator(resolver: CachedResolver,
builder.put(k, v)
}
case _ =>
Error.fail("This case should never be hit", objPos)
}
cachedObj = new Val.Obj(objPos, builder, false, if(asserts != null) assertions else null, sup)
val valueCache = if (sup == null) {
Val.Obj.getEmptyValueCacheForObjWithoutSuper(fields.length)
} else {
new java.util.HashMap[Any, Val]()
}
cachedObj = new Val.Obj(objPos, builder, false, if(asserts != null) assertions else null, sup, valueCache)
cachedObj
}

Expand Down Expand Up @@ -636,7 +642,12 @@ class Evaluator(resolver: CachedResolver,
case _ =>
}
}
new Val.Obj(e.pos, builder, false, null, sup)
val valueCache = if (sup == null) {
Val.Obj.getEmptyValueCacheForObjWithoutSuper(builder.size())
} else {
new java.util.HashMap[Any, Val]()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would be better with a size hint for the map's initial capacity.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See the PR description and the Scaladoc of getEmptyValueCacheForObjWithoutSuper for more details, but this is deliberate:

Because this object has a super object, computing an upper bound on the field count is not free. In this PR I'm aiming to be conservative and only down-size in cases where we have a cheap tight bound.

Also, an object might have a large number of fields but many of them might not end up being computed in a particular evaluation or materialization. In those cases, we want to avoid sizing the map larger than the previous default.

}
new Val.Obj(e.pos, builder, false, null, sup, valueCache)
}

newSelf
Expand Down
4 changes: 2 additions & 2 deletions sjsonnet/src/sjsonnet/Expr.scala
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ object Expr{
}
case class Field(pos: Position,
fieldName: FieldName,
plus: Boolean,
plus: Boolean, // see https://jsonnet.org/ref/language.html#nested-field-inheritance
args: Params,
sep: Visibility,
rhs: Expr) extends Member {
Expand Down Expand Up @@ -175,7 +175,7 @@ object Expr{
preLocals: Array[Bind],
key: Expr,
value: Expr,
plus: Boolean,
plus: Boolean, // see https://jsonnet.org/ref/language.html#nested-field-inheritance
postLocals: Array[Bind],
first: ForSpec,
rest: List[CompSpec]) extends ObjBody {
Expand Down
122 changes: 97 additions & 25 deletions sjsonnet/src/sjsonnet/Std.scala
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ class Std(private val additionalNativeFunctions: Map[String, Val.Builtin] = Map.
val func = _func.asFunc
val obj = _obj.asObj
val allKeys = obj.allKeyNames
val m = new util.LinkedHashMap[String, Val.Obj.Member]()
val m = Util.preSizedJavaLinkedHashMap[String, Val.Obj.Member](allKeys.length)
var i = 0
while(i < allKeys.length) {
val k = allKeys(i)
Expand All @@ -392,7 +392,8 @@ class Std(private val additionalNativeFunctions: Map[String, Val.Builtin] = Map.
m.put(k, v)
i += 1
}
new Val.Obj(pos, m, false, null, null)
val valueCache = Val.Obj.getEmptyValueCacheForObjWithoutSuper(allKeys.length)
new Val.Obj(pos, m, false, null, null, valueCache)
}
}

Expand Down Expand Up @@ -922,39 +923,110 @@ class Std(private val additionalNativeFunctions: Map[String, Val.Builtin] = Map.
builtin(Range),
builtin("mergePatch", "target", "patch"){ (pos, ev, target: Val, patch: Val) =>
val mergePosition = pos
def createMember(v: => Val) = new Val.Obj.Member(false, Visibility.Normal) {
def createLazyMember(v: => Val) = new Val.Obj.Member(false, Visibility.Normal) {
def invoke(self: Val.Obj, sup: Val.Obj, fs: FileScope, ev: EvalScope): Val = v
}
def recPair(l: Val, r: Val): Val = (l, r) match{
case (l: Val.Obj, r: Val.Obj) =>
val kvs = for {
k <- (l.visibleKeyNames ++ r.visibleKeyNames).distinct
lValue = if (l.containsVisibleKey(k)) Option(l.valueRaw(k, l, pos)(ev)) else None
rValue = if (r.containsVisibleKey(k)) Option(r.valueRaw(k, r, pos)(ev)) else None
if !rValue.exists(_.isInstanceOf[Val.Null])
} yield (lValue, rValue) match{
case (Some(lChild), None) => k -> createMember{lChild}
case (Some(lChild: Val.Obj), Some(rChild: Val.Obj)) => k -> createMember{recPair(lChild, rChild)}
case (_, Some(rChild)) => k -> createMember{recSingle(rChild)}
case (None, None) => Error.fail("std.mergePatch: This should never happen")
val keys: Array[String] = distinctKeys(l.visibleKeyNames, r.visibleKeyNames)
val kvs: Array[(String, Val.Obj.Member)] = new Array[(String, Val.Obj.Member)](keys.length)
var kvsIdx = 0
var i = 0
while (i < keys.length) {
val key = keys(i)
val lValue = if (l.containsVisibleKey(key)) l.valueRaw(key, l, pos)(ev) else null
val rValue = if (r.containsVisibleKey(key)) r.valueRaw(key, r, pos)(ev) else null
if (!rValue.isInstanceOf[Val.Null]) { // if we are not removing the key
if (lValue != null && rValue == null) {
// Preserve the LHS/target value:
kvs(kvsIdx) = (key, new Val.Obj.ConstMember(false, Visibility.Normal, lValue))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change to use ConstMember for the case where we have an already-computed value which doesn't need cleaning (which must be done lazily) ends up reducing allocations because we don't need to generate a lambda / thunk and capture state from the closure.

I'm considering adding a branch in the createMember(recSingle(rValue)) path (both here and in recSingle) to do a similar optimization for leaf non-Obj values that come in from the RHS.

} else if (lValue.isInstanceOf[Val.Obj] && rValue.isInstanceOf[Val.Obj]) {
// Recursively merge objects:
kvs(kvsIdx) = (key, createLazyMember(recPair(lValue, rValue)))
} else if (rValue != null) {
// Use the RHS/patch value and recursively remove Null or hidden fields:
kvs(kvsIdx) = (key, createLazyMember(recSingle(rValue)))
} else {
Error.fail("std.mergePatch: This should never happen")
}
kvsIdx += 1
}
i += 1
}

Val.Obj.mk(mergePosition, kvs:_*)
val trimmedKvs = if (kvsIdx == i) kvs else kvs.slice(0, kvsIdx)
Val.Obj.mk(mergePosition, trimmedKvs)

case (_, _) => recSingle(r)
}
def recSingle(v: Val): Val = v match{
case obj: Val.Obj =>
val kvs = for{
k <- obj.visibleKeyNames
value = obj.value(k, pos, obj)(ev)
if !value.isInstanceOf[Val.Null]
} yield (k, createMember{recSingle(value)})

Val.Obj.mk(obj.pos, kvs:_*)
val keys: Array[String] = obj.visibleKeyNames
val kvs: Array[(String, Val.Obj.Member)] = new Array[(String, Val.Obj.Member)](keys.length)
var kvsIdx = 0
var i = 0
while (i < keys.length) {
val key = keys(i)
val value = obj.value(key, pos, obj)(ev)
if (!value.isInstanceOf[Val.Null]) {
kvs(kvsIdx) = (key, createLazyMember(recSingle(value)))
kvsIdx += 1
}
i += 1
}
val trimmedKvs = if (kvsIdx == i) kvs else kvs.slice(0, kvsIdx)
Val.Obj.mk(obj.pos, trimmedKvs)

case _ => v
}
def distinctKeys(lKeys: Array[String], rKeys: Array[String]): Array[String] = {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I plan to add code comments here and throughout the PR before it merges, BTW. I'll revisit this with fresh eyes tomorrow.

// Fast path for small RHS size (the common case when merging a small
// patch into a large target object), avoiding the cost of constructing
// and probing a hash set: instead, perform a nested loop where the LHS
// is scanned and matching RHS entries are marked as null to be skipped.
// Via local microbenchmarks simulating a "worst-case" (RHS keys all new),
// the threshold of `8` was empirically determined to be a good tradeoff
// between allocation + hashing costs vs. nested loop array scans.
if (rKeys.length <= 8) {
val rKeysCopy = new Array[String](rKeys.length)
rKeys.copyToArray(rKeysCopy)
var i = 0
var numNewRKeys = rKeysCopy.length
while (i < lKeys.length) {
val lKey = lKeys(i)
var j = 0
while (j < rKeysCopy.length) {
// This LHS key is in the RHS, so mark it to be skipped in output:
if (lKey == rKeysCopy(j)) {
rKeysCopy(j) = null
numNewRKeys -= 1
}
j += 1
}
i += 1
}
// Combine lKeys with non-null elements of rKeysCopy:
if (numNewRKeys == 0) {
lKeys
} else {
val outArray = new Array[String](lKeys.length + numNewRKeys)
System.arraycopy(lKeys, 0, outArray, 0, lKeys.length)
var outIdx = lKeys.length
var j = 0
while (j < rKeysCopy.length) {
if (rKeysCopy(j) != null) {
outArray(outIdx) = rKeysCopy(j)
outIdx += 1
}
j += 1
}
outArray
}
} else {
// Fallback: Use hash-based deduplication for large RHS arrays:
(lKeys ++ rKeys).distinct
}
}
recPair(target, patch)
},
builtin("sqrt", "x"){ (pos, ev, x: Double) =>
Expand Down Expand Up @@ -1417,12 +1489,12 @@ class Std(private val additionalNativeFunctions: Map[String, Val.Builtin] = Map.
}
def rec(x: Val): Val = x match{
case o: Val.Obj =>
val bindings = for{
val bindings: Array[(String, Val.Obj.Member)] = for{
k <- o.visibleKeyNames
v = rec(o.value(k, pos.fileScope.noOffsetPos)(ev))
if filter(v)
}yield (k, new Val.Obj.ConstMember(false, Visibility.Normal, v))
Val.Obj.mk(pos, bindings: _*)
Val.Obj.mk(pos, bindings)
case a: Val.Arr =>
new Val.Arr(pos, a.asStrictArray.map(rec).filter(filter).map(identity))
case _ => x
Expand Down Expand Up @@ -1513,12 +1585,12 @@ class Std(private val additionalNativeFunctions: Map[String, Val.Builtin] = Map.
)))
},
builtin("objectRemoveKey", "obj", "key") { (pos, ev, o: Val.Obj, key: String) =>
val bindings = for{
val bindings: Array[(String, Val.Obj.Member)] = for{
k <- o.visibleKeyNames
v = o.value(k, pos.fileScope.noOffsetPos)(ev)
if k != key
}yield (k, new Val.Obj.ConstMember(false, Visibility.Normal, v))
Val.Obj.mk(pos, bindings: _*)
Val.Obj.mk(pos, bindings)
},
builtin(MinArray),
builtin(MaxArray),
Expand Down
17 changes: 16 additions & 1 deletion sjsonnet/src/sjsonnet/Util.scala
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,19 @@ object Util{
s"<$s>"
}
}
}

def preSizedJavaLinkedHashMap[K, V](expectedElems: Int): java.util.LinkedHashMap[K, V] = {
// Set the initial capacity to the number of elems divided by the default load factor + 1
// this ensures that we can fill up the map to the total number of fields without resizing.
// From JavaDoc - true for both Scala & Java HashMaps
val hashMapDefaultLoadFactor = 0.75f
val capacity = (expectedElems / hashMapDefaultLoadFactor).toInt + 1
new java.util.LinkedHashMap[K, V](capacity, hashMapDefaultLoadFactor)
}

  /** Creates a `java.util.HashMap` pre-sized so that inserting
    * `expectedElems` entries never triggers a rehash/resize.
    */
  def preSizedJavaHashMap[K, V](expectedElems: Int): java.util.HashMap[K, V] = {
    // A HashMap resizes once size exceeds capacity * loadFactor (0.75 by
    // default), so dividing by the load factor and adding 1 guarantees
    // headroom for exactly `expectedElems` entries without resizing.
    val hashMapDefaultLoadFactor = 0.75f
    val capacity = (expectedElems / hashMapDefaultLoadFactor).toInt + 1
    new java.util.HashMap[K, V](capacity, hashMapDefaultLoadFactor)
  }
}
Loading
Loading