Skip to content

Commit 6975f2a

Browse files
authored
Improve performance of deduplication (#47)
* Avoid filling dedup table * cleanup * Add benchmark * tune
1 parent bc4bdd0 commit 6975f2a

3 files changed

Lines changed: 40 additions & 7 deletions

File tree

Performance/Filter.cs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
using BenchmarkDotNet.Attributes;
2+
using Combination.StringPools;
3+
4+
namespace Performance;
5+
6+
#pragma warning disable IDE1006 // Naming Styles
7+
#pragma warning disable CS8618
8+
/// <summary>
/// BenchmarkDotNet benchmark that measures <c>TryGet</c> on a deduplicated UTF-8 string pool
/// for a key that was never added, with the pool pre-filled to several different sizes.
/// </summary>
public class Filter
9+
{
10+
// Not referenced anywhere in this class as shown.
// NOTE(review): may be used by other benchmark classes in the project - confirm before removing.
internal static readonly Random Random = new();
11+
12+
// Number of strings added to the pool by Setup(); one benchmark case per listed value.
[Params(10_000, 32_525, 100_000, 1_000_000)]
13+
public int PoolSize;
14+
15+
// Pool under test. Assigned in Setup() rather than in a constructor.
private IUtf8DeduplicatedStringPool filledPool;
16+
17+
/// <summary>
/// Creates the pool and adds <see cref="PoolSize"/> freshly generated GUID strings to it.
/// </summary>
[GlobalSetup]
18+
public void Setup()
19+
{
20+
// NOTE(review): the meaning of the arguments (4096, 1) is not visible here -
// presumably page size and initial page count; confirm against StringPool.DeduplicatedUtf8.
filledPool = StringPool.DeduplicatedUtf8(4096, 1);
21+
for (var i = 0; i < PoolSize; i++)
22+
{
23+
// GUID text never equals the lookup key "not-in-pool" used by DoTryGet().
filledPool.Add(Guid.NewGuid().ToString());
24+
}
25+
}
26+
27+
/// <summary>
/// Looks up the fixed key "not-in-pool", which Setup() never adds.
/// </summary>
/// <returns>
/// Whatever <c>TryGet</c> returns for the key; presumably <c>null</c> on a miss (TODO confirm).
/// </returns>
[Benchmark]
28+
public PooledUtf8String? DoTryGet()
29+
{
30+
return filledPool.TryGet("not-in-pool");
31+
}
32+
}

Performance/Program.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
using Performance;
33

44
var summary = BenchmarkRunner.Run<Deduplication>();
5+
// var summary = BenchmarkRunner.Run<Filter>();
56
//var summary = BenchmarkRunner.Run<Hashing>();
67

78
#if false

src/Combination.StringPools/Utf8StringPool.cs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ private const int
1313
PoolIndexBits =
1414
24; // Number of bits to use for pool index in handle (more bits = more pools, but less strings per pool)
1515

16+
// Maximum fill factor for deduplication table. Performance degrades when it is close to 1.
17+
private const float MaxDeduplicationTableFillFactor = 0.9f;
18+
1619
private static readonly List<Utf8StringPool?> Pools = new();
1720

1821
#pragma warning disable IDE1006 // Naming Styles
@@ -21,7 +24,7 @@ private const int
2124
internal static long totalAddedBytes;
2225
#pragma warning restore IDE1006 // Naming Styles
2326

24-
internal int overfillCount;
27+
internal int deduplicationFillCount;
2528

2629
private readonly List<nint> pages = new();
2730
private readonly int index;
@@ -309,12 +312,9 @@ private void AddToDeduplicationTable(ulong[]? currentTable, int currentTableBits
309312
var tableEntry = currentTable[(tableIndex + i) % tableSize];
310313
if (tableEntry == 0)
311314
{
312-
if (i > 0)
313-
{
314-
++overfillCount;
315-
}
315+
++deduplicationFillCount;
316316
currentTable[(tableIndex + i) % tableSize] = handle + 1;
317-
if (overfillCount > tableSize / 2)
317+
if (deduplicationFillCount > tableSize * MaxDeduplicationTableFillFactor)
318318
{
319319
ResizeDeduplicationTable(currentTableBits + 1);
320320
}
@@ -331,7 +331,7 @@ private void ResizeDeduplicationTable(int newBits)
331331
return;
332332
}
333333
var newDeduplicationTable = new ulong[1 << newBits];
334-
overfillCount = 0;
334+
deduplicationFillCount = 0;
335335
var tableSize = 1 << deduplicationTableBits;
336336
for (var i = 0; i < tableSize; ++i)
337337
{

0 commit comments

Comments
 (0)