Add Random library code.

jemc · jemc · commit f27ce3259653 · 2022-03-04T15:41:20.000-08:00
diff --git a/LICENSE.md b/LICENSE.md
@@ -1,4 +1,4 @@
-Copyright 2018 Joe Eli McIlvain
+Copyright 2021 Joe Eli McIlvain
 
 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
 
diff --git a/README.md b/README.md
@@ -1,3 +1,3 @@
-A base repository for Savi language libraries, with common CI actions configured.
+# Random
 
-See the [Guide](https://github.com/savi-lang/base-standard-library/wiki/Guide) for details on how it works and how to use it for your own libraries.
+A standard interface for the various random number generators in the Savi standard library.
diff --git a/manifest.savi b/manifest.savi
@@ -0,0 +1,17 @@
+:manifest lib Random
+  :sources "src/*.savi"
+
+:manifest bin "spec"
+  :copies Random
+  :sources "spec/*.savi"
+
+  :dependency DeterministicRandom v0
+    // TODO: :from "github:savi-lang/DeterministicRandom"
+    :depends on Random
+
+  :dependency Spec v0
+    :from "github:savi-lang/Spec"
+    :depends on Map
+
+  :transitive dependency Map v0
+    :from "github:savi-lang/Map"
diff --git a/spec/Main.savi b/spec/Main.savi
@@ -0,0 +1,5 @@
+:actor Main
+  :new (env)
+    Spec.Process.run(env, [
+      Spec.Run(Random.Spec).new(env)
+    ])
diff --git a/spec/Random.Spec.savi b/spec/Random.Spec.savi
@@ -0,0 +1,87 @@
+// A fake random implementation that always returns the same 64-bit value,
+// so that the variety of different derived functions can be easily tested.
+:class _FakeRandom64
+  :is Random
+  :fun ref u64 U64: 0x0123456789abcdef
+
+// A fake random implementation that always returns the same 64-bit value,
+// but overrides the 32-bit function to return a differently derived value,
+// to test that smaller values will prefer deriving from the 32-bit value.
+:class _FakeRandom6432
+  :is Random
+  :fun ref u64 U64: 0x0123456789abcdef
+  :fun ref u32 U32: 0xf7e6d5c4
+
+:class Random.Spec
+  :is Spec
+  :const describes: "Random"
+
+  :it "derives other values from the high side of the 64-bit value"
+    random = _FakeRandom64.new
+    assert: random.u64 == 0x0123456789abcdef
+    assert: random.u32 == 0x01234567
+    assert: random.u16 == 0x0123
+    assert: random.u8  == 0x01
+    assert: random.i64 == 0x0123456789abcdef
+    assert: random.i32 == 0x01234567
+    assert: random.i16 == 0x0123
+    assert: random.i8  == 0x01
+    assert: random.f64 == F64.from_bits(0x0123456789abcdef)
+    assert: random.f32 == F32.from_bits(0x01234567)
+    assert: random.bool == False
+
+  :it "derives smaller values from the high side of the 32-bit value if present"
+    random = _FakeRandom6432.new
+    assert: random.u64 == 0x0123456789abcdef
+    assert: random.u32 == 0xf7e6d5c4
+    assert: random.u16 == 0xf7e6
+    assert: random.u8  == 0xf7
+    assert: random.i64 == 0x0123456789abcdef
+    assert: random.i32 == 0xf7e6d5c4
+    assert: random.i16 == 0xf7e6
+    assert: random.i8  == 0xf7
+    assert: random.f64 == F64.from_bits(0x0123456789abcdef)
+    assert: random.f32 == F32.from_bits(0xf7e6d5c4)
+    assert: random.bool == True
+
+  :it "generates 64-bit fractional values between 0 and 1, averaging around 0.5"
+    random = DeterministicRandom.Xoroshiro128.new_128(1, 2)
+    count USize = 0x1_0000
+    total F64 = 0
+    count.times -> (total += random.frac_64)
+    assert: (total / count.f64) == 0.49999695846019471
+
+  :it "generates 32-bit fractional values between 0 and 1, averaging around 0.5"
+    random = DeterministicRandom.Xoroshiro128.new_128(1, 2)
+    count USize = 0x1_0000
+    total F32 = 0
+    count.times -> (total += random.frac_32)
+    assert: (total / count.f32) == 0.49999812245368958
+
+  :it "generates U64s below a given limit 52, averaging around 25.5"
+    random = DeterministicRandom.Xoroshiro128.new_128(1, 2)
+    count USize = 0x10_0000
+    total U64 = 0
+    count.times -> (total += random.u64_less_than(52))
+    assert: (total.f64 / count.f64) == 25.514418601989746
+
+  :it "generates U32s below a given limit 52, averaging around 25.5"
+    random = DeterministicRandom.Xoroshiro128.new_128(1, 2)
+    count USize = 0x10_0000
+    total U32 = 0
+    count.times -> (total += random.u32_less_than(52))
+    assert: (total.f32 / count.f32) == 25.514419555664062
+
+  :it "generates unbiased U64s below a given limit 52, discarding bias"
+    random = DeterministicRandom.Xoroshiro128.new_128(1, 2)
+    count USize = 0x10_0000
+    total U64 = 0
+    count.times -> (total += random.unbiased_u64_less_than(52))
+    assert: (total.f64 / count.f64) == 25.514418601989746
+
+  :it "generates unbiased U32s below a given limit 52, discarding bias"
+    random = DeterministicRandom.Xoroshiro128.new_128(1, 2)
+    count USize = 0x10_0000
+    total U32 = 0
+    count.times -> (total += random.unbiased_u32_less_than(52))
+    assert: (total.f32 / count.f32) == 25.514451980590820
diff --git a/src/Random.savi b/src/Random.savi
@@ -0,0 +1,169 @@
+// The common interface for random number generators (TODO: document more fully).
+:trait Random
+  // Each random generator is expected to supply an implementation to generate
+  // a pseudo-random 64-bit number, and other value types are derived from this.
+  :fun ref u64 U64
+
+  // Unsigned integers are derived by bit-shifting down from the original U64.
+  // However, some implementations may override these with different approaches.
+  //
+  // We intentionally cascade each method into the next, so that if for example,
+  // the `u32` method is overridden with another approach, then all
+  // smaller-width values will derive from `u32` instead of `u64`.
+  //
+  // In the normal case, with no overrides, we trust LLVM to inline and combine
+  // the bit shift operations to remove unnecessary bit shift instructions.
+  :fun ref u32 U32: @u64.bit_shr(32).u32
+  :fun ref u16 U16: @u32.bit_shr(16).u16
+  :fun ref u8 U8: @u16.bit_shr(8).u8
+
+  // Signed integers are derived by generating as unsigned, then converting.
+  :fun ref i64 I64: @u64.i64
+  :fun ref i32 I32: @u32.i32
+  :fun ref i16 I16: @u16.i16
+  :fun ref i8 I8: @u8.i8
+
+  // Floating-point numbers are similarly derived, via `from_bits` of unsigned ones.
+  :fun ref f64 F64: F64.from_bits(@u64)
+  :fun ref f32 F32: F32.from_bits(@u32)
+
+  // Boolean values are derived from the most significant bit of the value.
+  :fun ref bool Bool: @u8.bit_and(0x80) != 0
+
+  :: Return a random 32-bit fraction - an F32 in the range [0, 1).
+  ::
+  :: The possible values will be uniformly spaced at one half-epsilon apart,
+  :: because half-epsilon is the largest unit of least precision below 1.0.
+  ::
+  :: To generate in a range that includes 1 but excludes 0, see frac_32_nonzero.
+  :fun ref frac_32 F32
+    @u32.bit_shr(F32.exp_bit_width).f32 * F32.half_epsilon
+
+  :: Return a random 32-bit nonzero fraction - an F32 in the range (0, 1].
+  ::
+  :: The possible values will be uniformly spaced at one half-epsilon apart,
+  :: because half-epsilon is the largest unit of least precision below 1.0.
+  ::
+  :: To generate in a range that includes 0 but excludes 1, see frac_32.
+  :fun ref frac_32_nonzero F32
+    @frac_32 + F32.half_epsilon
+
+  :: Return a random 64-bit fraction - an F64 in the range [0, 1).
+  ::
+  :: The possible values will be uniformly spaced at one half-epsilon apart,
+  :: because half-epsilon is the largest unit of least precision below 1.0.
+  ::
+  :: To generate in a range that includes 1 but excludes 0, see frac_64_nonzero.
+  :fun ref frac_64 F64
+    @u64.bit_shr(F64.exp_bit_width).f64 * F64.half_epsilon
+
+  :: Return a random 64-bit nonzero fraction - an F64 in the range (0, 1].
+  ::
+  :: The possible values will be uniformly spaced at one half-epsilon apart,
+  :: because half-epsilon is the largest unit of least precision below 1.0.
+  ::
+  :: To generate in a range that includes 0 but excludes 1, see frac_64.
+  :fun ref frac_64_nonzero F64
+    @frac_64 + F64.half_epsilon
+
+  :: Return a random U64 below the given limit - in the range [0, limit).
+  ::
+  :: The results will be slightly biased (some numbers have one more sample
+  :: in the pool than others), but the bias is very slight if the limit is
+  :: much smaller than the maximum representable U64 value, and the bias is
+  :: eliminated entirely if the given limit is a power of 2.
+  ::
+  :: If you need fully unbiased results, use the `unbiased_u64_less_than`
+  :: method instead, which discards all samples from the biased zone.
+  :fun ref u64_less_than(limit U64) U64
+    @u64.wide_multiply(limit).head
+
+  :: Return a random U32 below the given limit - in the range [0, limit).
+  ::
+  :: The results will be slightly biased (some numbers have one more sample
+  :: in the pool than others), but the bias is very slight if the limit is
+  :: much smaller than the maximum representable U32 value, and the bias is
+  :: eliminated entirely if the given limit is a power of 2.
+  ::
+  :: If you need fully unbiased results, use the `unbiased_u32_less_than`
+  :: method instead, which discards all samples from the biased zone.
+  :fun ref u32_less_than(limit U32) U32
+    @u32.wide_multiply(limit).head
+
+  :: Return an unbiased random U64 below the given limit, discarding the rare
+  :: cases where we get a result from a biased zone of the output.
+  ::
+  :: This ensures that every integer in the range has an equal probability
+  :: of occurring (at least, assuming a perfectly fair underlying generator).
+  :: However, performance can degrade slightly due to discarding and retrying.
+  ::
+  :: To keep maximum performance at the cost of some possibility of bias,
+  :: use the `u64_less_than` method instead, which never discards results.
+  :fun ref unbiased_u64_less_than(limit U64) U64
+    // Lemire's nearly-divisionless method: https://arxiv.org/pdf/1805.10941.pdf
+    random = @
+    product = random.u64.wide_multiply(limit)
+    // Reject and retry while the tail lies in the zone of bias: every value
+    // below `limit.negate % limit` (which only needs computing if tail < limit).
+    if (product.tail < limit) (
+      threshold = limit.negate % limit
+      while (product.tail < threshold) (
+        product = random.u64.wide_multiply(limit)
+      )
+    )
+    product.head
+
+  :: Return an unbiased random U32 below the given limit, discarding the rare
+  :: cases where we get a result from a biased zone of the output.
+  ::
+  :: This ensures that every integer in the range has an equal probability
+  :: of occurring (at least, assuming a perfectly fair underlying generator).
+  :: However, performance can degrade slightly due to discarding and retrying.
+  ::
+  :: To keep maximum performance at the cost of some possibility of bias,
+  :: use the `u32_less_than` method instead, which never discards results.
+  :fun ref unbiased_u32_less_than(limit U32) U32
+    // Lemire's nearly-divisionless method: https://arxiv.org/pdf/1805.10941.pdf
+    random = @
+    product = random.u32.wide_multiply(limit)
+    // Reject and retry while the tail lies in the zone of bias: every value
+    // below `limit.negate % limit` (which only needs computing if tail < limit).
+    if (product.tail < limit) (
+      threshold = limit.negate % limit
+      while (product.tail < threshold) (
+        product = random.u32.wide_multiply(limit)
+      )
+    )
+    product.head
+
+  :: Return a random USize below the given limit - in the range [0, limit).
+  ::
+  :: The results will be slightly biased (some numbers have one more sample
+  :: in the pool than others), but the bias is very slight if the limit is
+  :: much smaller than the maximum representable USize value, and the bias is
+  :: eliminated entirely if the given limit is a power of 2.
+  ::
+  :: If you need fully unbiased results, use the `unbiased_usize_less_than`
+  :: method instead, which discards all samples from the biased zone.
+  :fun ref usize_less_than(limit USize) USize
+    if (USize.bit_width == 32) (
+      @u32_less_than(limit.u32).usize
+    |
+      @u64_less_than(limit.u64).usize
+    )
+
+  :: Return an unbiased random USize below the given limit, discarding the rare
+  :: cases where we get a result from a biased zone of the output.
+  ::
+  :: This ensures that every integer in the range has an equal probability
+  :: of occurring (at least, assuming a perfectly fair underlying generator).
+  :: However, performance can degrade slightly due to discarding and retrying.
+  ::
+  :: To keep maximum performance at the cost of some possibility of bias,
+  :: use the `usize_less_than` method instead, which never discards results.
+  :fun ref unbiased_usize_less_than(limit USize) USize
+    if (USize.bit_width == 32) (
+      @unbiased_u32_less_than(limit.u32).usize
+    |
+      @unbiased_u64_less_than(limit.u64).usize
+    )

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-Copyright 2018 Joe Eli McIlvain`
	`1`	`+Copyright 2021 Joe Eli McIlvain`
`2`	`2`
`3`	`3`	`Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:`
`4`	`4`