diff --git a/release-notes.txt b/release-notes.txt index 055282c..4de334d 100644 --- a/release-notes.txt +++ b/release-notes.txt @@ -4,6 +4,9 @@ Release notes: 0.6.0 - adds TaskSeq.scan and TaskSeq.scanAsync, #289 - adds TaskSeq.pairwise, #289 + - adds TaskSeq.unfold and TaskSeq.unfoldAsync, #289 + - adds TaskSeq.distinct, TaskSeq.distinctBy, TaskSeq.distinctByAsync + - performance: TaskSeq.exists, existsAsync, contains no longer allocate an intermediate Option value - adds TaskSeq.mapFold and TaskSeq.mapFoldAsync - adds TaskSeq.sum, sumBy, sumByAsync, average, averageBy, averageByAsync - adds TaskSeq.reduce and TaskSeq.reduceAsync, #289 diff --git a/src/FSharp.Control.TaskSeq.Test/FSharp.Control.TaskSeq.Test.fsproj b/src/FSharp.Control.TaskSeq.Test/FSharp.Control.TaskSeq.Test.fsproj index 0918138..4fdfe6f 100644 --- a/src/FSharp.Control.TaskSeq.Test/FSharp.Control.TaskSeq.Test.fsproj +++ b/src/FSharp.Control.TaskSeq.Test/FSharp.Control.TaskSeq.Test.fsproj @@ -20,6 +20,7 @@ + diff --git a/src/FSharp.Control.TaskSeq.Test/TaskSeq.Distinct.Tests.fs b/src/FSharp.Control.TaskSeq.Test/TaskSeq.Distinct.Tests.fs new file mode 100644 index 0000000..2b03944 --- /dev/null +++ b/src/FSharp.Control.TaskSeq.Test/TaskSeq.Distinct.Tests.fs @@ -0,0 +1,249 @@ +module TaskSeq.Tests.Distinct + +open Xunit +open FsUnit.Xunit + +open FSharp.Control + +// +// TaskSeq.distinct +// TaskSeq.distinctBy +// TaskSeq.distinctByAsync +// + + +module EmptySeq = + [] + let ``TaskSeq-distinct with null source raises`` () = assertNullArg <| fun () -> TaskSeq.distinct null + + [] + let ``TaskSeq-distinctBy with null source raises`` () = assertNullArg <| fun () -> TaskSeq.distinctBy id null + + [] + let ``TaskSeq-distinctByAsync with null source raises`` () = + assertNullArg + <| fun () -> TaskSeq.distinctByAsync (fun x -> Task.fromResult x) null + + [)>] + let ``TaskSeq-distinct on empty returns empty`` variant = + Gen.getEmptyVariant variant + 
|> TaskSeq.distinct + |> verifyEmpty + + [)>] + let ``TaskSeq-distinctBy on empty returns empty`` variant = + Gen.getEmptyVariant variant + |> TaskSeq.distinctBy id + |> verifyEmpty + + [)>] + let ``TaskSeq-distinctByAsync on empty returns empty`` variant = + Gen.getEmptyVariant variant + |> TaskSeq.distinctByAsync (fun x -> Task.fromResult x) + |> verifyEmpty + + +module Functionality = + [] + let ``TaskSeq-distinct removes duplicate ints`` () = task { + let! result = + taskSeq { yield! [ 1; 2; 2; 3; 1; 4; 3; 5 ] } + |> TaskSeq.distinct + |> TaskSeq.toListAsync + + result |> should equal [ 1; 2; 3; 4; 5 ] + } + + [] + let ``TaskSeq-distinct removes duplicate strings`` () = task { + let! result = + taskSeq { yield! [ "a"; "b"; "b"; "a"; "c" ] } + |> TaskSeq.distinct + |> TaskSeq.toListAsync + + result |> should equal [ "a"; "b"; "c" ] + } + + [] + let ``TaskSeq-distinct with all identical elements returns singleton`` () = task { + let! result = + taskSeq { yield! [ 7; 7; 7; 7; 7 ] } + |> TaskSeq.distinct + |> TaskSeq.toListAsync + + result |> should equal [ 7 ] + } + + [] + let ``TaskSeq-distinct with all distinct elements returns all`` () = task { + let! result = + taskSeq { yield! [ 1..5 ] } + |> TaskSeq.distinct + |> TaskSeq.toListAsync + + result |> should equal [ 1; 2; 3; 4; 5 ] + } + + [] + let ``TaskSeq-distinct on singleton returns singleton`` () = task { + let! result = + taskSeq { yield 42 } + |> TaskSeq.distinct + |> TaskSeq.toListAsync + + result |> should equal [ 42 ] + } + + [] + let ``TaskSeq-distinct keeps first occurrence, not last`` () = task { + // sequence [3;1;2;1;3] - first occurrences are at indices 0,1,2 for values 3,1,2 + let! result = + taskSeq { yield! [ 3; 1; 2; 1; 3 ] } + |> TaskSeq.distinct + |> TaskSeq.toListAsync + + result |> should equal [ 3; 1; 2 ] + } + + [] + let ``TaskSeq-distinct is different from distinctUntilChanged`` () = task { + // [1;2;1] - distinct gives [1;2], distinctUntilChanged gives [1;2;1] + let! 
distinct = + taskSeq { yield! [ 1; 2; 1 ] } + |> TaskSeq.distinct + |> TaskSeq.toListAsync + + let! distinctUntilChanged = + taskSeq { yield! [ 1; 2; 1 ] } + |> TaskSeq.distinctUntilChanged + |> TaskSeq.toListAsync + + distinct |> should equal [ 1; 2 ] + distinctUntilChanged |> should equal [ 1; 2; 1 ] + } + + [] + let ``TaskSeq-distinctBy removes elements with duplicate projected keys`` () = task { + let! result = + taskSeq { yield! [ 1; 2; 3; 4; 5; 6 ] } + |> TaskSeq.distinctBy (fun x -> x % 3) + |> TaskSeq.toListAsync + + // keys: 1%3=1, 2%3=2, 3%3=0, 4%3=1(dup), 5%3=2(dup), 6%3=0(dup) + result |> should equal [ 1; 2; 3 ] + } + + [] + let ``TaskSeq-distinctBy with string length as key`` () = task { + let! result = + taskSeq { yield! [ "a"; "bb"; "c"; "dd"; "eee" ] } + |> TaskSeq.distinctBy String.length + |> TaskSeq.toListAsync + + // lengths: 1, 2, 1(dup), 2(dup), 3 + result |> should equal [ "a"; "bb"; "eee" ] + } + + [] + let ``TaskSeq-distinctBy with identity projection equals distinct`` () = task { + let input = [ 1; 2; 2; 3; 1; 4 ] + + let! byId = + taskSeq { yield! input } + |> TaskSeq.distinctBy id + |> TaskSeq.toListAsync + + let! plain = + taskSeq { yield! input } + |> TaskSeq.distinct + |> TaskSeq.toListAsync + + byId |> should equal plain + } + + [] + let ``TaskSeq-distinctBy keeps first element with a given key`` () = task { + let! result = + taskSeq { yield! [ (1, "a"); (2, "b"); (1, "c") ] } + |> TaskSeq.distinctBy fst + |> TaskSeq.toListAsync + + result |> should equal [ (1, "a"); (2, "b") ] + } + + [] + let ``TaskSeq-distinctByAsync removes elements with duplicate projected keys`` () = task { + let! result = + taskSeq { yield! [ 1; 2; 3; 4; 5; 6 ] } + |> TaskSeq.distinctByAsync (fun x -> task { return x % 3 }) + |> TaskSeq.toListAsync + + result |> should equal [ 1; 2; 3 ] + } + + [] + let ``TaskSeq-distinctByAsync behaves identically to distinctBy`` () = task { + let input = [ 1; 2; 2; 3; 1; 4 ] + let projection x = x % 2 + + let! 
bySync = + taskSeq { yield! input } + |> TaskSeq.distinctBy projection + |> TaskSeq.toListAsync + + let! byAsync = + taskSeq { yield! input } + |> TaskSeq.distinctByAsync (fun x -> task { return projection x }) + |> TaskSeq.toListAsync + + bySync |> should equal byAsync + } + + [] + let ``TaskSeq-distinct with chars`` () = task { + let! result = + taskSeq { yield! [ 'A'; 'A'; 'B'; 'Z'; 'C'; 'C'; 'Z'; 'C'; 'D'; 'D'; 'D'; 'Z' ] } + |> TaskSeq.distinct + |> TaskSeq.toListAsync + + result |> should equal [ 'A'; 'B'; 'Z'; 'C'; 'D' ] + } + + +module SideEffects = + [] + let ``TaskSeq-distinct evaluates elements lazily`` () = task { + let mutable sideEffects = 0 + + let ts = taskSeq { + for i in 1..5 do + sideEffects <- sideEffects + 1 + yield i + } + + let distinct = ts |> TaskSeq.distinct + + // no evaluation yet + sideEffects |> should equal 0 + + let! _ = distinct |> TaskSeq.toListAsync + + // only evaluated when consumed + sideEffects |> should equal 5 + } + + [] + let ``TaskSeq-distinctBy evaluates projection lazily`` () = task { + let mutable projections = 0 + + let! result = + taskSeq { yield! 
[ 1; 2; 3; 1; 2 ] } + |> TaskSeq.distinctBy (fun x -> + projections <- projections + 1 + x) + |> TaskSeq.toListAsync + + result |> should equal [ 1; 2; 3 ] + // projection called once per element (5 elements) + projections |> should equal 5 + } diff --git a/src/FSharp.Control.TaskSeq/TaskSeq.fs b/src/FSharp.Control.TaskSeq/TaskSeq.fs index c9c96cc..0da32e0 100644 --- a/src/FSharp.Control.TaskSeq/TaskSeq.fs +++ b/src/FSharp.Control.TaskSeq/TaskSeq.fs @@ -463,6 +463,10 @@ type TaskSeq private () = static member except itemsToExclude source = Internal.except itemsToExclude source static member exceptOfSeq itemsToExclude source = Internal.exceptOfSeq itemsToExclude source + static member distinct source = Internal.distinct source + static member distinctBy projection source = Internal.distinctBy projection source + static member distinctByAsync projection source = Internal.distinctByAsync projection source + static member distinctUntilChanged source = Internal.distinctUntilChanged source static member pairwise source = Internal.pairwise source static member chunkBySize chunkSize source = Internal.chunkBySize chunkSize source @@ -471,17 +475,11 @@ type TaskSeq private () = static member forall predicate source = Internal.forall (Predicate predicate) source static member forallAsync predicate source = Internal.forall (PredicateAsync predicate) source - static member exists predicate source = - Internal.tryFind (Predicate predicate) source - |> Task.map Option.isSome + static member exists predicate source = Internal.exists (Predicate predicate) source - static member existsAsync predicate source = - Internal.tryFind (PredicateAsync predicate) source - |> Task.map Option.isSome + static member existsAsync predicate source = Internal.exists (PredicateAsync predicate) source - static member contains value source = - Internal.tryFind (Predicate((=) value)) source - |> Task.map Option.isSome + static member contains value source = Internal.contains value source static member 
pick chooser source = Internal.tryPick (TryPick chooser) source diff --git a/src/FSharp.Control.TaskSeq/TaskSeq.fsi b/src/FSharp.Control.TaskSeq/TaskSeq.fsi index 76b7ffa..a0d94f8 100644 --- a/src/FSharp.Control.TaskSeq/TaskSeq.fsi +++ b/src/FSharp.Control.TaskSeq/TaskSeq.fsi @@ -1409,6 +1409,62 @@ type TaskSeq = /// Thrown when either of the two input task sequences is null. static member exceptOfSeq<'T when 'T: equality> : itemsToExclude: seq<'T> -> source: TaskSeq<'T> -> TaskSeq<'T> + /// + /// Returns a new task sequence that contains no duplicate entries, using generic hash and equality comparisons. + /// If an element occurs multiple times in the sequence, only the first occurrence is returned. + /// + /// + /// + /// Elements are yielded lazily as the source is consumed, but every unique element seen so far is buffered in + /// a hash set, so memory use grows without bound on sequences with infinitely many distinct elements. + /// + /// + /// The input task sequence. + /// A sequence with duplicate elements removed. + /// + /// Thrown when the input task sequence is null. + static member distinct<'T when 'T: equality> : source: TaskSeq<'T> -> TaskSeq<'T> + + /// + /// Returns a new task sequence that contains no duplicate entries according to the generic hash and equality + /// comparisons on the keys returned by the given projection function. + /// If two elements have the same projected key, only the first occurrence is returned. + /// If the projection function is asynchronous, consider using <see cref="TaskSeq.distinctByAsync" />. + /// + /// + /// + /// Elements are yielded lazily as the source is consumed, but every unique key seen so far is buffered in + /// a hash set, so memory use grows without bound on sequences with infinitely many distinct keys. + /// + /// + /// A function that transforms each element to a key that is used for equality comparison. + /// The input task sequence. + /// A sequence with elements whose projected keys are distinct. + /// + /// Thrown when the input task sequence is null. 
+ static member distinctBy<'T, 'Key when 'Key: equality> : + projection: ('T -> 'Key) -> source: TaskSeq<'T> -> TaskSeq<'T> + + /// + /// Returns a new task sequence that contains no duplicate entries according to the generic hash and equality + /// comparisons on the keys returned by the given asynchronous projection function. + /// If two elements have the same projected key, only the first occurrence is returned. + /// If the projection function is synchronous, consider using <see cref="TaskSeq.distinctBy" />. + /// + /// + /// + /// Elements are yielded lazily as the source is consumed, but every unique key seen so far is buffered in + /// a hash set, so memory use grows without bound on sequences with infinitely many distinct keys. + /// + /// + /// An asynchronous function that transforms each element to a key used for equality comparison. + /// The input task sequence. + /// A sequence with elements whose projected keys are distinct. + /// + /// Thrown when the input task sequence is null. + static member distinctByAsync: + projection: ('T -> #Task<'Key>) -> source: TaskSeq<'T> -> TaskSeq<'T> when 'Key: equality + /// /// Returns a new task sequence without consecutive duplicate elements. /// diff --git a/src/FSharp.Control.TaskSeq/TaskSeqInternal.fs b/src/FSharp.Control.TaskSeq/TaskSeqInternal.fs index b98d817..c2a920e 100644 --- a/src/FSharp.Control.TaskSeq/TaskSeqInternal.fs +++ b/src/FSharp.Control.TaskSeq/TaskSeqInternal.fs @@ -847,6 +847,100 @@ module internal TaskSeqInternal = return state } + /// Direct bool-returning exists, avoiding the Option<'T> allocation that tryFind+isSome would incur. + let exists predicate (source: TaskSeq<_>) = + checkNonNull (nameof source) source + + match predicate with + | Predicate syncPredicate -> task { + use e = source.GetAsyncEnumerator CancellationToken.None + let mutable found = false + let! cont = e.MoveNextAsync() + let mutable hasMore = cont + + while not found && hasMore do + found <- syncPredicate e.Current + + if not found then + let! 
cont = e.MoveNextAsync() + hasMore <- cont + + return found + } + + | PredicateAsync asyncPredicate -> task { + use e = source.GetAsyncEnumerator CancellationToken.None + let mutable found = false + let! cont = e.MoveNextAsync() + let mutable hasMore = cont + + while not found && hasMore do + let! pred = asyncPredicate e.Current + found <- pred + + if not found then + let! cont = e.MoveNextAsync() + hasMore <- cont + + return found + } + + /// Direct bool-returning contains, avoiding the Option<'T> allocation and closure that tryFind+isSome would incur. + let contains (value: 'T) (source: TaskSeq<'T>) = + checkNonNull (nameof source) source + + task { + use e = source.GetAsyncEnumerator CancellationToken.None + let mutable found = false + let! cont = e.MoveNextAsync() + let mutable hasMore = cont + + while not found && hasMore do + if e.Current = value then + found <- true + else + let! cont = e.MoveNextAsync() + hasMore <- cont + + return found + } + + let distinct (source: TaskSeq<_>) = + checkNonNull (nameof source) source + + taskSeq { + // only create hashset when we start iterating; sequential so plain HashSet suffices + let seen = HashSet<_>(HashIdentity.Structural) + + for item in source do + if seen.Add item then + yield item + } + + let distinctBy (projection: _ -> _) (source: TaskSeq<_>) = + checkNonNull (nameof source) source + + taskSeq { + let seen = HashSet<_>(HashIdentity.Structural) + + for item in source do + if seen.Add(projection item) then + yield item + } + + let distinctByAsync (projection: _ -> #Task<_>) (source: TaskSeq<_>) = + checkNonNull (nameof source) source + + taskSeq { + let seen = HashSet<_>(HashIdentity.Structural) + + for item in source do + let! key = projection item + + if seen.Add key then + yield item + } + let skipOrTake skipOrTake count (source: TaskSeq<_>) = checkNonNull (nameof source) source raiseCannotBeNegative (nameof count) count