From 7eaf4e726b82dbc1fda59164b6fcf47c644bbe80 Mon Sep 17 00:00:00 2001
From: kayvank <kayvan@q2io.com>
Date: Sat, 28 Feb 2026 17:02:27 -0800
Subject: [PATCH 1/3] Implement Fisher Yates algorithm

---
 src/DataFrame/Operations/Permutation.hs | 28 ++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/src/DataFrame/Operations/Permutation.hs b/src/DataFrame/Operations/Permutation.hs
index a381f98..6dc2179 100644
--- a/src/DataFrame/Operations/Permutation.hs
+++ b/src/DataFrame/Operations/Permutation.hs
@@ -9,16 +9,17 @@ import qualified Data.List as L
 import qualified Data.Text as T
 import qualified Data.Vector as V
 import qualified Data.Vector.Unboxed as VU
+import qualified Data.Vector.Unboxed.Mutable as VUM
 
 import Control.Exception (throw)
+import Control.Monad.ST (runST)
 import DataFrame.Errors (DataFrameException (..))
-import DataFrame.Internal.Column
+import DataFrame.Internal.Column (Columnable, atIndicesStable)
 import DataFrame.Internal.DataFrame (DataFrame (..))
-import DataFrame.Internal.Expression
-import DataFrame.Internal.Row
-import DataFrame.Operations.Core
-import System.Random
-import System.Random.Shuffle (shuffle')
+import DataFrame.Internal.Expression (Expr (Col))
+import DataFrame.Internal.Row (sortedIndexes', toRowVector)
+import DataFrame.Operations.Core (columnNames, dimensions)
+import System.Random (Random (randomR), RandomGen)
 
 -- | Sort order taken as a parameter by the 'sortBy' function.
 data SortOrder where
@@ -76,4 +77,17 @@ shuffle pureGen df =
         df{columns = V.map (atIndicesStable indexes) (columns df)}
 
 shuffledIndices :: (RandomGen g) => g -> Int -> VU.Vector Int
-shuffledIndices pureGen k = VU.fromList (shuffle' [0 .. (k - 1)] k pureGen)
+shuffledIndices pureGen k = shuffleVec pureGen (VU.fromList [0 .. (k - 1)])
+  where
+    shuffleVec :: (RandomGen g) => g -> VU.Vector Int -> VU.Vector Int
+    shuffleVec g v = runST $ do
+        vm <- VU.thaw v
+        let (n, nGen) = randomR (0, (k - 1)) g
+        go vm n nGen
+        VU.unsafeFreeze vm
+
+    go v (-1) _ = pure ()
+    go v 0 _ = pure ()
+    go v maxInd gen =
+        let (n, nextGen) = randomR (0, maxInd) gen
+         in VUM.swap v 0 n *> go (VUM.tail v) (maxInd - 1) nextGen

From 3e69bd573e37ee63c44a15344a617b2a270d56d1 Mon Sep 17 00:00:00 2001
From: kayvank <kayvan@q2io.com>
Date: Sun, 1 Mar 2026 09:29:28 -0800
Subject: [PATCH 2/3] Clean up build warnings

Issue 170, implement PR comments and clean up build warnings
---
 dataframe.cabal                         |  1 -
 src/DataFrame/Operations/Permutation.hs | 14 ++++++++------
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/dataframe.cabal b/dataframe.cabal
index c99a60a..f7f2a2f 100644
--- a/dataframe.cabal
+++ b/dataframe.cabal
@@ -106,7 +106,6 @@ library
                       process ^>= 1.6,
                       snappy-hs ^>= 0.1,
                       random >= 1.2 && < 1.3,
-                      random-shuffle >= 0.0.4 && < 1,
                       regex-tdfa >= 1.3.0 && < 2,
                       scientific >=0.3.1 && <0.4,
                       template-haskell >= 2.0 && < 3,
diff --git a/src/DataFrame/Operations/Permutation.hs b/src/DataFrame/Operations/Permutation.hs
index 6dc2179..b9f9c2c 100644
--- a/src/DataFrame/Operations/Permutation.hs
+++ b/src/DataFrame/Operations/Permutation.hs
@@ -77,17 +77,19 @@ shuffle pureGen df =
         df{columns = V.map (atIndicesStable indexes) (columns df)}
 
 shuffledIndices :: (RandomGen g) => g -> Int -> VU.Vector Int
-shuffledIndices pureGen k = shuffleVec pureGen (VU.fromList [0 .. (k - 1)])
+shuffledIndices pureGen k
+    | k <= 0 = VU.empty
+    | otherwise = shuffleVec pureGen
   where
-    shuffleVec :: (RandomGen g) => g -> VU.Vector Int -> VU.Vector Int
-    shuffleVec g v = runST $ do
-        vm <- VU.thaw v
-        let (n, nGen) = randomR (0, (k - 1)) g
+    shuffleVec :: (RandomGen g) => g -> VU.Vector Int
+    shuffleVec g = runST $ do
+        vm <- VUM.generate k id
+        let (n, nGen) = randomR (1, (k - 1)) g
         go vm n nGen
         VU.unsafeFreeze vm
 
     go v (-1) _ = pure ()
     go v 0 _ = pure ()
     go v maxInd gen =
-        let (n, nextGen) = randomR (0, maxInd) gen
+        let (n, nextGen) = randomR (1, maxInd) gen
          in VUM.swap v 0 n *> go (VUM.tail v) (maxInd - 1) nextGen

From 0b0073440d08e825bc05409e879ebd0066730d7a Mon Sep 17 00:00:00 2001
From: kayvank <kayvan@q2io.com>
Date: Sun, 1 Mar 2026 22:40:04 -0800
Subject: [PATCH 3/3] Unit tests for Fisher Yates algorithm

---
 src/DataFrame/Operations/Permutation.hs | 29 ++++++++++++++++++++---------
 tests/Operations/Shuffle.hs             | 20 +++++++++++++++++++-
 2 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/src/DataFrame/Operations/Permutation.hs b/src/DataFrame/Operations/Permutation.hs
index b9f9c2c..e799bfd 100644
--- a/src/DataFrame/Operations/Permutation.hs
+++ b/src/DataFrame/Operations/Permutation.hs
@@ -13,6 +13,7 @@ import qualified Data.Vector.Unboxed.Mutable as VUM
 
 import Control.Exception (throw)
 import Control.Monad.ST (runST)
+import Data.Vector.Internal.Check (HasCallStack)
 import DataFrame.Errors (DataFrameException (..))
 import DataFrame.Internal.Column (Columnable, atIndicesStable)
 import DataFrame.Internal.DataFrame (DataFrame (..))
@@ -76,20 +77,30 @@ shuffle pureGen df =
      in
         df{columns = V.map (atIndicesStable indexes) (columns df)}
 
-shuffledIndices :: (RandomGen g) => g -> Int -> VU.Vector Int
+-- | Build a pseudo-random permutation of the indices @0 .. k - 1@ using an
+-- in-place Fisher-Yates shuffle driven by @pureGen@.
+--
+-- Returns an empty vector when @k@ is 0 and calls 'error' when @k@ is negative.
+shuffledIndices :: (HasCallStack, RandomGen g) => g -> Int -> VU.Vector Int
 shuffledIndices pureGen k
-    | k <= 0 = VU.empty
+    | k < 0 = error $ "Vector index may not be a negative number: " <> show k
+    | k == 0 = VU.empty
     | otherwise = shuffleVec pureGen
   where
     shuffleVec :: (RandomGen g) => g -> VU.Vector Int
     shuffleVec g = runST $ do
         vm <- VUM.generate k id
-        let (n, nGen) = randomR (1, (k - 1)) g
-        go vm n nGen
+        -- Start at the last index so the whole vector takes part in the shuffle.
+        go vm (k - 1) g
         VU.unsafeFreeze vm
 
-    go v (-1) _ = pure ()
-    go v 0 _ = pure ()
-    go v maxInd gen =
-        let (n, nextGen) = randomR (1, maxInd) gen
-         in VUM.swap v 0 n *> go (VUM.tail v) (maxInd - 1) nextGen
+    -- @maxInd@ is the last valid index of the remaining slice @v@: pick a slot
+    -- in @[0, maxInd]@ (0 keeps the head in place), swap it to the front, then
+    -- continue on the tail. A slice with one element or fewer is already done.
+    go v maxInd gen
+        | maxInd <= 0 = pure ()
+        | otherwise =
+            let
+                (n, nextGen) = randomR (0, maxInd) gen
+             in
+                VUM.swap v 0 n *> go (VUM.tail v) (maxInd - 1) nextGen
diff --git a/tests/Operations/Shuffle.hs b/tests/Operations/Shuffle.hs
index f1c52f4..9b609c3 100644
--- a/tests/Operations/Shuffle.hs
+++ b/tests/Operations/Shuffle.hs
@@ -5,7 +5,9 @@ module Operations.Shuffle where
 
 import qualified DataFrame as D
 
-import DataFrame.Operations.Permutation (shuffle)
+import qualified Data.Set as Set
+import qualified Data.Vector.Unboxed as VU
+import DataFrame.Operations.Permutation (shuffle, shuffledIndices)
 import System.Random (mkStdGen)
 import Test.HUnit (Test (..), assertEqual)
 
@@ -74,6 +76,21 @@ shuffleDifferentSeedIsDifferent =
                 (shuffled1 == shuffled2)
             )
 
+-- Test that shuffledIndices does not drop, add, or repeat any index.
+shuffleDoesNotAddOrDropIndices :: Test
+shuffleDoesNotAddOrDropIndices =
+    let
+        gen = mkStdGen 42
+        expected = Set.fromList [0 .. 10]
+        computedVector = shuffledIndices gen 11
+        computed = Set.fromList (VU.toList computedVector)
+     in
+        TestList
+            [ TestCase
+                (assertEqual "Indices are not dropped or added" 11 (VU.length computedVector))
+            , TestCase (assertEqual "There are no repeated indices" expected computed)
+            ]
+
 tests :: [Test]
 tests =
     [ TestLabel "shuffleShuffles" shuffleShuffles
@@ -81,4 +98,5 @@ tests =
     , TestLabel "shufflePreservesColumnNames" shufflePreservesColumnNames
     , TestLabel "shuffleSameSeedIsSameShuffle" shuffleSameSeedIsSameShuffle
     , TestLabel "shuffleDifferentSeedIsDifferent" shuffleDifferentSeedIsDifferent
+    , TestLabel "shuffleDoesNotAddOrDropIndices" shuffleDoesNotAddOrDropIndices
     ]