stefmolin · JCGoran · Jul 22, 2024 · Sep 16, 2024 · Nov 10, 2024 · Nov 25, 2024
diff --git a/src/data_morph/data/dataset.py b/src/data_morph/data/dataset.py
@@ -52,6 +52,9 @@ def __init__(
         self.df: pd.DataFrame = self._validate_data(df).pipe(self._scale_data, scale)
         """pandas.DataFrame: DataFrame containing columns x and y."""
 
+        self._x = self.df['x'].to_numpy()
+        self._y = self.df['y'].to_numpy()
-        self._x = self.df['x'].to_numpy()
-        self._y = self.df['y'].to_numpy()
+        self._x, self._y = self.df[['x', 'y']].to_numpy().T
-        self._x = self.df['x'].to_numpy()
-        self._y = self.df['y'].to_numpy()
+        self._x, self._y = self.df[['x', 'y']].to_numpy().T
+
         self.name: str = name
         """str: The name to use for the dataset."""
 

diff --git a/src/data_morph/data/stats.py b/src/data_morph/data/stats.py
@@ -1,8 +1,10 @@
 """Utility functions for calculating summary statistics."""
 
 from collections import namedtuple
+from numbers import Number
+from typing import Iterable
 
-import pandas as pd
+import numpy as np
 
 SummaryStatistics = namedtuple(
     'SummaryStatistics', ['x_mean', 'y_mean', 'x_stdev', 'y_stdev', 'correlation']
@@ -12,14 +14,17 @@
 )
 
 
-def get_values(df: pd.DataFrame) -> SummaryStatistics:
+def get_values(x: Iterable[Number], y: Iterable[Number]) -> SummaryStatistics:
     """
     Calculate the summary statistics for the given set of points.
 
     Parameters
     ----------
-    df : pandas.DataFrame
-        A dataset with columns x and y.
+    x : Iterable[Number]
+        The ``x`` values of the dataset.
+
-
-
+    y : Iterable[Number]
+        The ``y`` values of the dataset.
 
     Returns
     -------
@@ -28,9 +33,9 @@ def get_values(df: pd.DataFrame) -> SummaryStatistics:
         along with the Pearson correlation coefficient between the two.
     """
     return SummaryStatistics(
-        df.x.mean(),
-        df.y.mean(),
-        df.x.std(),
-        df.y.std(),
-        df.corr().x.y,
+        np.mean(x),
+        np.mean(y),
+        np.std(x, ddof=1),
+        np.std(y, ddof=1),
+        np.corrcoef(x, y)[0, 1],
     )
diff --git a/src/data_morph/morpher.py b/src/data_morph/morpher.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+from collections.abc import Iterable, MutableSequence
 from functools import partial
 from numbers import Number
 from pathlib import Path
@@ -240,16 +241,26 @@ def _record_frames(
                 frame_number += 1
         return frame_number
 
-    def _is_close_enough(self, df1: pd.DataFrame, df2: pd.DataFrame) -> bool:
+    def _is_close_enough(
+        self,
+        x1: Iterable[Number],
+        y1: Iterable[Number],
+        x2: Iterable[Number],
+        y2: Iterable[Number],
+    ) -> bool:
         """
         Check whether the statistics are within the acceptable bounds.
 
         Parameters
         ----------
-        df1 : pandas.DataFrame
-            The original DataFrame.
-        df2 : pandas.DataFrame
-            The DataFrame after the latest perturbation.
+        x1 : Iterable[Number]
+            The original value of ``x``.
+        y1 : Iterable[Number]
+            The original value of ``y``.
+        x2 : Iterable[Number]
+            The perturbed  value of ``x``.
-            The perturbed  value of ``x``.
+            The perturbed value of ``x``.
-            The perturbed  value of ``x``.
+            The perturbed value of ``x``.
+        y2 : Iterable[Number]
+            The perturbed value of ``y``.
 
         Returns
         -------
@@ -259,32 +270,33 @@ def _is_close_enough(self, df1: pd.DataFrame, df2: pd.DataFrame) -> bool:
         return np.all(
             np.abs(
                 np.subtract(
-                    *(
-                        np.floor(np.array(get_values(data)) * 10**self.decimals)
-                        for data in [df1, df2]
-                    )
+                    np.floor(np.array(get_values(x1, y1)) * 10**self.decimals),
+                    np.floor(np.array(get_values(x2, y2)) * 10**self.decimals),
                 )
             )
             == 0
         )
 
     def _perturb(
         self,
-        df: pd.DataFrame,
+        x: MutableSequence[Number],
+        y: MutableSequence[Number],
         target_shape: Shape,
         *,
         shake: Number,
         allowed_dist: Number,
         temp: Number,
         bounds: BoundingBox,
-    ) -> pd.DataFrame:
+    ) -> tuple[MutableSequence[Number], MutableSequence[Number]]:
         """
         Perform one round of perturbation.
 
         Parameters
         ----------
-        df : pandas.DataFrame
-            The data to perturb.
+        x : MutableSequence[Number]
+            The ``x`` part of the dataset.
+        y : MutableSequence[Number]
+            The ``y`` part of the dataset.
         target_shape : Shape
             The shape to morph the data into.
         shake : numbers.Number
@@ -301,12 +313,12 @@ def _perturb(
 
         Returns
         -------
-        pandas.DataFrame
+        tuple[MutableSequence[Number], MutableSequence[Number]]
             The input dataset with one point perturbed.
         """
-        row = self._rng.integers(0, len(df))
-        initial_x = df.at[row, 'x']
-        initial_y = df.at[row, 'y']
+        row = self._rng.integers(0, len(x))
+        initial_x = x[row]
+        initial_y = y[row]
 
         # this is the simulated annealing step, if "do_bad", then we are willing to
         # accept a new state which is worse than the current one
@@ -325,10 +337,10 @@ def _perturb(
             within_bounds = [new_x, new_y] in bounds
             done = close_enough and within_bounds
 
-        df.loc[row, 'x'] = new_x
-        df.loc[row, 'y'] = new_y
+        x[row] = new_x
+        y[row] = new_y
 
-        return df
+        return x, y
 
     def morph(
         self,
@@ -471,20 +483,27 @@ def _tweening(
             max_value=max_shake,
         )
 
+        x, y = (
+            start_shape.df['x'].to_numpy(copy=True),
+            start_shape.df['y'].to_numpy(copy=True),
+        )
-        x, y = (
-            start_shape.df['x'].to_numpy(copy=True),
-            start_shape.df['y'].to_numpy(copy=True),
-        )
+        x, y = start_shape._x, start_shape._y
-        x, y = (
-            start_shape.df['x'].to_numpy(copy=True),
-            start_shape.df['y'].to_numpy(copy=True),
-        )
+        x, y = start_shape._x, start_shape._y
+
         for i in self._looper(
             iterations, leave=True, ascii=True, desc=f'{target_shape} pattern'
         ):
             perturbed_data = self._perturb(
-                morphed_data.copy(),
+                np.copy(x),
+                np.copy(y),
                 target_shape=target_shape,
                 shake=get_current_shake(i),
                 allowed_dist=allowed_dist,
                 temp=get_current_temp(i),
                 bounds=start_shape.morph_bounds,
             )
 
-            if self._is_close_enough(start_shape.df, perturbed_data):
-                morphed_data = perturbed_data
+            if self._is_close_enough(x, y, *perturbed_data):
+                x, y = perturbed_data
+                morphed_data = pd.DataFrame({'x': x, 'y': y})
 
             frame_number = record_frames(
                 data=morphed_data,

diff --git a/src/data_morph/plotting/static.py b/src/data_morph/plotting/static.py
@@ -61,7 +61,7 @@ def plot(
     ax.xaxis.set_major_formatter(tick_formatter)
     ax.yaxis.set_major_formatter(tick_formatter)
 
-    res = get_values(df)
+    res = get_values(df['x'].to_numpy(), df['y'].to_numpy())
 
     labels = ('X Mean', 'Y Mean', 'X SD', 'Y SD', 'Corr.')
     locs = np.linspace(0.8, 0.2, num=len(labels))

diff --git a/tests/data/test_stats.py b/tests/data/test_stats.py
@@ -1,5 +1,7 @@
 """Test the stats module."""
 
+import numpy as np
+
 from data_morph.data.loader import DataLoader
 from data_morph.data.stats import get_values
 
@@ -9,10 +11,10 @@ def test_stats():
 
     data = DataLoader.load_dataset('dino').df
 
-    stats = get_values(data)
+    stats = get_values(data['x'], data['y'])
 
     assert stats.x_mean == data.x.mean()
     assert stats.y_mean == data.y.mean()
     assert stats.x_stdev == data.x.std()
     assert stats.y_stdev == data.y.std()
-    assert stats.correlation == data.corr().x.y
+    assert np.allclose(stats.correlation, data.corr().x.y)
diff --git a/tests/test_morpher.py b/tests/test_morpher.py
@@ -171,7 +171,9 @@ def test_no_writing(self, capsys):
 
         with pytest.raises(AssertionError):
             assert_frame_equal(morphed_data, dataset.df)
-        assert morpher._is_close_enough(dataset.df, morphed_data)
+        assert morpher._is_close_enough(
+            dataset.df['x'], dataset.df['y'], morphed_data['x'], morphed_data['y']
+        )
 
         _, err = capsys.readouterr()
         assert f'{target_shape} pattern: 100%' in err