-
-
Notifications
You must be signed in to change notification settings - Fork 24
Replace many Pandas operations with NumPy #198
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
e708f6c
cd2dd8e
34be08a
0a25272
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -52,6 +52,9 @@ def __init__( | |
| self.df: pd.DataFrame = self._validate_data(df).pipe(self._scale_data, scale) | ||
| """pandas.DataFrame: DataFrame containing columns x and y.""" | ||
|
|
||
| self._x = self.df['x'].to_numpy() | ||
| self._y = self.df['y'].to_numpy() | ||
|
Comment on lines
+55
to
+56
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should these be properties? If we change the DataFrame, these will no longer match. |
||
|
|
||
| self.name: str = name | ||
| """str: The name to use for the dataset.""" | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
| @@ -1,8 +1,10 @@ | ||||
| """Utility functions for calculating summary statistics.""" | ||||
|
|
||||
| from collections import namedtuple | ||||
| from numbers import Number | ||||
| from typing import Iterable | ||||
|
|
||||
| import pandas as pd | ||||
| import numpy as np | ||||
|
|
||||
| SummaryStatistics = namedtuple( | ||||
| 'SummaryStatistics', ['x_mean', 'y_mean', 'x_stdev', 'y_stdev', 'correlation'] | ||||
|
|
@@ -12,14 +14,17 @@ | |||
| ) | ||||
|
|
||||
|
|
||||
| def get_values(df: pd.DataFrame) -> SummaryStatistics: | ||||
| def get_values(x: Iterable[Number], y: Iterable[Number]) -> SummaryStatistics: | ||||
| """ | ||||
| Calculate the summary statistics for the given set of points. | ||||
|
|
||||
| Parameters | ||||
| ---------- | ||||
| df : pandas.DataFrame | ||||
| A dataset with columns x and y. | ||||
| x : Iterable[Number] | ||||
| The ``x`` value of the dataset. | ||||
|
|
||||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||
| y : Iterable[Number] | ||||
| The ``y`` value of the dataset. | ||||
|
|
||||
| Returns | ||||
| ------- | ||||
|
|
@@ -28,9 +33,9 @@ def get_values(df: pd.DataFrame) -> SummaryStatistics: | |||
| along with the Pearson correlation coefficient between the two. | ||||
| """ | ||||
| return SummaryStatistics( | ||||
| df.x.mean(), | ||||
| df.y.mean(), | ||||
| df.x.std(), | ||||
| df.y.std(), | ||||
| df.corr().x.y, | ||||
| np.mean(x), | ||||
| np.mean(y), | ||||
| np.std(x, ddof=1), | ||||
| np.std(y, ddof=1), | ||||
| np.corrcoef(x, y)[0, 1], | ||||
| ) | ||||
| Original file line number | Diff line number | Diff line change | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -2,6 +2,7 @@ | |||||||||||
|
|
||||||||||||
| from __future__ import annotations | ||||||||||||
|
|
||||||||||||
| from collections.abc import Iterable, MutableSequence | ||||||||||||
| from functools import partial | ||||||||||||
| from numbers import Number | ||||||||||||
| from pathlib import Path | ||||||||||||
|
|
@@ -240,16 +241,26 @@ def _record_frames( | |||||||||||
| frame_number += 1 | ||||||||||||
| return frame_number | ||||||||||||
|
|
||||||||||||
| def _is_close_enough(self, df1: pd.DataFrame, df2: pd.DataFrame) -> bool: | ||||||||||||
| def _is_close_enough( | ||||||||||||
| self, | ||||||||||||
| x1: Iterable[Number], | ||||||||||||
| y1: Iterable[Number], | ||||||||||||
| x2: Iterable[Number], | ||||||||||||
| y2: Iterable[Number], | ||||||||||||
| ) -> bool: | ||||||||||||
| """ | ||||||||||||
| Check whether the statistics are within the acceptable bounds. | ||||||||||||
|
|
||||||||||||
| Parameters | ||||||||||||
| ---------- | ||||||||||||
| df1 : pandas.DataFrame | ||||||||||||
| The original DataFrame. | ||||||||||||
| df2 : pandas.DataFrame | ||||||||||||
| The DataFrame after the latest perturbation. | ||||||||||||
| x1 : Iterable[Number] | ||||||||||||
| The original value of ``x``. | ||||||||||||
| y1 : Iterable[Number] | ||||||||||||
| The original value of ``y``. | ||||||||||||
| x2 : Iterable[Number] | ||||||||||||
| The perturbed value of ``x``. | ||||||||||||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Extra space:
Suggested change
|
||||||||||||
| y2 : Iterable[Number] | ||||||||||||
| The perturbed value of ``y``. | ||||||||||||
|
|
||||||||||||
| Returns | ||||||||||||
| ------- | ||||||||||||
|
|
@@ -259,32 +270,33 @@ def _is_close_enough(self, df1: pd.DataFrame, df2: pd.DataFrame) -> bool: | |||||||||||
| return np.all( | ||||||||||||
| np.abs( | ||||||||||||
| np.subtract( | ||||||||||||
| *( | ||||||||||||
| np.floor(np.array(get_values(data)) * 10**self.decimals) | ||||||||||||
| for data in [df1, df2] | ||||||||||||
| ) | ||||||||||||
| np.floor(np.array(get_values(x1, y1)) * 10**self.decimals), | ||||||||||||
| np.floor(np.array(get_values(x2, y2)) * 10**self.decimals), | ||||||||||||
| ) | ||||||||||||
| ) | ||||||||||||
| == 0 | ||||||||||||
| ) | ||||||||||||
|
|
||||||||||||
| def _perturb( | ||||||||||||
| self, | ||||||||||||
| df: pd.DataFrame, | ||||||||||||
| x: MutableSequence[Number], | ||||||||||||
| y: MutableSequence[Number], | ||||||||||||
| target_shape: Shape, | ||||||||||||
| *, | ||||||||||||
| shake: Number, | ||||||||||||
| allowed_dist: Number, | ||||||||||||
| temp: Number, | ||||||||||||
| bounds: BoundingBox, | ||||||||||||
| ) -> pd.DataFrame: | ||||||||||||
| ) -> tuple[MutableSequence[Number], MutableSequence[Number]]: | ||||||||||||
| """ | ||||||||||||
| Perform one round of perturbation. | ||||||||||||
|
|
||||||||||||
| Parameters | ||||||||||||
| ---------- | ||||||||||||
| df : pandas.DataFrame | ||||||||||||
| The data to perturb. | ||||||||||||
| x : MutableSequence[Number] | ||||||||||||
| The ``x`` part of the dataset. | ||||||||||||
| y : MutableSequence[Number] | ||||||||||||
| The ``y`` part of the dataset. | ||||||||||||
| target_shape : Shape | ||||||||||||
| The shape to morph the data into. | ||||||||||||
| shake : numbers.Number | ||||||||||||
|
|
@@ -301,12 +313,12 @@ def _perturb( | |||||||||||
|
|
||||||||||||
| Returns | ||||||||||||
| ------- | ||||||||||||
| pandas.DataFrame | ||||||||||||
| tuple[MutableSequence[Number], MutableSequence[Number]] | ||||||||||||
| The input dataset with one point perturbed. | ||||||||||||
| """ | ||||||||||||
| row = self._rng.integers(0, len(df)) | ||||||||||||
| initial_x = df.at[row, 'x'] | ||||||||||||
| initial_y = df.at[row, 'y'] | ||||||||||||
| row = self._rng.integers(0, len(x)) | ||||||||||||
| initial_x = x[row] | ||||||||||||
| initial_y = y[row] | ||||||||||||
|
|
||||||||||||
| # this is the simulated annealing step, if "do_bad", then we are willing to | ||||||||||||
| # accept a new state which is worse than the current one | ||||||||||||
|
|
@@ -325,10 +337,10 @@ def _perturb( | |||||||||||
| within_bounds = [new_x, new_y] in bounds | ||||||||||||
| done = close_enough and within_bounds | ||||||||||||
|
|
||||||||||||
| df.loc[row, 'x'] = new_x | ||||||||||||
| df.loc[row, 'y'] = new_y | ||||||||||||
| x[row] = new_x | ||||||||||||
| y[row] = new_y | ||||||||||||
|
|
||||||||||||
| return df | ||||||||||||
| return x, y | ||||||||||||
|
|
||||||||||||
| def morph( | ||||||||||||
| self, | ||||||||||||
|
|
@@ -471,20 +483,27 @@ def _tweening( | |||||||||||
| max_value=max_shake, | ||||||||||||
| ) | ||||||||||||
|
|
||||||||||||
| x, y = ( | ||||||||||||
| start_shape.df['x'].to_numpy(copy=True), | ||||||||||||
| start_shape.df['y'].to_numpy(copy=True), | ||||||||||||
| ) | ||||||||||||
|
Comment on lines
+486
to
+489
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can't we use the
Suggested change
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm also wondering whether we need to copy here, since we already copy in the loop. |
||||||||||||
|
|
||||||||||||
| for i in self._looper( | ||||||||||||
| iterations, leave=True, ascii=True, desc=f'{target_shape} pattern' | ||||||||||||
| ): | ||||||||||||
| perturbed_data = self._perturb( | ||||||||||||
| morphed_data.copy(), | ||||||||||||
| np.copy(x), | ||||||||||||
| np.copy(y), | ||||||||||||
| target_shape=target_shape, | ||||||||||||
| shake=get_current_shake(i), | ||||||||||||
| allowed_dist=allowed_dist, | ||||||||||||
| temp=get_current_temp(i), | ||||||||||||
| bounds=start_shape.morph_bounds, | ||||||||||||
| ) | ||||||||||||
|
|
||||||||||||
| if self._is_close_enough(start_shape.df, perturbed_data): | ||||||||||||
| morphed_data = perturbed_data | ||||||||||||
| if self._is_close_enough(x, y, *perturbed_data): | ||||||||||||
| x, y = perturbed_data | ||||||||||||
| morphed_data = pd.DataFrame({'x': x, 'y': y}) | ||||||||||||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This isn't necessary in the loop with the switch to NumPy. We can have |
||||||||||||
|
|
||||||||||||
| frame_number = record_frames( | ||||||||||||
| data=morphed_data, | ||||||||||||
|
|
||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.