When trying to use FeatureSelector I got a "ValueError: could not convert string to float: 'x'" message.
The command I use (Python 3.10):
from verstack import FeatureSelector
FS = FeatureSelector(objective = 'classification', auto = True)
selected_feats = FS.fit_transform(X_encoded, y)
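For reference, here is a self-contained stand-in that I believe triggers the same error; the toy DataFrame below is hypothetical and only mimics the shape of my real X_encoded (one numeric column plus one column that is still stored as strings):
import numpy as np
import pandas as pd
from verstack import FeatureSelector

# Hypothetical stand-in for X_encoded / y: one numeric feature and one
# feature that is still of object (string) dtype after my encoding step.
X_encoded = pd.DataFrame({
    'feat_num': np.random.rand(100),
    'feat_cat': np.random.choice(['x', 'y', 'z'], size=100),  # object dtype
})
y = pd.Series(np.random.randint(0, 2, size=100))

FS = FeatureSelector(objective='classification', auto=True)
selected_feats = FS.fit_transform(X_encoded, y)  # raises the ValueError below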
Error call stack:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[24], line 3
1 from verstack import FeatureSelector
2 FS = FeatureSelector(objective = 'classification', auto = True)
----> 3 selected_feats = FS.fit_transform(X_encoded, y)
File ~\AppData\Roaming\Python\Python310\site-packages\verstack\tools.py:19, in timer.<locals>.wrapped(*args, **kwargs)
16 @wraps(func)
17 def wrapped(*args, **kwargs):
18 start = time.time()
---> 19 result = func(*args, **kwargs)
20 end = time.time()
21 elapsed = round(end-start,5)
File ~\AppData\Roaming\Python\Python310\site-packages\verstack\FeatureSelector.py:232, in FeatureSelector.fit_transform(self, X, y, **kwargs)
230 if self.auto:
231 self.printer.print(f'Comparing LinearRegression and RandomForest for feature selection', order = 2)
--> 232 self._auto_linear_randomforest_selector(X, y, kwargs)
233 else:
234 self.printer.print(f'Running feature selection with {self._model}', order = 2)
File ~\AppData\Roaming\Python\Python310\site-packages\verstack\FeatureSelector.py:294, in FeatureSelector._auto_linear_randomforest_selector(self, X, y, kwargs)
291 selector_rf = self._get_selector(randomforest_model, y, kwargs)
293 self.printer.print(f'Running feature selection with {linear_model}', order = 2)
--> 294 feats_lr_flags = self._prepare_data_apply_selector(X, y, selector_lr, scale_data = True)
296 self.printer.print(f'Running feature selection with {randomforest_model}', order = 2)
297 feats_rf_flags = self._prepare_data_apply_selector(X, y, selector_rf, scale_data = False)
File ~\AppData\Roaming\Python\Python310\site-packages\verstack\FeatureSelector.py:251, in FeatureSelector._prepare_data_apply_selector(self, X, y, selector, scale_data)
249 X_subset, y_subset = self._subset_data(X, y)
250 if scale_data:
--> 251 X_subset = self._scale_data(X_subset)
252 try:
253 X_subset, y_subset = self._transform_data_to_float_32(X_subset, y_subset)
File ~\AppData\Roaming\Python\Python310\site-packages\verstack\FeatureSelector.py:499, in FeatureSelector._scale_data(self, X)
497 from sklearn.preprocessing import StandardScaler
498 scaler = StandardScaler()
--> 499 X = scaler.fit_transform(X)
500 return X
File C:\Anaconda3\envs\python_310\lib\site-packages\sklearn\base.py:867, in TransformerMixin.fit_transform(self, X, y, **fit_params)
863 # non-optimized default implementation; override when a better
864 # method is possible for a given clustering algorithm
865 if y is None:
866 # fit method of arity 1 (unsupervised transformation)
--> 867 return self.fit(X, **fit_params).transform(X)
868 else:
869 # fit method of arity 2 (supervised transformation)
870 return self.fit(X, y, **fit_params).transform(X)
File C:\Anaconda3\envs\python_310\lib\site-packages\sklearn\preprocessing\_data.py:809, in StandardScaler.fit(self, X, y, sample_weight)
807 # Reset internal state before fitting
808 self._reset()
--> 809 return self.partial_fit(X, y, sample_weight)
File C:\Anaconda3\envs\python_310\lib\site-packages\sklearn\preprocessing\_data.py:844, in StandardScaler.partial_fit(self, X, y, sample_weight)
812 """Online computation of mean and std on X for later scaling.
813
814 All of X is processed as a single batch. This is intended for cases
(...)
841 Fitted scaler.
842 """
843 first_call = not hasattr(self, "n_samples_seen_")
--> 844 X = self._validate_data(
845 X,
846 accept_sparse=("csr", "csc"),
847 dtype=FLOAT_DTYPES,
848 force_all_finite="allow-nan",
849 reset=first_call,
850 )
851 n_features = X.shape[1]
853 if sample_weight is not None:
File C:\Anaconda3\envs\python_310\lib\site-packages\sklearn\base.py:577, in BaseEstimator._validate_data(self, X, y, reset, validate_separately, **check_params)
575 raise ValueError("Validation should be done on X, y or both.")
576 elif not no_val_X and no_val_y:
--> 577 X = check_array(X, input_name="X", **check_params)
578 out = X
579 elif no_val_X and not no_val_y:
File C:\Anaconda3\envs\python_310\lib\site-packages\sklearn\utils\validation.py:856, in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)
854 array = array.astype(dtype, casting="unsafe", copy=False)
855 else:
--> 856 array = np.asarray(array, order=order, dtype=dtype)
857 except ComplexWarning as complex_warning:
858 raise ValueError(
859 "Complex data not supported\n{}\n".format(array)
860 ) from complex_warning
File C:\Anaconda3\envs\python_310\lib\site-packages\pandas\core\generic.py:2070, in NDFrame.__array__(self, dtype)
2069 def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray:
-> 2070 return np.asarray(self._values, dtype=dtype)
ValueError: could not convert string to float: 'x'
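From the traceback it looks like the StandardScaler inside _scale_data receives a column that still contains raw strings. A minimal check along these lines (the column names are made up, not from my real data) shows the same failure, plus the workaround I would try of one-hot encoding the leftover object columns before calling fit_transform:
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Hypothetical frame with one leftover string column.
X_demo = pd.DataFrame({
    'num_col': [1.0, 2.0, 3.0],
    'str_col': ['x', 'y', 'x'],  # object dtype
})

# Columns that are still non-numeric and would break the scaler:
print(X_demo.select_dtypes(exclude='number').columns.tolist())  # ['str_col']

try:
    StandardScaler().fit_transform(X_demo)
except ValueError as err:
    print(err)  # could not convert string to float: 'x'

# Possible workaround (my assumption, not an official verstack recommendation):
# one-hot encode the remaining object columns so every feature is numeric.
X_numeric = pd.get_dummies(X_demo, columns=['str_col'])
print(StandardScaler().fit_transform(X_numeric).shape)  # (3, 3)
My guess is that every column needs to be numeric before fit_transform is called, but I am not sure whether that is the intended requirement.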
Could you help me figure out what I am doing wrong?
Thanks,
balgad
P.S.: Anyway, it's a great package! :)