# Page header captured with this file: "Spaces: Runtime error" (not part of the module).
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import FunctionTransformer, SplineTransformer
from sklearn.utils.validation import check_is_fitted
class GroupImputer(BaseEstimator, TransformerMixin):
    """
    Class used for imputing missing values in one column of a
    pd.DataFrame using the mean, median, or mode of that column
    computed by groupwise aggregation.

    Parameters:
    -----------
    target : str
        - The name of the column to be imputed
    group_cols : list
        - List of name(s) of columns on which to groupby
    strategy : str
        - The method for replacement; can be any of
          ['mean', 'median', 'mode']

    Attributes:
    -----------
    impute_map_ : pd.DataFrame
        - Set by fit. One row per group seen during fit, holding the
          group_cols values and, in the target column, the statistic
          used to fill missing values of that group

    Raises:
    -------
    ValueError
        - If strategy is not one of the supported names
    TypeError
        - If group_cols is not a list or target is not a string

    Returns:
    --------
    X : pd.DataFrame
        - The dataframe with imputed values in the target column
    """

    def __init__(self, target, group_cols=None, strategy='median'):
        # Explicit raises instead of assert: assertions are stripped when
        # Python runs with -O, which would let bad arguments through.
        if strategy not in ('mean', 'median', 'mode'):
            raise ValueError("strategy must be in ['mean', 'median', 'mode']")
        if not isinstance(group_cols, list):
            raise TypeError('group_cols must be a list of column names')
        if not isinstance(target, str):
            raise TypeError('target must be a string')
        self.group_cols = group_cols
        self.strategy = strategy
        self.target = target

    @staticmethod
    def _most_frequent(values):
        """Return the first mode of the non-missing values, or NaN if none."""
        # Missing values are excluded so that NaN can never be chosen as
        # the fill value; an all-missing group yields NaN (nothing to fill
        # with), matching what 'mean' and 'median' produce for such groups.
        modes = values.mode(dropna=True)
        if modes.empty:
            return np.nan
        return modes.iloc[0]

    def fit(self, X, y=None):
        """
        Compute the per-group statistic of the target column.

        X is the dataframe containing group_cols and target; y is ignored
        and only present for scikit-learn API compatibility. Returns self.
        """
        if self.strategy == 'mode':
            aggregate = self._most_frequent
        else:
            aggregate = self.strategy
        self.impute_map_ = (
            X.groupby(self.group_cols)[self.target]
            .agg(aggregate)
            .reset_index(drop=False)
        )
        return self

    def transform(self, X, y=None):
        """
        Return a copy of X with missing target values filled per group.

        Rows whose group was not seen during fit, or whose group statistic
        is itself missing, are left unchanged. y is ignored.
        """
        check_is_fitted(self, 'impute_map_')
        X = X.copy()
        missing = X[self.target].isna().to_numpy()
        if not missing.any():
            return X
        # Left-join every row's group key onto the fitted statistics. Keys in
        # impute_map_ are unique (one row per group), so the join keeps X's
        # row order and length; the index is reset so that an index level
        # sharing a name with a group column cannot make the join ambiguous.
        fill_values = (
            X[self.group_cols]
            .reset_index(drop=True)
            .merge(self.impute_map_, on=self.group_cols, how='left')[self.target]
            .to_numpy()
        )
        # Positional (boolean-array) assignment, so duplicate index labels
        # in X are handled correctly.
        X.loc[missing, self.target] = fill_values[missing]
        return X
# Sine and cosine transformations
def sin_feature_names(transformer, feature_names):
    """
    Build output feature names for the sine transformation.

    Each input name is prefixed with 'SIN_'. The transformer argument is
    unused; it is part of the signature so this function can be passed as
    a feature_names_out callable.
    """
    return [f'SIN_{col}' for col in feature_names]
def cos_feature_names(transformer, feature_names):
    """
    Build output feature names for the cosine transformation.

    Each input name is prefixed with 'COS_'. The transformer argument is
    unused; it is part of the signature so this function can be passed as
    a feature_names_out callable.
    """
    return [f'COS_{col}' for col in feature_names]
def _sin_encode(x, period):
    """Return sin(2*pi*x/period), the sine of x's phase within one period."""
    return np.sin(2 * np.pi * x / period)


def sin_transformer(period):
    """
    Create a FunctionTransformer that applies sin(2*pi*x/period).

    Output feature names are produced by sin_feature_names.
    """
    # A module-level function with kw_args is used instead of a lambda so
    # that the resulting transformer can be pickled.
    return FunctionTransformer(
        _sin_encode,
        kw_args={'period': period},
        feature_names_out=sin_feature_names,
    )
def _cos_encode(x, period):
    """Return cos(2*pi*x/period), the cosine of x's phase within one period."""
    return np.cos(2 * np.pi * x / period)


def cos_transformer(period):
    """
    Create a FunctionTransformer that applies cos(2*pi*x/period).

    Output feature names are produced by cos_feature_names.
    """
    # A module-level function with kw_args is used instead of a lambda so
    # that the resulting transformer can be pickled.
    return FunctionTransformer(
        _cos_encode,
        kw_args={'period': period},
        feature_names_out=cos_feature_names,
    )
# Periodic spline transformation
def periodic_spline_transformer(period, n_splines=None, degree=3):
    """
    Create a SplineTransformer with periodic extrapolation.

    Parameters:
    -----------
    period : number
        - Length of one cycle; knots are evenly spaced over [0, period]
    n_splines : int or None
        - Number of splines; defaults to period when None
    degree : int
        - Degree passed through to SplineTransformer

    Returns:
    --------
    SplineTransformer
        - Configured with explicit knots, extrapolation="periodic" and
          include_bias=True
    """
    if n_splines is None:
        n_splines = period
    n_knots = n_splines + 1  # periodic and include_bias is True
    return SplineTransformer(
        degree=degree,
        n_knots=n_knots,
        knots=np.linspace(0, period, n_knots).reshape(n_knots, 1),
        extrapolation="periodic",
        include_bias=True,
    )