Source code for fklearn.tuning.model_agnostic_fc

from typing import List

import pandas as pd
from numpy import tril
from toolz import curry

from fklearn.types import LogType


@curry
def correlation_feature_selection(train_set: pd.DataFrame,
                                  features: List[str],
                                  threshold: float = 1.0) -> LogType:
    """
    Feature selection based on correlation

    Computes the pairwise correlation of ``features`` and, for every pair
    whose absolute correlation is non-zero and equal to or above
    ``threshold``, marks the feature in the earlier column of the
    correlation matrix to be dropped.

    Parameters
    ----------
    train_set : pd.DataFrame
        A Pandas' DataFrame with the training data

    features : list of str
        The list of features to consider when dropping with correlation

    threshold : float
        The correlation threshold. Will drop features with absolute
        correlation equal or above this threshold

    Returns
    ----------
    log with feature correlation (``feature_corr``), features to drop
    (``features_to_drop``) and final features (``final_features``).
    ``features_to_drop`` has one entry per offending pair, so a feature can
    appear more than once. ``final_features`` keeps the order of
    ``features`` with repeated entries removed.
    """
    correlogram = train_set[features].corr()
    labels = correlogram.columns.tolist()

    # Only the lower triangle is inspected so each pair is considered once;
    # tril() zeroes the upper triangle and the "> 0.0" test discards it.
    magnitude = abs(tril(correlogram.values))

    # Compare against the float itself. Formatting the threshold with "%f"
    # into a query string would round it to six decimal places.
    over_threshold = (magnitude > 0.0) & (magnitude >= threshold)

    # nonzero() on the transpose walks the matrix column by column, i.e.
    # ordered by the feature to drop first and by its partner second.
    col_positions, row_positions = over_threshold.T.nonzero()

    # The diagonal (a feature against itself) is excluded by label.
    features_to_drop = [labels[col]
                        for col, row in zip(col_positions, row_positions)
                        if labels[row] != labels[col]]

    # Keep the caller's ordering so the result is reproducible across runs
    # (a plain set difference yields an arbitrary order).
    excluded = set(features_to_drop)
    final_features = []
    for feature in features:
        if feature not in excluded:
            excluded.add(feature)  # also skips repeated entries in `features`
            final_features.append(feature)

    return {"feature_corr": correlogram.to_dict(),
            "features_to_drop": features_to_drop,
            "final_features": final_features}


@curry
def variance_feature_selection(train_set: pd.DataFrame,
                               features: List[str],
                               threshold: float = 0.0) -> LogType:
    """
    Feature selection based on variance

    Computes the variance of each feature with ``DataFrame.var`` and marks
    every feature whose variance is equal to or below ``threshold`` to be
    dropped.

    Parameters
    ----------
    train_set : pd.DataFrame
        A Pandas' DataFrame with the training data

    features : list of str
        The list of features to consider when dropping with variance

    threshold : float
        The variance threshold. Will drop features with variance equal or
        below this threshold

    Returns
    ----------
    log with feature variance (``feature_var``), features to drop
    (``features_to_drop``) and final features (``final_features``).
    ``final_features`` keeps the order of ``features`` with repeated
    entries removed.
    """
    feature_var = train_set[features].var()

    low_variance = feature_var <= threshold
    features_to_drop = feature_var[low_variance].index.tolist()

    # Keep the caller's ordering so the result is reproducible across runs
    # (a plain set difference yields an arbitrary order).
    excluded = set(features_to_drop)
    final_features = []
    for feature in features:
        if feature not in excluded:
            excluded.add(feature)  # also skips repeated entries in `features`
            final_features.append(feature)

    return {"feature_var": feature_var.to_dict(),
            "features_to_drop": features_to_drop,
            "final_features": final_features}