Source code for fklearn.training.utils

from functools import reduce, wraps
from time import time
import re
from typing import Any, List

import pandas as pd
from toolz import curry
import toolz as fp

from fklearn.types import LearnerReturnType, UncurriedLearnerFnType


[docs]@curry def log_learner_time(learner: UncurriedLearnerFnType, learner_name: str) -> UncurriedLearnerFnType: @wraps(learner) def timed_learner(*args: Any, **kwargs: Any) -> LearnerReturnType: t0 = time() (p, d, l) = learner(*args, **kwargs) return p, d, fp.assoc_in(l, [learner_name, 'running_time'], "%2.3f s" % (time() - t0)) return timed_learner
[docs]def expand_features_encoded(df: pd.DataFrame, features: List[str]) -> List[str]: """ Expand the list of features to include features created automatically by fklearn in encoders such as Onehot-encoder. All features created by fklearn have the naming pattern `fklearn_feat__col==val`. This function looks for these names in the DataFrame columns, checks if they can be derivative of any of the features listed in `features`, adds them to the new list of features and removes the original names from the list. E.g. df has columns `col1` with values 0 and 1 and `col2`. After Onehot-encoding `col1` df will have columns `fklearn_feat_col1==0`, `fklearn_feat_col1==1`, `col2`. This function will then add `fklearn_feat_col1==0` and `fklearn_feat_col1==1` to the list of features and remove `col1`. If for some reason df also has another column `fklearn_feat_col3==x` but `col3` is not on the list of features, this column will not be added. Parameters ---------- df : pd.DataFrame A Pandas' DataFrame with all features. features : list of str The original list of features. """ def fklearn_features(df: pd.DataFrame) -> List[str]: return list(filter(lambda col: col.startswith("fklearn_feat__"), df.columns)) def feature_prefix(feature: str) -> str: return feature.split("==")[0] def filter_non_listed_features(fklearn_features: List[str], features: List[str]) -> List[str]: possible_prefixes_with_listed_features = ["fklearn_feat__" + f for f in features] return list(filter(lambda col: feature_prefix(col) in possible_prefixes_with_listed_features, fklearn_features)) def remove_original_pre_encoded_features(features: List[str], encoded_features: List[str]) -> List[str]: expr = r"fklearn_feat__(.*)==" original_preencoded_features: List[str] = reduce(lambda x, y: x + y, (map(lambda x: re.findall(expr, x), encoded_features)), []) return list(filter(lambda col: col not in set(original_preencoded_features), features)) all_fklearn_features = fklearn_features(df) encoded_features = filter_non_listed_features(all_fklearn_features, features) not_encoded_features = remove_original_pre_encoded_features(features, encoded_features) return not_encoded_features + encoded_features