# Source code for fklearn.causal.cate_learning.double_machine_learning

from typing import List, Tuple

import numpy as np
import pandas as pd
from sklearn import __version__ as sk_version
from sklearn.base import RegressorMixin
from sklearn.model_selection import KFold
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.base import clone
from toolz import curry
from typing import Union

from fklearn.common_docstrings import learner_pred_fn_docstring, learner_return_docstring
from fklearn.training.utils import log_learner_time, expand_features_encoded
from fklearn.types import LearnerReturnType


def _cv_estimate(model: RegressorMixin,
                 train_data: pd.DataFrame,
                 features: List[str],
                 y: str,
                 n_splits: int) -> Tuple[pd.Series, List[RegressorMixin]]:
    """
    Cross-fit `model` on `train_data` and collect the held-out predictions.

    The rows of `train_data` are split into `n_splits` folds with `KFold`.
    For every fold a fresh clone of `model` is fitted on the remaining rows
    and used to predict the rows of that fold.

    Parameters
    ----------
    model : RegressorMixin
        Estimator to clone and fit once per fold.

    train_data : pandas.DataFrame
        Frame holding both the feature columns and the target column.

    features : list of str
        Names of the columns of `train_data` used as model inputs.

    y : str
        Name of the column of `train_data` used as the target.

    n_splits : int
        Number of folds handed to `KFold`.

    Returns
    -------
    (pandas.Series, list of RegressorMixin)
        The held-out predictions, indexed like `train_data`, and the fitted
        per-fold models in fold order.
    """
    splitter = KFold(n_splits=n_splits)
    # Pre-filled with NaN; every position is overwritten by exactly one fold.
    held_out_pred = pd.Series(np.nan, index=train_data.index)
    fitted_models = []

    for fit_rows, held_out_rows in splitter.split(train_data):
        x_fit = train_data[features].iloc[fit_rows]
        y_fit = train_data[y].iloc[fit_rows]
        fold_model = clone(model, safe=False).fit(x_fit, y_fit)

        x_held_out = train_data[features].iloc[held_out_rows]
        # Positional assignment: KFold yields integer positions, not index labels.
        held_out_pred.iloc[held_out_rows] = fold_model.predict(x_held_out)
        fitted_models.append(fold_model)

    return held_out_pred, fitted_models


@curry
@log_learner_time(learner_name='non_parametric_double_ml_learner')
def non_parametric_double_ml_learner(df: pd.DataFrame,
                                     feature_columns: List[str],
                                     treatment_column: str,
                                     outcome_column: str,
                                     debias_model: Union[RegressorMixin, None] = None,
                                     debias_feature_columns: Union[List[str], None] = None,
                                     denoise_model: Union[RegressorMixin, None] = None,
                                     denoise_feature_columns: Union[List[str], None] = None,
                                     final_model: Union[RegressorMixin, None] = None,
                                     final_model_feature_columns: Union[List[str], None] = None,
                                     prediction_column: str = "prediction",
                                     cv_splits: int = 2,
                                     encode_extra_cols: bool = True) -> LearnerReturnType:
    """
    Fits a Non-Parametric Double/ML Meta Learner for Conditional Average Treatment Effect Estimation. It implements the
    following steps:
    1) fits k instances of the debias model to predict the treatment from the features and get out-of-fold residuals
        t_res=t-t_hat;
    2) fits k instances of the denoise model to predict the outcome from the features and get out-of-fold residuals
        y_res=y-y_hat;
    3) fits a final ML model to predict y_res / t_res from the features using weighted regression with weights set to
        t_res^2. Trained like this, the final model will output treatment effect predictions.

    Parameters
    ----------
    df : pandas.DataFrame
        A Pandas' DataFrame with features, treatment and target columns.
        The model will be trained to predict the target column
        from the features.

    feature_columns : list of str
        A list of column names that are used as features for the denoise, debias and final models in double-ml.
        All these names should be in `df`.

    treatment_column : str
        The name of the column in `df` that should be used as treatment for the double-ml model. It will learn the
        impact of this column with respect to the outcome column.

    outcome_column : str
        The name of the column in `df` that should be used as outcome for the double-ml model. It will learn the impact
        of the treatment column on this outcome column.

    debias_model : RegressorMixin (default None)
        The estimator for fitting the treatment from the features. Must implement fit and predict methods. It can be an
        scikit-learn regressor. When None, defaults to GradientBoostingRegressor.

    debias_feature_columns : list of str (default None)
        A list of column names to be used only for the debias model. If not None, it will replace feature_columns when
        fitting the debias model.

    denoise_model : RegressorMixin (default None)
        The estimator for fitting the outcome from the features. Must implement fit and predict methods. It can be an
        scikit-learn regressor. When None, defaults to GradientBoostingRegressor.

    denoise_feature_columns : list of str (default None)
        A list of column names to be used only for the denoise model. If not None, it will replace feature_columns when
        fitting the denoise model.

    final_model : RegressorMixin (default None)
        The estimator for fitting the outcome residuals from the treatment residuals. Must implement fit and predict
        methods. It can be an arbitrary scikit-learn regressor. The fit method must accept sample_weight as a keyword
        argument. When None, defaults to GradientBoostingRegressor.

    final_model_feature_columns : list of str (default None)
        A list of column names to be used only for the final model. If not None, it will replace feature_columns when
        fitting the final model.

    prediction_column : str (default "prediction")
        The name of the column with the treatment effect predictions from the final model.

    cv_splits : int (default 2)
        Number of folds to split the training data when fitting the debias and denoise models

    encode_extra_cols : bool (default: True)
        If True, treats all columns in `df` with name pattern `fklearn_feat__col==val` as feature columns.
    """

    features = feature_columns if not encode_extra_cols else expand_features_encoded(df, feature_columns)

    # Clone user-supplied estimators so the instances passed in by the caller are not the ones being fitted.
    debias_model = GradientBoostingRegressor() if debias_model is None else clone(debias_model, safe=False)
    denoise_model = GradientBoostingRegressor() if denoise_model is None else clone(denoise_model, safe=False)
    final_model = GradientBoostingRegressor() if final_model is None else clone(final_model, safe=False)

    # Out-of-fold predictions of the treatment (debias step) and of the outcome (denoise step).
    t_hat, mts = _cv_estimate(debias_model, df,
                              features if debias_feature_columns is None else debias_feature_columns,
                              treatment_column, cv_splits)
    y_hat, mys = _cv_estimate(denoise_model, df,
                              features if denoise_feature_columns is None else denoise_feature_columns,
                              outcome_column, cv_splits)

    y_res = df[outcome_column] - y_hat
    t_res = df[treatment_column] - t_hat

    # NOTE(review): a treatment residual of exactly zero yields an inf/NaN target here even though its
    # weight is zero; whether the final estimator tolerates that depends on the estimator - confirm if relevant.
    final_target = y_res / t_res
    weights = t_res ** 2
    final_model_x = features if final_model_feature_columns is None else final_model_feature_columns

    model_final_fitted = final_model.fit(X=df[final_model_x], y=final_target, sample_weight=weights)

    def p(new_df: pd.DataFrame) -> pd.DataFrame:
        # Predict on a DataFrame slice, the same input type used for fitting above, so estimators that
        # record feature names at fit time receive matching names at predict time.
        return new_df.assign(**{prediction_column: model_final_fitted.predict(new_df[final_model_x])})

    p.__doc__ = learner_pred_fn_docstring("non_parametric_double_ml_learner")

    log = {'non_parametric_double_ml_learner': {
        'features': feature_columns,
        'debias_feature_columns': debias_feature_columns,
        'denoise_feature_columns': denoise_feature_columns,
        'final_model_feature_columns': final_model_feature_columns,
        'outcome_column': outcome_column,
        'treatment_column': treatment_column,
        'prediction_column': prediction_column,
        'package': "sklearn",
        'package_version': sk_version,
        'feature_importance': None,
        'training_samples': len(df)},
        'debias_models': mts,
        'denoise_models': mys,
        'cv_splits': cv_splits,
        'object': model_final_fitted}

    return p, p(df), log
# Extend the learner's docstring with the text built by learner_return_docstring
# (presumably the shared "Returns" section used by fklearn learners; the helper lives in
# fklearn.common_docstrings and is not visible here).
non_parametric_double_ml_learner.__doc__ += learner_return_docstring("Non Parametric Double/ML")