# Source code for fklearn.causal.validation.cate

import pandas as pd
from toolz import curry

from fklearn.types import EvalReturnType, UncurriedEvalFnType
from fklearn.validation.evaluators import r2_evaluator


def _validate_test_and_control_groups(test_data: pd.DataFrame,
                                      group_column: str,
                                      control_group_name: str) -> str:
    """
    Ensures `test_data` holds exactly two experiment groups, one of which is the control group, and returns the
    name of the other (test) group.

    Parameters
    ----------
    test_data : DataFrame
        A Pandas' DataFrame with `group_column` as a column.

    group_column : str
        The name of the column that tells whether rows belong to the test or control group.

    control_group_name : str
        The name of the control group.

    Returns
    -------
    test_group_name : str
        The name of the test group, i.e. the value in `group_column` that is not `control_group_name`.

    Raises
    ------
    ValueError
        If `control_group_name` does not appear in `group_column`.

    RuntimeError
        If `group_column` does not contain exactly two distinct values.
    """
    groups = test_data[group_column].unique()

    # The control group must be present before the group count is even considered.
    if control_group_name not in groups:
        raise ValueError("control group '{}' not found".format(control_group_name))

    group_count = len(groups)
    if group_count != 2:
        raise RuntimeError(
            "Exactly 2 groups are required for delta evaluations. found {}".format(group_count)
        )

    # With exactly two groups, the test group is whichever entry is not the control one.
    first_group, second_group = groups[0], groups[1]
    if control_group_name == second_group:
        return first_group
    return second_group


def cate_mean_by_bin(test_data: pd.DataFrame,
                     group_column: str,
                     control_group_name: str,
                     bin_column: str,
                     n_bins: int,
                     allow_dropped_bins: bool,
                     prediction_column: str,
                     target_column: str) -> pd.DataFrame:
    """
    Builds a dataframe with the predicted and actual CATEs for each quantile bin of a given column.

    This is primarily an auxiliary function, but it can also be used to visualize the CATEs.

    Parameters
    ----------
    test_data : DataFrame
        A Pandas' DataFrame with `group_column` as a column.

    group_column : str
        The name of the column that tells whether rows belong to the test or control group.

    control_group_name : str
        The name of the control group.

    bin_column : str
        The name of the column from which the quantiles will be created.

    n_bins : int
        The number of bins to be created.

    allow_dropped_bins : bool
        Whether to allow the function to drop duplicated quantiles.

    prediction_column : str
        The name of the column containing the predictions from the model being evaluated.

    target_column : str
        The name of the column containing the actual outcomes of the treatment.

    Returns
    -------
    gb : DataFrame
        The grouped dataframe with actual and predicted CATEs by bin.
    """
    treated_group_name = _validate_test_and_control_groups(
        test_data, group_column, control_group_name
    )

    # Sort direction that places the control rows ahead of the test rows, so the
    # per-bin diff computed below is "test minus control".
    control_sorts_first = treated_group_name > control_group_name

    bin_label = bin_column + "_q" + str(n_bins)
    if allow_dropped_bins:
        on_duplicates = "drop"
    else:
        on_duplicates = "raise"

    bins = pd.qcut(test_data[bin_column], n_bins, duplicates=on_duplicates)
    binned = test_data.assign(**{bin_label: bins})

    keys = [group_column, bin_label]
    relevant = binned[keys + [prediction_column, target_column]]
    group_means = relevant.groupby(keys).mean()
    group_means = group_means.sort_index(level=group_column, ascending=control_sorts_first)

    # Within each bin the first row has nothing to be subtracted from, so its diff
    # is NaN and it is discarded by dropna.
    deltas = group_means.groupby(bin_label).diff()
    return deltas.dropna().reset_index(drop=True)
@curry
def cate_mean_by_bin_meta_evaluator(test_data: pd.DataFrame,
                                    group_column: str,
                                    control_group_name: str,
                                    bin_column: str,
                                    n_bins: int,
                                    allow_dropped_bins: bool = False,
                                    inner_evaluator: UncurriedEvalFnType = r2_evaluator,
                                    eval_name: str = None,
                                    prediction_column: str = "prediction",
                                    target_column: str = "target") -> EvalReturnType:
    """
    Evaluates the predictions of a causal model that outputs treatment outcomes w.r.t. its capabilities to predict the
    CATE.

    Due to the fundamental lack of counterfactual data, the CATEs are computed for bins of a given column. This function
    then applies a fklearn-like evaluator on top of the aggregated dataframe.

    Parameters
    ----------
    test_data : DataFrame
        A Pandas' DataFrame with `group_column` as a column.

    group_column : str
        The name of the column that tells whether rows belong to the test or control group.

    control_group_name : str
        The name of the control group.

    bin_column : str
        The name of the column from which the quantiles will be created.

    n_bins : int
        The number of bins to be created.

    allow_dropped_bins : bool, optional (default=False)
        Whether to allow the function to drop duplicated quantiles.

    inner_evaluator : UncurriedEvalFnType, optional (default=r2_evaluator)
        An instance of a fklearn-like evaluator, which will be applied to the dataframe of per-bin CATEs returned by
        `cate_mean_by_bin`.

    eval_name : str, optional (default=None)
        The name of the evaluator as it will appear in the logs. When None, it is built as
        "cate_mean_by_bin_<bin_column>[<n_bins>q]__<inner_evaluator.__name__>".

    prediction_column : str, optional (default="prediction")
        The name of the column containing the predictions from the model being evaluated.

    target_column : str, optional (default="target")
        The name of the column containing the actual outcomes of the treatment.

    Returns
    -------
    log : dict
        A log-like dictionary with the evaluation by `inner_evaluator`

    Raises
    ------
    ValueError
        If `control_group_name` is not found in `group_column`, or if `cate_mean_by_bin` raises a ValueError while
        building the bins (in which case the original error is chained as the cause).

    RuntimeError
        If `group_column` does not contain exactly two distinct groups.
    """
    # Validate the groups before entering the try block: the helper signals a missing
    # control group with ValueError, which would otherwise be caught below and re-raised
    # with an unrelated message about bins.
    _validate_test_and_control_groups(test_data, group_column, control_group_name)

    try:
        gb = cate_mean_by_bin(
            test_data,
            group_column,
            control_group_name,
            bin_column,
            n_bins,
            allow_dropped_bins,
            prediction_column,
            target_column,
        )
    except ValueError as err:
        raise ValueError(
            "can't create {} bins for column '{}'. use 'allow_dropped_bins=True' to drop duplicated bins".format(
                n_bins, bin_column
            )
        ) from err

    if eval_name is None:
        eval_name = (
            "cate_mean_by_bin_"
            + bin_column
            + "[{}q]".format(n_bins)
            + "__"
            + inner_evaluator.__name__
        )

    return inner_evaluator(
        test_data=gb,
        prediction_column=prediction_column,
        target_column=target_column,
        eval_name=eval_name,
    )