Source code for fklearn.causal.validation.cate
import pandas as pd
from toolz import curry
from fklearn.types import EvalReturnType, UncurriedEvalFnType
from fklearn.validation.evaluators import r2_evaluator
def _validate_test_and_control_groups(test_data: pd.DataFrame,
                                      group_column: str,
                                      control_group_name: str) -> str:
    """
    Ensures that `test_data` contains exactly two experiment groups, one of them being the control group, and
    returns the name of the other one (the test group).

    Parameters
    ----------
    test_data : DataFrame
        A Pandas' DataFrame with `group_column` as a column.

    group_column : str
        The name of the column that tells whether rows belong to the test or control group.

    control_group_name : str
        The name of the control group.

    Returns
    ----------
    test_group_name: str
        The name of the test group, i.e. the value of `group_column` that is not `control_group_name`.

    Raises
    ----------
    ValueError
        If `control_group_name` is not among the distinct values of `group_column`.

    RuntimeError
        If `group_column` does not hold exactly two distinct values.
    """
    groups = test_data[group_column].unique()

    # The control group is checked first, so its absence is reported even when the group count is also wrong.
    if control_group_name not in groups:
        raise ValueError("control group '{}' not found".format(control_group_name))

    group_count = len(groups)
    if group_count != 2:
        raise RuntimeError(
            "Exactly 2 groups are required for delta evaluations. found {}".format(group_count)
        )

    first_group, second_group = groups[0], groups[1]
    if control_group_name == second_group:
        return first_group
    return second_group
def cate_mean_by_bin(test_data: pd.DataFrame,
                     group_column: str,
                     control_group_name: str,
                     bin_column: str,
                     n_bins: int,
                     allow_dropped_bins: bool,
                     prediction_column: str,
                     target_column: str) -> pd.DataFrame:
    """
    Computes a dataframe with predicted and actual CATEs by bins of a given column.

    This is primarily an auxiliary function, but can be used to visualize the CATEs.

    Parameters
    ----------
    test_data : DataFrame
        A Pandas' DataFrame with `group_column` as a column.

    group_column : str
        The name of the column that tells whether rows belong to the test or control group.

    control_group_name : str
        The name of the control group.

    bin_column : str
        The name of the column from which the quantiles will be created.

    n_bins : int
        The number of bins to be created.

    allow_dropped_bins : bool
        Whether to allow the function to drop duplicated quantiles.

    prediction_column : str
        The name of the column containing the predictions from the model being evaluated.

    target_column : str
        The name of the column containing the actual outcomes of the treatment.

    Returns
    ----------
    gb: DataFrame
        The grouped dataframe with actual and predicted CATEs by bin: `prediction_column` and `target_column` hold
        the per-bin difference of group means (test minus control). The index is reset, so bin labels are not kept.

    Raises
    ----------
    ValueError
        If the control group is not found in `group_column`, or propagated from `pd.qcut` (e.g. duplicated bin
        edges while `allow_dropped_bins` is False).

    RuntimeError
        If `group_column` does not hold exactly two distinct values.
    """
    test_group_name = _validate_test_and_control_groups(
        test_data, group_column, control_group_name
    )

    # Sorting direction is chosen so that, within every bin, the control row comes right before the test row;
    # the row-wise diff below is therefore always "test - control".
    test_after_control = test_group_name > control_group_name

    quantile_column = bin_column + "_q" + str(n_bins)
    duplicates = "drop" if allow_dropped_bins else "raise"

    test_data_binned = test_data.assign(
        **{
            quantile_column: pd.qcut(
                test_data[bin_column], n_bins, duplicates=duplicates
            )
        }
    )

    gb_columns = [group_column, quantile_column]

    # `quantile_column` is categorical. `observed` is passed explicitly so the result does not depend on the
    # pandas default (which is deprecated and scheduled to change) and no FutureWarning is emitted.
    gb = (
        test_data_binned[gb_columns + [prediction_column, target_column]]
        .groupby(gb_columns, observed=False)
        .mean()
        .sort_index(level=group_column, ascending=test_after_control)
    )

    # diff() leaves NaN on the first (control) row of each bin and on bins lacking data for one of the groups;
    # dropna() removes those rows, keeping one row per bin in which both groups are present.
    return (
        gb.groupby(quantile_column, observed=False)
        .diff()
        .dropna()
        .reset_index(drop=True)
    )
@curry
def cate_mean_by_bin_meta_evaluator(test_data: pd.DataFrame,
                                    group_column: str,
                                    control_group_name: str,
                                    bin_column: str,
                                    n_bins: int,
                                    allow_dropped_bins: bool = False,
                                    inner_evaluator: UncurriedEvalFnType = r2_evaluator,
                                    eval_name: str = None,
                                    prediction_column: str = "prediction",
                                    target_column: str = "target") -> EvalReturnType:
    """
    Evaluates the predictions of a causal model that outputs treatment outcomes w.r.t. its capabilities to predict the
    CATE.

    Due to the fundamental lack of counterfactual data, the CATEs are computed for bins of a given column. This function
    then applies a fklearn-like evaluator on top of the aggregated dataframe.

    Parameters
    ----------
    test_data : DataFrame
        A Pandas' DataFrame with `group_column` as a column.

    group_column : str
        The name of the column that tells whether rows belong to the test or control group.

    control_group_name : str
        The name of the control group.

    bin_column : str
        The name of the column from which the quantiles will be created.

    n_bins : int
        The number of bins to be created.

    allow_dropped_bins : bool, optional (default=False)
        Whether to allow the function to drop duplicated quantiles.

    inner_evaluator : UncurriedEvalFnType, optional (default=r2_evaluator)
        An instance of a fklearn-like evaluator, which will be applied to the dataframe of CATEs aggregated by bin.

    eval_name : str, optional (default=None)
        The name of the evaluator as it will appear in the logs. When None, a name is built from `bin_column`,
        `n_bins` and the name of `inner_evaluator`.

    prediction_column : str, optional (default="prediction")
        The name of the column containing the predictions from the model being evaluated.

    target_column : str, optional (default="target")
        The name of the column containing the actual outcomes of the treatment.

    Returns
    ----------
    log: dict
        A log-like dictionary with the evaluation by `inner_evaluator`

    Raises
    ----------
    ValueError
        If the control group is not found in `group_column`, or if the per-bin aggregation fails with a ValueError
        (reported as a failure to create the requested bins, with the original error chained as its cause).

    RuntimeError
        If `group_column` does not hold exactly two distinct values.
    """
    # Validate the groups before entering the `try` below: otherwise the "control group not found" ValueError
    # would be caught there and re-raised as a misleading message about bins.
    _validate_test_and_control_groups(test_data, group_column, control_group_name)

    try:
        gb = cate_mean_by_bin(
            test_data,
            group_column,
            control_group_name,
            bin_column,
            n_bins,
            allow_dropped_bins,
            prediction_column,
            target_column,
        )
    except ValueError as err:
        # Keep the original error attached as the cause so the underlying reason stays visible in tracebacks.
        raise ValueError(
            "can't create {} bins for column '{}'. use 'allow_dropped_bins=True' to drop duplicated bins".format(
                n_bins, bin_column
            )
        ) from err

    if eval_name is None:
        eval_name = (
            "cate_mean_by_bin_"
            + bin_column
            + "[{}q]".format(n_bins)
            + "__"
            + inner_evaluator.__name__
        )

    return inner_evaluator(
        test_data=gb,
        prediction_column=prediction_column,
        target_column=target_column,
        eval_name=eval_name,
    )