# Source code for fklearn.causal.validation.auc

import pandas as pd
from toolz import curry

from fklearn.types import EffectFnType
from fklearn.causal.validation.curves import cumulative_effect_curve
from fklearn.causal.effects import linear_effect


@curry
def area_under_the_cumulative_effect_curve(df: pd.DataFrame,
                                           treatment: str,
                                           outcome: str,
                                           prediction: str,
                                           min_rows: int = 30,
                                           steps: int = 100,
                                           effect_fn: EffectFnType = linear_effect) -> float:
    """
    Computes the area between the cumulative effect curve and the effect
    measured on the whole dataset.

    The curve itself comes from `cumulative_effect_curve`, which is given the
    same `prediction`, `min_rows`, `steps` and `effect_fn`. Every point of the
    curve has the whole-dataset effect subtracted from it and is weighted by
    the fraction of rows its segment covers; the absolute value of the
    weighted sum is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Data holding the treatment, outcome and prediction columns.

    treatment : str
        The name of the treatment column in `df`.

    outcome : str
        The name of the outcome column in `df`.

    prediction : str
        The name of the prediction column in `df`.

    min_rows : int
        Number of rows at the first point of the curve. It is also the width
        given to the first segment.

    steps : int
        The number of cumulative steps used when accumulating the effect. The
        distance between consecutive points is `df.shape[0] // steps` rows.

    effect_fn : function (df: pandas.DataFrame, treatment: str, outcome: str) -> int or Array of int
        A function that computes the treatment effect given a dataframe, the
        name of the treatment column and the name of the outcome column.

    Returns
    ----------
    area_under_the_cumulative_effect_curve: float
        The area under the cumulative effect curve according to the
        predictions ordering.

    Notes
    ----------
    When `df` has fewer rows than `steps`, `df.shape[0] // steps` is zero and
    `range` raises a `ValueError`.
    """
    # Effect on the full dataset; used as the baseline of the curve.
    overall_effect = effect_fn(df, treatment, outcome)

    total_rows = df.shape[0]

    # Row counts at which the curve is evaluated, always ending at the full data.
    # NOTE(review): presumably mirrors the grid used inside
    # `cumulative_effect_curve` -- confirm against that helper.
    checkpoints = [*range(min_rows, total_rows, total_rows // steps), total_rows]

    # Width (in rows) of each segment; the first one spans the first `min_rows` rows.
    widths = [min_rows]
    widths.extend(upper - lower for lower, upper in zip(checkpoints, checkpoints[1:]))

    curve = cumulative_effect_curve(
        df=df,
        treatment=treatment,
        outcome=outcome,
        prediction=prediction,
        min_rows=min_rows,
        steps=steps,
        effect_fn=effect_fn,
    )

    weighted_excess = [
        (point - overall_effect) * (width / total_rows)
        for point, width in zip(curve, widths)
    ]
    return abs(sum(weighted_excess))
@curry
def area_under_the_cumulative_gain_curve(df: pd.DataFrame,
                                         treatment: str,
                                         outcome: str,
                                         prediction: str,
                                         min_rows: int = 30,
                                         steps: int = 100,
                                         effect_fn: EffectFnType = linear_effect) -> float:
    """
    Computes the area under the cumulative gain curve.

    The cumulative effect curve comes from `cumulative_effect_curve`, which is
    given the same `prediction`, `min_rows`, `steps` and `effect_fn`. Every
    point of that curve is scaled by the fraction of rows used to compute it
    (the gain) and weighted by the fraction of rows its segment covers; the
    absolute value of the weighted sum is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Data holding the treatment, outcome and prediction columns.

    treatment : str
        The name of the treatment column in `df`.

    outcome : str
        The name of the outcome column in `df`.

    prediction : str
        The name of the prediction column in `df`.

    min_rows : int
        Number of rows at the first point of the curve. It is also the width
        given to the first segment.

    steps : int
        The number of cumulative steps used when accumulating the effect. The
        distance between consecutive points is `df.shape[0] // steps` rows.

    effect_fn : function (df: pandas.DataFrame, treatment: str, outcome: str) -> int or Array of int
        A function that computes the treatment effect given a dataframe, the
        name of the treatment column and the name of the outcome column.

    Returns
    ----------
    area_under_the_cumulative_gain_curve: float
        The area under the cumulative gain curve according to the predictions
        ordering.

    Notes
    ----------
    When `df` has fewer rows than `steps`, `df.shape[0] // steps` is zero and
    `range` raises a `ValueError`.
    """
    total_rows = df.shape[0]

    # Row counts at which the curve is evaluated, always ending at the full data.
    # NOTE(review): presumably mirrors the grid used inside
    # `cumulative_effect_curve` -- confirm against that helper.
    checkpoints = [*range(min_rows, total_rows, total_rows // steps), total_rows]

    # Width (in rows) of each segment; the first one spans the first `min_rows` rows.
    widths = [min_rows]
    widths.extend(upper - lower for lower, upper in zip(checkpoints, checkpoints[1:]))

    curve = cumulative_effect_curve(
        df=df,
        treatment=treatment,
        outcome=outcome,
        prediction=prediction,
        min_rows=min_rows,
        steps=steps,
        effect_fn=effect_fn,
    )

    # gain = effect * share of rows used; it is then weighted by the segment's share.
    weighted_gain = [
        point * (n_used / total_rows) * (width / total_rows)
        for n_used, point, width in zip(checkpoints, curve, widths)
    ]
    return abs(sum(weighted_gain))
@curry
def area_under_the_relative_cumulative_gain_curve(df: pd.DataFrame,
                                                  treatment: str,
                                                  outcome: str,
                                                  prediction: str,
                                                  min_rows: int = 30,
                                                  steps: int = 100,
                                                  effect_fn: EffectFnType = linear_effect) -> float:
    """
    Computes the area under the relative cumulative gain curve.

    The cumulative effect curve comes from `cumulative_effect_curve`, which is
    given the same `prediction`, `min_rows`, `steps` and `effect_fn`. Every
    point of that curve has the whole-dataset effect subtracted from it, is
    scaled by the fraction of rows used to compute it and is weighted by the
    fraction of rows its segment covers; the absolute value of the weighted
    sum is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Data holding the treatment, outcome and prediction columns.

    treatment : str
        The name of the treatment column in `df`.

    outcome : str
        The name of the outcome column in `df`.

    prediction : str
        The name of the prediction column in `df`.

    min_rows : int
        Number of rows at the first point of the curve. It is also the width
        given to the first segment.

    steps : int
        The number of cumulative steps used when accumulating the effect. The
        distance between consecutive points is `df.shape[0] // steps` rows.

    effect_fn : function (df: pandas.DataFrame, treatment: str, outcome: str) -> int or Array of int
        A function that computes the treatment effect given a dataframe, the
        name of the treatment column and the name of the outcome column.

    Returns
    ----------
    area_under_the_relative_cumulative_gain_curve: float
        The area under the relative cumulative gain curve according to the
        predictions ordering.

    Notes
    ----------
    When `df` has fewer rows than `steps`, `df.shape[0] // steps` is zero and
    `range` raises a `ValueError`.
    """
    # Effect on the full dataset; used as the baseline of the curve.
    overall_effect = effect_fn(df, treatment, outcome)

    total_rows = df.shape[0]

    # Row counts at which the curve is evaluated, always ending at the full data.
    # NOTE(review): presumably mirrors the grid used inside
    # `cumulative_effect_curve` -- confirm against that helper.
    checkpoints = [*range(min_rows, total_rows, total_rows // steps), total_rows]

    # Width (in rows) of each segment; the first one spans the first `min_rows` rows.
    widths = [min_rows]
    widths.extend(upper - lower for lower, upper in zip(checkpoints, checkpoints[1:]))

    curve = cumulative_effect_curve(
        df=df,
        treatment=treatment,
        outcome=outcome,
        prediction=prediction,
        min_rows=min_rows,
        steps=steps,
        effect_fn=effect_fn,
    )

    # relative gain = (effect - baseline) * share of rows used, weighted by the segment's share.
    weighted_relative_gain = [
        (point - overall_effect) * (n_used / total_rows) * (width / total_rows)
        for n_used, point, width in zip(checkpoints, curve, widths)
    ]
    return abs(sum(weighted_relative_gain))