# Source code for pycalf.uplift

from typing import Optional, Tuple

import numpy as np


class UpliftModel:
    """Two-model uplift estimator.

    One learner is fitted on the treatment group and another on the control
    group. The uplift score of a sample is the ratio between the
    positive-class probabilities predicted by the two learners.
    """

    def __init__(self, learner_treat, learner_control) -> None:
        """
        Parameters
        ----------
        learner_treat : object
            Learner to estimate effect of treatment group.
            Must have fit and predict_proba methods.
        learner_control : object
            Learner to estimate effect of control group.
            Must have fit and predict_proba methods.
        """
        self.learner_treat = learner_treat
        self.learner_control = learner_control

    @staticmethod
    def _fit_learner(learner, X, y, weight) -> None:
        """Fit ``learner``, forwarding ``sample_weight`` only when given."""
        # Not every learner's ``fit`` accepts ``sample_weight``; passing
        # ``sample_weight=None`` unconditionally would make those learners
        # fail even though the caller never asked for weighting.
        if weight is None:
            learner.fit(X, y)
        else:
            learner.fit(X, y, sample_weight=weight)

    @staticmethod
    def _cumulative_mean(total: np.ndarray, count: np.ndarray) -> np.ndarray:
        """Return ``total / count`` element-wise, with 0.0 where ``count`` is 0."""
        return np.divide(
            total,
            count,
            out=np.zeros(np.shape(count), dtype=float),
            where=count != 0,
        )

    def fit(
        self,
        X_treat: np.ndarray,
        y_treat: np.ndarray,
        X_control: np.ndarray,
        y_control: np.ndarray,
        weight_treat: Optional[np.ndarray] = None,
        weight_control: Optional[np.ndarray] = None,
    ) -> None:
        """Fit the treatment and control learners on their own data.

        Parameters
        ----------
        X_treat : numpy.ndarray
            Features for learner_treat.
        y_treat : numpy.ndarray
            Labels for learner_treat.
        X_control : numpy.ndarray
            Features for learner_control.
        y_control : numpy.ndarray
            Labels for learner_control.
        weight_treat : numpy.ndarray or None
            Weights for learner_treat. When None, ``fit`` is called without
            a ``sample_weight`` argument.
        weight_control : numpy.ndarray or None
            Weights for learner_control. When None, ``fit`` is called without
            a ``sample_weight`` argument.

        Returns
        -------
        None
        """
        self._fit_learner(self.learner_treat, X_treat, y_treat, weight_treat)
        self._fit_learner(
            self.learner_control, X_control, y_control, weight_control
        )

    def estimate_uplift_score(self, X: np.ndarray) -> np.ndarray:
        """Estimate uplift scores.

        The score is the ratio of column 1 of ``predict_proba`` from the
        treatment learner to column 1 from the control learner.

        Parameters
        ----------
        X : numpy.ndarray
            Features for prediction treat and control probability.

        Returns
        -------
        uplift_score : np.array
            Uplift Score. Entries where the control probability is 0 follow
            NumPy's division rules (inf or nan).
        """
        proba_treat = self.learner_treat.predict_proba(X)[:, 1]
        proba_control = self.learner_control.predict_proba(X)[:, 1]
        return proba_treat / proba_control

    def predict(
        self, X: np.ndarray, treatment: np.ndarray, y: np.ndarray
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Compute the uplift scores and the cumulative lift curve.

        Samples are ordered by descending uplift score. At each rank, the
        lift is the difference between the cumulative mean outcome of treated
        and control samples seen so far, multiplied by the number of treated
        samples seen so far.

        Parameters
        ----------
        X : numpy.ndarray
            Features for prediction treat and control probability.
        treatment : numpy.ndarray[bool]
            Flags with or without intervention.
        y : numpy.ndarray
            Outcome variables.

        Returns
        -------
        (uplift_score, lift) : tuple
            Uplift score (sorted in descending order) and lift values in the
            same order.
        """
        # Coerce to arrays so that positional fancy indexing below also works
        # for lists and other array-likes.
        treatment = np.asarray(treatment)
        y = np.asarray(y)

        uplift_score = self.estimate_uplift_score(X)
        order = np.argsort(uplift_score)[::-1]
        uplift_score = uplift_score[order]
        y = y[order]
        treatment = treatment[order]

        is_treat = treatment == 1
        is_control = treatment == 0

        # nancumsum keeps NaN outcomes from poisoning the running totals.
        y_treat = np.nancumsum(np.where(is_treat, y, 0.0))
        y_control = np.nancumsum(np.where(is_control, y, 0.0))
        treat_size = np.cumsum(is_treat, dtype=float)
        control_size = np.cumsum(is_control, dtype=float)

        # Until a group has at least one sample its running mean is 0.0.
        cumavg_y_treat = self._cumulative_mean(y_treat, treat_size)
        cumavg_y_control = self._cumulative_mean(y_control, control_size)

        lift = (cumavg_y_treat - cumavg_y_control) * treat_size
        return (uplift_score, lift)

    def get_baseline(self, lift: np.ndarray) -> np.ndarray:
        """Return the straight line from 0 to the final lift value.

        Parameters
        ----------
        lift : numpy.ndarray
            Array of lift, treatment effect.

        Returns
        -------
        base_line : numpy.ndarray
            Array of random treat effect, same length as ``lift``. Empty when
            ``lift`` is empty.
        """
        lift = np.asarray(lift)
        data_size = len(lift)
        if data_size == 0:
            # No final value to interpolate towards.
            return np.empty(0, dtype=float)
        return np.linspace(0, lift[-1], data_size)

    def get_auuc(self, lift: np.ndarray) -> float:
        """Return the mean gap between the lift curve and its baseline.

        Parameters
        ----------
        lift : numpy.ndarray
            Array of lift, treatment effect.

        Returns
        -------
        auuc : float
            AUUC score. NaN when ``lift`` is empty.
        """
        lift = np.asarray(lift)
        data_size = len(lift)
        if data_size == 0:
            return float("nan")
        base_line = self.get_baseline(lift)
        return (lift - base_line).sum() / data_size