"""Implementation of the Perdomo et. al model of strategic classification.
The data is from the Kaggle Give Me Some Credit dataset:
https://www.kaggle.com/c/GiveMeSomeCredit/data,
and the dynamics are taken from:
Perdomo, Juan C., Tijana Zrnic, Celestine Mendler-Dünner, and Moritz Hardt.
"Performative Prediction." arXiv preprint arXiv:2002.06673 (2020).
"""
import copy
import dataclasses
from typing import Any
import whynot as wn
import whynot.traceable_numpy as np
from whynot.dynamics import BaseConfig, BaseIntervention, BaseState
from whynot.simulators.credit.dataloader import CreditData


@dataclasses.dataclass
class State(BaseState):
    # pylint: disable-msg=too-few-public-methods
    """State of the Credit model."""

    #: Matrix of agent features (e.g. https://www.kaggle.com/c/GiveMeSomeCredit/data)
    features: np.ndarray = CreditData.features
    #: Vector indicating whether or not the agent experiences financial distress
    labels: np.ndarray = CreditData.labels

    def values(self):
        """Return the state as a dictionary of numpy arrays."""
        return {name: getattr(self, name) for name in self.variable_names()}


@dataclasses.dataclass
class Config(BaseConfig):
    # pylint: disable-msg=too-few-public-methods
    """Parameterization of Credit simulator dynamics.

    Examples
    --------
    >>> # Configure the simulator to run for 10 iterations
    >>> config = Config(start_time=0, end_time=10, delta_t=1)

    """

    # Dynamics parameters
    #: Subset of the features that can be manipulated by the agent
    changeable_features: np.ndarray = np.array([0, 5, 7])
    #: How much the agent adapts her features in response to a classifier
    epsilon: float = 0.1
    #: Parameters of the logistic regression classifier used by the institution
    theta: np.ndarray = np.ones((11, 1))
    #: L2 penalty on the logistic regression loss
    l2_penalty: float = 0.0
    #: Whether or not the dynamics have memory
    memory: bool = False
    #: State the system resets to if the dynamics have no memory
    base_state: Any = State()

    # Simulator book-keeping
    #: Start time of the simulator
    start_time: int = 0
    #: End time of the simulator
    end_time: int = 5
    #: Spacing of the evaluation grid
    delta_t: int = 1


class Intervention(BaseIntervention):
    # pylint: disable-msg=too-few-public-methods
    """Parameterization of an intervention in the Credit model.

    An intervention changes a subset of the configuration variables at the
    specified time step. The remaining variables are unchanged.

    Examples
    --------
    >>> # Starting at time 25, update the classifier to random chance.
    >>> config = Config()
    >>> Intervention(time=25, theta=np.zeros_like(config.theta))

    """

    def __init__(self, time=30, **kwargs):
        """Specify an intervention in credit.

        Parameters
        ----------
        time: int
            Time of the intervention in the simulator dynamics.
        kwargs: dict
            Only valid keyword arguments are parameters of Config.

        """
        super(Intervention, self).__init__(Config, time, **kwargs)


def logistic_loss(config, features, labels, theta):
    """Evaluate the performative loss for the logistic regression classifier."""
    config = config.update(Intervention(theta=theta))

    # Compute the average negative log-likelihood.
    num_samples = features.shape[0]
    logits = features @ config.theta
    log_likelihood = (1.0 / num_samples) * np.sum(
        -1.0 * np.multiply(labels, logits) + np.log(1 + np.exp(logits))
    )

    # Add regularization (without penalizing the bias term).
    regularization = (config.l2_penalty / 2.0) * np.linalg.norm(config.theta[:-1]) ** 2

    return log_likelihood + regularization
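

# A minimal sketch (not part of the original module) of the gradient of
# ``logistic_loss`` with respect to theta, assuming ``labels`` has the same
# shape as ``features @ theta`` and that the arrays support standard numpy
# operations. Something like this is needed for gradient-based retraining,
# e.g. repeated gradient descent from the performative prediction paper.
def logistic_gradient(config, features, labels, theta):
    """Gradient of the regularized logistic loss with respect to theta."""
    config = config.update(Intervention(theta=theta))
    num_samples = features.shape[0]
    # Predicted probabilities via the sigmoid of the logits.
    probabilities = 1.0 / (1.0 + np.exp(-features @ config.theta))
    grad = (1.0 / num_samples) * features.T @ (probabilities - labels)
    # Gradient of the L2 penalty, leaving the bias term unpenalized.
    grad[:-1] += config.l2_penalty * config.theta[:-1]
    return grad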


def agent_model(features, config):
    """Compute the agent's response to the classifier and adapt features accordingly.

    TODO: For now, the best-response model corresponds to best-response with
    linear utility and quadratic costs. We should expand this to cover a rich
    set of agent models beyond linear/quadratic, and potentially beyond
    best-response.
    """
    # Move everything by epsilon in the direction of better classification.
    strategic_features = np.copy(features)
    theta_strat = config.theta[config.changeable_features].flatten()
    strategic_features[:, config.changeable_features] -= config.epsilon * theta_strat
    return strategic_features


def dynamics(state, time, config, intervention=None):
    """Perform one round of interaction between the agents and the credit scorer.

    Parameters
    ----------
    state: whynot.simulators.credit.State
        Agent state at time TIME
    time: int
        Current round of interaction
    config: whynot.simulators.credit.Config
        Configuration object controlling the interaction, e.g. the classifier
        and the agent model
    intervention: whynot.simulators.credit.Intervention
        Intervention object specifying when and how to update the dynamics

    Returns
    -------
    state: tuple
        Tuple of (features, labels) after one step of strategic interaction

    """
    if intervention and time >= intervention.time:
        config = config.update(intervention)

    # Only use the current state if the dynamics have memory.
    # Otherwise, agents "reset" to the base dataset. The latter
    # case is the one treated in the performative prediction paper.
    if config.memory:
        features, labels = state.features, state.labels
    else:
        features, labels = config.base_state.features, config.base_state.labels

    # Update features in response to the classifier. Labels are fixed.
    strategic_features = agent_model(features, config)

    return strategic_features, labels
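

# Illustrative single step (a sketch, not part of the original module): one
# round of best response against the default classifier on the base dataset.
#
#   config = Config()
#   new_features, labels = dynamics(State(), time=0, config=config)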


def simulate(initial_state, config, intervention=None, seed=None):
    """Simulate a run of the Credit model.

    Parameters
    ----------
    initial_state: whynot.credit.State
        Initial state of the simulator
    config: whynot.credit.Config
        Base parameters for the simulator run
    intervention: whynot.credit.Intervention
        (Optional) Parameters specifying a change in dynamics
    seed: int
        Unused since the simulator is deterministic

    Returns
    -------
    run: whynot.dynamics.Run
        Simulator rollout

    """
    # Iterate the discrete dynamics.
    times = [config.start_time]
    states = [initial_state]
    state = copy.deepcopy(initial_state)
    for step in range(config.start_time, config.end_time):
        next_state = dynamics(state, step, config, intervention)
        state = State(*next_state)
        states.append(state)
        times.append(step + 1)

    return wn.dynamics.Run(states=states, times=times)
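

# Usage sketch (illustrative, not part of the original module): roll out the
# dynamics under an intervention that flattens the classifier to random
# chance halfway through the run.
#
#   config = Config(start_time=0, end_time=10)
#   intervention = Intervention(time=5, theta=np.zeros_like(config.theta))
#   run = simulate(State(), config, intervention=intervention)
#   final_features = run.states[-1].features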


if __name__ == "__main__":
    print(simulate(State(), Config(end_time=2)))