"""Implementation of the Perdomo et. al model of strategic classification.
The data is from the Kaggle Give Me Some Credit dataset:
https://www.kaggle.com/c/GiveMeSomeCredit/data,
and the dynamics are taken from:
Perdomo, Juan C., Tijana Zrnic, Celestine Mendler-Dünner, and Moritz Hardt.
"Performative Prediction." arXiv preprint arXiv:2002.06673 (2020).
"""
import copy
import dataclasses
from typing import Any
import whynot as wn
import whynot.traceable_numpy as np
from whynot.dynamics import BaseConfig, BaseIntervention, BaseState
from whynot.simulators.credit.dataloader import CreditData


@dataclasses.dataclass
class State(BaseState):
    # pylint: disable-msg=too-few-public-methods
    """State of the Credit model."""

    #: Matrix of agent features (e.g. https://www.kaggle.com/c/GiveMeSomeCredit/data)
    features: np.ndarray = CreditData.features
    #: Vector indicating whether or not the agent experiences financial distress
    labels: np.ndarray = CreditData.labels

    def values(self):
        """Return the state as a dictionary of numpy arrays."""
        return {name: getattr(self, name) for name in self.variable_names()}


@dataclasses.dataclass
class Config(BaseConfig):
    # pylint: disable-msg=too-few-public-methods
    """Parameterization of Credit simulator dynamics.

    Examples
    --------
    >>> # Configure the simulator to run for 10 iterations
    >>> config = Config(start_time=0, end_time=10, delta_t=1)

    """

    # Dynamics parameters
    #: Subset of the features that can be manipulated by the agent
    changeable_features: np.ndarray = np.array([0, 5, 7])
    #: How much the agent adapts her features in response to a classifier
    epsilon: float = 0.1
    #: Parameters of the logistic regression classifier used by the institution
    theta: np.ndarray = np.ones((11, 1))
    #: L2 penalty on the logistic regression loss
    l2_penalty: float = 0.0
    #: Whether or not the dynamics have memory
    memory: bool = False
    #: State the system resets to if the dynamics have no memory
    base_state: Any = State()

    # Simulator book-keeping
    #: Start time of the simulator
    start_time: int = 0
    #: End time of the simulator
    end_time: int = 5
    #: Spacing of the evaluation grid
    delta_t: int = 1


class Intervention(BaseIntervention):
    # pylint: disable-msg=too-few-public-methods
    """Parameterization of an intervention in the Credit model.

    An intervention changes a subset of the configuration variables at the
    specified time step. The remaining variables are unchanged.

    Examples
    --------
    >>> # Starting at time 25, update the classifier to random chance.
    >>> config = Config()
    >>> Intervention(time=25, theta=np.zeros_like(config.theta))

    """

    def __init__(self, time=30, **kwargs):
        """Specify an intervention in credit.

        Parameters
        ----------
        time: int
            Time of the intervention in the simulator dynamics.
        kwargs: dict
            Only valid keyword arguments are parameters of Config.

        """
        super(Intervention, self).__init__(Config, time, **kwargs)


def logistic_loss(config, features, labels, theta):
    """Evaluate the performative loss for the logistic regression classifier."""
    config = config.update(Intervention(theta=theta))

    # Compute the average negative log-likelihood.
    num_samples = features.shape[0]
    logits = features @ config.theta
    log_likelihood = (1.0 / num_samples) * np.sum(
        -1.0 * np.multiply(labels, logits) + np.log(1 + np.exp(logits))
    )

    # Add regularization (without penalizing the bias term).
    regularization = (config.l2_penalty / 2.0) * np.linalg.norm(config.theta[:-1]) ** 2

    return log_likelihood + regularization
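

# A minimal sketch (not part of the original module) of the gradient of
# ``logistic_loss`` with respect to theta, assuming ``labels`` has the same
# shape as ``features @ theta`` and that the arrays support standard numpy
# operations. Something like this is needed for gradient-based retraining,
# e.g. repeated gradient descent from the performative prediction paper.
def logistic_gradient(config, features, labels, theta):
    """Gradient of the regularized logistic loss with respect to theta."""
    config = config.update(Intervention(theta=theta))
    num_samples = features.shape[0]
    # Predicted probabilities via the sigmoid of the logits.
    probabilities = 1.0 / (1.0 + np.exp(-features @ config.theta))
    grad = (1.0 / num_samples) * features.T @ (probabilities - labels)
    # Gradient of the L2 penalty, leaving the bias term unpenalized.
    grad[:-1] += config.l2_penalty * config.theta[:-1]
    return grad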


def agent_model(features, config):
    """Compute the agent's response to the classifier and adapt features accordingly.

    TODO: For now, the best-response model corresponds to best-response with
    linear utility and quadratic costs. We should expand this to cover a rich
    set of agent models beyond linear/quadratic, and potentially beyond
    best-response.
    """
    # Move everything by epsilon in the direction of better classification.
    strategic_features = np.copy(features)
    theta_strat = config.theta[config.changeable_features].flatten()
    strategic_features[:, config.changeable_features] -= config.epsilon * theta_strat
    return strategic_features


def dynamics(state, time, config, intervention=None):
    """Perform one round of interaction between the agents and the credit scorer.

    Parameters
    ----------
    state: whynot.simulators.credit.State
        Agent state at time TIME
    time: int
        Current round of interaction
    config: whynot.simulators.credit.Config
        Configuration object controlling the interaction, e.g. the classifier
        and the agent model
    intervention: whynot.simulators.credit.Intervention
        Intervention object specifying when and how to update the dynamics

    Returns
    -------
    state: tuple
        Tuple of (features, labels) after one step of strategic interaction

    """
    if intervention and time >= intervention.time:
        config = config.update(intervention)

    # Only use the current state if the dynamics have memory.
    # Otherwise, agents "reset" to the base dataset. The latter
    # case is the one treated in the performative prediction paper.
    if config.memory:
        features, labels = state.features, state.labels
    else:
        features, labels = config.base_state.features, config.base_state.labels

    # Update features in response to the classifier. Labels are fixed.
    strategic_features = agent_model(features, config)

    return strategic_features, labels
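

# Illustrative single step (a sketch, not part of the original module): one
# round of best response against the default classifier on the base dataset.
#
#   config = Config()
#   new_features, labels = dynamics(State(), time=0, config=config)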


def simulate(initial_state, config, intervention=None, seed=None):
    """Simulate a run of the Credit model.

    Parameters
    ----------
    initial_state: whynot.credit.State
        Initial state of the simulator
    config: whynot.credit.Config
        Base parameters for the simulator run
    intervention: whynot.credit.Intervention
        (Optional) Parameters specifying a change in dynamics
    seed: int
        Unused since the simulator is deterministic

    Returns
    -------
    run: whynot.dynamics.Run
        Simulator rollout

    """
    # Iterate the discrete dynamics.
    times = [config.start_time]
    states = [initial_state]
    state = copy.deepcopy(initial_state)
    for step in range(config.start_time, config.end_time):
        next_state = dynamics(state, step, config, intervention)
        state = State(*next_state)
        states.append(state)
        times.append(step + 1)

    return wn.dynamics.Run(states=states, times=times)
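

# Usage sketch (illustrative, not part of the original module): roll out the
# dynamics under an intervention that flattens the classifier to random
# chance halfway through the run.
#
#   config = Config(start_time=0, end_time=10)
#   intervention = Intervention(time=5, theta=np.zeros_like(config.theta))
#   run = simulate(State(), config, intervention=intervention)
#   final_features = run.states[-1].features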


if __name__ == "__main__":
    print(simulate(State(), Config(end_time=2)))