Source code for fairlens.metrics.significance

"""
Collection of methods which can be used to numerically or analytically compute p-values and confidence intervals.

This module provides three functions to sample and generate distributions required for estimating p_values:
  - `permutation_statistic`
  - `bootstrap_statistic`
  - `bootstrap_binned_statistic`

The functions, `resampling_p_value`, `resampling_interval` can be use these distributions to
carry out p-value tests or obtain a confidence interval.
"""

from typing import Any, Callable, List, Mapping, Optional, Tuple, Union

import numpy as np
import pandas as pd
from scipy.stats import beta, binom_test, brunnermunzel, norm

from .. import utils


[docs]def binominal_proportion_p_value(p_obs: float, p_null: float, n: int, alternative: str = "two-sided") -> float:
    """Calculate an exact p-value for an observed binomial proportion of a sample.

    Args:
        p_obs (float):
            Observed proportion of successes.
        p_null (float):
            Expected proportion of sucesses under null hypothesis.
        n (int):
            Sample size.
        alternative (str, optional):
            Indicates the alternative hypothesis. One of "two-sided", "greater", "less".
            Defaults to "two-sided".

    Returns:
        float:
            The p-value under the null hypothesis.
    """

    k = np.ceil(p_obs * n)
    return binom_test(k, n, p_null, alternative)


[docs]def binominal_proportion_interval(
    p: float, n: int, cl: float = 0.95, method: str = "clopper-pearson"
) -> Tuple[float, float]:
    """Calculate an approximate confidence interval for a binomial proportion of a sample.

    Args:
        p (float):
            Proportion of successes.
        n (int):
            Sample size.
        cl (float, optional):
            Confidence level of the interval.
        method (str, optional):
            Approximation method. One of "normal", "clopper-pearson", "agresti-coull".
            Defaults to "clopper-pearson".

    Returns:
        Tuple[float, float]:
            A tuple containing the confidence interval.
    """

    k = n * p
    alpha = 1 - cl
    z = norm.ppf(1 - alpha / 2)

    if method == "normal":
        low = p - z * np.sqrt(p * (1 - p) / n)
        high = p + z * np.sqrt(p * (1 - p) / n)

    elif method == "clopper-pearson":
        low = beta.ppf(alpha / 2, k, n - k + 1)
        high = beta.ppf(1 - alpha / 2, k + 1, n - k)

    elif method == "agresti-coull":
        n_ = n + z**2
        p_ = 1 / n_ * (k + z**2 / 2)
        low = p_ - z * np.sqrt(p_ * (1 - p_) / n_)
        high = p_ + z * np.sqrt(p_ * (1 - p_) / n_)

    else:
        raise ValueError("'method' argument must be one of 'normal', 'clopper-pearson', 'agresti-coull'.")

    return low, high


[docs]def permutation_statistic(
    x: pd.Series,
    y: pd.Series,
    statistic: Callable[[pd.Series, pd.Series], float],
    n_perm: int = 100,
) -> np.ndarray:
    """
    Performs the sampling for a two sample permutation test.

    Args:
        x (pd.Series):
            First data sample.
        y (pd.Series):
            Second data sample.
        statistic (Callable[[pd.Series, pd.Series], float]):
            Function that computes the test statistic.
        n_perm (int):
            Number of permutations.

    Returns:
        np.ndarray:
            The distribution of the statistic on a n_perm permutations of samples.
    """

    joint = np.concatenate((x, y))
    t_null = np.empty(n_perm)

    for i in range(n_perm):
        perm = np.random.permutation(joint)
        x_sample = perm[: len(x)]
        y_sample = perm[len(x) :]
        t_null[i] = statistic(x_sample, y_sample)

    return t_null


[docs]def bootstrap_statistic(
    x: pd.Series,
    y: pd.Series,
    statistic: Callable[[pd.Series, pd.Series], float],
    n_samples: int = 100,
    sample_size: Optional[int] = None,
) -> np.ndarray:
    """Compute the samples of a statistic estimate using the bootstrap method.

    Args:
        x (pd.Series):
            First data sample.
        y (pd.Series):
            Second data sample.
        statistic (Callable[[pd.Series, pd.Series], float]):
            Function that computes the test statistic.
        n_samples (int, optional):
            Number of bootstrap samples to perform.
        sample_size (Optional[int], optional):
            Number of data samples in a bootstrap sample.

    Returns:
        np.ndarray:
            The bootstrap samples.
    """

    if sample_size is None:
        sample_size = min(len(x), len(y))

    statistic_samples = np.empty(n_samples)
    for i in range(n_samples):
        x_sample = x.sample(sample_size, replace=True)
        y_sample = y.sample(sample_size, replace=True)
        statistic_samples[i] = statistic(x_sample, y_sample)

    return statistic_samples


[docs]def bootstrap_binned_statistic(
    h_x: pd.Series, h_y: pd.Series, statistic: Callable[[pd.Series, pd.Series], float], n_samples: int = 1000
) -> np.ndarray:
    """Compute the samples of a binned statistic estimate using the bootstrap method.

    Args:
        h_x (pd.Series):
            First histogram.
        h_y (pd.Series):
            Second histogram.
        statistic (Callable[[pd.Series, pd.Series], float]):
            Function that computes the statistic.
        n_samples (int, optional):
            Number of bootstrap samples to perform.

    Returns:
        np.ndarray:
            The bootstrap samples.
    """

    statistic_samples = np.empty(n_samples)

    n_x = h_x.sum()
    n_y = h_y.sum()

    with np.errstate(divide="ignore", invalid="ignore"):
        p_x = np.nan_to_num(h_x / n_x)
        p_y = np.nan_to_num(h_y / n_y)

    x_samples = np.random.multinomial(n_x, p_x, size=n_samples)
    y_samples = np.random.multinomial(n_y, p_y, size=n_samples)

    for i in range(n_samples):
        statistic_samples[i] = statistic(x_samples[i], y_samples[i])

    return statistic_samples


[docs]def resampling_p_value(t_obs: float, t_distribution: pd.Series, alternative: str = "two-sided") -> float:
    """Compute a p-value using a resampled test statistic distribution.

    Args:
        t_obs (float):
            Observed value of the test statistic.
        t_distribution (pd.Series):
            Samples of test statistic distribution under the null hypothesis.
        alternative (str, optional):
            Indicates the alternative hypothesis. One of "two-sided", "greater", "less".
            Defaults to "two-sided".

    Returns:
        float:
            The p-value under the null hypothesis.
    """

    if alternative not in ("two-sided", "greater", "less"):
        raise ValueError("'alternative' argument must be one of 'two-sided', 'greater', 'less'")

    n_samples = len(t_distribution)

    if alternative == "two-sided":
        p = np.sum(np.abs(t_distribution) >= np.abs(t_obs)) / n_samples

    elif alternative == "greater":
        p = np.sum(t_distribution >= t_obs) / n_samples

    else:
        p = np.sum(t_distribution < t_obs) / n_samples

    return p


[docs]def resampling_interval(t_obs: float, t_distribution: pd.Series, cl: float = 0.95):
    """Compute a confidence interval using a distribution of the test statistic
    on resampled data.

    Args:
        t_obs (float):
            Observed value of the test statistic.
        t_distribution (pd.Series):
            Samples of test statistic distribution.
        cl (float, optional):
            Confidence level of the interval.

    Returns:
        Tuple[float, float]:
            A tuple containing the confidence interval.
    """

    percentiles = 100 * (1 - cl) / 2, 100 * (1 - (1 - cl) / 2)
    delta = t_distribution - t_obs
    d1, d2 = np.percentile(delta, percentiles)
    return t_obs + d1, t_obs + d2


[docs]def brunner_munzel_test(
    df: pd.DataFrame,
    target_attr: str,
    group1: Union[Mapping[str, List[Any]], pd.Series],
    group2: Union[Mapping[str, List[Any]], pd.Series],
) -> Tuple[float, float]:
    """Compute the non-parametric Brunner-Munzel test of the hypothesis that the probability
    of getting large values in the target attribute distributions (determined by the input
    groups of interest) is equal, without requiring equivariance.

    Args:
        df (pd.DataFrame):
            The input datafame.
        target_attr (str):
            The target attribute in the dataframe from which the distributions are formed.
        group1 (Union[Mapping[str, List[Any]], pd.Series]):
            The first group of interest. Each group can be a mapping / dict from attribute to value or
            a predicate itself, i.e. pandas series consisting of bools which can be used as a predicate
            to index a subgroup from the dataframe.
            Examples: {"Sex": ["Male"]}, df["Sex"] == "Female"
        group2 (Union[Mapping[str, List[Any]], pd.Series]):
            The second group of interest. Each group can be a mapping / dict from attribute to value or
            a predicate itself, i.e. pandas series consisting of bools which can be used as a predicate
            to index a subgroup from the dataframe.
            Examples: {"Sex": ["Male"]}, df["Sex"] == "Female"

    Returns:
        Tuple[float, float]:
            A tuple consisting of the Brunner-Munzel statistic and p-value associated with the significance
            of the observed difference.
    """
    pred_a, pred_b = tuple(utils.get_predicates_mult(df, [group1, group2]))
    sr_a = df[pred_a][target_attr]
    sr_b = df[pred_b][target_attr]
    return brunnermunzel(sr_a, sr_b, nan_policy="omit")