"""
Collection of methods which can be used to numerically or analytically compute p-values and confidence intervals.
This module provides three functions to sample and generate distributions required for estimating p_values:
- `permutation_statistic`
- `bootstrap_statistic`
- `bootstrap_binned_statistic`
The functions `resampling_p_value` and `resampling_interval` can use these distributions to
carry out p-value tests or to obtain confidence intervals.
"""
from typing import Any, Callable, List, Mapping, Optional, Tuple, Union
import numpy as np
import pandas as pd
from scipy.stats import beta, binom_test, brunnermunzel, norm
from .. import utils
def binominal_proportion_p_value(p_obs: float, p_null: float, n: int, alternative: str = "two-sided") -> float:
    """Calculate an exact p-value for an observed binomial proportion of a sample.

    Args:
        p_obs (float):
            Observed proportion of successes.
        p_null (float):
            Expected proportion of successes under the null hypothesis.
        n (int):
            Sample size.
        alternative (str, optional):
            Indicates the alternative hypothesis. One of "two-sided", "greater", "less".
            Defaults to "two-sided".

    Returns:
        float:
            The p-value under the null hypothesis.
    """
    # scipy.stats.binom_test was deprecated in SciPy 1.10 and removed in 1.12;
    # binomtest (SciPy >= 1.7) is the supported replacement and returns an
    # object whose .pvalue matches the old return value.
    from scipy.stats import binomtest

    # Round up so that a fractional observed count maps to the next whole
    # success; binomtest requires an integer count.
    k = int(np.ceil(p_obs * n))
    return binomtest(k, n, p_null, alternative=alternative).pvalue
def binominal_proportion_interval(
    p: float, n: int, cl: float = 0.95, method: str = "clopper-pearson"
) -> Tuple[float, float]:
    """Calculate an approximate confidence interval for a binomial proportion of a sample.

    Args:
        p (float):
            Proportion of successes.
        n (int):
            Sample size.
        cl (float, optional):
            Confidence level of the interval. Defaults to 0.95.
        method (str, optional):
            Approximation method. One of "normal", "clopper-pearson", "agresti-coull".
            Defaults to "clopper-pearson".

    Returns:
        Tuple[float, float]:
            A tuple containing the confidence interval.

    Raises:
        ValueError: If `method` is not one of the supported approximation methods.
    """
    k = n * p  # number of observed successes
    alpha = 1 - cl
    z = norm.ppf(1 - alpha / 2)

    if method == "normal":
        low = p - z * np.sqrt(p * (1 - p) / n)
        high = p + z * np.sqrt(p * (1 - p) / n)
    elif method == "clopper-pearson":
        # beta.ppf is undefined (NaN) when a shape parameter is zero, so the
        # endpoints are pinned to the boundary for k == 0 and k == n, per the
        # Clopper-Pearson convention.
        low = 0.0 if k == 0 else beta.ppf(alpha / 2, k, n - k + 1)
        high = 1.0 if k == n else beta.ppf(1 - alpha / 2, k + 1, n - k)
    elif method == "agresti-coull":
        n_ = n + z**2
        p_ = 1 / n_ * (k + z**2 / 2)
        low = p_ - z * np.sqrt(p_ * (1 - p_) / n_)
        high = p_ + z * np.sqrt(p_ * (1 - p_) / n_)
    else:
        raise ValueError("'method' argument must be one of 'normal', 'clopper-pearson', 'agresti-coull'.")

    return low, high
def permutation_statistic(
    x: pd.Series,
    y: pd.Series,
    statistic: Callable[[pd.Series, pd.Series], float],
    n_perm: int = 100,
) -> np.ndarray:
    """
    Performs the sampling for a two sample permutation test.

    Args:
        x (pd.Series):
            First data sample.
        y (pd.Series):
            Second data sample.
        statistic (Callable[[pd.Series, pd.Series], float]):
            Function that computes the test statistic.
        n_perm (int):
            Number of permutations.

    Returns:
        np.ndarray:
            The distribution of the statistic on a n_perm permutations of samples.
    """
    pooled = np.concatenate((x, y))
    split = len(x)

    def draw() -> float:
        # Shuffle the pooled data, re-split it into two groups of the original
        # sizes, and evaluate the statistic on the shuffled split.
        shuffled = np.random.permutation(pooled)
        return statistic(shuffled[:split], shuffled[split:])

    return np.array([draw() for _ in range(n_perm)])
def bootstrap_statistic(
    x: pd.Series,
    y: pd.Series,
    statistic: Callable[[pd.Series, pd.Series], float],
    n_samples: int = 100,
    sample_size: Optional[int] = None,
) -> np.ndarray:
    """Compute the samples of a statistic estimate using the bootstrap method.

    Args:
        x (pd.Series):
            First data sample.
        y (pd.Series):
            Second data sample.
        statistic (Callable[[pd.Series, pd.Series], float]):
            Function that computes the test statistic.
        n_samples (int, optional):
            Number of bootstrap samples to perform.
        sample_size (Optional[int], optional):
            Number of data samples in a bootstrap sample.

    Returns:
        np.ndarray:
            The bootstrap samples.
    """
    # Default to the smaller of the two sample sizes so both resamples match.
    size = sample_size if sample_size is not None else min(len(x), len(y))

    def resample() -> float:
        # Draw one bootstrap replicate from each sample (with replacement)
        # and evaluate the statistic on the pair.
        return statistic(x.sample(size, replace=True), y.sample(size, replace=True))

    return np.array([resample() for _ in range(n_samples)])
def bootstrap_binned_statistic(
    h_x: pd.Series, h_y: pd.Series, statistic: Callable[[pd.Series, pd.Series], float], n_samples: int = 1000
) -> np.ndarray:
    """Compute the samples of a binned statistic estimate using the bootstrap method.

    Args:
        h_x (pd.Series):
            First histogram.
        h_y (pd.Series):
            Second histogram.
        statistic (Callable[[pd.Series, pd.Series], float]):
            Function that computes the statistic.
        n_samples (int, optional):
            Number of bootstrap samples to perform.

    Returns:
        np.ndarray:
            The bootstrap samples.
    """
    total_x = h_x.sum()
    total_y = h_y.sum()

    # Normalise the histograms into probability vectors; an all-zero histogram
    # divides by zero, so the resulting NaNs are mapped back to zero.
    with np.errstate(divide="ignore", invalid="ignore"):
        probs_x = np.nan_to_num(h_x / total_x)
        probs_y = np.nan_to_num(h_y / total_y)

    resampled_x = np.random.multinomial(total_x, probs_x, size=n_samples)
    resampled_y = np.random.multinomial(total_y, probs_y, size=n_samples)

    return np.array([statistic(hx, hy) for hx, hy in zip(resampled_x, resampled_y)])
def resampling_p_value(t_obs: float, t_distribution: pd.Series, alternative: str = "two-sided") -> float:
    """Compute a p-value using a resampled test statistic distribution.

    Args:
        t_obs (float):
            Observed value of the test statistic.
        t_distribution (pd.Series):
            Samples of test statistic distribution under the null hypothesis.
        alternative (str, optional):
            Indicates the alternative hypothesis. One of "two-sided", "greater", "less".
            Defaults to "two-sided".

    Returns:
        float:
            The p-value under the null hypothesis.

    Raises:
        ValueError: If `alternative` is not one of "two-sided", "greater", "less".
    """
    if alternative not in ("two-sided", "greater", "less"):
        raise ValueError("'alternative' argument must be one of 'two-sided', 'greater', 'less'")

    n_samples = len(t_distribution)

    if alternative == "two-sided":
        p = np.sum(np.abs(t_distribution) >= np.abs(t_obs)) / n_samples
    elif alternative == "greater":
        p = np.sum(t_distribution >= t_obs) / n_samples
    else:
        # Mirror the "greater" tail: samples exactly equal to t_obs are "at
        # least as extreme" and must be counted (<=, not a strict <).
        p = np.sum(t_distribution <= t_obs) / n_samples

    return p
def resampling_interval(t_obs: float, t_distribution: pd.Series, cl: float = 0.95):
    """Compute a confidence interval using a distribution of the test statistic
    on resampled data.

    Args:
        t_obs (float):
            Observed value of the test statistic.
        t_distribution (pd.Series):
            Samples of test statistic distribution.
        cl (float, optional):
            Confidence level of the interval.

    Returns:
        Tuple[float, float]:
            A tuple containing the confidence interval.
    """
    # Equal tail mass on each side of the interval.
    tail = (1 - cl) / 2
    quantiles = 100 * tail, 100 * (1 - tail)

    # Percentiles of the deviation from the observed value give a pivot-style
    # interval centred on t_obs.
    shifts = t_distribution - t_obs
    lo, hi = np.percentile(shifts, quantiles)

    return t_obs + lo, t_obs + hi
def brunner_munzel_test(
    df: pd.DataFrame,
    target_attr: str,
    group1: Union[Mapping[str, List[Any]], pd.Series],
    group2: Union[Mapping[str, List[Any]], pd.Series],
) -> Tuple[float, float]:
    """Compute the non-parametric Brunner-Munzel test of the hypothesis that the probability
    of getting large values in the target attribute distributions (determined by the input
    groups of interest) is equal, without requiring equivariance.

    Args:
        df (pd.DataFrame):
            The input datafame.
        target_attr (str):
            The target attribute in the dataframe from which the distributions are formed.
        group1 (Union[Mapping[str, List[Any]], pd.Series]):
            The first group of interest. Each group can be a mapping / dict from attribute to value or
            a predicate itself, i.e. pandas series consisting of bools which can be used as a predicate
            to index a subgroup from the dataframe.
            Examples: {"Sex": ["Male"]}, df["Sex"] == "Female"
        group2 (Union[Mapping[str, List[Any]], pd.Series]):
            The second group of interest. Each group can be a mapping / dict from attribute to value or
            a predicate itself, i.e. pandas series consisting of bools which can be used as a predicate
            to index a subgroup from the dataframe.
            Examples: {"Sex": ["Male"]}, df["Sex"] == "Female"

    Returns:
        Tuple[float, float]:
            A tuple consisting of the Brunner-Munzel statistic and p-value associated with the significance
            of the observed difference.
    """
    # Resolve each group specification into a boolean predicate over df, then
    # slice out the target-attribute values for the two subgroups.
    predicates = list(utils.get_predicates_mult(df, [group1, group2]))
    sample_a = df[predicates[0]][target_attr]
    sample_b = df[predicates[1]][target_attr]

    # NaNs in the target column are dropped rather than propagated, so missing
    # values do not invalidate the test.
    return brunnermunzel(sample_a, sample_b, nan_policy="omit")