Source code for infovar.stats.ranking

from typing import Optional, Union

import numpy as np
from tqdm import tqdm

from scipy import stats, integrate

__all__ = [
    "prob_higher"
]

[docs] def prob_higher( mus: np.ndarray, sigmas: np.ndarray, idx: Optional[int]=None, approx: bool=True, pbar: bool=False, ) -> Union[np.ndarray, float]: """ Returns the probability of a given estimation (described by an estimated value and a standard deviation) to be the highest among all provided estimations. The argument `idx` specifies the index of the estimation whose probability to be the highest has to be computed. If None, returns the probability for every provided estimation. Source: https://stats.stackexchange.com/questions/44139/what-is-px-1x-2-x-1x-3-x-1x-n Parameters ---------- mus : np.ndarray Estimates. sigmas : np.ndarray Uncertainty of estimates (1 sigma). idx : Optional[int], optional _description_, by default None approx : bool, optional If True, neglects estimates above three sigma. Default: True. pbar : bool, optional If True, displays a progress bar. Default: False Returns ------- Union[np.ndarray, float] If `idx` is an integer, probability of the i-th estimate to be the highest. If `idx` is None, array of probability for each estimate. """ if not isinstance(mus, np.ndarray): mus = np.array(mus) if not isinstance(sigmas, np.ndarray): sigmas = np.array(sigmas) assert mus.shape == sigmas.shape n_vars = mus.size def integrand(t: np.ndarray, i: int) -> np.ndarray: res = stats.norm.logpdf(t, loc=mus[i], scale=sigmas[i]) for j in range(n_vars): if j != i: res += stats.norm.logcdf(t, loc=mus[j], scale=sigmas[j]) return np.exp(res) # We restrict to a bounded interval to prevent large integration errors due to the small domain where the integrand is not zero n_sigma = 5 # In approx mode, only the probabilities of line combinations whose +/- n_sigma_approx intervals overlap are calculated if approx: i_higher = np.argmax(mus) n_sigma_approx = 1.5 significant = (mus + n_sigma_approx*sigmas) > (mus[i_higher] - n_sigma_approx*sigmas[i_higher]) else: significant = np.ones_like(mus, dtype=bool) # If the user wants to compute all probabilities if idx is None: probs = np.zeros(n_vars)# * np.nan _iterable = tqdm(range(n_vars)) if pbar else range(n_vars) for i in _iterable: if significant[i]: bounds = (mus[i]-n_sigma*sigmas[i], mus[i]+n_sigma*sigmas[i]) fun = lambda t: integrand(t, i) p = integrate.quad(fun, *bounds)[0] probs[i] = p return probs / np.sum(probs) # If the user has asked for a particular probability bounds = (mus[idx]-n_sigma*sigmas[idx], mus[idx]+n_sigma*sigmas[idx]) fun = lambda t: integrand(t, idx) return integrate.quad(fun, *bounds)[0]