Source code for quantificationlib.metrics.binary

"""
Score functions and loss functions for binary quantification problems
"""

# Authors: Alberto Castaño <bertocast@gmail.com>
#          Pablo González <gonzalezgpablo@uniovi.es>
#          Jaime Alonso <jalonso@uniovi.es>
#          Pablo Pérez <pabloperez@uniovi.es>
#          Juan José del Coz <juanjo@uniovi.es>
# License: GPLv3 clause, University of Oviedo

import numpy as np


def binary_kld(p_true, p_pred, eps=1e-12):
    r"""A binary version of the Kullback-Leibler divergence (KLD)

    :math:`kld = p \cdot \log_2(p/\hat{p}) + (1-p) \cdot \log_2((1-p)/(1-\hat{p}))`

    Parameters
    ----------
    p_true : float
        True prevalence for the positive class

    p_pred : float
        Predicted prevalence for the positive class

    eps : float, (default=1e-12)
        To prevent a division by zero when the predicted prevalence is 0 or 1

    Returns
    -------
    KLD : float
        The Kullback-Leibler divergence for binary problems
    """
    if p_pred == 0:
        kld = p_true * np.log2(p_true / eps)
    else:
        kld = p_true * np.log2(p_true / p_pred)
    if p_pred == 1:
        kld = kld + (1 - p_true) * np.log2((1 - p_true) / eps)
    else:
        kld = kld + (1 - p_true) * np.log2((1 - p_true) / (1 - p_pred))
    return kld
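
# Illustrative usage (not part of the library source; the prevalence values
# are made up). Note that this implementation uses base-2 logarithms:
#
#     >>> binary_kld(0.3, 0.2)   # ~0.0406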


def bias(p_true, p_pred):
    r"""Bias of a binary quantifier

    It is just the difference between the predicted prevalence (:math:`\hat{p}`) and the true
    prevalence (:math:`p`):

    :math:`bias = \hat{p} - p`

    It measures whether the binary quantifier tends to overestimate or underestimate the
    proportion of positives

    Parameters
    ----------
    p_true : float
        True prevalence for the positive class

    p_pred : float
        Predicted prevalence for the positive class

    Returns
    -------
    bias : float
        The bias for binary problems
    """
    return p_pred - p_true
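
# Illustrative usage (values made up): a quantifier that predicts 0.2 when the
# true positive prevalence is 0.3 underestimates, so its bias is negative:
#
#     >>> bias(0.3, 0.2)   # ~ -0.1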


def absolute_error(p_true, p_pred):
    r"""Binary version of the absolute error

    Absolute difference between the predicted prevalence (:math:`\hat{p}`) and the true
    prevalence (:math:`p`):

    :math:`ae = | \hat{p} - p |`

    Parameters
    ----------
    p_true : float
        True prevalence for the positive class

    p_pred : float
        Predicted prevalence for the positive class

    Returns
    -------
    absolute_error : float
        The absolute error for binary problems
    """
    return np.abs(p_pred - p_true)
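
# Illustrative usage (values made up): the absolute error drops the sign of
# the bias:
#
#     >>> absolute_error(0.3, 0.2)   # ~0.1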


def squared_error(p_true, p_pred):
    r"""Binary version of the squared error. Only the prevalence of the positive class is used

    It is the quadratic difference between the predicted prevalence (:math:`\hat{p}`) and the
    true prevalence (:math:`p`):

    :math:`se = (\hat{p} - p)^2`

    It penalizes larger errors

    Parameters
    ----------
    p_true : float
        True prevalence for the positive class

    p_pred : float
        Predicted prevalence for the positive class

    Returns
    -------
    squared_error : float
        The squared error for binary problems
    """
    return (p_pred - p_true) ** 2
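
# Illustrative usage (values made up): squaring penalizes larger errors, so an
# error of 0.2 costs four times as much as an error of 0.1:
#
#     >>> squared_error(0.3, 0.2)   # ~0.01
#     >>> squared_error(0.4, 0.2)   # ~0.04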


def relative_absolute_error(p_true, p_pred, eps=1e-12):
    r"""A binary relative version of the absolute error

    It is the relation between the absolute error and the true prevalence:

    :math:`rae = | \hat{p} - p | / p`

    Parameters
    ----------
    p_true : float
        True prevalence for the positive class

    p_pred : float
        Predicted prevalence for the positive class

    eps : float, (default=1e-12)
        To prevent a division by zero when the true prevalence is 0

    Returns
    -------
    RAE : float
        The relative absolute error for binary problems
    """
    if p_true == 0:
        return np.abs(p_pred - p_true) / (p_true + eps)
    else:
        return np.abs(p_pred - p_true) / p_true
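
# Illustrative usage (values made up): the same absolute error of ~0.1 weighs
# more when the true prevalence is small:
#
#     >>> relative_absolute_error(0.3, 0.2)   # ~0.333
#     >>> relative_absolute_error(0.5, 0.4)   # ~0.2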


def symmetric_absolute_percentage_error(p_true, p_pred):
    r"""A symmetric binary version of RAE

    :math:`sape = | \hat{p} - p | / (\hat{p} + p)`

    Parameters
    ----------
    p_true : float
        True prevalence for the positive class

    p_pred : float
        Predicted prevalence for the positive class

    Returns
    -------
    SAPE : float
        The symmetric absolute percentage error for binary problems
    """
    if p_pred + p_true == 0:
        return 0
    else:
        return np.abs(p_pred - p_true) / (p_pred + p_true)
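
# Illustrative usage (values made up): unlike RAE, the denominator also
# includes the predicted prevalence, which bounds the result by 1:
#
#     >>> symmetric_absolute_percentage_error(0.3, 0.2)   # ~0.2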


def normalized_absolute_score(p_true, p_pred):
    r"""A score version of the normalized binary absolute error

    :math:`nas = 1 - | \hat{p} - p | / \max(p, 1-p)`

    Parameters
    ----------
    p_true : float
        True prevalence for the positive class

    p_pred : float
        Predicted prevalence for the positive class

    Returns
    -------
    NAS : float
        The normalized absolute score for binary problems
    """
    return 1 - np.abs(p_pred - p_true) / np.max([p_true, 1 - p_true])
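
# Illustrative usage (values made up): 1 means a perfect estimate, 0 the worst
# estimate possible given the true prevalence:
#
#     >>> normalized_absolute_score(0.3, 0.2)   # ~0.857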


def normalized_squared_score(p_true, p_pred):
    r"""A score version of the normalized binary squared error

    :math:`nss = 1 - \left( (\hat{p} - p) / \max(p, 1-p) \right)^2`

    Parameters
    ----------
    p_true : float
        True prevalence for the positive class

    p_pred : float
        Predicted prevalence for the positive class

    Returns
    -------
    NSS : float
        The normalized squared score for binary problems
    """
    return 1 - (np.abs(p_pred - p_true) / np.max([p_true, 1 - p_true])) ** 2
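
# Illustrative usage (values made up): like normalized_absolute_score but with
# a quadratic penalty, so small errors reduce the score less:
#
#     >>> normalized_squared_score(0.3, 0.2)   # ~0.980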