Source code for quantificationlib.metrics.binary
"""
Score functions and loss functions for binary quantification problems
"""
# Authors: Alberto Castaño <bertocast@gmail.com>
# Pablo González <gonzalezgpablo@uniovi.es>
# Jaime Alonso <jalonso@uniovi.es>
# Pablo Pérez <pabloperez@uniovi.es>
# Juan José del Coz <juanjo@uniovi.es>
# License: GPLv3 clause, University of Oviedo
import numpy as np

def binary_kld(p_true, p_pred, eps=1e-12):
    r"""A binary version of the Kullback-Leibler divergence (KLD)

    :math:`kld = p \cdot \log(p/\hat{p}) + (1-p) \cdot \log((1-p)/(1-\hat{p}))`

    Parameters
    ----------
    p_true : float
        True prevalence for the positive class

    p_pred : float
        Predicted prevalence for the positive class

    eps : float, (default=1e-12)
        To prevent a division by zero exception

    Returns
    -------
    KLD : float
        The Kullback-Leibler divergence for binary problems
    """
    # Replace p_pred (or 1 - p_pred) by eps at the boundaries so the
    # logarithms stay finite; base-2 logs are used, so the result is in bits
    if p_pred == 0:
        kld = p_true * np.log2(p_true / eps)
    else:
        kld = p_true * np.log2(p_true / p_pred)
    if p_pred == 1:
        kld = kld + (1 - p_true) * np.log2((1 - p_true) / eps)
    else:
        kld = kld + (1 - p_true) * np.log2((1 - p_true) / (1 - p_pred))
    return kld
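
# Editorial usage sketch, not part of the original module; the prevalence
# values are assumed purely for illustration. Base-2 logs mean bits:
#
#     binary_kld(0.3, 0.2)   # ~0.0406 bits
#     binary_kld(0.3, 0.3)   # 0.0, a perfect prediction
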
def bias(p_true, p_pred):
    r"""Bias of a binary quantifier

    It is just the difference between the predicted prevalence (:math:`\hat{p}`) and the true prevalence (:math:`p`):

    :math:`bias = \hat{p} - p`

    It measures whether the binary quantifier tends to overestimate or underestimate the proportion of positives

    Parameters
    ----------
    p_true : float
        True prevalence for the positive class

    p_pred : float
        Predicted prevalence for the positive class

    Returns
    -------
    bias : float
        The bias for binary problems
    """
    return p_pred - p_true
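
# Editorial usage sketch with assumed prevalences: a negative bias means the
# quantifier underestimates the proportion of positives.
#
#     bias(p_true=0.3, p_pred=0.2)   # ~-0.1, i.e. underestimation
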
def absolute_error(p_true, p_pred):
    r"""Binary version of the absolute error

    It is the absolute difference between the predicted prevalence (:math:`\hat{p}`) and the true prevalence (:math:`p`):

    :math:`ae = | \hat{p} - p |`

    Parameters
    ----------
    p_true : float
        True prevalence for the positive class

    p_pred : float
        Predicted prevalence for the positive class

    Returns
    -------
    absolute_error : float
        The absolute error for binary problems
    """
    return np.abs(p_pred - p_true)
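
# Editorial usage sketch with assumed prevalences: the absolute error treats
# over- and underestimation symmetrically, unlike bias.
#
#     absolute_error(0.3, 0.2)   # ~0.1
#     absolute_error(0.3, 0.4)   # ~0.1 as well
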
def squared_error(p_true, p_pred):
    r"""Binary version of the squared error. Only the prevalence of the positive class is used

    It is the quadratic difference between the predicted prevalence (:math:`\hat{p}`) and the true prevalence (:math:`p`):

    :math:`se = (\hat{p} - p)^2`

    It penalizes larger errors

    Parameters
    ----------
    p_true : float
        True prevalence for the positive class

    p_pred : float
        Predicted prevalence for the positive class

    Returns
    -------
    squared_error : float
        The squared error for binary problems
    """
    return (p_pred - p_true) ** 2
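
# Editorial usage sketch with assumed prevalences: squaring makes an error of
# 0.2 four times as costly as an error of 0.1, not twice.
#
#     squared_error(0.5, 0.4)   # ~0.01
#     squared_error(0.5, 0.3)   # ~0.04
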
def relative_absolute_error(p_true, p_pred, eps=1e-12):
    r"""A binary relative version of the absolute error

    It is the absolute error divided by the true prevalence:

    :math:`rae = | \hat{p} - p | / p`

    Parameters
    ----------
    p_true : float
        True prevalence for the positive class

    p_pred : float
        Predicted prevalence for the positive class

    eps : float, (default=1e-12)
        To prevent a division by zero exception when p_true is 0

    Returns
    -------
    RAE : float
        The relative absolute error for binary problems
    """
    # Add eps to the denominator when the true prevalence is 0 to avoid a
    # division by zero
    if p_true == 0:
        return np.abs(p_pred - p_true) / (p_true + eps)
    else:
        return np.abs(p_pred - p_true) / p_true
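
# Editorial usage sketch with assumed prevalences. RAE is asymmetric: the same
# absolute error weighs more when the true prevalence is small, and the eps
# guard makes the score explode, rather than crash, when p_true is 0.
#
#     relative_absolute_error(0.3, 0.2)   # ~0.3333
#     relative_absolute_error(0.0, 0.1)   # ~1e11, divides by eps=1e-12
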
def symmetric_absolute_percentage_error(p_true, p_pred):
    r"""A symmetric binary version of RAE

    :math:`sape = | \hat{p} - p | / (\hat{p} + p)`

    Parameters
    ----------
    p_true : float
        True prevalence for the positive class

    p_pred : float
        Predicted prevalence for the positive class

    Returns
    -------
    SAPE : float
        The symmetric absolute percentage error for binary problems
    """
    # The degenerate 0/0 case (both prevalences are 0) is defined as a
    # perfect score
    if p_pred + p_true == 0:
        return 0
    else:
        return np.abs(p_pred - p_true) / (p_pred + p_true)
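
# Editorial usage sketch with assumed prevalences: SAPE is bounded in [0, 1]
# and the 0/0 case yields a perfect score of 0.
#
#     symmetric_absolute_percentage_error(0.3, 0.2)   # ~0.2
#     symmetric_absolute_percentage_error(0.0, 0.0)   # 0
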
def normalized_absolute_score(p_true, p_pred):
    r"""A score version of the normalized binary absolute error

    :math:`nas = 1 - | \hat{p} - p | / \max(p, 1-p)`

    Parameters
    ----------
    p_true : float
        True prevalence for the positive class

    p_pred : float
        Predicted prevalence for the positive class

    Returns
    -------
    NAS : float
        The normalized absolute score for binary problems
    """
    return 1 - np.abs(p_pred - p_true) / max(p_true, 1 - p_true)
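
# Editorial usage sketch with assumed prevalences: NAS rescales the absolute
# error by the largest error attainable given p_true, so 1 is a perfect
# prediction and 0 is the worst possible one.
#
#     normalized_absolute_score(0.3, 0.2)   # ~0.8571
#     normalized_absolute_score(0.3, 1.0)   # 0.0, worst case for p_true=0.3
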
def normalized_squared_score(p_true, p_pred):
    r"""A score version of the normalized binary squared error

    :math:`nss = 1 - ( (\hat{p} - p) / \max(p, 1-p) )^2`

    Parameters
    ----------
    p_true : float
        True prevalence for the positive class

    p_pred : float
        Predicted prevalence for the positive class

    Returns
    -------
    NSS : float
        The normalized squared score for binary problems
    """
    return 1 - ((p_pred - p_true) / max(p_true, 1 - p_true)) ** 2
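
# Editorial usage sketch with assumed prevalences: NSS relates to NAS as the
# squared error relates to the absolute error, so small errors are punished
# more mildly.
#
#     normalized_squared_score(0.3, 0.2)    # ~0.9796
#     normalized_absolute_score(0.3, 0.2)   # ~0.8571, compare with NAS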