# Source code for measurenary.distance

"""
Distances
---------

Every function in this module computes a distance between two pieces of binary
data from the counts of their 2x2 contingency table.

+--------+-----------------+
|        | Row 2           |
+--------+-----+-----+-----+
| Row 1  |     | 1   | 0   |
|        +-----+-----+-----+
|        | 1   | a   | b   |
|        +-----+-----+-----+
|        | 0   | c   | d   |
+--------+-----+-----+-----+

Each function has the same parameters and return values:

Parameters
    a : int
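        Number of positions where both row 1 and row 2 are 1.
    b : int
        Number of positions where row 1 is 1 and row 2 is 0.
    c : int
        Number of positions where row 1 is 0 and row 2 is 1.
    d : int
        Number of positions where both row 1 and row 2 are 0.
    n : int
        Sum of all the counts (a + b + c + d).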

Returns
    float
        The distance value between the two pieces of data.
"""

from math import sqrt
import warnings

# Helper that lists the names of the functions defined in this module.
def get_all_functions_name():
    """
    Get the names of all the functions defined in this module.

    Returns
    -------
    list of str
        Names (not function objects) of the callables defined in this
        module, in definition order, excluding this helper itself.
        Callables imported from elsewhere (such as ``math.sqrt``) are not
        listed. ``get_function`` is part of the result.
    """
    # Select by defining module instead of slicing off the first two
    # callables: a positional slice silently returns the wrong names as soon
    # as another callable is imported or the definitions are reordered.
    return [
        name
        for name, obj in globals().items()
        if callable(obj)
        and getattr(obj, '__module__', None) == __name__
        and name != 'get_all_functions_name'
    ]
# get specific function by name
def get_function(name):
    """
    Look up an object in this module's global namespace by its name.

    Parameters
    ----------
    name : str
        Name to look up, typically the name of one of the distance
        functions defined in this module.

    Returns
    -------
    object
        Whatever is bound to ``name`` at module level (expected to be a
        function).

    Raises
    ------
    KeyError
        If no module-level name ``name`` exists.
    """
    namespace = globals()
    return namespace[name]
def hamming(a, b, c, d, n):
    """
    Hamming distance from contingency table.

    Computed as ``b + c``.

    Parameters
    ----------
    a, b, c, d, n : int
        Contingency-table counts as described in the module docstring.
        ``a``, ``d`` and ``n`` are validated but do not enter the formula.

    Returns
    -------
    int or float or None
        ``b + c``, or ``None`` if the arithmetic fails.

    Raises
    ------
    ValueError
        If any argument is negative.

    Warns
    -----
    RuntimeWarning
        If any argument is not an ``int``.
    """
    counts = (a, b, c, d, n)
    if any(value < 0 for value in counts):
        raise ValueError('value in confusion matrix cannot less than zero')
    # Non-integer counts are tolerated but flagged.
    if not all(isinstance(value, int) for value in counts):
        warnings.warn(RuntimeWarning('some value in confusion matrix is not integer'))
    try:
        return b + c
    except ArithmeticError:
        # Only arithmetic failures map to None; anything else (including
        # KeyboardInterrupt) propagates instead of being swallowed.
        return None
def euclidean(a, b, c, d, n):
    """
    Euclidean distance from contingency table.

    Computed as ``sqrt(b + c)``.

    Parameters
    ----------
    a, b, c, d, n : int
        Contingency-table counts as described in the module docstring.
        ``a``, ``d`` and ``n`` are validated but do not enter the formula.

    Returns
    -------
    float or None
        ``sqrt(b + c)``, or ``None`` if the computation fails numerically.

    Raises
    ------
    ValueError
        If any argument is negative.

    Warns
    -----
    RuntimeWarning
        If any argument is not an ``int``.
    """
    counts = (a, b, c, d, n)
    if any(value < 0 for value in counts):
        raise ValueError('value in confusion matrix cannot less than zero')
    # Non-integer counts are tolerated but flagged.
    if not all(isinstance(value, int) for value in counts):
        warnings.warn(RuntimeWarning('some value in confusion matrix is not integer'))
    try:
        return sqrt(b + c)
    except (ArithmeticError, ValueError):
        # Only numeric failures (overflow, math domain error) map to None;
        # anything else propagates instead of being swallowed.
        return None
def squared_euclidean(a, b, c, d, n):
    """
    Squared Euclidean distance from contingency table.

    Computed as ``sqrt((b + c) ** 2)``, which evaluates to ``b + c`` as a
    float.

    Parameters
    ----------
    a, b, c, d, n : int
        Contingency-table counts as described in the module docstring.
        ``a``, ``d`` and ``n`` are validated but do not enter the formula.

    Returns
    -------
    float or None
        The distance, or ``None`` if the computation fails numerically.

    Raises
    ------
    ValueError
        If any argument is negative.

    Warns
    -----
    RuntimeWarning
        If any argument is not an ``int``.
    """
    counts = (a, b, c, d, n)
    if any(value < 0 for value in counts):
        raise ValueError('value in confusion matrix cannot less than zero')
    # Non-integer counts are tolerated but flagged.
    if not all(isinstance(value, int) for value in counts):
        warnings.warn(RuntimeWarning('some value in confusion matrix is not integer'))
    try:
        return sqrt((b + c) ** 2)
    except (ArithmeticError, ValueError):
        # Only numeric failures (overflow, math domain error) map to None;
        # anything else propagates instead of being swallowed.
        return None
def canberra(a, b, c, d, n):
    """
    Canberra distance from contingency table.

    Computed as ``(b + c) ** (2 / 2)``, i.e. ``b + c`` as a float. For
    binary data every mismatching position contributes 1 to the Canberra
    sum and every matching position contributes 0.

    Parameters
    ----------
    a, b, c, d, n : int
        Contingency-table counts as described in the module docstring.
        ``a``, ``d`` and ``n`` are validated but do not enter the formula.

    Returns
    -------
    float or None
        The distance, or ``None`` if the arithmetic fails.

    Raises
    ------
    ValueError
        If any argument is negative.

    Warns
    -----
    RuntimeWarning
        If any argument is not an ``int``.
    """
    counts = (a, b, c, d, n)
    if any(value < 0 for value in counts):
        raise ValueError('value in confusion matrix cannot less than zero')
    # Non-integer counts are tolerated but flagged.
    if not all(isinstance(value, int) for value in counts):
        warnings.warn(RuntimeWarning('some value in confusion matrix is not integer'))
    try:
        # The exponent is 2/2 (previously mistyped as 2/3, which
        # under-reported the distance for every b + c > 1).
        return (b + c) ** (2 / 2)
    except ArithmeticError:
        # Only arithmetic failures map to None; anything else propagates.
        return None
def manhattan(a, b, c, d, n):
    """
    Manhattan distance from contingency table.

    Computed as ``b + c``.

    Parameters
    ----------
    a, b, c, d, n : int
        Contingency-table counts as described in the module docstring.
        ``a``, ``d`` and ``n`` are validated but do not enter the formula.

    Returns
    -------
    int or float or None
        ``b + c``, or ``None`` if the arithmetic fails.

    Raises
    ------
    ValueError
        If any argument is negative.

    Warns
    -----
    RuntimeWarning
        If any argument is not an ``int``.
    """
    counts = (a, b, c, d, n)
    if any(value < 0 for value in counts):
        raise ValueError('value in confusion matrix cannot less than zero')
    # Non-integer counts are tolerated but flagged.
    if not all(isinstance(value, int) for value in counts):
        warnings.warn(RuntimeWarning('some value in confusion matrix is not integer'))
    try:
        return b + c
    except ArithmeticError:
        # Only arithmetic failures map to None; anything else propagates.
        return None
def mean_manhattan(a, b, c, d, n):
    """
    Mean Manhattan distance from contingency table.

    Computed as ``(b + c) / (a + b + c + d)``.

    Parameters
    ----------
    a, b, c, d, n : int
        Contingency-table counts as described in the module docstring.
        ``n`` is validated but does not enter the formula.

    Returns
    -------
    float or None
        The distance, or ``None`` when it is undefined (all four cells are
        zero) or the arithmetic otherwise fails.

    Raises
    ------
    ValueError
        If any argument is negative.

    Warns
    -----
    RuntimeWarning
        If any argument is not an ``int``.
    """
    counts = (a, b, c, d, n)
    if any(value < 0 for value in counts):
        raise ValueError('value in confusion matrix cannot less than zero')
    # Non-integer counts are tolerated but flagged.
    if not all(isinstance(value, int) for value in counts):
        warnings.warn(RuntimeWarning('some value in confusion matrix is not integer'))
    try:
        return (b + c) / (a + b + c + d)
    except ArithmeticError:
        # Division by zero (empty table) maps to None; unrelated
        # exceptions propagate instead of being swallowed.
        return None
def cityblock(a, b, c, d, n):
    """
    Cityblock distance from contingency table.

    Computed as ``b + c``.

    Parameters
    ----------
    a, b, c, d, n : int
        Contingency-table counts as described in the module docstring.
        ``a``, ``d`` and ``n`` are validated but do not enter the formula.

    Returns
    -------
    int or float or None
        ``b + c``, or ``None`` if the arithmetic fails.

    Raises
    ------
    ValueError
        If any argument is negative.

    Warns
    -----
    RuntimeWarning
        If any argument is not an ``int``.
    """
    counts = (a, b, c, d, n)
    if any(value < 0 for value in counts):
        raise ValueError('value in confusion matrix cannot less than zero')
    # Non-integer counts are tolerated but flagged.
    if not all(isinstance(value, int) for value in counts):
        warnings.warn(RuntimeWarning('some value in confusion matrix is not integer'))
    try:
        return b + c
    except ArithmeticError:
        # Only arithmetic failures map to None; anything else propagates.
        return None
def minkowski(a, b, c, d, n):
    """
    Minkowski distance from contingency table.

    Computed as ``(b + c) ** 1``.

    Parameters
    ----------
    a, b, c, d, n : int
        Contingency-table counts as described in the module docstring.
        ``a``, ``d`` and ``n`` are validated but do not enter the formula.

    Returns
    -------
    int or float or None
        The distance, or ``None`` if the arithmetic fails.

    Raises
    ------
    ValueError
        If any argument is negative.

    Warns
    -----
    RuntimeWarning
        If any argument is not an ``int``.
    """
    counts = (a, b, c, d, n)
    if any(value < 0 for value in counts):
        raise ValueError('value in confusion matrix cannot less than zero')
    # Non-integer counts are tolerated but flagged.
    if not all(isinstance(value, int) for value in counts):
        warnings.warn(RuntimeWarning('some value in confusion matrix is not integer'))
    try:
        return (b + c) ** 1
    except ArithmeticError:
        # Only arithmetic failures map to None; anything else propagates.
        return None
def vari(a, b, c, d, n):
    """
    Vari distance from contingency table.

    Computed as ``(b + c) / (4 * (a + b + c + d))``.

    Parameters
    ----------
    a, b, c, d, n : int
        Contingency-table counts as described in the module docstring.
        ``n`` is validated but does not enter the formula.

    Returns
    -------
    float or None
        The distance, or ``None`` when it is undefined (all four cells are
        zero) or the arithmetic otherwise fails.

    Raises
    ------
    ValueError
        If any argument is negative.

    Warns
    -----
    RuntimeWarning
        If any argument is not an ``int``.
    """
    counts = (a, b, c, d, n)
    if any(value < 0 for value in counts):
        raise ValueError('value in confusion matrix cannot less than zero')
    # Non-integer counts are tolerated but flagged.
    if not all(isinstance(value, int) for value in counts):
        warnings.warn(RuntimeWarning('some value in confusion matrix is not integer'))
    try:
        return (b + c) / (4 * (a + b + c + d))
    except ArithmeticError:
        # Division by zero (empty table) maps to None; unrelated
        # exceptions propagate instead of being swallowed.
        return None
def size_difference(a, b, c, d, n):
    """
    Size difference from contingency table.

    Computed as ``(b + c) ** 2 / (a + b + c + d) ** 2``.

    Parameters
    ----------
    a, b, c, d, n : int
        Contingency-table counts as described in the module docstring.
        ``n`` is validated but does not enter the formula.

    Returns
    -------
    float or None
        The distance, or ``None`` when it is undefined (all four cells are
        zero) or the arithmetic otherwise fails.

    Raises
    ------
    ValueError
        If any argument is negative.

    Warns
    -----
    RuntimeWarning
        If any argument is not an ``int``.
    """
    counts = (a, b, c, d, n)
    if any(value < 0 for value in counts):
        raise ValueError('value in confusion matrix cannot less than zero')
    # Non-integer counts are tolerated but flagged.
    if not all(isinstance(value, int) for value in counts):
        warnings.warn(RuntimeWarning('some value in confusion matrix is not integer'))
    try:
        return (b + c) ** 2 / ((a + b + c + d) ** 2)
    except ArithmeticError:
        # Division by zero (empty table) maps to None; unrelated
        # exceptions propagate instead of being swallowed.
        return None
def shape_difference(a, b, c, d, n):
    """
    Shape difference from contingency table.

    Computed as ``(n * (b + c) - (b - c) ** 2) / (a + b + c + d) ** 2``.

    Parameters
    ----------
    a, b, c, d, n : int
        Contingency-table counts as described in the module docstring.

    Returns
    -------
    float or None
        The distance, or ``None`` when it is undefined (all four cells are
        zero) or the arithmetic otherwise fails.

    Raises
    ------
    ValueError
        If any argument is negative.

    Warns
    -----
    RuntimeWarning
        If any argument is not an ``int``.
    """
    counts = (a, b, c, d, n)
    if any(value < 0 for value in counts):
        raise ValueError('value in confusion matrix cannot less than zero')
    # Non-integer counts are tolerated but flagged.
    if not all(isinstance(value, int) for value in counts):
        warnings.warn(RuntimeWarning('some value in confusion matrix is not integer'))
    try:
        # The subtracted term is the squared *difference* of the mismatch
        # counts; it was previously written as (b + c) ** 2.
        return (n * (b + c) - (b - c) ** 2) / ((a + b + c + d) ** 2)
    except ArithmeticError:
        # Division by zero (empty table) maps to None; unrelated
        # exceptions propagate instead of being swallowed.
        return None
def pattern_difference(a, b, c, d, n):
    """
    Pattern difference from contingency table.

    Computed as ``4 * b * c / (a + b + c + d) ** 2``.

    Parameters
    ----------
    a, b, c, d, n : int
        Contingency-table counts as described in the module docstring.
        ``n`` is validated but does not enter the formula.

    Returns
    -------
    float or None
        The distance, or ``None`` when it is undefined (all four cells are
        zero) or the arithmetic otherwise fails.

    Raises
    ------
    ValueError
        If any argument is negative.

    Warns
    -----
    RuntimeWarning
        If any argument is not an ``int``.
    """
    counts = (a, b, c, d, n)
    if any(value < 0 for value in counts):
        raise ValueError('value in confusion matrix cannot less than zero')
    # Non-integer counts are tolerated but flagged.
    if not all(isinstance(value, int) for value in counts):
        warnings.warn(RuntimeWarning('some value in confusion matrix is not integer'))
    try:
        return 4 * b * c / ((a + b + c + d) ** 2)
    except ArithmeticError:
        # Division by zero (empty table) maps to None; unrelated
        # exceptions propagate instead of being swallowed.
        return None
def lance_williams(a, b, c, d, n):
    """
    Lance Williams distance from contingency table.

    Computed as ``(b + c) / (2 * a + b + c)``.

    Parameters
    ----------
    a, b, c, d, n : int
        Contingency-table counts as described in the module docstring.
        ``d`` and ``n`` are validated but do not enter the formula.

    Returns
    -------
    float or None
        The distance, or ``None`` when it is undefined (``a``, ``b`` and
        ``c`` are all zero) or the arithmetic otherwise fails.

    Raises
    ------
    ValueError
        If any argument is negative.

    Warns
    -----
    RuntimeWarning
        If any argument is not an ``int``.
    """
    counts = (a, b, c, d, n)
    if any(value < 0 for value in counts):
        raise ValueError('value in confusion matrix cannot less than zero')
    # Non-integer counts are tolerated but flagged.
    if not all(isinstance(value, int) for value in counts):
        warnings.warn(RuntimeWarning('some value in confusion matrix is not integer'))
    try:
        return (b + c) / (2 * a + b + c)
    except ArithmeticError:
        # Division by zero maps to None; unrelated exceptions propagate
        # instead of being swallowed.
        return None
def bray_curtis(a, b, c, d, n):
    """
    Bray-Curtis distance from contingency table.

    Computed as ``(b + c) / (2 * a + b + c)``.

    Parameters
    ----------
    a, b, c, d, n : int
        Contingency-table counts as described in the module docstring.
        ``d`` and ``n`` are validated but do not enter the formula.

    Returns
    -------
    float or None
        The distance, or ``None`` when it is undefined (``a``, ``b`` and
        ``c`` are all zero) or the arithmetic otherwise fails.

    Raises
    ------
    ValueError
        If any argument is negative.

    Warns
    -----
    RuntimeWarning
        If any argument is not an ``int``.
    """
    counts = (a, b, c, d, n)
    if any(value < 0 for value in counts):
        raise ValueError('value in confusion matrix cannot less than zero')
    # Non-integer counts are tolerated but flagged.
    if not all(isinstance(value, int) for value in counts):
        warnings.warn(RuntimeWarning('some value in confusion matrix is not integer'))
    try:
        return (b + c) / (2 * a + b + c)
    except ArithmeticError:
        # Division by zero maps to None; unrelated exceptions propagate
        # instead of being swallowed.
        return None
def hellinger(a, b, c, d, n):
    """
    Hellinger distance from contingency table.

    Computed as ``2 * sqrt(1 - a / sqrt((a + b) * (a + c)))``.

    Parameters
    ----------
    a, b, c, d, n : int
        Contingency-table counts as described in the module docstring.
        ``d`` and ``n`` are validated but do not enter the formula.

    Returns
    -------
    float or None
        The distance, or ``None`` when it is undefined (``a + b`` or
        ``a + c`` is zero) or the computation otherwise fails numerically.

    Raises
    ------
    ValueError
        If any argument is negative.

    Warns
    -----
    RuntimeWarning
        If any argument is not an ``int``.
    """
    counts = (a, b, c, d, n)
    if any(value < 0 for value in counts):
        raise ValueError('value in confusion matrix cannot less than zero')
    # Non-integer counts are tolerated but flagged.
    if not all(isinstance(value, int) for value in counts):
        warnings.warn(RuntimeWarning('some value in confusion matrix is not integer'))
    try:
        return 2 * sqrt(1 - (a / sqrt((a + b) * (a + c))))
    except (ArithmeticError, ValueError):
        # Division by zero, overflow and math domain errors map to None;
        # unrelated exceptions propagate instead of being swallowed.
        return None
def chord(a, b, c, d, n):
    """
    Chord distance from contingency table.

    Computed as ``sqrt(2 * (1 - a / sqrt((a + b) * (a + c))))``.

    Parameters
    ----------
    a, b, c, d, n : int
        Contingency-table counts as described in the module docstring.
        ``d`` and ``n`` are validated but do not enter the formula.

    Returns
    -------
    float or None
        The distance, or ``None`` when it is undefined (``a + b`` or
        ``a + c`` is zero) or the computation otherwise fails numerically.

    Raises
    ------
    ValueError
        If any argument is negative.

    Warns
    -----
    RuntimeWarning
        If any argument is not an ``int``.
    """
    counts = (a, b, c, d, n)
    if any(value < 0 for value in counts):
        raise ValueError('value in confusion matrix cannot less than zero')
    # Non-integer counts are tolerated but flagged.
    if not all(isinstance(value, int) for value in counts):
        warnings.warn(RuntimeWarning('some value in confusion matrix is not integer'))
    try:
        return sqrt(2 * (1 - (a / sqrt((a + b) * (a + c)))))
    except (ArithmeticError, ValueError):
        # Division by zero, overflow and math domain errors map to None;
        # unrelated exceptions propagate instead of being swallowed.
        return None
def yuleq(a, b, c, d, n):
    """
    Yule-Q distance from contingency table.

    Computed as ``(2 * b * c) / (a * d + b * c)``.

    Parameters
    ----------
    a, b, c, d, n : int
        Contingency-table counts as described in the module docstring.
        ``n`` is validated but does not enter the formula.

    Returns
    -------
    float or None
        The distance, or ``None`` when it is undefined (``a * d + b * c``
        is zero) or the arithmetic otherwise fails.

    Raises
    ------
    ValueError
        If any argument is negative.

    Warns
    -----
    RuntimeWarning
        If any argument is not an ``int``.
    """
    counts = (a, b, c, d, n)
    if any(value < 0 for value in counts):
        raise ValueError('value in confusion matrix cannot less than zero')
    # Non-integer counts are tolerated but flagged.
    if not all(isinstance(value, int) for value in counts):
        warnings.warn(RuntimeWarning('some value in confusion matrix is not integer'))
    try:
        return (2 * b * c) / (a * d + b * c)
    except ArithmeticError:
        # Division by zero maps to None; unrelated exceptions propagate
        # instead of being swallowed.
        return None