import numpy as np
from ..embedding import BaseEmbeddingEvaluator, expand_vector
class CosineEvaluator(BaseEmbeddingEvaluator):
    """A :class:`CosineEvaluator` evaluates the distance between actual and desired embeddings by
    computing the cosine distance between them.

    The smaller the value, the closer the two embeddings are: this is the cosine *distance*,
    not the cosine similarity.

    .. math::

        1 - \\frac{u \\cdot v}
                  {||u||_2 ||v||_2}.
    """

    metric = 'CosineDistance'

    def evaluate(
        self, actual: 'np.array', desired: 'np.array', *args, **kwargs
    ) -> float:
        """Compute the cosine distance between the actual and the desired embedding.

        :param actual: the embedding of the document (resulting from an Encoder)
        :param desired: the expected embedding of the document
        :param args: accepted for interface compatibility, not used by this method
        :param kwargs: accepted for interface compatibility, not used by this method
        :return: the evaluation metric value for the request document
        """
        actual = expand_vector(actual)
        desired = expand_vector(desired)
        # Both inputs are L2-normalised first, so half of the squared euclidean
        # distance computed by `_cosine` equals 1 - cos(actual, desired).
        # NOTE(review): `_cosine` returns the result of a matrix product, not a
        # Python float -- confirm callers are fine with that despite the annotation.
        return _cosine(_ext_A(_norm(actual)), _ext_B(_norm(desired)))
# NOTE: the helpers below are duplicated here on purpose; they are to be migrated to the Hub
def _get_ones(x, y):
    """Return a new ``x`` by ``y`` array in which every entry is one.

    :param x: number of rows
    :param y: number of columns
    :return: array of shape ``(x, y)`` filled with ones
    """
    shape = (x, y)
    return np.ones(shape)
def _ext_A(A):
    """Build the left-hand extended matrix ``[1 | A | A ** 2]``.

    Multiplying the result with the output of ``_ext_B`` yields, per pair of
    rows, ``sum(b ** 2 - 2 * a * b + a ** 2)``, i.e. the squared euclidean
    distance, using a single matrix product.

    :param A: 2-D array of shape ``(nA, dim)``
    :return: array of shape ``(nA, 3 * dim)``
    """
    n_rows, width = A.shape
    extended = _get_ones(n_rows, 3 * width)
    # The first `width` columns keep their initial value of one.
    linear_cols = slice(width, 2 * width)
    squared_cols = slice(2 * width, None)
    extended[:, linear_cols] = A
    extended[:, squared_cols] = A ** 2
    return extended
def _ext_B(B):
    """Build the right-hand extended matrix, stacked row-wise as ``[B ** 2; -2 * B; 1]`` (transposed).

    It is the counterpart of ``_ext_A``: the product of both extended matrices
    gives the pairwise squared euclidean distances.

    :param B: 2-D array of shape ``(nB, dim)``
    :return: array of shape ``(3 * dim, nB)``
    """
    n_rows, width = B.shape
    extended = _get_ones(3 * width, n_rows)
    squared_rows = slice(None, width)
    linear_rows = slice(width, 2 * width)
    extended[squared_rows] = (B ** 2).T
    extended[linear_rows] = -2.0 * B.T
    # The last `width` rows keep their initial value of one.
    return extended
def _euclidean(A_ext, B_ext):
    """Return the square root of the product of the two extended matrices.

    Negative entries of the product are clipped to zero before the square
    root is taken.

    :param A_ext: left-hand extended matrix
    :param B_ext: right-hand extended matrix
    :return: element-wise square root of the clipped product
    """
    product = A_ext.dot(B_ext)
    non_negative = product.clip(min=0)
    return np.sqrt(non_negative)
def _norm(A):
    """Scale every row of ``A`` to unit L2 norm.

    :param A: 2-D array whose rows are the vectors to normalise
    :return: ``A`` divided row-wise by the L2 norm of each row
    """
    # keepdims=True keeps the norms as a column so the division broadcasts per row.
    row_norms = np.linalg.norm(A, ord=2, axis=1, keepdims=True)
    return A / row_norms
def _cosine(A_norm_ext, B_norm_ext):
    """Return half of the product of the two extended matrices, clipped at zero.

    Negative entries of the product are clipped to zero before halving.

    :param A_norm_ext: left-hand extended matrix
    :param B_norm_ext: right-hand extended matrix
    :return: the clipped product divided by two
    """
    product = A_norm_ext.dot(B_norm_ext)
    non_negative = product.clip(min=0)
    return non_negative / 2