Source code for jina.executors.evaluators.embedding.cosine

import numpy as np

from ..embedding import BaseEmbeddingEvaluator, expand_vector


class CosineEvaluator(BaseEmbeddingEvaluator):
    """Score an embedding by its cosine distance to the expected embedding.

    The reported value is a *distance*, not a cosine similarity: the smaller
    the value, the closer the two embeddings are.

    .. math::

        1 - \\frac{u \\cdot v}{||u||_2 ||v||_2}
    """

    metric = 'CosineDistance'

    def evaluate(
        self, actual: 'np.array', desired: 'np.array', *args, **kwargs
    ) -> float:
        """Compute the cosine distance between two embeddings.

        Both inputs go through ``expand_vector`` and are then L2-normalised
        row by row before the distance is computed.

        :param actual: the embedding of the document (resulting from an Encoder)
        :param desired: the expected embedding of the document
        :param args: extra positional arguments, not used here
        :param kwargs: extra keyword arguments, not used here
        :return: the evaluation metric value for the request document
        """
        # NOTE(review): ``expand_vector`` comes from the parent package;
        # presumably it promotes a single vector to a 2-D row matrix, which
        # the helpers below require (they unpack ``.shape`` into two values)
        # -- confirm.
        actual_unit = _norm(expand_vector(actual))
        desired_unit = _norm(expand_vector(desired))
        # NOTE(review): ``_cosine`` returns the halved, clipped matrix product
        # (a NumPy array with one entry per actual/desired row pair), while
        # the annotation promises ``float`` -- confirm what callers expect.
        return _cosine(_ext_A(actual_unit), _ext_B(desired_unit))
# duplicate on purpose, to be migrated to the Hub
def _get_ones(x, y):
    """Return an ``x``-by-``y`` array filled with ones."""
    return np.ones((x, y))


def _ext_A(A):
    """Extend every row ``a`` of ``A`` to ``[1 ... 1, a, a ** 2]``.

    :param A: 2-D array with one vector per row
    :return: array of shape ``(rows, 3 * dim)``
    """
    rows, dim = A.shape
    linear_part = slice(dim, 2 * dim)
    squared_part = slice(2 * dim, None)

    extended = _get_ones(rows, 3 * dim)
    # The first ``dim`` columns stay at one; they pick up the squared terms
    # of the other operand when multiplied with the output of ``_ext_B``.
    extended[:, linear_part] = A
    extended[:, squared_part] = A ** 2
    return extended


def _ext_B(B):
    """Extend every row ``b`` of ``B`` to the column ``[b ** 2, -2 * b, 1 ... 1]``.

    :param B: 2-D array with one vector per row
    :return: array of shape ``(3 * dim, rows)``
    """
    rows, dim = B.shape
    squared_part = slice(None, dim)
    linear_part = slice(dim, 2 * dim)

    extended = _get_ones(3 * dim, rows)
    extended[squared_part] = (B ** 2).T
    extended[linear_part] = -2.0 * B.T
    # The last ``dim`` rows stay at one, matching the squared columns that
    # ``_ext_A`` produces.
    return extended


def _euclidean(A_ext, B_ext):
    """Return the pairwise Euclidean distances from two extended matrices.

    With the layouts built by ``_ext_A`` and ``_ext_B`` the matrix product is
    ``sum(b ** 2) - 2 * a.b + sum(a ** 2)``, i.e. the squared distance.
    """
    squared = A_ext.dot(B_ext)
    # Rounding can push a squared distance slightly below zero; clamp it
    # before taking the square root.
    non_negative = squared.clip(min=0)
    return np.sqrt(non_negative)


def _norm(A):
    """Divide every row of ``A`` by its L2 norm."""
    row_lengths = np.linalg.norm(A, ord=2, axis=1, keepdims=True)
    return A / row_lengths


def _cosine(A_norm_ext, B_norm_ext):
    """Return the pairwise cosine distances from two extended matrices.

    For unit-length rows the squared Euclidean distance is ``2 - 2 * cos``,
    so halving it gives ``1 - cos``.
    """
    squared = A_norm_ext.dot(B_norm_ext)
    non_negative = squared.clip(min=0)
    return non_negative / 2