Source code for evaluation
r"""Module containing utility functions to evaluate recommendation engines.
"""
import numpy as np
from metrics import Metrics
def evaluate(model, test_loader, metric_list):
r"""Evaluate the given method.
The ``model`` evaluation is performed with all the provided metrics in ``metric_list``.
The test set is loaded through the provided :class:`sampler.Sampler`
(i.e., ``test_loader``).
Parameters
----------
model : :class:`models.RecSysModel`
The model to evaluate.
test_loader : :class:`sampler.Sampler`
The test set loader.
metric_list : :obj:`list` of :obj:`str`
The list of metrics to compute. Metrics are indicated by strings formed in the
following way:
``matric_name`` @ ``k``
where ``matric_name`` must correspond to one of the
method names without the suffix '_at_k', and ``k`` is the corresponding parameter of
the method and it must be an integer value. For example: ``ndcg@10`` is a valid metric
name and it corresponds to the method :py:func:`ndcg_at_k <metrics.Metrics.ndcg_at_k>`
with ``k=10``.
Returns
-------
:obj:`dict` of :obj:`numpy.array`
Dictionary with the results for each metric in ``metric_list``. Keys are string
representing the metric, while the value is an array with the value of the metric
computed on the users.
"""
    results = {m: [] for m in metric_list}
    for data_tr, heldout in test_loader:
        # Flatten the batch, score it with the model, and move everything to numpy.
        data_tensor = data_tr.view(data_tr.shape[0], -1)
        recon_batch = model.predict(data_tensor)[0].cpu().numpy()
        heldout = heldout.view(heldout.shape[0], -1).cpu().numpy()
        # Compute every requested metric on the current batch of users.
        res = Metrics.compute(recon_batch, heldout, metric_list)
        for m in res:
            results[m].append(res[m])
    # Concatenate the per-batch results into a single per-user array for each metric.
    for m in results:
        results[m] = np.concatenate(results[m])
    return results
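
A minimal usage sketch follows. The stub model and in-memory loader are illustrative assumptions, not part of this package: any loader that yields ``(data_tr, heldout)`` tensor pairs and any model whose ``predict`` returns a tuple with the score tensor first would do, and the metric string follows the ``metric_name@k`` convention documented above.

import torch

class StubModel:
    # Illustrative stand-in for a models.RecSysModel: returns random scores.
    def predict(self, x):
        return (torch.rand_like(x),)

# One batch of 8 users over 100 items: (training part, held-out part).
test_loader = [(torch.randint(0, 2, (8, 100)).float(),
                torch.randint(0, 2, (8, 100)).float())]

results = evaluate(StubModel(), test_loader, ["ndcg@10"])
for name, values in results.items():
    print(f"{name}: mean={values.mean():.4f} over {len(values)} users")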