Module sortedness.trustworthiness
Expand source code
# Copyright (c) 2023. Davi Pereira dos Santos
# This file is part of the sortedness project.
# Please respect the license - more about this in the section (*) below.
#
# sortedness is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# sortedness is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with sortedness. If not, see <http://www.gnu.org/licenses/>.
#
# (*) Removing authorship by any means, e.g. by distribution of derived
# works or verbatim, obfuscated, compiled or rewritten versions of any
# part of this work is illegal and it is unethical regarding the effort and
# time spent here.
#
from math import nan
import numpy as np
from numpy import eye, where, setdiff1d
from numpy.random import shuffle
from sklearn.decomposition import PCA
from sortedness.rank import rank_by_distances
def continuity(X, X_, k=5, return_pvalues=False):
    """
    'continuity' of each point separately.

    Implemented by delegating to `trustworthiness` with the two spaces
    swapped, i.e. `X_` takes the role of `X` and vice versa.

    >>> import numpy as np
    >>> from functools import partial
    >>> from scipy.stats import spearmanr, weightedtau
    >>> mean = (1, 2)
    >>> cov = eye(2)
    >>> rng = np.random.default_rng(seed=0)
    >>> original = rng.multivariate_normal(mean, cov, size=12)
    >>> s = continuity(original, original)
    >>> min(s), max(s), s
    (1.0, 1.0, array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]))
    >>> projected = PCA(n_components=2).fit_transform(original)
    >>> s = continuity(original, projected)
    >>> min(s), max(s), s
    (1.0, 1.0, array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]))
    >>> projected = PCA(n_components=1).fit_transform(original)
    >>> s, pvalues = continuity(original, projected, return_pvalues=True)
    >>> min(s), max(s), s
    (0.8, 1.0, array([0.95, 0.8 , 0.95, 1.  , 0.9 , 0.95, 0.95, 1.  , 0.95, 1.  , 0.85,
           0.9 ]))
    >>> pvalues
    array([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan])

    Parameters
    ----------
    X
        matrix with an instance by row in a given space (often the original one)
    X_
        matrix with an instance by row in another given space (often the projected one)
    k
        neighborhood size, forwarded unchanged to `trustworthiness`
    return_pvalues
        Add dummy p-values to result (NaNs)

    Returns
    -------
    Whatever `trustworthiness` returns for the swapped spaces: as the examples
    above show, one value for each instance, plus an array of NaN p-values
    when `return_pvalues` is true
    """
    # Continuity is obtained from trustworthiness by exchanging the two spaces.
    return trustworthiness(X=X_, X_=X, k=k, return_pvalues=return_pvalues)
def trustworthiness(X, X_, k=5, return_pvalues=False):
    """
    'trustworthiness' of each point separately.

    For every pair of corresponding rows of `X` and `X_`, all instances are
    ranked by distance to that row in each space (through `rank_by_distances`
    with the "min" tie method).  Instances whose rank is at most `k` in `X_`
    but not in `X` are penalized by how far their rank in `X` exceeds `k`;
    the score is one minus the normalized sum of those penalties.

    >>> import numpy as np
    >>> from functools import partial
    >>> from scipy.stats import spearmanr, weightedtau
    >>> mean = (1, 2)
    >>> cov = eye(2)
    >>> rng = np.random.default_rng(seed=0)
    >>> original = rng.multivariate_normal(mean, cov, size=12)
    >>> s = trustworthiness(original, original)
    >>> min(s), max(s), s
    (1.0, 1.0, array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]))
    >>> projected = PCA(n_components=2).fit_transform(original)
    >>> s = trustworthiness(original, projected)
    >>> min(s), max(s), s
    (1.0, 1.0, array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]))
    >>> projected = PCA(n_components=1).fit_transform(original)
    >>> s, pvalues = trustworthiness(original, projected, return_pvalues=True)
    >>> min(s), max(s), s
    (0.75, 1.0, array([0.8 , 0.75, 0.9 , 1.  , 0.85, 0.9 , 0.95, 1.  , 0.95, 1.  , 0.85,
           0.8 ]))
    >>> pvalues
    array([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan])

    Parameters
    ----------
    X
        matrix with an instance by row in a given space (often the original one)
    X_
        matrix with an instance by row in another given space (often the projected one)
    k
        rank threshold defining the neighborhood of a point (default 5).
        Both `k` and `2 * len(X) - 3 * k - 1` are used as divisors, so
        neither may be zero.
    return_pvalues
        Add dummy p-values to result (NaNs)

    Returns
    -------
    Array of values, one for each instance; when `return_pvalues` is true,
    a tuple with that array and an equally long array of NaNs
    """
    # Second factor of the normalizer; it does not depend on the current row.
    denominator = 2 * len(X) - 3 * k - 1

    scores = []
    for a, b in zip(X, X_):
        # NOTE(review): assumes rank_by_distances returns a 1-D array holding
        # the rank of every row w.r.t. the given instance - confirm in sortedness.rank.
        ra = rank_by_distances(X, a, "min")
        rb = rank_by_distances(X_, b, "min")
        a_neighbors = where(ra <= k)[0]
        b_neighbors = where(rb <= k)[0]

        # Rows that are neighbors in X_ without being neighbors in X.
        intruders = setdiff1d(b_neighbors, a_neighbors)

        # float() keeps plain-Python division below, so a zero divisor raises
        # ZeroDivisionError for every row instead of only for rows without
        # intruders (numpy scalars would silently yield inf/nan).
        penalty = float(np.sum(ra[intruders] - k))
        scores.append(1 - 2 * penalty / k / denominator)

    result = np.array(scores)
    if return_pvalues:
        return result, np.full(len(result), nan)
    return result
Functions
def continuity(X, X_, k=5, return_pvalues=False)-
'continuity' of each point separately.
>>> import numpy as np >>> from functools import partial >>> from scipy.stats import spearmanr, weightedtau >>> mean = (1, 2) >>> cov = eye(2) >>> rng = np.random.default_rng(seed=0) >>> original = rng.multivariate_normal(mean, cov, size=12) >>> s = continuity(original, original) >>> min(s), max(s), s (1.0, 1.0, array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])) >>> projected = PCA(n_components=2).fit_transform(original) >>> s = continuity(original, projected) >>> min(s), max(s), s (1.0, 1.0, array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])) >>> projected = PCA(n_components=1).fit_transform(original) >>> s, pvalues = continuity(original, projected, return_pvalues=True) >>> min(s), max(s), s (0.8, 1.0, array([0.95, 0.8 , 0.95, 1. , 0.9 , 0.95, 0.95, 1. , 0.95, 1. , 0.85, 0.9 ])) >>> pvalues array([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan])Parameters
k
X- matrix with an instance by row in a given space (often the original one)
X_- matrix with an instance by row in another given space (often the projected one)
return_pvalues- Add dummy p-values to result (NaNs)
Returns
List of values, one for each instance
Expand source code
def continuity(X, X_, k=5, return_pvalues=False): """ 'continuity' of each point separately. >>> import numpy as np >>> from functools import partial >>> from scipy.stats import spearmanr, weightedtau >>> mean = (1, 2) >>> cov = eye(2) >>> rng = np.random.default_rng(seed=0) >>> original = rng.multivariate_normal(mean, cov, size=12) >>> s = continuity(original, original) >>> min(s), max(s), s (1.0, 1.0, array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])) >>> projected = PCA(n_components=2).fit_transform(original) >>> s = continuity(original, projected) >>> min(s), max(s), s (1.0, 1.0, array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])) >>> projected = PCA(n_components=1).fit_transform(original) >>> s, pvalues = continuity(original, projected, return_pvalues=True) >>> min(s), max(s), s (0.8, 1.0, array([0.95, 0.8 , 0.95, 1. , 0.9 , 0.95, 0.95, 1. , 0.95, 1. , 0.85, 0.9 ])) >>> pvalues array([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]) Parameters ---------- k X matrix with an instance by row in a given space (often the original one) X_ matrix with an instance by row in another given space (often the projected one) return_pvalues Add dummy p-values to result (NaNs) Returns ------- List of values, one for each instance """ return trustworthiness(X_, X, k, return_pvalues) def shuffle(x)-
Modify a sequence in-place by shuffling its contents.
This function only shuffles the array along the first axis of a multi-dimensional array. The order of sub-arrays is changed but their contents remains the same.
Note
New code should use the `~numpy.random.Generator.shuffle` method of a `~numpy.random.Generator` instance instead; please see the :ref:`random-quick-start`.
Parameters
x : ndarray or MutableSequence - The array, list or mutable sequence to be shuffled.
Returns
None
See Also
random.Generator.shuffle- which should be used for new code.
Examples
>>> arr = np.arange(10)
>>> np.random.shuffle(arr)
>>> arr
[1 7 5 2 9 4 3 6 0 8] # random
Multi-dimensional arrays are only shuffled along the first axis:
>>> arr = np.arange(9).reshape((3, 3)) >>> np.random.shuffle(arr) >>> arr array([[3, 4, 5], # random [6, 7, 8], [0, 1, 2]]) def trustworthiness(X, X_, k=5, return_pvalues=False)-
'trustworthiness' of each point separately.
>>> import numpy as np >>> from functools import partial >>> from scipy.stats import spearmanr, weightedtau >>> mean = (1, 2) >>> cov = eye(2) >>> rng = np.random.default_rng(seed=0) >>> original = rng.multivariate_normal(mean, cov, size=12) >>> s = trustworthiness(original, original) >>> min(s), max(s), s (1.0, 1.0, array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])) >>> projected = PCA(n_components=2).fit_transform(original) >>> s = trustworthiness(original, projected) >>> min(s), max(s), s (1.0, 1.0, array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])) >>> projected = PCA(n_components=1).fit_transform(original) >>> s, pvalues = trustworthiness(original, projected, return_pvalues=True) >>> min(s), max(s), s (0.75, 1.0, array([0.8 , 0.75, 0.9 , 1. , 0.85, 0.9 , 0.95, 1. , 0.95, 1. , 0.85, 0.8 ])) >>> pvalues array([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan])Parameters
k
X- matrix with an instance by row in a given space (often the original one)
X_- matrix with an instance by row in another given space (often the projected one)
return_pvalues- Add dummy p-values to result (NaNs)
Returns
List of values, one for each instance
Expand source code
def trustworthiness(X, X_, k=5, return_pvalues=False): """ 'trustworthiness' of each point separately. >>> import numpy as np >>> from functools import partial >>> from scipy.stats import spearmanr, weightedtau >>> mean = (1, 2) >>> cov = eye(2) >>> rng = np.random.default_rng(seed=0) >>> original = rng.multivariate_normal(mean, cov, size=12) >>> s = trustworthiness(original, original) >>> min(s), max(s), s (1.0, 1.0, array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])) >>> projected = PCA(n_components=2).fit_transform(original) >>> s = trustworthiness(original, projected) >>> min(s), max(s), s (1.0, 1.0, array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])) >>> projected = PCA(n_components=1).fit_transform(original) >>> s, pvalues = trustworthiness(original, projected, return_pvalues=True) >>> min(s), max(s), s (0.75, 1.0, array([0.8 , 0.75, 0.9 , 1. , 0.85, 0.9 , 0.95, 1. , 0.95, 1. , 0.85, 0.8 ])) >>> pvalues array([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]) Parameters ---------- k X matrix with an instance by row in a given space (often the original one) X_ matrix with an instance by row in another given space (often the projected one) return_pvalues Add dummy p-values to result (NaNs) Returns ------- List of values, one for each instance """ result, pvalues = [], [] n = len(X) for a, b in zip(X, X_): ra = rank_by_distances(X, a, "min") rb = rank_by_distances(X_, b, "min") a_neighbors = where(ra <= k) b_neighbors = where(rb <= k) U = setdiff1d(b_neighbors, a_neighbors) r = 1 - 2 * sum(ra[U] - k) / k / (2 * n - 3 * k - 1) result.append(r) result = np.array(result) if return_pvalues: return result, np.array([nan for _ in result]) return result