Source code for hettmansperger
"""
.. py:currentmodule:: hettmansperger
.. module:: hettmansperger
:platform: Unix, Windows
:synopsis: Module to calculate the Hettmansperger-Norton test for patterned alternatives in k-sample problems using pseudo-ranks.
.. moduleauthor:: Martin Happ <martin.happ@aon.at>
"""
import pandas as pd
import numpy as np
import PyNonpar.pseudorank as ps
import math
import scipy
from scipy.stats import norm
import collections
from collections import namedtuple
[docs]def hettmansperger_norton_test(data, group, alternative = "increasing", trend = None):
"""
Function to calculate the Hettmansperger-Norton test.
Args:
data (list(float)): data from all groups \n
group (list(int)): group factor \n
alternative (str): either 'increasing', 'decreasing' or 'custom' \n
trend (list(float)): a vector specifying the alternative; only used, if alternative = 'custom' \n
Returns:
str: chosen alternative \n
list(int): trend \n
float: test statistic \n
float: one sided p-value
"""
d = {'data': ps.psrank(data, group, ties_method = "average"), 'grp': group}
df = pd.DataFrame(data=d)
df['grp'] = df['grp'].astype(pd.api.types.CategoricalDtype(categories=np.unique(df['grp'].tolist()), ordered=True))
df = df.sort_values(['grp'])
# dff.assign(grp = dff['grp'][dff.index].values)
# orig_sort = df.index
df = df.reset_index(drop=True)
df['codes'] = df['grp'].cat.codes
# calculate group sizes, number of groups and unweighted relative effects
N = len(df['data'])
a = len(np.unique(df['codes']))
n = [0 for x in range(a)]
p_hat = [0 for x in range(a)]
for i in range(0, a):
tmp = df[df['codes'] == i]
n[i] = len(tmp)
p_hat[i] = 1/N*(np.mean(tmp['data']) - 0.5)
# define weight function for hypothesis testing
w = np.arange(1, (a + 1))
if alternative == "decreasing":
w = w[::-1]
elif alternative == "custom":
w = trend
# defining matrices and vectors for the computing test statistic
v1 = [1 for x in range(a)]
mS = np.diag(n)
mI = np.diag(v1)
mJ = np.outer(v1, v1)
W = np.dot( mS, ( mI - np.dot( 1/N*mJ, mS ) ) )
# variance estimator
sum_psranks_squared = 0
for i in range(N):
sum_psranks_squared += (df['data'].tolist()[i] - (N + 1)/2) ** 2
sum_psranks_squared = (1/N **2)*1/(N-1)*sum_psranks_squared
sigma_part = np.dot(W, w)
sigma_hat_squared = N*sum_psranks_squared*np.dot(np.dot( sigma_part.transpose(), np.linalg.inv(mS) ), sigma_part )
# test statistic
test_hettmansperger = math.sqrt(N)*np.dot(sigma_part.transpose(), p_hat)*1/math.sqrt(sigma_hat_squared)
p_value = 1 - scipy.stats.norm.cdf(test_hettmansperger)
result = namedtuple('HettmanspergerNortonResult', ('alternative', 'weight', 'statistic', 'pvalue'))
output = result(alternative, w, test_hettmansperger, p_value)
return output