19-01-2021 Guido Cattani
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import matplotlib.patches as mpatches
from scipy.stats import mannwhitneyu as mannwhitneyu
def read_AC_thresholds():
f_in = '/media/guido/LACIE/Cingle_Guido/Master/AC_thresholds.xlsx'
p_in = Path(f_in)
df = pd.read_excel(p_in, sheet_name=0, header=0, nrows=85)
df = df.drop(['Unnamed: 0'], axis=1)
type_dict = {'Study_ID': 'str', 'Device': 'str',
'AC_125':'float32', 'AC_250':'float32', 'AC_500':'float32',
'AC_1000': 'float32', 'AC_2000': 'float32',
'AC_4000': 'float32', 'AC_8000': 'float32'}
df = df.astype(type_dict)
new_names = {
'AC_125':'AC_125_Hz', 'AC_250':'AC_250_Hz', 'AC_500':'AC_500_Hz',
'AC_1000': 'AC_1000_Hz', 'AC_2000': 'AC_2000_Hz',
'AC_4000': 'AC_4000_Hz', 'AC_8000': 'AC_8000_Hz'}
df.rename(mapper=new_names, axis=1, inplace=True)
df = df.fillna(pd.NA)
return df
def present_AC_thresholds():
ac = read_AC_thresholds()
ac1 = ac[['Device', 'AC_500_Hz', 'AC_1000_Hz', 'AC_2000_Hz', 'AC_4000_Hz']].copy()
ac1 = ac1.dropna()
pta = ac1.iloc[:, 1:].mean(axis='columns')
ac1.insert(5, column= 'PTA (0.5-4 kHz)', value=pta)
pta_hf = ac1.iloc[:, 2:].mean(axis='columns')
ac1.insert(6, column= 'PTA_HF (1-4 kHz)', value=pta_hf)
return ac1
def select_bp110(df):
# select BP110 data
is_bp110 = df['Device']=='BP110'
df_bp110 = df[is_bp110]
df_bp110.pop('Device')
return(df_bp110)
def select_bh5(df):
# select BAHA5P data
is_baha5p = df['Device']=='BAHA5P'
df_baha5p = df[is_baha5p]
df_baha5p.pop('Device')
return(df_baha5p)
act = present_AC_thresholds()
# group data by device type and perform calculation of quantiles 10, 50, 90
dvc = act.groupby('Device')
quantiles = [0.10, 0.50, 0.90]
q = dvc.quantile(q=quantiles)
q = q.reset_index()
q = q.drop(['Device', 'level_1'], axis=1)
diq = {0:'BAHA5P, P10', 1:'BAHA5P, P50', 2:'BAHA5P, P90',
3:'BP110, P10', 4:'BP110, P50', 5:'BP110, P90'}
q = q.rename(index=diq)
q = q.reindex(['BP110, P10', 'BP110, P50', 'BP110, P90',
'BAHA5P, P10', 'BAHA5P, P50', 'BAHA5P, P90'])
# AC thresholds filtered by device
act_bh5 = select_bh5(act)
act_bp110 = select_bp110(act)
len(act_bp110), len(act_bh5)
(49, 36)
# compare distribution thresholds group BAHA5P vs. BP110 with Mann Whitney U test
mwu = dict()
f = ['AC_500_Hz', 'AC_1000_Hz', 'AC_2000_Hz', 'AC_4000_Hz', 'PTA (0.5-4 kHz)', 'PTA_HF (1-4 kHz)']
for i in range(6):
a = act_bh5.iloc[:, i]
b = act_bp110.iloc[:, i]
u_statistic, pVal = mannwhitneyu(a , b, use_continuity=False, alternative='two-sided') # scipy.stats mannwhitneyu test
pVal = round(pVal, 15)
st = {f[i]: [u_statistic, pVal]}
mwu.update(st)
u_test = pd.DataFrame.from_dict(mwu, dtype='float')
diu = {0: 'Mann-Whitney U statistic', 1: 'p-value (two-sided)'}
u_test = u_test.rename(index=diu)
# make new dataFrame with percentiles & test results
analysis_ac = pd.concat([q, u_test])
analysis_ac = analysis_ac.round(decimals=3)
analysis_ac
AC_500_Hz | AC_1000_Hz | AC_2000_Hz | AC_4000_Hz | PTA (0.5-4 kHz) | PTA_HF (1-4 kHz) | |
---|---|---|---|---|---|---|
BP110, P10 | 5.000 | 5.000 | 5.000 | 10.000 | 7.500 | 6.875 |
BP110, P50 | 10.000 | 10.000 | 10.000 | 25.000 | 16.250 | 17.188 |
BP110, P90 | 25.000 | 20.000 | 25.000 | 55.000 | 25.000 | 27.625 |
BAHA5P, P10 | 5.000 | 0.000 | 5.000 | 7.500 | 6.250 | 6.562 |
BAHA5P, P50 | 10.000 | 10.000 | 10.000 | 20.000 | 12.500 | 13.125 |
BAHA5P, P90 | 20.000 | 20.000 | 20.000 | 42.500 | 23.750 | 25.938 |
Mann-Whitney U statistic | 868.000 | 851.500 | 739.000 | 693.000 | 655.000 | 643.500 |
p-value (two-sided) | 0.898 | 0.781 | 0.194 | 0.091 | 0.043 | 0.034 |
# write to xlsx file
analysis_ac.to_excel("/media/guido/LACIE/Cingle_Guido/Analysis_results/analysis_AC_thresholds.xlsx",
sheet_name='AC_thresholds')
bp110_thr = act_bp110.iloc[ : , :4]
bh5_thr = act_bh5.iloc[ : , :4]
# convert Pandas object in numpy array
frqc = np. array([500, 1000, 2000, 4000] , dtype=int)
bp110 = bp110_thr.T.to_numpy()
bh5 = bh5_thr.T.to_numpy()
p10_bp110 = q.iloc[0, :4].T.to_numpy()
median_bp110 = q.iloc[1, :4].T.to_numpy()
p90_bp110 = q.iloc[2, :4].T.to_numpy()
p10_bh5 = q.iloc[3, :4].T.to_numpy()
median_bh5 = q.iloc[4, :4].T.to_numpy()
p90_bh5 = q.iloc[5, :4].T.to_numpy()
# matplotlib line graph of output BAHA5P vs. BP110
plt.style.use('seaborn-paper')
fig, ax = plt.subplots()
ax.set(xlabel='f [Hz]', ylabel='Hearing Loss [dB HL]',
title='Best Ear Hearing Loss of group BAHA5P vs. BP110')
ax.set_xscale('log')
ax.set_xticks([500, 1000, 2000, 4000])
ax.set_xticklabels(['500', '1000', '2000', '4000'])
ax.minorticks_off()
ax.invert_yaxis()
# plot lines based on numpy arrays
ax.plot(frqc, bp110, color="dimgrey", alpha=0.5, label='Output FL BP110')
ax.plot(frqc, bh5, color="silver", alpha=0.5, label='Output FL BAHA5P')
ax.plot(frqc, median_bp110, color='black', marker='o', linestyle='dashed', linewidth=3, markersize=8)
ax.plot(frqc, p10_bp110, color='black', marker='o', linestyle='dotted', linewidth=3, markersize=8)
ax.plot(frqc, p90_bp110, color='black', marker='o', linestyle='dashdot', linewidth=3, markersize=8)
ax.plot(frqc, median_bh5, color='black', marker='X', linestyle='dashed', linewidth=3, markersize=9)
ax.plot(frqc, p10_bh5, color='black', marker='X', linestyle='dotted', linewidth=3, markersize=9)
ax.plot(frqc, p90_bh5, color='black', marker='X', linestyle='dashdot', linewidth=3, markersize=9)
# make a legend
leg_line_BH5 = mlines.Line2D([], [], color="silver", label='Output BAHA5P')
leg_line_BP110 = mlines.Line2D([], [], color="dimgrey", label='Output BP110')
leg_p90_bh5 = mlines.Line2D([], [], color='black', marker='X', linestyle='dashdot',
linewidth=3, markersize=9, label='P90 BAHA5P')
leg_med_bh5 = mlines.Line2D([], [], color='black', marker='X', linestyle='dashed',
linewidth=3, markersize=9, label='Median BAHA5P')
leg_p10_bh5 = mlines.Line2D([], [], color='black', marker='X', linestyle='dotted',
linewidth=3, markersize=9, label='P10 BAHA5P')
leg_p90_bp110 = mlines.Line2D([], [], color='black', marker='o', linestyle='dashdot',
linewidth=3, markersize=8, label='P90 BP110')
leg_med_bp110 = mlines.Line2D([], [], color='black', marker='o', linestyle='dashed',
linewidth=3, markersize=8, label='Median BP110')
leg_p10_bp110 = mlines.Line2D([], [], color='black', marker='o', linestyle='dotted',
linewidth=3, markersize=8, label='P10 BP110')
plt.legend(handles=[leg_line_BH5, leg_line_BP110, leg_p10_bh5, leg_p10_bp110, leg_med_bh5,
leg_med_bp110, leg_p90_bh5, leg_p90_bp110], bbox_to_anchor=(1.03, 1),
loc='upper left', labelspacing=1.,handlelength=7.5)
# save the figure
plt.savefig('/media/guido/LACIE/Cingle_Guido/Analysis_results/AC_thresholds.tiff',
transparent=False, dpi=500, bbox_inches="tight")
# plot the figure
plt.plot()
[]