15 November 2020 Guido Cattani
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import matplotlib.patches as mpatches
from scipy.stats import mannwhitneyu as mannwhitneyu
def read_BC_direct():
f_in = '/media/guido/LACIE/Cingle_Guido/Master/BC_direct_thresholds.xlsx'
p_in = Path(f_in)
df = pd.read_excel(p_in, sheet_name=0, header=0, index_col= 1, nrows=85)
df = df.drop(['Unnamed: 0'], axis=1)
type_dict = {'Device': 'str',
'BCd_250':'float32', 'BCd_250':'float32', 'BCd_500':'float32',
'BCd_1000': 'float32', 'BCd_1500': 'float32', 'BCd_2000': 'float32', 'BCd_3000': 'float32',
'BCd_4000': 'float32'}
df = df.astype(type_dict)
df = df.fillna(pd.NA)
df = df.rename(columns={'Device': 'BCD_type'})
return df
def present_BCdirect_thresholds():
bc = read_BC_direct()
bc1 = bc[['BCD_type', 'BCd_500', 'BCd_1000', 'BCd_2000', 'BCd_4000']].copy()
pta = bc1.iloc[:, 1:].mean(axis='columns')
bc1.dropna(inplace=True)
bc1.insert(5, column= 'PTA (0.5-4 kHz)', value=pta)
pta_hf = bc1.iloc[:, 2:].mean(axis='columns')
bc1.insert(6, column= 'PTA_HF (1-4 kHz)', value=pta_hf)
bc1.dropna(inplace=True)
type_dict = {'BCD_type': 'str', 'BCd_500':'float32', 'BCd_1000': 'float32',
'BCd_2000': 'float32', 'BCd_4000': 'float32'}
bc1 = bc1.astype(type_dict)
return bc1
def select_bp110(df):
# select BP110 data
is_bp110 = df['BCD_type']=='BP110'
df_bp110 = df[is_bp110]
df_bp110.pop('BCD_type')
return(df_bp110)
def select_bh5(df):
# select BAHA5P data
is_baha5p = df['BCD_type']=='BAHA5P'
df_baha5p = df[is_baha5p]
df_baha5p.pop('BCD_type')
return(df_baha5p)
bct = present_BCdirect_thresholds()
len(bct)
83
# select BC direct threshold by device type
bct_bh5 = select_bh5(bct)
bct_bp110 = select_bp110(bct)
len(bct_bh5)
36
len(bct_bp110)
47
dvc = bct.groupby('BCD_type')
quantiles = [0.10, 0.50, 0.90]
q = dvc.quantile(q=quantiles)
# group data by device type and perform calculation of quantiles 10, 50, 90
dvc = bct.groupby('BCD_type')
quantiles = [0.10, 0.50, 0.90]
q = dvc.quantile(q=quantiles)
q = q.reset_index()
q = q.drop(['BCD_type', 'level_1'], axis=1)
diq = {0:'BAHA5P, P10', 1:'BAHA5P, P50', 2:'BAHA5P, P90',
3:'BP110, P10', 4:'BP110, P50', 5:'BP110, P90'}
q = q.rename(index=diq)
q = q.reindex(['BP110, P10', 'BP110, P50', 'BP110, P90',
'BAHA5P, P10', 'BAHA5P, P50', 'BAHA5P, P90'])
# compare distribution thresholds group BAHA5P vs. BP110 with Mann Whitney U test
mwu = dict()
f = ['BCd_500', 'BCd_1000', 'BCd_2000', 'BCd_4000', 'PTA (0.5-4 kHz)', 'PTA_HF (1-4 kHz)']
for i in range(6):
a = bct_bh5.iloc[:, i]
b = bct_bp110.iloc[:, i]
u_statistic, pVal = mannwhitneyu(a , b, use_continuity=False, alternative='two-sided') # scipy.stats mannwhitneyu test
pVal = round(pVal, 15)
st = {f[i]: [u_statistic, pVal]}
mwu.update(st)
u_test = pd.DataFrame.from_dict(mwu, dtype='float')
diu = {0: 'Mann-Whitney U statistic', 1: 'p-value (two-sided)'}
u_test = u_test.rename(index=diu)
# make new dataFrame with percentiles & test results
analysis_bc = pd.concat([q, u_test])
analysis_bc = analysis_bc.round(decimals=3)
analysis_bc
BCd_500 | BCd_1000 | BCd_2000 | BCd_4000 | PTA (0.5-4 kHz) | PTA_HF (1-4 kHz) | |
---|---|---|---|---|---|---|
BP110, P10 | 15.000 | 15.000 | 23.000 | 28.000 | 26.250 | 25.625 |
BP110, P50 | 25.000 | 25.000 | 30.000 | 40.000 | 32.500 | 34.375 |
BP110, P90 | 37.000 | 40.000 | 45.000 | 67.000 | 38.750 | 40.438 |
BAHA5P, P10 | 20.000 | 17.500 | 20.000 | 25.000 | 24.375 | 24.062 |
BAHA5P, P50 | 32.500 | 22.500 | 30.000 | 35.000 | 30.625 | 29.844 |
BAHA5P, P90 | 42.500 | 32.500 | 42.500 | 52.500 | 40.625 | 40.781 |
Mann-Whitney U statistic | 1103.000 | 802.500 | 686.000 | 676.500 | 701.000 | 621.000 |
p-value (two-sided) | 0.016 | 0.682 | 0.134 | 0.115 | 0.182 | 0.039 |
# write to xlsx file
analysis_bc.to_excel("/media/guido/LACIE/Cingle_Guido/Analysis_results/analysis_BCdirect_thresholds.xlsx",
sheet_name='BC_direct_thresholds')
bp110_thr = bct_bp110.iloc[ : , :4]
bh5_thr = bct_bh5.iloc[ : , :4]
# convert Pandas object in numpy array
frqc = np.array([500, 1000, 2000, 4000] , dtype=int)
bp110 = bp110_thr.T.to_numpy()
bh5 = bh5_thr.T.to_numpy()
p10_bp110 = q.iloc[0, :4].T.to_numpy()
median_bp110 = q.iloc[1, :4].T.to_numpy()
p90_bp110 = q.iloc[2, :4].T.to_numpy()
p10_bh5 = q.iloc[3, :4].T.to_numpy()
median_bh5 = q.iloc[4, :4].T.to_numpy()
p90_bh5 = q.iloc[5, :4].T.to_numpy()
# matplotlib line graph of output BAHA5P vs. BP110
plt.style.use('seaborn-paper')
fig, ax = plt.subplots()
ax.set(xlabel='f [Hz]', ylabel='Hearing Loss [dB HL]',
title='BC-direct thresholds of group BAHA5P vs. BP110')
ax.set_xscale('log')
ax.set_xticks([500, 1000, 2000, 4000])
ax.set_xticklabels(['500', '1000', '2000', '4000'])
ax.minorticks_off()
ax.invert_yaxis()
# plot lines based on numpy arrays
ax.plot(frqc, bp110, color="dimgrey", alpha=0.5, label='Output FL BP110')
ax.plot(frqc, bh5, color="silver", alpha=0.5, label='Output FL BAHA5P')
ax.plot(frqc, median_bp110, color='black', marker='o', linestyle='dashed', linewidth=3, markersize=8)
ax.plot(frqc, p10_bp110, color='black', marker='o', linestyle='dotted', linewidth=3, markersize=8)
ax.plot(frqc, p90_bp110, color='black', marker='o', linestyle='dashdot', linewidth=3, markersize=8)
ax.plot(frqc, median_bh5, color='black', marker='X', linestyle='dashed', linewidth=3, markersize=9)
ax.plot(frqc, p10_bh5, color='black', marker='X', linestyle='dotted', linewidth=3, markersize=9)
ax.plot(frqc, p90_bh5, color='black', marker='X', linestyle='dashdot', linewidth=3, markersize=9)
# make a legend
leg_line_BH5 = mlines.Line2D([], [], color="silver", label='Output BAHA5P')
leg_line_BP110 = mlines.Line2D([], [], color="dimgrey", label='Output BP110')
leg_p90_bh5 = mlines.Line2D([], [], color='black', marker='X', linestyle='dashdot',
linewidth=3, markersize=9, label='P90 BAHA5P')
leg_med_bh5 = mlines.Line2D([], [], color='black', marker='X', linestyle='dashed',
linewidth=3, markersize=9, label='Median BAHA5P')
leg_p10_bh5 = mlines.Line2D([], [], color='black', marker='X', linestyle='dotted',
linewidth=3, markersize=9, label='P10 BAHA5P')
leg_p90_bp110 = mlines.Line2D([], [], color='black', marker='o', linestyle='dashdot',
linewidth=3, markersize=8, label='P90 BP110')
leg_med_bp110 = mlines.Line2D([], [], color='black', marker='o', linestyle='dashed',
linewidth=3, markersize=8, label='Median BP110')
leg_p10_bp110 = mlines.Line2D([], [], color='black', marker='o', linestyle='dotted',
linewidth=3, markersize=8, label='P10 BP110')
plt.legend(handles=[leg_line_BH5, leg_line_BP110, leg_p10_bh5, leg_p10_bp110, leg_med_bh5,
leg_med_bp110, leg_p90_bh5, leg_p90_bp110], bbox_to_anchor=(1.03, 1),
loc='upper left', labelspacing=1.,handlelength=7.5)
# save the figure
plt.savefig('/media/guido/LACIE/Cingle_Guido/Analysis_results/BC_direct_thresholds.tiff',
transparent=False, dpi=500, bbox_inches="tight")
# plot the figure
plt.plot()
[]