Analysis of the characteristics of the lost-data population vs. the complete-data population in the BP110 group
1 March 2021 Guido Cattani
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import mannwhitneyu as mannwhitneyu
def read_BC_thresholds():
    """Load the BC direct thresholds workbook and keep only the BP110 rows.

    Reads the first 85 data rows of the Excel file, drops the unnamed
    leading column, indexes the table by 'Study_ID' (duplicates raise an
    error because of ``verify_integrity=True``) and filters on the BP110
    device via ``select_bp110``.

    Per the original author's note the values are thresholds in dB FL.

    Returns:
        DataFrame indexed by 'Study_ID' without the 'Device' column.
    """
    workbook = Path('/media/guido/LACIE/Cingle_Guido/Master/BC_direct_thresholds.xlsx')
    table = pd.read_excel(workbook, header=0, nrows=85)
    table = table.drop(columns=['Unnamed: 0']).fillna(pd.NA)
    table = table.set_index('Study_ID', drop=True, append=False, verify_integrity=True)
    return select_bp110(table)
def read_AC_thresholds():
    """Load the AC thresholds workbook and keep only the BP110 rows.

    Reads the first 85 data rows of the Excel file, drops the unnamed
    leading column, indexes the table by 'Study_ID' (duplicates raise an
    error because of ``verify_integrity=True``) and filters on the BP110
    device via ``select_bp110``.

    Per the original author's note the values are thresholds in dB SPL
    at the eardrum.

    Returns:
        DataFrame indexed by 'Study_ID' without the 'Device' column.
    """
    workbook = Path('/media/guido/LACIE/Cingle_Guido/Master/AC_thresholds.xlsx')
    table = pd.read_excel(workbook, header=0, nrows=85)
    table = table.drop(columns=['Unnamed: 0']).fillna(pd.NA)
    table = table.set_index('Study_ID', drop=True, append=False, verify_integrity=True)
    return select_bp110(table)
def read_BCD_output_65():
    """Load the 'BCD_output_65' sheet and keep only the BP110 rows.

    Reads the first 85 data rows of that sheet from the BCD band output
    workbook, drops the unnamed leading column, indexes the table by
    'Study_ID' (duplicates raise an error because of
    ``verify_integrity=True``) and filters on the BP110 device via
    ``select_bp110``.

    NOTE(review): the original comment said "threshold in dB FL", which
    looks copied from the BC threshold reader; this sheet presumably holds
    BCD output levels -- confirm the unit.

    Returns:
        DataFrame indexed by 'Study_ID' without the 'Device' column.
    """
    workbook = Path('/media/guido/LACIE/Cingle_Guido/Master/BCD_band_output.xlsx')
    table = pd.read_excel(workbook, sheet_name='BCD_output_65', header=0, nrows=85)
    table = table.drop(columns=['Unnamed: 0']).fillna(pd.NA)
    table = table.set_index('Study_ID', drop=True, append=False, verify_integrity=True)
    return select_bp110(table)
def read_BCD_output_55():
    """Load the 'BCD_output_55' sheet and keep only the BP110 rows.

    Reads the first 85 data rows of that sheet from the BCD band output
    workbook, drops the unnamed leading column, indexes the table by
    'Study_ID' (duplicates raise an error because of
    ``verify_integrity=True``) and filters on the BP110 device via
    ``select_bp110``.

    NOTE(review): the original comment said "threshold in dB FL", which
    looks copied from the BC threshold reader; this sheet presumably holds
    BCD output levels -- confirm the unit.

    Returns:
        DataFrame indexed by 'Study_ID' without the 'Device' column.
    """
    workbook = Path('/media/guido/LACIE/Cingle_Guido/Master/BCD_band_output.xlsx')
    table = pd.read_excel(workbook, sheet_name='BCD_output_55', header=0, nrows=85)
    table = table.drop(columns=['Unnamed: 0']).fillna(pd.NA)
    table = table.set_index('Study_ID', drop=True, append=False, verify_integrity=True)
    return select_bp110(table)
def select_bp110(df):
    """Return the rows of *df* whose 'Device' equals 'BP110'.

    The 'Device' column is removed from the result; *df* itself is left
    untouched. Raises KeyError if *df* has no 'Device' column.
    """
    bp110_rows = df.loc[df['Device'] == 'BP110']
    return bp110_rows.drop(columns='Device')
def warning_loss(df):
    """Print whether every row of *df* is free of missing values.

    Prints a confirmation when no row contains a missing value, otherwise
    a warning. Returns None.
    """
    row_is_complete = df.notna().all(axis=1)
    if row_is_complete.all():
        print('Fine: these data are complete')
    else:
        print ('Attention: loss of data!!!')
def idx_loss():
    """Return the index labels of BP110 subjects with incomplete BC thresholds.

    The BC direct thresholds are re-read from disk on every call; a row
    counts as incomplete when at least one of its values is missing.
    """
    thresholds = read_BC_thresholds()
    has_missing = thresholds.isna().any(axis=1)
    return thresholds.loc[has_missing].index
def idx_complete():
    """Return the index labels of BP110 subjects with complete BC thresholds.

    The BC direct thresholds are re-read from disk on every call; a row
    counts as complete when none of its values is missing.
    """
    thresholds = read_BC_thresholds()
    # "all values present" is the same as "no value missing"
    nothing_missing = ~thresholds.isna().any(axis=1)
    return thresholds.loc[nothing_missing].index
def split_dataframe(df):
    """Split *df* into (lost, complete) parts by BC-threshold completeness.

    Rows are selected by label: the first frame holds the subjects returned
    by ``idx_loss()``, the second those returned by ``idx_complete()``.
    A label absent from *df* raises KeyError.
    """
    lost_ids = idx_loss()
    complete_ids = idx_complete()
    return df.loc[lost_ids, :], df.loc[complete_ids, :]
def mwu_test(df1,df2):
    """Run a two-sided Mann-Whitney U test on each pair of columns.

    Columns are paired by position: column i of *df1* is compared with
    column i of *df2*, and the result is labelled with the name of the
    *df1* column.

    Returns:
        DataFrame with one column per tested variable and two rows:
        'Mann-Whitney U statistic' and 'p-value (two-sided)' (the p-value
        is rounded to three decimals).
    """
    results = {}
    for position, label in enumerate(df1.columns.values.tolist()):
        statistic, p_value = mannwhitneyu(
            df1.iloc[:, position],
            df2.iloc[:, position],
            use_continuity=False,
            alternative='two-sided',
        )
        results[label] = [statistic, round(p_value, 3)]
    table = pd.DataFrame.from_dict(results, dtype='float')
    row_labels = {0: 'Mann-Whitney U statistic', 1: 'p-value (two-sided)'}
    return table.rename(index=row_labels)
def bct_mwu():
    """Run a two-sided Mann-Whitney U test on the BC direct thresholds.

    Splits the module-level ``bct`` frame into the lost and complete groups
    with ``split_dataframe`` and compares them column by column (paired by
    position). Missing values are removed from each lost-group column
    before testing.

    Returns:
        DataFrame with one column per tested variable and two rows:
        'Mann-Whitney U statistic' and 'p-value (two-sided)' (the p-value
        is rounded to three decimals).
    """
    bct_lost, bct_complete = split_dataframe(bct)
    mwu = {}
    for i, label in enumerate(bct_lost.columns.values.tolist()):
        # dropna() returns a new Series; the previous in-place drop modified
        # a Series sliced out of the frame, which depends on view/copy
        # semantics and can trigger SettingWithCopyWarning.
        a = bct_lost.iloc[:, i].dropna()
        b = bct_complete.iloc[:, i]
        u_statistic, p_val = mannwhitneyu(
            a, b, use_continuity=False, alternative='two-sided'
        )
        mwu[label] = [u_statistic, round(p_val, 3)]
    u_test = pd.DataFrame.from_dict(mwu, dtype='float')
    diu = {0: 'Mann-Whitney U statistic', 1: 'p-value (two-sided)'}
    return u_test.rename(index=diu)
# Number of subjects in the BP110 group whose BC direct thresholds have no missing values
len(idx_complete())
35
# Number of subjects in the BP110 group whose BC direct thresholds have at least one missing value
len(idx_loss())
14
BC direct thresholds
# Load the BC direct thresholds (BP110 group) and report whether any value is missing
bct = read_BC_thresholds() # BC direct thresholds data; also read as a global by bct_mwu()
warning_loss(bct)
Attention: loss of data!!!
# Compare lost vs. complete subjects on each BC threshold column (Mann-Whitney U, two-sided)
bct_mwu()
BCd_250 | BCd_500 | BCd_1000 | BCd_1500 | BCd_2000 | BCd_3000 | BCd_4000 | |
---|---|---|---|---|---|---|---|
Mann-Whitney U statistic | 186.000 | 189.000 | 223.000 | 102.000 | 208.500 | 162.000 | 241.500 |
p-value (two-sided) | 0.323 | 0.363 | 0.621 | 0.134 | 0.412 | 0.895 | 0.743 |
AC thresholds
# Load the AC thresholds (BP110 group) and report whether any value is missing
act = read_AC_thresholds()
warning_loss(act)
Fine: these data are complete
# Split the AC thresholds by BC-threshold completeness and compare both groups column by column
act_lost, act_complete = split_dataframe(act)
mwu_test(act_lost, act_complete)
AC_125 | AC_250 | AC_500 | AC_1000 | AC_2000 | AC_4000 | AC_8000 | |
---|---|---|---|---|---|---|---|
Mann-Whitney U statistic | 250.000 | 260.500 | 246.500 | 243.500 | 240.500 | 301.500 | 257.00 |
p-value (two-sided) | 0.908 | 0.726 | 0.973 | 0.973 | 0.919 | 0.209 | 0.79 |
BCD output for input 65 dB (ISTS)
# Load the 'BCD_output_65' sheet (BP110 group) and report whether any value is missing
out65 = read_BCD_output_65()
warning_loss(out65)
Fine: these data are complete
# Split the 65 dB output data by BC-threshold completeness and compare both groups column by column
out65_lost, out65_complete = split_dataframe(out65)
mwu_test(out65_lost, out65_complete)
250_Hz | 315_Hz | 400_Hz | 500_Hz | 630_Hz | 800_Hz | 1000_Hz | 1250_Hz | 1600_Hz | 2000_Hz | 2500_Hz | 3150_Hz | 4000_Hz | 5000_Hz | 6300_Hz | 8000_Hz | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Mann-Whitney U statistic | 221.000 | 253.500 | 241.500 | 284.500 | 281.500 | 273.000 | 264.500 | 228.500 | 225.000 | 219.500 | 201.500 | 253.000 | 266.000 | 271.000 | 257.500 | 273.000 |
p-value (two-sided) | 0.595 | 0.851 | 0.938 | 0.382 | 0.419 | 0.535 | 0.665 | 0.714 | 0.657 | 0.572 | 0.335 | 0.859 | 0.641 | 0.565 | 0.782 | 0.535 |
BCD output for input 55 dB (ISTS)
# Load the 'BCD_output_55' sheet (BP110 group) and report whether any value is missing
out55 = read_BCD_output_55()
warning_loss(out55)
Fine: these data are complete
# Split the 55 dB output data by BC-threshold completeness and compare both groups.
out55_lost, out55_complete = split_dataframe(out55)
# Fixed copy-paste error: the 65 dB frames were passed here, so this cell merely
# reproduced the 65 dB table. Re-run the cell to obtain the actual 55 dB results.
mwu_test(out55_lost, out55_complete)
250_Hz | 315_Hz | 400_Hz | 500_Hz | 630_Hz | 800_Hz | 1000_Hz | 1250_Hz | 1600_Hz | 2000_Hz | 2500_Hz | 3150_Hz | 4000_Hz | 5000_Hz | 6300_Hz | 8000_Hz | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Mann-Whitney U statistic | 221.000 | 253.500 | 241.500 | 284.500 | 281.500 | 273.000 | 264.500 | 228.500 | 225.000 | 219.500 | 201.500 | 253.000 | 266.000 | 271.000 | 257.500 | 273.000 |
p-value (two-sided) | 0.595 | 0.851 | 0.938 | 0.382 | 0.419 | 0.535 | 0.665 | 0.714 | 0.657 | 0.572 | 0.335 | 0.859 | 0.641 | 0.565 | 0.782 | 0.535 |