31 January 2021 Guido Cattani
from pathlib import Path
import pandas as pd
import numpy as np
from scipy.stats import mannwhitneyu as mannwhitneyu
from scipy.stats import fisher_exact as fisher_exact
def select_bp110(df):
# select BP110 data
is_bp110 = df['Device']=='BP110'
df_bp110 = df[is_bp110]
df_bp110.pop('Device')
return(df_bp110)
def select_bh5(df):
# select BAHA5P data
is_baha5p = df['Device']=='BAHA5P'
df_baha5p = df[is_baha5p]
df_baha5p.pop('Device')
return(df_baha5p)
def read_age():
f_in = '/media/guido/LACIE/Cingle_Guido/Master/Age&Sex.xlsx'
p_in = Path(f_in)
df = pd.read_excel(p_in, header=0, nrows=85)
df = df.drop(['Unnamed: 0'], axis=1)
df = df.fillna(pd.NA)
return df
data = read_age()
len(data)
85
age = data.drop(['Study_ID', 'Sex'], axis=1)
age
Device | Age_BCD_fitting | |
---|---|---|
0 | BP110 | 61.424658 |
1 | BP110 | 52.087671 |
2 | BP110 | 71.939726 |
3 | BP110 | 72.210959 |
4 | BP110 | 35.621918 |
... | ... | ... |
80 | BAHA5P | 58.093151 |
81 | BAHA5P | 57.019178 |
82 | BAHA5P | 49.906849 |
83 | BAHA5P | 70.356164 |
84 | BAHA5P | 26.904110 |
85 rows × 2 columns
# group data by device type and perform calculation of quantiles 10, 50, 90
dvc = age.groupby('Device')
quantiles = [0.10, 0.50, 0.90]
q = dvc.quantile(q=quantiles)
q= q.round(decimals=1)
q = q.reset_index()
diq = {0:'BAHA5P P10', 1:'BAHA5P P50', 2:'BAHA5P P90',
3:'BP110 P10', 4:'BP110 P50', 5:'BP110 P90'}
q = q.rename(index=diq)
q = q.drop(['Device', 'level_1'], axis=1)
q
Age_BCD_fitting | |
---|---|
BAHA5P P10 | 30.9 |
BAHA5P P50 | 52.4 |
BAHA5P P90 | 67.9 |
BP110 P10 | 41.6 |
BP110 P50 | 54.0 |
BP110 P90 | 69.6 |
age_bp110 = select_bp110(age)
age_bh5 = select_bh5(age)
len(age_bp110), len(age_bh5)
(49, 36)
bh5 = age_bh5.T.to_numpy()
bp110 = age_bp110.T.to_numpy()
d1 = dict()
d2 = dict()
for i in range(1):
(stat, pvalue) = mannwhitneyu(bp110[i], bh5[i], use_continuity=False, alternative='two-sided')
d1.update({i : stat})
d2.update({i : pvalue})
mwu = pd.DataFrame.from_dict([d1, d2])
rws = {0: 'Mann-Whitney U statistic', 1: 'p-value (two-sided)'}
clmns = {0 : 'Age_BCD_fitting'}
mwu.rename(index = rws, columns = clmns, inplace = True)
mwu = mwu.round(decimals=4)
mwu
Age_BCD_fitting | |
---|---|
Mann-Whitney U statistic | 958.0000 |
p-value (two-sided) | 0.4991 |
analysis_output = pd.concat([q, mwu])
analysis_output = analysis_output.round(decimals=3)
analysis_output
Age_BCD_fitting | |
---|---|
BAHA5P P10 | 30.900 |
BAHA5P P50 | 52.400 |
BAHA5P P90 | 67.900 |
BP110 P10 | 41.600 |
BP110 P50 | 54.000 |
BP110 P90 | 69.600 |
Mann-Whitney U statistic | 958.000 |
p-value (two-sided) | 0.499 |
# write to xlsx file
analysis_output.to_excel("/media/guido/LACIE/Cingle_Guido/Analysis_results/analysis_age.xlsx",
sheet_name='age')
sex = data.drop(['Study_ID', 'Age_BCD_fitting'], axis=1)
sex = sex.astype("category")
sex
Device | Sex | |
---|---|---|
0 | BP110 | 2 |
1 | BP110 | 1 |
2 | BP110 | 1 |
3 | BP110 | 1 |
4 | BP110 | 2 |
... | ... | ... |
80 | BAHA5P | 2 |
81 | BAHA5P | 1 |
82 | BAHA5P | 2 |
83 | BAHA5P | 1 |
84 | BAHA5P | 2 |
85 rows × 2 columns
grouped = sex.groupby(['Device', 'Sex'])
cnt = grouped.size().to_frame(name='Counts')
cnt = cnt.reset_index()
cnt
Device | Sex | Counts | |
---|---|---|---|
0 | BAHA5P | 1 | 14 |
1 | BAHA5P | 2 | 22 |
2 | BP110 | 1 | 27 |
3 | BP110 | 2 | 22 |
# write to xlsx file
cnt.to_excel("/media/guido/LACIE/Cingle_Guido/Analysis_results/analysis_sex.xlsx",
sheet_name='sex_counts')
counts = cnt['Counts']
cont_table = [[counts[0], counts[1]], [counts[2], counts[3]]]
oddsratio, pvalue = fisher_exact(cont_table)
pvalue
0.18801874699748428
oddsratio
0.5185185185185185
cont_table = [[counts[0], counts[2]], [counts[1], counts[3]]]
oddsratio, pvalue = fisher_exact(cont_table)
pvalue
0.1880187469974831