In [None]:
import os
import glob

import numpy as np
import pandas as pd

# DATA

In [None]:
# Glob pattern selecting every sub-directory directly below ./data;
# the loaders below append a file pattern ('*.csv', '*.xls') to it.
source_dir = os.path.join('data', '*')

# Importing all experimental data

In [None]:
# Load every CSV matched by <source_dir>/*.csv and sort it into one of three
# tables according to its file-name prefix:
#   'err...' -> error-bar end points (df_list_err -> result_err)
#   'coe...' -> fit coefficients     (df_list_coe -> result_coe)
#   other    -> x/y data points      (df_list     -> result, columns dose/sf)
# The energy is parsed from the file name; the article is the name of the
# folder that contains the file.

def _parse_decimal(text):
    """Convert a number written with a decimal comma (e.g. ``'0,5'``) to float."""
    return float(text.replace(',', '.'))


def _energy_key(energy):
    """Return ``energy * 1000`` as the nearest integer.

    Rounding (instead of truncating with ``int``) matters because the product
    is a binary float: ``1.001 * 1000`` evaluates to ``1000.9999999999999``,
    which ``int`` alone would turn into 1000 instead of 1001.
    NOTE(review): the factor 1000 presumably converts MeV to keV - confirm.
    """
    return int(round(energy * 1000))


df_list = []
df_list_err = []
df_list_coe = []

# sorted() makes the load order independent of the file system's listing order.
for f in sorted(glob.glob(os.path.join(source_dir, '*.csv'))):
    art = os.path.basename(os.path.dirname(f))  # containing folder = article
    filename = os.path.basename(f)
    df_tmp = pd.read_csv(f,
                         names=['x', 'y'],
                         converters={'x': _parse_decimal, 'y': _parse_decimal},
                         delimiter=';')

    if filename.startswith('err'):
        # Name layout: 'err' + energy + 7 trailing characters that are cut off.
        energy = _parse_decimal(filename[3:-7])

        # Rows come in pairs: even positions carry x and the upper end point,
        # odd positions carry the lower end point.
        if len(df_tmp) % 2:
            raise ValueError(
                "{}: expected an even number of rows, got {}".format(f, len(df_tmp)))
        upper = df_tmp.iloc[0::2]
        lower = df_tmp.iloc[1::2]
        df_tmp_err = pd.DataFrame({
            'x_err': upper['x'],
            'sf_err_up': upper['y'].values,
            'sf_err_down': lower['y'].values,
        })
        df_tmp_err['energy'] = _energy_key(energy)
        df_tmp_err['article'] = art
        # Running number of the point within its file; used later as a join key.
        df_tmp_err['nr'] = np.arange(len(df_tmp_err))
        df_list_err.append(df_tmp_err)

    elif filename.startswith('coe'):
        energy = _parse_decimal(filename[3:-7])

        # The first five values of column x are, in this order, the
        # coefficients named below; they become a single-row frame.
        coefficients = df_tmp['x'].iloc[:5].tolist()
        df_tmp_coe = pd.DataFrame(
            [coefficients],
            columns=['alfa', 'beta', 'alfa_err', 'beta_err', 'LET'])
        df_tmp_coe['article'] = art
        df_tmp_coe['energy'] = _energy_key(energy)
        df_list_coe.append(df_tmp_coe)

    else:
        # Plain data file: the name is energy + 7 trailing characters.
        energy = _parse_decimal(filename[:-7])
        df_tmp['energy'] = _energy_key(energy)
        df_tmp['article'] = art
        df_tmp['nr'] = np.arange(len(df_tmp))
        df_list.append(df_tmp)

# Fail with a readable message instead of pandas' "No objects to concatenate".
if not (df_list and df_list_err and df_list_coe):
    raise ValueError(
        "Incomplete input under {!r}: found {} data, {} 'err' and {} 'coe' CSV file(s)".format(
            os.path.join(source_dir, '*.csv'),
            len(df_list), len(df_list_err), len(df_list_coe)))

# Decimal commas are already handled by the read_csv converters, so no
# further text replacement is needed on the concatenated frames.
result_err = pd.concat(df_list_err, axis=0)
result_err['article'] = result_err['article'].astype('category')

result_coe = pd.concat(df_list_coe, axis=0)
result_coe['article'] = result_coe['article'].astype('category')

result = pd.concat(df_list, axis=0).rename(columns={"x": "dose", "y": "sf"})
result['article'] = result['article'].astype('category')

In [None]:
# Read columns 0 and 2 of every .xls file matched by <source_dir>/*.xls and
# name them 'article' and 'cells'; the per-file frames are stacked into one
# lookup table with a fresh 0..n-1 index.
df_list_cells = [
    pd.read_excel(io=sheet_path, usecols=(0, 2), names=('article', 'cells'))
    for sheet_path in glob.glob(os.path.join(source_dir, '*.xls'))
]

result_cells = (
    pd.concat(df_list_cells, axis=0)
    # 'article' is kept as text so it can be matched against folder names;
    # 'cells' is stored as a categorical.
    .astype({'article': 'str', 'cells': 'category'})
    .reset_index(drop=True)
)

In [None]:
# sf_error: half of the absolute distance between the two error-bar end points.
# weight:   reciprocal of sf_error.
# NOTE(review): a point whose two end points coincide gets sf_error == 0 and
# therefore an infinite weight - confirm that downstream code tolerates this.
err_span = (result_err['sf_err_up'] - result_err['sf_err_down']).abs()
result_err['sf_error'] = 0.5 * err_span
result_err['weight'] = 1 / result_err['sf_error']

In [None]:
# Attach the 'cells' value of each article to its data points (inner join on
# 'article'), then store 'article' as a categorical again.
result = result.reset_index(drop=True)
result1 = result_cells.merge(result, on='article')
result1 = result1.astype({'article': 'category'})

# Merging the DataFrames

In [None]:
# Give all three tables a clean 0..n-1 index before they are merged.
result = result.reset_index(drop=True)
result_coe = result_coe.reset_index(drop=True)
result_err = result_err.reset_index(drop=True)

In [None]:
# Add the per-(article, energy) coefficients to every data point (inner join).
result_data = result_coe.merge(result1, on=["article", "energy"])
result_data = result_data.astype({"article": "category"})

In [None]:
# Clamp negative doses to zero.
# The clipped column is assigned back explicitly: calling an in-place method
# on `result_data.dose` is chained assignment, which warns on pandas 2.2 and
# leaves the DataFrame untouched once Copy-on-Write is active (pandas 3).
result_data['dose'] = result_data['dose'].clip(lower=0)

In [None]:
# Index both tables by (article, energy, nr) so they can be joined on the index.
result_data = result_data.set_index(['article', 'energy', 'nr'])
result_err = result_err.set_index(['article', 'energy', 'nr'])

In [None]:
# Inner join on the shared index: each data point is paired with the error
# row that has the same (article, energy, nr).
result_all = result_data.merge(result_err, left_index=True, right_index=True)

In [None]:
# Order the rows by their index levels.
result_all = result_all.sort_index()

# Save

In [None]:
# Write the merged table to tmp/rawdata.h5 under the HDF5 key 'data_analyze'.
fname = os.path.join('tmp', 'rawdata.h5')
os.makedirs(os.path.dirname(fname), exist_ok=True)  # create directory if missing
# `key` is passed by keyword: pandas deprecated positional arguments after the
# path in 2.1 and makes them keyword-only from 3.0 on.
result_all.to_hdf(fname, key='data_analyze', format='table')