import numpy as np
import pymc3 as pm
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
from scipy.stats import norm
from sklearn.preprocessing import LabelEncoder
from theano import tensor as tt
from sys import exit
import pickle
import arviz
import re
# --- Output settings ---------------------------------------------------------
# writeOut is defined here but not read anywhere in the visible part of the file.
writeOut = False
# Every plt.savefig call in this file prefixes its file name with this path.
path = './Plots_New/'

# --- Figure geometry ---------------------------------------------------------
# The figure width is given in millimetres and converted to inches (25.4 mm
# per inch); height and aspect are derived from it and passed to seaborn's
# figure-level plots as `height=` / `aspect=`.
widthMM = 190
widthInch = widthMM / 25.4
ratio = 0.66
heigthInch = ratio*widthInch
aspect = widthInch / heigthInch

# --- Font sizes --------------------------------------------------------------
SMALL_SIZE = 8
MEDIUM_SIZE = 10
BIGGER_SIZE = 12

# Each entry is (rc group, options) and is applied in the listed order.
for rcGroup, rcOptions in (
    ('font', {'size': SMALL_SIZE}),           # default text size
    ('axes', {'titlesize': SMALL_SIZE}),      # axes title
    ('axes', {'labelsize': MEDIUM_SIZE}),     # x and y labels
    ('xtick', {'labelsize': SMALL_SIZE}),     # x tick labels
    ('ytick', {'labelsize': SMALL_SIZE}),     # y tick labels
    ('legend', {'fontsize': SMALL_SIZE}),     # legend text
    ('figure', {'titlesize': BIGGER_SIZE}),   # figure title
):
    plt.rc(rcGroup, **rcOptions)

sns.set_style("ticks")
Read all Excel sheets at once into one big table.
# Load the three measurement workbooks and stack them row-wise into one table.
df = pd.concat(
    [
        pd.read_excel("data/SEC-NUM-10_SEC-R-15_EAP-flake_1_E1.xlsx"),
        pd.read_excel("data/SEC-NUM-10_SEC-R-15_WEM-60_1_E1.xlsx"),
        pd.read_excel("data/SEC-NUM-10_SEC-R-15_BU-072_1_E1.xlsx"),
    ],
    axis=0,
)
# Cleaned view: fresh row index, columns containing missing values removed,
# and the helper 'index' column created by reset_index() dropped again.
# The axis is passed by keyword because pandas >= 2.0 no longer accepts it
# positionally in dropna()/drop().
# NOTE(review): the result is not assigned, so `df` itself is left unchanged;
# as the last expression of a notebook cell this only displays the view.
# Confirm whether an assignment back to `df` was intended.
df.reset_index().dropna(axis="columns").drop(columns='index')
Add extra columns for location and object.
def _sectionNumber(row):
    """Return the last run of digits in the row's 'section' label as an int."""
    return int(re.findall(r'\d+', row.section)[-1])


def _objectName(row):
    """Return the part of the row's 'section' label before the first '-'."""
    return row.section.split('-')[0]


# Both new columns are derived row by row from the 'section' label.
df['location'] = df.apply(_sectionNumber, axis=1)
df['object'] = df.apply(_objectName, axis=1)
df
Convert the three method columns into the two columns 'method' and 'edge_angle'.
# Columns that identify a measurement and are carried along unchanged.
otherCols = [
    'section',
    'angle_number',
    'steps',
    'dist_intersection',
    'segment_length',
    'location',
    'object',
]
# Wide -> long: one row per (measurement, method), with the method name in
# 'method' and its value in 'edge_angle'.
df = df.melt(
    id_vars=otherCols,
    value_vars=['3points', '2lines', 'best_fit'],
    var_name='method',
    value_name='edge_angle',
)
df
Filter the data: keep only rows with a 'dist_intersection' of 2.0 mm, 5.0 mm, or 10.0 mm, each with a 'segment_length' of 0.5 mm.
# Keep rows whose segment_length is 0.5 and whose dist_intersection is one of
# 2.0, 5.0 or 10.0.
keepSegment = df.segment_length == 0.5
keepDist = df.dist_intersection.isin([2.0, 5.0, 10.0])
df = df[keepSegment & keepDist]
Furthermore, 'location' is renamed to 'sectionNumber'.
# The 'location' column is called 'sectionNumber' from here on.
df = df.rename({"location": "sectionNumber"}, axis="columns")
df
Read in the raw data again.
# Unprocessed copy of the BU-072 workbook, loaded separately from `df`.
raw = pd.read_excel(io="data/SEC-NUM-10_SEC-R-15_BU-072_1_E1.xlsx")
raw
The values 121.4 and 26.8 are indeed measured several times, but always with different settings.
# Rows of the raw table whose best_fit value is 121.4 or 26.8.
raw[raw.best_fit.isin([121.4, 26.8])]
An explicit query for duplicate rows (i.e. rows in which all entries are the same) yields nothing.
# keep=False marks every member of a group of identical rows, not only the
# later repeats.
isRepeated = df.duplicated(keep=False)
df[isRepeated]
First we average over the sections, then over dist_intersection.
# Violin plots of edge_angle for the WEM object, one row of panels per method:
# first against dist_intersection, then against sectionNumber. Each figure is
# saved under `path` right after it is drawn.
wemRows = df[df.object == 'WEM']
for xColumn, fileName in (
    ('dist_intersection', "Check_WEM_dist.pdf"),
    ('sectionNumber', "Check_WEM_sec.pdf"),
):
    sns.catplot(
        data=wemRows,
        x=xColumn,
        y='edge_angle',
        row='method',
        kind='violin',
        height=heigthInch,
        aspect=aspect,
    )
    plt.savefig(path + fileName, bbox_inches='tight', dpi=300)
We look at the angle for all objects, all locations and all methods. The depth is averaged over.
# One line plot per (method, object) pair: edge_angle along sectionNumber,
# with one line per dist_intersection value. The loop bodies are nested
# explicitly here; the flat form without indentation is not valid Python.
for method in df.method.unique():
    for objectName in df.object.unique():
        data = df[(df.method == method) & (df.object == objectName)]
        # NOTE: despite its name, `ax` holds what sns.relplot returns
        # (a figure-level grid object per the seaborn docs), not a single Axes.
        ax = sns.relplot(
            data=data,
            x='sectionNumber',
            y='edge_angle',
            kind='line',
            hue='dist_intersection',
            legend="full",
            height=heigthInch,
            aspect=aspect,
            marker='o',
        )
        plt.title("method = {} | object = {}".format(method, objectName), fontsize=BIGGER_SIZE)
        if objectName == "WEM":
            # Dashed reference line at 60, the value this file later assumes
            # as the true angle for the WEM object.
            plt.axhline(y=60, color='gray', alpha=0.7, ls='--')
        # Saved once per (method, object) pair, since the file name uses both.
        plt.savefig(path + "Along_edge_{}_{}.pdf".format(method, objectName), bbox_inches='tight', dpi=300)
We look at the angle on the two other objects by all methods for all locations and depths.
# All objects except WEM: edge_angle along sectionNumber, one line per method
# and one row of panels per object.
nonWemRows = df[df.object != 'WEM']
sns.relplot(
    data=nonWemRows,
    x='sectionNumber',
    y='edge_angle',
    hue='method',
    row='object',
    kind='line',
    marker='o',
    height=heigthInch,
    aspect=aspect,
)
plt.savefig(path + "EAP_BU.pdf", bbox_inches='tight', dpi=300)
In statistics, a quality measure that is often used is the mean squared error.
We can evaluate the mean squared error for the WEM object under the assumption that the true angle for all sections is 60°.
# Mean squared error of the WEM measurements against the assumed true angle,
# reported per method and dist_intersection.
dfMS = df[df.object == 'WEM']
trueAngle = 60.0
# Squared deviation from the true angle, computed on the whole column at once.
dfMS = dfMS.assign(squaredError=np.power(dfMS.edge_angle - trueAngle, 2))
# Select 'squaredError' before aggregating: averaging every column would also
# try to average the text columns ('section', 'object'), which raises a
# TypeError on pandas >= 2.0.
dfMS.groupby(['method', 'dist_intersection'])['squaredError'].mean()
We see that the minimum mean squared error is achieved by the 3points method at dist_intersection = 10.0, which is therefore the recommended method.
As a sanity check I look at the number of data points and manually inspect the results:
# Number of squaredError values behind each (method, dist_intersection) mean.
dfMS.groupby(['method', 'dist_intersection'])['squaredError'].count()

# The individual rows for WEM / 3points / dist_intersection 10.0, for manual
# inspection.
isWem = df.object == 'WEM'
isThreePoints = df.method == '3points'
isDistTen = df.dist_intersection == 10.0
df[isWem & isThreePoints & isDistTen]
We see that the values occur repeatedly, but there are still seven distinct values.
Having chosen the method, the results on the other two objects now look as follows:
# Non-WEM objects, restricted to the 3points method at dist_intersection 10.0:
# edge_angle along sectionNumber, one panel column per object.
chosenRows = df[
    (df.object != 'WEM')
    & (df.method == '3points')
    & (df.dist_intersection == 10.0)
]
sns.relplot(
    data=chosenRows,
    x='sectionNumber',
    y='edge_angle',
    col='object',
    kind='line',
    marker='o',
)
# IPython shell escapes (lines starting with '!'): these run the jupyter
# command line, so they only work inside IPython/Jupyter, not in plain Python.
# Export the notebook EdgeAnglesV4.ipynb to HTML.
!jupyter nbconvert --to html EdgeAnglesV4.ipynb
# Export the same notebook to Markdown.
!jupyter nbconvert --to markdown EdgeAnglesV4.ipynb