import sys, os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
%load_ext autoreload
%autoreload 2
import matplotlib.cm as cm
cmap = cm.tab10
## custom packages
src_dir = os.path.join(os.pardir,'src')
sys.path[0] = src_dir
from data_io_ts import *
from stats import xdata_to_xnx, x_autocorr_sm_ext
from exponential_fit import *
from modules_plotting import * # import setup for figure layout (see src/modules_plotting.py to customize)
We load the earthquake data: the magnitudes $x$ of the recorded earthquakes.
We select a minimum magnitude of $x_{\min}=2$.
The Gutenberg–Richter law states that the probability density function of the magnitudes follows:
$p(x) \propto e^{-\alpha x}$
### 1: earthquakes
## Fetch the dataset registered under `label` through get_xdata_ts_wrapper.
## The options travel in a single dict: the minimum magnitude, the location
## of the project sources and the 'log' flag (left off here).
label = 'earthquakes'
xmin = 2.
dict_args = dict(xmin=xmin, src_dir=src_dir, log=False)
x_data = get_xdata_ts_wrapper(label, dict_args)
N = len(x_data)  # number of entries handed back by the loader
Note that we are plotting the complementary cumulative distribution $F(x) = P(X > x)$, i.e. one minus the cumulative distribution.
## Empirical distribution
## x, nx come from xdata_to_xnx with norm=False (presumably the distinct
## values and their raw counts -- confirm in src/stats.py).
xmax = None  # passed through as-is; presumably means "no upper cut-off"
x, nx = xdata_to_xnx(x_data, norm=False, xmin=xmin, xmax=xmax)
N = np.sum(nx)          # total of nx; replaces the N set when loading
px = nx / N             # nx rescaled so that it sums to one
Fx = 1 - np.cumsum(px)  # one minus the running sum of px

## Fit: Exponential
## Only the entry 'a' of the fit result is used here; it is fed to the
## model pdf/cdf helpers together with the same xmin.
result = fit_exponential_cont_sign(x, nx, xmin=xmin)
a = result['a']
fit_kwargs = dict(xmin=xmin)
px_fit = pdf_exponential_cont(x, a, **fit_kwargs)
Fx_fit = 1. - cdf_exponential_cont(x, a, **fit_kwargs)
## Figure: empirical F(x) = 1 - cumsum(px) against the fitted 1 - cdf,
## on a logarithmic y-axis.
fig, ax = plt.subplots(nrows=1, ncols=1)#, figsize=fig_size)

## Plot the distribution (markers only: lw=0 suppresses the connecting line)
x_ = x
y_ = Fx
c_ = cmap(0)
ax.plot(x_,y_,marker='o',ms=2,lw=0,c = c_,label = 'Data')

## Plot the fit
## Drawn through `ax` rather than the implicit pyplot state, so the curve is
## guaranteed to land on the same axes as the data, legend and annotation.
c_ = cmap(1)
x_ = x
y_ = Fx_fit
ax.plot(x_,y_,lw=2,c=c_, label = 'Fit')

## Layout stuff
ax.legend(loc='lower left')
ax.set_xticks([2,4,6,8])
# ax.set_xscale('log')
ax.set_yscale('log')
ax.set_ylim(10**(-5),1.2)
ax.set_xlabel(r'$x$')#,labelpad=0)
ax.set_ylabel(r'$F(x)$')
ax.set_title('Earthquakes: Magnitude')

## Formula label, positioned in axes-fraction coordinates (0..1 in both directions)
x_annot_formula = 0.6
y_annot_formula = 0.6
ax.annotate(r'$\propto e^{-\alpha x}$',xy=(x_annot_formula,y_annot_formula),xycoords = 'axes fraction')#,size=8)
## Autocorrelation of the series compared with a randomized reference
np.random.seed(42)  # seed NumPy's global RNG so reruns start from the same state
nrep = 10**3
q1, q2 = 1, 99
result = x_autocorr_sm_ext(x_data, nrep=nrep, q=[q1, q2])

## Each of 'C' and 'C_rand' unpacks into three curves over the lags 'tau'
## (presumably a central value and the q1/q2 percentiles -- confirm in src/stats.py)
x = result['tau']
y_mu, y_1, y_2 = result['C']
y_mu_rand, y_1_rand, y_2_rand = result['C_rand']

## tau^*: a lag at which the data curve y_mu lies inside the randomized band
## [y_1_rand, y_2_rand].
## NOTE(review): index [1] selects the SECOND lag satisfying the condition,
## presumably to skip a trivial match at the first lag -- confirm. This raises
## IndexError when fewer than two lags satisfy it.
inside_band = (y_mu >= y_1_rand) & (y_mu <= y_2_rand)
ind_t_star = np.where(inside_band)[0][1]
tau_star = x[ind_t_star]
print('Autocorrelation time: ',tau_star)
## Figure: autocorrelation of the data and of the randomized reference,
## each drawn as a line with a shaded band between its two outer curves.
fig, ax = plt.subplots(nrows=1, ncols=1)#, figsize=fig_size)
alpha_val = .5  # opacity of the shaded bands

## (centre curve, lower curve, upper curve, colour, legend entry);
## the empirical curve is drawn first, the randomized one second.
## `lw` is not defined in this cell (presumably provided by modules_plotting).
series = [
    (y_mu, y_1, y_2, cmap(0), 'Data'),
    (y_mu_rand, y_1_rand, y_2_rand, cmap(1), 'Randomized'),
]
for y_, z1_, z2_, c_, label_ in series:
    x_ = x
    ax.plot(x_, y_, c=c_, lw=lw, label=label_)
    ax.fill_between(x_, z1_, z2_, color=c_, alpha=alpha_val, lw=0)

## Dotted vertical marker at tau^*
ax.plot([tau_star, tau_star], [-1, 1], lw=1, color='black', ls=':')

## Layout stuff
ax.legend(loc='upper right')
ax.set_xscale('log')
ax.set_xlabel(r'Time Lag, $\tau$')
ax.set_ylabel(r'$C(\tau)$')
ax.set_ylim(-0.1, 0.3)

## Label with the integer part of tau^*, positioned in axes-fraction coordinates
x_annot_tau = 0.8
y_annot_tau = 0.6
ax.annotate(
    r'$\tau^*=%s$' % (int(tau_star)),
    xy=(x_annot_tau, y_annot_tau),
    xycoords='axes fraction',
)
## Fit on the full dataset, this time requesting nrep_synth synthetic
## datasets so that the result also carries 'pval' and 'KS'.
nrep_synth = 100  ## number of synthetic datasets to generate for empirical p-value
x, nx = xdata_to_xnx(x_data, norm=False, xmin=xmin, xmax=xmax)
result = fit_exponential_cont_sign(x, nx, xmin=xmin, nrep_synth=nrep_synth)

## fitted parameter, p-value, KS-distance
alpha, pval, KS = (result[key] for key in ('a', 'pval', 'KS'))

print('Original dataset. N = %s' % (N))
print('Empirical p-value from %s synthetic datasets: ' % (nrep_synth), pval)
Note that the subsampling draws a different random subset each time it is run; the measured p-value will therefore vary from run to run.
## Repeat the fit on a random subsample that keeps a fraction 1/tau_star of
## the data.
p_sub = 1. / tau_star  ## 0 ... 1 (keep nothing ... keep all); we set 1/tau_star
N_sub = int(p_sub * N)  # int() truncates towards zero
nrep_synth = 100  ## number of synthetic datasets to generate for empirical p-value

## subsample -- take N_sub samples from x_data without replacement
x_data_sub = np.random.choice(x_data, size=N_sub, replace=False)

x, nx = xdata_to_xnx(x_data_sub, norm=False, xmin=xmin, xmax=xmax)
result = fit_exponential_cont_sign(x, nx, xmin=xmin, nrep_synth=nrep_synth)

## fitted parameter, p-value, KS-distance
alpha, pval, KS = result['a'], result['pval'], result['KS']

print(r'Subsampled dataset: n = %s (n/N ~ %s)' % (N_sub, p_sub))
print('Empirical p-value from %s synthetic datasets: ' % (nrep_synth), pval)