import sys, os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
%load_ext autoreload
%autoreload 2
import matplotlib.cm as cm
cmap = cm.tab10
## custom packages
# Make the project's ``src`` directory (resolved relative to the current
# working directory) importable.
# Prepend rather than overwrite sys.path[0], so the entry that was first
# before is kept; the membership guard keeps repeated runs of this cell
# from stacking duplicate entries.
src_dir = os.path.join(os.pardir, 'src')
if src_dir not in sys.path:
    sys.path.insert(0, src_dir)
from data_io_ts import *
We generate synthetic data that follows a power-law:
$p(x) \propto x^{-\alpha }$.
The data is generated using a Markov process, so that we can include correlations via a parameter $\mu$.
## settings for the synthetic power-law time series
label = 'mcmc_zipf'  ## dataset identifier handed to the wrapper

Ntypes, Ntokens = 10**3, 10**5  ## maximum number of symbols, number of samples
alpha = 1.5  ## powerlaw exponent
mu = 0.01  ## correlation parameter
k = 5  ## number of neighbors for correlated step
## alternative values noted by the author:
## list_mu = [0.01,1.0] (correlated and uncorrelated case)

## bundle all settings under the keys passed to the wrapper
dict_args = dict(
    Ntypes=Ntypes,
    Ntokens=Ntokens,
    alpha=alpha,
    mu=mu,
    k=k,
    src_dir=src_dir,
)
x_data = get_xdata_ts_wrapper(label, dict_args)
## save the generated series as plain text, one value per line;
## the file name encodes the generation parameters
path_save = os.path.join(os.pardir, 'data', 'synthetic')
fname_save = 'ts_synthetic_Ntypes%s_Ntokens%s_alpha%s_mu%s_k%s' % (Ntypes, Ntokens, alpha, mu, k)
filename = os.path.join(path_save, fname_save)

# Create the output directory if it is missing; without this, open() raises
# FileNotFoundError when ../data/synthetic does not exist yet.
os.makedirs(path_save, exist_ok=True)

with open(filename, 'w') as f:
    # Single batched call instead of one write() per sample.
    f.writelines('%s\n' % (h) for h in x_data)