In [1]:
# Standard-library and scientific-stack imports used by the notebook.
import sys, os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Reload imported modules before each cell execution, so edits to the local
# source files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
import matplotlib.cm as cm
# Default colormap handle (matplotlib's 'tab10'); not referenced in the cells
# visible here.
cmap = cm.tab10
In [2]:
## custom packages
# Make the project's `src` directory (one level above the notebook's working
# directory) importable. Prepend it rather than assigning to sys.path[0]:
# that slot already holds the interpreter's own first search entry, and
# overwriting it silently drops that entry from the search path. The
# membership check keeps re-running this cell from stacking duplicates.
src_dir = os.path.join(os.pardir,'src')
if src_dir not in sys.path:
    sys.path.insert(0, src_dir)

# NOTE(review): the wildcard import hides which names come from data_io_ts;
# the cells visible here only use get_xdata_ts_wrapper. Kept as-is so that
# any other name relied on elsewhere stays available.
from data_io_ts import *

Get an example dataset for a correlated time-series

We generate synthetic data that follows a power-law:

$p(x) \propto x^{-\alpha }$.

The data is generated using a Markov process, which lets us include correlations via a parameter $\mu$.

In [7]:
# Settings for the synthetic time series. `label` and the bundled settings
# are handed to get_xdata_ts_wrapper, which returns the series.
label = 'mcmc_zipf'

Ntypes = 10**3   ## maximum number of symbols
Ntokens = 10**5  ## number of samples
alpha = 1.5      ## powerlaw exponent
k = 5            ## number of neighbors for correlated step
mu = 0.01        ## correlation parameter
# (mu = 0.01 is the correlated case, mu = 1.0 the uncorrelated one)

# Bundle every setting, plus the location of the source directory, into the
# single dictionary the wrapper takes as its second argument.
dict_args = dict(
    Ntypes=Ntypes,
    Ntokens=Ntokens,
    alpha=alpha,
    mu=mu,
    k=k,
    src_dir=src_dir,
)
x_data = get_xdata_ts_wrapper(label, dict_args)

Save the timeseries

In [8]:
# Write the generated series to ../data/synthetic, one element per line.
# The file name records the parameters the series was generated with.
path_save = os.path.join(os.pardir,'data','synthetic')
# open(..., 'w') does not create missing parent directories, so make sure the
# target directory exists before writing (e.g. on a fresh checkout).
if not os.path.isdir(path_save):
    os.makedirs(path_save)
fname_save = 'ts_synthetic_Ntypes%s_Ntokens%s_alpha%s_mu%s_k%s'%(Ntypes,Ntokens,alpha,mu,k)
filename = os.path.join(path_save,fname_save)
with open(filename,'w') as f:
    for h in x_data:
        f.write('%s\n'%(h))
In [ ]: