#
# Parameter optimization of F of lorenz96 
#
# 20230310 updated using autocorrelation
from pylab import *
import numpy as np
import matplotlib.pyplot as plt
import numpy.ma as ma
import struct
from scipy.io import FortranFile
from sklearn.gaussian_process import kernels as sk_kern
from sklearn.gaussian_process import GaussianProcessRegressor


def costfunc(mean, std, obserr, ans, dim):
    """Return the summed, spread-scaled squared misfit over ``dim`` entries.

    For every index ``k`` in ``range(dim)`` the term
    ``0.5 * (mean[k] - ans[k])**2 / (std[k] + obserr[k])**2`` is added
    to the total.

    Parameters
    ----------
    mean, std, obserr, ans : indexable sequences of numbers
        Only the first ``dim`` entries of each are read.
    dim : int
        Number of entries to accumulate.

    Returns
    -------
    The accumulated cost (the integer ``0`` when ``dim`` is 0).
    """
    total = 0
    for k in range(dim):
        residual = mean[k] - ans[k]
        # The two spreads are added *before* squaring (not in quadrature).
        spread = std[k] + obserr[k]
        total += 0.5 * (residual**2 / (spread**2))
    return total


#
# configurations
#
Nens, Nobs = 100, 1     # ensemble size, number of observed state variables
xx, tt = 9, 144000      # state dimension, number of time steps per member

# Indices of the observed state variables.  The array is zero-initialised, so
# the single entry is index 0.
# (Alternative kept for reference: with Nobs = 3, also observe indices 3 and 6.)
h_ope = np.zeros(Nobs, dtype=int)

# clim[k, i] holds the k-th climatological index of ensemble member i.
clim = np.zeros(shape=(6, Nens))

# Buffer for one ensemble member's trajectory, one row per time step.
data = np.zeros(shape=(tt, xx))

NeedOriginalData = True
#
# reading parameter ensemble members
#
# For every ensemble member: read its trajectory, reduce it to six
# climatological indices (averaged over the observed variables), then plot
# each index across the ensemble and store the table in 'costlist'.
#   clim[0] : mean of the squared state
#   clim[1] : standard deviation divided by the mean
#   clim[2..5] : autocorrelation at lags 1..4
#
if NeedOriginalData:
    # Row window used for the moment statistics.  The upper bound used to be
    # written as 144400, which lies beyond the tt rows of `data` and was
    # silently clamped by slicing; the effective end (tt) is now explicit.
    # The standard deviation previously started at 134440 while the mean
    # started at 134400; both now use the same window so that their ratio
    # is computed from one consistent sample.
    moment_window = slice(134400, tt)
    # Base window and lags for the autocorrelation indices.
    acf_start, acf_stop = 133400, 143400
    acf_lags = (1, 2, 3, 4)

    for i in range(Nens):
        print('analyzing ensemble member ', i)
        filename = '../tmp/manypara'+str(i+1)+'.dat'
        # The context manager closes each file; previously every one of the
        # Nens handles stayed open for the lifetime of the script.
        with FortranFile(filename, 'r') as f:
            for t in range(tt):
                data[t, :] = f.read_reals(np.float32)
        #
        # calculating climatological indices
        #
        # Accumulate over all observed variables first and divide by Nobs
        # once afterwards.  (Dividing inside the loop, and overwriting
        # instead of accumulating, only gave the average for Nobs == 1.)
        member_clim = np.zeros(6)
        for j in range(Nobs):
            series = data[:, h_ope[j]]
            sample = series[moment_window]
            member_clim[0] += np.mean(sample**2)
            member_clim[1] += np.std(sample)/np.mean(sample)
            base = series[acf_start:acf_stop]
            for lag in acf_lags:
                shifted = series[acf_start+lag:acf_stop+lag]
                member_clim[1+lag] += np.corrcoef(base, shifted)[0, 1]
        clim[:, i] = member_clim/Nobs

    # One figure per index.  A fresh figure is opened and closed explicitly
    # so that a curve cannot leak into the next image when show() returns
    # without clearing the figure (e.g. with a non-interactive backend).
    for k in range(6):
        fig = plt.figure()
        plt.plot(clim[k, :])
        plt.savefig('F_clim'+str(k)+'.png')
        plt.show()
        plt.close(fig)
    np.savetxt('costlist', clim)


#
# start constructing surrogate models
#
# Training inputs are Nens equally spaced values on [0, 1]; the targets are
# the transposed contents of 'costlist' (one row per ensemble member).
x_train = np.linspace(0, 1, Nens)
y_train = np.loadtxt('costlist').T
kernel = sk_kern.Matern()

# All four regressors are configured identically and are handed the same
# kernel object.
gp_settings = dict(
    kernel=kernel,
    alpha=1e-10,
    optimizer="fmin_l_bfgs_b",
    n_restarts_optimizer=80,
    normalize_y=True,
)
clf0 = GaussianProcessRegressor(**gp_settings)
clf1 = GaussianProcessRegressor(**gp_settings)
clf2 = GaussianProcessRegressor(**gp_settings)
clf3 = GaussianProcessRegressor(**gp_settings)

# clf0..clf3 are fitted, in this order, to columns 2..5 of y_train.
train_inputs = x_train.reshape(-1, 1)
for surrogate, column in ((clf0, 2), (clf1, 3), (clf2, 4), (clf3, 5)):
    surrogate.fit(train_inputs, y_train[:, column])

# ---- quick visual check of the first surrogate (clf0) --------------------
# Evaluate it at 100 uniformly random inputs in [0, 1) and scatter the
# predicted mean against the input.
testsample = np.random.rand(100)
# (to test on a rescaled range instead: testsample = 3 + (15-3)*testsample)
test_inputs = testsample.reshape(-1, 1)
pred_mean, predstd = clf0.predict(test_inputs, return_std=True)
plt.scatter(testsample, pred_mean)
plt.show()
# ---- end of visual check -------------------------------------------------

#
# observation
#
# Read the reference ("nature") trajectory and turn selected state variables
# into noisy observations.
tt_obs = 14400
filename = '../DATA/naturex.dat'
observationerror = 0.1   # standard deviation of the added Gaussian noise

nature = np.zeros((tt_obs, xx))
# The context manager closes the file once all records are read (the handle
# used to stay open until the name `f` was rebound much later).
with FortranFile(filename, 'r') as f:
    for t in range(tt_obs):
        nature[t, :] = f.read_reals(np.float32)

# Indices of the observed state variables; Nobs_obs is derived from the list
# so the two cannot drift apart.
hope_obs = np.array([0, 1, 4, 5], dtype=int)
Nobs_obs = len(hope_obs)

obs = np.zeros((tt_obs, Nobs_obs))
for t in range(tt_obs):
    # One noise draw per observed variable, in the same order as before
    # (time step outermost, observed variable innermost).
    obs[t, :] = nature[t, hope_obs] + np.random.randn(Nobs_obs) * observationerror
    print(obs[t, :])


#
# MCMC Sampler
#
# Random-walk Metropolis sampler over the scaled parameter x in [0, 1].
# The cost compares surrogate predictions of the lag-2/3/4 autocorrelation
# with the same statistics computed from a window of the observations; the
# observation window and observed variable are re-drawn every 1000 steps.
iteration = 500000
ndim = 1
sdim = 3

# ---- case study 1-------------------
#obserr = [7.16,0.64] # sampletime=100
#obserr = [4.48,0.31] # sampletime=250
#obserr = [3.15,0.19] # sampletime=500
#obserr = [2.38,0.13] # sampletime=1000

# ---- case study 2-------------------
#obserr = [6.15,0.53] # sampletime=100
#obserr = [4.55,0.32] # sampletime=250
#obserr = [3.28,0.21] # sampletime=500
#obserr = [2.61,0.17] # sampletime=1000
#obserr = [0.022,0.049,0.083,0.11] # sampletime=100 
obserr = [0.050,0.090,0.11]#,0.11] # sampletime=100 
#obserr = [0.020,0.033,0.043] # sampletime=500 

# Surrogates entering the cost and the autocorrelation lags they are paired
# with.  clf0 (fitted to the lag-1 column) is intentionally left out, as in
# the previously commented-out lag-1 lines.
surrogates = (clf1, clf2, clf3)
lags = (2, 3, 4)
sampletime = 100


def _surrogate_stats(point):
    """Return (means, stds) of all surrogates at one parameter vector."""
    # One sample with ndim features.
    inputx = np.asarray(point, dtype=float).reshape(1, -1)
    means = np.zeros(sdim)
    stds = np.zeros(sdim)
    for k, model in enumerate(surrogates):
        m, s = model.predict(inputx, return_std=True)
        # predict() returns arrays even for a single sample; extract the
        # scalar explicitly instead of relying on the deprecated implicit
        # conversion of a size-1 array on element assignment.
        means[k] = m.item()
        stds[k] = s.item()
    return means, stds


def _lagged_autocorr(series, start, length):
    """Return the correlation of a window with its copies shifted by `lags`."""
    base = series[start:start+length]
    return np.array([
        np.corrcoef(base, series[start+lag:start+lag+length])[0, 1]
        for lag in lags
    ])


# define variables
x = np.zeros((iteration, ndim))
x[0, :] = 0.5  # initial condition

# observation (initial window and observed variable)
time = 0
location = 0
obsstat = _lagged_autocorr(obs[:, location], time, sampletime)

# prediction and cost at the initial state
pred_mean, pred_std = _surrogate_stats(x[0, :])
f = costfunc(pred_mean, pred_std, obserr, obsstat, sdim)

for t in range(1, iteration):
    print('sampling no. ', t)
    # changing observation
    if t % 1000 == 0:
        time = int(np.random.uniform(0, tt_obs-sampletime-max(lags)))
        # randint's upper bound is exclusive; the former randint(0, 3) could
        # never pick the last of the Nobs_obs observed variables.
        location = np.random.randint(0, Nobs_obs)
        obsstat = _lagged_autocorr(obs[:, location], time, sampletime)
        # The cost of the current state must be re-evaluated against the new
        # observation statistics; otherwise the acceptance test below would
        # compare costs defined by two different targets.
        pred_mean, pred_std = _surrogate_stats(x[t-1, :])
        f = costfunc(pred_mean, pred_std, obserr, obsstat, sdim)

    #
    # sampling new proposal
    #
    # Gaussian step around the current state, redrawn until it lies in [0, 1].
    # NOTE(review): redrawing makes the proposal a truncated normal, which is
    # not symmetric near the bounds; no correction is applied -- confirm that
    # this is intended.
    proposedx = -np.ones(ndim)
    for i in range(ndim):
        while proposedx[i] < 0 or proposedx[i] > 1:
            proposedx[i] = np.random.normal(x[t-1, i], 0.05)

    #
    # evaluate proposal
    #
    pred_mean, pred_std = _surrogate_stats(proposedx)
    proposedf = costfunc(pred_mean, pred_std, obserr, obsstat, sdim)

    # Acceptance probability min(1, exp(f - proposedf)).  The exponential is
    # only evaluated for uphill moves, so it cannot overflow.
    alpha = 1.0 if proposedf <= f else np.exp(f - proposedf)

    #
    # sampling/rejecting
    #
    # A uniform number is drawn on every step (as before: the old
    # `alpha > 1` shortcut could never trigger after min(1, ...)).
    lottery = np.random.rand()
    if lottery < alpha:
        x[t, :] = proposedx
        f = proposedf
    else:
        x[t, :] = x[t-1, :]

#
# output
#
np.savetxt('MCMCsamples_AR_obserror01_100', x)





