#ifndef M_MC_H
#define M_MC_H
#include <iostream>
#include <cmath>
#include <rand_gen.h>
#include <t_functions.h>

// Refinement selector for the `method` argument of m_mc_qn::relax_multi().
// The code visible in this header only acts on USE_GR (gradient descent
// after each Monte Carlo run); USE_NONE and USE_QN are not tested here.
const int USE_NONE = 0;
const int USE_QN = 1;
const int USE_GR = 2;

// Stage identifiers handed to the m_mc_qn::pre_relax() hook so that a derived
// class can tell which minimiser is about to run. relax_multi() passes
// METHOD_MC and METHOD_GR; METHOD_QN is not used in this header.
const int METHOD_MC = 1;
const int METHOD_GR = 2;
const int METHOD_QN = 3;

// Upper bound on the number of outer iterations in m_mc_qn::minimise_grad().
const int MAX_GR_LOOP =10000;

/* MONTECARLO SIMULATIONS */
/* Minimiser for a user-supplied energy function.
 *
 * Derived classes implement E() (total energy) and Ei() (energy attached to
 * one parameter).  The class then offers:
 *   - minimise_mc()    : Metropolis-style Monte Carlo, temperature multiplied
 *                        by Tcoef between sweeps, self-adjusting kick sizes;
 *   - minimise_delta() : same scheme driven by Ei() differences;
 *   - minimise_grad()  : steepest descent with a back-tracking line search;
 *   - relax_multi()    : repeated MC (+ optional gradient) runs, keeping the
 *                        parameter set with the lowest energy.
 *
 * The object owns raw arrays and defines no copy constructor or assignment
 * operator: copying an instance would make both copies free the same arrays.
 * Handle instances by reference or pointer.
 */
class m_mc_qn {
public:
  // MC
  int size_;             // number of parameters
  double *X_;            // minimisation parameters (current point)
  double *dX_;           // kick amplitude of each parameter
  double *old_grad_X_;   // copy of X_ taken before a gradient kick
  double T_;             // current temperature
  // Quasi Newton
  // NOTE(review): several members below are only allocated/initialised in
  // this header and never read by it; presumably they are workspace for a
  // quasi-Newton method implemented elsewhere - confirm before removing.
  int iter_;             // number of iterations performed
  int iter_max_;
  double low_target_rate_; // acceptance rate below which steps are shrunk
  double dx_min_;        // min value for dX_ and grad_scale_
  double dx_max_;        // max value for dX_ and grad_scale_
  double step_max_;
  double epsilon_;       // relative finite-difference step used by grad_E()
  double tolerance_x_;   // smallest line-search factor tried by minimise_grad()
  double tolerance_gradE_;
  double E_;             // value of E evaluated by grad_E()
  double f_of_p_;
  double grad_scale_;    // scale of the gradient MC kick
  double *dE_;           // gradient of E, filled by grad_E()
  double *old_dE_;
  double *HdE_;
  double *p_;            // start point of the current line search
  double *pnew_;
  double *x_;
  double *xnew_;
  double *x_p_dx_;       // scratch point used by grad_E()
  double *H_;            // size_*size_ array
  bool last_rejected_;       // true when the last attempt was rejected
  bool grad_last_rejected_;  // true when the last gradient attempt was rejected
  bool use_grad_mc_;         // set to true to use gradient MC as well

private:
  bool   *frozen_;       // frozen parameters are never kicked
  double *success_;      // accepted kicks per parameter since last monitor()
  double grad_success_;  // accepted gradient kicks since last monitor()
  double Val_;           // last evaluated value
  double absVal_;        // abs val of Val_
  int n_sweep_;          // base number of iterations in each sweep
  int n_monitor_;        // number of iterations between dX monitoring
  rand_gen rand_;
  int track_;            // > 0: call display() every track_ temperature steps
  double *best_X_;       // best parameters found by relax_multi()
  double lambda0_;       // initial line-search factor of minimise_grad()

  // Acceptance rule shared by all MC moves: always accept a decrease,
  // otherwise accept with probability exp((e_old-e_new)/(absVal_*T_)).
  // A random number is drawn only when the move is not a strict decrease.
  bool accept_move(double e_old, double e_new)
  { return (e_new < e_old) ||
           (rand_.next_0_1() < std::exp((e_old-e_new)/(absVal_*T_)));
  }

  // Rescale one step size from its acceptance rate and clamp its magnitude
  // to [dx_min_, dx_max_].  Returns true when the step was rescaled.
  bool adapt_step(double &step, double rate) const
  { bool adjusted = false;

    if(rate < low_target_rate_) // too few hits
    { step *= (rate < 0.05) ? 0.5
                            : (1-0.5*(low_target_rate_-rate)/low_target_rate_);
      adjusted = true;
    }
    else if (rate > 0.66) // too many hits
    { step *= (rate > 0.95) ? 2.0 : (1+(rate-0.66)/0.34);
      adjusted = true;
    }
    if (std::fabs(step) < dx_min_) { step = dx_min_; }
    if (std::fabs(step) > dx_max_) { step = dx_max_; }
    return(adjusted);
  }

  // Clear every acceptance counter read by monitor().
  void reset_success()
  { for(int j=0; j < size_; ++j) { success_[j] = 0; }
    grad_success_ = 0;
  }

  // Number of inner iterations performed at each temperature.
  int sweeps_per_temperature() const
  { return n_sweep_*
           (static_cast<int>(std::sqrt(static_cast<double>(size_)))+1);
  }

  // One collective MC move along minus the gradient, used by minimise_mc().
  void gradient_kick()
  { const double old_val = grad_E(X_); // gradient in dE_
    absVal_ = std::fabs(old_val);

    // The sum starts at 1, so the norm is sqrt(1+|dE|^2) >= 1 and the
    // division below can never be by zero.
    double norm_grad_E = 1;
    for (int k=0; k < size_; ++k)
    { norm_grad_E += dE_[k]*dE_[k];
    }
    norm_grad_E = std::sqrt(norm_grad_E);

    for (int k=0; k < size_; ++k)
    { old_grad_X_[k] = X_[k];
      X_[k] -= rand_.next_0_1()*0.001*grad_scale_*dE_[k]/norm_grad_E;
      check(X_, k);
    }
    const double nVal = E(X_);

    if(accept_move(Val_, nVal))
    { grad_success_ += 1;
      Val_ = nVal;
      absVal_ = std::fabs(Val_);
      grad_last_rejected_ = false;
    }
    else
    { for (int k=0; k < size_; ++k)
      { X_[k] = old_grad_X_[k];
      }
      grad_last_rejected_ = true;
    }
  }

  // If E_ beats e_best, store E_ and X_ as the new best.  Returns true when
  // the best was updated.
  bool keep_if_best(double &e_best)
  { if(!(E_ < e_best)) { return(false); }
    e_best = E_;
    for(int j=0; j< size_; j++)
    { best_X_[j] = X_[j];
    }
    return(true);
  }

public:
  // size      : number of parameters.
  // v         : initial parameter values (size entries).  When v is 0 no
  //             array is allocated and the object cannot be minimised.
  // dv        : initial kick amplitudes; when 0 they are dvcoef*|v[i]|, or
  //             dvcoef itself when |v[i]| < 1e-12.
  // n_sweep   : base number of iterations per temperature.
  // n_monitor : iterations between kick-size adjustments (capped to n_sweep).
  m_mc_qn(int size, double *v, double *dv=0, int n_sweep=1000,
          int n_monitor=100, double dvcoef=0.1) :
    size_(size), X_(0), dX_(0), old_grad_X_(0), T_(0.0),
    iter_(0), iter_max_(1000), low_target_rate_(0.33),
    dx_min_(1e-15), dx_max_(1e15), step_max_(0.0),
    epsilon_(1e-8),
    tolerance_x_(1e-14), tolerance_gradE_(1e-12), E_(0.0), f_of_p_(0.0),
    grad_scale_(1e-4),
    dE_(0), old_dE_(0), HdE_(0), p_(0), pnew_(0), x_(0), xnew_(0),
    x_p_dx_(0), H_(0),
    last_rejected_(true), grad_last_rejected_(true), use_grad_mc_(false),
    frozen_(0), success_(0), grad_success_(0.0), Val_(0.0), absVal_(0.0),
    n_sweep_(n_sweep), n_monitor_(n_monitor), track_(0), best_X_(0),
    lambda0_(1.0)
  { if(v==0) { return; }
    // MC
    X_ = new double[size_];
    dX_ =  new double[size_];
    frozen_ =  new bool[size_];
    success_ =  new double[size_];
    best_X_ =  new double[size_];
    old_grad_X_ = new double[size_];

    if(n_monitor_ > n_sweep_) { n_monitor_ = n_sweep_; }

    for(int i=0; i < size_;++i)
    { X_[i] = v[i];
      if(dv) { dX_[i] = dv[i]; }
      else
      { const double f = std::fabs(X_[i]);
        dX_[i] = (f < 1e-12) ? dvcoef : f*dvcoef;
      }
      frozen_[i] = false;
      success_[i] = 0;
    }

    // Quasi Newton
    dE_ = new double[size_];
    old_dE_ = new double[size_];
    HdE_ = new double[size_];
    p_ = new double[size_];
    pnew_ = new double[size_];
    x_ = new double[size_];
    xnew_ = new double[size_];
    x_p_dx_ = new double[size_];
    H_ = new double[size_*size_];
  }

  virtual double E(double *v)=0;      // function to minimize
  virtual double Ei(int i, double *v)=0;   // Energy of node i
  double operator [] (int i) { return(X_[i]); }
  double *get_X() { return (X_); }
  void set_tracking(int track=1) { track_ = track; }
  int size() { return(size_);}
  void set_lambda0(double l) { lambda0_ = l; }
  void set_low_target_rate(double trate) { low_target_rate_ = trate; }
  void set_dx_min(double dx_min) { dx_min_ = dx_min; }
  void set_dx_max(double dx_max) { dx_max_ = dx_max; }
  void set_use_grad_mc(bool val = true) { use_grad_mc_ = val; }
  // Hook called by relax_multi() before each stage (METHOD_MC / METHOD_GR).
  virtual void pre_relax(int method) {}

  // Every array was obtained with new[] (or is still 0), so it must be
  // released with delete[]; delete[] on a null pointer is a no-op.
  virtual ~m_mc_qn()
  { delete [] X_;
    delete [] dX_;
    delete [] old_grad_X_;
    delete [] frozen_;
    delete [] success_;
    delete [] best_X_;

    delete [] dE_;
    delete [] old_dE_;
    delete [] HdE_;
    delete [] p_;
    delete [] pnew_;
    delete [] x_;
    delete [] xnew_;
    delete [] x_p_dx_;
    delete [] H_;
  }

  // freeze par i
  void freeze(int i) { frozen_[i] = true; }

  // Print X_, dX_, the temperature and the energy, which is re-evaluated
  // and stored in Val_.
  virtual void display()
  { int i;

    std::cout <<"X=(";
    for(i=0; i < size_; ++i)
    { std::cout <<X_[i]<<",";
    }
    std::cout <<") dX=(";
    for(i=0; i < size_; ++i)
    { std::cout <<dX_[i]<<",";
    }
    Val_ = E(X_);
    std::cout <<") T="<< T_<<" V="<<Val_<<"\n";
  }

  // Adjust dX (and grad_scale_ when gradient MC is enabled) from the
  // acceptance counts gathered over the last n_test iterations.
  // return the number of adjustments made
  virtual int monitor(int n_test)
  { if (n_test <= 0) { return(0); } // no sample: rates are undefined

    int adjust_no=0;

    for(int j=0; j < size_; ++j)
    { if(frozen_[j]) { continue; }
      if(adapt_step(dX_[j], success_[j]/n_test)) { ++adjust_no; }
    }

    if (use_grad_mc_ && adapt_step(grad_scale_, grad_success_/n_test))
    { ++adjust_no;
    }

    return(adjust_no);
  }

  // Test domain of parameters i
  // Called after every change of v[i]; the default accepts any value.
  virtual void check(double *v, int i)
  {
  }

  // Minimise E by single-parameter MC kicks (plus a gradient kick per
  // iteration when use_grad_mc_ is set), starting at temperature T0.
  // The temperature is multiplied by Tcoef after a sweep whose last
  // monitor() call adjusted at least one step, until it drops to T_min.
  // NOTE(review): a sweep with no adjustment leaves T_ unchanged, so the
  // loop only ends once adjustments occur - confirm this is intended.
  // Returns the last accepted energy.
  double minimise_mc(double T0, double T_min, double Tcoef=0.8)
  { int n_test = 0;
    int adjust_no;
    int track = 1;
    std::cerr<<"minimise_mc\n"<<std::flush;
    T_ = T0;
    reset_success(); // do not inherit counts from a previous run
    Val_ = E(X_);
    absVal_ = std::fabs(Val_);

    const int n_sweep = sweeps_per_temperature();

    while(T_ > T_min)
    { adjust_no = 0;

      for(int i=0; i < n_sweep; ++i)
      { for(int j=0; j < size_; ++j)
        { if(frozen_[j]) { continue; }
          const double old_X = X_[j];
          X_[j] += rand_.next_m1_1()*dX_[j];
          check(X_, j);
          const double nVal = E(X_);
          if(accept_move(Val_, nVal))
          { success_[j] += 1;
            Val_ = nVal;
            absVal_ = std::fabs(Val_);
            last_rejected_ = false;
          }
          else
          { X_[j] = old_X;
            last_rejected_ = true;
          }
        }

        // Use gradient
        if (use_grad_mc_) { gradient_kick(); }

        ++n_test;
        if(n_test >= n_monitor_)
        { adjust_no = monitor(n_test);
          reset_success(); // includes grad_success_, else its rate drifts up
          n_test = 0;
        }
      }

      if ((track_ > 0) && (track++ >= track_))
      { track = 1;
        display();
      }

      if(adjust_no > 0) { T_ *= Tcoef; }
    }
    return(Val_);
  }

  // Same scheme as minimise_mc(), but each kick is judged on the change of
  // Ei(j, X_) and Val_ is updated incrementally by that change.
  double minimise_delta(double T0, double T_min, double Tcoef=0.8)
  { int n_test = 0;
    int adjust_no;
    int track = 1;

    T_ = T0;
    reset_success();
    Val_ = E(X_);
    absVal_ = std::fabs(Val_);

    const int n_sweep = sweeps_per_temperature();
    while(T_ > T_min)
    { adjust_no = 0;
      for(int i=0; i < n_sweep; ++i)
      { for(int j=0; j < size_; ++j)
        { if(frozen_[j]) { continue; }
          const double old_X = X_[j];
          const double oldEiVal = Ei(j,X_);
          X_[j] += rand_.next_m1_1()*dX_[j];
          check(X_, j);
          const double newEiVal = Ei(j,X_);
          // Compare the local energies: the total is never recomputed here.
          if(accept_move(oldEiVal, newEiVal))
          { success_[j] += 1;
            Val_ += newEiVal -oldEiVal;
            absVal_ = std::fabs(Val_);
            last_rejected_ = false;
          }
          else
          { X_[j] = old_X;
            last_rejected_ = true;
          }
        }

        ++n_test;
        if(n_test >= n_monitor_)
        { adjust_no = monitor(n_test);
          reset_success();
          n_test = 0;
        }
      }

      if ((track_ > 0) && (track++ >= track_))
      { track = 1;
        display();
      }

      if(adjust_no > 0) { T_ *= Tcoef; }
    }
    return(Val_);
  }


  void set_iter_max(int val) { iter_max_ = val; }
  void set_epsilon(double val) { epsilon_ = val; }
  void set_tolerance_x(double val) { tolerance_x_ = val; }


  // Compute grad of E at x by forward differences and store it in dE_.
  // Eval E(x), save it in E_ and return it.
  // A component whose displaced energy is NaN gets a zero derivative.
  double grad_E(double *x)
  { E_ = E(x);

    for (int i=0; i < size_; i++)
    { x_p_dx_[i] = x[i];
    }

    for (int i=0; i < size_; i++)
    { double dx = epsilon_*std::fabs(x[i]);
      if (dx < epsilon_) { dx = epsilon_; }
      x_p_dx_[i] = x[i]+dx;
      dx = x_p_dx_[i] - x[i];  // improves accuracy
      const double f = E(x_p_dx_);
      x_p_dx_[i] = x[i];   // restores previous value
      if (std::isnan(f)) { dE_[i] = 0.0; }
      else { dE_[i] = (f-E_)/dx; }
    }
    return(E_);
  }

  virtual void display_qn()
  { int i;

    std::cout <<" Steps:"<<iter_<<"\n";
    std::cout <<"p=(";
    for(i=0; i < size_; ++i)
    { std::cout <<p_[i]<<",";
    }
    std::cout <<")\n";
    std::cout <<" E="<<E_<<"\n";
  }

  // Minimise using downhill gradient.
  // Each iteration steps along -dE_ with factor lambda0_, halving the factor
  // while the energy is NaN or fails the sufficient-decrease test, down to
  // tolerance_x_.  Iterations stop when the largest relative parameter
  // change is <= rel_err_x or after MAX_GR_LOOP iterations.
  // Returns the energy at the final X_.
  double minimise_grad(double rel_err_x)
  { double f = 0;
    bool test = true;
    int count = 0;

    while (test && (count < MAX_GR_LOOP))
    { count ++;
      const double f_old = grad_E(X_); // gradient in dE_
      double lambda = lambda0_;
      bool over_shot = true;
      for(int i=0; i< size_; i++) { p_[i] = X_[i]; }

      while (over_shot && (lambda > tolerance_x_))
      { double grad_dx = 0.0;

        for(int i = 0; i < size_; i++)
        { const double dx = - lambda*dE_[i];
          X_[i] = p_[i] +dx;
          check(X_, i);
          grad_dx += dE_[i]*dx;
        }
        f = E(X_);
        over_shot = std::isnan(f) || (f > f_old+1e-4*grad_dx);
        if (over_shot) { lambda *= 0.5; }
      }

      if (over_shot)
      { // No acceptable step was found: go back to the start point instead
        // of keeping the last rejected trial and its energy.
        for(int i=0; i< size_; i++) { X_[i] = p_[i]; }
        f = f_old;
      }

      double max_err = 0.0;
      for(int i=0; i < size_; i++)
      { const double scale = (std::fabs(p_[i])>1.0 ? std::fabs(p_[i]) : 1);
        const double e = std::fabs(X_[i]-p_[i])/scale;
        if (e > max_err) { max_err = e; }
      }
      test = max_err > rel_err_x;
    }
    return(f);
  }

  // Perform n consecutive MC (and optionally gradient) relaxations and
  // keep the one with the lower energy in X_ and E_.
  // n : number of relaxations.
  // method : USE_GR runs minimise_grad() after each MC run; any other value
  //          runs MC only.
  // T0, Tmin, Tcoef : MC parameters
  // tol : gradient tolerance (rel_err_x of minimise_grad)
  // echo : echo progress on screen
  // X_ and E_ are left as the last run produced them when no run yields an
  // energy below 1e120 (e.g. n <= 0).
  void relax_multi(int n, int method, double T0, double Tmin, double Tcoef=0.8,
                   double tol=1e-8, bool echo=false)
  { if (X_ == 0) { return; } // built without parameters: nothing to relax

    double Emin = 1e120;
    bool have_best = false;

    for (int i=0; i < n; i++)
    { pre_relax(METHOD_MC);
      E_ = minimise_mc(T0, Tmin, Tcoef);
      if (echo) std::cout<<"MC "<<i<<" : E="<<E_<<"\n";
      // MC might give the best minima
      if (keep_if_best(Emin)) { have_best = true; }

      if (method == USE_GR)
      { pre_relax(METHOD_GR);
        // Take the returned energy: grad_E() leaves E_ at the value from
        // before the final line-search step.
        E_ = minimise_grad(tol);
        if (echo) std::cout<<"GR "<<i<<" : E="<<E_<<"\n";
      }
      if (echo) display();
      if (keep_if_best(Emin)) { have_best = true; }
    }

    // restore best value
    if (have_best)
    { E_ = Emin;
      for(int j=0; j< size_; j++)
      { X_[j] = best_X_[j];
      }
    }
    if (echo) std::cout<<"Best E : "<<Emin<<"\n";
  }
};

#endif
