function [model, train, test, mu, s2, mse, nlpd] = picGP(x, y, param, xs, ys)
%
% This function is an implementation of the 'Partially Independent
% Conditional' (PIC) approximation proposed by Snelson and Ghahramani in
% the following paper:
%
% E. Snelson and Z. Ghahramani (2007), 'Local and global sparse Gaussian
% process approximations,' In International Conference on Artificial
% Intelligence and Statistics 11, pages 524-531.
%
% Two modes are possible: training or prediction. If no test cases are
% supplied, only the trained GP regression model is produced; if test cases
% are given, the predictions on the test cases are returned as well. Usage:
%
%   training: [model train test               ] = picGP(x,y, param);
% prediction: [model train test mu s2         ] = picGP(x,y, param, xs);
%         or: [model train test mu s2 mse nlpd] = picGP(x,y, param, xs, ys);
% where
%
%   x                 n by 2 matrix of training inputs
%   y                 column vector of length n of training targets
%
%   param.M           the number of pseudo inputs (must not exceed n)
%   param.K           the number of local regions
%   param.frachyper   (optional) the fraction of the training dataset
%                     used for learning the hyperparameters (range:0.0-1.0,
%                     default: 1.0). The resulting subset size is clamped
%                     to the range [1, n].
%   param.nIter       (optional) the maximum number of iterations for
%                     optimizing the hyperparameters (default: 50)
%   param.logtheta0   initial guess of hyperparameters of the prior
%                     covariance function
%   xs                (optional) ns by 2 matrix of test inputs
%   ys                (optional) column vector of length ns of test targets
%
%   model    trained Gaussian process model
%   train    the training time in seconds
%   test     the testing time in seconds (0 when xs is not supplied)
%   mu       column vector (of length ns) of predictive output means
%            ([] when xs is not supplied)
%   s2       column vector (of length ns) of predictive output variances
%            ([] when xs is not supplied)
%   mse      mean squared error computed with the test data (xs, ys)
%            ([] when ys is not supplied)
%   nlpd     negative log predictive density ([] when ys is not supplied)
%
% Copyright (c) by Chiwoo Park, 2011-06-07

% Give every optional output a value up front. Without this, a call that
% requests an output which the chosen mode does not compute (e.g. 'test'
% in training mode) aborts with an "output argument not assigned" error.
test = 0;
mu   = [];
s2   = [];
mse  = [];
nlpd = [];

% check if the inputs are valid
if check_field(param, {'M', 'K', 'logtheta0'}) < 3
     error('picGP:argChk', 'Some mandatory fields in param are missing.');
end

% default parameter setting
if ~isfield(param, 'nIter')
    param.nIter = 50;
end
if ~isfield(param, 'frachyper')
    param.frachyper = 1;
end

M = param.M;
K = param.K;
loghyper = param.logtheta0;
[N, d] = size(x);

% the pseudo-inputs are drawn from the training inputs, so there cannot be
% more of them than there are training points
if M > N
    error('picGP:argChk', ...
        'param.M (%d) exceeds the number of training inputs (%d).', M, N);
end

% random permutation of the training indices; it drives both the initial
% pseudo-input selection and the hyperparameter-learning subset
[dum, idx] = sort(rand(N,1)); clear dum;

% initialize pseudo-inputs to a random subset of training inputs
I = idx(1:M);
xb_init = x(I, :);

% logical mask of the training points used for hyperparameter learning;
% the subset size is clamped so that it is never empty and never runs
% past the end of the permutation
nHyper = min(max(round(N*param.frachyper), 1), N);
mask = zeros(N, 1);
mask(idx(1:nHyper)) = 1;
idx = logical(mask);
param.idx = idx;

%partition
[subdomains, cpt] = kmeansMesh(K, x, y, idx);

% initialize hyperparameters (pseudo inputs + log hyperparameters)
w_init = [reshape(xb_init,M*d,1);loghyper];

disp('learning....');
tic;
% optimize hyperparameters and pseudo-inputs; the objective passed to
% minimize is 'spgp_lik_fic', evaluated on the selected subset only
w = minimize(w_init, 'spgp_lik_fic',-param.nIter,y(idx, :),x(idx, :),M);

% unpack the optimized vector: first M*d entries are the pseudo-inputs,
% the remainder are the log hyperparameters
param.hyp = w(M*d+1:end,1);
param.xb  = reshape(w(1:M*d,1),M,d);
% learn pic model
[model, subdomains] = learn_pic(x, subdomains, param);
train = toc;

model.subdomains = subdomains;
model.param = param;
model.cpt   = cpt;   % cluster centers

% prediction at test inputs
if nargin > 3
    disp('testing....');
    [mu,s2,test] = pic_pred(xs, model);
    if nargin > 4  %compute MSE & NLPD
        ns = size(xs, 1);
        % work on column-shaped copies so that a row-vector ys cannot be
        % expanded against a column mu into an ns-by-ns matrix
        err  = ys(:) - mu(:);
        s2c  = s2(:);
        mse  = (err' * err) / ns;
        nlpd = sum(0.5*(log(2*pi*s2c) + (err.^2)./s2c)) / ns;
    end
end
