function [model, train, test, mu, s2, mse, nlpd] = lprGP(x,y, param, xs, ys)
%
% This function is the implementation of the local probabilistic regression
% approach for online Gaussian process regression proposed by R. Urtasun 
% and T. Darrell (2008), which is published at 
%
% Raquel Urtasun and Trevor Darrell, 2008, 'Sparse probabilistic regression 
% for activity-independent human pose inference,' In IEEE Conference on 
% Computer Vision and Pattern Recognition 2008, pages 1-8.
%
% Two modes are possible: training or prediction. If no test cases are
% supplied, then only the trained GP regression model is provided; if test
% cases are given, then the predictions on the test cases are returned as
% well. Usage:
%
%   training: [model train test               ] = lprGP(x,y, param);
% prediction: [model train test mu s2         ] = lprGP(x,y, param, xs);
%         or: [model train test mu s2 mse nlpd] = lprGP(x,y, param, xs, ys);
%
% where:
%
%   x                 n by 2 matrix of training inputs
%   y                 column vector of length n of training targets
%
%   param.T           number of local experts
%   param.R           requested number of sites used for local 
%                     hyperparameter learning. The sites are placed on a
%                     regular grid over the bounding box of x, so the
%                     number actually used is the grid size and may differ
%                     from param.R (model.param.R keeps the requested value;
%                     numel(model.logtheta) is the number actually used).
%   param.S           number of training inputs allocated to each local expert
%   param.covfunc     prior covariance function 
%   param.nIter       (optional) optimization budget for the hyperparameters;
%                     it is passed to minimize() as -param.nIter 
%                     (default: 50)
%   param.logtheta0   initial guess of hyperparameters of the prior 
%                     covariance function
%   xs                (optional) ns by 2 matrix of test inputs
%   ys                (optional) column vector of length ns of test targets
%
%   model    trained Gaussian process model (struct with fields Tree_x,
%            Tree_r, param, logtheta, x, y)
%   train    the training time in seconds
%   test     the testing time in seconds (0 when xs is not supplied)
%   mu       column vector (of length ns) of predictive output means
%            ([] when xs is not supplied)
%   s2       column vector (of length ns) of predictive output variances
%            ([] when xs is not supplied)
%   mse      mean squared error computed with the test data (xs, ys)
%            ([] when ys is not supplied)
%   nlpd     negative log predictive density 
%            ([] when ys is not supplied)
%
% Copyright (c) by Chiwoo Park, 2011-06-07

% Give every optional output a value up front so that any of the documented
% call forms can request them without an "output not assigned" error.
test = 0;
mu   = [];
s2   = [];
mse  = [];
nlpd = [];

% check if the inputs are valid
if nargin < 3
    error('lprGP:argChk', 'At least three inputs (x, y, param) are required.');
end
if check_field(param, {'covfunc', 'T', 'R', 'S', 'logtheta0'}) < 5
     error('lprGP:argChk', 'Some mandatory fields in param are missing.');
end
if size(x, 2) ~= 2
    % The grid construction below is written for exactly two input columns.
    error('lprGP:argChk', 'x must be an n by 2 matrix of training inputs.');
end

% default parameter setting
if ~isfield(param, 'nIter')
    param.nIter = 50;
end

covfunc = param.covfunc;
logtheta0 = param.logtheta0;
R = param.R;
S = param.S;

disp('Warning: this algorithm may take a long time to complete (e.g., several hours) for analyzing a large dataset.');

% identify R cluster center locations 
disp('obtaining center locations of clusters...');
%[blockid, cpt] = kmeans(x, R);
% Per-column extent of the inputs, computed explicitly along dimension 1 so
% that no toolbox function is needed and a single-row x is not collapsed.
lo = min(x, [], 1);
hi = max(x, [], 1);
w  = hi - lo;
if any(~isfinite(w)) || any(w <= 0)
    error('lprGP:argChk', ...
          'Each column of x must span a positive, finite range.');
end

% Number of grid cells along the shorter side; at least one.
num     = max(1, ceil(sqrt(R / (max(w)/min(w)))));

% First try the integer-rounded spacing.
spacing = round(min(w)/num);
offset  = round(spacing/2);
if spacing > 0
    x1_grid = lo(1)+offset:spacing:hi(1)-offset;
    x2_grid = lo(2)+offset:spacing:hi(2)-offset;
else
    x1_grid = [];
    x2_grid = [];
end

% Rounding collapses the grid when the inputs live on a small scale (e.g.
% data normalised to [0,1] gives spacing == 0). In that case fall back to
% the unrounded spacing so that at least one center is always produced.
if isempty(x1_grid) || isempty(x2_grid)
    spacing = min(w)/num;
    offset  = spacing/2;
    x1_grid = lo(1)+offset:spacing:hi(1)-offset;
    x2_grid = lo(2)+offset:spacing:hi(2)-offset;
    % Guard against floating-point round-off emptying a one-point range.
    if isempty(x1_grid), x1_grid = (lo(1) + hi(1))/2; end
    if isempty(x2_grid), x2_grid = (lo(2) + hi(2))/2; end
end

[a,b]   = meshgrid(x1_grid, x2_grid);
R       = numel(a);        % actual number of sites = grid size
cpt     = [a(:) b(:)];

% Decide which kd-tree implementation to use from the MATLAB release year:
% KDTreeSearcher/knnsearch for release years >= 2011, the kdtree_* functions
% otherwise (or when the release string does not start with a number).
rel     = version('-release');
relYear = str2double(rel(1:min(4, numel(rel))));   % NaN if not numeric
KdtreeInMatlab = (relYear >= 2011);                % NaN compares false
    
% OFFLINE: Learning hyperparameters
disp('learning hyperparameters...');
if KdtreeInMatlab 
    Tree_x = KDTreeSearcher(x,'Distance','euclidean');
else
    Tree_x = kdtree_build( x );
end
tic;    % training time is measured from here (excludes building Tree_x)
logtheta = cell(R, 1);
Y_R = zeros(R, 1);
X_R = zeros(R, 2);
for n = 1:R
    % i   : index of the training input closest to the n-th grid center
    % idx : indices of the S training inputs closest to x(i,:)
    if KdtreeInMatlab
        i   = knnsearch(Tree_x, cpt(n, :),'k',1);
        idx = knnsearch(Tree_x, x(i,:),'k',S);  
    else
        i   = kdtree_nearest_neighbor(Tree_x, cpt(n, :));
        idx = kdtree_k_nearest_neighbors(Tree_x, x(i,:)', S);
    end
    % Fit local hyperparameters on the S-point neighbourhood.
    logtheta{n} = minimize(logtheta0, 'loglikelihood', -param.nIter, ...
                                     covfunc, x(idx, :), y(idx, :));
    Y_R(n) = y(i);
    X_R(n, :) = x(i, :);
end

% NOTE(review): the site tree is indexed on the site TARGETS (Y_R); the site
% input locations X_R are collected above but never used. Whether this is
% intended depends on how lpr_pred queries model.Tree_r -- confirm there
% before changing it.
if KdtreeInMatlab
    Tree_r = KDTreeSearcher(Y_R,'Distance','euclidean');
else
    Tree_r = kdtree_build(Y_R); %Construct a KD Tree 
end
train = toc;
model.Tree_x = Tree_x;
model.Tree_r = Tree_r;
model.param  = param;
model.logtheta = logtheta; 
model.x = x; model.y = y; 

% ONLINE: Inference for test point x_t
if nargin > 3
    disp('testing....');
    [mu,s2,test] = lpr_pred(xs, model);
    if nargin > 4  %compute MSE & NLPD
        ns    = size(xs, 1);
        % Force column shape so a row-vector ys cannot change the result shape.
        resid = ys(:) - mu(:);
        v     = s2(:);
        mse   = (resid' * resid) / ns;
        nlpd  = sum(0.5*(log(2*pi*v) + (resid.^2)./v)) / ns;
    end
end
