function [model, train, test, mu, s2, mse, nlpd] = ddmGP_mn(x, y, dd_param, cv_param, xs, ys)
% Domain Decomposition Method for 2-d Gaussian Process regression
% (multi-node version). This function trains the regression model based on
% the domain decomposition method (Park et al. 2011) with a very large
% spatial dataset, and optionally predicts the regression function values at
% test cases. It only works when the training data is two dimensional.
%
% Two modes are possible: training or prediction. If no test cases are
% supplied, only the trained GP regression model is produced; if test cases
% are given, the predictions on the test cases are returned as well. Usage:
%
%   training: [model, train] = ddmGP_mn(x, y, dd_param, cv_param);
% prediction: [model, train, test, mu, s2] = ...
%                 ddmGP_mn(x, y, dd_param, cv_param, xs);
%         or: [model, train, test, mu, s2, mse, nlpd] = ...
%                 ddmGP_mn(x, y, dd_param, cv_param, xs, ys);
%
% where:
%
%   x                 n by 2 matrix of training inputs
%   y                 column vector of length n of training targets
%   dd_param.meshfunc the string name of the function used to decompose the
%                     domain into subdomains.
%   dd_param.mparam   input parameters for meshfunc (e.g. the size of
%                     subdomains)
%   dd_param.p        degrees of freedom for representing each boundary
%                     function
%   dd_param.q        the number of locations to check the continuity of
%                     predictions on a boundary
%
%   cv_param.covfunc  prior covariance function
%   cv_param.local    (optional) 1 (use the local hyperparameters; default),
%                                0 (use the global one)
%   cv_param.idx      the row indices of the training dataset, which
%                     corresponds to a subset of the training dataset
%                     used for learning the hyperparameters
%   cv_param.nIter    (optional) the maximum number of iterations for
%                     optimizing the hyperparameters (default: 50)
%   cv_param.logtheta  (optional) used as hyperparameters of the prior
%                      covariance function. If it is specified, the
%                      hyperparameter learning step is skipped.
%   cv_param.logtheta0 initial guess of hyperparameters of the prior
%                      covariance function; required unless
%                      cv_param.logtheta is given
%   xs             (optional) ns by 2 matrix of test inputs
%   ys             (optional) column vector of length ns of test targets
%
%   model    trained Gaussian process model (struct with the fields
%            subdomains, dd_param, cv_param and memberFunc)
%   train    the training time in seconds
%   test     the testing time in seconds
%   mu       column vector (of length ns) of predictive output means
%   s2       column vector (of length ns) of predictive output variances
%   mse      mean squared error computed with the test data (xs, ys)
%   nlpd     negative log predictive density
%
%   test, mu, s2, mse and nlpd are returned as [] whenever they are not
%   computed: test/mu/s2 without xs, mse/nlpd without ys, and all five on
%   every process whose rank is not 0 (only rank 0 collects predictions).
%
% Errors:
%   ddmGP_mn:argChk  a mandatory field of dd_param or cv_param is missing,
%                    or neither cv_param.logtheta nor cv_param.logtheta0
%                    is supplied
%   ddmGP:argChk     x, y, xs or ys have inconsistent dimensions
%
% Refer to:
%     Park et al. (2011), 'Domain Decomposition Approach for Fast Gaussian
%     Process Regression of Large Spatial Datasets', Journal of Machine
%     Learning Research, vol 12.
%
% Copyright (c) by Chiwoo Park, 2011-06-07

% Shared state: these globals are written here; presumably the helper
% routines called below (ddm_bound_mn, collectLocalDataToCenter, ...) read
% them -- verify against those files. 'comm' is only read here and must
% have been set by the caller before this function runs.
global subdomains
global interfaces
global vertices
global affine
global iTk
global N
global M
global V
global dof
global bpts
global T_q
global iT_q2
global comm
global comm_size
global my_rank
global sp_task
global ep_task
global ntask
global mycenter
global nproc

% Give every optional output a defined value up front so that requesting it
% never fails with "output argument not assigned" in training-only mode or
% on non-root ranks, where the prediction results are not produced.
test = [];
mu   = [];
s2   = [];
mse  = [];
nlpd = [];

% check if the inputs are valid
if check_field(dd_param, {'meshfunc', 'mparam', 'p', 'q'}) < 4
     error('ddmGP_mn:argChk', 'Some mandatory fields in dd_param are missing.');
end

% check if the inputs are valid
if check_field(cv_param, {'covfunc', 'idx'}) < 2
     error('ddmGP_mn:argChk', 'Some mandatory fields in cv_param are missing.');
end

% default parameter setting
if ~isfield(cv_param, 'local')
    cv_param.local = 1;
end
if ~isfield(cv_param, 'nIter')
    cv_param.nIter = 50;
end

% check the dimensions of training and test dataset
if size(x, 2) ~= 2
    error('ddmGP:argChk', 'x must have two columns');
end
if size(x, 1) ~= size(y, 1)
    error('ddmGP:argChk', 'x and y must have the same number of rows');
end
if nargin > 4
    if size(xs, 2) ~= 2
        error('ddmGP:argChk', 'xs must have two columns');
    end
    if nargin == 6
        if size(xs, 1) ~= size(ys, 1)
            error('ddmGP:argChk', 'xs and ys must have the same number of rows');
        end
    end
end

% The training step below needs hyperparameters from one of these two
% fields; fail early with a clear message instead of a "non-existent field"
% error after the (potentially expensive) domain decomposition.
if ~isfield(cv_param, 'logtheta') && ~isfield(cv_param, 'logtheta0')
    error('ddmGP_mn:argChk', ...
          'cv_param must provide either logtheta or logtheta0.');
end

meshfunc  = dd_param.meshfunc;
mparam    = dd_param.mparam;
dof       = dd_param.p;
bpts      = dd_param.q;
covfunc   = cv_param.covfunc;
idx       = cv_param.idx;

% decompose the training dataset following the inputs specified in dd_param
[subdomains, interfaces, vertices, memberFunc, affine] = feval(meshfunc,...
                                                        mparam, x, y, idx);

N = size(subdomains,1);
M = size(interfaces,1);
V = size(vertices,1);

% iTk  : generate n-1 interior points uniformly spaced between x1 and x2
%        (end points excluded); used for generating the points where the
%        continuity of predictor over the boundary is checked.
% T_q  : the global basis functions of Lagrange finite element with degree
%        of freedom dof; evaluated at bpts+1 points on [0, 1]
% iT_q2: the pseudo-inverse of T_q
iTk     = @(x1, x2, n) [x1(1) + (x2(1) - x1(1))*(1:1:(n-1))'/n, x1(2) + ...
                         (x2(2) - x1(2))*(1:1:(n-1))'/n];
T_q     = LargrangeFE(((0:bpts)/bpts)', dof);
iT_q2   = pinv(T_q);

% Get size and rank.
comm_size = MPI_Comm_size(comm);
my_rank   = MPI_Comm_rank(comm);

% parallelize the loop with the size of N
[sp_task, ep_task, ntask] = splitTask(N, comm_size);
[mycenter, nproc] = getnodeinfo();

%% training
tic;
% train the global hyperparameter if necessary
disp('learning hyperparamters....');
if cv_param.local == 0
    % NOTE(review): in this branch the initial guess is used as-is, without
    % optimization -- confirm this matches the intended meaning of local = 0.
    if isfield(cv_param, 'logtheta0')
        cv_param.logtheta = cv_param.logtheta0;
    end
    % otherwise the user-supplied cv_param.logtheta is kept (its presence
    % is guaranteed by the argument check above)
else
    if ~isfield(cv_param, 'logtheta')
       % logtheta0 is guaranteed to exist here by the argument check above
       cv_param.logtheta = minimize(cv_param.logtheta0, 'loglikelihood_ddm_mn', ...
                                    -cv_param.nIter, covfunc, subdomains);
    end
end

% perform precomputation necessary for training local models
disp('performing local precomputations....');
names = {'logtheta', 'isigK', 'h_i', 'yKy'};
nFields = size(names, 2);
% one row per subdomain handled by this rank; the extra last column holds
% the global subdomain index
localData = cell(ntask(my_rank+1), nFields+1);
for i = sp_task(my_rank+1):ep_task(my_rank+1)
    sd = ddm_local(subdomains{i}, cv_param);
    subdomains{i} = sd;
    task_id = i - sp_task(my_rank+1) + 1;   % 1-based row within localData
    for j = 1:nFields
        localData{task_id, j} = sd.(names{j});
    end
    localData{task_id, nFields+1} = i;
end
% send local computation results to a master node for update
collectLocalDataToCenter('11000', localData, names);

% estimate the boundary values
disp('learning boundary values');
r = ddm_bound_mn(covfunc);

% train a local model for each subdomain
disp('learning local models');
for i = sp_task(my_rank+1):ep_task(my_rank+1)
    sd     = subdomains{i};
    % NOTE(review): the interface count is taken from the LAST entry of
    % sd.int_idx, not from its length -- confirm that int_idx stores its
    % own count in the final slot.
    n_intf = sd.int_idx(length(sd.int_idx));
    qvalue = zeros(1, sd.q);
    for j = 1:n_intf
        k = sd.int_idx(j);
        interface = interfaces{k};
        % map the boundary coefficients of interface k through the basis
        % T_q and store them in this subdomain's block for that interface
        qvalue(sd.gx_block{j}) = r(interface.rowI, 1)' * T_q';
    end
    sd.qvalue = qvalue;
    sd.S  = (qvalue - sd.h_i' * sd.K_iq) * sd.G_i;
    sd.v  = sd.h_i * sd.S / sd.yKy;
    subdomains{i,1} = sd;
end
train = toc;

%collectLocalDataToCenter2('99000', 'success!');

% compile the trained model
model.subdomains = subdomains;
model.dd_param   = dd_param;
model.cv_param   = cv_param;
model.memberFunc = memberFunc;

% prediction at test inputs
if nargin > 4
    disp('testing....');
    if (my_rank ~= 0)
        % non-root ranks take part in the prediction but do not capture its
        % results; their test/mu/s2/mse/nlpd outputs stay []
        ddm_pred_mn(xs, model);
    else
        ns = size(xs,1);
        [mu, s2, test] = ddm_pred_mn(xs, model);
        if nargin > 5  %compute MSE & NLPD
            se   = (ys - mu)' * (ys - mu);
            nlpd = sum(0.5*(log(2*pi*s2) + ((ys - mu).^2)./s2));
            mse  = se / ns;
            nlpd = nlpd / ns;
        end
    end
end