% Model selection over skyline segments
function [fracTrue, criteria, kbest, nGrpbest, k, nGrp]= rocSkyFn(trajVal, folload)
% ROCSKYFN Select a grouped-skyline model for one simulated coalescent data set.
%
% Inputs
%   trajVal  - numeric id; num2str(trajVal) is appended to folload to form
%              the name of the sub-folder holding the CSV files
%   folload  - char array naming the data folder under 'using phylodyn'
%
% Outputs
%   fracTrue - contents of frac.csv (true trajectory type)
%   criteria - lenk-by-5 matrix with columns [-lik, bic, fia, qian, aic],
%              one row per candidate group size in k
%   kbest    - 1-by-5 group sizes minimising each column of criteria
%   nGrpbest - 1-by-5 numbers of groups matching kbest
%   k        - candidate group sizes: divisors of the coalescent count >= 4
%   nGrp     - number of groups returned by getGrpSkyLog for each k
%
% Errors are raised if the first sample time introduces fewer than 2
% lineages or if the reconstructed lineage counts are inconsistent with
% the data files.

% Assumptions and notes
% - known change-point, at which half samples introduced
% - two models to choose from - Kingman and a single change
% - works directly in log population size
% - test with several criteria: FIA, BIC, AIC, likelihood

%% Simulated coalescent data

% Possible trajectories to select
trajChoice = [folload num2str(trajVal)];

% Set data source
dataStr = folload;
% Data generated from phylodyn package in R. Files are addressed by path
% (relative to the current folder) rather than by changing directory, so a
% failed read can never leave the caller in the wrong working directory.
dataDir = fullfile('using phylodyn', dataStr, trajChoice);

% Coalescent and sample times
tcoal = csvread(fullfile(dataDir, 'coaltimes.csv'));
tsamp = csvread(fullfile(dataDir, 'samptimes.csv'));
% Lineages driving each coalescent
coalLin = csvread(fullfile(dataDir, 'coalLin.csv'));
% Samples introduced at each sample time
sampIntro = csvread(fullfile(dataDir, 'sampIntro.csv'));
% True trajectory type
fracTrue = csvread(fullfile(dataDir, 'frac.csv'));

% Combine coalescent and sample times (transposes assume the CSV files
% hold column vectors)
tLin = sort([tcoal' tsamp']);
len = length(tLin);

% Define num of samples and coalescents
nc = length(tcoal); ns = length(tsamp);
% Get whether a coalescent or sample time (complementary sets)
isamp = ismember(tLin, tsamp);
icoal = ismember(tLin, tcoal);

%% Heterochronous LTT construction

% Construct LTT, must start with sample
nLin = zeros(size(tLin));
nLin(1) = sampIntro(1);
% Check started with at least 2 samples
if nLin(1) < 2
    error('Started with under 2 samples');
end

% Counters for samples and coalescents
c_coal = 0; c_samp = 1;
for j = 2:len
    if isamp(j)
        % Sample event has occurred
        c_samp = c_samp + 1;
        nLin(j) = nLin(j-1) + sampIntro(c_samp);
    else
        % Coalescent event has occurred
        c_coal = c_coal + 1;
        nLin(j) = nLin(j-1) - 1;
    end
end
% Lineages that drive stated events (nLin is after events originally)
nLinPre = nLin;
nLinPre(icoal) = nLinPre(icoal) + 1;
nLinPre(isamp) = nLinPre(isamp) - sampIntro';

% Check have lineage pre-post relationship right
if ~ all(nLinPre(2:end) == nLin(1:end-1))
    error('The nLin and nLinPre relationship is wrong');
end
% Check that all events are used and that lineages match
if c_samp ~= ns || c_coal ~= nc || ~all(coalLin' == nLinPre(icoal))
    error('Computed LTT incorrectly');
end

% Number of intervals and lengths (times)
dtLin = diff(tLin);
% The lineage count over dtLin(i) is nLin(i), i.e. the count after event i
% (equivalently nLinPre(i+1), as verified above)
lendt = length(dtLin);
% Cumulative sum of coalescents
icoalSum = cumsum(icoal);

%% Construct grouped skyline estimator

% Set group sizes (non-repeated): divisors of nc that are at least 4
k = 1:nc; k = k(rem(nc, k) == 0);
k = k(k >= 4); lenk = length(k);

% Compute components for grouped skyline on each interval
alpha = 0.5*nLin(1:lendt).*(nLin(1:lendt) - 1);
compCoal = alpha.*dtLin;
% Max for FIA volume
Nmax = 10^9; V = log(Nmax);

% Coalescent events, skyline and ids for each grouping
c = cell(1, lenk); logNavg = c; tEnd = c;
likSeg = c; nGrp = zeros(1, lenk); idGrp = c;
% Model selection criteria for each grouping
lik = nGrp; fia = nGrp; bic = nGrp; qian = nGrp; aic = nGrp;

% Skyline plots for various k, and their log-likelihoods
for i = 1:lenk
    % Skyline MLE function
    [logNavg{i}, c{i}, nGrp(i), tEnd{i}, idGrp{i}] = getGrpSkyLog(k(i), compCoal, nc,...
        icoal, icoalSum, dtLin, tLin);
    % Likelihood and selection criteria for this grouping
    [lik(i), likSeg{i}, fia(i), bic(i), qian(i), aic(i)] = getLogNModSel2(nc, nGrp(i),...
        idGrp{i}, V, compCoal, icoal);
end

% Combine criteria (one row per k, one column per criterion)
% NOTE(review): negating lik presumes getLogNModSel2 returns the maximised
% log-likelihood, so that smaller -lik is better - confirm against helper
criteria = [-lik', bic', fia', qian', aic'];
% Best model id per criterion. The dimension is given explicitly: with a
% single candidate k, criteria is 1-by-5 and min(criteria) would instead
% reduce across the criteria and return a column index, not a model id.
[~, modID] = min(criteria, [], 1);
% Best k and nGrp for skyline selection
kbest = k(modID); nGrpbest = nGrp(modID);

