% ROC examination for skylines in batch
clearvars;
clc;
close all;
tic;  % start the stopwatch

% Assumptions and notes
% - expects all data in a single batch folder
% - single shift binary hypothesis problem
% - select over skyline segment number using robust criteria
% - test with several criteria: MDL, BIC, likelihood

% Additional plotting/partition package
addpath(genpath('/Users/kp10/Documents/MATLAB'));

% Set figure defaults on the graphics root: LaTeX interpreters for tick
% labels, legends and text, and a 16pt axes font
set(groot, ...
    'defaultAxesTickLabelInterpreter', 'latex', ...
    'defaultLegendInterpreter', 'latex', ...
    'defaultTextInterpreter', 'latex', ...
    'defaultAxesFontSize', 16);

% Light and mid grey RGB triplets
grey1 = repmat(0.8, 1, 3);
grey2 = repmat(0.5, 1, 3);

% Home and folder to save/load
savetrue = 0;
folload = 'shift_50';
folsave = 'rocData';

% Metrics to be evaluated (labels are written as LaTeX strings)
metrics = {'$-\log L$', 'BIC', 'FIA', 'QK', 'AIC'};
nmet = numel(metrics);

% Folder with all data from R sims. Files are read through explicit paths
% instead of cd-ing into the folder, so a failed read cannot leave the
% session stranded in the data directory.
thisDir = cd;
dataDir = fullfile(thisDir, 'using phylodyn', folload);
if exist(dataDir, 'dir') ~= 7
    error('rocSky:missingDataFolder', 'Data folder not found: %s', dataDir);
end
% True shift time and models
tshift = csvread(fullfile(dataDir, 'tshift.csv'));
frac = csvread(fullfile(dataDir, 'frac.csv'));
M = csvread(fullfile(dataDir, 'numTraj.csv'));
% No. coalescent events
nc = csvread(fullfile(dataDir, 'nc.csv'));
% Samples introduced at each sample time
sampIntro = csvread(fullfile(dataDir, 'sampIntro.csv'));

% Set group sizes possible (non-repeated): the divisors of nc that are at
% least 4
k = 1:nc; k = k(rem(nc, k) == 0);
k = k(k >= 4); lenk = length(k);
%k = k(end-1:end); lenk = 2;

%% Load each file and perform model selection

% Preallocate outputs of the selection routine: one cell per trajectory for
% the criteria and group counts, and one row per trajectory (one column per
% metric) for the best k and best group count
% NOTE(review): meaning of each output is inferred from the variable names;
% confirm against rocSkyFn2
nGrp = cell(1, M);
criteria = cell(1, M);
nGrpbest = zeros(M, nmet);
kbest = zeros(M, nmet);

for i = 1:M
    % Main function for selection models
    [criteria{i}, kbest(i, :), nGrpbest(i, :), nGrp{i}] = ...
        rocSkyFn2(i, folload, k, lenk, sampIntro);
    % Progress report
    disp(['Completed ', num2str(i), ' of ', num2str(M)]);
end

% Null model indices and true nGrp: entries with frac == 1 are treated as
% the null model (ID 0, nGrp 1), all others as the alternative (ID 1, nGrp 2).
% The labels are built as a column so they line up with the rows of
% nGrpbest regardless of whether frac was stored as a row or a column.
modTrueID = ones(numel(frac), 1);
modTrueID(frac(:) == 1) = 0;
nGrpTrue = modTrueID + 1;
if numel(nGrpTrue) ~= size(nGrpbest, 1)
    error('rocSky:sizeMismatch', ...
        'frac has %d entries but nGrpbest has %d rows', ...
        numel(nGrpTrue), size(nGrpbest, 1));
end

% Boolean for if metric selected the correct model (rows: trajectories,
% columns: metrics)
boolMet = nGrpbest == nGrpTrue;
% Sum explicitly down the rows so a single trajectory (M == 1) still gives
% one success probability per metric
Psucc = sum(boolMet, 1)/M;
% Rank from best to worst: ascending sort, then reversed
[~, rankid] = sort(Psucc);
ranks = metrics(rankid);
ranks = ranks(end:-1:1);
disp(ranks);

% No. alternative model choices
nPos = sum(modTrueID(:));
% Null model
nNeg = M - nPos;
% Classification performance
% NOTE: if nPos or nNeg is 0 the corresponding rate evaluates to NaN
TPR = zeros(1, nmet); FPR = TPR;
C = cell(1, nmet); dimC = TPR;
for i = 1:nmet
    % Pin the label order so row/column 1 is always nGrp = 1 and row/column
    % 2 is always nGrp = 2, even when a label is absent from the data or
    % other group counts were selected; any other labels follow in sorted
    % order
    nGrpSel = nGrpbest(:, i);
    extra = setdiff([nGrpTrue(:); nGrpSel], [1; 2]);
    extra = extra(~isnan(extra));
    % Confusion matrix and dimension (dimension is now at least 2)
    Ctemp = confusionmat(nGrpTrue(:), nGrpSel, 'Order', [1; 2; extra(:)]);
    C{i} = Ctemp; dimC(i) = length(Ctemp);
    % True positive rate: alternative cases where nGrp = 2 was selected
    TPR(i) = Ctemp(2, 2)/nPos;
    % False positive rate: null cases where nGrp = 1 was not selected
    FPR(i) = 1 - Ctemp(1, 1)/nNeg;
end

% Log time (elapsed since the matching tic, in minutes)
tsim = toc/60;
disp(['Execution time: ' num2str(tsim) ' mins']);