%% correlate gap-filling orders and biomasses to abundances in experiments
% Run the project configuration script. The code below reads habitat,
% experiments, otuDir and mediumFile without defining them, so they are
% presumably set there -- TODO confirm.
options

modelDir = fullfile('data', 'gap-filling', 'iterative');

figOutDir = 'figures';
methods = {'carveme', 'gapseq', 'kbase', 'consensus'};

% load medium that has been used for gap filling
% (expected to bring the variable "medium" into the workspace, since it is
% used in the setdiff calls below)
load(mediumFile)

% make sure the output directory exists before any table is written
if ~exist(figOutDir, 'dir')
    mkdir(figOutDir);
end

for i = 1:numel(habitat)
    disp(habitat{i})

    % load the abundances for each OTU; the file depends only on the
    % habitat (always the first experiment is used), so read it once here
    % instead of once per method
    otuFile = fullfile(otuDir, habitat{i}, experiments{1}, 'otutab.txt');
    otuTabRaw = readAbundancesFromFile(otuFile);

    for j = 1:numel(methods)
        disp(methods{j})

        % load gap-filled models (expected to provide the cell array GF)
        load(fullfile(modelDir, habitat{i}, [methods{j}, '.mat']));
        n = numel(GF);

        % bring the abundance table into the order of the models in GF
        model_ids = cellfun(@(x) char(strtok(x.id)), GF, ...
            'UniformOutput', false);
        otuTab = sortrows(otuTabRaw, 'Row');
        [isKnown, rowIdx] = ismember(model_ids(:), otuTab.Properties.RowNames);
        if all(isKnown)
            otuTab = otuTab(rowIdx, :);
        else
            % fall back to the row-name ordering, which only matches GF if
            % the models are stored in the same (alphabetical) order
            warning('gfVsAbundance:idMismatch', ...
                ['Not all model IDs were found among the OTU row names ', ...
                '(%s, %s); assuming models are ordered by row name.'], ...
                habitat{i}, methods{j});
        end

        if height(otuTab) ~= n
            error('gfVsAbundance:sizeMismatch', ...
                'Number of OTUs (%d) does not match number of models (%d).', ...
                height(otuTab), n);
        end

        % Correlate against the abundance values themselves. The index
        % vector returned by sortrows is a sort permutation, not a per-row
        % rank, so it must not be used as the abundance variable; the
        % Spearman correlation ranks the values internally.
        abundance = double(otuTab.abundances);

        % ~~~ number of added reactions vs abundance ~~~ %

        added = zeros(n, 1);
        for k = 1:n
            % reactions added by gap filling carry the note 'gf'
            added(k) = nnz(strcmp(GF{k}.rxnNotes, 'gf'));
        end

        [rho, pval] = corr(abundance, added, 'type', 'Spearman');

        fprintf('Spearman rank correlation abundance, #added: %.2f (p=%.2f)\n', rho, pval)

        % ~~~ number of exported metabolites vs abundance ~~~ %

        exported = zeros(n, 1);
        for k = 1:n
            % find exported metabolites from sink reactions
            sinkRxns = GF{k}.rxns(contains(GF{k}.rxns, 'sink_'));
            sinkMets = regexp(sinkRxns, 'MNXM\d+\[e]', 'match');
            sinkMets = [sinkMets{:}]';
            % without any match the count stays 0 (avoids passing an empty
            % non-cell array to setdiff)
            if ~isempty(sinkMets)
                exported(k) = numel(setdiff(sinkMets, medium));
            end
        end

        [rho, pval] = corr(abundance, exported, 'type', 'Spearman');

        fprintf('Spearman rank correlation abundance, #exported: %.2f (p=%.2f)\n', rho, pval)

        % ~~~ number of imported metabolites vs abundance ~~~ %

        imported = zeros(n, 1);
        for k = 1:n
            % find imported metabolites from exchange reactions
            exchRxns = GF{k}.rxns(contains(GF{k}.rxns, 'EX_'));
            exchMets = regexp(exchRxns, 'MNXM\d+\[e]', 'match');
            exchMets = [exchMets{:}]';
            if ~isempty(exchMets)
                imported(k) = numel(setdiff(exchMets, medium));
            end
        end

        [rho, pval] = corr(abundance, imported, 'type', 'Spearman');

        fprintf('Spearman rank correlation abundance, #imported: %.2f (p=%.2f)\n', rho, pval)

        % ~~~ write per-OTU summary table ~~~ %

        % append the counts to the abundance table; the renaming below
        % expects otuTab to hold exactly one variable (the abundances)
        savingfile = otuTab;
        savingfile.added = added;
        savingfile.imported = imported;
        savingfile.exported = exported;
        savingfile.Properties.VariableNames = {'abundance','added','imported','exported'};

        disp('Saving table');

        writetable(savingfile, ...
            fullfile(figOutDir, [habitat{i}, '_', methods{j}, '_gf_vs_abundance.txt']), ...
            'WriteVariableNames', true, 'WriteRowNames', true, ...
            'Delimiter', '\t')
    end
end
