%{ 
Updated 14 Nov 2022

Published with Nuber et al (in review), Sea Level controls on Agulhas Leakage 
Salinity and the Atlantic Overturning Circulation. Nature.

Requires Nuber22_coredata.xlsx

This script extracts data from the spreadsheet Nuber22_coredata.xlsx and
splices the records together. Then n new simulated records are generated by
resampling with replacement at each true age point, with weights applied to
the sampling to account for proximity to the given age point and the
relative occurrence of each original core within the sampling window.

Final processed data is output to alldata_<date>.xlsx

%}
%Start from an empty workspace with no open figures. clearvars removes the
%workspace variables without also flushing cached functions as
%"clear all" does.
clearvars
close all

n=1000; %Number of sampling iterations

filename='Nuber22_coredata.xlsx'; %import file
sheets = sheetnames(filename); %core names (one sheet per core)
window=15; %smoothing window size in kyr
sigma=window/5; %for gaussian smooth

%extract data: one table per sheet, stored in sheet order
data=cell(1, numel(sheets));
for i=1:numel(sheets)
    data{i}=readtable(filename, 'Sheet', sheets(i));
end

  %Per-core containers, one cell per sheet. Each cell ends up holding a
  %three-column matrix: [age, value, z-score of value within that core].
  ncores=numel(sheets);
  SST=cell(1, ncores);
  d18O=cell(1, ncores);
  SSS=cell(1, ncores);

%z-score of a column vector relative to its own mean and standard deviation
zs=@(v) (v-mean(v))./std(v);

for c=1:ncores
    core=data{c};

    %rows that actually carry a measurement for each proxy
    hasSST=~isnan(core.SST);
    hasd18O=~isnan(core.d18O);
    hasSSS=~isnan(core.SSS);

    %each proxy is paired with its own age column
    sstval=core.SST(hasSST);
    d18Oval=core.d18O(hasd18O);
    sssval=core.SSS(hasSSS);

    SST{c}=[core.Age(hasSST), sstval, zs(sstval)];
    d18O{c}=[core.d18Oage(hasd18O), d18Oval, zs(d18Oval)];
    SSS{c}=[core.SSSage(hasSSS), sssval, zs(sssval)];
end

% Random age error as per Spratt and Lisiecki (2016).
% randage(c, k) is the age offset added to every sample of core c during
% iteration k; offsets are drawn with replacement from xrand.
xrand=[-2, -1, 0, 1, 2];
ncores=numel(sheets);
draws=randsample(xrand, n*ncores, true);
randage=reshape(draws, ncores, []);


%% d18O
%splice data: stack the [age, value, z-score] rows of every core and tag
%each row with the index of the core it came from (column 4)
d18Oparts=cell(numel(d18O), 1);
for i=1:numel(d18O)
    d18Oparts{i}=[d18O{i}(:, 1:3), repmat(i, size(d18O{i}, 1), 1)];
end
d18Oall_0=sortrows(vertcat(d18Oparts{:}));

%slightly adjust times so that no times are identical. After sortrows,
%equal ages sit in adjacent rows, so one neighbour comparison finds every
%tied row; each tied row is nudged by its own row index times 1E-9.
if ~isempty(d18Oall_0)
    t=d18Oall_0(:, 1);
    sameasnext=t(1:end-1)==t(2:end);
    tied=[false; sameasnext] | [sameasnext; false];
    idx=find(tied);
    d18Oall_0(idx, 1)=t(idx)+idx*1E-9;
end

%pre-allocate final bootstrapping output (rows: samples, columns: iterations)
d18Obootstat=nan(size(d18Oall_0, 1), n);
d18Obootstatz=d18Obootstat;

%% SST
%splice data: stack the [age, value, z-score] rows of every core and tag
%each row with the index of the core it came from (column 4)
SSTparts=cell(numel(SST), 1);
for i=1:numel(SST)
    SSTparts{i}=[SST{i}(:, 1:3), repmat(i, size(SST{i}, 1), 1)];
end
SSTall_0=sortrows(vertcat(SSTparts{:}));

%slightly change times so that no times are identical. After sortrows,
%equal ages sit in adjacent rows, so one neighbour comparison finds every
%tied row; each tied row is nudged by its own row index times 1E-9.
if ~isempty(SSTall_0)
    t=SSTall_0(:, 1);
    sameasnext=t(1:end-1)==t(2:end);
    tied=[false; sameasnext] | [sameasnext; false];
    idx=find(tied);
    SSTall_0(idx, 1)=t(idx)+idx*1E-9;
end

%pre-allocate final bootstrapping output (rows: samples, columns: iterations)
SSTbootstat=nan(size(SSTall_0, 1), n);
SSTbootstatz=SSTbootstat;


%% SSS

%Splice data: stack the [age, value, z-score] rows of every core and tag
%each row with the index of the core it came from (column 4)
SSSparts=cell(numel(SSS), 1);
for i=1:numel(SSS)
    SSSparts{i}=[SSS{i}(:, 1:3), repmat(i, size(SSS{i}, 1), 1)];
end
SSSall_0=sortrows(vertcat(SSSparts{:}));

%slightly change times so that no times are identical. After sortrows,
%equal ages sit in adjacent rows, so one neighbour comparison finds every
%tied row; each tied row is nudged by its own row index times 1E-9.
if ~isempty(SSSall_0)
    t=SSSall_0(:, 1);
    sameasnext=t(1:end-1)==t(2:end);
    tied=[false; sameasnext] | [sameasnext; false];
    idx=find(tied);
    SSSall_0(idx, 1)=t(idx)+idx*1E-9;
end

%pre-allocate final bootstrapping output (rows: samples, columns: iterations)
SSSbootstat=nan(size(SSSall_0, 1), n);
SSSbootstatz=SSSbootstat;


%% Bootstrapping
% For each of the n iterations and each proxy (d18O, SST, SSS):
%   1. add that iteration's per-core age offset (randage) to every sample
%      and re-sort the spliced table by the perturbed age;
%   2. for every sample, collect all samples within +/- window/2 of it;
%   3. resample that window with replacement, weighting each sample by a
%      Gaussian of its age distance (standard deviation sigma) multiplied
%      by a per-core weight inversely proportional to the number of samples
%      that core contributes to the window;
%   4. store the mean of the resampled raw values and of the resampled
%      z-scores.
% Samples with no other sample in their window are skipped and stay NaN.
%
% The proxies are processed in the order d18O, SST, SSS and the raw values
% are resampled before the z-scores, so random numbers are drawn in a fixed
% order within each iteration.
%
% NOTE(review): row j of each result matrix is the j-th sample in the
% perturbed age order of that iteration, which is not necessarily the same
% sample as row j of the unperturbed *all_0 table. Confirm this is intended.

proxytables={d18Oall_0, SSTall_0, SSSall_0};
bootraw={d18Obootstat, SSTbootstat, SSSbootstat};
bootz={d18Obootstatz, SSTbootstatz, SSSbootstatz};
halfwindow=window/2;

for i=1:n
    for p=1:numel(proxytables)

        %Apply random age error: column 4 holds the core index, which
        %selects the matching row of randage for this iteration
        perturbed=proxytables{p};
        perturbed(:, 1)=perturbed(:, 1)+randage(perturbed(:, 4), i);

        %re-sort data after random age error
        perturbed=sortrows(perturbed);
        x=perturbed(:, 1);
        y=perturbed(:, 2);
        z=perturbed(:, 3);
        coreid=perturbed(:, 4);

        %results of this iteration; skipped samples stay NaN
        ycol=nan(numel(x), 1);
        zcol=ycol;

        %Cycle through every data point to create new simulated record
        for j=1:numel(x)

            %define smoothing window in age and index space
            windowindx=find(x>=x(j)-halfwindow & x<=x(j)+halfwindow);
            nwin=numel(windowindx);

            %Skip when no other data in window.
            if nwin<2
                continue
            end

            %gaussian smoothing weights, centred on the current data point
            relativex=x(windowindx)-x(j);
            wgauss=(1/(2*pi*sigma^2)^0.5)*exp((-relativex.^2)/(2*sigma^2));
            wgauss=wgauss./sum(wgauss);

            %weights based on samples per core: (number of cores in the
            %window) / (number of samples of this sample's core in the window)
            [~, ~, grp]=unique(coreid(windowindx));
            percore=accumarray(grp, 1);
            wcore=numel(percore)./percore(grp);
            wcore=wcore./sum(wcore);

            %combine the weights
            prodweights=wgauss.*wcore;
            prodweights=prodweights./sum(prodweights);

            %weighted resampling with replacement, then average
            ycol(j)=mean(randsample(y(windowindx), nwin, true, prodweights));
            zcol(j)=mean(randsample(z(windowindx), nwin, true, prodweights));
        end

        bootraw{p}(:, i)=ycol;
        bootz{p}(:, i)=zcol;
    end

    %progress as a percentage of the requested number of iterations
    disp([num2str(i/n*100), '%'])
end

%hand the results back to the variables used by the rest of the script
[d18Obootstat, SSTbootstat, SSSbootstat]=bootraw{:};
[d18Obootstatz, SSTbootstatz, SSSbootstatz]=bootz{:};

%Summarise the bootstrap iterations (columns) for every sample (rows).

%mean across iterations: raw values
d18Oymean=mean(d18Obootstat, 2);
SSTymean=mean(SSTbootstat, 2);
SSSymean=mean(SSSbootstat, 2);

%mean across iterations: z-scores
d18Ozmean=mean(d18Obootstatz, 2);
SSTzmean=mean(SSTbootstatz, 2);
SSSzmean=mean(SSSbootstatz, 2);

%standard deviation across iterations (default N-1 normalisation): raw values
d18Oymeanstd=std(d18Obootstat, [], 2);
SSTymeanstd=std(SSTbootstat, [], 2);
SSSymeanstd=std(SSSbootstat, [], 2);

%standard deviation across iterations (default N-1 normalisation): z-scores
d18Ozmeanstd=std(d18Obootstatz, [], 2);
SSTzmeanstd=std(SSTbootstatz, [], 2);
SSSzmeanstd=std(SSSbootstatz, [], 2);


%Export one sheet per proxy to alldata_<date>.xlsx.
%Columns: unperturbed age, measured value, core index, bootstrap mean,
%2 x bootstrap standard deviation (written under the "_95CI" heading), and
%the same two statistics for the z-scored data.
%The file name is built once so that every sheet lands in the same
%workbook even if the date changes while the script is running.
%NOTE(review): the stack columns are placed next to the rows of the
%unperturbed *all_0 tables purely by row position - confirm that pairing
%is intended.
outfile=['alldata_' date '.xlsx'];

writematrix([SSTall_0(:, [1:2, 4]), SSTymean, SSTymeanstd*2, SSTzmean, SSTzmeanstd*2], outfile, 'Sheet', 'SST', 'Range', 'A2')
writematrix(["Age", "SST_data", "coreid", "SST_stack", "SST_95CI", "SSTz_stack", "SSTz_95CI"], outfile, 'Sheet', 'SST', 'Range', 'A1')

writematrix([SSSall_0(:, [1:2, 4]), SSSymean, SSSymeanstd*2, SSSzmean, SSSzmeanstd*2], outfile, 'Sheet', 'SSS', 'Range', 'A2')
writematrix(["Age", "SSS_data", "coreid", "SSS_stack", "SSS_95CI", "SSSz_stack", "SSSz_95CI"], outfile, 'Sheet', 'SSS', 'Range', 'A1')

writematrix([d18Oall_0(:, [1:2, 4]), d18Oymean, d18Oymeanstd*2, d18Ozmean, d18Ozmeanstd*2], outfile, 'Sheet', 'd18O', 'Range', 'A2')
writematrix(["Age", "d18O_data", "coreid", "d18O_stack", "d18O_95CI", "d18Oz_stack", "d18Oz_95CI"], outfile, 'Sheet', 'd18O', 'Range', 'A1')