%-------------------------------------------------------------------------------------------------------
% PURPOSE: "Backtesting Marginal Expected Shortfall and Related Systemic Risk Measures"
%           Banulescu, Hurlin, Leymarie, and Scaillet (2019)
%-------------------------------------------------------------------------------------------------------
% LRMES, SRISK backtesting exercises at monthly horizon h=22 (medium-term) for the 95 firms 
% (using a GJR-DCC model with normal innovations)
%-------------------------------------------------------------------------------------------------------
% J. Leymarie, September 25, 2017.
% LEO, University of Orleans
%-------------------------------------------------------------------------------------------------------
% Notes : 
% - The code is indexed by time (e.g., year=2005 and month=1 means that test statistics are computed 
%   using data available until Jan. 2005).
% - The excel file "ref_ticker.xlsx" reports the ticker reference associated with each firm=1,...,95 on the 
%   excel database "bdd_for_matlab.xlsx".
% - The parameters of the GJR-DCC model are estimated with the MFE Toolbox.
% - The outputs of this code are SRISK & LRMES estimations, Ht violation process, test statistics, and pvalues.
% - The overlapping of the daily OOS observations can be modified via the variable 'd_shift'.
% - Because the distribution of the cumulated return is unknown, we use 100,000 simulated daily returns
% - As there is overlapping (mixing Ht) :
%    * we use an estimator of the long run variance for the UC test instead of theoretical variance
%    * the IND test statistic is not valid
% - Two estimation schemes are available :
%   * recursive estimation scheme as in Brownlees and Engle (2017) (rw is set to 0) 
%   * rolling window estimation scheme (rw is set to 1) 
%-------------------------------------------------------------------------------------------------------

% Start from a clean session: empty workspace, cleared console, no open figure
clear ; clc ; close all ;

%================%
%== Parameters ==%
%================%

% --- Input data ---
filename='bdd_for_matlab.xlsx';                       % Excel workbook containing the database
database=xlsread(filename);                           % Numeric content of the workbook

% --- Cross-section and backtesting dates ---
nb_firm=95;                                           % Number of firms to backtest
first_year=2005;                                      % First year for which the backtests are computed
last_year=2016;                                       % Last year for which the backtests are computed

% --- Forecasting design ---
h=22;                                                 % Forecast horizon (in days)
d_shift=11;                                           % Shift between two consecutive OOS windows
                                                      % (d_shift=1 => 1-day shift, d_shift=h => no overlapping)
N=100;                                                % Number of out-of-sample windows
nb_pseudo=100000;                                     % Number of simulated draws used to approximate pdf & cdf

% --- Estimation scheme ---
rw=1;                                                 % 1 = rolling window scheme, 0 = recursive scheme
rw_size=500;                                          % Length of the in-sample rolling window (only used if rw=1)
options=optimset('Display','off');                    % Optimizer options (silent mode)

% --- Risk measure settings ---
alpha=0.05;                                           % Coverage level of the quantile of Y2
k=0.08;                                               % Prudential capital fraction k

%======================%
%=== Initialization ===%
%======================%

% All containers are filled with NaN; one row/slice per (year, month) and one column/page per firm.

uc_risk=NaN(12*(last_year-first_year+1),nb_firm);     % UC statistics with estimation risk

p_uc_risk=NaN(size(uc_risk));                         % UC p-values with estimation risk

uc_c_hac=NaN(size(uc_risk));                          % Mixing-corrected UC statistics

p_uc_c_hac=NaN(size(uc_risk));                        % Mixing-corrected UC p-values

LRMES=NaN(size(uc_risk));                             % Estimated h-horizon LRMES

SRISK=NaN(size(uc_risk));                             % Estimated h-horizon SRISK

H_global=NaN(N,size(uc_risk,1),nb_firm);              % Cumulative joint violation process Ht

theta_global=NaN(11,size(uc_risk,1),nb_firm);         % Estimated parameters (11 per estimation)

V_asy_global=NaN(11,11,size(uc_risk,1),nb_firm);      % Covariance matrix of the estimated parameters

%=========================================================================================%
%=== Backtesting loop over firms and over end-of-sample dates (year, month)            ===%
%=========================================================================================%
% For each firm and each month index s between first_year and last_year :
%   1. the firm's observations dated up to (year, month) are selected ;
%   2. they are split into an in-sample part and an out-of-sample part made of
%      N*d_shift+h-d_shift daily observations ;
%   3. the GJR-DCC model is estimated on the in-sample part (dcc, MFE Toolbox) ;
%   4. h-day paths are simulated to obtain LRMES and SRISK at the end of the sample ;
%   5. the violation series H_risk is built over the N out-of-sample windows and the
%      UC statistics / p-values are computed.
% Results are written at position s (and firm) of the preallocated arrays ; positions of
% skipped (firm, s) pairs are left untouched.
% Database columns as used below : 1 = firm id, 2 = year, 3 = month, 4-5 = returns
% (firm, then market), 6-7 = inputs of the SRISK formula (presumably debt and market
% capitalisation -- TODO confirm against "bdd_for_matlab.xlsx").

% alpha*nb_pseudo is a floating-point product and is not always an exact integer
% (e.g. alpha=0.07 with nb_pseudo=100000), which would make it an invalid index.
% The rank of the empirical alpha-quantile is therefore rounded once, here.
idx_var=round(alpha*nb_pseudo);

oos_len=N*d_shift+h-d_shift;                              % Daily obs. spanned by the N (possibly overlapping) h-day windows

for firm=1:nb_firm                                        % Loop for backtesting firm

    data=database(database(:,1)==firm,:);                 % Data for the current firm (does not depend on s)

    for s=1:(last_year-first_year+1)*12                   % Loop for backtesting year and month

        ref=ceil(s/12)-1;                                 % Year reference (0 for the first year)

        year=first_year+ref;                              % Last year for the full sample

        month=s-ref*12;                                   % Last month for the full sample (1..12)

        year_base=data(data(:,2)<year,:);                 % Data available before year

        year_inprog=data(data(:,2)==year,:);              % Data available for the current year

        month_inprog=year_inprog(year_inprog(:,3)<=month,:);% Selection of available months of the current year

        full=[year_base;month_inprog];                    % In-sample and out-of-sample data up to (year, month)

        % Minimum in-sample size : rw_size for the rolling scheme, 500 otherwise (recursive scheme)
        if rw==1

            min_in=rw_size;

        else

            min_in=500;

        end

        % size(.,1) counts rows ; length() would return the largest dimension of the matrix
        if size(full,1)<min_in+oos_len                    % Not enough observations yet : try the next month

            continue

        end

        if (full(end,2)~=year) || (full(end,3)~=month)    % No observation for (year, month) : skip to the next firm

            break

        end

        out=full(end-oos_len+1:end,:);                    % Out-of-sample data

        in_full=full(1:end-oos_len,:);                    % In-sample data

        if rw==1

            in=in_full(end-rw_size+1:end,:);              % Rolling scheme : keep the last rw_size observations

        else

            in=in_full;                                   % Recursive scheme : keep all past observations

        end

        T=size(in,1);                                     % In-sample size

        TTh=T+oos_len;                                    % Full sample size

        in_out=[in;out];                                  % Full sample data

        Y=in_out(:,[4,5]);                                % Full sample returns

        %============================
        %=== Estimation In-Sample ===
        %============================

        [theta_in,loglik,Hcond,V_asy,scores,diagnostic] = dcc(Y(1:T,:),[],1,0,1,1,1,1,[],[],[],[],options);

        % If an error occurs in the estimation routine, use the lines below to help the algorithm
        % theta_init=[0.149 0.065 0.075 0.855 ...
        %             0.057 0.001 0.268 0.786 ...
        %             0.663 0.041 0.754];
        % [theta_in,loglik,Hcond,V_asy,scores,diagnostic] = dcc(Y(1:T,:),[],1,0,1,1,1,1,[],[],[],theta_init,options);

        theta_global(:,s,firm)=theta_in';                 % theta_in is recorded for future use

        V_asy_global(:,:,s,firm)=V_asy;                   % V_asy is recorded for future use

        np=length(theta_in);                              % Number of estimated parameters

        garch_hat=[theta_in(1:4)' theta_in(5:8)'];        % Estimated GJR-GARCH parameters (one column per series)

        dcc_hat=theta_in(10:11);                          % Estimated DCC parameters (without rho_bar)

        Q_bar_hat=[1 theta_in(9);theta_in(9) 1];          % Estimated unconditional correlation matrix

        %==============================================================================%
        %==== Evaluation of the correlations-variances in-sample and out-of-sample ====%
        %==============================================================================%

        % Evaluation over the full sample (initial condition based on the in-sample period)
        [Sigma_hat,rho_hat,Q_hat]= gjrdcc_evaluate(Y,garch_hat,dcc_hat,Q_bar_hat,T);

        S2_hat=NaN(TTh,2);                                % Estimated cond. variances (in-sample and out-of-sample)

        S2_hat(:,1)=Sigma_hat(1,1,:);                     % Diagonal element (1,1) of Sigma_hat at each date

        S2_hat(:,2)=Sigma_hat(2,2,:);                     % Diagonal element (2,2) of Sigma_hat at each date

        %========================================================================================%
        %=     Estimated monthly SRISK & LRMES given T+Nh (based on simulated innovations)      =%
        %========================================================================================%

        % Pseudo sample generation

        p_epst=normrnd(0,1,h,2,nb_pseudo);                % Pseudo sample of Gaussian innovations (h x 2 x nb_pseudo)

        p_rt_out=...                                      % Pseudo log-returns simulated from the end of the full sample
            pseudo_sample(Y,garch_hat,dcc_hat,...
            S2_hat,Q_hat,Q_bar_hat,h,nb_pseudo,p_epst,TTh);

        p_Rth_out=squeeze(exp(sum(p_rt_out)/100)-1)';     % Pseudo multi-period arithmetic returns (log-returns divided by 100)

        sort_p_Rth2_out=sort(p_Rth_out(:,2));             % Sorted edf of market multi-period arithmetic return

        VaR2h_out=sort_p_Rth2_out(idx_var);               % VaR2(alpha)

        % LRMES computation : mean simulated firm return over the draws where the market return is below VaR2(alpha)
        % NOTE(review): the conditioning event uses a strict inequality here, whereas the
        % backtest below truncates with <= ; confirm which one is intended.

        LRMES(s,firm)=sum(p_Rth_out(:,1).*(p_Rth_out(:,2)<VaR2h_out))/sum((p_Rth_out(:,2)<VaR2h_out));

        % SRISK computation (uses columns 6 and 7 of the last out-of-sample observation)

        SRISK(s,firm)=(k*out(end,6)-(1-k)*out(end,7)*(1+LRMES(s,firm)));   % SRISK

        if SRISK(s,firm)<0

            SRISK(s,firm)=0;                              % Negative SRISK is set to 0

        end

        %======================================%
        %=== Computation of H(alpha) series ===%
        %======================================%

        H_risk=zeros(N,1);                                % Ht(alpha) series

        parfor i=1:N                                      % Loop on out-of-sample periods

            %fprintf('No OOS observation = %1.0f   \n',i) % Display observation i

            j=i*d_shift-d_shift+1;                        % j is used to extract OOS observations {T+j:T+j+h-1}

            p_epst=normrnd(0,1,h,2,nb_pseudo);            % Pseudo sample of Gaussian innovations

            %==================================%
            %==     h-cumulative return      ==%
            %==================================%

            Yth=exp(sum(Y(T+j:T+j+h-1,:))/100)-1;         % Actual multi-period arithmetic return

            %===========================================%
            %== Ht computation (with estimation risk) ==%
            %===========================================%

            p_rt_out=...                                  % Pseudo log-returns (with ER), simulated from date T+j-1
                pseudo_sample(Y,garch_hat,dcc_hat,...
                S2_hat,Q_hat,Q_bar_hat,h,nb_pseudo,p_epst,T+j-1);

            p_Rth_out=squeeze(exp(sum(p_rt_out)/100)-1)'; % Pseudo multi-period sample (with ER)

            p_Rth1_out=p_Rth_out(:,1);                    % Edf of firm multi-period arithmetic return (with ER)

            p_Rth2_out=p_Rth_out(:,2);                    % Edf of market multi-period arithmetic return (with ER)

            sort_p_Rth2_out=sort(p_Rth2_out);             % Sorted edf of market multi-period arithmetic return (with ER)

            VaR2h_out=sort_p_Rth2_out(idx_var);           % VaR2(alpha) (with ER)

            trunc_p_Rth1_out=p_Rth1_out(p_Rth2_out<=VaR2h_out);% Truncated edf (with ER)

            [u12_risk,~,~] = ...                          % Kernel cdf estimate at the realised firm return (with ER)
                ksdensity(trunc_p_Rth1_out,Yth(1),'function','cdf');

            H_risk(i)=(Yth(2)<=VaR2h_out)*(1-u12_risk);   % H (with ER) : zero unless the market return violates VaR2(alpha)

        end                                               % End of loop on i

        H_global(:,s,firm)=H_risk;

        % =======================================%
        % === UC_MES - Unconditional backtest ===%
        % =======================================%

        uc_risk(s,firm)=sqrt(N)*(mean(H_risk)-alpha/2)/sqrt(alpha*(1/3-alpha/4));                        % Statistic UC with estimation risk

        p_uc_risk(s,firm)=2*normcdf(-abs(uc_risk(s,firm)));                                              % Two-sided UC p-value

        uc_c_hac(s,firm)=sqrt(N)*(mean(H_risk)-alpha/2)/sqrt(hac(H_risk,alpha/2,alpha*(1/3-alpha/4)));   % Mixing-corrected statistic (variance from hac)

        p_uc_c_hac(s,firm)=2*normcdf(-abs(uc_c_hac(s,firm)));                                            % Two-sided corrected UC p-value

        fprintf('  Firm reference              : %1.0f  \n',firm)
        fprintf('  End of in-sample period     : year = %1.0f    month = %1.0f \n',in(end,2),in(end,3))
        fprintf('  End of out_of-sample period : year = %1.0f    month = %1.0f \n',year,month)

    end

end

%=============================================%
%== Date vector construction (for graphics) ==%
%=============================================%

% Serial date number of the last calendar day of every month from first_year to last_year
% (one entry per month, in chronological order).
date=NaN(1,12*(last_year-first_year+1));
j=1;
for year=first_year:last_year
    % Day 0 of month m+1 is the last day of month m, so months 2:13 with day 0 give the
    % true month ends (28/29/30/31). Using day 31 for every month would roll over into
    % the following month for months that have fewer than 31 days.
    date(j*12-11:j*12)=datenum(year,2:13,0);
    j=j+1;
end
