% Workspace setup and GDP growth rates.
% Start the timer, close every open figure and wipe the workspace before
% any data is loaded.
tic
close('all')
clear('all')
% Make the x13tbx folder available on the MATLAB path.
addpath('x13tbx')

% GDP levels read from sheet 'GDP', range b12:u99 of the input workbook.
% Later code treats rows as periods and columns as countries.
Y = xlsread('../../data/input_data/employment_by_occupation.xlsx','GDP','b12:u99');

% Growth in percent: 100 times the first difference of log levels. The
% first row has no predecessor, so it is set to zero.
dY = [zeros(1,size(Y,2)); 100*diff(log(Y))];

% Recession indicators, same size as dY and initialised to zero.
% dummy_rec is filled by the dating rules below; dummy_test is derived
% from it afterwards.
dummy_rec = zeros(size(dY));
dummy_test = zeros(size(dY));

% Recession dating on GDP growth, one column of dY at a time.
% dummy_rec(i,k) is set to 1 when any of the following holds for column k:
%   (a) growth is negative in period i and in period i+1;
%   (b) growth is negative in period i and period i-1 is already flagged;
%   (c) growth is positive in period i, negative in both i+1 and i+2, and
%       period i-1 is already flagged (a one-period rebound does not end
%       an ongoing recession).
% The scan stops two rows before the end because rule (c) looks two
% periods ahead; the last two rows are therefore never flagged.
% Two further rules were present but disabled in the original script:
%   - flag i when growth is positive in i, negative in i+1, and i-1 is
%     flagged;
%   - unflag i when growth is positive in i and i+1 and i-1 is flagged.
for k = 1:size(dY,2)
    for i = 1:size(dY,1)-2
        % Explicit bounds guard on the look-back. Without it the code
        % only avoided indexing row 0 because && short-circuits on the
        % first row of dY, which is zero by construction.
        prev_flagged = i > 1 && dummy_rec(i-1,k) == 1;

        falling_now  = dY(i,k) < 0;
        falling_next = dY(i+1,k) < 0;

        rule_a = falling_now && falling_next;
        rule_b = falling_now && prev_flagged;
        rule_c = dY(i,k) > 0 && falling_next && dY(i+2,k) < 0 && prev_flagged;

        if rule_a || rule_b || rule_c
            dummy_rec(i,k) = 1;
        end
    end
end

% Build dummy_test from dummy_rec.
% Step 1: copy the indicator wherever dummy_rec is exactly 0 or exactly 1;
% any other value leaves dummy_test untouched.
dummy_test(dummy_rec == 0) = 0;
dummy_test(dummy_rec == 1) = 1;

% Step 2: clear the last period of every flagged spell, i.e. each flagged
% row whose following row (same column) is unflagged. The comparison uses
% the values from step 1 throughout, which matches a top-to-bottom scan
% because a row is only ever changed after its successor has been read.
% The final row has no successor and is left as is.
lead_rows = dummy_test(1:end-1,:);
lead_rows(lead_rows > 0 & dummy_test(2:end,:) == 0) = 0;
dummy_test(1:end-1,:) = lead_rows;
    
            
% Employment data by occupation, read from sheet 'Un_Empl', range
% b13:ig100 of the input workbook.
EMP = xlsread('../../data/input_data/employment_by_occupation.xlsx','Un_Empl','b13:ig100');

% Time axis in decimal years with a step of one quarter.
time = 1998:0.25:2019.75;

% Labels of the series available for each country.
emp_label = { ...
    'Unemployment', ...
    'Total', ...
    'Managers', ...
    'Professionals', ...
    'Technicians and associate professionals', ...
    'Clerical support workers', ...
    'Service and sales workers', ...
    'Skilled agricultural, forestry and fishery workers', ...
    'Craft and related trades workers', ...
    'Plant and machine operators and assemblers', ...
    'Elementary occupations', ...
    'Armed forces occupations'};

% All countries: full names and the matching short codes, same order.
cou_name = { ...
    'Euro Area', 'Belgium', 'Germany', 'Estonia', 'Ireland', ...
    'Greece', 'Spain', 'France', 'Italy', 'Cyprus', ...
    'Latvia', 'Lithuania', 'Luxembourg', 'Malta', 'Netherlands', ...
    'Austria', 'Portugal', 'Slovenia', 'Slovakia', 'Finland'};
cou = { ...
    'EA', 'BE', 'DE', 'ET', 'IR', ...
    'GR', 'SP', 'FR', 'IT', 'CY', ...
    'LA', 'LT', 'LX', 'MA', 'NL', ...
    'AU', 'PT', 'SV', 'SK', 'FI'};

% Alternative selection (disabled): the 11 first EMU countries
% (12 minus Luxembourg).
%cou_name = {'Belgium', 'Germany', 'Ireland', 'Greece','Spain','France','Italy','Netherlands','Austria','Portugal','Finland'};
%cou = {'BE', 'DE', 'IR', 'GR', 'SP', 'FR', 'IT', 'NL', 'AU', 'PT', 'FI'};

% Labels of the six aggregated occupation groups.
groups_label = {'Abstract', 'Routine Narrow', 'Manual', ...
                'Abstract', 'Routine Broad', 'Manual narrow'};

% Number of countries and of series per country.
num_cou = numel(cou_name);
num_emp = numel(emp_label);

% Fill missing values by linear interpolation (the source data contain
% annual observations between which quarters are missing).
EMP = fillmissing(EMP,'linear');

% First and last period of the estimation sample, and their positions on
% the time axis.
time_st = 1998;
time_end = 2019.5;
st = find(time == time_st);
en = find(time == time_end);

% Calendar dates at three-month steps from t1 up to t2, stored as a
% column of serial date numbers.
% NOTE(review): the variable name `date` shadows MATLAB's built-in
% date function for the rest of the script.
t1 = datetime(1998,1,1,0,0,0);
t2 = datetime(2019,9,1,0,0,0);
t = t1:calmonths(3):t2;
date = datenum(t)';


% 1. Remove statistical breaks, keeping the newer segment as reference.
% The adjustment is delegated to the project function Transformations;
% its exact method is not visible from this script.
EMP = Transformations(EMP,time,num_emp,num_cou);

% Column span of country number c inside EMP. Countries are stored side
% by side, num_emp columns each, in the same order as the codes in `cou`.
cou_cols = @(c) ((c-1)*num_emp+1):(c*num_emp);

% Per-country matrices, kept as named variables for any code that refers
% to a country by its code.
EA = EMP(:,cou_cols(1));
BE = EMP(:,cou_cols(2));
DE = EMP(:,cou_cols(3));
ET = EMP(:,cou_cols(4));
IR = EMP(:,cou_cols(5));
GR = EMP(:,cou_cols(6));
SP = EMP(:,cou_cols(7));
FR = EMP(:,cou_cols(8));
IT = EMP(:,cou_cols(9));
CY = EMP(:,cou_cols(10));
LA = EMP(:,cou_cols(11));
LT = EMP(:,cou_cols(12));
LX = EMP(:,cou_cols(13));
MA = EMP(:,cou_cols(14));
NL = EMP(:,cou_cols(15));
AU = EMP(:,cou_cols(16));
PT = EMP(:,cou_cols(17));
SV = EMP(:,cou_cols(18));
SK = EMP(:,cou_cols(19));
FI = EMP(:,cou_cols(20));

% Containers indexed as (period, series, country); unemp is
% (period, country).
emp_data = zeros(size(EMP,1),num_emp,num_cou);
emp_share = emp_data;
dummy_vec = zeros(size(EMP,1),num_emp,num_cou);
unemp = zeros(size(EMP,1),num_cou);

% Stack the country blocks along the third dimension. Slicing EMP by
% position replaces the former eval() on the country code and yields the
% same matrices, since the named variables above are these same slices.
for i = 1:num_cou
    emp_data(:,:,i) = EMP(:,cou_cols(i));
end

% Restrict every series to the sample window st:en.
gdp_real = Y(st:en,:);
emp_data = emp_data(st:en,:,:);
dummy_vec = dummy_vec(st:en,:,:);
emp_share = emp_share(st:en,:,:);
dummy_rec = dummy_rec(st:en,:,:);
dummy_test = dummy_test(st:en,:,:);
dummy_vec_level = dummy_vec;
dy = dY(st:en,:);
unemp = unemp(st:en,:);

% Per-country unemployment rate, occupation shares, aggregated occupation
% groups and recession-indicator series scaled to each variable's range.
%
% Series positions inside emp_data(:,q,k) follow emp_label:
%   1 Unemployment, 2 Total, 3 Managers, 4 Professionals, 5 Technicians,
%   6 Clerical, 7 Service and sales, 8 Skilled agricultural, 9 Craft,
%   10 Plant and machine operators, 11 Elementary, 12 Armed forces.

% X-13 specification objects. Their arguments are constants, so they are
% built once here rather than on every loop iteration.
% NOTE(review): assumes makespec has no side effects that depend on being
% called repeatedly.
basespec = makespec('LOG','TDAYS','EASTER','SPECTRUM');
spec = makespec(basespec,'X11','estimate','maxiter',3000);

% Preallocate the outputs that are filled country by country.
n_per = size(emp_data,1);
groups = zeros(n_per,6,num_cou);
check = zeros(n_per,num_cou);
check_2 = zeros(n_per,num_cou);
dummy_agg = zeros(n_per,4,num_cou);
dummy_agg_2 = zeros(n_per,4,num_cou);
dummy_agg_3 = zeros(n_per,4,num_cou);
dummy_unemp = zeros(n_per,num_cou);

for k = 1:num_cou
    % Unemployment rate in percent: (break-adjusted) unemployment divided
    % by the (break-adjusted) labour force, i.e. unemployment + total.
    unemp(:,k) = 100*(emp_data(:,1,k))./(emp_data(:,1,k) + emp_data(:,2,k));

    for q = 2:num_emp
        % Share of series q in the total (series 2).
        emp_share(:,q,k) = emp_data(:,q,k)./emp_data(:,2,k);

        % Recession indicator scaled to the maximum of the share.
        dummy_vec(:,q,k) = dummy_test(:,k) * max(emp_share(:,q,k));

        % Level counterpart: a flat line at the series minimum when the
        % country has no flagged period at all, otherwise the indicator
        % scaled to the series maximum.
        if all(dummy_test(:,k) == 0)
            dummy_vec_level(:,q,k) = min(emp_data(:,q,k));
        else
            dummy_vec_level(:,q,k) = dummy_test(:,k) * max(emp_data(:,q,k));
        end
    end

    % Aggregated groups as shares of the total. None of them depends on
    % q, so they are computed once per country.
    % First set (columns 1-3):
    % 3 Manual: Service and sales + Skilled agricultural + Elementary
    groups(:,3,k) = ((emp_data(:,7,k)+emp_data(:,8,k)+emp_data(:,11,k))./emp_data(:,2,k));
    % 2 Routine Narrow: Clerical + Craft + Plant and machine operators
    groups(:,2,k) = ((emp_data(:,6,k)+emp_data(:,9,k)+emp_data(:,10,k))./emp_data(:,2,k));
    % 1 Cognitive / Abstract: Managers + Professionals + Technicians +
    %   Armed forces
    groups(:,1,k) = ((emp_data(:,3,k)+emp_data(:,4,k)+emp_data(:,5,k)+ emp_data(:,12,k))./emp_data(:,2,k));

    % Second set with a broader Routine group (columns 4-6):
    % 5 Routine Broad: Routine Narrow + Elementary
    groups(:,5,k) = ((emp_data(:,6,k)+emp_data(:,9,k)+emp_data(:,10,k) + emp_data(:,11,k))./emp_data(:,2,k));
    % 6 Manual Narrow: Manual without Elementary
    groups(:,6,k) = ((emp_data(:,7,k)+emp_data(:,8,k))./emp_data(:,2,k));
    % 4 Cognitive / Abstract: same definition as column 1
    groups(:,4,k) = ((emp_data(:,3,k)+emp_data(:,4,k)+emp_data(:,5,k)+ emp_data(:,12,k))./emp_data(:,2,k));

    % Sum of the three group shares within each set, kept for inspection.
    check(:,k) = groups(:,1,k)+groups(:,2,k)+groups(:,3,k);
    check_2(:,k) = groups(:,4,k)+groups(:,5,k)+groups(:,6,k);

    % Recession indicator scaled to the group series.
    % NOTE(review): only groups 1-4 are covered although `groups` has six
    % columns; confirm whether 5 and 6 were meant to be included.
    for q = 1:4
        dummy_agg(:,q,k) = (dummy_test(:,k) * max(groups(:,q,k)));
        dummy_agg_2(:,q,k) = (dummy_test(:,k) * max(groups(:,q,k)./groups(1,q,k)));
        % Same scaling as dummy_agg_2 but always using the indicator of
        % the first country column.
        dummy_agg_3(:,q,k) = (dummy_test(:,1) * max(groups(:,q,k)./groups(1,q,k)));
    end

    % Recession indicator scaled to the unemployment rate; a flat line at
    % the minimum rate when the scaled indicator is zero in every period.
    dummy_unemp(:,k) = (dummy_test(:,k) * max(unemp(:,k)));
    if all(dummy_unemp(:,k) == 0)
        dummy_unemp(:,k) = (min(unemp(:,k)));
    end
end

% Create the dataset to use in Stata: a long panel with countries stacked
% one after another, written to sheet 'DATASET' of the output workbook.
out_file = '../../data/input_data/dataset_october23.xlsx';

% Column headers, in output order.
% NOTE(review): 'Coginitive' (column 3) looks like a typo, but it also
% keeps the header distinct from 'Cognitive' (column 6); it is left
% unchanged because the Stata side may rely on these exact names.
name_variables = {'Date', 'Country', 'Coginitive', 'Routine Narrow', ...
                  'Manual Broad', 'Cognitive', 'Routine Broad', ...
                  'Manual Narrow', 'CEPR recessions', ...
                  'Country recession', 'Real GDP', 'u'};

Dates = time(:,st:en)';
n_obs = size(emp_data,1);

% One column per header so that no column is added by implicit growth.
% Column 2 stays zero here; the country names are written over it in the
% workbook by the last writecell call.
DATASET = zeros(n_obs*num_cou,numel(name_variables));

% Column 1: the date vector repeated once per country.
Dat_time = repmat(Dates,1,num_cou);
DATASET(:,1) = reshape(Dat_time(:, :), [], 1);

% Country names, one per row, in the same stacking order.
Dat_cou = repmat(cou_name,n_obs,1);
Dat_cou = reshape(Dat_cou(:,:),[],1);

% Columns 3-8: the six group shares of each country.
for i = 1:num_cou
    rows = (i-1)*n_obs+1 : i*n_obs;
    DATASET(rows,3:8) = groups(:,:,i);
end

% Column 9: recession indicator of the first country column, repeated for
% every country. Column 10: each country's own indicator.
CEPR = repmat(dummy_rec(:,1),1,num_cou);
DATASET(:,9) = reshape(CEPR(:, :), [], 1);
DATASET(:,10) = reshape(dummy_rec(:,:),[],1);
% Column 11: real GDP. Column 12: unemployment rate.
DATASET(:,11) = reshape(gdp_real(:,:),[],1);
DATASET(:,12) = reshape(unemp(:,:),[],1);

% Numeric block first, then the header row, then the country names over
% the placeholder column B.
writematrix(DATASET,out_file,'Sheet','DATASET','Range','A2')
writecell(name_variables,out_file,'Sheet','DATASET','Range','A1')
writecell(Dat_cou,out_file,'Sheet','DATASET','Range','B2')

