#############################################################################
# Code for generating the outputs for QuantMig Deliverable 9.4 (Bijak 2023) #
#   European migration scenarios with probabilistic uncertainty assessment  #
# Generated with R version 4.2.2 (2022-10-31 ucrt)  "Innocent and Trusting" #
#############################################################################

library(EnvStats)
library(stringr)
options(scipen=999)

#Data preparation, input and transformations 

mydir <- getwd() # To be replaced with the directory of choice
setwd(mydir)

load("QuantMig_Migration_Estimates_2009-19.RData")
correction = read.csv("D9-4_de_correction.csv", row.names=1)

# Auxiliary block: Read out median estimates for Germany for correction 
# Replaced by reading out the corrections from an external CSV file
#
# demedian = array(rep(0,8*11),c(8,11))
# for (t in 1:11) {
#     for (k in 1:5000) {
#         demig[1,t,k]= sum(output.MCMC$y.total.MCMC[k,33:34,11,t]) }
#     for (i in 2:8) {
#         for (k in 1:5000) {
#             demig[i,t,k] <- sum(output.MCMC$y.total.MCMC[k,33+i,11,t]) }
#     demedian[i,t] <- median(demig[i,t,])
#   }
# }

migdata = array(rep(0,8*11*5000),c(8,11,5000))
# demig = array(rep(0,8*11*5000),c(8,11,5000)) # Auxiliary line for German immigration
q_pareto = q_gevd = q_lnorm = q_exp = array(rep(0,2*8*5000),c(2,8,5000))
data_median = data_mean = data_95 = data_05 = x_pareto = x_gevd = x_lnorm = x_exp = array(rep(0,8*11),c(8,11))
sw_test = sw_pval = array(rep(0,4*8),c(4,8))
flab_gevd = flab_pareto = flab_lnorm = flab_exp = flab_qgevd = flab_qpareto = flab_qlnorm = flab_qexp = filename = rep("",8)
labels = attr(output.MCMC$y.total.MCMC,"dimnames")$origin[34:41]
mig_quantiles = mig_mean = array(rep(0,8*8),c(8,8))

for (t in 1:11) { 
  for (k in 1:5000) {
      migdata[1,t,k] = sum(output.MCMC$y.total.MCMC[k,33:34,1:32,t]) + correction[1,t]
          for (i in 2:8) {   
              migdata[i,t,k] <- sum(output.MCMC$y.total.MCMC[k,33+i,1:32,t]) + correction[i,t]
}}}

rm(output.MCMC)

for (i in 1:8) {
  flab_gevd[i] = gsub(" ","",str_c("qq_gevd_",labels[i],".jpeg"))
  flab_pareto[i] = gsub(" ","",str_c("qq_pareto_",labels[i],".jpeg"))
  flab_lnorm[i] = gsub(" ","",str_c("qq_lnorm_",labels[i],".jpeg"))
  flab_exp[i] = gsub(" ","",str_c("qq_exp_",labels[i],".jpeg"))
  flab_qgevd[i] = gsub(" ","",str_c("quantiles_gevd_",labels[i],".jpeg"))
  flab_qpareto[i] = gsub(" ","",str_c("quantiles_pareto_",labels[i],".jpeg"))
  flab_qlnorm[i] = gsub(" ","",str_c("quantiles_lnorm_",labels[i],".jpeg"))
  flab_qexp[i] = gsub(" ","",str_c("quantiles_exp_",labels[i],".jpeg"))
    for (t in 1:11) {
    data_median[i,t] <- median(migdata[i,t,])
    data_mean[i,t] <- mean(migdata[i,t,])
    data_05[i,t] <- quantile(migdata[i,t,],0.05)
    data_95[i,t] <- quantile(migdata[i,t,],0.95)
}}

# Fitting 0.9 and 0.98 quantiles for the four distributions

for (i in 1:8) {
    for (k in 1:5000) {
    try(q_pareto[,i,k] <- eqpareto(x=migdata[i,,k],p=c(0.9,0.98))$quantiles[],silent=TRUE)
    try(q_gevd[,i,k] <- eqgevd(x=migdata[i,,k],p=c(0.9,0.98))$quantiles[],silent=TRUE)
    try(q_lnorm[,i,k]  <- eqlnorm(x=migdata[i,,k],p=c(0.9,0.98))$quantiles[],silent=TRUE)
    try(q_exp[,i,k]  <- eqexp(x=migdata[i,,k],p=c(0.9,0.98))$quantiles[],silent=TRUE)  
}}

# Block plotting the Q-Q plots with error bars for observed values

for (i in 1:8) {
    jpeg(filename=flab_gevd[i],width=1024,height=768,units="px",pointsize=28,quality=600)
    x_gevd[i,] <- qqPlot(x=data_median[i,],distribution="gevd",estimate.params = TRUE,add.line = TRUE,qq.line.type="0-1",pch=16,xlim=c(0,1000000),ylim=c(0,1000000),xlab="Estimated quantiles from the fitted GEV distribution",ylab="Median estimates and 90% credible intervals",main=paste("Q-Q plot for the GEV distribution:\nImmigration from",labels[i]))$x
    arrows(x0=x_gevd[i,],x1=x_gevd[i,],y0=sort(data_05[i,]),y1=sort(data_95[i,]),code=3,angle=90,length=0.1)
    sw_test[2,i] <- gofTest(y=data_median[i,],distribution="gevd",estimate.params = TRUE,test="sw")$statistic
    sw_pval[2,i] <- gofTest(y=data_median[i,],distribution="gevd",estimate.params = TRUE,test="sw")$p.value
    dev.off()
    
    jpeg(filename=flab_pareto[i],width=1024,height=768,units="px",pointsize=28,quality=600)
    x_pareto[i,] <- qqPlot(x=data_median[i,],distribution="pareto",estimate.params = TRUE,add.line = TRUE,qq.line.type="0-1",pch=16,xlim=c(0,1000000),ylim=c(0,1000000),xlab="Estimated quantiles from the fitted Pareto distribution",ylab="Median estimates and 90% credible intervals",main=paste("Q-Q plot for the Pareto distribution:\nImmigration from",labels[i]))$x
    arrows(x0=x_pareto[i,],x1=x_pareto[i,],y0=sort(data_05[i,]),y1=sort(data_95[i,]),code=3,angle=90,length=0.1)
    sw_test[4,i] <- gofTest(y=data_median[i,],distribution="pareto",estimate.params = TRUE,test="sw")$statistic
    sw_pval[4,i] <- gofTest(y=data_median[i,],distribution="pareto",estimate.params = TRUE,test="sw")$p.value
    dev.off()
    
    jpeg(filename=flab_lnorm[i],width=1024,height=768,units="px",pointsize=28,quality=600)
    x_lnorm[i,] <- qqPlot(x=data_median[i,],distribution="lnormAlt",estimate.params = TRUE,add.line = TRUE,qq.line.type="0-1",pch=16,xlim=c(0,1000000),ylim=c(0,1000000),xlab="Estimated quantiles from the fitted log-normal distribution",ylab="Median estimates and 90% credible intervals",main=paste("Q-Q plot for the log-normal distribution:\nImmigration from",labels[i]))$x
    arrows(x0=x_lnorm[i,],x1=x_lnorm[i,],y0=sort(data_05[i,]),y1=sort(data_95[i,]),code=3,angle=90,length=0.1)
    sw_test[3,i] <- gofTest(y=data_median[i,],distribution="lnormAlt",estimate.params = TRUE,test="sw")$statistic
    sw_pval[3,i] <- gofTest(y=data_median[i,],distribution="lnormAlt",estimate.params = TRUE,test="sw")$p.value
    dev.off()
    
    jpeg(filename=flab_exp[i],width=1024,height=768,units="px",pointsize=28,quality=600)
    x_exp[i,] <- qqPlot(x=data_median[i,],distribution="exp",estimate.params = TRUE,add.line = TRUE,qq.line.type="0-1",pch=16,xlim=c(0,1000000),ylim=c(0,1000000),xlab="Estimated quantiles from the fitted exponential distribution",ylab="Median estimates and 90% credible intervals",main=paste("Q-Q plot for the exponential distribution:\nImmigration from",labels[i]))$x
    arrows(x0=x_exp[i,],x1=x_exp[i,],y0=sort(data_05[i,]),y1=sort(data_95[i,]),code=3,angle=90,length=0.1)
    sw_test[1,i] <- gofTest(y=data_median[i,],distribution="exp",estimate.params = TRUE,test="sw")$statistic
    sw_pval[1,i] <- gofTest(y=data_median[i,],distribution="exp",estimate.params = TRUE,test="sw")$p.value
    dev.off()
}

# Block plotting of the distributions of the 0.9 and 0.98 quantiles

for (i in 1:8) {
  jpeg(filename=flab_qgevd[i],width=1024,height=768,units="px",pointsize=28,quality=600)
  plot(density(q_gevd[1,i,]),xlim=c(0,y=max(density(q_gevd[1,i,])$x)*0.6),pch=16,lty=1,lwd=2,xlab="Estimated immigration volume",main=paste("Selected quantiles for the GEV distribution:\nImmigration from",labels[i]))
  abline(v=median(q_gevd[1,i,]),lty=2,lwd=2)
  abline(v=mean(q_gevd[1,i,]),lty=3,lwd=2)
  lines(density(q_gevd[2,i,]),xlim=c(0,y=max(density(q_gevd[1,i,])$x)*0.6),pch=16,lty=1,lwd=2,col="gray")
  abline(v=median(q_gevd[2,i,]),lty=2,lwd=2,col="gray")
  abline(v=mean(q_gevd[2,i,]),lty=3,lwd=2,col="gray")
  legend(x=max(density(q_gevd[1,i,])$x)*0.33,y=max(density(q_gevd[1,i,])$y),c("Quantile 0.9", str_c("Median = ", format(round(median(q_gevd[1,i,])),big.mark=",")), str_c("Mean = ", format(round(mean(q_gevd[1,i,])),big.mark=",")), "Quantile 0.98", str_c("Median = ", format(round(median(q_gevd[2,i,])),big.mark=",")),str_c("Mean = ", format(round(mean(q_gevd[2,i,])),big.mark=","))), lty=c(1,2,3,1,2,3),lwd=c(2,2,2,2,2,2),col=c("black","black","black","gray","gray","gray"),box.lty=0)
  dev.off()
  
  jpeg(filename=flab_qpareto[i],width=1024,height=768,units="px",pointsize=28,quality=600)
  plot(density(q_pareto[1,i,]),xlim=c(0,y=max(density(q_pareto[1,i,])$x)*0.55),pch=16,lty=1,lwd=2,xlab="Estimated immigration volume",main=paste("Selected quantiles for the Pareto distribution:\nImmigration from",labels[i]))
  abline(v=median(q_pareto[1,i,]),lty=2,lwd=2)
  abline(v=mean(q_pareto[1,i,]),lty=3,lwd=2)
  lines(density(q_pareto[2,i,]),xlim=c(0,y=max(density(q_pareto[1,i,])$x)*0.55),pch=16,lty=1,lwd=2,col="gray")
  abline(v=median(q_pareto[2,i,]),lty=2,lwd=2,col="gray")
  abline(v=mean(q_pareto[2,i,]),lty=3,lwd=2,col="gray")
  legend(x=max(density(q_pareto[1,i,])$x)*0.3,y=max(density(q_pareto[1,i,])$y),c("Quantile 0.9", str_c("Median = ", format(round(median(q_pareto[1,i,])),big.mark=",")), str_c("Mean = ", format(round(mean(q_pareto[1,i,])),big.mark=",")), "Quantile 0.98", str_c("Median = ", format(round(median(q_pareto[2,i,])),big.mark=",")),str_c("Mean = ", format(round(mean(q_pareto[2,i,])),big.mark=","))), lty=c(1,2,3,1,2,3),lwd=c(2,2,2,2,2,2),col=c("black","black","black","gray","gray","gray"),box.lty=0)
  dev.off()
  
  jpeg(filename=flab_qlnorm[i],width=1024,height=768,units="px",pointsize=28,quality=600)
  plot(density(q_lnorm[1,i,]),xlim=c(0,y=max(density(q_lnorm[1,i,])$x)*0.55),pch=16,lty=1,lwd=2,xlab="Estimated immigration volume",main=paste("Selected quantiles for the log-normal distribution:\nImmigration from",labels[i]))
  abline(v=median(q_lnorm[1,i,]),lty=2,lwd=2)
  abline(v=mean(q_lnorm[1,i,]),lty=3,lwd=2)
  lines(density(q_lnorm[2,i,]),xlim=c(0,y=max(density(q_lnorm[1,i,])$x)*0.55),pch=16,lty=1,lwd=2,col="gray")
  abline(v=median(q_lnorm[2,i,]),lty=2,lwd=2,col="gray")
  abline(v=mean(q_lnorm[2,i,]),lty=3,lwd=2,col="gray")
  legend(x=max(density(q_lnorm[1,i,])$x)*0.3,y=max(density(q_lnorm[1,i,])$y),c("Quantile 0.9", str_c("Median = ", format(round(median(q_lnorm[1,i,])),big.mark=",")), str_c("Mean = ", format(round(mean(q_lnorm[1,i,])),big.mark=",")), "Quantile 0.98", str_c("Median = ", format(round(median(q_lnorm[2,i,])),big.mark=",")),str_c("Mean = ", format(round(mean(q_lnorm[2,i,])),big.mark=","))), lty=c(1,2,3,1,2,3),lwd=c(2,2,2,2,2,2),col=c("black","black","black","gray","gray","gray"),box.lty=0)
  dev.off()
  
  jpeg(filename=flab_qexp[i],width=1024,height=768,units="px",pointsize=28,quality=600)
  plot(density(q_exp[1,i,]),xlim=c(0,y=max(density(q_exp[1,i,])$x)*0.6),pch=16,lty=1,lwd=2,xlab="Estimated immigration volume",main=paste("Selected quantiles for the exponential distribution:\nImmigration from",labels[i]))
  abline(v=median(q_exp[1,i,]),lty=2,lwd=2)
  abline(v=mean(q_exp[1,i,]),lty=3,lwd=2)
  lines(density(q_exp[2,i,]),xlim=c(0,y=max(density(q_exp[1,i,])$x)*0.6),pch=16,lty=1,lwd=2,col="gray")
  abline(v=median(q_exp[2,i,]),lty=2,lwd=2,col="gray")
  abline(v=mean(q_exp[2,i,]),lty=3,lwd=2,col="gray")
  legend(x=max(density(q_exp[1,i,])$x)*0.33,y=max(density(q_exp[1,i,])$y),c("Quantile 0.9", str_c("Median = ", format(round(median(q_exp[1,i,])),big.mark=",")), str_c("Mean = ", format(round(mean(q_exp[1,i,])),big.mark=",")), "Quantile 0.98", str_c("Median = ", format(round(median(q_exp[2,i,])),big.mark=",")),str_c("Mean = ", format(round(mean(q_exp[2,i,])),big.mark=","))), lty=c(1,2,3,1,2,3),lwd=c(2,2,2,2,2,2),col=c("black","black","black","gray","gray","gray"),box.lty=0)
  dev.off()
  
}

# Preparing output - mean- and median-based quantiles 

for (i in 1:8) {
  
  mig_mean[1,i] <- round(mean(q_exp[1,i,]))
  mig_mean[2,i] <- round(mean(q_exp[2,i,]))
  mig_mean[3,i] <- round(mean(q_gevd[1,i,]))
  mig_mean[4,i] <- round(mean(q_gevd[2,i,]))
  mig_mean[5,i] <- round(mean(q_lnorm[1,i,]))
  mig_mean[6,i] <- round(mean(q_lnorm[2,i,]))
  mig_mean[7,i] <- round(mean(q_pareto[1,i,]))
  mig_mean[8,i] <- round(mean(q_pareto[2,i,]))
  
  mig_quantiles[1,i] <- round(median(q_exp[1,i,]))
  mig_quantiles[2,i] <- round(median(q_exp[2,i,]))
  mig_quantiles[3,i] <- round(median(q_gevd[1,i,]))
  mig_quantiles[4,i] <- round(median(q_gevd[2,i,]))
  mig_quantiles[5,i] <- round(median(q_lnorm[1,i,]))
  mig_quantiles[6,i] <- round(median(q_lnorm[2,i,]))
  mig_quantiles[7,i] <- round(median(q_pareto[1,i,]))
  mig_quantiles[8,i] <- round(median(q_pareto[2,i,]))
      
}

# Preparing output - p-values from the Shapiro-Wilk test for goodness of fit 

for (i in 1:8) {
sw_pval[1,i] <- gofTest(y=data_median[i,],distribution="exp",estimate.params = TRUE,test="sw")$p.value
sw_pval[2,i] <- gofTest(y=data_median[i,],distribution="gevd",estimate.params = TRUE,test="sw")$p.value
sw_pval[3,i] <- gofTest(y=data_median[i,],distribution="lnormAlt",estimate.params = TRUE,test="sw")$p.value
sw_pval[4,i] <- gofTest(y=data_median[i,],distribution="pareto",estimate.params = TRUE,test="sw")$p.value
}

# Writing out the numerical output to a file

rowlab = rep("",24)
rowlab <- c("q90_exp_mean", "q98_exp_mean", "q90_gevd_mean", "q98_gevd_mean", "q90_lnorm_mean", "q98_lnorm_mean", "q90_pareto_mean", "q98_pareto_mean", "q90_exp_median", "q98_exp_median", "q90_gevd_median", "q98_gevd_median", "q90_lnorm_median", "q98_lnorm_median", "q90_pareto_median", "q98_pareto_median", "pval_exp_mean", "pval_gevd_mean", "pval_lnorm_mean", "pval_pareto_mean", "pval_exp_median", "pval_gevd_median", "pval_lnorm_median", "pval_pareto_median")
collab = rep("",9)
collab <- c("Indicator",labels[])

write.table(t(collab), file = "D9-4_migration_extreme_quantiles.csv", append = FALSE, quote = TRUE, sep = ",", eol = "\n", na = "NA", dec = ".", row.names = FALSE, col.names = FALSE)
write.table(mig_mean, file = "D9-4_migration_extreme_quantiles.csv", append = TRUE, quote = TRUE, sep = ",", eol = "\n", na = "NA", dec = ".", row.names = rowlab[1:8], col.names = FALSE)
write.table(mig_quantiles, file = "D9-4_migration_extreme_quantiles.csv", append = TRUE, quote = TRUE, sep = ",", eol = "\n", na = "NA", dec = ".", row.names = rowlab[9:16], col.names = FALSE)
write.table(sw_pval, file = "D9-4_migration_extreme_quantiles.csv", append = TRUE, quote = TRUE, sep = ",", eol = "\n", na = "NA", dec = ".", row.names = rowlab[17:20], col.names = FALSE)

################################################
# Illustrative Figures to the main D9.3 Report #
################################################

############
# Figure 1 #
############

domain <- c(0:5000)/1000
jpeg(filename="Figure_1.jpg",width=1536,height=768,units="px",pointsize=28,quality=600)
plot(x=domain,y=dexp(domain,1),xlim=c(0,5),ylim=c(0,2),pch=16,lty=1,lwd=3,col="white",xlab="x",ylab="density",main="Selected quantiles of three distributions with mean = 1")
lines(x=domain,y=dexp(domain,1),xlim=c(0,5),pch=16,lty=1,lwd=3,col="black")
lines(x=domain,y=dpareto(domain,0.5,2),xlim=c(0,5),pch=16,lty=2,lwd=3,col="red")
lines(x=domain[499:501],y=dpareto(domain[499:501],0.5,2),xlim=c(0,4),pch=16,lty=2,lwd=3,col="white")
lines(x=domain,y=dlnorm(domain,-0.5,1),xlim=c(0,5),pch=16,lty=3,lwd=3,col="blue")
abline(v=qexp(0.9,1),lty=1,lwd=2,col="black")
abline(v=qpareto(0.9,0.5,2),lty=2,lwd=2,col="red")
abline(v=qlnorm(0.9,-0.5,1),lty=3,lwd=2,col="blue")
abline(v=qexp(0.98,1),lty=1,lwd=2,col="black")
abline(v=qpareto(0.98,0.5,2),lty=2,lwd=2,col="red")
abline(v=qlnorm(0.98,-0.5,1),lty=3,lwd=2,col="blue")
legend(x=2.35,y=2,c("Exponential (1)", "Pareto (0.5, 2)", "Log-normal (-0.5, 1)"), lty=c(1,2,3),lwd=c(3,3,3),col=c("black","red","blue"),box.lty=0)
legend(x=1.34,y=1,c("Quantiles q(0.9)"),box.lty=0)
legend(x=3.55,y=1,c("Quantiles q(0.98)"),box.lty=0)
dev.off()

############
# Figure 2 #
############

jpeg(filename="Figure_2_bottom_left.jpg",width=1024,height=768,units="px",pointsize=28,quality=600)
plot(x=domain,y=dexp(domain,1),xlim=c(0,4),ylim=c(0,2),pch=16,lty=1,lwd=2,col="white",xlab="x",ylab="density",main="Density functions of three distributions with mean = 1:")
lines(x=domain,y=dexp(domain,1),xlim=c(0,4),pch=16,lty=1,lwd=2,col="black")
lines(x=domain,y=dpareto(domain,0.5,2),xlim=c(0,4),pch=16,lty=2,lwd=2,col="red")
lines(x=domain[499:501],y=dpareto(domain[499:501],0.5,2),xlim=c(0,4),pch=16,lty=2,lwd=2,col="white")
lines(x=domain,y=dlnorm(domain,-0.5,1),xlim=c(0,4),pch=16,lty=3,lwd=2,col="blue")
dev.off()
jpeg(filename="Figure_2_bottom_right.jpg",width=1024,height=768,units="px",pointsize=28,quality=600)
plot(x=domain,y=dexp(domain,1),xlim=c(4,8),ylim=c(0,0.025),pch=16,lty=1,lwd=2,col="white",xlab="x",ylab="",main="Zoom on the tails")
lines(x=domain,y=dexp(domain,1),xlim=c(4,8),pch=16,lty=1,lwd=2,col="black")
lines(x=domain,y=dpareto(domain,0.5,2),xlim=c(4,8),pch=16,lty=2,lwd=2,col="red")
lines(x=domain,y=dlnorm(domain,-0.5,1),xlim=c(4,8),pch=16,lty=3,lwd=2,col="blue")
legend(x=5,y=0.025,c("Exponential (1)", "Pareto (0.5, 2)", "Log-normal (-0.5, 1)"), lty=c(1,2,3),lwd=c(2,2,2),col=c("black","red","blue"),box.lty=0)
dev.off()

############
# Figure 3 #
############

newdomain <- c(1:1600)*1000
newquantiles = read.csv("D9-4_migration_extreme_quantiles.csv", row.names=1)  
# Reads the original version of the file, for version consistency #
parshape = parloc = rep(0,8)

for (i in 1:8) {
  parloc[i] = eqpareto(x=data_median[i,])$parameters[1]
  parshape[i] = eqpareto(x=data_median[i,])$parameters[2]
  filename[i] = gsub(" ","",str_c("Figure_3_",labels[i],".jpg"))
  jpeg(filename[i],width=1024,height=768,units="px",pointsize=28,quality=600)
  plot(x=newdomain,y=dpareto(newdomain,location=parloc[i],shape=parshape[i]),col="white",ylab="density",xlab="",main=paste("Mean quantiles: migration from",labels[i]))
  lines(x=newdomain,y=dpareto(newdomain,location=parloc[i],shape=parshape[i]),col="black",lty=1,lwd=2)
  lines(x=newdomain[1:round(parloc[i]/1000,0)+1],y=dpareto(newdomain[1:round(parloc[i]/1000,0)+1],location=parloc[i],shape=parshape[i]),col="white",lty=1,lwd=2)
  points(x=data_median[i,],y=rep(0,11),pch=21,bg="black")
  abline(v=mean(newquantiles[7,i]),lty=2,lwd=2)
  abline(v=mean(newquantiles[8,i]),lty=3,lwd=2)
  legend(x=900000,y=max(dpareto(newdomain,location=epareto(x=data_median[i,])$parameters[1],shape=epareto(x=data_median[i,])$parameters[2])),c("Quantile q(0.9)", "Quantile q(0.98)"), lty=c(2,3),lwd=c(2,2),col=c("black","black"),box.lty=0)
  dev.off()
}

rm(newquantiles)
