#
# Main file for the empirical analysis of the "meat and distribution" paper
# David Klenert
# Revise and Resubmit for Nature Food Summer 2023
#

library(DescTools); 
library(tidyr); 
library(ggplot2); 
library(viridis); 
library(openxlsx);
library(ggpubr); 
library(data.table); 
library(Hmisc);
library(dineq)
source("02-Code-Main/functions/meat_load_HBS.R");
source("02-Code-Main/functions/meat_EU_plots.R");
source("02-Code-Main/functions/meat_country_lvl_plots_0514.R");
source("02-Code-Main/functions/new_functions_2023.R")

########################################################################################
# Wave selection. Both lines are kept on purpose: the LATER assignment wins, so
# comment out the line of the wave that should not be loaded.
switch_HBS_2010<-TRUE; switch_HBS_2015<-FALSE # set the switch to load 2010 data
switch_HBS_2010<-FALSE; switch_HBS_2015<-TRUE # set the switch to load 2015 data

# Exactly one wave may be active. With both switches TRUE the 2010-only cleaning
# steps would run on 2015 data; with both FALSE countrycodes/HBS_data_path would
# be undefined and the script would fail later with an unrelated error.
if (!xor(isTRUE(switch_HBS_2010), isTRUE(switch_HBS_2015))) {
  stop("Exactly one of switch_HBS_2010 and switch_HBS_2015 must be TRUE.", call. = FALSE)
}
########################################################################################
# 1 #  Load data 

  # set parameters: country list and data directory of the selected wave
      if (switch_HBS_2010) {
        print("loading 2010 data")
        countrycodes  <- c("BE","BG","CY","CZ","DE","DK","EE","EL","ES","FI","FR","HR","HU","IE","IT","LT","LU","LV","MT","PL","PT","RO","SE","SI","SK","UK") #for 2010 wave
        HBS_data_path <- "01-Data/EUHBS-2010/" #path of 2010 HBS data relative to working directory
      }
      if (switch_HBS_2015) {
        print("loading 2015 data")
        countrycodes  <- c("BE","BG","CY","CZ","DE","DK","EE","EL","ES","FI","FR","HR","HU","IE","IT","LT","LU","LV","NL","PL","RO","SE","SK") #for 2015 wave
        HBS_data_path <- "01-Data/EUHBS-2015/" #path of 2015 HBS data relative to working directory
      }
      
  # load HBS data (slow); load_HBS_data() comes from one of the sourced project files
      HBS_all <- load_HBS_data(HBS_data_path, countrycodes)

  # load population data (EUROSTAT)
  # The two files are laid out differently, so the two columns kept are picked by
  # position per wave (presumably country code and population - verify against the files).
      if (switch_HBS_2010) {
        pop_nr <- read.csv("01-Data/EUROSTAT_demography/2010_pop_data.csv", stringsAsFactors = FALSE)
        pop_nr <- pop_nr[, c(2, 6)]
      }
      if (switch_HBS_2015) {
        pop_nr <- read.csv("01-Data/EUROSTAT_demography/2015_pop_data.csv", stringsAsFactors = FALSE)
        pop_nr <- pop_nr[, c(1, 3)]
      }
      names(pop_nr) <- c("COUNTRY", "pop")

  # load the VAT rate tables of both waves
      VAT_2010 <- read.csv("01-Data/VAT_rates/VAT_2010_FnV.csv")
      VAT_2015 <- read.csv("01-Data/VAT_rates/VAT_2015_FnV.csv")
    
########################################################################################
# 2 # Clean and structure data 
    
  # clean data
    # Drop categories that are not relevant for the analysis (speeds up aggregation) 
      # Variables kept for the analysis: identifiers/household variables, total
      # expenditure (EUR_HE00), and expenditure (EUR_HE...) and quantity (HQ...)
      # variables for the 0112, 0116 and 0117 groups and their sub-categories.
      relevant_vars <- c(
        "COUNTRY", "HA04", "HA09", "HA10", "EUR_HE00", "HB05", "HB061", "HB062",
        "EUR_HE01111", "EUR_HE01112",
        "EUR_HE0112", "EUR_HE01121", "EUR_HE01122", "EUR_HE01123", "EUR_HE01124",
        "EUR_HE01125", "EUR_HE01126", "EUR_HE01127",
        "EUR_HE0116", "EUR_HE01161", "EUR_HE01162", "EUR_HE01163", "EUR_HE01164",
        "EUR_HE01165", "EUR_HE01166", "EUR_HE01167", "EUR_HE01168", "EUR_HE01169",
        "EUR_HE0117", "EUR_HE01171", "EUR_HE01172", "EUR_HE01173", "EUR_HE01174",
        "EUR_HE01175", "EUR_HE01176", "EUR_HE01177", "EUR_HE01178",
        "HQ0112", "HQ01121", "HQ01122", "HQ01123", "HQ01124", "HQ01125", "HQ01126",
        "HQ01127",
        "HQ0116", "HQ01161", "HQ01162", "HQ01163", "HQ01164", "HQ01165", "HQ01166",
        "HQ01167", "HQ01168", "HQ01169",
        "HQ0117", "HQ01171", "HQ01172", "HQ01173", "HQ01174", "HQ01175", "HQ01176",
        "HQ01177", "HQ01178"
      )
      # keep only the listed columns that actually exist in the loaded data
      is_relevant_col <- names(HBS_all) %in% relevant_vars
      HBS_all <- HBS_all[, is_relevant_col]
    
    # remove NAs (not necessary for 2015)
    # Rows are dropped one variable at a time: first rows with a missing HA10,
    # then rows with a missing value in any of the meat spending sub-categories
    # EUR_HE01121 ... EUR_HE01127. A variable without NAs leaves the data untouched.
      na_check_vars <- c("HA10", paste0("EUR_HE0112", 1:7))
      for (na_var in na_check_vars) {
        value_missing <- is.na(HBS_all[[na_var]])
        if (any(value_missing)) {
          HBS_all <- HBS_all[!value_missing, ]
        }
      }
      
    # remove households with negative spending
    # Every expenditure variable (name contains "EUR") is checked in turn; rows with
    # a negative value are dropped. Rows where the value is NA are kept, and a
    # variable that is absent from the data is skipped.
      vars_check_negative <- grep("EUR", relevant_vars, value = TRUE)
      for (spending_var in vars_check_negative) {
        negative_rows <- which(HBS_all[[spending_var]] < 0)
        # only subset when there is something to drop: x[-integer(0), ] would drop ALL rows
        if (length(negative_rows) > 0) {
          HBS_all <- HBS_all[-negative_rows, ]
        }
      }

    # remove clear accounting errors in quantity data (not necessary for 2015) | removes households whose quantity per HB061 unit exceeds 500 (kg/year per capita, per the original note) in any meat sub-category
      if (switch_HBS_2010) {
        meat_quantity_vars <- paste0("HQ0112", 1:7) # HQ01121 ... HQ01127
        exceeds_limit <- Reduce(`|`, lapply(meat_quantity_vars, function(q_var) {
          HBS_all[[q_var]] / HBS_all$HB061 > 500
        }))
        outlier_rows <- which(exceeds_limit) # which() ignores NA, so rows that cannot be judged are kept
        # Guard against an empty selection: HBS_all[-integer(0), ] returns ZERO rows,
        # i.e. the whole dataset would be lost if no household exceeds the limit.
        if (length(outlier_rows) > 0) {
          HBS_all <- HBS_all[-outlier_rows, ]
        }
        rm(meat_quantity_vars, exceeds_limit, outlier_rows)
      }
      
    # remove Germany (due to lack of data for the relevant categories)
    # A logical mask is used instead of -which(): with -which() the whole dataset
    # would be dropped if no "DE" rows were present. Rows with a missing COUNTRY are kept.
        HBS_all <- HBS_all[!(HBS_all$COUNTRY %in% "DE"), ]
      
  # merge data with the VAT rate table of the active wave (merge() joins on the
  # columns the two data frames have in common)
      if (switch_HBS_2010) {
        HBS_all <- merge(HBS_all, VAT_2010)
      }
      if (switch_HBS_2015) {
        HBS_all <- merge(HBS_all, VAT_2015)
      }

      # meat spending variables: the aggregate EUR_HE0112 and its sub-categories 1-7
      column_names <- paste0("EUR_HE0112", c("", 1:7))

  # for the 2015 wave, rename the VAT columns to the 2010 names so that the code
  # below can refer to a single set of column names for both waves
      if (switch_HBS_2015) {
        vat_renames <- c(VAT_meat_2015 = "VAT_meat_2010",
                         VAT_std_2015  = "VAT_std_2010",
                         VAT_FnV_2015  = "VAT_FnV_2010")
        for (old_name in names(vat_renames)) {
          names(HBS_all)[names(HBS_all) == old_name] <- vat_renames[[old_name]]
        }
      }
         
  # calculate net-of-tax prices: for every spending variable X a column X_net is
  # added, equal to X / (1 + VAT rate / 100). Meat variables use VAT_meat_2010,
  # fruit and vegetable variables use VAT_FnV_2010 (for the 2015 wave these
  # columns hold the 2015 rates under the 2010 names).
      for (gross_var in column_names) {
        HBS_all[[paste0(gross_var, "_net")]] <- HBS_all[[gross_var]] / (1 + HBS_all$VAT_meat_2010 / 100)
      }
      rm(column_names)

      # fruit (0116x) and vegetable (0117x) variables; the 2015 list contains fewer sub-categories
      if (switch_HBS_2010) {
        column_names_FnV <- c("EUR_HE0116","EUR_HE01161","EUR_HE01162","EUR_HE01163","EUR_HE01164","EUR_HE01165","EUR_HE01166","EUR_HE01167","EUR_HE01168","EUR_HE01169","EUR_HE0117","EUR_HE01171","EUR_HE01172","EUR_HE01173","EUR_HE01174","EUR_HE01175","EUR_HE01176","EUR_HE01177","EUR_HE01178")
      }
      if (switch_HBS_2015) {
        column_names_FnV <- c("EUR_HE0116","EUR_HE01161","EUR_HE01162","EUR_HE01163","EUR_HE01164","EUR_HE0117","EUR_HE01171","EUR_HE01172","EUR_HE01173","EUR_HE01174","EUR_HE01175","EUR_HE01176")
      }
      for (gross_var in column_names_FnV) {
        HBS_all[[paste0(gross_var, "_net")]] <- HBS_all[[gross_var]] / (1 + HBS_all$VAT_FnV_2010 / 100)
      }
      rm(column_names_FnV)
  # merge with population data (joined on the columns shared with pop_nr)
      HBS_all <- merge(HBS_all, pop_nr)

  # save dataset  (and reload to change character vectors to numeric)
  # NOTE: the file name contains "2010" regardless of which wave is loaded.
      net_data_file <- "04-intmData/HBS_agg_net_2010.csv"
      write.csv(HBS_all, net_data_file, row.names = FALSE)
      HBS_all <- read.csv(net_data_file, stringsAsFactors = FALSE)
########################################################################################
# 3 # Fixing two additional problems: (1) PL and RO quantities are off in 2010 wave.
#                                     (2) for some countries, HE0112 is not the sum of individual meat quantities 
  
  # Problem 1: PL and RO quantities are off in 2010 wave. We estimated multipliers by how much average quantities deviate from 2015 quantities by comparing deviations between PL and RO and the rest of the countries in the sample in 2010 and 2015.
  # Every quantity column (name contains "HQ") is scaled by the country-specific
  # multiplier, for the rows of that country only.
      if (switch_HBS_2010) {
        multi_PL <- 11.8
        multi_RO <- 8.4

        names_q <- grep("HQ", names(HBS_all), value = TRUE)
        rows_PL <- which(HBS_all$COUNTRY == "PL")
        rows_RO <- which(HBS_all$COUNTRY == "RO")

        for (q_var in names_q) {
          HBS_all[[q_var]][rows_PL] <- HBS_all[[q_var]][rows_PL] * multi_PL # Poland
          HBS_all[[q_var]][rows_RO] <- HBS_all[[q_var]][rows_RO] * multi_RO # Romania
        }
        rm(rows_PL, rows_RO)
      }
          
  #Problem 2: for some countries, the aggregates, i.e. HE0112, HQ0112, etc. are not the sum of individual categories
  # The aggregates (gross spending, net spending, quantity) of meat (0112),
  # fruit (0116) and vegetables (0117) are therefore rebuilt as the sum of their
  # sub-categories. Which sub-categories are summed depends on the wave:
  #   2010: meat 1-7, fruit 1-9, vegetables 1-8
  #   2015: meat 1-7, fruit 1-4, vegetables 1-6

      # Sum of the columns <prefix><group><digit><suffix>, added left to right;
      # an NA in any sub-category yields NA for that row.
      sum_subcategories <- function(data, prefix, group, digits, suffix = "") {
        Reduce(`+`, data[paste0(prefix, group, digits, suffix)])
      }

      subcategories_by_wave <- list(
        "2010" = list("0112" = 1:7, "0116" = 1:9, "0117" = 1:8),
        "2015" = list("0112" = 1:7, "0116" = 1:4, "0117" = 1:6)
      )
      active_waves <- c("2010", "2015")[c(isTRUE(switch_HBS_2010), isTRUE(switch_HBS_2015))]

      for (wave in active_waves) {
        subcategories <- subcategories_by_wave[[wave]]

        #find out which countries are affected (has to happen before EUR_HE0112 is overwritten).
        # The results are printed explicitly; a bare expression inside a block is not shown.
        meat_sum <- sum_subcategories(HBS_all, "EUR_HE", "0112", subcategories[["0112"]])
        cat("Countries where EUR_HE0112 differs from the sum of its sub-categories (rounded to integers):\n")
        print(unique(HBS_all$COUNTRY[which(!round(meat_sum, digits = 0) == round(HBS_all$EUR_HE0112, digits = 0))]))
        test <- (meat_sum - HBS_all$EUR_HE0112) / HBS_all$EUR_HE0112
        cat("Countries with a relative deviation above 20%:\n")
        print(unique(HBS_all$COUNTRY[which(abs(test) > 0.2)]))

        #set the aggregate values manually: 1. meat, 2. fruit, 3. veggies
        for (group in names(subcategories)) {
          digits <- subcategories[[group]]
          HBS_all[[paste0("EUR_HE", group)]]         <- sum_subcategories(HBS_all, "EUR_HE", group, digits)
          HBS_all[[paste0("EUR_HE", group, "_net")]] <- sum_subcategories(HBS_all, "EUR_HE", group, digits, "_net")
          HBS_all[[paste0("HQ", group)]]             <- sum_subcategories(HBS_all, "HQ", group, digits)
        }
      }
      rm(sum_subcategories, subcategories_by_wave, active_waves)
      
  ##############################################################################################################  
      # create dataframe without countries that lack quantity data (and UK as data is nonexistent/strange):
      # NOTE(review): the filter works per household (row): every row with a missing
      # or zero HQ01124 is dropped, not only whole countries - confirm this is intended.
      rows_without_quantity <- which(is.na(HBS_all$HQ01124) | HBS_all$HQ01124 == 0 | HBS_all$COUNTRY == "UK") # UK is dropped as well because its quantity data seems to be off
      # Guard against an empty selection: HBS_all[-integer(0), ] would return ZERO rows.
      if (length(rows_without_quantity) > 0) {
        HBS_all_q <- HBS_all[-rows_without_quantity, ]
      } else {
        HBS_all_q <- HBS_all
      }
      rm(rows_without_quantity)

########################################################################################
    # 4 # Determine quintiles at the country level   (for equivalised hhs)    
      
    # Adds a column qn (1-5) with the within-country expenditure quintile of each row.
    #   data: data frame with the columns COUNTRY, EUR_HE00, HB062 and HA10.
    # Rows are ranked by EUR_HE00 / HB062; the quintile boundaries are the weighted
    # 20/40/60/80% quantiles (weights HA10) of that ratio within the row's country.
    # A value exactly on a boundary falls into the lower quintile; rows whose ratio
    # is NA keep qn = NA. The country code and its boundaries are printed as a check.
    # Returns the data frame with qn added (or replaced).
      assign_country_quintiles <- function(data) {
        eq_spending <- data$EUR_HE00 / data$HB062
        data$qn <- NA
        for (country in unique(data$COUNTRY)) {
          country_rows <- which(data$COUNTRY == country)
          spending <- eq_spending[country_rows]
          quintiles <- wtd.quantile(spending, weights = data$HA10[country_rows], probs = seq(0, 1, 1/5), na.rm = TRUE)
          print(country)
          print(quintiles)

          # quintiles[1] and quintiles[6] are the 0% and 100% quantiles; only the four inner boundaries are needed
          qn_country <- rep(NA_real_, length(country_rows))
          qn_country[which(spending <= quintiles[2])] <- 1
          for (q in 2:4) {
            qn_country[which(spending > quintiles[q] & spending <= quintiles[q + 1])] <- q
          }
          qn_country[which(spending > quintiles[5])] <- 5
          data$qn[country_rows] <- qn_country
        }
        data
      }

    # All available countries
      HBS_all <- assign_country_quintiles(HBS_all)

    # Only countries with quantity data (quintiles are recomputed on this subset)
      HBS_all_q <- assign_country_quintiles(HBS_all_q)
      countries <- unique(HBS_all_q$COUNTRY)

      write.csv(HBS_all,"04-intmData/HBS_agg.csv",row.names = FALSE)
      write.csv(HBS_all_q,"04-intmData/HBS_agg_q.csv",row.names = FALSE)

########################################################################################
# 5 # Scenario analyses       
      # see separate file "meat_scenario_analysis.R"

########################################################################################
# 6 # Outputting results
      # Columns reported per scenario.
      result_vars <- c(
        "COUNTRY", "EUR_HE00_q1", "EUR_HE00_q5",
        "Gini_diff", "Gini_diff_DR", "Gini_diff_TLQ", "Gini_diff_TLQ_DR",
        "Gini_diff_ULT", "Gini_diff_ULT_DR", "Gini_diff_VAT", "Gini_diff_VAT_DR",
        "burden_q1", "burden_q5", "burden_q1_DR", "burden_q5_DR",
        "burden_q1_TLQ", "burden_q5_TLQ", "burden_q1_TLQ_DR", "burden_q5_TLQ_DR",
        "burden_q1_ULT", "burden_q5_ULT", "burden_q1_ULT_DR", "burden_q5_ULT_DR",
        "burden_q1_VAT", "burden_q5_VAT", "burden_q1_VAT_DR", "burden_q5_VAT_DR",
        "Theil_diff", "Theil_diff_DR", "Theil_diff_TLQ", "Theil_diff_TLQ_DR",
        "Theil_diff_ULT", "Theil_diff_ULT_DR", "Theil_diff_VAT", "Theil_diff_VAT_DR",
        "pop"
      )

      # One result table per scenario. The HBS_all_<scenario> objects are not created
      # in this file (presumably by the separate scenario analysis script - verify).
      agg_results_sc1 <- outputting_result_tables(HBS_all_VAT_5perc, result_vars)
      agg_results_sc2 <- outputting_result_tables(HBS_all_VAT_stdrate, result_vars)
      agg_results_sc3 <- outputting_result_tables(HBS_all_co2tax_50, result_vars)
      agg_results_sc4 <- outputting_result_tables(HBS_all_unit_tax, result_vars)
      agg_results_sc5 <- outputting_result_tables(HBS_all_social_cost, result_vars)

      # Write one worksheet per scenario, named after the scenario's data frame.
      # NOTE: the output file name contains "2010" regardless of the wave loaded.
      scenario_sheets <- list(
        HBS_all_VAT_5perc   = agg_results_sc1,
        HBS_all_VAT_stdrate = agg_results_sc2,
        HBS_all_co2tax_50   = agg_results_sc3,
        HBS_all_unit_tax    = agg_results_sc4,
        HBS_all_social_cost = agg_results_sc5
      )
      wb_results <- createWorkbook()
      for (sheet_name in names(scenario_sheets)) {
        addWorksheet(wb_results, sheet_name)
      }
      for (sheet_name in names(scenario_sheets)) {
        writeData(wb_results, sheet_name, scenario_sheets[[sheet_name]], rowNames = FALSE)
      }
      saveWorkbook(wb_results, file = "results_2010_data_230831.xlsx", overwrite = TRUE)

########################################################################################
# 7 # aggregate data for plotting

      # Aggregate to country x quintile level. Each call takes long (around 30 minutes)
      # and is only needed for the plots; all other results are determined in step 5
      # and are contained in the results_sc... dataframes.
      agg_data_countries <- aggregate_to_quintiles(HBS_all)
      write.csv(agg_data_countries,"04-intmData/agg_data_countries.csv",row.names = FALSE)
      agg_data_countries_q <- aggregate_to_quintiles(HBS_all_q)
      write.csv(agg_data_countries_q,"04-intmData/agg_data_countries_q.csv",row.names = FALSE)

      # calculate consumption per representative hh | agg_data_countries shows consumption per household
        names(agg_data_countries)
        names(agg_data_countries_q)

        # Divides every column from the third onwards by the LAST column of the same
        # data frame (presumably the household count - confirm against
        # aggregate_to_quintiles()); the first two columns are left untouched.
        # Each data frame uses its own column count.
        divide_by_last_column <- function(agg) {
          scaled <- agg
          for (k in seq_len(ncol(agg))[-(1:2)]) {
            scaled[[k]] <- agg[[k]] / agg[[ncol(agg)]]
          }
          scaled
        }
        agg_data_countries_percap   <- divide_by_last_column(agg_data_countries)
        agg_data_countries_q_percap <- divide_by_last_column(agg_data_countries_q)
        rm(divide_by_last_column)
      
      # calculate EU aggregate (only necessary for the plots | all other results are in the results_sc... dataframes from step 5)
        # population data of the active wave (columns picked by position, renamed to COUNTRY / pop)
        if (switch_HBS_2010) {
          pop_nr <- read.csv("01-Data/EUROSTAT_demography/2010_pop_data.csv", stringsAsFactors = FALSE)
          pop_nr <- pop_nr[, c(2, 6)]
        }
        if (switch_HBS_2015) {
          pop_nr <- read.csv("01-Data/EUROSTAT_demography/2015_pop_data.csv", stringsAsFactors = FALSE)
          pop_nr <- pop_nr[, c(1, 3)]
        }

        names(pop_nr) <- c("COUNTRY", "pop")
        agg_data_countries_percap   <- merge(agg_data_countries_percap, pop_nr)
        agg_data_countries_q_percap <- merge(agg_data_countries_q_percap, pop_nr)

        # variables to aggregate: everything from the third column up to the original
        # width of the aggregated data (i.e. without the columns appended by the merge)
        var_names   <- names(agg_data_countries_percap)[3:length(names(agg_data_countries))]
        var_names_q <- names(agg_data_countries_q_percap)[3:length(names(agg_data_countries_q))]

        # one row per quintile for the "EU" pseudo-country, filled column by column below
        EU_agg_data   <- data.frame(matrix(NA, nrow = 5, ncol = length(names(agg_data_countries))))
        EU_agg_data_q <- data.frame(matrix(NA, nrow = 5, ncol = length(names(agg_data_countries_q))))
        names(EU_agg_data)   <- c("COUNTRY", "qn", var_names)
        names(EU_agg_data_q) <- c("COUNTRY", "qn", var_names_q)
        EU_agg_data$COUNTRY   <- rep("EU", times = 5)
        EU_agg_data_q$COUNTRY <- rep("EU", times = 5)
        EU_agg_data$qn   <- c(1, 2, 3, 4, 5)
        EU_agg_data_q$qn <- c(1, 2, 3, 4, 5)

        # calc_EU_avg_quint() comes from a sourced project file; it is called once per
        # variable with the per-country data and the variable name.
        for (var_name in var_names) {
          EU_agg_data[[var_name]] <- calc_EU_avg_quint(agg_data_countries_percap, var_name)
        }
        for (var_name in var_names_q) {
          EU_agg_data_q[[var_name]] <- calc_EU_avg_quint(agg_data_countries_q_percap, var_name)
        }
        write.csv(EU_agg_data,  "04-intmData/EU_agg_data.csv"  ,row.names = FALSE)
        write.csv(EU_agg_data_q,"04-intmData/EU_agg_data_q.csv",row.names = FALSE)

########################################################################################
# 8 # Descriptive statistics 

    # Country level plots for relative and absolute expenditure (with OECD HH weights).
    # The output directory has to exist before the call.
          country_plot_directory <- "plots/plots_230703/"
          do_country_lvl_plots_OECD(agg_data_countries_percap, country_plot_directory)

    # Plots for the weighted EU average (absolute and relative expenditure) (with OECD HH weights).
    # The output directory has to exist before the call; note that it differs from the one above.
          country_plot_directory <- "plots/plots_2010_230720/"
          do_EU_agg_plots(EU_agg_data, country_plot_directory)
########################################################################################
     
            