# Population-weighted sum of one column over the rows of `results_no_rec`.
#
# Arguments:
#   results_no_rec  data frame with a `pop` column and the column named by
#                   `var_name`; callers in this file pass one row per country.
#   var_name        name of the column to average (character scalar).
#
# Returns a single number: sum(value * pop / total_pop), where total_pop is the
# sum of all non-missing `pop` entries.
#
# Both `pop` and the value column go through as.numeric(as.character(.)) so
# that factor or character columns are read by their printed value.
#
# NOTE(review): rows whose value or pop is NA are dropped from the sum, but the
# weights are not renormalised afterwards, so such rows effectively contribute
# zero to the average -- confirm this is intended.
calc_EU_avg2 <- function(results_no_rec, var_name) {  # e.g. var_name = "Gini_diff"
  pop     <- as.numeric(as.character(results_no_rec$pop))
  weights <- pop / sum(pop, na.rm = TRUE)

  # Look the column up by name instead of building code with eval(parse()):
  # this also works for column names that are not syntactic R identifiers.
  values <- as.numeric(as.character(results_no_rec[[var_name]]))

  return(sum(values * weights, na.rm = TRUE))
}

# Population-weighted sum of one column, computed separately per quintile, for
# a data frame holding aggregate values by country and quintile.
#
# Arguments:
#   results_no_rec  data frame with columns `pop`, `qn` and the column named by
#                   `var_name`.
#   var_name        name of the column to average (character scalar).
#
# Returns a numeric vector in which element q is the weighted sum over the rows
# with qn == q. The vector has at least 5 elements; a quintile label with no
# rows stays NA.
#
# The previous implementation looped over 1:length(unique(qn)) and used the
# loop counter as the quintile label, so a data set in which one quintile was
# missing evaluated the wrong labels (the last quintile was never computed and
# the absent one was reported as 0).
#
# NOTE(review): the total population is the sum over the *unique* values of
# `pop`, so two countries with exactly the same population would be counted
# once -- confirm whether de-duplicating by country is what is wanted.
# NOTE(review): as in calc_EU_avg2, NA values are dropped without renormalising
# the weights.
calc_EU_avg_quint <- function(results_no_rec, var_name) {
  total_pop <- sum(as.numeric(as.character(unique(results_no_rec$pop))), na.rm = TRUE)
  weights   <- as.numeric(as.character(results_no_rec$pop)) / total_pop

  # Column lookup by name replaces the former eval(parse(text = ...)).
  values <- as.numeric(as.character(results_no_rec[[var_name]]))

  qn       <- results_no_rec$qn
  n_groups <- length(unique(qn[!is.na(qn)]))

  # Keep the historical minimum length of 5 (one slot per quintile).
  EU_avg_var <- rep(NA_real_, times = max(5L, n_groups))
  for (q in seq_along(EU_avg_var)) {
    rows <- which(qn == q)
    if (length(rows) > 0) {
      EU_avg_var[q] <- sum(values[rows] * weights[rows], na.rm = TRUE)
    }
  }

  return(EU_avg_var)
}

# Add the placeholder result columns (all NA) to `df` and return it.
#
# Columns are created in this order:
#   Gini_*   and Theil_*  : _BT, _AT, _AT_DR and _AT_<scheme> for each scheme
#   total_tax_rev, total_tax_rev_DR, VAT_rate, VAT_rate_DR
#   exp_mns_tax_<scheme>  : one per scheme
# where <scheme> runs over TLQ, TLQ_DR, ULT, ULT_DR, VAT, VAT_DR.
create_new_empty_cols <- function(df) {
  scheme_suffixes <- c("_TLQ", "_TLQ_DR", "_ULT", "_ULT_DR", "_VAT", "_VAT_DR")
  index_suffixes  <- c("_BT", "_AT", "_AT_DR", paste0("_AT", scheme_suffixes))

  placeholder_cols <- c(
    paste0("Gini", index_suffixes),
    paste0("Theil", index_suffixes),
    "total_tax_rev", "total_tax_rev_DR", "VAT_rate", "VAT_rate_DR",
    paste0("exp_mns_tax", scheme_suffixes)
  )

  for (col_name in placeholder_cols) {
    df[[col_name]] <- NA
  }

  return(df)
}

# Per-country distributional analysis of the tax and of three ways of handing
# the revenue back to households, each with and without demand response (DR).
#
# Argument:
#   df  household-level data frame. The code reads COUNTRY, HA10 (used as the
#       household weight), qn, EUR_HE00, exp_mns_tax, exp_mns_tax_DR,
#       EUR_HE0112, EUR_HE0112_net_AT, EUR_HE0116, EUR_HE0117,
#       EUR_HE0116_net, EUR_HE0117_net and VAT_FnV_2010.
#
# Returns `df` with, for every country, the Gini and Theil indices, the total
# revenue and the revenue-neutral VAT rates written to all rows of that
# country, the household-level expenditure under each recycling scheme, and
# the *_diff columns (index after tax minus index before tax).
#
# Names used but not defined in this function: Gini(), theil.wtd() and the
# elasticity eta_FnG; they must be available in the calling environment.
#
# Results are written with `df$col[rows] <- value`, which assumes the result
# columns already exist -- presumably created by create_new_empty_cols();
# verify against the caller.
distr_analysis <- function(df) {
  names_countries <- unique(df$COUNTRY)

  scenario_suffixes  <- c("", "_DR", "_TLQ", "_TLQ_DR", "_ULT", "_ULT_DR", "_VAT", "_VAT_DR")
  recycling_suffixes <- c("_TLQ", "_TLQ_DR", "_ULT", "_ULT_DR", "_VAT", "_VAT_DR")

  for (i in seq_along(names_countries)) { # loop over countries
    country_rows <- which(df$COUNTRY == names_countries[i])
    HBS_temp     <- df[country_rows, ]
    hh_weight    <- HBS_temp$HA10
    print(names_countries[i])

    # Weighted tax revenue, in total and per household.
    total_rev    <- sum((HBS_temp$EUR_HE0112_net_AT - HBS_temp$EUR_HE0112) * hh_weight, na.rm = TRUE)
    total_rev_DR <- sum((HBS_temp$EUR_HE00 - HBS_temp$exp_mns_tax_DR) * hh_weight, na.rm = TRUE)

    n_households        <- sum(hh_weight, na.rm = TRUE)
    total_rev_per_hh    <- total_rev    / n_households
    total_rev_DR_per_hh <- total_rev_DR / n_households

    # Uniform lump-sum transfers: expenditure minus tax + equal transfer.
    HBS_temp$exp_mns_tax_ULT    <- HBS_temp$exp_mns_tax    + total_rev_per_hh
    HBS_temp$exp_mns_tax_ULT_DR <- HBS_temp$exp_mns_tax_DR + total_rev_DR_per_hh

    # VAT reduction on fruits and vegetables.
    fnv_gross <- HBS_temp$EUR_HE0116     + HBS_temp$EUR_HE0117
    fnv_net   <- HBS_temp$EUR_HE0116_net + HBS_temp$EUR_HE0117_net
    TGS <- sum(fnv_gross * hh_weight, na.rm = TRUE) # total gross spending on fruits and veggies
    TNS <- sum(fnv_net   * hh_weight, na.rm = TRUE) # total net spending on fruits and veggies

    # VAT rate on fruits and vegetables if all revenue is recycled via VAT cuts.
    VAT_red <- 100 * ((TGS - total_rev) / TNS - 1)
    # expenditure minus tax + VAT savings
    HBS_temp$exp_mns_tax_VAT <- HBS_temp$exp_mns_tax + (fnv_gross - fnv_net * (1 + VAT_red / 100))

    # VAT reduction with demand response.
    VAT_red_DR <- 100 * ((TGS - total_rev_DR) / TNS - 1)
    vat_change <- VAT_red_DR / 100 - HBS_temp$VAT_FnV_2010 / 100
    vat_base   <- 1 + HBS_temp$VAT_FnV_2010 / 100
    for (net_col in c("EUR_HE0116_net", "EUR_HE0117_net")) {
      # Column lookup by name replaces the former eval(parse(text = ...)).
      HBS_temp[[paste0(net_col, "_AT_DR")]] <-
        HBS_temp[[net_col]] * (1 + eta_FnG * vat_change / vat_base) * (1 + vat_change / vat_base)
    }
    HBS_temp$exp_mns_tax_VAT_DR <- HBS_temp$exp_mns_tax_DR +
      (HBS_temp$VAT_FnV_2010 - VAT_red_DR) / 100 *
      (HBS_temp$EUR_HE0116_net_AT_DR + HBS_temp$EUR_HE0117_net_AT_DR) /
      (1 + VAT_red_DR / 100)

    # Transfer to the lowest quintile only: revenue split over qn == "1".
    in_q1     <- which(HBS_temp$qn == "1")
    q1_weight <- sum(hh_weight[in_q1], na.rm = TRUE)

    HBS_temp$exp_mns_tax_TLQ <- HBS_temp$exp_mns_tax
    HBS_temp$exp_mns_tax_TLQ[in_q1] <- HBS_temp$exp_mns_tax_TLQ[in_q1] + total_rev / q1_weight

    HBS_temp$exp_mns_tax_TLQ_DR <- HBS_temp$exp_mns_tax_DR
    HBS_temp$exp_mns_tax_TLQ_DR[in_q1] <- HBS_temp$exp_mns_tax_TLQ_DR[in_q1] + total_rev_DR / q1_weight

    # Inequality indices: before tax on EUR_HE00, after tax on exp_mns_tax<suffix>.
    df$Theil_BT[country_rows] <- theil.wtd(HBS_temp$EUR_HE00, weights = hh_weight)
    for (sfx in scenario_suffixes) {
      df[[paste0("Theil_AT", sfx)]][country_rows] <-
        theil.wtd(HBS_temp[[paste0("exp_mns_tax", sfx)]], weights = hh_weight)
    }
    df$Gini_BT[country_rows] <- Gini(HBS_temp$EUR_HE00, weights = hh_weight, na.rm = TRUE)
    for (sfx in scenario_suffixes) {
      df[[paste0("Gini_AT", sfx)]][country_rows] <-
        Gini(HBS_temp[[paste0("exp_mns_tax", sfx)]], weights = hh_weight, na.rm = TRUE)
    }

    # Country-level scalars, repeated on every row of the country.
    df$total_tax_rev[country_rows]    <- total_rev
    df$total_tax_rev_DR[country_rows] <- total_rev_DR
    df$VAT_rate[country_rows]         <- VAT_red
    df$VAT_rate_DR[country_rows]      <- VAT_red_DR

    # Household-level expenditure under each recycling scheme.
    for (sfx in recycling_suffixes) {
      exp_col <- paste0("exp_mns_tax", sfx)
      df[[exp_col]][country_rows] <- HBS_temp[[exp_col]]
    }
  }

  # Change in each index relative to the before-tax value.
  for (index_name in c("Gini", "Theil")) {
    for (sfx in scenario_suffixes) {
      df[[paste0(index_name, "_diff", sfx)]] <-
        df[[paste0(index_name, "_AT", sfx)]] - df[[paste0(index_name, "_BT")]]
    }
  }

  return(df)
}

# Print the population-weighted EU averages of the Gini differences and rename
# the expenditure columns to the names expected by the STATA step.
#
# Argument:
#   df  household-level data frame (e.g. results_sc1) with COUNTRY, pop and the
#       Gini_diff* columns.
#
# Returns `df` with EUR_HE00 renamed to EUR_HE00_BT and exp_mns_tax<suffix>
# renamed to EUR_HE00_AT<suffix>; columns that are absent are simply skipped.
#
# Calls calc_EU_avg2(), which is defined elsewhere in this file.
check_and_prepare_for_STATA <- function(df) { # input: df = results_sc1
  scenario_suffixes <- c("", "_DR", "_TLQ", "_TLQ_DR", "_ULT", "_ULT_DR", "_VAT", "_VAT_DR")

  # calc_EU_avg2 needs one row per country: take each country's first row.
  # match() replaces the former 1:length() loop, which also ran on empty input.
  countries   <- unique(df$COUNTRY)
  countries   <- countries[!is.na(countries)]
  per_country <- df[match(countries, df$COUNTRY), ]

  # calculate results for table 1
  for (gini_var in paste0("Gini_diff", scenario_suffixes)) {
    print(paste(paste0(gini_var, ":"), calc_EU_avg2(per_country, gini_var)))
  }

  # rename variables as input for STATA
  old_names <- c("EUR_HE00",    paste0("exp_mns_tax", scenario_suffixes))
  new_names <- c("EUR_HE00_BT", paste0("EUR_HE00_AT", scenario_suffixes))
  position  <- match(names(df), old_names)
  to_rename <- !is.na(position)
  names(df)[to_rename] <- new_names[position[to_rename]]

  return(df)
}

# Tax burden by scenario, averaged over the first and fifth quintile of each
# country and over the EU.
#
# Argument:
#   df  full household-level data set with COUNTRY, qn, HA10 (used as weight),
#       pop, EUR_HE00 and exp_mns_tax<suffix> for every scenario suffix.
#
# Returns `df` with these columns added, in this order:
#   tax_burden<suffix>          EUR_HE00 - exp_mns_tax<suffix>, per household
#   burden_q1/q5<suffix>        weighted mean burden in quintile 1 / 5 of the
#                               household's country
#   EUR_HE00_q1/q5              weighted mean EUR_HE00 in quintile 1 / 5
#   EU_burden_q1/q5<suffix>     population-weighted EU value (same on all rows)
#   EU_EUR_HE00_q1/q5           population-weighted EU value (same on all rows)
#
# The country-level and EU-level values are also printed.
# Calls calc_EU_avg2(), which is defined elsewhere in this file.
tax_burden <- function(df) {
  scenario_suffixes <- c("", "_DR", "_TLQ", "_TLQ_DR", "_ULT", "_ULT_DR", "_VAT", "_VAT_DR")
  # Human-readable form of a suffix for the log lines, e.g. "_TLQ_DR" -> " TLQ DR".
  log_label <- function(sfx) gsub("_", " ", sfx, fixed = TRUE)

  for (sfx in scenario_suffixes) {
    df[[paste0("tax_burden", sfx)]] <- df$EUR_HE00 - df[[paste0("exp_mns_tax", sfx)]]
  }

  # Create the aggregate columns up front so that the column order of the
  # result does not depend on the order in which countries are processed.
  for (sfx in scenario_suffixes) {
    df[[paste0("burden_q1", sfx)]] <- NA
    df[[paste0("burden_q5", sfx)]] <- NA
  }
  df$EUR_HE00_q1 <- NA
  df$EUR_HE00_q5 <- NA

  countries <- unique(df$COUNTRY)
  countries <- countries[!is.na(countries)]

  for (i in seq_along(countries)) {
    country_rows <- which(df$COUNTRY == countries[i])
    country_df   <- df[country_rows, ]

    rows_q1   <- which(country_df$qn == 1)
    rows_q5   <- which(country_df$qn == 5)
    weight_q1 <- country_df$HA10[rows_q1]
    weight_q5 <- country_df$HA10[rows_q5]

    for (sfx in scenario_suffixes) {
      burden    <- country_df[[paste0("tax_burden", sfx)]]
      burden_q1 <- weighted.mean(burden[rows_q1], weight_q1)
      burden_q5 <- weighted.mean(burden[rows_q5], weight_q5)

      df[[paste0("burden_q1", sfx)]][country_rows] <- burden_q1
      df[[paste0("burden_q5", sfx)]][country_rows] <- burden_q5

      print(paste(countries[i],
                  paste0("Burden Q1", log_label(sfx), ":"), burden_q1,
                  paste0("Burden Q5", log_label(sfx), ":"), burden_q5))
    }

    df$EUR_HE00_q1[country_rows] <- weighted.mean(country_df$EUR_HE00[rows_q1], weight_q1)
    df$EUR_HE00_q5[country_rows] <- weighted.mean(country_df$EUR_HE00[rows_q5], weight_q5)
  }

  # calc_EU_avg2 needs a data frame with one row per country, so take the
  # first row of each country (the aggregates are constant within a country).
  per_country <- df[match(countries, df$COUNTRY), ]

  for (sfx in scenario_suffixes) {
    eu_q1 <- calc_EU_avg2(per_country, paste0("burden_q1", sfx))
    eu_q5 <- calc_EU_avg2(per_country, paste0("burden_q5", sfx))

    df[[paste0("EU_burden_q1", sfx)]] <- eu_q1
    df[[paste0("EU_burden_q5", sfx)]] <- eu_q5

    print(paste("EU",
                paste0("Burden Q1", log_label(sfx), ":"), eu_q1,
                paste0("Burden Q5", log_label(sfx), ":"), eu_q5))
  }

  eu_exp_q1 <- calc_EU_avg2(per_country, "EUR_HE00_q1")
  eu_exp_q5 <- calc_EU_avg2(per_country, "EUR_HE00_q5")
  df$EU_EUR_HE00_q1 <- eu_exp_q1
  df$EU_EUR_HE00_q5 <- eu_exp_q5
  # The second label used to read "EUR_HE00 Q1:" although it shows the Q5 value.
  print(paste("EU", "EUR_HE00 Q1:", eu_exp_q1, "EUR_HE00 Q5:", eu_exp_q5))

  return(df)
}

# Aggregate household-level data to one row per country and quintile.
#
# Argument:
#   df  household-level data frame with COUNTRY, qn, HA10 (used as weight) and
#       the variables to aggregate.
#
# Returns a data frame with 5 rows per country (quintiles 1-5, in that order)
# and the columns COUNTRY, qn (stored as character "1".."5") followed by the
# HA10-weighted mean of every column of `df` whose name contains "EUR_" or
# "HQ", plus HB062. A country/quintile combination without households gets NaN
# (the result of weighted.mean on empty input).
#
# Changes relative to the previous implementation:
#   * columns and rows are addressed directly instead of through
#     eval(parse(text = ...)), so column or country names that are not valid R
#     syntax no longer break the function;
#   * the household rows of a country/quintile are located once and reused for
#     all variables instead of being searched again for every variable;
#   * COUNTRY is converted to character first; a factor COUNTRY used to be
#     turned into integer codes by cbind(), after which no row ever matched;
#   * a column matching both name patterns is aggregated once, not duplicated.
aggregate_to_quintiles <- function(df) {
  countries <- unique(as.character(df$COUNTRY))
  qn        <- c(1, 2, 3, 4, 5)

  # set up empty data frame for aggregate data: countries x quintiles
  agg_data_countries <- data.frame(
    COUNTRY = rep(countries, each = length(qn)),
    qn      = as.character(rep(qn, times = length(countries))),
    stringsAsFactors = FALSE
  )

  var_name <- names(df)[c(grep("EUR_", names(df)), grep("HQ", names(df)))]
  var_name <- unique(c(var_name, "HB062"))

  for (v in var_name) { # add empty columns for aggregate variables
    print(v)
    agg_data_countries[[v]] <- rep(NA, times = nrow(agg_data_countries))
  }

  # aggregate variables
  source_country <- as.character(df$COUNTRY)
  for (i in seq_along(countries)) {
    print(countries[i])

    # Household rows of this country, split by quintile (computed once).
    rows_by_qn <- lapply(qn, function(q) {
      which(source_country == countries[i] & df$qn == as.character(q))
    })

    for (v in var_name) {
      print(v)
      for (k in seq_along(qn)) {
        hh_rows    <- rows_by_qn[[k]]
        # Row of (country i, quintile k) in the countries x quintiles layout.
        target_row <- (i - 1L) * length(qn) + k
        agg_data_countries[[v]][target_row] <-
          weighted.mean(df[[v]][hh_rows], df$HA10[hh_rows])
      }
    }
  }

  return(agg_data_countries)
}

# Build a result table with one row per country plus a final "EU" row.
#
# Arguments:
#   df           household-level data frame (e.g. HBS_all_VAT_5perc).
#   result_vars  names of the columns to keep; must include COUNTRY and pop.
#
# Returns the first row of every country, restricted to `result_vars` (in the
# column order of `df`), followed by an "EU" row. In the EU row every column
# from the third one onwards holds the population-weighted EU value computed
# by calc_EU_avg2(), and `pop` holds the sum of the country populations.
# Each EU value is also printed.
#
# The first two columns are skipped by position, as before -- presumably they
# are COUNTRY and pop; verify against the caller's column order.
#
# Calls calc_EU_avg2(), which is defined elsewhere in this file.
outputting_result_tables <- function(df, result_vars) { # df = HBS_all_VAT_5perc
  df <- df[which(names(df) %in% result_vars)]

  # One row per country: the first row in which each country appears.
  countries <- unique(df$COUNTRY)
  countries <- countries[!is.na(countries)]
  df_new    <- df[match(countries, df$COUNTRY), ]

  # "EU" and the population total are written into these columns below; a
  # factor column would turn the new values into NA.
  for (key_col in c("COUNTRY", "pop")) {
    if (is.factor(df_new[[key_col]])) {
      df_new[[key_col]] <- as.character(df_new[[key_col]])
    }
  }

  variable_names <- names(df_new)
  new_line       <- rep(NA, times = ncol(df))

  # Columns 3..ncol; empty when there are fewer than three columns
  # (3:ncol(df) would count downwards in that case).
  value_cols <- seq_len(ncol(df))[-(1:2)]
  for (i in value_cols) {
    eu_value <- calc_EU_avg2(df_new, variable_names[i])
    print(paste(variable_names[i], eu_value))
    new_line[i] <- eu_value
  }

  df_new <- rbind(df_new, new_line)
  eu_row <- nrow(df_new)
  df_new$COUNTRY[eu_row] <- "EU"
  df_new$pop[eu_row] <- sum(as.numeric(as.character(df_new$pop[seq_len(eu_row - 1)])))

  return(df_new)
}

# Percentage reduction in the demanded quantity of seven meat categories after
# the tax, as a population-weighted EU value.
#
# Arguments:
#   df_agg    data frame aggregated by country and quintile. For each meat code
#             (01121 .. 01127) the code reads EUR_HE<code>, HQ<code> and
#             EUR_HE<code>_net_AT_DR, plus COUNTRY and qn.
#   scenario  1: VAT on meat raised by `tr` percentage points
#             2: VAT on meat raised to VAT_std_2010
#             3: tax_co2 * co2_intensities[k] added to the price of category k
#             4: unit_tax added to the price
#   tr        tax rate in percentage points; only used in scenario 1.
#
# Returns a data frame with the columns `labels` (meat category) and `values`
# (reduction in percent). Both columns come out of cbind() and are therefore
# not numeric.
#
# Reads these names from the calling environment: switch_HBS_2010, tax_co2,
# co2_intensities, unit_tax, and -- when switch_HBS_2010 is FALSE -- pop_nr.
# When switch_HBS_2010 is TRUE the VAT rates and populations are read from
# CSV files under inputData/.
# Calls calc_EU_avg_quint(), which is defined elsewhere in this file.
#
# NOTE(review): only element [1] of calc_EU_avg_quint() is reported, i.e. the
# value for quintile 1 -- confirm that this is the intended figure.
# NOTE(review): for a `scenario` outside 1-4 no after-tax price is created and
# the function fails at the first quantity assignment.
calc_GHG_red <- function(df_agg, scenario, tr) {
  meat_codes <- c(beef = "01121", pork = "01122", shpg = "01123", poul = "01124",
                  drsl = "01125", oprs = "01126", othr = "01127")
  meat_keys  <- names(meat_codes)
  price_cols <- paste0("price_", meat_keys, "_pq")

  # Before-tax price per quantity unit: expenditure / quantity.
  for (k in seq_along(meat_codes)) {
    df_agg[[price_cols[k]]] <-
      df_agg[[paste0("EUR_HE", meat_codes[[k]])]] / df_agg[[paste0("HQ", meat_codes[[k]])]]
  }

  # Keep only rows in which all seven prices are finite (drops NA, NaN, +-Inf).
  usable <- rep(TRUE, nrow(df_agg))
  for (price_col in price_cols) {
    usable <- usable & is.finite(df_agg[[price_col]])
  }
  df_agg <- df_agg[usable, ]

  if (switch_HBS_2010) {
    VAT_2010 <- read.csv("inputData/VAT_2010_FnV.csv")
    df_agg   <- merge(df_agg, VAT_2010)
    pop_nr   <- read.csv("inputData/EUROSTAT_demography/demo_pjan_1_Data.csv", stringsAsFactors = FALSE)
    pop_nr   <- pop_nr[, c(2, 6)]
  }
  names(pop_nr) <- c("COUNTRY", "pop")
  df_agg <- merge(df_agg, pop_nr)

  for (k in seq_along(meat_codes)) {
    code         <- meat_codes[[k]]
    price_bt     <- df_agg[[price_cols[k]]]
    price_at_col <- paste0(price_cols[k], "_AT")
    qty_bt_col   <- paste0("HQ", code)
    qty_at_col   <- paste0(qty_bt_col, "_AT")

    # After-tax price; here need to distinguish by case.
    if (scenario == 1) {
      df_agg[[price_at_col]] <- price_bt *
        (1 + df_agg$VAT_meat_2010 / 100 + tr / 100) / (1 + df_agg$VAT_meat_2010 / 100)
    } else if (scenario == 2) {
      df_agg[[price_at_col]] <- price_bt *
        (1 + df_agg$VAT_meat_2010 / 100 + (df_agg$VAT_std_2010 - df_agg$VAT_meat_2010) / 100) /
        (1 + df_agg$VAT_meat_2010 / 100)
    } else if (scenario == 3) {
      df_agg[[price_at_col]] <- price_bt + tax_co2 * co2_intensities[k]
    } else if (scenario == 4) {
      df_agg[[price_at_col]] <- price_bt + unit_tax
    }

    # Quantity after tax and demand response, and the reduction in percent.
    df_agg[[qty_at_col]] <-
      df_agg[[paste0("EUR_HE", code, "_net_AT_DR")]] / df_agg[[price_at_col]]
    df_agg[[paste0("dem_red_perc_", meat_keys[k])]] <-
      (1 - df_agg[[qty_at_col]] / df_agg[[qty_bt_col]]) * 100
  }

  labels <- c("bovine (mainly beef)", "pork", "sheep & goat", "poultry", "dried, smoked or salted", "other preserved", "other fresh")
  values <- numeric(length(meat_keys))
  for (k in seq_along(meat_keys)) {
    values[k] <- calc_EU_avg_quint(df_agg, paste0("dem_red_perc_", meat_keys[k]))[1]
  }

  # The variable names `labels` and `values` become the column names.
  dem_red <- data.frame(cbind(labels, values))
  return(dem_red)
}