* Start from a clean session: close a log left open by an earlier run
* (capture swallows the error when no log is open) and empty memory.
capture log close
clear all

* Project root; every path used below is relative to this folder
cd "\\main.oecd.org\Homedir1\Cai_M\Projects_R\meat\"

* Household-level results for all years and scenarios
use "intm_data\full_hh_results.dta", clear

* Readable names for the identifiers.
* eur_he00_bt is renamed to base BEFORE the reshape so that it no longer matches
* the eur_he00_ stub: it stays a wide variable and is carried along in i().
rename ha04 hhid
rename ha10 weight
rename eur_he00_bt base

* Variables not needed further on
drop ha09 hb* _merge

* Wide -> long: one row per household and remaining eur_he00_* suffix.
* The suffix is stored in the string variable varname, the amount in value.
local reshape_ids year scenario country hhid weight quintile base
reshape long eur_he00_, i( `reshape_ids' ) j( varname ) string
rename eur_he00_ value

* Strings are more difficult to handle: replace them by labelled numeric codes.
* encode names the value label after the variable it generates, so the labels
* are called recycle and country1 (the latter keeps that name after the rename).
encode varname, generate( recycle )
encode country, generate( country1 )
drop country
rename country1 country

* Write the code -> text mappings to do-files; they are re-run after the
* estimation loop to turn the numeric codes back into text.
label save recycle using "intm_data\recycle_labels.do", replace
label save country1 using "intm_data\country_labels.do", replace

* Save for further processing.
* The folder is spelled intm_data here, exactly as in the statements that read
* this file back, so the script does not depend on a case-insensitive file system.
drop varname
compress
save "intm_data\temp.dta", replace



*** Gini coefficient ***

* Country by country and scenario by scenario, test if the Gini coefficient differs from baseline levels
* About digini: 1) seemingly it does not support if conditions, so I will loop over countries, scenarios and recycling schemes; 2) it does not appear to store the results we want, so I will write the results to a log file and then process that file in R. There must be more elegant ways of doing this though...


*** Mean burden by quintile ***

* Within the loop structure of the Gini coefficient tests, compute mean burden for each quintile with and without the policy change. Test the statistical significance of the difference

* Result container: one row per year-country-scenario-recycle-quintile cell,
* five identifier columns followed by six estimate columns (11 in total).
use "intm_data\temp.dta", clear
local cell_vars year country scenario recycle quintile
keep `cell_vars'
duplicates drop

* fillin adds the combinations that never occur in the data, so _N becomes the
* size of the full cross product. The estimation loop writes one row for every
* year-country-scenario-recycle combination and every quintile 1-5, so the two
* counts agree only when quintile takes exactly those five values.
fillin `cell_vars'
local n_cells = _N
matrix results = J( `n_cells', 11, . )
matrix colnames results = `cell_vars' base_mean base_se value_mean value_se diff_mean diff_se


*** Loop over years, countries, scenarios and recycling schemes ***

* Indices
* The distinct values of each loop dimension are taken from the data in memory.
* levelsof's local() option copies the list into the macro directly; assigning
* r( levels ) with "=" would send it through the expression evaluator, where
* older Stata releases cut long strings short.
levelsof year, local( all_years )
levelsof country, local( all_countries )
levelsof scenario, local( all_scenarios )
levelsof recycle, local( all_recycle )

* Row of the results matrix that the next quintile estimate is written to
local i = 1

* Output to log file
log using "intm_data\test_log.txt", text replace

* For some year-country-scenario-recycle combinations we have no data (e.g. no quantity data).
* Avoid any 0 observations error when looping

* Loop
* For every year-country-scenario-recycle combination: print a header to the log,
* run digini on base and value (its printed output is what the log captures), and
* store the survey-weighted means of base and value, their standard errors and the
* difference between them for quintiles 1-5 in the next five rows of results.
* All estimation commands run under capture noisily, so a combination without
* data prints its error and leaves missing values instead of stopping the run.

* Column positions in results do not change, so they are looked up once
foreach col in year country scenario recycle quintile base_mean base_se value_mean value_se diff_mean diff_se {
	local c_`col' = colnumb( results, "`col'" )
}

foreach yr of local all_years {
	foreach cntry of local all_countries {
		foreach scnr of local all_scenarios {
			foreach cycl of local all_recycle {

				* Header identifying this combination in the log
				display "**********"
				display "`yr'"
				display "`cntry'"
				display "`scnr'"
				display "`cycl'"


				*** Relevant set of results ***

				use "intm_data\temp.dta", clear
				keep if year == `yr' & country == `cntry' & scenario == `scnr' & recycle == `cycl'
				* Set survey design variables: each observation is its own sampling unit, weighted by weight
				capture noisily svyset _n [ pw = weight ]


				*** Gini coefficient and test ***

				capture noisily digini base value


				*** Mean burden by quintile ***

				forvalues qntl = 1/5 {

					* Identifiers are written unconditionally, so every row is tagged even when estimation fails
					matrix results[ `i', `c_year' ] = `yr'
					matrix results[ `i', `c_country' ] = `cntry'
					matrix results[ `i', `c_scenario' ] = `scnr'
					matrix results[ `i', `c_recycle' ] = `cycl'
					matrix results[ `i', `c_quintile' ] = `qntl'

					* Mean by quintile
					* NOTE(review): the quintile is selected with if rather than svy's subpop() option;
					* confirm that this is the intended variance estimator.
					capture noisily svy: mean base value if quintile == `qntl'

					* Point estimate from e(b) and standard error from the diagonal of e(V)
					foreach v in base value {
						capture noisily local pos = colnumb( e( b ), "`v'" )
						capture noisily matrix results[ `i', `c_`v'_mean' ] = e( b )[ 1, `pos' ]

						capture noisily local pos = colnumb( e( V ), "`v'" )
						capture noisily matrix results[ `i', `c_`v'_se' ] = sqrt( e(V)[ `pos', `pos' ] )
					}

					* Test difference (checked this with David)
					capture noisily lincom [ base ] - [ value ]
					capture noisily matrix results[ `i', `c_diff_mean' ] = r( estimate )
					capture noisily matrix results[ `i', `c_diff_se' ] = r( se )

					* Next row. clear results removes the stored estimation results (it does not
					* touch the matrix named results), presumably so that a failed estimation in
					* the next pass cannot pick up values left over from this one.
					local ++i
					clear results
				}
			}
		}
	}
}
log close



* Turn the results matrix into a dataset and export it.
* clear empties memory (data and value labels) before svmat creates one variable
* per matrix column; loading temp.dta first would only be discarded again, so it
* is not read here.
* NOTE(review): svmat creates float variables unless a type is given; use
* "svmat double" if the full precision of the estimates matters downstream.
clear
svmat results, names( col )

* Recover country and recycle text from the label files saved earlier.
* The country label is still called country1, the name encode gave it.
do "intm_data\country_labels.do"
do "intm_data\recycle_labels.do"
label values country country1
label values recycle recycle
decode country, generate( country1 )
decode recycle, generate( recycle1 )
* Put back the stub that the reshape stripped from the variable names
replace recycle1 = "eur_he00_" + recycle1
* Detach the value labels again: country and recycle keep their numeric codes,
* the text lives in country1 and recycle1
label values country
label values recycle

* Recycling schemes with labels
* Everything that is not one of the listed names stays in scheme 1
generate rscheme = 1
replace rscheme = 2 if inlist( recycle1, "eur_he00_at_ult", "eur_he00_at_ult_dr" )
replace rscheme = 3 if inlist( recycle1, "eur_he00_at_vat", "eur_he00_at_vat_dr" )
replace rscheme = 4 if inlist( recycle1, "eur_he00_at_tlq", "eur_he00_at_tlq_dr" )

* Labelled copy of the scheme code (rscheme itself stays unlabelled)
generate rscheme1 = rscheme
label define rscheme 1 "Tax only" 2 "Tax + Uniform lump transfer" 3 "Tax + VAT reform" 4 "Tax + Transfer to bottom quintile"
label values rscheme1 rscheme

* Demand response: 1 when the scheme name contains "_dr", 0 otherwise
generate dr = strpos( recycle1, "_dr" ) > 0

* Scenario labels
* TODO: the text for scenario 5 is a placeholder; replace it once the scenario is known
generate scenario1 = scenario
label define scenario 1 "5% rate" 2 "Standard VAT" 3 "GHG-based" 4 "Unit tax" 5 "I don't know"
label values scenario1 scenario

* Output to text file
order year scenario scenario1 country country1 recycle recycle1 rscheme rscheme1 dr
sort year scenario country recycle quintile
outsheet using "output_data\burden_by_country.csv", comma replace

* Clean up
*erase "intm_data\temp.dta"
*erase "intm_data\country_labels.do"
*erase "intm_data\recycle_labels.do"
