/*===========================================================================================*/
/*                                     Main Program                                          */
/*===========================================================================================*/
* ---- Start from an empty session: data, Mata objects and matrices ----
clear
clear mata
clear matrix

* ---- Output behaviour ----
* Do not pause for --more-- prompts
set more off
* Echo program execution, limited to two levels of nesting
set trace on
set tracedepth 2

* ---- Size limits (maxvar is set after the data were cleared above) ----
set maxvar 32767
* "permanently" also stores this setting for future Stata sessions
set matsize 11000, permanently

* ---- Timers and random-number seed ----
timer clear
set seed 20478

capture program drop main
program define main
	// Driver: sets up the path macros once, closes any open log, then runs
	// the sub-programs that are not commented out below.
	paths
	capture log close

// =============== 0 Comment in/out subprograms you wish to run ================

	// readACS is currently disabled; GompertzLE and compareGompertz read
	// files saved under $dataSTATA rather than data left in memory.
	*readACS
	GompertzLE, cutoff(90)
	compareGompertz
end 

/*===========================================================================================*/
/*                                    Sub Programs                                         */
/*===========================================================================================*/
 
/*---------------------------------------------------------*/
/* Define path macros (ado search path and data directories) */
/*---------------------------------------------------------*/

capture program drop paths

program define paths

	// Add the shared programs directory to the ado search path
	adopath + "/projects/programs/"

	// Common root of the three data directories defined below
	local build "/projects/programs/foodstamps/master_folder/LE_build"

	*global projdir "/projects/programs/rwalker/matt"

	// Each global keeps its trailing slash; the other programs in this file
	// append "/filename" to it, so the resulting paths contain "//".
	global dataSTATA "`build'/dataSTATA/"
	global datatemp_basic "`build'/datatemp_basic/"
	global dataRAW "`build'/dataRAW/"

end	

/*-----------------------------------------------------------------------------*/
/*  readACS			 					       */ 
/*-----------------------------------------------------------------------------*/

// Goal: bring in ACS-SSA matched data 
// keep variables that will be used to predict life expectancy 

capture program drop readACS
program define readACS

	// Reads dtacr_20.dta, derives age at death (aod) and a death indicator
	// (died), keeps the variables listed at the bottom, and writes
	// $dataSTATA/mortalityInput.dta.

	use "$datatemp_basic/dtacr_20.dta", clear

	// Alternative inputs kept for reference:
	//   10% sample: use "$dataSTATA/tempData.dta", clear
	//   1% draw:    use if uniform()<0.01 using  "/projects/programs/duquette/FromBrenden/datatemp/dtacr_20.dta", clear

	// Household income quantiles (disabled):
	//xtile incq = hh_inctot, nq(4)
	//qui: tab incq, gen(incq_)

	// question: why do we not have dis_8 here? 

	// Death and birth years -> numeric; "force" turns non-numeric text into missing
	foreach yr in dodyy dobyy {
		destring `yr', replace force
	}

	// Numeric, value-labelled copy of gender
	encode gender, gen(sex)

	// Age at death. A negative difference gets 100 added
	// (presumably dodyy/dobyy are two-digit years spanning a century change -- TODO confirm)
	gen aod = dodyy - dobyy
	replace aod = aod + 100 if aod < 0

	// died = 1 when the survey year (minus 2000) equals the death year, 0 otherwise
	gen yy = year - 2000
	gen died = (yy == dodyy)

	// Keep only the variables used downstream
	keep coufip statefip birthyr age sex perwt year yy dobyy dodyy aod died

	save "$dataSTATA/mortalityInput.dta", replace

end 

/*-----------------------------------------------------------------------------*/
/*  Calculate life expectancy 						       */ 
/*-----------------------------------------------------------------------------*/

// Use gompertz function (as per Chetty JAMA article) to estimate life expectancy 

capture program drop GompertzLE
program define GompertzLE
syntax, [cutoff(int 99)]

	// Estimates a log-link binomial GLM of raw mortality rates, predicts
	// mortality for every group x age cell, fills the remaining ages with
	// NCHS/SSA rates, and converts the resulting mortality schedule into a
	// life expectancy (LE) per group.
	//
	// Options:
	//   cutoff(#)  model predictions are used for ages strictly below #;
	//              cells at or above # take the NCHS/SSA rate m instead.
	//              Default 99. (Previously this option was parsed but the
	//              prediction step hard-coded 90; main calls cutoff(90), so
	//              its results are unchanged.)
	//
	// Reads:  $dataSTATA/temp1_estimation1.dta, group_age_table_estimation1.dta,
	//         ages.dta, groups_estimation1.dta; $dataRAW/nchs_ssa.csv
	// Writes: $dataSTATA/temp2`tag', temp3`tag', predicted`tag',
	//         life_table`tag', LE_gompertz`tag' (.dta)

	// option to tag output 
	local tag="_estimation4"

/* -------------------------------------- 

// Comment out creation of raw mortality 

	// Goal estimate gompertz on 30 - 63 for each group 
	// 'extrapolate' beyond 90 (w/ NCHS data )

	//use if uniform()<0.01 using "$dataSTATA/mortalityInput.dta", clear 
	// 10% sample: 
	//use "$dataSTATA/mortalitytemp.dta", clear 
	use "$dataSTATA/mortalityInput.dta", clear 
	
	gen fips = statefip*1000+coufip
	replace coufip = fips 

	egen group = group (sex coufip birthyr)
	egen birthyr_county_fe = group(birthyr coufip)

	// generate group tempfile to save group definitions 
	preserve
	keep sex coufip birthyr group statefip
	duplicates drop group, force 
	tempfile groups
	save "$dataSTATA/groups`tag'.dta", replace 
	save `groups'
	restore
	
	// count total number of people in age bin, total deaths in age bin 
	drop if age ==. 
	// NOTE(review): the expression on the next line is malformed (looks like a
	// search-and-replace accident; presumably died==1) -- fix before re-enabling
	bysort group age: gen deaths=sum(died=`tag'=1)	
	bys age group: gen total=_N
	
	// merge total observations, total death count together by age and group: 
	preserve
	keep age group total 
	duplicates drop age group, force 
	xtset group age 
	tsfill, full
	*replace total = 0 if mi(total)
	* match age range to death files 
	drop if age<30
	tempfile totals 
	save `totals'
	save "$dataSTATA/totals`tag'.dta", replace
	restore
	
	keep aod group deaths
	duplicates drop aod group, force 
	drop if aod==. 
	xtset group aod 
	tsfill, full
	
	* if we do not observe any deaths within group-cell, count as zero
	replace deaths = 0 if mi(deaths) 
	rename aod age 
	
	merge 1:1 age group using `totals'
	* still need to replace some deaths, because some groups not represented even after tsfill above 
	replace deaths = 0 if mi(deaths)
	drop if mi(total)
	gen m_a_raw = deaths/total
	*keep group age m_a_raw
	
	// merge back demographic data to use in estimation 
	merge m:1 group using `groups', nogen

	// keep version of data to use for prediction 
	// below we tsfill group_age_table to make a dataframe with ages 40 - 119 for all age groups 
	preserve
	drop if (age<30) 
	drop if (age>63)
	tempfile group_age_table
	save `group_age_table'
	save "$dataSTATA/group_age_table`tag'.dta", replace 
	restore
	
	// truncate age to 30 - 63 (63 is oldest foodstamps cohort)
	drop if age>63
	drop if age<30
	
	drop if m_a_raw>1
	gen log_m_a = log(m_a_raw)
	save "$dataSTATA/temp1`tag'.dta", replace 

	clear
	set matsize 11000
	set maxvar 32767

-------------------------------------- */

	// same mortality dataset across estimations 1,3,4

	use "$dataSTATA/temp1_estimation1.dta", clear 
	glm m_a_raw i.coufip i.birthyr i.coufip#c.birthyr i.sex i.coufip#c.age i.sex#c.age i.birthyr#c.age, family(binomial) link(log) iterate(20)
	
	// now we have a gompertz function for each group 
	// take a saturated group * age table and predict mortality rate for each cell

	use "$dataSTATA/group_age_table_estimation1.dta", clear 
	keep age group 
	drop if age==. | group ==.
	duplicates drop age group, force

	// add ages up to 119 (taken from ages.dta), then fill in every
	// group x age combination; the appended rows carry no group and are
	// dropped once tsfill has used their ages
	append using "$dataSTATA/ages.dta"
	xtset group age 
	tsfill, full
	drop if group==.
	
	// merge back on group demographic info to use for prediction 
	merge m:1 group using "$dataSTATA/groups_estimation1.dta", nogen
	save "$dataSTATA/temp2`tag'.dta", replace 

	// predict mortality rate at each age for each group, only for ages below
	// the cutoff -- gompertz does not perform well at high ages
	//predict logm_predicted if age<90
	drop if age<40
	predict m_predicted if age<`cutoff'
	save "$dataSTATA/temp3`tag'.dta", replace

	//predict logm_predicted if age<=90
	
	// for ages at or above the cutoff, we use NCHS / SSA mortality rate estimates instead of gompertz estimates, following Chetty et al. 
	
	// create sex2 variable for merging purposes 
	// note 8/16: we should redo nchs_ssa: rather than collapsing over races, we should just use the nchs tables by sex (these will have correct weights)
	preserve
	import delimited using "$dataRAW/nchs_ssa.csv", clear
	rename sex sex2
	drop v*
	collapse (mean) m, by(age sex2)
	tempfile ssa
	save `ssa'
	restore 
	
	// map the numeric sex codes onto the "M"/"F" strings used in the SSA table
	tostring sex, generate(sex2)
	replace sex2="M" if sex2=="3"
	replace sex2="F" if sex2=="2"
	keep if sex2=="M" | sex2 == "F"
	capture drop _merge
	// keep(master match): SSA rows with no counterpart in the group x age
	// table have no group id; keeping them would add group-less rows that can
	// duplicate the (age, group) key and end up as a spurious "missing" group
	merge m:1 age sex2 using `ssa', keep(master match)
	
	//capture gen m_predicted = exp(logm_predicted)
	replace m_predicted = m if mi(m_predicted)
	drop if age==. 
	drop sex2
	
	//tempfile predicted
	//save `predicted'
	save "$dataSTATA/predicted`tag'.dta", replace

	// steps:
	// 1) Calculate l_a = product of (1-death rate) for years below a. This is 'survivorship' to age a
	// 2) calculate L_a = (l_a + l_(a+1))/2. This is midpoint survivorship 
	// 3) calculate LE = sum(L_a*m_a*age)

	// Use gompertz function (as per Chetty JAMA article) to estimate life expectancy
	
	// ------------------------------------
	// 1) Calculate l_a = product of (1-death rate) for years below a
	// ------------------------------------

	gen q = 1-m_predicted

	// Running sum of ln(q) within group in age order. sum() counts a missing
	// ln(q) as zero, exactly as egen total() did in the per-age
	// preserve/restore loop this replaces.
	bysort group (age): gen double cum_lnq = sum(ln(q))

	// Survivorship to age a is exp() of the running sum on the previous row,
	// i.e. the product of q over all strictly younger ages. As before, it is
	// only filled for ages 41-119 and only when the row for age a-1 exists.
	by group: gen double l_a = exp(cum_lnq[_n-1]) if _n>1 & age[_n-1]==age-1 & inrange(age, 41, 119)

	// helper columns are not part of the saved life table
	drop q cum_lnq

	// l_a is proportion that survives to age a
	replace l_a =1 if (l_a==. &  age==40)
	replace l_a = 0 if (l_a==. &  age==119)

	// ------------------------------------
	// 2) calculate L_a = (l_a + l_(a+1))/2
	// ------------------------------------
	
	// L_a is 'midpoint survivorship,' the proportion that make it to midpoint of age group a

	// sort by age inside each group so that [_n+1] is the next age
	bysort group (age): gen lead = l_a[_n+1]
	gen L_a = (l_a + lead )/2
	replace L_a = 1 if (L_a==. &  age == 40)

	// ------------------------------------
	// 3) calculate Life expectancy =sum(L_a*mpredicted*age)
	// ------------------------------------
	
	// life expectancy is L_a*m_predicted*age 
	// L_a*m_predicted give proportion of people that make it to a, times the hazard rate gives proportion of total that die at age a
	// Then multiply by age to get expected death age (Life expectancy)

	gen r_a = L_a*m_predicted
	gen r_a_weighted = age*r_a
	// total() is the documented name of the egen function formerly called sum()
	bysort group: egen LE = total(r_a_weighted)
	bysort group: egen adjustment = total(r_a)
	*replace LE = LE / (adjustment)
	*adjustment would be to account for cases where r_a does not sum to one. 

	save "$dataSTATA/life_table`tag'.dta", replace
	
	// one row per group, with the group's demographic identifiers attached
	duplicates drop group, force
	keep group LE
	merge m:1 group using "$dataSTATA/groups_estimation1.dta", nogen 
	hist LE 
	
	capture drop _merge
	save "$dataSTATA/LE_gompertz`tag'.dta", replace

end 

/*-----------------------------------------------------------------------------*/

/*-----------------------------------------------------------------------------*/
/*  Gompertz comparison							       */ 
/*-----------------------------------------------------------------------------*/

// Goal: compare our gompertz LE calculations to those of chetty et al. 

capture program drop compareGompertz
program define compareGompertz 

	// Reads the group-level life expectancies saved as LE_gompertz`tag'.dta,
	// writes a reformatted county-level copy, aggregates LE to state x sex
	// with the weighted crosswalk, merges the result onto the Chetty
	// state x sex means, regresses one on the other and saves the comparison.

	local tag="_estimation4"
	
	// create county-state weighted crosswalk (comment out)
/* -----------------------------------------------------------------------	

	use  "/projects/programs/duquette/FromBrenden/datatemp/dtacr_20.dta", clear
	keep statefip coufip 
	gen fips = statefip*1000+coufip
	replace coufip = fips 
	gen x=1

	preserve
	collapse (count) x, by(statefip)
	rename x state_total
	tempfile state_totals
	save `state_totals'
	restore

	collapse (count) x, by(coufip statefip) 
	merge m:1 statefip using `state_totals', nogen 
	gen weight = x / state_total
	
	save "$dataSTATA/weighted_crosswalk.dta", replace

 ---------------------------------------------------------------------- */	

	use "$dataSTATA/LE_gompertz`tag'.dta", clear 
	
	// Stata treats missing as larger than any number, so this also drops
	// rows whose LE is missing
	drop if LE>100

	// Recode sex to the "M"/"F" strings once; both the reformatted file and
	// the state-level comparison below use the string version
	tostring sex, replace force
	replace sex="M" if sex=="3"
	replace sex="F" if sex=="2"
	
	/*------ for chetty comparison -----------------------------*/
	// Side output: county x birth-year x sex LE, with LE renamed to LE4
	preserve 
	keep coufip birthyr sex LE
	rename LE LE4
	
	* for use in dtacr_23_newsubgroups_tarduno
	save "$dataSTATA/LE_gompertz_reformatted`tag'.dta", replace
	restore 

	// State x sex mean LE, weighting each row by the inverse of the crosswalk weight
	// NOTE(review): in the disabled block above, weight is the county's share
	// of its state's observations, so 1/weight gives smaller counties more
	// weight -- confirm this is intended
	merge m:1 coufip using "$dataSTATA/weighted_crosswalk.dta" 
	collapse (mean) LE [pweight=1/weight], by (statefip sex)
	tempfile gompertz 
	save `gompertz' 
	
	// Chetty means at the same state x sex level, joined to ours
	use "$dataSTATA/chetty_reformatted.dta", clear
	collapse (mean) LE_chetty, by(statefip sex)
	merge 1:1 sex statefip using `gompertz', nogen
	drop if sex==""
	
	regress LE_chetty LE
	
	save "$dataSTATA/chetty_comparison`tag'.dta", replace


end

main 

