* Collapse NUMIDENT data
* Bryan Stuart
* Updated 5/2/2016 (use birth month for food stamps project)
* Updated 5/23/2016 (collapse to birth-year level instead of birth-month)
* Updated 7/11/2016 (Incorporate geographically consistent "super counties")

capture log close
log using "$output/dtacr_24.log", replace 

* Load the NUMIDENT extract. Per the original note, it is at the
* birthday - birth county - sex - race level.
use "$datatemp_basic/dtacr_22.dta", clear

* Restrict to the 1950-1980 birth cohorts
keep if inrange(birthyr,1950,1980)

* Sex must be observed (only system-missing values are dropped)
drop if female==.

* Single race code built from the three indicators:
* 1 = white, 2 = black, 3 = other; 0 means unknown
generate race = race_white+2*race_black+3*race_other
drop if inlist(race, ., 0)
tabulate race, missing

* Inspect birth year/month in the log, then require a non-missing birth month
summarize birthyr birthmo
drop if birthmo==.


* NEW SECTION, 6/8/2016: Convert county codes to "super counties" that are
* consistent from 1950-1980.
* countyFipsToREISfips and fixCounty are presumably defined by the do-file
* run below -- confirm against countyStandardizeGNIS1950.do.
do "$dofile/countyStandardizeGNIS1950.do"
generate fips = 1000*st_fips + co_fips_1
countyFipsToREISfips, county(fips)
fixCounty, county(fips)
* Overwrite the county code with the standardized version.
* NOTE(review): st_fips is left untouched, so this assumes the recoding never
* moves a county into a different state -- TODO confirm.
replace co_fips_1 = fips - 1000*st_fips




*** Reworked weighting.
* An earlier version of this section weighted the survival measures by obs1999.
* Per the original author's note, obs1999 is the number who have died by 1999,
* i.e. the numerator in the fraction not surviving, so the collapses below
* weight by the denominator (num_births) instead.
*
* For each group (all, white, nonwhite, men, women) two cell-level files are
* built at the birth year x state x county level:
*   file 1: dis_7, z_dis_7, dis_9, dis_10 (means, weighted by num_births)
*           plus raw sums of obs1999, num_births and num_deaths
*   file 2: dis_8, z_dis_8 (means, weighted by obsdead) plus raw sum of obsdead
* The "all" group keeps exactmatch as a cell dimension; the subgroups use only
* exactmatch==0 records and carry a constant exactmatch=0 for the later merge.

tempfile t_1 t_2 t_w1 t_w2 t_b1 t_b2 t_m1 t_m2 t_f1 t_f2

* Sample restriction for each subgroup
local keep_white    "race_white==1"
local keep_nonwhite "race_black==1 | race_other==1"
local keep_men      "female==0"
local keep_women    "female==1"

* Tempfile tag for each group (files are t_<tag>1 and t_<tag>2; "all" has no tag)
local tag_all      ""
local tag_white    "w"
local tag_nonwhite "b"
local tag_men      "m"
local tag_women    "f"

foreach grp in all white nonwhite men women {

	local tag `tag_`grp''

	* Cell definition: only the "all" group is split by exactmatch
	if "`grp'" == "all" {
		local cellvars birthyr /*birthmo*/ st_fips co_fips_1 exactmatch
	}
	else {
		local cellvars birthyr /*birthmo*/ st_fips co_fips_1
	}

	* File 1: survival shares, weighted by number of births
	preserve
	if "`grp'" != "all" {
		keep if exactmatch==0
		keep if `keep_`grp''
	}
	collapse (mean) dis_7_`grp'=dis_7 z_dis_7_`grp' dis_9_`grp'=dis_9 dis_10_`grp'=dis_10 ///
		(rawsum) obs1999 num_births_`grp'=num_births num_deaths_`grp'=num_deaths ///
		[aw=num_births], by(`cellvars')
	rename obs1999 obs1999_`grp'
	if "`grp'" != "all" {
		gen exactmatch=0		// only doing exactmatch split for all at the moment
	}
	save `t_`tag'1'
	restore

	* File 2: age at death, weighted by number of observed deaths
	preserve
	if "`grp'" != "all" {
		keep if exactmatch==0
		keep if `keep_`grp''
	}
	collapse (mean) dis_8_`grp'=dis_8 z_dis_8_`grp' (rawsum) obsdead [aw=obsdead], ///
		by(`cellvars')
	rename obsdead obsdead_`grp'
	if "`grp'" != "all" {
		gen exactmatch=0
	}
	save `t_`tag'2'
	restore
}


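/*** OLD VERSION, superseded by the collapses above: same steps, but the dis_7
collapses are weighted by obs1999 instead of num_births (and dis_9/dis_10 are omitted).
Collapse for all. 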
preserve
collapse (mean) dis_7_all=dis_7 z_dis_7_all (rawsum) obs1999 ///
	num_births_all=num_births num_deaths_all=num_deaths [aw=obs1999], ///
	by(birthyr /*birthmo*/ st_fips co_fips_1 exactmatch)
rename obs1999 obs1999_all
tempfile t_1
save `t_1'
restore
preserve
collapse (mean) dis_8_all=dis_8 z_dis_8_all (rawsum) obsdead [aw=obsdead], ///
	by(birthyr /*birthmo*/ st_fips co_fips_1 exactmatch)
rename obsdead obsdead_all
tempfile t_2
save `t_2'
restore
* Collapse for whites. 
preserve
keep if exactmatch==0
keep if race_white==1 
collapse (mean) dis_7_white=dis_7 z_dis_7_white (rawsum) obs1999 ///
	num_births_white=num_births num_deaths_white=num_deaths [aw=obs1999], ///
	by(birthyr /*birthmo*/ st_fips co_fips_1)
rename obs1999 obs1999_white
gen exactmatch=0		// only doing this for all at the moment
tempfile t_w1
save `t_w1'
restore
preserve
keep if exactmatch==0
keep if race_white==1
collapse (mean) dis_8_white=dis_8 z_dis_8_white (rawsum) obsdead [aw=obsdead], ///
	by(birthyr /*birthmo*/ st_fips co_fips_1)
rename obsdead obsdead_white
gen exactmatch=0
tempfile t_w2
save `t_w2'
restore
* Collapse for non-whites. 
preserve
keep if exactmatch==0
keep if race_black==1 | race_other==1
collapse (mean) dis_7_nonwhite=dis_7 z_dis_7_nonwhite (rawsum) obs1999 ///
	num_births_nonwhite=num_births num_deaths_nonwhite=num_deaths [aw=obs1999], ///
	by(birthyr /*birthmo*/ st_fips co_fips_1)
rename obs1999 obs1999_nonwhite
gen exactmatch=0
tempfile t_b1
save `t_b1'
restore
preserve
keep if exactmatch==0
keep if race_black==1 | race_other==1
collapse (mean) dis_8_nonwhite=dis_8 z_dis_8_nonwhite (rawsum) obsdead [aw=obsdead], ///
	by(birthyr /*birthmo*/ st_fips co_fips_1)
rename obsdead obsdead_nonwhite
gen exactmatch=0
tempfile t_b2
save `t_b2'
restore
* Collapse for men. 
preserve
keep if exactmatch==0
keep if female==0
collapse (mean) dis_7_men=dis_7 z_dis_7_men (rawsum) obs1999 ///
	num_births_men=num_births num_deaths_men=num_deaths [aw=obs1999], ///
	by(birthyr /*birthmo*/ st_fips co_fips_1)
rename obs1999 obs1999_men
gen exactmatch=0
tempfile t_m1
save `t_m1'
restore
preserve
keep if exactmatch==0
keep if female==0
collapse (mean) dis_8_men=dis_8 z_dis_8_men (rawsum) obsdead [aw=obsdead], ///
	by(birthyr /*birthmo*/ st_fips co_fips_1)
rename obsdead obsdead_men
gen exactmatch=0
tempfile t_m2
save `t_m2'
restore
* Collapse for women. 
preserve
keep if exactmatch==0
keep if female==1
collapse (mean) dis_7_women=dis_7 z_dis_7_women (rawsum) obs1999 ///
	num_births_women=num_births num_deaths_women=num_deaths [aw=obs1999], ///
	by(birthyr /*birthmo*/ st_fips co_fips_1)
rename obs1999 obs1999_women
gen exactmatch=0
tempfile t_f1
save `t_f1'
restore
preserve
keep if exactmatch==0
keep if female==1
collapse (mean) dis_8_women=dis_8 z_dis_8_women (rawsum) obsdead [aw=obsdead], ///
	by(birthyr /*birthmo*/ st_fips co_fips_1)
rename obsdead obsdead_women
gen exactmatch=0
tempfile t_f2
save `t_f2'
restore
*/

* Combine NUMIDENT files.
* Start from the "all" survival file and attach the remaining nine cell-level
* files one at a time on state x county x birth year x exactmatch. Each merge
* records its match status in its own indicator (_merge2, _mergew1, ...).
use `t_1', clear
foreach piece in 2 w1 w2 b1 b2 m1 m2 f1 f2 {
	merge 1:1 st_fips co_fips_1 birthyr /*birthmo*/ exactmatch ///
		using `t_`piece'', gen(_merge`piece')
}


* Attach variable labels to the collapsed measures for every group.
foreach x in all white nonwhite men women {

	* Normalized (z_) outcomes
	la var z_dis_7_`x' "Share which lived to 2000 (NUMIDENT), `x' (normalized)"
	la var z_dis_8_`x' "Age at death, conditional on death (NUMIDENT), `x' (normalized)"

	* Outcomes in levels
	la var dis_7_`x' "Share which lived to 2000 (NUMIDENT), `x'"
	la var dis_8_`x' "Age at death, conditional on death (NUMIDENT), `x'"
	la var dis_9_`x' "Share which lived to 2012 (NUMIDENT), `x'"
	la var dis_10_`x' "Share which lived to age 40 (NUMIDENT), `x'"

	* Cell counts. obs1999_`x' is left unlabeled (its label was disabled by the
	* original author).
	*la var obs1999_`x' "Number of NUMIDENT obs used to construct dis_7, `x'"
	la var obsdead_`x' "Number of NUMIDENT obs used to construct dis_8, `x'"

	* num_births_`x' was previously labeled twice in a row; only the second
	* label ever took effect, so the overwritten one is removed. For reference,
	* the removed label described num_births as the number of NUMIDENT obs used
	* to construct dis_7, dis_9 and dis_10.
	la var num_births_`x' "Number of births, `x'"
	la var num_deaths_`x' "Number of deaths, `x'"

}

* Write summary statistics and the variable listing to the log
summarize
describe

* Shrink storage types, save the collapsed file, and close the log
compress
save "$datatemp/dtacr_24.dta", replace

log close

/* OLD WEIGHTING:

* Create weight for collapse across survey years
egen tot_perwt = total(perwt), by(year)
gen d_perwt = perwt/tot_perwt		// maintains within year relative weights 
cap drop one
gen one = 1 if perwt!=0 & perwt!=.
egen cell_num = total(one), by(st_fips co_fips_1 birthyr year) // captures cross survey year weights
egen cell_denom = total(one), by(st_fips co_fips_1 birthyr) 
gen yearwgt = cell_num/cell_denom
gen wgt = d_perwt * yearwgt

collapse (mean) hc_* ess_* dis_* nei_* (rawsum) wgt obs=one  [aw=wgt], ///
	by(st_fips co_fips_1 birthyr year)

*/
