* Collapse NUMIDENT data
* Bryan Stuart
* This file takes NUMIDENT data on age at death and collapses it to means
* at the birth year, birth month, and county of birth level
* Updated 5/2/2016 (use birth month for food stamps project)
* Updated 5/23/2016 (collapse to birth-year level instead of birth-month)
* Updated 7/11/2016 (Incorporate geographically consistent "super counties")
* Updated 9/6/2016 (Collapse by month of birth)

* Open a fresh log; capture swallows the error raised when no log is open
capture log close
log using "$output/dtacr_24_birthmo_newsub.log", replace

* Load the NUMIDENT cells (birthday - birth county - sex - race level)
use "$datatemp_basic/dtacr_22.dta", clear

* Restrict to birth cohorts 1950-1980
drop if !inrange(birthyr,1950,1980)

* Sex must be known
keep if female!=.

* Single race code built from the three race indicators
* (1 = white, 2 = black, 3 = other; 0 is unknown)
gen race = race_white + 2*race_black + 3*race_other
keep if race!=. & race!=0
tabulate race, missing

* Month of birth must be known, since cells are defined by birth month
summarize birthyr birthmo
keep if birthmo!=.


**** Super counties ****
* Convert county codes to "super counties" that are
* consistent from 1950-1980
* NOTE(review): this do-file presumably defines the countyFipsToREISfips and
* fixCounty programs called below -- confirm, they are not defined in this file
do "$dofile/countyStandardizeGNIS1950.do"
* Combined state+county code: state code times 1000 plus county code
gen fips = 1000*st_fips + co_fips_1
* Both helpers are passed the fips variable; presumably they recode it in place
* (TODO confirm against countyStandardizeGNIS1950.do)
countyFipsToREISfips, county(fips)
fixCounty, county(fips)
* Replace county fips code with new version
* (recovers the county part by subtracting the state part; this assumes the
* helpers never move a county to a different state -- TODO confirm)
replace co_fips_1 = fips - 1000*st_fips



* Give the survived-to-age-X indicators dis_* names:
* alive5 -> dis_11, alive10 -> dis_12, alive20 -> dis_13, alive30 -> dis_14
local next = 11
foreach age in 5 10 20 30 {
	rename alive`age' dis_`next'
	local ++next
}



* The collapse below is done by race x gender, so the normalized variables
* have to be rebuilt. Each one is standardized with the weighted mean and sd
* of the 1950-1954 birth cohorts, computed on exactmatch==0 rows only.
drop z_dis*

* Row restriction that defines each group ("1" means no restriction)
local in_all "1"
local in_wm  "race_white==1 & female==0"
local in_nm  "(race_black==1|race_other==1) & female==0"
local in_wf  "race_white==1 & female==1"
local in_nf  "female==1 & (race_black==1|race_other==1)"

* Order: all, white males, non-white males, white females, non-white females
foreach grp in all wm nm wf nf {
	foreach k in 7 8 {
		* dis_7 is weighted by number of births, dis_8 by number of deaths
		local wt "num_births"
		if `k'==8 local wt "num_deaths"
		su dis_`k' if inrange(birthyr,1950,1954) & `in_`grp'' & exactmatch==0 [aw=`wt']
		* z-score is only filled in for rows belonging to the group
		gen z_dis_`k'_`grp' = (dis_`k'-r(mean))/r(sd) if `in_`grp''
	}
}







*** Collapse for all.
* The pooled sample keeps exactmatch as a cell dimension.
* Births-weighted outcomes (plus raw sums of obs1999 and num_births)
preserve
collapse (mean) dis_7_all=dis_7 z_dis_7_all dis_9_all=dis_9 dis_10_all=dis_10 ///
	dis_11_all=dis_11 dis_12_all=dis_12 dis_13_all=dis_13 dis_14_all=dis_14 ///
	(rawsum) obs1999_all=obs1999 num_births_all=num_births ///
	[aw=num_births], by(birthyr birthmo st_fips co_fips_1 exactmatch)
tempfile t_1
save `t_1'
restore
* Deaths-weighted outcomes (plus raw sum of num_deaths)
preserve
collapse (mean) dis_8_all=dis_8 z_dis_8_all ///
	(rawsum) num_deaths_all=num_deaths ///
	[aw=num_deaths], by(birthyr birthmo st_fips co_fips_1 exactmatch)
tempfile t_2
save `t_2'
restore

*** Collapse for each race x sex group.
* wm = white males, wf = white females, nm = nonwhite males, nf = nonwhite females
local keep_wm "race_white==1 & female==0"
local keep_wf "race_white==1 & female==1"
local keep_nm "female==0 & (race_black==1 | race_other==1)"
local keep_nf "female==1 & (race_black==1 | race_other==1)"

foreach grp in wm wf nm nf {

	* Births-weighted outcomes, saved to tempfile t_<grp>1
	preserve
	keep if exactmatch==0
	keep if `keep_`grp''
	collapse (mean) dis_7_`grp'=dis_7 z_dis_7_`grp' dis_9_`grp'=dis_9 dis_10_`grp'=dis_10 ///
		dis_11_`grp'=dis_11 dis_12_`grp'=dis_12 dis_13_`grp'=dis_13 dis_14_`grp'=dis_14 ///
		(rawsum) obs1999_`grp'=obs1999 num_births_`grp'=num_births ///
		[aw=num_births], by(birthyr birthmo st_fips co_fips_1)
	gen exactmatch=0		// only doing this for all at the moment
	tempfile t_`grp'1
	save `t_`grp'1'
	restore

	* Deaths-weighted outcomes, saved to tempfile t_<grp>2
	preserve
	keep if exactmatch==0
	keep if `keep_`grp''
	collapse (mean) dis_8_`grp'=dis_8 z_dis_8_`grp' ///
		(rawsum) num_deaths_`grp'=num_deaths ///
		[aw=num_deaths], by(birthyr birthmo st_fips co_fips_1)
	gen exactmatch=0
	tempfile t_`grp'2
	save `t_`grp'2'
	restore
}

* Combine NUMIDENT files
* Start from the pooled births-weighted file, then attach every other piece
* one at a time on the cell identifiers. Each merge keeps its own indicator
* variable, named _merge plus the tempfile suffix.
use `t_1', clear
foreach piece in 2 wm1 wm2 wf1 wf2 nm1 nm2 nf1 nf2 {
	merge 1:1 st_fips co_fips_1 birthyr birthmo exactmatch using `t_`piece'', gen(_merge`piece')
}


* Attach variable labels for the pooled sample and each race x sex group
foreach x in all wm wf nm nf {

	* Group description inserted into the label text
	if "`x'"=="all" local g "all"
	if "`x'"=="wm" local g "white males"
	if "`x'"=="wf" local g "white females"
	if "`x'"=="nm" local g "nonwhite males"
	if "`x'"=="nf" local g "nonwhite females"

	* Normalized outcomes
	la var z_dis_7_`x' "Share which lived to 2000 (NUMIDENT), `g' (normalized)"
	la var z_dis_8_`x' "Age at death, conditional on death (NUMIDENT), `g' (normalized)"

	* Raw outcomes
	la var dis_7_`x' "Share which lived to 2000 (NUMIDENT), `g'"
	la var dis_8_`x' "Age at death, conditional on death (NUMIDENT), `g'"

	la var dis_9_`x' "Share which lived to 2012 (NUMIDENT), `g'"
	la var dis_10_`x' "Share which lived to age 40 (NUMIDENT), `g'"
	la var dis_11_`x' "Share which lived to age 5 (NUMIDENT), `g'"
	la var dis_12_`x' "Share which lived to age 10 (NUMIDENT), `g'"
	la var dis_13_`x' "Share which lived to age 20 (NUMIDENT), `g'"
	la var dis_14_`x' "Share which lived to age 30 (NUMIDENT), `g'"

	* Label for the obs1999 count is left disabled
	*la var obs1999_`x' "Number of NUMIDENT obs used to construct dis_7, `g'"

	* Cell sizes. num_deaths used to be labelled twice here; the first label
	* was overwritten straight away, so only the effective one is kept.
	la var num_births_`x' "Number of births, `g'"
	la var num_deaths_`x' "Number of deaths, `g'"

}



* Check sum of weights
* For every group, print the total of the death and birth counts
* over the exactmatch==0 cells (deaths first, then births)
foreach grp in all wm wf nm nf {
	foreach w in deaths births {
		summarize num_`w'_`grp' if exactmatch==0
		display "`r(sum)'"
	}
}


* Overview of the final data set for the log
summarize
describe

* Shrink storage types, write the result, and close the log
compress
save "$datatemp/dtacr_24_birthmo_newsub.dta", replace

log close

/* OLD WEIGHTING:

* Create weight for collapse across survey years
egen tot_perwt = total(perwt), by(year)
gen d_perwt = perwt/tot_perwt		// maintains within year relative weights 
cap drop one
gen one = 1 if perwt!=0 & perwt!=.
egen cell_num = total(one), by(st_fips co_fips_1 birthyr year) // captures cross survey year weights
egen cell_denom = total(one), by(st_fips co_fips_1 birthyr) 
gen yearwgt = cell_num/cell_denom
gen wgt = d_perwt * yearwgt

collapse (mean) hc_* ess_* dis_* nei_* (rawsum) wgt obs=one  [aw=wgt], ///
	by(st_fips co_fips_1 birthyr year)

*/
