﻿* fs_hsacr_1.do
* Create variables to run Hoynes/Schanzenbach/Almond analysis
* Combine census data with treatment data


* Start a fresh log for this run; close any log that is still open first
* (capture keeps the script going when no log is open)
capture log close
log using "$output/fs_hsacr_1.log", replace

* Load the programs that edit county codes
do "$dofile/countyStandardizeGNIS1950.do"



* HSA use averages of hospital and REIS data from the first 5 years of life
* (birth year through birth year + 4). Construct them here before merging.
*
* The leads [_n+1]...[_n+4] are taken WITHIN county (by st_fips co_fips_1), so
* the window can never run into the next county's rows. The -if- condition
* additionally requires the 5th row of the window to be exactly birthyr+4, so
* a county with a gap in its year series gets a missing average rather than an
* average over non-consecutive years. Cohorts with fewer than 5 years of data
* ahead of them (the last 4 years of each county) are set to missing.
* -isid- guarantees one row per county-year, which both the lead logic and the
* later m:1 merges on these keys rely on.

* REIS transfer variables
use "$datatemp/fs_dtacr_1_5.dta", clear
isid st_fips co_fips_1 birthyr, missok
sort st_fips co_fips_1 birthyr
foreach var in tranpcret tranpcmcare1 tranpcpa1 {
	by st_fips co_fips_1: gen `var'_5yr = (`var'+`var'[_n+1]+`var'[_n+2]+`var'[_n+3]+`var'[_n+4])/5 if birthyr[_n+4]==birthyr+4
	* keep only the 5-year average; the annual series is not merged on
	drop `var'
}
tempfile fs_dtacr_1_5
save "`fs_dtacr_1_5'"

* Hospital variables (hospitals and beds per capita)
use "$datatemp/fs_dtacr_1_6.dta", clear
isid st_fips co_fips_1 birthyr, missok
sort st_fips co_fips_1 birthyr
foreach var in nhosp_pc beds_pc {
	by st_fips co_fips_1: gen `var'_5yr = (`var'+`var'[_n+1]+`var'[_n+2]+`var'[_n+3]+`var'[_n+4])/5 if birthyr[_n+4]==birthyr+4
	drop `var'
}
tempfile fs_dtacr_1_6
save "`fs_dtacr_1_6'"


***** Merging starts here *****
* Base file from basic processing, at the
* birth county-birth year-birth month-year-race-sex-exactmatch level
use "$datatemp/dtacr_25_birthmo.dta", clear

* Restrict to the 1950-1981 birth cohorts
keep if birthyr >= 1950 & birthyr <= 1981

* Cells which show up in NUMIDENT but not in census have no census year
drop if year == .

* exactmatch indicates whether the underlying sample consists of exact matches
tabulate exactmatch, missing



* Build one analysis file per value of exactmatch and save it as
* fs_hsacr_1_exactmatch`m'.dta. Only m = 0 is run: exact matches are not
* imposed for now.
* NOTE(review): the body filters the data in memory with -keep- and never
* reloads it, so widening the range (e.g. 0/1) would leave no observations for
* the second pass. Wrap the body in preserve/restore (or reload the base file)
* before extending the loop.
forvalues m=0/0	{

keep if exactmatch==`m'


* combine with treatment data (county-level FSP start dates)
merge m:1 st_fips co_fips_1 using "$datatemp/fs_dtacr_1_1.dta", gen(txmerge)
gen fs_year = fs_year_1
gen fs_month = fs_month_1
*drop fips_* fs_year_* fs_month_*





* combine with reis data (limited to birth years 1959-1980)
* (superseded by the 5-year-average REIS merge below)
*merge m:1 st_fips co_fips_1 birthyr using "$datatemp/fs_dtacr_1_2.dta", gen(reismerge)

* combine with county DB data
merge m:1 st_fips co_fips_1 using "$datatemp/fs_dtacr_1_3.dta", gen(ctydb60merge)

* combine with natality data
merge m:1 st_fips co_fips_1 using "$datatemp/fs_dtacr_1_4.dta", gen(vscdmerge)

* combine with 5-year averages of the REIS transfer variables (tempfile built above)
merge m:1 st_fips co_fips_1 birthyr using "`fs_dtacr_1_5'", gen(reismerge)

* combine with 5-year averages of the hospital variables (tempfile built above)
merge m:1 st_fips co_fips_1 birthyr using "`fs_dtacr_1_6'", gen(ahamerge)

* combine with CHC data (supplies chc_year_exp used below)
merge m:1 st_fips co_fips_1 using "$datatemp/fs_dtacr_1_7.dta", gen(chcmerge)

* FS data don't have info on state 2 (Alaska, if st_fips is the standard state FIPS code)
drop if st_fips==2

* Rows that exist only in the merged-in files have no census year:
* these are places not in census data
drop if year==.


tab birthyr txmerge	// don't have QQQ data for 1950-58, 1979-on

* County 8/14 was created circa 2001
* (Broomfield, Colorado, if these are standard FIPS codes)
drop if st_fips==8 & co_fips_1==14
tab txmerge, m
* Every remaining cell must have matched the treatment file
assert txmerge==3

tab birthyr reismerge, mi
/* Don't have REIS data before 1959 */
*assert reismerge==3 if inrange(birthyr,1959,1981)
*assert reismerge==1 if inrange(birthyr,1950,1958)

assert ctydb60merge==3

assert vscdmerge==3


********** Merging of variables complete, now create treatment and event-time variables *******

* Create indices of birth month, in utero, and program.
* All month indices count months with 1956:1 = 1.
* bindex = index of birth year/month
gen bindex = (birthyr-1956)*12+birthmo
* iuindex = in utero index of beginning of pregnancy (9 months before birth)
gen iuindex = (birthyr-1956)*12+birthmo-9
* pindex = index of year/month of FSP implementation
gen pindex = (fs_year-1956)*12+fs_month
* cindex = index of year of CHC implementation. I assume here that it starts in January
* NOTE: cindex is in YEARS since 1956, not months, so it is not comparable to
* bindex/iuindex/pindex. It is only referenced in the disabled CHC block below.
gen cindex = (chc_year_exp-1956)


* Now create variables for share of time between age A and B that FS is in place.
* Pattern: months from program start to the END of the age window, floored at 0
* and capped at the window length, divided by the window length.
* max(.,0) evaluates to 0 in Stata, so counties with no FSP date would get a
* share of 0; the -replace- after each -gen- sets those to missing instead.

* Age 0-18
gen shareFSPage0_18 = min(max(bindex+18*12-pindex,0),18*12)/(18*12)
replace shareFSPage0_18 = . if fs_year == .
tab birthyr, summarize(shareFSPage0_18) means

* Age 0-5
gen shareFSPage0_5 = min(max(bindex+5*12-pindex,0),5*12)/(5*12)
replace shareFSPage0_5 = . if fs_year == .
tab birthyr, summarize(shareFSPage0_5) means

* Age 5-18: the window runs from bindex+5*12 to bindex+18*12 (13*12 months).
* Exposure is the months from program start to the 18th birthday, capped at
* the window length. The earlier version also subtracted 5*12 inside max(),
* which measured exposure over ages 0-13 instead: a program starting exactly
* on the 5th birthday got 96/156 rather than 1, and the 0-5 and 5-18 months
* did not add up to the 0-18 months.
gen shareFSPage5_18 = min(max(bindex+18*12-pindex,0),13*12)/(13*12)
replace shareFSPage5_18 = . if fs_year == .
tab birthyr, summarize(shareFSPage5_18) means

* Age in utero to 5 (window is 5*12+9 months, starting at iuindex)
gen shareFSPageIU_5 = min(max(iuindex+5*12+9-pindex,0),5*12+9)/(5*12+9)
replace shareFSPageIU_5 = . if fs_year == .
tab birthyr, summarize(shareFSPageIU_5) means

* Age in utero to 18 (window is 18*12+9 months, starting at iuindex)
gen shareFSPageIU_18 = min(max(iuindex+18*12+9-pindex,0),18*12+9)/(18*12+9)
replace shareFSPageIU_18 = . if fs_year == .
tab birthyr, summarize(shareFSPageIU_18) means


*** CHC versions ***
* In this case, if the CHC year variable is missing, I give the kid a 0
* Age 0-5 (computed in whole years, since only the CHC year is used)
gen shareCHCage0_5 = min(max(birthyr+5-chc_year_exp,0),5)/5
replace shareCHCage0_5 = 0 if chc_year_exp == .
replace shareCHCage0_5 = 0 if chc_year_exp > 1974	/* Consistent with Bailey & Goodman-Bacon */
tab birthyr, summarize(shareCHCage0_5) means

* NOTE(review): the disabled block below is not ready to re-enable as is.
* It subtracts the year-based cindex from the month-based bindex, and the
* 5-18 / IU variants use pindex (the FSP date) rather than a CHC date.
/* Age 0-18
gen shareCHCage0_18 = min(max(bindex+18*12-cindex,0),18*12)/(18*12)
replace shareCHCage0_18 = 0 if chc_year_exp == .
tab birthyr, summarize(shareCHCage0_18) means

* Age 5-18
gen shareCHCage5_18 = min(max(bindex+18*12-pindex-5*12,0),13*12)/(13*12)
replace shareCHCage5_18 = 0 if chc_year_exp == .
tab birthyr, summarize(shareCHCage5_18) means

* Age in utero to 5
gen shareCHCageIU_5 = min(max(iuindex+5*12+9-pindex,0),5*12+9)/(5*12+9)
replace shareCHCageIU_5 = 0 if chc_year_exp == .
tab birthyr, summarize(shareCHCageIU_5) means

* Age in utero to 18
gen shareCHCageIU_18 = min(max(iuindex+18*12+9-pindex,0),18*12+9)/(18*12+9)
replace shareCHCageIU_18 = 0 if chc_year_exp == .
tab birthyr, summarize(shareCHCageIU_18) means
*/


* Disabled event-time code. postbirth4_5 previously combined postbirth4 with
* itself; it now uses postbirth5 so the check at the end can sum to one.
/*** Now create event time
gen evtime = pindex - bindex

* construct event time dummies
gen byte prebirth5plus = evtime <= -5*12
for num 0/4: gen byte prebirthX = (evtime>(-1*(X+1))*12 & evtime<=(-1*(X))*12)
for num 1/14: gen byte postbirthX = (evtime>(X-1)*12 & evtime<=X*12)

gen byte prebirth4_3 = prebirth4 == 1 | prebirth3 == 1
gen byte prebirth2_1 = prebirth2 == 1 | prebirth1 == 1
gen byte postbirth0_1 = prebirth0 == 1 | postbirth1 == 1
gen byte postbirth2_3 = postbirth2 == 1 | postbirth3 == 1
gen byte postbirth4_5 = postbirth4 == 1 | postbirth5 == 1
gen byte postbirth6_7 = postbirth6 == 1 | postbirth7 == 1
gen byte postbirth8_9 = postbirth8 == 1 | postbirth9 == 1
gen byte postbirth10_11 = postbirth10 == 1 | postbirth11 == 1
gen byte postbirth12plus = evtime>11*12

* Make sure everyone is allotted to only one event-time variable
gen one = prebirth5plus + prebirth4_3 + prebirth2_1 + postbirth0_1 + postbirth2_3 + postbirth4_5 + postbirth6_7 + postbirth8_9 + postbirth10_11 + postbirth12plus
summ one
drop one
*/


* log 1960 pop (missing where pop60 is zero or missing)
gen lpop60 = ln(pop60)

* Sort, compress, and save
sort st_fips co_fips_1 birthyr birthmo year
order st_fips co_fips_1 birthyr birthmo year

des 
su

compress
save "$datatemp/fs_hsacr_1_exactmatch`m'.dta", replace

}

log close
