﻿* fs_hsacr_1.do
* Create variables to run Hoynes/Schanzenbach/Almond analysis
* Combine census data with treatment data
* Updated: 8/29/2017: Add FSP caseload/pop for 1975 to look at heterogeneity 
* Updated 6/8/2021 Add additional outcomes of higher percent urban / colleges in adult county (Kate Moulton for RESTUD Revision)

* Start a fresh log for this run. Any log still open from an earlier run is
* closed first; -capture- suppresses the error raised when no log is open.
capture log close
log using "$output/fs_hsacr_1_newsub_withCollege.log", replace

* Run the helper do-file that defines the programs used to edit county codes
do "$dofile/countyStandardizeGNIS1950.do"



***** Load the data that everything else is merged onto *****
* Output of the basic processing step, at the
* birth county - birth year - birth month - year - race - sex - exactmatch level
use "$datatemp/dtacr_25_birthmo_newsub_withCollege.dta", clear


* Restrict to birth cohorts 1950-1980
drop if !inrange(birthyr, 1950, 1980)

* Cells with missing year
* To keep the sample size consistent with the birth-year version, a county cell
* with no matched Census observation in a given month is retained
* IF some Census observation matches that county in another month of the same
* birth year (within the same exactmatch group).
* This prevents us from creating extra implicit samples for disclosure purposes
generate validyear = (year != .)
bysort st_fips co_fips_1 birthyr exactmatch: egen keepyr = max(validyear)
tabulate validyear keepyr, missing
* keepyr is 1 when at least one cell in the county/birth year has a valid year
keep if keepyr == 1
drop keepyr validyear
*drop if year==.		// cells which show up in NUMIDENT but not in census

* exactmatch: indicator for whether underlying sample consists of exact matches
tabulate exactmatch, missing



forvalues m=0/0	{ 		// not imposing exact matches for now		

* Keep only the exactmatch group handled by this pass of the loop
keep if exactmatch == `m'



* (1) Treatment data, one record per county.
*     fs_year / fs_month are copies of the "_1" versions of the rollout date.
merge m:1 st_fips co_fips_1 ///
	using "$datatemp/fs_dtacr_1_1.dta", generate(txmerge)
generate fs_year  = fs_year_1
generate fs_month = fs_month_1
*drop fips_* fs_year_* fs_month_*



* (2) REIS data, one record per county and birth year
*     (limited to birth years 1959-1980)
merge m:1 st_fips co_fips_1 birthyr ///
	using "$datatemp/fs_dtacr_1_2.dta", generate(reismerge)



* (3) County DB data, one record per county
merge m:1 st_fips co_fips_1 ///
	using "$datatemp/fs_dtacr_1_3.dta", generate(ctydb60merge)



* (4) Natality data, one record per county
merge m:1 st_fips co_fips_1 ///
	using "$datatemp/fs_dtacr_1_4.dta", generate(vscdmerge)





*merge m:1 st_fips co_fips_1 birthyr using "`fs_dtacr_1_5'", gen(reismerge)	// Not using this data anymore

*merge m:1 st_fips co_fips_1 birthyr using "`fs_dtacr_1_6'", gen(ahamerge)	// Not using this data anymore

* (7) County file flagged by chcmerge (supplies chc_year_exp used further down -- presumably; confirm)
merge m:1 st_fips co_fips_1 ///
	using "$datatemp/fs_dtacr_1_7.dta", generate(chcmerge)

* (8) County file flagged by hungmerge
merge m:1 st_fips co_fips_1 ///
	using "$datatemp/fs_dtacr_1_8.dta", generate(hungmerge)



* Drop Alaska (state FIPS 2): FS data don't have info on Alaska
drop if st_fips==2

*drop if year==. 	// these are places not in census data
* Drop rows that came only from one of the merged-in files (merge code 2),
* i.e. places that are not in the census-based cells. This is what the
* commented-out line above was really after.
drop if (txmerge==2 | reismerge==2 | ctydb60merge==2 | vscdmerge==2 | chcmerge==2 | hungmerge==2)

* Original note: "don't have tx data for 1950-58, 1979-on".
* NOTE(review): the assert below requires every remaining row to be matched
* to the treatment data, so that note may be stale -- confirm.
tab birthyr txmerge

* QQQ
* NOTE(review): the line above was a bare "QQQ" token in the original file
* (apparently a redaction placeholder, like the QQQ in the caseload comment
* further down). QQQ is not a Stata command, so the do-file would stop here
* with an "unrecognized command" error. It is kept only as a comment.
tab txmerge, m
assert txmerge==3



tab birthyr reismerge, mi
/* Don't have REIS data before 1959 */
* Every 1959-1980 row must be matched to REIS; every 1950-1958 row must be unmatched
assert reismerge==3 if inrange(birthyr,1959,1980)
assert reismerge==1 if inrange(birthyr,1950,1958)

* County DB and natality data must be matched for every remaining row
assert ctydb60merge==3

assert vscdmerge==3




********** Merging of variables complete, now create treatment and event-time variables *******
* This code comes from Hilary Hoynes
* Create indices of birth month, in utero, and program.
* All month indices count months with 1956:1 = 1.
* bindex = index of birth year/month where 1956:1=1 (first birth cohort)
gen bindex = (birthyr-1956)*12+birthmo
* iuindex = in utero index of beginning of pregnancy (9 months before birth)
gen iuindex = (birthyr-1956)*12+birthmo-9
* pindex = index of year/month of FSP implementation
gen pindex = (fs_year-1956)*12+fs_month
* cindex = index of year of CHC implementation. I assume here that it starts in January
* Note that cindex is measured in YEARS since 1956, unlike the month indices above.
gen cindex = (chc_year_exp-1956)


* Now create variables for share of time between age A and B that FS is in place
* General form: months between program start and the END of the age window,
* bounded below by 0 (program starts after the window) and above by the
* window length (program starts before the window), divided by window length.
* max(.,0) evaluates to 0 in Stata, so counties with no rollout date would get
* a share of 0; the -replace- lines set those to missing instead.

* Age 0-18
gen shareFSPage0_18 = min(max(bindex+18*12-pindex,0),18*12)/(18*12)
replace shareFSPage0_18 = . if fs_year == .
tab birthyr, summarize(shareFSPage0_18) means

* Age 0-5
gen shareFSPage0_5 = min(max(bindex+5*12-pindex,0),5*12)/(5*12)
replace shareFSPage0_5 = . if fs_year == .
tab birthyr, summarize(shareFSPage0_5) means

* Age 5-18
* The window ends at age 18 (bindex+18*12) and is 13 years long.
* FIX: the earlier version subtracted an additional 5*12 inside max(), which
* measured exposure between ages 0 and 13 instead of 5 and 18 (e.g. a child
* whose county started FSP exactly at age 5 got 96/156 instead of 1). With the
* formula below, 18*shareFSPage0_18 = 5*shareFSPage0_5 + 13*shareFSPage5_18.
gen shareFSPage5_18 = min(max(bindex+18*12-pindex,0),13*12)/(13*12)
replace shareFSPage5_18 = . if fs_year == .
tab birthyr, summarize(shareFSPage5_18) means

* Age in utero to 5 (window of 5*12+9 months starting at iuindex)
gen shareFSPageIU_5 = min(max(iuindex+5*12+9-pindex,0),5*12+9)/(5*12+9)
replace shareFSPageIU_5 = . if fs_year == .
tab birthyr, summarize(shareFSPageIU_5) means

* Age in utero to 18 (window of 18*12+9 months starting at iuindex)
gen shareFSPageIU_18 = min(max(iuindex+18*12+9-pindex,0),18*12+9)/(18*12+9)
replace shareFSPageIU_18 = . if fs_year == .
tab birthyr, summarize(shareFSPageIU_18) means


*** CHC versions ***
* Share of ages 0-5 (measured in whole years) with a CHC in place.
* The share is set to 0 when the CHC year is missing or later than 1974
* (the 1974 cutoff is consistent with Bailey & Goodman-Bacon). A missing
* chc_year_exp compares as greater than 1974, so the single condition below
* covers both cases.
generate shareCHCage0_5 = cond(chc_year_exp > 1974, 0, min(max(birthyr+5-chc_year_exp,0),5)/5)
tabulate birthyr, summarize(shareCHCage0_5) means

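/* DISABLED CODE. NOTE(review): the formulas in this block need fixing before
   they are re-enabled: cindex is measured in years while bindex/iuindex are in
   months, and the 5-18 and in-utero versions subtract pindex (the FSP start)
   where the CHC start is presumably intended.
* Age 0-18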
gen shareCHCage0_18 = min(max(bindex+18*12-cindex,0),18*12)/(18*12)
replace shareCHCage0_18 = 0 if chc_year_exp == .
tab birthyr, summarize(shareCHCage0_18) means

* Age 5-18
gen shareCHCage5_18 = min(max(bindex+18*12-pindex-5*12,0),13*12)/(13*12)
replace shareCHCage5_18 = 0 if chc_year_exp == .
tab birthyr, summarize(shareCHCage5_18) means

* Age in utero to 5
gen shareCHCageIU_5 = min(max(iuindex+5*12+9-pindex,0),5*12+9)/(5*12+9)
replace shareCHCageIU_5 = 0 if chc_year_exp == .
tab birthyr, summarize(shareCHCageIU_5) means

* Age in utero to 18
gen shareCHCageIU_18 = min(max(iuindex+18*12+9-pindex,0),18*12+9)/(18*12+9)
replace shareCHCageIU_18 = 0 if chc_year_exp == .
tab birthyr, summarize(shareCHCageIU_18) means
*/


/*** Now create event time
gen evtime = pindex - bindex

* construct event time dummies
gen byte prebirth5plus = evtime <= -5*12
for num 0/4: gen byte prebirthX = (evtime>(-1*(X+1))*12 & evtime<=(-1*(X))*12)
for num 1/14: gen byte postbirthX = (evtime>(X-1)*12 & evtime<=X*12)

gen byte prebirth4_3 = prebirth4 == 1 | prebirth3 == 1
gen byte prebirth2_1 = prebirth2 == 1 | prebirth1 == 1
gen byte postbirth0_1 = prebirth0 == 1 | postbirth1 == 1
gen byte postbirth2_3 = postbirth2 == 1 | postbirth3 == 1
gen byte postbirth4_5 = postbirth4 == 1 | postbirth5 == 1
gen byte postbirth6_7 = postbirth6 == 1 | postbirth7 == 1
gen byte postbirth8_9 = postbirth8 == 1 | postbirth9 == 1
gen byte postbirth10_11 = postbirth10 == 1 | postbirth11 == 1
gen byte postbirth12plus = evtime>11*12

* Make sure everyone is allotted to only one event-time variable
gen one = prebirth5plus + prebirth4_3 + prebirth2_1 + postbirth0_1 + postbirth2_3 + postbirth4_5 + postbirth6_7 + postbirth8_9 + postbirth10_11 + postbirth12plus
summ one
drop one
*/


* For every FSP exposure share, build its interaction with the 1960 county
* poverty rate (inc3k60) as pov_<share name>, and summarize it
foreach expo of varlist shareFSPage* {
	generate pov_`expo' = `expo' * inc3k60
	summarize pov_`expo'
}


* Bring in FSP caseload/pop
* NOTE we have to use REIS population data instead of SEER, because SEER is missing NYC in 1975
* The data in memory are set aside with -preserve- while the county-level
* caseload file is built, then brought back with -restore- and merged to it.
* countyFipsToREISfips and fixCounty are programs defined outside this file
* (presumably by countyStandardizeGNIS1950.do -- confirm).
preserve

* --- 1975 caseload by county ---
use "$datap/countyLevel/FSP_caseload.dta", clear
gen fips = 1000*stfips + countyfips
countyFipsToREISfips, county(fips)
fixCounty, county(fips)
collapse (rawsum) cl1975, by(fips)
* The value recorded under fips 36061 is treated as the NYC-wide caseload
sum cl1975 if fips==36061	// QQQ caseload is all included in QQQ
local cl = `r(mean)'		// Give each borough the NYC-wide count
replace cl1975 = `cl' if inlist(fips,36005,36047,36061,36081,36085)	// Replace all NYC boroughs with NYC total
tempfile fsp
save "`fsp'"

* --- Total 1975 NYC population (sum over the five boroughs) ---
use "$datap/countyLevel/reis_transfers.dta", clear
keep if year==1975
gen fips = 1000*stfips + countyfips
keep if inlist(fips,36005,36047,36061,36081,36085)	// Keep NYC boroughs
sum annualpop
local nycpop = `r(sum)'					// Save total NYC population

* --- 1975 population by county, with each NYC borough given the NYC total ---
use "$datap/countyLevel/reis_transfers.dta", clear
keep if year==1975
gen fips = 1000*stfips + countyfips
replace annualpop = `nycpop' if inlist(fips,36005,36047,36061,36081,36085)	// Replace with total NYC pop
countyFipsToREISfips, county(fips)
fixCounty, county(fips)
collapse (rawsum) annualpop, by(fips)

* --- Caseload per capita and its population-weighted quartiles ---
merge 1:1 fips using "`fsp'"
tab fips _merge if _merge!=3, sum(annualpop)	// Should be just 2 counties with 0 pop or caseload in 1975
tab fips _merge if _merge!=3, sum(cl1975)
drop _merge
gen FSPpc75 = cl1975/annualpop
lab var FSPpc75 "County Food Stamps caseload per population, 1975"
sum FSPpc75 [aw=annualpop], d
* FIX: restrict to nonmissing FSPpc75. Stata treats a missing value as larger
* than any number, so without the restriction every county with missing
* caseload per capita (unmatched above, or zero population) was silently
* assigned to quartile 4. Those counties now get a missing quartile.
gen FSPquartile = 1 + (FSPpc75>`r(p25)') + (FSPpc75>`r(p50)') + (FSPpc75>`r(p75)') if !missing(FSPpc75)
tabstat FSPpc75, by(FSPquartile) stat(mean sd min max n)
lab var FSPquartile "Quartile of pop-weighted distribution of FSP caseload"

* Split fips back into the state/county keys used by the main data
gen st_fips = floor(fips/1000)
gen co_fips_1 = fips - 1000*st_fips
keep FSPpc75 FSPquartile st_fips co_fips_1
tempfile fsppc
save "`fsppc'"
restore

* Attach the county-level caseload measures to the main data and list the
* counties that did not match
merge m:1 st_fips co_fips_1 using "`fsppc'", gen(fspmerge)
cap drop fips
gen fips = 1000*st_fips+co_fips_1
tab fips fspmerge if fspmerge!=3, m
drop fips


* Natural log of 1960 population
generate lpop60 = ln(pop60)


* Data check (added 6/30/2017): histogram of the 1960 poverty rate (inc3k60),
* with cells frequency-weighted by obs_all, exported as a PDF
graph twoway (histogram inc3k60 [fweight=obs_all], bin(100)), ///
	title("Distribution of birth-county 1960 poverty rates") ///
	subtitle("All individuals born 1950-1980, age 25-54 in 2000-2013")
graph export "$output/povhist.pdf", replace




* Put the identifying variables first, both in row order and in column order
local keyvars st_fips co_fips_1 birthyr birthmo year
sort `keyvars'
order `keyvars'

* Log the contents of the final file
describe
summarize

* Shrink storage types, then write one output file per exactmatch value
compress
save "$datatemp/fs_hsacr_1_newsub_exactmatch`m'_withCollege.dta", replace





}

* Close the log opened at the top of this do-file
log close
