﻿* fs_dtacr_2.do
* Combine census data with treatment data
* This dofile adds some covariates to the ACS/Census/NUMIDENT data, creates
* event time dummies, and then adds a number of variables we use to look at heterogeneity
* Updated 5/2/2016 (use month to define exposure)
* Updated 7/11/2016 (Incorporate geographically consistent "super counties")
* Updated 9/11/2016 (Add new variables for LSR analysis)
* Updated 9/23/2016 (Changed coding of variables for LSR analysis)
* Updated 5/2017 (Added placebo treatment)
* Updated: 8/29/2017: Add FSP caseload/pop for 1975 to look at heterogeneity 

* Start a fresh log for this run (close any log a previous run left open)
capture log close
log using "$output/fs_dtacr_2.log", replace

* Run the do-file that defines the programs used to edit county codes
do "$dofile/countyStandardizeGNIS1950.do"

* Input comes from the basic processing step, at the
* birth county-birth year-birth month-year-race-sex-exactmatch level
use "$datatemp/dtacr_25_newsub.dta", clear

* Restrict to the 1950-1980 birth cohorts
keep if birthyr >= 1950 & birthyr <= 1980
* Cells which show up in NUMIDENT but not in census have no year
drop if year == .

* exactmatch: indicator for whether underlying sample consists of exact matches
tabulate exactmatch, missing


forvalues m=0/0	{ 		// not imposing exact matches for now		

* Keep only the exactmatch group handled in this pass of the m loop
keep if exactmatch == `m'



* Each merge below is many-to-one on county (REIS also on birth year) and
* stores its match result in its own indicator so it can be checked afterwards.

* Treatment data
merge m:1 st_fips co_fips_1 using "$datatemp/fs_dtacr_1_1.dta", generate(txmerge)
generate fs_year = fs_year_1

* REIS data (limited to birth years 1959-1980)
merge m:1 st_fips co_fips_1 birthyr using "$datatemp/fs_dtacr_1_2.dta", generate(reismerge)

* County DB data
merge m:1 st_fips co_fips_1 using "$datatemp/fs_dtacr_1_3.dta", generate(ctydb60merge)

* Natality data
merge m:1 st_fips co_fips_1 using "$datatemp/fs_dtacr_1_4.dta", generate(vscdmerge)

* Hunger death data
merge m:1 st_fips co_fips_1 using "$datatemp/fs_dtacr_1_8.dta", generate(hungmerge)


* FS data don't have info on QQQ
drop if st_fips == 2

* Rows with no year are places not in census data (brought in only by the merges)
drop if year == .

* Don't have QQQ data for 1950-58, 1979-on
tabulate birthyr txmerge

* QQQ, QQQ (created circa 2001)
drop if st_fips == 8 & co_fips_1 == 14
tabulate txmerge, missing
* Every remaining cell must have treatment data
assert txmerge == 3


tabulate birthyr reismerge, missing
* Don't have REIS data before 1959: matched from 1959 on, master-only before
assert reismerge == 3 if inrange(birthyr, 1959, 1980)
assert reismerge == 1 if inrange(birthyr, 1950, 1958)

* County DB and natality data must match for every cell
assert ctydb60merge == 3

assert vscdmerge == 3

* Treatment variable: age when food stamps were rolled out in birth county,
* measured in years as rollout year minus birth year
generate exp = fs_year - birthyr

* Placebo version -- randomly assign FSP implementation date
* This is used to run a permutation test
* NOTE: the statement order below is kept fixed; the draws depend on the seed
* and on the order of the one-row-per-fips data at the time of the draw.
set seed 12345
preserve
capture drop tag
egen tag = tag(fips)
keep if tag == 1
keep fips
* One placebo rollout year per fips, drawn uniformly from the integers 1961-1976
generate fs_year_placebo = runiformint(1961,1976)
tempfile placebo_years
save "`placebo_years'"
restore
merge m:1 fips using "`placebo_years'", generate(pmerge)
* Every cell must receive a placebo year
assert pmerge == 3
drop pmerge

* Placebo counterpart of exp
generate expPlac = fs_year_placebo - birthyr

tabulate exp, missing
tabulate expPlac, missing



* BIRTH-YEAR VERSION: group endpoints at -6 and 18
* exp2 keeps the version of exp without endpoints grouped
generate exp2 = exp

* Apply the same endpoint grouping to the actual and the placebo event time,
* and set age 10 as the omitted category for the xi dummies created below.
* Missing values are left missing (they are excluded from the upper grouping).
foreach ev in exp expPlac {
	replace `ev' = -6 if `ev' < -6
	replace `ev' = 18 if `ev' > 18 & !missing(`ev')
	char `ev'[omit] 10
}

* Create event time dummies with prefix _TB_
xi, prefix(_TB_) i.exp i.expPlac

* Check the dummies against the event time they were built from
tabstat _TB_exp_*, by(exp)

tabstat _TB_expPlac_*, by(expPlac)


*** Extra variables for tests of heterogeneity ***

/* Interaction of event study dummies with 1960 poverty rate
* To save storage space, this is commented out. Can create in the analysis file
foreach v of varlist _TB_exp_* {
	gen pov`v' = inc3k60 * `v'
}
tabstat pov_TB_exp_*, by(exp)
*/

* New addition, 7/29/2016: Stratify sample by county poverty rate in 1960
* Use the Lampman measure here of families under $3,000 income
* povgroup = 1 when inc3k60 is above the obs_all-weighted 75th percentile, else 0.
* Stata treats missing values as larger than any number, so an unrestricted
* (inc3k60 > r(p75)) would code cells with missing inc3k60 as 1. The comparison is
* therefore restricted to non-missing inc3k60, which also makes the assert below
* a real check that every cell has a poverty group.
summarize inc3k60 [aw=obs_all], detail
generate povgroup = (inc3k60 > r(p75)) if !missing(inc3k60)
tabulate povgroup, missing
assert !missing(povgroup)
label variable povgroup "In top quartile of 1960 poverty rate"

* New addition: stratify sample by hungerdeaths/total deaths or hungerdeaths/total pop in 1960
* Same construction as povgroup: 1 above the weighted 75th percentile, 0 otherwise,
* and missing wherever the underlying rate is missing.
summarize hungrate [aw=obs_all], detail
generate hunggroup1 = (hungrate > r(p75)) if !missing(hungrate)
label variable hunggroup1 "In worst quartile of 1960 hungerdeaths/totdeaths"

summarize hungshare [aw=obs_all], detail
generate hunggroup2 = (hungshare > r(p75)) if !missing(hungshare)
label variable hunggroup2 "In worst quartile of 1960 hungerdeaths/totpop"

/*
* Create different event-time indicators by poverty group
char 	exp[omit] 0
xi,	prefix(_T1_) i.exp
xi, 	prefix(_T0_) i.exp
forval i = 0/1 {
	foreach var of varlist _T`i'_* {
		replace `var' = 0 if povgroup!=`i'
	}
}
tabstat _T0_* _T1_*, by(exp)
*/


* Bring in FSP caseload/pop
* NOTE we have to use REIS population data instead of SEER, because SEER is missing NYC in 1975
* The county-level file is built inside preserve/restore and then merged onto the
* main data by st_fips co_fips_1.
preserve

* Step 1: 1975 caseload by standardized county
use "$datap/countyLevel/FSP_caseload.dta", clear
gen fips = 1000*stfips + countyfips
countyFipsToREISfips, county(fips)
fixCounty, county(fips)
collapse (rawsum) cl1975, by(fips)
sum cl1975 if fips==36061	// QQQ caseload is all included in QQQ
local cl = `r(mean)'		// Give each borough the NYC-wide count
replace cl1975 = `cl' if inlist(fips,36005,36047,36061,36081,36085)	// Replace all NYC boroughs with NYC total
tempfile fsp
save "`fsp'"

* Step 2: total 1975 population of the five NYC boroughs
use "$datap/countyLevel/reis_transfers.dta", clear
keep if year==1975
gen fips = 1000*stfips + countyfips
keep if inlist(fips,36005,36047,36061,36081,36085)	// Keep NYC boroughs
sum annualpop
local nycpop = `r(sum)'					// Save total NYC population

* Step 3: 1975 population by standardized county, with each NYC borough carrying
* the NYC total so that it lines up with the NYC-wide caseload from step 1
use "$datap/countyLevel/reis_transfers.dta", clear
keep if year==1975
gen fips = 1000*stfips + countyfips
replace annualpop = `nycpop' if inlist(fips,36005,36047,36061,36081,36085)	// Replace with total NYC pop
countyFipsToREISfips, county(fips)
fixCounty, county(fips)
collapse (rawsum) annualpop, by(fips)

* Step 4: combine caseload and population
merge 1:1 fips using "`fsp'"
tab fips _merge if _merge!=3, sum(annualpop)	// Should be just 2 counties with 0 pop or caseload in 1975
tab fips _merge if _merge!=3, sum(cl1975)
drop _merge
gen FSPpc75 = cl1975/annualpop
lab var FSPpc75 "County Food Stamps caseload per population, 1975"

* Quartile of the population-weighted distribution of FSPpc75.
* FSPpc75 is missing for counties without a caseload or population match and for
* counties with zero population. Stata treats missing as larger than any number,
* so without the if-condition those counties would be assigned to quartile 4;
* they are left missing instead.
sum FSPpc75 [aw=annualpop], d
gen FSPquartile = 1 + (FSPpc75>`r(p25)') + (FSPpc75>`r(p50)') + (FSPpc75>`r(p75)') if !missing(FSPpc75)
tabstat FSPpc75, by(FSPquartile) stat(mean sd min max n)
lab var FSPquartile "Quartile of pop-weighted distribution of FSP caseload"

* Split fips back into the state/county keys used by the main data
gen st_fips = floor(fips/1000)
gen co_fips_1 = fips - 1000*st_fips
keep FSPpc75 FSPquartile st_fips co_fips_1
tempfile fsppc
save "`fsppc'"
restore

* Step 5: attach to the main data and list counties that did not match
merge m:1 st_fips co_fips_1 using "`fsppc'", gen(fspmerge)
cap drop fips
gen fips = 1000*st_fips+co_fips_1
tab fips fspmerge if fspmerge!=3, m
drop fips





* Natural log of 1960 population
generate lpop60 = ln(pop60)


* 4-level region variable (1==northeast, 2==midwest, 3==south, 4==west)
* Regions 1-3 are assigned from explicit state FIPS lists; region 4 is every
* remaining observation with a non-missing st_fips.
generate region = .
replace region = 1 if inlist(st_fips, 9, 23, 25, 33, 44, 50, 34, 36, 42)
replace region = 2 if inlist(st_fips, 17, 18, 26, 39, 55, 19, 20, 27, 29, 31, 38, 46)
replace region = 3 if inlist(st_fips, 10, 11, 12, 13, 24, 37, 45, 51, 54, 1, 21, 28, 47, 5, 22, 40, 48)
replace region = 4 if region == . & st_fips != .

* South indicator; missing wherever region is missing
generate south = (region == 3) if region != .



*** Create spline ***
* This is defined only for observations in balanced-panel window
* First FS year is 1961, last is 1976. So balance is from -4 to 11, full sample from -19 to +26
* Linear spline in exp with knots at -1 and 5; the three terms are set to 0 for
* observations outside the balanced window (observations with missing exp are left alone).
mkspline spline1_1 -1 spline1_2 5 spline1_3 = exp
foreach s in spline1_1 spline1_2 spline1_3 {
	replace `s' = 0 if !inrange(exp,-4,11) & exp != .
}

* Placebo spline
* Same construction on the placebo event time. The window test uses expPlac:
* the earlier version tested exp here, which zeroed the placebo spline according
* to the actual event time rather than the placebo one.
mkspline splineP_1 -1 splineP_2 5 splineP_3 = expPlac
foreach s in splineP_1 splineP_2 splineP_3 {
	replace `s' = 0 if !inrange(expPlac,-4,11) & expPlac != .
}

* Spline interacted with poverty rate
foreach v of varlist spline1_* {
	gen pov_`v' = inc3k60 * `v'
}

* Sort rows by the cell identifiers and move those identifiers to the front
sort st_fips co_fips_1 birthyr year
order st_fips co_fips_1 birthyr year

* Write a description and summary statistics of the final data to the log
describe
summarize

* Shrink storage types where possible, then save one file per exactmatch value
compress
save "$datatemp/fs_dtacr_2_exactmatch`m'_newsub.dta", replace





}		// end m (exactmatch) loop

* Close the log opened at the top of this do-file
log close
