﻿* fs_dtacr_2.do
* Combine census data with treatment data
* Updated 5/2/2016 (use month to define exposure)
* Updated 7/11/2016 (Incorporate geographically consistent "super counties")
* Updated 9/11/2016 (Add new variables for LSR analysis)
* Updated 9/23/2016 (Changed coding of variables for LSR analysis)

* Start a fresh log for this do-file (silently close any log that is still open)
capture log close
log using "$output/fs_dtacr_2.log", replace

* Run the do-file that defines the county-code editing programs
do "$dofile/countyStandardizeGNIS1950.do"

* Input cells come from the basic processing step, at the
* birth county / birth year / birth month / year / race / sex / exactmatch level
use "$datatemp/dtacr_25.dta", clear

* Restrict to birth cohorts 1950-1980
drop if !inrange(birthyr,1950,1980)
* Remove cells which show up in NUMIDENT but not in census (no census year)
keep if year!=.

* exactmatch flags whether the underlying sample consists of exact matches
tabulate exactmatch, missing


* Loop over exact-match status; only m==0 is run (not imposing exact matches for now).
* NOTE(review): the keep below is destructive and the data are not reloaded inside
* the loop, so widening the range (e.g. to 0/1) would leave later passes with no
* observations unless a reload or preserve/restore is added.
forvalues m = 0/0 {

keep if exactmatch == `m'


* Attach the treatment data, keyed on state and county
merge m:1 st_fips co_fips_1 using "$datatemp/fs_dtacr_1_1.dta", generate(txmerge)
generate fs_year = fs_year_1
*gen fs_month = fs_month_1
*drop fips_* fs_year_* fs_month_*





* Attach the REIS data (limited to birth years 1959-1980), keyed on county and birth year
merge m:1 st_fips co_fips_1 birthyr using "$datatemp/fs_dtacr_1_2.dta", generate(reismerge)

* Attach the county DB data
merge m:1 st_fips co_fips_1 using "$datatemp/fs_dtacr_1_3.dta", generate(ctydb60merge)

* Attach the natality data
merge m:1 st_fips co_fips_1 using "$datatemp/fs_dtacr_1_4.dta", generate(vscdmerge)

* Attach the hunger death data
merge m:1 st_fips co_fips_1 using "$datatemp/fs_dtacr_1_8.dta", generate(hungmerge)

* State FIPS 2 (Alaska) is dropped: the FS data carry no information for it
drop if st_fips == 2

* Cross-tab of birth year by treatment-merge status
* (original note: a data source is unavailable for 1950-58 and 1979 on)
tabulate birthyr txmerge

* Rows with no census year are places not in the census data (using-only merge rows)
drop if year == .

* State 8 / county 14 (Broomfield County, Colorado) was created circa 2001
drop if st_fips == 8 & co_fips_1 == 14
tabulate txmerge, missing
assert txmerge == 3

tabulate birthyr reismerge, missing
* REIS data do not exist before 1959: matched from 1959 on, master-only before that
assert reismerge == 3 if birthyr >= 1959 & birthyr <= 1980
assert reismerge == 1 if birthyr >= 1950 & birthyr <= 1958

* Every remaining cell must match the county DB and natality files
assert ctydb60merge == 3

assert vscdmerge == 3

* Treatment variable: age when food stamps were rolled out in the birth county
generate exp = fs_year - birthyr		// measured in years
*gen exp = (fs_year*12 + fs_month) - (birthyr*12 + birthmo)	// measured in months
tabulate exp, missing

* need to address small number of places which are missing FS data (probably drop them)

* There has been some question about whether to do analysis at the birth-year/county/year
* level or the birth-year/birth-month/county/year level. Thus, 2 versions follow:

/* BIRTH-MONTH VERSION: group endpoints together (perhaps revisit this) 
replace exp = -72 if exp<-72 	 	// born 6 or more years after roll-out
replace exp = 215 if exp>215 & exp!=.	// born 18 or more years before roll-out

* put exposure into 6 month bins
egen _exp = cut(exp), at(-72(6)216)    

tab _exp, mi 

tab exp _exp, mi
assert exp==. if _exp==.

char    _exp[omit] 0
xi,     prefix(_T) i._exp

tabstat _T_exp*, by(_exp)
tabstat _T_exp*, by(exp)
*/


* BIRTH-YEAR VERSION: group endpoints together
* Three event-time variables are built from the same raw exposure, each with a
* different pair of grouped endpoints:
*   exp  : bottom bin -6,  top bin 18
*   exp2 : bottom bin -9,  top bin 20
*   exp3 : bottom bin -12, top bin 22
gen exp2 = exp	/* copy of raw exposure; wider endpoints applied below */
gen exp3 = exp	/* copy of raw exposure; widest endpoints applied below */

* Stata treats missing as larger than any number, so the upper-endpoint rules must
* exclude missing explicitly; otherwise cells with no FS year would be silently
* recoded into the top bin.
replace exp = -6 if exp < -6
replace exp = 18 if exp > 18 & !missing(exp)
* Everything below -8 goes to the -9 bin (so -8 stays its own category)
replace exp2 = -9 if exp2 < -8
replace exp2 = 20 if exp2 > 20 & !missing(exp2)
replace exp3 = -12 if exp3 <-12
replace exp3 = 22 if exp3>22 & !missing(exp3)

* Event-time dummies; the category with value 10 is the omitted one for each variable
char 	exp[omit] 10
char    exp2[omit] 10
char	exp3[omit] 10
xi, 	prefix(_TB_) i.exp i.exp2 i.exp3

tabstat _TB_exp_*, by(exp)

* New addition, 7/29/2016: Stratify sample by county poverty rate in 1960
* Use the Lampman measure here of families under $3,000 income
* The cutoff is the obs_all-weighted 75th percentile of inc3k60.
sum inc3k60 [aw=obs_all], d
* Missing compares as larger than any number in Stata, so restrict to non-missing
* inc3k60; otherwise counties with no poverty measure would be coded as high-poverty
* and the assert below could never fail.
gen povgroup = (inc3k60 > r(p75)) if !missing(inc3k60)
tab povgroup, m
assert povgroup != .
lab var povgroup "In top quartile of 1960 county poverty rate"

* New addition: stratify sample by hungerdeaths/total deaths or hungerdeaths/total pop in 1960
* Both indicators use the obs_all-weighted 75th percentile as the cutoff and are
* left missing where the underlying rate is missing.

sum hungrate [aw=obs_all], d
gen hunggroup1 = (hungrate > r(p75)) if !missing(hungrate)
lab var hunggroup1 "In worst quartile of 1960 hungerdeaths/totdeaths"

sum hungshare [aw=obs_all], d
gen hunggroup2 = (hungshare> r(p75)) if !missing(hungshare)
lab var hunggroup2 "In worst quartile of 1960 hungerdeaths/totpop"

/*
* Create different event-time indicators by poverty group
char 	exp[omit] 0
xi,	prefix(_T1_) i.exp
xi, 	prefix(_T0_) i.exp
forval i = 0/1 {
	foreach var of varlist _T`i'_* {
		replace `var' = 0 if povgroup!=`i'
	}
}
tabstat _T0_* _T1_*, by(exp)


* Add variables for LaFortune-Rothstein-Schanzenbach analysis
* 9/23: changing this in two ways. 1) Use mkspline command so the splines connect
*				   2) Allow the event study to be non-parametric outside of the region of
*				      balance

*/


* Linear spline in exp with knots at -1 and 5 (three connected segments)
mkspline spline1_1 -1 spline1_2 5 spline1_3 = exp
* Zero out every segment for cells whose exp is outside -4 to 11
foreach seg of varlist spline1_1 spline1_2 spline1_3 {
	replace `seg' = 0 if !inrange(exp,-4,11)
}

/*
forvalues i=2/3 {

mkspline spline`i'_1 -1 spline`i'_2 5 spline`i'_3 = exp`i'
replace spline`i'_1=0 if !inrange(exp`i',-4,11)
replace spline`i'_2=0 if !inrange(exp`i',-4,11)
replace spline`i'_3=0 if !inrange(exp`i',-4,11)

}
*/
* Identify observations in balanced-panel window
* First FS year is 1961, last is 1976. So balance is from -4 to 11, full sample from -19 to +26
*gen lsr = (exp2 <= 11 & exp2 >= -4)	/* Use these observations only if excluding extreme event-time */
*drop exp2

* log 1960 pop
gen lpop60 = ln(pop60)

* exp is built as fs_year - birthyr, i.e. it is measured in years, not months
la var exp "Age in years at FS roll out"
*la var _exp "Age in months at FS roll out, in 6 month bins"

* Put the cell identifiers first and sort on them
sort st_fips co_fips_1 birthyr year
order st_fips co_fips_1 birthyr year

* Write the variable list and summary statistics to the log
des 
su

** South / non-south indicator for one of Hilary's desired stratifications

* 4-level region variable built from state FIPS codes
* (1==northeast, 2==midwest, 3==south, 4==west)
gen region = .
replace region = 1 if inlist(st_fips, 9, 23, 25, 33, 34, 36, 42, 44, 50)
replace region = 2 if inlist(st_fips, 17, 18, 19, 20, 26, 27, 29, 31, 38, 39, 46, 55)
replace region = 3 if inlist(st_fips, 1, 5, 10, 11, 12, 13, 21, 22, 24, 28, 37, 40, ///
	45, 47, 48, 51, 54)
* Every remaining state with a non-missing code is assigned to the west
replace region = 4 if region==. & st_fips!=.

* south is 1 for region 3, 0 for the other regions, and missing when region is missing
gen south = (region==3) if region!=.

* Shrink storage types, then write one output file per exact-match value
compress
save "$datatemp/fs_dtacr_2_exactmatch`m'.dta", replace

}		// end of the m (exactmatch) loop

* Close the log opened at the top of this do-file
log close
