* master_foodstamps.do
* Bryan Stuart
* Updated by Brenden Timpe and Ari Binder
* Updated by Kate Moulton
* Analysis for food stamps project

* Type this into the Konsole shell to run (batch submission):
* qstata --dofile=master_foodstamps.do --statatype=mp --memsize=120000 --pbsname=foodstamps

* Session setup
* Interpret the commands below under Stata 14.0 rules (version control)
version 14.0 
* Start from an empty session: data and other objects held in memory are dropped
clear all
* Never pause output with --more--. The "perm" option also stores this as the
* default for future Stata sessions, not just for this run.
set more off, perm

* Global macros
* Directory roots shared with every do-file launched from this master file.
* Globals (unlike locals) stay defined inside the do-files called below.

* Shared input data (chetty_data_040816.dta and the countyLevel files are read from here)
global datap "/projects/data"		 
* Intermediate files written and read by the data-processing steps
global datatemp "/projects/programs/foodstamps/master_folder/LE_build/datatemp"
* Location of the dtacr_20 and dtacr_22 inputs to the collapse steps
global datatemp_basic "/projects/programs/foodstamps/master_folder/LE_build/datatemp_basic"
* Directory holding the do-files run from this master file
global dofile "/projects/programs/foodstamps/master_folder/LE_build/20181010dofiles"
* Output directory; currently the same directory as the do-files
global output "$dofile"
* Not referenced in this file; presumably used inside the called do-files -- TODO confirm
global datatemp_final "/projects/programs/foodstamps/master_folder/datatemp"

* Not referenced in this file; presumably a sample percentage read by the
* called do-files -- TODO confirm
global sample = 100

* Alternate do-file and output locations, currently disabled
*global dofile "/programs/transfer/krista/foodstamps/dofile"
*global output "/programs/transfer/krista/foodstamps/output"

******* UNZIP FILES
*!nice gunzip "$datatemp_basic/dtacr_20.dta.gz"
*!nice gunzip "$datatemp_basic/dtacr_22.dta.gz"
*!nice gunzip "$datatemp/fs_dtacr_2_exactmatch0_newsub.dta.gz"
*!nice gunzip "$datatemp/dtacr_23_newsub.dta.gz"
*!nice gunzip "$datatemp/dtacr_24_newsub.dta.gz"
*!nice gunzip "$datatemp/dtacr_25_newsub.dta.gz"
*!nice gunzip "$datatemp/dtacr_23_birthmo_newsub.dta.gz"
*!nice gunzip "$datatemp/dtacr_24_birthmo_newsub.dta.gz"
*!nice gunzip "$datatemp/dtacr_25_birthmo_newsub.dta.gz"
*!nice gunzip "$datatemp/fs_hsacr_1_newsub_exactmatch0.dta.gz"

/***** DATA PROCESSING *******/

/* Might want to re-do basic processing (dtacr_23 and dtacr_25) after deciding 
how to handle missing and allocated values. Not doing anything right now. */

*** First, collapse raw data by birth year for main analysis ***
* Run these three steps in order: per the Input/Output notes, the combine
* step reads what the two collapse steps write.

/* Limit sample, create indices, and collapse census data */
/* Input: $datatemp_basic/dtacr_20 */
/*	  $datap/chetty_data_040816.dta */
/* Output: $datatemp/dtacr_23_newsub */
/* Disabled call to the do-file without the _tarduno suffix (presumably the earlier version -- confirm): */
/* do "$dofile/dtacr_23_newsubgroups.do" */
do "$dofile/dtacr_23_newsubgroups_tarduno.do"

/* Collapse NUMIDENT data */
/* Input: $datatemp_basic/dtacr_22 */
/* Output: $datatemp/dtacr_24_newsub */
do "$dofile/dtacr_24_newsubgroups_tarduno.do"

/* Combine census and NUMIDENT collapsed data */
/* Input: $datatemp/dtacr_23, dtacr_24 */
/*	  (presumably the _newsub files written by the two steps above -- TODO confirm in the do-file) */
/* Output: $datatemp/dtacr_25_newsub */
do "$dofile/dtacr_25_newsubgroups_tarduno.do"


*** Next, collapse raw data separately by birth month for "exposure" specifications ***
* Same three-step sequence as the birth-year collapse, at the birth-month level.
* Run in order: per the Input/Output notes, the combine step reads what the
* two collapse steps write.

/* Limit sample, create indices, and collapse census data at BIRTH MONTH level */
/* Input: $datatemp_basic/dtacr_20 */
/*	  $datap/chetty_data_040816.dta */
/* Output: $datatemp/dtacr_23_birthmo_newsub */

do "$dofile/dtacr_23_birthmo_newsubgroups_tarduno.do"

/* Collapse NUMIDENT data at BIRTH MONTH level */
/* Input: $datatemp_basic/dtacr_22 */
/* Output: $datatemp/dtacr_24_birthmo_newsub */
do "$dofile/dtacr_24_birthmo_newsubgroups_tarduno.do"

/* Combine census and NUMIDENT collapsed data */
/* Input: $datatemp/dtacr_23, dtacr_24 */
/*	  (presumably the _birthmo_newsub files written by the two steps above -- TODO confirm in the do-file) */
/* Output: $datatemp/dtacr_25_birthmo_newsub */
do "$dofile/dtacr_25_birthmo_newsubgroups_tarduno.do"

* Process treatment data
* Input: $datap/countyLevel/FSP_startdate.dta, $datap/countyLevel/reis_income.dta,
*	$datap/countyLevel/cityctydb60.dta
* Output: $datatemp/fs_dtacr_1.dta
do "$dofile/fs_dtacr_1_tarduno.do"

* Combine collapsed census and NUMIDENT data with treatment data
* Input: $datatemp_basic/dtacr_25.dta, $datatemp/fs_dtacr_1.dta
*	NOTE(review): the birth-year combine step in this file lists its output as
*	$datatemp/dtacr_25_newsub, so the dtacr_25 path above looks stale -- confirm in the do-file
* Output: $datatemp/fs_dtacr_2_exactmatch`m'_newsub
*	(this master file only ever refers to the exactmatch0 version of this output)
do "$dofile/fs_dtacr_2_newsubgroups_tarduno.do"						

* Combine collapsed census and NUMIDENT data with treatment data, BIRTHMO version
* Input: $datatemp_basic/dtacr_25_birthmo_newsub.dta, $datatemp/fs_dtacr_1.dta
*	NOTE(review): the birth-month combine step in this file lists its output under
*	$datatemp, not $datatemp_basic -- confirm which directory the do-file reads
* Output: $datatemp/fs_hsacr_1_newsub_exactmatch`m'
*	(the name the exposure analyses and the gzip step in this file use; an earlier
*	version of this note said fs_dtacr_2_newsub_exactmatch -- confirm in the do-file)
do "$dofile/fs_hsacr_1_newsubgroups_tarduno.do"					

/***** CHECK DATA *******

* Look at allocations from census microdata 
* (this needs to feed back into dtacr_23.do eventually)
* Input: $datatemp_basic/dtacr_20						
* Output: $datatemp/ch_1						
do "$dofile/ch_1.do"

* Run allocation regressions
* Input: $datatemp/ch_1					
* Output: figures						
do "$dofile/ch_1a.do"

* Plot allocations across survey years
* Input: $datatemp/ch_1					
* Output: figures						
do "$dofile/ch_1b.do"

* Create time series plots based on microdata
* Input: $datatemp/dtacr_20.dta
* Output: figures
do "$dofile/ch_2.do"

* Create time series plots based on grouped data
* Input: $datatemp/fs_dtacr_2_exactmatch0.dta
* Output: figures
do "$dofile/ch_2b.do"

* Create mortality plots by birth year
* Input: $datatemp/fs_dtacr_2_exactmatch0.dta
* Output: figures
do "$dofile/ch_2c.do"

* Create histograms from grouped data
* Input: $datatemp/fs_dtacr_2_exactmatch0
* Output: figures
do "$dofile/ch_3.do"

* Create table with cell size by age
* Input: $datatemp/fs_dtacr_2_exactmatch0.dta
* Output: figures
do "$dofile/ch_4.do"
*/


/***** ANALYSIS -- EVENT STUDY SPECIFICATIONS *******/
* Note that files 2-12 use the "old" subgroups of men, women, whites, and nonwhites
* These are no longer used

* Estimate index regressions
* Input: $datatemp/fs_dtacr_2_exactmatch0.dta
* Output: outreg tables
*do "$dofile/fs_an_2.do"

* Estimate specific outcomes
* Input: $datatemp/fs_dtacr_2_exactmatch0.dta
* Output: outreg tables
*do "$dofile/fs_an_3.do"

* Estimate disability outcomes (this requires different weights)
* Input: $datatemp/fs_dtacr_2_exactmatch0.dta
* Output: outreg tables
*do "$dofile/fs_an_4.do"

* Estimate index regressions with 2000 Census observations only
* Input: $datatemp/fs_dtacr_2_exactmatch0.dta
* Output: outreg tables
*do "$dofile/fs_an_5.do"

* Estimate mortality outcome
* Input: $datatemp/fs_dtacr_2_exactmatch0.dta
* Output: outreg tables
*do "$dofile/fs_an_6.do"

* Test aweights vs pweights to confirm they deliver the same result
* Input: $datatemp/fs_dtacr_2_exactmatch0.dta
* Output: outreg tables
*do "$dofile/fs_an_7.do"

* Diagnostic test of sample
* Input: $datatemp/fs_dtacr_2_exactmatch0.dta
* Output: outreg tables
*do "$dofile/fs_an_8.do"

* Run main analysis without supercounties with conflicting FS implementation dates
* Input: $datatemp/fs_dtacr_2_exactmatch0.dta
* Output: outreg tables
*do "$dofile/fs_an_9.do"

* Main index regressions, estimate separate dummies by category of 1960 county poverty rate
* Input: $datatemp/fs_dtacr_2_exactmatch0.dta
* Output: outreg tables
*do "$dofile/fs_an_10.do"

* Main index regressions, stratify sample on 1960 county poverty rate
* Input: $datatemp/fs_dtacr_2_exactmatch0.dta
* Output: outreg tables
*do "$dofile/fs_an_11.do"

* Run with ACS only
* Input: $datatemp/fs_dtacr_2_exactmatch0.dta
* Output: outreg tables
*do "$dofile/fs_an_12.do"

* Estimate effect on index outcomes with full-information sample and new subgroups
* Input: $datatemp/fs_dtacr_2_exactmatch0_newsub.dta
* Output: outreg tables
*do "$dofile/fs_an_13.do"

* Estimate effect on Not Incarcerated with full-information sample and new subgroups
* Input: $datatemp/fs_dtacr_2_exactmatch0_newsub.dta
* Output: outreg tables
*do "$dofile/fs_an_14.do"

* Estimate mortality regressions with new subgroups
* Input: $datatemp/fs_dtacr_2_exactmatch0_newsub.dta
* Output: outreg tables
*do "$dofile/fs_an_15.do"

* Placebo tests
* Input: $datatemp/fs_dtacr_2_exactmatch0_newsub.dta
* Output: outreg tables
*do "$dofile/fs_an_16.do"

* Test balance of demographics
* Input: $datatemp/fs_dtacr_2_exactmatch0_newsub.dta
* Output: outreg tables
*do "$dofile/fs_an_17.do"

* Add interaction term of event-time with 1960 poverty rates
* First, do CENSUS outcomes
* Input: $datatemp/fs_dtacr_2_exactmatch0_newsub.dta
* Output: outreg tables
*do "$dofile/fs_an_18.do"

* Add interaction term of event-time with 1960 poverty rates
* Next, do INCARCERATION
* Input: $datatemp/fs_dtacr_2_exactmatch0_newsub.dta
* Output: outreg tables
*do "$dofile/fs_an_19.do"

* Add interaction term of event-time with 1960 poverty rates
* Finally, do MORTALITY outcomes
* Input: $datatemp/fs_dtacr_2_exactmatch0_newsub.dta
* Output: outreg tables
*do "$dofile/fs_an_20.do"

* Check simple correlation between outcomes and 1960 poverty rate
* Input: $datatemp/fs_dtacr_2_exactmatch0_newsub.dta
* Output: outreg tables
*do "$dofile/fs_an_22.do"

* Add interaction term of event-time with 1975 FSP caseload/pop
* This does both CENSUS and MORTALITY outcomes
* Input: $datatemp/fs_dtacr_2_exactmatch0_newsub.dta
* Output: outreg tables
*do "$dofile/fs_an_23.do"

* Add interaction term of event-time with indicator for highest quartile of 1975 FSP caseload/pop
* This does both CENSUS and MORTALITY outcomes
* Input: $datatemp/fs_dtacr_2_exactmatch0_newsub.dta
* Output: outreg tables
*do "$dofile/fs_an_24.do"

* Estimate separate event-time dummies for each quartile of 1975 FSP caseload/pop
* This does both CENSUS and MORTALITY outcomes
* Input: $datatemp/fs_dtacr_2_exactmatch0_newsub.dta
* Output: outreg tables
*do "$dofile/fs_an_25.do"

* Fully interacted model: Estimate separately for each quartile of 1975 FSP caseload/pop
* This does both CENSUS and MORTALITY outcomes
* Input: $datatemp/fs_dtacr_2_exactmatch0_newsub.dta
* Output: outreg tables
*do "$dofile/fs_an_26.do"

* Estimate effect on specific Census outcomes using new subgroups
* Input: $datatemp/fs_dtacr_2_exactmatch0_newsub.dta
* Output: outreg tables
*do "$dofile/fs_an_27.do"



/**** ANALYSIS -- EXPOSURE SPECIFICATIONS ******/

* Run HSA (exposure) specifications, with subgroups white/nonwhite/men/women
* Input: $datatemp/fs_hsacr_1_exactmatch0.dta
* Output: outreg tables
*do "$dofile/fs_hsa_an_1.do"

* Run HSA (exposure) specifications, index outcomes, with subgroups nm/nf/wm/wf
* Input: $datatemp/fs_hsacr_1_newsub_exactmatch0.dta
* Output: outreg tables
*do "$dofile/fs_hsa_an_2.do"

* Run HSA (exposure) specifications, incarceration as outcome, with subgroups nm/nf/wm/wf
* Input: $datatemp/fs_hsacr_1_newsub_exactmatch0.dta
* Output: outreg tables
*do "$dofile/fs_hsa_an_3.do"

* Run HSA (exposure) specifications, mortality outcomes, with subgroups nm/nf/wm/wf
* Input: $datatemp/fs_hsacr_1_newsub_exactmatch0.dta
* Output: outreg tables
*do "$dofile/fs_hsa_an_4.do"

* Run HSA (exposure) specifications with an interaction of exposure with 1960 poverty rate, CENSUS outcomes
* Input: $datatemp/fs_hsacr_1_newsub_exactmatch0.dta
* Output: outreg tables
*do "$dofile/fs_hsa_an_5.do"

* Run 1,000 replications of placebo HSA (exposure) specifications
* Input: $datatemp/fs_hsacr_1_newsub_exactmatch0.dta
* Output: outreg tables
*do "$dofile/fs_hsa_an_6.do"

* Run HSA (exposure) specifications with an interaction of exposure with 1960 poverty rate, MORTALITY outcomes
* Input: $datatemp/fs_hsacr_1_newsub_exactmatch0.dta
* Output: outreg tables
*do "$dofile/fs_hsa_an_7.do"

* Simple regressions of outcome variables on 1960 poverty rate
* This is run at Hilary's request as a check on the heterogeneity results
* Input: $datatemp/fs_hsacr_1_newsub_exactmatch0.dta
* Output: outreg tables
*do "$dofile/fs_hsa_an_8.do"

* Run HSA (exposure) specifications with an interaction of exposure with 1975 FSP caseload
* Input: $datatemp/fs_hsacr_1_newsub_exactmatch0.dta
* Output: outreg tables
*do "$dofile/fs_hsa_an_9.do"

* Run HSA (exposure) specifications with an interaction of exposure with quartiles of 1975 FSP caseload
* Input: $datatemp/fs_hsacr_1_newsub_exactmatch0.dta
* Output: outreg tables
*do "$dofile/fs_hsa_an_10.do"

* Full interaction of exposure with quartiles of 1975 FSP caseload
* Input: $datatemp/fs_hsacr_1_newsub_exactmatch0.dta
* Output: outreg tables
*do "$dofile/fs_hsa_an_11.do"

* Run HSA (exposure) specifications, specific outcomes, with subgroups nm/nf/wm/wf
* Input: $datatemp/fs_hsacr_1_newsub_exactmatch0.dta
* Output: outreg tables
*do "$dofile/fs_hsa_an_12.do"



*** DISCLOSURE STATS ***

* Get counts of mortality outcomes
*do "$dofile/mortcounts.do"

* For CDAR request: Get counts of raw observations in cells in omitted category
*do "$dofile/omitcount.do"



/***** GRAPHING
Note: For ease of examining results, 
we generally use the files that export graphs as PNG files. PNG files can be
paged through easily using Kview, while PDF files have to be opened (slowly)
one at a time. However, "graph export" does not work with PNG files
in batch mode. They have to be run interactively instead.
To get a sense of how these files work, refer to graph_index_birthyr.do
The do-files listed below produce results for specific analysis do-files
**************/
*do "$dofile/graph_16.do"
*do "$dofile/graph_17.do"
*do "$dofile/graph_18.do"
*do "$dofile/graph_19.do"
*do "$dofile/graph_20.do"
*do "$dofile/graph_23.do"
*do "$dofile/graph_24.do"
*do "$dofile/graph_25.do"
*do "$dofile/graph_26.do"

* This do-file produces histograms of results for the permutation tests
*do "$dofile/graph_hsa_6.do"






/*
** Graphs used when grouping treatment over multiple birth years
* Graph mortality regressions
do "$dofile/graph_mortality.do"

* Graph index regressions
do "$dofile/graph_index.do"

* Graph specific outcome regressions
do "$dofile/graph_specific.do"
*/

** OLD CODE


* Estimate mortality regressions
* Input: $datatemp/fs_dtacr_2.dta
* Output: outreg tables
*do "$dofile/fs_an_1.do"
* NB: these are run in fs_an_3.do. No need to run them separately. 


/* Zip unneeded files
!gzip "$datatemp_basic/dtacr_22.dta"
!gzip "$datatemp_basic/dtacr_20.dta"
!gzip "$datatemp/fs_dtacr_2_exactmatch0.dta"
*/

* Compress the intermediate files that the data-processing steps list as outputs.
* Each "!" line hands the rest of the line to the operating-system shell.
*
* -f (force) matters here. The processing steps rewrite these .dta files, so a
* .gz left behind by an earlier run can still sit next to the freshly written
* .dta. Without -f, gzip launched from a batch job will not replace an existing
* archive: it leaves the new .dta uncompressed beside the stale .gz.
*
* Lines starting with "*" are files deliberately left uncompressed for now.
*!gzip "$datatemp/dtacr_23_birthmo.dta"
*!gzip "$datatemp/dtacr_24_birthmo.dta"
!gzip -f "$datatemp/dtacr_25_birthmo_newsub.dta"
!gzip -f "$datatemp/fs_dtacr_2_exactmatch0_newsub.dta"
*!gzip "$datatemp/fs_dtacr_2_exactmatch0.dta"
*!gzip "$datatemp/fs_hsacr_1_newsub_exactmatch0.dta"
!gzip -f "$datatemp/dtacr_25_newsub.dta"
!gzip -f "$datatemp/dtacr_24_newsub.dta"
!gzip -f "$datatemp/dtacr_23_newsub.dta"
!gzip -f "$datatemp/fs_hsacr_1_newsub_exactmatch0.dta"

* Drop the data in memory and stop executing this do-file
clear
exit
