* Data cleaning -- master do-file that runs, in one place, every do-file used to construct the project data

* relies on having dtacr_20.dta and dtacr_22.dta

* August 25, 2021
* Kate Moulton

* ---- Session setup ---------------------------------------------------------
version 14.0
clear all
* NOTE(review): the permanently option also stores this setting for future
* Stata sessions, not just this run -- confirm that is intended.
set more off, permanently

* ---- Directory globals -----------------------------------------------------
* fs_root is a local macro, so it exists only inside this do-file; the five
* globals below end up holding the same literal paths as before.
local fs_root "/projects/programs/foodstamps/master_folder"

* Project-wide data
global datap          "/projects/data"
* Foodstamps project data
global datatemp       "`fs_root'/datatemp"
* Basic-processing data
global datatemp_basic "`fs_root'/basicprocessing/datatemp"
* Do-files that construct the foodstamps data
global dofile         "`fs_root'/foodstamps_build"
* Output (logs)
global output         "`fs_root'/foodstamps_build/logs"

******* UNZIP FILES
/*
*!gunzip "$datatemp_basic/dtacr_20.dta.gz"
*!gunzip "$datatemp_basic/dtacr_22.dta.gz"
!gunzip "$datatemp/fs_dtacr_2_exactmatch0_newsub.dta.gz"
!gunzip "$datatemp/dtacr_23_newsub.dta.gz"
!gunzip "$datatemp/dtacr_24_newsub.dta.gz"
*!gunzip "$datatemp/dtacr_25_newsub.dta.gz"
*!gunzip "$datatemp/dtacr_23_birthmo_newsub.dta.gz"
*!gunzip "$datatemp/dtacr_24_birthmo_newsub.dta.gz"
!gunzip "$datatemp/dtacr_25_birthmo_newsub.dta.gz"
!gunzip "$datatemp/fs_hsacr_1_newsub_exactmatch0.dta.gz"
*/

/***** DATA PROCESSING *******/

/* Might want to re-do basic processing (dtacr_23 and dtacr_25) after deciding 
how to handle missing and allocated values. Not doing anything right now. */

*** First: collapse the raw data by BIRTH YEAR (main analysis) ***

* (a) Limit the sample, create the indices, and collapse the census data.
*     Input:  $datatemp_basic/dtacr_20
*             $datap/chetty_data_040816.dta
*     Output: $datatemp/dtacr_23_newsub
do "${dofile}/dtacr_23_newsubgroups.do"

* (b) Collapse the NUMIDENT data.
*     Input:  $datatemp_basic/dtacr_22
*     Output: $datatemp/dtacr_24_newsub
do "${dofile}/dtacr_24_newsubgroups.do"

* (c) Combine the collapsed census and NUMIDENT data.
*     Input:  $datatemp/dtacr_23, dtacr_24
*             (presumably the _newsub outputs of (a) and (b) -- TODO confirm)
*     Output: $datatemp/dtacr_25_newsub
do "${dofile}/dtacr_25_newsubgroups.do"


*** Next: collapse the raw data by BIRTH MONTH ("exposure" specifications) ***

* (a) Limit the sample, create the indices, and collapse the census data
*     at the birth-month level.
*     Input:  $datatemp_basic/dtacr_20
*             $datap/chetty_data_040816.dta
*     Output: $datatemp/dtacr_23_birthmo_newsub
do "${dofile}/dtacr_23_birthmo_newsubgroups.do"

* (b) Collapse the NUMIDENT data at the birth-month level.
*     Input:  $datatemp_basic/dtacr_22
*     Output: $datatemp/dtacr_24_birthmo_newsub
do "${dofile}/dtacr_24_birthmo_newsubgroups.do"

* (c) Combine the collapsed census and NUMIDENT data.
*     Input:  $datatemp/dtacr_23, dtacr_24
*             (presumably the _birthmo_newsub outputs of (a) and (b) -- TODO confirm)
*     Output: $datatemp/dtacr_25_birthmo_newsub
do "${dofile}/dtacr_25_birthmo_newsubgroups.do"

*** Collapse raw data by birth year for main analysis -- but standardize indices using full-sample mean and SD ***
/*
* Limit sample, create indices, and collapse census data			
* Input: $datatemp_basic/dtacr_20						
*	  $datap/chetty_data_040816.dta						
* Output: $datatemp/dtacr_23_newsub						
do "$dofile/dtacr_23_newsubgroups_201906.do"

* Collapse NUMIDENT data							
* Input: $datatemp_basic/dtacr_22						
* Output: $datatemp/dtacr_24_newsub						
do "$dofile/dtacr_24_newsubgroups_201906.do"

* Combine census and NUMIDENT collapsed data					
* Input: $datatemp/dtacr_23, dtacr_24						
* Output: $datatemp/dtacr_25_newsub						
do "$dofile/dtacr_25_newsubgroups_201906.do"
*/

*** Collapse raw data separately by birth month for "exposure" specifications --
*but standardize indices using full-sample mean and SD ***

/* Limit sample, create indices, and collapse census data at BIRTH MONTH level	
* Input: $datatemp_basic/dtacr_20						
*	  $datap/chetty_data_040816.dta						
* Output: $datatemp/dtacr_23_birthmo_newsub					
do "$dofile/dtacr_23_birthmo_newsubgroups_201906.do"

* Collapse NUMIDENT data at BIRTH MONTH level					
* Input: $datatemp_basic/dtacr_22						
* Output: $datatemp/dtacr_24_birthmo_newsub					
do "$dofile/dtacr_24_birthmo_newsubgroups_201906.do"

* Combine census and NUMIDENT collapsed data					
* Input: $datatemp/dtacr_23, dtacr_24						
* Output: $datatemp/dtacr_25_birthmo_newsub					
do "$dofile/dtacr_25_birthmo_newsubgroups_201906.do"
*/

*** Collapse raw data separately by birth month for "exposure" specifications --
*with a new outcome
/* Limit sample, create indices, and collapse census data at BIRTH MONTH level	*/
/* Input: $datatemp_basic/dtacr_20						*/
/*	  $datap/chetty_data_040816.dta						*/
/* Output: $datatemp/dtacr_23_birthmo_newsub					*/
*do "$dofile/dtacr_23_birthmo_newsubgroups_withCollege.do"

/* Collapse NUMIDENT data at BIRTH MONTH level					*/
/* Input: $datatemp_basic/dtacr_22						*/
/* Output: $datatemp/dtacr_24_birthmo_newsub					*/
*do "$dofile/dtacr_24_birthmo_newsubgroups.do"

/* Combine census and NUMIDENT collapsed data					*/
/* Input: $datatemp/dtacr_23, dtacr_24						*/
/* Output: $datatemp/dtacr_25_birthmo_newsub					*/
*do "$dofile/dtacr_25_birthmo_newsubgroups_withCollege.do"


*** Treatment data, then combine it with the collapsed outcome data ***
* In the file names below, <m> stands for the macro m used in the original notes.

* (a) Process the treatment data.
*     Input:  $datap/countyLevel/FSP_startdate.dta
*             $datap/countyLevel/reis_income.dta
*             $datap/countyLevel/cityctydb60.dta
*     Output: $datatemp/fs_dtacr_1.dta
do "${dofile}/fs_dtacr_1.do"

* (b) Combine the collapsed census and NUMIDENT data with the treatment data.
*     Input:  $datatemp_basic/dtacr_25.dta, $datatemp/fs_dtacr_1.dta
*     Output: $datatemp/fs_dtacr_2_exactmatch<m>_newsub
*     NOTE(review): the birth-year collapse is documented in this file as writing
*     $datatemp/dtacr_25_newsub, not $datatemp_basic/dtacr_25.dta -- confirm
*     which file this step actually reads.
do "${dofile}/fs_dtacr_2_newsubgroups.do"

* (c) Same combination, BIRTHMO version.
*     Input:  $datatemp_basic/dtacr_25_birthmo_newsub.dta, $datatemp/fs_dtacr_1.dta
*     Output: $datatemp/fs_dtacr_2_newsub_exactmatch<m>
*     NOTE(review): the zip/unzip lists in this file name
*     fs_hsacr_1_newsub_exactmatch0.dta, so the real output may be
*     fs_hsacr_1_newsub_exactmatch<m> -- confirm.
do "${dofile}/fs_hsacr_1_newsubgroups.do"
				


/*
* Combine collapsed census and NUMIDENT data with treatment data 		
* Using data with updated z-variables, standardized using full sample				
* Input: $datatemp_basic/dtacr_25.dta, $datatemp/fs_dtacr_1.dta			
* Output: $datatemp/fs_dtacr_2_exactmatch`m'_newsub_201906			
do "$dofile/fs_dtacr_2_newsubgroups_201906.do"	

* Combine collapsed census and NUMIDENT data with treatment data, BIRTHMO version
* Using data with updated z-variables, standardized using full sample				
* Input: $datatemp_basic/dtacr_25_birthmo_newsub.dta, $datatemp/fs_dtacr_1.dta	
* Output: $datatemp/fs_dtacr_2_newsub_exactmatch`m'_201906
do "$dofile/fs_hsacr_1_newsubgroups_201906.do"	
*/

* Combine collapsed census and NUMIDENT data with treatment data, BIRTHMO version
* but with new outcomes for RESTUD revision
* Input: $datatemp_basic/dtacr_25_birthmo_newsub.dta, $datatemp/fs_dtacr_1.dta	
* Output: $datatemp/fs_dtacr_2_newsub_exactmatch`m'					
*do "$dofile/fs_hsacr_1_newsubgroups_withCollege.do"

/* Zip unneeded files
!gzip "$datatemp_basic/dtacr_22.dta"
!gzip "$datatemp_basic/dtacr_20.dta"
!gzip "$datatemp/fs_dtacr_2_exactmatch0.dta"
*!gzip "$datatemp/dtacr_23_birthmo.dta"
*!gzip "$datatemp/dtacr_24_birthmo.dta"
!gzip "$datatemp/dtacr_25_birthmo_newsub.dta"
!gzip "$datatemp/fs_dtacr_2_exactmatch0_newsub.dta"
*!gzip "$datatemp/fs_dtacr_2_exactmatch0.dta"
*!gzip "$datatemp/fs_hsacr_1_newsub_exactmatch0.dta"
!gzip "$datatemp/dtacr_25_newsub.dta"
!gzip "$datatemp/dtacr_24_newsub.dta"
!gzip "$datatemp/dtacr_23_newsub.dta"
!gzip "$datatemp/fs_hsacr_1_newsub_exactmatch0.dta"
*/

* Drop the dataset left in memory by the last do-file ...
clear
* ... and end this do-file; anything placed after this line is not run.
exit
