/*******************************************************************************

************
** Function: 
************

  Creates the core interview sample used for some of the calibration targets and 
  regression analyses.

************
** Inputs  :
************
	
	- Cross-wave tracker file
	- Langa-Weir classification of cognitive function file
	- RAND HRS longitudinal file
	- RAND HRS family respondent-level file
	- RAND HRS family child-level file
	- CPI deflator
	- helpers_with_imputations.dta   (see HelperData_Run_All_Files.do)
	
************
** Outputs :
************
	
  - core_sample.dta	
	
	- TABA1_SAMPLE_COUNTS_1.txt
	
*******************************************************************************/

* Start from a clean session: drop data and stored results, remove all macros,
* and switch off output paging.
clear all
macro drop _all
set more off

********************************************************************************
** Directory names
********************************************************************************

* NOTE(review): GetDirNames presumably defines the path globals used below
* (${logs}, ${save}, ${tracker}, ...) -- they must be set after -macro drop-.
do GetDirNames

********************************************************************************
** Set up log for sample counts
********************************************************************************

* Open the text log that collects the Table A1 sample counts, then suspend it
* right away; it is switched on only around the counting commands.
capture log close
log using ${logs}/TABA1_SAMPLE_COUNTS_1.txt, replace text
log off

********************************************************************************
** TRACKER
********************************************************************************

* Variables to read from the cross-wave tracker file: the identifiers plus, for
* every one-letter wave prefix (?), the sub-household ID, the interview type
* and the two respondent weights.
local trackervars HHID PN ?SUBHH ?IWTYPE ?WGTR ?WGTRNH

use `trackervars' using ${tracker}, clear

* Discard wave prefixes A-E.
// -> 1992, 1993, 1994, 1995, 1996
drop A* B* C* D* E*

* Turn the remaining wave-letter prefixes into numeric wave suffixes
* (F -> 4, G -> 5, ..., O -> 12; the list skips the letter I).
* -capture- lets the loop pass over letter/stub combinations that do not exist.
local wave = 4
foreach letter in F G H J K L M N O {
  foreach stub in IWTYPE SUBHH WGTR WGTRNH {
    capture rename `letter'`stub' `stub'`wave'
  }
  local ++wave
}

* One row per person (HHID PN) and wave (w).
reshape long SUBHH@ IWTYPE@ WGTR@ WGTRNH@, i(HHID PN) j(w)

* Value labels for the interview type.
label define IWTYPE ///
  1 "1.  Core interview obtained" ///
  5 "5.  Core interview not obtained" ///
  11 "11.  Exit interview obtained" ///
  15 "15.  Exit interview not obtained" ///
  21 "21.  Post-exit interview obtained" ///
  25 "25.  Post-exit interview not obtained" ///
  99 "99.  Not in the sample this wave"
label values IWTYPE IWTYPE

* Value labels for the sub-household ID; they are attached to a numeric copy
* (SUBHHnum) created with -destring-, while SUBHH itself is left as is.
label define SUBHH ///
  0 "0.  Original household" ///
  1 "1.  Sub-household, split off from original" ///
  2 "2.  Sub-household, split off from original" ///
  3 "3.  Deceased respondent household" ///
  4 "4.  Deceased respondent household" ///
  5 "5.  Sub-household, split off a household that already split into a 1 and 2" ///
  6 "6.  Sub-household, split off a household that already split into a 1 and 2" ///
  7 "7.  Used when two respondents split and then recombine with each other" ///
  8 "8.  Sub-household, split off a household that already split into a 1 and 2" ///
  9 "9.  Not in the sample this wave or respondent not interviewed this wave"
destring SUBHH, generate(SUBHHnum)
label values SUBHHnum SUBHH

* Temporary file; erased at the end of the script.
save ${save}/core_tracker, replace

********************************************************************************
** Langa-Weir classification of cognitive function (CORE)
********************************************************************************
/*
Provides a total summary score for cognition using measures from the core HRS 
interview as well as three derived categories: Normal, Cognitively Impaired but 
not Demented (CIND), and Demented. We refer to these as the Langa-Weir 
Classifications (Crimmins et al., 2011). HRS also makes use of proxy respondents 
to reduce sample attrition. Therefore, this dataset also includes scoring of 
cognition based on data from proxy respondent interviews.

HRS has two scales for cognitive function: a 35-point (ages 65+) and a 27-point 
scale (all respondents). Langa-Weir maps onto the 27-point scale (Variable: cogtot27_imp).
(For comparison, the 35-point scale is available in RAND HRS: r*cogtot.)

For proxy respondents, cognition cannot be directly measured. It is estimated 
from IADLs and the proxy respondent's assessments of the respondent's memory and 
cognition. This yields an 11-point scale: 0-5 IADLs, memory 0 (very good) to 4 
(poor), cognitive impairment 0 (no) to 2 (has CI). (11 = 5+4+2.) There is no CI 
question prior to 2000, so a 9-point scale is used in earlier waves (1995-1998). 
(Variable: prxyscore_imp.)

Summary cognition measure (Variable: cogfunction.)
                  Normal    CIND    Demented        
27-point scale     12-27     7-11     0-6
11-point scale      0-2      3-5      6-11
 9-point scale      0-2      3-4      5-9 

* CIND = Cognitively impaired but not demented.
*/

* Wide-format variables to load and the matching reshape stubs. Only the summary
* classification and the interview flag are used; the score variables
* cogtot27_imp* / prxyscore_imp* are deliberately left out.
local lw_wide  hhid pn cogfunction* intrview*
local lw_stubs cogfunction@ intrview@

use `lw_wide' using ${langaweir}, clear

* Discard the years outside the analysis window.
// -> want 1998-2012 (waves 4-11)
drop *1995 *1996 *2014 *2016
quietly reshape long `lw_stubs', i(hhid pn) j(year)

* Map the biennial survey year to the wave number (1998 = wave 4).
generate w = 4 + (year-1998)/2
drop year

* Keep only person-waves that were interviewed (intrview==1); the flag is not
* needed afterwards.
keep if intrview == 1
drop intrview

label define COGFUNC ///
  1 "1. Normal" ///
  2 "2. CI, Not Demented" ///
  3 "3. Demented"
label values cogfunction COGFUNC

tabulate cogfunction, missing
// -> 1 missing case (wave 11)

* Upper-case identifiers, matching the key names used in the later merge.
rename (hhid pn) (HHID PN)

* Temporary file; erased at the end of the script.
save ${save}/core_langaweir.dta, replace

********************************************************************************
** CORE: RAND LONGITUDINAL FILE 
********************************************************************************

* Wide-format RAND HRS variables to load. In the wave-specific names the
* wildcard * stands for the wave number (r = respondent, s = spouse,
* h = household).
local rand_wide ///
  hhidpn rahhidpn ragender ///
  raeduc raracem rahispan rarelig raestrat raehsamp ///
  r*proxy ///
  h*hhidc r*wtresp r*wtr_nh r*wthh ///
  r*wtcrnh r*iwstat r*iwbeg r*iwend ///
  h*pickhh ///
  r*mstat ///
  r*govmd ///
  h*ahous h*afhous h*ahoub h*atoth ///
  h*atotb h*atotn h*atotf ///
  h*itot ///
  r*agey_b h*cpl h*child s*agey_b s*gender ///
  r*adla r*iadlza r*shlt ///
  r*cogtot r*memry r*memrye r*alzhe r*demen r*alzhee r*demene ///
  r*nhmliv r*nrshom r*nrsnit ///
  r*walkr r*dress r*bath r*eat r*bed r*toilt ///
  r*phone r*meds r*money r*shop r*meals r*map ///
  r*cendiv r*cenreg

* Matching reshape stubs (@ marks the position of the wave number).
* NOTE(review): r@iadla has no explicit counterpart in the load list above; it
* is presumably picked up by the r*adla wildcard -- confirm.
local rand_stubs ///
  h@hhidc r@wtresp r@wtr_nh r@wthh ///
  r@wtcrnh r@iwstat r@iwbeg r@iwend ///
  r@govmd ///
  r@proxy ///
  h@pickhh ///
  r@mstat ///
  h@ahous h@afhous h@ahoub h@atoth ///
  h@atotb h@atotn h@atotf ///
  h@itot ///
  r@agey_b h@cpl h@child s@agey_b s@gender ///
  r@adla r@iadlza r@shlt r@iadla ///
  r@cogtot r@memry r@memrye r@alzhe r@demen r@alzhee r@demene ///
  r@nhmliv r@nrshom r@nrsnit ///
  r@walkr r@dress r@bath r@eat r@bed r@toilt ///
  r@phone r@meds r@money r@shop r@meals r@map ///
  r@cendiv r@cenreg

* Load the selected variables and go from one row per person to one row per
* person and wave (w).
use `rand_wide' using $randhrs, clear
quietly reshape long `rand_stubs', i(hhidpn) j(w)

* Rebuild the string identifiers from RAND's combined IDs: household ID,
* person number and sub-household ID.
gen HHID      = substr(rahhidpn ,1,6)
gen PN        = substr(rahhidpn ,7,3)
gen SUBHH     = substr(hhhidc   ,7,1)   // extract sub-household id
sort HHID PN w

* Combined respondent weight: the community weight, replaced by the
* nursing-home weight for respondents living in a nursing home.
gen rwtall=rwtresp
replace rwtall=rwtr_nh if rnhmliv==1 // assign NH weights
// -> now provided by RAND (r*wtcrnh) 

* documenting inconsistencies in RAND HRS weights
summ rwtall rwtcrnh
summ rwtall rwtcrnh if !mi(rwtall,rwtcrnh)
summ rwt* rnhmliv if (rwtall != rwtcrnh)
tab w   if (rwtall != rwtcrnh)
tab rnhmliv if (rwtall != rwtcrnh)

* Within person, carry the combined weight forward into cfrwtall
* (-carryforward- is a user-written command; data are sorted by HHID PN w).
by HHID PN: carryforward  rwtall, gen( cfrwtall  )

rename (rdress  rwalkr  rbath  reat   rbed   rtoilt  rmeals  rshop  rphone  rmeds rmoney)  ///
       ( dress   walkr   bath   eat    bed    toilt   meals   shop   phone   meds  money) 
// -> same variables names as for exit IW 

* From wave 10 (2010) on, the memory-related disease question is replaced by
* separate Alzheimer's and dementia indicators. Rebuild the 0/1 memory
* variables from them: 1 if either condition is reported, 0 if both are
* reported absent.
* FIX: the "both absent" case previously assigned 1 as well, which coded every
* respondent with known status in waves 10+ as having a memory-related disease.
replace rmemry = 1 if ((ralzhe==1 | rdemen==1) & w>=10)
replace rmemry = 0 if ((ralzhe==0 & rdemen==0) & w>=10)
// -> In 2010 memry variable is replaced by Alzheimer and dementia indicators
replace rmemrye = 1 if ((ralzhee==1 | rdemene==1) & w>=10)
replace rmemrye = 0 if ((ralzhee==0 & rdemene==0) & w>=10)
// -> memrye is whether respondent has ever been diagnosed with a memory-related
//    disease
//    Update: using ralzhee and rdemene instead of ralzhe and rdemen.
recode rmemry (3 4 =.)

* Construction of disability index: totadl = adla + iadlza, where
* adla   = sum(BATHA,DRESSA,EATA,BEDA,WALKRA)
* iadlza = sum(PHONEA,MONEYA,MEDSA,SHOPA,MEALA)
* NOTE: RAND uses the "some difficulty" versions of the individual measures to construct radla.
*       e.g., RDRESSA 'R Some Diff-Dressing' instead of RDRESS 'R Diff-Dressing'.
*       QUESTION: Why don't we follow RAND's approach here?
* Both are taken from RAND. RAND also provides raw (I)ADLA scores. 
* 1-6 ADLs : dress, walkr, bath, eat, bed, toilet.
* 1-5 IADLs: meals, shop, phone, meds, money.
*            0=no, 1=yes, 2=can't do (RAND codes these to 'yes' in count), 9=don't do  
* Collapse "can't do" (2) and "don't do" (9) into "yes" (1) for every item.
recode bath dress eat bed walkr toilt meals shop phone money (2 9 = 1)
recode meds (2 9 = 1) (.z = 0) // .Z=Don't do/No if did

* Get ADL and IADL indices
* (toilt is recoded above but is not part of the adla sum.)
egen    adla  = rowtotal(dress walkr bath eat bed), missing
egen    iadlza= rowtotal(meals shop phone meds money), missing
egen    totadl= rowtotal(adla iadlza), missing 
// -> rowtotal assigns a zero if all values are missing; if "missing"
//    is specified it assigns a missing value if ALL values are missing. 

***************************************
** Sample counts for TABLE A1
***************************************

* Table A1 sample counts, part 1. The log is switched on only for this stretch,
* so the -unique- results below (and the commands themselves) end up in
* TABA1_SAMPLE_COUNTS_1.txt. The block comments after each command record the
* expected output; they are documentation only and are not checked.
* NOTE(review): -unique- is presumably the user-written command of that name
* and must be installed -- confirm.
* resume logging
log on

unique HHID PN if (riwstat==1)
/*
Number of unique values of HHID PN is  37495
Number of records is  226563
*/
unique HHID if (riwstat==1)
/*
Number of unique values of HHID is  23373
Number of records is  226563
*/

keep if w>=4  // begin with year 1998

sort HHID PN w
order HHID PN w 

unique HHID PN // -> 37,495 unique individuals

unique HHID PN if (riwstat==1 & inrange(w,4,10))
/*
Number of unique values of HHID PN is  32973
Number of records is  136977
*/

unique HHID if (riwstat==1 & inrange(w,4,10))
/*
Number of unique values of HHID is  21211
Number of records is  136977
*/

* pause logging
log off

* Store the long-format RAND HRS extract (waves 4 and later) for the merge
* step; this temporary file is erased at the end of the script.
save ${save}/core_randhrs, replace 

********************************************************************************
** CORE: RAND Family File (R File)
********************************************************************************

* Respondent level data (includes those without kids).
* Data from core IWs only.
* Uniquely ID'd by HHID PN w.

* Wide-format variables to load from the respondent-level family file
* (* stands for the wave number) and the matching reshape stubs.
local famr_wide ///
  hhidpn rahhidpn ///
  h*nkid h*ndau ///
  h*lv10mikn h*resdkn ///
  h*tcany h*tcamt h*fcany h*fcamt

local famr_stubs ///
  h@nkid h@ndau ///
  h@lv10mikn h@resdkn ///
  h@tcany h@tcamt h@fcany h@fcamt

use `famr_wide' using $randfamr, clear
quietly reshape long `famr_stubs', i(hhidpn) j(w)

* Rebuild the string identifiers from RAND's combined ID.
generate HHID = substr(rahhidpn, 1, 6)
generate PN   = substr(rahhidpn, 7, 3)

* Begin with year 1998 (wave 4).
drop if w < 4

sort HHID PN w
order HHID PN w

unique HHID PN // -> 37,495 unique individuals

* Temporary file; erased at the end of the script.
save ${save}/core_randfamr, replace

********************************************************************************
** CORE: RAND Family File (K File)
********************************************************************************

* Child-level variables to load (* stands for the wave number) and the
* matching reshape stubs.
local famk_wide  hhidpn hhid pn kidid karel k*resd k*lv10mi k*inhp k*hlphrs
local famk_stubs k@resd k@lv10mi k@inhp k@hlphrs

use `famk_wide' using ${randfamk}, clear

* The wildcards above also pick up the kid-partner (kp*) variables; drop them.
drop kp*
quietly reshape long `famk_stubs', i(hhidpn kidid) j(w)

* Use only core data from 1998 (wave 4) through 2010 (wave 10).
keep if inrange(w, 4, 10)

* Harmonise the child-level items before aggregating:
*   kresd   == 2 means resident away -> counted as resident (1)
*   klv10mi == .l means resident     -> counted as living within 10 miles (1)
*   khlphrs is set to 0 when kinhp == 0
recode kresd (2 = 1)
recode klv10mi (.l = 1)
replace khlphrs = 0 if kinhp == 0

* Sum each item over all children of a respondent within a wave; with the
* -missing- option the total is missing when every child's value is missing.
sort hhidpn w kidid
foreach item in resd lv10mi hlphrs inhp {
  by hhidpn w: egen hk`item' = total(k`item'), missing
}

* Reduce to one row per respondent and wave, keeping only the identifiers and
* the totals (hk*); the child-level variables, including kidid, are dropped.
by hhidpn w: keep if _n == 1
keep hhidpn hhid pn w hk*

* Upper-case identifiers, matching the key names used in the later merge.
rename (hhid pn) (HHID PN)

* Temporary file; erased at the end of the script.
save ${save}/core_randfamk, replace

********************************************************************************
** MERGE
********************************************************************************

* Start from the tracker extract and attach the RAND HRS extract; rows found
* only in the tracker (_merge == 1) are removed after tabulating the match
* pattern by wave.
use ${save}/core_tracker, clear
merge 1:1 HHID PN w using ${save}/core_randhrs
tabulate w _merge
keep if _merge != 1
drop _merge

* Attach the two family-file extracts. No keep() restriction is applied here,
* so unmatched rows from either side are retained.
foreach extract in core_randfamr core_randfamk {
  merge 1:1 HHID PN w using ${save}/`extract', nogenerate
}

* Attach the wave-level deflator and the Langa-Weir classification without
* adding rows that exist only in those files.
merge m:1         w using ${save}/CPIdeflator  , nogenerate keep(1 3) keepusing(deflator10)
merge 1:1 HHID PN w using ${save}/core_langaweir, nogenerate keep(1 3)

* Lower-case identifiers, as used by the helper file and from here on.
rename (HHID PN) (hhid pn)

* add helper data with data for 1998-2010
merge 1:1 hhid pn w using ${save}/helpers_with_imputations.dta, nogenerate keep(1 3)

* Restrict to waves up to 10.
drop if w > 10

unique hhid pn // -> 37,495

********************************************************************************
** Adjust nominal values
********************************************************************************

// Convert nominal values to constant 2010 dollars by dividing by deflator10.
//   atotb : net value of total wealth
//   atoth : net value of housing wealth
//   ahous : value of primary residence (NOT net of mortgage)
//   ahoub : value of secondary residence (NOT net of mortgage)
//   atotn : net value of non-housing wealth
//   atotf : net value of financial non-housing wealth
//   tcamt : transfers to children
//   fcamt : transfers from children
foreach v in atotb atoth ahous ahoub atotn atotf tcamt fcamt {
  generate `v'10 = h`v'/deflator10
}
// household income (source name already starts with h, so it is not looped)
generate hitot10 = hitot/deflator10

// Total and financial wealth in 1000s of 2010 dollars.
foreach v in atotb10 atotf10 {
  generate `v'_1000s = `v' / 1000
}

// Quintiles of real total wealth, computed over all rows in memory.
xtile atotb10_quintiles = atotb10, nquantiles(5)

********************************************************************************
** Find age of eldest in household
********************************************************************************

* Oldest age in the household: the respondent's age when it is observed and the
* spouse's age is missing or not larger; otherwise the spouse's age when that
* is observed. Missing when both ages are missing.
generate ageEldest = ragey_b if !mi(ragey_b) & (mi(sagey_b) | ragey_b >= sagey_b)
replace  ageEldest = sagey_b if !mi(sagey_b) & (mi(ragey_b) | sagey_b > ragey_b)

* Five-year age bands for ages 50-94, coded by the upper bound of each band;
* ages outside that range get a missing category.
generate ageEldestCat = recode(ageEldest,54,59,64,69,74,79,84,89,94) if inrange(ageEldest,50,94)
label define ageEldestCat ///
  54 "50-54" ///
  59 "55-59" ///
  64 "60-64" ///
  69 "65-69" ///
  74 "70-74" ///
  79 "75-79" ///
  84 "80-84" ///
  89 "85-89" ///
  94 "90-94"
label values ageEldestCat ageEldestCat

********************************************************************************
** Other demographics
********************************************************************************

* Respondent age.
generate age = ragey_b

* Indicator for ragender == 2; left missing when gender is missing.
generate sex = (ragender == 2) if !missing(ragender)

* Education with categories 2 and 3 pooled.
// -> GED = HS
recode raeduc (2/3 = 2), generate(educ)

********************************************************************************
** Home ownership indicator
********************************************************************************

* Derived from the primary-residence asset flag: codes 1-5 -> owner,
* 6 -> non-owner, 7-9 -> missing (.m).
// -> indicator = 1 if own home, = 0 otherwise
recode hafhous (1/5 = 1) (6 = 0) (7/9 = .m), generate(rownhm)

********************************************************************************
** Child status indicator
********************************************************************************

* child = 1 if the household reports at least one child, 0 if none;
* nochild is its complement.
recode hchild (0 = 0) (1/max = 1), generate(child)
generate nochild = 1 - child

* Person-level summaries over all waves: ever observed with / without children.
bysort hhid pn (w): egen everChild   = max(child)
by hhid pn:         egen everNoChild = max(nochild)

********************************************************************************
** Sample counts for TABLE A1
********************************************************************************

* Table A1 sample counts, part 2. The log is switched back on, the counts are
* written to it, and it is closed at the end of this block. The figures in the
* trailing comments are documentation only; the code does not check them.
* NOTE(review): the household count via -unique- conditions on hpickhh==1
* only, whereas the -count- right after it also requires riwstat==1 --
* confirm that this asymmetry is intended.
* resume logging
log on

* Individuals
unique hhid pn                 // ->  37,495 unique individuals
unique hhid pn if (riwstat==1) // ->  32,973 unique individuals with data in core waves 4-10
count if (riwstat==1)          // -> 136,977 unique observations

* Households
unique hhid pn if (hpickhh==1)     // -> 24,100 unique households (with data in core waves 4-10)
                                   // (may be inaccurate if selected HH member changes between waves)
count if (hpickhh==1 & riwstat==1) // -> 93,385 unique observations

* Parents
unique hhid pn if (nochild==0) & (riwstat==1)                    // -> 30,181 unique ever parents
unique hhid pn if (everChild==1 & everNoChild==0) & (riwstat==1) // -> 29,835 unique always parents

* Childless
unique hhid pn if (nochild==1) & (riwstat==1)                    // ->  2,913 unique ever childless
unique hhid pn if (everChild==0 & everNoChild==1) & (riwstat==1) // ->  2,567 unique always childless

* Ambiguous cases
unique hhid pn if (everChild==1 & everNoChild==1) & (riwstat==1) // ->    346 both parent/childless
unique hhid pn if (everChild~=1 & everNoChild~=1) & (riwstat==1) // ->    225 neither parent/childless

di 29835 + 2567 + 346 + 225 // -> 32,973
di 30181 + 2913 - 346 + 225 // -> 32,973

* close log
cap log close

********************************************************************************
** Panel setup
********************************************************************************

* Declare the data as a panel: hhidpn is the unit, w the time index.
sort hhidpn w
xtset hhidpn w

********************************************************************************
** Save
********************************************************************************

* Final output of this script.
save ${save}/core_sample, replace

********************************************************************************
** Erase temporary files
********************************************************************************

* Remove the intermediate extracts written above; -capture- keeps the script
* going if a file is already gone.
foreach part in tracker langaweir randhrs randfamr randfamk {
  capture erase ${save}/core_`part'.dta
}

********************************************************************************
