/*******************************************************************************

************
** Function: 
************

  Creates data extract for decedent sample.

************
** Notes   : 
************

       Sample of respondents (Rs) that have exit interviews during the time 
	   period 2000-2012 (waves 5-11). Of these, we only use data for individuals with exit 
	   interviews in the 2004-2012 (waves 7-11) period.
		 
       For these Rs combine data from exit interviews with data from core 
       interviews 1998-2010 (waves 4-10). 

************
** Inputs  : 
************
			 
			    - HRS Cross-wave tracker file
			 
Exit IWs: - Coverscreen (A)
          - Preload (PR)
          - Will and life insurance (T)
		      - Health services and insurance (N)		  
          - Functional limitations and helpers (G)
					- Cognition (D)
          
Core IWs: - Coverscreen (A)
          - Functional limitations and helpers (G)
		      - Family structure and transfers (E)

RAND HRS: - randhrs1992_2014v2      (respondent level)
          - randhrsfamr1992_2014v1  (respondent level, with summary info on children)
          - randhrsfamk1992_2014v1  (respondent-child level, we collapse to respondent level)       

RAND FAT FILES: 1998-2010
		  
OOPME FILE: Out-of-pocket medical expenditures from Fahle, McGarry, and Skinner (2016)
            Fiscal Studies.			
		  
************
** Output  : 
************

  - decedent_sample.dta
	- randkids.dta         (required in Decedents_Sample_Supplement.do)
	
	- TABA2a_SAMPLE_COUNTS_1.txt

*******************************************************************************/

* Start from a clean session before any data are loaded.
clear *
* Drop all macros left over from earlier runs.
macro drop _all
* Do not pause for --more-- while output scrolls.
set more off 

********************************************************************************
** Directory names
********************************************************************************

* Run the helper do-file GetDirNames. The globals used throughout this script
* (e.g. tracker, hrsexit, hrscore, save) are presumably defined there -- TODO confirm.
do GetDirNames

********************************************************************************
** TRACKER
********************************************************************************

* Columns needed from the cross-wave tracker: person identifiers, the
* interview-type flag of every wave (?IWTYPE), and month/year of birth and of
* known death.
local VARS HHID PN ?IWTYPE BIRTHMO BIRTHYR KNOWNDECEASEDMO KNOWNDECEASEDYR

use `VARS' using ${tracker}, clear

* Interview types of the 1992-1996 waves (letters A-E) are not used.
drop AIWTYPE BIWTYPE CIWTYPE DIWTYPE EIWTYPE

* Replace the wave letter by the wave number: F -> 4, G -> 5, ..., P -> 13.
* -capture- lets the loop continue when a letter is absent from the file.
local wno = 4
foreach letter in F G H J K L M N O P {
    capture rename `letter'IWTYPE IWTYPE`wno'
    local ++wno
}

* One row per person and wave; w is the wave number.
reshape long IWTYPE, i(HHID PN) j(w)

label define IWTYPE  1 "1.  Core interview obtained"           ///
                     5 "5.  Core interview not obtained"       ///
                    11 "11.  Exit interview obtained"          ///
                    15 "15.  Exit interview not obtained"      ///
                    21 "21.  Post-exit interview obtained"     ///
                    25 "25.  Post-exit interview not obtained" ///
                    99 "99.  Not in the sample this wave"
label values IWTYPE IWTYPE

* Work with lower-case variable names from here on.
renvars, lower

* "Don't know" codes in the month/year fields become missing.
foreach stem in birth knowndeceased {
    replace `stem'mo = . if inlist(`stem'mo, 98, 99)
    replace `stem'yr = . if inlist(`stem'yr, 9998, 9999)
}

* Month/year -> Stata daily date (days since 1 Jan 1960); the 15th is the
* assumed day of the month.
generate birthdate = mdy(birthmo, 15, birthyr)
format birthdate %td
label variable birthdate "DATE OF BIRTH"

generate deathdate = mdy(knowndeceasedmo, 15, knowndeceasedyr)
format deathdate %td
label variable deathdate "DATE OF DEATH (KNOWNDECEASEDMO,YR)"

* Age at death in completed years, counting a year as 365 days.
* NOTE(review): leap days are ignored, so the result can be one year too high
* for deaths shortly before a birthday -- confirm this is acceptable.
generate xage = floor((deathdate - birthdate) / 365)

keep hhid pn w xage deathdate birthdate

* Tag the tracker-based measures with the suffix "trk" and restore upper-case
* identifiers so that the file merges with the HRS files.
renvars xage deathdate birthdate, postfix(trk)
renvars hhid pn, upper

save ${save}/tracker, replace

********************************************************************************
** COVERSCREEN (A) 
********************************************************************************

** Variables
*   A019:   Age
*   A099:   Number of coresident kids
*   A101:   Number of kids 
*   A103:   Proxy relationship to respondent
*   A106:   Count of contact children
*   A121:   Month of death
*   A123:   Year of death
*   A124:   Location of death  		
*   A126M:  State of death (masked) = census division
*   A028:   (for waves 02,10,12) Respondent living in a nursing home/facility: 
*           1 means Yes
*           2 means Hospice
*           5 means No
*   A167:   (for waves 04,06,08) Respondent living in a nursing home/facility:   
*   A066:   Year R moved to nursing home
*   A070:   Whether R still owned or rent a home outside of facility
*   B063:   (section demographics 2002-2012) married, divorced, etc.
*   A038:   married, partnered, etc.
** 2004-2012
* Per-wave coverscreen extracts. Marital status (B063) is added from the
* section B respondent file; the one-letter wave prefix is stripped from names.
foreach wave in x12 x10 x08 x06 x04 {
    use HHID PN ?A019 ?A099 ?A101 ?A103 ?A106 ?A121 ?A123 ?A124 ?A126M ///
                ?A065 ?A066 ?A070 ?A071 using ${hrsexit}/`wave'A_R, clear
    merge 1:1 HHID PN using ${hrsexit}/`wave'B_R, nogenerate keepusing(?B063)
    renvars ?A*, predrop(1)
    renvars ?B*, predrop(1)
    generate xIW = 1
    save ${save}/`wave', replace
}
// The coverscreen is used to find the ID and the death date. The exit
// interview also holds the year of the last interview, but the information on
// the month is confusing (SA114: PREV WAVE IW YEAR), so that is derived after
// merging with past data.

** 2002: treated separately because some variable names differ
use HHID PN SA038 SA101 SA019 SA103 SA121 SA123 SA065 SA066 SA070 SA071 ///
    using ${hrsexit}/x02A_R, clear
renvars ?A*, predrop(1)
// A038 is deliberately NOT renamed to B063: the coding is different.
generate xIW = 1
save ${save}/x02, replace

** 2000: coupleness only
use HHID PN R597 using ${hrsexit}/x00CS_R, clear
label define R597 1 "1. PART OF A MARRIED COUPLE"   ///
                  3 "3. PART OF A PARTNERED COUPLE" ///
                  6 "6. NOT PART OF A COUPLE"
label values R597 R597
generate xIW = 1
save ${save}/x00, replace

** Add A028: R is a nursing home resident at time of death (2002, 2010-2014)
foreach wave in x12 x10 x02 {
    use HHID PN ?A028 using ${hrsexit}/`wave'A_R, clear
    merge 1:1 HHID PN using ${save}/`wave', nogenerate
    renvars ?A*, predrop(1)
    save ${save}/`wave', replace
}

** Add A167, the 2004-2008 counterpart of A028, and store it under the name A028
foreach wave in x08 x06 x04 {
    use HHID PN ?A167 using ${hrsexit}/`wave'A_R, clear
    merge 1:1 HHID PN using ${save}/`wave', nogenerate
    renvars ?A*, predrop(1)
    rename A167 A028
    save ${save}/`wave', replace
}

** Add A038: current coupleness (2004-2010)
foreach wave in x10 x08 x06 x04 {
    use HHID PN ?A038 using ${hrsexit}/`wave'A_R, clear
    merge 1:1 HHID PN using ${save}/`wave', nogenerate
    renvars ?A*, predrop(1)
    save ${save}/`wave', replace
}

* Stack the waves. -append- numbers the files 0 (x12) to 6 (x00), so 11 - w
* gives the wave number: 11, 10, ..., 5.
use ${save}/x12, clear
append using ${save}/x10 ${save}/x08 ${save}/x06 ${save}/x04 ///
             ${save}/x02 ${save}/x00, generate(w)
replace w = 11 - w

unique HHID PN

sort HHID PN w
order HHID PN w

* Date of death: A121 is the MONTH and A123 the YEAR of death.
* DK codes become missing; the 15th is the assumed day of the month.
replace A121 = . if inlist(A121, 98, 99)
replace A123 = . if inlist(A123, 9998, 9999)
generate deathdate = mdy(A121, 15, A123)   // Stata daily date (days since 1 Jan 1960)
format deathdate %td
label variable deathdate "DATE OF DEATH"
drop A121 A123

* Date of the move into a nursing home: A065 is the MONTH and A066 the YEAR.
replace A065 = . if inlist(A065, 98, 99)
replace A066 = . if inlist(A066, 9998, 9999)
generate NHdate = mdy(A065, 15, A066)      // Stata daily date (days since 1 Jan 1960)
format NHdate %td
label variable NHdate "DATE MOVED TO NH PRIOR TO DEATH"
drop A065 A066

save ${save}/coverscreen, replace

* Remove the per-wave work files.
foreach wave in x12 x10 x08 x06 x04 x02 x00 {
    erase ${save}/`wave'.dta
}

********************************************************************************
** PRELOAD (PR) 
********************************************************************************

/*
Z039:   previous wave IW year was financial R
Z093:   previous wave IW year
Z132:   (2002) whether R owned a home in HH's last interview 
Z079:   (from 2004 on) whether R owned a home in HH's last interview 
            -> don't use, bad data in 2002! instead use directly from core IWs
X060:   R gender
X065:   Whether R was part of couple at time of death
*/

* Build one preload extract per exit wave 2004-2012 and strip the one-letter
* wave prefix, so that e.g. ?Z039 becomes Z039 and ?X060_R becomes X060_R.
foreach wave in x12 x10 x08 x06 x04 {
   use HHID PN ?Z039 ?Z079 ?X060_R ?X065_R using ${hrsexit}/`wave'PR_R, clear   	
renvars ?Z*, predrop(1)
renvars ?X*, predrop(1)
save ${save}/`wave', replace
}
** 2002: treat separately, some variable names differ 
use HHID PN SZ132 SX060_R SX065_R using ${hrsexit}/x02PR_R, clear  // open exit records from 2002: preload
* Z132 is the 2002 counterpart of Z079 (see the variable list above).
rename SZ132 Z079
* NOTE(review): only SX060_R and SX065_R are loaded above, so the old names
* SX060 and SX065 below resolve only through variable-name abbreviation, and
* the new names X060 / X065 differ from the X060_R / X065_R produced for
* 2004-2012. After the append the 2002 values presumably end up in separate
* columns -- confirm whether X060_R / X065_R was intended here.
rename SX060 X060
rename SX065 X065
save ${save}/x02, replace

* Stack the waves. -append- numbers the files 0 (x12) to 5 (x02), so 11 - w
* gives the wave number: 11, 10, ..., 6.
use ${save}/x12, clear
append using ${save}/x10 ///
             ${save}/x08 ///
						 ${save}/x06 ///
						 ${save}/x04 ///
						 ${save}/x02, gen(w) 
replace w=11-w

unique HHID PN

sort HHID PN w
order HHID PN w

save ${save}/preload, replace

* Remove the per-wave work files.
foreach wave in x12 x10 x08 x06 x04 x02 {
erase ${save}/`wave'.dta
}

********************************************************************************
** WILLS AND LIFE INSURANCE (T)
********************************************************************************

/*                  
T101:    own home at death, did R still own home when died? 
         missing means R did not own home as of previous IW
T066:    If T101=2,5, proxy is asked whether R ever owned the home
T102:    owned home prior to death, what happened to home (gave away, sold, etc.)?
T104M1:  owned home prior to death, who got home (spouse, child, etc.)?
T111:    owned home at death, what happened to home (spouse, sold, inherited, etc.)?
T113:    if home inherited: who inherited home?
T116:    value of home (either sold or hypothetically "if had been sold")
T117:    lower bound on home value
T118:    upper bound on home value
T119:    if 98=DK, if 99=RF on sequencing
 ** 
T155:    R put assets in trust
T156:    R has a will
T157:    R's will probated
T161:    if R has neither a trust nor a probated will: 
         What happened to R's assets and possessions? 
 ** If R has a trust, a probated will, or T161 has determined that R left 
    assets/possessions without having a trust or a probated will, then 
    specific questions about the identity of the recipients and the amount 
    left behind to the various recipients are asked:                             
T164:    Assets left to children? 
T165:    Did R's will provide equally for all children? (if R had more than 1 kid) 
T173:    Total value of estate (excluding life insurance)
T174:    Lower bound on total value of estate
T175:    Upper bound on total value of estate
T176:    if 98=DK, if 99=RF on sequencing
T242:    Whether value of estate includes value of home
ECBHOME: Whether value of estate includes value of home (only for 2002)
T9012:   Amount left to children (excluding life insurance)
T9013:   Percent of total estate left to children (excluding life insurance)

  ** Life insurance was so far excluded and is now asked about:
T181:    Asked all proxies: did anyone receive a settlement from life insurance? 
T182M1:  if yes: who were the beneficiaries of that life insurance?
T186:    Altogether, what was the value of the life insurance settlement?
T187:    Lower bound on life insurance settlement
T188:    Upper bound on life insurance settlement
T189:    if 98=DK, if 99=RF on sequencing
*/

* Build one section T extract per exit wave 2004-2012 and strip the one-letter
* wave prefix. In the -use- statement everything that follows a line-joining
* triple slash on the same line is comment text, i.e. the ?T900? ... ?T906?
* patterns are NOT loaded.
foreach wave in x12 x10 x08 x06 x04 {
   use HHID PN ?T066 ?T101 ?T102 ?T104M? ?T111 ?T113M? ?T116 ?T117 ?T118 ?T119 /// 
               ?T155 ?T156 ?T157 ?T161 	   ///
               ?T163       /// ?T900?          ///
               ?T164 ?T165 /// ?T901? ?T902?_* ///
			   ?T168 /// ?T903? ///				
			   ?T169 /// ?T904? ///				
		  	   ?T170 /// ?T905? ///				
			   ?T171 /// ?T906? ///				
               ?T173 ?T174 ?T175 ?T176 ///                      
               ?T242  ?T181 ?T182M1 ?T186 ?T187 ?T188 ?T189 using ${hrsexit}/`wave'T_R, clear   	
renvars ?T*, predrop(1)
save ${save}/`wave', replace
}
** 2002: treat separately, T066 not available in this wave; also need additional variables
* Only the home-related T variables plus ECBFLAG, ECBPATH and ECBHOME are
* loaded for 2002 (ST9012 is commented out).
use HHID PN ST101 ST102 ST104M1 ST111 ST113M1 ST116 ST117 ST118 ST119 /// ST9012 ///
ECBFLAG ECBPATH ECBHOME using ${hrsexit}/x02T_R, clear  // open exit records from 2002: wills and life insurance
renvars ?T*, predrop(1)
save ${save}/x02, replace

* Stack the waves. -append- numbers the files 0 (x12) to 5 (x02), so 11 - w
* gives the wave number: 11, 10, ..., 6.
use ${save}/x12, clear
append using ${save}/x10 ///
             ${save}/x08 ///
						 ${save}/x06 ///
						 ${save}/x04 ///
						 ${save}/x02, gen(w) 
replace w=11-w

unique HHID PN

sort HHID PN w
order HHID PN w


save ${save}/willsandli, replace

* Remove the per-wave work files.
foreach wave in x12 x10 x08 x06 x04 x02 {
erase ${save}/`wave'.dta
}

********************************************************************************
** HEALTH SERVICES AND INSURANCE (N)
********************************************************************************

** Medicaid coverage since previous wave (N005)

* Exit waves 2004-2012. N005 (Medicaid coverage since the previous wave) and
* N114 are loaded; the one-letter wave prefix is stripped from both.
local nwaves x12 x10 x08 x06 x04

foreach wave of local nwaves {
    use HHID PN ?N005 ?N114 using ${hrsexit}/`wave'N_R, clear
    renvars ?N???, predrop(1)
    save ${save}/`wave', replace
}

* Stack the waves. -append- numbers the files 0 (x12) to 4 (x04), so 11 - w
* gives the wave number: 11, 10, ..., 7.
use ${save}/x12, clear
append using ${save}/x10 ${save}/x08 ${save}/x06 ${save}/x04, generate(w)
replace w = 11 - w

save ${save}/section_n, replace

* Remove the per-wave work files.
foreach wave of local nwaves {
    erase ${save}/`wave'.dta
}

********************************************************************************
** FUNCTIONAL LIMITATIONS AND HELPERS (G)
** Cognition (D) 
********************************************************************************
 
** 2002-2012:
// Memory variable for exit IWs: the proxy is asked to rate the decedent's
// memory and ability to think about things one month before death.
// D501: 1   = excellent
//       2   = very good
//       3   = good
//       4   = fair
//       5   = poor
//       8,9 = DK, RF
// This question sits in SECTION D "Cognition"; SECTION G (functional
// limitations and helpers) of the exit IW has no such question. In the core IW
// the memory question is in the functional limitations and helpers section.
local dwaves x12 x10 x08 x06 x04 x02

foreach wave of local dwaves {
    use HHID PN ?D501 using ${hrsexit}/`wave'D_R, clear
    renvars ?D*, predrop(1)
    save ${save}/`wave', replace
}

* Stack the waves; 11 - w converts the append counter (0-5) to waves 11-6.
use ${save}/x12, clear
append using ${save}/x10 ${save}/x08 ${save}/x06 ${save}/x04 ${save}/x02, generate(w)
replace w = 11 - w
unique HHID PN
sort HHID PN w
order HHID PN w

* Binary memory indicator under the same name as in the core IWs:
* ratings 1-3 -> 0 (no memory issues), 4-5 -> 1 (memory issues), DK/RF -> .d
recode D501 (1/3 = 0) (4/5 = 1) (8/9 = .d), generate(xrmemrye)

save ${save}/xmemry, replace

* Remove the per-wave work files.
foreach wave of local dwaves {
    erase ${save}/`wave'.dta
}

 
** 2002-2012
local gwaves x12 x10 x08 x06 x04 x02

foreach wave of local gwaves {
    use HHID PN ?G015 ?G020 ?G022 ?G024 ?G029 ?G031 ?G033_* ///
                ?G043 ?G046 ?G049 ?G053 ?G055_1 ?G061 ?G129 ///
        using ${hrsexit}/`wave'G_R, clear
    renvars ?G*, predrop(1)
    save ${save}/`wave', replace
}

* Stack the waves; 11 - w converts the append counter (0-5) to waves 11-6.
use ${save}/x12, clear
append using ${save}/x10 ${save}/x08 ${save}/x06 ${save}/x04 ${save}/x02, generate(w)
replace w = 11 - w
unique HHID PN
sort HHID PN w
order HHID PN w

* Nursing home status from the coverscreen is needed below, so the coverscreen
* file must already have been created.
merge 1:1 HHID PN w using ${save}/coverscreen, keepusing(A028) nogenerate
* Memory indicator from the cognition section.
merge 1:1 HHID PN w using ${save}/xmemry, keepusing(xrmemrye) nogenerate

* NOTE:
* The I/ADL questions in the exit interview are about 'help' with activities,
* whereas the core IW variables we use are about 'difficulty' with activities.
* Exit wording for walking: 'Because of a health or memory problem did anyone
* help [her/him] get across a room (in the last three months of [her/his]
* life)?'. Core wording: 'Because of a health or memory problem do you have any
* difficulty with walking across a room?'

* Recode ADLs and IADLs into binary variables.
* Raw codes: 1 = yes, 5 = no, 6 = could not do, 7 = did not do, 8/9 = DK/RF.
*   ADLs : G015 dress, G020 walk, G022 bathe, G024 eat, G029 bed, G031 toilet
*   IADLs: G043 meal, G046 shop, G049 phone, G053 medicine, G061 money
* (the exit IW has no question on the use of a map)
recode G015 G020 G022 G024 G029 G031 ///
       G043 G046 G049 G053 G061      ///
       (5 = 0) (6 7 = 1) (8 9 = .)

* A028 is the nursing home status: 5 = no, 1 = yes, 2 = hospice.
* Nursing home residents are NOT asked about help with meal, shop and medicine ...
foreach item of varlist G043 G046 G053 {
    tab `item' if A028 < 5, m
}
* ... but they are asked about phone and money.
foreach item of varlist G049 G061 {
    tab `item' if A028 < 5, m
}
* Suggestion: set meal, shop and medicine to 1 for nursing home residents.
* This matters when NH hours are imputed from the number of functional
* limitations; otherwise the required hours would be underestimated.
foreach item of varlist G043 G046 G053 {
    replace `item' = 1 if A028 < 5
}

* Give the (I)ADL items their analysis names.
rename (G015   G020   G022  G024 G029 G031   G043   G046  G049   G053  G061)   ///
       (xdress xwalkr xbath xeat xbed xtoilt xmeals xshop xphone xmeds xmoney)

* G129: days in bed over the last three months before death. Coding varies:
*   2004-2010: 0-100, 993 every day, 998 DK, 999 RF
*   2002     : 0-90 ,  93 every day,  98 DK,  99 RF
* NOTE: the survey uses G129 < 86 as an ask criterion for ADLs!
tab w G129 if G129 >= 86, m
generate nrdaysBed = G129
recode nrdaysBed (86/100 = 90) (993 = 90) (998 = .d) (999 = .d) if (w > 6)
recode nrdaysBed (86/90  = 90) ( 93 = 90) ( 98 = .d) ( 99 = .d) if (w == 6)
* nrdaysBed: number of days R stayed in bed more than half the day because of
*            illness or injury during the last three months before death.
*            90 = R stayed in bed every day
*            .d = proxy reporter does not know / refuses to answer

local adlitems  xdress xwalkr xbath xeat xbed xtoilt
local iadlitems xmeals xshop xphone xmeds xmoney

* If R stayed in bed for more than 85 days the ADL questions are skipped. The
* HRS still asks who most often helped R with dressing, bathing, eating,
* getting in and out of bed, using the toilet, and getting across a room.
foreach item of local adlitems {
    tab `item' if nrdaysBed == 90, m
}
* Those with nrdaysBed == .d are still asked.
foreach item of local adlitems {
    tab `item' if nrdaysBed == .d, m
}
* Suggestion: set the ADLs to 1 for those with nrdaysBed == 90.
foreach item of local adlitems {
    replace `item' = 1 if nrdaysBed == 90
}

* The IADL questions, in contrast, are asked even of those who stayed in bed
* every day.
foreach item of local iadlitems {
    tab `item' if nrdaysBed == 90, m
}

* Disability index: totadl = adla + iadlza, where
*   adla   = sum(BATHA, DRESSA, EATA, BEDA, WALKRA)
*   iadlza = sum(PHONEA, MONEYA, MEDSA, SHOPA, MEALA)
* With the -missing- option rowtotal() returns missing only when ALL arguments
* are missing; without it the result would be zero in that case.
egen xadla   = rowtotal(xdress xwalkr xbath xeat xbed), missing
egen xiadlza = rowtotal(xmeals xshop xphone xmeds xmoney), missing
egen xtotadl = rowtotal(xadla xiadlza), missing

save ${save}/fctlimits, replace

* Remove the per-wave work files and the temporary memory file.
foreach wave of local gwaves {
    erase ${save}/`wave'.dta
}
erase ${save}/xmemry.dta

********************************************************************************
** RAND FAT FILE 
********************************************************************************
// NOTE: RAND fat files are on a wave by wave basis just like the original HRS
//       data but they include all interview modules, except helper-level files
//       and some variables from the MC module, in a single data set. This makes 
//       it easier to quickly include additional variables of interest. 

** Variables
* A028 nursing home resident
* A030 is couple living together (either in NH facility or community)
* A070 still own/rent home outside of nursing facility
* A099 number of resident children
* A102 count of move in/out
* X033 move since previous wave
* X065 couple status
* Z080 previous wave marital status
* R077 bad record home
* H001 live on farm or ranch
* H002 type of home
* H004 own/rent home
* H008 own/rent farm or ranch
* H014 own/rent mobile home
* H088 R's home owned by relative
* H089 R's relationship to that relative
* H091 Relative owns home -- which child
* H092 R ever owned that home (the one owned by relative)
* E015     children own home 
* E016M0*  children OPNs who own a home
* E073     children got deed
* E074M*   children OPNs who got deed

* NOTE: for years 2000 and 1998 the following variables are extremely convoluted
*       to work with:
* X065 couple status
* Z080 previous wave marital status
* I included these variables to better understand how SUBHH is coded and used. 
* We can still do so using years 2002-2010 but leave out years 2000 and 1998 
* unless we really deem it essential later on for other purposes.

** NOTE: wave 12 (2014) does not include ?e073 ?e074m* (for future reference).

** 2002-2010 
* For each RAND fat file 2002-2010 (the global rand`wave' holds its path):
* load the selected variables, then bring their names into the common form
* fat_<UPPERCASE NAME> in three steps: strip the one-letter wave prefix,
* convert to upper case, add the prefix fat_. The ?subhh variable keeps its
* wave-specific name; it is combined into SUBHH after the waves are appended.
foreach wave in fat10 fat08 fat06 fat04 fat02 {
    use rahhidpn *subhh ?a028 ?a030 ?a070 ?a099 ?a102 ?x033 ?x065_r ?z080   ///
                        ?r077 ?h001 ?h002 ?h004 ?h008 ?h014 ///
                        ?h088 ?h089 ?h091 ?h092 ?e015 ?e016m* ?e073 ?e074m* ///
						using ${rand`wave'}, clear
    * Step 1: drop the wave letter (first character)
    renvars ?a028 ?a030  ?a070 ?a099  ?a102 ?x033 ?x065_r ?z080 ///
            ?h001 ?r077  ?h002 ?h004  ?h008 ?h014 ///
            ?h088 ?h089  ?h091 ?h092  ?e015 ?e016m* ?e073 ?e074m*,       predrop(1)
    * Step 2: upper-case names
    renvars a028  a030    a070  a099   a102  x033  x065_r  z080 ///
            h001  r077    h002  h004   h008  h014 ///
            h088  h089    h091  h092   e015  e016m*  e073  e074m*,       upper   
    * Step 3: mark the variables as coming from the fat files
    renvars A028  A030    A070  A099   A102  X033  X065_R  Z080   ///          
            H001  R077    H002  H004   H008  H014 ///
            H088  H089  H091  H092  E015  E016M*  E073  E074M*,          prefix(fat_)
    save ${save}/`wave', replace
    }
 
* In 2002 the multiple-mention variables carry an extra 0 after the M
* (e.g. fat_E016M0*); remove it so the names line up with the other waves.
use ${save}/fat02, clear  
rename  fat_E016M0*  fat_E016M*    // Kid OPNs who own a home
rename  fat_E074M0*  fat_E074M*    // Kid OPNs who got deed
save ${save}/fat02, replace   
    
** 2000
* The 2000 fat file uses question numbers (g####) as variable names; give the
* selected variables the same fat_* names as in the 2002-2010 files.
use gsubhh rahhidpn g558 g562 g794 g886 g900 g56 g6001          ///
    g3059 g3060 g3061 g3064 g3069 g3146 g3147 g3148m* g3149     ///
    g1983 g1984m* g2072 g2073m* using $randfat00, clear

* Coverscreen / preload items
rename (g558     g562     g794     g886     g900     g56      g6001)    ///
       (fat_A028 fat_A030 fat_A070 fat_A099 fat_A102 fat_X033 fat_R077)

* Housing items
rename (g3059    g3060    g3061    g3064    g3069)    ///
       (fat_H001 fat_H002 fat_H004 fat_H008 fat_H014)
rename (g3146    g3147    g3149)    ///
       (fat_H088 fat_H089 fat_H092)
rename g3148m* fat_H091M*

* Children who own the home / who got the deed
rename (g1983 g2072) (fat_E015 fat_E073)
rename g1984m* fat_E016M*
rename g2073m* fat_E074M*

save ${save}/fat00, replace

** 1998
* The 1998 fat file uses question numbers (f####) as variable names; give the
* selected variables the same fat_* names as in the 2002-2010 files.
use fsubhh rahhidpn f517 f521 f721 f809 f823 f56 f5648          ///
    f2741 f2742 f2743 f2746 f2751 f2828 f2829 f2830 f2831       ///
    f1767 f1768m* f1827 f1828* using $randfat98, clear

* Coverscreen / preload items
rename (f517     f521     f721     f809     f823     f56      f5648)    ///
       (fat_A028 fat_A030 fat_A070 fat_A099 fat_A102 fat_X033 fat_R077)

* Housing items
rename (f2741    f2742    f2743    f2746    f2751)    ///
       (fat_H001 fat_H002 fat_H004 fat_H008 fat_H014)
rename (f2828    f2829    f2830    f2831)    ///
       (fat_H088 fat_H089 fat_H091 fat_H092)

* Children who own the home / who got the deed
rename (f1767 f1827) (fat_E015 fat_E073)
rename f1768m* fat_E016M*
rename f1828m* fat_E074M*

save ${save}/fat98, replace

* Stack the fat-file extracts. -append- numbers the files 0 (fat10) to 6
* (fat98), so 10 - w gives the wave number: 10, 9, ..., 4.
use ${save}/fat10, clear
append using ${save}/fat08 ${save}/fat06 ${save}/fat04 ${save}/fat02 ///
             ${save}/fat00 ${save}/fat98, generate(w)
replace w = 10 - w

* Household id = characters 1-6 of rahhidpn, person number = characters 7-9.
generate HHID = substr(rahhidpn, 1, 6)
generate PN   = substr(rahhidpn, 7, 3)

sort HHID w
order HHID w

* Collect the wave-specific sub-household ids (fsubhh for wave 4, ..., msubhh
* for wave 10) in one string variable.
gen str SUBHH=""
local wno = 4
foreach letter in f g h j k l m {
    replace SUBHH = `letter'subhh if w == `wno'
    local ++wno
}
drop msubhh lsubhh ksubhh jsubhh hsubhh gsubhh fsubhh

* fat_E016M* holds the OPNs of the kids who own a home; store them as numbers.
destring fat_E016M*, replace

sort HHID SUBHH PN w
order HHID SUBHH PN w

drop rahhidpn

save ${save}/hrshousing, replace

* Remove the per-wave work files.
foreach wave in fat10 fat08 fat06 fat04 fat02 fat00 fat98 {
    erase ${save}/`wave'.dta
}

********************************************************************************
** FAMILY STRUCTURE (CHILDREN) (HHMEMBERCHILD)
******************************************************************************** 

* Variables not available in RAND fat file.
* HE035: child moved into R home or R moved into child home   
* HE037: move made to help out child, R, or both
* -> Note: Even though the wording of the questioning suggests that these 
*          variables are only coded for children it turns out that they are
*          also coded for other relatives such as siblings and parents. 
* EX061 PEOPLE RELATION TO R (X061_MC in 2002; G1977 in 2000; F1761 in 1998)

** 2002-2010
foreach wave in H10 H08 H06 H04 H02 {
    use HHID ?SUBHH OPN ?E035 ?E037 ?*X061* using ${hrscore}/`wave'E_MC, clear
    renvars ?E035 ?E037 ?*X061*, predrop(1)
    rename *X061* EX061    // the relationship variable has a different name in 2002
    save ${save}/`wave', replace
}

** 2000
use HHID OPN G1977 G2014 G2015 GSUBHH using ${hrscore}/H00D_MC, clear
rename (G1977 G2014 G2015) (EX061 E035 E037)
save ${save}/H00, replace

** 1998
use HHID OPN F1761 F1798 F1799 FSUBHH using ${hrscore}/H98D_MC, clear
rename (F1761 F1798 F1799) (EX061 E035 E037)
save ${save}/H98, replace

* Stack the waves. -append- numbers the files 0 (H10) to 6 (H98), so 10 - w
* gives the wave number: 10, 9, ..., 4.
use ${save}/H10, clear
append using ${save}/H08 ${save}/H06 ${save}/H04 ${save}/H02 ///
             ${save}/H00 ${save}/H98, generate(w)
replace w = 10 - w

sort HHID w
order HHID w

* Collect the wave-specific sub-household ids (FSUBHH for wave 4, ..., MSUBHH
* for wave 10) in one string variable.
gen str SUBHH=""
local wno = 4
foreach letter in F G H J K L M {
    replace SUBHH = `letter'SUBHH if w == `wno'
    local ++wno
}
drop MSUBHH LSUBHH KSUBHH JSUBHH HSUBHH GSUBHH FSUBHH

destring OPN, replace

// Build a household-level file: keep only the records with info on moving.
drop if E035 == .

// Check whether HHID SUBHH w uniquely identify the observations.
duplicates report HHID SUBHH w
// -> They do not. A household can have several records per wave because a
//    moving event can involve several persons, each identified by an OPN.
//    Example: household HHID=="010404" in w==4 has OPN==201 and OPN==151.
//    To obtain one record per HHID SUBHH w the data are reshaped from long
//    to wide.

bysort HHID SUBHH w: generate recno = _n   // record number within HHID SUBHH w
order HHID SUBHH w recno                   // identifiers side by side in the browser
reshape wide OPN E035 E037 EX061, i(HHID SUBHH w) j(recno)
// What this reshape does:
// FROM:
// HHID    SUBHH   w    recno   OPN
// ------  -----  ---   -----   ---
// 010404    0     4      1     201
// 010404    0     4      2     151
//
// TO:
// HHID    SUBHH   w    OPN1    OPN2
// ------  -----  ---   ----    ----
// 010404    0     4     201     151
//
// -> one record per HHID SUBHH w, and no information is lost.

isid HHID SUBHH w
// -> no output is good: it means that HHID SUBHH w uniquely identify the
//    observations.

sort HHID w
order HHID SUBHH OPN* E035*

save ${save}/whomoves, replace

* Remove the per-wave work files.
foreach wave in H10 H08 H06 H04 H02 H00 H98 {
    erase ${save}/`wave'.dta
}

********************************************************************************
** RAND LONGITUDINAL FILE 
********************************************************************************

* Housing variables:
* H*AHOUS: Gross value of primary residence 
* H*AFHOUS: Indicates if value is imputed and if so, what level of information 
*           is available during the imputation process
* H*AHOUB: Gross value of secondary residence 
* H*AFHOUB: Indicates if value is imputed and if so, what level of information 
*           is available during the imputation process
* H*ATOTH: Net value of primary residence
* H*ANETHB: Net value of secondary residence


* V1: variables to load from the RAND HRS longitudinal file. The asterisk in a
*     name stands for the wave number. Entries that follow a line-joining
*     triple slash on the same line are comment text and are NOT loaded.
local V1 hhidpn   rahhidpn hacohort ragender raracem  rahispan racohbyr rabyear ///
         raestrat raehsamp raedyrs  raevbrn  raeduc   rarelig  ///
         ///h*hhresp r*finr   r*famr   ///
				 r*proxy   ///
         h*hhidc  r*wtresp r*wtr_nh r*wtcrnh r*iwstat r*iwbeg  r*iwend  ///
		     ///r*iearn  r*ipena  r*issdi  r*isret  r*iunwc  r*igxfr     /// 
         r*govmd  ///r*hiltc  r*govva  r*higov  ///
				 r*livsib ///
				 r*beqany r*beq10k r*beq100  ///
         h*ahous  h*afhous h*ahoub  h*afhoub h*atoth  h*anethb          /// housing variables
         h*atotb  h*atotf  h*atotn  /// h*achck  h*afchck ///
         h*arles  h*atran  h*absns h*aira ///  ///
				 h*itot   ///r*lifein ///
				 r*agey_b h*cpl    h*child  s*agey_b   ///
         r*adla   r*iadlza r*shlt   ///
		     r*cogtot r*memry  r*memrye r*alzhe  r*demen  r*alzhee r*demene ///
		     r*nhmliv r*nhmday r*nrshom r*homcar ///
         r*walkr  r*dress  r*bath   r*eat    r*bed    r*toilt ///
         r*phone  r*meds   r*money  r*shop   r*meals  r*map   ///
         r*cendiv r*jyears 

* V2: the matching stub list for -reshape long-; the at-sign marks the position
*     of the wave number within each name.
* NOTE(review): V2 also lists r@iadla, which has no explicit entry in V1.
*     Presumably the wildcard r*adla in V1 already loads the r#iadla
*     variables as well -- TODO confirm.
local V2 h@hhidc  r@wtresp r@wtr_nh r@wtcrnh r@iwstat r@iwbeg  r@iwend  ///
		     ///r@iearn  r@ipena  r@issdi  r@isret  r@iunwc  r@igxfr     /// 
         r@govmd  ///r@hiltc  r@govva  r@higov  ///
				 r@livsib ///
				 r@beqany r@beq10k r@beq100 /// 
         ///h@hhresp r@finr   r@famr   ///
				 r@proxy  ///  
         h@ahous  h@afhous h@ahoub h@afhoub h@atoth  h@anethb          /// housing variables
         h@atotb  h@atotf  h@atotn  /// h@achck  h@afchck ///
				 h@arles  h@atran  h@absns h@aira ///
				 h@itot   ///r@lifein ///
				 r@agey_b h@cpl    h@child  s@agey_b   ///
         r@adla   r@iadlza r@shlt   ///
		     r@cogtot r@memry  r@memrye r@alzhe  r@demen  r@alzhee r@demene ///
		     r@nhmliv r@nhmday r@nrshom r@homcar ///
         r@walkr  r@dress  r@bath   r@eat    r@bed    r@toilt ///
         r@phone  r@meds   r@money  r@shop   r@meals  r@map   ///
         r@cendiv r@jyears r@iadla
             		
* Load the selected RAND HRS variables (local V1) and reshape them to one row
* per respondent and wave using the stub list in local V2.
use `V1' using ${randhrs}, clear
qui: reshape long `V2', i(hhidpn) j(w)

* Household id = characters 1-6 of rahhidpn, person number = characters 7-9,
* sub-household id = 7th character of hhhidc.
gen HHID      = substr(rahhidpn ,1,6)
gen PN        = substr(rahhidpn ,7,3)
gen SUBHH     = substr(hhhidc   ,7,1)   // extract sub-household id
sort HHID PN w

* Combined weight: respondent weight, replaced by the nursing home weight for
* respondents living in a nursing home.
gen rwtall=rwtresp
replace rwtall=rwtr_nh if rnhmliv==1 // assign NH weights
// -> NOTE: now provided by RAND (r*wtcrnh) 

* Drop the leading r from the individual (I)ADL items.
rename (rdress  rwalkr  rbath  reat   rbed   rtoilt  rmeals  rshop  rphone  rmeds rmoney)  ///
       ( dress   walkr   bath   eat    bed    toilt   meals   shop   phone   meds  money) 
// -> same stems as the exit IW items (which carry an x prefix)

* From wave 10 (2010) on the memry variable is replaced by the Alzheimer's and
* dementia indicators, so memry is rebuilt from them:
*   1 if either condition is reported, 0 if both are reported as absent.
* FIX: the second statement of each pair used to assign 1 as well, which coded
*      every respondent with non-missing indicators as having a memory problem.
replace rmemry = 1 if ((ralzhe==1 | rdemen==1) & w>=10)
replace rmemry = 0 if ((ralzhe==0 & rdemen==0) & w>=10)
// -> In 2010 memry variable is replaced by Alzheimer and dementia indicators
replace rmemrye = 1 if ((ralzhee==1 | rdemene==1) & w>=10)
replace rmemrye = 0 if ((ralzhee==0 & rdemene==0) & w>=10)
// -> memrye is whether respondent has ever been diagnosed with a memory-related
//    disease
//    Update: using ralzhee and rdemene instead of ralzhe and rdemen.

* Codes 3 and 4 of rmemry are set to missing.
recode rmemry (3 4 =.)

* Construction of disability index: totadl = adla + iadlza, where
* adla   = sum(BATHA,DRESSA,EATA,BEDA,WALKRA)
* iadlza = sum(PHONEA,MONEYA,MEDSA,SHOPA,MEALA)
* NOTE: RAND uses the "some difficulty" versions of the individual measures to construct radla.
*       e.g., RDRESSA 'R Some Diff-Dressing' instead of RDRESS 'R Diff-Dressing'.
*       QUESTION: Why don't we follow RAND's approach here?
* Both are taken from RAND. RAND also provides raw (I)ADLA scores. 
* 1-6 ADLs : dress, walkr, bath, eat, bed, toilet.
*            0=no, 1=yes, 2=can't do (RAND codes these to 'yes' in count), 9=don't do 
* NOTE: Below, we code 9=don't do as 1=yes. RAND treats these codes as missing.
*       For meds, we code .z=Don't do/No if did as 0=no. RAND treats these codes as missing.
 
/*
Except for taking medications from Wave 4 forward, Rw[iadl] is derived as 
follows. If the Respondent answers yes or no to the first question about having 
any difficulty, then Rw[iadl] is set to 1 for yes, difficulty or 0 for no 
difficulty, respectively. ... If the Respondent answers "can’t do" or "don’t do" 
to the first question, the second question that asks if it is because of a 
health problem is checked. If it is because of a health problem, Rw[iadl] is set 
to 2 for "can’t do". If not, the answer is considered a "don’t do" response and 
Rw[iadl] is set to 9.

HRS I/ADL constructs use the "some difficulty" versions of the I/ADL measures. E.g., 
for IADLs:

RwIADLA = sum (RwPHONEA, RwMONEYA, RwMEDSA)
RwIADLZA = sum (RwPHONEA, RwMONEYA, RwMEDSA, RwSHOPA, RwMEALA)

They are generally coded this way:

For other IADLs from Wave 3 forward, if a Respondent answers "no" or "yes" to a 
question asking if s/he has any difficulty, Rw[iadl]A is set to "0" for "no 
difficulty" or "1" for "some difficulty," respectively. If the answer is "can’t 
do" or "don’t do" and it is because of a health problem, Rw[iadl]A is set to "1" 
for "some difficulty." Otherwise, Rw[iadl]A is set to .X.

(RwMEDSA adds a special .z code for don't do but would not have difficulty if did.)

*/

* Collapse each (I)ADL item to a 0/1 difficulty flag:
*   2 ("can't do") and 9 ("don't do") are both counted as 1 = difficulty.
* ADL items : bath dress eat bed walkr toilt
* IADL items: meals shop phone money (meds is handled separately below)
foreach item in bath dress eat bed walkr toilt meals shop phone money {
    recode `item' (2 9 = 1)
}
* meds additionally maps .z (don't do / no difficulty if did) to 0.
recode meds (2 9 = 1) (.z = 0)

* Summary indices.
*   adla   : 5 ADL items (toilt is recoded above but not part of the sum)
*   iadlza : 5 IADL items
*   totadl : adla + iadlza
* With the "missing" option rowtotal() returns missing only when every input
* is missing; without it an all-missing row would be counted as 0.
egen adla   = rowtotal(dress walkr bath eat bed), missing
egen iadlza = rowtotal(meals shop phone meds money), missing
egen totadl = rowtotal(adla iadlza), missing

* Interview dates and gaps between interviews
format riwbeg riwend %td            // display begin/end dates as dates
gen iwdurdays = riwend - riwbeg     // interview duration in days
summarize iwdurdays, detail         // almost all interviews are done in one day
gen iwdate = riwbeg + iwdurdays/2   // interview date = midpoint of begin and end
drop iwdurdays
format iwdate %td

* Date of the previous wave's interview, carried forward over gaps.
* Relies on the data still being sorted by HHID PN w.
by HHID PN: gen pwiwdate = iwdate[_n-1]
format pwiwdate %td
by HHID PN: carryforward pwiwdate, gen(cpwiwdate)
  // fills in information for both core and exit interviews, since the
  // RAND HRS data span all waves for each individual
drop pwiwdate

* Restrict to 1998 (wave 4) and later.
keep if w>=4

// Response and mortality status of R at each wave: IWSTAT
// Rs are identified by code 1, non-Rs by codes 0, 4-7 and 9.
//   1 R valid response status and alive
//   0 no response, R does not belong to the current wave
//   4 no response, R is presumed to be alive
//   5 no response, R died between current and last interview
//   6 no response, R died before a previous wave
//   7 no response, R dropped from sample
//   9 no response, no such coding here

sort HHID PN w
order HHID PN w

unique HHID PN  // -> 37,495 unique individuals

save ${save}/randhrs, replace

********************************************************************************
** RAND Family File (R File)
********************************************************************************

* Respondent level data (includes those without kids).
* Data from core IWs only.
* Uniquely ID'd by HHID PN w.

** Variables
/*
HwCHILD:     number of living and in-contact children (same as in core data set)
HwNKID:      number of children reported from the respondent-kid level
HwNDAU:      number of daughters
HwEDUCKMN:   indicates the child’s average years of completed education
HwWORKFTKN:  number of children who work full-time
HwWORKPTKN:  number of children who work part-time      
RwHLPADLKN:  number of children who help with the respondent’s ADLs
RwHLPIADLKN: number of children who help with the respondent’s IADLs

HwNKID is based on the best guess relationship, not the relationship reported 
each wave. This variable is different from HwCHILD because HwCHILD only sums up 
the alive and in-contact kids and step-kids reported in that wave.

HwNKID indicates the number of kids reported from the Respondent-kid level. It 
includes the Respondent’s own kids, step-kids, kids-in-law and unknown children 
type. It includes alive, deceased and no contact kids.
*/

* Variables read from the RAND family R file (wide form, "*" = wave).
* Switched off for now: h*ownrhmkn h*ownrhmkf r*hlpadlkn r*hlpiadlkn
*                       h*fcany h*fcamt h*tcany h*tcamt
local V1 ///
    hhidpn     rahhidpn   rlink      ///
    inw*       h*ownhmkn  ///
    h*nkid     h*ndau     h*markn    h*educkmn  h*child ///
    h*workftkn h*workptkn ///
    h*lvnear   h*resdkn   h*lv10mikn h*contkn

* Matching reshape stubs ("@" = wave).
local V2 ///
    inw@       h@ownhmkn  ///
    h@nkid     h@ndau     h@markn    h@educkmn  h@child ///
    h@workftkn h@workptkn ///
    h@lvnear   h@resdkn   h@lv10mikn h@contkn

use `V1' using ${randfamr}, clear
drop *unmarkn   // picked up by the h*markn wildcard but not reshaped
quietly reshape long `V2', i(hhidpn) j(w)

gen HHID = substr(rahhidpn, 1, 6)
gen PN   = substr(rahhidpn, 7, 3)

* Core data from 1998 (wave 4) through 2010 (wave 10) only.
keep if inrange(w, 4, 10)

* Fold the special missing codes of hlvnear into category 0:
*   .c = no contact, .k = no kids, .s = kids deceased
recode hlvnear (.c = 0) (.k = 0) (.s = 0)
label define LVNEAR 0 "0. no contact/no kids/deceased", modify

sort HHID PN w
order HHID PN w

unique HHID PN // 37,495 unique HHID PN

save ${save}/randfamr, replace

********************************************************************************
** RAND Family File (K File)
********************************************************************************

* Respondent-child level data.
* Data from respondents with kids only.
* Data from core IWs only.
* Uniquely ID'd by HHID PN w KIDID, collapse to HHID PN w. 

** Variables
/*
KIDID : uniquely identifies a kid. 
INW*  : indicate whether an individual responded to a particular wave.
KAREL : gives us RAND's best guess for the relationship of the kid to the resp.
        (RAND changed the name of this variable from KRREL to KAREL in last release.)
        If the resp. has no kids, this is coded to ".". However, there are many cases
        with "." where the resp. has responded to the wave and the record is for a kid.
        These cases are not mentioned in the RAND documentation for karel. Taking the
        fields K*REL and K*RELALT makes matters worse.
   -->  drop all kid records where KAREL=. or K*IND=. (these are empty anyway).
K*STAT: categorizes a child's status with the respondent: resident (1), died (4),
        non-resident (5), not kid (7)
K*INHP:  indicates whether the kid is reported in the Helper file (HP).
K*HELPR: indicates whether or not the child is a helper. It is derived from the 
         helper file (HP).  
         K*HELPR = .z  -> kid is not in helper file 
         K*HELPR = 1   -> kid is in helper file
         K*HELPR = 0   -> kid is not in helper file				 
*/

* Variables read from the RAND family K file (respondent-child level, wide).
local V1 ///
    hhidpn   kidid    rahhidpn  link      karel     ///
    h*subhh  inw*     k*inhp    k*agebg   k*nkid    /// kid characteristics
    k*incb   ///
    k*helpr  k*hlphrs k*hlpadl  k*hlpiadl k*hlpdays /// transfer from kid to R
    k*ownrhm k*deed   k*hmdeed  k*resd    k*ownhm   //  transfer from R to kid

* Matching reshape stubs ("@" = wave).
local V2 ///
    h@subhh  inw@     k@inhp    k@agebg   k@nkid    ///
    k@incb   ///
    k@helpr  k@hlphrs k@hlpadl  k@hlpiadl k@hlpdays ///
    k@ownrhm k@deed   k@hmdeed  k@resd    k@ownhm

use `V1' using ${randfamk}, clear
drop kp* // -> kid-partner variables are caught by the k* wildcards; not needed
quietly reshape long `V2', i(hhidpn kidid) j(w)

* Core data from 1998 (wave 4) through 2010 (wave 10) only.
keep if inrange(w, 4, 10)

* kresd==2 ("resident away") is treated as resident (=1).
recode kresd (2=1)

gen HHID = substr(rahhidpn, 1, 6)
gen PN   = substr(rahhidpn, 7, 3)
rename hsubhh SUBHH
sort HHID PN w

* Respondent-wave summaries of the child generation (prefix hk), built from
* the respondent-child records.
foreach v in ownrhm deed hmdeed {
    // ownrhm: R signed over home ownership to children prior to death
    // deed  : kids received a deed to a house from respondent
    // hmdeed: kids on home deed
    by HHID PN w: egen hk`v' = max(k`v')
}
by HHID PN w: egen hkresd = total(kresd), missing   // # of resident kids (2 recoded to 1 above)
by HHID PN w: egen hknkid = total(knkid), missing   // # of grandchildren
by HHID PN w: egen hkincb = mean(kincb)
by HHID PN w: egen hkage  = mean(kagebg)

order HHID PN w SUBHH kidid inw karel kinhp khelpr khlphrs

* Save the kid-level subset before collapsing to the respondent level
* (the output header lists randkids as an input of
* Decedents_Sample_Supplement.do).
// Author's note: kinhp=1 means kid is a helper (may not have provided
// positive hours); khelpr=1 means kid provided positive hours.
// NOTE(review): this reading differs from the K*HELPR description in the
// variable notes above -- confirm against the RAND family file codebook.
savesome HHID PN w SUBHH kidid inw link karel ///
         kresd kinhp khelpr khlphrs ///
         kownhm kownrhm kdeed ///
using ${save}/randkids, replace

* Collapse to one record per respondent-wave; the hk* variables are constant
* within HHID PN w, so keeping the first record loses nothing.
sort HHID PN w
by HHID PN w: keep if _n == 1
keep HHID PN w inw hk*

unique HHID PN // -> 34,413 (NOTE: differs from RAND HRS universe)

save ${save}/randfamk, replace

********************************************************************************
** Out-of-pocket medical expenditures (core & exit)
********************************************************************************
/*
NOTE: Data are already in 2010 dollars.

For details about the data, see Fahle, McGarry, and Skinner (2016) Fiscal Studies.
*/

* Load person-year out-of-pocket medical spending and index it by wave.
local V HHID PN year total_OOP iwtype

use `V' using ${oopmedexp}, clear

* 1998 and later; missing years would otherwise pass the ">=" test.
keep if (year>=1998) & !mi(year)

* Biennial survey: 1998 -> wave 4, 2000 -> wave 5, ...
gen w = 4 + (year-1998)/2
drop year

* Lower-case the spending variable (total_OOP -> total_oop); it is the only
* *_OOP variable loaded above.
rename total_OOP total_oop

save ${save}/tmp_oopmedexp, replace

/*
The file at this point has total OOP med. exp. for each individual. We also
wish to have total spending for each HOUSEHOLD. We want this combined spending
to include spending for deceased household members, which should appear in the
survivor's households spending at the core interview following the decedent's
death. The code below does this.
*/

/*
Create a temporary file with RAND HRS data.
*/

* Person-wave skeleton from RAND HRS: own id, spouse id, interview status.
*   s*hhidpn: numeric concatenation of hhid and spouse pn (leading zeros dropped)
*   h*hhidc : string concatenation of hhid and subhh
use hhid pn hhidpn s*hhidpn h*hhidc r*iwstat using ${randhrs}, clear
reshape long s@hhidpn h@hhidc r@iwstat, i(hhid pn) j(w)

keep if (w>=4) // 1998 and later

* The sub-household id is not needed here (an earlier version extracted
* SUBHH from the last character of hhhidc).
drop hhhidc

* Upper-case the string ids to match the OOP file's HHID / PN.
rename (hhid pn) (HHID PN)

save ${save}/tmp_randhrs, replace

/*
Merge file to temporary OOP Med Exp file. Obtain most recent spouse information
for each decedent and identify wave of death. Retain only exit interviews.
File is now indexed by HHID PN w. Then, drop hhidpn and replace with shhidpn.
If shhidpn is 0 or missing, drop the case. These cases have no spouse/partner.
*/

* Build one record per surviving spouse holding the exit-interview spending
* and wave of death of each of his/her deceased spouses.
use ${save}/tmp_randhrs, clear

merge 1:1 HHID PN w using ${save}/tmp_oopmedexp, keep(master match) nogen
// observations that appear only in the med exp file are discarded

* Keep only individuals who have an exit record (iwtype==1) in some wave.
sort HHID PN w
by HHID PN: egen decedent = max(iwtype==1)
keep if (decedent==1)
drop decedent

* Spouse id from the most recent core interview is carried forward so that it
* is available on the exit record.
by HHID PN: carryforward shhidpn, replace

* Wave of death = latest wave with riwstat==5.
by HHID PN: egen wavedied = max(cond(riwstat==5,w,.))

* Exit interviews only; status, type and wave are no longer needed.
keep if (iwtype==1)
drop riwstat iwtype w

rename total_oop dead_sp_oop

drop if (shhidpn==0)
// singles

drop if (shhidpn==.)
// spouse data missing

drop HHID PN hhidpn
// these identify the decedent; from here on the file is keyed by the survivor

// The file at this point has:
// [ survivors-hhidpn   decedents-oopmedexp   decedents-wave-died ]
//   shhidpn            dead_sp_oop           wavedied

* A survivor can have more than one deceased spouse, so shhidpn is not yet a
* unique key. Number the deceased spouses in order of wave of death ...
sort shhidpn wavedied
by shhidpn: gen spousenum = _n
tab spousenum
// a handful of individuals have multiple spouses that die (at most 2)

* ... and go wide so that each survivor occupies a single row:
*   dead_sp_oop --> dead_sp_oop1, dead_sp_oop2
*   wavedied    --> wavedied1,    wavedied2
reshape wide dead_sp_oop@ wavedied@, i(shhidpn) j(spousenum)

isid shhidpn
// now uniquely identified

* The later merge is on the survivor's own hhidpn.
rename shhidpn hhidpn

save ${save}/tmp_oopmedexp_survivors, replace

/*
Create another temporary OOP Med Exp file for spouses. 
1. in RAND data, retain only cases with non-missing spouse (shhidpn ~= 0). 
2. replace PN with spouse's pn. 
   file is now indexed by HHID PN w, where PN is spouse's PN.
3. merge to OOP med exp file. 
4. drop PN.
   rename medical spending to be spouse's spending.
   file is now indexed by hhidpn w.
	 the spending data on the file are for the spouse.
*/

* Build a file keyed by the respondent's hhidpn and wave that carries the
* SPOUSE's out-of-pocket spending for that wave.
use ${save}/tmp_randhrs, clear

drop riwstat
// not needed

* Records with a spouse/partner only.
keep if (shhidpn ~= 0) & !mi(shhidpn)

* Swap the respondent's PN for the spouse's PN, which is the last three
* characters of shhidpn.
tostring shhidpn, gen(shhidpn_string)
drop PN
gen PN = substr(shhidpn_string,strlen(shhidpn_string)-2,strlen(shhidpn_string))
drop shhidpn_string shhidpn

isid HHID PN w
// file must be uniquely indexed by HHID PN w, where PN is now the spouse's PN

* Attach the spouse's spending; keep matches only.
merge 1:1 HHID PN w using ${save}/tmp_oopmedexp, keep(match) nogen

drop HHID PN
// these identify the spouse (PN swapped above); hhidpn still identifies R

* Mark the spending data as belonging to the spouse, not the respondent.
rename (total_oop iwtype) (spouse_total_oop spouse_iwtype)

isid hhidpn w
// file must be uniquely indexed by hhidpn w

save ${save}/tmp_oopmedexp_spouses, replace

/*
Merge RAND file, OOP med exp file, and spouses and survivors files together.
*/

* Combine own, spouse and deceased-spouse spending into household totals.
use ${save}/tmp_randhrs, clear

drop shhidpn

* Own spending.
merge 1:1 HHID PN w using ${save}/tmp_oopmedexp, keep(master match) nogen
// observations that appear only in the med exp file are discarded

* Deceased spouses' exit spending (one row per survivor).
merge m:1 hhidpn using ${save}/tmp_oopmedexp_survivors
drop if _merge==2
// _merge = 2: very few cases (24) in which the surviving spouse cannot be
//             located, unclear why
drop _merge
lab drop _merge

* Living spouse's spending in the same wave.
merge 1:1 hhidpn w using ${save}/tmp_oopmedexp_spouses, nogen
// unmatched cases have no spouses. OK.

* Place each deceased spouse's spending in the wave in which he/she died.
* If both deceased spouses share a wave, the second one overwrites the first.
gen dead_spouse_oop = .
forvalues k = 1/2 {
    replace dead_spouse_oop = dead_sp_oop`k' if (w == wavedied`k')
}

drop dead_sp_oop? wavedied?

* Household spending from core interviews: respondent + living spouse.
egen total_oop_HH_core = rowtotal(total_oop spouse_total_oop), m

* Add the exit-interview spending of a deceased spouse.
egen total_oop_HH = rowtotal(total_oop_HH_core dead_spouse_oop), m

** Save

keep HHID PN w total_oop total_oop_HH

save ${save}/oopmedexp.dta, replace

/*
NOTE:
total_oop    : respondent's spending
total_oop_HH : household's spending (including spouse from core and exit interviews)
*/

** Clean up

* Remove the intermediate files created in the OOP section.
foreach f in tmp_randhrs tmp_oopmedexp tmp_oopmedexp_spouses tmp_oopmedexp_survivors {
    erase ${save}/`f'.dta
}

********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************
** Merge files
********************************************************************************

* Start from the person-wave skeleton of the RAND HRS extract.
use HHID SUBHH PN w riwstat using ${save}/randhrs, clear

unique HHID PN
// -> 37,495 individuals (337,455 obs = 37,495 x 9 waves (waves 4-12))

* Wave-level deflator.
merge m:1 w using ${save}/CPIdeflator, keep(1 3) nogen keepusing(deflator10)

* Household-level data: HHID + SUBHH is the correct identifier.
merge m:1 HHID SUBHH w using ${save}/whomoves, keep(1 3) nogen

* Mixed household- and respondent-level data: use both SUBHH and PN.
merge 1:1 HHID SUBHH PN w using ${save}/hrshousing, keep(1 3) nogen

* Respondent-level files, all keyed by HHID PN w. Unmatched using records are
* discarded. The order is kept as is because master values take precedence
* for variables that appear in more than one file.
foreach f in randhrs randfamr randfamk coverscreen preload ///
             willsandli fctlimits section_n tracker oopmedexp {
    merge 1:1 HHID PN w using ${save}/`f', keep(1 3) nogen
}

* Lower-case identifiers from here on.
rename (HHID SUBHH PN) (hhid subhh pn)

* add helper data
merge 1:1 hhid pn w using ${save}/helpers_with_imputations.dta, nogen keep(1 3)

sort hhid pn w
order hhid subhh pn w

unique hhid pn
// -> 37,495 individuals (337,455 obs = 37,495 x 9 waves (waves 4-12))

********************************************************************************
** Adjust nominal values
********************************************************************************

// Convert nominal values to constant 2010 dollars: <name>10 = h<name>/deflator10
//   atotb : net value of total wealth
//   atoth : net value of primary residence (HwAHOUS - HwAMORT - HwAHMLN)
//   atotn : total non-housing wealth
//   ahous : value of primary residence
//   ahoub : net value of secondary residence
//   atotf : net value of non-housing financial wealth
//   arles : net value of real estate (not primary residence)
//   atran : net value of vehicles
//   absns : net value of businesses
//   aira  : net value of IRA, Keogh accounts
foreach a in atotb atoth atotn ahous ahoub atotf arles atran absns aira {
    gen `a'10 = h`a'/deflator10
}

gen hitot10 = hitot/deflator10  // household income

* Drop the nominal versions.
* NOTE(review): harles, hatran, habsns and haira are deflated above but not
* dropped here -- confirm whether they are meant to stay in the file.
drop hatotb hatoth hatotn hahous hahoub hatotf hitot

********************************************************************************
** Sample selection & counts: all records of Rs with exit interview in 2004-2012
********************************************************************************

* Wave 12 is outside the observation window.
drop if w==12

sort hhid pn w

* Person-level flags (constant within hhid pn), derived from riwstat.
* Each flag is 1 if the condition holds in at least one of the person's records.

* Alive (status 1) or presumed alive (status 4) in wave 11
by hhid pn: egen alive11 = max(w==11 & inlist(riwstat,1,4))

* Coded as dropped from the sample (status 7) in wave 11
by hhid pn: egen drop11 = max(w==11 & riwstat==7)

* "Inappropriate" histories: does not belong to the wave (status 0) in wave 11
by hhid pn: egen inapp11 = max(w==11 & riwstat==0)

* No information: already dead (status 5 or 6) in wave 4
by hhid pn: egen noInfo = max(w==4 & inlist(riwstat,5,6))

* Died (status 5) in wave 5 or 6
by hhid pn: egen dead56 = max(riwstat==5 & inrange(w,5,6))

* Died (status 5) in waves 7-11
by hhid pn: egen dead711 = max(riwstat==5 & inrange(w,7,11))

/*
SUMMARY:

alive11: histories of Rs who are alive or presumed to be alive in wave 11
drop11:  histories of Rs who are coded as dropped from the sample in wave 11
inapp11: histories of Rs who do not belong to the sample in wave 11
noInfo:  histories without any information (dead already in wave 4)
dead56:  histories of Rs who die in waves 5 or 6 (some of these Rs have an
         exit IW in waves 7-11)
dead711: histories of Rs who die in waves 7-11
*/

* Sample counts for TABA2a are written to a text log. The log is switched off
* around the steps that do not feed the table. The counts quoted in the
* comments are those recorded by the original author.
capture log close
log using ${logs}/TABA2a_SAMPLE_COUNTS_1.txt, text replace

unique hhid pn
// -> 37,495 individuals (299,960 observations = 37,495 individuals x 8 waves (waves 4-11))
//    (TABA2a: Line 1 - HRS Universe)

* Deaths in waves 5-11, before and after netting out the groups dropped below.
unique hhid pn if (dead56==1 | dead711==1)
unique hhid pn if (dead56==1 | dead711==1) & !(alive11==1 | inapp11==1 | noInfo==1)
// -> same result in both cases:  9,804 unique individuals

* Remove histories that cannot belong to the decedent sample.
drop if alive11==1
// -> 182,344 obs (22,793 individuals) deleted
drop if inapp11==1 // flagged as not belonging to the survey in wave 11
// -> 1,080 obs (135 individuals) deleted
drop if noInfo==1  // no information since already dead in wave 4
// -> 21,264 obs (2,658 individuals) deleted

sort hhid pn w

unique hhid pn
// -> 11,909 individuals

unique hhid pn if (dead56==1 | dead711==1)
// -> 9,804 individuals
//    (TABA2a: Line 2 - died during waves 5-11)

* pause logging
log off

* Person-level flag: R has an exit IW in waves 7-11 (years 2004-2012)
by hhid pn: egen exitIW = max(xIW==1 & inrange(w,7,11))

* Person-level flag: R has an exit IW in waves 5-6 (years 2000-2002)
by hhid pn: egen exit56 = max(xIW==1 & inrange(w,5,6))

tab w xIW
// -> we have in this file data from exit interviews 5-11

* Exit IWs are also available for waves 5-6, but the main sample starts with
* wave 7. Recode the wave 5-6 exit interviews to 0 to keep them separate.
replace xIW=0 if (xIW==1 & w<7)

tab w xIW
/*
           |          xIW
         w |         0          1 |     Total
-----------+----------------------+----------
         5 |     1,295          0 |     1,295 
         6 |     1,471          0 |     1,471 
         7 |         0      1,221 |     1,221 
         8 |         0      1,310 |     1,310 
         9 |         0      1,330 |     1,330 
        10 |         0      1,443 |     1,443 
        11 |         0      1,187 |     1,187 
-----------+----------------------+----------
     Total |     2,766      6,491 |     9,257 
*/
// -> 9,257 exit interviews
//    6,491 in waves 7-11
//    2,766 in waves 5-6

* resume logging
log on

unique hhid pn if (exitIW==1)
// -> 6,491
//    (TABA2a: Line 3 - exit interview waves 7-11)

count if (riwstat==1 | xIW==1) & (exitIW==1)
count if (riwstat==1) & (exitIW==1)
count if (xIW==1) & (exitIW==1)
// -> 35,499 (core + exit)
// -> 29,008 (core)
// -> 6,491  (exit)

* Histories whose lowest status code is 4 or 7 contain no core IW
* information; drop them.
bysort hhid pn: egen minstat=min(riwstat)
drop if inlist(minstat,4,7)
drop minstat

sort hhid pn w

unique hhid pn
// -> 10,594
// -> (final sample created by this file, includes dropped individuals which we don't currently use)

unique hhid pn if (exitIW==1)
// -> 6,434 (decedent sample)
//    (TABA2a: Line 4 - appeared in at least 1 core interview in waves 4-8)

count if (riwstat==1 | xIW==1) & (exitIW==1)
count if (riwstat==1) & (exitIW==1)
count if (xIW==1) & (exitIW==1)
// -> 35,442 (core + exit)
// -> 29,008 (core)
// -> 6,434  (exit)

* close log
capture log close

tab w xIW
/*
           |          xIW
         w |         0          1 |     Total
-----------+----------------------+----------
         5 |     1,229          0 |     1,229 
         6 |     1,427          0 |     1,427 
         7 |         0      1,194 |     1,194 
         8 |         0      1,295 |     1,295 
         9 |         0      1,323 |     1,323 
        10 |         0      1,438 |     1,438 
        11 |         0      1,184 |     1,184 
-----------+----------------------+----------
     Total |     2,656      6,434 |     9,090 
*/
// -> 6,434 in waves 7-11

* Coarse sample classification built from exitIW, dead56 and dead711.
* Later assignments overwrite earlier ones, so the order below matters.
gen sample = 0
* 1: died in waves 5-11 and an exit IW is available in waves 7-11
*    (includes deaths in waves 5-6 whose exit IW took place in a later wave)
replace sample = 1 if (dead711>0 | dead56>0) & exitIW==1
* 2: died in waves 7-11, no exit IW in waves 7-11
replace sample = 2 if dead711>0 & exitIW==0
* 3: died in waves 5-6, no exit IW in waves 7-11
replace sample = 3 if dead56>0 & exitIW==0

tab sample if w==11
tab riwstat if sample==0 & w==11
// -> sample=0 respondents all dropped from the sample at some point during the
//    observation period

label define sample ///
    0 "0. dropped" ///
    1 "1. exit IW in 7-11 (died 5-11)" ///
    2 "2. no exit IW in 7-11 (died 7-11)" ///
    3 "3. no exit IW in 7-11 (died  5-6)"
label values sample sample

tab sample if w==11
// -> respondents with at least some information
//    sample=0: dropped at some point but have at least one core interview
//    sample=1: main sample (exit IW available in waves 7-11)
//    sample=2: death in waves 7-11 without exit IW in waves 7-11
//    sample=3: death in waves  5-6 without exit IW in waves 7-11
/*
                           sample |      Freq.     Percent        Cum.
----------------------------------+-----------------------------------
                       0. dropped |      1,060       10.01       10.01
   1. exit IW in 7-11 (died 5-11) |      6,434       60.73       70.74
2. no exit IW in 7-11 (died 7-11) |        305        2.88       73.62
3. no exit IW in 7-11 (died  5-6) |      2,795       26.38      100.00
----------------------------------+-----------------------------------
                            Total |     10,594      100.00
*/	

* Finer sample classification that also separates exit IWs in waves 5-6.
* Later assignments overwrite earlier ones, so the order below matters.
gen xsample = 0
replace xsample = 1 if (dead711==1 & exitIW==1)              // died 7-11, exit 7-11
replace xsample = 2 if (dead56 ==1 & exitIW==1)              // died 5-6 , exit 7-11
replace xsample = 3 if (dead56 ==1 & exit56==1)              // died 5-6 , exit 5-6
replace xsample = 4 if (dead711==1 & exitIW==0)              // died 7-11, no exit in 7-11
replace xsample = 5 if (dead56 ==1 & exit56==0 & exitIW==0)  // died 5-6 , no exit in 5-11

label define xsample ///
    0 "0. dropped (not dead)     " ///
    1 "1. died 7-11, exit 7-11   " ///
    2 "2. died 5-6 , exit 7-11   " ///
    3 "3. died 5-6 , exit 5-6    " ///
    4 "4. died 7-11, no exit 5-11" ///
    5 "5. died 5-6 , no exit 5-11"
label values xsample xsample

tab xsample if w==11
/*
                   xsample |      Freq.     Percent        Cum.
---------------------------+-----------------------------------
0. dropped (not dead)      |      1,060       10.01       10.01
1. died 7-11, exit 7-11    |      6,406       60.47       70.47
2. died 5-6 , exit 7-11    |         28        0.26       70.74
3. died 5-6 , exit 5-6     |      2,656       25.07       95.81
4. died 7-11, no exit 5-11 |        305        2.88       98.69
5. died 5-6 , no exit 5-11 |        139        1.31      100.00
---------------------------+-----------------------------------
                     Total |     10,594      100.00
*/
// -> 6,406 died 7-11, exit 7-11 } these groups will comprise the main decedent 
// ->    28 died 5-6 , exit 7-11 } sample (once we eliminate couples)
// -> 2,656 died 5-6 , exit 5-6  > we keep this sample for the imputations
//   -------------                 NOTE: keeping these also affects # random draws in imputation
// -> 9,090 total with exit IWs

// the remaining 1,060 + 305 + 139 = 1,504 will not be used in any analyses


* Timing of exit interviews by sample group (records with non-missing xIW).
tabulate w xsample if xIW<.
/*
           |             xsample
         w | 1. died 7  2. died 5  3. died 5 |     Total
-----------+---------------------------------+----------
         5 |         0          0      1,229 |     1,229 
         6 |         0          0      1,427 |     1,427 
         7 |     1,171         23          0 |     1,194 
         8 |     1,291          4          0 |     1,295 
         9 |     1,322          1          0 |     1,323 
        10 |     1,438          0          0 |     1,438 
        11 |     1,184          0          0 |     1,184 
-----------+---------------------------------+----------
     Total |     6,406         28      2,656 |     9,090 
*/
// -> shows when exit interviews occur and to which sample they belong

* Cross-check of the two sample definitions, one record per person (wave 11).
tabulate xsample sample if w==11
/*
                      |                   sample
              xsample | 0. droppe  1. exit I  2. no exi  3. no exi |     Total
----------------------+--------------------------------------------+----------
0. dropped (not dead) |     1,060          0          0          0 |     1,060 
1. died 7-11, exit 7- |         0      6,406          0          0 |     6,406 
2. died 5-6 , exit 7- |         0         28          0          0 |        28 
3. died 5-6 , exit 5- |         0          0          0      2,656 |     2,656 
4. died 7-11, no exit |         0          0        305          0 |       305 
5. died 5-6 , no exit |         0          0          0        139 |       139 
----------------------+--------------------------------------------+----------
                Total |     1,060      6,434        305      2,795 |    10,594 
*/

* Remove selection flags that are no longer needed, redundant ids, and the
* riadla variable. drop11, dead56, dead711, exitIW and exit56 stay in the file.
drop alive11 inapp11 noInfo
drop hhidpn rahhidpn hhhidc inw
drop r*iadla

order hhid pn w sample

********************************************************************************
** Save
********************************************************************************

save ${save}/decedent_sample.dta, replace

********************************************************************************
** Clean up temporary files
********************************************************************************

* Intermediate files that have been merged into decedent_sample.dta.
foreach f in tracker coverscreen fctlimits hrshousing preload randfamk ///
             randfamr randhrs section_n willsandli whomoves oopmedexp {
    erase ${save}/`f'.dta
}

********************************************************************************
