/*******************************************************************************

************
** Function: 
************

  Processes decedent data extract, defining many additional variables.
	Performs estate value imputations as well as the Pareto calculations 
	reported in the appendices.

************
** Inputs  : 
************

  - decedent_sample.dta             (see Decedents_GetSample.do)
  - decedent_sample_supplement.dta  (see Decedents_Sample_Supplement.do)

************
** Output  : 
************

  - decedents_sample_single.dta
  - decedents_sample_single_final.dta
	
	- FIGI1a.eps	
  - FIGI1b.eps
	- FIGI1c.eps 

	- TABG1.tex
	- TABG2.tex
	
	- TABA2a_SAMPLE_COUNTS_2.txt
	- POWER_LAW_RESULTS.txt
	
*******************************************************************************/

* Start from a clean session: remove data and other stored objects, turn off
* output paging, and drop every macro before the directory globals are
* (re)defined below.
clear *
set more off
macro drop _all

********************************************************************************
** Directory names
********************************************************************************

* NOTE(review): GetDirNames.do presumably defines the ${save} and ${logs}
* globals used further down -- confirm. It must run AFTER -macro drop _all-,
* otherwise whatever it defines would be erased again.
do GetDirNames.do

********************************************************************************
** Settings
********************************************************************************

local RECOMPUTE_PARETO    1
local GRAPH_PARETO        1

// set these settings to 1 (``true'') to re-create Pareto calculations
// in the appendix. set to 0 (``false'') to avoid this.
// (the locals are not referenced in this part of the file; they are
//  presumably consumed by the Pareto section further down.)

* NOTE(review): "hack" is not a scheme name that ships with Stata, so it is
* presumably a project-supplied scheme file that has to be on the adopath.
* The -permanently- option also stores the setting for future Stata sessions.
set scheme hack, permanently

********************************************************************************
** Master data set
********************************************************************************

* Load the decedent panel (one record per respondent-wave) and attach the
* supplement variables.
* The file paths are wrapped in double quotes so that a ${save} directory
* whose name contains spaces does not break -use- / -merge-.
use "${save}/decedent_sample.dta", clear
drop fat_H092 hkownrhm hkdeed // hkownhm // replace these with variables from supplement

* -unique- is a user-written command; it reports the number of distinct
* hhid-pn combinations.
unique hhid pn
// -> 10,594 individuals (84,752 observations = 10,594 x 8 waves (waves 4-11))

merge 1:1 hhid pn w using "${save}/decedent_sample_supplement" // add variables
xtset, clear
/*

VARIABLES CONTAINED IN THE SUPPLEMENT:

	NEW_HKOWNHM:     whether any kid owns a home
	NEW_HKOWNRHM:    whether any kid owns respondent's home
	FAT_H092:        whether respondent ever owned home
	NEW_HKDEED:      whether any kid receives a deed to the respondent's home
	OWNTOFREE:       respondent changes status from owning to rent-free or other
	                 without physically moving location
	KMOVE:           kid moves to respondent's residency
	RMOVE:           parent moves to kid's residency
	KALWYS:          kid always lives with parent
	RKMOVE:          both parent and kid move in order to live together
	DKMOVE:          direction of move is unknown
	MOVEHLPSK:       move helps kid
	MOVEHLPSR:       move helps respondent
	MOVEHLPSRK:      move helps both
	MOVEHLPSNEITHER: move helps neither
	MOVEHLPSDK:      not known who benefits from move           

*/

* The merge indicator is referenced by its full name (_merge) rather than the
* abbreviation _m, so the checks also work when variable abbreviation is
* switched off (set varabbrev off).
tab riwstat if _merge==1 // "master only" means data is only available in data set sample and
                         // could not find a counterpart in data set sample_supplement. 
// -> it makes sense that merge is unsuccessful for riwstat~=1 since these respondents
//    are only part of the master data set
tab hchild if riwstat==1 & _merge==1, m
// -> almost all of the remaining unsuccessful merges are explained by the fact
//    that respondents are childless or data on children is missing and so they
//    appear only in the master data set and not in the using data which is based
//    on respondents with children

// Bottom line: we can keep the unsuccessful merges
drop _merge

/*
IWSTAT: Response and mortality status: 
A response is identified by code 1, a non-response by codes 0, 4-7 and 9.
1    response, R is alive 
0 no response, R does not belong to the current wave
4 no response, R is presumed to be alive
5 no response, R died between current and last interview
6 no response, R died previous wave
7 no response, R dropped from sample
9 no response, no such coding here
-> only kept Rs with IWSTAT=1,4,5,6     
*/
* Interview status by wave.
tabulate w riwstat
// -> Wave 4: alive and response, presumed to be alive and no response, or not part of survey yet 
// -> Wave 11: dead or dropped from survey at some point (don't know vital status) 

* Sample membership.
tabulate sample 
// SAMPLE: We have 4 samples:
//    sample=0: dropped at some point during waves 4-11 but have at least one 
//              core interview (those without any core IWs we tossed out before)
//    sample=1: main sample (exit IW available in waves 7-11)
//    sample=2: death in waves 7-11 with missing exit IW
//    sample=3: death in waves 5-6 without exit IW (by construction)  
// -> sample=1 is our main sample:  
//    * Rs with exit interviews in 2004-2012 (waves 7 to 11)
//    * alive records are from 1998-2010
//    * for LTC hours during core IWs, records are from 2002-2010

* Keep everybody except sample 0: we want a sample for which we know that the
* respondents have died during the observation period.
keep if sample != 0

unique hhid pn
// -> 9,534 individuals (76,272 observations = 9,534 x 8 waves (waves 4-11))

// xIW: wave in which exit interview takes place. 
// =1 if exit takes place in 7-11, =0 if exit takes place in 5-6, otherwise missing

* Condition that picks the first death record of a respondent who has no exit
* interview at all (neither in waves 7-11 nor in waves 5-6).
local noExitFirstDeath "exitIW==0 & exit56==0 & riwstat==5"

* Before recoding: xIW should be missing on these records.
tabulate xIW if (`noExitFirstDeath'), missing

* Mark these records with xIW = -1. Since there is no exit IW, there is no
* exit data in this case.
replace xIW = -1 if `noExitFirstDeath'

tabulate w xIW
/*
           |               xIW
         w |        -1          0          1 |     Total
-----------+---------------------------------+----------
         5 |        44      1,229          0 |     1,273 
         6 |        95      1,427          0 |     1,522 
         7 |        45          0      1,194 |     1,239 
         8 |        36          0      1,295 |     1,331 
         9 |         5          0      1,323 |     1,328 
        10 |       105          0      1,438 |     1,543 
        11 |       114          0      1,184 |     1,298 
-----------+---------------------------------+----------
     Total |       444      2,656      6,434 |     9,534 

*/	
// ->   444 decedents in waves 5-11, have no exit interview in waves 5-11	 
// -> 2,656 decedents in waves 5-11, have exit interview in waves 5-6
// -> 6,434 decedents in waves 5-11, have exit interview in waves 7-11
//   -------
// -> 9,534 total decedents in waves 5-11		 

tabulate xsample xIW
/*
                      |               xIW
              xsample |        -1          0          1 |     Total
----------------------+---------------------------------+----------
1. died 7-11, exit 7- |         0          0      6,406 |     6,406 
2. died 5-6 , exit 7- |         0          0         28 |        28 
3. died 5-6 , exit 5- |         0      2,656          0 |     2,656 
4. died 7-11, no exit |       305          0          0 |       305 
5. died 5-6 , no exit |       139          0          0 |       139 
----------------------+---------------------------------+----------
                Total |       444      2,656      6,434 |     9,534 
*/

tabulate sample xIW
/*
                      |               xIW
               sample |        -1          0          1 |     Total
----------------------+---------------------------------+----------
1. exit IW in 7-11 (d |         0          0      6,434 |     6,434 
2. no exit IW in 7-11 |       305          0          0 |       305 
3. no exit IW in 7-11 |       139      2,656          0 |     2,795 
----------------------+---------------------------------+----------
                Total |       444      2,656      6,434 |     9,534 
*/
// -> 6,434 respondents with an exit IW
// -> 305   respondents who died between waves 7-11 without an exit IW in waves 7-11
// -> 2,795 respondents who died between waves  5-6 without an exit IW in waves 7-11

* Timing of death relative to the exit interview (exit IW in waves 7-11 only).
tabulate sample  if xIW==1 & dead711==1
// -> 6,406 deaths during waves 7 to 11, exit IW waves 7 to 11
tabulate sample  if xIW==1 & dead56==1
// -> 28 deaths during waves 5 to 6, exit IW waves 7 to 11
tabulate riwstat if xIW==1 & dead711==1
// -> 6,179 respondents die between 2004 and 2012 and have an exit IW subsequently
// ->   227 respondents die between 2004 and 2012 and have a delayed exit IW   
tabulate riwstat if xIW==1 & dead56==1
// ->    28 respondents die between 2000 and 2002 and have a delayed exit IW  


********************************************************************************
** Housekeeping
********************************************************************************
* The running sums below are computed within respondent in wave order, so the
* data must be sorted by respondent and wave first.
sort hhid pn w

* Age: age from the core record, replaced by A019 in the exit-interview record
* (age at death is only available when there is an exit IW).
gen age = cond(xIW==1, A019, ragey_b)

* Non-hispanic white indicator (missing when neither rule applies).
gen caucasian = 1 if raracem==1 & rahispan==0
replace caucasian = 0 if rahispan==1 | inlist(raracem, 2, 3)


* Interview counters
*   iwnr    : running number of the core IW while alive (1=first IW, 2=second IW, ...)
*   totnriw : total number of core IWs of the respondent
by hhid pn: gen  iwnr    = sum(riwstat) if riwstat==1
by hhid pn: egen totnriw = max(iwnr)
tabulate totnriw if xIW != ., missing
// -> every respondent has at least one core IW

* Countdown of core IWs until the exit IW: 7,6,5,...,0 where the exit IW itself
* is number 0. Defined *only* when an exit IW is available.
gen xnriw = 1 + totnriw - iwnr
replace xnriw = 0 if xIW == 1
replace xnriw = . if exitIW != 1

* In the exit record the "interview date" is the date of death.
replace iwdate = deathdate if xIW == 1

* Days between the current and the previous IW (or between death and the
* previous IW). If the previous interview date is missing (e.g., as for a first
* interview), assign two years = 730 days (following questionnaire).
gen iwgap = iwdate - cpwiwdate
replace iwgap = 730 if mi(cpwiwdate) & !mi(iwdate)

* Days between NH entry and death.
gen xnhgap = deathdate - NHdate

* Time to death (in days), measured from each interview:
*   tottime = total time covered by the interviews, incl. exit
*   elapsed = time covered up to and including the current record
by hhid pn: egen tottime = total(iwgap), missing
tempvar elapsed
by hhid pn: gen `elapsed' = sum(iwgap)
gen timetodeath = tottime - `elapsed'
drop `elapsed'

* Flag respondents whose exit record carries a death date; time to death is
* only relevant for them, and only useful in waves with data.
by hhid pn: egen hasDeathDate = max(cond(xIW==1, !mi(deathdate), .))
replace timetodeath = . if hasDeathDate != 1
replace timetodeath = . if riwstat != 1 & xIW != 1

summarize timetodeath, detail
tabstat timetodeath, by(xnriw) statistics(n p50)

* Span of time covered by interviews before death
* = iwgap (time since previous interview) + timetodeath (time from current iw until death)
gen ttdSincePrevIW = timetodeath + iwgap
replace ttdSincePrevIW = . if hasDeathDate != 1
replace ttdSincePrevIW = . if riwstat != 1 & xIW != 1

tabstat ttdSincePrevIW, by(xnriw) statistics(n min p25 p50 p75 p90)
// ttdSincePrevIW is timetodeath from the START of the period covered by the interview
// NOT from the time of the interview (which is what timetodeath measures).

* Total time covered by interviews, in full years.
gen tottime_years = floor(tottime/365)
// total time covered by interviews in years

********************************************************************************
** Respondent housing variables (CORE IWs)
********************************************************************************

/*
* H002: What type of dwelling do you live in? 
        1996-2000 (Section F): (1=mobile home) (2=one-family house) (3=two-family house)
                               (4=apartment)   (10=condo)           (11/12/13/97=other)
        2002-2004 (Section H): (1=mobile home) (2=one-family house) (3=two-family house)
                               (4=apartment)   (7=other)
        2006      (Section H): (1=mobile home) (2=one-family house) (3=two-family house)
                               (4=apartment)   (97=other)
        2008-2010 (Section H): (1=mobile home) (2=one-family house) (3=two-family house)
                               (4=apartment)   (10=condo)           (11/12/13/97=other)  
 
  ->  Not asked if R is currently in NH and reported that she does not 
      own/rent a home outside of the NH (A070=5).

H004: Do you rent/own your home?
      1996 - 2000 (Section F): (1=own) (2=rent)               (7=other) (8/9=DK)
      2002 - 2010 (Section H): (1=own) (2=rent) (3=rent-free) (7=other) (8/9=DK)

  ->  Not asked if R is currently in NH and reported that she does not 
      own/rent a home outside of the NH (A070=5).
  ->  Not asked  if H002=1 (mobile home) then questions on home ownership (H004) are skipped
      and revisited in H014 only for those living in a mobile home.
  ->  Not asked if H001=1 (farm or ranch) then questions on home ownership (H004) are skipped
      and revisited in H008 only for those living on a ranch or farm
*/

** Home ownership in core interviews, starting from the RAND flag HAFHOUS
**   hafhous 1 = continuous report on primary housing value
**   hafhous 2 = complete bracket on primary housing value
**   hafhous 3 = incomplete bracket on primary housing value
**   hafhous 5 = no value/bracket on primary housing value
**   hafhous 6 = no primary housing asset (rent, rent-free, other)
**   hafhous 7 = don't know ownership
**   hafhous 9 = no financial respondent
gen rownhm = .
replace rownhm = 1 if inlist(hafhous, 1, 2, 3, 5)  // owner
replace rownhm = 0 if hafhous == 6                 // not an owner
replace rownhm = 9 if inlist(hafhous, 7, 9)        // ownership unknown

bysort rnhmliv: tabulate rownhm
// -> small percentage of code=9 among CR, higher among NHR (good enough)
bysort rnhmliv: tabulate rownhm if rownhm<9
// -> about 72% home ownership among CR and 26% among NHR

// So far we only know whether or not a respondent is a homeowner but not 
// the housing status when the respondent does not own a home.
// Each step below only touches records that are still coded 0, so a record is
// classified by the first rule that applies to it.

* Non-owners: rent (2), rent-free (3) or other (7), taken directly from H004
replace rownhm = fat_H004 if rownhm==0 & inlist(fat_H004, 2, 3, 7)

* Non-owners living in a MOBILE home (H002=1), based on H014
*   H014=4 rents both (site and home), H014=2 rents (only owns site) -> rent
*   H014=7 owns neither site nor home                                -> rent-free
replace rownhm = 2 if rownhm==0 & fat_H002==1 & inlist(fat_H014, 2, 4)
replace rownhm = 3 if rownhm==0 & fat_H002==1 & fat_H014==7
// -> H014=1 (owns both mobile home and site) and H014=3 (owns only home) should be in rownhm==1

* Non-owners living on a FARM or RANCH (H001=1), based on H008
*   H008=3 -> rent, H008=7 -> other
replace rownhm = cond(fat_H008==3, 2, 7) if rownhm==0 & fat_H001==1 & inlist(fat_H008, 3, 7)

* Check whether there are any rownhm==0 cases left 
tabulate rownhm 
// -> Still many 0-cases. 
bysort rnhmliv: tabulate rownhm
// -> Almost all the 0-cases are among NHR. This is ok: they are NHR that neither 
//    rent nor own a home outside of the NH (fat_H070==5). Will treat them as a separate
//    category.
// -> For CR this is not ok (only few cases though). Set to code==9, have exhausted 
//    all relevant observations.
replace rownhm = 9 if rnhmliv==0 & rownhm==0
replace rownhm = 0 if rnhmliv==1 & inlist(rownhm, 3, 7)
// -> NHR are only asked whether they rent or own a primary residence outside
//    of the nursing facility and so we set the cases where we erroneously have
//    "rent-free" or "other" to zero. This is harmless since for NHR the most 
//    important information for us is whether or not they own a home.

label define rownhm 0 "0.NHR ~own/rent" 1 "1.own" 2 "2.rent" 3 "3.rent-free" ///
                    7 "7.other" 9 "9.dk own"
label values rownhm rownhm

bysort rnhmliv: tabulate rownhm 
bysort rnhmliv: tabulate rownhm if rownhm<9 [aw=rwtall]
// --> HAVE GOOD INDICATOR VARIABLE FOR HOME OWNERSHIP FOR CORE INTERVIEWS
//     THAT TAKES INTO ACCOUNT THE SPECIAL CASES 


* Homeowner dummy: 1 = owner, 0 = any other known status, missing when the
* status is unknown (code 9) or rownhm is missing.
gen own = (rownhm==1) if rownhm<9

/*
Analogous dummies for the other categories (currently not constructed):

gen rent     = 0 if rownhm<9  // renter 
gen rentfree = 0 if rownhm<9  // rent free 
gen other    = 0 if rownhm<9  // other form of residency (not NH!)
gen dkown    = 0 if rownhm<9  // don't know

replace rent    =1 if rownhm==2
replace rentfree=1 if rownhm==3  
replace other   =1 if rownhm==7  
replace dkown   =1 if rownhm==9
*/

********************************************************************************
** Housing variables (EXIT IWs)
********************************************************************************
/*
T101: Did R still own home at time of death (conditional on prior ownership)? 
     (1=yes) (2=records incorrect) (5=no) (8/9=DK)
     (.=did not own home)
     -> "records incorrect" is a type of asset reconciliation question which the
        HRS does in order to correct for past entry mistakes
     Exit interview skips questions regarding home-ownership if code is 2,8,9     
T066: only available from 2004 on. If T101=2,5, proxy is asked whether R
      ever owned the home: (1=yes) (5=no) (8/9 DK)
A070: Did NHR still own or rent home outside of facility? 
      (1=yes R in NH) (2=yes R in hospice) (5=no)
      There is no way to distinguish between renting and owning! 
    -> Suggestion: use T101 also for NHR. The questionnaire does seem to ask
                   home-ownership at death of all proxies.
Z079: Preload variable. Did R own or rent in previous wave?
      (1=own) (2=rent) (3=nursing home) (7=neither own/rent) 
      (.=data from prior wave not available)
*/

tabulate T101 if xIW==1, missing
// #3,269 owned home in final core IW 
// #110   have incorrect record             (T101=2)
// #425   did not own home anymore          (T101=5)
// #9     DK                                (T101=8)
// #6     RF                                (T101=9)
// #2,615 did not own home in final core IW (T101=.)

tabulate Z079 T101 if xIW==1, missing
// T101 only asked if Z079==1

//  If T101=2 or 5, proxy is asked whether R ever owned the home: 
//  (1=yes) (5=no) (8/9 DK)
tabulate T066 if xIW==1 & T101==5 // according to the proxy, #349 have owned the home before instead of #425 
// -> #66 incorrect records in this category
tabulate T066 if xIW==1 & T101==2 // according to the proxy, #33 of the incorrect records
                                  // have owned the home before

* Recode T101 in exit records. The order of the three steps matters:
*  (1) "no" (5) or "records incorrect" (2), with the proxy confirming prior
*      ownership (T066==1), becomes 0 = disposed  (#349 and #33 cases)
*  (2) every remaining 2 or 5 becomes 8 = DK      (#153 cases)
*  (3) system-missing T101 becomes 8 = DK when data from the prior wave is
*      unavailable (Z079 missing)
recode T101 (2 5 = 0) if xIW==1 & T066==1
recode T101 (2 5 = 8) if xIW==1
recode T101 (. = 8)   if xIW==1 & Z079==.

* Previous-wave home ownership (from the preload section of the exit IW)
rename Z079 pwrownhm
label define pwrownhmlab  1 "own"   2 "rent"  3 "NH" 7 "other"  
label values pwrownhm pwrownhmlab
label variable pwrownhm "Z079: PREV WAVE R OWNS OR RENTS"

* Exit-wave home ownership
rename T101 xrownhm
label define xrownhmlab  0 "0.disposed"   1 "1.own" 8 "8.DK" 9 "9.RF"
label values xrownhm xrownhmlab
label variable xrownhm "T101: STILL OWNED MAIN RESIDENCE"
// -> disposed means that an individual previously owned (according to the pre-load
//    question Z079=1 and confirmed by exit question T066=1) but no longer owns (T101 is
//    5=no or 2=records incorrect).

* Disposition of home between final interview and death
rename T102 dispHome 
label define dispHomelab 1 "1.gave away" 2 "2.sold" 3 "3.spouse" 6 "6.foreclosed" ///
                         7 "7.other" 8 "8.DK" 9 "9.RF"
label values dispHome dispHomelab

* ... and to whom the home was disposed
rename T104M1 dispHomeTo
label define dispHomeTolab 1 "1.spouse" 2 "2.child" 3 "3.other rltv" 4 "4.friend" ///
                         6 "6.grchild" 7 "7.other"  8 "8.DK" 9 "9.RF"
label values dispHomeTo dispHomeTolab


* What happened to the home after death
rename T111 xfateHome
label define xfateHomelab  1 "1.spouse"  2 "2.sold"  3 "3.inherited"  4 "4.not yet disposed" ///
                           6 "6.foreclosed" 7 "7.other"  8 "8.DK"  9 "9.RF"
label values xfateHome xfateHomelab

* Who inherited the home
rename T113M1 xinhrtHome
label define xinhrtHomelab  1 "1.spouse"  2 "2.child"  3 "3.other rltv"  4 "4.friend" ///
                            5 "5.charity" 6 "6.grchild" 7 "7.other" 8 "8.DK"  9 "9.RF"
label values xinhrtHome xinhrtHomelab

********************************************************************************
** Construct inter-vivos real estate transfer (CORE IWs)
********************************************************************************
* new_hkownrhm + fat_H092 (have filled in information forwards)
* new_hkdeed  (only exists once when it happened, that is, we have not filled information forwards)
* owntofree + hkresd (?) (respondent-level variable)
* fat_R077, records indicate inconsistency in home ownership, HRS asks why

* Collapse to indicators:
*   hkresd   : any value in 1-7 -> 1 (co-residency with at least one kid)
*   fat_X033 : HH moved since previous wave (1="yes", 3="yes", 5="no") -> 1/1/0
replace hkresd = 1 if inrange(hkresd, 1, 7)
recode fat_X033 (3=1) (5=0)

sort  hhid pn w
order hhid pn w hkresd new_hkownrhm fat_H092 new_hkdeed owntofree fat_R077 

// Identify histories where some "housing" event took place.
// Respondent-level summaries over all waves:
//   check_rhm  : maximum is 1 and means "kid owns parent's home"
//   check_H092 : minimum is 1 and means "parent ever owned home"
//   check_deed : maximum is 1 and means "gave deed to kid"
//   check_R077 : minimum is 1 and means "gave to someone"
//   check_owfr : maximum is 1 and means status changes from "own" to "other" or "rent-free"
//   check_resd : maximum is 1 and means co-residency takes place
by hhid pn: egen check_rhm  = max(new_hkownrhm)
by hhid pn: egen check_H092 = min(fat_H092)
by hhid pn: egen check_deed = max(new_hkdeed)
by hhid pn: egen check_R077 = min(fat_R077)
by hhid pn: egen check_owfr = max(owntofree)
by hhid pn: egen check_resd = max(hkresd)

* Combine into a single variable. The categories are not mutually exclusive,
* so the first applicable one in this order wins:
*   1 = kid owns the home and parent ever owned a home
*   2 = deed given to kid
*   3 = R077 indicates "gave to someone"
*   4 = own-to-free status change
*   0 = none of the above
gen trhome = cond(check_rhm==1 & check_H092==1, 1, ///
             cond(check_deed==1,                2, ///
             cond(check_R077==1,                3, ///
             cond(check_owfr==1,                4, 0))))

* Indicator, on records with non-missing xIW, whether an inter-vivos real
* estate transfer took place
gen ivtrhome = (trhome != 0) if xIW != .
tabulate ivtrhome

* How to handle disposition of home to child between final IW and death?
tabulate ivtrhome if inlist(dispHomeTo, 2, 6)
replace ivtrhome = 1 if ivtrhome != 1 & inlist(dispHomeTo, 2, 6)
// -> respondent gave home to child between final IW and death
//    adds 97 transfers to ivtrhome (transfers increase from 761 to 908)
// NOTE(review): 761 + 97 does not equal 908 -- one of the three counts in the
//               comment above looks outdated; re-check against the output.

* "best guess" for ivtrhome, excludes the own-to-free story (trhome==4)
gen ivtrhomeBG = inlist(trhome, 1, 2, 3) if xIW != .
replace ivtrhomeBG = 1 if ivtrhomeBG != 1 & inlist(dispHomeTo, 2, 6)
tabulate ivtrhome ivtrhomeBG if xIW != .
// -> 67 cases differ

********************************************************************************
** Remaining demographics
********************************************************************************

sort hhid pn w

* Bring information forward from the core waves to the exit wave. The copies
* carry a "c" prefix; the original variables are left untouched.
* (-carryforward- is a user-written command.)
by hhid pn : carryforward rwtall  hcpl  hchild  atotb10  atoth10  rnhmliv  rcendiv  rlivsib, ///
                     gen(crwtall chcpl chchild catotb10 catoth10 crnhmliv crcendiv crlivsib) 

***************************************
** Coupleness / singleness
***************************************

/* Coupleness status of R at time of death -- source variables

B063 (Demographics): marital status of R at time of death
     1. married   3. separated   4. divorced   5. widowed
     6. never married   7. other

X065_R (Preload-updated): is R married or living with a partner?
     (same as A038 in Coverscreen)
     1. married   3. partnered   6. other (widowed, never married, etc.)

A038: CURRENT COUPLENESS
     1. MARRIED   2. REMARRIED   3. PARTNERED (VOLUNTEERED)
     4. REPARTNERED (VOLUNTEERED)   6. OTHER
     Blank. INAP (Inapplicable); Partial Interview; Data Missing
*/

label define A038lab 1 "1.married" 2 "2.remarried" 3 "3.partnered" 4 "4.repartnered" ///
                     6 "6.other"
label values A038 A038lab

label define B063lab  1 "1.married" 2 "2.annulled"      3 "3.separated" 4 "4.divorced"  ///
                      5 "5.widowed" 6 "6.never married" 7 "7.other"  
label values B063 B063lab

tabulate B063 X065_R

tabulate A103 // =2 spouse/partner is proxy to respondent

** Single at death, using information from all three coupleness variables
** (definition applies to 2002-2012):
**  - single if any of the three variables says so,
**  - but if singleness is contradicted by another variable, assume not single,
**  - and if the proxy respondent is the spouse/partner (A103==2), assume not single.
gen xsingle = 1 if A038==6 | inlist(B063, 4, 5, 6) | X065_R==6
replace xsingle = 0 if inlist(A038, 1, 2, 3, 4) | inlist(B063, 1, 3, 7) ///
                     | inlist(X065_R, 1, 3) | A103==2
tabulate xsingle if xIW==1, missing

** Adjust the definition for 2000 (wave 5), where R597 is used
replace xsingle = 0 if w==5 & inlist(R597, 1, 3)
replace xsingle = 1 if w==5 & R597==6
tabulate w xsingle if xIW != ., missing

** Coupleness: fill in end-of-life couple status in the exit record
replace hcpl = 1 - xsingle if xIW==1

tabulate xIW
tabulate xIW if chcpl==0
tabulate xIW if chcpl==1

* Coupleness history:
*   everCpl : whether R was ever part of a couple
*   nriwCpl : number of waves in which R was part of a couple
by hhid pn: egen everCpl = max(hcpl)
by hhid pn: egen nriwCpl = total(hcpl), missing

***************************************
** Nursing home status
***************************************

** Lives in NH at time of death (A028 codes: 1. NH, 2. Hospice, 5. No)
replace A028 = 0 if A028 == 5

// Suggestion: hospice is more like hospital care and there may not be a choice.
//             Classify hospice as nursing home, only if R was NHR in prior wave.
// Hospice cases (A028==2) are resolved as follows:
//   - community (0) if R was not NHR in the last IW and the hospice stay was
//     short (xnhgap below 90 days)
//   - nursing home (1) in every other case, i.e. if R was NHR in the last IW
//     and for all remaining hospice cases
replace A028 = cond(crnhmliv==0 & xnhgap<90, 0, 1) if A028 == 2
rename A028 xrnhmliv

* Set the NH gap to zero for community residence, and fill in the end-of-life
* NH status in the exit record.
replace xnhgap  = 0        if xrnhmliv == 0
replace rnhmliv = xrnhmliv if xIW == 1

** Days since moved into nursing home
**  - core IWs: as reported (rnhmday)
**  - exit IW of a NH resident: days between NH entry and death; if these days
**    exceed the iw gap, use the iw gap (as is done by RAND)
gen nhmday = .
replace nhmday = rnhmday if riwstat == 1
replace nhmday = min(xnhgap, iwgap) if xIW==1 & rnhmliv==1 & !mi(xnhgap, iwgap)

** Days since moved into nursing home (unconditional, i.e., with zeros)
gen nhmday0s = nhmday
replace nhmday0s = 0 if rnhmliv == 0

** Spent any nights in NH since prev interview
**  - core IWs: rnrshom
**  - exit IW : N114 (1=yes, 5=no, 8 -> .d, 9 -> .r); had a NH stay if R
**              lived in a NH at death
gen nrshom = .
replace nrshom = rnrshom if riwstat == 1
recode N114 (1=1) (5=0) (8=.d) (9=.r)
rename N114 xrnrshom
replace xrnrshom = 1        if xrnhmliv == 1
replace nrshom   = xrnrshom if xIW == 1

***************************************
** Children, childlessness
***************************************

* Fill in the end-of-life child count in the exit record and derive an
* any-children indicator (every count of 1 or more becomes 1).
replace hchild = A101 if xIW == 1
recode hchild (1/max=1), gen(Ihchild)

* Proxy respondent relationship among decedents without children:
* few cases with a child-proxy among the childless.
tabulate A103 if A101 == 0

* Childless at time of death: no children and no child-proxy.
* Note that A103 > 9 is also true when A103 is missing, because Stata treats
* missing values as larger than any number.
gen childless = 0 if xIW == 1
replace childless = 1 if A101 == 0 & A103 > 9

// -> carry information backward through decedent's history, call variable "nochild"
by hhid pn: egen nochild = max(childless)

gen haschild = 1 - nochild

* Childless records (excludes grandchildren)
tabulate childless if xIW == 1

* Compare # children in most recent core IW with exit IW child status
tabulate chchild nochild if xIW == 1, missing
// -> a small number of cases of conflicting child statuses 

********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************

********************************************************************************
** Subsample: Widow(er)s/singles at time of death (including childless)
********************************************************************************
sort hhid pn w

unique hhid pn
// -> 9,534 individuals (76,272 observations = 9,534 x 8 waves (waves 4-11))

* Respondent-level flag: 1 on every record of a decedent whose exit interview
* record (xIW==1, i.e. exit in waves 7-11) is coded single.
by hhid pn: egen singleExitIW = max(xIW==1 & xsingle==1)
// -> identifies records of single decedents with exit interview 7-11

//by hhid pn: egen singleExit56 = max(xIW==0 & xsingle==1)
// -> identifies records of single decedents with exit interview 5-6


** NOTE: The final sample in all of the analyses will be single decedents who
** died in waves 5-11 and had exit interviews in waves 7-11. For the purposes
** of having a slightly larger imputation sample for the helper hours of NH
** residents, we also retain decedents in waves 5-6 that did not have exit IWs
** in waves 7-11. Dropping these extra observations at this point does not 
** change the results in meaningful ways. We drop them below, so they do not
** appear in any regression analyses. Dropping them at this point impacts the
** results slightly by affecting the imputations: the imputation sample is
** smaller and the number of random draws changes (with the change in sample
** size). In general, these impacts are small. 

* Select sample:
keep if (singleExitIW==1 | sample==3)
* sample==3: wave 5-6 decedents without exit interviews in waves 7-11

* turn on log
* (any log that is still open is closed first; the log path is quoted so that
*  a ${logs} directory containing spaces does not break the command)
cap log close
log using "${logs}/TABA2a_SAMPLE_COUNTS_2.txt", text replace	

* Count single decedents with exit iws in waves 7-11 who comprise main sample
unique hhid pn if (singleExitIW==1)
// -> 3,543 unique single decedents, died waves 5-11, with exit interviews 7-11
//    (TABA2a: Line 5 - single at the time of death)
count if ((riwstat==1 | xIW==1) & (singleExitIW==1))
// -> 19,679 observations

* pause logging (the log stays open and is resumed later with -log on-)
log off

tab xsample if (w==11) & (singleExitIW==1)
/*
                   xsample |      Freq.     Percent        Cum.
---------------------------+-----------------------------------
1. died 7-11, exit 7-11    |      3,530       99.63       99.63
2. died 5-6 , exit 7-11    |         13        0.37      100.00
---------------------------+-----------------------------------
                     Total |      3,543      100.00
*/

* Overall sample counts
unique hhid pn
// -> 6,338
count if (riwstat==1 | xIW<.)
// -> 26,601

********************************************************************************
** Additional sample selection
********************************************************************************

**  Here we are dropping low quality observations so that they do not 
**  enter the descriptive statistics or affect the imputations.

sort hhid pn w

* Each flag below is a respondent-level indicator: it equals 1 on every record
* of a respondent as soon as one of his/her records meets the condition.

* Proxy does not know (8) or refuses to report (9) the ownership status of the home
// Suggestion: get rid of records when proxy doesn't know the status on the home
tabulate xrownhm if xIW==1
by hhid pn: egen badinfoHome = max(inlist(xrownhm, 8, 9))

* Fate of the home after death: spouse was a beneficiary (1), DK (8) or RF (9)
// Suggestion: get rid of records when the spouse was a beneficiary of the home,
//             don't know, or refused
tabulate xfateHome if xIW==1 
by hhid pn: egen badinfofateHome = max(inlist(xfateHome, 1, 8, 9))

* Heir of the home: spouse (1), DK (8) or RF (9)
// Suggestion: get rid of records when the spouse inherited home,
//             don't know, refused.
tabulate xinhrtHome if xIW==1 
by hhid pn: egen badinfoinhrtHome = max(inlist(xinhrtHome, 1, 8, 9))

* Missing (carried-forward) weight in the exit interview record
by hhid pn: egen badweight = max(xIW==1 & crwtall==.)

* Proxy did not know whether respondent had either a will or trust or both
* (T155 / T156 equal to 8 or 9), or this information is missing
by hhid pn: egen badproxy = max(xIW==1 & (inlist(T155, 8, 9, .) | inlist(T156, 8, 9, .)))

* Negative time between the previous IW and death
// Suggestion: get rid of records with ridiculous negative values here
tabulate iwgap if (xIW==1 & iwgap<0)
by hhid pn: egen badinfoxiwgap = max(xIW==1 & iwgap<0)

***************************************
** Sample selection & counts:
***************************************

* Each -drop- below removes all records that carry the corresponding bad*
* flag; the two counts after it (exit interviews, and all interviews of
* persons with exitIW==1) are written to the sample-count log.
* No comment is added between -log on- and the closing -cap log close-,
* because commands and comments in that region are echoed into the log file.
* NOTE(review): under "Parents: # interview given while single" the -unique-
* call repeats the one of the preceding "Parents" paragraph verbatim.

* resume logging
log on

count if xIW==1 // # 3,543 exit IWs
count if (xIW==1 | riwstat==1) & (exitIW==1) // 19,679
// TABA2a: Line 5

drop if (badinfoHome==1)
count if xIW==1 // # 3,476 exit IWs
count if (xIW==1 | riwstat==1) & (exitIW==1) // 19,326
// TABA2a: Line 6

drop if (badinfofateHome==1)
count if xIW==1 // # 3,451 exit IWs
count if (xIW==1 | riwstat==1) & (exitIW==1) //  19,190

drop if (badinfoinhrtHome==1)
count if xIW==1 // # 3,434 exit IWs
count if (xIW==1 | riwstat==1) & (exitIW==1) // 19,092

drop if (badweight==1)
count if xIW==1 // # 3,434 exit IWs
count if (xIW==1 | riwstat==1) & (exitIW==1) // 19,092
// TABA2a: Line 7

drop if (badproxy==1)
count if xIW==1 // # 3,265 exit IWs
count if (xIW==1 | riwstat==1) & (exitIW==1) // 18,168
// TABA2a: Line 8

drop if (badinfoxiwgap==1)
count if xIW==1 // # 3,227 exit IWs
count if (xIW==1 | riwstat==1) & (exitIW==1) // 17,974
// TABA2a: Line 9

drop badinfo* badproxy badweight

***************************************
** Sample counts:
***************************************

unique hhid pn
// -> 5,719 individuals (45,752 observations = 5,719 x 8 waves (waves 4-11))
// -> NOTE that the sample still includes some observations that we are not going
//    to be using. Dropping these here affects the random draws used in the imputations,
//    so we keep them for now and discard them below (to be consistent with our earlier results).

** Decedents

unique hhid pn if (exitIW==1)
// -> 3,227 individuals who died in waves 5-11 and have exit interviews in waves 7-11
// -> This is our decedent sample.

count if (exitIW==1) & (riwstat==1 | xIW==1)
// -> 17,974
//    The total number of interviews in our decedent sample

** Parents

unique hhid pn if (exitIW==1) & (nochild==0)
// -> 2,869

count if (exitIW==1) & (riwstat==1 | xIW==1) & (nochild==0)
// -> 16,049

** Parents: # interview given while single (HACC validation sample)

unique hhid pn if (exitIW==1) & (nochild==0)
// -> 2,869

count if (exitIW==1) & (riwstat==1 | xIW==1) & (hcpl==0) & (nochild==0)
// -> 13,400
//    The total number of interviews given by parents while single (incl core + exit)

** Childless

unique hhid pn if (exitIW==1) & (nochild==1)
// -> 358

count if (exitIW==1) & (riwstat==1 | xIW==1) & (nochild==1)
// -> 1,925

* close log
cap log close

********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************

********************************************************************************
** Estate
********************************************************************************

* EOLplan: indicator of a written end-of-life plan.
*   0 on every exit-interview record, switched to 1 wherever T155 (trust) or
*   T156 (will) equals 1.
gen     EOLplan = 0 if xIW==1
replace EOLplan = 1 if inlist(1, T155, T156)

label define EOLplan 0 "0. neither will nor trust" 1 "1. will or trust"
label values EOLplan EOLplan
label variable EOLplan "INDICATOR OF WILL OR TRUST, T155 (TRUST), T156 (WILL) "

** Distribution of Assets:

// Important branchpoint in the survey: T161 is not asked if R had a trust or
// if the will had been probated. EVERYONE ELSE is asked T161, which records
// what happened to the decedent's belongings irrespective of whether there is
// a written end-of-life plan:
//  "Have R's assets and possessions been given to heirs, not yet been distributed,
//   was there nothing much of value, or what?"
// -> For the trust / probated-will cases the assumption must be that the
//    estate is already distributed.

// Shared condition: R had a trust (T155) or a probated will (T157)
local hasTrustOrProbate (T155==1 | T157==1)

// Everyone else: what happened to possessions (excluding life insurance)?
tab T161 if ~`hasTrustOrProbate'

// distAsset starts at 0 (nothing of value) on exit-interview records and is
// then overwritten case by case
gen     distAsset = 0 if xIW==1
replace distAsset = 1 if  `hasTrustOrProbate'                     // probated will and/or trust
replace distAsset = 2 if ~`hasTrustOrProbate' & T161==1           // estate already distributed
replace distAsset = 3 if ~`hasTrustOrProbate' & T161==2           // estate not yet distributed
replace distAsset = 7 if ~`hasTrustOrProbate' & T161==7           // other
replace distAsset = 8 if ~`hasTrustOrProbate' & inlist(T161,8,9)  // DK/RF

label define distlab 0 "0. nothing left" ///
                     1 "1. probated will or trust" ///
                     2 "2. already distributed" ///
                     3 "3. not yet distributed" ///
                     7 "7. other" ///
                     8 "8. DK/RF"
label values distAsset distlab
label variable distAsset "COMBINES INFO FROM T161, T155 (TRUST), T157 (PROBATED WILL)"

tab distAsset

* anyBeq: binary indicator for any inheritance (excluding life insurance),
* collapsed from distAsset: 0 stays 0, 1 through 7 become 1, 8 (DK/RF)
* becomes the extended missing value .d.
* -> differs from EOLplan (will and/or trust) because it also covers cases
*    without a written end-of-life plan
recode distAsset (0 = 0) (1/7 = 1) (8 = .d), generate(anyBeq)
label define anyBeq 0 "0.  nothing left behind" 1 "1.  distAsset=(1,2,3,7)"
label values anyBeq anyBeq
label variable anyBeq "SQUEEZES DISTASSET INTO A BINARY VARIABLE"


* left something of value: unweighted, then weighted
tabulate anyBeq
tabulate anyBeq [aw=crwtall]

* row percentages: share leaving something behind, by written end-of-life plan
tabulate EOLplan anyBeq [aw=crwtall], row
// -> % of decedents without written end-of-life plan who left something behind
//    % of decedents with written end-of-life plan who left something behind

* xstatus combines planning (EOLplan) and actually leaving something (anyBeq):
*   0 = no plan, nothing left     1 = no plan, something left
*   2 = plan, nothing left        3 = plan, something left
* It stays missing unless both inputs are 0 or 1.
gen xstatus = anyBeq + 2*EOLplan if inlist(anyBeq,0,1) & inlist(EOLplan,0,1)

* spread the value over R's history (mipolate along w, backward option)
sort hhid pn w
bysort hhid pn: mipolate xstatus w, gen(beqstatus) backward 
drop xstatus

label define beqstatus 0 "0. no EOL plan, no estate" ///
                       1 "1. no EOL plan, estate" ///
                       2 "2. EOL plan, no estate" ///
                       3 "3. EOL plan, estate"
label values beqstatus beqstatus
label variable beqstatus "COMBINES INFO FROM ANYBEQ AND EOLPLAN"

tab beqstatus if xIW==1
tab beqstatus if xIW==1 [aw=crwtall]

********************************************************************************
** Total estate values
*******************************************************************************

* Z = 1 if any of T173-T176 is non-missing on an exit-interview record, 0 if
* all four are missing; only used for the cross-check against distAsset
egen Z = rownonmiss(T173 T174 T175 T176) if xIW==1
recode Z (1/max=1)
tab distAsset Z
drop Z

* distAsset = {0,3,8} should all have missing values by HRS construction.
* Where that is not the case, overwrite the entry with .z -- for the estate
* value variables and for whether the home is included in the estate (T242).
foreach v of varlist T173 T174 T175 T176 T242 {
  replace `v' = .z if inlist(distAsset,0,3,8) & `v'!=.
}

* DK and RF codes of the continuous report, set to missing
replace T173 = .d if inlist(T173, 9999998, 9999999,  99999998, 99999999, 999999998, 999999999, 9999999998, 9999999999)

gen estVal10    = T173 / deflator10  // convert into 2010 dollars
gen ihsEstVal10 = log( estVal10 + sqrt( 1 + (estVal10)^2 ) )
// -> inverse hyperbolic sine function: similar to taking the logarithm but
//    accommodates non-positive values

* valf: type of estate value report on the exit interview. Building blocks of
* the conditions used below:
local noAmount   xIW==1 &  missing(T173)                     // no continuous report
local minOnly    `noAmount' & ~missing(T174)                 // ... but T174 is filled
local nothing    `noAmount' &  missing(T174) & missing(T175) // ... and neither T174 nor T175
local t176Flag   inlist(T176,97,98,99)

gen valf = .
replace valf = 0 if xIW==1 & distAsset==0                           // nothing of value left behind
replace valf = 1 if xIW==1 & ~missing(T173)                         // continuous report
replace valf = 2 if `minOnly' & T175 <99999996 & ~`t176Flag'        // bracket
replace valf = 3 if `minOnly' & T175==99999996 & ~`t176Flag'        // amount exceeds upper bracket
replace valf = 4 if `minOnly' & T175 <99999996 &  `t176Flag'        // incomplete, closed
replace valf = 5 if `minOnly' & T175==99999996 &  `t176Flag'        // incomplete, open top
replace valf = 6 if `nothing' & inlist(distAsset,1,2,3,7)           // no values, but distAsset says estate exists
replace valf = 7 if `nothing' & inlist(distAsset,8)                 // no values, DK/RF whether estate exists

label define VALF 0 "0. no asset" ///
                  1 "1. continuous report" ///
                  2 "2. complete brackets, closed" ///
                  3 "3. complete brackets, top bracket" ///
                  4 "4. incomplete brackets, closed" ///
                  5 "5. incomplete brackets, open top bracket" ///
                  6 "6. no bracket info" ///
                  7 "7. dk ownership"
label values valf VALF

tab valf

tab distAsset valf, row
tab beqstatus valf, row

** Estate distribution when using only continuous reports:
* weighted quintiles of the continuous reports, then summary statistics by quintile
xtile quantile=estVal10 if valf==1 [aw=crwtall], n(5)
latabstat estVal10 if xIW==1 [aw=crwtall], by(quantile) ///
	stat(n mean p10 p25 p50 p75 p90 p95 p99)
drop quantile

********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************

********************************************************************************
** Pareto / Power Law
********************************************************************************

********************************************************************************
** Finding best (alpha, xmin)
********************************************************************************

* Strategy:
* Loop over each possible xmin in the data, and for each:
*   Compute alpha using the MLE estimator
*   Compute D = max(x>=xmin) |S(x) - P(x)|
*     where S(x) is the empirical CDF and P(x) is the Pareto CDF given alpha, xmin
*   If new D lower than old D, store (D, xmin, alpha)

/*
Reference:
Clauset, A.; Shalizi, C. R.; Newman, M. E. J. (2009). 
"Power-Law Distributions in Empirical Data". 
SIAM Review. 51 (4): 661–703.
*/

if (`RECOMPUTE_PARETO') {

* Bookkeeping variables: observation j holds the results for the j-th
* candidate xmin (XMIN, ALPHA, SE of alpha, KS distance KSD).
* Leftovers from an earlier run are removed first. The existence check uses
* the exact name: a bare -cap drop NAME- would drop an unrelated variable
* whenever NAME happens to be its unique abbreviation.
foreach v in XMIN ALPHA SE KSD ALPHA_CI_UPP ALPHA_CI_LOW {
  cap confirm variable `v', exact
  if (_rc==0) drop `v'
}

gen XMIN=.
gen ALPHA=.
gen SE=.
gen KSD=.

loc DMIN = 999999 		// -> assign bogus starting values
loc BEST_XMIN = 99999
loc BEST_ALPHA = 99999
loc BEST_SE = 99999

//input
loc X 	estVal10

qui tab `X' if inrange(`X', 1e5, 1.5e6), matrow(XVALUES)
// -> set some arbitrary boundaries here: the best xmin won't lie outside of 
//    this range. XVALUES holds the distinct values of `X' within the range.

forvalues j = 1(1)`=rowsof(XVALUES)' {
  
  local xmin = XVALUES[`j',1]
  
  qui replace XMIN = `xmin' in `j'

  // tail sample (x >= xmin) and its empirical CDF
  qui gen x = `X'
  qui replace x = . if x < `xmin'
  cumul x, gen(cdfx)
  
  // MLE of alpha on the tail sample, with its standard error
  qui gen A = log(x/`xmin')
  qui summ A, det
  local N = r(N)
  local ahat = 1 + (1/r(mean)) 			// -> MLE estimator for alpha
  local ase = (`ahat' - 1) / sqrt(`N') 	// -> SE

  qui replace ALPHA = `ahat' in `j'
  qui replace SE = `ase' in `j'
  
  // fitted Pareto CDF and its largest distance from the empirical CDF
  qui gen pareto_anticdf = (x / `xmin') ^ (-`ahat' + 1)
  qui gen pareto_cdf = 1 - pareto_anticdf
  
  qui gen diff = abs(cdfx - pareto_cdf) 	// empirical - fitted CDF
  qui summ diff, det
  qui replace KSD = r(max) in `j' 			// max difference in CDFs
  
  if (r(max) < `DMIN') {
    local DMIN = r(max)
    local BEST_XMIN = `xmin'
    local BEST_ALPHA = `ahat'
    local BEST_SE = `ase'
  }
 
  // -> clean up: every variable listed was created in this iteration
  drop x cdfx A pareto_anticdf pareto_cdf diff
 
}

* turn on log
cap log close
log using ${logs}/POWER_LAW_RESULTS.txt, text replace

di "BEST RESULTS: XMIN = `BEST_XMIN'. ALPHA = `BEST_ALPHA'. DMIN = `DMIN'"

/*
BEST RESULTS: XMIN = 449184.5. ALPHA = 2.446384344956527. DMIN = .0478459000587463.
*/

* close log
cap log close

******************
** Graph results
******************

gr tw (sc KSD XMIN), xline(`BEST_XMIN', lc(cyan) lp(dash) lw(*2)) ///
	xti("Xmin") yti("D")
graph2tex, epsfile(${figs}/FIGI1b)

gen ALPHA_CI_UPP = ALPHA + 1.96*SE
gen ALPHA_CI_LOW = ALPHA - 1.96*SE

gr tw (sc ALPHA XMIN) ///
	  (line ALPHA_CI_UPP XMIN, sort lp(dash) lc(gs8)) ///
	  (line ALPHA_CI_LOW XMIN, sort lp(dash) lc(gs8)) ///
	  , xline(`BEST_XMIN', lc(cyan) lp(dash) lw(*2)) ///
	xti("Xmin") yti("Alpha") legend(off) //legend(order(1 "Data" 2 "95% CI"))
graph2tex, epsfile(${figs}/FIGI1c)

// -> to see how ALPHA & KSD and XMIN vary.
//    the dashed cyan line is the best XMIN cutoff

// -> remove ALL bookkeeping variables, including SE and the CI bounds, so
//    that the data set does not depend on the RECOMPUTE_PARETO setting
drop KSD ALPHA XMIN SE ALPHA_CI_UPP ALPHA_CI_LOW

} // end if(RECOMPUTE_PARETO)

********************************************************************************
** Power law -> Graph Best Results
********************************************************************************

*** FIGURE I.1(a) ***

if (`GRAPH_PARETO') {

* Parameters of the fitted Pareto: the freshly estimated values if the search
* above was run, otherwise the stored results of an earlier run.
if (`RECOMPUTE_PARETO') {

  local xmin = `BEST_XMIN'
  local alpha = `BEST_ALPHA'

} 
else {

  local xmin = 449184.5
  local alpha = 2.446384344956527
  // -> input from above  

}
  
local graphmin = 1e0
// -> where do we want graph to begin?

* empirical log(1 - CDF) of the estate values at or above graphmin
qui gen x = estVal10
replace x = . if x < `graphmin'
cumul x, gen(cdfx)
gen antix = 1 - cdfx
gen logantix = log(antix)
gen logx = log(x)

* log of the fitted Pareto tail, defined from xmin upwards
qui gen pareto = log( (x / `xmin') ^ (-`alpha' + 1) ) if x>= `xmin'

summ logantix if x>=`xmin'
qui gen pareto_shift = pareto - (0-r(max))
// -> fix alignment: want pareto and log(1-F(x)) to start at same point

gr tw (sc logantix     logx, ms(Oh)) ///
	  (sc pareto_shift logx, connect(d) ms(none) lc(red) lp(dash) lw(*2)) ///
	  , xti("ln(x)") yti("ln(Pr(X >= x))") ///
	  legend(order(1 "Data" 2 "Pareto") rows(1)) ///
	  xline(`=log(`xmin')', lc(cyan) lw(*2) lp(dash)) ///
	  text(-11 `=log(`xmin')' "ln(Xmin)", size(*1))


graph2tex, epsfile(${figs}/FIGI1a)

// -> cleanup: drop exactly the variables created in this section. Names not
//    created here (A, diff) are no longer listed, because -cap drop- on a
//    non-existent name can remove an unrelated variable by abbreviation.
drop x cdfx antix logx logantix pareto pareto_shift

}

cap graph drop _all

********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************

********************************************************************************
** Imputation
********************************************************************************
/*

NOTES:

We do the imputation for singles only. Estate may mean different things for couples in which
a member survives and leaves a large part of the estate to their spouse. This could mean
that estates are reported differently between these groups. Furthermore, end-of-life expenses,
and hence the rate of dissaving at the end of life, could differ between married and single 
individuals.

For individuals whose home was known to not be included in the estate value (T242==5) but whose
home was inherited or given away prior to death, we add the value of the home (from the most
recent core interview) to the estate value. In cases where it was not known whether a home
was included in the value of the estate, we do nothing, implicitly assuming that the estate
value already includes the home (or that it was not included and should not have been included).

STRATEGY:

// home included correction for continuous reports

// ownership imputation -> sample: 0-6 -> impute: 7
//		-> logistic regression

// bracket imputation   -> sample: 1-3 -> impute: 4-7
//		-> ordered logistic regression

// amount imputation    -> sample: 1   -> impute: 2-7
//		-> closed brackets: nearest neighbor match
//		-> top bracket:     hot deck

// home included correction for bracketed reports

*/
********************************************************************************
** Problematic homeowner cases
********************************************************************************
// T242 indicates whether the estate value (amount or brackets) includes the
// value of the primary residence. The question is asked to individuals who own
// a home at preload (Z079==1) and who enter the estate value sequence
// (T173-T176). In some cases where the home was not included (T242==5), we
// believe the home should have been included: when the home was inherited
// (xfateHome==3) or when it was given away before death (dispHome==1). To avoid
// double counting, no home is added if an intervivos home transfer has already
// been recorded for the respondent (ivtrhome==1). If the proxy did not know
// whether the estate included the home (T242==8 | T242==9), no correction is
// made.

// Timing of the correction: ideally it would be made before the imputation
// sequence, so that the donor distribution (for the nearest neighbor match) is
// as close to correct as possible. Adding home values to bracketed estate
// values is, however, tricky within the current imputation procedure. The
// correction is therefore made in two pieces: here, prior to imputation, for
// continuously reported estate values (valf==1); and after the imputation for
// bracket cases (inlist(valf,2,3,4,5)).

// Cases corrected at this stage
local homeFixCont (T242==5 & ivtrhome==0 & (dispHome==1 | xfateHome==3) & valf==1)

// Add the home value. It is taken from the most recent interview (variable:
// h*atoth) instead of the exit interview data; ivtrhome is treated separately.
// NOTE(review): the sum is missing whenever catoth10 is missing, which would
// turn a continuous report into a value to be imputed -- confirm that this
// cannot occur for the corrected cases.
// NOTE(review): ihsEstVal10 was computed from estVal10 before this correction
// and is not refreshed here -- confirm that this is intended.
replace estVal10 = estVal10 + catoth10 if `homeFixCont'

// Mark the corrected exit interviews
gen addedHome = 0 if xIW==1
replace addedHome = 1 if `homeFixCont'

// Update the indicator for whether the home is included in the estate. This
// must come last: it changes T242, which is part of the condition.
replace T242 = 1 if `homeFixCont'

********************************************************************************
** Brackets for imputation
********************************************************************************

// brackets are given by [0,9999]          [10000,10000]      [10001,24999]   [25000, 25000] 
//                       [25001,99999]     [100000, 100000]   [100001,499999] [500000, 500000] 
//                       [500001, 1999999] [2000000, 2000000] [2000001, max]
// brackets like [x, x] are "about" values

// valCat: bracket of the estate value, coded by the lower bound of the bracket.
// Step 1: continuous reports (T173) are grouped into the eleven brackets above.
// NOTE: the grouping uses T173 as reported, not estVal10 (which is deflated
//       and may already include an added home value).
recode T173 (      0/   9999=      0) ///
	 		(  10000        =  10000) /// -> retain "about" values
	 		(  10001/  24999=  10001) ///
	 		(  25000        =  25000) ///
	 		(  25001/  99999=  25001) ///
	 		( 100000        = 100000) ///
	 		( 100001/ 499999= 100001) ///
	 		( 500000        = 500000) ///
	 		( 500001/1999999= 500001) ///
	 		(2000000        =2000000) ///
	 		(2000001/    max=2000001) ///
	 		, gen(valCat)
// -> categorical variable with name equal to the lower bound of a bracket

// Step 2: bracket reports. Where T174 and T175 are both filled and T176 is
// not 97/98/99, T174 is used as the category.
replace valCat = T174 if ~missing(T174) & ~missing(T175) & ~inlist(T176,97,98,99)
// -> for complete, imputable brackets, assign lower bound of bracket to category name
tab valCat valf

********************************************************************************
** Additional variable definitions for imputation models
********************************************************************************

// The carryforward calls below run within person and rely on this sort order.
sort hhid pn w

** wealth at previous waves
// inverse hyperbolic sine of catotb10
gen IHScatotb10 = log( catotb10 + sqrt( 1 + (catotb10)^2 ) ) 	
// -> most recent core interview

** preload housing
recode pwrownhm (1=1) (2 3 7=0), gen(preloadOwnHome)  
// = 1 if owned home, = 0 otherwise
// NOTE(review): recode leaves values not listed in the rules unchanged, so any
// other code of pwrownhm (e.g. 8 or 9, if present) is copied as is into
// preloadOwnHome -- confirm that no such codes remain in the sample.

** demographics
gen female = (ragender==2)
// nonwhite: race code 2 or 3, or Hispanic; missing if race or Hispanic
// status is missing
gen nonwhite = inlist(raracem,2,3) | rahispan==1 if !missing(raracem,rahispan)
recode raeduc (2/3=2), gen(educ) // -> GED = HS
// one dummy per education category: educ1, educ2, ...
qui tab educ, gen(educ)
lab var educ1 "Educ: less HS"
lab var educ2 "Educ: HS/GED"
lab var educ3 "Educ: some college"
lab var educ4 "Educ: college+"

** bequest intentions
// c-prefixed copies: values carried forward within person
by hhid pn: carryforward rbeq10k rbeq100, gen(crbeq10k crbeq100)

** medicaid
// exit-interview item N005, recoded to 1/0 with DK -> .d and RF -> .r;
// on all other records the core variable rgovmd is used instead
ren N005 xrgovmd
recode xrgovmd (1=1) (5=0) (8=.d) (9=.r)
replace xrgovmd = rgovmd if (xIW~=1) // -> fill in core values
by hhid pn: carryforward xrgovmd, gen(cxrgovmd)

********************************************************************************
** Imputation setup: set seed and sort
********************************************************************************

// Fixed seed plus a fixed sort order: every runiform() call below assigns its
// draws to observations in their current order, so the order of the commands,
// the sort order and the number of observations all affect the imputed values.
set seed 132547698
sort hhid pn w

********************************************************************************
** Define covariates 
********************************************************************************

// Common right-hand side of the three imputation models (logit, ordered
// logit, linear regression)
global COVARIATES ///
  IHScatotb10 female educ2 educ3 educ4 age c.age#c.age nonwhite ///
  preloadOwnHome cxrgovmd crbeq10k crbeq100 
  
// start with an empty list of stored estimates; the models stored with eststo
// are exported together further below
est clear
	
********************************************************************************
** Ownership imputation
********************************************************************************
// ownership imputation -> sample: 0-6 -> impute: 7
//		-> logistic regression

eststo: logit anyBeq $COVARIATES
// -> regress bequest dummy on covariates
//    (no if qualifier: all records with non-missing anyBeq and covariates)
predict pHat if (anyBeq==.d), pr
// -> impute probability when ownership is unknown
sort hhid pn w  					
// -> important to sort before call to runiform()
gen u = runiform()
// -> draw a random number from the uniform distribution on [0,1]
//    (drawn for every observation, not only for those being imputed)
replace anyBeq = cond(u<=pHat,1,0) if (anyBeq==.d & ~missing(pHat))
// -> if pHat exceeds the random draw, set ownership to yes; records without
//    a predicted probability keep .d

// -> check: how many missing values (.d) remain
tab anyBeq if xIW==1,m

drop u pHat

// --> ownership imputation done.

********************************************************************************
** Complete bracket imputation
********************************************************************************
// bracket imputation   -> sample: 1-3 -> impute: 4-7
//		-> ordered logistic regression

eststo: ologit valCat $COVARIATES
// -> ordered logit model based on those who provide a complete bracket

// fitted values: one predicted probability per bracket (outcome #1 = lowest
// bracket ... outcome #11 = top bracket), only for exit interviews with an
// estate but without a bracket
forvalues n = 1(1)11 {
  predict pHat`n' if xIW==1 & anyBeq==1 & missing(valCat), outcome(#`n') pr
}

// adjust probability for compatibility with reported incomplete brackets
// using information from lower brackets:
// a bracket is ruled out if the reported minimum (T174) lies above its lower bound

replace pHat1  = 0 if ~missing(pHat1)  & T174 < . & T174 > 0
replace pHat2  = 0 if ~missing(pHat2)  & T174 < . & T174 > 10000
replace pHat3  = 0 if ~missing(pHat3)  & T174 < . & T174 > 10001
replace pHat4  = 0 if ~missing(pHat4)  & T174 < . & T174 > 25000
replace pHat5  = 0 if ~missing(pHat5)  & T174 < . & T174 > 25001
replace pHat6  = 0 if ~missing(pHat6)  & T174 < . & T174 > 100000
replace pHat7  = 0 if ~missing(pHat7)  & T174 < . & T174 > 100001
replace pHat8  = 0 if ~missing(pHat8)  & T174 < . & T174 > 500000
replace pHat9  = 0 if ~missing(pHat9)  & T174 < . & T174 > 500001
replace pHat10 = 0 if ~missing(pHat10) & T174 < . & T174 > 2000000
replace pHat11 = 0 if ~missing(pHat11) & T174 < . & T174 > 2000001 	// cannot happen

// using information from upper brackets:
// a bracket is ruled out if the reported maximum (T175) lies below its upper
// bound (a missing T175 never satisfies "<", so it rules out nothing)

replace pHat1  = 0 if ~missing(pHat1)  & T175 < 9999   	// cannot happen
replace pHat2  = 0 if ~missing(pHat2)  & T175 < 10000
replace pHat3  = 0 if ~missing(pHat3)  & T175 < 24999
replace pHat4  = 0 if ~missing(pHat4)  & T175 < 25000
replace pHat5  = 0 if ~missing(pHat5)  & T175 < 99999
replace pHat6  = 0 if ~missing(pHat6)  & T175 < 100000
replace pHat7  = 0 if ~missing(pHat7)  & T175 < 499999
replace pHat8  = 0 if ~missing(pHat8)  & T175 < 500000
replace pHat9  = 0 if ~missing(pHat9)  & T175 < 1999999
replace pHat10 = 0 if ~missing(pHat10) & T175 < 2000000
replace pHat11 = 0 if ~missing(pHat11) & T175 < 99999996

// total probability mass that remains admissible (missing if no prediction)
egen pHatSum = rowtotal(pHat1  pHat2 pHat3 pHat4 pHat5  ///
                        pHat6  pHat7 pHat8 pHat9 pHat10 ///
						pHat11), missing
summ pHatSum, det

// -> check: records for which the reported bracket information rules out
//    every category. Their rescaled probabilities are 0/0 = missing.
count if pHatSum==0

// rescale the admissible probabilities so that they sum to one
forvalues n = 1(1)11 {
  replace pHat`n' = pHat`n' / pHatSum
}

// cumulative probabilities

gen cHat1 = pHat1
forvalues n=2(1)11 {
  gen cHat`n' = cHat`=`n'-1' + pHat`n'
}

// -> check: should be equal to 1
summ cHat11, det

// impute brackets: the bracket is the first one whose cumulative probability
// reaches the uniform draw

sort hhid pn w
gen u = runiform()

// pHatSum>0 is required in addition to pHatSum<. : with pHatSum==0 all cHat
// are missing, and since a missing value compares greater than any number,
// "u <= cHat1" would be true and the record would silently be put into the
// lowest bracket, contradicting its own bracket report. Such records now keep
// a missing valCat and show up in the check below.
replace valCat = cond(u <= cHat1,  0, ///
				 cond(u <= cHat2,  10000, ///
				 cond(u <= cHat3,  10001, ///				  			  
				 cond(u <= cHat4,  25000, ///
				 cond(u <= cHat5,  25001, ///
				 cond(u <= cHat6,  100000, ///
				 cond(u <= cHat7,  100001, ///
				 cond(u <= cHat8,  500000, ///
				 cond(u <= cHat9,  500001, ///
				 cond(u <= cHat10, 2000000, ///
				 				   2000001)))))))))) ///
						if xIW==1 & anyBeq==1 & missing(valCat) & pHatSum>0 & pHatSum<.

// -> check: how many decedents that left something have missing bracket
count if xIW==1 & anyBeq==1 & missing(valCat)

tab valCat valf, col nof

drop u pHat* cHat*

// --> imputing brackets done.

********************************************************************************
** Filling in the "about" values
********************************************************************************

// "about" brackets [x, x]: the amount is the bracket value itself, deflated
// like the continuous reports; only missing amounts are filled
foreach v in 10000 25000 100000 500000 2000000 {
  replace estVal10 = `v' / deflator10 if xIW==1 & anyBeq==1 & valCat==`v' & missing(estVal10)
}

********************************************************************************
** Amount imputation for closed brackets
********************************************************************************
// amount imputation    -> sample: 1   -> impute: 2-7
//		-> closed brackets: nearest neighbor match

eststo: reg ihsEstVal10 $COVARIATES
// -> regress the inverse hyperbolic sine of estate values on the covariates
predict XB if inlist(valCat,0,10001,25001,100001,500001), xb
// -> get fitted values for those in complete brackets, except for "about" values and
//    the topmost bracket (valCat == 2000001), which is imputed separately (below)

** For each observation, find the closest fitted value reported within the bracket,
** and assign the continuous value of that nearest neighbor.

gen XBdonor = XB if ~missing(ihsEstVal10)
// -> donor data: fitted value of records that have a continuous report
// NOTE(review): donors are identified through ihsEstVal10, while the donated
// amount is estVal10 -- confirm that the two are non-missing on the same records.

// for breaking ties
sort hhid pn w  		// -> important to sort before call to runiform()
gen U = runiform()
gen Unegative = -U
// -> alternatively, could have added very small error to XB

// In each bracket, find closest donor from above in terms of fitted values:
// ("above" = earlier in the data after the ascending sort; carryforward copies
//  the last non-missing donor values down to the following records)
sort valCat XB U
// -> sort valCat from low to high; within each valCat, sort XB from low to high
by valCat: carryforward XBdonor estVal10, gen(XBAbove estValAbove)
gen dAbove = abs(XB - XBAbove)
// -> distance to closest donor from above

// In each bracket, find closest donor from below in terms of fitted values:
// (same procedure with the sort order of XB and of the tie-breaker reversed)
gen XBnegative = -XB
sort valCat XBnegative Unegative
by valCat: carryforward XBdonor estVal10, gen(XBBelow estValBelow)
gen dBelow = abs(XB - XBBelow)
// -> distance to closest donor from below

// -> check: how many missing values with non-missing bracket (below top bracket) have no donor
count if xIW==1 & valCat<2000001 & missing(estVal10) & missing(dAbove) & missing(dBelow)

// Assign the continuous report of the nearest neighbor: 
// (a missing distance compares as larger than any number, so cond() picks the
//  side that has a donor when only one side has one)
replace estVal10 = cond(dAbove <= dBelow, estValAbove, estValBelow) ///
					if xIW==1 & missing(estVal10) & valCat<2000001 & (dAbove<. | dBelow<.)
// -> impute estate values for all but the top bracket

// -> check: how many missing values with non-missing bracket (below top bracket)
//           could not be imputed 
count if xIW==1 & valCat<2000001 & missing(estVal10)

drop U Unegative *Above *Below XB XBdonor XBnegative

********************************************************************************
** Amount imputation for top (open) bracket
********************************************************************************
// amount imputation    -> sample: 1   -> impute: 2-7
//		-> top bracket:     hot deck

// Same matching mechanics as for the closed brackets, except that records are
// matched on a uniform random number instead of a fitted value, i.e. the donor
// within the bracket is picked at random.
sort hhid pn w
gen U = runiform()

gen Udonor = U if ~missing(ihsEstVal10)
// -> donor data: random number of records that have a continuous report

// find closest donor from above
// ("above" = earlier in the data after the ascending sort on U)
  
sort valCat U
by valCat: carryforward Udonor estVal10, gen(UAbove estValAbove)
gen dAbove = abs(U - UAbove)
// -> distance to closest donor from above

// find closest donor from below
// (same procedure with the sort order of U reversed)

gen Unegative = -U
sort valCat Unegative
by valCat: carryforward Udonor estVal10, gen(UBelow estValBelow)
gen dBelow = abs(U - UBelow)
// -> distance to closest donor from below

// -> count how many missing values in top bracket have no donor
count if xIW==1 & valCat==2000001 & missing(estVal10) & missing(dAbove) & missing(dBelow)

// the donors are computed within every valCat, but only the top bracket is
// filled here
replace estVal10 = cond(dAbove <= dBelow, estValAbove, estValBelow) ///
					if xIW==1 & missing(estVal10) & valCat==2000001 & (dAbove<. | dBelow<.)
// -> impute estate values for the top bracket

// -> check: how many missing values in top bracket could not be imputed
count if xIW==1 & valCat==2000001 & missing(estVal10)

drop *Above *Below U Udonor Unegative

** Estate distribution when using all reports (except "nothing of value"):
// weighted quintiles of reported and imputed amounts, then summary statistics
// by quintile
xtile quantile=estVal10 [aw=crwtall], n(5)
latabstat estVal10 [aw=crwtall], by(quantile) stat(n mean p10 p25 p50 p75 p90 p95 p99) f(%9.0fc) 
drop quantile

********************************************************************************
** Fill in zeros for those who left nothing
********************************************************************************

* decedents without an estate (reported or imputed) get an estate value of zero
replace estVal10 = 0 if xIW==1 & anyBeq==0

* -> check: how many decedents still have missing estate values
count if xIW==1 & missing(estVal10)

* -> estate imputation complete.

********************************************************************************
** Problematic homeowner cases (continued)
********************************************************************************
* Second piece of the correction for homes that were, in our judgment, wrongly
* left out of the estate value: the bracket cases (valf 2-5), whose amounts
* have just been imputed. Same criteria as for the continuous reports: home
* not included (T242==5), no intervivos home transfer recorded (ivtrhome==0),
* and home given away before death or inherited.

local homeFixBrk (T242==5 & ivtrhome==0 & (dispHome==1 | xfateHome==3) & inlist(valf,2,3,4,5))

* NOTE(review): the sum is missing whenever catoth10 is missing -- confirm
* that this cannot occur for the corrected cases.
replace estVal10  = estVal10 + catoth10 if `homeFixBrk'
replace addedHome = 1                   if `homeFixBrk'
* T242 is part of the condition, so it is updated last
replace T242      = 1                   if `homeFixBrk'

tab T242 addedHome if xIW==1,m
* -> 42 homes added to estate value (moved from not included T242==5 to included T242==1).
	
********************************************************************************
** Output imputation models for appendix
********************************************************************************

// Export the stored imputation models as appendix table TABG1.
// COEFLAB holds the coeflabel() option mapping regressor names to the row
// labels printed in the table; it is only passed to the .tex export below.
loc COEFLAB ///
	coeflabel( ///
		IHScatotb10 "ihs(Net Worth)" /// (most recent)
		female "Female" ///
		educ2 "Educ: high school or GED" ///
		educ3 "Educ: some college" ///
		educ4 "Educ: college graduate" ///
		age "Age" ///
		c.age#c.age "Age Squared" ///
		nonwhite "Non-white" ///
		preloadOwnHome "Owned Home 0/1" /// (preload)
		cxrgovmd "Medicaid Coverage" /// (most recent)
		crbeq10k "Intended Bequest 10k+" /// (most recent)
		crbeq100 "Intended Bequest 100k+" /// (most recent)
	)		

// Preview in the log: `*' selects every estimation result currently stored,
// so the table content depends on what was stored earlier in the file.
esttab *, ///
	se r2 pr2 starlevel(* .1 ** .05 *** .01)

// Write the LaTeX version. mlabel() supplies three column titles, i.e. three
// stored models are expected. substitute() rewrites the generated tabular
// environment into a full-width tabular* (the `end{tabular}' pattern has no
// backslash, so it matches inside `\end{tabular}').
// NOTE(review): the final pair `main %main' prefixes occurrences of "main"
// with a LaTeX comment character -- purpose not visible here, confirm.
esttab * using ${tabs}/TABG1.tex, replace booktabs ///
	se starlevel(* .1 ** .05 *** .01) ///
	stats(N r2 r2_p, label("Observations" "$ R^2 $" "pseudo-$ R^2 $") ///
	  fmt(%9.0fc %3.2f %3.2f)) ///
	drop(*_cons) `COEFLAB' mlabel("Any Estate" "Bracket" "ihs(Value)") ///
	nofloat nonote nonum ///
    substitute(\begin{tabular}{l*{3}{c}} ///
	           \begin{tabular*}{\textwidth}{@{\extracolsep{\fill}}lccc} ///
			   end{tabular}   ///
			   end{tabular*}  ///
			   main %main)	
				 
// Echo the written file into the log.
cat ${tabs}/TABG1.tex	

// Discard the stored estimates so later esttab calls start from a clean slate.
est clear	
	
********************************************************************************
** Output additional results for appendix
********************************************************************************

// Appendix table TABG2: frequency, percent and cumulative percent of the
// estate-value report type (valf = 0..7). Everything happens between
// preserve/restore, so the helper columns never reach the saved data set.
preserve

tab valf
// -> can check this against the output below

gen col1 = .
gen col2 = .
gen col3 = .
local RunningSum = 0
forvalues n=0(1)7 {
  // col1_n: number of rows with valf==n; col2_n: their share among rows with
  // non-missing valf (both are constants, identical on every row)
  egen col1_`n' = total(cond(valf<.,valf==`n',.)),m
  egen col2_`n' = mean(cond(valf<.,valf==`n',.))

  replace col1 = col1_`n' if valf==`n'
  replace col2 = 100*col2_`n' if valf==`n'

  // Accumulate the cumulative percent. If category n is empty, summarize
  // leaves r(mean) missing; adding it would turn the running sum (and col3 of
  // every later category) into missing, so empty categories are skipped.
  qui summ col2 if (valf==`n')
  if (r(N) > 0) {
    local RunningSum = `RunningSum' + r(mean)
  }
  replace col3 = `RunningSum' if (valf==`n')
}
est clear
// tabstat's default statistic is the mean; col1-col3 are constant within each
// valf group, so the posted means are simply the values computed above.
qui: eststo: estpost tabstat col1 col2 col3, by(valf) nototal

// Shared esttab options: three formatted columns, row labels for the eight
// valf codes, and the tabular -> tabular* rewrite for a full-width table.
loc OPTS ///
	cells("col1(lab(N) fmt(%9.0fc)) col2(lab(Percent) fmt(%9.2f)) col3(lab(Cum. Percent) fmt(%9.2f))") ///
	nonum mlab(none) eqlab(none) ///
	stats(N, label("Observations") fmt(%9.0fc)) ///
	coeflab(0 "No asset" ///
			1 "Continuous report" ///
			2 "Complete brackets, closed" ///
			3 "Complete brackets, top bracket" ///
			4 "Incomplete brackets, closed" ///
			5 "Incomplete brackets, open top" ///
			6 "No bracket information" ///
			7 "Don't know ownership") ///
    nofloat substitute(\begin{tabular}{l*{1}{ccc}} ///
				      \begin{tabular*}{\textwidth}{@{\extracolsep{\fill}}lccc} ///
	                   \end{tabular} \end{tabular*})

esttab ., `OPTS'
esttab . using ${tabs}/TABG2.tex, replace booktabs `OPTS'
cat ${tabs}/TABG2.tex
restore
	
********************************************************************************
** Report final estate distributions
********************************************************************************

// -> check: final sample size of exit interviews # 3,227
count if xIW==1

// -> check: how many have non-missing estate values # 3,161
count if xIW==1 & !missing(estVal10)

* Statistics and display format shared by every table in this section.
local distOpts stat(n mean p10 p25 p50 p75 p90 p95 p99) f(%9.0fc)

** Estate distribution when using only continuous reports:
* NOTE(review): the quintiles are defined on valf==1 while the table is
* restricted with xIW==1; rows outside valf==1 have a missing quintile.
* Kept exactly as written -- confirm that this is intended.
xtile quantile = estVal10 if valf==1 [aw=crwtall], nquantiles(5)
latabstat estVal10 if xIW==1 [aw=crwtall], by(quantile) `distOpts'
drop quantile

** Estate distribution when using all reports (except "nothing of value"):
xtile quantile = estVal10 if valf>=1 [aw=crwtall], nquantiles(5)
tabstat estVal10 if valf>=1 [aw=crwtall], by(quantile) `distOpts'
latabstat estVal10 if valf>=1 [aw=crwtall], by(quantile) `distOpts'
drop quantile

** Estate distribution when using all reports:
xtile quantile = estVal10 [aw=crwtall], nquantiles(5)
tabstat estVal10 [aw=crwtall], by(quantile) `distOpts'
latabstat estVal10 [aw=crwtall], by(quantile) `distOpts'
drop quantile

********************************************************************************
** Save
********************************************************************************

// Checkpoint: write the data set with completed estate imputations to the
// ${save} directory, overwriting any earlier version.
save ${save}/decedent_sample_single.dta, replace

********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************

// Second stage starts here: read the checkpoint back in. The code below can
// therefore be run on its own once the file above exists.
use ${save}/decedent_sample_single.dta, clear

********************************************************************************
** Prepare additional variables for decedent data analysis
********************************************************************************

* Female dummy: ragender 1 -> 0 (male), 2 -> 1 (female)
recode ragender (1 = 0) (2 = 1), generate(sex)
label define sex 0 "0. male" 1 "1. female"
label values sex sex

* Positive-estate dummy, defined only for exit interviews with a non-missing
* estate value (an older version coded missing estate values as zero estates).
generate estate = (estVal10 > 0) if (xIW==1) & !missing(estVal10)
label define estate 0 "0. zero estate" 1 "1. positive estate"
label values estate estate

* Spread estate and EOL over each person's rows along the wave variable
* (mipolate with the `backward' option), creating xestate and xEOL.
sort hhid pn w
by hhid pn: mipolate estate w, gen(xestate) backward
by hhid pn: mipolate EOL w,    gen(xEOL) backward

	
** Indicators for different home ownership and nursing home statuses
** The indicators are filled in for each individual's entire history
** E.g., an individual who owned a home at exit-1, will have own_1x = 1 for all
** of her observations. An individual who did not own at exit-1 will have 
** own_1x = 0 for all observations. The variable will be missing for individuals 
** whose ownership status at exit-1 is missing.

sort hhid pn w

* Home ownership at exit-1 (xnriw==1), copied to all rows of the person.
* Only the xnriw==1 row with rownhm<8 contributes; otherwise the result is missing.
by hhid pn: egen own_1x = max(cond((xnriw==1) & (rownhm<8), (rownhm==1), .))

* Nursing-home residence at exit-1, built the same way from rnhmliv.
by hhid pn: egen nhr_1x = max(cond((xnriw==1) & (rnhmliv<.), (rnhmliv==1), .))

summarize timetodeath if (xnriw==1) & (sample==1), detail
// -> median time between last IW and death is 426 days

summarize timetodeath if (xnriw==4) & (sample==1), detail
// -> median time between fourth core IW and death is 2680 days

********************************************************************************
** ADLs / IADLs
********************************************************************************

					 
// ADLs: dress, walk, bath, eat, bed, toilt
// IADLs: meal, shop, phone, medicine, money
// nrdaysBed: only in exit IW. Number of days R stayed in bed more than half 
//            the day because of illness or injury during last three months 
//            before death.
//            90 means that R stayed in bed every day for more than half the day
//            .d means that proxy reporter "does not know"/"refuses" to answer
// Indexes: adla   = sum(dress, walk, bath, eat, bed)
//          iadlza = sum(meal, shop, phone, medicine, money)
//          totadl = adla + iadlza 
// Memory:  rmemrye: for core IWs 2002-2008, whether R was ever diagnosed with a
//                   memory-related disease. In core IW 2010, the question asks specifically
//                   about Alzheimer's disease and dementia. Recoded this into rmemrye yes or no.
//                   For exit IWs 2004-2012, the proxy respondent is asked on a 
//                   scale from 1-5 about the memory of the decedent. Recoded this
//                   into rmemrye to "yes" if proxy says "fair" or "poor", and "no"
//                   if proxy says "excellent", "very good" or "good"

* Exit-interview limitation measures carry the prefix `x'. On exit-interview
* rows, overwrite the core-named variable with its x-prefixed counterpart so
* one name covers both interview types. The three summary indexes
* (adla, iadlza, totadl) are handled by the same loop as the single items.
local limitVars ///
	rmemrye dress walkr bath eat bed toilt meals shop phone meds money ///
	adla iadlza totadl

foreach v of local limitVars {
	replace `v' = x`v' if xIW==1
}

********************************************************************************
** Compute cumulative and average care hours for interview period
********************************************************************************

***************************************
* Step 1:
* Generate weights = share of days in last X years (years = 1,2,...,8)
***************************************

* For each horizon of 1..8 years: length of the overlap between this row's
* interval [timetodeath, ttdSincePrevIW] (days before death) and the last
* `yrs' years of life, expressed as a share of the horizon. Missing when
* either endpoint is missing.
forvalues yrs = 1/8 {
	local horizon = 365 * `yrs'
	by hhid pn: gen daysharelast`yrs'y = ///
		(min(`horizon', ttdSincePrevIW) - min(`horizon', timetodeath)) / `horizon' ///
		if !missing(ttdSincePrevIW, timetodeath)
}

* Check: count, mean and median of each weight by interviews-before-exit
forvalues yrs = 1/8 {
	tabstat daysharelast`yrs'y, by(xnriw) statistics(n mean p50)
}

***************************************
* Step 2:
* Compute weighted averages of hours
***************************************

* Weighted person-level means of weekly care hours over the last 1..8 years
* (1998-2012), using the day-share weights from Step 1:
*   yghrswklynew     - young-generation hours
*   tohrswklyinnmnew - total hours, with NN-PMM imputed hours for NHRs
* The result is kept on exit-interview rows only.
foreach hrs in yghrswklynew tohrswklyinnmnew {
	forvalues yrs = 1/8 {
		by hhid pn: asgen `hrs'`yrs'ymean = `hrs', weight(daysharelast`yrs'y)
		replace `hrs'`yrs'ymean = . if (xIW != 1)
	}
	summ `hrs'?ymean, sep(0)
}
	
***************************************
* Step 3:
* Compute log(1+hours), to reduce influence of outliers (hours very skewed)
***************************************

// NOTE: '1+' accommodates zeros.

* log(1 + mean hours) for each horizon and both hours measures
forvalues yrs = 1/8 {
	foreach hrs in yghrswklynew tohrswklyinnmnew {
		gen `hrs'`yrs'log = ln(1 + `hrs'`yrs'ymean)
	}
}

***************************************
*** Nursing home use
***************************************

** Ever used NH
* Person-level maximum of rnhmliv, i.e. whether R was ever recorded in a NH.
* NOTE(review): because of the `if' qualifier, everNH stays missing on rows
* where rnhmliv itself is system-missing -- confirm that this is intended.
by hhid pn: egen everNH = max(rnhmliv) if rnhmliv != .

** Mean share of days as nursing home resident in last X years
** (assumes NH residents were residents for entire interview period)
forvalues yrs = 1/8 {
	by hhid pn: asgen rnhmliv`yrs'ymean = rnhmliv, weight(daysharelast`yrs'y)
	replace rnhmliv`yrs'ymean = . if (xIW != 1)   // keep on exit interviews only
}

summ rnhmliv?ymean, sep(0)

********************************************************************************
** Child characteristics
********************************************************************************

// Co-residence at the exit IW comes from A099 (number of co-resident children
// reported by the proxy); for core IWs, hknresd gives the number of children
// who coreside.
recode A099 (1/max = 1), generate(xkidres)
tabulate A101 xkidres if xIW==1, missing
// -> tabs any co-resident children by number of children (A101)
//    when A101=0 (no kids), A099=0 (no coresident kids), as expected

recode hlvnear (0 5 8 = 0) (1 = 1), generate(hkidres)
// -> dummy for any coresident child, preferred to hresdkn: is zero for childless by construction
//    and has fewer missing values

* Combined measure: exit value on exit rows, core value on core rows
* (the core assignment comes second, exactly as before).
generate kidres = xkidres if (xIW==1)
replace  kidres = hkidres if (riwstat==1)
// -> combined coresidence measure for core and exit interviews

recode hlvnear (0 8 = 0) (1 5 = 1), generate(kid10mi)
// -> dummy for any child within 10 miles or closer, preferred to hlv10mikn: is zero for childless
//    by construction and has fewer missing values

/*
RAND Family Files variables to use:
hownhmkn
hndau      
hmarkn    
heduckmn  
hworkftkn
hlvnear
hresdkn
hlv10mikn
hcontkn

Plus our creations:
kidres
kid10mi
hkage
hkincb
hknkid
*/

sort hhid pn w

* Child characteristics to process. hnkid must stay first: the zero-fill
* rules below refer to chnkid / mhnkid, which are created from it.
local KIDVARS ///
	hnkid heduckmn hkincb hkage hknkid hworkptkn hworkftkn hndau hmarkn ///
	hownhmkn hresdkn hlv10mikn hcontkn kidres kid10mi

* Version 1 (prefix c): carry each characteristic forward within person
foreach kv of local KIDVARS {
	capture drop c`kv'
	by hhid pn: carryforward `kv', gen(c`kv')
	* blank out rows that are neither a core nor an exit interview
	replace c`kv' = . if (riwstat != 1) & (xIW != 1)
	* still missing and R has no kids -> zero
	replace c`kv' = 0 if missing(c`kv') & (chnkid == 0)
}

* Version 2 (prefix m): person-level average across waves
foreach kv of local KIDVARS {
	capture drop m`kv'
	by hhid pn: egen m`kv' = mean(`kv')
	* blank out rows that are neither a core nor an exit interview
	replace m`kv' = . if (riwstat != 1) & (xIW != 1)
	* still missing and R has no kids -> zero
	replace m`kv' = 0 if missing(m`kv') & (mhnkid == 0)
}

********************************************************************************				  
** Variables for trajectories / histories
********************************************************************************
				  
***************************************
** Combine wealth and estate values
***************************************

// One wealth series per decedent: the estate value on the exit-interview row,
// net worth (atotb10) on every other row.
generate wlthhist = cond(xIW==1, estVal10, atotb10)

// Spread anyBeq over the decedent's whole history
bysort hhid pn: mipolate anyBeq w, gen(anyBeqhist) backward

* Weighted distribution of wlthhist by number of IWs away from the exit IW
* (xnriw=0 is the exit IW): everyone, then decedents who leave an estate,
* then decedents who do not.
local wlthStats stat(n mean p10 p25 p50 p75 p90 p95 p99)
tabstat wlthhist                  [aw=crwtall], by(xnriw) `wlthStats'
tabstat wlthhist if anyBeqhist==1 [aw=crwtall], by(xnriw) `wlthStats'
tabstat wlthhist if anyBeqhist==0 [aw=crwtall], by(xnriw) `wlthStats'

* From here on wlthhist is measured in 1000s (for graphs)
replace wlthhist = wlthhist/1000

* Dummy for a complete wealth history T-4, ..., T: 1 if wlthhist is non-missing
* on every row with xnriw in 0..4, 0 otherwise; set to missing unless the
* person has a non-missing totnriw of at least 4.
by hhid pn: egen fullWlthHist4 = min(cond(inrange(xnriw,0,4), !missing(wlthhist), .))
replace fullWlthHist4 = . if (totnriw < 4) | missing(totnriw)

***************************************
** Home ownership history
***************************************

* Ownership indicator per interview. Later lines override earlier ones, so the
* order of the four assignments must not be changed.
generate ownHist = 1 if (riwstat==1 & rownhm==1)                          // core owners
replace  ownHist = 0 if (riwstat==1 & inlist(rownhm,0,2,3,7))             // core non-owners
replace  ownHist = 1 if (xIW==1 & xrownhm==1)                             // exit owners
replace  ownHist = 0 if (xIW==1 & (xrownhm==0 | inlist(pwrownhm,2,3,7)))  // exit non-owners
// -> exit non-owners: prev owners who disposed, or prev non-owners

** With adjustment for inter-vivos transfers (add them back in)
* HOMEIVTR: person ever has ivtrhomeBG==1 or dispHome==1 (1 = gave away home)
by hhid pn: egen HOMEIVTR = max(ivtrhomeBG==1 | dispHome==1)
generate ownHistPlusIVTR = cond(ownHist==0 & HOMEIVTR==1, 1, ownHist)
drop HOMEIVTR

********************************************************************************
** Additional variables for decedent regression analyses
********************************************************************************
	
***************************************
* Estate variables
***************************************

* ihsEstVal10 already exists from the imputation stage; rebuild it from the
* final estate values.
drop ihsEstVal10
generate ihsEstVal10 = ln(estVal10 + sqrt((estVal10)^2 + 1))
// -> inverse hyperbolic sine function: similar to taking the logarithm but
//    accommodates non-positive values

generate logEstVal10 = ln(estVal10)
// -> when we want to fully separate extensive and intensive margins
//    (zero estates have no logarithm and become missing)

***************************************
* Housing bequests, transfers
***************************************

* Whether the estate includes a home, based on ownership at time of death
* (exit rows only). The non-owner rule runs second and therefore wins if both
* conditions hold.
generate homeBeq = 1 if (xIW==1 & xrownhm==1)
replace  homeBeq = 0 if (xIW==1 & (xrownhm==0 | inlist(pwrownhm,2,3,7)))

* Broader measure: additionally counts homes passed on as an inter-vivos
* transfer (ivtrhomeBG==1, best guess) or given away (dispHome==1).
generate homeBeqOrIvtr = homeBeq
replace  homeBeqOrIvtr = 1 if (xIW==1) & (homeBeqOrIvtr==0) & (ivtrhomeBG==1 | dispHome==1)

***************************************
* Wealth, lagged wealth and home ownership
***************************************

* All lags in this section take the value from the previous row of the same
* person ([_n-1]) or carry it forward row by row, so rows MUST be in wave
* order within person. The order is requested explicitly with
* `bysort hhid pn (w):' on every such line instead of relying on a sort done
* much earlier in the file (commands run in between do not guarantee the
* order within hhid pn).
sort hhid pn w

* current wealth (core IWs): unweighted deciles/quintiles over all rows
xtile atotb10_deciles = atotb10, nq(10)
xtile atotb10_quintiles = atotb10, nq(5)

* lagged wealth (core IWs): value on the immediately preceding row
bysort hhid pn (w): gen L1atotb10 = atotb10[_n-1]
gen L1atotb10_1000s = L1atotb10 / 1000
xtile L1atotb10_quintiles = L1atotb10, nq(5)
xtile L1atotb10_deciles = L1atotb10, nq(10)
gen L1atotb10_ihs = log(L1atotb10 + sqrt(1 + (L1atotb10^2)))

* previous interview wealth, allowing for gaps (core IWs):
* a missing lag is filled with the last non-missing lag of the same person
bysort hhid pn (w): carryforward L1atotb10, gen(cL1atotb10)
gen cL1atotb10_1000s = cL1atotb10 / 1000
xtile cL1atotb10_quintiles = cL1atotb10, nq(5)
xtile cL1atotb10_deciles = cL1atotb10, nq(10)
gen cL1atotb10_ihs = log(cL1atotb10 + sqrt(1 + (cL1atotb10^2)))

* lagged wealth (core + exit IWs)
bysort hhid pn (w): gen L1wlthhist = wlthhist[_n-1]

* previous interview wealth, allowing for gaps (core + exit IWs)
bysort hhid pn (w): carryforward L1wlthhist, gen(cL1wlthhist)

* lagged ownership
bysort hhid pn (w): gen L1own = own[_n-1]

* previous ownership, allowing for gaps
bysort hhid pn (w): carryforward L1own, gen(cL1own)

***************************************
* Wealth changes
***************************************

* Annualized wealth changes, in 1000s per year (iwgap is divided by 365 to
* convert it to years). Two variants per wealth measure:
*   ...AnnDiff1   - relative to the immediately preceding row (L1...)
*   ...AnnDiffcpw - relative to the most recent non-missing earlier value (cL1...)
* atotb10 is in dollars and is divided by 1000 here; wlthhist was already
* rescaled to 1000s above.

gen atotb10AnnDiff1 = ((atotb10 - L1atotb10)/1000) / (iwgap/365)
// -> 1-wave change in wealth, annualized (core IWs)
//    FIX: this used cL1atotb10 and was therefore an exact duplicate of
//    atotb10AnnDiffcpw; the strict one-row lag is used now, in line with
//    wlthhistAnnDiff1 below.

gen atotb10AnnDiffcpw = ((atotb10 - cL1atotb10)/1000) / (iwgap/365)
// -> 1-interview change in wealth, annualized (core IWs)
//    use most recent previous interview, allowing for gaps

gen wlthhistAnnDiff1 = (wlthhist - L1wlthhist) / (iwgap/365)
// -> 1-wave change in wealth, annualized (including core and exit IWs)
//    (row-wise formula; no by-group prefix is needed)

gen wlthhistAnnDiffcpw = (wlthhist - cL1wlthhist) / (iwgap/365)
// -> 1-wave change in wealth, annualized (including core and exit IWs)
//    use most recent previous interview, allowing for gaps

***************************************
* Housing ownership and liquidations
***************************************

* Ever owned a home: person-level maximum of own
by hhid pn: egen everOwn = max(own)

***************************************
* Permanent income
***************************************

* "Permanent income": person-level mean of hitot (HH income) over all rows
* where it is non-missing
by hhid pn: egen pi = mean(hitot)

* log(1 + permanent income)
generate logpi = ln(1 + pi)

********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************

********************************************************************************
** Select final sample for analyses
********************************************************************************

* Keep only rows flagged singleExitIW==1 (rows with a missing flag are dropped too)
drop if singleExitIW != 1

* Sample sizes: persons, then core/exit interview rows
unique hhid pn                                 // -> 3,227
count if (riwstat==1 | xIW==1)                 // -> 17,974

* parents
unique hhid pn if (nochild==0)                 // ->  2,869
count if (riwstat==1 | xIW==1) & (nochild==0)  // -> 16,049

* childless
unique hhid pn if (nochild==1)                 // ->    358
count if (riwstat==1 | xIW==1) & (nochild==1)  // ->  1,925

********************************************************************************
** Set up panel variable
********************************************************************************

// Build a single numeric person identifier by concatenating household id and
// person number as text and converting the result to a number.
// NOTE(review): this yields a unique id only if hhid and pn have fixed width
// (e.g. zero-padded strings) -- their storage types are not visible here.
// xtset below stops with an error if (hhidpn, w) is not unique.
egen hhidpn = concat(hhid pn)
destring hhidpn, replace
sort hhidpn w
// Declare the panel structure: person identifier x wave.
xtset hhidpn w

********************************************************************************
** Save final dataset
********************************************************************************

// Final analysis file (overwrites any existing version in ${save}).
save ${save}/decedent_sample_single_final, replace

********************************************************************************
