/*******************************************************************************

************
** Function: 
************

  Creates a supplementary data extract to merge with decedent sample.
	This extract includes information on inter-vivos housing bequests to children.

************
** Inputs: 
************	

  - decedent_sample.dta  (see Decedents_GetSample.do)
	- randkids.dta         (see Decedents_GetSample.do)
	
************
** Outputs: 
************	
	
	- decedent_sample_supplement.dta
	
*******************************************************************************/

// Start from a clean session: clear memory, switch off output paging and drop
// all macros before anything else is defined.
clear *
set more off
macro drop _all

********************************************************************************
** Directory names
********************************************************************************

// NOTE(review): presumably GetDirNames.do defines the global ${save} used below;
// it has to run after "macro drop _all" -- confirm against that do-file.
do GetDirNames.do

********************************************************************************
** Master data set
********************************************************************************

use ${save}/decedent_sample.dta, clear

// Upper-case the identifiers so that they match the names HHID, PN and SUBHH
// used in the rest of this file (merge keys, sorts).
// NOTE(review): renvars looks like a user-written command -- confirm it is installed.
renvars hhid pn subhh, u

********************************************************************************
* Kid level data (KwOWNHM, KwOWNRHM, KwDEED) 
********************************************************************************

// Attach the kid-level records (randkids) to the respondent-level master data:
// one respondent-wave record matches many kid records.
merge 1:m HHID PN w using ${save}/randkids

// Diagnose the merge outcome
tabulate _merge
// _merge==1 (master only):
tabulate _merge if w != 11
// -> wave 11 is not part of randkids and is thus only available in the master data set
tabulate _merge if w != 11 & hchild != 0
// -> childless individuals are not part of randkids and thus only available in the master data set
tabulate riwstat if w != 11 & hchild != 0 & _merge == 1
// -> R's interview status is in most cases non-response and so there is no counterpart in randkids
//    The remaining cases are nonsensical.
// _merge==2 (using only):
tabulate inw if _merge == 2
// -> non-respondent children only appear in the using data set randkids
// -> explains all cases with _merge==2

// Keep matched records only
keep if _merge == 3

tabulate inw
// -> there are also non-respondent children among the successful merges;
//    they contain no information, so only records with inw different from 0 stay
keep if inw != 0
drop _merge

// --> Have respondent-child-level data for respondents with children while
//     they are alive. That's good since the variables we are constructing here
//     are exclusively for the core interviews and are relevant for families
//     with children.

// --> Have respondent-child-level data for respondents with children while
//     they are alive. That's good since the variables we are constructing here
//     are exclusively for the core interviews and are relevant for families 
//     with children.
 
******************************

// Kid person number: three characters of "kidid" starting at position 8,
// converted from string to numeric
gen kidPN = substr(kidid,8,3)
destring kidPN, replace

// OPNs of the kids who own a home: convert to numeric
destring fat_E016M*, replace

sort HHID PN w

* Inspect fat_E015 (whether kids own home) and make its coding consistent
tabulate fat_E015, missing

// Codes 38 / 993 in the first OPN slot ("all kids own home", see the use of
// these codes further below) imply a "yes"
bysort w: tabulate fat_E015 if inlist(fat_E016M1, 38, 993), missing
replace fat_E015 = 1 if inlist(fat_E016M1, 38, 993)
// -> in wave 6 there are some cases (30) where fat_E015==. even though should be fat_E015==1

// A "no" (5) combined with a kid OPN in the first slot is recoded to "yes"
bysort w: tabulate fat_E015 if fat_E015==5 & fat_E016M1>0 & fat_E016M1<994
replace fat_E015 = 1 if fat_E015==5 & fat_E016M1>0 & fat_E016M1<994
// -> in wave 6 there are some cases (65) where fat_E015==5 even though should be fat_E015==1

// Codes 8 and 9 -> .r (respondent refused to answer)
replace fat_E015 = .r if inlist(fat_E015, 8, 9)

// Waves 8 and 10: question skipped for re-interviewed respondents unless they
// indicated a change in the number of children (e.g. step kids); missing code
// .z stands for skipped re-interviewed respondents
replace fat_E015 = .z if fat_E015==. & inlist(w, 8, 10)

// Whatever is still system missing is a true missing: no other reason visible
replace fat_E015 = .m if fat_E015==.

// "no" is stored as 0 from here on
replace fat_E015 = 0 if fat_E015==5

label define fat_E015 0 "0. no" 1 "1.yes" .m "true missing" .r "refused" .z "reinterviewed"
label values fat_E015 fat_E015

tabulate fat_E015, missing

********************************************************************************
* Kid homeownership based on original HRS data
********************************************************************************
// Kid-level homeownership indicator built from the original HRS variables
gen HRSkidOwn = .

// The kid owns a home if its OPN shows up in any of the 18 owner slots
forvalues j = 1/18 {
    replace HRSkidOwn = 1 if kidPN == fat_E016M`j'
}

// Codes 38 / 993 in the first slot: all kids own home
replace HRSkidOwn = 1 if inlist(fat_E016M1, 38, 993)

// In waves 4 and 5 HRS assigns homeownership (fat_E015=1) even though
// fat_E016M1==. means that no child owns a home! In these two waves the question
// of which child owns a home was skipped if the respondent has only one
// living child in contact. In later waves the interviewer fills in the kid OPN.
replace HRSkidOwn = 1 if fat_E015==1 & fat_E016M1==.

// Kid owns home (fat_E015==1), but HRS doesn't know which kid
replace HRSkidOwn = .d if inlist(fat_E016M1, 997, 998, 999)

// Carry over the household-level missing codes of fat_E015:
// .z re-interviewed, .r respondent refused to answer, .m true missing
foreach code in .z .r .m {
    replace HRSkidOwn = `code' if fat_E015 == `code'
}

// Everything still system missing: kids who don't own a home
replace HRSkidOwn = 0 if HRSkidOwn==.


label define HRSkidOwn 0 "0. no" 1 "1.yes" .m "true missing all kids" .r "refused all kids" .d "DK which kid" .z "reinterviewed"
label values HRSkidOwn HRSkidOwn

tabulate HRSkidOwn, missing

********************************************************************************
* Compare HRS indicator of kid homeownership (HRSkidOwn) with RAND (kownhm)
********************************************************************************
// Purpose of this section: align the missing codes of the RAND variable kownhm
// with HRSkidOwn, count kid owners per respondent-wave under both variables, and
// attribute the remaining differences to three sources (Diff1, Diff2, Diff3).
tab kownhm  if HRSkidOwn==.d
// -> RAND codes these as kids do NOT own home: kids own home but don't know which kid
replace kownhm=.d if HRSkidOwn==.d   // correct RAND variable

tab kownhm if HRSkidOwn==.r
// -> RAND codes these as kids own home, small mistake
replace kownhm=.r if (kownhm==1) & (HRSkidOwn==.r)

tab kownhm if HRSkidOwn==.z
// -> RAND codes these as "no" even though survey section never took place 
replace kownhm=.y if HRSkidOwn==.z

// Sums within respondent-wave; the counts are meant as "number of kids coded 1"
// (missing codes are not supposed to add to the sum)
bys HHID PN w: egen Z_HRS = sum(HRSkidOwn) // count kid owners per respondent
bys HHID PN w: egen Z_RAND=sum(kownhm)
gen Z_diff = Z_HRS - Z_RAND
tab Z_diff
// -> difference in number of kids owning a home between HRS and RAND
// -> RAND assigns homeownership less often than HRS
count if Z_diff>0  // 4,680 


* Diff1: RAND does not assign homeownership to some residing children for which the HRS does
tab kresd 
// -> 7,896 resident children 
tab HRSkidOwn if kresd==1 & (fat_E016M1==38 | fat_E016M1==993)
// -> HRS: 600 resident children own a home (they are part of families where all kids own a home)
tab HRSkidOwn if kresd==1 & ~(fat_E016M1==38 | fat_E016M1==993)
// -> HRS: 5,157 resident children don't own a home (they are part of families where not all kids own a home)
//         888 own a home 
tab kownhm if HRSkidOwn==1 & kresd==1 & (fat_E016M1==38 | fat_E016M1==993)
// -> RAND: 600 resident children own a home (they are part of families where all kids own a home)
tab kownhm if HRSkidOwn==1 & kresd==1 & ~(fat_E016M1==38 | fat_E016M1==993) 
// -> RAND: 887 resident children do not own a home but HRS says that they do

// --> Strongly suggests that RAND makes a mistake here. There is no reason
//     that suggests that resident kids for which the HRS says that they do 
//     own a home do not own a home as RAND suggests.

gen Diff1=0 // Difference 1
replace Diff1=1 if HRSkidOwn==1 & kresd==1 & ~(fat_E016M1==38 | fat_E016M1==993)
// -> 888 residing children who own a home according to the HRS but not RAND 
bys HHID PN w: egen allDiff1=max(Diff1)
// -> mark all records of the respondent-wave (3,269)

* Diff2: RAND does not assign homeownership for waves 4 and 5 in which no OPN 
*        was assigned for families with one child in contact
tab kownhm if fat_E015==1 & fat_E016M1==.
// -> RAND codes these as kids do NOT own home: kids own home but OPN was not assigned for families with one child

gen Diff2=0 // Difference 2
replace Diff2=1 if fat_E015==1 & fat_E016M1==. // flag records where RAND needs correcting
bys HHID PN w: egen allDiff2=max(Diff2)
// -> mark all records of the respondent-wave (1,072)

* Diff3: Not all KIDIDs (a RAND variable) are part of the list provided by the HRS
gen Diff3=0 // Difference 3
replace Diff3=1 if Z_diff~=0 & allDiff1==0 & allDiff2==0 
// -> 440 cases which are not accounted for by Diff1 and Diff2
bys HHID PN w: egen allDiff3=max(Diff3)
// -> mark all records of the respondent-wave (442)

tab Z_diff if allDiff1==1|allDiff2==1|allDiff3==1 // 4,680
// -> all discrepancies between HRS and RAND are now accounted for
// --> use HRS variable to indicate kid homeownership, RAND does not do any
//     improvements here

********************************************************************************
* Fill in values for skipped IWs (waves 8 and 10)
********************************************************************************
// In wave 8 (year 2006) the question regarding kid homeownership is skipped for 
// the re-interviewed households and RAND sets the missing value to .Y. 
// A respondent is not skipped if she reports having new children (e.g step children). 
// RAND follows coding from HRS when there is a kid OPN in wave 8, which implies 
// homeownership of a child, meaning RAND correctly assigns "yes". However, RAND 
// also assigns "no" values for some kids without any apparent patterns. 
// RAND also does not seem to realize that in wave 10 (year 2010) the pattern of 
// questions is as in wave 8. It simply assigns "no" to all the missing data.
 
// Purpose of this section: for waves 8 and 10, replace the "re-interviewed" code
// (.z) of HRSkidOwn with the kid's value from adjacent waves, and then update the
// household-level variable fat_E015 accordingly. The statements depend on their
// order: each step only touches values left system missing by the previous one.
egen KID=group(HHID PN kidid) // unique kid number

duplicates report KID w // check whether KID w uniquely identify observations
duplicates list KID w

tsset KID w // if KID w uniquely identify observations set panel dimension
            // (needed for the lag/lead operators L1., L2., F1. below)


** Wave 8:
tab HRSkidOwn if w==8, m
// -> 0 and 1 are for respondents who report new children (665 cases)
                                                   
replace HRSkidOwn = L1.HRSkidOwn if w==8 & HRSkidOwn==.z // Assign past homeownership status
                                                             // for reinterviewed and without change 
                                                             // in number of children
// -> 665 new missing values are introduced if the lag operator cannot find KID in
//    the previous wave (must be coincidence that number of kids missing is the same as respondents
//    who report new children)
replace HRSkidOwn = L2.HRSkidOwn if w==8 & HRSkidOwn==. // for those which were generated as missing, go back one more time
replace HRSkidOwn = F1.HRSkidOwn if w==8 & HRSkidOwn==. // for those which were generated as missing, go forward one time

tab HRSkidOwn if w==8, m
// -> 169 new missing values after going back one or two waves or looking one wave ahead
replace HRSkidOwn = .l if w==8 &  HRSkidOwn==. // reinterviewed but cannot find any information in adjacent waves

** Wave 10: (same as for wave 8, but without the step that looks one wave ahead)
tab HRSkidOwn if w==10, m
// -> 0 and 1 are for respondents who report new children (404 cases)
replace HRSkidOwn = L1.HRSkidOwn if w==10 & HRSkidOwn==.z // Assign past homeownership status
                                                              // for reinterviewed and without change 
                                                              // in number of children
// -> 269 new missing values are introduced                                                             
replace HRSkidOwn = L2.HRSkidOwn if w==10 & HRSkidOwn==. // for those which were generated as missing, go back one more time
                                                         // (the wave-8 value used here has already been filled in above)

tab HRSkidOwn if w==10, m
// -> 161 new missing values after going back one or two waves 
replace HRSkidOwn = .l if w==10 &  HRSkidOwn==. // reinterviewed but cannot find any information in adjacent waves

tab karel if HRSkidOwn==.l
// -> new missing values often due to kid-in-law
// Number of kids with code .l per respondent, computed within wave 8 / wave 10 only
bys HHID PN: egen check_w8 =sum(HRSkidOwn==.l) if w==8
bys HHID PN: egen check_w10=sum(HRSkidOwn==.l) if w==10
// -> mark all records in waves 8 and 10  that are related to the missing (.l) record 
tab check_w8 if check_w8~=0 
// -> almost always only one kid has missing record and so other kids in the 
//    same family have a record. That's good since our analysis will be on 
//    the household-kid level
tab check_w10 if check_w10~=0

bys HHID PN: egen max_w8=max(HRSkidOwn) if w==8
// -> assign =1 if at least one child owns home, assign =0
//    if no child owns a home, if .l is among the values then max ignores it
//    unless all values are .l in which case max=.
// -> As soon as there is one yes max_w8 correctly states that at least one child
//    owns a home. However, if, for example, among two children one has 0 and the
//    other has .l then max assigns 0 and so we assume that the kid which has .l
//    also owns no home. 
tab HRSkidOwn if max_w8==0, m
// -> this is rarely the case and so this is not a problem
replace fat_E015=max_w8 if fat_E015==.z & w==8
// -> update variable whether kids own home for re-interviewed respondents

bys HHID PN: egen max_w10=max(HRSkidOwn) if w==10
tab HRSkidOwn if max_w10==0, m
// -> this is rarely the case and so this is not a problem
replace fat_E015=max_w10 if fat_E015==.z & w==10
// -> update variable whether kids own home for re-interviewed respondents
replace fat_E015=1 if HRSkidOwn==.d
// -> children own home but don't know which kid
replace fat_E015=HRSkidOwn if fat_E015==.
// -> where max_w8/max_w10 came out as system missing, fall back on the kid-level code

// Check:
tab HRSkidOwn, m
// -> looks reasonable on individual-kid level 
tab fat_E015, m
// -> looks reasonable on household-kid level

********************************************************************************
* Kid owns respondent's home based on original HRS data
********************************************************************************
// RAND classifies the variable KwOWNRHM under: "Kid Transfers from Respondent"
// in Section 5C of the family codebook (randfamC). However, it is not convincing 
// that RAND's interpretation of this variable being a transfer from the respondent 
// to children is correct. RAND derives the variable KwOWNRHM from the HRS housing 
// section, variables H088 (G3146 in 2000 and F2828 in 1998): "Is your home owned 
// by a relative?" and H091 (G3148M in 2000 and F2830 in 1998): "Which child is 
// that?" BUT, there is also the variable H092 (G3149 in 2000 and F2831 in 1998): 
// "Did you ever own this home?" which RAND does not use. This variable should 
// be very helpful in understanding whether a house transfer plausibly took place.  

// How does RAND construct variable KwOWNRHM based on H088 and H091?
// This is extremely tedious since the HRS implemented multiple changes on when 
// the variables H088-H092 are asked. In summary, H088-H092 are available 
// for all waves (4-10) if the ownership status is "rent-free" or "other" AND 
// R HAS MOVED since the previous wave. In wave 6, H088-H092 are also available 
// if status is "rent" and R has moved. For waves 4-5, fat_H088-fat_H092 is also 
// available for STAYERS when the status is "rent-free" and "other". Finally, 
// in wave 4 these variables are also coded for stayers if the status  is "rent". 

// It is important to know whether or not a respondent has moved since the 
// previous wave. For waves 6-10 only those who are in categories "rent-free"
// or "other" and have moved continue on to the questions that ask about the
// identity of who owns the respondent's home. In waves 4 and 5 also stayers
// are asked these questions.

// Movers and non-movers: fat_X033 HH moved since previous wave (1="yes", 3="yes", 5="no")
// After the recode: 1 = moved, 0 = did not move.
recode fat_X033 (3=1) (5=0) 
tab fat_X033 if riwstat==1, m
// -> variable is always coded

// Share of movers per wave on the respondent level. The data in memory have one
// record per respondent-wave-kid, so the mover flag is collapsed with (max):
// this keeps it a 0/1 indicator per respondent-wave. Collapsing with (sum) would
// instead return the flag multiplied by the number of kid records.
// preserve/restore leaves the kid-level data in memory untouched.
preserve 
collapse (max) fat_X033 riwstat, by(HHID PN w)
bys w: tab fat_X033 if riwstat==1 
// -> about 20% of Rs move per wave
restore

// Respondent's housing tenure (rownhm), rebuilt here from the RAND flag variable
// HAFHOUS in the same way as in do-file Decedents_ProcessData.do
gen rownhm = .

// hafhous 1 = continuous report, 2 = complete bracket, 3 = incomplete bracket,
// 5 = no value/bracket on primary housing value  -> owner
replace rownhm = 1 if inlist(hafhous, 1, 2, 3, 5)
// hafhous 6 = no primary housing asset (rent, rent-free, other)
replace rownhm = 0 if hafhous==6
// hafhous 7 = don't know ownership, 9 = no financial respondent
replace rownhm = 9 if inlist(hafhous, 7, 9)

* Split rownhm==0 into rent (2), rent-free (3) and other (7) using fat_H004
foreach code in 2 3 7 {
    replace rownhm = `code' if rownhm==0 & fat_H004==`code'
}

* Mobile homes (H002=1) are described by a separate variable, fat_H014:
*   4 = rents both (site and home), 2 = rents (only owns site)  -> rent
*   7 = rent-free (owns neither site nor home)                  -> rent-free
replace rownhm = 2 if rownhm==0 & fat_H002==1 & inlist(fat_H014, 2, 4)
replace rownhm = 3 if rownhm==0 & fat_H002==1 & fat_H014==7
// -> H014=1 (owns both mobile home and site) and H014=3 (owns only home) should be in rownhm==1

* Farms and ranches (H001=1) are described by a separate variable, fat_H008
replace rownhm = 2 if rownhm==0 & fat_H001==1 & fat_H008==3  // rent
replace rownhm = 7 if rownhm==0 & fat_H001==1 & fat_H008==7  // other

// Adjustments based on rnhmliv
replace rownhm = 9 if rownhm==0 & rnhmliv==0
replace rownhm = 0 if rnhmliv==1 & inlist(rownhm, 3, 7)

label define rownhm 0 "0. NHR ~own/rent" 1 "1.own" 2 "2.rent" 3 "3.rent-free" 7 "7.other" 9 "9. dk own"
label values rownhm rownhm


** Understand coding of H088 (coding of H089, H091 and H092 follows the same pattern)

* For each wave (going backwards: 10, 6, 5, 4), first for MOVERS SINCE LAST WAVE
* (fat_X033==1) and then for NON-MOVERS (fat_X033==0), tabulate the tenure
* status of interviewed respondents and then fat_H088 within each tenure status.
foreach wv in 10 6 5 4 {
    foreach mover in 1 0 {
        tab rownhm               if riwstat==1 &  fat_X033==`mover' & w==`wv'
        bys rownhm: tab fat_H088 if riwstat==1 &  fat_X033==`mover' & w==`wv'
    }
}

* Findings from the tabulations above:
//
// Wave 10
//   movers:     21 "rent free" and 4 "other"; all 21 + 4 are coded
//   non-movers: 53 "rent free", 6 "other"; non-movers are not coded
//   --> fat_H088-fat_H092 only available for movers with "rent free" or "other"
//       ("rent" must be a coding mistake; survey says that these should be skipped)
//   --> Waves 7-9: exactly the same
//
// Wave 6
//   movers:     105 "rent free", 60 "other", 334 "rent";
//               103 "rent free", 56 "other", and 330 "rent" are coded
//   non-movers: 188 "rent free", 154 "other"; non-movers are not coded
//   --> exactly the same as waves 7-10, but "rent" is also asked (if mover).
//       ("own" must be a coding mistake; survey says that these should be skipped)
//
// Wave 5
//   movers:     6 "rent free", 95 "other", 298 "rent";
//               6 "rent free", 95 "other", 296 "rent" are coded
//   non-movers: 17 "rent free", 327 "other";
//               17 "rent free", 325 "other" are coded
//   --> non-movers are also coded if "rent free" or "other"; otherwise, as wave 6
//
// Wave 4
//   movers:     8 "rent free", 114 "other", 393 "rent"; all are coded
//   non-movers: 17 "rent free", 204 "other", 673 "rent"; all are coded
//   --> non-movers are also asked if "rent"; otherwise as wave 5

* HRS CODING OF H088-H092 IS THE FOLLOWING:
//       Wave 10:   "rent free" and "other" movers
//       Waves 7-9: "rent free" and "other" movers
//       Wave 6:    "rent free" and "other" movers                and "rent" movers
//       Wave 5:    "rent free" and "other" movers and non-movers and "rent" movers 
//       Wave 4:    "rent free" and "other" movers and non-movers and "rent" movers and "rent" non-movers




  
// OPN variables of the children owning the respondent's home: string -> numeric.
// fat_H091M* only exists for wave 5, where up to three children can be named
// in response to the question.
destring fat_H091 fat_H091M1 fat_H091M2 fat_H091M3, replace

// Only in wave 5 multiple OPNs for children exist
foreach v of varlist fat_H091M1 fat_H091M2 fat_H091M3 {
    tab `v'
}
// -> if child owns respondent's home it is almost always owned by a single child
sort HHID PN w

// Our target group: respondents living "rent-free" (3) or "other" (7)
gen targetGroup = inlist(rownhm, 3, 7)

* Within the target group: movers (fat_X033==1) first, then stayers (fat_X033==0)
bysort w: tab fat_H088 if targetGroup==1 & fat_X033==1, m
// -> question is always asked of movers of the target group
bysort w: tab fat_H088 if targetGroup==1 & fat_X033==0, m
// -> question was asked also of stayers in waves 4 and 5

* Outside of the target group: movers
bysort w: tab fat_H088 if               targetGroup==0 & fat_X033==1, m
bysort w: tab rownhm   if fat_H088~=. & targetGroup==0 & fat_X033==1, m
// -> in wave 6 more owners are asked question, these are mobile home owners and
//    should not have been asked. There are also a few other owners in other waves
//    who have been asked. According to the questionnaire this question is never
//    asked of owners.
* Outside of the target group: stayers
bysort w: tab fat_H088 if               targetGroup==0 & fat_X033==0, m
bysort w: tab rownhm   if fat_H088~=. & targetGroup==0 & fat_X033==0, m
// -> in waves 4 and 5 more owners are asked question, these are regular cases 
//    and appear to be coding mistakes

// Cases outside of the target group are set to missing (.m)
foreach v of varlist fat_H088 fat_H089 fat_H091 fat_H092 {
    replace `v' = .m if targetGroup==0
}

// Codes 8 and 9: respondent refused to answer
replace fat_H088 = .r if inlist(fat_H088, 8, 9)

// Target group but not asked since they are stayers (waves after wave 5)
replace fat_H088 = .z if targetGroup==1 & fat_X033==0 & w>5
replace fat_H092 = .z if targetGroup==1 & fat_X033==0 & w>5

// "no" is stored as 0
recode fat_H088 (5=0)

// Kid-level indicator of whether the kid owns the respondent's home. The order
// of the replace statements matters: later statements overwrite earlier ones, so
// the missing codes (.d, .r, .z, .m) take precedence over a "yes", and 0 is only
// assigned to records that are still system missing at the end.
gen HRSkidOwnR=. // construct indicator whether kid owns respondent's home based on original HRS data
replace HRSkidOwnR=1  if inlist(kidPN, fat_H091, fat_H091M1, fat_H091M2, fat_H091M3) // kid's OPN is among the listed owners
replace HRSkidOwnR=1  if (fat_H091==38|fat_H091M1==38 |fat_H091==993)  // all kids own R's home (code=38 for waves 4,5; code=993 for waves 8,9)
replace HRSkidOwnR=.d if (fat_H091==998|fat_H091==999)                 // we know that child owns home but don't know which one
replace HRSkidOwnR=.r if fat_H088==.r                                  // respondent refused to answer
replace HRSkidOwnR=.z if fat_H088==.z                                  // interviewer never arrived here for stayers
replace HRSkidOwnR=.m if targetGroup==0        // not target group of respondents 
replace HRSkidOwnR=0  if HRSkidOwnR==. // kids who don't own respondent's home

label define HRSkidOwnR 0 "0. no" 1 "1.yes" .d "DK which kid" .m "not target group" .r "refused" .z "stayers" 
label values HRSkidOwnR HRSkidOwnR

tab HRSkidOwnR, m

* Want to know how often a respondent transitions from "own" to "rent-free" 
* or "other" without moving. These cases are important to us since they potentially
* capture the situation in which a respondent transfers the home to a kid but does
* not move out of the home. In these instances the questionnaire never asks about the
* identity of who owns the respondent's home for waves 6-10 since only movers are asked. 
* Thus the questionnaire is biased towards finding cases where the parent moves into 
* the kid home in which case the respondent is a mover and lives "rent-free" or "other". 

** Construct indicator variable from "own" to "rent-free" or "other" 
// Purpose: flag respondent-waves in which a respondent who never reports a move
// is "rent-free"/"other" in the current wave and was an owner in the wave before.
// The data are kid-level, so one record per respondent-wave is singled out (RR)
// and used as a respondent-level panel.
sort HHID PN w
by HHID PN: egen anymove =max(fat_X033) // =1 if R reports a move in any wave, 0 if never

by HHID PN w: gen RR=_n==1  // =1 if HHID PN appears for the first time
                                 // in a wave and then 0s
gen w_temp = w  // generate a temporary wave indicator in order to work with
                        // respondent-level data
replace w_temp=. if RR==0 // wave is kept only on the first record of each respondent-wave
replace RR=. if RR==0     // RR is now 1 or missing

sort HHID PN w_temp // respondent-level data
egen rrID = group(RR HHID PN) // respondent id, defined on the RR==1 records
                              // NOTE(review): presumably missing on all other records
                              // because RR is missing there -- confirm

tsset rrID w_temp // respondent-level panel, so that L1. below refers to the respondent's previous wave

gen own =1 if rownhm==1 & RR==1 // homeowner
gen free=1 if (rownhm==3|rownhm==7) & RR==1 // "rent-free" or "other" form of residency (not NH!)

gen own_1 = L1.own if RR==1 // homeowner in the previous wave

// free and own_1 are each either 1 or missing, so the sum equals 2 only when
// both are 1; in every other case the comparison evaluates to 0
gen owntofree= ((free + own_1)==2) if RR==1 & anymove==0
// -> there is no moving event in the entire history of the respondent

** Compare HRS indicator of kid owns R's home (HRSkidOwnR) with RAND (kownrhm)
// The RAND variable is first given the same missing codes as HRSkidOwnR and is
// then cross-tabulated against it.
tab kownrhm  if HRSkidOwnR==.d
// -> RAND codes these as kids do NOT own home: kids own home but don't know which kid
replace kownrhm=.d if HRSkidOwnR==.d  // correct RAND variable

tab kownrhm if HRSkidOwnR==.z
// -> RAND codes these typically as "no" even though interview never reaches this point 
replace kownrhm=.z if HRSkidOwnR==.z

tab kownrhm if targetGroup==0         // not target group of respondents
replace kownrhm=.m if targetGroup==0 

tab HRSkidOwnR kownrhm, m
// -> two variables are basically the same now.
// --> use HRS variable to indicate kid owns R's home 

// Consistency check: Do kids own a home if they own the respondent's home?
bys w: tab HRSkidOwn if HRSkidOwnR==1, m // HRS coded kid-own-home variable
bys w: tab kownhm    if HRSkidOwnR==1    // RAND coded kid-own-home variable
// -> the variable HRSkidOwn does much better than the original RAND variable kownhm
// -> there are still some inconsistencies (about 10%)
// -> more inconsistencies in wave 8 where we assign adjacent wave kid homeownership
//    since question was skipped for re-interviewed respondents

// Fill in values for stayers
// -> HRS stopped asking H088-H092 for waves 6-10 of non-movers
tsset KID w // kid-level panel: KID is the unit, w the time variable

// Each of the three variables is filled from its own values in adjacent waves,
// wave by wave from 6 to 10 so that a value filled in for one wave is available
// to the next wave:
//   step 1: stayers (.z) take the value of the previous wave
//   step 2: values that came out as system missing go back two waves
//   step 3: values that are still system missing go forward one wave
foreach v of varlist HRSkidOwnR fat_H088 fat_H092 {
    forvalues wave = 6/10 {
        replace `v' = L.`v' if w==`wave' & `v'==.z
    }
    forvalues wave = 6/10 {
        replace `v' = L2.`v' if w==`wave' & `v'==.
    }
    forvalues wave = 6/10 {
        replace `v' = F1.`v' if w==`wave' & `v'==.
    }
}

// Newly introduced missing values when filling in values for non-movers get
// their own code (.l)
replace HRSkidOwnR = .l if HRSkidOwnR==.
replace fat_H088 = .l if HRSkidOwnR==.l
replace fat_H088 = .m if fat_H088==.
bysort w: tab HRSkidOwnR, m
// -> looks reasonable

tab HRSkidOwnR


tab HRSkidOwn if HRSkidOwnR==1
// -> about the same magnitude of inconsistencies as before filling in stayers

*******************************************************************************
** Who moves to whom?
*******************************************************************************

sort HHID SUBHH PN w
order HHID SUBHH PN w kidid

* Want to know whether a kid moves into the respondent's residence or whether
* the respondent moves into the kid's residence.

* E035: child moved into R home or R moved into child home
* E037: move made to help out child, R, or both
* -> Note: Even though the wording of the questions suggests that these
*          variables are only coded for children it turns out that they are
*          also coded for other relatives such as siblings and parents.
*          To see an example, see the do-file E035.
* OPN*: OPN1 to OPN6 are up to 6 other persons that may have been involved in
*       the move. These variables are created through a reshape command in
*       do-file Decedents_Get_Sample. These OPNs not only refer to children but also
*       to other relatives.
* E035*: E0351 to E0356 contain the values of E035 for the relevant OPN 1 to 6.
* E037*: E0371 to E0376 contain the values of E037 for the relevant OPN 1 to 6.

forvalues j = 1/6 {
    // Naming convention for the temporary variables (due to reshape):
    // OPN* -> OPN_*, E035* -> E035_*, E037* -> E037_*
    rename OPN`j'  OPN_`j'
    rename E035`j' E035_`j'
    rename E037`j' E037_`j'

    // =1 if the person in slot j of the move is this kid, missing otherwise
    gen kidOPN_`j' = 1 if (kidPN==OPN_`j')

    // Pick up the values of slot j that correspond to the kid; missing values
    // are set to 0 so that the slots can be summed afterwards
    gen kidE035_`j' = kidOPN_`j'*E035_`j'
    gen kidE037_`j' = kidOPN_`j'*E037_`j'
    replace kidE035_`j' = 0 if kidE035_`j'==.
    replace kidE037_`j' = 0 if kidE037_`j'==.
}

// Kid-level variables: sum over the six slots
egen E035=rowtotal(kidE035_*)
egen E037=rowtotal(kidE037_*)

// The slot-specific helper variables are no longer needed
drop kidOPN_* kidE035_* kidE037_*

// Sanity check: tabulate hkresd for kid records with a move code between 1 and 7
bys w: tab hkresd if E035<8 & E035>0 
// -> looks good

recode E035 (9=8) (4=.) // code 9 is merged into 8 (labelled "don't know" below); code 4 is set to missing

label define E035 1 "1. kid moves in" 2 "2. R moves in" 5 "5. kid always live with R" 6 "6. both moved" 7 "7. other" 8 "8. don't know" 
label values E035 E035

recode E037 (9=8) // code 9 is merged into 8 (labelled "don't know" below)

label define E037 1 "1. help kid " 2 "2. help R " 3 "3. help both" 4 "4. neither" 8 "8. don't know" 
label values E037 E037

// 0 means that the kid is not involved in any move, so 0s are left out of the tabulations
tab E035 if E035~=0
tab E037 if E037~=0

tab E035 if sample==2 & E035~=0
tab E035 if sample==1 & E035~=0
// -> kid moves to R in the majority of cases in both the main sample and the alive
//    sample
tab E037 if sample==2 & E037~=0
tab E037 if sample==1 & E037~=0
// -> in the alive sample, the vast majority of moves help out the kid or both
// -> in the main sample, the majority of moves help out the R or both
// -> in the alive sample, the vast majority of moves help out the kid or both
// -> in the main sample, the majority of moves help out the R or both

// Kid-level 0/1 indicators, one per category of E035 (type of move)
gen     kmove_temp=0
replace kmove_temp=1 if E035==1 // kid moves to parent's home
gen     rmove_temp=0 
replace rmove_temp=1 if E035==2 // parent moves to kid's home
gen     kalwys_temp=0
replace kalwys_temp=1 if E035==5 // kid always lives with parent
gen     rkmove_temp=0
replace rkmove_temp=1 if E035==6 // both move
gen     othrmove_temp=0
replace othrmove_temp=1 if E035==7 // other type of move arrangement
gen     dkmove_temp=0
replace dkmove_temp=1 if E035==8 // don't know move type

// variables on respondent level: number of kids of the respondent in each
// category in the given wave
bys HHID PN w: egen kmove    = sum(kmove_temp)
bys HHID PN w: egen rmove    = sum(rmove_temp)
bys HHID PN w: egen kalwys   = sum(kalwys_temp)
bys HHID PN w: egen rkmove   = sum(rkmove_temp)
// The "other" category was not aggregated although its kid-level indicator is
// built above, so it was lost when the *_temp variables are dropped below.
// It is now carried to the respondent level like all other categories.
bys HHID PN w: egen othrmove = sum(othrmove_temp)
bys HHID PN w: egen dkmove   = sum(dkmove_temp)

// Kid-level 0/1 indicators, one per category of E037 (whom the move helps)
gen     movehlpsK_temp=0
replace movehlpsK_temp=1       if E037==1 // move helps kid
gen     movehlpsR_temp=0
replace movehlpsR_temp=1       if E037==2 // move helps parent
gen     movehlpsRK_temp=0
replace movehlpsRK_temp=1      if E037==3 // move helps both
gen     movehlpsNEITHER_temp=0  
replace movehlpsNEITHER_temp=1 if E037==4 // move helps neither
gen     movehlpsDK_temp=0
replace movehlpsDK_temp=1      if E037==8 // don't know who move helps

// variables on respondent level
bys HHID PN w: egen movehlpsK        = sum(movehlpsK_temp)
bys HHID PN w: egen movehlpsR        = sum(movehlpsR_temp)
bys HHID PN w: egen movehlpsRK       = sum(movehlpsRK_temp)
bys HHID PN w: egen movehlpsNEITHER  = sum(movehlpsNEITHER_temp)
bys HHID PN w: egen movehlpsDK       = sum(movehlpsDK_temp)

// Drop the kid-level helper indicators and the slot-specific E037_*/E035_*
// variables; the kid-level E035 and E037 themselves are kept
drop *_temp E037_* E035_*

*******************************************************************************
** Kid obtained deed based on original HRS data
*******************************************************************************
// Builds the kid-level indicator HRSkidDeed from fat_E073 (recoded below so that
// 5 becomes 0) and the OPN mention slots fat_E074M1-fat_E074M4:
//    1  = this kid's kidPN is listed in one of the mention slots, or the first
//         slot carries an "all children" code (38 or 993)
//    0  = fat_E073 is non-missing and none of the other rules applied
//   .d  = first slot is 992/997/998 and this kid was not explicitly listed
//   .r  = fat_E073 is 8 or 9
//    .  = fat_E073 is missing

sort HHID PN w

destring fat_E073 fat_E074M*, replace
// -> convert string OPN variables of children obtaining deed
//    into numeric OPN variables


bys w: tab fat_E073, m
// -> Wave 4 has a different phrasing of the question as it asks about giving
//    a deed to house over the last 10 years
// -> percentages in wave 4 look somewhat different from all other waves (much lower "yes")
// -> might want to skip this wave for purposes of deed

bys w: tab fat_E074M1 if fat_E074M1>900 // check for special codes
// -> no OPNs>900 in waves 4 and 5
// -> waves 6-10: 992 (deceased child), 993 (all children), 997 (other), 998 (don't know)
bys w: tab fat_E074M1 if fat_E074M1<40 // check for special codes
// -> waves 4 and 5: 38 (all children)

tab fat_E073 if fat_E074M1==.
// -> check whether there are "yes" values when there is no OPN number: no
tab fat_E074M1 if fat_E073==.
// -> check whether there are any OPNs when there is no value for fat_E073: no

recode fat_E073 (5=0)

gen HRSkidDeed=. // construct indicator whether kid gets deed to a house

// Explicit OPN match. Stata treats missing==missing as true, so without the
// !missing() guard a row with missing kidPN would "match" any empty mention slot.
replace HRSkidDeed=1  if !missing(kidPN) & ///
                         inlist(kidPN, fat_E074M1, fat_E074M2, fat_E074M3, fat_E074M4)
replace HRSkidDeed=1  if (fat_E074M1==38 |fat_E074M1==993) // all kids obtained house deed

// We know that a child got a deed but not which one. Do not overwrite a kid who
// was explicitly identified through another mention slot above.
replace HRSkidDeed=.d if (fat_E074M1==992|fat_E074M1==997|fat_E074M1==998) & HRSkidDeed!=1

replace HRSkidDeed=.r if (fat_E073==8    |fat_E073==9)  // respondent refused to answer
// Only the system missing (.) is turned into 0 here; .d and .r are left untouched
replace HRSkidDeed=0  if fat_E073~=. & HRSkidDeed==. // interviewer arrived but kid did not receive deed

bys w: tab HRSkidDeed, m

*******************************************************************************
** Generate household-kid level variables
*******************************************************************************
// For every respondent-wave (HHID PN w) the kid-level indicators are reduced to
// a single "any kid" value that is stored on every kid row of that group.
// egen max() ignores missing values, so the ".d" code ("some kid, but we do not
// know which one") has to be handled separately. It is propagated to ALL rows of
// the respondent-wave, not just the row that carries .d; otherwise the variable
// is not constant within the group, and a respondent whose remaining kid rows
// are missing would be reset to missing by the missing-flag logic at the
// collapse step further down.

tempvar anyD_own anyD_ownR anyD_deed

** Household-Kid homeownership (hkownhm)
bys HHID PN w: egen new_hkownhm = max(HRSkidOwn)
bys HHID PN w: egen byte `anyD_own' = max(HRSkidOwn==.d)
replace new_hkownhm=1 if `anyD_own'==1
drop `anyD_own'
// -> know that kid owns home but don't know which one
tab fat_E015 new_hkownhm, m
// -> 24 cases where fat_E015==1 but new_hkownhm==0 (count from the original run)
//    In these cases OPN does not coincide with RAND provided kidid
replace new_hkownhm=1 if fat_E015==1 & new_hkownhm==0  
// -> fat_E015 and new_hkownhm are now identical

** Household-Kid homeownership of respondent (hkownrhm)
bys HHID PN w: egen new_hkownrhm = max(HRSkidOwnR)
bys HHID PN w: egen byte `anyD_ownR' = max(HRSkidOwnR==.d)
replace new_hkownrhm=1 if `anyD_ownR'==1
drop `anyD_ownR'
// -> know that kid owns respondent's home but don't know which one
// Carry over the extended missing codes of fat_H088; these take precedence over
// any value set above. Any remaining system missing becomes .m.
replace new_hkownrhm=.l if fat_H088==.l
replace new_hkownrhm=.m if fat_H088==.m
replace new_hkownrhm=.m if new_hkownrhm==.

tab fat_H088 new_hkownrhm, m
// -> variables are consistent (if fat_H088==1 and new_hkownrhm==0 then
//    a relative owns the home but this relative is not a kid, so this is ok)

* Check consistency
tab new_hkownrhm new_hkownhm, m

tab new_hkownhm if new_hkownrhm==1, m 
// -> when household-kid owns respondent's home it also owns a home in most
//    cases; still some inconsistencies but much less than on the individual-kid
//    level 
 
** Household-Kid deed (hkdeed)
bys HHID PN w: egen new_hkdeed = max(HRSkidDeed)
bys HHID PN w: egen byte `anyD_deed' = max(HRSkidDeed==.d)
replace new_hkdeed=1 if `anyD_deed'==1
drop `anyD_deed'
// -> know that kid got deed but don't know which one

********************************************************************************
* collapse to respondent level
********************************************************************************
// Reduces the kid-level file to one row per respondent-wave (HHID PN w) and
// writes the supplement file. Only the variables listed in the collapse
// statement plus the by-variables remain afterwards.

keep HHID SUBHH PN w                                                   ///
     new_hkownhm new_hkownrhm new_hkdeed owntofree fat_H092            ///
     kmove rmove kalwys rkmove dkmove                                  ///
     movehlpsK movehlpsR movehlpsRK movehlpsNEITHER movehlpsDK

// Row-level missing flags. collapse (max) skips missing values, so missingness
// is tracked in separate 0/1 flags: the indicators are temporarily set to 0,
// everything is collapsed with (max), and the indicators are set back to
// missing wherever the collapsed flag equals 1.
generate miss_hkownhm  = (new_hkownhm == .)             // kid homeownership
generate miss_hkownrhm = inlist(new_hkownrhm, .l, .m)   // kid owns respondent home
generate miss_hkdeed   = (new_hkdeed == .)              // kid got deed

foreach stub in hkownhm hkownrhm hkdeed {
	replace new_`stub' = 0 if miss_`stub' == 1
}

collapse (max) new_hkownhm  new_hkownrhm  new_hkdeed owntofree fat_H092  ///
               miss_hkownhm miss_hkownrhm miss_hkdeed                     ///
               kmove rmove kalwys rkmove dkmove movehlpsK movehlpsR      ///
               movehlpsRK movehlpsNEITHER movehlpsDK, by(HHID PN w)

// Restore missingness: a respondent-wave is missing if any of its rows was
// flagged. The result is always the system missing (.), so the .l/.m
// distinction of new_hkownrhm is not kept.
foreach stub in hkownhm hkownrhm hkdeed {
	replace new_`stub' = . if miss_`stub' == 1
}

drop miss_*

sort HHID PN w 

// identifiers back to lower case
renvars HHID PN, l

********************************************************************************
** save
********************************************************************************

save ${save}/decedent_sample_supplement.dta, replace

********************************************************************************
