/*******************************************************************************

************
** Function: 
************

  Generates helper data extract, exit interviews, 2004-2012

************
** Inputs  : 
************ 

  Functional limitations and helpers (G_HP)
	RAND Family kid file

************
** Outputs : 
************

	- helpers_exit.dta (respondent-interview-level file)
	
************
** Notes   : 
************

  Exit interviews 2004-2012
	
	Beginning in 2002 for core interviews and 2004 for exit interviews, helper indexes
	become available in the HP files. 
	
	Prior to 2002, helper files did not include all helpers. In 2000, nh helpers are
	not listed. In 1998, neither nh helpers nor spouses/partners are listed. The
	more serious issue is the missing hours for spouses/partners in 1998 since hours
	for nh helpers are never recorded. 
	
  Some NH residents have no helper-level file and are therefore not included in 
	section G of the HRS. We still want to impute hours for those individuals.
	We address this issue later when we merge the files together. 
	
	There are two ways to identify young-gen. helpers: using the helper
	relationship reported in the HP files, or merging to the RAND family files.
	Because we are interested in all young-gen. helpers, not only the children,
	we prefer the relationships in the HP files (the RAND family file only
	includes children, not their spouses/partners, grandchildren, etc.).
	
*******************************************************************************/

* Start from a clean session: nothing in memory, no leftover macros, and no
* "--more--" pauses in the output.
clear all
macro drop _all
set more off

********************************************************************************
** Directory names
********************************************************************************

* Presumably defines the path globals used below (${randfamk}, ${hrsexit},
* ${save}) -- confirm against GetDirNames.do.
do GetDirNames.do

********************************************************************************
** Initialize Family File Master List of KIDIDs
********************************************************************************

* Build two lookup lists of (hhid pn opn) -> kidid from the RAND family kid file.
* Two lists are needed because hhid pn opn is not unique in that file (see the
* duplicates report below), while the later merge to the helper file is m:1 on
* hhid pn opn.

use hhid pn opn kidid using ${randfamk}, clear

sort hhid pn opn kidid

duplicates report hhid pn kidid
// -> no duplicates -> hhid pn kidid uniquely identify an observation
isid hhid pn kidid
// -> will fail if hhid pn kidid do not uniquely identify an observation

duplicates report hhid pn opn
/*
--------------------------------------
   copies | observations       surplus
----------+---------------------------
        1 |       128346             0
        2 |          562           281
--------------------------------------
*/
// -> hhid pn opn do NOT uniquely identify an observation!

* Number the records within hhid pn opn. Ordering by kidid inside each group
* makes the numbering reproducible and independent of the current sort order.
bysort hhid pn opn (kidid): gen n = _n
tab n
/*
          n |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |    128,627       99.78       99.78
          2 |        281        0.22      100.00
------------+-----------------------------------
      Total |    128,908      100.00
*/

* Only two lists are built below, so a third (or later) record sharing the same
* hhid pn opn would be dropped silently. Stop here if that ever happens.
assert n <= 2

* To deal with this issue, make two lists of hhid pn opn's to match to helper file
* and then merge these lists (by hhid pn kidid)

* List 1: first record of every hhid pn opn
preserve
keep if n == 1
drop n
save "${save}/kids1.dta", replace
restore

* List 2: second record of the duplicated hhid pn opn's
keep if n == 2
drop n
save "${save}/kids2.dta", replace

clear

********************************************************************************
** Exit: HRS functional limitations and helpers (G, HELPER) 
********************************************************************************

** Exit interviews 2004-2012 (w 7-11)

***** input ******
local WAVES x12 x10 x08 x06 x04 // -> file name prefixes, most recent wave first
local w 11                      // -> wave number of the most recent wave (x12 -> wave 11)
******************

local FIRSTWAVE  : word 1 of `WAVES'
local LATERWAVES : list WAVES - FIRSTWAVE

* Load each exit helper file, drop the one-character wave prefix from the
* variable names, lowercase all names, and tag every record with its wave
* number. The wave number starts at `w' and decreases by one per file, so WAVES
* must list consecutive waves in descending order.
* File names are quoted so that data or temp directories containing spaces work.
local wavenum = `w'
foreach wave of local WAVES {
    use HHID PN ?G069 ?G070 ?G071 ?G072 ?G073  ///
                ?G074 ?G076 ?G077 ?G078 ?G079  ///
                ?ADLNDX ?IADLNDX ?MNYNDX OPN   ///
    using "${hrsexit}/`wave'G_HP", clear
    renvars ?G* ?ADLNDX ?IADLNDX ?MNYNDX, predrop(1)
    renvars, l
    gen w = `wavenum'
    tempfile `wave'
    save "``wave''", replace
    local --wavenum
}

* Stack the waves, most recent first
use "``FIRSTWAVE''", clear
foreach wave of local LATERWAVES {
    append using "``wave''"
}

********************************************************************************
** COUNT OBSERVATIONS
********************************************************************************

* Put the identifiers first and sort the helper-level records by
* respondent-interview.
order hhid pn w
sort  hhid pn w

* Number of distinct respondent-interviews versus number of helper records
unique hhid pn w

* Helper records per wave
tab w

/*
Number of unique values of hhid pn w is  5605
Number of records is  14048

          w |      Freq.     Percent        Cum.
------------+-----------------------------------
          7 |      2,493       17.75       17.75
          8 |      2,707       19.27       37.02
          9 |      2,741       19.51       56.53
         10 |      3,382       24.07       80.60
         11 |      2,725       19.40      100.00
------------+-----------------------------------
      Total |     14,048      100.00
*/

********************************************************************************
** Weekly hours of care
********************************************************************************

* Construct weekly hours-of-care variable (hrswkly = days per week * hours per day)
// Helper Hours (example: 2002)
// NOTE: In terms of answering the question of how often a helper provides help,
//       the respondent has three mutually exclusive options:
//                  1. G070: how many days of last month (most commonly used).
//                  2. G071: how many days per week.
//                  3. G072: helps every day.
//               Then, G073: on the days helper helps you, 
//                            about how many hours per day is that?
//                     G074: helper's sex. This is only asked if the gender is not obvious.
//                         -> this is useful to us since code=3 means agency/professional
//                            and so helps us to find out if 'other individual' is a formal
//                            home-care helper.  
//                     G076: whether helper is paid to help (this variable is not useful
//                            for kid helpers or spouses)
//                         -> in combination with HG074 this should be a good indicator 
//                            to find out if 'other individual' is a formal home-care helper. 
//                     G077: whether Medicaid/insurance help pay for helper
//                     G078: not counting expenses paid by Medicaid/insurance how much was 
//                            paid last month
//                     G079: is per month (1), week (2), day (3), year (5) 

// -> changing variable names for readability
ren g070 daysPerMonth
ren g071 daysPerWeek
ren g072 everyDay
ren g073 hoursPerDay

* Handle non-helper cases
* 2002+ : dayPerMonth = 0
*         IWER: ENTER 0 IN "DAYS IN LAST MONTH" IF THE PERSON DID NOT HELP IN THE LAST MONTH
*
* We will retain these helpers but assign them zero hours. 

tab w if ( daysPerMonth == 0), m
// occurs only in waves 7+ (2004+)

tab g069 if ( daysPerMonth == 0), m
// applies to many different types of helpers (note: but not NH helpers whose hours are not asked)

tab hoursPerDay if ( daysPerMonth == 0) ,m
// hours are always missing in these cases
* assign zeros to these cases
replace hoursPerDay = 0 if ( daysPerMonth == 0)

* Recode DK/RF values to missing
tab w daysPerMonth if (daysPerMonth > 31)
tab w daysPerWeek  if (daysPerWeek  >  7)
tab w everyDay     if (everyDay     >  1)
tab w hoursPerDay  if (hoursPerDay  > 24)
// -> check what missing values look like in each wave
// The codes seen so far are 96/98/99 (days per month), 8/9 (days per week,
// every day) and 98/99 (hours per day). Every value above the valid range is
// set to missing, so that a code not in that list can never be used as a real
// number of days or hours.
replace daysPerMonth = . if (daysPerMonth > 31) & !mi(daysPerMonth)
replace daysPerWeek  = . if (daysPerWeek  >  7) & !mi(daysPerWeek)
replace everyDay     = . if (everyDay     >  1) & !mi(everyDay)
replace hoursPerDay  = . if (hoursPerDay  > 24) & !mi(hoursPerDay)

* cap hours per day at 16 hours.
replace hoursPerDay = 16 if (hoursPerDay > 16 & hoursPerDay<=24)
// -> (1,546 real changes made)

* cap days per month at 28. (assumes that 28, 29, 30, and 31 all mean every day of the month.)
replace daysPerMonth = 28 if (daysPerMonth > 28 & daysPerMonth <= 31)
// -> (2,014 real changes made)

* use daysPerMonth and everyDay to fill in daysPerWeek
* (28 days per month / 4 = 7 days per week)
replace daysPerWeek = (daysPerMonth / 4) if mi(daysPerWeek) & !mi(daysPerMonth)
* only an explicit "every day" answer (code 1) counts as 7 days per week
replace daysPerWeek = 7 if mi(daysPerWeek) & (everyDay == 1)

* calculate hours per week = daysPerWeek * hoursPerDay
* (missing whenever either component is missing)
gen hrswkly = hoursPerDay * daysPerWeek
 	
* Nursing home helpers (g069 == 22): hours should always be missing; force it
replace hrswkly = . if (g069 == 22)
// -> (0 real changes made)

* Dummy for missing hours
gen hrsmiss = missing(hrswkly)

tab hrsmiss
/*
    hrsmiss |      Freq.     Percent        Cum.
------------+-----------------------------------
          0 |     11,284       80.32       80.32
          1 |      2,764       19.68      100.00
------------+-----------------------------------
      Total |     14,048      100.00
*/
// -> Note: a higher percentage of missing hours in exit than in core interviews

tab w hrsmiss, row nof
/*
           |        hrsmiss
         w |         0          1 |     Total
-----------+----------------------+----------
         7 |     78.94      21.06 |    100.00 
         8 |     79.79      20.21 |    100.00 
         9 |     78.44      21.56 |    100.00 
        10 |     82.14      17.86 |    100.00 
        11 |     81.76      18.24 |    100.00 
-----------+----------------------+----------
     Total |     80.32      19.68 |    100.00 
*/
// -> no clear missing pattern over time

fsum hrswkly, s(n mean min p25 p50 p75 p90 max)
/*
 Variable |        N     Mean      P25   Median      P75      P90      Min      Max                                                                                                   
----------+------------------------------------------------------------------------
  hrswkly |    11284    29.01     3.00    11.25    42.00   112.00     0.00   112.00 
*/

********************************************************************************
** Helper type
********************************************************************************

// G069:  helper relationship (spouse, kid, relative, etc.)
// Recode the helper relationships into six detailed helper types (hlprtypedet)
/* 
	    1. Self
			
      2. Spouse/Partner 
			26. Late spouse/partner
			27. Former Spouse
			
      10. Father                
      11. Father-in-law
			12. Mother
      13. Mother-in-law
      14. R's parents
			15. Brother
      16. Brother of spouse
			17. Sister
			18. Sister of spouse
			19. Other relative

			
      3. Son                      
      4. Stepson                  
      5. Spouse of daughter       
      6. Daughter                 
      7. Stepdaughter             
      8. Spouse of son            
      9. Grandchild               
			28. Unlisted child or child-in-law
      30. Former Step-child
      31. Former Child-in-law			
	    33. Spouse/Partner of Grandchild
	    90. Ambiguous child relationship
	    91. Ambiguous child-in-law relationship

			22. Employee of institution (only available for NH residents)	
			
			21. Organization	
		  23. Paid Helper
			24. Professional
			25. Professional (specify)
				
			20. Other individual		 	
      29. Not proxy interview
	    32. Relationship unknown
	     .. Missing
*/   

* Drop cases where R helps herself (1 case)
drop if (g069 == 1) // 1. self

recode g069 ///
 (2 26 27                         = 1) /// (1) spouse, partner, including former spouses/partners
 (3 4 5 6 7 8 9 28 30 31 33 90 91 = 2) /// (2) child, child-in-law, step-child, grandchild
 (10 11 12 13 14 15 16 17 18 19   = 3) /// (3) siblings/in-law, parents/in-law, other relatives
 (22                              = 4) /// (4) NH
 (21 23 24 25                     = 5) /// (5) org, professional, paid helper
 (20 29 32 .                      = 6) /// (6) other individual, relationship unknown,
, gen(hlprtypedet)                      //     not proxy interview, missing (.)

* Check that every relationship code was mapped.
* recode with gen() copies any value not named in a rule unchanged, so an
* unmapped g069 code would show up as a value outside 1-6, not as missing.
tab g069 if !inrange(hlprtypedet, 1, 6), m
// -> should be no observations
assert inrange(hlprtypedet, 1, 6)
// -> stops the do-file if a relationship code is not covered by the rules above
tab hlprtypedet,m
// -> should be no missing cases

* Label helper type
lab def hlprtypedet ///
	1 "1. spouse/partner" ///
	2 "2. child/child spp/grandchild" ///
	3 "3. other relative" ///
	4 "4. nursing home" ///
	5 "5. other paid" ///
	6 "6. unknown relationship"
lab val hlprtypedet hlprtypedet

tab hlprtypedet,m
/*
       RECODE of g069 (HELPER |
                RELATIONSHIP) |      Freq.     Percent        Cum.
------------------------------+-----------------------------------
            1. spouse/partner |      1,861       13.25       13.25
2. child/child spp/grandchild |      6,599       46.98       60.23
            3. other relative |      1,035        7.37       67.59
              4. nursing home |      1,956       13.92       81.52
                5. other paid |      1,359        9.67       91.19
      6. unknown relationship |      1,237        8.81      100.00
------------------------------+-----------------------------------
                        Total |     14,047      100.00
*/

// Readable names for the variables that help to identify formal home care helpers
ren g074 sexHlpr // helper's sex (3 = agency/professional)
ren g076 payHlpr // helper paid
ren g077 payMA   // Medicaid helps to pay
ren g078 payOOP  // paid out of pocket
ren g079 payPer  // pay frequency

* What do we know about the helpers with an unclassified relationship (type 6)?
tab sexHlpr if hlprtypedet==6              // =3 means agency/professional, 2=female, 1=male
tab payHlpr if hlprtypedet==6              // =1 means R paid for help
tab payHlpr if hlprtypedet==6 & sexHlpr==3 // agency/professional most are paid for
tab payHlpr if hlprtypedet==6 & sexHlpr~=3 // for the other cases, about 50% are paid for

// Suggestion:
// -> classify as FHC if (sexHlpr=3 and hlprtypedet=6) or (payHlpr=1 and hlprtypedet=6)
// -> i.e. treat these helpers like category hlprtypedet=5 (other paid)

** Coarse helper type: 1 = informal, 2 = formal/nh, 3 = formal/home care
clonevar hlprtype = hlprtypedet
recode hlprtype (1/3=1) (4=2) (5=3) (6=6) // -> 6 is unclassified helpers (split below)
tab hlprtypedet hlprtype

** Split the unclassified helpers (6) into formal home care and informal care:
**   paid by R or agency/professional -> 3 (home care)
**   everything else                  -> 1 (informal)
// -> the remainder (neither agency helper nor paid) counts as other informal care
//    (assumes that if it is unknown whether a helper is paid, then the helper is not paid)
replace hlprtype = cond(payHlpr==1 | sexHlpr==3, 3, 1) if hlprtype==6

lab def HLPRTYPE 1 "1. informal" 2 "2. nh" 3 "3. home care"
lab val hlprtype HLPRTYPE
tab hlprtype,m
/*
   RECODE of |
g069 (HELPER |
RELATIONSHIP |
           ) |      Freq.     Percent        Cum.
-------------+-----------------------------------
 1. informal |     10,065       71.65       71.65
       2. nh |      1,956       13.92       85.58
3. home care |      2,026       14.42      100.00
-------------+-----------------------------------
       Total |     14,047      100.00
*/

********************************************************************************
** Merge to RAND Family File (by hhid pn opn)
********************************************************************************

* Flag helpers that appear in the RAND family kid file.
* hhid pn opn is not unique in the kid file, so the kid list was split into two
* files that are each unique on hhid pn opn; a helper is a kid if it matches
* either list. Unmatched kid records are not kept (keep(1 3)).
* File names are quoted so that a ${save} directory containing spaces works.
merge m:1 hhid pn opn using "${save}/kids1", keep(1 3) gen(iskid1)
merge m:1 hhid pn opn using "${save}/kids2", keep(1 3) gen(iskid2)
lab drop _merge

* iskid = 1 if matched (merge code 3) in either list, 0 otherwise
gen byte iskid = (iskid1 == 3 | iskid2 == 3)
drop iskid1 iskid2

tab iskid,m
/*
      iskid |      Freq.     Percent        Cum.
------------+-----------------------------------
          0 |      9,099       64.78       64.78
          1 |      4,948       35.22      100.00
------------+-----------------------------------
      Total |     14,047      100.00
*/

tab w iskid, row nof m

/*
           |         iskid
         w |         0          1 |     Total
-----------+----------------------+----------
         7 |     66.11      33.89 |    100.00 
         8 |     64.68      35.32 |    100.00 
         9 |     64.32      35.68 |    100.00 
        10 |     63.39      36.61 |    100.00 
        11 |     65.82      34.18 |    100.00 
-----------+----------------------+----------
     Total |     64.78      35.22 |    100.00 
*/

********************************************************************************
** Make respondent-interview-level file
********************************************************************************

********************************************************************************
** Respondent-level variables
********************************************************************************

* Notation (two-letter prefix of each helper group):
*
* Kid versus Non-Kid: (classification based on matches to rand family k-level file)
* kd -> kid
*
* Spouse/partner versus Non-spouse/partner (classification based on g069 categories)
* sp -> spouse/partner
*
* Young- generation (classification based on g069 categories)
* yg -> young-gen
*
* Informal versus Formal/NH versus Formal/Home-Care (classification based on g069 categories)
*                                                   (also uses sex of helper and whether paid)
* ic -> informal care
* nh -> formal/nursing home
* hc -> formal/home care
* fc -> formal care = formal/nh + formal/home-care

* Membership condition of each helper group
local is_kd "iskid==1"
local is_sp "hlprtypedet==1"
local is_yg "hlprtypedet==2"
local is_ic "hlprtype==1"
local is_nh "hlprtype==2"
local is_hc "hlprtype==3"
local is_fc "hlprtype==2 | hlprtype==3"
local GROUPS kd sp yg ic nh hc fc

* Numbers of helpers (nhlpr): 0/1 indicators that are summed in the collapse
gen nhlpr = 1 // -> dummy to count all helpers
foreach g of local GROUPS {
    gen `g'nhlpr = (`is_`g'')
}

* Weekly hours of care (hrswkly) and missing-hours dummy (hrsmiss) by group:
* copy of the helper's value if the helper belongs to the group, missing otherwise
foreach v in hrswkly hrsmiss {
    foreach g of local GROUPS {
        gen `g'`v' = `v' if (`is_`g'')
    }
}

********************************************************************************
** collapse to one observation per respondent-interview
********************************************************************************

* Sum helper counts, weekly hours, and missing-hours counts over the helpers of
* each respondent-interview. Note that (sum) ignores missing values and returns
* 0 when all values in a group are missing.
collapse (sum) ///
    nhlpr   kdnhlpr   spnhlpr   ygnhlpr   icnhlpr   nhnhlpr   hcnhlpr   fcnhlpr   ///
    hrswkly kdhrswkly sphrswkly yghrswkly ichrswkly nhhrswkly hchrswkly fchrswkly ///
    hrsmiss kdhrsmiss sphrsmiss yghrsmiss ichrsmiss nhhrsmiss hchrsmiss fchrsmiss ///
    , by(hhid pn w)

********************************************************************************
** labels
********************************************************************************	

* Label every collapsed variable (unlabeled ones would otherwise keep the
* automatic "(sum) varname" label from collapse in the saved file).
lab var nhlpr     "all: num helpers"
lab var hrswkly   "all: weekly hours"
lab var hrsmiss   "all: num missing hours"

lab var kdnhlpr   "kid: num helpers"
lab var kdhrswkly "kid: weekly hours"
lab var kdhrsmiss "kid: num missing hours"

lab var spnhlpr   "sp/p: num helpers"
lab var sphrswkly "sp/p: weekly hours"
lab var sphrsmiss "sp/p: num missing hours"

lab var ygnhlpr   "young-gen: num helpers"
lab var yghrswkly "young-gen: weekly hours"
lab var yghrsmiss "young-gen: num missing hours"

lab var icnhlpr   "informal: num helpers"
lab var ichrswkly "informal: weekly hours"
lab var ichrsmiss "informal: num missing hours"

lab var nhnhlpr   "formal/nh: num helpers"
lab var nhhrswkly "formal/nh: weekly hours"
lab var nhhrsmiss "formal/nh: num missing hours"

lab var hcnhlpr   "formal/home care: num helpers"
lab var hchrswkly "formal/home care: weekly hours"
lab var hchrsmiss "formal/home care: num missing hours"

lab var fcnhlpr   "formal (nh + home care): num helpers"
lab var fchrswkly "formal (nh + home care): weekly hours"
lab var fchrsmiss "formal (nh + home care): num missing hours"

********************************************************************************
** missing hours info
********************************************************************************

* Flag for missing hours at the respondent-interview level:
*   0 -> no helper has missing hours
*   1 -> number of helpers with missing hours equals the number of NH helpers
*   2 -> more helpers with missing hours than NH helpers
*   . -> none of the above
gen hrswklyf = cond(hrsmiss==0, 0,                     ///
               cond(hrsmiss>0 & hrsmiss==nhnhlpr, 1,   ///
               cond(hrsmiss>0 & hrsmiss >nhnhlpr, 2, .)))

lab def hrswklyf ///
	0 "0. no missing hours" ///
	1 "1. missing for nh only" ///
	2 "2. missing for some non-nh"
lab val hrswklyf hrswklyf

tab hrswklyf,m
/*
                  hrswklyf |      Freq.     Percent        Cum.
---------------------------+-----------------------------------
       0. no missing hours |      3,228       57.60       57.60
    1. missing for nh only |      1,774       31.66       89.26
2. missing for some non-nh |        602       10.74      100.00
---------------------------+-----------------------------------
                     Total |      5,604      100.00
*/
// -> 57.60% of R's in HP file have no missing hours data
// -> 31.66% have missing data only for NH helpers
// -> 10.74% (the remainder) have missing data for one or more non-NH helpers

// NOTE: NH helpers are a much bigger issue here in the exit than the core interviews.

********************************************************************************
** Summary info
********************************************************************************

* Distribution of total weekly hours by missing-hours flag
tabstat hrswkly, by(hrswklyf) s(n mean min p25 p50 p75 p90 max)

********************************************************************************
** Save respondent-interview-level file
********************************************************************************

quietly compress

* File names are quoted so that a ${save} directory containing spaces works.
save "${save}/helpers_exit.dta", replace

********************************************************************************
** Erase temporary files
********************************************************************************

* Remove the two intermediate kid lists written to ${save} earlier in this do-file
erase "${save}/kids1.dta"
erase "${save}/kids2.dta"

********************************************************************************
