/*******************************************************************************

************
** Function: 
************

  Generates helper data extract, core interviews, 1998-2000 *only*.

************
** Inputs  : 
************ 

  Functional limitations and helpers (E_HP)
	RAND family kid-level file

************
** Outputs : 
************

	- helpers_core_add_waves.dta  (respondent-interview-level file)
	
************
** Notes   : 
************
	
	Beginning in 2002 for core interviews and 2004 for exit interviews, helper indexes
	become available in the HP files.
	
  Some NH residents have no helper-level file and are therefore not included in 
	section G of the HRS. We may still want to impute hours for those individuals.
	We address this issue later when we merge the files together. 
	
	The series for kid helpers (kdhrswkly) and for young-gen helpers (yghrswkly)
	should be comparable across the 1998-2000 and 2002-2012 periods.
	
	kd - children identified using RAND family k file
	yg - children identified using the relationships in the HP files
	
	There are two ways to identify young-gen. helpers: using the helper relation-
  ship reported in the HP files or by merging to the RAND family files. Because
	we are interested in all young-gen. helpers, not only the children, we prefer
	the relationships in the HP files (the RAND family file only includes 
	children, not their spouses/partners, grandchildren, etc.).		
	
	See additional notes at bottom of file.
		
*******************************************************************************/

* Start from a clean session: turn off output paging so the script runs
* unattended, wipe everything held in memory, and drop all macros.
set more off
clear all
macro drop _all

********************************************************************************
** Directory names
********************************************************************************

* NOTE(review): GetDirNames.do is presumably where the path globals used below
* (randfamk, save, hrscore) are defined; it must run after the macro wipe above.
do GetDirNames.do

********************************************************************************
** Initialize Family File Master List of KIDIDs
********************************************************************************

* Build two lookup lists of kids keyed by hhid pn opn from the RAND family
* kid-level file. Two lists are needed because hhid pn opn is not unique there
* (see below), while the later merge to the helper file is m:1 on hhid pn opn.
use hhid pn opn kidid using ${randfamk}, clear

sort hhid pn opn kidid

duplicates report hhid pn kidid
// -> no duplicates -> hhid pn kidid uniquely identify an observation
isid hhid pn kidid
// -> will fail if hhid pn kidid do not uniquely identify an observation

duplicates report hhid pn opn
/*
--------------------------------------
   copies | observations       surplus
----------+---------------------------
        1 |       128346             0
        2 |          562           281
--------------------------------------
*/
// -> hhid pn opn do NOT uniquely identify an observation!

* Number the rows within each hhid pn opn group (ordered by kidid, from the
* sort above).
by hhid pn opn: gen n=_n
tab n
/*
          n |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |    128,627       99.78       99.78
          2 |        281        0.22      100.00
------------+-----------------------------------
      Total |    128,908      100.00
*/

* The two-list approach below only covers up to two rows per hhid pn opn.
* Stop here if a different release of the family file ever has more, rather
* than silently dropping the extra rows (and their kid matches).
assert n <= 2

* To deal with this issue, make two lists of hhid pn opn's to match to helper file
* and then merge these lists (by hhid pn kidid)

* List 1: first row of every hhid pn opn group (unique on hhid pn opn).
preserve
keep if n == 1
drop n
save ${save}/kids1.dta, replace
restore

* List 2: second row of the groups that have one (also unique on hhid pn opn).
keep if n == 2
drop n
save ${save}/kids2.dta, replace

clear

********************************************************************************
** Core 2000
********************************************************************************

* NOTE: no employee of institution helpers (very few), no NDX variables

* Read the helper-level variables of the 2000 core interview and turn them
* into one record per helper (plus one per helper's spouse/partner) with
* harmonized names and a helper-type classification.
use HHID PN OPN G2947A G2949 G2950 G2951 G2952 G2954 G2955 G2957 ///
    G2976 G2977 G2978 G2980 G2981 G2983                          ///
    using ${hrscore}/h00e_hp.dta, clear

* lower-case all variable names
renvars, lower

* Helpers flagged as married/partnered (g2949 == 1) get a second record that
* will carry the answers about the helper's spouse/partner.
expand 2 if g2949 == 1, gen(spp) 	//1. MARRIED/PARTNERED
tab spp
replace spp = . if g2949 != 1
tab spp, m
// -> spp=0 for married/partnered helper, spp=1 for helper's spouse,
//    spp=. for helper without spouse/partner.

* Harmonized name <- helper's own item, overwritten by the matching
* spouse/partner item on the duplicated (spp == 1) records.
local hlprvars daysPerMonth daysPerWeek everyDay hoursPerDay sexHlpr payHlpr
local ownq     g2950        g2951       g2952    g2954       g2955   g2957
local sppq     g2976        g2977       g2978    g2980       g2981   g2983
forvalues j = 1/6 {
    local newname : word `j' of `hlprvars'
    local own     : word `j' of `ownq'
    local partner : word `j' of `sppq'
    ren `own' `newname'
    replace `newname' = `partner' if spp == 1
}

* The relationship code must be numeric before it can be recoded.
destring g2947a, replace
/*
G2947A HELPER RELATIONSHIP COMBINED SOURCE

10. R's SPOUSE

11. CHILD  -- from HHMEM grid
12. CHILD-IN-LAW -- from HHMEM grid
13. UNLISTED CHILD OR CHILD-IN-LAW -- unlisted
14. STEP/PARTNER CHILD  -- from HHMEM grid
15. FORMER STEP-CHILD -- unlisted
21. GRDKID -- from HHMEM grid
22. GRANDCHILD -- unlisted

31. SIBLING -- from HHMEM grid or sibling grid
32. SIBLING OF SPOUSE -- from HHMEM grid or sibling grid
41. PARENT OR PARENT-IN-LAW -- from HHMEM grid
51. OTHER RELATIVE -- from HHMEM grid
52. RELATIVE-OTHER -- unlisted

72. EMPLOYEE OF 'INSTITUTION' -- unlisted

71. PROFESSIONAL -- from HHMEM grid
73. ORGANIZATION -- unlisted

61. OTHER -- from HHMEM grid
62. OTHER INDIVIDUAL -- unlisted
97. Other
98. DK (don't know); NA (not ascertained)
99. RF (refused)
Blank. Information not available
*/

* Detailed helper type (six groups) from the relationship code.
recode g2947a                      ///
    (10                    = 1)    /// spouse
    (11 12 13 14 15 21 22  = 2)    /// child, grandchild
    (31 32 41 51 52        = 3)    /// other relative
    (72                    = 4)    /// nursing home (employee of institution)
    (71 73                 = 5)    /// organization, professional
    (61 62 97 98 99 .      = 6)    /// other individual, unknown
    , gen(hlprtypedet)
// note: very few nh helpers recorded in 1998-2000

* Every relationship code should have been mapped:
tab g2947a if hlprtypedet==., m
// -> should be no observations
tab hlprtypedet, m
// -> should be no missing cases

* Label helper type
label define hlprtypedet               ///
    1 "1. spouse/partner"              ///
    2 "2. child/child spp/grandchild"  ///
    3 "3. other relative"              ///
    4 "4. nursing home"                ///
    5 "5. other paid"                  ///
    6 "6. unknown relationship"
label values hlprtypedet hlprtypedet

tab hlprtypedet, m
/*
     RECODE of g2947a (HELPER |
RELATIONSHIP COMBINED SOURCE) |      Freq.     Percent        Cum.
------------------------------+-----------------------------------
            1. spouse/partner |      1,193       23.22       23.22
2. child/child spp/grandchild |      2,874       55.95       79.17
            3. other relative |        339        6.60       85.77
              4. nursing home |          3        0.06       85.83  <-- note: very few nh helpers recorded
                5. other paid |        273        5.31       91.14
      6. unknown relationship |        455        8.86      100.00
------------------------------+-----------------------------------
                        Total |      5,137      100.00
*/

* Find out more about unclassified helper types:
tab sexHlpr if hlprtypedet==6              // =3 means agency/professional, 2=female, 1=male
tab payHlpr if hlprtypedet==6              // =1 means R paid for help
tab payHlpr if sexHlpr==3 & hlprtypedet==6 // agency/professional most are paid for
tab payHlpr if sexHlpr~=3 & hlprtypedet==6 // for the other cases, about 50% are paid for

* Rule adopted below: an unknown-relationship helper counts as formal home
* care when it is an agency/professional (sexHlpr==3) or R paid for the help
* (payHlpr==1). Only hlprtype is changed; hlprtypedet keeps category 6.

** Create new helper type distinguishing between informal, formal/nh, formal/hc
clonevar hlprtype = hlprtypedet
recode hlprtype (1 2 3 = 1) (4 = 2) (5 = 3) // -> 6 (unclassified helpers) is left as is for now
tab hlprtypedet hlprtype

** Re-classify helper types in hlprtype==6 into formal (3) / informal (1)
// -> paid or agency helper: formal home care; everything else: informal care
//    (assumes that if it is unknown whether a helper is paid, then the helper is not paid)
replace hlprtype = cond(payHlpr==1 | sexHlpr==3, 3, 1) if hlprtype == 6

label define HLPRTYPE 1 "1. informal" 2 "2. nh" 3 "3. home care"
label values hlprtype HLPRTYPE
tab hlprtype, m

* Drop the remaining raw items and park the wave in a temporary file.
drop g29*
describe
tempfile h00
save `h00'

********************************************************************************
** Core 1998
********************************************************************************

* NOTE: no spouse/partner or employee of institution helpers (very few), no NDX variables

* Read the helper-level variables of the 1998 core interview and turn them
* into one record per helper (plus one per helper's spouse/partner) with
* harmonized names and a helper-type classification. Mirrors the 2000 section.
use HHID PN OPN F2639A F2641 F2642 F2643 F2644 F2646 F2647 F2649 ///
    F2658 F2659 F2660 F2662 F2663 F2665                          ///
    using ${hrscore}/h98e_hp.dta, clear

* lower-case all variable names
renvars, lower

* Helpers flagged as married/partnered (f2641 == 1) get a second record that
* will carry the answers about the helper's spouse/partner.
expand 2 if f2641 == 1, gen(spp) 	//1. MARRIED/PARTNERED
tab spp
replace spp = . if f2641 != 1
tab spp, m
// -> spp=0 for married/partnered helper, spp=1 for helper's spouse,
//    spp=. for helper without spouse/partner.

* Harmonized name <- helper's own item, overwritten by the matching
* spouse/partner item on the duplicated (spp == 1) records.
local hlprvars daysPerMonth daysPerWeek everyDay hoursPerDay sexHlpr payHlpr
local ownq     f2642        f2643       f2644    f2646       f2647   f2649
local sppq     f2658        f2659       f2660    f2662       f2663   f2665
forvalues j = 1/6 {
    local newname : word `j' of `hlprvars'
    local own     : word `j' of `ownq'
    local partner : word `j' of `sppq'
    ren `own' `newname'
    replace `newname' = `partner' if spp == 1
}

* Detailed helper type (six groups) from the relationship code.
* F2639A    HELPER RELATIONSHIP COMBINED SOURCE
* (same code list as documented for G2947A in the 2000 section)
recode f2639a                      ///
    (10                    = 1)    /// spouse
    (11 12 13 14 15 21 22  = 2)    /// child, grandchild
    (31 32 41 51 52        = 3)    /// other relative
    (72                    = 4)    /// nursing home (employee of institution)
    (71 73                 = 5)    /// organization, professional
    (61 62 97 98 99 .      = 6)    /// other individual, unknown
    , gen(hlprtypedet)
// note: very few nh helpers recorded in 1998-2000
// f2639a does not indicate whether helper is spouse/partner

* Every relationship code should have been mapped:
tab f2639a if hlprtypedet==., m
// -> should be no observations
tab hlprtypedet, m
// -> should be no missing cases

* Label helper type
label define hlprtypedet               ///
    1 "1. spouse/partner"              ///
    2 "2. child/child spp/grandchild"  ///
    3 "3. other relative"              ///
    4 "4. nursing home"                ///
    5 "5. other paid"                  ///
    6 "6. unknown relationship"
label values hlprtypedet hlprtypedet

tab hlprtypedet, m
/*
     RECODE of f2639a (HELPER |
RELATIONSHIP COMBINED SOURCE) |      Freq.     Percent        Cum.
------------------------------+-----------------------------------
                                                                    <-- note: no spouses/partners recorded
2. child/child spp/grandchild |      3,335       72.50       72.50
            3. other relative |        376        8.17       80.67
              4. nursing home |          5        0.11       80.78  <-- note: very few nh helpers recorded
                5. other paid |        317        6.89       87.67
      6. unknown relationship |        567       12.33      100.00
------------------------------+-----------------------------------
                        Total |      4,600      100.00
*/

* Find out more about unclassified helper types:
tab sexHlpr if hlprtypedet==6              // =3 means agency/professional, 2=female, 1=male
tab payHlpr if hlprtypedet==6              // =1 means R paid for help
tab payHlpr if sexHlpr==3 & hlprtypedet==6 // agency/professional most are paid for
tab payHlpr if sexHlpr~=3 & hlprtypedet==6 // for the other cases, about 50% are paid for

* Rule adopted below: an unknown-relationship helper counts as formal home
* care when it is an agency/professional (sexHlpr==3) or R paid for the help
* (payHlpr==1). Only hlprtype is changed; hlprtypedet keeps category 6.

** Create new helper type distinguishing between informal, formal/nh, formal/hc
clonevar hlprtype = hlprtypedet
recode hlprtype (1 2 3 = 1) (4 = 2) (5 = 3) // -> 6 (unclassified helpers) is left as is for now
tab hlprtypedet hlprtype

** Re-classify helper types in hlprtype==6 into formal (3) / informal (1)
// -> paid or agency helper: formal home care; everything else: informal care
//    (assumes that if it is unknown whether a helper is paid, then the helper is not paid)
replace hlprtype = cond(payHlpr==1 | sexHlpr==3, 3, 1) if hlprtype == 6

label define HLPRTYPE 1 "1. informal" 2 "2. nh" 3 "3. home care"
label values hlprtype HLPRTYPE
tab hlprtype, m

* Drop the remaining raw items. The pattern is limited to the f26* items
* loaded above (as drop g29* does for 2000) so that a derived variable whose
* name happens to start with "f" can never be removed by accident.
drop f26*
describe
tempfile h98
save `h98'

********************************************************************************
** Append files
********************************************************************************

* Stack the 2000 records under the 1998 records still in memory.
* append's gen() marks the source: 0 = data in memory (1998), 1 = appended (2000).
append using `h00', gen(w)

* Turn the source marker into the wave number: 1998 -> 4, 2000 -> 5.
recode w (0 = 4) (1 = 5)

order hhid pn w opn
sort  hhid pn w opn

* How many respondent-interviews, and how many helper records per wave?
unique hhid pn w

tabulate w

/*
Number of unique values of hhid pn w is  4757
Number of records is  9737

          w |      Freq.     Percent        Cum.
------------+-----------------------------------
          4 |      4,600       47.24       47.24
          5 |      5,137       52.76      100.00
------------+-----------------------------------
      Total |      9,737      100.00
*/

** Note a few issues

* Share of records that are helpers with a spouse/partner (0), the
* spouse/partner duplicates (1), or helpers without one (.), by wave.
tabulate w spp, missing row nofreq
/*
           |               spp
         w |         0          1          . |     Total
-----------+---------------------------------+----------
         4 |     24.89      24.89      50.22 |    100.00 
         5 |     19.10      19.10      61.81 |    100.00 
-----------+---------------------------------+----------
     Total |     21.83      21.83      56.33 |    100.00
*/
// -> for partnered/married helpers, in 1998-2000, partner/spouse of helper has separate entry.

********************************************************************************
** Weekly hours of care
********************************************************************************

* Derive weekly hours of care per helper record (hrswkly) and a flag for
* records whose hours cannot be computed (hrsmiss).

* Handle non-helper cases
* 2002+ : daysPerMonth = 0
*         IWER: ENTER 0 IN "DAYS IN LAST MONTH" IF THE PERSON DID NOT HELP IN THE LAST MONTH
* 1998-2000 : daysPerMonth = 96
*             96. NOT A HELPER OR DID NOT HELP IN THE LAST MONTH
*
* We will retain these helpers but assign them zero hours. 

tab w if ( daysPerMonth == 0), m
// occurs only in waves 6+ (2002+)

tab hoursPerDay if ( daysPerMonth == 0) ,m
// hours are always missing in these cases
* assign zeros to these cases
replace hoursPerDay = 0 if ( daysPerMonth == 0)

tab w if ( daysPerMonth == 96 ), m
// most cases in 1998-2000.
// the cases in 2010-2012 are something else: 96. Out of Range.  Will assign this to be missing below.

tab hoursPerDay if ( daysPerMonth == 96 ) & inlist(w,4,5), m
// for the 1998-2000 cases, hours are always missing -> assign zeros
* assign zeros to these cases
* (hoursPerDay first: the second line overwrites the 96 that both conditions test)
replace hoursPerDay  = 0 if ( daysPerMonth == 96 ) & inlist(w,4,5)
replace daysPerMonth = 0 if ( daysPerMonth == 96 ) & inlist(w,4,5)

* Recode DK values to missing
tab w daysPerMonth if (daysPerMonth > 31)
tab w daysPerWeek  if (daysPerWeek  >  7)
tab w everyDay     if (everyDay     >  1)
tab w hoursPerDay  if (hoursPerDay  > 24)
// -> check what missing values look like in each wave
replace daysPerMonth = . if inlist( daysPerMonth , 96 , 98 , 99 )
replace daysPerWeek  = . if inlist( daysPerWeek , 8 , 9 , 98 , 99)
replace everyDay     = . if inlist( everyDay , 8 , 9 )
replace hoursPerDay  = . if inlist( hoursPerDay , 98 , 99 ) 

* cap hours per day at 16 hours.
replace hoursPerDay = 16 if (hoursPerDay > 16 & hoursPerDay<=24)
// -> (393 real changes made)

* cap days per month at 28. (assumes that 28, 29, 30, and 31 all mean every day of the month.)
replace daysPerMonth = 28 if (daysPerMonth > 28 & daysPerMonth <= 31)
// -> (1,508 real changes made)

* use daysPerMonth and everyDay to fill in daysPerWeek
* (a month is treated as 4 weeks, consistent with the 28-day cap above)
replace daysPerWeek = (daysPerMonth / 4) if mi(daysPerWeek) & !mi(daysPerMonth)
* Only an explicit "every day" answer (everyDay == 1) implies 7 days per week.
* Testing for "non-missing" instead would also assign 7 days to any other code
* that survives the recodes above (e.g. a "no" answer or an unlisted special code).
replace daysPerWeek = 7 if mi(daysPerWeek) & everyDay == 1

* calculate hours per week = daysPerWeek * hoursPerDay
gen hrswkly = hoursPerDay * daysPerWeek
 	
* Recode non-missing nursing home cases to missing (all should be missing)
replace hrswkly = . if (hlprtype == 2)
// -> (6 real changes made, 6 to missing)

* Dummy for missing hours
gen hrsmiss = missing(hrswkly)

tab hrsmiss
/*
    hrsmiss |      Freq.     Percent        Cum.
------------+-----------------------------------
          0 |      9,151       93.98       93.98
          1 |        586        6.02      100.00
------------+-----------------------------------
      Total |      9,737      100.00
*/

tab w hrsmiss, row nof
/*
           |        hrsmiss
         w |         0          1 |     Total
-----------+----------------------+----------
         4 |     94.28       5.72 |    100.00 
         5 |     93.71       6.29 |    100.00 
-----------+----------------------+----------
     Total |     93.98       6.02 |    100.00 

*/
// -> far fewer missings in 1998-2000 than in later waves, likely due to nh helpers,
//    which are not recorded in 1998-2000 except by accident

fsum hrswkly, s(n mean min p25 p50 p75 p90 max)
/*	
 Variable |        N     Mean      P25   Median      P75      P90      Min      Max                                                                                                   
----------+------------------------------------------------------------------------
  hrswkly |     9151    14.07     0.75     3.75    14.00    42.00     0.00   112.00  
*/

********************************************************************************
** Merge to RAND Family File (by hhid pn opn)
********************************************************************************

// Flag helper records whose hhid pn opn appears in the RAND family kid lists.
//
// The master side is many-to-one: the same hhid pn opn can occur several times
// within a wave because the 'expand' commands above duplicated partnered
// helpers. The duplicate keeps the kid's OPN, so this merge treats child
// spouses as children.

* Match against both kid lists; keep(1 3) retains every helper record and
* nothing that exists only in the kid lists. iskid1/iskid2 hold the merge
* result (1 = helper record only, 3 = matched).
forvalues k = 1/2 {
    merge m:1 hhid pn opn using ${save}/kids`k', keep(1 3) gen(iskid`k')
}
lab drop _merge

* A helper is a kid if it matched either list.
gen iskid = (iskid1 == 3 | iskid2 == 3)
drop iskid1 iskid2

tab iskid, m
/*
      iskid |      Freq.     Percent        Cum.
------------+-----------------------------------
          0 |      4,155       42.67       42.67
          1 |      5,582       57.33      100.00
------------+-----------------------------------
      Total |      9,737      100.00
*/

tab w iskid, row nof
/*
           |         iskid
         w |         0          1 |     Total
-----------+----------------------+----------
         4 |     35.17      64.83 |    100.00 
         5 |     49.39      50.61 |    100.00 
-----------+----------------------+----------
     Total |     42.67      57.33 |    100.00 
*/
// -> more in 1998-2000 due to (1) no nh helpers in 1998-2000, (2) no spp helpers in 1998,
//    (3) duplicate entries in 1998-2000 when helper is married/partnered

********************************************************************************
** Make respondent-interview-level file
********************************************************************************

********************************************************************************
** Respondent-level variables
********************************************************************************

* Helper-record building blocks that are summed to the respondent-interview
* level afterwards. Variable names are <group prefix><measure>.
*
* Group prefixes:
*   kd -> kid                  (record matched the RAND family k-level file: iskid)
*   sp -> spouse/partner       (helper relationship: hlprtypedet == 1;
*                               "g069 categories" in the original notes)
*   yg -> young-gen            (helper relationship: hlprtypedet == 2)
*   ic -> informal care        (hlprtype == 1; hlprtype also uses sex of helper
*                               and whether paid)
*   nh -> formal/nursing home  (hlprtype == 2)
*   hc -> formal/home care     (hlprtype == 3)
*   fc -> formal care = formal/nh + formal/home-care
*
* Measures:
*   nhlpr   -> 0/1 membership dummy (becomes the number of helpers)
*   hrswkly -> weekly hours of care, missing outside the group
*   hrsmiss -> missing-hours dummy, missing outside the group

local is_kd "iskid==1"
local is_sp "hlprtypedet==1"
local is_yg "hlprtypedet==2"
local is_ic "hlprtype==1"
local is_nh "hlprtype==2"
local is_hc "hlprtype==3"
local is_fc "hlprtype==2 | hlprtype==3"

* dummy to count all helpers
gen nhlpr = 1

foreach g in kd sp yg ic nh hc fc {
    gen `g'nhlpr   = (`is_`g'')
    gen `g'hrswkly = hrswkly if (`is_`g'')
    gen `g'hrsmiss = hrsmiss if (`is_`g'')
}

********************************************************************************
** collapse to one observation per respondent-interview
********************************************************************************

* Sum every helper-level measure within respondent-interview (hhid pn w).
* collapse's (sum) treats missing values as zero, so a group with no
* non-missing hours ends up with 0 hours; the *hrsmiss sums record how many
* helper records had missing hours.
local nvars nhlpr
local hvars hrswkly
local mvars hrsmiss
foreach g in kd sp yg ic nh hc fc {
    local nvars `nvars' `g'nhlpr
    local hvars `hvars' `g'hrswkly
    local mvars `mvars' `g'hrsmiss
}

collapse (sum) `nvars' `hvars' `mvars', by(hhid pn w)

********************************************************************************
** labels
********************************************************************************	

* Label the kid, spouse/partner and young-generation series
* (the remaining series keep the labels assigned by collapse).
local who_kd "kid"
local who_sp "sp/p"
local who_yg "young-gen"

foreach g in kd sp yg {
    label variable `g'nhlpr   "`who_`g'': num helpers"
    label variable `g'hrswkly "`who_`g'': weekly hours"
    label variable `g'hrsmiss "`who_`g'': num missing hours"
}

********************************************************************************
** Save respondent-interview-level file
********************************************************************************

* Shrink storage types where possible, then write the output file.
quietly compress

save ${save}/helpers_core_add_waves.dta, replace

********************************************************************************
** Erase temporary files
********************************************************************************

* The two kid lists written at the top of this script are no longer needed.
forvalues k = 1/2 {
    erase ${save}/kids`k'.dta
}

********************************************************************************

* Stop here: everything below is notes only.
exit

********************************************************************************
/*

Additional notes:
-----------------

(*) In the respondent-level file, a unique entry is identified by hhid pn w.
    In the kid-level file, a unique entry is identified by hhid pn kidid w.   

(*) In 1998 and earlier, hours provided by a spouse are not separately identifiable.
    Since spouse hours are not of interest (because of focus on single elderly), these 
		data can be kept.

(*) In fact, spouses do not appear in helper files prior to 2000. This is clear from the questionnaire,
    which states "HELPER INTRO BRANCHPOINT: IF THERE ARE NO HELPERS ON THE LIST BESIDES THE
    SPOUSE/PARTNER, GO TO E171 BRANCHPOINT" (E171 branchpoint is the end of the helper questions).
    Therefore, total hours will be significantly under-reported for partnered individuals before 2000.

(*) Before 2002, 'employees of institutions' are not identifiable in the exit helper files.
    They appear in 1998-2000 core files, but very few are recorded relative to later waves.

(*) In waves 1998-2000, institutional helpers can be identified using the WHO HELP questions in the
    respondent-level healthcare utilization files. See the response: 100. EMPLOYEE OF FACILITY. The same
    questions can identify spouse helpers.

(*) Hours questions were not asked if relationship is "employee of institution."		
		
(*) Coding becomes consistent beginning in 2002. (Exception: indices for ADL, IADL, MNY helpers
    are not available in the exit data until 2004.)

*/
********************************************************************************
