/*******************************************************************************

************
** Function: 
************

  Provides imputations for total helper hours for nursing home care recipients.
	Also, defines disability measure.
	
	Sample includes:
	Core interviews 1998-2010
	Exit interviews 2004-2012

************
** Inputs  : 
************ 

	- helpers_combined.dta    (see HelperData_CombineData.do)
	- exit_adls.dta           (see HelperData_Get_Exit_ADLs.do)
	- HRS Tracker
	- RAND HRS longitudinal file
	- HRS Langa-Weir

************
** Outputs : 
************

	- helpers_with_imputations.dta
		
*******************************************************************************/

* Start from a clean session: clear everything in memory, turn off output
* paging, and remove all macros before the directory globals are set.
clear all
set more off
macro drop _all

********************************************************************************
** Directory names
********************************************************************************

* Presumably defines the path globals (randhrs, langaweir, tracker, save)
* referenced further down -- confirm against GetDirNames.do.
do GetDirNames.do

********************************************************************************
** Data
********************************************************************************

********************************************************************************
** RAND LONGITUDINAL FILE (CORE)
********************************************************************************

* Wide-format RAND HRS variables to load (wildcards span all waves).
local wide_vars ///
  hhid pn hhidpn ragender r*iwstat r*proxy r*adla r*iadlza r*agey_e

* Stubs for the wide-to-long reshape; @ marks the wave number.
* NOTE(review): r@iadla is not listed explicitly in the load list; it looks
* like the r*adla wildcard also picks up the r#iadla variables -- confirm.
local long_stubs ///
  r@iwstat r@proxy r@adla r@iadla r@iadlza r@agey_e

use `wide_vars' using ${randhrs}, clear
quietly reshape long `long_stubs', i(hhidpn) j(w)

* restrict to waves 4-11 (1998-2012)
drop if !inrange(w,4,11)

* riadla is reshaped only so that it can be removed here
drop riadla

sort  hhid pn w
order hhid pn w

unique hhid pn  // -> 37,495 unique individuals

save ${save}/tmp_randhrs, replace 

********************************************************************************
** Langa-Weir classification of cognitive function (CORE)
********************************************************************************
/*
Provides a total summary score for cognition using measures from the core HRS 
interview as well as three derived categories: Normal, Cognitively Impaired but 
not Demented (CIND), and Demented. We refer to these as the Langa-Weir 
Classifications (Crimmins et al., 2011). HRS also makes use of proxy respondents 
to reduce sample attrition. Therefore, this dataset also includes scoring of 
cognition based on data from proxy respondent interviews.

HRS has two scales for cognitive function: a 35-point (ages 65+) and a 27-point 
scale (all respondents). Langa-Weir maps onto the 27-point scale (Variable: cogtot27_imp).
(For comparison, the 35-point scale is available in RAND HRS: r*cogtot.)

For proxy respondents, cognition cannot be directly measured. It is estimated 
from IADLs and from the proxy respondent's assessments of the respondent's 
memory and cognitive impairment. These are combined into an 11-point scale: 
IADLs 0-5, memory 0 (very good) - 4 (poor), cognitive impairment 0 (no) - 2 
(has CI). (11 = 5+4+2.) There is no CI question prior to 2000, so a 9-point 
scale is used in earlier waves (1995-1998). 
(Variable: prxyscore_imp.)

Summary cognition measure (Variable: cogfunction.)
                  Normal    CIND    Demented        
27-point scale     12-27     7-11     0-6
11-point scale      0-2      3-5      6-11
 9-point scale      0-2      3-4      5-9 

* CIND = Cognitively impaired but not demented.
*/

* Langa-Weir wide variables to load: cognition category and interview flag,
* one per survey year. (cogtot27_imp and prxyscore_imp are commented out of
* the original load list and are not used.)
local lw_wide hhid pn cogfunction* intrview*

* Stubs for the wide-to-long reshape; @ marks the year suffix.
local lw_long cogfunction@ intrview@

use `lw_wide' using ${langaweir}, clear

* want 1998-2012 only (waves 4-11)
drop *1995 *1996 *2014 *2016
quietly reshape long `lw_long', i(hhid pn) j(year)

* biennial survey year to wave number: 1998 -> 4, 2000 -> 5, ..., 2012 -> 11
generate w = 4 + (year-1998)/2
drop year

* keep only cases that were interviewed (intrview==1)
keep if intrview==1
drop intrview

label define COGFUNC 1 "1. Normal" 2 "2. CI, Not Demented" 3 "3. Demented"
label values cogfunction COGFUNC

tabulate cogfunction, missing
// -> 1 missing case (wave 11)

save ${save}/tmp_langaweir.dta, replace

********************************************************************************
** TRACKER 
********************************************************************************

* Load the person identifiers and the per-wave interview-type variables
use HHID PN ?IWTYPE using ${tracker}, clear

* lower-case the variable names
renvars, l

* prefixes a-e are not mapped to a wave number and are dropped
drop aiwtype biwtype ciwtype diwtype eiwtype

* tracker letter prefixes f g h j k l m n o p become wave numbers 4 through 13
local wave = 4
foreach prefix in f g h j k l m n o p {
	rename `prefix'iwtype r`wave'iwtype
	local ++wave
}

* one row per person-wave
quietly reshape long r@iwtype, i(hhid pn) j(w)

label define IWTYPE ///
        1 "1.Core interview obtained" ///
        2 "2.Completed interview, but not released in Early Release" ///
        5 "5.Core interview not obtained" ///
        11 "11.Exit interview obtained" ///
        15 "15.Exit interview not obtained" ///
        21 "21.Post-exit interview obtained" ///
        25 "25.Post-exit interview not obtained" ///
        99 "99.Not in the sample this wave", replace

label values riwtype IWTYPE

* restrict to waves 4-11 (1998-2012)
drop if !inrange(w,4,11)

save ${save}/tmp_tracker, replace

********************************************************************************
** Merge files
********************************************************************************

** RAND HRS (master file)
use ${save}/tmp_randhrs, clear

** Langa-weir (all observations retained, no _merge variable kept)
merge 1:1 hhid pn w using ${save}/tmp_langaweir, nogenerate

** Tracker (discard observations that appear in the tracker only)
merge 1:1 hhid pn w using ${save}/tmp_tracker
keep if _merge!=2
drop _merge
label drop _merge
// -> note that 2 individuals don't appear in the tracker but do appear in randhrs.

** Exit ADL data (all observations retained, no _merge variable kept)
merge 1:1 hhid pn w using ${save}/exit_adls, nogenerate

** Helper data (keep matched observations only: drops those found only in the
** tracker/hrs files and those found only in the helper file)
merge 1:1 hhid pn w using ${save}/helpers_combined
keep if _merge==3
drop _merge
label drop _merge

********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************

********************************************************************************
** Core data imputations
********************************************************************************

********************************************************************************
* NOTE: 
********************************************************************************
* Use nearest neighbor hours for NH care recipients, otherwise actual hours.

* Covariates are: the proxy-interview variable (rproxy) and indicators for each
* possible level of ADL limitations 0-5, IADL limitations 0-5, and cognitive
* impairment 1-3

* Missing hours are treated as correctly reported zeros

* Imputation sample:
* core interviews, 2000+ (missing spouse hours in 1998)
* no nursing home helper (nor resident of nh)
* has at least one helper

* Impute values for individuals with NH helpers and NH residents in 1998+.
********************************************************************************

* summarize data
* (reported weekly hours over the same sample as the imputation regression below:
*  core interviews, w>=5, no nursing home helper, at least one helper)
tabstat tohrswkly if (xiw==0 & w>=5 & nhanyhlpr==0 & anyhlpr==1), s(min p5 p10 p25 p50 p75 p90 p95 max)

* hours are skewed, use log(1+Y), where the '+1' deals with a small # of zero hours cases:
gen loghrs = log(1+tohrswkly)

* imputation regression
* (log hours on rproxy plus factor-variable indicators for radla, riadlza and cogfunction)
reg loghrs rproxy i.radla i.riadlza i.cogfunction if (xiw==0 & w>=5 & nhanyhlpr==0 & anyhlpr==1)

* get fitted values
* (no if-qualifier, so yhat is also computed outside the regression sample)
predict yhat, xb

* nearest neighbor pmm imputation
* tohrswklyinnm starts as a copy of reported hours and is the variable that gets imputed
gen tohrswklyinnm = tohrswkly

* declare the data as mi data (wide style) with a single imputation (M = 1)
mi set wide
mi register imputed tohrswklyinnm
mi set M = 1

* predictive mean matching on yhat only, one nearest neighbor, fixed seed.
* NOTE(review): this sample uses w>=4 whereas the regression above uses w>=5, so
* wave-4 observations with reported hours appear to be eligible as donors even
* though the note above describes the imputation sample as 2000+ -- confirm intended.
mi impute pmm tohrswklyinnm yhat if (xiw==0 & w>=4 & anyhlpr==1), ///
  dots noisily rseed(1234) knn(1) replace force

* keep imputation 1 as the dataset in memory
mi extract 1, clear

* drop imputations unrelated to nursing home residents
* (for core interviews, every observation with nhanyhlpr!=1 is reset to reported hours)
replace tohrswklyinnm = tohrswkly if (xiw==0 & nhanyhlpr!=1)	
	
* compare reported hours variable to NNM imputation
tabstat tohrswkly tohrswklyinnm if (xiw==0 & anyhlpr==1 & nhanyhlpr==0), s(n mean p50) c(s)
// -> should match perfectly
tabstat tohrswkly tohrswklyinnm if (xiw==0 & anyhlpr==0 & nhanyhlpr==0), s(n mean p50) c(s)
// -> should be all zeros
tabstat tohrswkly tohrswklyinnm if (xiw==0 & anyhlpr==1 & nhanyhlpr==1), s(n mean p50) c(s)
// -> imputations

* remove the working variables so they can be regenerated for the exit interviews
drop loghrs yhat	

********************************************************************************
** Disability Measure 
********************************************************************************
	
* Core interviews: flag 21 or more weekly hours (reported or imputed);
* left missing when hours are missing and for all non-core observations.
generate disabled = tohrswklyinnm>=21 if xiw==0 & !missing(tohrswklyinnm)

********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************

********************************************************************************
** Exit data imputations
********************************************************************************

********************************************************************************
* NOTE: 
********************************************************************************
* Use nearest neighbor hours for NH care recipients, otherwise actual hours.

* Covariates are: indicators for each possible level of ADL limitations 0-5 and 
* IADL limitations 0-5

* Missing hours are treated as correctly reported zeros

* Imputation sample:
* exit interviews, 2004+
* no nursing home helper (nor resident of nh)
* has at least one helper

* Impute values for individuals with NH helpers and NH residents in 2004+.
********************************************************************************

* summarize data
* (restricted to w>=7 so that the summary describes exactly the sample used by
*  the imputation regression below, as the core-interview summary does)
tabstat tohrswkly if (xiw==1 & w>=7 & nhanyhlpr==0 & anyhlpr==1), s(min p5 p10 p25 p50 p75 p90 p95 max)


* hours are skewed, use log(1+Y), where the '+1' deals with a small # of zero hours cases:
generate loghrs = log(1+tohrswkly)

* imputation regression
* (log hours on factor-variable indicators for xadla and xiadlza)
reg loghrs i.xadla i.xiadlza if (xiw==1 & w>=7 & nhanyhlpr==0 & anyhlpr==1)

* get fitted values
* (no if-qualifier, so yhat is also computed outside the regression sample)
predict yhat, xb

* nearest neighbor pmm imputation
// NOTE: tohrswklyinnm already generated above

* declare the data as mi data (wide style) with a single imputation (M = 1)
mi set wide
mi register imputed tohrswklyinnm
mi set M = 1

* predictive mean matching on yhat only, one nearest neighbor, fixed seed;
* restricted to exit interviews in waves 7+ with at least one helper
mi impute pmm tohrswklyinnm yhat if (xiw==1 & w>=7 & anyhlpr==1), ///
  dots noisily rseed(1234) knn(1) replace force

* keep imputation 1 as the dataset in memory
mi extract 1, clear

* drop imputations unrelated to nursing home residents
* (for exit interviews, every observation with nhanyhlpr!=1 is reset to reported hours)
replace tohrswklyinnm = tohrswkly if (xiw==1 & nhanyhlpr!=1)

* compare reported hours variable to NNM imputation
tabstat tohrswkly tohrswklyinnm if (xiw==1 & anyhlpr==1 & nhanyhlpr==0), s(n mean p50) c(s)
// -> should match perfectly
tabstat tohrswkly tohrswklyinnm if (xiw==1 & anyhlpr==0 & nhanyhlpr==0), s(n mean p50) c(s)
// -> should be all zeros
tabstat tohrswkly tohrswklyinnm if (xiw==1 & anyhlpr==1 & nhanyhlpr==1), s(n mean p50) c(s)
// -> imputations

* remove the working variables
drop loghrs yhat

********************************************************************************
** Disability Measure 
********************************************************************************

// NOTE: disabled already exists (created for the core interviews above);
// here it is filled in for exit interviews with the same 21-hour cutoff.
replace disabled = tohrswklyinnm>=21 if xiw==1 & !missing(tohrswklyinnm)

********************************************************************************
** Clean up imputation variables
********************************************************************************

* Remove the core (r*), cognition, tracker and exit (x*) covariates that were
* only needed for the imputations.
drop riwstat rproxy ragender ragey_e radla riadlza cogfunction riwtype ///
     xage xadla xiadlza

********************************************************************************
** Keep subset of variables
********************************************************************************

* Variables carried into the output file (besides the hhid pn w keys)
local outvars ///
  anyhlpr yganyhlpr nhanyhlpr icanyhlpr ///
  yghrswkly ichrswkly tohrswkly ///
  ichrsfrac ///
  tohrswklyinnm ///
  disabled ///
  caretype caretype50 caretypen iconly iconly50 ygonly ygonly50 ///
  nhanyhlprInfo

keep hhid pn w `outvars'

* append the suffix "new" to every kept variable; the keys are left unchanged
renvars `outvars', postfix(new)

// postfix(new) included so as not to conflict with previous version of these measures
// that have since been deleted

********************************************************************************
** Save
********************************************************************************

* shrink storage types where possible, then write the final file
quietly compress
save ${save}/helpers_with_imputations.dta, replace

********************************************************************************
** Erase temporary files
********************************************************************************

* delete the three intermediate files written earlier in this script
foreach stem in tracker randhrs langaweir {
	erase ${save}/tmp_`stem'.dta
}

********************************************************************************
