********************************************************************************************
***DISCERN/ ASHISH ARORA, SHARON BELENZON, LIA SHEER (DUKE UNIVERSITY) / DECEMBER 2020***
********************************************************************************************
********************************************************************************************
*Compiles Compustat financial data based on North American Compustat records.
*We obtained the Compustat data through WRDS in August 2018 (file name: "bdc2d66c4378743b.dta").
*We cannot provide the Compustat file due to IP restrictions.
*Users should obtain the North American Compustat data file before running this code.

*Load the raw Compustat extract and restrict it to the sample window
use "./data/bdc2d66c4378743b.dta", clear
* only records whose indfmt is "INDL"
keep if indfmt == "INDL"
* fiscal years 1980-2015; inrange() is false when fyear is missing, so those rows are removed too
keep if inrange(fyear, 1980, 2015)

*drop records with exchange codes 7-10 (Canadian stocks, per the original note)
drop if inlist(exchg, 7, 8, 9, 10)
*keep records whose loc or fic country code is "USA"
* NOTE(review): the original comment said "drop Non-US HQ firms", but a row survives when EITHER code is "USA"
keep if inlist("USA", loc, fic)
*remember the row order of the data as loaded; used below as a reproducible tie-breaker
gen long file_order = _n

*drop non R&D firms: the firm-level mean of xrd is missing (xrd never reported) or zero
sort gvkey fyear
by gvkey: egen mxrd = mean(xrd)
drop if mxrd == . | mxrd == 0

*drop duplicates: keep exactly one record per gvkey-fyear
* Without a tie-breaker, sort leaves rows that share gvkey and fyear in random order,
* so a different duplicate could survive on each run. Adding file_order to the sort key
* always keeps the record that came first in the loaded file.
* NOTE(review): if the duplicates come from multiple data formats or consolidation levels
* in the extract, filtering on those fields upstream would be cleaner - confirm against the raw file.
sort gvkey fyear file_order
by gvkey fyear: drop if _n > 1
drop file_order

*restrict each firm to the span between its first and last fiscal year with cshtr_f data
sort gvkey fyear

* first fiscal year in which cshtr_f is not "."; stays missing when the firm has no such year
by gvkey: egen first_trade_yr = min(cond(cshtr_f != ., fyear, .))
* when first_trade_yr is missing the comparison is true for every row, so the whole firm is dropped
drop if fyear <= first_trade_yr - 1
drop first_trade_yr

sort gvkey fyear
* last fiscal year in which cshtr_f is not "." (0 when there is none)
by gvkey: egen last_trade_yr = max(cond(cshtr_f != ., fyear, 0))
drop if fyear >= last_trade_yr + 1
drop last_trade_yr
*********

*restrict each firm to the span between its first and last fiscal year with sale data

*drop years after the last one with sale data
sort gvkey fyear
* last fiscal year in which sale is not "."; 0 when the firm has none, which drops all of its rows
by gvkey: egen last_sale_yr = max(cond(sale != ., fyear, 0))
drop if fyear >= last_sale_yr + 1
drop last_sale_yr

*drop years before the first one with sale data
sort gvkey fyear
* first fiscal year in which sale is not "."
by gvkey: egen first_sale_yr = min(cond(sale != ., fyear, .))
drop if fyear <= first_sale_yr - 1
drop first_sale_yr

*firm-level summaries, repeated on every row of the firm:
*  fyear1  - first fiscal year remaining in the sample
*  fyearn  - last fiscal year remaining in the sample
*  sum_xrd - sum of xrd over the firm's remaining years (missing xrd counts as zero)
sort gvkey fyear
by gvkey: egen fyear1 = min(fyear)
by gvkey: egen fyearn = max(fyear)
* total() is the documented name of egen's legacy sum() function; the result is the same
by gvkey: egen sum_xrd = total(xrd)
save "./data/compustat_sample_80_15", replace

*one-row-per-firm version of the sample file
use "./data/compustat_sample_80_15", clear
* trim to the firm-level fields, then reduce to a single row per gvkey
keep gvkey cusip conm fyear1 fyearn loc sic busdesc city sum_xrd incorp state weburl
duplicates drop gvkey, force
sort gvkey cusip
* keep the string identifier as gvkey_str and add a numeric gvkey next to it
rename gvkey gvkey_str
destring gvkey_str, generate(gvkey)
save "./data/compustat_sample_80_15_short", replace
********************************

*build the firm-year financial variables for the main dataset
use "./data/compustat_sample_80_15", clear
rename fyear year
rename sale sales
generate lsales = ln(sales)
generate lxrd = ln(1 + xrd)
sort gvkey year

* treat "." as zero for these items so that one absent component does not blank the sums below
foreach item of varlist dlc dltt mibt ch invt ivaeq ivao intan act cstk pstk capx {
    replace `item' = 0 if `item' == .
}

* dt: sum of dlc and dltt
generate dt = dlc + dltt
* m_value: prcc_f*csho plus pstk and dt, net of act
* (prcc_f and csho are not zero-filled, so m_value is missing when either is missing)
generate m_value = prcc_f*csho + pstk + dt - act
* assets: ppent is not zero-filled, so assets is missing whenever ppent is missing
generate assets = ppent + invt + ivaeq + ivao + intan

* non-positive values have no logarithm: set them to missing before taking logs
replace assets = . if assets <= 0
generate lassets = ln(assets)
replace m_value = . if m_value <= 0
generate lm_value = ln(m_value)
generate lppent = ln(1 + ppent)

keep gvkey cusip conm year sic loc incorp state sales lsales xrd lxrd capx ch ebit ebitda assets lassets m_value lm_value ppent lppent

* convert the string gvkey to numeric in place
destring gvkey, replace
save "./data/cusip_finance_short_80_15", replace
