********************************************************************************************
***DISCERN/ ASHISH ARORA, SHARON BELENZON, LIA SHEER (DUKE UNIVERSITY) / DECEMBER 2020***
********************************************************************************************
****************************************************************************************
*Compiling accounting data panel file and merging Patent, Publication and NPL citation data****************************
*OUTPUT: panel file:"./output_files/DISCERN_Panel_Data_1980_2015.dta"
*"permno_adj_long" is the unique UO firm id in panel file
******************************************************************************************************
****************************************************************************************************
* Base sample: firms that hold at least 1 patent in stock (reassigned patents count).
use "./data/patent_firms.dta", clear

* Attach every GVKEY linked to a permno_adj, then the financial records of each GVKEY.
* Users should download Compustat data and run the "compustat_do.do" file in advance
* to obtain the finance file used here.
joinby permno_adj using "./data/permno_gvkey.dta"
joinby gvkey using "./data/cusip_finance_short_80_15.dta"
sort permno_adj year

* Discard firm-years that fall outside the adjusted first/last year window.
drop if year<fyear1_adjust | year>fyearn_adjust

* Drop non-US HQ firms: a firm is removed when none of its records has loc=="USA".
bysort permno_adj: egen us_hq_ever=max(loc=="USA")
drop if us_hq_ever==0
drop us_hq_ever

* Manual exclusions under the same step: four firms removed entirely,
* one firm truncated after 2006.
drop if inlist(permno_adj, 87401, 12269, 76886, 82719)
drop if permno_adj==68145 & year>2006
*****

* Manually check gaps in years: gaps of up to 4 missing years are filled, longer ones are dropped.
* dy is the distance in years to the firm's previous record (missing on a firm's first record).
* The within-firm ordering by year is requested explicitly so that the lag does not depend
* on whatever sort order the preceding commands happened to leave behind.
bysort permno_adj (year): gen dy=year-year[_n-1]
tab dy

* Firms handled by truncation: only the records on one side of the cut-off year are kept.
drop if permno_adj==25988 & year<1996
drop if permno_adj==57998 & year<1987
drop if permno_adj==67030 & year<1992
drop if permno_adj==88521 & year>2003
drop if permno_adj==11824 & year<1996

* Add the prepared fill-in records for the remaining gap years.
append using "./data/compiling_data/fillin_gap_years.dta"

* Recompute the year distances on the completed panel and tabulate them again for inspection.
drop dy
bysort permno_adj (year): gen dy=year-year[_n-1]
tab dy


* ------------------------- Patent variables -------------------------
* Patent stock including reassignments, at the permno_adj-year level.
* Firm-years present only in the using file are not kept.
merge 1:1 year permno_adj using "./data/pat_stock_permno_adj.dta", keep(master match) nogenerate
rename pat_stock_permno_adj pat_stock_reassign

* Patents granted per permno_adj-year (same treatment of using-only rows).
merge 1:1 year permno_adj using "./data/pat_per_year_permno_adj", keep(master match) nogenerate

* Missing values (for example firm-years without a match) are set to zero.
foreach v in pat_stock_reassign pat_yr {
    replace `v'=0 if `v'==.
}

sort permno_adj year

* Derived patent measures.
gen lpat_yr=ln(1+pat_yr)
gen dum_nopat_reassign=(pat_stock_reassign==0)
gen dum_nopat_year=(pat_yr==0)

label var pat_yr "granted patents per permno_adj-year"
label var lpat_yr "ln(1+pat_yr)"
label var pat_stock_reassign "Patent stock-inc reassignment-permno_adj level"
label var dum_nopat_reassign "dummy for no pat ever up to year t-inc reassignment-permno_adj level"
label var dum_nopat_year "dummy for no granted pat per permno-year"
****************************************************************
* ------------------------- Publication variables -------------------------
* Publication stock including reassignments, at the permno_adj-year level.
* Firm-years present only in the using file are not kept.
merge 1:1 year permno_adj using "./data/pub_stock_permno_adj.dta", keep(master match) nogenerate
rename pub_stock_permno_adj pub_stock_reassign

* Publications per permno_adj-year (same treatment of using-only rows).
merge 1:1 year permno_adj using "./data/pub_per_year_permno_adj", keep(master match) nogenerate

* Missing values (for example firm-years without a match) are set to zero.
foreach v in pub_stock_reassign pub_yr {
    replace `v'=0 if `v'==.
}

* Derived publication measures.
gen lpub_yr=ln(1+pub_yr)
gen lpub_stock_reassign=ln(1+pub_stock_reassign)
gen dum_nopub_year=(pub_yr==0)
gen dum_nopub_reassign=(pub_stock_reassign==0)

label var pub_yr "publications per permno_adj-year"
label var lpub_yr "ln(1+pub_yr)"
label var pub_stock_reassign "Pub stock-inc reassignment-permno_adj level"
label var lpub_stock_reassign "ln(1+pub_stock_reasign)"
label var dum_nopub_year "dummy for no pubs per permno-year"
label var dum_nopub_reassign "dummy for no pubs ever up to year t-inc reassignment-permno_adj level"

* ------------------------- NPL citation variables -------------------------
* Yearly NPL citation counts per firm; firm-years present only in the using file are not kept.
merge 1:1 year permno_adj using "./data/corp_NPL_cite_per_year_firm_80_15.dta", keep(master match) nogenerate

* Missing citation counts (for example firm-years without a match) are set to zero.
foreach v in corp_exter_cites inter_cites {
    replace `v'=0 if `v'==.
}

* Log transforms of the two citation counts.
gen linter_cites=ln(1+inter_cites)
gen lcorp_exter_cites=ln(1+corp_exter_cites)

label var inter_cites "internal npl cites to own pubs per permno_adj-pat year"
label var linter_cites "ln(1+inter_cites)"
label var corp_exter_cites "corp external npl cites to own pubs per permno-pat grant year"
label var lcorp_exter_cites "ln(1+corp_exter_cites)"

**********************************************************
*****************************************************************************

* First and last year of each permno_adj, measured before the entry rule below is applied.
sort permno_adj year
by permno_adj: egen fyear1=min(year)
by permno_adj: egen fyearn=max(year)

* FIRMS ENTER AT FIRST PATENT STOCK (INCLUDING REASSIGNMENTS):
* a record counts as entered once the firm has shown dum_nopat_reassign==0 in the current
* or any earlier year. Entered records get the dummy set to 0; all other records are dropped.
by permno_adj: gen byte entered=sum(dum_nopat_reassign==0)>0
replace dum_nopat_reassign=0 if entered
drop if dum_nopat_reassign==1
drop entered

* Log of the patent stock including reassignments. No +1 offset is applied here,
* so a zero stock yields a missing value.
gen lpat_stock_reassign=ln(pat_stock_reassign)
label var lpat_stock_reassign "ln(pat_stock_reassign)"



*LOCATE BIG JUMPS IN SALES, PATENT, PUBS [MOSTLY DUE TO M&A] generate id_minor to split firm : we manually examined these cases to determine the split
* id_minor marks the segment of a permno_adj: every record starts as "A" and the
* replace statements below relabel the records after a manually chosen cut-off year
* as "B" (or "C" for a further segment).

gen id_minor="A"
*sales jump
* The two commented-out lines below show how the relative year-on-year change in sales
* was computed when screening for jumps; they are not executed.
*sort permno_adj year
*by permno_adj:gen dsales1=(sales-sales[_n-1])/sales[_n-1]

* Cut-offs for firms with a sales jump. permno_adj 77418 is split into three segments
* (A, B for 2001-2008, C from 2009 onward).
replace id_minor="B" if permno_adj==11345 &year>1995
replace id_minor="B" if permno_adj==24097 &year>2002
replace id_minor="B" if permno_adj==42454 &year>1986
replace id_minor="B" if permno_adj==44513 &year>1997
replace id_minor="B" if permno_adj==45911 &year>2001
replace id_minor="B" if permno_adj==78975 &year>2007
replace id_minor="B" if permno_adj==10143 &year>2002
replace id_minor="B" if permno_adj==10363 &year>2008
replace id_minor="B" if permno_adj==10513 &year>1995
replace id_minor="B" if permno_adj==10531 &year>1996
replace id_minor="B" if permno_adj==11552 &year>1999
replace id_minor="B" if permno_adj==43617 &year>2002
replace id_minor="B" if permno_adj==43757 &year>2008
replace id_minor="B" if permno_adj==68523 &year>1997
replace id_minor="B" if permno_adj==76306 &year>1997
replace id_minor="B" if permno_adj==76744 &year>2010
replace id_minor="B" if permno_adj==76978 &year>2005
replace id_minor="B" if permno_adj==77103 &year>2000
replace id_minor="B" if permno_adj==77264 &year>2005
replace id_minor="B" if permno_adj==77418 &year>2000 &year<2009
replace id_minor="C" if permno_adj==77418 &year>=2009
replace id_minor="B" if permno_adj==79703 &year>2006
replace id_minor="B" if permno_adj==79879 &year>1999
replace id_minor="B" if permno_adj==80918 &year>2009
replace id_minor="B" if permno_adj==82513 &year>2011
replace id_minor="B" if permno_adj==82678 &year>2009
replace id_minor="B" if permno_adj==83111 &year>2006
replace id_minor="B" if permno_adj==84060 &year>2003
replace id_minor="B" if permno_adj==84281 &year>2003
replace id_minor="B" if permno_adj==85208 &year>2005
replace id_minor="B" if permno_adj==85686 &year>2006
replace id_minor="B" if permno_adj==88335 &year>2009
replace id_minor="B" if permno_adj==18382 &year>2002
replace id_minor="B" if permno_adj==82726 &year>1999
replace id_minor="B" if permno_adj==87241 &year>2009
replace id_minor="B" if permno_adj==89006 &year>2011
replace id_minor="B" if permno_adj==15368 &year>1994

*pub jump
* The two commented-out lines below show how the relative year-on-year change in the
* publication stock was computed when screening for jumps; they are not executed.
*sort permno_adj year
*by permno_adj:gen dpub1=(pub_stock_reassign-pub_stock_reassign[_n-1])/pub_stock_reassign[_n-1]


* Cut-offs for firms with a publication jump. Statement order matters for permno_adj 14218:
* years after 1984 are first set to "B", then years after 1999 are overwritten with "C".
* NOTE(review): permno_adj 76095 appears twice below (year>2011 and year>2006), both
* assigning "B"; the later, broader rule subsumes the earlier one. Confirm whether the
* segment after 2011 was meant to be a separate segment.
replace id_minor="B" if permno_adj==12503 &year>1986
replace id_minor="B" if permno_adj==14218 &year>1984
replace id_minor="C" if permno_adj==14218 &year>1999
replace id_minor="B" if permno_adj==39570 &year>1990
replace id_minor="B" if permno_adj==66093 &year>2004
replace id_minor="B" if permno_adj==66384 &year>2002
replace id_minor="B" if permno_adj==68347 &year>1997
replace id_minor="B" if permno_adj==75444 &year>1997
replace id_minor="B" if permno_adj==75607 &year>2004
replace id_minor="B" if permno_adj==78987 &year>2009
replace id_minor="B" if permno_adj==10791 &year>1995
replace id_minor="B" if permno_adj==10890 &year>1985
replace id_minor="B" if permno_adj==14541 &year>1983
replace id_minor="B" if permno_adj==24272 &year>2007
replace id_minor="B" if permno_adj==27887 &year>1984
replace id_minor="B" if permno_adj==28505 &year>1984
replace id_minor="B" if permno_adj==38420 &year>1998
replace id_minor="B" if permno_adj==48267 &year>2006
replace id_minor="B" if permno_adj==49680 &year>2006
replace id_minor="B" if permno_adj==62092 &year>2005
replace id_minor="B" if permno_adj==76095 &year>2011
replace id_minor="B" if permno_adj==10104 &year>2009
replace id_minor="B" if permno_adj==10279 &year>1992
replace id_minor="B" if permno_adj==23318 &year>1991
replace id_minor="B" if permno_adj==54798 &year>2004
replace id_minor="B" if permno_adj==76095 &year>2006
replace id_minor="B" if permno_adj==77605 &year>2005
replace id_minor="B" if permno_adj==83950 &year>2011
replace id_minor="B" if permno_adj==86725 &year>2008
replace id_minor="B" if permno_adj==44329 &year>2006
replace id_minor="B" if permno_adj==79006 &year>2001

*pat jump
* The commented-out line below shows how the relative year-on-year change in the patent
* stock was computed when screening for jumps; it is not executed.
*by permno_adj:gen dpat1=(pat_stock_reassign-pat_stock_reassign[_n-1])/pat_stock_reassign[_n-1]

* Cut-offs for firms with a patent jump: records after the listed year become segment "B".
replace id_minor="B" if permno_adj==65083 &year>1998
replace id_minor="B" if permno_adj==12067 &year>2001
replace id_minor="B" if permno_adj==13610 &year>2006
replace id_minor="B" if permno_adj==24643 &year>1999
replace id_minor="B" if permno_adj==24766 &year>1993
replace id_minor="B" if permno_adj==29532 &year>1987
replace id_minor="B" if permno_adj==42200 &year>2010
replace id_minor="B" if permno_adj==43350 &year>2009
replace id_minor="B" if permno_adj==44601 &year>1989
replace id_minor="B" if permno_adj==53640 &year>2009
replace id_minor="B" if permno_adj==55212 &year>1997
replace id_minor="B" if permno_adj==61583 &year>1985
replace id_minor="B" if permno_adj==68591 &year>2004
replace id_minor="B" if permno_adj==77695 &year>2002
replace id_minor="B" if permno_adj==78977 &year>2004
replace id_minor="B" if permno_adj==80055 &year>2000
replace id_minor="B" if permno_adj==82766 &year>2003
replace id_minor="B" if permno_adj==85231 &year>2009
replace id_minor="B" if permno_adj==86362 &year>2009
replace id_minor="B" if permno_adj==11896 &year>2000
replace id_minor="B" if permno_adj==25778 &year>2010
replace id_minor="B" if permno_adj==80361 &year>2010


*firms w/ diff gvkey - split when gvkey changes:
* Records after the listed year become segment "B" of the same permno_adj.
* The cut-off years are hard-coded; presumably each is the last year under the earlier
* gvkey - TODO confirm against the permno_adj to gvkey link file.
replace id_minor="B" if permno_adj==10258 &year>2006
replace id_minor="B" if permno_adj==10966 &year>2010
replace id_minor="B" if permno_adj==22322 &year>1993
replace id_minor="B" if permno_adj==80452 &year>2006
replace id_minor="B" if permno_adj==82261 &year>1997
replace id_minor="B" if permno_adj==82526 &year>2007
replace id_minor="B" if permno_adj==82831 &year>2009
replace id_minor="B" if permno_adj==85293 &year>2007
replace id_minor="B" if permno_adj==88434 &year>2005
replace id_minor="B" if permno_adj==90133 &year>2003
replace id_minor="B" if permno_adj==90954 &year>2008
replace id_minor="B" if permno_adj==91078 &year>2012
replace id_minor="B" if permno_adj==91365 &year>2008


* ------------------------- Clean-up after the split -------------------------
* Drop split firms that have no patents after the split.
drop if permno_adj==82726 | (permno_adj==87241 & year>2009)

* Keep only firm segments (permno_adj by id_minor) with at least 3 records.
* c is the number of records in the segment; it remains in the dataset.
bysort permno_adj id_minor: gen c=_N
drop if c<3

* First and last year of each segment after the split.
bysort permno_adj id_minor (year): egen fyear1_adj=min(year)
by permno_adj id_minor: egen fyearn_adj=max(year)

* Remove helper variables that are no longer needed.
drop dy   fyear1_adjust fyearn_adjust min_y_permno max_y_permno
sort permno_adj id_minor year

* permno_adj_long: unique firm id of the panel file, formed by appending the segment
* letter to permno_adj. permno_adj is turned into a string for the concatenation
* and converted back to numeric right after.
tostring permno_adj, replace
gen permno_adj_long=permno_adj+id_minor
destring permno_adj, replace
label var permno_adj_long "unique panel file firm id after firms split by big jumps==permno_adj+id_minor"

* Put the identifier and year variables first.
order permno_adj_long permno_adj id_minor gvkey_str gvkey year fyear1_adj fyearn_adj fyear1 fyearn cusip conm, first
* ------------------------- Own patent and publication stocks -------------------------
* Both stocks are built within permno_adj_long from the yearly flows, without
* reassignments: the first record of a firm equals its flow, and every later record
* equals 0.85 times the previous record's stock (15% depreciation) plus the current flow.

* Patent stock (focal firm granted patents only).
gen pat_stock=pat_yr
sort permno_adj_long year
by permno_adj_long: replace pat_stock=pat_yr+0.85*pat_stock[_n-1] if pat_stock[_n-1]!=.
gen lpat_stock=ln(1+pat_stock)
gen dum_nopat=(pat_stock==0)
label var pat_stock "patent stock -focal firm granted pat- permno_adj_long"
label var lpat_stock "ln(1+pat_stock)"
label var dum_nopat "dummy for no granted pat ever up to year t"

* Publication stock (focal firm publications only).
gen pub_stock=pub_yr
sort permno_adj_long year
by permno_adj_long: replace pub_stock=pub_yr+0.85*pub_stock[_n-1] if pub_stock[_n-1]!=.
gen lpub_stock=ln(1+pub_stock)
gen dum_nopub=(pub_stock==0)
label var pub_stock "publication stock-focal firm granted pubs- permno_adj_long"
label var lpub_stock "ln(1+pub_stock)"
label var dum_nopub "dummy for no granted pubs ever up to year t"

* ------------------------- Variable labels -------------------------
* Each variable is labelled once, grouped by topic.

* Identifiers and time range
label variable permno_adj_long "unique panel file firm id after firms split by big jumps==permno_adj+id_minor"
label variable permno_adj "UO firm unique id"
label variable id_minor "id for split by big jumps: A/B/C"
label variable gvkey "UO gvkey"
label variable gvkey_str "UO gvkey string version"
label variable cusip "UO cusip"
label variable conm "Compustat record current name in Compustat 2018 file"
label variable year "Data Year - Fiscal"
label variable fyear1 "first year in Compustat file by permno_adj"
label variable fyearn "last year in Compustat file by permno_adj"
label variable fyear1_adj "first year in panel file by permno_adj_long"
label variable fyearn_adj "last year in panel file by permno_adj_long"

* Patents
label variable pat_yr "granted patents per permno_adj-year"
label variable lpat_yr "ln(1+pat_yr)"
label variable dum_nopat_year "dummy for no granted pat per permno-year"
label variable pat_stock_reassign "Patent stock-inc reassignment-permno_adj level"
label variable lpat_stock_reassign "ln(pat_stock_reassign)"
label variable dum_nopat_reassign "dummy for no pat ever up to year t-inc reassignment-permno_adj level"
label variable pat_stock "patent stock -focal firm granted pat- permno_adj_long"
label variable lpat_stock "ln(1+pat_stock)"
label variable dum_nopat "dummy for no granted pat ever up to year t"

* Publications
label variable pub_yr "publications per permno_adj-year"
label variable lpub_yr "ln(1+pub_yr)"
label variable dum_nopub_year "dummy for no pubs per permno-year"
label variable pub_stock_reassign "Pub stock-inc reassignment-permno_adj level"
label variable lpub_stock_reassign "ln(1+pub_stock_reasign)"
label variable dum_nopub_reassign "dummy for no pubs ever up to year t-inc reassignment-permno_adj level"
label variable pub_stock "publication stock-focal firm granted pubs- permno_adj_long"
label variable lpub_stock "ln(1+pub_stock)"
label variable dum_nopub "dummy for no granted pubs ever up to year t"

* NPL citations
label variable inter_cites "internal npl cites to own pubs per permno_adj-pat year"
label variable linter_cites "ln(1+inter_cites)"
label variable corp_exter_cites "corp external npl cites to own pubs per permno-pat grant year"
label variable lcorp_exter_cites "ln(1+corp_exter_cites)"

* Write the finished panel file.
save "./output_files/DISCERN_Panel_Data_1980_2015.dta", replace

                         

* ------------------------- Year range of each firm in the panel -------------------------
* Load only the firm id and its first/last year from the panel file, reduce to the
* distinct combinations, and store them as the lookup used to adjust the years of
* the patent, publication and NPL files.
use permno_adj fyear1 fyearn using "./output_files/DISCERN_Panel_Data_1980_2015.dta", clear
duplicates drop
save "./data/permno_min_max_year_adj_80_15.dta", replace

*PATENTS - PROVIDED
* Restrict the patent file to panel firms and to grant years inside each firm's
* first/last year range, then label and save it.
use "./data/patent_1980_2015.dta", clear

* The year-range file holds one row per permno_adj (fyear1 and fyearn are constant within
* permno_adj and duplicates were dropped), so this is a many-to-one lookup. m:1 returns the
* same rows as the former m:m merge in that case, but stops with an error if the lookup
* ever stops being unique, where m:m would silently pair rows by their position.
* Only patents of firms found in the lookup are kept.
merge m:1 permno_adj using "./data/permno_min_max_year_adj_80_15", keep(match) nogenerate

* Keep patents whose grant year lies inside the firm's year range.
keep if (publn_year>=fyear1 &publn_year<=fyearn)
drop fyear1 fyearn
duplicates drop

* Store the patent number as a numeric variable.
destring publn_nr, replace

label var name_std "std name matched"
label var id_name "id name_std matched"
label var permno_adj "UO firm at the time patent was granted"
label var id_name1 "id_name#sample"
label var publn_year "patent grant year"
label var publn_nr "Patent number"
label var sample "A:ORBIS-Subs;B:PRE2002Subs;C:NBER2006;U:UO-match"
save "./output_files/DISCERN_patent_database_1980_2015_final1.dta", replace

*PUBLICATIONS - NOT PROVIDED
* Restrict the publication file to panel firms and to publication years inside each
* firm's first/last year range, then label and save it.
use "./data/pub_1980_2015.dta" , clear

* The year-range file holds one row per permno_adj (fyear1 and fyearn are constant within
* permno_adj and duplicates were dropped), so this is a many-to-one lookup. m:1 returns the
* same rows as the former m:m merge in that case, but stops with an error if the lookup
* ever stops being unique, where m:m would silently pair rows by their position.
* Only publications of firms found in the lookup are kept.
merge m:1 permno_adj using "./data/permno_min_max_year_adj_80_15", keep(match) nogenerate

* Keep publications whose year lies inside the firm's year range.
keep if (year>=fyear1 &year<=fyearn)
drop fyear1 fyearn
duplicates drop

label var name_std "std name matched"
label var id_name "id name_std matched"
label var sample "A:ORBIS-Subs; B:PRE2002Subs;C:NBER2006 U:UO-match"
label var permno_adj "UO firm at the time pub was published"
label var wos_rec "WoS pub id record"
label var year "year published"
label var id_name1 "id_name#sample"
save "./output_files/DISCERN_pub_database_1980_2015_final1.dta" , replace


*NPL - NOT PROVIDED
* Restrict the NPL citation file to cited publications of panel firms whose journal
* publication year lies inside the firm's first/last year range, then label and save it.
* clear is specified so that loading does not depend on the data in memory being unchanged.
use "./data/corp_NPL_output_merged_80_15.dta", clear

* The year-range file holds one row per permno_adj (fyear1 and fyearn are constant within
* permno_adj and duplicates were dropped), so this is a many-to-one lookup. m:1 returns the
* same rows as the former m:m merge in that case, but stops with an error if the lookup
* ever stops being unique, where m:m would silently pair rows by their position.
* Only citations whose permno_adj is found in the lookup are kept.
merge m:1 permno_adj using "./data/permno_min_max_year_adj_80_15", keep(match) nogenerate

* Keep citations whose journal publication year lies inside the firm's year range.
keep if (j_date>=fyear1 &j_date<=fyearn)
drop fyear1 fyearn
duplicates drop

label var publn_nr "Patent number"
label var wos_rec "WoS pub id record"
label var j_date "journal publication year"
label var publn_year "patent grant year"
label var npl_publn_id "DISCERN NPL reference ID"
label var permno_adj  "cited publication related UO at time of publication"
label var permno_adj_pat1 "citing patent related UO firm 1 "
label var permno_adj_pat2 "citing patent related UO firm 2 if applicable"
label var permno_adj_pat3 "citing patent related UO firm 3 if applicable"
label var self_cite "dummy for internal cite=1"
save "./output_files/DISCERN_corp_NPL_output_All_80_15_final.dta",replace
