********************************************************************************************
***DISCERN/ ASHISH ARORA, SHARON BELENZON, LIA SHEER (DUKE UNIVERSITY) / DECEMBER 2020***
********************************************************************************************
********************************************************************************************
*Compiling Publication data: flow+stock variables including dynamic reassignment of publications
*the main publication dataset is also compiled here but is finalized at the last part of "./programs/panel_do.do"
*We obtain publications data from the Web of Science database (previously known as ISI Web of Knowledge).
*We include articles from journals covered in the “Science Citation Index” and “Conference Proceedings Citation Index - Science,” 
*while excluding social sciences, arts, and humanities articles.
*WoS data is proprietary data and cannot be published
*Publication data are only available at the aggregate permno_adj-year level
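**INPUT FILE: **NOT PROVIDED** -Match file of UO & Subsidiary id_name to WOS Publications: "pub_match_id_name.dta"
**INPUT FILE: Dynamic match of id_name-sample to UO firms (permno_adj0-5 with fyear/nyear ranges), merged three times below: "dyn_match_All.dta"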
*OUTPUT FILES:
*publication stock with reassignment:pub_stock_permno_adj.dta
*publication flow: pub_per_year_permno_adj.dta
*Aggregate permno_adj-year level publication data are available in the main panel file
******************************************************************************************************************
*******************************************************************************************************************
********************************************************************************************************************
********************************************************************************************************************
**********RELEVANT PERMNO_ADJ AT PUBLICATION YEAR***************
* Purpose: attach the dynamic UO-firm match to every matched WoS publication and
* keep only publications whose matched name belongs to some permno_adj in the
* publication year. Output: "./data/pub_1980_2015.dta".
use "./data/pub_match_id_name.dta", clear

* merge m:m pairs observations by position within each key and is only meaningful
* when one side is unique on the key. m:1 returns the same rows in that case and
* stops with an error (instead of silently mis-pairing) if it is not.
* NOTE(review): assumes dyn_match_All.dta has one row per id_name-sample - confirm.
merge m:1 id_name sample using "./data/dyn_match_All.dta"
keep if _merge==3
drop _merge

* Appropriate permno_adj to assign the pub at publication year (j_date).
* Slots 0-5 are scanned in order, so a later slot overrides an earlier one if
* their [fyear, nyear] ranges overlap. A missing nyear`i' acts as an open-ended
* range because Stata's missing value compares larger than any number.
gen permno_adj=.
forvalues i=0/5 {
	replace permno_adj = permno_adj`i' if permno_adj`i'~=. & j_date>=fyear`i' & j_date<=nyear`i'
}

* Publications enter our sample once the related UO firm is publicly traded and not before.
* Furthermore, we do not account for publications in gap years when the related UO firm is not publicly traded.
drop if permno_adj==.

label var name_std "std name matched"
label var id_name "id name_std matched"
label var permno_adj "UO firm at the time pub was published"
label var sample "A:ORBIS-Subs;B:PRE2002Subs;C:NBER2006;U:UO-match"
label var id_name1 "id_name#sample"
label var j_date "journal publication year"
label var wos_rec "WoS pub id record"
save "./data/pub_1980_2015.dta", replace

***********COMPUTING PUBLICATION STOCK - similar to NBER2006 code***************
********************************************************************************
* Purpose: build the publication-level file with fractional publication counts.
* Output: "./data/pub_database.dta", sorted by id_name1 j_date.
use "./data/pub_1980_2015.dta", clear

* permno_adj at publication year is not needed here; after removing it,
* fully identical rows are collapsed to one.
drop permno_adj
duplicates drop

* nass = number of rows with a non-missing id_name1 that share the same wos_rec,
* i.e. the number of matched assignees of the publication
bysort wos_rec: egen nass = count(id_name1)

* every assignee receives an equal fraction of the publication
gen npub = 1 / nass

* fractional publication count for each id_name1 in each publication year
bysort id_name1 j_date: egen npub_id_name_jyear = sum(npub)

save "./data/pub_database.dta", replace


* Purpose: build a balanced id_name1-year panel of fractional publication counts,
* assign each name-year to its permno_adj, and compute a depreciated publication
* stock per name. Output: "./data/id_name_year_npub".
use "./data/pub_database.dta", clear

* npub_id_name_jyear is constant within id_name1-j_date, so one row per cell suffices
duplicates drop id_name1 j_date, force
keep id_name1 j_date npub_id_name_jyear
ren j_date year

* name level pub count: rectangularize the panel; cells created by fillin get zero pubs
fillin id_name1 year
replace npub_id_name_jyear=0 if npub_id_name_jyear==.

* id_name1 is the one-character sample code followed by the numeric id_name
gen id_name=substr(id_name1, 2,.)
gen sample=substr(id_name1, 1,1)
destring id_name, replace

* Dynamic match of id_name to UO firms.
* merge m:m pairs observations by position within each key and is only meaningful
* when one side is unique on the key. m:1 returns the same rows in that case and
* stops with an error (instead of silently mis-pairing) if it is not.
* NOTE(review): assumes dyn_match_All.dta has one row per id_name-sample - confirm.
merge m:1 id_name sample using "./data/dyn_match_All.dta"
keep if _merge==3
drop _merge

* Appropriate permno_adj to assign the publications in each year.
* Slots 0-5 are scanned in order; a later slot overrides an earlier one on overlap.
gen permno_adj=.
forvalues i=0/5 {
	replace permno_adj = permno_adj`i' if permno_adj`i'~=. & year>=fyear`i' & year<=nyear`i'
}

* Publications enter our sample once the related UO firm is publicly traded and not before.
* When fyear0 is missing the cut-off is fyear1; if fyear1 is missing too, every
* year of that name is dropped (missing compares larger than any year).
drop if fyear0!=. & year<fyear0
drop if fyear0==.& year<fyear1

* Generating publication stock by perpetual inventory with 15% annual depreciation:
*   stock_t = 0.85*stock_(t-1) + flow_t
* The first retained year of each name is seeded with flow/0.15, the steady-state
* stock for that flow under zero growth. replace works through observations in
* order, so each year uses the already-updated stock of the previous year.
gen sum_npub_id_name=npub_id_name_jyear/0.15
sort id_name1 year
by id_name1: replace sum_npub_id_name=0.85*sum_npub_id_name[_n-1]+npub_id_name_jyear if sum_npub_id_name[_n-1]!=.

keep year id_name1 id_name sample npub_id_name_jyear _fillin sum_npub_id_name  permno_adj
save "./data/id_name_year_npub", replace



* Purpose: sum the name-level publication stocks over all names assigned to the
* same permno_adj in a given year. Output: "./data/pub_stock_permno_adj".
use "./data/id_name_year_npub", clear

* name-years not assigned to any permno_adj do not contribute to a firm's stock
drop if permno_adj==.

* collapse returns one row per permno_adj-year, so no prior sort is needed
collapse (sum) sum_npub_id_name, by(permno_adj year)
ren sum_npub_id_name pub_stock_permno_adj

* use the full variable name: the abbreviation pub_stock fails under "set varabbrev off"
label var pub_stock_permno_adj "Cumulative Pub count-including reassignment"
label var permno_adj "UO firm at focal year"
save "./data/pub_stock_permno_adj", replace



*******************************************************************************************
**************************COMPUTING PUBLICATIONS PER YEAR***********************************
* Purpose: compute the yearly flow of fractional publications per permno_adj,
* balanced over all permno_adj-year combinations.
* Output: "./data/pub_per_year_permno_adj".
use "./data/pub_database", clear

* npub_id_name_jyear is constant within id_name1-j_date, so one row per cell suffices
duplicates drop id_name1 j_date, force
keep id_name1 id_name sample j_date npub_id_name_jyear
ren j_date year

* merge m:m pairs observations by position within each key and is only meaningful
* when one side is unique on the key. m:1 returns the same rows in that case and
* stops with an error (instead of silently mis-pairing) if it is not.
* NOTE(review): assumes dyn_match_All.dta has one row per id_name-sample - confirm.
merge m:1 id_name sample using "./data/dyn_match_All.dta"
keep if _merge==3
drop _merge

* Appropriate permno_adj to assign the pubs in each year.
* Slots 0-5 are scanned in order; a later slot overrides an earlier one on overlap.
gen permno_adj=.
forvalues i=0/5 {
	replace permno_adj = permno_adj`i' if permno_adj`i'~=. & year>=fyear`i' & year<=nyear`i'
}
keep if permno_adj~=.
keep permno_adj year npub_id_name_jyear

* sum over multiple names to get pubs for each permno_adj-year
* (collapse returns one row per by-group, so no prior sort is needed)
collapse (sum) npub_id_name_jyear, by(permno_adj year)

* balance the panel: permno_adj-year cells with no recorded pubs get zero
fillin permno_adj year
replace npub_id_name_jyear=0 if npub_id_name_jyear==.

ren npub_id_name_jyear pub_yr
label var pub_yr "publications per permno_adj-year"
label var permno_adj "UO firm at publication year"
drop _fillin
save "./data/pub_per_year_permno_adj", replace
***************************************************************************
**Merging Additional Publication Info to "pub_1980_2015.dta": ****************************
*THIS IS NOT PROVIDED- users should obtain the full WOS publication data for each wos_rec and compile by themselves
*we compiled and merged variables such as: journal, volume, page, issue, title, author, address, issn, forward cites, JIF ****

