/* 
Script for the quantitative analysis for the paper:
>> Trapped in Transformative Agreements? A Multifaceted Analysis of >1,000 Contracts <<

 Authors: Laura Rothfritz, Ulrich Herb, W. Benedikt Schmal
 
 Responsible for quantitative analysis: W. Benedikt Schmal
 Ilmenau University of Technology (Germany)
 Economic Theory Group
 Contact: wolfgang-benedikt.schmal@tu-ilmenau.de
 
 Date: 24 September 2024
*/

* --- Session setup ------------------------------------------------------------
* Start from an empty session, switch off output paging, and fix the seed of
* the random-number generator.
* NOTE(review): this script calls unique, coefplot and estout, which are
* presumably community-contributed commands (e.g. from SSC) -- confirm they
* are installed before running.
clear all
set more off
set seed 1896

* Project folders: replace YOURPATH according to your folder structure.
* Forward slashes are used as separators because Stata accepts them on
* Windows, macOS and Linux, whereas backslashes only work on Windows.
global main "YOURPATH/code"   // working directory from which esac_data is loaded
global plots "YOURPATH/plots" // directory that receives the exported figures

* Switch: set to 1 to print the lists of publishers and countries.
local tablist = 0


********************************************************************************

							 *** DATA ANALYSIS ***
							 
********************************************************************************


* Move to the working directory and load the dataset.
cd "$main"
use esac_data, clear

* Optional descriptive tables, printed only when the tablist switch equals 1.
if (`tablist' == 1) {
	unique Publisher            // number of distinct publishers in the dataset
	tabulate Publisher, sort    // >> Table A1
	tabulate Country, sort      // Table A2
}

*** The role of the "Big 3" publishers
* The sign of big3 is flipped before plotting and flipped back later in the
* script. Assuming big3 is a 0/1 indicator, the Big 3 group is then coded -1
* and becomes the first slice, which is what legend key 1 is labelled as.
replace big3 = -big3

* Both figures are written to the plots folder.
cd "$plots"

* Pie 1: shares of Size by publisher group.
graph pie Size, over(big3) ///
	title("Total Contract Volume") ///
	subtitle("Elsevier, Springer Nature, Wiley") ///
	legend(label(1 "Big 3") label(2 "Other Publishers")) ///
	graphregion(color(white)) scheme(s2gcolor) bgcolor(none)
graph export big3_vol1.png, as(png) replace

* Pie 2: shares of counter by publisher group.
graph pie counter, over(big3) ///
	title("Total Number of Contracts") ///
	subtitle("Elsevier, Springer Nature, Wiley") ///
	legend(label(1 "Big 3") label(2 "Other Publishers")) ///
	graphregion(color(white)) scheme(s2gcolor) bgcolor(none)
graph export big3_count1.png, as(png) replace

*** Countries
* Legend texts shared by both country pies. The keys are numbered by slice
* position, so the order below has to follow the category order of country3.
local country_keys label(1 "Rest of World") label(2 "United States") label(3 "China") ///
	label(4 "Germany") label(5 "Netherlands") label(6 "United Kingdom") ///
	label(7 "Austria") label(8 "Hungary") label(9 "Rest of Europe")

* Both figures are written to the plots folder.
cd "$plots"

* Pie 1: shares of Size by country group.
graph pie Size, over(country3) ///
	title("Total Contract Volume") ///
	subtitle("Sum per Country/Group of Countries") ///
	legend(`country_keys') ///
	graphregion(color(white)) scheme(s2gcolor) bgcolor(none)
graph export country_vol1.png, as(png) replace

* Pie 2: shares of counter by country group.
graph pie counter, over(country3) ///
	title("Total Number of Contracts") ///
	subtitle("Sum per Country/Group of Countries") ///
	legend(`country_keys') ///
	graphregion(color(white)) scheme(s2gcolor) bgcolor(none)
graph export country_count1.png, as(png) replace

	
*** Excluding small contracts for "big 3" and country plots ***
* For every minimum size, the four pies are redrawn using only observations
* whose Size is at least that threshold.
*
* Country legends: legend keys are numbered by the slices that are actually
* drawn. If a size filter removes every observation of a country group,
* position-based texts such as label(3 "China") would silently move to a
* different slice. The legend texts are therefore attached to the country3
* codes through a temporary value label, so every slice keeps its own name
* regardless of which groups survive the filter. country3 is coded 0-8 in the
* order listed below (the same order as the value labels defined further down
* in this script).
* The Big 3 pies keep their two position-based keys.
* NOTE(review): this assumes both publisher groups remain present at every
* threshold used here -- confirm.

* All figures of this section are written to the plots folder.
cd "$plots"

* Remember the value label currently attached to country3 ("." = none).
local prev_label : value label country3
if "`prev_label'" == "" local prev_label "."

capture label drop pie_country
label define pie_country 0 "Rest of World" 1 "United States" 2 "China" 3 "Germany" ///
	4 "Netherlands" 5 "United Kingdom" 6 "Austria" 7 "Hungary" 8 "Rest of Europe"
label values country3 pie_country

foreach k of numlist 10 100 {
	* Big 3: contract volume
	graph pie Size if Size >= `k', over(big3) ///
		note("Excluding TAs with less than `k' annual publications.") ///
		title("Total Contract Volume") ///
		subtitle("Elsevier, Springer Nature, Wiley") ///
		graphregion(color(white)) scheme(s2gcolor) bgcolor(none) ///
		legend(label(1 "Big 3") label(2 "Other Publishers"))
	graph export big3_vol_min`k'.png, as(png) replace

	* Big 3: number of contracts
	graph pie counter if Size >= `k', over(big3) ///
		note("Excluding TAs with less than `k' annual publications.") ///
		title("Total Number of Contracts") ///
		subtitle("Elsevier, Springer Nature, Wiley") ///
		graphregion(color(white)) scheme(s2gcolor) bgcolor(none) ///
		legend(label(1 "Big 3") label(2 "Other Publishers"))
	graph export big3_count_min`k'.png, as(png) replace

	* Countries: contract volume (legend texts come from the value label)
	graph pie Size if Size >= `k', over(country3) ///
		note("Excluding TAs with less than `k' annual publications.") ///
		title("Total Contract Volume") ///
		subtitle("Sum per Country/Group of Countries") ///
		graphregion(color(white)) scheme(s2gcolor) bgcolor(none)
	graph export country_vol_min`k'.png, as(png) replace

	* Countries: number of contracts (legend texts come from the value label)
	graph pie counter if Size >= `k', over(country3) ///
		note("Excluding TAs with less than `k' annual publications.") ///
		title("Total Number of Contracts") ///
		subtitle("Sum per Country/Group of Countries") ///
		graphregion(color(white)) scheme(s2gcolor) bgcolor(none)
	graph export country_count_min`k'.png, as(png) replace
}

* Put country3 back into the labelling state it had before this section.
label values country3 `prev_label'
label drop pie_country

	
*** TA contracts per year -- attention: 2024 is not complete!
* Bar chart of the number of observations per startyear, with the count
* printed above each bar.
graph bar (count), over(startyear) ///
	bar(1, color(eltblue%60)) ///
	blabel(bar, position(above) color(black) format(%9.0f)) ///
	title("Number of Transformative Agreements") ///
	subtitle("By Starting Year") ///
	note("Number for 2024 until 9 August") ///
	ytitle("Frequency") ///
	graphregion(color(white)) scheme(s2gcolor) bgcolor(none)
cd "$plots"
graph export ta_year.png, as(png) replace
	

* TA volumes over time
* start is a copy of startyear that enters the regressions as a factor
* variable; ib2024. makes 2024 the omitted base category.
generate start = startyear

* Same specification twice: first with heteroskedasticity-robust standard
* errors, then with standard errors clustered by Publisher. The coefficient
* plot below is drawn from the second (most recent) estimation.
regress lsize ib2024.start, robust
regress lsize ib2024.start, vce(cluster Publisher)

* NOTE(review): confirm that yline(2016.5) draws the intended reference line
* on the coefficient plot.
coefplot, drop(_cons) xline(0) yline(2016.5) ///
	title("Contract Volume and Starting Year") ///
	subtitle("Relative to the year 2024") ///
	note("95% Confidence Bands displayed") ///
	graphregion(color(white)) scheme(s2gcolor) bgcolor(none)
cd "$plots"
graph export yearsize_reg.png, as(png) replace

* Equivalent regression with 2019 as the base year.
* NOTE(review): this second plot is drawn but never exported -- confirm that
* this is intended.
regress lsize ib2019.start, vce(cluster Publisher)
coefplot, drop(_cons) xline(0) yline(2016.5) ///
	title("Contract Volume and Starting Year") ///
	subtitle("Relative to the year 2019") ///
	note("95% Confidence Bands displayed") ///
	graphregion(color(white)) scheme(s2gcolor) bgcolor(none)


	
* Undo the sign flip that was applied to big3 before the pie charts.
replace big3 = -big3

* Frequency of follow_up_ta for each minimum contract size.
foreach min_size in 1 10 100 {
	tabulate follow_up_ta if Size >= `min_size'
}

 
 *** Relationship between contract length and volume
* For each minimum contract size, lsize is regressed on loglength by OLS with
* standard errors clustered by Publisher. Four nested specifications are
* estimated and stored:
*   dur_1  no further controls
*   dur_2  adds i.publFE
*   dur_3  adds i.startyear
*   dur_4  adds i.countryFE
* estout then prints one TeX-formatted table per threshold, showing only
* loglength and the constant.
* The if qualifier is placed in front of the options, as in Stata's documented
* command syntax, instead of after a second comma.
foreach k of numlist 1 10 100 { // larger contract sizes cause too small sample sizes
	quietly regress lsize loglength if Size >= `k', vce(cluster Publisher)
	estimates store dur_1

	quietly regress lsize i.publFE loglength if Size >= `k', vce(cluster Publisher)
	estimates store dur_2

	quietly regress lsize i.publFE i.startyear loglength if Size >= `k', vce(cluster Publisher)
	estimates store dur_3

	quietly regress lsize i.publFE i.startyear i.countryFE loglength if Size >= `k', vce(cluster Publisher)
	estimates store dur_4

	estout dur_1 dur_2 dur_3 dur_4, keep(loglength _cons) cells(b(star fmt(3)) se(par fmt(2))) ///
	 stats(r2 bic N , fmt(%9.3f %9.3f %9.0f) labels(R$^2$ BIC)) legend label collabels(none) ///
	 varlabels(follow_up_ta 1_(Follow-Up-TA) loglength log(TA-Duration) _cons Constant) starlevels(* 0.10 ** 0.05 *** 0.01) ///
	 style(tex)
}
 
 
 
 
* Existence of a follow-up agreement: probit models and a linear probability model
* For each minimum contract size, followupta_exists is modelled as a function
* of lsize and loglength, using only observations with follow_up_ta == 0.
* Standard errors are clustered by Publisher. Stored specifications:
*   dur_1  probit, no further controls
*   dur_3  probit, adds i.publFE
*   dur_4  probit, adds i.startyear
*   dur_5  probit, adds i.countryFE
*   dur_6  OLS (regress) with the same regressors as dur_5
* After every estimation, margins reports the average marginal effect of lsize.
* The if qualifier is placed in front of the options, as in Stata's documented
* command syntax, instead of after a second comma.
* NOTE(review): presumably follow_up_ta == 0 selects agreements that are not
* themselves follow-ups -- confirm against the data documentation.
foreach k of numlist 1 10 100 {
	quietly probit followupta_exists lsize loglength if Size >= `k' & follow_up_ta == 0, vce(cluster Publisher)
	estimates store dur_1
	margins, dydx(lsize)

	quietly probit followupta_exists i.publFE lsize loglength if Size >= `k' & follow_up_ta == 0, vce(cluster Publisher)
	estimates store dur_3
	margins, dydx(lsize)

	quietly probit followupta_exists i.publFE i.startyear lsize loglength if Size >= `k' & follow_up_ta == 0, vce(cluster Publisher)
	estimates store dur_4
	margins, dydx(lsize)

	quietly probit followupta_exists i.publFE i.startyear i.countryFE lsize loglength if Size >= `k' & follow_up_ta == 0, vce(cluster Publisher)
	estimates store dur_5
	margins, dydx(lsize)

	quietly regress followupta_exists i.publFE i.startyear i.countryFE lsize loglength if Size >= `k' & follow_up_ta == 0, vce(cluster Publisher)
	estimates store dur_6
	margins, dydx(lsize)

	estout dur_1 dur_3 dur_4 dur_5 dur_6, keep(lsize loglength _cons) cells(b(star fmt(3)) se(par fmt(2))) ///
	 stats(r2_p r2 bic N, fmt(%9.3f %9.3f %9.3f %9.0f) labels(PseudoR$^2$ R$^2$ BIC)) legend label collabels(none) ///
	 varlabels(lsize log(TA-Volume) loglength log(TA-Duration) big3 $\mathbbm{1}_{big\:3}$ _cons Constant) starlevels(* 0.10 ** 0.05 *** 0.01) ///
	 style(tex)
}
	
	
 
* Summary statistics of length_year: full sample, then split by big3.
summarize length_year
summarize length_year if big3 == 0
summarize length_year if big3 == 1

* Attach readable names to the country3 codes (0-8) and tabulate them.
label define cstat 0 "Other" 1 "United States" 2 "China" 3 "Germany" 4 "Netherlands" 5 "UK"  6 "Austria" 7 "Hungary" 8 "Europe: Other" 
label values country3 cstat
tabulate country3

* Same summary for every country group, separately for big3 == 0 and big3 == 1.
forvalues c = 0/8 {
	display "This is country No. `c'"
	summarize length_year if country3 == `c' & big3 == 0
	summarize length_year if country3 == `c' & big3 == 1
}

* License types
* hv_totsize is totalsize divided by 1000, which is the scale announced in the
* note of the second chart.
generate hv_totsize = totalsize / 1000
tabulate OALicense

* Both figures are written to the plots folder.
cd "$plots"

* Chart 1: number of observations per license type.
graph hbar (count), over(OALicense, axis(off)) ///
	bar(1, color(eltblue%60)) ///
	blabel(group, position(base) color(black)) ///
	ytitle("Frequency") ylabel(50(50)500) note(" ") ///
	title("License Types") ///
	subtitle("By the number of transformative agreements") ///
	graphregion(color(white)) scheme(s2gcolor) bgcolor(none)
graph export oalicense1.png, as(png) replace

* Chart 2: sum of hv_totsize per license type.
graph hbar (sum) hv_totsize, over(OALicense, axis(off)) ///
	bar(1, color(eltblue%60)) ///
	blabel(group, position(base) color(black)) ///
	ytitle("Number of Publications") ///
	note("X-Axis Scale: #Publications/1000") ylabel(100 (100) 1000) ///
	title("License Types") ///
	subtitle("By the total number of planned publications") ///
	graphregion(color(white)) scheme(s2gcolor) bgcolor(none)
graph export oalicense2.png, as(png) replace

	

* TA duration and Big 3 - appendix table
* OLS of loglength on big3 with standard errors clustered by Publisher:
*   dur_1  no further controls
*   dur_3  adds i.startyear
*   dur_4  adds i.countryFE
* The table shows only big3 and the constant.
* NOTE(review): varlabels() names lsize and loglength, neither of which is a
* kept coefficient here, while big3 gets no explicit label -- confirm that the
* row label printed for big3 is the intended one.
quietly regress loglength big3, vce(cluster Publisher)
estimates store dur_1

quietly regress loglength i.startyear big3, vce(cluster Publisher)
estimates store dur_3

quietly regress loglength i.startyear i.countryFE big3, vce(cluster Publisher)
estimates store dur_4

estout dur_1 dur_3 dur_4, keep(big3 _cons) cells(b(star fmt(3)) se(par fmt(2))) ///
 stats(r2 bic N , fmt(%9.3f %9.3f %9.0f) labels(R-squared BIC)) legend label collabels(none) ///
 varlabels(lsize log(TA-Volume) loglength log(TA-Duration) _cons Constant) starlevels(* 0.10 ** 0.05 *** 0.01) ///
 style(tex)
 

* 
* Frequency of WorkflowAssessment: all observations, then only those with
* follow_up_ta equal to 1.
tabulate WorkflowAssessment
tabulate WorkflowAssessment if follow_up_ta == 1

********************************************************************************
*** 							END OF CODE FILE							 ***
********************************************************************************
