*** 	"How Allowing a Little Bit of Dissent Helps Control Social Media: 
***
***		Impact of Market Structure on Censorship Compliance" 
***
***		Journal of the European Economic Association
***
********************************************************************************

*** Do-file for the tables and figures in the main text (Sections 3-4) and in Appendices G and H

* Session setup.
*   Usage:  do <this do-file> "<path to replication folder>"
*   Inputs are read from $dir/data and results are written to $dir/output.
clear

args dir_path

* Fall back to the current working directory when no path argument is given;
* otherwise $dir would be empty and every "$dir/data/..." path below would
* resolve against the filesystem root.
if "`dir_path'" == "" {
    local dir_path "`c(pwd)'"
}
global dir "`dir_path'"

* global dir "CHANGE TO YOUR OWN PATH"

cd "$dir"

set more off
set scheme s1mono

********************************************************************************
*** Table 2 - Summary Statistics on Platform Censorship

use "$dir/data/wordcount", clear

* Numeric platform code used as the reshape index:
* 1 = yy, 2 = 9158, 3 = sinashow. Any other name trips the assert.
gen int firmID = cond(firm == "yy", 1, cond(firm == "9158", 2, cond(firm == "sinashow", 3, .)))
assert !missing(firmID)
drop firm

* One wordcount column per platform; missing cells are treated as zero
reshape wide wordcount, i(date event_id dateEvent) j(firmID)
recode wordcount? (. = 0)

* Total count per event and platform
collapse (sum) wordcount?, by(event_id)

tabstat wordcount?, stat(mean sd min max n) save

* Build the 3 x 5 table one platform (row) at a time:
* columns are mean, sd, min, max and number of observations.
capture matrix drop stats
forvalues k = 1/3 {
    quietly summarize wordcount`k'
    matrix stats = nullmat(stats) \ (r(mean), r(sd), r(min), r(max), r(N))
}

matrix rownames stats = YY 9158 SinaShow
matrix colnames stats = Mean StdDev Min Max Obs

* Write the matrix as a LaTeX table
esttab matrix(stats, fmt(2)) using "$dir/output/table2.tex", ///
    replace tex label nomtitles ///
    title("Total Number of Keywords Censored Per Event")
	

********************************************************************************
*** Table 3 - Summary Statistics on Platform Daily Alexa Ranks

* --------------------------------
* Part 1: Daily Alexa Rank
* --------------------------------

use "$dir/data/SVP_siteRankData", clear
tabstat rank?, stats(mean sd min max) save

* One row per platform (rank1, rank2, rank3): mean, sd, min, max
capture matrix drop stats
forvalues k = 1/3 {
    quietly summarize rank`k'
    matrix stats = nullmat(stats) \ (r(mean), r(sd), r(min), r(max))
}

matrix rownames stats = YY 9158 SinaShow
matrix colnames stats = Mean StdDev Min Max

* Create table3.tex; Part 2 appends to this file
esttab matrix(stats, fmt(%9.0fc)) using "$dir/output/table3.tex", ///
    replace label nonumber booktabs ///
    mtitles("Daily Alexa Rank and Monthly Unique Visitors")

	
* --------------------------------
* Part 2: Monthly Unique Visitors
* --------------------------------
use "$dir/data/SVP_rank2traffic", clear
tabstat yy _9158 sinashow, stats(mean sd) save

* One row per platform: mean and standard deviation
capture matrix drop stats
foreach platform in yy _9158 sinashow {
    quietly summarize `platform'
    matrix stats = nullmat(stats) \ (r(mean), r(sd))
}

matrix rownames stats = YY 9158 SinaShow
matrix colnames stats = Mean StdDev

* Append to the LaTeX file started in Part 1
esttab matrix(stats, fmt(%9.0fc)) using "$dir/output/table3.tex", ///
    append label nonumber booktabs ///
    mtitles("Monthly Unique Visitors (est.)")

********************************************************************************
*** Figure 4 - Coefficient plot of event-time dummies in regression (1)

use "$dir/data/eventstudy", clear
xtset event_id

* One fixed-effects regression of wordcount_<platform> on the event-time
* dummies D1-D17 plus quarter and year indicators per platform; the results
* are stored as reg_big, reg_med and reg_sm.
local tags "big med sm"
local k = 0
foreach platform in yy 9158 sinashow {
    local ++k
    local tag : word `k' of `tags'
    eststo reg_`tag': xtreg wordcount_`platform' D1 - D17 i.quarter i.year, fe
}

// merge plots of all three firms
coefplot ///
    (reg_big, label(Big platform)    color(black*0.8) ciopts(recast(rcap) lwidth(vvthin) lcolor(black))) ///
    (reg_med, label(Medium platform) color(black*0.5) ciopts(recast(rcap) lwidth(vvthin) lcolor(black))) ///
    (reg_sm,  label(Small platform)  color(black*0.2) ciopts(recast(rcap) lwidth(vvthin) lcolor(black))), ///
    omitted keep(D*) drop(D1 D17) vertical ///
    xtitle("t: weeks relative to an event date", size(big)) ///
    ytitle ("{&beta}: Coefficients of event-time dummies", size(big)) ///
    plotregion(fcolor(white)) graphregion(fcolor(white)) ///
    xline(7,lwidth(vthick) lcolor(gray)) ///
    yline(0) ylab(-0.5(.5)1.8) ///
    recast(bar) citop barwidt(0.25) ///
    legend(rows(3) ring(0) position(2))

graph export "$dir/output/Figure4.pdf", replace


********************************************************************************
*** Table 4: Joint test statistics of pre-event coefficients

use "$dir/data/eventstudy", clear
xtset event_id

* For each platform: fixed-effects regression of logrank_<platform>, then a
* joint test that the coefficients on D2-D7 are all zero. The F statistic and
* p-value are kept in numbered macros (1 = yy, 2 = 9158, 3 = sinashow).
local row = 0
foreach platform in yy 9158 sinashow {
    local ++row
    xtreg logrank_`platform' D1 - D17 i.quarter i.year, fe
    test D2 D3 D4 D5 D6 D7
    local F`row' = r(F)
    local p`row' = r(p)
}

* Rows: platforms; columns: F statistic, p-value
matrix fstats_pvals = (`F1', `p1' \ `F2', `p2' \ `F3', `p3')

matrix rownames fstats_pvals = "Big" "Medium" "Small"
matrix colnames fstats_pvals = "F-stats" "p-value"

esttab matrix(fstats_pvals, fmt(%9.3f)) using "$dir/output/table4.tex", ///
    replace label nonumber booktabs ///
    title("Joint test statistics of pre-event coefficients")

********************************************************************************
*** Figure 5 - Coefficient plots of event-time dummies in regression (2)

use "$dir/data/eventstudy", clear
xtset event_id

// panel (a) (c) (e) -  All Events
* Parallel lists, one entry per platform: outcome suffix, estimate prefix,
* legend label, y-axis range and panel letter.
local outcomes "yy 9158 sinashow"
local prefixes "big med sm"
local letters  "a c e"
local labels   `""Big platform" "Medium platform" "Small platform""'
local yranges  `""-.15 .15" "-.1 .1" "-1 1""'

* Estimate and store the three fixed-effects regressions
forvalues k = 1/3 {
    local y   : word `k' of `outcomes'
    local est : word `k' of `prefixes'
    eststo `est'_all: xtreg logrank_`y' D1 - D17 i.quarter i.year, fe
}

* One coefficient plot per platform, exported as Figure5a / 5c / 5e
forvalues k = 1/3 {
    local est    : word `k' of `prefixes'
    local lab    : word `k' of `labels'
    local yrange : word `k' of `yranges'
    local letter : word `k' of `letters'

    coefplot (`est'_all, label(`lab')), omitted ///
        keep(D*) drop(D1 D17) vertical ///
        xtitle("t: weeks relative to an event date", size(big)) ///
        ytitle ("b: percentage change in traffic", size(big)) ///
        plotregion(fcolor(white)) graphregion(fcolor(white)) ///
        xline(7,lwidth(vthick) lcolor(gray)) lcolor(black) ///
        yscale(range(`yrange')) ///
        yline(0) recast(scatter) ///
        ciopt(color(blue) recast(rcap) lwidth(vthin)) ///
        legend(rows(3) ring(0) position(2))
    graph export "$dir/output/Figure5`letter'.pdf", replace
}

    
// panel (b) (d) (f) - Uncensored Events

use "$dir/data/eventstudy", clear
xtset event_id

// define uncensored events
* delay flags observations with D_ > 0
gen byte delay = (D_ > 0)

* Total wordcount per platform within each event x delay cell
foreach p in yy 9158 sinashow {
    bysort event_id delay: egen weekly_add_`p' = total(wordcount_`p')
}

* In a delay == 0 cell, a platform is flagged when its own total is zero
* while at least one of the other two platforms has a positive total.
gen byte uncensored_yy = (delay == 0) & (weekly_add_yy == 0) ///
    & (weekly_add_9158 > 0 | weekly_add_sinashow > 0)
gen byte uncensored_9158 = (delay == 0) & (weekly_add_9158 == 0) ///
    & (weekly_add_yy > 0 | weekly_add_sinashow > 0)
gen byte uncensored_sinashow = (delay == 0) & (weekly_add_sinashow == 0) ///
    & (weekly_add_yy > 0 | weekly_add_9158 > 0)

* Carry the flag to every observation of the same event
foreach p in yy 9158 sinashow {
    bysort event_id: egen uncensored_`p'_event = max(uncensored_`p')
}

* Parallel lists, one entry per platform
local outcomes "yy 9158 sinashow"
local prefixes "big med sm"
local letters  "b d f"
local labels   `""Big platform" "Medium platform" "Small platform""'
local yranges  `""-.15 .15" "-.1 .1" "-1 1""'

* Regressions restricted to each platform's flagged events, absorbing event,
* quarter and year effects
forvalues k = 1/3 {
    local y   : word `k' of `outcomes'
    local est : word `k' of `prefixes'
    eststo `est'_uncensored: reghdfe logrank_`y' D1 - D17 if uncensored_`y'_event == 1, absorb(event_id quarter year)
}

// coeff plot
forvalues k = 1/3 {
    local est    : word `k' of `prefixes'
    local lab    : word `k' of `labels'
    local yrange : word `k' of `yranges'
    local letter : word `k' of `letters'

    coefplot (`est'_uncensored, label(`lab')), omitted ///
        keep(D*) drop(D1 D17) vertical ///
        xtitle("t: weeks relative to an event date", size(big)) ///
        ytitle ("b: event-time dummy coefficient", size(big)) ///
        plotregion(fcolor(white)) graphregion(fcolor(white)) ///
        xline(7,lwidth(vthick) lcolor(gray)) lcolor(black) ///
        yscale(range(`yrange')) ///
        yline(0) recast(scatter) ///
        ciopt(color(blue) recast(rcap) lwidth(vthin)) ///
        legend(rows(3) ring(0) position(2))
    graph export "$dir/output/Figure5`letter'.pdf", replace
}


********************************************************************************
*** Figure 7: Pre-event Traffic Across Different Events

use "$dir/data/eventstudy", clear

// keep only traffic one or two weeks prior to event
keep if D_ == -1 | D_ == - 2
// keep only relevant variables
keep date event_id rank1 rank2 rank3
* Average rank per event over the retained pre-event observations
collapse (mean) rank*, by(event_id)

* Each panel is held in memory under a name so graph combine can stack them.
* name(..., replace) lets the do-file be re-run in the same Stata session;
* without it the second run stops because the named graph already exists.
twoway bar rank1 event_id, ytitle("Alexa Rank") title("Big Platform") ///
    xlabel(2(2)30) ylabel(0(7500)15000) barw(0.5) name(rank_big, replace)

twoway bar rank2 event_id, ytitle("Alexa Rank") title("Medium Platform") ///
    xlabel(2(2)30) ylabel(0(50000)100000) barw(0.5) name(rank_med, replace)

twoway bar rank3 event_id, ytitle("Alexa Rank") title("Small Platform") ///
    xlabel(2(2)30) ylabel(0(750000)1500000) barw(0.5) name(rank_sm, replace)

* Stack the three panels in a single column
graph combine rank_big rank_med rank_sm, col(1) iscale(1) ysize(13) xsize(8)

graph export "$dir/output/Figure7.pdf", replace

********************************************************************************
*** Table 5: Variation Coefficient of Pre-event Traffic Across Events

use "$dir/data/eventstudy", clear
// keep only traffic one or two weeks prior to event
keep if D_ == -1 | D_ == - 2
// keep only relevant variables
keep date event_id rank1 rank2 rank3
* Average rank per event over the retained pre-event observations
collapse (mean) rank*, by(event_id)

tabstat rank*, stat(mean sd cv min max n) save

* r(StatTotal) holds statistics in rows and variables in columns; transpose
* it so that each row is a platform and each column a statistic.
matrix stats = r(StatTotal)'

matrix rownames stats = Big Medium Small
matrix colnames stats = Mean StdDev StdDev/Mean Min Max Obs

* The title is plain ASCII: the "ff" ligature character (U+FB00) that was
* pasted into it is not mapped by standard pdfLaTeX input encodings.
esttab matrix(stats, fmt(%9.0f %9.0f %9.2f %9.0f %9.0f %9.0f)) ///
    using "$dir/output/table5.tex", ///
    label nonumber nomtitles replace ///
    title("Coefficient Variation of Pre-event Traffic Across Events")

********************************************************************************
*** Appendix G, Figure 12 - Coefficient plots of event-time dummies using App Annie Data

use "$dir/data/eventstudy", clear

* Attach the App Annie series by date. Merging straight from the file is
* equivalent to copying it to a tempfile first.
merge m:1 date using "$dir/data/AppAnnie", nogenerate

// keep only events that occurred between 7 weeks after the start date and
// 7 weeks prior to the end date of AppAnnie rank data (01/01/2016-12/15/2016)
keep if eventdate > td(01jan2016)+7*7 & eventdate < td(15dec2016)-7*7

// define uncensored events
gen byte delay = (D_ > 0)

* Use group totals, as in the Figure 5 definition. The earlier running sum
* (gen ... = sum()) depended on the arbitrary row order within each
* event x delay cell, so the flag below was not well defined.
foreach p in yy 9158 sinashow {
    bysort event_id delay: egen weekly_add_`p' = total(wordcount_`p')
}

gen byte uncensored_sinashow = (weekly_add_sinashow == 0) ///
    & (weekly_add_yy > 0 | weekly_add_9158 > 0) & delay == 0
bysort event_id: egen uncensored_sinashow_event = max(uncensored_sinashow)
* NOTE(review): uncensored_sinashow_event is built but the regression below
* uses all retained events - confirm this is intended.

gen double logrank_sinashow_appannie = log(RankCNSN)

* Declare the panel explicitly, as the other sections do before xtreg, fe
xtset event_id
eststo dlRankSmall: xtreg logrank_sinashow_appannie D1 - D17 i.quarter i.year , fe

coefplot (dlRankSmall), omitted ///
    keep(D*) drop(D1 D17) vertical ///
    xtitle("Weeks relative to event being censored") ///
    ytitle ("% change in Download Rank (small platform)") ///
    plotregion(fcolor(white)) graphregion(fcolor(white)) ///
    xline(7,lwidth(vthick) lcolor(gray)) lcolor(black) ///
    yscale(range(-0.5 0.5)) ///
    yline(0) ciopt(color(blue) recast(rcap) lwidth(thin))

graph export "$dir/output/Figure12.pdf", replace


********************************************************************************
*** Appendix G, Figure 13

* Load the QuestMobile series and keep only the dates that also appear in
* the site-rank data.
use "$dir/data/QuestMobile", clear
merge 1:1 date using "$dir/data/SVP_siteRankData", keep(match) nogenerate
rename DailyActiveUsersK DAU
order date

* Raw time series: rank1 on the first y-axis, DAU on the second
tsset date
label variable rank1 "rank"
twoway ///
    (tsline rank1, yaxis(1) lcolor(gray)) ///
    (tsline DAU,   yaxis(2) lcolor(maroon)), ///
    legend(position(11) cols(1) ring(0))
graph export "$dir/output/Figure13.pdf", replace

* Pairwise correlation of DAU with the rank, in levels and in logs
pwcorr DAU rank1, sig star(.001)
gen double logRank = log(rank1)
pwcorr DAU logRank, sig star(0.001)

* Dickey-Fuller unit-root tests on the levels (dfuller with its default lags)
dfuller rank1
dfuller DAU

* Row-to-row first differences, then the same tests and the correlation
* on the differenced series
gen double fd_logrank = logRank - logRank[_n-1]
gen double fd_DAU     = DAU - DAU[_n-1]

dfuller fd_logrank
dfuller fd_DAU
pwcorr fd_logrank fd_DAU, sig star(.001)

********************************************************************************
*** Appendix H, Table 13: Runs Test on Serial Correlation

import delimited "$dir/data/estimation.csv", clear
merge 1:1 event_id using "$dir/data/SVP_eventdate", nogen

// run test
* One column per platform series (a1, a2, a3); rows hold the four stored
* results collected from runtest.
local firms "a1 a2 a3"
matrix results = J(4, 3, .)

local i = 1
foreach firm of local firms {
    runtest `firm', threshold(0)

    local j = 1
    foreach r in N_below N_above n_runs p {
        matrix results[`j', `i'] = r(`r')
        local ++j
    }
    local ++i
}

matrix colnames results = Big Medium Small
* runtest counts values <= threshold as "below" and values > threshold as
* "above", so the second row is labelled strictly-greater (gt), not geq.
matrix rownames results = N(a_leq_0) N(a_gt_0) N(runs) p-value

esttab matrix(results, fmt(%3.2f)) ///
		using "$dir/output/Table13.tex", ///
		label nonumber nomtitles replace ///
		title("Runs Test on Serial Correlation in Platform Censorship Choices")


*** Appendix H, Figure 14: Runs Test on Serial Correlation
// visualize three platforms' serial decisions via an overlay plot

* Pull a2 and a3 values of 1 down and values of 0 up by a fixed offset
* (.075 for a2, .15 for a3) so the three series do not overplot.
local nudge2 .075
local nudge3 .15
forvalues k = 2/3 {
    replace a`k' = a`k' - `nudge`k'' if a`k' == 1
    replace a`k' = a`k' + `nudge`k'' if a`k' == 0
}

twoway ///
    (scatter a1 event_id, mcolor(gray)) ///
    (scatter a2 event_id, mcolor(blue)) ///
    (scatter a3 event_id, mcolor(red)), ///
    yline(0.5, lpattern(dash) lcolor(blue)) ///
    ylabel(0  1) xlabel(2(2)30) ///
    legend(rows(1) order(1 "Big Platform" 2 "Medium Platform" 3 "Small Platform"))

graph export "$dir/output/Figure14.pdf", replace

