*** 	"How Allowing a Little Bit of Dissent Helps Control Social Media: 
***
***		Impact of Market Structure on Censorship Compliance" 
***
***		Journal of the European Economic Association
***
********************************************************************************

clear
set more off

// Project root: taken from the first command-line argument of the do-file,
//   e.g.  do thisfile.do "/path/to/replication"
// When no argument is supplied, fall back to the current working directory so
// that "$dir/data/..." does not silently resolve to "/data/...".
args dir_path
if "`dir_path'" == "" {
	local dir_path "`c(pwd)'"
}
global dir "`dir_path'"

// To hard-code the location instead, uncomment and edit the next line:
* global dir "CHANGE TO YOUR OWN PATH"

cd "$dir"

********************************************************************************
*** 	Constructing dataset for event study

// Load the keyword list and retain only rows whose theme is "event"
use "$dir/data/SVP_keywords", clear
drop if theme != "event"

// Link keywords to SVP_eventdate through category (one eventdate row per
// category); rows without a counterpart on either side are discarded.
merge m:1 category using "$dir/data/SVP_eventdate", nogen keep(match)

// Number of keyword rows in each firm x date x event cell
bysort firm date event_id: gen int wordcount = _N

// Reduce to one row per cell. wordcount is constant within a cell, so its
// mean equals the cell count; category and dateEvent are taken from the
// first row of the cell.
collapse (mean) wordcount (first) category dateEvent, by(firm date event_id)

save "$dir/data/wordcount", replace

// Build the event-study panel: one row per (event_id, date) with each firm's
// daily keyword count, the sites' traffic ranks and event-week dummies.
// Starts from the firm x date x event data left in memory by the previous step.

// reshape censor count by event
encode firm, gen (firmID)

// encode numbers the firm strings in alphabetical order; the renames below
// hard-code that numbering, so verify it before relying on it.
forvalues k = 1/3 {
	local firmname`k' : label firmID `k'
}
assert "`firmname1'" == "9158" & "`firmname2'" == "sinashow" & "`firmname3'" == "yy"

drop firm
reshape wide wordcount, i(date event_id dateEvent) j(firmID)
recode wordcount? (. = 0)	// a firm with no row for this event-date added no words
ren wordcount1 wordcount_9158
ren wordcount2 wordcount_sinashow
ren wordcount3 wordcount_yy

// fill missing dates and events: balance the panel, treating filled-in
// days as days with zero added words
tsset event_id date
tsfill, full
recode wordcount* (. = 0)
// rows created by tsfill have no dateEvent; spread the event date to them
bysort event_id: egen eventdate = max(dateEvent)
format eventdate %td
drop dateEvent category

// merge with traffic rank data
// First pass keeps unmatched rows from both sides (no keep() option).
// NOTE(review): presumably this is meant to stretch every event's panel over
// the dates covered by the rank data before the final match -- confirm.
merge m:1 date using "$dir/data/SVP_siteRankData", nogen
tsset event_id date
tsfill, full
recode wordcount* (. = 0)
ren eventdate eventdate_aux
bysort event_id: egen eventdate = max(eventdate_aux)
format eventdate %td
drop eventdate_aux
drop if event_id == .	// rank-only rows that belong to no event
drop rank*
// Second pass: re-attach the ranks to every remaining row. The rank file is
// keyed by date (the m:1 merge above already requires it to be unique on
// date), so m:1 is the correct form; m:m is advised against in the Stata
// manual and would hide duplicate dates instead of reporting them.
merge m:1 date using "$dir/data/SVP_siteRankData", keep(match) nogen

// negative log rank, so that a larger value means a better (smaller) rank.
// The rank1/rank2/rank3 -> yy/9158/sinashow mapping is as coded here;
// NOTE(review): confirm against the layout of SVP_siteRankData.
gen double logrank_yy 		 = -log(rank1)
gen double logrank_9158 	 = -log(rank2)
gen double logrank_sinashow  = -log(rank3)

// create quarter, year fixed effect
gen quarter = quarter(date)
gen year    = year(date)

// create event-week dummies
// Week 0 covers the event date +/- 3 days; weeks are capped at +/- 8.
gen int D_ = floor((date - eventdate + 3)/7)
qui replace D_ = 8    if D_ >   8
qui replace D_ = -8   if D_ < - 8

// create labeled dummies (for coeff plots)
gen int DL = D_ + 8
labmask DL, values(D_)	// labmask is a user-written command and must be installed
tab D_

// D1..D17 correspond to event weeks -8..8. Each dummy is generated from its
// own week value rather than with -tab, gen()-, which numbers dummies by the
// levels that happen to be present and would shift the mapping if a week
// were absent from the data.
local dAll = 2 * 8 + 1
forval i = 1 / `dAll' {
	local D_lbl = - 8 + `i' - 1
	gen byte D`i' = (D_ == `D_lbl')
	label var D`i' "`D_lbl'"
}

// D8 is the dummy for week -1; zeroing it leaves that week without an
// indicator (presumably the omitted reference week in the regressions).
replace D8 = 0

save "$dir/data/eventstudy", replace


*** ----------------------------------------------------------------------------
*** 	Constructing dataset for estimation

// Build the estimation file: one row per event with each firm's 0/1 decision
// (a1-a3) and a traffic measure (traffic1-traffic3), exported as CSV.
// Firm numbering in this section: 1 = yy, 2 = 9158, 3 = sinashow.

// -clear- is the documented option of -use- for discarding the data in
// memory; -replace- belongs to -save-, not to -use-.
use "$dir/data/wordcount", clear

// generate event-week dummies
// Week 0 covers the event date +/- 3 days.
gen int D_ = floor((date - dateEvent + 3)/7)

// convert wordcount to binary 0/1 decision variable:
// a = 1 for rows dated no later than the event week (D_ <= 0)
gen byte a = (D_ <= 0)
gen int firmID = 1 if firm == "yy"
replace firmID = 2 if firm == "9158"
replace firmID = 3 if firm == "sinashow"
assert firmID ~= .	// every row must belong to one of the three firms
drop firm
// a firm's decision for an event is 1 if any of its rows qualifies
collapse (max) a (first) dateEvent category, by(event_id firmID)

// reshape censor count by event
sort firmID
reshape wide a, i(event_id dateEvent category) j(firmID)
recode a? (. = 0)	// a firm with no rows for an event is coded 0

// Average traffic rank of each site over event weeks -2 and -1, taken from
// the event-study panel. (The month/year/week variables formerly created
// here were discarded by the -keep- below without being used, so they are
// no longer generated.)
preserve
use "$dir/data/eventstudy", clear
keep if D_ == -1 | D_ == - 2
keep date event_id rank1 rank2 rank3
collapse (mean) rank*, by(event_id)
tempfile siteRankWeek
save "`siteRankWeek'", replace
restore

// only events that have pre-event rank data are retained
merge m:1 event_id using "`siteRankWeek'", keep (match) nogenerate
keep event_id a? rank1 rank2 rank3

// go long (one row per event x firm) to compute traffic for all firms at once
reshape long a rank, i(event_id) j(firm)
// traffic = 1000 * e / rank.
// NOTE(review): this is below 1 only when rank exceeds roughly 2718 --
// confirm that this holds for all sites in the rank data.
gen double traffic = exp(1 + log(1/rank)) * 10^3 // normalize each firm's traffic to be less than 1
keep event_id firm a traffic
reshape wide a traffic, i(event_id) j(firm)
order event_id a1 a2 a3 traffic1 traffic2 traffic3

export delimited "$dir/data/estimation.csv", replace
