set.seed(1206)
#options (warn = -1)
setwd("D:/Baidu/Tsinghua/wp/fear/dataclean/")
load("fullData.RData")


#datall <- as.data.frame(sapply(allData, haven::zap_labels))
dat <- as.data.frame(sapply(fullData, haven::zap_labels))
dat$zAge <- arm:::rescale(dat$age)
dat$educ <- dat$edu
dat$trt <- ifelse(dat$occR==1&dat$party==1, 1, 0)
dat$outcome <- as.numeric(dat$trust_goverR)
dat$tempo <- dat$time
dat$province <- datall$province
dat$source <- c(rep("cbs2002", 3183), rep("cbs2014", 4068), rep("cbs2019", 4941),
    rep("cgss2010", 11783), rep("pku2008", 4004), rep("pku2009", 3858),
    rep("css2006", 7061), rep("css2017", 10143), rep("css2019", 10283),
    rep("css2021", 10136), rep("cbs2011",3473))

sourceNames <- unique(dat$source)
sourceNames <- sourceNames[c(2,3,5,10,11)]
kSource <- length(sourceNames)
dat2 <- na.omit(subset(dat, subset=(dat$source %in% sourceNames)))

trendY1 <- tapply(dat2$outcome[dat2$trt==1], dat2$year[dat2$trt==1], FUN=mean)
trendY0 <- tapply(dat2$outcome[dat2$trt==0], dat2$year[dat2$trt==0], FUN=mean)


nyear <- length(unique(dat2$year))
plot(x=1:nyear, y=trendY1, type="l", axes=FALSE, frame.plot=TRUE, xlab="", ylab="", ylim=c(0.8,1))
axis(1, 1:nyear, sort(unique(dat2$year)))
axis(2)
lines(x=1:nyear, y=trendY0, col=2)

M1 <- glm(outcome ~ trt + tempo + trt * tempo + age + female + marriage + factor(edu) + hukouRural, family = quasibinomial, data = dat2)



library(MatchIt)

matchR1 <- function(data){
    require(MatchIt)
    fDataR <- ps <- NULL
    for(k in 1:kSource){
        datTmp <- subset(data, subset=dat$source == sourceNames[k])
        pfit <- matchit(trt ~ age + female + marriage + factor(edu) + hukouRural + factor(province), family = binomial, data = data, ratio=1, replace=TRUE)
        mdata <- match.data(pfit)
        ps <- c(ps, pfit$distance)
        fDataR <- rbind.data.frame(fDataR, mdata)
    }
    return(list(pscores = ps, data=fDataR))
}

MR1 <- matchR1(dat2)
fDataR <- MR1$data
didregR <- glm(outcome ~ trt + tempo + trt * tempo + age + female + marriage + factor(edu) + hukouRural + factor(province), family = quasibinomial, data = fDataR, weight=weights)



yrNames <- sort(unique(dat2$year))
yt11 <- yc11 <- NULL
for(i in 1:length(yrNames)){
    tmp <- with(fDataR, weighted.mean(outcome[trt==1&year==yrNames[i]], w=weights[trt==1&year==yrNames[i]], na.rm=TRUE))
    yt11 <- c(yt11, tmp)
    tmp <- with(fDataR, weighted.mean(outcome[trt==0&year==yrNames[i]], w=weights[trt==0&year==yrNames[i]], na.rm=TRUE))
    yc11 <- c(yc11, tmp)
}

plot(x=1:length(yt11), y=yt11, type="l", axes=FALSE, frame.plot=TRUE, xlab="", ylab="", ylim=c(0.7,1))
axis(1, 1:length(yt11), sort(unique(dat2$year)))
axis(2)
lines(x=1:length(yt11), y=yc11, col=2)





FOO <- function(data=dat2){
    betas <- NULL
    n <- dim(data)[1]
    idx <- sample(1:n, size=n, replace=TRUE)
    tmp <- as.data.frame(data[idx,])
    fDataR <- matchR1(tmp)$data
    didreg <- glm(outcome ~ trt + tempo + trt * tempo + age + female + marriage + educ + hukouRural, family = quasibinomial, data = fDataR, weight=weights)
    betas <- rbind(betas, coef(didreg))
    return(betas)
}



set.seed(123)
BOOT <- function() replicate(100, FOO(data=dat))
library(parallel)
cl <- makeCluster(parallel::detectCores())
#cl <- makeCluster(15)
clusterExport(cl = cl, c("dat2","FOO", "matchR1", "kSource", "sourceNames")) # export object to each thread
tryCatch(res1 <- clusterCall(cl=cl, fun = BOOT), finally = stopCluster(cl))
library(abind)
library(R2WinBUGS)
res1 <- abind(lapply(res1, print), along=3)
out1 <- aperm(res1)





matchR3 <- function(data){
    require(MatchIt)
    fDataR <- ps <- NULL
    for(k in 1:kSource){
        datTmp <- subset(data, subset=dat$source == sourceNames[k])
        pfit <- matchit(trt ~ age + female + marriage + educ + hukouRural, family = binomial, data = data, ratio=3, replace=TRUE)
        mdata <- match.data(pfit)
        ps <- c(ps, pfit$distance)
        fDataR <- rbind.data.frame(fDataR, mdata)
    }
    return(list(pscores = ps, data=fDataR))
}

MR3 <- matchR3(dat)
fDataN <- MR3$data
didregN <- glm(outcome ~ trt + tempo + trt * tempo + age + female + marriage + educ + hukouRural, family = quasibinomial, data = fDataR, weight=weights)



yrNames <- sort(unique(dat$year))
yt13 <- yc13 <- NULL
for(i in 1:9){
    tmp <- with(fDataN, weighted.mean(outcome[trt==1&year==yrNames[i]], w=weights[trt==1&year==yrNames[i]], na.rm=TRUE))
    yt13 <- c(yt13, tmp)
    tmp <- with(fDataN, weighted.mean(outcome[trt==0&year==yrNames[i]], w=weights[trt==0&year==yrNames[i]], na.rm=TRUE))
    yc13 <- c(yc13, tmp)
}

plot(x=1:9, y=yt13, type="l", axes=FALSE, frame.plot=TRUE, xlab="", ylab="")
axis(1, 1:9, sort(unique(dat$year)))
axis(2)
lines(x=1:9, y=yc13, col=2)

didregN <- glm(outcome ~ trt + tempo + trt * tempo + age + female + marriage + educ + party + hukouRural, family = quasibinomial, data = fDataN, weight=weights)
