# WhatIsRiskMarket Analysis Script

# --- Session setup -------------------------------------------------------
# NOTE: rm(list=ls()) removes all (non-hidden) objects of the environment the script runs in,
# so anything defined before this line is lost. Run the script in a fresh session.
rm(list=ls()) # Clears memory
graphics.off() # Closes all open graphics devices
# require() returns FALSE instead of raising an error when pacman is missing; in that case it is installed.
# pacman is subsequently only used through the pacman:: prefix, so it does not have to be attached.
if (!require("pacman")) install.packages("pacman") #Installs package for package installation
# Loads all packages needed by the analysis (p_load presumably installs missing ones first - see the pacman documentation)
pacman::p_load("reshape","PerformanceAnalytics","xlsx","MASS","texreg","plm","zTree","reshape2","Hmisc","car","dplyr","devtools")
library("devtools") #Attaches devtools, which provides install_github() used in the next line
# NOTE(review): install_github() is called on every run of the script, so each run contacts GitHub.
install_github("stpalan/SPTools") #Installs the SPTools package from the GitHub repository stpalan/SPTools
library("SPTools") # Attaches SPTools (SPNum() and SPClusterSE() used below are presumably defined there - confirm)

##### Parameters

#~~~ Specify full path and file name of source files ~~~#
# File names must contain <6 digits>_<4 digits> directly followed by the extension.
# The dot is escaped and the extension is anchored at the end of the name, so that
# look-alikes such as "*.xlsx" or "*_xls.bak" are no longer picked up by accident.
SourceFiles<-list.files("FilesForAnalysis2Markets/.",pattern="[0-9]{6}_[0-9]{4}\\.xls$",full.names=TRUE,recursive=FALSE)
QSourceFiles<-list.files("FilesForAnalysis2Markets/.",pattern="[0-9]{6}_[0-9]{4}\\.sbj$",full.names=TRUE,recursive=FALSE)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
Tables<-c("globals","returns","subjects","transactions","offers","timelog","session")
RemovePracticePeriodTables<-Tables[Tables!="session"] # Subset of tables which have life < session
ShowPlots<-TRUE # Should plots be shown on screen or only written to disk?
RemovePracticePeriods<-TRUE
RemoveOutliers<-TRUE    #Should session 7 period 1 be removed?

# These two values are overwritten again right after the data import further below
NumPeriods<-8
NumSubjects<-8

# Runs the data preparation script (the Data object used below is expected to be created there)
source("GIMS_DataPreparation_2Markets.r")

# Shifts both risk perception items by one so that they run from 1 to 7 instead of from 0 to 6
Data$subjects[["RiskPerception2Mkts[1]"]]<-Data$subjects[["RiskPerception2Mkts[1]"]]+1
Data$subjects[["RiskPerception2Mkts[2]"]]<-Data$subjects[["RiskPerception2Mkts[2]"]]+1

if (RemoveOutliers==TRUE){
    # Keeps every transaction that does not stem from period 1 of session 7 (treated as outlier)
    Data$transactions<-Data$transactions[Data$transactions$R.Session!=7|Data$transactions$Period!=1,]
}

# Dimensions used from here on
NumPeriods<-3
NumSubjects<-12


############## WhatIsRiskMarket ##########################

# Constants describing the design
NumDistributions<-2
DistributionText<-c("NORMAL","NegSKEWNESS","PosSKEWNESS","BigLOSS","WIDER","FrequentLOSS","NoLOSS","KURTOSIS")

# Joins the period IDs in Lookup with selected columns of the globals table.
# NOTE(review): merge() puts the key column first, so [,-1] removes R.PeriodID again; the two
# merges below therefore join on whatever other column names the tables share - please verify.
R.WRM.Lookup<-merge(
    x=Lookup["R.PeriodID"],
    y=Data$globals[,c("R.PeriodID","Distribution","StartTime","StartTimeCDA","Distribution2Mkts[1]","Distribution2Mkts[2]")]
)
R.WRM.Lookup<-R.WRM.Lookup[,-1]

# Transaction data enriched with the globals columns held in R.WRM.Lookup
R.WRM.transactions<-merge(x=Data$transactions,y=R.WRM.Lookup)
# R.TradeTime: transaction time minus the offset between StartTimeCDA and StartTime
R.WRM.transactions<-cbind(
    R.WRM.transactions,
    R.TradeTime=R.WRM.transactions$Time-(R.WRM.transactions$StartTimeCDA-R.WRM.transactions$StartTime)
)
NumSessions<-max(as.numeric(R.WRM.transactions[,"R.Session"]))

# Subjects table enriched with the globals columns held in R.WRM.Lookup
R.WRM.subjects<-merge(x=Data$subjects,y=R.WRM.Lookup)

# For every session, collects the periods that are present in the globals table
SessionPeriods<-list()
for (Session in 1:NumSessions){
    fInSession<-Data$globals$R.Session==Session
    SessionPeriods[[Session]]<-Data$globals$Period[fInSession]
}

##### Creates matrix holding average risk perception of distributions
# One row per distribution (6 and 7); the three average columns start as NA and are filled in later
AvgDistributionPerception<-matrix(
    NA_integer_,
    nrow=2,
    ncol=4,
    dimnames=list(NULL,c("Distribution","Avg. RiskPerception (1=not risky, 7=very risky)","Avg. Price","Avg. Price Last Minute"))
)
AvgDistributionPerception[,"Distribution"]<-6:7



### Calculates characteristic measures of distributions
# Returns: one column per distribution (6 and 7), each holding the 100 returns of treatment 2
# that carry the same date as the first row of the raw returns table.
Returns<-matrix(NA,nrow=100,ncol=NumDistributions) # Creates empty matrix for returns
for (Distribution in 6:7) {
    Returns[,Distribution-5]<-RawData$returns$Return[RawData$returns$Treatment==2&RawData$returns$Distribution==Distribution&RawData$returns$Date==RawData$returns$Date[1]] # Fills it
}

SummaryStats<-c("Mean","StdDev","Semivar", "PLoss", "ELoss", "Skewness","Kurtosis","MinReturn","MaxReturn")
# Defines matrix columns
DistributionCharacteristics<-matrix(NA,nrow=NumDistributions,ncol=length(c("Distribution",SummaryStats))) # Creates matrix
dimnames(DistributionCharacteristics)<-list(6:7,c("Distribution",SummaryStats)) # Names rows and columns
# Fills matrix (columns are addressed by name so that the order of SummaryStats cannot get out of sync)
DistributionCharacteristics[,"Distribution"]<-6:7
DistributionCharacteristics[,"Mean"]<-apply(Returns,2,mean)
# Population standard deviation: sd() divides the sum of squares by n-1, so the correction factor
# has to be applied under the square root. (The former factor (n-1)/n is the one for the variance.)
DistributionCharacteristics[,"StdDev"]<-apply(Returns,2,sd)*sqrt((nrow(Returns)-1)/nrow(Returns))
# SemiVariance(), skewness() and kurtosis() come from the packages attached at the top of the
# script (presumably PerformanceAnalytics - confirm that no later package masks them)
DistributionCharacteristics[,"Semivar"]<-apply(Returns,2,SemiVariance)
DistributionCharacteristics[,"PLoss"]<-apply(Returns,2,function(x) sum(x<0)/nrow(Returns)) # Share of returns below zero
DistributionCharacteristics[,"ELoss"]<-apply(Returns,2,function(x) sum(x[x<0]/nrow(Returns))) # Sum of negative returns divided by the number of all returns
DistributionCharacteristics[,"Skewness"]<-apply(Returns,2,skewness)
DistributionCharacteristics[,"Kurtosis"]<-apply(Returns,2,kurtosis)
DistributionCharacteristics[,"MinReturn"]<-apply(Returns,2,min)
DistributionCharacteristics[,"MaxReturn"]<-apply(Returns,2,max)

### Prepares regression datasets
# Adds the two Distribution2Mkts columns of the globals table to the subjects and to the
# transactions table, matched on session and period
Data$subjects<-merge(
    x=Data$subjects,
    y=Data$globals[,c("R.Session","Period","Distribution2Mkts[1]","Distribution2Mkts[2]")],
    by=c("R.Session","Period")
)
Data$transactions<-merge(
    x=Data$transactions,
    y=Data$globals[,c("R.Session","Period","Distribution2Mkts[1]","Distribution2Mkts[2]")],
    by=c("R.Session","Period")
)

# Prepares matrix of market prices merged with regressor data
# Skeleton with one row per session (1-10), period (1-3) and distribution (6-7); all measures start as NA
MeanPrices<-cbind(
    R.Session=rep(1:10,each=6),
    Period=rep(1:3,times=10,each=2),
    Distribution=rep(6:7,times=30),
    AvgPrice=NA,
    AvgPriceLast60=NA,
    AvgRiskPerception=NA,
    Volume=NA,
    VolumeLast60=NA,
    MedianPrice=NA,
    MedianPriceLast60=NA
)
rownames(MeanPrices)<-1:nrow(MeanPrices)
# Sorts by session, period and distribution before converting to a data frame
MeanPrices<-MeanPrices[order(MeanPrices[,"R.Session"],MeanPrices[,"Period"],MeanPrices[,"Distribution"]),]
MeanPrices<-data.frame(MeanPrices)

# Fills MeanPrices with price and volume statistics per session, period and distribution
for (Session in 1:NumSessions){
    for (fP in 1:3){
        for (Distribution in 6:7){
            # Row of MeanPrices belonging to the current session/period/distribution
            fRow<-MeanPrices$R.Session==Session&MeanPrices$Period==fP&MeanPrices$Distribution==Distribution
            # Only proceeds if such a row exists. (The former test used length() on a data frame,
            # which counts columns and was therefore always true.)
            if (any(fRow)){
                fM<-ifelse(Data$globals$`Distribution2Mkts[1]`[SPNum(Data$globals$R.Session)==Session&Data$globals$Period==fP]==Distribution,2,1) #Finds out which market corresponds to current distribution
                fM.inverse<-ifelse(fM==1,2,1)


                # Note: The first distribution corresponds to the first risk perception. However, the first distribution corresponds to the trading data from the second market. Thus, these have to be assembled carefully to make no mistakes.


                # Transactions of the current session/period in the market trading this distribution,
                # and the subset of them with Time>120 (labelled "Last60" - presumably the last 60
                # seconds of a 180 second trading period; confirm)
                fTrades<-Data$transactions$R.Session==Session&Data$transactions$Period==fP&Data$transactions$Market==fM
                fTradesLast60<-fTrades&Data$transactions$Time>120

                MeanPrices[fRow,"AvgPrice"]<-mean(Data$transactions$Price[fTrades],na.rm=TRUE)
                MeanPrices[fRow,"AvgPriceLast60"]<-mean(Data$transactions$Price[fTradesLast60],na.rm=TRUE)
                # Risk perception is read from the period-1 subject rows and written to all periods
                # of the session (there is deliberately no period filter on the left-hand side)
                MeanPrices[MeanPrices$R.Session==Session&MeanPrices$Distribution==Distribution,"AvgRiskPerception"]<-mean(Data$subjects[,paste("RiskPerception2Mkts[",fM.inverse,"]",sep="")][Data$subjects$R.Session==Session&Data$subjects$Period==1],na.rm=TRUE)
                MeanPrices[fRow,"Volume"]<-sum(Data$transactions$Volume[fTrades],na.rm=TRUE)
                MeanPrices[fRow,"VolumeLast60"]<-sum(Data$transactions$Volume[fTradesLast60],na.rm=TRUE)
                MeanPrices[fRow,"MedianPrice"]<-median(Data$transactions$Price[fTrades],na.rm=TRUE)
                MeanPrices[fRow,"MedianPriceLast60"]<-median(Data$transactions$Price[fTradesLast60],na.rm=TRUE)
            }
        }
    }
}
# Attaches the distribution characteristics (regressors) to every MeanPrices row
MeanPrices<-merge(MeanPrices,DistributionCharacteristics,by="Distribution")
if (RemoveOutliers){
    # Drops rows without an average price (the transactions of the outlier period were removed above)
    MeanPrices<-MeanPrices[!is.na(MeanPrices$AvgPrice),]
}

# Writes the per-distribution means of risk perception, price and last-minute price
# into columns 2 to 4 of the AvgDistributionPerception matrix
for (Distribution in 6:7){
    fRows<-MeanPrices$Distribution==Distribution
    AvgDistributionPerception[Distribution-5,2:4]<-vapply(
        c("AvgRiskPerception","AvgPrice","AvgPriceLast60"),
        function(fCol) mean(MeanPrices[fRows,fCol]),
        numeric(1)
    )
}


### Regression

# Builds one row per session, distribution and subject ...
RiskPerceptions<-data.frame(list(
    R.Session=rep(1:NumSessions,each=NumSubjects*NumDistributions),
    Distribution=rep(6:7,each=NumSubjects,times=NumSessions),
    Subject=rep(1:NumSubjects,times=NumSessions*NumDistributions)
))
# ... adds the distribution characteristics and sorts by session, distribution and subject
RiskPerceptions<-merge(RiskPerceptions,DistributionCharacteristics,by="Distribution")
RiskPerceptions<-RiskPerceptions[order(RiskPerceptions$R.Session,RiskPerceptions$Distribution,RiskPerceptions$Subject),]
# Looks up each subject's risk perception of the row's distribution.
# seq_len() keeps the loop from running over c(1,0) should the table ever be empty.
for (fR in seq_len(nrow(RiskPerceptions))){
    # Period-1 row of the current subject in the subjects table
    fSubjectRow<-Data$subjects$R.Session==RiskPerceptions$R.Session[fR]&Data$subjects$Period==1&Data$subjects$Subject==RiskPerceptions$Subject[fR]
    # Risk perception item 1 belongs to the distribution stored in Distribution2Mkts[1], item 2 to the other one
    fM<-ifelse(Data$subjects$`Distribution2Mkts[1]`[fSubjectRow]==RiskPerceptions$Distribution[fR],1,2)
    RiskPerceptions[fR,"RiskPerception"]<-Data$subjects[fSubjectRow,paste0("RiskPerception2Mkts[",fM,"]")]
}

# Regression on distributions

# Appends one 0/1 indicator column per distribution to MeanPrices,
# named after the distribution's label in DistributionText (entries 6 and 7)
Temp1<-ncol(MeanPrices)
for (Distribution in 6:7){
    MeanPrices[[DistributionText[Distribution]]]<-as.integer(MeanPrices$Distribution==Distribution)
}


#R32

# Appendix D --------------------------------------------------------------

# Mean risk perception per distribution
RiskPerceptions %>% group_by(RiskPerceptions$Distribution) %>% summarise_at(.vars = c("RiskPerception"),.funs = c(mean="mean"),na.rm=TRUE)
# Paired tests of risk perception between the two distributions.
# The two samples are passed as separate vectors because recent R versions (4.4 and later) reject
# 'paired' in the formula interface of wilcox.test()/t.test(). split() orders the groups like the
# formula interface did (distribution 6 first, then 7); within each group the rows are in
# session/subject order, so element i of both vectors belongs to the same subject.
t.RiskPerception<-split(RiskPerceptions$RiskPerception,RiskPerceptions$Distribution)
wilcox.test(t.RiskPerception[[1]],t.RiskPerception[[2]],paired=TRUE)
t.test(t.RiskPerception[[1]],t.RiskPerception[[2]],paired=TRUE)

# Mean of the average prices per distribution
as.data.frame(MeanPrices %>% group_by(MeanPrices$Distribution) %>% summarise_at(.vars = c("AvgPrice"),.funs = c(mean="mean"),na.rm=TRUE))
# Paired tests of average prices between the NoLOSS==0 and the NoLOSS==1 rows (same order as before)
t.AvgPrice<-split(MeanPrices$AvgPrice,MeanPrices$NoLOSS)
wilcox.test(t.AvgPrice[[1]],t.AvgPrice[[2]],paired=TRUE)
t.test(t.AvgPrice[[1]],t.AvgPrice[[2]],paired=TRUE)
# Mean of the average prices per distribution and period
as.data.frame(MeanPrices %>% group_by(MeanPrices$Distribution,MeanPrices$Period) %>% summarise_at(.vars = c("AvgPrice"),.funs = c(mean="mean"),na.rm=TRUE))

# Data set without session 5
MeanPrices.noOutliers<-MeanPrices[MeanPrices$R.Session!=5,]


# Figure D.9 --------------------------------------------------------------

# Period-3 subset used for the two lower panels
MeanPrices.noOutliersP3<-MeanPrices.noOutliers[MeanPrices.noOutliers$Period==3,]
dev.new("RiskVsPrice")
{
    par(mfrow=c(2,2), oma=c(0,0,2,0), mar=c(5, 5, 0.5, 2),  bg="white")
    # All four panels share the y-axis range of the overall average price
    fYLim<-range(MeanPrices.noOutliers$AvgPrice)
    # Panel definitions: data set, price column and y-axis label
    fPanels<-list(
        list(Rows=MeanPrices.noOutliers,Price="AvgPrice",YLab="Average price"),
        list(Rows=MeanPrices.noOutliers,Price="AvgPriceLast60",YLab="Average price last 60s"),
        list(Rows=MeanPrices.noOutliersP3,Price="AvgPrice",YLab="Average price last period"),
        list(Rows=MeanPrices.noOutliersP3,Price="AvgPriceLast60",YLab="Avg. price last period, last 60s")
    )
    for (fPanel in fPanels){
        fX<-fPanel$Rows$AvgRiskPerception
        fY<-fPanel$Rows[[fPanel$Price]]
        # Scatter plot of price against average risk perception with the fitted regression line;
        # the R-squared of the fit is shown in the x-axis label
        MPlot <- lm(fY~fX)
        plot(fX,fY, ylim=fYLim, xlab=substitute(paste('Average risk perception, R'^2,"=",r2),list(r2=round(summary(MPlot)$r.squared,3))),ylab=fPanel$YLab, main="")
        abline(MPlot)
    }
    # Copies the finished figure into a PNG device
    dev.copy(png,"WhatIsRiskMarket_2Markets_Scatter_PricesVsAvgRiskPerception_noOutliers.png", bg="white", width=1700, height=1700, res=300)
    #dev.print(device=pdf,file="WhatIsRiskMarket_Scatter_PricesVsAvgRiskPerception.pdf", bg="white", width=1700, height=1700)
}
dev.off() # Closes the PNG device, which writes the file
par(mfrow=c(1,1))



#R35
# OLS regression of the average price on the NoLOSS dummy, the period and their interaction
MMarketAvgPrice.2Markets.2<-lm(AvgPrice~NoLOSS+Period+Period*NoLOSS, data=as.data.frame(MeanPrices.noOutliers))
# Same specification as a panel regression with session fixed effects (within estimator).
# The argument is spelled 'model' (lower case): R matches argument names case-sensitively, so the
# former 'Model' was not recognised as the model argument.
MMarketAvgPrice.2Markets.2FE<-plm(AvgPrice~NoLOSS+Period+Period*NoLOSS, data=as.data.frame(MeanPrices.noOutliers), effect="individual", model="within",index="R.Session")    #Estimates linear session fixed effects panel regression

# Table D.7 ---------------------------------------------------------------

# Writes both regression models into one LaTeX table (file given in the 'file' argument below).
# The standard errors and p-values shown in the table are replaced through override.se / override.pvalues.
# The replacement values are picked by linear (column-major) indexing: for a result matrix with k rows,
# elements (k+1):(2k) form the second column and (3k+1):(4k) the fourth column.
# NOTE(review): this assumes that SPClusterSE() and coeftest() return k x 4 matrices whose second column
# holds the standard errors and whose fourth column holds the p-values - confirm for SPClusterSE().
t.Coef<-length(MMarketAvgPrice.2Markets.2$coefficients) #Temporary variable for number of coefficients in OLS model
RegressionOutput7b.LaTeX <- texreg(
    list(MMarketAvgPrice.2Markets.2, MMarketAvgPrice.2Markets.2FE),
    file="WhatIsRiskMarket_2Markets_Regression_Reg7b.tex", stars = c(0.01, 0.05, 0.1),
    custom.model.names=c("AvgPrice (OLS)", "AvgPrice (FE)"),
    custom.coef.map=list("(Intercept)"="Intercept","NoLOSS"="NoLOSS","Period"="Period","NoLOSS:Period"="NoLOSS $\\times$ Period"),
    custom.note="%stars. Standard errors in parentheses.",
    caption="OLS and session fixed effects panel regressions of average prices across assets. Standard errors clustered at the session level, in parentheses.",
    override.se=list( #Calculates standard errors clustered at the session level
        # OLS model: k = t.Coef coefficients
        SPClusterSE(as.data.frame(MeanPrices.noOutliers),MMarketAvgPrice.2Markets.2,MeanPrices.noOutliers$R.Session)[(t.Coef+1):(2*t.Coef)],
        # FE model: indices are built for k = t.Coef-1 coefficients (presumably because the within model has no intercept)
        coeftest(MMarketAvgPrice.2Markets.2FE, vcov=vcovHC(MMarketAvgPrice.2Markets.2FE,type="HC0",cluster="group"))[(t.Coef):(2*(t.Coef-1))]
        ),
    override.pvalues=list( #Calculates p-values for standard errors clustered at the session level
        # OLS model: k = t.Coef coefficients
        SPClusterSE(as.data.frame(MeanPrices.noOutliers),MMarketAvgPrice.2Markets.2,MeanPrices.noOutliers$R.Session)[(t.Coef*3+1):(4*t.Coef)],
        # FE model: k = t.Coef-1 coefficients
        coeftest(MMarketAvgPrice.2Markets.2FE, vcov=vcovHC(MMarketAvgPrice.2Markets.2FE,type="HC0",cluster="group"))[((t.Coef-1)*3+1):((t.Coef-1)*4)]
    ),
    label="tab:Regression7b_2Mkts",
    digits=3, booktabs=TRUE, dcolumn=TRUE, sideways=F, longtable=FALSE, float.pos="ht!", use.packages=F
) #Creates LaTeX table from regression model
