## Locating the data file ##
## Set 'data_dir' to the directory that contains DTSIPBLData.txt,
## for example "C:/Users/user/Desktop". The default "." is the current
## working directory. The path is built with file.path() so the global
## working directory is never changed by this script.
data_dir <- "."
data_file <- file.path(data_dir, "DTSIPBLData.txt")
if (!file.exists(data_file)) {
  stop(
    "Data file not found: ", data_file,
    ". Set 'data_dir' to the directory that contains DTSIPBLData.txt.",
    call. = FALSE
  )
}

## Reading original dataset: DTSIPBLData.txt ##
## Download available in the Suppl. Material ##
## The file is read as tab-separated text with a header row and "." as
## the decimal mark.
DTSIPBLData <- read.table(data_file, header = TRUE, sep = "\t", dec = ".")

## First 12 rows
head(DTSIPBLData, 12)
## Summary of every column
summary(DTSIPBLData)
## Number of rows --> 504 rows
nrow(DTSIPBLData)
## Number of columns --> 10 columns
ncol(DTSIPBLData)

## Create a base named 'common' with the columns common to both programs:
## pre-EHEA and EHEA. That corresponds to columns 1 to 7 (both included).
common <- DTSIPBLData[, 1:7]
common
## Again, the number of rows is 504
nrow(common)

####################  Means per academic year  ############################

###########################   CI Students   ###########################
## For every academic year: keep the rows of DTSIPBLData whose AcadYear
## equals that year, print them, and print their summary(). which() drops
## rows where the comparison is NA. The comment after each summary()
## records the number of rows and the means read from that output.
AcadYear04 <- DTSIPBLData[which(DTSIPBLData$AcadYear == 4), , drop = FALSE]
AcadYear04
summary(AcadYear04)
## 43 rows | UMLPart mean: 5.358 | FinalMarks mean: 5.847

AcadYear05 <- DTSIPBLData[which(DTSIPBLData$AcadYear == 5), , drop = FALSE]
AcadYear05
summary(AcadYear05)
## 51 rows | UMLPart mean: 5.700 | FinalMarks mean: 5.367

AcadYear06 <- DTSIPBLData[which(DTSIPBLData$AcadYear == 6), , drop = FALSE]
AcadYear06
summary(AcadYear06)
## 38 rows | UMLPart mean: 5.559 | FinalMarks mean: 6.098

AcadYear07 <- DTSIPBLData[which(DTSIPBLData$AcadYear == 7), , drop = FALSE]
AcadYear07
summary(AcadYear07)
## 55 rows | UMLPart mean: 5.018 | FinalMarks mean: 5.705

AcadYear08 <- DTSIPBLData[which(DTSIPBLData$AcadYear == 8), , drop = FALSE]
AcadYear08
summary(AcadYear08)
## 43 rows | UMLPart mean: 4.698 | FinalMarks mean: 5.548

AcadYear09 <- DTSIPBLData[which(DTSIPBLData$AcadYear == 9), , drop = FALSE]
AcadYear09
summary(AcadYear09)
## 32 rows | UMLPart mean: 5.258 | FinalMarks mean: 6.216

###########################   PBL Students   ###########################
## From year 10 on, the recorded results also list the means of the three
## PBL columns (PBLTotalMarks, PBLStudentMarks, PBLProfMarks).
AcadYear10 <- DTSIPBLData[which(DTSIPBLData$AcadYear == 10), , drop = FALSE]
AcadYear10
summary(AcadYear10)
## 27 rows | UMLPart mean: 5.611 | FinalMarks mean: 5.617
## PBLTotalMarks mean: 6.544 | PBLStudentMarks mean: 7.444 | PBLProfMarks mean: 6.332

AcadYear11 <- DTSIPBLData[which(DTSIPBLData$AcadYear == 11), , drop = FALSE]
AcadYear11
summary(AcadYear11)
## 24 rows | UMLPart mean: 5.308 | FinalMarks mean: 6.466
## PBLTotalMarks mean: 5.816 | PBLStudentMarks mean: 6.987 | PBLProfMarks mean: 5.540

AcadYear12 <- DTSIPBLData[which(DTSIPBLData$AcadYear == 12), , drop = FALSE]
AcadYear12
summary(AcadYear12)
## 31 rows | UMLPart mean: 5.51 | FinalMarks mean: 6.661
## PBLTotalMarks mean: 6.959 | PBLStudentMarks mean: 7.612 | PBLProfMarks mean: 6.742

AcadYear13 <- DTSIPBLData[which(DTSIPBLData$AcadYear == 13), , drop = FALSE]
AcadYear13
summary(AcadYear13)
## 19 rows | UMLPart mean: 4.189 | FinalMarks mean: 6.389
## PBLTotalMarks mean: 7.498 | PBLStudentMarks mean: 7.546 | PBLProfMarks mean: 7.484
## NOTE(review): the three PBL means recorded here are identical to the ones
## recorded for year 14 below -- possibly a copy/paste slip; re-run to confirm.

AcadYear14 <- DTSIPBLData[which(DTSIPBLData$AcadYear == 14), , drop = FALSE]
AcadYear14
summary(AcadYear14)
## 27 rows | UMLPart mean: 6.585 | FinalMarks mean: 7.044
## PBLTotalMarks mean: 7.498 | PBLStudentMarks mean: 7.546 | PBLProfMarks mean: 7.484

AcadYear15 <- DTSIPBLData[which(DTSIPBLData$AcadYear == 15), , drop = FALSE]
AcadYear15
summary(AcadYear15)
## 25 rows | UMLPart mean: 5.736 | FinalMarks mean: 7.168
## PBLTotalMarks mean: 8.826 | PBLStudentMarks mean: 8.702 | PBLProfMarks mean: 8.867

AcadYear16 <- DTSIPBLData[which(DTSIPBLData$AcadYear == 16), , drop = FALSE]
AcadYear16
summary(AcadYear16)
## 30 rows | UMLPart mean: 5.413 | FinalMarks mean: 7.477
## PBLTotalMarks mean: 7.863 | PBLStudentMarks mean: 8.00 | PBLProfMarks mean: 7.817

AcadYear17 <- DTSIPBLData[which(DTSIPBLData$AcadYear == 17), , drop = FALSE]
AcadYear17
summary(AcadYear17)
## 25 rows | UMLPart mean: 4.8 | FinalMarks mean: 7.176
## PBLTotalMarks mean: 7.611 | PBLStudentMarks mean: 7.66 | PBLProfMarks mean: 7.593

AcadYear18 <- DTSIPBLData[which(DTSIPBLData$AcadYear == 18), , drop = FALSE]
AcadYear18
summary(AcadYear18)
## 34 rows | UMLPart mean: 4.868 | FinalMarks mean: 7.141
## PBLTotalMarks mean: 6.261 | PBLStudentMarks mean: 7.519 | PBLProfMarks mean: 5.795

#############################  PART A  ####################################

## null hypothesis: independent variable (learning method-variable Program2) has no 
####### effect on the dependent variable (score- variable FinalMarks), that is, there 
####### are no differences between means
## alternative hypothesis: assumes that the learning method has an effect 
####### on students' scores (i.e., the means are different)

##Since p-VALUE <0.05 --> The results yielded (t(472)=8.28, p<0.01, d=0.74)
####### statistically significant differences between the total exam scores
####### of the CI and the PBL students, so we can reject the null hypothesis 
####### and accept the alternative hypothesis.

##Cohen's d showed a medium-large effect (d=0.74).

###########################################################################

## Perform the t-test for final marks and program.
## Program2 corresponds to the numeric variable associated to the categorical
## one with the name of the program (variable Program with values:
## CI: pre-EHEA and PBL Students: EHEA).
## With the formula interface t.test() runs a Welch two-sample test
## (see the recorded output below).
t.test(FinalMarks ~ Program2, data = common)
## Recorded output:
## Welch Two Sample t-test
## data:  FinalMarks by Program2
## t = -8.2882, df = 472.93, p-value = 1.198e-15
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   -1.3172364 -0.8123492
## sample estimates:
##   mean in group 0 mean in group 1
## 5.755992        6.820785


## Cohen's d for the same comparison.
## cohensD() comes from the 'lsr' package; install it once with
##   install.packages("lsr")
## It is called through its namespace so the script does not depend on a
## library() call having been run beforehand.
## Recorded result: d --> 0.7439568
lsr::cohensD(FinalMarks ~ Program2, data = common)



#############################    PART B      ##################################

## null hypothesis: independent variable (learning method-variable Program2) has no 
####### effect on the dependent variable (UML exam part marks- variable UMLPart), that is, there 
####### are no differences between means
## alternative hypothesis: assumes that the learning method has an effect 
####### on students' UML exam part marks (i.e., the means are different)

##Since p-VALUE >0.05 --> we cannot reject the null hypothesis

##Cohen's d coefficient is small (d=0.06)

##############################################################################



## Perform the t-test for UML exam part marks and program.
## Program2 corresponds to the numeric variable associated to the categorical
## one with the name of the program (variable Program with values:
## CI: pre-EHEA and PBL Students: EHEA).
t.test(UMLPart ~ Program2, data = common)
## Recorded output:
## Welch Two Sample t-test
## data:  UMLPart by Program2
## t = -0.66634, df = 490.94, p-value = 0.5055
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   -0.4085190  0.2016046
## sample estimates:
##   mean in group 0 mean in group 1
## 5.261832        5.365289

## Cohen's d for the same comparison.
## cohensD() comes from the 'lsr' package (install.packages("lsr")); it is
## called through its namespace so no prior library() call is required.
## Recorded result: d --> 0.05957607
lsr::cohensD(UMLPart ~ Program2, data = common)

## Two-tailed critical value of Student's t at the 5% level.
## Recorded result: 1.965035
## NOTE(review): df = 472 looks like the truncated Welch df of the FinalMarks
## test (472.93), not the df of the UMLPart test above (490.94) -- confirm.
alpha <- 0.05
qt(1 - alpha / 2, 472)



#############################    PART C      ##################################
##Create a base named 'PBL' just with the data corresponding to the PBL students 
####  group in order to see whether in the PBL project scores the students' 
####  assessments  differ significantly from the teacher's ones. 


####  Results: Means show that teachers are more critical than students: 
######## mean Prof marks: 7.046033
######## mean Students marks: 7.723967 
####  t-test: p-value = 8.274e-10 shows a significant difference between means 
####  Cohen's d: reveals a medium-large effect (d=0.6997342). 
##############################################################################
## Print the full dataset, then keep rows 263 to 504 as 'PBL'.
## NOTE(review): the row range is hard-coded; it presumably relies on the
## file being ordered so that the PBL students occupy the last 242 rows --
## verify against the data file.
DTSIPBLData
PBL <- DTSIPBLData[263:504, ]
PBL
## Number of rows --> 242
nrow(PBL)


## Welch two-sample t-test: x = marks given by the professor,
## y = marks given by the students.
## The vectors are passed directly, so no 'data' argument is needed (the
## default method of t.test() does not use one).
t.test(PBL$PBLProfMarks, PBL$PBLStudentMarks)
## Recorded output:
## Welch Two Sample t-test
## data:  PBL$PBLProfMarks and PBL$PBLStudentMarks
## t = -6.2812, df = 427.69, p-value = 8.274e-10
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   -0.8900752 -0.4657926
## sample estimates:
##   mean of x mean of y
## 7.046033  7.723967
## (mean of x = PBLProfMarks, mean of y = PBLStudentMarks)

## Cohen's d computed on the per-row difference student mark - professor mark.
## cohensD() comes from the 'lsr' package (install.packages("lsr")); it is
## called through its namespace so no prior library() call is required.
## Recorded result: d --> 0.6997342
## NOTE(review): this effect size is based on paired differences, whereas the
## t-test above treats the two sets of marks as independent samples (Welch).
## Confirm which design is intended before changing either call.
lsr::cohensD(PBL$PBLStudentMarks - PBL$PBLProfMarks)



########################  Other tests with no significative results  ##################
#######################################################################################
### ANCOVA with gender (Gender) to check whether it has any effect on the
### UML exam part marks.
## Fit UMLPart on Program2 and Gender using the 'common' data.
model1 <- aov(UMLPart ~ Program2 + Gender, data = common)
## Type III table. Anova() comes from the 'car' package
## (install.packages("car")); it is called through its namespace so the
## script does not depend on a library() call having been run beforehand.
car::Anova(model1, type = "III")
## Regression-style summary (coefficients, R-squared, F-statistic)
summary.lm(model1)

## Recorded output of summary.lm(model1):
##Call:
##  aov(formula = UMLPart ~ Program2 + Gender, data = common)

##Residuals:
##  Min      1Q  Median      3Q     Max 
##-5.3589 -1.2522 -0.0372  1.1161  4.7478 

##Coefficients:
##  Estimate Std. Error t value Pr(>|t|)    
##(Intercept)  5.25222    0.11882  44.202   <2e-16 ***
##  Program2     0.10671    0.15593   0.684    0.494    
##Gender       0.03498    0.18507   0.189    0.850    
##---
##  Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

##Residual standard error: 1.738 on 501 degrees of freedom
##Multiple R-squared:  0.0009599,	Adjusted R-squared:  -0.003028 
##F-statistic: 0.2407 on 2 and 501 DF,  p-value: 0.7862