precision = TP/(TP+FP)
recall = TP/(TP+FN)
F1 = 2*precision*recall/(precision+recall)
return(list(final_misclass, best_C, c1_mis_as_c2, c2_mis_as_c1, precision, recall, F1))
}
#####################
##LEAST SQUARES SVM##
#####################
##Function LS works similarly to the function SVM,
## but it obtains the hyperplane by solving a set of linear equations.
##It has the same inputs and outputs as the previous function
LS <- function(data, test, cv, TC=FALSE){
  ## Least squares SVM with the linear decision rule sign(x %*% w + b).
  ##
  ## Inputs:
  ##   data - data set; the last column holds the class label, all other
  ##          columns are the predictors
  ##   test - row indices of `data` that form the test set
  ##   cv   - row indices of `data` that form the validation set
  ##   TC   - FALSE: one cost for both classes; TRUE: two-cost variant, the
  ##          cost of class -1 is n1/n2 times the cost of class +1
  ## Rows in neither `test` nor `cv` are the training set. The class of the
  ## first training row is coded +1 ("class 1"), every other label -1.
  ##
  ## Output: unnamed list holding, in this order,
  ##   [[1]] misclassification rate on the test set
  ##   [[2]] cost selected on the validation set
  ##   [[3]] fraction of class 1 test points classified as class 2
  ##   [[4]] fraction of class 2 test points classified as class 1
  ##   [[5]] precision, [[6]] recall, [[7]] F1 score (class 1 = positive)
  ## Precision, recall and F1 are NaN whenever their denominator is zero.
  ta <- data[test, ]           # test set
  cv_da <- data[cv, ]          # validation set
  da <- data[-c(test, cv), ]   # training set

  n_col <- length(da[1, ])
  ## The predictor columns are spelled out explicitly: the former `1:dim-1`
  ## parsed as (1:dim)-1 and only worked because index 0 is silently dropped.
  feat <- seq_len(n_col - 1)
  cls <- da[, n_col]
  ref <- cls[1]

  ## Predictors of a partition as a numeric matrix (one row per observation)
  predictors <- function(part) as.matrix(part[, feat, drop = FALSE])
  ## Labels of a partition coded as +1 (same class as `ref`) or -1
  signs <- function(part) 2 * (part[, n_col] == ref) - 1
  ## TRUE where the hyperplane (w, b) classifies a row of `part` correctly;
  ## an undefined score is counted as a misclassification
  hits <- function(part, w, b) {
    ok <- as.vector(sign(predictors(part) %*% w + b) == signs(part))
    !is.na(ok) & ok
  }

  X <- predictors(da)
  Y <- signs(da)
  n <- length(Y)
  n1 <- sum(Y == 1)
  n2 <- sum(Y == -1)

  ## Everything that does not depend on the cost is computed once.
  ## Y * X scales row i of X by Y[i], i.e. it equals diag(Y) %*% X without
  ## building the n x n diagonal matrix.
  Z <- Y * X
  Omega <- Z %*% t(Z)
  RHS <- c(0, rep(1, n))

  C_list <- c(0.01, 0.1, 0.5, 1, 2, 5, 10, 100)
  best_C <- 0
  ## Start above any attainable error rate so the first cost is always kept
  best_misclass <- 2
  for (gamma1 in C_list) {
    ## gamma1 and gamma2 play the role of C1 and C2
    gamma2 <- if (TC) n1 / n2 * gamma1 else gamma1
    Gamma <- ifelse(Y == 1, gamma1, gamma2)

    ## Linear system  [0, -Y'; Y, Omega + diag(1/Gamma)] (b, alpha)' = (0, 1)'
    LHS <- rbind(c(0, -Y), cbind(Y, Omega + diag(1 / Gamma, n)))
    ans <- solve(LHS, RHS)
    b <- ans[1]
    alpha <- ans[-1]
    w <- t(Z) %*% alpha

    ## Validation error of this hyperplane; ties keep the smaller cost
    cv_ok <- hits(cv_da, w, b)
    misclass <- sum(!cv_ok) / length(cv_ok)
    if (misclass < best_misclass) {
      best_misclass <- misclass
      best_C <- gamma1
      best_w <- w
      best_b <- b
    }
  }

  ## Performance of the selected hyperplane on the test set
  ok <- hits(ta, best_w, best_b)
  Y3 <- signs(ta)
  final_misclass <- sum(!ok) / length(ok)
  pos <- ok[Y3 == 1]    # outcomes for true class 1 points
  neg <- ok[Y3 == -1]   # outcomes for true class 2 points
  c1_mis_as_c2 <- sum(!pos) / length(pos)
  c2_mis_as_c1 <- sum(!neg) / length(neg)
  TP <- sum(pos)
  FP <- sum(!neg)
  FN <- sum(!pos)
  precision <- TP / (TP + FP)
  recall <- TP / (TP + FN)
  F1 <- 2 * precision * recall / (precision + recall)
  list(final_misclass, best_C, c1_mis_as_c2, c2_mis_as_c1, precision, recall, F1)
}
##############################
##MINIMUM CLASS VARIANCE SVM##
##############################
##Function MCV works identically to SVM except that it requires a matrix Sw.
##It has the same inputs and outputs as the previous functions
MCV <- function(data, test, cv, TC=FALSE){
  ## Minimum class variance SVM with the linear rule sign(x %*% w + b0).
  ## The dual quadratic program is solved with ipop(); its Hessian is built
  ## from the within-class scatter
  ##   Sw = n1 * var(class +1 predictors) + n2 * var(class -1 predictors).
  ## mppower() is not defined in this function; it is presumably a matrix
  ## power that ignores eigenvalues below the given tolerance - TODO confirm.
  ##
  ## Inputs:
  ##   data - data set; the last column holds the class label, all other
  ##          columns are the predictors
  ##   test - row indices of `data` that form the test set
  ##   cv   - row indices of `data` that form the validation set
  ##   TC   - FALSE: one cost for both classes; TRUE: two-cost variant, the
  ##          cost of class -1 is n1/n2 times the cost of class +1
  ## Rows in neither `test` nor `cv` are the training set. The class of the
  ## first training row is coded +1 ("class 1"), every other label -1.
  ##
  ## Output: unnamed list holding, in this order,
  ##   [[1]] misclassification rate on the test set
  ##   [[2]] cost selected on the validation set
  ##   [[3]] fraction of class 1 test points classified as class 2
  ##   [[4]] fraction of class 2 test points classified as class 1
  ##   [[5]] precision, [[6]] recall, [[7]] F1 score (class 1 = positive)
  ## Precision, recall and F1 are NaN whenever their denominator is zero.
  ## Stops with an error when no cost yields a usable classifier.
  ta <- data[test, ]           # test set
  cv_da <- data[cv, ]          # validation set
  da <- data[-c(test, cv), ]   # training set

  n_col <- length(da[1, ])
  ## Explicit predictor columns: the former `1:dim-1` parsed as (1:dim)-1 and
  ## only worked because index 0 is silently dropped.
  feat <- seq_len(n_col - 1)
  cls <- da[, n_col]
  ref <- cls[1]

  ## Predictors of a partition as a numeric matrix (one row per observation);
  ## drop = FALSE keeps a matrix even when there is a single predictor
  predictors <- function(part) as.matrix(part[, feat, drop = FALSE])
  ## Labels of a partition coded as +1 (same class as `ref`) or -1
  signs <- function(part) 2 * (part[, n_col] == ref) - 1
  ## TRUE where the hyperplane (w, b) classifies a row of `part` correctly;
  ## an undefined score is counted as a misclassification
  hits <- function(part, w, b) {
    ok <- as.vector(sign(predictors(part) %*% w + b) == signs(part))
    !is.na(ok) & ok
  }

  X <- predictors(da)
  Y <- signs(da)
  n <- length(Y)
  n1 <- sum(Y == 1)
  n2 <- sum(Y == -1)

  ## None of the following depends on the cost, so it is computed once
  ## instead of once per element of C_list.
  Sw <- n1 * var(X[Y == 1, , drop = FALSE]) + n2 * var(X[Y == -1, , drop = FALSE])
  Sw_inv <- mppower(Sw, -1, 10^(-7))
  Z <- Y * X                                   # equals diag(Y) %*% X
  H <- mppower(Z %*% Sw_inv %*% t(Z), 1, 10^(-7))
  lin <- rep(-1, n)                            # linear term of the QP
  A <- matrix(Y, nrow = 1)                     # constraint row: sum(Y * alpha)
  lower <- rep(0, n)

  C_list <- c(0.01, 0.1, 0.5, 1, 2, 10, 100)
  best_C <- 0
  best_misclass <- 1
  best_w <- NULL
  best_b0 <- NULL
  for (C1 in C_list) {
    C2 <- if (TC) n1 / n2 * C1 else C1
    upper <- ifelse(Y == 1, C1, C2)            # per-observation box constraint

    ## Constraint A %*% alpha is bounded below by 0 with range 10^(-10)
    out <- try(ipop(lin, H, A, 0, lower, upper, 10^(-10)), silent = TRUE)
    if (inherits(out, "try-error")) {
      next                                     # solver failed for this cost
    }
    alpha <- primal(out)
    w <- Sw_inv %*% t(X) %*% (Y * alpha)

    ## Intercept averaged over the support vectors (alpha > 0.0001). With no
    ## support vector the intercept is undefined; such a solution could never
    ## be selected, so the cost is skipped.
    sv <- which(alpha > 0.0001)
    if (length(sv) == 0) {
      next
    }
    b0 <- (1 / length(sv)) * sum(Y[sv] - X[sv, , drop = FALSE] %*% w)

    ## Validation error of this hyperplane; ties keep the smaller cost
    cv_ok <- hits(cv_da, w, b0)
    misclass <- sum(!cv_ok) / length(cv_ok)
    if (misclass < best_misclass) {
      best_misclass <- misclass
      best_C <- C1
      best_w <- w
      best_b0 <- b0
    }
  }
  if (is.null(best_w)) {
    stop("MCV: no cost in C_list gave a classifier with validation error below 1",
         call. = FALSE)
  }

  ## Performance of the selected hyperplane on the test set
  ok <- hits(ta, best_w, best_b0)
  Y3 <- signs(ta)
  final_misclass <- sum(!ok) / length(ok)
  pos <- ok[Y3 == 1]    # outcomes for true class 1 points
  neg <- ok[Y3 == -1]   # outcomes for true class 2 points
  c1_mis_as_c2 <- sum(!pos) / length(pos)
  c2_mis_as_c1 <- sum(!neg) / length(neg)
  TP <- sum(pos)
  FP <- sum(!neg)
  FN <- sum(!pos)
  precision <- TP / (TP + FP)
  recall <- TP / (TP + FN)
  F1 <- 2 * precision * recall / (precision + recall)
  list(final_misclass, best_C, c1_mis_as_c2, c2_mis_as_c1, precision, recall, F1)
}
############################################
##LEAST SQUARES MINIMUM CLASS VARIANCE SVM##
############################################
##Function LSMCV works similarly to the function LS
##apart from the fact that it requires a matrix Sw.
##This function also has the same inputs and
##outputs as the previous functions
LSMCV <- function(data, test, cv, TC=FALSE){
  ## Least squares minimum class variance SVM with the linear rule
  ## sign(x %*% w + b). Like LS it solves one linear system per cost, but the
  ## kernel matrix is weighted by the inverse of the within-class scatter
  ##   Sw = n1 * var(class +1 predictors) + n2 * var(class -1 predictors).
  ## mppower() is not defined in this function; it is presumably a matrix
  ## power that ignores eigenvalues below the given tolerance - TODO confirm.
  ##
  ## Inputs:
  ##   data - data set; the last column holds the class label, all other
  ##          columns are the predictors
  ##   test - row indices of `data` that form the test set
  ##   cv   - row indices of `data` that form the validation set
  ##   TC   - FALSE: one cost for both classes; TRUE: two-cost variant, the
  ##          cost of class -1 is n1/n2 times the cost of class +1
  ## Rows in neither `test` nor `cv` are the training set. The class of the
  ## first training row is coded +1 ("class 1"), every other label -1.
  ##
  ## Output: unnamed list holding, in this order,
  ##   [[1]] misclassification rate on the test set
  ##   [[2]] cost selected on the validation set
  ##   [[3]] fraction of class 1 test points classified as class 2
  ##   [[4]] fraction of class 2 test points classified as class 1
  ##   [[5]] precision, [[6]] recall, [[7]] F1 score (class 1 = positive)
  ## Precision, recall and F1 are NaN whenever their denominator is zero.
  ta <- data[test, ]           # test set
  cv_da <- data[cv, ]          # validation set
  da <- data[-c(test, cv), ]   # training set

  n_col <- length(da[1, ])     # dimensionality including the label column
  ## Explicit predictor columns: the former `1:dim-1` parsed as (1:dim)-1 and
  ## only worked because index 0 is silently dropped.
  feat <- seq_len(n_col - 1)
  cls <- da[, n_col]
  ref <- cls[1]

  ## Predictors of a partition as a numeric matrix (one row per observation);
  ## drop = FALSE keeps a matrix even when there is a single predictor
  predictors <- function(part) as.matrix(part[, feat, drop = FALSE])
  ## Labels of a partition coded as +1 (same class as `ref`) or -1
  signs <- function(part) 2 * (part[, n_col] == ref) - 1
  ## TRUE where the hyperplane (w, b) classifies a row of `part` correctly;
  ## an undefined score is counted as a misclassification
  hits <- function(part, w, b) {
    ok <- as.vector(sign(predictors(part) %*% w + b) == signs(part))
    !is.na(ok) & ok
  }

  X <- predictors(da)
  Y <- signs(da)
  n <- length(Y)               # size of the training set
  n1 <- sum(Y == 1)            # size of class 1
  n2 <- sum(Y == -1)           # size of class 2

  ## None of the following depends on the cost, so it is computed once
  ## instead of once per element of C_list.
  Sw <- n1 * var(X[Y == 1, , drop = FALSE]) + n2 * var(X[Y == -1, , drop = FALSE])
  Sw_inv <- mppower(Sw, -1, 10^(-7))
  Z <- Y * X                   # equals diag(Y) %*% X
  Omega <- Z %*% Sw_inv %*% t(Z)
  RHS <- c(0, rep(1, n))

  C_list <- c(0.01, 0.1, 0.5, 1, 2, 10, 100)
  best_C <- 0
  ## Start above any attainable error rate so the first cost is always kept
  best_misclass <- 2
  for (gamma1 in C_list) {
    gamma2 <- if (TC) n1 / n2 * gamma1 else gamma1
    Gamma <- ifelse(Y == 1, gamma1, gamma2)

    ## Linear system  [0, -Y'; Y, Omega + diag(1/Gamma)] (b, alpha)' = (0, 1)'
    LHS <- rbind(c(0, -Y), cbind(Y, Omega + diag(1 / Gamma, n)))
    ans <- solve(LHS, RHS)
    b <- ans[1]
    alpha <- ans[-1]
    w <- Sw_inv %*% t(Z) %*% alpha

    ## Validation error of this hyperplane; ties keep the smaller cost
    cv_ok <- hits(cv_da, w, b)
    misclass <- sum(!cv_ok) / length(cv_ok)
    if (misclass < best_misclass) {
      best_misclass <- misclass
      best_C <- gamma1
      best_w <- w
      best_b <- b
    }
  }

  ## Performance of the selected hyperplane on the test set
  ok <- hits(ta, best_w, best_b)
  Y3 <- signs(ta)
  final_misclass <- sum(!ok) / length(ok)
  pos <- ok[Y3 == 1]    # outcomes for true class 1 points
  neg <- ok[Y3 == -1]   # outcomes for true class 2 points
  c1_mis_as_c2 <- sum(!pos) / length(pos)
  c2_mis_as_c1 <- sum(!neg) / length(neg)
  TP <- sum(pos)
  FP <- sum(!neg)
  FN <- sum(!pos)
  precision <- TP / (TP + FP)
  recall <- TP / (TP + FN)
  F1 <- 2 * precision * recall / (precision + recall)
  list(final_misclass, best_C, c1_mis_as_c2, c2_mis_as_c1, precision, recall, F1)
}
##Reading the datasets that are to be used.
##All data files are read from one directory; change `data_dir` to run the
##script on another machine. da1 is the built-in iris data.
data_dir <- "C:/Users/c1400990/Desktop/PhD/etc/SVMs/Final/Datasets"
da1 <- iris
da2 <- read.table(file.path(data_dir, "haberman.data.txt"), sep = ",")
##The second column of the ionosphere file is dropped
da3 <- subset(read.table(file.path(data_dir, "ionosphere.data.txt"), sep = ","),
              select = -c(2))
##The first column of the breast-cancer file is dropped
da4 <- subset(read.table(file.path(data_dir, "breast-cancer.data.txt"), sep = ","),
              select = -c(1))
da5 <- read.table(file.path(data_dir, "diabetes.data.txt"), sep = ",")
da6 <- read.table(file.path(data_dir, "fertility.data.txt"), sep = ",")
da7 <- read.table(file.path(data_dir, "seeds.data.txt"), sep = "\t")
da8 <- read.table(file.path(data_dir, "0banknote.data.txt"), sep = ",")
##Defining a list with all the datasets
da <- list(da1, da2, da3, da4, da5, da6, da7, da8)
n <- 10 ##"n" is the number of iterations that each algorithm will run
##Getting the class sizes of each dataset in a matrix with one row per
##dataset: column 1 counts the rows whose label (last column) equals the
##label of the first row, column 2 counts all other rows.
##Computed inside a function so that no temporaries are left behind in the
##global environment.
ClassSize <- t(vapply(da, function(d) {
  cl <- d[, length(d[1, ])]
  same_as_first <- cl == cl[1]
  c(sum(same_as_first), sum(!same_as_first))
}, numeric(2)))
##Zero-initialised storage for the outputs of the algorithms.
##Every matrix has one row per dataset in "da".
## "result" / "TCresult": summed (later averaged) misclassification error
## for the one-cost and two-cost methods respectively
result <- matrix(0, nrow = length(da), ncol = 4,
                 dimnames = list(NULL, c("SVM","LS","MCV","LSMCV")))
TCresult <- matrix(0, nrow = length(da), ncol = 4,
                   dimnames = list(NULL, c("SVM","LS","MCV","LSMCV")))
## "Duration" / "TCDuration": summed (later averaged) run time of each
## algorithm for the one-cost and two-cost methods
Duration <- matrix(0, nrow = length(da), ncol = 4,
                   dimnames = list(NULL, c("SVM","LS","MCV","LSMCV")))
TCDuration <- matrix(0, nrow = length(da), ncol = 4,
                     dimnames = list(NULL, c("SVM","LS","MCV","LSMCV")))
## "ClassErrors" / "TCClassErrors": summed (later averaged) error of each
## class, two columns per algorithm
ClassErrors <- matrix(0, nrow = length(da), ncol = 8,
                      dimnames = list(NULL, c("SVM1","SVM2","LS1","LS2","MCV1","MCV2","LSMCV1","LSMCV2")))
TCClassErrors <- matrix(0, nrow = length(da), ncol = 8,
                        dimnames = list(NULL, c("SVM1","SVM2","LS1","LS2","MCV1","MCV2","LSMCV1","LSMCV2")))
## "VarianceErrors" / "TCVarianceErrors": filled later with the variance
## of the misclassification errors over the iterations
VarianceErrors <- matrix(0, nrow = length(da), ncol = 4,
                         dimnames = list(NULL, c("SVM","LS","MCV","LSMCV")))
TCVarianceErrors <- matrix(0, nrow = length(da), ncol = 4,
                           dimnames = list(NULL, c("SVM","LS","MCV","LSMCV")))
## Per-iteration misclassification errors (one column per iteration), kept
## so that the variances can be calculated; suffix 1 = one-cost,
## suffix 2 = two-cost
SVM1_list <- matrix(0, nrow = length(da), ncol = n)
SVM2_list <- matrix(0, nrow = length(da), ncol = n)
LS1_list <- matrix(0, nrow = length(da), ncol = n)
LS2_list <- matrix(0, nrow = length(da), ncol = n)
MCV1_list <- matrix(0, nrow = length(da), ncol = n)
MCV2_list <- matrix(0, nrow = length(da), ncol = n)
LSMCV1_list <- matrix(0, nrow = length(da), ncol = n)
LSMCV2_list <- matrix(0, nrow = length(da), ncol = n)
## "Precisions", "Recalls" and "F1scores": summed (later averaged)
## precision, recall and F1 score of each algorithm
Precisions <- matrix(0, nrow = length(da), ncol = 4,
                     dimnames = list(NULL, c("SVM","LS","MCV","LSMCV")))
Recalls <- matrix(0, nrow = length(da), ncol = 4,
                  dimnames = list(NULL, c("SVM","LS","MCV","LSMCV")))
F1scores <- matrix(0, nrow = length(da), ncol = 4,
                   dimnames = list(NULL, c("SVM","LS","MCV","LSMCV")))
c_perc <- 0.2  ## the percentage of data used for cross validation
t_perc <- 0.2  ## the percentage of data used for testing
##Main experiment: "n" repetitions over every dataset in "da".
##For each repetition and dataset a new random test set and validation set
##are drawn. Row 1 is excluded from both draws, so it always stays in the
##training data. Every algorithm is then run on that same split, first in
##its one-cost form (TC = FALSE) and then in its two-cost form (TC = TRUE).
##For each algorithm the following happens:
##1)Run the one-cost algorithm and time it
##2)Add the resulting misclassification error to "result"
##3)Add the fraction of wrongly classified data of "Class 1" and
##  "Class 2" to "ClassErrors"
##4)Keep the individual misclassification error for the variance step
##5)Repeat (1)-(4) for the two-cost case
##6)Add the run times to "Duration" and "TCDuration"
##7)Add precision, recall and F1 score to their matrices
##The algorithms are listed in the column order of the result matrices.
algorithms <- list(SVM, LS, MCV, LSMCV)
one_cost_runs <- list(SVM1_list, LS1_list, MCV1_list, LSMCV1_list)
two_cost_runs <- list(SVM2_list, LS2_list, MCV2_list, LSMCV2_list)
for (k in seq_len(n)){
  for (j in seq_along(da)){
    i <- da[[j]]
    n_obs <- nrow(i)
    ##Defining the test set
    test <- sample(2:n_obs, size = t_perc*n_obs)
    ##Getting the remaining data
    remaining <- seq_len(n_obs)[-test]
    ##Defining the validation set from the remaining data
    cv <- sample(remaining[-1], size = c_perc*n_obs)
    for (a in seq_along(algorithms)){
      fit <- algorithms[[a]]
      ##One-cost case
      start1 <- Sys.time()
      alg <- fit(i,test,cv,FALSE)
      end1 <- Sys.time()
      result[j,a] <- result[j,a] + alg[[1]]
      ClassErrors[j,2*a-1] <- ClassErrors[j,2*a-1] + alg[[3]]
      ClassErrors[j,2*a] <- ClassErrors[j,2*a] + alg[[4]]
      one_cost_runs[[a]][j,k] <- alg[[1]]
      ##Two-cost case
      start2 <- Sys.time()
      alg <- fit(i,test,cv,TRUE)
      end2 <- Sys.time()
      TCresult[j,a] <- TCresult[j,a] + alg[[1]]
      TCClassErrors[j,2*a-1] <- TCClassErrors[j,2*a-1] + alg[[3]]
      TCClassErrors[j,2*a] <- TCClassErrors[j,2*a] + alg[[4]]
      two_cost_runs[[a]][j,k] <- alg[[1]]
      ##Run times in seconds
      diff1 <- as.numeric(end1 - start1, units="secs")
      diff2 <- as.numeric(end2 - start2, units="secs")
      Duration[j,a] <- Duration[j,a] + diff1
      TCDuration[j,a] <- TCDuration[j,a] + diff2
      ##NOTE(review): "alg" holds the two-cost result at this point, so the
      ##precision, recall and F1 score of the one-cost run are never stored.
      ##This matches the previous behaviour - confirm that it is intended.
      Precisions[j,a] <- Precisions[j,a] + alg[[5]]
      Recalls[j,a] <- Recalls[j,a] + alg[[6]]
      F1scores[j,a] <- F1scores[j,a] + alg[[7]]
    }
  }
}
##Copy the per-iteration errors back into the matrices used later on
SVM1_list <- one_cost_runs[[1]]
LS1_list <- one_cost_runs[[2]]
MCV1_list <- one_cost_runs[[3]]
LSMCV1_list <- one_cost_runs[[4]]
SVM2_list <- two_cost_runs[[1]]
LS2_list <- two_cost_runs[[2]]
MCV2_list <- two_cost_runs[[3]]
LSMCV2_list <- two_cost_runs[[4]]
##Turn the sums accumulated over the n iterations into averages:
##misclassification error, run time and per-class error for both cases
result <- result / n
TCresult <- TCresult / n
Duration <- Duration / n
TCDuration <- TCDuration / n
ClassErrors <- ClassErrors / n
TCClassErrors <- TCClassErrors / n
##Variance of the misclassification error of each algorithm, taken per
##dataset (row) over the n individual errors stored in the *_list matrices
VarianceErrors[,1] <- apply(SVM1_list, 1, var)
TCVarianceErrors[,1] <- apply(SVM2_list, 1, var)
VarianceErrors[,2] <- apply(LS1_list, 1, var)
TCVarianceErrors[,2] <- apply(LS2_list, 1, var)
VarianceErrors[,3] <- apply(MCV1_list, 1, var)
TCVarianceErrors[,3] <- apply(MCV2_list, 1, var)
VarianceErrors[,4] <- apply(LSMCV1_list, 1, var)
TCVarianceErrors[,4] <- apply(LSMCV2_list, 1, var)
##Calculate the average precision, recall and F1 score for each algorithm
Precisions <- Precisions / n
Recalls <- Recalls / n
F1scores <- F1scores / n
#The bare names below rely on top-level auto-printing to display each
#matrix; every matrix has one row per dataset, in the order of "da".
#NOTE(review): when the file is run through source() with its default
#arguments these expressions print nothing.
#Average total misclassification error (one-cost case)
result
#Average total misclassification error (two-cost case)
TCresult
#Variance of the total misclassification error (one-cost case)
VarianceErrors
#Variance of the total misclassification error (two-cost case)
TCVarianceErrors
#Average misclassification error of each class separately (one-cost case)
ClassErrors
#Average misclassification error of each class separately (two-cost case)
TCClassErrors
#Class sizes: column 1 = class of the first row, column 2 = all other rows
ClassSize
#Average duration in seconds of the one-cost algorithms
Duration
#Average duration in seconds of the two-cost algorithms
TCDuration
#Average precision (accumulated from the two-cost runs in the main loop)
Precisions
#Average recall (accumulated from the two-cost runs in the main loop)
Recalls
#Average F1 score (accumulated from the two-cost runs in the main loop)
F1scores
##kernlab provides ipop() and primal(), which MCV calls.
##Install it only when it is missing instead of reinstalling on every run.
##NOTE(review): this statement sits after the experiment has already run;
##the package has to be installed and loaded before MCV is first called -
##confirm that this happens earlier in the file or move this step to the top.
if (!requireNamespace("kernlab", quietly = TRUE)) {
  install.packages("kernlab")
}
