## Analysis of UDH and DCIS 
# Input (Features) = Full_FeaturesWithLabel_Cleaned.csv: 
# Row 1-116   = 116 MGH Cases (DCIS=80, UDH=36)  File name pattern (S13-95 A1-8, S13-94 A1-1, S13-93 A1-2,1-UDH --- 10-UDH,11-DICS --- 12-DCIS)
# Row 117-167 = 51 BIDMC Cases (DCIS=20, UDH=31) File name pattern (D22,D23, UD32,U40)

rm(list=ls())
library(glmnet)
library(ROCR)
setwd("C:/Users/hirshad/Dropbox/DCIS.UDH.Revise/Upload/")
Data=read.csv("ComputedFeatures_Label.csv")
x=data.matrix(Data[,!is.element(colnames(Data),c("CaseName","Class","ClassGrade","ER"))])
y=Data[,"Class"]
table(y)

## Cross Validation on All Cases (MGH=116 + BIDMC=51, Total=167)
set.seed(12345)
cv1=cv.glmnet(x,y,family="bin",type="auc",nfolds=9,alpha=1,keep=T)
cv.preds1=cv1$fit.preval[,cv1$lambda==cv1$lambda.min]
pred1 <- prediction(cv.preds1, y)
perf1 <- performance(pred1, measure = "tpr", x.measure = "fpr") 
performance(pred1, measure = "auc") 

tiff("__4_Figure_CV_167Cases.tiff", width=10, height=5, units="in",pointsize="10",compression="lzw",bg="white",res=500, antialias="none")
par(mfrow=c(1,2))
plot(cv1)
plot(perf1,col="red",main="DCIS vs UDH Classification on both datasets, AUC=0.93")
dev.off()

x.tr=x[c(1:116),]
y.tr=y[c(1:116)]

x.te=x[c(117:167),]
y.te=y[c(117:167)]

## Cross Validation on Training MGH Cases (116)
set.seed(12345)
cv2=cv.glmnet(x.tr,y.tr,family="bin",type="auc",nfolds=9,alpha=1,keep=T)
cv.preds2=cv2$fit.preval[,cv2$lambda==cv2$lambda.min]
pred2 <- prediction(cv.preds2, y.tr)
perf2 <- performance(pred2, measure = "tpr", x.measure = "fpr") 
performance(pred2, measure = "auc") 

tiff("__2_Figure_CV_116Cases.tiff", width=10, height=5, units="in",pointsize="10",compression="lzw",bg="white",res=500, antialias="none")
par(mfrow=c(1,2))
plot(cv2) 
plot(perf2,col="red",main="DCIS vs UDH Classification on MGH Cases, AUC=0.95")
dev.off()

## Classification on Evaluation BIDMC Cases (51)
f1=glmnet(x.tr,y.tr,family="bin",lambda=cv2$lambda.min)
sum(f1$beta[,1]!=0)
f1$beta[f1$beta[,1]!=0,]
f2=predict(f1,type="coef")
sum(f2!=0)
f2
p1=predict(f1,newx=x.te,type="resp")
pred3 <- prediction(p1, y.te)
perf3 <- performance(pred3, measure = "tpr", x.measure = "fpr") 
performance(pred3, measure = "auc") 

tiff("__3_Figure_Evalution_51Cases.tiff", width=10, height=5, units="in",pointsize="10",compression="lzw",bg="white",res=500, antialias="none")
par(mfrow=c(1,2))
plot(p1[order(p1)],col=(y.te[order(p1)]=="DCIS")+1, main="Plot on BIDMC Cases (Red=DCIS,Black=UDH)",ylab ="Predicted Probability" )
plot(perf3,col="red", main="DCIS vs UDH Classification on BIDMC Cases, AUC=0.86")
dev.off()

Features = f1$beta[f1$beta[,1]!=0,]
write.csv(Features,"SelectedFeatures.csv")

######################################################################################################################
###################################### UDH and Different Grades of DCIS Analysis #####################################
######################################################################################################################

DCIS.Low = Data[Data$ClassGrade == "Low_DCIS",]
DCIS.Medium = Data[Data$ClassGrade == "Medium_DCIS",]
DCIS.High = Data[Data$ClassGrade == "High_DCIS",]
UDH = Data[Data$ClassGrade == "UDH",]

## UDH vs High DCIS
UDH.DCIS = rbind(UDH, DCIS.High)
xx = data.matrix(UDH.DCIS[,!is.element(colnames(UDH.DCIS),c("CaseName","Class","ClassGrade","ER"))])
yy = UDH.DCIS[,"ClassGrade"]
yy <- factor(yy)
table(yy)

set.seed(12345)
cv=cv.glmnet(xx,yy,family="bin",type="auc",nfolds=5,alpha=1,keep=T)
cv.preds=cv$fit.preval[,cv$lambda==cv$lambda.min]
pred <- prediction(cv.preds, yy)
perf <- performance(pred, measure = "tpr", x.measure = "fpr") 
performance(pred, measure = "auc") 

tiff("UDH_vs_High_DCIS.tiff",width=1000,height=500)
par(mfrow=c(1,2))
plot(cv) 
plot(perf,col="red",main="ROC Curve for UDH vs High Grade DCIS, AUC=0.97")
dev.off()

## UDH vs Medium DCIS
UDH.DCIS = rbind(UDH, DCIS.Medium)
xx = data.matrix(UDH.DCIS[,!is.element(colnames(UDH.DCIS),c("CaseName","Class","ClassGrade","ER"))])
yy = UDH.DCIS[,"ClassGrade"]
yy <- factor(yy)
table(yy)

set.seed(12345)
cv=cv.glmnet(xx,yy,family="bin",type="auc",nfolds=5,alpha=1,keep=T)
cv.preds=cv$fit.preval[,cv$lambda==cv$lambda.min]
pred <- prediction(cv.preds, yy)
perf <- performance(pred, measure = "tpr", x.measure = "fpr") 
performance(pred, measure = "auc") 

tiff("UDH_vs_Medium_DCIS.tiff",width=1000,height=500)
par(mfrow=c(1,2))
plot(cv) 
plot(perf,col="red",main="ROC Curve for UDH vs Medium Grade DCIS, AUC=0.90")
dev.off()

## UDH vs Low DCIS
UDH.DCIS = rbind(UDH, DCIS.Low)
xx = data.matrix(UDH.DCIS[,!is.element(colnames(UDH.DCIS),c("CaseName","Class","ClassGrade","ER"))])
yy = UDH.DCIS[,"ClassGrade"]
yy <- factor(yy)
table(yy)

set.seed(12345)
cv=cv.glmnet(xx,yy,family="bin",type="auc",nfolds=5,alpha=1,keep=T)
cv.preds=cv$fit.preval[,cv$lambda==cv$lambda.min]
pred <- prediction(cv.preds, yy)
perf <- performance(pred, measure = "tpr", x.measure = "fpr") 
performance(pred, measure = "auc") 

tiff("UDH_vs_Low_DCIS.tiff",width=1000,height=500)
par(mfrow=c(1,2))
plot(cv) 
plot(perf,col="red",main="ROC Curve for UDH vs Low Grade DCIS, AUC=0.95")
dev.off()

## Low DCIS vs High DCIS
UDH.DCIS = rbind(DCIS.Low, DCIS.High)
xx = data.matrix(UDH.DCIS[,!is.element(colnames(UDH.DCIS),c("CaseName","Class","ClassGrade","ER"))])
yy = UDH.DCIS[,"ClassGrade"]
yy <- factor(yy)
table(yy)

set.seed(12345)
cv=cv.glmnet(xx,yy,family="bin",type="auc",nfolds=5,alpha=1,keep=T)
cv.preds=cv$fit.preval[,cv$lambda==cv$lambda.min]
pred <- prediction(cv.preds, yy)
perf <- performance(pred, measure = "tpr", x.measure = "fpr") 
performance(pred, measure = "auc") 

tiff("Low_DCIS_vs_High_DCIS.tiff",width=1000,height=500)
par(mfrow=c(1,2))
plot(cv) 
plot(perf,col="red",main="ROC Curve for Low vs High Grade DCIS, AUC=0.98")
dev.off()

## Low DCIS vs Medium DCIS
UDH.DCIS = rbind(DCIS.Low, DCIS.Medium)
xx = data.matrix(UDH.DCIS[,!is.element(colnames(UDH.DCIS),c("CaseName","Class","ClassGrade","ER"))])
yy = UDH.DCIS[,"ClassGrade"]
yy <- factor(yy)
table(yy)

set.seed(12345)
cv=cv.glmnet(xx,yy,family="bin",type="auc",nfolds=5,alpha=1,keep=T)
cv.preds=cv$fit.preval[,cv$lambda==cv$lambda.min]
pred <- prediction(cv.preds, yy)
perf <- performance(pred, measure = "tpr", x.measure = "fpr") 
performance(pred, measure = "auc") 

tiff("Low_DCIS_vs_Medium_DCIS.tiff",width=1000,height=500)
par(mfrow=c(1,2))
plot(cv) 
plot(perf,col="red",main="ROC Curve for Low vs Medium Grade DCIS, AUC=0.83")
dev.off()

## Meidum DCIS vs High DCIS
UDH.DCIS = rbind(DCIS.Medium, DCIS.High)
xx = data.matrix(UDH.DCIS[,!is.element(colnames(UDH.DCIS),c("CaseName","Class","ClassGrade","ER"))])
yy = UDH.DCIS[,"ClassGrade"]
yy <- factor(yy)
table(yy)

set.seed(12345)
cv=cv.glmnet(xx,yy,family="bin",type="auc",nfolds=5,alpha=1,keep=T)
cv.preds=cv$fit.preval[,cv$lambda==cv$lambda.min]
pred <- prediction(cv.preds, yy)
perf <- performance(pred, measure = "tpr", x.measure = "fpr") 
performance(pred, measure = "auc") 

tiff("Meidum_DCIS_vs_High_DCIS.tiff",width=1000,height=500)
par(mfrow=c(1,2))
plot(cv) 
plot(perf,col="red",main="ROC Curve for Medium vs High Grade DCIS, AUC=0.69")
dev.off()

########################################################################################################################
##################################### Feature Subset Analysis ##########################################################
########################################################################################################################

Mor <- Data[,c(1:24)]
Int <- Data[,c(25:34,71:80,117:126,163:172,209:218,255:264,301:310,347:356)]
Tex <- Data[,c(35:70,81:116,127:162,173:208,219:254,265:300,311:346,357:392)]

Red <- Data[,c(25:70)]
Green <- Data[,c(71:116)]
Blue <- Data[,c(117:162)]
HSV <- Data[,c(163:208)]
Lab <- Data[,c(209:254)]
Luv <- Data[,c(255:300)]
HE <- Data[,c(301:346)]
BR <- Data[,c(347:392)]

## Morphology Features
xx = data.matrix(Mor)
set.seed(12345)
cv=cv.glmnet(xx,y,family="bin",type="auc",nfolds=9,alpha=1,keep=T)
cv.preds=cv$fit.preval[,cv$lambda==cv$lambda.min]
pred <- prediction(cv.preds, y)
perf <- performance(pred, measure = "tpr", x.measure = "fpr") 
performance(pred, measure = "auc") 

tiff("Morphology_Features.tiff",width=1000,height=500)
par(mfrow=c(1,2))
plot(cv) 
plot(perf,col="red",main="ROC Curve Using Morphology, AUC=0.89")
dev.off()

## Intensity Features
xx = data.matrix(Int)
set.seed(12345)
cv=cv.glmnet(xx,y,family="bin",type="auc",nfolds=9,alpha=1,keep=T)
cv.preds=cv$fit.preval[,cv$lambda==cv$lambda.min]
pred <- prediction(cv.preds, y)
perf <- performance(pred, measure = "tpr", x.measure = "fpr") 
performance(pred, measure = "auc") 

tiff("Intensity_Features.tiff",width=1000,height=500)
par(mfrow=c(1,2))
plot(cv) 
plot(perf,col="red",main="ROC Curve Using Intensity, AUC=0.85")
dev.off()

## Texture Features
xx = data.matrix(Tex)
set.seed(12345)
cv=cv.glmnet(xx,y,family="bin",type="auc",nfolds=9,alpha=1,keep=T)
cv.preds=cv$fit.preval[,cv$lambda==cv$lambda.min]
pred <- prediction(cv.preds, y)
perf <- performance(pred, measure = "tpr", x.measure = "fpr") 
performance(pred, measure = "auc") 

tiff("Texture_Features.tiff",width=1000,height=500)
par(mfrow=c(1,2))
plot(cv) 
plot(perf,col="red",main="ROC Curve Using Texture, AUC=0.91")
dev.off()

## Red Features
xx = data.matrix(Red)
set.seed(12345)
cv=cv.glmnet(xx,y,family="bin",type="auc",nfolds=9,alpha=1,keep=T)
cv.preds=cv$fit.preval[,cv$lambda==cv$lambda.min]
pred <- prediction(cv.preds, y)
perf <- performance(pred, measure = "tpr", x.measure = "fpr") 
performance(pred, measure = "auc") 

tiff("Red_Features.tiff",width=1000,height=500)
par(mfrow=c(1,2))
plot(cv) 
plot(perf,col="red",main="ROC Curve Using Red Channel (RGB), AUC=0.90")
dev.off()

## Green Features
xx = data.matrix(Green)
set.seed(12345)
cv=cv.glmnet(xx,y,family="bin",type="auc",nfolds=9,alpha=1,keep=T)
cv.preds=cv$fit.preval[,cv$lambda==cv$lambda.min]
pred <- prediction(cv.preds, y)
perf <- performance(pred, measure = "tpr", x.measure = "fpr") 
performance(pred, measure = "auc") 

tiff("Green_Features.tiff",width=1000,height=500)
par(mfrow=c(1,2))
plot(cv) 
plot(perf,col="red",main="ROC Curve Using Green Channel (RGB) Features, AUC=0.84")
dev.off()

## Blue Features
xx = data.matrix(Blue)
set.seed(12345)
cv=cv.glmnet(xx,y,family="bin",type="auc",nfolds=9,alpha=1,keep=T)
cv.preds=cv$fit.preval[,cv$lambda==cv$lambda.min]
pred <- prediction(cv.preds, y)
perf <- performance(pred, measure = "tpr", x.measure = "fpr") 
performance(pred, measure = "auc") 

tiff("Blue_Features.tiff",width=1000,height=500)
par(mfrow=c(1,2))
plot(cv) 
plot(perf,col="red",main="ROC Curve Using Blue Channel (RGB), AUC=0.84")
dev.off()

## HSV Features
xx = data.matrix(HSV)
set.seed(12345)
cv=cv.glmnet(xx,y,family="bin",type="auc",nfolds=9,alpha=1,keep=T)
cv.preds=cv$fit.preval[,cv$lambda==cv$lambda.min]
pred <- prediction(cv.preds, y)
perf <- performance(pred, measure = "tpr", x.measure = "fpr") 
performance(pred, measure = "auc") 

tiff("HSV_Features.tiff",width=1000,height=500)
par(mfrow=c(1,2))
plot(cv) 
plot(perf,col="red",main="ROC Curve Using V Channel (HSV), AUC=0.89")
dev.off()

## L (Lab) Features
xx = data.matrix(Lab)
set.seed(12345)
cv=cv.glmnet(xx,y,family="bin",type="auc",nfolds=9,alpha=1,keep=T)
cv.preds=cv$fit.preval[,cv$lambda==cv$lambda.min]
pred <- prediction(cv.preds, y)
perf <- performance(pred, measure = "tpr", x.measure = "fpr") 
performance(pred, measure = "auc") 

tiff("Lab_Features.tiff",width=1000,height=500)
par(mfrow=c(1,2))
plot(cv) 
plot(perf,col="red",main="ROC Curve Using L Channel (Lab), AUC=0.88")
dev.off()

## L (Luv) Features
xx = data.matrix(Luv)
set.seed(12345)
cv=cv.glmnet(xx,y,family="bin",type="auc",nfolds=9,alpha=1,keep=T)
cv.preds=cv$fit.preval[,cv$lambda==cv$lambda.min]
pred <- prediction(cv.preds, y)
perf <- performance(pred, measure = "tpr", x.measure = "fpr") 
performance(pred, measure = "auc") 

tiff("Luv_Features.tiff",width=1000,height=500)
par(mfrow=c(1,2))
plot(cv) 
plot(perf,col="red",main="ROC Curve Using L Channel (Luv), AUC=0.88")
dev.off()

## HE Features
xx = data.matrix(HE)
set.seed(12345)
cv=cv.glmnet(xx,y,family="bin",type="auc",nfolds=9,alpha=1,keep=T)
cv.preds=cv$fit.preval[,cv$lambda==cv$lambda.min]
pred <- prediction(cv.preds, y)
perf <- performance(pred, measure = "tpr", x.measure = "fpr") 
performance(pred, measure = "auc") 

tiff("HE_Features.tiff",width=1000,height=500)
par(mfrow=c(1,2))
plot(cv) 
plot(perf,col="red",main="ROC Curve Using Hematoxylin Channel (H&E), AUC=0.82")
dev.off()

## BlueRatio Features
xx = data.matrix(BR)
set.seed(12345)
cv=cv.glmnet(xx,y,family="bin",type="auc",nfolds=9,alpha=1,keep=T)
cv.preds=cv$fit.preval[,cv$lambda==cv$lambda.min]
pred <- prediction(cv.preds, y)
perf <- performance(pred, measure = "tpr", x.measure = "fpr") 
performance(pred, measure = "auc") 

tiff("BR_Features.tiff",width=1000,height=500)
par(mfrow=c(1,2))
plot(cv) 
plot(perf,col="red",main="ROC Curve Using BlueRatio Image, AUC=0.85")
dev.off()
