#' Extract the gene table from one of the two ETC data files
#'
#' Reads a "+"-delimited text file and returns its records as a data frame.
#' The first two lines of the file (a leading line and the column-header
#' line) are skipped; every following line must hold exactly five
#' "+"-separated fields, which are assigned to columns by position.
#'
#' @param data.type Selects the input file: `1` reads
#'   "data/onc2015280x2-A.txt", any other value reads
#'   "data/onc2015280x2-B.txt". Paths are relative to the working directory.
#' @return A data frame with the columns `Genesymbol`, `ENSEMBLgeneID`,
#'   `Genedescription`, `logTwoFC` (numeric) and `BHadjustedPvalue` (text,
#'   at most the first seven characters of the last field). If a record
#'   does not have exactly five fields, a warning naming the offending line
#'   is raised and only the records before it are returned. A file without
#'   data records yields a zero-row data frame with the same columns.
extractETCdata <- function(data.type) {
  if (data.type == 1) {
    filename <- "data/onc2015280x2-A.txt"
  } else {
    filename <- "data/onc2015280x2-B.txt"
  }
  if (!file.exists(filename)) {
    stop("Input file not found: ", filename, call. = FALSE)
  }

  n_header <- 2L      # leading line + column-header line
  n_fields <- 5L      # symbol, gene ID, description, log2 FC, p-value
  pvalue_width <- 7L  # only this many characters of the last field are kept

  # Read the whole file in one call: no external `wc -l`, no connection left
  # open on error, and no read past the end of the file.
  all_lines <- readLines(con = filename, warn = FALSE)
  records <- all_lines[-seq_len(n_header)]

  # fixed = TRUE makes "+" a literal delimiter rather than a regex.
  # strsplit() drops a trailing empty field, so a delimiter is appended first
  # to keep the field count equal to (number of "+" in the line) + 1.
  fields <- strsplit(paste0(records, "+"), split = "+", fixed = TRUE)

  # Stop at the first malformed record and keep what was parsed before it.
  malformed <- which(lengths(fields) != n_fields)
  if (length(malformed) > 0) {
    first_bad <- malformed[1]
    warning("Correction needed at - ", first_bad + n_header, call. = FALSE)
    fields <- fields[seq_len(first_bad - 1L)]
  }

  # Pull the i-th field out of every record as a character vector.
  column <- function(i) {
    vapply(fields, function(rec) rec[[i]], character(1))
  }

  oncETC <- data.frame(
    Genesymbol = column(1L),
    ENSEMBLgeneID = column(2L),
    Genedescription = column(3L),
    logTwoFC = as.numeric(column(4L)),
    # substr() truncates to the fixed width without padding short values.
    BHadjustedPvalue = substr(column(5L), 1L, pvalue_width)
  )
  oncETC
}