# Shell used to run recipes, and the command used to create output directories.
SHELL   := /bin/sh
MKDIR_P := mkdir --parents

# Parsed GEO metadata file and the directory handed to the analysis scripts
# as their figures argument.
METADATA := data/meta.rds
FIGS     := figures

# Text reports built by `all`; each one captures the output of one R script.
EWAS := ewas.txt
CHRY := chromosomeY.txt
REPL := technical_replicates.txt

# Raw archives, one per GEO series, as stored under data/.
GSE60655  := data/GSE60655_RAW.tar
GSE61496  := data/GSE61496_RAW.tar
GSE63106  := data/GSE63106_RAW.tar
GSE65163  := data/GSE65163_RAW.tar
GSE69502  := data/GSE69502_RAW.tar
GSE74432  := data/GSE74432_RAW.tar
GSE75196  := data/GSE75196_RAW.tar
GSE75248  := data/GSE75248_RAW.tar
GSE85042  := data/GSE85042_RAW.tar
GSE85566  := data/GSE85566_RAW.tar
GSE86961  := data/GSE86961_RAW.tar
GSE87571  := data/GSE87571_RAW.tar
GSE89251  := data/GSE89251_RAW.tar
GSE90871  := data/GSE90871_RAW.tar
GSE97362  := data/GSE97362_RAW.tar
GSE99863  := data/GSE99863_RAW.tar
GSE102177 := data/GSE102177_RAW.tar

# Aggregate target: builds the three analysis reports. It is the first rule
# in this file, so a bare `make` selects it.
.PHONY: all
all: \
  $(REPL) \
  $(EWAS) \
  $(CHRY)

# Recursively remove the directory named by DESTDIR.
#
# `clean` is a command, not a file, so it is declared phony; otherwise a file
# called `clean` in the working directory would silently disable it.
#
# NOTE(review): DESTDIR is not assigned anywhere in this file, so it has to be
# supplied by the caller (e.g. `make clean DESTDIR=<dir>`). With an empty
# DESTDIR the recipe would run `rm -r` without an operand, so stop with an
# explicit message instead.
.PHONY: clean
clean:
	$(if $(strip $(DESTDIR)),,$(error DESTDIR is empty; run 'make clean DESTDIR=<dir>'))
	rm -r $(DESTDIR)

# Download datasets from GEO FTP server.
#
# Every raw archive is fetched by the same recipe. GEO stores each series in a
# directory named after the accession with its last three characters replaced
# by "nnn" (GSE60655 -> GSE60nnn, GSE102177 -> GSE102nnn), so the URL is
# derived from the target name instead of being written out once per dataset.
#
#   $(@D)  directory part of the target (data)
#   $(@F)  file part of the target      (GSE60655_RAW.tar)
#   acc    accession, i.e. $(@F) without the _RAW.tar suffix
#
# The rules have no prerequisites, so an archive is only downloaded when the
# file does not exist yet.
$(GSE60655) $(GSE61496) $(GSE63106) $(GSE65163) $(GSE69502) $(GSE74432) \
$(GSE75196) $(GSE75248) $(GSE85042) $(GSE85566) $(GSE86961) $(GSE87571) \
$(GSE89251) $(GSE90871) $(GSE97362) $(GSE99863) $(GSE102177):
	$(MKDIR_P) $(@D)
	acc=$(patsubst %_RAW.tar,%,$(@F)); \
	wget --timestamping --directory-prefix=$(@D) \
	  "ftp://ftp.ncbi.nlm.nih.gov/geo/series/$${acc%???}nnn/$$acc/suppl/$(@F)"

# Build the metadata file by running the script that downloads and parses
# metadata from GEO. The target path is passed to the script as its only
# argument; everything the script prints (stdout and stderr) is captured in
# retrieve_metadata.txt. Editing the script triggers a rebuild.
$(METADATA): ./scripts/retrieve_metadata.R
	$(MKDIR_P) $(@D)
	Rscript --vanilla ./scripts/retrieve_metadata.R $@ \
	  >retrieve_metadata.txt 2>&1

# `extract` unpacks every downloaded archive into data/. The recipe never
# creates a file called `extract`; the name only serves as a hook that the
# analysis rules list as an order-only prerequisite.
# NOTE(review): marking it .INTERMEDIATE presumably keeps make from re-running
# the extraction when all dependent reports are already up to date -- confirm.
.INTERMEDIATE: extract

# The archives are order-only prerequisites: they must exist before extraction
# but their timestamps never force it. $| expands to that list in the order
# written here.
extract: | $(GSE60655) $(GSE61496) $(GSE63106) $(GSE65163) $(GSE69502) $(GSE74432) \
	     $(GSE75196) $(GSE75248) $(GSE85042) $(GSE85566) $(GSE86961) $(GSE87571) \
	     $(GSE89251) $(GSE90871) $(GSE97362) $(GSE99863) $(GSE102177)
	for archive in $|; do \
	  tar --extract --file "$$archive" --directory data || exit 1; \
	done
	# Drop the GPL13534 manifest/annotation files from data/. They are removed
	# from the same relative directory the archives were unpacked into; a
	# $(DESTDIR)/ prefix would point at /data/... whenever DESTDIR is unset.
	rm --force \
	 data/GPL13534_450K_Manifest_header_Descriptions.xlsx.gz \
	 data/GPL13534_HumanMethylation450_15017482_v.1.1.bpm.txt.gz \
	 data/GPL13534_HumanMethylation450_15017482_v.1.1.csv.gz \
	 data/GPL13534_HumanMethylation450_15017482_v.1.2.bpm.gz

# Run scripts/chromosomeY.R with the metadata file and the figures directory
# as arguments; the script's combined stdout/stderr becomes the report.
# Rebuilt when the metadata or the script changes; `extract` is order-only.
#
# Output is captured in $@.tmp and renamed to $@ only after Rscript succeeds.
# Redirecting straight into $@ would leave a fresh target behind after a
# failed run, which make would then treat as up to date. After a failure the
# captured log stays in $@.tmp for inspection.
$(CHRY): $(METADATA) ./scripts/chromosomeY.R | extract
	$(MKDIR_P) $(FIGS)
	Rscript --vanilla --verbose ./scripts/chromosomeY.R $(METADATA) $(FIGS) >$@.tmp 2>&1
	mv -f $@.tmp $@

# Run scripts/ewas_impact.R with the metadata file and the figures directory
# as arguments; the script's combined stdout/stderr becomes the report.
# Rebuilt when the metadata or the script changes; `extract` is order-only.
#
# Output is captured in $@.tmp and renamed to $@ only after Rscript succeeds.
# Redirecting straight into $@ would leave a fresh target behind after a
# failed run, which make would then treat as up to date. After a failure the
# captured log stays in $@.tmp for inspection.
$(EWAS): $(METADATA) ./scripts/ewas_impact.R | extract
	$(MKDIR_P) $(FIGS)
	Rscript --vanilla --verbose ./scripts/ewas_impact.R $(METADATA) $(FIGS) >$@.tmp 2>&1
	mv -f $@.tmp $@

# Run scripts/technical_replicates.R with the metadata file and the figures
# directory as arguments; the script's combined stdout/stderr becomes the
# report. Rebuilt when the metadata or the script changes; `extract` is
# order-only.
#
# Output is captured in $@.tmp and renamed to $@ only after Rscript succeeds.
# Redirecting straight into $@ would leave a fresh target behind after a
# failed run, which make would then treat as up to date. After a failure the
# captured log stays in $@.tmp for inspection.
$(REPL): $(METADATA) ./scripts/technical_replicates.R | extract
	$(MKDIR_P) $(FIGS)
	Rscript --vanilla --verbose ./scripts/technical_replicates.R $(METADATA) $(FIGS) >$@.tmp 2>&1
	mv -f $@.tmp $@
