# This makefile is used when compiling gpu tests on puhti

# Before compiling on Puhti, load gcc, cuda-mpi and cuda
# module load gcc/8.3.0 hpcx-mpi/2.5.0-cuda cuda/10.1.168
# To run interactively on 2 GPUs run
# srun --ntasks=2  --time=00:10:00 --pty --account=Project_2001973 --partition=gpu --gres=gpu:v100:2 bash
#
# On other systems, run `mpicc -showme:link` and replace MPI_LIBS
# below with the -L and -l parameters it prints.
#
# MPI link flags: library search path first, then the library itself.
# NOTE(review): this path names hpcx-mpi 2.4.0 while the module line in the
# header comment loads hpcx-mpi/2.5.0-cuda -- confirm which one is intended.
MPI_LIBS  = -L/appl/spack/install-tree/gcc-8.3.0/hpcx-mpi-2.4.0-7gyvq3/lib
MPI_LIBS += -lmpi

# Preprocessor definitions passed to hilapp and, through CFLAGS/CXXFLAGS,
# to the compiler. Defines NDIM=1.
OPTS = -DNDIM=1

# Extra include flags appended to the hilapp and compiler command lines.
# Empty by default.
INCLUDES =


# nvcc flags shared by the compile and the link command, kept in one place so
# both steps always agree on the GPU target (compute_70 / sm_70).
NVCC_COMMON = -gencode arch=compute_70,code=sm_70 -fmad=false

# hilapp command. The active definition is set up for Puhti (-DPUHTI and the
# clang 8.0.1 include directory). If testing on a laptop, comment it out and
# uncomment the definition that follows it.
HILAPP = ../../bin/hilapp -target:CUDA -DCUDA -DPUHTI -DHILAPP -I../../../llvm/lib/clang/8.0.1/include/ $(INCLUDES)
#HILAPP = ../../bin/hilapp -target:CUDA -DCUDA -DHILAPP $(INCLUDES)

# Compile command: every input is treated as CUDA source (-x cu) and built
# as relocatable device code (-dc), with the cub headers on the include path.
CC = nvcc -dc -x cu $(NVCC_COMMON) -DCUDA -I../../../cub/ $(INCLUDES)

# Link command: nvcc drives the link and pulls in the MPI library flags.
LD = nvcc $(NVCC_COMMON) $(MPI_LIBS)


# Debug flags for nvcc: -g for host code, -G for device code.
DEBUG_FLAGS = -g -G

# Flags for the .c rule and for the .cpp/.cpt rules respectively.
# Both carry the debug flags plus the shared preprocessor options in OPTS.
CFLAGS   = $(DEBUG_FLAGS) $(OPTS)
CXXFLAGS = $(DEBUG_FLAGS) $(OPTS)

# Keep the generated files that make would otherwise delete as intermediates
# of the .cpp -> .cpt -> .tr.o -> .exe chain, so they are not rebuilt on
# every run and stay available for inspection.
# %.tr.o is listed explicitly: entries here are matched against the target
# patterns of the implicit rules, and '%.o' is a different pattern from
# '%.tr.o'.
.PRECIOUS: %.cpt %.tr.o %.o

# Object files linked into every test executable, in link order.
EXE_OBJS = inputs.o mersenne_inline.o lattice.o setup_layout_generic.o map_node_layout_trivial.o hila_cuda.o com_mpi.o

# Link <name>.exe from the translated test object <name>.tr.o plus the shared
# objects above. The Makefile is a prerequisite so that editing it forces a
# relink; $(filter %.o,$^) keeps it off the link command line.
%.exe: %.tr.o Makefile $(EXE_OBJS)
	$(LD) -o $@ $(filter %.o,$^)

# Run hilapp on a test source <name>.cpp to obtain <name>.cpt.
# No output name is passed on the command line.
# NOTE(review): presumably hilapp derives the .cpt name from the input file
# by itself -- confirm against the hilapp options.
%.cpt : %.cpp
	$(HILAPP) $(OPTS) $<

# Compile the hilapp output <name>.cpt into the object file <name>.tr.o.
# CC already carries '-x cu', so the .cpt input is compiled as CUDA source.
%.tr.o: %.cpt
	$(CC) $(CXXFLAGS) -c $< -o $@

# Build <name>.o in the current directory from plumbing/<name>.cpp.
# The output file is named explicitly with '-o $@' (as in the %.tr.o rule)
# so the result does not depend on the compiler's default output naming.
%.o : plumbing/%.cpp
	$(CC) $(CXXFLAGS) -c $< -o $@


# Build <name>.o in the current directory from plumbing/backend_cuda/<name>.cpp.
# The output file is named explicitly with '-o $@' (as in the %.tr.o rule)
# so the result does not depend on the compiler's default output naming.
%.o : plumbing/backend_cuda/%.cpp
	$(CC) $(CXXFLAGS) -c $< -o $@


# Build <name>.o from a C source <name>.c in the current directory, using
# CFLAGS instead of CXXFLAGS. CC carries '-x cu', so the file is still
# compiled by nvcc as CUDA source.
# The output file is named explicitly with '-o $@' (as in the %.tr.o rule)
# so the result does not depend on the compiler's default output naming.
%.o : %.c
	$(CC) $(CFLAGS) -c $< -o $@

# Remove the .cpt files produced by the %.cpt rule.
# Declared phony so that a file named "clean" in this directory cannot make
# the target look up to date and silently skip the recipe.
.PHONY: clean
clean:
	rm -rf *.cpt

# Remove all object files and executables. The .cpt files are left alone;
# use the clean target for those.
# Declared phony so that a file named "cleanall" in this directory cannot
# make the target look up to date and silently skip the recipe.
.PHONY: cleanall
cleanall:
	rm -rf *.o *.exe

