# Compiler drivers. LD is what the link rule invokes and CXX is what the
# object rule invokes; CC and F90 are defined here but not referenced by the
# rules in this file.
CXX := mpicxx
CC  := mpicc
LD  := mpicxx
F90 := ifort

# Add the suffixes used by this build to make's list of known suffixes.
# The known-suffix list is only consulted for old-style suffix rules; the
# rules written in this file are pattern rules.
.SUFFIXES: .o .cpp .ptx .cu

# Root of the CUDA toolkit installation; its include/ directory and
# bin/nvcc are used below.
CUDA_TK := /usr/local/cuda

# Host optimisation and warning flags.
# Unoptimised alternative (disabled):
# OFLAGS := -O0 -g -Wall
OFLAGS := -O3 -g -Wall -fopenmp

# Host C++ compile flags. Left recursively expanded so OFLAGS and CUDA_TK are
# looked up when the flags are actually used.
CXXFLAGS = -fPIC $(OFLAGS) -I$(CUDA_TK)/include




# CUDA compiler driver, taken from the toolkit selected by CUDA_TK.
# Device-emulation debug configuration (disabled; it refers to CUDA_SDK and
# CUDPP, neither of which is defined in this file):
# NVCC      = $(CUDA_TK)/bin/nvcc  --device-emulation
# NVCCFLAGS = -D_DEBUG -O0 -g -I$(CUDA_SDK)/common/inc -arch=sm_12 --maxrregcount=64  --opencc-options -OPT:Olimit=0 -I$(CUDPP)/cudpp/include
NVCC = $(CUDA_TK)/bin/nvcc

# Device compile flags. Disabled alternatives:
#   reduced-precision math:
#     NVCCFLAGS = -arch sm_20 -ftz=true -prec-div=false -prec-sqrt=false
#   device-side debugging:
#     NVCCFLAGS = -arch sm_20 -g -G
#   Mac OS X:
#     NVCCFLAGS = -arch sm_12 -Xcompiler="-Duint=unsigned\ int"
NVCCFLAGS := -arch sm_20

# Libraries handed to the link step.
LDFLAGS := -lcuda -lOpenCL

# Project header directory, shared by the host and device compilers.
INCLUDEPATH := ./include

# Tipsy support is enabled by the next line; comment it out to build without it.
CXXFLAGS += -DTIPSYOUTPUT

# Make the project headers visible to both compilers.
CXXFLAGS  += -I$(INCLUDEPATH)
NVCCFLAGS += -I$(INCLUDEPATH)

# Directory holding the CUDA kernel sources.
CUDAKERNELSPATH := CUDAkernels

# Kernel sources, one per line, named relative to CUDAKERNELSPATH.
CUDAKERNELS := \
    build_tree.cu \
    compute_properties.cu \
    compute_propertiesD.cu \
    dev_approximate_gravity.cu \
    dev_approximate_gravity_let.cu \
    scanKernels.cu \
    sortKernels.cu \
    support_kernels.cu \
    timestep.cu

# PTX file generated for each kernel source (written to the current directory).
CUDAPTX = $(patsubst %.cu,%.ptx,$(CUDAKERNELS))

# Directory holding the host C++ sources.
SRCPATH := src

# Host sources, one per line, named relative to SRCPATH.
SRC := \
    main.cpp \
    octree.cpp \
    load_kernels.cpp \
    build.cpp \
    compute_properties.cpp \
    sort_bodies_gpu.cpp \
    gpu_iterate.cpp \
    parallel.cpp

# Object file for each host source (written to the current directory).
OBJ = $(patsubst %.cpp,%.o,$(SRC))

# Name of the linked executable.
PROG := main




# "all" and "kernels" name actions rather than files; declaring them phony
# keeps a stray file with either name from making the goal look up to date.
.PHONY: all kernels

# Default goal: host objects, PTX kernels and the linked executable.
all:	  $(OBJ) $(CUDAPTX) $(PROG)

# Build only the PTX kernels.
kernels:  $(CUDAPTX)

# Link the executable from the host objects.
# - The libraries in LDFLAGS come after the objects: the linker resolves
#   symbols left to right, so a library listed before the objects that need
#   it can be skipped (as-needed or static linking).
# - OFLAGS is passed as well so that -fopenmp, used when compiling, also
#   selects the OpenMP runtime at link time.
$(PROG): $(OBJ)
	$(LD) $(OFLAGS) $^ -o $@ $(LDFLAGS)

# Host code: compile $(SRCPATH)/<name>.cpp into <name>.o in the current
# directory.
%.o: $(SRCPATH)/%.cpp
	$(CXX) $(CXXFLAGS) -o $@ -c $<

# Device code: translate $(CUDAKERNELSPATH)/<name>.cu into <name>.ptx in the
# current directory.
%.ptx: $(CUDAKERNELSPATH)/%.cu
	$(NVCC) $(NVCCFLAGS) -o $@ -ptx $<

# Remove the build products from the current directory: all objects, all PTX
# files and the executable. Declared phony so that a file named "clean" cannot
# make this goal look up to date. The executable is referred to through PROG
# so that renaming it does not leave a stale binary behind.
.PHONY: clean
clean:
	$(RM) -r *.o *.ptx $(PROG)

# Hand-maintained extra prerequisites. These rules have no recipes; they only
# add prerequisites to targets built by the pattern rules.

# Every host object is rebuilt when any header in the include directory changes.
$(OBJ): $(INCLUDEPATH)/*.h

# support_kernels.ptx additionally depends on node_specs.h only.
support_kernels.ptx: $(INCLUDEPATH)/node_specs.h

# PTX files that additionally depend on support_kernels.cu and node_specs.h
# (presumably because their sources include them -- confirm against the .cu
# files when adding a kernel).
ptx_using_support := \
    build_tree.ptx \
    compute_properties.ptx \
    compute_propertiesD.ptx \
    dev_approximate_gravity.ptx \
    dev_approximate_gravity_let.ptx \
    timestep.ptx \
    sortKernels.ptx
$(ptx_using_support): $(CUDAKERNELSPATH)/support_kernels.cu $(INCLUDEPATH)/node_specs.h

# sortKernels.ptx also depends on scanKernels.cu.
sortKernels.ptx: $(CUDAKERNELSPATH)/scanKernels.cu








