# This is the makefile for the mesa matrix library (a subset of lapack)

# Root of the MESA tree, relative to this make directory.
MESA_DIR = ../..

#################################################################
#
# PREAMBLE

# Shared build settings; everything below that is not defined in this
# file (CC, FC, CPP, LIB_TOOL, MOD_*_DIR, ...) is presumably set here.
include $(MESA_DIR)/utils/makefile_header

# With MTX_OFFLOAD=YES every file is compiled for MIC: both compilers
# get -Doffload plus whatever $(MIC_OFFLOAD) holds.
ifeq "$(MTX_OFFLOAD)" "YES"
   CC += -Doffload $(MIC_OFFLOAD)
   FC += -Doffload $(MIC_OFFLOAD)
endif

# Extra C flags are appended unconditionally.
CC += $(SPECIAL_C_FLAGS)

#################################################################
#
# SOURCE FILES

# Fixed-form LAPACK sources (plus a few BLAS-level helpers) that are built
# into libmesalapack.a.  Only defined when WHICH_LAPACK is USE_SRCS;
# otherwise LAPACK_SRCS stays empty.  Names are listed without the .f
# suffix, in build order.
ifeq ($(WHICH_LAPACK),USE_SRCS)
   LAPACK_SRCS = $(addsuffix .f, \
      dgbcon dgbequ dgbrfs dgbsv dgbsvx dgbtf2 dgbtrf dgbtrs \
      dgecon dgeequ dgerfs dgesv dgesvx dgetf2 dgetrf dgetrs \
      dgtcon dgtrfs dgtsv dgtsvx dgttrf dgttrs dgtts2 \
      disnan dlacn2 dlabad dlacpy dlagtm dlangb dlange dlangt \
      dlantb dlantr dlaqgb dlaqge dlassq dlaswp dlatbs dlatrs \
      drscl dgetri dgesvd dnrm2 dlasq3 dlasq4 dbdsqr dgelqf \
      dlascl dorgbr dorgqr dtrtri dgebrd dgeqrf dlaset dorglq \
      dormbr dgebd2 dgeqr2 dlarfb dlartg dlasq1 dlasv2 dorgl2 \
      dormqr dtrmm dgelq2 dlabrd dlarft dlas2 dlasr dorg2r \
      dormlq drot dtrti2 dtrtrs dlarf dlarfg dlasrt dorm2r \
      dorml2 dlapy2 dlasq2 dtrmv dlazq3 dlasq5 dlasq6 dlazq4 \
      dcabs1 dlamch dlaisnan \
      icmax1 ilaclr iladlr ilaslc ilauplo ilazlr izmax1 ieeeck \
      iladiag ilaenv ilaslr ilaver iparmq ilaclc iladlc ilaprec \
      ilatrans ilazlc izamax \
      sgesv sgetf2 sgetrf sgetrs slaswp \
      zgbtrf zgbtrs zgetrf zgetrs zgetf2 zlaswp zlacgv zswap \
      zgeru ztbsv zgemv ztrsm zgbtf2 zgttrf zgttrs zgtts2 \
      zscal zcopy zgemm)
endif

# Quad-precision LAPACK/BLAS-style sources; their objects always go into
# libmtx.a.  Names are listed without the .f suffix.
LAPACK_QUAD_SRCS = $(addsuffix .f, \
   qgttrf qgttrs qgtts2 qgetrf qgetrs qgemv qgemm qgetf2 \
   qswap iqamax qtrsm qtrsv qger qscal qlamch qlaswp)

# BLAS sources built into libmesablas.a.  Only defined when WHICH_BLAS is
# USE_SRCS; otherwise BLAS_SRCS stays empty.  Names are listed without the
# .f suffix.
ifeq ($(WHICH_BLAS),USE_SRCS)
   BLAS_SRCS = $(addsuffix .f, \
      dgemm dtrsm xerbla lsame dger dcopy dgemv dtbsv \
      daxpy ddot dgbmv dasum dtrsv dscal dswap idamax \
      strsm sgemm slamch isamax sswap sscal sger \
      zaxpy zdotc zgerc zhemv zher2 ztrsv)
endif

# AMD C sources for the KLU archive; each stem below expands to
# amd_<stem>.c.
AMD_SRCS = $(patsubst %,amd_%.c, \
   aat 1 2 postorder post_tree defaults order \
   control info valid preprocess global)

# BTF C sources for the KLU archive.
BTF_SRCS = btf_order.c btf_maxtrans.c btf_strongcomp.c

# COLAMD C sources for the KLU archive.
COLAMD_SRCS = colamd_global.c colamd.c

# Object names for the "d" build of KLU.  The klu_d*.c names are only
# stems for the .o files: the compile rules map klu_d.o to klu.c and
# klu_d_<x>.o to klu_<x>.c.
# Uncomment the next line to add the dump source to the list.
#KLU_D_DEBUG_SRCS = klu_d_dump.c
KLU_D_SRCS = \
   klu_d.c \
   klu_d_kernel.c \
   $(KLU_D_DEBUG_SRCS) \
   $(patsubst %,klu_d_%.c, \
      factor free_numeric solve scale refactor tsolve \
      diagnostics sort extract free_symbolic defaults \
      analyze_given analyze memory c_fortran)

# Object names for the "q" build of KLU (compiled with -DUSE_FLOAT128).
# The klu_q*.c names are only stems for the .o files: the compile rules
# map klu_q.o to klu.c and klu_q_<x>.o to klu_<x>.c.
# Uncomment the next line to add the dump source to the list.
#KLU_Q_DEBUG_SRCS = klu_q_dump.c
KLU_Q_SRCS = \
   klu_q.c \
   klu_q_kernel.c \
   $(KLU_Q_DEBUG_SRCS) \
   $(patsubst %,klu_q_%.c, \
      factor free_numeric solve scale refactor tsolve \
      diagnostics sort extract free_symbolic defaults \
      analyze_given analyze memory c_fortran)

# Everything that ends up in the KLU archive, in link order.
KLU_SRCS = \
   $(BTF_SRCS) \
   $(COLAMD_SRCS) \
   $(AMD_SRCS) \
   $(KLU_D_SRCS) \
   $(KLU_Q_SRCS)

# Free-form Fortran sources of the mtx library itself.  The *_dble/*_quad
# variants of my_lapack95, mod_klu and thomas_block are produced by running
# $(CPP) on the matching .F90 file in $(MOD_PRIVATE_DIR).
MTX_SRCS = \
   my_lapack95_dble.f90 my_lapack95_quad.f90 \
   mtx_support.f90 \
   mod_klu_dble.f90 mod_klu_quad.f90 \
   thomas_block_dble.f90 thomas_block_quad.f90 \
   thomas_block_klu.f90 thomas_block_sparse.f90 \
   bcyclic.f90 mtx_lib.f90

# Generate the double- and quad-precision .f90 variants from their .F90
# sources.  This runs while the makefile is being parsed, on every
# invocation whose goal is not exactly "clean", so the files exist before
# dependency generation and compilation look for them.
#
# The preprocessor output goes to a temporary file first and only replaces
# the real file when its content differs.  Rewriting the files
# unconditionally would give them a fresh timestamp on every run and force
# the corresponding objects to be recompiled each time.
ifneq ($(MAKECMDGOALS),clean)
   # $(call mtx_cpp_gen,<cpp flags>,<.F90 stem in MOD_PRIVATE_DIR>,<output file>)
   mtx_cpp_gen = $(shell $(CPP) $(1) $(MOD_PRIVATE_DIR)/$(2).F90 > $(3).tmp; \
      if cmp -s $(3).tmp $(3); then rm -f $(3).tmp; else mv -f $(3).tmp $(3); fi)

   # -DDBLE selects the double-precision variant; no flag gives the quad one.
   $(call mtx_cpp_gen,-DDBLE,mod_klu,mod_klu_dble.f90)
   $(call mtx_cpp_gen,,mod_klu,mod_klu_quad.f90)
   $(call mtx_cpp_gen,-DDBLE,my_lapack95,my_lapack95_dble.f90)
   $(call mtx_cpp_gen,,my_lapack95,my_lapack95_quad.f90)
   $(call mtx_cpp_gen,-DDBLE,thomas_block,thomas_block_dble.f90)
   $(call mtx_cpp_gen,,thomas_block,thomas_block_quad.f90)
endif

#################################################################
#
# TARGETS

# Main archive: the mtx Fortran objects plus the quad-precision LAPACK
# objects.  Every source in both lists ends in .f or .f90, so the object
# name is just the base name with .o appended.
LIB = libmtx.a
MTX_OBJS = $(addsuffix .o,$(basename $(MTX_SRCS)))
LAPACK_QUAD_OBJS = $(addsuffix .o,$(basename $(LAPACK_QUAD_SRCS)))
LIB_OBJS = $(MTX_OBJS) $(LAPACK_QUAD_OBJS)

# NOTE(review): mtx_def.o is built as a prerequisite but is not added to
# the archive -- confirm that this is intentional.
$(LIB) : mtx_def.o $(LIB_OBJS)
	$(LIB_TOOL) $@ $(LIB_OBJS)

# Bundled LAPACK archive; only defined when WHICH_LAPACK is USE_SRCS.
# Otherwise LAPACK_LIB and LAPACK_OBJS stay empty.
ifeq ($(WHICH_LAPACK),USE_SRCS)
   LAPACK_LIB = libmesalapack.a
   LAPACK_OBJS = $(addsuffix .o,$(basename $(LAPACK_SRCS)))
   $(LAPACK_LIB) : $(LAPACK_OBJS)
	$(LIB_TOOL) $@ $(LAPACK_OBJS)
endif

# Bundled BLAS archive; only defined when WHICH_BLAS is USE_SRCS.
# Otherwise BLAS_LIB and BLAS_OBJS stay empty.
ifeq ($(WHICH_BLAS),USE_SRCS)
   BLAS_LIB = libmesablas.a
   BLAS_OBJS = $(addsuffix .o,$(basename $(BLAS_SRCS)))
   $(BLAS_LIB) : $(BLAS_OBJS)
	$(LIB_TOOL) $@ $(BLAS_OBJS)
endif

# KLU archive (BTF, COLAMD, AMD and both KLU variants), built from C.
KLU_LIB = libmesaklu.a
KLU_OBJS = $(KLU_SRCS:.c=.o)
$(KLU_LIB) : $(KLU_OBJS)
	$(LIB_TOOL) $@ $(KLU_OBJS)

# Default goal: build every archive.  BLAS_LIB and LAPACK_LIB expand to
# nothing unless the bundled sources were selected.
# "all" is a command name, not a file, so declare it phony; otherwise a
# stray file called "all" in this directory would stop make from building.
.PHONY : all
all : $(BLAS_LIB) $(LAPACK_LIB) $(KLU_LIB) $(LIB)

.DEFAULT_GOAL = all

#################################################################
#
# COMPILATION RULES

# C

# Extra checking and warning flags for the KLU C sources.  CCwarn is left
# empty; the commented line below is the optional stricter setting.
CCchecks = -fbounds-check -Wuninitialized -Warray-bounds
CCwarn =
#CCwarn = -Wunused-value -Werror -W -Wno-unused-parameter

# Compiler command and include path shared by all C compile rules.
KLU_C = $(CC) -O3 -fno-common -fexceptions \
        $(CCchecks) $(CCwarn)
KLU_I = -I../klu_src

# Sources are located through VPATH, so $< may carry a directory prefix.
# Every rule names its output explicitly with -o $@ rather than relying on
# the compiler's default choice of object file name.

# amd_global is compiled without -DDINT, unlike the other amd_* files.
amd_global.o: amd_global.c
	$(KLU_C) $(KLU_I) -c $< -o $@

amd_%.o: amd_%.c
	$(KLU_C) $(KLU_I) -DDINT -c $< -o $@

# -DDLONG build of colamd.c.  colamd_l.o is not part of any object list
# defined in this file.
colamd_l.o: colamd.c
	$(KLU_C) $(KLU_I) -DDLONG -c $< -o $@

# The klu_d* objects are built from the klu*.c sources with no extra
# define.
klu_d.o: klu.c
	$(KLU_C) $(KLU_I) -c $< -o $@

klu_d_%.o: klu_%.c
	$(KLU_C) $(KLU_I) -c $< -o $@

# The klu_q* objects are built from the same sources with -DUSE_FLOAT128.
klu_q.o: klu.c
	$(KLU_C) -DUSE_FLOAT128 $(KLU_I) -c $< -o $@

klu_q_%.o: klu_%.c
	$(KLU_C) -DUSE_FLOAT128 $(KLU_I) -c $< -o $@

# Fallback for every other C source (btf_*, colamd*, ...).
%.o : %.c
	$(KLU_C) $(KLU_I) -c $< -o $@

# Fortran

# Compile commands for the library's own sources: the deploy settings,
# combined with $(FCfixed) or $(FCfree).
COMPILE = $(COMPILE_TO_DEPLOY) $(FCfixed)
COMPILE_FREE = $(COMPILE_TO_DEPLOY) $(FCfree)

# Alternative settings; uncomment to replace the definitions above.
#COMPILE = $(COMPILE_TO_TEST) $(FCfixed)
#COMPILE_FREE = $(COMPILE_TO_TEST) $(FCfree)
#COMPILE_FREE = $(COMPILE_TO_DEPLOY) $(FCfree) $(FCwarn_unused)

# Compile commands for the bundled LAPACK/BLAS sources.  The first three
# share one prefix and differ only in the checking flags and the source
# form flag.
COMPILE_XTRA_COMMON = $(COMPILE_BASIC) $(FCwarn) $(FCimpno) $(FCopt)
COMPILE_XTRA = $(COMPILE_XTRA_COMMON) -c $(FCfixed)
COMPILE_XTRA_TEST = $(COMPILE_XTRA_COMMON) $(FCchecks) -g -c $(FCfixed)
COMPILE_XTRA_FREE = $(COMPILE_XTRA_COMMON) -c $(FCfree)
COMPILE_XTRA_NO_OPT = $(COMPILE_BASIC) $(FCnowarn) $(FCfixed) -c

# Command used by the Fortran pattern rules unless a target overrides it.
COMPILE_CMD = $(COMPILE)

# Per-target overrides of COMPILE_CMD.  The Fortran pattern rules run
# "$(COMPILE_CMD) $<", so the values here must NOT include the source file.

# The library's own modules are compiled with the $(FCfree) command.
mtx_lib.o bcyclic.o mtx_support.o \
mod_klu_dble.o mod_klu_quad.o \
my_lapack95_dble.o my_lapack95_quad.o \
thomas_block_dble.o thomas_block_quad.o \
thomas_block_klu.o thomas_block_sparse.o : COMPILE_CMD = $(COMPILE_FREE)

# Bundled LAPACK sources: warnings silenced with -w.  LAPACK_OBJS is empty
# unless WHICH_LAPACK is USE_SRCS.
$(LAPACK_OBJS) : COMPILE_CMD = $(COMPILE_XTRA) -w
$(LAPACK_QUAD_OBJS) : COMPILE_CMD = $(COMPILE_XTRA) -w

# must turn off optimization for dlamch or can get infinite loop!!!
# This assignment comes after the LAPACK_OBJS one, so it wins for dlamch.o.
# (The value used to end in "$<", which made the pattern rule pass
# dlamch.f to the compiler twice.)
# NOTE(review): slamch.o and qlamch.o still get the optimizing command --
# confirm that the dlamch caveat does not apply to them.
dlamch.o : COMPILE_CMD = $(COMPILE_XTRA_NO_OPT) -w
$(filter-out dlamch.o,$(BLAS_OBJS)) : COMPILE_CMD = $(COMPILE_XTRA) -w

# Fortran objects: the command comes from COMPILE_CMD, which individual
# targets may override; the source file is appended here.
%.o : %.f90
	$(COMPILE_CMD) $<

%.o : %.f
	$(COMPILE_CMD) $<

# A .mod file needs its .o to be built first; the recipe itself is a no-op.
%.mod : %.o
	@:

# Remove everything the build creates in this directory, including the
# generated .f90 files and the dependency file.
# "clean" is a command name, not a file, so declare it phony; otherwise a
# file called "clean" would turn `make clean` into a no-op.
.PHONY : clean
clean:
	-@rm -f *.o *.f90 *.mod *.modmic *genmod.f90 *.a .depend

#################################################################
#
# DEPENDENCIES

# Paths to search for source & include files

# GNU make accepts blanks as well as colons between VPATH directories.
VPATH = $(MOD_PUBLIC_DIR) $(MOD_PRIVATE_DIR) \
        ../klu_src ../blas_src ../lapack_src ../lapack_quad

# Automatic dependency generation using makedepf90

# `make nodeps` builds the default goal without generating or reading
# .depend.  It is a command name, not a file, so declare it phony.
.PHONY : nodeps
nodeps : $(.DEFAULT_GOAL)

# Non-empty when "nodeps" or "clean" was requested on the command line.
NODEPS = $(filter nodeps clean,$(MAKECMDGOALS))

ifeq ($(NODEPS),)

  DEPEND_DIRS = $(MOD_PUBLIC_DIR):$(MOD_PRIVATE_DIR)

  # Write to a temporary file and move it into place only on success.
  # Redirecting straight into .depend would leave an empty file behind when
  # makedepf90 is missing or fails, and later runs would treat that empty
  # file as valid and build with no module dependencies at all.
  .depend :
	makedepf90 -m %m.mod -I$(DEPEND_DIRS) $(MTX_SRCS) > $@.tmp || { rm -f $@.tmp; exit 1; }
	mv -f $@.tmp $@
  -include .depend

endif
