      module mod_superlu_dist
      use superlu_mod
      implicit none
      
      include 'mpif.h'

      integer, parameter :: op_create_and_init = 0
      integer, parameter :: op_setup_mtx = 1
      integer, parameter :: op_solve_mtx = 2
      integer, parameter :: op_dealloc_mtx = 3
      integer, parameter :: op_quit_work = 4
      integer, parameter :: op_release_and_destroy = 5

      integer, pointer :: rowind(:), colptr(:)
      double precision, pointer :: values(:), b(:)
      integer :: iam, nprow, npcol, n, m, nnz
      
      integer (superlu_ptr) :: &
         grid, options, ScalePermstruct, LUstruct, SOLVEstruct, A, stat


      contains
      
      
      subroutine do_superlu_dist_before(iam, nprocs, nprow, npcol, ierr)
         integer, intent(out) :: iam, nprocs, nprow, npcol, ierr
         
         include 'formats.dek'

         call do_superlu_dist_mpi_init(ierr)
         if (ierr /= 0) return
         
         call do_superlu_dist_size(nprocs, ierr)
         if (ierr /= 0) return
         
         if (.true.) then
            nprow = floor(sqrt(dble(nprocs)))
            if (nprow <= 0) then
               ierr = -1
               return
            end if
            npcol = nprocs/nprow
         else
            npcol = floor(sqrt(dble(nprocs)))
            if (npcol <= 0) then
               ierr = -1
               return
            end if
            nprow = nprocs/npcol
         end if
      
         call do_superlu_dist_create_and_init(nprow, npcol, iam, ierr)
         if (ierr /= 0) return

         if (iam < nprow * npcol .and. iam /= 0) then
            call do_superlu_dist_start_work(ierr)
            if (ierr /= 0) return
         end if
         
      end subroutine do_superlu_dist_before
      
      
      subroutine do_superlu_dist_after(ierr)
         integer, intent(out) :: ierr
         call do_superlu_dist_release_and_destroy(ierr)
         if (ierr /= 0) return
         call do_superlu_dist_mpi_finalize(ierr)
      end subroutine do_superlu_dist_after
      
            
      subroutine create_and_init(ierr)
         integer, intent(out) :: ierr
         include 'formats.dek'
         ierr = 0
         call f_create_gridinfo_handle(grid)
         call f_create_options_handle(options)
         call f_create_ScalePerm_handle(ScalePermstruct)
         call f_create_LUstruct_handle(LUstruct)
         call f_create_SOLVEstruct_handle(SOLVEstruct)
         call f_create_SuperMatrix_handle(A)
         call f_create_SuperLUStat_handle(stat)
         call f_superlu_gridinit(MPI_COMM_WORLD, nprow, npcol, grid)
         call get_GridInfo(grid, iam=iam)
      end subroutine create_and_init

      
      subroutine setup_matrix(ierr)
         integer, intent(out) :: ierr
         ierr = 0
         if (iam /= 0) then
            allocate(colptr(n+1), rowind(nnz), values(nnz), stat=ierr)
            if (ierr /= 0) return
         end if
         call f_dcreate_dist_matrix(A, m, n, nnz, values, rowind, colptr, grid)
         call f_set_default_options(options)
         call set_superlu_options(options,ParSymbFact=YES,PrintStat=NO) 
         call get_SuperMatrix(A, nrow=m, ncol=n)
         call f_ScalePermstructInit(m, n, ScalePermstruct)
         call f_LUstructInit(m, n, LUstruct)
      end subroutine setup_matrix
      
      
      subroutine solver(ierr)
         integer, intent(out) :: ierr
         integer, parameter :: nrhs = 1
         integer :: i
         !integer :: grid_comm
         real*8  :: berr(nrhs)
         logical, parameter :: dbg = .false.
         include 'formats.dek'
         
         ierr = 0
         if (iam /= 0) allocate(b(n))
         
         !call get_GridInfo(grid, comm=grid_comm)     <<<< fix this
         !grid_comm = MPI_COMM_WORLD
         
         if (iam == 0 .and. dbg) then
            write(*,2) 'n', n
            write(*,2) 'nnz', nnz
            do i=1,n+1
               write(*,3) 'colptr', i, colptr(i)
            end do
            do i=1,nnz
               write(*,3) 'rowind', i, rowind(i)
            end do
            do i=1,nnz
               write(*,2) 'values', i, values(i)
            end do
            do i=1,n
               write(*,2) 'b', i, b(i)
            end do
         end if
         
         call mpi_bcast(b, n, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
         if (ierr /= 0) return
         call f_PStatInit(stat)
         ! ACTUALLY doing ABglobal -- loader complained when I tried to rename it
         call f_pdgssvx(options, A, ScalePermstruct, b, n, nrhs, &
                        grid, LUstruct, berr, stat, ierr)
         call f_PStatFree(stat)
         call set_superlu_options(options,Fact=FACTORED) 
         if (iam /= 0) deallocate(b)
         
         if (iam == 0 .and. dbg) then
            do i=1,n
               write(*,2) 'x', i, b(i)
            end do
         end if
      end subroutine solver
      
      
      subroutine dealloc_mtx(ierr)
         integer, intent(out) :: ierr
         integer :: init
         ierr = 0
         call f_Destroy_SuperMat_Store_dist(A)
         call f_ScalePermstructFree(ScalePermstruct)
         call f_Destroy_LU(n, grid, LUstruct)
         call f_LUstructFree(LUstruct)
         call get_superlu_options(options, SolveInitialized=init)
         if (init == YES) then
            call f_dSolveFinalize(options, SOLVEstruct)
         endif
         if (iam /= 0) then
            deallocate(colptr, rowind, values)
         end if
      end subroutine dealloc_mtx
      
      
      subroutine release_and_destroy(ierr)
         integer, intent(out) :: ierr
         ierr = 0
         call f_superlu_gridexit(grid)
         call f_destroy_gridinfo_handle(grid)
         call f_destroy_options_handle(options)
         call f_destroy_ScalePerm_handle(ScalePermstruct)
         call f_destroy_LUstruct_handle(LUstruct)
         call f_destroy_SOLVEstruct_handle(SOLVEstruct)
         call f_destroy_SuperMatrix_handle(A)
         call f_destroy_SuperLUStat_handle(stat)
      end subroutine release_and_destroy


      subroutine do_superlu_dist_quit_work(ierr)
         integer, intent(out) :: ierr
         logical :: quit
         call do1_task(op_quit_work, quit, ierr)
      end subroutine do_superlu_dist_quit_work
      
      
      subroutine do_superlu_dist_release_and_destroy(ierr)
         integer, intent(out) :: ierr
         logical :: quit
         call do1_task(op_release_and_destroy, quit, ierr)
      end subroutine do_superlu_dist_release_and_destroy
      
      
      subroutine do_superlu_dist_create_and_init(nprow_in, npcol_in, iam_out, ierr)
         integer, intent(in) :: nprow_in, npcol_in
         integer, intent(out) :: iam_out, ierr
         logical :: quit
         nprow = nprow_in
         npcol = npcol_in
         call do1_task(op_create_and_init, quit, ierr)
         iam_out = iam
      end subroutine do_superlu_dist_create_and_init
      
      
      subroutine do_superlu_dist_start_work(ierr)
         integer, intent(out) :: ierr
         integer :: op
         logical :: quit
         op = -1
         !call omp_set_num_threads(1) 
         do ! loop until time to quit
            call do1_task(op, quit, ierr)
            if (quit) return
         end do
      end subroutine do_superlu_dist_start_work
      
      
      subroutine do1_task(op_in, quit, ierr)
         integer, intent(in) :: op_in
         logical, intent(out) :: quit
         integer, intent(out) :: ierr
         integer :: op
         ierr = 0
         quit = .false.
         op = op_in
         ! send or receive the next assignment
         
         !call get_GridInfo(grid, comm=grid_comm)     <<<< fix this
         
         
         call mpi_bcast(op, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
         if (ierr /= 0) return
         select case(op)
            case (op_create_and_init)
               call create_and_init(ierr)
            case (op_setup_mtx)
               call setup_matrix(ierr)
            case (op_solve_mtx)
               call solver(ierr)
            case (op_dealloc_mtx)
               call dealloc_mtx(ierr)
            case (op_release_and_destroy)
               call release_and_destroy(ierr)
            case (op_quit_work)
               quit = .true.
         end select
      end subroutine do1_task
      
      
      logical function use_superlu_dist()
         use_superlu_dist = .true.
      end function use_superlu_dist
      
      
      subroutine do_superlu_dist_mpi_init(ierr)
         integer, intent(out) :: ierr
         call mpi_init(ierr)
      end subroutine do_superlu_dist_mpi_init
      
      
      subroutine do_superlu_dist_mpi_finalize(ierr)
         integer, intent(out) :: ierr
         call mpi_finalize(ierr)
      end subroutine do_superlu_dist_mpi_finalize


      subroutine do_superlu_dist_work_sizes(n,nzmax,lrd,lid)
         integer, intent(in) :: n,nzmax
         integer, intent(out) :: lrd, lid
         lid = 0
         lrd = 0
      end subroutine do_superlu_dist_work_sizes
      
      
      subroutine do_superlu_dist_size(size, ierr)
         integer, intent(out) :: size, ierr
         ierr = 0
         call MPI_COMM_SIZE(MPI_COMM_WORLD,size,ierr) 
      end subroutine do_superlu_dist_size
      
      
      subroutine do_superlu_dist_decsols( &
            iopt,n_in,nz,colptr_in,rowind_in,values_in,b_inout, &
            lrd,rpar_decsol,lid,ipar_decsol,ierr)
         integer, intent(in) :: iopt, n_in, nz, lrd, lid
         integer, intent(in), target :: colptr_in(n+1), rowind_in(nz)
         double precision, intent(in), target :: values_in(nz)
         double precision, intent(inout), target :: b_inout(n)
         double precision, intent(inout), target :: rpar_decsol(lrd)
         integer, intent(inout), target :: ipar_decsol(lid)
         integer, intent(out) :: ierr
         logical :: quit
         include 'formats.dek'
         ierr = 0
         nnz = nz
         n = n_in
         m = n
         colptr => colptr_in
         rowind => rowind_in
         values => values_in
         b => b_inout
         if (iopt == 0) then ! setup
            call do1_task(op_setup_mtx, quit, ierr)
         else if (iopt == 1) then ! solve
            call do1_task(op_solve_mtx, quit, ierr)
         else if (iopt == 2) then ! deallocate
            call do1_task(op_dealloc_mtx, quit, ierr)
         else
            ierr = -1
            write(*,2) 'superlu_mt_decsols: bad iopt', iopt
         end if
      end subroutine do_superlu_dist_decsols


      end module mod_superlu_dist
