! ***********************************************************************
!
!   Copyright (C) 2012  Bill Paxton, P. Garaud, and J.D. Garaud
!
!   MESA is free software; you can use it and/or modify
!   it under the combined terms and restrictions of the MESA MANIFESTO
!   and the GNU General Library Public License as published
!   by the Free Software Foundation; either version 2 of the License,
!   or (at your option) any later version.
!
!   You should have received a copy of the MESA MANIFESTO along with
!   this software; if not, it is available at the mesa website:
!   http://mesa.sourceforge.net/
!
!   MESA is distributed in the hope that it will be useful,
!   but WITHOUT ANY WARRANTY; without even the implied warranty of
!   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
!   See the GNU Library General Public License for more details.
!
!   You should have received a copy of the GNU Library General Public License
!   along with this software; if not, write to the Free Software
!   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
!
! ***********************************************************************

! based on MPI version of trisolve
! written by P. Garaud and J.D. Garaud; V1. released October 1st 2007.



#ifdef DBLE
   module trisolve_dble
   use const_def, only: dp
   use my_lapack95_dble
   implicit none
#else
   module trisolve_quad
   use const_def, only: qp, dp
   use my_lapack95_quad
   implicit none
#endif         
   
   integer, parameter :: dbg_proc = -1
   
   
   contains

      
   subroutine do_trisolve_work_sizes(nvar,nz,nproc,lwork,liwork)
      integer, intent(in) :: nvar,nz,nproc
      integer, intent(out) :: lwork,liwork
      integer :: nzppmax
      lwork = nvar*(2*nvar+1)*nproc
      nzppmax = nz/nproc + 1 ! max number of zones per proc. 
      liwork = 4*nzppmax*nproc
   end subroutine do_trisolve_work_sizes


   subroutine do_trisolve( &
         nvar, nz, nproc_in, ublk1, dblk1, lblk1, sol1, &
         lwork, work, liwork, iwork, ierr)
      use utils_lib
      real(fltp), dimension(:), pointer, intent(inout) :: &
         lblk1, dblk1, ublk1 ! =(nvar,nvar,nz)
         ! row(i) of mtx has lblk(:,:,i), dblk(:,:,i), ublk(:,:,i)
         ! lblk(:,:,1) is not used; ublk(:,:,nz) is not used.
      real(fltp), pointer, intent(inout) :: sol1(:) ! =(nvar,nz)
      integer, intent(in) :: nvar, nz, nproc_in, lwork, liwork
      real(dp), dimension(:), pointer :: work ! (lwork)
      integer, dimension(:), pointer :: iwork ! (liwork)
      integer, intent(out) :: ierr

      !     Routine solves MX = B where 
      !     M is a block-tridiagonal matrix with nz zones
      !     of nvar*nvar sized-blocks
      !     On entry, sol is the rhs, B.
      !     On exit, sol is the solution.




      integer, dimension(nproc_in) :: nzpp,nend,noffset
      integer :: &
         nproc, nzpploc, nendloc, nzavg, nbrest, &
         nblocks, op_err, nstore, nsend, nzppmax, next_proc, &
         idest, isource, nshift, nc, nl, i, j, k, n, proc
      real(fltp) :: piv
      real(fltp), dimension(:,:), pointer :: RPB ! (nvar,nproc)
      real(fltp), dimension(:,:,:), pointer :: &
         work1d2, RPABOVE ! (nvar,nvar,nproc)
  
      integer, dimension(:,:,:), pointer :: NBELOW ! (2,nbppmax,nproc)
      integer, dimension(:,:), pointer :: NDIAG, NABOVE ! (nbppmax,nproc)
      integer, dimension(nproc_in) :: NRPABOVE, NRPDIAG
      integer, dimension(2,nproc_in) :: NRPBELOW, NV, NV2
      character*2 :: mmtype
      character :: vmtype


      real(fltp), dimension(:,:,:), pointer :: ublk, dblk, lblk
      real(fltp), dimension(:,:), pointer :: sol
      
      ublk(1:nvar,1:nvar,1:nz) => ublk1(1:nvar*nvar*nz)
      dblk(1:nvar,1:nvar,1:nz) => dblk1(1:nvar*nvar*nz)
      lblk(1:nvar,1:nvar,1:nz) => lblk1(1:nvar*nvar*nz)
      sol(1:nvar,1:nz) => sol1(1:nvar*nz)
      


      include 'formats.dek'
      ierr = 0
      
      ! set pointers for work arrays
      nproc = min(nproc_in, nz)
      nzppmax = nz/nproc + 1 ! max number of zones per proc. 

      i = 0
      NDIAG(1:nzppmax,1:nproc) => iwork(i+1:i+nzppmax*nproc)
      i = i + nzppmax*nproc
      NABOVE(1:nzppmax,1:nproc) => iwork(i+1:i+nzppmax*nproc)
      i = i + nzppmax*nproc
      NBELOW(1:2,1:nzppmax,1:nproc) => iwork(i+1:i+2*nzppmax*nproc)
      i = i + 2*nzppmax*nproc
      
      if (i > liwork) then
         write(*,*) 'liwork too small for trisolve'
         ierr = -1
         return
      end if

#ifdef DBLE
      i = 0
      RPB(1:nvar,1:nproc) => work(i+1:i+nvar*nproc)
      i = i + nvar*nproc   
      work1d2(1:nvar,1:nvar,1:nproc) => work(i+1:i+nvar*nvar*nproc)
      i = i + nvar*nvar*nproc
      RPABOVE(1:nvar,1:nvar,1:nproc) => work(i+1:i+nvar*nvar*nproc)
      i = i + nvar*nvar*nproc
      
      if (i > lwork) then
         write(*,*) 'lwork too small for trisolve'
         ierr = -1
         return
      end if

#else
      allocate( &
         RPB(nvar,nproc), &
         work1d2(nvar,nvar,nproc), &
         RPABOVE(nvar,nvar,nproc))
#endif

      nzavg = nz/nproc
      nbrest = mod(nz,nproc)
      do i=1,nbrest
         nzpp(i) = nzavg+1
      end do
      do i=nbrest+1,nproc
         nzpp(i) = nzavg
      end do
      nend(1) = nzpp(1)
      do i=2,nproc
         nend(i) = nend(i-1) + nzpp(i)
      end do
      noffset(1) = 0
      do i=2,nproc
         noffset(i) = nend(i-1)
      end do

      NDIAG(:,:) = nvar
      NBELOW(:,:,:) = nvar
      NABOVE(:,:) = nvar
      NBELOW(:,1,1) = 0
      NABOVE(nzpp(nproc),nproc) = 0

!$OMP PARALLEL DO PRIVATE(proc,nendloc,nzpploc,nshift,nstore,i,piv,j,nsend,op_err,nc,nl,k)
      do proc = 1,nproc
         if (proc == dbg_proc) write(*,*) 'Structured reduction, first sweep'
         nendloc=nend(proc)
         nzpploc=nzpp(proc)
         nshift=noffset(proc)
         do nstore=1,nzpploc
            k = nstore+nshift
            do i=1,nvar
               piv = dblk(i,i,k)
               sol(i,k) = sol(i,k)/piv
               do j=1,nvar
                  dblk(i,j,k) = dblk(i,j,k)/piv
                  lblk(i,j,k) = lblk(i,j,k)/piv
                  ublk(i,j,k) = ublk(i,j,k)/piv
               end do
            end do
         end do
         ! ************************** First part : *****************************
         ! Structured reduction, first sweep. Pivot one line, update the next.
         ! *********************************************************************
         do nstore = 1,nzpploc-1
            k = nstore+nshift
            
            if (proc == dbg_proc) write(*,*) 'P_pivot', k
            op_err = 0
            call P_pivot( &
               lblk(:,:,k), dblk(:,:,k), ublk(:,:,k), sol(:,k),&
               nvar, proc, k, op_err)
            if (op_err /= 0) then
               write(*,2) 'first sweep P_pivot failed for', k
               ierr = op_err
            end if
            
            call my_gemv(nvar,nvar,lblk(:,:,k+1),nvar,sol(:,k),sol(:,k+1))
            if (proc == dbg_proc) then
               write(*,'(99(i1,a))') &
                  proc, ' sol(', k+1, ') = sol(', k+1, ') - l(', k+1, ')*sol(', k, ')'
            end if

            call my_gemm(nvar,nvar,nvar,&
               lblk(:,:,k+1),nvar,ublk(:,:,k),nvar,dblk(:,:,k+1),nvar)
            if (proc == dbg_proc) write(*,'(99(i1,a))') &
                  proc, ' dblk(', k+1, ') = dblk(', k+1, ') - l(', k+1, ')*u(', k, ')'

            work1d2(:,:,proc) = 0d0
            call my_gemm(nvar,nvar,nvar,&
               lblk(:,:,k+1),nvar,lblk(:,:,k),nvar,work1d2(:,:,proc),nvar)
            do nc = 1, nvar
               do nl = 1, nvar
                  lblk(nl,nc,k+1) = work1d2(nl,nc,proc)
               end do
            end do
            if (proc == dbg_proc) write(*,'(99(i1,a))') &
                  proc, ' lblk(', k+1, ') = -lblk(', k+1, ')*lblk(', k, ')'

         end do

      end do
!$OMP END PARALLEL DO

      if (ierr /= 0) then
         !stop 'trisolve'
         call dealloc
         return
      end if

      ! **********************************************************************
      ! Note that the last diagonal blocks have not been pivoted yet.
      ! Structured reduction,second sweep step. All processors pass their 
      ! last blocks to the next,which now sweep through the above matrices.
      ! **********************************************************************
      
      do proc = 1, nproc-1
         k = nend(proc)
         next_proc = proc+1
         do i=1,nvar
            do j=1,nvar 
               RPABOVE(i,j,next_proc) = ublk(i,j,k)
            end do
            RPB(i,next_proc) = sol(i,k)
         end do
      end do

!$OMP PARALLEL DO PRIVATE(proc,nendloc,nzpploc,nshift,nl,nc,nstore,k)
      do proc = 2,nproc  ! NOTE: skip 1st processor         
         nshift=noffset(proc)
         
         do nstore = 1,nzpp(proc)-1
            k = nshift + nstore
            
            call my_gemv( &
               nvar,nvar,RPABOVE(:,:,proc),nvar,sol(:,k),RPB(:,proc))
            if (proc == dbg_proc) write(*,'(99(i1,a))') &
                  proc, ' RPB(', proc, ') = RPB(', proc, ') - RPABOVE(', proc, ')*sol(', k, ')'

            call my_gemm(nvar,nvar,nvar,&
               RPABOVE(:,:,proc),nvar,lblk(:,:,k),nvar,dblk(:,:,nshift),nvar)
            if (proc == dbg_proc) write(*,'(99(i1,a))') &
                  proc, ' dblk(', nshift, ') = dblk(', nshift, ') - RPABOVE(', proc, ')*lblk(', k, ')'

            work1d2(:,:,proc) = 0d0
            call my_gemm(nvar,nvar,nvar,&
               RPABOVE(:,:,proc),nvar,ublk(:,:,k),nvar,work1d2(:,:,proc),nvar)
            do nc = 1, nvar
               do nl = 1, nvar
                  RPABOVE(nl,nc,proc) = work1d2(nl,nc,proc)
               end do
            end do
            if (proc == dbg_proc) write(*,'(99(i1,a))') &
                  proc, ' RPABOVE(', proc, ') = -RPABOVE(', proc, ')*ublk(', k, ')'

         end do
         
      end do
!$OMP END PARALLEL DO

      ! *********************************************************************
      ! At this point,all the processors posses one unknown block left to be 
      ! inverted in one big tridiagonal system spanning the last blocks in each
      ! processor
      ! *********************************************************************

      ! *********************************************************************
      ! Last steps: sweeping reduction across processors. 
      ! *********************************************************************
      
      do proc = 1,nproc ! NOT PARALLEL
         if (proc == dbg_proc) write(*,*)
         if (proc == dbg_proc) write(*,*) 'sweeping reduction across processors'

         nendloc=nend(proc)
         nzpploc=nzpp(proc)
         nshift=noffset(proc)
         
         if (proc < nproc) then
            k = nend(proc)
            next_proc = proc+1
            do i=1,nvar
               do j=1,nvar 
                  ublk(i,j,k) = RPABOVE(i,j,next_proc)
               end do
               sol(i,k) = RPB(i,next_proc)
            end do
         end if

         if (proc > 1) then
         
            if (proc == dbg_proc) &
               write(*,*) 'recv RPBELOW,RPDIAG,RPABOVE,RPB etc from prev'
         
            call my_gemm(nvar,nvar,nvar,&
               lblk(:,:,nendloc),nvar,&
               ublk(:,:,nshift),nvar,&
               dblk(:,:,nendloc),nvar)
            if (proc == dbg_proc) &
               write(*,'(99(i1,a))') &
                  proc, ' dblk(', nendloc, ') = dblk(', nendloc, &
                  ') - lblk(', nendloc, ')*ublk(', nshift, ')'
               
            call my_gemv( &
               nvar,nvar,lblk(:,:,nendloc),nvar,sol(:,nshift),sol(:,nendloc))
            if (proc == dbg_proc) &
               write(*,'(99(i1,a))') &
                  proc, ' sol(', nendloc, ') = sol(', nendloc, &
                  ') - lblk(', nendloc, ')*sol(', nshift, ')'
            
         end if

         if (proc == dbg_proc) write(*,*) 'P_pivot', nendloc
         call P_pivot(&
            lblk(:,:,nendloc),dblk(:,:,nendloc),ublk(:,:,nendloc),&
            sol(:,nendloc),nvar,proc,0,ierr)
         if (ierr /= 0) then
            !write(*,2) 'Last steps P_pivot failed for', k
            exit
         end if

      end do

      if (ierr /= 0) then
         call dealloc
         return
      end if

      ! Now the very last proc has the real solution for its
      ! last block in B.

      ! ******************************************************************8
      ! First step of the backsub: send the solution back to each processors
      ! now in decreasing order,and solve for the "last-block" variables.
      ! ******************************************************************8

      do proc = nproc-1,1,-1 ! NOT PARALLEL
         nendloc = nend(proc)
         call my_gemv( &
            nvar,nvar,ublk(:,:,nendloc),nvar,sol(:,nend(proc+1)),sol(:,nendloc))
         if (proc == dbg_proc) write(*,'(99(i1,a))') &
               proc, ' sol(', nendloc, ') = sol(', nendloc, &
               ') - ublk(', nendloc, ')*sol(', nend(proc+1), ')'
      end do

      ! So by now each processor has the solution to the "lastblock"
      ! variable stored in the B of its last block. 

      ! Now each processor can continue the backsubstitution entirely
      ! independently.
      
!$OMP PARALLEL DO PRIVATE(proc,nendloc,nzpploc,nshift,nstore,nl,k)
      do proc = 1,nproc
         nshift=noffset(proc)
         do nstore=nzpp(proc)-1,1,-1         
            k = nstore + nshift
            call my_gemv( &
               nvar,nvar,ublk(:,:,k),nvar,sol(:,k+1),sol(:,k))
            if (proc == dbg_proc) write(*,'(99(i1,a))') &
                  proc, ' sol(', k, ') = sol(', k, &
                  ') - ublk(', k, ')*sol(', k+1, ')'
            if (proc > 1) then
               call my_gemv( &
                  nvar,nvar,lblk(:,:,k),nvar,sol(:,nshift),sol(:,k))
               if (proc == dbg_proc) write(*,'(99(i1,a))') &
                     proc, ' sol(', k, ') = sol(', k, &
                     ') - lblk(', k, ')*sol(', nshift, ')'
            end if
         end do                 
      end do
!$OMP END PARALLEL DO
      
      call dealloc
      

      contains


      subroutine my_gemv(m,n,a,lda,x,y) ! y = y - a*x
         integer lda,m,n
         real(fltp) :: a(:,:) ! (lda,*)
         real(fltp) :: x(:), y(:)
         real(fltp) :: tmp
         real(fltp), parameter :: zero=0
         ! trans = 'n'
         ! alpha = -1
         ! beta = 1
         ! incx = 1
         ! incy = 1
         integer :: j, i
         do j = 1,n
            tmp = x(j)
            if (tmp.ne.zero) then
               do i = 1,m
                  y(i) = y(i) - tmp*a(i,j)
               end do
            end if
         end do
      end subroutine my_gemv
      
      
      subroutine my_gemm(m,n,k,a,lda,b,ldb,c,ldc) ! c := c - a*b
         integer, intent(in) :: k,lda,ldb,ldc,m,n
         real(fltp), dimension(:,:) :: a, b, c ! a(lda,*),b(ldb,*),c(ldc,*)
         real(fltp) :: tmp
         real(fltp), parameter :: zero=0
         integer :: j, i, l
         ! transa = 'n'
         ! transb = 'n'
         ! alpha = -1
         ! beta = 1
         ! assumes other args are valid
         do j = 1,n
            do l = 1,k
               tmp = b(l,j)
               if (tmp .ne. zero) then
                  do i = 1,m
                     c(i,j) = c(i,j) - tmp*a(i,l)
                  end do
               end if
            end do
         end do      
      end subroutine my_gemm



      
      
      subroutine dealloc
#ifdef DBLE
#else
         deallocate( &
            RPB, &
            work1d2, &
            RPABOVE)
#endif
      end subroutine dealloc


      subroutine show_diag_and_sol(k)
         integer, intent(in) :: k
         integer :: i, j
         return
         do i=1,nvar
            do j=1,nvar
               if (abs(dblk(i,j,k) - 1d0) < 1d-50) then
                  write(*,'(i10)',advance='no') 1
               else if (abs(dblk(i,j,k) + 1d0) < 1d-50) then
                  write(*,'(i10)',advance='no') -1
               else if (abs(dblk(i,j,k)) < 1d-50) then
                  write(*,'(i10)',advance='no') 0
               else
                  write(*,'(e10.1)',advance='no') dblk(i,j,k)
               end if
            end do
            write(*,'(5x,e15.6)') sol(i,k)
         end do
         write(*,*)
      end subroutine show_diag_and_sol


      subroutine show_sol(k)
         integer, intent(in) :: k
         integer :: i
         !return
         do i=1,nvar
            write(*,'(e15.6)') sol(i,k)
         end do
         write(*,*)
      end subroutine show_sol


      subroutine show_RPB(proc)
         integer, intent(in) :: proc
         integer :: i
         !return
         do i=1,nvar
            write(*,'(e15.6)') RPB(i,proc)
         end do
         write(*,*)
      end subroutine show_RPB

   end subroutine do_trisolve


   subroutine P_pivot(BELOW,DIAG,ABOVE,B,nvar,proc,k,ierr)

      ! Subroutine diagonalises matrix DIAG by implicit partial pivoting,
      ! and carries out the same operations on matrices BELOW,ABOVE and
      ! on the RHS vector B
      !
      ! Required parameters are the following
      ! Matrix BELOW(nvar,nvar)
      ! Matrix DIAG(nvar,nvar)
      ! Matrix ABOVE(nvar,nvar)
      ! Vector B(nvar)
      
      real(fltp), dimension(:,:) :: BELOW,DIAG,ABOVE ! (nvar,nvar)
      real(fltp), dimension(:) :: B ! (nvar)
      integer, intent(in) :: nvar,proc,k
      integer, intent(out) :: ierr

      integer :: i,j,ip,jp
      real(fltp) :: piv,dum,amax(nvar)

      ierr = 0

      ! First step, find largest element/row in DIAG (for implicit pivoting)

      do i=1,nvar
         amax(i) = 0.d0
         do j=1,nvar
            if(abs(DIAG(i,j)).gt.amax(i)) amax(i)=abs(DIAG(i,j))
         end do
      end do

      ! Second step, loop over all columns jp the pivoting algorithm

      do jp=1,nvar

      ! 1. Find pivot in column jp (jp = index of column to be pivoted)

      piv=0.d0
      ip = 0
      do i=jp,nvar
         dum = DIAG(i,jp)/amax(i)
         if (abs(dum).gt.piv) then
            piv=dabs(dum)
            ip = i           ! ip = index of row which holds pivot
         end if
      end do
      if(piv.eq.0.d0) then
         ierr = -1
         return
      end if

      ! 2. Swap row ip with row jp if ip.ne.jp, swap amax, and swap rows in 
      ! matrices to be stored s and b

      if (ip /= jp) then ! swap lines
         dum = amax(ip)
         amax(ip) = amax(jp)
         amax(jp) = dum
         dum = B(ip)
         B(ip) = B(jp)
         B(jp) = dum
         do j = 1, nvar
            dum = DIAG(ip,j)
            DIAG(ip,j) = DIAG(jp,j)
            DIAG(jp,j) = dum
            dum = ABOVE(ip,j)
            ABOVE(ip,j) = ABOVE(jp,j)
            ABOVE(jp,j) = dum
            dum = BELOW(ip,j)
            BELOW(ip,j) = BELOW(jp,j)
            BELOW(jp,j) = dum
         end do
      endif

      ! pivot is now placed at position (ip,jp)

      ip=jp

      ! 3. Normalise row with pivot

      piv=DIAG(ip,jp)
      if (piv.ne.1.d0) then
         do j=1,nvar
            DIAG(ip,j) = DIAG(ip,j)/piv
            ABOVE(ip,j) = ABOVE(ip,j)/piv
            BELOW(ip,j) = BELOW(ip,j)/piv
         end do
         B(ip)=B(ip)/piv
      end if

      ! 4. Eliminate all elements in the column but the pivot, 
      ! and keep track of the operations on the other columns and stored vectors
      ! L_i -> L_i - p_(i,jp) L_ip 

      do i= 1,nvar
         if (i.ne.jp) then
            if (DIAG(i,jp).ne.0.d0) then
               piv = DIAG(i,jp)
               do j = jp,nvar
                  DIAG(i,j) = DIAG(i,j) - piv*DIAG(ip,j)
               end do
               do j = 1,nvar
                  ABOVE(i,j) = ABOVE(i,j) - piv*ABOVE(ip,j)
                  BELOW(i,j) = BELOW(i,j) - piv*BELOW(ip,j)
               end do
               B(i) = B(i) - piv*B(ip)
            end if
         end if
      end do

      ! 5. End loop when all columns have been treated that way

      end do


   end subroutine P_pivot


   subroutine matmult(AMAT,BMAT,RESULT,maxi,NA,NB,ND,mmtype)
      integer :: maxi,ND
      double precision, dimension(maxi,maxi) :: AMAT,BMAT,RESULT
      integer, dimension(2) :: NA, NB
      character*2 :: mmtype

      integer :: imin,imax,jmin,jmax,kmin,kmax
      double precision :: res
      integer :: i,j,k

      ! This routine multiplies the matrices AMAT and BMAT 
      ! together and return the result in result. The 
      ! multiplication is optimized. The number of lines
      ! is ND. The number and positions of non-zero
      ! columns in the two matrices is taken into account.

      imin = 1
      imax = ND

      if(mmtype(1:1).eq.'a') then
         kmin = 1
         kmax = NA(1)
      else if(mmtype(1:1).eq.'b') then
         kmin = NA(1)-NA(2)+1
         kmax = NA(1)
      else
         stop 'bad mmtype(1:1)'
      endif

      if(mmtype(2:2).eq.'a') then
         jmin = 1
         jmax = NB(1)
      else if(mmtype(2:2).eq.'b') then
         jmin = NB(1)-NB(2)+1
         jmax = NB(1)
      else
         stop 'bad mmtype(2:2)'
      endif

      do j=1,maxi
         do i=1,maxi
            RESULT(i,j) = 0.d0
         enddo
      enddo

      do j=jmin,jmax
         do i=imin,imax
            res = 0.d0
            do k = kmin,kmax
               res = res + AMAT(i,k)*BMAT(k,j)
            enddo
            RESULT(i,j) = res
         enddo
      enddo

   end subroutine matmult


   subroutine vecmult(AMAT,BVEC,RESULT,maxi,NA,ND,vmtype)
      integer :: maxi,ND
      integer, dimension(2) :: NA
      double precision, dimension(maxi,maxi) :: AMAT 
      double precision, dimension(maxi) :: BVEC,RESULT
      character :: vmtype

      integer :: imin,imax,kmin,kmax,i,k
      double precision :: res

      ! Calculates the product between an AMAT matrix and a
      ! Bvec vector, and stores it in result. 

      ! ND is the number of lines in the multiplication. 
      ! Is either ii (all blocks
      ! except the first and last) or ka (first block) or kb 
      ! (last block)

      ! The multiplication is optimized. If the multiplying
      ! matrix is of "above" type then only the first NA
      ! columns are non-zero.
      ! If the multiplying matrix is of "below" type
      ! then NA has two indices, the first one is the number
      ! of lines/columns, the second one is the number
      ! of non-zero columns.

      !      write(50,*) ' - Starting vecmult - '

      imin = 1
      imax = ND

      if(vmtype.eq.'b') then
         kmin = NA(1)-NA(2)+1
         kmax = NA(1)
      else if(vmtype.eq.'a') then
         kmin = 1
         kmax = NA(1)
      else
         stop 'bad vmtype'
      endif

      do i=1,maxi
         RESULT(i)= 0.d0
      enddo

      do i=imin,imax
         res = 0.d0
         do k = kmin,kmax
            res = res + AMAT(i,k)*BVEC(k)
         enddo
         RESULT(i) = res
      enddo

   end subroutine vecmult



#ifdef DBLE
   end module trisolve_dble
#else
   end module trisolve_quad
#endif
