! ***********************************************************************
!
!   Copyright (C) 2012  Bill Paxton
!
!   MESA is free software; you can use it and/or modify
!   it under the combined terms and restrictions of the MESA MANIFESTO
!   and the GNU General Library Public License as published
!   by the Free Software Foundation; either version 2 of the License,
!   or (at your option) any later version.
!
!   You should have received a copy of the MESA MANIFESTO along with
!   this software; if not, it is available at the mesa website:
!   http://mesa.sourceforge.net/
!
!   MESA is distributed in the hope that it will be useful,
!   but WITHOUT ANY WARRANTY; without even the implied warranty of
!   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
!   See the GNU Library General Public License for more details.
!
!   You should have received a copy of the GNU Library General Public License
!   along with this software; if not, write to the Free Software
!   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
!
! ***********************************************************************


#ifdef DBLE
      module mod_newton_dble
#else
      module mod_newton_quad
#endif
      use const_def, only: dp, qp
      use utils_lib, only: 
     >   is_bad_num, is_bad_quad, set_int_pointer_2,
     >   set_pointer_1, set_pointer_2, set_pointer_4, 
     >   set_quad_pointer_1, set_quad_pointer_2, set_quad_pointer_4     
      use num_def
      use alert_lib, only: alert
      use mtx_def
      use mtx_lib
      
      implicit none

#ifdef DBLE
      integer, parameter :: fltp = dp
#else
      integer, parameter :: fltp = qp
#endif
      
#ifdef DBLE
#define set_ptr_1 set_pointer_1
#define set_ptr_2 set_pointer_2
#define set_ptr_4 set_pointer_4
#define is_bad is_bad_num
#else
#define set_ptr_1 set_quad_pointer_1
#define set_ptr_2 set_quad_pointer_2
#define set_ptr_4 set_quad_pointer_4
#define is_bad is_bad_quad
#endif
      
      
      
      contains


      subroutine do_newton_wrapper(
     >   nz, nvar, x, xold,
     >   matrix_type, mljac, mujac,
#ifdef DBLE
     >   decsol, decsolblk, decsols, 
#endif
     >   decsolblk_quad,
     >   lrd, rpar_decsol, lid, ipar_decsol, which_decsol,
     >   tol_correction_norm,
     >   set_primaries, set_secondaries, set_xscale, Bdomain, xdomain, eval_equations,
     >   size_equ, sizeb, inspectB,
     >   enter_setmatrix, exit_setmatrix, failed_in_setmatrix, force_another_iteration,
     >   xscale, equ, ldy, nsec, y, work, lwork, iwork, liwork, qwork, lqwork, AF,
     >   lrpar, rpar, lipar, ipar, convergence_failure, ierr)
         
         ! the primary variables
         integer, intent(in) :: nz ! number of zones
         integer, intent(in) :: nvar ! number of variables per zone
         ! the total number of primary variables is nvar*nz
         real(fltp), pointer, dimension(:,:) :: x ! (nvar, nz) 
         ! new vector of primaries
         real(fltp), pointer, dimension(:,:) :: xold ! (nvar, nz)
         ! old vector of primaries

         ! information about the jacobian matrix
         integer, intent(in) :: matrix_type ! see num_def.f for values
         ! if matrix_type == banded_matrix_type, mljac and mujac give the bandwidths.
         ! if matrix_type /= banded_matrix_type, mljac and mujac are not used.

         integer, intent(in) :: mljac ! number of subdiagonals within the band of the jacobian
         integer, intent(in) :: mujac ! number of superdiagonals
         ! for example if you have a centered 3 zone stencil, 
         ! then you will have mljac and mujac both equal to 2*nvar-1
         ! mljac and mujac are only used for matrix_type == banded matrix type
                  
         ! matrix routines
         ! there are implementations of the matrix routines available in mesa/mtx.
         ! for example, the LAPACK versions are called lapack_dec, lapack_sol, etc.
         interface
#ifdef DBLE
            include "mtx_decsol.dek"
            include "mtx_decsolblk_dble.dek"
            include "mtx_decsols.dek"
#endif
            include "mtx_decsolblk_quad.dek"
         end interface
         ! these arrays provide optional extra working storage for the matrix routines.
         ! the implementations in mesa/mtx include routines to determine the sizes.
         ! for example, the LAPACK version is called lapack_work_sizes.
         integer, intent(in) :: lrd, lid
         integer, intent(inout), pointer :: ipar_decsol(:) ! (lid)
         real(dp), intent(inout), pointer :: rpar_decsol(:) ! (lrd)
         integer, intent(in) :: which_decsol

         real(fltp), pointer, dimension(:,:) :: xscale ! (nvar, nz)
         ! typical values for x.  set by set_xscale.
         real(fltp), pointer, dimension(:,:) :: equ ! (nvar, nz)
         ! equ(i) has the residual for equation i, i.e., the difference between
         ! the left and right hand sides of the equation.

         ! the secondary variables
            ! the "secondaries" for zone k depend only on the primaries of zone k
            ! and therefore need not be recomputed is the zone k primaries have not been modified.
            ! using this information can significantly accelerate the computation of numerical jacobians.
            ! for stellar evolution, the secondaries include such expensive-to-compute items
            ! as equation of state, 
            ! nuclear reaction rates, and opacities.
         integer, intent(in) :: ldy ! leading dimension of y, >= nz
         integer, intent(in) :: nsec ! number of secondaries per zone
         real(fltp), pointer, dimension(:,:) :: y ! the values. (ldy, nsec)

         ! work arrays. required sizes provided by the routine newton_work_sizes.
         ! for standard use, set work and iwork to 0 before calling.
         ! NOTE: these arrays contain some optional parameter settings and outputs.
         ! see num_def for details.
         integer, intent(in) :: lwork, liwork, lqwork
         real(fltp), intent(inout), target :: work(:) ! (lwork)
         integer, intent(inout), target :: iwork(:) ! (liwork)
         real(qp), intent(inout), target :: qwork(:) ! (lqwork)
         real(fltp), pointer, dimension(:,:) :: AF ! for factored jacobian
            ! will be allocated or reallocated as necessary.  

         ! convergence criteria
         real(fltp), intent(in) :: tol_correction_norm
            ! a trial solution is considered to have converged if
            ! max_correction <= tol_max_correction and
            !
            ! either
            !          (correction_norm <= tol_correction_norm)  
            !    .and. (residual_norm <= tol_residual_norm)
            ! or
            !          (correction_norm*residual_norm <= tol_corr_resid_product)
            !    .and. (abs(slope) <= tol_abs_slope_min)
            !
            ! where "slope" is slope of the line for line search in the newton solver,
            ! and is analogous to the slope of df/dx in a 1D newton root finder.

         ! parameters for caller-supplied routines
         integer, intent(in) :: lrpar, lipar
         real(dp), intent(inout) :: rpar(:) ! (lrpar)
         integer, intent(inout) :: ipar(:) ! (lipar)
         
         ! output
         logical, intent(out) :: convergence_failure
         integer, intent(out) :: ierr ! 0 means okay.
         
         ! the following routines implement the problem-specific aspects of the newton solver.
         ! see num/include/newton_procs.dek for documentation.
         ! there are default implementations for most of the routines (see below).
         ! the only one without a default is the "eval_equations" routine that computes
         ! the equation residuals for your particular problem.
         interface
#ifdef DBLE
            include 'newton_procs_dble.dek' 
#else
            include 'newton_procs_quad.dek' 
#endif
         end interface
         
         integer :: ldAF, neqns
         real(fltp), pointer :: AF_copy(:,:) ! (ldAF, nvar*nz)
         
         ! for sparse
         integer :: isparse, n, nzmax, need_lrd, need_lid, lfil, maxits, iout
         real(fltp) :: eps, droptol
         integer, pointer :: sprs_ia(:), sprs_ja(:)
         real(fltp), pointer :: sprs_sa(:)
         logical :: sparse_flag
               
         real(fltp) :: sparse_nzmax_factor
         integer :: test_time0, test_time1, clock_rate
         logical :: do_test_timing
         
         include 'formats.dek'

         do_test_timing = (work(r_test_time) /= 0)
         work(r_test_time) = 0

         ierr = 0
         
         if (bad_sizes(x,nvar,nz,'x')) return
         if (bad_sizes(xold,nvar,nz,'xold')) return
         if (bad_sizes(xscale,nvar,nz,'xscale')) return
         if (bad_sizes(equ,nvar,nz,'equ')) return
         
         if (bad_sizes(y,ldy,nsec,'y')) return
         
         if (bad_isize(ipar_decsol,lid,'ipar_decsol')) return
         if (bad_size_dble(rpar_decsol,lrd,'rpar_decsol')) return
         
         if (bad_isize(iwork,liwork,'iwork')) return
         if (bad_size(work,lwork,'work')) return
         
         if (bad_isize(ipar,lipar,'ipar')) return
         if (bad_size_dble(rpar,lrpar,'rpar')) return
         
         sparse_flag = .false.

         nullify(sprs_ia)
         nullify(sprs_ja)
         nullify(sprs_sa)
         
         isparse = -1
         nzmax = 0
         if (which_decsol == lapack) then
            call lapack_work_sizes(n, need_lrd, need_lid)
         else if (which_decsol == block_thomas_dble) then
            call block_thomas_dble_work_sizes(nvar, nz, need_lrd, need_lid)
         else if (which_decsol == block_thomas_quad) then
            call block_thomas_quad_work_sizes(nvar, nz, need_lrd, need_lid)
         else if (which_decsol == block_thomas_klu) then
            call block_thomas_klu_work_sizes(nvar, nz, need_lrd, need_lid)
         else if (which_decsol == block_dc_mt_dble) then
            call block_dc_mt_dble_work_sizes(nvar, nz, need_lrd, need_lid)
         else if (which_decsol == block_dc_mt_quad) then
            call block_dc_mt_quad_work_sizes(nvar, nz, need_lrd, need_lid)
         else if (which_decsol == block_dc_mt_klu) then
            call block_dc_mt_klu_work_sizes(nvar, nz, need_lrd, need_lid)
         else if (which_decsol == star_split) then
            call star_split_work_sizes(nvar, nz, need_lrd, need_lid)
         else ! a sparse decsol
            n = nz*nvar
            if (work(r_sparse_non_zero_max_factor) == 0) then
               sparse_nzmax_factor = 1
            else
               sparse_nzmax_factor = max(1d-10, min(1d0, work(r_sparse_non_zero_max_factor)))
            end if
            
            if (matrix_type == square_matrix_type) then
               nzmax = n*n*sparse_nzmax_factor
            else
               nzmax = n*(mljac+mujac+1)*sparse_nzmax_factor
            end if
            
            allocate(sprs_ia(n+1), sprs_ja(nzmax), sprs_sa(nzmax), stat=ierr)
            if (ierr /= 0) return         
            if (which_decsol == mkl_pardiso) then
               isparse = mkl_pardiso_compressed_format
               call mkl_pardiso_work_sizes(n, nzmax, need_lrd, need_lid)
            else if (which_decsol == klu) then
               isparse = klu_compressed_format
               call klu_work_sizes(n, nzmax, need_lrd, need_lid)            
            else   
               write(*,*) 'newton: unknown value for matrix solver option'
               ierr = -1
               call dealloc
               return 
            end if
            sparse_flag = .true.
         end if
         
         if (need_lrd > lrd .or. need_lid > lid) then
            write(*,*) 'bad lrd or lid for newton'
            write(*,2) 'need_lrd', need_lrd
            write(*,2) '     lrd', lrd
            write(*,2) 'need_lid', need_lid
            write(*,2) '     lid', lid
            ierr = -1
            call dealloc
            return
         end if

         neqns = nvar*nz
         if (matrix_type == block_tridiag_dble_matrix_type .or.
     >       matrix_type == block_tridiag_quad_matrix_type) then
            ldAF = 3*nvar
         else if (matrix_type == square_matrix_type) then
            ldAF = neqns
         else
            ldAF = 2*mljac+mujac+1
         end if
         
         if (associated(AF)) then
            if (size(AF,dim=1)*size(AF,dim=2) < ldAF*neqns) then
               deallocate(AF)
               nullify(AF)
            end if
         end if
         
         if (.not. associated(AF)) then
            allocate(AF(ldAF+2,neqns+200), stat=ierr)
            if (ierr /= 0) then
               call dealloc
               return
            end if
         end if
         
         call set_ptr_2(AF_copy, AF, ldAF, neqns)

         if (do_test_timing) call system_clock(test_time0,clock_rate)            
         
         call do_newton(
     >      nz, nvar, x, xold, AF_copy, ldAF, neqns, 
     >      matrix_type, isparse, sparse_flag, mljac, mujac,
#ifdef DBLE
     >      decsol, decsolblk, decsols, 
#endif
     >      decsolblk_quad,
     >      lrd, rpar_decsol, lid, ipar_decsol,
     >      nzmax, sprs_ia, sprs_ja, sprs_sa,
     >      tol_correction_norm,
     >      set_primaries, set_secondaries, set_xscale, Bdomain, xdomain, eval_equations,
     >      size_equ, sizeb, inspectB,
     >      enter_setmatrix, exit_setmatrix, failed_in_setmatrix, force_another_iteration,
     >      xscale, equ, ldy, nsec, y, work, lwork, iwork, liwork, qwork, lqwork,
     >      lrpar, rpar, lipar, ipar, convergence_failure, ierr)
         
         call dealloc

         if (do_test_timing) then
            call system_clock(test_time1,clock_rate)
            work(r_test_time) = work(r_test_time) + dble(test_time1 - test_time0) / clock_rate
         end if
        
         
         contains
         
         
         subroutine dealloc
            if (associated(sprs_ia)) deallocate(sprs_ia)
            if (associated(sprs_ja)) deallocate(sprs_ja)
            if (associated(sprs_sa)) deallocate(sprs_sa)
         end subroutine dealloc
         
      
         logical function bad_isize(a,sz,str)
            integer :: a(:)
            integer, intent(in) :: sz
            character (len=*), intent(in) :: str
            bad_isize = (size(a,dim=1) < sz)
            if (.not. bad_isize) return
            ierr = -1
            call alert(ierr, 'interpolation: bad sizes for ' // trim(str))
            return
         end function bad_isize
         
      
         logical function bad_size(a,sz,str)
            real(fltp) :: a(:)
            integer, intent(in) :: sz
            character (len=*), intent(in) :: str
            bad_size = (size(a,dim=1) < sz)
            if (.not. bad_size) return
            ierr = -1
            call alert(ierr, 'interpolation: bad sizes for ' // trim(str))
            return
         end function bad_size
         
      
         logical function bad_size_dble(a,sz,str)
            real(dp) :: a(:)
            integer, intent(in) :: sz
            character (len=*), intent(in) :: str
            bad_size_dble = (size(a,dim=1) < sz)
            if (.not. bad_size_dble) return
            ierr = -1
            call alert(ierr, 'interpolation: bad sizes for ' // trim(str))
            return
         end function bad_size_dble
         
      
         logical function bad_sizes(a,sz1,sz2,str)
            real(fltp) :: a(:,:)
            integer, intent(in) :: sz1,sz2
            character (len=*), intent(in) :: str
            bad_sizes = (size(a,dim=1) < sz1 .or. size(a,dim=2) < sz2)
            if (.not. bad_sizes) return
            ierr = -1
            call alert(ierr, 'interpolation: bad sizes for ' // trim(str))
            return
         end function bad_sizes
         
         
      end subroutine do_newton_wrapper


      subroutine do_newton(
     >   nz, nvar, x, xold, AF, ldAF, neq,
     >   matrix_type, isparse, sparse_flag, mljac, mujac, 
#ifdef DBLE
     >   decsol, decsolblk, decsols, 
#endif
     >   decsolblk_quad,
     >   lrd, rpar_decsol, lid, ipar_decsol,
     >   sprs_nzmax, sprs_ia, sprs_ja, sprs_sa,
     >   tol_correction_norm,
     >   set_primaries, set_secondaries, set_xscale, Bdomain, xdomain, eval_equations,
     >   size_equ, sizeb, inspectB,
     >   enter_setmatrix, exit_setmatrix, failed_in_setmatrix, force_another_iteration,
     >   xscale, equ, ldy, nsec, y, work, lwork, iwork, liwork, qwork, lqwork, 
     >   lrpar, rpar, lipar, ipar, convergence_failure, ierr)

         use mtx_lib    

         integer, intent(in) :: nz, nvar, mljac, mujac, ldy, nsec, isparse, ldAF, neq
         
         integer, intent(in) :: matrix_type
         logical, intent(in) :: sparse_flag

         real(fltp), pointer, dimension(:,:) :: x, xold, equ, xscale         
         real(fltp), pointer, dimension(:,:) :: AF ! (ldAF, neq), neq = nvar*nz
         real(fltp), pointer, dimension(:,:) :: y ! the values. (ldy, nsec)
            ! the values.  set by set_secondaries.
                  
         ! matrix routines
         interface
#ifdef DBLE
            include "mtx_decsol.dek"
            include "mtx_decsolblk_dble.dek"
            include "mtx_decsols.dek"
#endif
            include "mtx_decsolblk_quad.dek"
         end interface
         integer, intent(in) :: lrd, lid
         integer, intent(inout), pointer :: ipar_decsol(:) ! (lid)
         real(dp), intent(inout), pointer :: rpar_decsol(:) ! (lrd)

         integer, intent(in) :: sprs_nzmax
         integer, pointer :: sprs_ia(:), sprs_ja(:)
         real(fltp), pointer :: sprs_sa(:)

         ! controls         
         real(fltp), intent(in) :: tol_correction_norm

         ! parameters for caller-supplied routines
         integer, intent(in) :: lrpar, lipar
         real(dp), intent(inout) :: rpar(:) ! (lrpar)
         integer, intent(inout) :: ipar(:) ! (lipar)

         ! work arrays
         integer, intent(in) :: lwork, liwork, lqwork
         real(fltp), intent(inout), target :: work(:) ! (lwork)
         integer, intent(inout), target :: iwork(:) ! (liwork)
         real(qp), intent(inout), target :: qwork(:) ! (lqwork)

         ! output
         logical, intent(out) :: convergence_failure
         integer, intent(out) :: ierr
         
         ! procedures
         interface
#ifdef DBLE
            include 'newton_procs_dble.dek' 
#else
            include 'newton_procs_quad.dek' 
#endif
         end interface

         ! info saved in work and iwork
         real(fltp), dimension(:,:), pointer :: A, Acopy
         real(fltp), dimension(:,:), pointer :: xsave, dxsave, B, grad_f, B_init, wrk_for_refine
         real(fltp), dimension(:), pointer :: B1, B_init1, wrk_for_refine1
         real(fltp), dimension(:,:), pointer ::  rhs
         integer, dimension(:), pointer :: ipiv
         real(fltp), dimension(:,:), pointer :: dx, xgg, dxd, dxdd, xder, equsave
         real(fltp), dimension(:,:), pointer :: y1, y2
         
         real(fltp), dimension(:,:,:), pointer :: lblk, dblk, ublk
         real(fltp), dimension(:,:,:), pointer :: lblkF, dblkF, ublkF
         real(qp), dimension(:,:,:), pointer :: lblkF_quad, dblkF_quad, ublkF_quad
         real(qp), dimension(:,:), pointer ::B_quad
         integer, dimension(:,:), pointer :: ipiv_blk
         
         ! locals
         real(fltp)  :: 
     >      coeff, f, slope, residual_norm, max_residual, corr_norm_min, resid_norm_min, correction_factor,
     >      residual_norm_save, corr_norm_min_save, resid_norm_min_save, correction_factor_save,
     >      correction_norm, corr_norm_initial, max_correction, slope_extra,
     >      tol_max_correction, tol_residual_norm, tol_abs_slope_min, tol_corr_resid_product,
     >      corr_coeff_limit, tol_max_residual, max_corr_min, max_resid_min, 
     >      D_norm_kappa, D_norm, prev_D_norm, D_norm_theta, D_norm_err_est
         integer :: iiter, max_tries, ndiag, zone, idiag, tiny_corr_cnt, ldA, i, j, k, info,
     >      last_jac_iter, max_iterations_for_jacobian, lsvar_lo, lsvar_hi, force_iter_value,
     >      max_iter_for_enforce_resid_tol, max_iter_for_resid_tol2, max_iter_for_resid_tol3,
     >      test_time0, test_time1, time0, time1, clock_rate, caller_id, min_for_check_D_norm_converging
         character (len=256) :: err_msg
         logical :: first_try, dbg_msg, passed_tol_tests, use_D_norm,
     >      overlay_AF, do_mtx_timing, do_test_timing, have_called_decsols, doing_extra
         integer, parameter :: num_tol_msgs = 15
         character (len=32) :: tol_msg(num_tol_msgs)
         character (len=64) :: message
         real(fltp), pointer, dimension(:) :: p1_1, p1_2
         
         include 'formats.dek'
               
         
         do_mtx_timing = (work(r_mtx_time) /= 0)
         work(r_mtx_time) = 0

         tol_msg(1)  = 'avg corr'
         tol_msg(2)  = 'max corr '
         tol_msg(3)  = 'avg+max corr'
         tol_msg(4)  = 'avg resid'
         tol_msg(5)  = 'avg corr+resid'
         tol_msg(6)  = 'max corr, avg resid'
         tol_msg(7)  = 'avg+max corr, avg resid'
         tol_msg(8)  = 'max resid'
         tol_msg(9)  = 'avg corr, max resid'
         tol_msg(10) = 'max corr+resid'
         tol_msg(11) = 'avg+max corr, max resid'
         tol_msg(12) = 'avg+max resid'
         tol_msg(13) = 'avg corr, avg+max resid'
         tol_msg(14) = 'max corr, avg+max resid'
         tol_msg(15) = 'avg+max corr+resid'
 
         ierr = 0
         have_called_decsols = .false.
         iiter = 0

         call set_param_defaults
         dbg_msg = (iwork(i_debug) /= 0)
         tol_residual_norm = work(r_tol_residual_norm)
         tol_max_residual = work(r_tol_max_residual)
         tol_max_correction = work(r_tol_max_correction)
         tol_abs_slope_min = work(r_tol_abs_slope_min)
         tol_corr_resid_product = work(r_tol_corr_resid_product)
         corr_coeff_limit = work(r_corr_coeff_limit)
         
         max_iter_for_enforce_resid_tol = iwork(i_max_iter_for_enforce_resid_tol)
         max_iter_for_resid_tol2 = iwork(i_max_iter_for_resid_tol2)
         max_iter_for_resid_tol3 = iwork(i_max_iter_for_resid_tol3)
         
         min_for_check_D_norm_converging = iwork(i_min_for_check_D_norm_converging)
         caller_id = iwork(i_caller_id)
         D_norm_kappa = work(r_D_norm_kappa)
         use_D_norm = (D_norm_kappa > 0d0)
         D_norm_theta = -1
         
         if (ldy < nz .and. nsec > 0) then
            ierr = -1
            call alert(ierr, 'value of ldy must be >= nz for newton')
            return
         end if

         if (matrix_type == block_tridiag_dble_matrix_type .or. 
     >       matrix_type == block_tridiag_quad_matrix_type) then
            ndiag = 3*nvar
         else if (matrix_type == square_matrix_type) then
            ndiag = neq
         else
            idiag = mujac+1
            ndiag = mljac+mujac+1
         end if
            
         ldA = ndiag
         call pointers(ierr)
         if (ierr /= 0) return

         if (iwork(i_do_core_dump) /= 0) then
            call newton_core_dump(x, dx, xold)
            return
         end if
      
         doing_extra = .false.
         passed_tol_tests = .false. ! goes true when pass the tests
         convergence_failure = .false. ! goes true when time to give up
         coeff = 1.
         xscale = 1.
  
         residual_norm=0
         max_residual=0
         corr_norm_min=1d99
         max_corr_min=1d99
         max_resid_min=1d99
         resid_norm_min=1d99
         correction_factor=0
         D_norm_err_est = 0
         
         
         do k=1,nz
            do i=1,nvar
               dx(i,k) = x(i,k) - xold(i,k)
            end do
         end do
         
         call xdomain(iiter, nvar, nz, x, dx, xold, lrpar, rpar, lipar, ipar, ierr)
         if (ierr /= 0) then
            if (dbg_msg)
     >         write(*, *) 'newton failure: xdomain returned ierr', ierr
            convergence_failure = .true.
            return
         end if
         call set_xscale(nvar, nz, xold, xscale, lrpar, rpar, lipar, ipar, ierr) ! set xscale
         if (ierr /= 0) then
            if (dbg_msg)
     >         write(*, *) 'newton failure: set_xscale returned ierr', ierr
            convergence_failure = .true.
            return
         end if
         call setequ(nvar, nz, x, equ, lrpar, rpar, lipar, ipar, ierr)
         if (ierr /= 0) then
            if (dbg_msg)
     >         write(*, *) 'newton failure: setequ returned ierr', ierr
            convergence_failure = .true.
            return
         end if
         call size_equ(
     >      iiter, nvar, nz, equ, residual_norm, max_residual, lrpar, rpar, lipar, ipar, ierr)
         if (ierr /= 0) then
            if (dbg_msg)
     >         write(*, *) 'newton failure: size_equ returned ierr', ierr
            convergence_failure = .true.
            return
         end if

         first_try = .true.
         iiter = 1
         max_tries = abs(iwork(i_max_tries))
         last_jac_iter = 0
         tiny_corr_cnt = 0
         D_norm = 0
         
         if (iwork(i_lsvar_lo) == 0) then
            lsvar_lo = 1
         else
            lsvar_lo = iwork(i_lsvar_lo)
         end if
         
         if (iwork(i_lsvar_hi) == 0) then
            lsvar_hi = nvar
         else
            lsvar_hi = iwork(i_lsvar_hi)
         end if
         
         if (iwork(i_max_iterations_for_jacobian) == 0) then
            max_iterations_for_jacobian = 1000000
         else
            max_iterations_for_jacobian = iwork(i_max_iterations_for_jacobian)
         end if

         do while (.not. passed_tol_tests)
            
            if (dbg_msg .and. first_try) write(*, *)
                  
            if (iiter >= max_iter_for_enforce_resid_tol) then
               if (iiter >= max_iter_for_resid_tol2) then
                  if (iiter >= max_iter_for_resid_tol3) then ! shut down
                     tol_residual_norm = 1d200
                     tol_max_residual = 1d200
                  else ! >= max_iter_for_resid_tol2 and but < max_iter_for_resid_tol3
                     tol_residual_norm = work(r_tol_residual_norm3)
                     tol_max_residual = work(r_tol_max_residual3)
                  end if
               else ! >= max_iter_for_enforce_resid_tol but < max_iter_for_resid_tol2
                  tol_residual_norm = work(r_tol_residual_norm2)
                  tol_max_residual = work(r_tol_max_residual2)
               end if
            end if

#ifdef DBLE
            overlay_AF = (corr_coeff_limit == 1) .and. 
     >         (iwork(i_refine_mtx_solution) == 0) .and.            
     >            (matrix_type == banded_matrix_type .or.            
     >               matrix_type == block_tridiag_dble_matrix_type)
#else
            overlay_AF = (corr_coeff_limit == 1) .and. 
     >         (iwork(i_refine_mtx_solution) == 0) .and.            
     >            (matrix_type == block_tridiag_quad_matrix_type)
            overlay_AF = .false. ! NEEDS DEBUGGING
#endif

            ! NOTE: for banded matrix, the jacobian A is a part of the array AF
            ! AF has extra rows for storing banded LU factored matrix.
            if (overlay_AF) then
               A => AF
               ldA = ldAF
               if (matrix_type == banded_matrix_type) then
                  idiag = mljac+mujac+1
               else if (matrix_type == block_tridiag_dble_matrix_type) then
                  ublk => ublkF
                  dblk => dblkF
                  lblk => lblkF
#ifdef DBLE
#else
               else if (matrix_type == block_tridiag_quad_matrix_type .or.
     >                  matrix_type == block_tridiag_dble_matrix_type) then
                  ublk => ublkF_quad
                  dblk => dblkF_quad
                  lblk => lblkF_quad
#endif
               else
                  stop 'confusion about matrix_type'
               end if
            else
               A => Acopy
               ldA = ndiag
               idiag = mujac+1
            end if

            call setmatrix(neq, x, dx, xscale, xsave, dxsave, lrpar, rpar, lipar, ipar, ierr)
            if (ierr /= 0) then
               if (any(dx /= 0)) then
                  call write_msg('setmatrix returned ierr /= 0; retry with dx = 0.')
                  x = xold; dx = 0
                  iiter=iiter+1
                  first_try = .false.
                  cycle
               end if
               call write_msg('setmatrix returned ierr /= 0; dx already = 0, so give up.')
               convergence_failure = .true.; exit
            end if
            iwork(i_num_jacobians) = iwork(i_num_jacobians) + 1
            last_jac_iter = iiter
            
            if (.not. solve_equ()) then ! either singular or horribly ill-conditioned
               write(err_msg, '(a, i5, 3x, a)') 'info', ierr, 'bad_matrix'
               call oops(err_msg)
               exit
            end if
            iwork(i_num_solves) = iwork(i_num_solves) + 1

            ! inform caller about the correction
            call inspectB(iiter, nvar, nz, x, B, xscale, lrpar, rpar, lipar, ipar, ierr)
            if (ierr /= 0) then
               call oops('inspectB returned ierr')
               exit
            end if

            ! compute size of scaled correction B
            call sizeB(iiter, nvar, nz, x, B, xscale, max_correction, correction_norm, 
     >               lrpar, rpar, lipar, ipar, ierr)
            if (ierr /= 0) then
               call oops('correction rejected by sizeB')
               exit
            end if
            correction_norm = abs(correction_norm)
            max_correction = abs(max_correction)
            corr_norm_min = min(correction_norm, corr_norm_min)
            max_corr_min = min(max_correction, max_corr_min)
            if (use_D_norm) then
               prev_D_norm = D_norm
               D_norm = correction_norm
            end if

            if (is_bad(correction_norm) .or. is_bad(max_correction)) then 
               ! bad news -- bogus correction
               call oops('bad result from sizeb -- correction info either NaN or Inf')
               exit
            end if

            if ((.not. use_D_norm) .and.
     >            (correction_norm > work(r_corr_param_factor)*work(r_scale_correction_norm)) .and.
     >            (iwork(i_try_really_hard) == 0)) then
               call oops('avg corr too large')
               exit
            endif
         
            ! shrink the correction if it is too large
            correction_factor = 1
            
            if (correction_norm*correction_factor > work(r_scale_correction_norm)) then
               correction_factor = work(r_scale_correction_norm)/correction_norm
            end if
            
            if (max_correction*correction_factor > work(r_scale_max_correction)) then
               correction_factor = work(r_scale_max_correction)/max_correction
            end if
            
            ! fix B if out of definition domain
            call Bdomain(
     >         iiter, nvar, nz, B, x, xscale, correction_factor, lrpar, rpar, lipar, ipar, ierr)
            if (ierr /= 0) then ! correction cannot be fixed
               call oops('correction rejected by Bdomain')
               exit
            end if

            ! save previous
            residual_norm_save = residual_norm
            corr_norm_min_save = corr_norm_min
            resid_norm_min_save = resid_norm_min
            correction_factor_save = correction_factor
            
            !write(*,*) 'corr_coeff_limit', corr_coeff_limit

            if (corr_coeff_limit < 1) then
               ! compute gradient of f = equ<dot>jacobian
               ! NOTE: NOT jacobian<dot>equ
#ifdef DBLE
               if (matrix_type == block_tridiag_dble_matrix_type .or.
     >             matrix_type == block_tridiag_quad_matrix_type) then
                  call block_multiply_xa(nvar, nz, lblk, dblk, ublk, equ, grad_f)
               else if (matrix_type == square_matrix_type) then
                  call set_ptr_1(p1_1, equ, nvar*nz)
                  call set_ptr_1(p1_2, grad_f, nvar*nz)
                  call multiply_xa(neq, A, p1_1, p1_2)
               else
                  call set_ptr_1(p1_1, equ, neq)
                  call set_ptr_1(p1_2, grad_f, neq)
                  call band_multiply_xa(neq, mljac, mujac, A, ldA, p1_1, p1_2)
               end if
#else
               call quad_block_multiply_xa(nvar, nz, lblk, dblk, ublk, equ, grad_f)
#endif
            
               slope = eval_slope(nvar, nz, grad_f, B)
               !write(*,*) 'slope', slope
               !if (is_bad(slope)) then
               !   call oops('bad slope value')
               !   exit
               !end if
               if (is_bad(slope) .or. slope > 0) then ! a bad sign
                  ! but give it a chance before give up
                  !write(*,*) 'slope', slope
                  slope = 0
                  corr_coeff_limit = 1
               end if
               
            else
            
               slope = 0

            end if
      
            call set_ptr_1(p1_1, grad_f, neq)
            call adjust_correction(
     >         corr_coeff_limit, p1_1, f, slope, correction_factor, coeff,
     >         err_msg, lrpar, rpar, lipar, ipar, ierr)
            if (ierr /= 0) then
               call oops(err_msg)
               exit
            end if
            
            ! coeff is factor by which adjust_correction rescaled the correction vector
            if (coeff > work(r_tiny_corr_factor)*corr_coeff_limit) then
               tiny_corr_cnt = 0
            else
               tiny_corr_cnt = tiny_corr_cnt + 1
            end if

            ! check the residuals for the equations
            call size_equ(iiter, nvar, nz, equ, residual_norm, max_residual, lrpar, rpar, lipar, ipar, ierr)
            if (ierr /= 0) then
               call oops('size_equ returned ierr')
               exit
            end if
            if (is_bad(residual_norm)) then
               call oops('residual_norm is a a bad number (NaN or Infinity)')
               exit
            end if
            if (is_bad(max_residual)) then
               call oops('max_residual is a a bad number (NaN or Infinity)')
               exit
            end if
            residual_norm = abs(residual_norm)
            max_residual = abs(max_residual)
            resid_norm_min = min(residual_norm, resid_norm_min)
            max_resid_min = min(max_residual, max_resid_min)
            
            if (use_D_norm) then ! see Hairer & Wanner IV.8, pg 120.
               if (iiter == 1) then
                  passed_tol_tests = .false.
               else
                  D_norm_theta = D_norm/prev_D_norm
                  if (iiter >= min_for_check_D_norm_converging) then 
                     ! estimate error if use all the remaining iterations
                     if (D_norm_theta >= 1d0) then
                        call oops('not converging')
                        exit
                     end if
                     D_norm_err_est = D_norm*D_norm_theta**(max_tries-iiter)/(1-D_norm_theta)
                     if (D_norm_err_est > D_norm_kappa) then
                        call oops('not converging fast enough')
                        exit
                     end if
                  end if
                  ! estimate error if stop now
                  if (D_norm_theta < 1) then
                     D_norm_err_est = D_norm*D_norm_theta/(1-D_norm_theta)
                     if (.false.) then
                        write(*,*) 'D_norm_theta', iiter, D_norm_theta
                        write(*,*) 'prev_D_norm', iiter, prev_D_norm
                        write(*,*) 'D_norm', iiter, D_norm
                        write(*,*) 'D_norm_err_est', iiter, D_norm_err_est
                        write(*,*)
                     end if
                  else
                     D_norm_err_est = 1d2
                  end if
                  work(r_D_norm_err_est) = D_norm_err_est
                  passed_tol_tests = (D_norm_err_est <= D_norm_kappa) .and. 
     >                  (D_norm < 1 .or. iiter >= 3)
               end if
            else if (max_correction > tol_max_correction*coeff .or. max_residual > tol_max_residual*coeff) then
               passed_tol_tests = .false.
            else
               passed_tol_tests =
     >               (correction_norm <= tol_correction_norm*coeff .and. 
     >                residual_norm <= tol_residual_norm*coeff)
     >          .or.      
     >               (abs(slope) <= tol_abs_slope_min .and. 
     >                correction_norm*residual_norm <= tol_corr_resid_product*coeff*coeff)
            end if
            
            if (.not. passed_tol_tests) then
               if (iiter >= max_tries) then
                  if (dbg_msg) then
                     call get_message
                     message = trim(message) // ' -- give up'
                     call write_msg(message)
                  end if
                  convergence_failure = .true.; exit
               else if (iwork(i_try_really_hard) == 0) then
                  if (coeff < corr_coeff_limit) then
                     call oops('coeff < corr_coeff_limit')
                     exit
                  else if (use_D_norm) then ! nothing to do here
                  else if (correction_norm > tol_correction_norm*coeff
     >                  .and. (correction_norm > work(r_corr_norm_jump_limit)*corr_norm_min)
     >                  .and. (.not. first_try)) then
                     call oops('avg corrrection jumped')
                     exit
                  else if (residual_norm > tol_residual_norm*coeff
     >                  .and. (residual_norm > work(r_resid_norm_jump_limit)*resid_norm_min)
     >                  .and. (.not. first_try)) then
                     call oops('avg residual jumped')
                     exit
                  else if (max_correction > tol_max_correction*coeff
     >                  .and. (max_correction > work(r_max_corr_jump_limit)*max_corr_min)
     >                  .and. (.not. first_try)) then
                     call oops('max corrrection jumped')
                     exit
                  else if (residual_norm > tol_residual_norm*coeff
     >                  .and. (max_residual > work(r_max_resid_jump_limit)*max_resid_min)
     >                  .and. (.not. first_try)) then
                     call oops('max residual jumped')
                     exit
                  else if (tiny_corr_cnt >= iwork(i_tiny_corr_coeff_limit)
     >                  .and. corr_coeff_limit < 1) then
                     call oops('tiny corrections')
                     exit
                  end if
               end if
            end if
            
            if (dbg_msg) then
               if (.not. passed_tol_tests) then
                  call get_message
                  call write_msg(message)
               else if (iiter < iwork(i_itermin)) then     
                  call write_msg('iiter < itermin')
               else if ((.not. doing_extra) .and.
     >                  (iiter+1 < max_tries) .and. 
     >                  (iwork(i_refine_solution) /= 0) .and.
     >                  correction_norm > tol_correction_norm/10) then 
                  call write_msg('okay, but do one extra with same jacobian')
                  passed_tol_tests = .false.
                  doing_extra = .true.
               else
                  call write_msg('okay!')
               end if
            end if
            
            if (passed_tol_tests .and. (iiter+1 < max_tries)) then 
               ! about to declare victory... but may want to do another iteration
               force_iter_value = force_another_iteration(
     >                              iiter, iwork(i_itermin), lrpar, rpar, lipar, ipar)
               if (force_iter_value > 0) then
                  passed_tol_tests = .false. ! force another
                  tiny_corr_cnt = 0 ! reset the counter
                  corr_norm_min = 1d99
                  resid_norm_min = 1d99
                  max_corr_min = 1d99
                  max_resid_min = 1d99
               else if (force_iter_value < 0) then ! failure
                  call oops('force iter')
                  convergence_failure = .true.
                  exit
               end if
            end if

            iiter=iiter+1
            first_try = .false.

         end do
            
         call dealloc
         

         contains
         
         
         subroutine dealloc
#ifdef DBLE
            if (have_called_decsols) then ! deallocate
               call decsols(
     >            2, nvar*nz, sprs_nzmax, sprs_ia, sprs_ja, sprs_sa, b, 
     >            lrd, rpar_decsol, lid, ipar_decsol, info)
            end if
#endif
         end subroutine dealloc
         
         
         subroutine get_message
            include 'formats.dek'
            i = 0
            if (use_D_norm) then
               message = 'D_norm'
               return
            end if
            if (correction_norm > tol_correction_norm*coeff) i = i+1
            if (max_correction > tol_max_correction*coeff) i = i+2
            if (residual_norm > tol_residual_norm*coeff) i = i+4
            if (max_residual > tol_max_residual*coeff) i = i+8
            if (i == 0) then
               message = 'out of tries'
            else
               message = tol_msg(i)
            end if
         end subroutine get_message

         
         subroutine set_param_defaults
         
            if (iwork(i_itermin) == 0) iwork(i_itermin) = 2
            if (iwork(i_max_tries) == 0) iwork(i_max_tries) = 50
            if (iwork(i_tiny_corr_coeff_limit) == 0) iwork(i_tiny_corr_coeff_limit) = 25
            
            if (work(r_tol_residual_norm)==0) work(r_tol_residual_norm)=1d99
            if (work(r_tol_max_residual)==0) work(r_tol_max_residual)=1d99
            if (work(r_tol_max_correction)==0) work(r_tol_max_correction)=1d99
            if (work(r_target_corr_factor) == 0) work(r_target_corr_factor) = 0.9d0
            if (work(r_scale_correction_norm) == 0) work(r_scale_correction_norm) = 2d0
            if (work(r_corr_param_factor) == 0) work(r_corr_param_factor) = 10d0
            if (work(r_scale_max_correction) == 0) work(r_scale_max_correction) = 1d99
            if (work(r_corr_norm_jump_limit) == 0) work(r_corr_norm_jump_limit) = 1d99
            if (work(r_max_corr_jump_limit) == 0) work(r_max_corr_jump_limit) = 1d99
            if (work(r_resid_norm_jump_limit) == 0) work(r_resid_norm_jump_limit) = 1d99
            if (work(r_max_resid_jump_limit) == 0) work(r_max_resid_jump_limit) = 1d99
            if (work(r_corr_coeff_limit) == 0) work(r_corr_coeff_limit) = 1d-3
            if (work(r_slope_alert_level) == 0) work(r_slope_alert_level) = 1d0
            if (work(r_slope_crisis_level) == 0) work(r_slope_crisis_level) = 1d0
            if (work(r_tiny_corr_factor) == 0) work(r_tiny_corr_factor) = 2d0

         end subroutine set_param_defaults
         
         
         subroutine oops(msg)
            character (len=*), intent(in) :: msg
            character (len=256) :: full_msg
            full_msg = trim(msg) // ' -- give up'
            call write_msg(full_msg)
            convergence_failure = .true.
         end subroutine oops

      
         subroutine setequ(nvar, nz, x, equ, lrpar, rpar, lipar, ipar, ierr)
            integer, intent(in) :: nvar, nz
            real(fltp), pointer :: x(:,:) ! (nvar, nz)
            real(fltp), pointer :: equ(:,:) ! (nvar, nz)
            integer, intent(in) :: lrpar, lipar
            real(dp), intent(inout) :: rpar(:) ! (lrpar)
            integer, intent(inout) :: ipar(:) ! (lipar)
            integer, intent(out) :: ierr
            call set_primaries(nvar, nz, x, lrpar, rpar, lipar, ipar, ierr); if (ierr /= 0) return
            call set_secondaries(0, lrpar, rpar, lipar, ipar, ierr); if (ierr /= 0) return
            call eval_equations(iiter, nvar, nz, x, xscale, equ, lrpar, rpar, lipar, ipar, ierr)
            if (ierr /= 0) return
         end subroutine setequ


         subroutine adjust_correction(
     >         actual_corr_coeff_limit, grad_f, f, slope, correction_factor, coeff, 
     >         err_msg, lrpar, rpar, lipar, ipar, ierr)
            real(fltp), intent(in) :: actual_corr_coeff_limit ! make adjust coeff >= corr_coeff_limit
            real(fltp), intent(in) :: correction_factor
            real(fltp), intent(in) :: grad_f(:) ! (neq) ! gradient df/dx at xold
            real(fltp), intent(out) :: f ! 1/2 fvec^2. minimize this.
            real(fltp), intent(in) :: slope 
            real(fltp), intent(out) :: coeff 
            ! returns coeff in range corr_coeff_limit to correction_factor
            ! the new correction is coeff*xscale*B and, if all goes well,
            ! the new x will give an improvement in f
            character (len=*), intent(out) :: err_msg
            integer, intent(in) :: lrpar, lipar
            real(dp), intent(inout) :: rpar(:) ! (lrpar)
            integer, intent(inout) :: ipar(:) ! (lipar)
            integer, intent(out) :: ierr
      
            integer :: i, j, k, iter, k_max_corr, i_max_corr
            character (len=256) :: message
            logical :: first_time
            real(fltp) :: a1, alam, alam2, alamin, a2, disc, f2,
     >         rhs1, rhs2, temp, test, tmplam, max_corr, fold, corr_coeff_limit
            real(fltp) :: coeff_max, frac, f_target
     
            real(fltp), parameter :: alf = 1d-2 ! ensures sufficient decrease in f

            real(fltp), parameter :: alam_factor = 0.2d0
            
            include 'formats.dek'
         
            ierr = 0                  
            coeff = 0

            if (actual_corr_coeff_limit == 1) then
               f = 0
            else
               do k=1,nz
                  do i=1,nvar
                     xsave(i,k) = x(i,k)
                     dxsave(i,k) = dx(i,k)
                  end do
               end do
               f = eval_f(nvar,nz,equ)
               if (is_bad(f)) then
                  ierr = -1
                  write(err_msg,*) 'adjust_correction failed in eval_f'
                  if (dbg_msg) write(*,*) 'adjust_correction: eval_f(nvar,nz,equ)', eval_f(nvar,nz,equ)
                  return
               end if
            end if
            fold = f
            
            corr_coeff_limit = actual_corr_coeff_limit
            alam = correction_factor
            coeff_max = max(corr_coeff_limit, alam)
            first_time = .true.
            f2 = 0
            alam2 = 0

         search_loop: do iter = 1, 1000
            
               coeff = max(corr_coeff_limit, alam) 
               call apply_coeff(nvar, nz, x, xsave, B, xscale, coeff, actual_corr_coeff_limit) ! changes x
               do k=1,nz
                  do i=1,nvar
                     dx(i,k) = x(i,k) - xold(i,k)
                  end do
               end do
               call xdomain(iiter, nvar, nz, x, dx, xold, lrpar, rpar, lipar, ipar, ierr)
               if (ierr /= 0) then
                  write(err_msg,*) 'adjust_correction failed in xdomain'
                  if (dbg_msg) write(*,*) 'adjust_correction failed in xdomain: alam', alam
                  corr_coeff_limit = min(corr_coeff_limit,actual_corr_coeff_limit)
                  if (alam <= corr_coeff_limit) return
                  ierr = 0
                  alam = max(alam*alam_factor, corr_coeff_limit)
                  cycle
               end if
               
               call setequ(nvar, nz, x, equ, lrpar, rpar, lipar, ipar, ierr)
               if (ierr /= 0) then
                  corr_coeff_limit = min(corr_coeff_limit,actual_corr_coeff_limit)
                  if (alam > corr_coeff_limit) then
                     alam = max(alam/10, corr_coeff_limit)
                     ierr = 0
                     cycle
                  end if
                  ierr = -1
                  write(err_msg,*) 'adjust_correction failed in setequ'
                  if (dbg_msg) write(*,*) 'adjust_correction: setequ returned ierr', ierr
                  exit search_loop
               end if
               
               if (actual_corr_coeff_limit == 1) return
            
               f = eval_f(nvar,nz,equ)
               if (is_bad(f)) then
                  corr_coeff_limit = min(corr_coeff_limit,actual_corr_coeff_limit)
                  if (alam > corr_coeff_limit) then
                     alam = max(alam/10, corr_coeff_limit)
                     ierr = 0
                     cycle
                  end if
                  err_msg = 'equ norm is NaN or other bad num'
                  ierr = -1
                  exit search_loop
               end if
               
               f_target = max(fold/2, fold + alf*coeff*slope)
               if (f <= f_target) then
                  return ! sufficient decrease in f
               end if

               if (alam <= corr_coeff_limit) then
                  return ! time to give up
               end if

               ! reduce alam and try again
               if (first_time) then
                  tmplam = -slope/(2*(f-fold-slope))
                  first_time = .false.
               else ! have two prior f values to work with
                  rhs1 = f - fold - alam*slope
                  rhs2 = f2 - fold - alam2*slope
                  a1 = (rhs1/alam**2 - rhs2/alam2**2)/(alam - alam2)
                  a2 = (-alam2*rhs1/alam**2 + alam*rhs2/alam2**2)/(alam - alam2)
                  if (a1 == 0) then
                     tmplam = -slope/(2*a2)
                  else
                     disc = a2*a2-3*a1*slope
                     if (disc < 0) then
                        tmplam = alam*alam_factor
                     else if (a2 <= 0) then
                        tmplam = (-a2+sqrt(disc))/(3*a1)
                     else
                        tmplam = -slope/(a2+sqrt(disc))
                     end if
                  end if
                  if (tmplam > alam*alam_factor) tmplam = alam*alam_factor
               end if
            
               alam2 = alam
               f2 = f
               alam = max(tmplam, alam*alam_factor, corr_coeff_limit)
     
            end do search_loop

            do k=1,nz
               do i=1,nvar
                  x(i,k) = xsave(i,k)
                  dx(i,k) = dxsave(i,k)
               end do
            end do
         
         end subroutine adjust_correction
         
         
         subroutine apply_coeff(nvar, nz, x, xsave, B, xscale, coeff, actual_corr_coeff_limit)
            integer, intent(in) :: nvar, nz
            real(fltp), intent(out), dimension(:,:) :: x
            real(fltp), intent(in), dimension(:,:) :: xsave, B, xscale
            real(fltp), intent(in) :: coeff, actual_corr_coeff_limit
            integer :: i, k
            if (actual_corr_coeff_limit == 1) then
               forall (i=1:nvar,k=1:nz) x(i,k) = x(i,k) + xscale(i,k)*B(i,k)
               return
            end if
            if (coeff == 1) then
               forall (i=1:nvar,k=1:nz) x(i,k) = xsave(i,k) + xscale(i,k)*B(i,k)
               return
            end if
            ! coeff only applies to vars from lsvar_lo to lsvar_hi
            forall (i=lsvar_lo:lsvar_hi,k=1:nz) x(i,k) = xsave(i,k) + coeff*xscale(i,k)*B(i,k)
            forall (i=1:lsvar_lo-1,k=1:nz) x(i,k) = xsave(i,k) + xscale(i,k)*B(i,k)
            forall (i=lsvar_hi+1:nvar,k=1:nz) x(i,k) = xsave(i,k) + xscale(i,k)*B(i,k)
         end subroutine apply_coeff


         logical function solve_equ()    
            integer ::  nrhs, ldafb, ldb, ldx, lda, i, j, n, sprs_nz
            real(fltp) :: ferr, berr
            
            include 'formats.dek'

            solve_equ=.true.
            do k=1,nz
               do i=1,nvar
                  b(i,k) = -equ(i,k)
               end do
            end do
            n = nvar*nz

            nrhs=1
            lda=mljac+1+mujac
            ldafb=2*mljac+mujac+1
            ldb=n
            ldx=n
            
            info = 0
            if (do_mtx_timing) call system_clock(time0,clock_rate)            
            call factor_mtx(n, ldafb, sprs_nz)
            if (info == 0) call solve_mtx(n, ldafb, sprs_nz)
            if (do_mtx_timing) then
               call system_clock(time1,clock_rate)
               work(r_mtx_time) = work(r_mtx_time) + dble(time1 - time0) / clock_rate
            end if

            if (info /= 0) then 
               solve_equ=.false.
               b(1:nvar,1:nz)=0
            end if
         
         end function solve_equ
         
         
         subroutine factor_mtx(n, ldafb, sprs_nz)
            integer, intent(in) :: n, ldafb
            integer, intent(out) :: sprs_nz
            integer :: i, j, k, info_dealloc
            sprs_nz = 0
#ifdef DBLE
            if (matrix_type == block_tridiag_dble_matrix_type) then
               if (.not. overlay_AF) then
                  do k = 1, nz
                     do j = 1, nvar
                        do i = 1, nvar
                           lblkF(i,j,k) = lblk(i,j,k)
                           dblkF(i,j,k) = dblk(i,j,k)
                           ublkF(i,j,k) = ublk(i,j,k)
                        end do
                     end do
                  end do
               end if          
               call decsolblk(
     >                  0, caller_id, nvar, nz, lblkF, dblkF, ublkF, b, ipiv_blk,
     >                  lrd, rpar_decsol, lid, ipar_decsol, info)
               if (info /= 0) then
                  call decsolblk(
     >               2, caller_id, nvar, nz, lblkF, dblkF, ublkF, B, ipiv_blk, 
     >               lrd, rpar_decsol, lid, ipar_decsol, info_dealloc)  
               end if
            else if (matrix_type == block_tridiag_quad_matrix_type) then
#endif
               if (.not. overlay_AF) then
                  forall (k=1:nz, j=1:nvar, i=1:nvar)
                     lblkF_quad(i,j,k) = lblk(i,j,k)
                     dblkF_quad(i,j,k) = dblk(i,j,k)
                     ublkF_quad(i,j,k) = ublk(i,j,k)
                  end forall
               end if          
               call decsolblk_quad(
     >                  0, caller_id, nvar, nz, lblkF_quad, dblkF_quad, ublkF_quad, B_quad, ipiv_blk,
     >                  lrd, rpar_decsol, lid, ipar_decsol, info)
               if (info /= 0) then
                  call decsolblk_quad(
     >               2, caller_id, nvar, nz, lblkF_quad, dblkF_quad, ublkF_quad, B_quad, ipiv_blk, 
     >               lrd, rpar_decsol, lid, ipar_decsol, info_dealloc)  
               end if
#ifdef DBLE
            else if (matrix_type == square_matrix_type) then
               do i=1,n
                  AF(1:n,i) = A(1:n,i)
               end do
               if (sparse_flag) then
                  call factor_sparse_dense_mtx(n, sprs_nz)
               else
                  call decsol(0, n, n, AF, n, n, b, ipiv, 
     >                  lrd, rpar_decsol, lid, ipar_decsol, info)
               end if
            else ! banded_matrix_type
               if (.not. overlay_AF) then
                  do j=1,neq
                     do i=1,ldA
                        AF(mljac+i,j) = A(i,j)
                     end do
                  end do
               end if                  
               if (sparse_flag) then
                  call factor_sparse_banded_mtx(n, ldafb, sprs_nz)
               else
                  call decsol(
     >                     0, n, ldafb, AF, mljac, mujac, b, ipiv, 
     >                     lrd, rpar_decsol, lid, ipar_decsol, info)
               end if
            end if
#endif
         end subroutine factor_mtx
         
         
#ifdef DBLE
         subroutine factor_sparse_dense_mtx(n, sprs_nz)
            integer, intent(in) :: n
            integer, intent(out) :: sprs_nz
            integer :: ndim
            ndim = size(AF,dim=1)
            call dense_to_sparse_with_diag(
     >         isparse, n, ndim, AF, sprs_nzmax, sprs_nz, sprs_ia, sprs_ja, sprs_sa, info)
            if (info == 0) then
               if (have_called_decsols) then ! deallocate the old one
                  call decsols(
     >                  2, n, sprs_nz, sprs_ia, sprs_ja, sprs_sa, b, 
     >                  lrd, rpar_decsol, lid, ipar_decsol, info)
                  if (info /= 0) then
                     write(*,*) 'failed in decsols(2)'
                  end if
               end if
               call decsols(
     >               0, n, sprs_nz, sprs_ia, sprs_ja, sprs_sa, b, 
     >               lrd, rpar_decsol, lid, ipar_decsol, info)
               have_called_decsols = .true.
            end if
         end subroutine factor_sparse_dense_mtx
#endif
         
         
#ifdef DBLE
         subroutine factor_sparse_banded_mtx(n, ldafb, sprs_nz)
            integer, intent(in) :: n, ldafb
            integer, intent(out) :: sprs_nz
            integer :: ierr
            if (isparse == compressed_row_sparse) then
               call band_to_row_sparse_with_diag(
     >               n, mljac, mujac, AF, ldafb, sprs_nzmax, sprs_nz, 
     >               sprs_ia, sprs_ja, sprs_sa, info)
            else if (isparse == compressed_column_sparse) then
               call band_to_col_sparse_with_diag(
     >               n, mljac, mujac, AF, ldafb, sprs_nzmax, sprs_nz, 
     >               sprs_ia, sprs_ja, sprs_sa, info)
               if (.false.) then ! DEBUGGING
                  open(unit=33, file='sparse_mtx.rua', action='write', iostat=info)
                  call mtx_write_hbcode1(33, n, sprs_nz, sprs_sa, sprs_ja, sprs_ia, ierr)
                  write(*,*) 'done write_hbcode1 sparse_mtx.rua ', n, sprs_nz
                  stop 'newton'
               end if
            else
               info = -1
            end if                     
            if (info == 0) then
               if (have_called_decsols) then ! deallocate the old one
                  call decsols(
     >               2, n, sprs_nz, sprs_ia, sprs_ja, sprs_sa, b, 
     >               lrd, rpar_decsol, lid, ipar_decsol, info)
                  if (info /= 0) then
                     write(*,*) 'failed in decsols(2)'
                  end if
               end if
               call decsols(
     >               0, n, sprs_nz, sprs_ia, sprs_ja, sprs_sa, b, 
     >               lrd, rpar_decsol, lid, ipar_decsol, info)
               have_called_decsols = .true.
            end if
         end subroutine factor_sparse_banded_mtx
#endif
         
         
         subroutine solve_mtx(n, ldafb, sprs_nz)
            integer, intent(in) :: n, ldafb, sprs_nz
            character(1) :: trans
            logical :: refine
            integer :: info_solve, info_refine, info_dealloc, i, j
            info = 0; info_solve=0; info_refine=0; info_dealloc=0
            refine = (iwork(i_refine_mtx_solution) /= 0)
            ! solve overwrites B, so save it first if doing refine.
            if (refine) forall (i=1:nvar,k=1:nz) B_init(i,k) = B(i,k) 
            trans = 'N'
#ifdef DBLE
            if (sparse_flag) then
               call solve_sparse_mtx(n, sprs_nz, refine)
            else if (matrix_type == block_tridiag_dble_matrix_type) then
               call decsolblk(
     >            1, caller_id, nvar, nz, lblkF, dblkF, ublkF, b, ipiv_blk,
     >            lrd, rpar_decsol, lid, ipar_decsol, info_solve)
               if (info_solve == 0 .and. refine) then
                  call block_dble_refine1(
     >               lblk, dblk, ublk, lblkF, dblkF, ublkF, ipiv_blk, decsolblk, B_init, B, 
     >               wrk_for_refine, caller_id, lrd, rpar_decsol, lid, ipar_decsol, info_refine)
               end if
               call decsolblk(
     >            2, caller_id, nvar, nz, lblkF, dblkF, ublkF, B, ipiv_blk, 
     >            lrd, rpar_decsol, lid, ipar_decsol, info_dealloc)               
            else if (matrix_type == block_tridiag_quad_matrix_type) then
#endif
               forall (k=1:nz, j=1:nvar) B_quad(j,k) = B(j,k)
               call decsolblk_quad(
     >            1, caller_id, nvar, nz, lblkF_quad, dblkF_quad, ublkF_quad, B_quad, ipiv_blk,
     >            lrd, rpar_decsol, lid, ipar_decsol, info_solve)
               call decsolblk_quad(
     >            2, caller_id, nvar, nz, lblkF_quad, dblkF_quad, ublkF_quad, B_quad, ipiv_blk, 
     >            lrd, rpar_decsol, lid, ipar_decsol, info_dealloc)
               forall (k=1:nz, j=1:nvar) B(j,k) = B_quad(j,k)
#ifdef DBLE
            else if (matrix_type == square_matrix_type) then
               call decsol(
     >            1, n, n, AF, n, n, B, ipiv, 
     >            lrd, rpar_decsol, lid, ipar_decsol, info_solve)
               if (info_solve == 0 .and. refine) then
                  call factored_square_dble_refine1(AF, B_init1, ipiv, B1, info_refine)
               end if
               call decsol(
     >            2, n, n, AF, n, n, B, ipiv, 
     >            lrd, rpar_decsol, lid, ipar_decsol, info_dealloc)               
            else ! banded_matrix_type
               call decsol(
     >            1, n, ldafb, AF, mljac, mujac, B, ipiv, 
     >            lrd, rpar_decsol, lid, ipar_decsol, info_solve)     
               if (info_solve == 0 .and. refine) then
                  call banded_dble_refine1( 
     >               n, ldafb, AF, lda, A, mljac, mujac, ipiv, 
     >               B_init1, B1, wrk_for_refine1, 
     >               decsol, lrd, rpar_decsol, lid, ipar_decsol, info_refine)
               end if
               call decsol(
     >            2, n, ldafb, AF, mljac, mujac, B, ipiv, 
     >            lrd, rpar_decsol, lid, ipar_decsol, info_dealloc)
            end if
#endif
            if (info_solve /= 0 .or. info_refine /= 0 .or. info_dealloc /= 0) info = -1
         end subroutine solve_mtx
         
         
#ifdef DBLE
         subroutine solve_sparse_mtx(n, sprs_nz, refine)
            integer, intent(in) :: n, sprs_nz
            logical, intent(in) :: refine
            integer :: info_solve, info_refine, info_dealloc
            info = 0; info_solve=0; info_refine=0; info_dealloc=0
            include 'formats.dek'
            if (.false.) then ! DEBUGGING
               call write_hbcode1(6, n, n, sprs_nz, sprs_sa, sprs_ja, sprs_ia)
               write(*,*) 'argument b'
               do i = 1, n
                  write(*,*) '', i, b(i,1)
               end do
               write(*,*)
            end if
            call decsols(
     >            1, n, sprs_nz, sprs_ia, sprs_ja, sprs_sa, b, 
     >            lrd, rpar_decsol, lid, ipar_decsol, info_solve)
            if (info_solve /= 0) then
               write(*,*) 'failed in decsols 1', info_solve
               write(*,2) 'n', n
               write(*,2) 'mljac', mljac
               write(*,2) 'mujac', mujac
               write(*,2) 'ldAF', ldAF
               write(*,2) 'sprs_nzmax', sprs_nzmax
               write(*,2) 'sprs_nz', sprs_nz
               write(*,*) 
               stop 1
            end if
            if (refine) then
               write(*,*) 'need to do refine for sparse mtx in newton'
            end if     
            call decsols(
     >            2, n, sprs_nz, sprs_ia, sprs_ja, sprs_sa, b, 
     >            lrd, rpar_decsol, lid, ipar_decsol, info_dealloc)
            if (info_dealloc /= 0) then
               write(*,*) 'failed in decsols 2', info_dealloc
               write(*,2) 'n', n
               write(*,2) 'mljac', mljac
               write(*,2) 'mujac', mujac
               write(*,2) 'ldAF', ldAF
               write(*,2) 'sprs_nzmax', sprs_nzmax
               write(*,2) 'sprs_nz', sprs_nz
               write(*,*) 
               stop 1
            end if            
            if (.false.) then ! DEBUGGING
               write(*,*) 'solution b'
               do i = 1, n
                  write(*,*) '', i, b(i,1)
               end do
               write(*,*)
               open(unit=33, file='sparse_mtx.data', action='write', iostat=info)
               call write_hbcode1(33, n, n, sprs_nz, sprs_sa, sprs_ja, sprs_ia)
               write(*,*) 'done write_hbcode1 sparse_mtx.data ', n, sprs_nz
               stop 'newton'
            end if
            if (info_solve /= 0 .or. info_refine /= 0 .or. info_dealloc /= 0) info = -1
         end subroutine solve_sparse_mtx
#endif
         
         
         logical function do_enter_setmatrix(
     >            neq, x, dx, xscale, lrpar, rpar, lipar, ipar, ierr)
            ! create jacobian by using numerical differences to approximate the partial derivatives
            implicit none
            integer, intent(in) :: neq
            real(fltp), pointer, dimension(:,:) :: x, dx, xscale
            integer, intent(in) :: lrpar, lipar
            real(dp), intent(inout) :: rpar(:) ! (lrpar)
            integer, intent(inout) :: ipar(:) ! (lipar)
            integer, intent(out) :: ierr
            logical :: need_solver_to_eval_jacobian
            need_solver_to_eval_jacobian = .true.
            call enter_setmatrix(iiter, 
     >                  nvar, nz, neq, x, xold, xscale, xder, need_solver_to_eval_jacobian, 
     >                  ldA, A, idiag, lrpar, rpar, lipar, ipar, ierr)
            do_enter_setmatrix = need_solver_to_eval_jacobian
         end function do_enter_setmatrix


         subroutine setmatrix(neq, x, dx, xscale, xsave, dxsave, lrpar, rpar, lipar, ipar, ierr)
            ! create jacobian by using numerical differences to approximate the partial derivatives
            implicit none
            integer, intent(in) :: neq
            real(fltp), pointer, dimension(:,:) :: x, dx, xscale, xsave, dxsave
            integer, intent(in) :: lrpar, lipar
            real(dp), intent(inout) :: rpar(:) ! (lrpar)
            integer, intent(inout) :: ipar(:) ! (lipar)
            integer, intent(out) :: ierr

            integer :: i, j, ii, jj, k, kk, ij, ik, ivar, jvar, iz, jz, jzz, ideb, ifin
            integer, dimension(nvar) :: nskip, gskip, dskip
            real(fltp) :: dscale, partial
            logical :: need_solver_to_eval_jacobian
            
            include 'formats.dek'

            ierr = 0
            
            need_solver_to_eval_jacobian = do_enter_setmatrix(
     >            neq, x, dx, xscale, lrpar, rpar, lipar, ipar, ierr)     
            if (ierr /= 0) return
            if (.not. need_solver_to_eval_jacobian) return
            
            if (matrix_type == block_tridiag_dble_matrix_type .or. 
     >          matrix_type == block_tridiag_quad_matrix_type) then
               write(*,*) 'sorry: newton does not support numerical block triangular jacobians yet.'
               write(*,*) 'try using a banded matrix instead'
               ierr = -1
               return
            end if
            
            ! allocate working arrays for numerical jacobian calculation
            allocate(xgg(nvar,nz), dxd(nvar,nz), dxdd(nvar,nz), equsave(nvar,nz), stat=ierr)

            forall (j=1:nvar,k=1:nz) ! save stuff
               xsave(j,k) = x(j,k)
               dxsave(j,k) = dx(j,k)
               equsave(j,k) = equ(j,k)
            end forall
            if (nsec > 0) y1=y      

            ! some info about the stencil
            ! gskip zones on left
            ! dskip zones on right
            ! nskip zones in total
            gskip=mljac/nvar
            dskip=mujac/nvar
            nskip=1+dskip+gskip

            A=0
            ! loop on variables
            do ivar=1, nvar
               forall (j=1:nvar,k=1:nz) dxd(j,k) = dxsave(j,k)
               do k=1, nz
                  do ii=1, 20 ! may need to increase xder
                     dxd(ivar,k)=dxd(ivar,k)+xder(ivar,k)
                     if (dxd(ivar,k)-dxsave(ivar,k) /= 0) exit
                     xder(ivar,k)=xder(ivar,k)*2
                  end do
               end do
               forall (j=1:nvar,k=1:nz)
                  dx(j,k) = dxd(j,k)
                  x(j,k) = xold(j,k) + dx(j,k)
                  dx(j,k) = x(j,k) - xold(j,k)
               end forall
               call xdomain(iiter, nvar, nz, x, dx, xold, lrpar, rpar, lipar, ipar, ierr)
               if (ierr /= 0) then
                  if (nsec > 0) y = y1 
                  call cleanup_after_setmatrix
                  call failed_in_setmatrix(0, lrpar, rpar, lipar, ipar, ierr)
                  return
               end if
               forall (j=1:nvar,k=1:nz) dxd(j,k) = dx(j,k)
               call set_primaries(nvar, nz, x, lrpar, rpar, lipar, ipar, ierr)
               if (ierr /= 0) then
                  if (nsec > 0) y = y1
                  call cleanup_after_setmatrix
                  call failed_in_setmatrix(0, lrpar, rpar, lipar, ipar, ierr)
                  return
               end if
               ! compute secondary variables for modified primaries
               call set_secondaries(ivar, lrpar, rpar, lipar, ipar, ierr)
               if (ierr /= 0) then
                  if (nsec > 0) y = y1
                  call cleanup_after_setmatrix
                  call failed_in_setmatrix(0, lrpar, rpar, lipar, ipar, ierr)
                  return
               end if
               if (nsec > 0) y2 = y

               ! now use the modified primaries and secondaries to get modified equations
               do kk=0, nskip(ivar)-1

                  if (nsec > 0) y = y1
                  forall (j=1:nvar,k=1:nz) dx(j,k) = dxsave(j,k)                 
                  ! primaries are changed only on the zones of the comb
                  do k = 1+kk, nz, nskip(ivar)
                     dx(ivar,k) = dxd(ivar,k)
                  end do
                  forall (j=1:nvar,k=1:nz)
                     x(j,k) = xold(j,k) + dx(j,k)
                     dxdd(j,k) = dx(j,k)
                  end forall
                  call set_primaries(nvar, nz, x, lrpar, rpar, lipar, ipar, ierr)
                  if (ierr /= 0) then
                     if (nsec > 0) y = y1
                     call cleanup_after_setmatrix
                     call failed_in_setmatrix(0, lrpar, rpar, lipar, ipar, ierr)
                     return
                  end if
                  
                  if (nsec > 0) then
                     ! note that we can use the previously computed secondaries
                     ! since, by definition, they depend only on the primaries of their own zone.
                     do j=1+kk, nz, nskip(ivar)
                        y(j, 1:nsec)=y2(j, 1:nsec)
                     enddo
                  end if
                  
                  ! compute the equations using these primaries and secondaries
                  call eval_equations(iiter, nvar, nz, x, xscale, equ, lrpar, rpar, lipar, ipar, ierr)
                  if (ierr /= 0) then
                     if (nsec > 0) y = y1
                     call cleanup_after_setmatrix
                     call failed_in_setmatrix(0, lrpar, rpar, lipar, ipar, ierr)
                     return
                  end if

                  ! compute derivatives
                  do j = ivar+kk*nvar, neq, nvar*nskip(ivar)
                     zone = (j-1)/nvar + 1
                     if (dxdd(ivar,zone) == dxsave(ivar,zone)) then 
                        ! can happen if the xdomain routine changed dx in a bad way.
                        ierr = -1
                        write(err_msg, '(a, i5, 99e20.10)') 
     >                     'failed trying to create numerical derivative for variable ',
     >                     j, dxsave(ivar,zone), xsave(ivar,zone), xder(ivar,zone)
                        call alert(ierr, err_msg)
                        if (nsec > 0) y = y1 
                        call cleanup_after_setmatrix
                        call failed_in_setmatrix(j, lrpar, rpar, lipar, ipar, ierr)
                        return
                     endif
                     ideb=max(1, (zone-gskip(ivar)-1)*nvar+1)
                     ifin=min(neq, (zone+dskip(ivar))*nvar)
                     ideb=max(ideb, j-mljac)
                     ifin=min(ifin, j+mujac)
                     do i = ideb, ifin
                        ik = (i-1)/nvar + 1
                        ij = i - (ik-1)*nvar
                        partial=xscale(ivar,zone)*
     >                     (equ(ij,ik)-equsave(ij,ik))/(dxdd(ivar,zone)-dxsave(ivar,zone))
                        if (matrix_type == square_matrix_type) then
                           A(i,j)=partial
                        else
                           A(i-j+idiag,j)=partial
                        end if
                     end do
                  end do

                  if (nsec > 0) then ! restore the secondaries that correspond to the unmodified primaries
                     do j=1+kk, nz, nskip(ivar)       
                        y(j, 1:nsec)=y1(j, 1:nsec)
                     enddo
                  end if

               enddo
         
            enddo

            if (nsec > 0) y = y1 
            call cleanup_after_setmatrix

            call exit_setmatrix(iiter, nvar, nz, neq, 
     >            dx, ldA, A, idiag, xscale, lrpar, rpar, lipar, ipar, ierr)

         end subroutine setmatrix
         
         
         subroutine cleanup_after_setmatrix
            integer :: i, k
            do k=1,nz
               do i=1,nvar
                  x(i,k) = xsave(i,k)
                  dx(i,k) = dxsave(i,k)
                  equ(i,k) = equsave(i,k)
               end do
            end do
            deallocate(xgg, dxd, dxdd, equsave)
         end subroutine cleanup_after_setmatrix
         
      
         subroutine write_msg(msg)
            real(fltp), parameter :: secyer = 3.1558149984d7 ! seconds per year
            character(*)  :: msg
            if (.not. dbg_msg) return
            if (use_D_norm) then
    2       format(i6, 4x, i3, 3(4x, a, f12.5), 4x, a, e14.5, 4x, a)            
            write(*,2)
     >         iwork(i_model_number), iiter,
     >         'D_norm', D_norm, 
     >         'D_norm_theta', D_norm_theta, 
     >         'lg dt', log10(max(1d-99,work(r_dt)/secyer)), 
     >         'D_norm_err_est', D_norm_err_est, 
     >         trim(msg) 
            return
            end if
            
    1       format(i6, 2x, i3, 2x, a, f8.4, 6(2x, a, 1x, e10.3), 2x, a, f6.2, 2x, a)            
            write(*,1)
     >         iwork(i_model_number), iiter,
     >         'coeff', coeff, 
     >         'slope', slope, 
     >         'f', f,
     >         'avg resid', residual_norm, 
     >         'max resid', max_residual, 
     >         'avg corr', correction_norm, 
     >         'max corr', max_correction, 
     >         'lg dt', log10(max(1d-99,work(r_dt)/secyer)), 
     >         trim(msg)            
         end subroutine write_msg
      
      
         subroutine newton_core_dump(x, dx, xold)
            real(fltp), dimension(:,:) :: x 
            ! new vector of primaries, x = xold+dx
            real(fltp), dimension(:,:) :: dx 
            ! increment vector from previous vector of primaries.
            real(fltp), dimension(:,:) :: xold 
            ! xold = x-dx.  xold is kept constant; x and dx change.
            integer :: i, j, k
         
    1       format(a20, i16) ! integers
    2       format(a20, 1pe26.16) ! reals
    3       format(a20, i6, 1x, 1pe26.16) ! 1 index reals
    4       format(a20, 2(i6, 1x), 1pe26.16) ! 2 index reals
    5       format(a20, i6, 1x, i16) ! 1 index integers
         
            ! only printout args and things that are carried over from one call to next
            ! e.g., skip work arrays that are written on each call before they are read
         
            write(*, *) 'newton core dump'
            write(*, 1) 'nz', nz
            write(*, 1) 'nvar', nvar
            write(*, 1) 'mljac', mljac
            write(*, 1) 'mujac', mujac
            write(*, 1) 'liwork', liwork
            write(*, 1) 'lwork', lwork
            write(*, 1) 'ldy', ldy
            write(*, 1) 'nsec', nsec
            write(*, 1) 'ldAF', ldAF
            write(*, 1) 'ndiag', ndiag

            write(*, 2) 'tol_correction_norm', tol_correction_norm
         
            do j=1, ndiag
               do k=1, nz
                  write(*, 4) 'A', j, k, A(j, k)
               end do
            end do
         
            do j=1, ldAF
               do k=1, nz
                  write(*, 4) 'AF', j, k, AF(j, k)
               end do
            end do
         
            do k=1, nz
               write(*, 5) 'ipiv', k, ipiv(k)
            end do
         
            do k=1, nz
               do j=1, nvar
                  write(*, 4) 'x', j, k, x(j, k)
                  write(*, 4) 'dx', j, k, x(j, k)
                  write(*, 4) 'xold', j, k, x(j, k)
               end do
            end do
         
         end subroutine newton_core_dump


         subroutine pointers(ierr)
            integer, intent(out) :: ierr
      
            integer :: i, neq
            character (len=256) :: err_msg
            real(fltp), pointer :: blk3(:, :, :, :)
            real(qp), pointer :: blk3_quad(:, :, :, :)

            neq = nvar*nz
            ierr = 0         
            i = num_work_params+1
            
            call set_ptr_2(A, work(i:i+ndiag*neq-1), ndiag, neq); i = i+ndiag*neq
            Acopy => A
            call set_ptr_2(xsave, work(i:i+neq-1), nvar, nz); i = i+neq
            call set_ptr_2(dxsave, work(i:i+neq-1), nvar, nz); i = i+neq
            B1 => work(i:i+neq-1)
            call set_ptr_2(B, B1, nvar, nz); i = i+neq
            B_init1 => work(i:i+neq-1)
            call set_ptr_2(B_init, B_init1, nvar, nz); i = i+neq
            wrk_for_refine1 => work(i:i+neq-1)
            call set_ptr_2(wrk_for_refine, wrk_for_refine1, nvar, nz); i = i+neq
            call set_ptr_2(grad_f, work(i:i+neq-1), nvar, nz); i = i+neq
            call set_ptr_2(rhs, work(i:i+neq-1), nvar, nz); i = i+neq
            call set_ptr_2(xder, work(i:i+neq-1), nvar, nz); i = i+neq
            call set_ptr_2(dx, work(i:i+neq-1), nvar, nz); i = i+neq
            
            if (nsec > 0) then
               call set_ptr_2(y1, work(i:i+nsec*neq-1), neq, nsec); i = i+nsec*neq
               call set_ptr_2(y2, work(i:i+nsec*neq-1), neq, nsec); i = i+nsec*neq
            else
               nullify(y1)
               nullify(y2)
            end if

            if (i-1 > lwork) then
               ierr = -1
               write(err_msg, 
     >                  '(a, i6, a, 99i6)') 'newton: lwork is too small.  must be at least', i-1,
     >                  '   but is only ', lwork, neq, ndiag, ldAF, nsec
               call alert(ierr, err_msg)
               return
            end if
         
            i = num_iwork_params+1
            ipiv => iwork(i:i+neq-1); i = i+neq
            if (i-1 > liwork) then
               ierr = -1
               write(err_msg, '(a, i6, a, i6)') 
     >                  'newton: liwork is too small.  must be at least', i, 
     >                  '   but is only ', liwork
               write(*, *) trim(err_msg)
               call alert(ierr, err_msg)
               return
            end if
            
            if (matrix_type == block_tridiag_dble_matrix_type .or.
     >          matrix_type == block_tridiag_quad_matrix_type) then
               call set_int_pointer_2(ipiv_blk, ipiv, nvar, nz)
               call set_ptr_4(blk3, A, nvar, nvar, nz, 3)
               ublk => blk3(:,:,:,1)
               dblk => blk3(:,:,:,2)
               lblk => blk3(:,:,:,3)
            end if
               
            if (matrix_type == block_tridiag_dble_matrix_type) then
               call set_ptr_4(blk3, AF, nvar, nvar, nz, 3)
               ublkF => blk3(:,:,:,1)
               dblkF => blk3(:,:,:,2)
               lblkF => blk3(:,:,:,3)
            end if
            
            if (matrix_type == block_tridiag_quad_matrix_type) then
               i = 0
               call set_quad_pointer_2(B_quad, qwork(i+1:i+nvar*nz), nvar, nz)
               i = i + nvar*nz
               call set_quad_pointer_4(blk3_quad, qwork(i+1:i+nvar*nvar*nz*3), nvar, nvar, nz, 3)
               i = i + nvar*nvar*nz*3
               ublkF_quad => blk3_quad(:,:,:,1)
               dblkF_quad => blk3_quad(:,:,:,2)
               lblkF_quad => blk3_quad(:,:,:,3)
               if (lqwork < nvar*nz) then
                  ierr = -1
                  write(err_msg, '(a, i6, a, i6)') 
     >                  'newton: lqwork is too small.  must be at least', nvar*nz, 
     >                  '   but is only ', lqwork
                  write(*, *) trim(err_msg)
                  call alert(ierr, err_msg)
                  return
               end if
            end if
         
         end subroutine pointers
         
         
         real(fltp) function eval_slope(nvar, nz, grad_f, B)
            integer, intent(in) :: nvar, nz
            real(fltp), intent(in), dimension(:,:) :: grad_f, B
            integer :: k, i
            eval_slope = 0
            do i=lsvar_lo,lsvar_hi
               eval_slope = eval_slope + dot_product(grad_f(i,1:nz),B(i,1:nz))
            end do
         end function eval_slope
         
         
         real(fltp) function eval_f(nvar, nz, equ)
            integer, intent(in) :: nvar, nz
            real(fltp), intent(in), dimension(:,:) :: equ
            integer :: k, i
            real*8 :: q
            include 'formats.dek'
            eval_f = 0
            do k = 1, nz
               do i = lsvar_lo, lsvar_hi
                  q = equ(i,k)
                  eval_f = eval_f + q*q
               end do
            end do
            eval_f = eval_f/2
            !write(*,1) 'do_newton: eval_f', eval_f
         end function eval_f


      end subroutine do_newton
      
   
      subroutine get_newton_work_sizes(
     >      mljac, mujac, nvar, nz, nsec, matrix_type, lwork, liwork, lqwork, ierr)
         integer, intent(in) :: mljac, mujac, nvar, nz, nsec
         integer, intent(in) :: matrix_type
         integer, intent(out) :: lwork, liwork, lqwork
         integer, intent(out) :: ierr
         
         integer :: i, ndiag, ldAF, neq
         
         include 'formats.dek'

         ierr = 0
         neq = nvar*nz
         
         if (matrix_type == square_matrix_type) then
            ndiag = neq
            ldAF = ndiag
         else if (matrix_type == block_tridiag_dble_matrix_type .or. 
     >            matrix_type == block_tridiag_quad_matrix_type) then
            ndiag = 3*nvar
            ldAF = ndiag
         else
            ndiag = mljac+mujac+1
            ldAF = mljac+ndiag
         end if
         
         liwork = num_iwork_params + neq     
         lwork = num_work_params + neq*(ndiag + 9 + 2*nsec)
         if (matrix_type == block_tridiag_quad_matrix_type) then
            lqwork = neq*(ndiag + 1)
         else
            lqwork = 0
         end if
         
      end subroutine get_newton_work_sizes


#ifdef DBLE
      end module mod_newton_dble
#else
      end module mod_newton_quad
#endif

