#include "cppdefs.h"
!-----------------------------------------------------------------------
!BOP
!
! !IROUTINE:  adaptive vertical coordinates
! \label{sec-adaptive-coordinates}
!
! !INTERFACE:
   subroutine adaptive_coordinates_6(first,hotstart)
!
! !DESCRIPTION:
!
! The vertical grid adaptivity is partially given by a vertical diffusion
! equation for the vertical layer positions, with diffusivities being
! proportional to shear, stratification and distance from the boundaries.
! In the horizontal, the grid can be smoothed with respect to $z$-levels,
! grid layer slope and density. Lagrangian tendency of the grid movement
! is supported. The adaptive terrain-following grid can be set to be an
! Eulerian-Lagrangian grid, a hybrid $\sigma$-$\rho$ or $\sigma$-$z$ grid
! and combinations of these with great flexibility. With this, internal
! flow structures such as thermoclines can be well resolved and
! followed by the grid. A set of idealised examples is presented in
! Hofmeister et al. (2009), which show that the introduced adaptive grid
! strategy reduces pressure gradient errors and numerical mixing significantly.
!
! For the configuration of parameters, a seperate namelist file adaptcoord.inp
! has to be given with parameters as following:
! \\
! faclag - Factor on Lagrangian coords., 0.le.faclag.le.1\\
! cNN - dependence on stratification\\
! cSS - dependence on shear\\
! d\_dens - Typical density difference for scaling cSS adaption\\
! d\_vel - Typical velocity difference for scaling cNN adaption\\
! hsurf - target size for surface cell [m]\\
! hbott - target size for bottom cell [m]\\
! tgrid - Time scale of grid adaptation [s]\\
! preadapt - number of iterations for pre-adaptation\\
! \\
! The parameters cNN,cSS are used for the vertical adaption and
! have to be less or equal 1 in sum. The difference to 1 is describing a
! background value which forces the coordinates back to a sigma distribution.
! The values ddu and ddl from the domain namelist are used for weighting
! the zooming to surface and bottom if cdd>0.
! The option preadapt allows for a pre-adaption of
! coordinates to the initial density field and bathymetry. The number
! defines the number of iterations (change coordinates, vertically advect
! tracer, calculate vertical gradients)  used for the preadaption.
! The initial temperature and salinity fields are re-interpolated
! onto the adapted grid afterwards.
!
! !USES:
   use parameters, only: rk
   use domain, only: ga,imin,imax,jmin,jmax,kmax,H,az,au,av,min_depth
   use variables_3d, only: dt,kmin,kumin,kvmin,ho,hn,huo,hvo,hun,hvn
   use variables_3d, only: Dn,Dun,Dvn,sseo,ssen
   use variables_3d, only: kmin_pmz,kumin_pmz,kvmin_pmz
   use variables_3d, only: preadapt
   use variables_3d, only: Dgrid
   use vertical_coordinates,only: hcheck
   use vertical_coordinates,only: restart_with_ho,restart_with_hn

! ADAPTIVE-BEGIN
   use  parameters,  only: g,rho_0
   use variables_3d, only: uu,vv,SS
#ifndef NO_BAROCLINIC
   use variables_3d, only: NN
   use variables_3d, only: rho
#endif
   use domain,       only: ddu,ddl
   use halo_zones, only: update_3d_halo,wait_halo
   use halo_zones, only: H_TAG,U_TAG,V_TAG
   use domain,       only: dxv,dyu,arcd1

!ADAPTIVE-END

   IMPLICIT NONE
!
! !INPUT PARAMETERS:
   logical, intent(in)                 :: first
   logical, intent(in)                 :: hotstart
! !OUTPUT PARAMETERS:
!   integer, intent(out)                :: preadapt
!
! !REVISION HISTORY:
!  Original author(s): Richard Hofmeister and Hans Burchard
!      Heavily modified 2021, Bjarne Buchmann.
!
! !LOCAL VARIABLES:
   integer         :: i,j,k,rc, ifilt
!   REALTYPE, dimension(:), pointer  :: aav ! total grid diffus.
!   REALTYPE, save, dimension(:),     allocatable  :: avd ! dist.-rel. grid diff.
!   REALTYPE, save, dimension(:,:,:), allocatable  :: zpos ! new pos. of z-levels
   REALTYPE, save, dimension(:,:,:), allocatable  :: zposo! old pos. of z-levels
   REALTYPE, save, dimension(:,:,:), allocatable  :: ztgt,htgt ! tgt z and h (to be relaxed against)
   REALTYPE, save, dimension(:,:),   allocatable  :: wrk2d
   REALTYPE, save, dimension(:,:,:), allocatable  :: wrk3d
   REALTYPE     :: dNN, sNN, sSS
   REALTYPE     :: faclag=_ZERO_    ! Factor on Lagrangian coords., 0.le.faclag.le.1
!   REALTYPE     :: facdif=3*_TENTH_ ! Factor on thickness filter,   0.le.faclag.le.1
!   REALTYPE     :: fachor=_TENTH_   ! Factor on position filter,  0.le.faclag.le.1
!   REALTYPE     :: faciso=_ZERO_    ! Factor for isopycnal tendency
   REALTYPE     :: wrk0
   REALTYPE     :: chmin=_HALF_      ! Internal Dgrid coeff for shallow-water regions
!   REALTYPE     :: Ncrit=_ONE_/1000000
   integer      :: mhor=1    ! this number is experimental - it has to be 1 for now-
! These are for the namelist:
   ! All th "c*"-coeffs add to Dgrid, for a unified grid update
   !  Each c*-coefficient should be [0:1]. Larger coefficients are allowed,
   !   but then it might be better to just adjust the grid time scale, tgrid.
   !  Three different dependencies control maximum cell thickness:
   !   chbott, chsurf and cmidd. The first two are maximum at the 
   !   boundaries (k=1 and k=kmax, respectively) and decay exponentially
   !   away from the wall. In contrast, cmidd is applied uniformly everywhere.
   !   For each of the three, a reference cell thickness is defined 
   !   (hbott, hsurf, dmidd). By default (given a positive value) it is 
   !   interpreted as a value in meters, while a negative value means 
   !   a factor to the average cell size over the depth (abs value).
   !   Ie. "hmidd=-2.0" would inhibit cells as they grow closer to 2.0*H/kmax.
   !   There is an exponent [hpow], which controlls how fast Dgrid grows
   !   with cell size, as the cell grows closer to the reference value.
   !   An integer in the range 1-5 is recommended (TODO: still need to test various schemes here).
   !   hwallmult determine how fast the "wall-bound cell controls" decay away
   !   from the wall. Value must be in the range [0:1], and ~0.75 seems good,
   !   but note that the neighbor-limiter can add similar effect.
   !   cneigh and rneigh limits how large neighbor cells can be relative to
   !   each other. cneigh is a coefficient like the others, and rneigh 
   !   is "maximum relative growth" factor [0:[. Values near zero will 
   !   force the distribution to be more equidistant. This effect will 
   !   try to limit cell size ratios to be h/hneigh < 1+rneigh.
   !   It is suggested to keep rneigh well below unity; a value around 0.25-0.50
   !   may suit many users.
   !   The effect is not completely unsimilar to increased vertical filtering.
   !   Overall, the combination of neighbor-limiter and somve vertical filtering 
   !   may work well.
   !
   ! In shallow waters, the layer distribution should be uniform, ie equidistant 
   ! layers over the water column. To achieve this effect, it seems appropriate 
   ! that Dgrid should be uniform (constant over depth) and relatively large in
   ! shallow waters. The shallow-water contribution must then be added after all 
   ! other effects, and especially after the "small cell limiter" which can 
   ! effectively turn off all other effects, and thus setting Dgrid=0 in shallow 
   ! waters.
   !
   ! Regarding water depths and layer thicknesses the following holds:
   !    Dn = ssen + H       ! For any i,j
   !    Do = sseo + H       ! Not actually computed
   !    sum(ho(1:kmax) = Do ! On input
   !    sum(hn(1:kmax) = Dn ! Must hold on output
   ! For convenience we define:
   !    haux = ho * (Dn/Do) ! Any i,j,k
   ! Gammasrc will be defined from haux/Dn ie from ho/Do=ho/(sseo+H)
   ! Gammatgt will refer to hn/Dn
   ! 
   REALTYPE     :: csig      =  0.01_rk   ! tendency to uniform sigma, aka background
   REALTYPE     :: cgvc      =  0.0_rk    ! tendency to "standard" gvc (w/ ddu,ddl)
   REALTYPE     :: chsurf    =  0.5_rk    ! tendency to keep surface layer bounded
   REALTYPE     :: chbott    =  0.3_rk    ! tendency to keep bottom layer bounded
   REALTYPE     :: chmidd    =  0.2_rk    ! tendency to keep all layers bounded
   REALTYPE     :: cneigh    =  0.1_rk    ! tendency to keep neighbors of similar size
   REALTYPE     :: hsurf     =  0.5_rk    ! reference thickness, surface layer (relative to avg cell)
   REALTYPE     :: hbott     = -0.25_rk   ! reference thickness, bottom layer (relative to avg cell)
   REALTYPE     :: hmidd     = -4.0_rk    ! reference thickness, other layers (relative to avg cell)
   REALTYPE     :: hwallmult =  0.8_rk    ! surface/bottom effect - decay by layer
   REALTYPE     :: rneigh    =  0.25_rk   ! reference relative growth between neighbors
   INTEGER      :: hpow      =  3         ! exponent for growth of Dgrid (ramp between 0 and c* tendencies)
   REALTYPE     :: cNN       =  0.5_rk    ! dependence on NN (density zooming)
   REALTYPE     :: cSS       =  0.25_rk   ! dependence on SS (shear zooming)
   REALTYPE     :: d_dens    =  0.3_rk    ! reference value for NN density between neighbor cells
   REALTYPE     :: d_vel     =  0.1_rk    ! reference value for SS absolute shear between neighbor cells
   INTEGER      :: nfiltvert =  2         ! Number of vertical Dgrid filter iterations [0:]
   REALTYPE     :: wfiltvert =  0.3_rk    ! Strength of vertical filter-of-Dgrid [0:~0.5]
   REALTYPE     :: wfilthorz =  0.4_rk    ! Strength of horizontal filter-of-Dgrid [0:~0.5]
   REALTYPE     :: hmin      =  0.3_rk    !
   REALTYPE     :: tgrid     = 14400.0_rk ! Time scale of grid adaptation
   INTEGER      :: split     =  1         ! Take this many partial-steps for diffusion eq.
! These will be derived from the namelist.
   REALTYPE     :: hminm1             ! 1/hmin
   REALTYPE     :: tgridm1            ! 1/tgrid
   REALTYPE     :: rneighm1           ! 1/rneigh
   REALTYPE     :: wfiltneigh, wfiltcentr
   REALTYPE     :: trelax,trelax2     ! temporal relaxation coef, dt/tgrid.
   REALTYPE     :: wsurf, wbott       ! surface and bottom scaling work params
! These two basically stems from hsurf && hbott [TODO: Get rid of these and use Dn?]
   REALTYPE, save, dimension(:,:), allocatable :: hmaxm1_surf ! 1/target max h(k=kmax)
   REALTYPE, save, dimension(:,:), allocatable :: hmaxm1_bott ! 1/target max h(k=1)
   REALTYPE, save, dimension(:,:), allocatable :: hmaxm1_midd ! 1/target max h(:)
!  For lateral filtering - neighbors and their sum-of-values:
   INTEGER      :: n1,n2,n3,n4, nneighs
   REALTYPE     :: Dneighs
!
   REALTYPE     :: cepsi = _ONE_/100000 ! Switch off tendencies where c<cepsi.
! Tmp vars: [TODO: Rename?]
   REALTYPE :: havg, hlimi, wwh, hdash, hrel
   REALTYPE :: dki, dref
   INTEGER  :: ki
   INTEGER  :: isplit
! Shorthands for computational speedup (to assist the compiler):
   REALTYPE     :: Hm1            ! 1./H(i,j) [local value in ij-sweep]
   REALTYPE     :: Dnm1           ! 1./Dn(i,j) [local value in ij-sweep]
   REALTYPE     :: hsum
   REALTYPE     :: kmaxm1         ! 1./kmax
   REALTYPE     :: tmult          ! temporal multiplier to Dgrid (grid diffusivity)
   REALTYPE     :: scal           ! local scaling for boyancy and shear tendencies: incl vertical length-scale
   REALTYPE     :: chaux, chaux2  ! Scales from ho to haux
   LOGICAL      :: gotsmall       ! Local flag  for small cells in column
!   The most used vectors (for local columns) we will define on the STACK.
!   Each of these will be reused in i-j-sweeps, ie many access many times per call.
   REALTYPE     :: mata(0:kmax)     ! Matrix lower-diagonal (for tridiag solver) [was "aau"]
   REALTYPE     :: matb(0:kmax)     ! Matrix diagonal [was "bu"]
   REALTYPE     :: matc(0:kmax)     ! Matrix upper-diagonal [was "cu"]
   REALTYPE     :: rhs(0:kmax)      ! Right-hand-side for tridiag solver [was "du"]
   REALTYPE     :: Dgridloc(0:kmax) ! local Dgrid column vector
! 
!  Tmp? test for time relaxation rather than dt/tgrid in diffusion eq.
   REALTYPE     :: ztgtloc(0:kmax), htgtloc(0:kmax)
   LOGICAL :: adaptrelax=.false. ! Set to true to use time-relaxation (dt/tgrid) *AFTER* solution of diffusion eq.
!
!   The following col-vectors are also used *many* times per ij-sweep, 
!   (at least ~ imax * jmax times), so they could be moved to stack. 
!   TODO: Beware any of these where the *save* paramter is necessary, 
!         because then they should NOT go on stack!
!   NOTE2: The allocated vectors go on HEAP and will not be thread-safe.
!         If we want to parallelize the present routine, then we need to
!         look at how we make the 1D-vertical temporary vectors thread-safe
   REALTYPE, save, dimension(:),     allocatable  :: gvcloc   ! local GVC target column
   REALTYPE, save, dimension(:),     allocatable  :: hauxloc  ! ho scaled to new depth (local col copy)
   REALTYPE, save, dimension(:),     allocatable  :: NNloc    ! local NN column vector
   REALTYPE, save, dimension(:),     allocatable  :: SSloc    ! local SS column vector
   REALTYPE, save, dimension(:),     allocatable  :: gammatgt ! new relative coord, gamma^n
   REALTYPE, save, dimension(:),     allocatable  :: gammasrc ! old (before update) relative coord, gamma^{n-1}
   REALTYPE, save, dimension(:),     allocatable  :: avn ! NN-rel. grid diffus.
   REALTYPE, save, dimension(:),     allocatable  :: avs ! SS-rel. grid diffus.
   REALTYPE, save, dimension(:),     allocatable  :: dminrlx   ! Relaxation for minimum cell thickness
   REALTYPE, save, dimension(:),     allocatable  :: wcolsurf,wcolbott ! surface and bottom scaling
   REALTYPE, save, dimension(:),     allocatable  :: wrk1da,wrk1db

      namelist /adapt_coord/   csig,cgvc, &
                  chsurf,chbott,chmidd,cneigh,   &
                   hsurf, hbott, hmidd,hwallmult,rneigh,hpow, &
                  cNN,   cSS, &
                  d_dens,d_vel, &
                  nfiltvert, wfiltvert, wfilthorz, &
                  hmin,tgrid,split
#ifdef INPUT_DIR
   character(len=PATH_MAX)   :: input_dir=trim(INPUT_DIR) // '/'
#else
   character(len=PATH_MAX)   :: input_dir=''
#endif

! BJB 2021-03: For now *only* "split=1", ie NO SPLIT; 
!              it was not correctly implemented anyway.
!              The split-part has been removed from the code (for now)
! BJB 2021-04-06: Comment on loop limits:
!      Most of the present (2021-04) implementation works *solely* within each
!      vertical column, the only cross-column (i- or j- directional)
!      "smoothing" or "filtering" being the explicit 1-pass lateral filter.
!      Before the lateral filter, all bounds must be with HALO, but for the 
!      lateral filter and later, loops without HALO are appropriate.
!      In all cases, a final HALO-update seems necessary.

!EOP
!-----------------------------------------------------------------------
!BOC
   integer, save :: Ncall = 0
   Ncall = Ncall+1
#ifdef DEBUG
   write(debug,*) 'coordinates() # ',Ncall
#endif

#ifdef DEBUG
STDERR 'adaptive_coordinates_6()'
#endif

   if (first) then
!read namelist
      open(ADAPTNML,status='unknown',file=(trim(input_dir) // 'adaptcoord6.inp'))
      read(ADAPTNML,adapt_coord)
      close(ADAPTNML)

      if (.not. allocated(ga)) then
         allocate(ga(0:kmax),stat=rc)
         if (rc /= 0) stop 'coordinates: Error allocating (ga)'
      end if
      do k=0,kmax
         ga(k) = k
      end do

!      allocate(zpos(I3DFIELD),stat=rc)  ! z-coord. of interface
!      if (rc /= 0) stop 'coordinates: Error allocating memory (zpos)'
      allocate(zposo(I3DFIELD),stat=rc)  ! old z-coord. of interface
      if (rc /= 0) stop 'coordinates: Error allocating memory (zposo)'
      allocate(ztgt(I3DFIELD),stat=rc)  ! target z-coord. of interface
      if (rc /= 0) stop 'coordinates: Error allocating memory (ztgt)'
      ztgt(:,:,:)=_ZERO_
      allocate(htgt(I3DFIELD),stat=rc)  ! target layer heights
      if (rc /= 0) stop 'coordinates: Error allocating memory (htgt)'
      htgt(:,:,:)=_ZERO_
      allocate(dminrlx(0:kmax),stat=rc)  ! 
      if (rc /= 0) stop 'coordinates: Error allocating memory (dminrlx)'
      dminrlx(:) = _ZERO_
!      allocate(HHo(I2DFIELD),stat=rc)  !
!      if (rc /= 0) stop 'coordinates: Error allocating memory (HHo)'
      allocate(wcolsurf(0:kmax),stat=rc)  !
      if (rc /= 0) stop 'coordinates: Error allocating memory (wcolsurf)'
      allocate(wcolbott(0:kmax),stat=rc)  !
      if (rc /= 0) stop 'coordinates: Error allocating memory (wcolbott)'
      allocate(wrk1da(0:kmax),stat=rc)  !
      if (rc /= 0) stop 'coordinates: Error allocating memory (wrk1da)'
      allocate(wrk1db(0:kmax),stat=rc)  !
      if (rc /= 0) stop 'coordinates: Error allocating memory (wrk1db)'
      allocate(hmaxm1_surf(I2DFIELD),stat=rc)  !
      if (rc /= 0) stop 'coordinates: Error allocating memory (hmaxm1_surf)'
      allocate(hmaxm1_bott(I2DFIELD),stat=rc)  !
      if (rc /= 0) stop 'coordinates: Error allocating memory (hmaxm1_bott)'
      allocate(hmaxm1_midd(I2DFIELD),stat=rc)  !
      if (rc /= 0) stop 'coordinates: Error allocating memory (hmaxm1_midd)'
      allocate(wrk2d(I2DFIELD),stat=rc)  !
      if (rc /= 0) stop 'coordinates: Error allocating memory (wrk2d)'
      allocate(wrk3d(I3DFIELD),stat=rc)  !
      if (rc /= 0) stop 'coordinates: Error allocating memory (wrk3d)'
      allocate(gvcloc(0:kmax),stat=rc)     ! working space
      if (rc /= 0) STOP 'coordinates: Error allocating (gvcloc)'
      allocate(hauxloc(0:kmax),stat=rc)     ! Local col copy of ho
      if (rc /= 0) STOP 'coordinates: Error allocating (hauxloc)'
      allocate(NNloc(0:kmax),stat=rc)     ! working space
      if (rc /= 0) STOP 'coordinates: Error allocating (NNloc)'
      allocate(SSloc(0:kmax),stat=rc)     ! working space
      if (rc /= 0) STOP 'coordinates: Error allocating (SSloc)'
      allocate(avn(0:kmax),stat=rc)     ! working space
      if (rc /= 0) STOP 'coordinates: Error allocating (avn)'
      allocate(avs(0:kmax),stat=rc)     ! working space
      if (rc /= 0) STOP 'coordinates: Error allocating (avs)'
!      allocate(avd(0:kmax),stat=rc)     ! working space
!      if (rc /= 0) STOP 'coordinates: Error allocating (avd)'
!      allocate(aav(0:kmax),stat=rc)     ! working space
!      if (rc /= 0) STOP 'coordinates: Error allocating (aav)'
      allocate(gammatgt(0:kmax),stat=rc)     ! working space
      if (rc /= 0) STOP 'coordinates: Error allocating (gammatgt)'
      allocate(gammasrc(0:kmax),stat=rc)     ! working space
      if (rc /= 0) STOP 'coordinates: Error allocating (gammasrc)'
      kmaxm1= _ONE_/kmax
      LEVEL2 "avc6: allocated memory"
      !
      ! Sanity/clipping for parameters:
#ifdef NO_BAROCLINIC
      cNN=_ZERO_
#endif
      csig   = max(_ZERO_,csig)
      cgvc   = max(_ZERO_,cgvc)
      chsurf = max(_ZERO_,chsurf)
      chbott = max(_ZERO_,chbott)
      chmidd = max(_ZERO_,chmidd)
      cneigh = max(_ZERO_,cneigh)
      cNN    = max(_ZERO_,cNN)
      cSS    = max(_ZERO_,cSS)
      rneigh = max(_ZERO_,rneigh)
      hpow   = max(1,hpow)
      wfiltvert = max(_ZERO_,min(_QUART_,wfiltvert))
      nfiltvert = max(0,nfiltvert)
      tgrid = max(dt,tgrid)


      LEVEL3 'factors for diffusion of interface positions:'
      LEVEL4 ' csig (background):', real(csig)
      LEVEL4 ' cgvc (gvc-coords):', real(cgvc)
      LEVEL4 ' chsurf (surface cell):', real(chsurf),real(hsurf)
      LEVEL4 ' chbott (bottom cell):', real(chbott),real(hbott)
      LEVEL4 ' chmidd (all cells):', real(chmidd),real(hmidd)
      LEVEL4 ' cneigh (all cells):', real(cneigh),real(rneigh)
      LEVEL4 ' cNN (stratification):', real(cNN)
      LEVEL4 ' cSS (shear):', real(cSS)
      LEVEL4 ' wfiltvert (vertical diffusion filter):', real(wfiltvert),int(nfiltvert)
      LEVEL4 ' wfilthorz (horizontal diffusion filter):', real(wfilthorz)
      LEVEL4 ' split [dtgrid=dt/split]:',split
! For bottom and surface cell limiters (tendencies), we will save 
! them as a field (once and for all). This opens for later reading 
! the values as fields as part of the config.
      if (abs(hsurf).lt.cepsi) then
         hmaxm1_surf(:,:)=_ONE_ ! This is only to avoid division-by-zero later on
      elseif (hsurf.gt._ZERO_) then
         ! Direct size [m] given:
         hmaxm1_surf(:,:)=_ONE_/hsurf
      else
         ! <0: So relative to average cell size (for that depth):
         do j=jmin-HALO,jmax+HALO
            do i=imin-HALO,imax+HALO
               if ( az(i,j).ge.1 ) then
                  !  Average cell height is havg=H/kmax (really, it is Dn/kmax)
                  !  hmax [cell height limiter] is havg*hsurf=hsurf*H/kmax
                  !  and the inverse (1/) is then
                  hmaxm1_surf(i,j)=  kmax/(-hsurf*H(i,j))
               else
                  hmaxm1_surf(i,j)= _ONE_ ! Just a non-problematic land value
               end if
            end do
         end do
      end if
      ! Repeat for bottom and domain interior (midd)
      if (abs(hbott).lt.cepsi) then
         hmaxm1_bott(:,:)=_ONE_
      elseif (hbott.gt._ZERO_) then
         hmaxm1_bott(:,:)=_ONE_/hbott
      else
         do j=jmin-HALO,jmax+HALO
            do i=imin-HALO,imax+HALO
               if ( az(i,j).ge.1 ) then
                  hmaxm1_bott(i,j)=  kmax/(-hbott*H(i,j))
               else
                  hmaxm1_bott(i,j)= _ONE_
               end if
            end do
         end do
      end if
      if (abs(hmidd).lt.cepsi) then
         hmaxm1_midd(:,:)=_ONE_
      elseif (hmidd.gt._ZERO_) then
         hmaxm1_midd(:,:)=_ONE_/hmidd
      else
         do j=jmin-HALO,jmax+HALO
            do i=imin-HALO,imax+HALO
               if ( az(i,j).ge.1 ) then
                  hmaxm1_midd(i,j)=  kmax/(-hmidd*H(i,j))
               else
                  hmaxm1_midd(i,j)= _ONE_
               end if
            end do
         end do
      end if
!
! In the case of no-baroclinic, the nnloc will never be updated,
! so we initialize it here. Same for avn: Both just remain zero.
      NNloc(:) = _ZERO_
      avn(:)   = _ZERO_
      if (.not. restart_with_hn) then
         ! Change initials to use "background" gvc
         if (hotstart) then
            if ( cgvc .ge. csig+cepsi ) then
               LEVEL2 'WARNING: assume gvc coordinates for hn'
            else
               LEVEL2 'WARNING: assume uniform sigma coordinates for hn'
            end if
         end if
         if ( cgvc .ge. csig+cepsi ) then
            call avc6_gvc(hn)
         else
            do j=jmin-HALO,jmax+HALO
               do i=imin-HALO,imax+HALO
                  if (az(i,j) .ge. 1) then
                     hn(i,j,:) = Dn(i,j) * kmaxm1
                  end if
               end do
            end do
         end if
      end if
!     If this is not a hotstart, or if it is a really-old hostart file type, 
!     then we do not have ho. We will then assume that it is the same as hn
!     (either from hotstart of computed previously)
      if ( (.not. hotstart) .or. (.not. restart_with_ho) ) then
         LEVEL2 'WARNING: assume ho=hn'
         ho=hn
         call hcheck(ho,sseo+H,az)
      end if

      kmin=1
      kumin=1
      kvmin=1
      kmin_pmz=1
      kumin_pmz=1
      kvmin_pmz=1

      ! possibly useful:
      !tfac_hor=dt*kmax/tgrid
      !LEVEL2 "horizontal time scale:",tfac_hor
      !
      ! used in Hofmeister et al. 2010:
      !tfac_hor=_ONE_

! Surface and bottom cell zooming effects - precompute dropoff away from surface.
      !  Let the effect drop exponentially with #cells from the surface.
      !  We could have a namelist parameter to stat *how* fast, but for now 
      !  just hardcode it as 2/3 (anything ~ [0.5:1[ could be used)
      wcolsurf(0)   =_ZERO_
      wcolsurf(kmax)=_ONE_
      do k=kmax-1,1,-1
         wcolsurf(k)=wcolsurf(k+1)*hwallmult
      end do
      STDERR 'AVC6 wcs:',wcolsurf
      ! Same for bottom, except go from bottom and upwards
      wcolbott(0)=_ZERO_
      wcolbott(1)=_ONE_
      do k=2,kmax
         wcolbott(k)=wcolbott(k-1)*hwallmult
      end do
! TODO: BJB 2021-04: Maybe we should compute Dgrid even for first call.
!       It may be used for output already at t=0.
   else !not first

! TODO BJB 2021-03: Add a "lagrangian target" grid. Also to be relaxed against.

! TODO/COMMENT: bjb 2021-03: All grid weights will be computed on "previous" (old) grid,
!                            and will be "joined" to new grid. Thus, we do not need to 
!                            "interpolate" value from old to new grids in this step,
!                            especially, as the "new" grid is not known.
!
! BJB 2021-03: Removed mhor loop - it is hardcoded to one repetition anyway
!     TODO: Need to re-apply some kind of lateral/horizontal filtering (on Dgrid at least)
!

!
! PROCESS for grid tendencies (Dgrid):
!  There will be (up to) three big i-j-loop blocks:
!   A: Create Dgrid - column-by-column - and store in 3D array.
!    The following works on Dgridloc (a column of Dgrid):
!     a. Define background value (sigma)
!     b. Add GVC tendency
!     c. Add "mid-column" zooming (punish relatively big cells).
!     d. Add surface- and bottom-cell zooming tendencies
!     e. Add NN and SS tendencies
!     f. Reduce if cells are already appraching "too small"
!     g. Store in Dgrid
!   B: Apply lateral (i,j) filtering.
!   C: Apply vertical filter
!      Solve (column-by-column) for target z-values.
!
      kmaxm1  = _ONE_/kmax
      tgridm1 = _ONE_/tgrid
      rneighm1= _ONE_/rneigh
#ifndef NO_BAROCLINIC
      sNN=rho_0/(d_dens*g)
#endif
      sSS = _ONE_/d_vel
      hminm1=_ONE_/hmin
!  For filtering the vertical, we have always two neighbors, as 
!  we will not modify the surface or bottom values for Dgrid.
      wfiltneigh=_HALF_*wfiltvert
      wfiltcentr=_ONE_-wfiltvert
!
!  All the temporal (and constant) multiplier for Dgrid can be computed just once 
!  and reused for each column (and each element in the column).
!     TODO: Add correct reference for this eq.
!  Note that the _TWO_ part in this eq is to get the right exponential time scale 
!   (=tgrid) when Dgridloc~0.5.
       tmult = _TWO_/tgrid
!
! DGRID LOOP A:
      do j=jmin+1-HALO,jmax-1+HALO
         do i=imin+1-HALO,imax-1+HALO
            if (az(i,j) .lt. 1) then
               ! Skip to next column.
               cycle
            end if
!      Quick way out for shallow areas.
!      If the total water depth Dn(i,j) is less than kmax*hmin, then the solution 
!      will be a squeezing of the cells to h(:)=Dn/kmax and the grid diffusion 
!      (needed only for filtering and output) will be Dgrid=chmin.
!      NOTE: Enabling this could potentially introduce lateral inconsistencies/
!        discontinuities in Dgrid and hn
if ( .false. ) then
            if ( Dn(i,j) .le. kmax*hmin ) then
               !Dgridloc(:) = chmin ! We dont really need the col-vector here
               Dgrid(i,j,:)= tmult*chmin
               cycle
            end if
end if
!      Precompute 1/H and 1/Dn, as we will use it quite a few times in the following:
            Hm1   = _ONE_/H(i,j)
            Dnm1  = _ONE_/Dn(i,j)
            havg  = Dn(i,j)*kmaxm1
            chaux = Dn(i,j)/(sseo(i,j)+H(i,j)) ! = (ssen+H)/(sseo+H)
!      Fetch ho for this column to array for speed and shorthand notation.
!      (Just replace hauxloc(k) below with ho(i,j,k) to go back)
            hauxloc(:)=chaux*ho(i,j,:)
!      a.
!         Setup background weights for the vertical adaptation.
!         Initial is just a constant - it will give a tendency towards uniform (sigma) 
!         coordinates. Note that this value may - in many cases - be zero.
!         If csig>0, then this will push towards a uniform distribution.
!         It should be noted, that this "drive" may be significantly slower than
!         other effects, which react on *local* cell sizes.
!         Will this even change a distribution? 
!           Or should we use something like (ho/htgt-_HALF_)**pow like the wall tendencies?
            Dgridloc(:) = csig
!
!      b.
!         Tendency toward "classical" GVC with ddu,ddl zooming.
!         The tridiag Dgrid*h solution is (roughly) inversely proportional to Dgrid,
!         so we set the Dgrid values proportional to 1/target h.
!         The *average* cell thickness is HHo/kmax = HHo*kmaxm1
!         For classical gvc, the *minimum* gvc target cell thickness is presumably 
!         always at the surface, so we may use that as scaling to hit unity as
!         max (before the cgvc coeff).
!         Note: It is possible to implement a diffusion according to eg (ho(k)/htgt(k) - C)**pow
!               just as for eg the wall-tendencies below
            if ( cgvc .gt. cepsi ) then
               call avc6_gvc_col(i,j,gvcloc) ! gvcloc is now target gvc h-values.
               do k=1,kmax
                  Dgridloc(k)=Dgridloc(k) + cgvc*(gvcloc(kmax)/gvcloc(k)) !*Hm1
               end do
            end if
! 
!      c. Keep cells from growing arbitrarily large, ie. increase resolution for 
!         relatively large cells.
!          1. Only work on cells larger than the average cell size (of the column),
!             so zero effect for smaller-than-average cells.
!          2. "Full effect" from reference cell size (hmax_midd)
!         Use pre-computed href^-1(i,j) for speed.
            if ( chmidd .gt. cepsi) then
               do k=1,kmax
                  ! Alt 1:
                  !wwh = chmidd * MIN(_ONE_,( (hauxloc(k)*hmaxm1_midd(i,j))**hpow))
                  ! Alt 2:
                  !hrel=hauxloc(k)*hmaxm1_midd(i,j) ! Relative cell size. Unity is target.
                  ! Alt 3:
                  !hrel=hauxloc(k)/havg-_ONE_ ! /havg ! *hmaxm1_midd(i,j) ! Relative cell size. Unity is target.
                  ! Alt 4:
                  !hrel=hauxloc(k)*hmaxm1_midd(i,j) - _HALF_
                  ! Alt 5 - test 2021-05-04:
                  !hrel=hauxloc(k)*hmaxm1_midd(i,j)-0.2
                  ! Alt 6 - test 2021-05-05:
                  !    hrel linearly goes from zero to unity when ho goes from havg to hmax.
                  !    So with this approach the "large-cell limiter" is only effective for 
                  !    cells larger than the average.
                  !    TODO: If we go with this, then we need to not have hmax**-1 stored
                  !    TODO: Must ensure that hmax_midd>havg, or this will not work.
                  !hrel= (hauxloc(k)-havg)/(_ONE_/hmaxm1_midd(i,j) - havg)
                  hrel= hauxloc(k)*hmaxm1_midd(i,j) - _HALF_
                  !
                  if     ( hrel .le. _ZERO_ ) then
                     wwh = _ZERO_
                  elseif ( hrel .ge. _ONE_  ) then
                     wwh = chmidd
                  else
                     wwh = chmidd * (hrel**hpow)
                  end if
                  Dgridloc(k)=Dgridloc(k) + wwh
               end do
            end if
!
!      d. 
!       Tendency toward surface zooming (trying to keep surface cell of size hsurf).
!          The idea here is that if ho is too large, then the coef should be large,
!          but if ho is already small, then the coef may -> 0
!
            if ( chsurf .gt. cepsi ) then
              ! Surface-layer scaling. 
               !   chsurf * ( ho / hsurf )**pow
               !  TODO: For speed we should precompute and store 1/hmaxm1_surf as array.
               ! Alt 1:
               !wwh = chsurf * MIN(_ONE_,( (hauxloc(kmax)*hmaxm1_surf(i,j))**hpow)) !*Hm1
               ! Alt 2:
               !   Relative cell size. Unity is target. 
               !   Shift by half to not affect small cells at all
               hrel=hauxloc(1)*hmaxm1_surf(i,j)-_HALF_ 
               if     ( hrel .le. _ZERO_ ) then
                  wwh = _ZERO_
               elseif ( hrel .ge. _ONE_  ) then
                  wwh = chsurf
               else
                  wwh = chsurf * (hrel**hpow)
               !   wwh = chsurf * hrel
               end if
               !  Now combine the scale (2d) with the pre-computed column decay and add to D.
               do k=1,kmax
                  Dgridloc(k)=Dgridloc(k) + wwh*wcolsurf(k)
               end do
            end if
!
!       Tendency toward bottom zooming (trying to keep bottom cell of size hbott).
!       This is basically, a repetition of chsurf,hsurf, only from bottom-up.
            if ( chbott .gt. cepsi ) then
               hrel=hauxloc(1)*hmaxm1_bott(i,j)-_HALF_
               if     ( hrel .le. _ZERO_ ) then
                  wwh = _ZERO_
               elseif ( hrel .ge. _ONE_ ) then
                  wwh = chbott
               else
                  wwh = chbott * (hrel**hpow)
               end if
               !  Now combine the scale (2d) with the pre-computed column decay and add to D.
               do k=1,kmax
                  Dgridloc(k)=Dgridloc(k) + wwh*wcolbott(k)
               end do
               ! Alt 3 (1/(d+d0)):
               !   This goes "pretty deep" and is difficult to control wrt the target thickness.
               !   especially, as it will forever try to decrease the cell size.
               !wbott = -_HALF_ * hauxloc(1) ! cell dist from bottm. Lowest cell
               !do k=1,kmax
               !   wbott = wbott+hauxloc(k) ! Update distace-to-wall
               !   !     H/(d+hmax) = (H/hmax)/(d/hmax+1)=H*hmaxm1/(1+d*hmaxm1)
               !   wwh=MIN(_ONE_,kmaxm1*H(i,j)*hmaxm1_bott(i,j)/(_ONE_+wbott*hmaxm1_bott(i,j)) )
               !   Dgridloc(k)=Dgridloc(k) + chbott*wwh
               !end do
            end if
!
!   Neighbor-cell size-ratio limiter.
!     This effect will limit the size-ratio between neighboring cells in the column.
!     For each cell, we will find the ratio to the smallest neighbor, and then compute 
!     a diffusion-coefficient part to inhibit large cells with small neighbours.
            if ( cneigh .gt. cepsi ) then ! TODO: Should be something with eg cneigh
               ! rneigh could be max growth eg .25 for 25%.
               do k=1,kmax
                  ! First compute the relative size between self and smallest neighbor:
                  if      ( k==1    ) then
                     hrel=hauxloc(1)/hauxloc(2)
                  else if ( k==kmax ) then
                     hrel=hauxloc(kmax)/hauxloc(kmax-1)
                  else
                     hrel=hauxloc(k)/MIN(hauxloc(k-1),hauxloc(k+1))
                  end if
                  ! Shift by unity (so equal-sized have zero ratio) and eliminate negatives 
                  ! (no added diffusion if I am smaller than neighbor):
                  hrel=MAX(_ZERO_, hrel-_ONE_)
                  ! Finally, scale by max size-ratio.
                  wwh=MIN(_ONE_,hrel*rneighm1 )
                  ! TODO: Could apply power if necessary?
                  wwh = cneigh * (wwh**hpow)
                  Dgridloc(k)=Dgridloc(k) + wwh
               end do
            end if
!
!      e. Bouyancy and shear
#ifndef NO_BAROCLINIC
!              Stratification
!               Based on bouyancy frequency, avg or max over cell interfaces
!            Note: Compared to "avc5" the length-scale is here - and inside 
!            the min/max limiter.
!            Length scale may be either H(i,j), Dn(i,j) or Dn(i,j)/kmax.
!            But it should *not* be ho(i,j,k), as that will punish small cells.
            if ( cNN .gt. cepsi ) then
               scal = (Dn(i,j)*kmaxm1) * sNN
               NNloc=NN(i,j,:)
               NNloc(kmax)=NNloc(kmax-1)
               NNloc(0)=NNloc(1)
               ! TODO: The two k-loops could be consolidated into one,
               !       thus eliminating the avn-vector.
               do k=1,kmax
                  avn(k)=min(_ONE_,scal*max(_ZERO_,_HALF_*(NNloc(k)+NNloc(k-1)) ))
               end do
               do k=1,kmax
                  Dgridloc(k)=Dgridloc(k)+cNN*avn(k)
               end do
            end if
#endif
!              Shear
!               Based on shear frequency, avg or max over cell interfaces
            if ( cSS .gt. cepsi ) then
               scal = (Dn(i,j)*kmaxm1) * sSS
               SSloc=SS(i,j,:)
               SSloc(kmax)=SSloc(kmax-1)
               SSloc(0)=SSloc(1)
               do k=1,kmax
                  avs(k)=min(_ONE_,scal*sqrt(max(_ZERO_,_HALF_*(SSloc(k)+SSloc(k-1)))))
               end do
               do k=1,kmax
                  Dgridloc(k)=Dgridloc(k)+cSS*avs(k)
               end do
            end if
! 
!      f. Small-cell limiter:
!          Examine ho and put compute a limiter for cells getting close
!          to the minimum size. The idea here is to compute a factor to 
!          *multiply* onto each Dgrid-element. Normally, the factor will 
!          be unity, but it could be closer to zero if a cell is 
!          already "approaching too fine". 
!          Basically, if ho=hmin (or smaller!), then Dgrid will be 
!          multiplied by something small (zero or 1/10).
!          From there the factor increases rapidly/linearly (the first 
!          coefficient below, ie _TWO_), so that we reach unity (_ONE_) 
!          when ho=1.5*hmin. 
!          An extra fuzz parameter (namelist parameter) could be implemented 
!          to control the steepness of the cut-off.
!          In essence, this step can switch off all the previous addends
!          (tendencies) if the cells get "too small".
            do k=1,kmax
               wrk0=_TWO_*(hauxloc(k)*hminm1-_ONE_)
               wrk0=max(_ZERO_,min(_ONE_,wrk0)) ! ? Try _ZERO_?
               !wrk0=max(_TENTH_,min(_ONE_,wrk0))
               Dgridloc(k)= wrk0 * Dgridloc(k)
            end do
!
!      *. Shallow-water effect. Add something which is relatively large and 
!         constant over depth in shallow waters.
!          Maximum effect from D=kmax*hmin
!          Turn off at 1.5x that depth.
!         Note: The alternative to this is to simply force h:=hmin whereever h<hmin
!               and then rescale the entire column.
            hrel= _TWO_ * (hmin/havg - _ONE_)
            if      ( hrel .lt. _ZERO_ ) then
               wwh = _ZERO_
            else if ( hrel .gt. _ONE_  ) then
               wwh = chmin
            else
               wwh = hrel*chmin
            end if
            do k=1,kmax
               Dgridloc(k)= Dgridloc(k) + wwh
            end do
!
!      g. Apply vertical filter aka grid diffusivity.
!         For the filters, (1-w) is kept from the central point, and the 
!         remaining is taken from the neighbours.
            if ( (nfiltvert .gt. 0) .and. (wfiltvert.gt.cepsi) ) then
               do ifilt=1,nfiltvert
                  wrk1da(0)   = Dgridloc(0)
                  wrk1da(1)   = Dgridloc(1)
                  wrk1da(kmax)= Dgridloc(kmax)
                  do k=2,kmax-1
                     wrk1da(k)=wfiltneigh*Dgridloc(k-1)+wfiltcentr*Dgridloc(k)+wfiltneigh*Dgridloc(k+1)
                  end do
                  Dgridloc(:)=wrk1da(:)
               end do
            end if
!
!       Multiply by (2/tgrid) - so unit will be 1/s.
!       The x2 is to ensure that we hit the correct exponential time scale when Dgridloc ~= 0.5.
!         And store in global array
            Dgrid(i,j,:)= tmult*Dgridloc(:)
         end do
      end do
!
!  From here, we work only on the "interior" of the subdomain.
!  HALOs will be exchanged later.
!
! DGRID LOOP B:
!    Horizontal/lateral diffusivity. Filter one layer (k) at a time.
      if (wfilthorz .gt. cepsi ) then
         wfiltcentr=_ONE_-wfilthorz
         wfiltneigh=wfilthorz
         do k=1,kmax
            do j=jmin,jmax
               do i=imin,imax
                  if (az(i,j) .lt. 1) then
                     ! Skip to next column.
                     cycle
                  end if
                  ! Counter - zero for neighbor-is-land, unity otherwise
                  n1=min(1,az(i-1,j))
                  n2=min(1,az(i+1,j))
                  n3=min(1,az(i,j-1))
                  n4=min(1,az(i,j+1))
                  ! Number of neighbors - used for weighting
                  nneighs=n1+n2+n3+n4
                  ! Sum of Dgrid over neighbors. 
                  Dneighs=(n1*Dgrid(i-1,j,k)+n2*Dgrid(i+1,j,k)+n3*Dgrid(i,j-1,k)+n4*Dgrid(i,j+1,k))/nneighs
                  ! Put filtered value in work array (copy back later)
                  wrk2d(i,j) = wfiltcentr*Dgrid(i,j,k)+wfiltneigh*Dneighs
               end do
            end do
            ! Copy data for this layer back into Dgrid:
            do j=jmin,jmax
               do i=imin,imax
                  if (az(i,j) .lt. 1) then
                     ! Skip to next column.
                     cycle
                  end if
                  Dgrid(i,j,k)=wrk2d(i,j)
               end do
            end do
         end do
         ! NOTE: It is possible to apply horizontal filtering more than once, 
         !   but that would require a halo-exchange on Dgrid for all repetitions
         !   except the last. (Or possibly every other iteration, if we loop over imin-1,imax+1).
      end if
!
!     Get pre-3D-step z positions (same as NN, SS) used for rhs [du(k)] below
      call htoz(ho,zposo) ! We may not actually need to do this!
!
! DGRID LOOP C:
!      Quick initialization of bottom layer (consequitive memory, so quick).
!      Then, the rest can just update 1:kmax.
      hn(:,:,0)=_ZERO_
      do j=jmin,jmax
         do i=imin,imax
            if (az(i,j) .lt. 1) then
               ! Skip to next column.
               cycle
            end if
!      Quick way out for shallow areas.
!      If the total water depth Dn(i,j) is less than kmax*hmin, then the solution 
!      will be a squeezing of the cells to h(:)=Dn/kmax:
!      NOTE: Enabling this could potentially introduce lateral inconsistencies/
!        discontinuities in Dgrid and hn
if ( .false. ) then
            if ( Dn(i,j) .le. kmax*hmin ) then
               hn(i,j,1:kmax)= Dn(i,j)*kmaxm1
               cycle
            end if
end if
!
!      Local shorthands for ease:
            Dnm1 = _ONE_/Dn(i,j)
!
!      a. Retrieve local column from global array.
!         Scale by dt ("dtgrid"), and possibly kmax**2 and c=0.01 as per publications [ref needed]
!         This step makes sure that "Dgridloc" is dimensionless, and thus that all elements of the 
!         tridiagonal linear system are dimensionless.
!         (BJB 2021-04: For now we start with dt, and then we may add effect from kmax as necessary)
!            Dgridloc(:)=(dt*kmax*_TENTH_)*Dgrid(i,j,:) ! TODO: Possibly add kmax*kmax/100
            Dgridloc(:)=(dt/split)*Dgrid(i,j,:) ! TestC: Sync w/ tmult
!
!      b. "Copy-down" to avoid problems from k=0:
            Dgridloc(0)=Dgridloc(1)
!
!      d. Apply Dgrid: Setup and solve tri-diagonal for z.
!
! TODO: Multiply by (dtgrid) ! and possibly by *kmax**2/100.
! => TODO: Look at Richards code and apply "Calculation of grid diffusivity",
!          In particular the "[H(i,j)/tgrid]*" and "*dtgrid*kmax**2/100." are missing.
!          [But what is with the kmax**2 and /100?]
!                     aav(k)=H(i,j)/tgrid*(c1ad*avn(k)+c2ad*avs(k)+ &
!                            c3ad*avd(k)+c4ad/H(i,j))
!                     aav(k)=aav(k)*dtgrid*kmax**2/100.
!          With that we go straight to gaa and zpos -> hn
!          But is this different than a scaling? [presumably it is OK, we will see].
!        Make a commit until this point.
! BJB TEST - can se go directly from h [dividing -H:ssen] to gamma [-1:0]
            ! BJB DEBUG There may be a problem so sum(ho) != Dn(i,j) !
            chaux2 = _ONE_ / (sseo(i,j)+H(i,j)) ! aka 1/Do
            gammasrc(0)=-_ONE_
            do k=1,kmax
               !gammasrc(k)=gammasrc(k-1)+hauxloc(k)/hsum !*Dnm1
               !gammasrc(k)=gammasrc(k-1)+ho(i,j,k)/hsum !*Dnm1
               !gammasrc(k)=gammasrc(k-1)+hauxloc(k)*Dnm1
               gammasrc(k)=gammasrc(k-1)+ho(i,j,k)*chaux2
            end do
            gammasrc(kmax)=_ZERO_ ! Optional: Just to get rid of accumulated rounding errors. (We could compute from above and below)
!
!        BJB 2021-04: aav -> Dgridloc
!               aav(0:kmax) => Dgrid(i,j,0:kmax)
            do k=1,kmax-1
               mata(k)=-Dgridloc(k)
               matc(k)=-Dgridloc(k+1)
               matb(k)=_ONE_ +Dgridloc(k) +Dgridloc(k+1) ! same as  -mata(k)-matc(k)
               !rhs(k)=( zposo(i,j,k)-ssen(i,j))*Dnm1 ! gaa -> gaaold [aka gammasrc]
               rhs(k)=gammasrc(k)
            end do
            matc(0)=_ZERO_
            matb(0)=_ONE_
            rhs(0)=-_ONE_
            matb(kmax)=_ONE_
            mata(kmax)=_ZERO_
            rhs(kmax)=_ZERO_
            do isplit=1,split
               call getm_tridiagonal(kmax,0,kmax,mata,matb,matc,rhs,gammatgt)
               if ( isplit .lt. split) then
                  rhs(:)=gammatgt(:)
               end if
            end do
            ! TEMPORARY SANITY CHECK
            do k=0,kmax
               if ( gammatgt(k) .lt. -1.0000001 .or. gammatgt(k) .gt. 0.0000001 ) then
                  STDERR 'AVC6: out-of-range Gamma_tgt:',gammatgt(k)
                  STDERR 'AVC6:  i,j,k=',i,j,k
                  STDERR 'Dn:',Dn(i,j),Dnm1
                  STDERR 'hsum:',hsum
                  STDERR 'haux=',hauxloc
                  STDERR 'gammasrc=',gammasrc
                  STDERR 'dgrid=',Dgridloc
                  STDERR 'rhs=',rhs
                  STDERR 'mata=',mata
                  STDERR 'matb=',matb
                  STDERR 'matc=',matc
                  
                  STDERR 'gammatgt=',gammatgt
                  STOP
               end if
            end do
            do k=1,kmax
               if ( gammatgt(k) .le. gammatgt(k-1) ) then
                  STDERR 'AVC6: Invalid gamma seq:',gammatgt(k),gammatgt(k+1)
                  STDERR 'AVC6:  i,j,k=',i,j,k
                  STDERR 'dgrid=',Dgridloc
                  STOP
               end if
            end do
            ! gaa(:) would be the target [inf time] grid for this distribution -
            !    in [-1:0], where -1 is bottom, and 0 is surface(?)
            !ztgtloc(:)   = gammatgt(:)*Dn(i,j)+ssen(i,j)
            htgtloc(0)   = _ZERO_
            !htgtloc(1:kmax) = ztgtloc(1:kmax)-ztgtloc(0:kmax-1)
            htgtloc(1:kmax) = (gammatgt(1:kmax)-gammatgt(0:kmax-1))*Dn(i,j) ! Without intermediate ztgt step
            !ztgt(i,j,:)= gammatgt(:)*Dn(i,j)+ssen(i,j)
! Rescale column in case one or more cells are < hmin.
! This will also ensure that cells tend to uniform distribution in shallower water.
            gotsmall=.false.
            do k=1,kmax
               if ( htgtloc(k) .lt. hmin ) then
                  htgtloc(k)=hmin ! This part could be done with a MAX()
                  gotsmall=.true. ! But not this. Gotsmall allow us to skip next loop in most cases
               end if
            end do
            if ( gotsmall ) then
               hsum=_ZERO_
               do k=1,kmax
                  hsum=hsum+htgtloc(k)
               end do
               scal = Dn(i,j)/hsum
               do k=1,kmax
                  htgtloc(k)=htgtloc(k)*scal
               end do
            end if
            ! Store in global target array for hn:
            hn(i,j,1:kmax)=htgtloc(1:kmax)
            ! Direct conversion to ztgt 3D array is OK for dt/tgrid-scaled diffusion eq.
            ! <BJB DEBUG>
            if ( abs(gammatgt(0)) .gt. 100000*_ONE_ ) then
               LEVEL0 'ERROR: gammatgt out of range',gammatgt(0)
               LEVEL0 gammatgt(:)
               STOP
            end if
            if ( abs(ztgt(i,j,0)) .gt. 100000*_ONE_ ) then
               LEVEL0 'ERROR: ztgt out of range',ztgt(i,j,0)
               LEVEL0 ztgt(i,j,:)
               STOP
            end if
            ! </BJB DEBUG>
!            if (i==50 .and. j==15) then
!               STDERR '>Gam >',gammatgt(kmax-2:kmax)
!               STDERR '>Ztgt>',ztgt(i,j,kmax-2:kmax)
!            end if
            ! BJB 2021-05-04:
            ! Conversion to htgt could be done here on a vertical-local array.
            ! This is particularly useful if we want to make a time-relaxation 
            ! approach rather than solve the diffusion equation on dt/tgrid scale.
         end do
      end do
!
! Convert the "target z" back to "target h" (which is really hn).
      !call ztoh(ztgt,hn,hmin)
!     call hcheck(hn,Dn,az)
!
   end if ! end not-first part
!
   call hcheck(hn,Dn,az)
!
! Finally derive interface grid sizes for uu and vv
! Interface treatment and check

!  Update the halo zones
   call update_3d_halo(hn,hn,az,imin,jmin,imax,jmax,kmax,H_TAG)
   call wait_halo(H_TAG)
   call mirror_bdy_3d(hn,H_TAG)

! uu
   do k=1,kmax
      do j=jmin-HALO,jmax+HALO
         do i=imin-HALO,imax+HALO-1
            hun(i,j,k)=_QUART_*(ho(i,j,k)+ho(i+1,j,k)+hn(i,j,k)+hn(i+1,j,k))
         end do
      end do
   end do

! vv
   do k=1,kmax
      do j=jmin-HALO,jmax+HALO-1
         do i=imin-HALO,imax+HALO
            hvn(i,j,k)=_QUART_*(ho(i,j,k)+ho(i,j+1,k)+hn(i,j,k)+hn(i,j+1,k))
         end do
      end do
   end do

#ifndef _NEW_DAF_
!  KK-TODO: although the layer heights in the center points are consistent
!           with the total water depth, in the present implementation we
!           cannot rely on depth-coinciding layer heights in velocity points
!           (vel_depth_method + min_depth for interface heights)
   call hcheck(hun,Dun,au)
   call hcheck(hvn,Dvn,av)
#endif

!  KK-TODO: do we really need these mirror_bdy's?!
   call mirror_bdy_3d(hun,U_TAG)
   call mirror_bdy_3d(hvn,V_TAG)

!  only for backward compatibility
   if (first) then
      huo=hun
      hvo=hvn
!     KK-TODO: hcheck of h[u|v]o for hotstarts?
      if (.not. hotstart) then
         ho=hn
      end if
   end if

#ifdef DEBUG
   write(debug,*) 'Leaving adaptive_coordinates_6()'
   write(debug,*)
#endif
   return
   end subroutine adaptive_coordinates_6
!EOC

! This routine repeats some of the GVC (coord type=3) computations for use with AVC.
! Preferably, the code could be renamed and put somewhere, where both 
! coordinate types may use it.
! Actual doc to be made.
! BJB 2021-03-26
subroutine avc6_gvc(hfield)
! !USES:
   use domain, only: ddu,ddl,d_gamma,gamma_surf
   use domain, only: imin,imax,jmin,jmax,kmax,H,HU,HV,az,au,av,min_depth
   use variables_3d, only: Dn,sseo
!
!   logical, intent(in)                 :: first
! Output is a 3D field
   REALTYPE, intent(out)               :: hfield(I3DFIELD)
! !LOCAL VARIABLES:
   integer, save   :: Ncall = 0
   integer         :: i,j,k,rc,kk
   REALTYPE        :: alpha
   REALTYPE        :: HH,zz,r,bek
   REALTYPE, save, dimension(:),     allocatable  :: ga,dga,be,sig
   REALTYPE, save, dimension(:,:,:), allocatable  :: gga

   Ncall = Ncall+1
#ifdef DEBUG
   write(debug,*) 'avc6_gvc() # ',Ncall
#endif
   ! Use Ncall rather than "first" to ensure one single initialization
   if (Ncall .eq. 1) then
      allocate(ga(0:kmax),stat=rc)
      if (rc /= 0) stop 'coordinates avc6_gvc: Error allocating (ga)'
      allocate(sig(0:kmax),stat=rc)    ! dimensionless sigma-coordinate
      if (rc /= 0) STOP 'coordinates avc6_gvc: Error allocating (sig)'
      allocate(be(0:kmax),stat=rc)     ! dimensionless beta-coordinate
      if (rc /= 0) STOP 'coordinates avc6_gvc: Error allocating (be)'
      allocate(gga(I3DFIELD),stat=rc)  ! dimensionless gamma-coordinate
      if (rc /= 0) stop 'coordinates avc6_gvc: Error allocating memory (gga)'
      !
      do k=0,kmax
         ga(k) = k
      end do
      !
      sig(   0) = -_ONE_
      sig(kmax) =  _ZERO_
      do k=1,kmax-1
         sig(k)=k*_ONE_/kmax-_ONE_
      end do
      gga(:,:,   0) = -_ONE_
      gga(:,:,kmax) =  _ZERO_

      if (kmax .gt. 1) then
        be(:) = sig(:)
        if (ddu .gt. _ZERO_ .or. ddl .gt. _ZERO_) then
           if (ddu .lt. _ZERO_) ddu=_ZERO_
           if (ddl .lt. _ZERO_) ddl=_ZERO_
           do k=1,kmax-1
             bek=tanh(k*(ddl+ddu)/kmax-ddl)+tanh(ddl)
             be(k)=bek/(tanh(ddl)+tanh(ddu))-_ONE_
           end do
        end if
     end if
   end if ! End if first / Ncall==1
!
! Everything else depends on water depth, so needs recomputation at every time step.
   if (kmax .gt. 1) then
      if (gamma_surf) then
         kk=kmax
      else
         kk=1
      end if
      do j=jmin-HALO,jmax+HALO
         do i=imin-HALO,imax+HALO
            HH=max(sseo(i,j)+H(i,j),min_depth)
            alpha=min(&
                     ((be(kk)-be(kk-1))-D_gamma/HH&
                      *(sig(kk)-sig(kk-1)))&
                      /((be(kk)-be(kk-1))-(sig(kk)-sig(kk-1))),_ONE_)
            do k=1,kmax-1
               gga(i,j,k)=alpha*sig(k)+(1.-alpha)*be(k)
               if (gga(i,j,k) .lt. gga(i,j,k-1)) then
                  STDERR kk,(be(kk)-be(kk-1)),(sig(kk)-sig(kk-1))
                  STDERR D_gamma,HH
                  STDERR alpha
                  STDERR k-1,gga(i,j,k-1),be(k-1),sig(k-1)
                  STDERR k,gga(i,j,k),be(k),sig(k)
                  stop 'coordinates'
               end if
            end do
         end do
      end do
   end if ! End kmax.gt.1
!
!  Here, the (initial) layer distribution is calculated.
   do k=1,kmax
      do j=jmin-HALO,jmax+HALO
         do i=imin-HALO,imax+HALO
            HH=max(Dn(i,j),min_depth)
            hfield(i,j,k)=HH*(gga(i,j,k)-gga(i,j,k-1))
         end do
      end do
   end do
end subroutine avc6_gvc

! This computes the GVC-coordinates (coord type=3), but only for a single 
! column, given by i,j. 
! Actual doc to be made.
! BJB 2021-04-07
subroutine avc6_gvc_col(i,j,hcol)
! !USES:
   use domain, only: ddu,ddl,d_gamma,gamma_surf
   use domain, only: imin,imax,jmin,jmax,kmax,H,HU,HV,az,au,av,min_depth
   use variables_3d, only: Dn,sseo
!
!   logical, intent(in)                 :: first
   integer, intent(in)                 :: i,j
! Output is a sincle column
   REALTYPE, intent(out)               :: hcol(0:kmax)
! !LOCAL VARIABLES:
   integer, save   :: Ncall = 0
   integer         :: k,rc,kk
   REALTYPE        :: alpha
   REALTYPE        :: HH,zz,r,bek
   REALTYPE, save, dimension(:), allocatable  :: ga,dga,be,sig
   REALTYPE, save, dimension(:), allocatable  :: gga

   Ncall = Ncall+1
#ifdef DEBUG
   write(debug,*) 'avc6_gvc() # ',Ncall
#endif
   ! Use Ncall rather than "first" to ensure one single initialization
   if (Ncall .eq. 1) then
      allocate(ga(0:kmax),stat=rc)
      if (rc /= 0) stop 'coordinates avc6_gvc: Error allocating (ga)'
      allocate(sig(0:kmax),stat=rc)  ! dimensionless sigma-coordinate
      if (rc /= 0) STOP 'coordinates avc6_gvc: Error allocating (sig)'
      allocate(be(0:kmax),stat=rc)   ! dimensionless beta-coordinate
      if (rc /= 0) STOP 'coordinates avc6_gvc: Error allocating (be)'
      allocate(gga(0:kmax),stat=rc)  ! dimensionless gamma-coordinate
      if (rc /= 0) stop 'coordinates avc6_gvc: Error allocating memory (gga)'
      !
      do k=0,kmax
         ga(k) = k
      end do
      !
      sig(   0) = -_ONE_
      sig(kmax) =  _ZERO_
      do k=1,kmax-1
         sig(k)=k*_ONE_/kmax-_ONE_
      end do
      gga(   0) = -_ONE_
      gga(kmax) =  _ZERO_

      if (kmax .gt. 1) then
        be(:) = sig(:)
        if (ddu .gt. _ZERO_ .or. ddl .gt. _ZERO_) then
           if (ddu .lt. _ZERO_) ddu=_ZERO_
           if (ddl .lt. _ZERO_) ddl=_ZERO_
           do k=1,kmax-1
             bek=tanh(k*(ddl+ddu)/kmax-ddl)+tanh(ddl)
             be(k)=bek/(tanh(ddl)+tanh(ddu))-_ONE_
           end do
        end if
     end if
   end if ! End if first / Ncall==1
!
! Everything else depends on water depth, so needs recomputation at every time step.
   if (kmax .gt. 1) then
      if (gamma_surf) then
         kk=kmax
      else
         kk=1
      end if
      ! TODO: We could have an assert / sanity check on i,j
      !do j=jmin-HALO,jmax+HALO
      !do i=imin-HALO,imax+HALO
      HH=max(sseo(i,j)+H(i,j),min_depth)
      alpha=min(&
           ((be(kk)-be(kk-1))-D_gamma/HH&
           *(sig(kk)-sig(kk-1)))&
           /((be(kk)-be(kk-1))-(sig(kk)-sig(kk-1))),_ONE_)
      do k=1,kmax-1
         gga(k)=alpha*sig(k)+(1.-alpha)*be(k)
         if (gga(k) .lt. gga(k-1)) then
            STDERR kk,(be(kk)-be(kk-1)),(sig(kk)-sig(kk-1))
            STDERR D_gamma,HH
            STDERR alpha
            STDERR k-1,gga(k-1),be(k-1),sig(k-1)
            STDERR k,gga(k),be(k),sig(k)
            stop 'avc6 gvc_col'
         end if
      end do
   end if ! End kmax.gt.1
!
!  Here, the (initial) layer distribution is calculated.
   do k=1,kmax
      HH=max(Dn(i,j),min_depth)
      hcol(k)=HH*(gga(k)-gga(k-1))
   end do

end subroutine avc6_gvc_col

! This temporary debug helper routine will just dump min,max and positions of a 
! given array
! BJB 2021-03-31
subroutine avc6_showminmax(hfield)
  use domain, only: imin,imax,jmin,jmax,kmax,az
  REALTYPE, intent(in)               :: hfield(I3DFIELD)
  REALTYPE        :: hmin,hmax
  integer :: i,j,k
  integer :: ip_min,jp_min,kp_min
  integer :: ip_max,jp_max,kp_max
  hmin=9999*_ONE_
  hmax=-9999*_ONE_
  do k=1,kmax
     do j=jmin+1-HALO,jmax-1+HALO
        do i=imin+1-HALO,imax-1+HALO
           if (az(i,j) .ge. 1) then
              if ( hfield(i,j,k) .lt. hmin ) then
                 ip_min=i
                 jp_min=j
                 kp_min=k
                 hmin=hfield(i,j,k)
              end if
              if ( hfield(i,j,k) .gt. hmax ) then
                 ip_max=i
                 jp_max=j
                 kp_max=k
                 hmax=hfield(i,j,k)
              end if
           end if
        end do
     end do
  end do
  STDERR 'adaptive_coordinates_6() hmin',ip_min,jp_min,kp_min,hmin
  STDERR 'adaptive_coordinates_6() hmax',ip_max,jp_max,kp_max,hmax
end subroutine avc6_showminmax


!-----------------------------------------------------------------------
! Copyright (C) 2007 - Hans Burchard and Karsten Bolding               !
!-----------------------------------------------------------------------
