#ifndef AMREX_MLEBABECLAP_3D_K_H_
#define AMREX_MLEBABECLAP_3D_K_H_
#include <AMReX_Config.H>
#include <AMReX_REAL.H>

#include <AMReX_EB_LeastSquares_3D_K.H>

namespace amrex {

// Evaluate y = A(x) on every cell of `box` for components [0, ncomp), using the
// centroid-based gradient helpers for cells that are not plain regular cells.
//
// Per cell (i,j,k) and component n:
//   * covered cell: y is set to zero;
//   * regular cell whose six face neighbours are also regular: the ordinary
//     7-point stencil, alpha*a*x minus the scaled difference of b-weighted
//     two-point differences of x in each direction;
//   * every other cell: the six face fluxes are weighted by apx/apy/apz,
//     an optional EB flux is subtracted, and the sum is divided by vfrc(i,j,k).
//
// Parameters (described by how this function uses them):
//   y                 output array, written at (i,j,k,n).
//   x                 input array, read at (i,j,k,n) and its face neighbours.
//   a                 read as a(i,j,k) (no component index), multiplied by alpha.
//   bX, bY, bZ        face coefficients; index (i,j,k) is used for the low face
//                     of cell (i,j,k) and (i+1,j,k) etc. for the high face.
//   flag              per-cell EB flag (isCovered / isRegular / isSingleValued).
//   vfrc              read as kappa; presumably the cell volume fraction.
//   apx, apy, apz     per-face weights; presumably face area fractions.
//   fcx, fcy, fcz     components 0 and 1 are handed to the gradient helpers as
//                     the two in-plane locations on the face; presumably face
//                     centroids.
//   ccent, bcent      forwarded unchanged to the gradient helpers.
//   ba, beb           multiply the EB gradient to form the EB flux.
//   phieb             forwarded to the gradient helpers.
//   domlo_*, domhi_*  compared against (i,j,k) to select the "_extdir" helpers,
//                     and forwarded to them.
//   on_*_face         only forwarded to the "_extdir" helpers.
//   is_eb_dirichlet   enables the EB flux term (for single-valued cells) and is
//                     forwarded to the face-gradient helpers.
//   is_eb_inhomog     forwarded to the helpers.
//   dxinv             dxinv[d]*dxinv[d]*beta gives the scaling dhx/dhy/dhz.
//   alpha, beta       scalar weights of the a*x term and of the flux terms.
//   ncomp             number of components looped over.
//
// NOTE(review): the grad_*_of_phi_on_centroids* helpers are not defined in this
// file; presumably they come from AMReX_EB_LeastSquares_3D_K.H -- confirm.
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void mlebabeclap_adotx_centroid (Box const& box, Array4<Real> const& y,
                        Array4<Real const> const& x, Array4<Real const> const& a,
                        Array4<Real const> const& bX, Array4<Real const> const& bY,
                        Array4<Real const> const& bZ,
                        Array4<EBCellFlag const> const& flag,
                        Array4<Real const> const& vfrc, Array4<Real const> const& apx,
                        Array4<Real const> const& apy, Array4<Real const> const& apz,
                        Array4<Real const> const& fcx, Array4<Real const> const& fcy,
                        Array4<Real const> const& fcz,
                        Array4<Real const> const& ccent, Array4<Real const> const& ba,
                        Array4<Real const> const& bcent, Array4<Real const> const& beb,
                        Array4<Real const> const& phieb,
                        const int& domlo_x, const int& domlo_y, const int& domlo_z,
                        const int& domhi_x, const int& domhi_y, const int& domhi_z,
                        const bool& on_x_face, const bool& on_y_face, const bool& on_z_face,
                        bool is_eb_dirichlet, bool is_eb_inhomog,
                        GpuArray<Real,AMREX_SPACEDIM> const& dxinv,
                        Real alpha, Real beta, int ncomp) noexcept
{
    // beta / dx^2 in each direction.
    Real dhx = beta*dxinv[0]*dxinv[0];
    Real dhy = beta*dxinv[1]*dxinv[1];
    Real dhz = beta*dxinv[2]*dxinv[2];

    amrex::Loop(box, ncomp, [=] (int i, int j, int k, int n) noexcept
    {
        if (flag(i,j,k).isCovered())
        {
            y(i,j,k,n) = Real(0.0);
        }
        // The plain stencil is used only when the cell AND all six face
        // neighbours are regular; a regular cell next to a non-regular one
        // falls through to the general branch below.
        else if (flag(i,j,k).isRegular() &&
                 ((flag(i-1,j  ,k  ).isRegular() && flag(i+1,j  ,k  ).isRegular() &&
                  flag(i  ,j-1,k  ).isRegular() && flag(i  ,j+1,k  ).isRegular() &&
                  flag(i  ,j  ,k-1).isRegular() && flag(i  ,j  ,k+1).isRegular()) ))
        {
            y(i,j,k,n) = alpha*a(i,j,k)*x(i,j,k,n)
                - dhx * (bX(i+1,j,k,n)*(x(i+1,j,k,n) - x(i  ,j,k,n))
                        -bX(i  ,j,k,n)*(x(i  ,j,k,n) - x(i-1,j,k,n)))
                - dhy * (bY(i,j+1,k,n)*(x(i,j+1,k,n) - x(i,j  ,k,n))
                        -bY(i,j  ,k,n)*(x(i,j  ,k,n) - x(i,j-1,k,n)))
                - dhz * (bZ(i,j,k+1,n)*(x(i,j,k+1,n) - x(i,j,k  ,n))
                        -bZ(i,j,k  ,n)*(x(i,j,k  ,n) - x(i,j,k-1,n)));
        }
        else
        {
            // "m" = low face of this cell, "p" = high face (index +1).
            Real kappa = vfrc(i,j,k);
            Real apxm = apx(i,j,k);
            Real apxp = apx(i+1,j,k);
            Real apym = apy(i,j,k);
            Real apyp = apy(i,j+1,k);
            Real apzm = apz(i,j,k);
            Real apzp = apz(i,j,k+1);

            // Cells at or beyond the domain bounds in any direction use the
            // "_extdir" variants of the gradient helpers; all other cells use
            // the variants that take no domain-boundary arguments.
            bool needs_bdry_stencil = (i <= domlo_x) || (i >= domhi_x) ||
                                      (j <= domlo_y) || (j >= domhi_y) ||
                                      (k <= domlo_z) || (k >= domhi_z);

            // Each face flux below follows the same pattern: start from the
            // two-point difference times b, and replace it with
            // b * (helper gradient) when the face weight is non-zero and at
            // least one of the three cells along that direction has vfrc != 1.

            // Low-x face.
            Real fxm = bX(i,j,k,n)*(x(i,j,k,n) - x(i-1,j,k,n));
            if ( (apxm != Real(0.0)) && (vfrc(i,j,k) != Real(1.0) || vfrc(i-1,j,k) != Real(1.0) || vfrc(i+1,j,k) != Real(1.0)) ) {
                Real yloc_on_xface = fcx(i,j,k,0);
                Real zloc_on_xface = fcx(i,j,k,1);

                if(needs_bdry_stencil) {
                  fxm = grad_x_of_phi_on_centroids_extdir(i,j,k,n,x,phieb,flag,ccent,bcent,vfrc,
                                                          yloc_on_xface,zloc_on_xface,
                                                          is_eb_dirichlet,is_eb_inhomog,
                                                          on_x_face,domlo_x,domhi_x,
                                                          on_y_face,domlo_y,domhi_y,
                                                          on_z_face,domlo_z,domhi_z);
                } else {
                  fxm = grad_x_of_phi_on_centroids(i,j,k,n,x,phieb,flag,ccent,bcent,
                                                 yloc_on_xface,zloc_on_xface,is_eb_dirichlet,is_eb_inhomog);
                }

                fxm *= bX(i,j,k,n);
            }

            // High-x face (helpers are called with the face index i+1).
            Real fxp = bX(i+1,j,k,n)*(x(i+1,j,k,n) - x(i,j,k,n));
            if ( (apxp != Real(0.0)) && (vfrc(i,j,k) != Real(1.0) || vfrc(i+1,j,k) != Real(1.0) || vfrc(i-1,j,k) != Real(1.0)) ) {
                Real yloc_on_xface = fcx(i+1,j,k,0);
                Real zloc_on_xface = fcx(i+1,j,k,1);

                if(needs_bdry_stencil) {
                  fxp = grad_x_of_phi_on_centroids_extdir(i+1,j,k,n,x,phieb,flag,ccent,bcent,vfrc,
                                                          yloc_on_xface,zloc_on_xface,
                                                          is_eb_dirichlet,is_eb_inhomog,
                                                          on_x_face,domlo_x,domhi_x,
                                                          on_y_face,domlo_y,domhi_y,
                                                          on_z_face,domlo_z,domhi_z);
                } else {
                  fxp = grad_x_of_phi_on_centroids(i+1,j,k,n,x,phieb,flag,ccent,bcent,
                                                 yloc_on_xface,zloc_on_xface,is_eb_dirichlet,is_eb_inhomog);
                }

                fxp *= bX(i+1,j,k,n);
            }

            // Low-y face.
            Real fym = bY(i,j,k,n)*(x(i,j,k,n) - x(i,j-1,k,n));
            if ( (apym != Real(0.0)) && (vfrc(i,j,k) != Real(1.0) || vfrc(i,j-1,k) != Real(1.0) || vfrc(i,j+1,k) != Real(1.0)) ) {
                Real xloc_on_yface = fcy(i,j,k,0);
                Real zloc_on_yface = fcy(i,j,k,1);

                if(needs_bdry_stencil) {
                  fym = grad_y_of_phi_on_centroids_extdir(i,j,k,n,x,phieb,flag,ccent,bcent,vfrc,
                                                          xloc_on_yface,zloc_on_yface,
                                                          is_eb_dirichlet,is_eb_inhomog,
                                                          on_x_face,domlo_x,domhi_x,
                                                          on_y_face,domlo_y,domhi_y,
                                                          on_z_face,domlo_z,domhi_z);
                } else {
                  fym = grad_y_of_phi_on_centroids(i,j,k,n,x,phieb,flag,ccent,bcent,
                                                 xloc_on_yface,zloc_on_yface,is_eb_dirichlet,is_eb_inhomog);
                }

                fym *= bY(i,j,k,n);
            }

            // High-y face.
            Real fyp = bY(i,j+1,k,n)*(x(i,j+1,k,n) - x(i,j,k,n));
            if ( (apyp != Real(0.0)) && (vfrc(i,j,k) != Real(1.0) || vfrc(i,j+1,k) != Real(1.0) || vfrc(i,j-1,k) != Real(1.0)) ) {
                Real xloc_on_yface = fcy(i,j+1,k,0);
                Real zloc_on_yface = fcy(i,j+1,k,1);

                if(needs_bdry_stencil) {
                  fyp = grad_y_of_phi_on_centroids_extdir(i,j+1,k,n,x,phieb,flag,ccent,bcent,vfrc,
                                                          xloc_on_yface,zloc_on_yface,
                                                          is_eb_dirichlet,is_eb_inhomog,
                                                          on_x_face,domlo_x,domhi_x,
                                                          on_y_face,domlo_y,domhi_y,
                                                          on_z_face,domlo_z,domhi_z);
                } else {
                  fyp = grad_y_of_phi_on_centroids(i,j+1,k,n,x,phieb,flag,ccent,bcent,
                                                 xloc_on_yface,zloc_on_yface,is_eb_dirichlet,is_eb_inhomog);
                }

                fyp *= bY(i,j+1,k,n);
            }

            // Low-z face.
            Real fzm = bZ(i,j,k,n)*(x(i,j,k,n) - x(i,j,k-1,n));
            if ( (apzm != Real(0.0)) && (vfrc(i,j,k) != Real(1.0) || vfrc(i,j,k-1) != Real(1.0) || vfrc(i,j,k+1) != Real(1.0)) ) {
                Real xloc_on_zface = fcz(i,j,k,0);
                Real yloc_on_zface = fcz(i,j,k,1);

                if(needs_bdry_stencil) {
                  fzm = grad_z_of_phi_on_centroids_extdir(i,j,k,n,x,phieb,flag,ccent,bcent,vfrc,
                                                          xloc_on_zface,yloc_on_zface,
                                                          is_eb_dirichlet,is_eb_inhomog,
                                                          on_x_face,domlo_x,domhi_x,
                                                          on_y_face,domlo_y,domhi_y,
                                                          on_z_face,domlo_z,domhi_z);
                } else {
                  fzm = grad_z_of_phi_on_centroids(i,j,k,n,x,phieb,flag,ccent,bcent,
                                                 xloc_on_zface,yloc_on_zface,is_eb_dirichlet,is_eb_inhomog);
                }

                fzm *= bZ(i,j,k,n);
            }

            // High-z face.
            Real fzp = bZ(i,j,k+1,n)*(x(i,j,k+1,n) - x(i,j,k,n));
            if ( (apzp != Real(0.0)) && (vfrc(i,j,k) != Real(1.0) || vfrc(i,j,k+1) != Real(1.0)  || vfrc(i,j,k-1) != Real(1.0)) ) {
                Real xloc_on_zface = fcz(i,j,k+1,0);
                Real yloc_on_zface = fcz(i,j,k+1,1);

                if(needs_bdry_stencil) {
                  fzp = grad_z_of_phi_on_centroids_extdir(i,j,k+1,n,x,phieb,flag,ccent,bcent,vfrc,
                                                          xloc_on_zface,yloc_on_zface,
                                                          is_eb_dirichlet,is_eb_inhomog,
                                                          on_x_face,domlo_x,domhi_x,
                                                          on_y_face,domlo_y,domhi_y,
                                                          on_z_face,domlo_z,domhi_z);
                } else {
                  fzp = grad_z_of_phi_on_centroids(i,j,k+1,n,x,phieb,flag,ccent,bcent,
                                                 xloc_on_zface,yloc_on_zface,is_eb_dirichlet,is_eb_inhomog);
                }

                fzp *= bZ(i,j,k+1,n);
            }

            // EB flux: only for a Dirichlet EB and single-valued cells.
            Real feb = Real(0.0);
            if (is_eb_dirichlet && flag(i,j,k).isSingleValued()) {
                // Unit vector built from the low-minus-high differences of the
                // face weights in each direction.
                Real dapx = apxm-apxp;
                Real dapy = apym-apyp;
                Real dapz = apzm-apzp;
                Real anorm = std::sqrt(dapx*dapx+dapy*dapy+dapz*dapz);
                Real anorminv = Real(1.0)/anorm;
                Real anrmx = dapx * anorminv;
                Real anrmy = dapy * anorminv;
                Real anrmz = dapz * anorminv;

                // The EB gradient always uses the "_extdir" helper, regardless
                // of needs_bdry_stencil.
                feb = grad_eb_of_phi_on_centroids_extdir(i,j,k,n,x,phieb,flag,ccent,bcent,vfrc,
                                                         anrmx,anrmy,anrmz,is_eb_inhomog,
                                                         on_x_face,domlo_x,domhi_x,
                                                         on_y_face,domlo_y,domhi_y,
                                                         on_z_face,domlo_z,domhi_z);
                feb *= ba(i,j,k) * beb(i,j,k,n);
            }

            // Weighted flux balance divided by kappa.
            // NOTE(review): the EB term is scaled by dhx only -- presumably
            // this relies on equal cell spacing in all directions; confirm.
            y(i,j,k,n) = alpha*a(i,j,k)*x(i,j,k,n) + (Real(1.0)/kappa) *
                (dhx*(apxm*fxm - apxp*fxp) +
                 dhy*(apym*fym - apyp*fyp) +
                 dhz*(apzm*fzm - apzp*fzp) - dhx*feb);
        }
    });
}

// Evaluate y = A(x) on every cell of `box` for components [0, ncomp), using
// bilinear interpolation of face fluxes on partially weighted faces.
//
// Per cell (i,j,k) and component n:
//   * covered cell: y is set to zero;
//   * regular cell: the ordinary 7-point stencil;
//   * every other cell: six face fluxes weighted by apx/apy/apz, minus an
//     optional Dirichlet EB flux, divided by vfrc(i,j,k).
//
// Parameters (described by how this function uses them):
//   y                 output array, written at (i,j,k,n).
//   x                 input array.
//   a                 read as a(i,j,k) (no component index), multiplied by alpha.
//   bX, bY, bZ        face coefficients, indexed by face and component.
//   ccm               integer mask; a non-zero entry in either cell adjacent to
//                     the face, in the shifted row, enables interpolation
//                     toward that row. Presumably a connectivity mask -- confirm.
//   flag              per-cell EB flag (isCovered / isRegular).
//   vfrc              read as kappa; presumably the cell volume fraction.
//   apx, apy, apz     per-face weights; presumably face area fractions.
//   fcx, fcy, fcz     components 0 and 1 give the signed in-plane offsets used
//                     as interpolation fractions; presumably face centroids.
//   ba, beb           multiply the EB normal gradient to form the EB flux.
//   bc                components 0..2 are read as the offsets bctx/bcty/bctz;
//                     presumably the EB boundary centroid -- confirm.
//   is_dirichlet      enables the EB flux term.
//   phieb             EB value, read only when is_inhomog is true.
//   is_inhomog        if false the EB value is taken as zero.
//   dxinv             dxinv[d]*dxinv[d]*beta gives the scaling dhx/dhy/dhz.
//   alpha, beta       scalar weights of the a*x term and of the flux terms.
//   ncomp             number of components looped over.
//   beta_on_centroid, phi_on_centroid
//                     select the interpolation form. Only the two combinations
//                     with phi_on_centroid == false modify the face fluxes; if
//                     phi_on_centroid is true the plain two-point flux is kept.
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void mlebabeclap_adotx (Box const& box, Array4<Real> const& y,
                        Array4<Real const> const& x, Array4<Real const> const& a,
                        Array4<Real const> const& bX, Array4<Real const> const& bY,
                        Array4<Real const> const& bZ, Array4<const int> const& ccm,
                        Array4<EBCellFlag const> const& flag,
                        Array4<Real const> const& vfrc, Array4<Real const> const& apx,
                        Array4<Real const> const& apy, Array4<Real const> const& apz,
                        Array4<Real const> const& fcx, Array4<Real const> const& fcy,
                        Array4<Real const> const& fcz, Array4<Real const> const& ba,
                        Array4<Real const> const& bc, Array4<Real const> const& beb,
                        bool is_dirichlet, Array4<Real const> const& phieb,
                        bool is_inhomog, GpuArray<Real,AMREX_SPACEDIM> const& dxinv,
                        Real alpha, Real beta, int ncomp,
                        bool beta_on_centroid, bool phi_on_centroid) noexcept
{
    // beta / dx^2 in each direction.
    Real dhx = beta*dxinv[0]*dxinv[0];
    Real dhy = beta*dxinv[1]*dxinv[1];
    Real dhz = beta*dxinv[2]*dxinv[2];

    bool beta_on_center = !(beta_on_centroid);
    bool  phi_on_center = !( phi_on_centroid);

    amrex::Loop(box, ncomp, [=] (int i, int j, int k, int n) noexcept
    {
        if (flag(i,j,k).isCovered())
        {
            y(i,j,k,n) = Real(0.0);
        }
        else if (flag(i,j,k).isRegular())
        {
            y(i,j,k,n) = alpha*a(i,j,k)*x(i,j,k,n)
                - dhx * (bX(i+1,j,k,n)*(x(i+1,j,k,n) - x(i  ,j,k,n))
                        -bX(i  ,j,k,n)*(x(i  ,j,k,n) - x(i-1,j,k,n)))
                - dhy * (bY(i,j+1,k,n)*(x(i,j+1,k,n) - x(i,j  ,k,n))
                        -bY(i,j  ,k,n)*(x(i,j  ,k,n) - x(i,j-1,k,n)))
                - dhz * (bZ(i,j,k+1,n)*(x(i,j,k+1,n) - x(i,j,k  ,n))
                        -bZ(i,j,k  ,n)*(x(i,j,k  ,n) - x(i,j,k-1,n)));
        }
        else
        {
            // "m" = low face of this cell, "p" = high face (index +1).
            Real kappa = vfrc(i,j,k);
            Real apxm = apx(i,j,k);
            Real apxp = apx(i+1,j,k);
            Real apym = apy(i,j,k);
            Real apyp = apy(i,j+1,k);
            Real apzm = apz(i,j,k);
            Real apzp = apz(i,j,k+1);

            // Each of the six face fluxes below follows the same pattern:
            //  1. start from the two-point flux b * (x_hi - x_lo);
            //  2. if the face weight is neither 0 nor 1, pick the neighbouring
            //     rows in the two in-plane directions on the side given by the
            //     sign of the fc* components, and use |fc*| as the
            //     interpolation fraction (zero when ccm is zero in both cells
            //     adjacent to the face in that row);
            //  3. bilinearly blend either the four b-weighted differences
            //     (beta and phi on centers) or the four plain differences,
            //     then multiply by b at the face (beta on centroid, phi on
            //     center).

            // Low-x face.
            Real fxm = bX(i,j,k,n)*(x(i,j,k,n) - x(i-1,j,k,n));
            if (apxm != Real(0.0) && apxm != Real(1.0)) {
                int jj = j + static_cast<int>(std::copysign(Real(1.0), fcx(i,j,k,0)));
                int kk = k + static_cast<int>(std::copysign(Real(1.0), fcx(i,j,k,1)));
                Real fracy = (ccm(i-1,jj,k) || ccm(i,jj,k)) ? std::abs(fcx(i,j,k,0)) : Real(0.0);
                Real fracz = (ccm(i-1,j,kk) || ccm(i,j,kk)) ? std::abs(fcx(i,j,k,1)) : Real(0.0);
                if (beta_on_center && phi_on_center)
                {
                    fxm = (Real(1.0)-fracy)*(Real(1.0)-fracz)*fxm +
                        fracy*(Real(1.0)-fracz)*bX(i,jj,k ,n)*(x(i,jj,k ,n)-x(i-1,jj,k ,n)) +
                        fracz*(Real(1.0)-fracy)*bX(i,j ,kk,n)*(x(i,j ,kk,n)-x(i-1,j ,kk,n)) +
                        fracy*     fracz *bX(i,jj,kk,n)*(x(i,jj,kk,n)-x(i-1,jj,kk,n));
                }
                else if (beta_on_centroid && phi_on_center)
                {
                    fxm = (Real(1.0)-fracy)*(Real(1.0)-fracz)*(x(i, j, k,n)-x(i-1, j, k,n)) +
                               fracy *(Real(1.0)-fracz)*(x(i,jj, k,n)-x(i-1,jj, k,n)) +
                               fracz *(Real(1.0)-fracy)*(x(i, j,kk,n)-x(i-1, j,kk,n)) +
                               fracy *     fracz *(x(i,jj,kk,n)-x(i-1,jj,kk,n));
                    fxm *= bX(i,j,k,n);
                }
            }

            // High-x face.
            Real fxp = bX(i+1,j,k,n)*(x(i+1,j,k,n) - x(i,j,k,n));
            if (apxp != Real(0.0) && apxp != Real(1.0)) {
                int jj = j + static_cast<int>(std::copysign(Real(1.0),fcx(i+1,j,k,0)));
                int kk = k + static_cast<int>(std::copysign(Real(1.0),fcx(i+1,j,k,1)));
                Real fracy = (ccm(i,jj,k) || ccm(i+1,jj,k)) ? std::abs(fcx(i+1,j,k,0)) : Real(0.0);
                Real fracz = (ccm(i,j,kk) || ccm(i+1,j,kk)) ? std::abs(fcx(i+1,j,k,1)) : Real(0.0);
                if (beta_on_center && phi_on_center)
                {
                    fxp = (Real(1.0)-fracy)*(Real(1.0)-fracz)*fxp +
                        fracy*(Real(1.0)-fracz)*bX(i+1,jj,k ,n)*(x(i+1,jj,k ,n)-x(i,jj,k ,n)) +
                        fracz*(Real(1.0)-fracy)*bX(i+1,j ,kk,n)*(x(i+1,j ,kk,n)-x(i,j ,kk,n)) +
                        fracy*     fracz *bX(i+1,jj,kk,n)*(x(i+1,jj,kk,n)-x(i,jj,kk,n));
                }
                else if (beta_on_centroid && phi_on_center)
                {
                    fxp = (Real(1.0)-fracy)*(Real(1.0)-fracz)*(x(i+1, j, k,n)-x(i, j, k,n)) +
                               fracy *(Real(1.0)-fracz)*(x(i+1,jj, k,n)-x(i,jj, k,n)) +
                               fracz *(Real(1.0)-fracy)*(x(i+1, j,kk,n)-x(i, j,kk,n)) +
                               fracy *     fracz *(x(i+1,jj,kk,n)-x(i,jj,kk,n));
                    fxp *= bX(i+1,j,k,n);

                }
            }

            // Low-y face.
            Real fym = bY(i,j,k,n)*(x(i,j,k,n) - x(i,j-1,k,n));
            if (apym != Real(0.0) && apym != Real(1.0)) {
                int ii = i + static_cast<int>(std::copysign(Real(1.0),fcy(i,j,k,0)));
                int kk = k + static_cast<int>(std::copysign(Real(1.0),fcy(i,j,k,1)));
                Real fracx = (ccm(ii,j-1,k) || ccm(ii,j,k)) ? std::abs(fcy(i,j,k,0)) : Real(0.0);
                Real fracz = (ccm(i,j-1,kk) || ccm(i,j,kk)) ? std::abs(fcy(i,j,k,1)) : Real(0.0);
                if (beta_on_center && phi_on_center)
                {
                    fym = (Real(1.0)-fracx)*(Real(1.0)-fracz)*fym +
                        fracx*(Real(1.0)-fracz)*bY(ii,j,k ,n)*(x(ii,j,k ,n)-x(ii,j-1,k ,n)) +
                        fracz*(Real(1.0)-fracx)*bY(i ,j,kk,n)*(x(i ,j,kk,n)-x(i ,j-1,kk,n)) +
                        fracx*     fracz *bY(ii,j,kk,n)*(x(ii,j,kk,n)-x(ii,j-1,kk,n));
                }
                else if (beta_on_centroid && phi_on_center)
                {
                    fym = (Real(1.0)-fracx)*(Real(1.0)-fracz)*(x( i,j, k,n)-x( i,j-1, k,n)) +
                               fracx *(Real(1.0)-fracz)*(x(ii,j, k,n)-x(ii,j-1, k,n)) +
                               fracz *(Real(1.0)-fracx)*(x(i ,j,kk,n)-x( i,j-1,kk,n)) +
                               fracx *     fracz *(x(ii,j,kk,n)-x(ii,j-1,kk,n));
                    fym *= bY(i,j,k,n);

                }
            }

            // High-y face.
            Real fyp = bY(i,j+1,k,n)*(x(i,j+1,k,n) - x(i,j,k,n));
            if (apyp != Real(0.0) && apyp != Real(1.0)) {
                int ii = i + static_cast<int>(std::copysign(Real(1.0),fcy(i,j+1,k,0)));
                int kk = k + static_cast<int>(std::copysign(Real(1.0),fcy(i,j+1,k,1)));
                Real fracx = (ccm(ii,j,k) || ccm(ii,j+1,k)) ? std::abs(fcy(i,j+1,k,0)) : Real(0.0);
                Real fracz = (ccm(i,j,kk) || ccm(i,j+1,kk)) ? std::abs(fcy(i,j+1,k,1)) : Real(0.0);
                if (beta_on_center && phi_on_center)
                {
                    fyp = (Real(1.0)-fracx)*(Real(1.0)-fracz)*fyp +
                        fracx*(Real(1.0)-fracz)*bY(ii,j+1,k ,n)*(x(ii,j+1,k ,n)-x(ii,j,k ,n)) +
                        fracz*(Real(1.0)-fracx)*bY(i ,j+1,kk,n)*(x(i ,j+1,kk,n)-x(i ,j,kk,n)) +
                        fracx*     fracz *bY(ii,j+1,kk,n)*(x(ii,j+1,kk,n)-x(ii,j,kk,n));
                }
                else if (beta_on_centroid && phi_on_center)
                {
                    fyp = (Real(1.0)-fracx)*(Real(1.0)-fracz)*(x( i,j+1, k,n)-x( i,j, k,n)) +
                               fracx *(Real(1.0)-fracz)*(x(ii,j+1, k,n)-x(ii,j, k,n)) +
                               fracz *(Real(1.0)-fracx)*(x( i,j+1,kk,n)-x( i,j,kk,n)) +
                               fracx *     fracz *(x(ii,j+1,kk,n)-x(ii,j,kk,n));
                    fyp *= bY(i,j+1,k,n);

                }
            }

            // Low-z face.
            Real fzm = bZ(i,j,k,n)*(x(i,j,k,n) - x(i,j,k-1,n));
            if (apzm != Real(0.0) && apzm != Real(1.0)) {
                int ii = i + static_cast<int>(std::copysign(Real(1.0),fcz(i,j,k,0)));
                int jj = j + static_cast<int>(std::copysign(Real(1.0),fcz(i,j,k,1)));
                Real fracx = (ccm(ii,j,k-1) || ccm(ii,j,k)) ? std::abs(fcz(i,j,k,0)) : Real(0.0);
                Real fracy = (ccm(i,jj,k-1) || ccm(i,jj,k)) ? std::abs(fcz(i,j,k,1)) : Real(0.0);
                if (beta_on_center && phi_on_center)
                {
                    fzm = (Real(1.0)-fracx)*(Real(1.0)-fracy)*fzm +
                        fracx*(Real(1.0)-fracy)*bZ(ii,j ,k,n)*(x(ii,j ,k,n)-x(ii,j ,k-1,n)) +
                        fracy*(Real(1.0)-fracx)*bZ(i ,jj,k,n)*(x(i ,jj,k,n)-x(i ,jj,k-1,n)) +
                        fracx*     fracy *bZ(ii,jj,k,n)*(x(ii,jj,k,n)-x(ii,jj,k-1,n));
                }
                else if (beta_on_centroid && phi_on_center)
                {
                    fzm = (Real(1.0)-fracx)*(Real(1.0)-fracy)*(x( i, j,k,n)-x( i, j,k-1,n)) +
                               fracx *(Real(1.0)-fracy)*(x(ii, j,k,n)-x(ii, j,k-1,n)) +
                               fracy *(Real(1.0)-fracx)*(x( i,jj,k,n)-x( i,jj,k-1,n)) +
                               fracx *     fracy *(x(ii,jj,k,n)-x(ii,jj,k-1,n));
                    fzm *= bZ(i,j,k,n);

                }
            }

            // High-z face.
            Real fzp = bZ(i,j,k+1,n)*(x(i,j,k+1,n) - x(i,j,k,n));
            if (apzp != Real(0.0) && apzp != Real(1.0)) {
                int ii = i + static_cast<int>(std::copysign(Real(1.0),fcz(i,j,k+1,0)));
                int jj = j + static_cast<int>(std::copysign(Real(1.0),fcz(i,j,k+1,1)));
                Real fracx = (ccm(ii,j,k) || ccm(ii,j,k+1)) ? std::abs(fcz(i,j,k+1,0)) : Real(0.0);
                Real fracy = (ccm(i,jj,k) || ccm(i,jj,k+1)) ? std::abs(fcz(i,j,k+1,1)) : Real(0.0);
                if (beta_on_center && phi_on_center)
                {
                    fzp = (Real(1.0)-fracx)*(Real(1.0)-fracy)*fzp +
                        fracx*(Real(1.0)-fracy)*bZ(ii,j ,k+1,n)*(x(ii,j ,k+1,n)-x(ii,j ,k,n)) +
                        fracy*(Real(1.0)-fracx)*bZ(i ,jj,k+1,n)*(x(i ,jj,k+1,n)-x(i ,jj,k,n)) +
                        fracx*     fracy *bZ(ii,jj,k+1,n)*(x(ii,jj,k+1,n)-x(ii,jj,k,n));
                }
                else if (beta_on_centroid && phi_on_center)
                {
                    fzp = (Real(1.0)-fracx)*(Real(1.0)-fracy)*(x( i, j,k+1,n)-x( i, j,k,n)) +
                               fracx *(Real(1.0)-fracy)*(x(ii, j,k+1,n)-x(ii, j,k,n)) +
                               fracy *(Real(1.0)-fracx)*(x( i,jj,k+1,n)-x( i,jj,k,n)) +
                               fracx *     fracy *(x(ii,jj,k+1,n)-x(ii,jj,k,n));
                    fzp *= bZ(i,j,k+1,n);

                }
            }

            // Dirichlet EB flux. Note that, unlike mlebabeclap_ebflux, there is
            // no isSingleValued() check here: every non-covered, non-regular
            // cell takes this path when is_dirichlet is set.
            Real feb = Real(0.0);
            if (is_dirichlet) {
                // Unit vector built from the low-minus-high differences of the
                // face weights in each direction.
                Real dapx = apxm-apxp;
                Real dapy = apym-apyp;
                Real dapz = apzm-apzp;
                Real anorm = std::sqrt(dapx*dapx+dapy*dapy+dapz*dapz);
                Real anorminv = Real(1.0)/anorm;
                Real anrmx = dapx * anorminv;
                Real anrmy = dapy * anorminv;
                Real anrmz = dapz * anorminv;

                // Value on the EB: phieb for an inhomogeneous BC, else zero.
                Real phib = is_inhomog ? phieb(i,j,k,n) : Real(0.0);

                Real bctx = bc(i,j,k,0);
                Real bcty = bc(i,j,k,1);
                Real bctz = bc(i,j,k,2);
                // get_dx_eb is defined outside this file; it maps kappa to a
                // distance used below.
                Real dx_eb = get_dx_eb(kappa);

                // Step of length dg from (bctx,bcty,bctz) against the unit
                // vector; dividing by the largest |component| makes the step
                // equal to dx_eb along that dominant axis.
                Real dg = dx_eb / amrex::max(std::abs(anrmx), std::abs(anrmy),
                                             std::abs(anrmz));
                Real gx = bctx - dg*anrmx;
                Real gy = bcty - dg*anrmy;
                Real gz = bctz - dg*anrmz;
                // Neighbour cells on the side opposite to the sign of each
                // vector component.
                Real sx = std::copysign(Real(1.0),anrmx);
                Real sy = std::copysign(Real(1.0),anrmy);
                Real sz = std::copysign(Real(1.0),anrmz);
                int ii = i - static_cast<int>(sx);
                int jj = j - static_cast<int>(sy);
                int kk = k - static_cast<int>(sz);

                // Fold the signs into the offsets so a single set of weights
                // serves every orientation.
                gx = sx*gx;
                gy = sy*gy;
                gz = sz*gz;
                Real gxy = gx*gy;
                Real gxz = gx*gz;
                Real gyz = gy*gz;
                Real gxyz = gx*gy*gz;
                // Trilinear combination of x over the 2x2x2 block
                // {i,ii} x {j,jj} x {k,kk}; the eight weights sum to one.
                Real phig = (Real(1.0)+gx+gy+gz+gxy+gxz+gyz+gxyz) * x(i ,j ,k ,n)
                    +       (-gz - gxz - gyz - gxyz)        * x(i ,j ,kk,n)
                    +       (-gy - gxy - gyz - gxyz)        * x(i ,jj,k ,n)
                    +       (gyz + gxyz)                    * x(i ,jj,kk,n)
                    +       (-gx - gxy - gxz - gxyz)        * x(ii,j ,k ,n)
                    +       (gxz + gxyz)                    * x(ii,j ,kk,n)
                    +       (gxy + gxyz)                    * x(ii,jj,k ,n)
                    +       (-gxyz)                         * x(ii,jj,kk,n);

                // One-sided difference between the EB value and the
                // interpolated value, over the step length.
                Real dphidn = (phib-phig)/dg;

                feb = dphidn * ba(i,j,k) * beb(i,j,k,n);
            }

            // Weighted flux balance divided by kappa.
            // NOTE(review): the EB term is scaled by dhx only -- presumably
            // this relies on equal cell spacing in all directions; confirm.
            y(i,j,k,n) = alpha*a(i,j,k)*x(i,j,k,n) + (Real(1.0)/kappa) *
                (dhx*(apxm*fxm - apxp*fxp) +
                 dhy*(apym*fym - apyp*fyp) +
                 dhz*(apzm*fzm - apzp*fzp) - dhx*feb);
        }
    });
}

// Compute the embedded-boundary flux for one cell (i,j,k) and component n and
// store it in feb(i,j,k,n).
//
// Cells whose flag is not single-valued get a zero flux. Otherwise a unit
// vector is built from the low-minus-high differences of apx/apy/apz, x is
// trilinearly interpolated at a point offset from (bc(.,0),bc(.,1),bc(.,2))
// against that vector, and the flux is
//     -beb * dxinv[0] * (phib - phig) / dg
// where phib is phieb(i,j,k,n) when is_inhomog is true and zero otherwise.
//
// NOTE(review): get_dx_eb is defined outside this file; it maps vfrc(i,j,k)
// to the distance used for the offset.
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void mlebabeclap_ebflux (int i, int j, int k, int n,
                         Array4<Real> const& feb,
                         Array4<Real const> const& x,
                         Array4<EBCellFlag const> const& flag,
                         Array4<Real const> const& vfrc,
                         Array4<Real const> const& apx,
                         Array4<Real const> const& apy,
                         Array4<Real const> const& apz,
                         Array4<Real const> const& bc,
                         Array4<Real const> const& beb,
                         Array4<Real const> const& phieb,
                         bool is_inhomog,
                         GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
{
    // Guard: nothing to compute unless the cell is single-valued.
    if (!flag(i,j,k).isSingleValued())
    {
        feb(i,j,k,n) = Real(0.0);
        return;
    }

    Real const inv_dx = dxinv[0];
    Real const cell_kappa = vfrc(i,j,k);

    // Low-face minus high-face weight in each direction.
    Real const jump_x = apx(i,j,k) - apx(i+1,j,k);
    Real const jump_y = apy(i,j,k) - apy(i,j+1,k);
    Real const jump_z = apz(i,j,k) - apz(i,j,k+1);

    // Normalise to a unit vector.
    Real const jump_len = std::sqrt(jump_x*jump_x+jump_y*jump_y+jump_z*jump_z);
    Real const inv_len = Real(1.0)/jump_len;
    Real const nrm_x = jump_x * inv_len;
    Real const nrm_y = jump_y * inv_len;
    Real const nrm_z = jump_z * inv_len;

    // Value on the EB: zero unless the boundary condition is inhomogeneous.
    Real eb_value = Real(0.0);
    if (is_inhomog) {
        eb_value = phieb(i,j,k,n);
    }

    // Step length: the get_dx_eb distance measured along the dominant axis.
    Real const eb_dist = get_dx_eb(cell_kappa);
    Real const dg = eb_dist / amrex::max(std::abs(nrm_x), std::abs(nrm_y), std::abs(nrm_z));

    // Interpolation point relative to the cell, before sign folding.
    Real const off_x = bc(i,j,k,0) - dg*nrm_x;
    Real const off_y = bc(i,j,k,1) - dg*nrm_y;
    Real const off_z = bc(i,j,k,2) - dg*nrm_z;

    // Orientation of each vector component and the neighbour cell on the
    // opposite side.
    Real const sgn_x = std::copysign(Real(1.0),nrm_x);
    Real const sgn_y = std::copysign(Real(1.0),nrm_y);
    Real const sgn_z = std::copysign(Real(1.0),nrm_z);
    int const inb = i - static_cast<int>(sgn_x);
    int const jnb = j - static_cast<int>(sgn_y);
    int const knb = k - static_cast<int>(sgn_z);

    // Fold the signs into the offsets so one set of weights serves every
    // orientation.
    Real const gx = sgn_x*off_x;
    Real const gy = sgn_y*off_y;
    Real const gz = sgn_z*off_z;
    Real const gxy = gx*gy;
    Real const gxz = gx*gz;
    Real const gyz = gy*gz;
    Real const gxyz = gx*gy*gz;

    // Trilinear weights over the 2x2x2 block {i,inb} x {j,jnb} x {k,knb}.
    Real const w_ctr = (Real(1.0)+gx+gy+gz+gxy+gxz+gyz+gxyz);
    Real const w_z   = (-gz - gxz - gyz - gxyz);
    Real const w_y   = (-gy - gxy - gyz - gxyz);
    Real const w_yz  = (gyz + gxyz);
    Real const w_x   = (-gx - gxy - gxz - gxyz);
    Real const w_xz  = (gxz + gxyz);
    Real const w_xy  = (gxy + gxyz);
    Real const w_xyz = (-gxyz);

    Real const phig = w_ctr * x(i  ,j  ,k  ,n)
        +             w_z   * x(i  ,j  ,knb,n)
        +             w_y   * x(i  ,jnb,k  ,n)
        +             w_yz  * x(i  ,jnb,knb,n)
        +             w_x   * x(inb,j  ,k  ,n)
        +             w_xz  * x(inb,j  ,knb,n)
        +             w_xy  * x(inb,jnb,k  ,n)
        +             w_xyz * x(inb,jnb,knb,n);

    // One-sided difference from the interpolated value to the EB value,
    // scaled by the inverse cell size.
    Real const dphidn = inv_dx*(eb_value-phig)/dg;
    feb(i,j,k,n) = -beb(i,j,k,n) * dphidn;
}

/**
 * \brief One coloured pass of an over-relaxed point relaxation on phi (3D).
 *
 * Every (i,j,k,n) in \p box with (i+j+k+redblack) even is updated in place:
 *   - cells flagged as covered are set to zero;
 *   - cells flagged as regular use the plain 7-point stencil built from bX/bY/bZ;
 *   - every other cell assembles per-face terms weighted by the apx/apy/apz
 *     values taken from \p ebdata, optionally blended with neighbouring faces,
 *     plus an extra embedded-boundary term when \p is_dirichlet is set.
 * In the last two cases the update reads
 *     phi += omega * (rhs - (gamma*phi - rho)) / (gamma - delta),  omega = 1.15.
 *
 * \param box      index range that is swept
 * \param phi      solution array, read and updated in place
 * \param rhs      right-hand side
 * \param alpha    scalar multiplying a(i,j,k) in the diagonal
 * \param a        coefficient array (read without a component index)
 * \param dhx,dhy,dhz  scale factors applied to the x/y/z face terms
 *                 (presumably beta/dx^2 etc. -- confirm against the caller)
 * \param bX,bY,bZ face coefficient arrays
 * \param m0..m5   integer masks; a positive value just outside \p vbox enables
 *                 the matching f coefficient. Index meaning as used below:
 *                 0/1/2 = low x/y/z side, 3/4/5 = high x/y/z side.
 *                 NOTE: the parameter list orders them m0,m2,m4,m1,m3,m5.
 * \param f0..f5   coefficients paired with m0..m5 (same ordering caveat)
 * \param ccm      integer array; nonzero neighbours enable the face blend
 *                 (presumably a cell-connectivity mask -- confirm)
 * \param beb      coefficient of the embedded-boundary term (used only when
 *                 \p is_dirichlet is true)
 * \param ebdata   accessor for cell flag, volume fraction, apx/apy/apz,
 *                 fcx/fcy/fcz, boundary centroid and boundary area
 * \param is_dirichlet      add the embedded-boundary contribution
 * \param beta_on_centroid  selects which blend formula is used on cut faces
 * \param phi_on_centroid   when true, no face blend is applied to the f terms
 * \param vbox     box whose low/high planes decide where f0..f5 are picked up
 * \param redblack parity offset selecting which cells are updated
 * \param ncomp    number of components looped over
 */
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void mlebabeclap_gsrb (Box const& box,
                       Array4<Real> const& phi, Array4<Real const> const& rhs,
                       Real alpha, Array4<Real const> const& a,
                       Real dhx, Real dhy, Real dhz,
                       Array4<Real const> const& bX, Array4<Real const> const& bY,
                       Array4<Real const> const& bZ,
                       Array4<int const> const& m0, Array4<int const> const& m2,
                       Array4<int const> const& m4,
                       Array4<int const> const& m1, Array4<int const> const& m3,
                       Array4<int const> const& m5,
                       Array4<Real const> const& f0, Array4<Real const> const& f2,
                       Array4<Real const> const& f4,
                       Array4<Real const> const& f1, Array4<Real const> const& f3,
                       Array4<Real const> const& f5,
                       Array4<const int> const& ccm, Array4<Real const> const& beb,
                       EBData const& ebdata,
                       bool is_dirichlet, bool beta_on_centroid, bool phi_on_centroid,
                       Box const& vbox, int redblack, int ncomp) noexcept
{
    // Over-relaxation factor applied to every update below.
    constexpr Real omega = 1.15;

    const auto vlo = amrex::lbound(vbox);
    const auto vhi = amrex::ubound(vbox);

//    amrex::Loop(box, ncomp, [=] (int i, int j, int k, int n) noexcept
    // amrex::Loop here causes gcc 8 to crash.
    const auto lo = amrex::lbound(box);
    const auto hi = amrex::ubound(box);
    for (int n = 0; n < ncomp; ++n) {
    for (int k = lo.z; k <= hi.z; ++k) {
    for (int j = lo.y; j <= hi.y; ++j) {
    for (int i = lo.x; i <= hi.x; ++i)
    {
        // Only cells of the requested colour are touched in this pass.
        if ((i+j+k+redblack) % 2 == 0)
        {
            auto const flag = ebdata.get<EBData_t::cellflag>(i,j,k);
            if (flag.isCovered())
            {
                phi(i,j,k,n) = Real(0.0);
            }
            else
            {
                // Boundary coefficients: nonzero only on the matching low/high
                // plane of vbox, and only where the mask one cell outside is > 0.
                Real cf0 = (i == vlo.x && m0(vlo.x-1,j,k) > 0)
                    ? f0(vlo.x,j,k,n) : Real(0.0);
                Real cf1 = (j == vlo.y && m1(i,vlo.y-1,k) > 0)
                    ? f1(i,vlo.y,k,n) : Real(0.0);
                Real cf2 = (k == vlo.z && m2(i,j,vlo.z-1) > 0)
                    ? f2(i,j,vlo.z,n) : Real(0.0);
                Real cf3 = (i == vhi.x && m3(vhi.x+1,j,k) > 0)
                    ? f3(vhi.x,j,k,n) : Real(0.0);
                Real cf4 = (j == vhi.y && m4(i,vhi.y+1,k) > 0)
                    ? f4(i,vhi.y,k,n) : Real(0.0);
                Real cf5 = (k == vhi.z && m5(i,j,vhi.z+1) > 0)
                    ? f5(i,j,vhi.z,n) : Real(0.0);

                if (flag.isRegular())
                {
                    // gamma: diagonal coefficient of the 7-point stencil.
                    Real gamma = alpha*a(i,j,k)
                        + dhx*(bX(i+1,j,k,n) + bX(i,j,k,n))
                        + dhy*(bY(i,j+1,k,n) + bY(i,j,k,n))
                        + dhz*(bZ(i,j,k+1,n) + bZ(i,j,k,n));

                    // rho: off-diagonal part, i.e. the six neighbours weighted
                    // by the face coefficient they share with this cell.
                    Real rho = dhx*(bX(i+1,j  ,k  ,n)*phi(i+1,j  ,k  ,n) +
                                    bX(i  ,j  ,k  ,n)*phi(i-1,j  ,k  ,n))
                        +      dhy*(bY(i  ,j+1,k  ,n)*phi(i  ,j+1,k  ,n) +
                                    bY(i  ,j  ,k  ,n)*phi(i  ,j-1,k  ,n))
                        +      dhz*(bZ(i  ,j  ,k+1,n)*phi(i  ,j  ,k+1,n) +
                                    bZ(i  ,j  ,k  ,n)*phi(i  ,j  ,k-1,n));

                    // delta: boundary-coefficient correction, removed from the
                    // diagonal in the denominator of the update.
                    Real delta = dhx*(bX(i,j,k,n)*cf0 + bX(i+1,j,k,n)*cf3)
                        +        dhy*(bY(i,j,k,n)*cf1 + bY(i,j+1,k,n)*cf4)
                        +        dhz*(bZ(i,j,k,n)*cf2 + bZ(i,j,k+1,n)*cf5);

                    Real res = rhs(i,j,k,n) - (gamma*phi(i,j,k,n) - rho);
                    phi(i,j,k,n) += omega*res/(gamma-delta);
                }
                else
                {
                    // Neither covered nor regular: assemble the stencil face by
                    // face. For each face (x/y/z, m = low side, p = high side):
                    //   f?? feeds rho   (neighbour-value part),
                    //   o?? feeds delta (boundary-coefficient part),
                    //   s?? feeds gamma (diagonal part).
                    Real kappa = ebdata.get<EBData_t::volfrac>(i,j,k);
                    Real apxm = ebdata.get<EBData_t::apx>(i  ,j  ,k  );
                    Real apxp = ebdata.get<EBData_t::apx>(i+1,j  ,k  );
                    Real apym = ebdata.get<EBData_t::apy>(i  ,j  ,k  );
                    Real apyp = ebdata.get<EBData_t::apy>(i  ,j+1,k  );
                    Real apzm = ebdata.get<EBData_t::apz>(i  ,j  ,k  );
                    Real apzp = ebdata.get<EBData_t::apz>(i  ,j  ,k+1);

                    // ---- low-x face ----
                    Real fxm = -bX(i,j,k,n)*phi(i-1,j,k,n);
                    Real oxm = -bX(i,j,k,n)*cf0;
                    Real sxm =  bX(i,j,k,n);
                    // Faces with ap strictly different from 0 and 1 get special
                    // treatment: the sign of each fcx component picks the
                    // neighbouring row (jj/kk), its magnitude is the blend
                    // weight, and the weight is dropped to zero unless ccm is
                    // nonzero in at least one of the two cells next to that
                    // neighbouring face.
                    if (apxm != Real(0.0) && apxm != Real(1.0)) {
                        auto fcx0 = ebdata.get<EBData_t::fcx>(i,j,k,0);
                        auto fcx1 = ebdata.get<EBData_t::fcx>(i,j,k,1);
                        int jj = j + static_cast<int>(std::copysign(Real(1.0), fcx0));
                        int kk = k + static_cast<int>(std::copysign(Real(1.0), fcx1));
                        Real fracy = (ccm(i-1,jj,k) || ccm(i,jj,k))
                            ? std::abs(fcx0) : Real(0.0);
                        Real fracz = (ccm(i-1,j,kk) || ccm(i,j,kk))
                            ? std::abs(fcx1) : Real(0.0);
                        if (!beta_on_centroid && !phi_on_centroid)
                        {
                            // Blend of four face terms, each with its own bX.
                            fxm = (Real(1.0)-fracy)*(Real(1.0)-fracz)*fxm
                                 +     fracy *(Real(1.0)-fracz)*bX(i,jj,k ,n)*(phi(i,jj,k ,n)-phi(i-1,jj,k ,n))
                                 +(Real(1.0)-fracy)*     fracz *bX(i,j ,kk,n)*(phi(i,j ,kk,n)-phi(i-1,j ,kk,n))
                                 +     fracy *     fracz *bX(i,jj,kk,n)*(phi(i,jj,kk,n)-phi(i-1,jj,kk,n));
                        }
                        else if (beta_on_centroid && !phi_on_centroid)
                        {
                            // Blend the phi differences only, then apply the
                            // single bX of this face. The first term omits
                            // phi(i,j,k): that part lives in sxm (diagonal).
                            fxm = (Real(1.0)-fracy)*(Real(1.0)-fracz)*(              -phi(i-1, j, k,n))
                                 +     fracy *(Real(1.0)-fracz)*(phi(i,jj,k ,n)-phi(i-1,jj, k,n))
                                 +(Real(1.0)-fracy)*     fracz *(phi(i,j ,kk,n)-phi(i-1, j,kk,n))
                                 +     fracy *     fracz *(phi(i,jj,kk,n)-phi(i-1,jj,kk,n));
                            fxm *= bX(i,j,k,n);

                        }
                        // On such faces the boundary-coefficient term is dropped
                        // and the diagonal part is scaled by the weight of the
                        // un-shifted face. Note this also happens when
                        // phi_on_centroid is true and fxm was left unblended.
                        oxm = Real(0.0);
                        sxm = (Real(1.0)-fracy)*(Real(1.0)-fracz)*sxm;
                    }

                    // ---- high-x face (same scheme, face index i+1) ----
                    Real fxp =  bX(i+1,j,k,n)*phi(i+1,j,k,n);
                    Real oxp =  bX(i+1,j,k,n)*cf3;
                    Real sxp = -bX(i+1,j,k,n);
                    if (apxp != Real(0.0) && apxp != Real(1.0)) {
                        auto fcx0 = ebdata.get<EBData_t::fcx>(i+1,j,k,0);
                        auto fcx1 = ebdata.get<EBData_t::fcx>(i+1,j,k,1);
                        int jj = j + static_cast<int>(std::copysign(Real(1.0),fcx0));
                        int kk = k + static_cast<int>(std::copysign(Real(1.0),fcx1));
                        Real fracy = (ccm(i,jj,k) || ccm(i+1,jj,k))
                            ? std::abs(fcx0) : Real(0.0);
                        Real fracz = (ccm(i,j,kk) || ccm(i+1,j,kk))
                            ? std::abs(fcx1) : Real(0.0);
                        if (!beta_on_centroid && !phi_on_centroid)
                        {
                            fxp = (Real(1.0)-fracy)*(Real(1.0)-fracz)*fxp
                                  +    fracy *(Real(1.0)-fracz)*bX(i+1,jj,k ,n)*(phi(i+1,jj,k ,n)-phi(i,jj,k ,n))
                                 +(Real(1.0)-fracy)*     fracz *bX(i+1,j ,kk,n)*(phi(i+1,j ,kk,n)-phi(i,j ,kk,n))
                                 +     fracy *     fracz *bX(i+1,jj,kk,n)*(phi(i+1,jj,kk,n)-phi(i,jj,kk,n));
                        }
                        else if (beta_on_centroid && !phi_on_centroid)
                        {
                            fxp = (Real(1.0)-fracy)*(Real(1.0)-fracz)*(phi(i+1, j, k,n)               ) +
                                       fracy *(Real(1.0)-fracz)*(phi(i+1,jj, k,n)-phi(i,jj, k,n)) +
                                       fracz *(Real(1.0)-fracy)*(phi(i+1, j,kk,n)-phi(i, j,kk,n)) +
                                       fracy *     fracz *(phi(i+1,jj,kk,n)-phi(i,jj,kk,n));
                            fxp *= bX(i+1,j,k,n);

                        }

                        oxp = Real(0.0);
                        sxp = (Real(1.0)-fracy)*(Real(1.0)-fracz)*sxp;
                    }

                    // ---- low-y face (blend directions are x and z) ----
                    Real fym = -bY(i,j,k,n)*phi(i,j-1,k,n);
                    Real oym = -bY(i,j,k,n)*cf1;
                    Real sym =  bY(i,j,k,n);
                    if (apym != Real(0.0) && apym != Real(1.0)) {
                        auto fcy0 = ebdata.get<EBData_t::fcy>(i,j,k,0);
                        auto fcy1 = ebdata.get<EBData_t::fcy>(i,j,k,1);
                        int ii = i + static_cast<int>(std::copysign(Real(1.0),fcy0));
                        int kk = k + static_cast<int>(std::copysign(Real(1.0),fcy1));
                        Real fracx = (ccm(ii,j-1,k) || ccm(ii,j,k))
                            ? std::abs(fcy0) : Real(0.0);
                        Real fracz = (ccm(i,j-1,kk) || ccm(i,j,kk))
                            ? std::abs(fcy1) : Real(0.0);
                        if (!beta_on_centroid && !phi_on_centroid)
                        {
                            fym = (Real(1.0)-fracx)*(Real(1.0)-fracz)*fym
                                +      fracx *(Real(1.0)-fracz)*bY(ii,j,k ,n)*(phi(ii,j,k ,n)-phi(ii,j-1,k ,n))
                                + (Real(1.0)-fracx)*     fracz *bY(i ,j,kk,n)*(phi(i ,j,kk,n)-phi(i ,j-1,kk,n))
                                +      fracx *     fracz *bY(ii,j,kk,n)*(phi(ii,j,kk,n)-phi(ii,j-1,kk,n));
                        }
                        else if (beta_on_centroid && !phi_on_centroid)
                        {
                            fym = (Real(1.0)-fracx)*(Real(1.0)-fracz)*(              -phi( i,j-1, k,n))
                                +      fracx *(Real(1.0)-fracz)*(phi(ii,j,k ,n)-phi(ii,j-1, k,n))
                                + (Real(1.0)-fracx)*     fracz *(phi(i ,j,kk,n)-phi( i,j-1,kk,n))
                                +      fracx *     fracz *(phi(ii,j,kk,n)-phi(ii,j-1,kk,n));
                            fym *= bY(i,j,k,n);

                        }
                        oym = Real(0.0);
                        sym = (Real(1.0)-fracx)*(Real(1.0)-fracz)*sym;
                    }

                    // ---- high-y face ----
                    Real fyp =  bY(i,j+1,k,n)*phi(i,j+1,k,n);
                    Real oyp =  bY(i,j+1,k,n)*cf4;
                    Real syp = -bY(i,j+1,k,n);
                    if (apyp != Real(0.0) && apyp != Real(1.0)) {
                        auto fcy0 = ebdata.get<EBData_t::fcy>(i,j+1,k,0);
                        auto fcy1 = ebdata.get<EBData_t::fcy>(i,j+1,k,1);
                        int ii = i + static_cast<int>(std::copysign(Real(1.0),fcy0));
                        int kk = k + static_cast<int>(std::copysign(Real(1.0),fcy1));
                        Real fracx = (ccm(ii,j,k) || ccm(ii,j+1,k))
                            ? std::abs(fcy0) : Real(0.0);
                        Real fracz = (ccm(i,j,kk) || ccm(i,j+1,kk))
                            ? std::abs(fcy1) : Real(0.0);
                        if (!beta_on_centroid && !phi_on_centroid)
                        {
                            fyp = (Real(1.0)-fracx)*(Real(1.0)-fracz)*fyp
                                +      fracx *(Real(1.0)-fracz)*bY(ii,j+1,k ,n)*(phi(ii,j+1,k ,n)-phi(ii,j,k ,n))
                                + (Real(1.0)-fracx)*     fracz *bY(i ,j+1,kk,n)*(phi(i ,j+1,kk,n)-phi(i ,j,kk,n))
                                +      fracx *     fracz *bY(ii,j+1,kk,n)*(phi(ii,j+1,kk,n)-phi(ii,j,kk,n));
                        }
                        else if (beta_on_centroid && !phi_on_centroid)
                        {
                            fyp = (Real(1.0)-fracx)*(Real(1.0)-fracz)*(phi( i,j+1, k,n)               )
                                +      fracx *(Real(1.0)-fracz)*(phi(ii,j+1, k,n)-phi(ii,j, k,n))
                                + (Real(1.0)-fracx)*     fracz *(phi( i,j+1,kk,n)-phi( i,j,kk,n))
                                +      fracx *     fracz *(phi(ii,j+1,kk,n)-phi(ii,j,kk,n));
                            fyp *= bY(i,j+1,k,n);

                        }
                        oyp = Real(0.0);
                        syp = (Real(1.0)-fracx)*(Real(1.0)-fracz)*syp;
                    }

                    // ---- low-z face (blend directions are x and y) ----
                    Real fzm = -bZ(i,j,k,n)*phi(i,j,k-1,n);
                    Real ozm = -bZ(i,j,k,n)*cf2;
                    Real szm =  bZ(i,j,k,n);
                    if (apzm != Real(0.0) && apzm != Real(1.0)) {
                        auto fcz0 = ebdata.get<EBData_t::fcz>(i,j,k,0);
                        auto fcz1 = ebdata.get<EBData_t::fcz>(i,j,k,1);
                        int ii = i + static_cast<int>(std::copysign(Real(1.0),fcz0));
                        int jj = j + static_cast<int>(std::copysign(Real(1.0),fcz1));
                        Real fracx = (ccm(ii,j,k-1) || ccm(ii,j,k))
                            ? std::abs(fcz0) : Real(0.0);
                        Real fracy = (ccm(i,jj,k-1) || ccm(i,jj,k))
                            ? std::abs(fcz1) : Real(0.0);
                        if (!beta_on_centroid && !phi_on_centroid)
                        {
                            fzm = (Real(1.0)-fracx)*(Real(1.0)-fracy)*fzm
                                 +     fracx *(Real(1.0)-fracy)*bZ(ii, j,k,n)*(phi(ii, j,k,n)-phi(ii, j,k-1,n))
                                 +(Real(1.0)-fracx)*     fracy *bZ( i,jj,k,n)*(phi( i,jj,k,n)-phi( i,jj,k-1,n))
                                 +     fracx *     fracy *bZ(ii,jj,k,n)*(phi(ii,jj,k,n)-phi(ii,jj,k-1,n));
                        }
                        else if (beta_on_centroid && !phi_on_centroid)
                        {
                            fzm = (Real(1.0)-fracx)*(Real(1.0)-fracy)*(              -phi( i, j,k-1,n))
                                +      fracx *(Real(1.0)-fracy)*(phi(ii, j,k,n)-phi(ii, j,k-1,n))
                                + (Real(1.0)-fracx)*     fracy *(phi( i,jj,k,n)-phi(i ,jj,k-1,n))
                                +      fracx *     fracy *(phi(ii,jj,k,n)-phi(ii,jj,k-1,n));
                            fzm *= bZ(i,j,k,n);

                        }
                        ozm = Real(0.0);
                        szm = (Real(1.0)-fracx)*(Real(1.0)-fracy)*szm;
                    }

                    // ---- high-z face ----
                    Real fzp =  bZ(i,j,k+1,n)*phi(i,j,k+1,n);
                    Real ozp =  bZ(i,j,k+1,n)*cf5;
                    Real szp = -bZ(i,j,k+1,n);
                    if (apzp != Real(0.0) && apzp != Real(1.0)) {
                        auto fcz0 = ebdata.get<EBData_t::fcz>(i,j,k+1,0);
                        auto fcz1 = ebdata.get<EBData_t::fcz>(i,j,k+1,1);
                        int ii = i + static_cast<int>(std::copysign(Real(1.0),fcz0));
                        int jj = j + static_cast<int>(std::copysign(Real(1.0),fcz1));
                        Real fracx = (ccm(ii,j,k) || ccm(ii,j,k+1))
                            ? std::abs(fcz0) : Real(0.0);
                        Real fracy = (ccm(i,jj,k) || ccm(i,jj,k+1))
                            ? std::abs(fcz1) : Real(0.0);
                        if (!beta_on_centroid && !phi_on_centroid)
                        {
                            fzp = (Real(1.0)-fracx)*(Real(1.0)-fracy)*fzp
                                +      fracx *(Real(1.0)-fracy)*bZ(ii,j ,k+1,n)*(phi(ii,j ,k+1,n)-phi(ii,j ,k,n))
                                + (Real(1.0)-fracx)*     fracy *bZ(i ,jj,k+1,n)*(phi(i ,jj,k+1,n)-phi(i ,jj,k,n))
                                +      fracx *     fracy *bZ(ii,jj,k+1,n)*(phi(ii,jj,k+1,n)-phi(ii,jj,k,n));
                        }
                        else if (beta_on_centroid && !phi_on_centroid)
                        {
                            fzp = (Real(1.0)-fracx)*(Real(1.0)-fracy)*(phi( i, j,k+1,n)               )
                                +      fracx *(Real(1.0)-fracy)*(phi(ii, j,k+1,n)-phi(ii, j,k,n))
                                + (Real(1.0)-fracx)*     fracy *(phi( i,jj,k+1,n)-phi( i,jj,k,n))
                                +      fracx *     fracy *(phi(ii,jj,k+1,n)-phi(ii,jj,k,n));
                            fzp *= bZ(i,j,k+1,n);

                        }
                        ozp = Real(0.0);
                        szp = (Real(1.0)-fracx)*(Real(1.0)-fracy)*szp;
                    }

                    // Combine the six faces: each term is weighted by its ap
                    // value and the sum is divided by the volume fraction.
                    // NOTE(review): no guard against kappa == 0 here; covered
                    // cells were filtered out above -- confirm that is enough.
                    Real vfrcinv = Real(1.0)/kappa;
                    Real gamma = alpha*a(i,j,k) + vfrcinv *
                        (dhx*(apxm*sxm-apxp*sxp) +
                         dhy*(apym*sym-apyp*syp) +
                         dhz*(apzm*szm-apzp*szp));

                    Real rho = -vfrcinv *
                        (dhx*(apxm*fxm-apxp*fxp) +
                         dhy*(apym*fym-apyp*fyp) +
                         dhz*(apzm*fzm-apzp*fzp));

                    Real delta = -vfrcinv *
                        (dhx*(apxm*oxm-apxp*oxp) +
                         dhy*(apym*oym-apyp*oyp) +
                         dhz*(apzm*ozm-apzp*ozp));

                    if (is_dirichlet) {
                        // Unit direction built from the low-minus-high ap
                        // differences (presumably the EB normal -- confirm).
                        // NOTE(review): anorm == 0 would divide by zero.
                        Real dapx = apxm-apxp;
                        Real dapy = apym-apyp;
                        Real dapz = apzm-apzp;
                        Real anorm = std::sqrt(dapx*dapx+dapy*dapy+dapz*dapz);
                        Real anorminv = Real(1.0)/anorm;
                        Real anrmx = dapx * anorminv;
                        Real anrmy = dapy * anorminv;
                        Real anrmz = dapz * anorminv;
                        Real bctx = ebdata.get<EBData_t::bndrycent>(i,j,k,0);
                        Real bcty = ebdata.get<EBData_t::bndrycent>(i,j,k,1);
                        Real bctz = ebdata.get<EBData_t::bndrycent>(i,j,k,2);
                        Real dx_eb = get_dx_eb(kappa);

                        // Step length along that direction: dx_eb divided by
                        // its largest component in magnitude.
                        Real dg = dx_eb / amrex::max(std::abs(anrmx),std::abs(anrmy),
                                                     std::abs(anrmz));

                        // Point reached by stepping dg back from the boundary
                        // centroid; (ii,jj,kk) is the neighbour cell on that side.
                        Real gx = bctx - dg*anrmx;
                        Real gy = bcty - dg*anrmy;
                        Real gz = bctz - dg*anrmz;
                        Real sx = std::copysign(Real(1.0),anrmx);
                        Real sy = std::copysign(Real(1.0),anrmy);
                        Real sz = std::copysign(Real(1.0),anrmz);
                        int ii = i - static_cast<int>(sx);
                        int jj = j - static_cast<int>(sy);
                        int kk = k - static_cast<int>(sz);

                        // Fold the signs into the offsets so the weights below
                        // can be written once for all orientations.
                        gx *= sx;
                        gy *= sy;
                        gz *= sz;
                        Real gxy = gx*gy;
                        Real gxz = gx*gz;
                        Real gyz = gy*gz;
                        Real gxyz = gx*gy*gz;
                        // Eight-cell interpolation weights. The weight on
                        // phi(i,j,k) is kept apart (phig_gamma) because it
                        // goes into the diagonal; phig holds the other seven.
                        Real phig_gamma = (Real(1.0)+gx+gy+gz+gxy+gxz+gyz+gxyz);
                        Real phig = (-gz - gxz - gyz - gxyz) * phi(i,j,kk,n)
                            + (-gy - gxy - gyz - gxyz) * phi(i,jj,k,n)
                            + (gyz + gxyz) * phi(i,jj,kk,n)
                            + (-gx - gxy - gxz - gxyz) * phi(ii,j,k,n)
                            + (gxz + gxyz) * phi(ii,j,kk,n)
                            + (gxy + gxyz) * phi(ii,jj,k,n)
                            + (-gxyz) * phi(ii,jj,kk,n);

                        Real ba = ebdata.get<EBData_t::bndryarea>(i,j,k);

                        // No boundary-value term appears in dphidn, i.e. the
                        // value on the embedded boundary is taken as zero here.
                        // NOTE(review): dhx is used for this term irrespective
                        // of direction -- presumably relies on dhx == dhy == dhz;
                        // confirm.
                        Real dphidn    = (    -phig)/dg;
                        Real feb_gamma = -phig_gamma/dg * ba * beb(i,j,k,n);
                        gamma += vfrcinv*(-dhx)*feb_gamma;
                        Real feb = dphidn * ba * beb(i,j,k,n);
                        rho += -vfrcinv*(-dhx)*feb;
                    }

                    // Same over-relaxed update as in the regular branch.
                    Real res = rhs(i,j,k,n) - (gamma*phi(i,j,k,n) - rho);
                    phi(i,j,k,n) += omega*res/(gamma-delta);
                }
            }
        }
    }}}}
//    });
}

/**
 * \brief Fill the x-face array fx from differences of sol across each face.
 *
 * Per face (i,j,k) and component n:
 *   - apx == 0: fx is set to zero;
 *   - apx == 1: fx = -dhx * bX * (sol(i) - sol(i-1));
 *   - otherwise: the difference is blended with those of the neighbouring
 *     faces in y and z. The sign of fcx(...,0)/fcx(...,1) selects the
 *     neighbour and its magnitude is the weight, which is used only if ccm
 *     is nonzero in one of the two cells adjacent to the neighbouring face.
 *     With phi_on_centroid set, no blend is applied.
 *
 * \param face_only        if nonzero, only the lowest and highest x-planes
 *                         of xbox are written
 * \param beta_on_centroid if true, the blended difference is multiplied by
 *                         the single bX of this face; otherwise each
 *                         neighbour term uses its own bX
 */
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void mlebabeclap_flux_x (Box const& box, Array4<Real> const& fx, Array4<Real const> const& apx,
                         Array4<Real const> const& fcx, Array4<Real const> const& sol,
                         Array4<Real const> const& bX, Array4<int const> const& ccm,
                         Real dhx, int face_only, int ncomp, Box const& xbox,
                         bool beta_on_centroid, bool phi_on_centroid) noexcept
{
    const int plane_lo = xbox.smallEnd(0);
    const int plane_hi = xbox.bigEnd(0);
    amrex::LoopConcurrent(box, ncomp, [=] (int i, int j, int k, int n) noexcept
    {
        // In face-only mode skip everything but the two bounding planes.
        if (face_only && i != plane_lo && i != plane_hi) { return; }

        const Real open = apx(i,j,k);
        if (open == Real(0.0)) {
            fx(i,j,k,n) = Real(0.0);
            return;
        }
        if (open == Real(1.0)) {
            fx(i,j,k,n) = -dhx*bX(i,j,k,n)*(sol(i,j,k,n)-sol(i-1,j,k,n));
            return;
        }

        // Partially open face.
        Real flux = bX(i,j,k,n)*(sol(i,j,k,n) - sol(i-1,j,k,n));

        const Real offy = fcx(i,j,k,0);
        const Real offz = fcx(i,j,k,1);
        const int jj = j + static_cast<int>(std::copysign(Real(1.0), offy));
        const int kk = k + static_cast<int>(std::copysign(Real(1.0), offz));

        Real fracy = Real(0.0);
        if (ccm(i-1,jj,k) || ccm(i,jj,k)) { fracy = std::abs(offy); }
        Real fracz = Real(0.0);
        if (ccm(i-1,j,kk) || ccm(i,j,kk)) { fracz = std::abs(offz); }

        if (!phi_on_centroid)
        {
            // Bilinear weights for the four faces involved.
            const Real w00 = (Real(1.0)-fracy)*(Real(1.0)-fracz);
            const Real w10 = fracy*(Real(1.0)-fracz);
            const Real w01 = fracz*(Real(1.0)-fracy);
            const Real w11 = fracy*fracz;

            const Real d10 = sol(i,jj,k ,n)-sol(i-1,jj,k ,n);
            const Real d01 = sol(i,j ,kk,n)-sol(i-1,j ,kk,n);
            const Real d11 = sol(i,jj,kk,n)-sol(i-1,jj,kk,n);

            if (beta_on_centroid)
            {
                const Real d00 = sol(i,j,k,n)-sol(i-1,j,k,n);
                flux = w00*d00 + w10*d10 + w01*d01 + w11*d11;
                flux *= bX(i,j,k,n);
            }
            else
            {
                flux = w00*flux +
                       w10*bX(i,jj,k ,n)*d10 +
                       w01*bX(i,j ,kk,n)*d01 +
                       w11*bX(i,jj,kk,n)*d11;
            }
        }

        fx(i,j,k,n) = -dhx*flux;
    });
}

/**
 * \brief Fill the y-face array fy from differences of sol across each face.
 *
 * Per face (i,j,k) and component n:
 *   - apy == 0: fy is set to zero;
 *   - apy == 1: fy = -dhy * bY * (sol(j) - sol(j-1));
 *   - otherwise: the difference is blended with those of the neighbouring
 *     faces in x and z. The sign of fcy(...,0)/fcy(...,1) selects the
 *     neighbour and its magnitude is the weight, which is used only if ccm
 *     is nonzero in one of the two cells adjacent to the neighbouring face.
 *     With phi_on_centroid set, no blend is applied.
 *
 * \param face_only        if nonzero, only the lowest and highest y-planes
 *                         of ybox are written
 * \param beta_on_centroid if true, the blended difference is multiplied by
 *                         the single bY of this face; otherwise each
 *                         neighbour term uses its own bY
 */
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void mlebabeclap_flux_y (Box const& box, Array4<Real> const& fy, Array4<Real const> const& apy,
                         Array4<Real const> const& fcy, Array4<Real const> const& sol,
                         Array4<Real const> const& bY, Array4<int const> const& ccm,
                         Real dhy, int face_only, int ncomp, Box const& ybox,
                         bool beta_on_centroid, bool phi_on_centroid) noexcept
{
    const int plane_lo = ybox.smallEnd(1);
    const int plane_hi = ybox.bigEnd(1);
    amrex::LoopConcurrent(box, ncomp, [=] (int i, int j, int k, int n) noexcept
    {
        // In face-only mode skip everything but the two bounding planes.
        if (face_only && j != plane_lo && j != plane_hi) { return; }

        const Real open = apy(i,j,k);
        if (open == Real(0.0)) {
            fy(i,j,k,n) = Real(0.0);
            return;
        }
        if (open == Real(1.0)) {
            fy(i,j,k,n) = -dhy*bY(i,j,k,n)*(sol(i,j,k,n)-sol(i,j-1,k,n));
            return;
        }

        // Partially open face.
        Real flux = bY(i,j,k,n)*(sol(i,j,k,n) - sol(i,j-1,k,n));

        const Real offx = fcy(i,j,k,0);
        const Real offz = fcy(i,j,k,1);
        const int ii = i + static_cast<int>(std::copysign(Real(1.0),offx));
        const int kk = k + static_cast<int>(std::copysign(Real(1.0),offz));

        Real fracx = Real(0.0);
        if (ccm(ii,j-1,k) || ccm(ii,j,k)) { fracx = std::abs(offx); }
        Real fracz = Real(0.0);
        if (ccm(i,j-1,kk) || ccm(i,j,kk)) { fracz = std::abs(offz); }

        if (!phi_on_centroid)
        {
            // Bilinear weights for the four faces involved.
            const Real w00 = (Real(1.0)-fracx)*(Real(1.0)-fracz);
            const Real w10 = fracx*(Real(1.0)-fracz);
            const Real w01 = fracz*(Real(1.0)-fracx);
            const Real w11 = fracx*fracz;

            const Real d10 = sol(ii,j,k ,n)-sol(ii,j-1,k ,n);
            const Real d01 = sol(i ,j,kk,n)-sol(i ,j-1,kk,n);
            const Real d11 = sol(ii,j,kk,n)-sol(ii,j-1,kk,n);

            if (beta_on_centroid)
            {
                const Real d00 = sol(i,j,k,n)-sol(i,j-1,k,n);
                flux = w00*d00 + w10*d10 + w01*d01 + w11*d11;
                flux *= bY(i,j,k,n);
            }
            else
            {
                flux = w00*flux +
                       w10*bY(ii,j,k ,n)*d10 +
                       w01*bY(i ,j,kk,n)*d01 +
                       w11*bY(ii,j,kk,n)*d11;
            }
        }

        fy(i,j,k,n) = -dhy*flux;
    });
}

/**
 * \brief Fill the z-face array fz from differences of sol across each face.
 *
 * Per face (i,j,k) and component n:
 *   - apz == 0: fz is set to zero;
 *   - apz == 1: fz = -dhz * bZ * (sol(k) - sol(k-1));
 *   - otherwise: the difference is blended with those of the neighbouring
 *     faces in x and y. The sign of fcz(...,0)/fcz(...,1) selects the
 *     neighbour and its magnitude is the weight, which is used only if ccm
 *     is nonzero in one of the two cells adjacent to the neighbouring face.
 *     With phi_on_centroid set, no blend is applied.
 *
 * \param face_only        if nonzero, only the lowest and highest z-planes
 *                         of zbox are written
 * \param beta_on_centroid if true, the blended difference is multiplied by
 *                         the single bZ of this face; otherwise each
 *                         neighbour term uses its own bZ
 */
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void mlebabeclap_flux_z (Box const& box, Array4<Real> const& fz, Array4<Real const> const& apz,
                         Array4<Real const> const& fcz, Array4<Real const> const& sol,
                         Array4<Real const> const& bZ, Array4<int const> const& ccm,
                         Real dhz, int face_only, int ncomp, Box const& zbox,
                         bool beta_on_centroid, bool phi_on_centroid) noexcept
{
    const int plane_lo = zbox.smallEnd(2);
    const int plane_hi = zbox.bigEnd(2);
    amrex::LoopConcurrent(box, ncomp, [=] (int i, int j, int k, int n) noexcept
    {
        // In face-only mode skip everything but the two bounding planes.
        if (face_only && k != plane_lo && k != plane_hi) { return; }

        const Real open = apz(i,j,k);
        if (open == Real(0.0)) {
            fz(i,j,k,n) = Real(0.0);
            return;
        }
        if (open == Real(1.0)) {
            fz(i,j,k,n) = -dhz*bZ(i,j,k,n)*(sol(i,j,k,n)-sol(i,j,k-1,n));
            return;
        }

        // Partially open face.
        Real flux = bZ(i,j,k,n)*(sol(i,j,k,n) - sol(i,j,k-1,n));

        const Real offx = fcz(i,j,k,0);
        const Real offy = fcz(i,j,k,1);
        const int ii = i + static_cast<int>(std::copysign(Real(1.0),offx));
        const int jj = j + static_cast<int>(std::copysign(Real(1.0),offy));

        Real fracx = Real(0.0);
        if (ccm(ii,j,k-1) || ccm(ii,j,k)) { fracx = std::abs(offx); }
        Real fracy = Real(0.0);
        if (ccm(i,jj,k-1) || ccm(i,jj,k)) { fracy = std::abs(offy); }

        if (!phi_on_centroid)
        {
            // Bilinear weights for the four faces involved.
            const Real w00 = (Real(1.0)-fracx)*(Real(1.0)-fracy);
            const Real w10 = fracx*(Real(1.0)-fracy);
            const Real w01 = fracy*(Real(1.0)-fracx);
            const Real w11 = fracx*fracy;

            const Real d10 = sol(ii,j ,k,n)-sol(ii,j ,k-1,n);
            const Real d01 = sol(i ,jj,k,n)-sol(i ,jj,k-1,n);
            const Real d11 = sol(ii,jj,k,n)-sol(ii,jj,k-1,n);

            if (beta_on_centroid)
            {
                const Real d00 = sol(i,j,k,n)-sol(i,j,k-1,n);
                flux = w00*d00 + w10*d10 + w01*d01 + w11*d11;
                flux *= bZ(i,j,k,n);
            }
            else
            {
                flux = w00*flux +
                       w10*bZ(ii,j ,k,n)*d10 +
                       w01*bZ(i ,jj,k,n)*d01 +
                       w11*bZ(ii,jj,k,n)*d11;
            }
        }

        fz(i,j,k,n) = -dhz*flux;
    });
}

/**
 * \brief x-face flux without any neighbour blending.
 *
 * fx is zero where apx == 0 and -dhx*bX*(sol(i)-sol(i-1)) on every other
 * face. If face_only is nonzero, only the lowest and highest x-planes of
 * xbox are written.
 */
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void mlebabeclap_flux_x_0 (Box const& box, Array4<Real> const& fx, Array4<Real const> const& apx,
                           Array4<Real const> const& sol, Array4<Real const> const& bX,
                           Real dhx, int face_only, int ncomp, Box const& xbox) noexcept
{
    const int plane_lo = xbox.smallEnd(0);
    const int plane_hi = xbox.bigEnd(0);
    amrex::LoopConcurrent(box, ncomp, [=] (int i, int j, int k, int n) noexcept
    {
        if (face_only && i != plane_lo && i != plane_hi) { return; }

        const bool closed = (apx(i,j,k) == Real(0.0));
        fx(i,j,k,n) = closed
            ? Real(0.0)
            : -dhx*bX(i,j,k,n)*(sol(i,j,k,n)-sol(i-1,j,k,n));
    });
}

/**
 * \brief y-face flux without any neighbour blending.
 *
 * fy is zero where apy == 0 and -dhy*bY*(sol(j)-sol(j-1)) on every other
 * face. If face_only is nonzero, only the lowest and highest y-planes of
 * ybox are written.
 */
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void mlebabeclap_flux_y_0 (Box const& box, Array4<Real> const& fy, Array4<Real const> const& apy,
                           Array4<Real const> const& sol, Array4<Real const> const& bY,
                           Real dhy, int face_only, int ncomp, Box const& ybox) noexcept
{
    const int plane_lo = ybox.smallEnd(1);
    const int plane_hi = ybox.bigEnd(1);
    amrex::LoopConcurrent(box, ncomp, [=] (int i, int j, int k, int n) noexcept
    {
        if (face_only && j != plane_lo && j != plane_hi) { return; }

        const bool closed = (apy(i,j,k) == Real(0.0));
        fy(i,j,k,n) = closed
            ? Real(0.0)
            : -dhy*bY(i,j,k,n)*(sol(i,j,k,n)-sol(i,j-1,k,n));
    });
}

/**
 * \brief z-face flux without any neighbour blending.
 *
 * fz is zero where apz == 0 and -dhz*bZ*(sol(k)-sol(k-1)) on every other
 * face. If face_only is nonzero, only the lowest and highest z-planes of
 * zbox are written.
 */
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void mlebabeclap_flux_z_0 (Box const& box, Array4<Real> const& fz, Array4<Real const> const& apz,
                           Array4<Real const> const& sol, Array4<Real const> const& bZ,
                           Real dhz, int face_only, int ncomp, Box const& zbox) noexcept
{
    const int plane_lo = zbox.smallEnd(2);
    const int plane_hi = zbox.bigEnd(2);
    amrex::LoopConcurrent(box, ncomp, [=] (int i, int j, int k, int n) noexcept
    {
        if (face_only && k != plane_lo && k != plane_hi) { return; }

        const bool closed = (apz(i,j,k) == Real(0.0));
        fz(i,j,k,n) = closed
            ? Real(0.0)
            : -dhz*bZ(i,j,k,n)*(sol(i,j,k,n)-sol(i,j,k-1,n));
    });
}

/**
 * \brief x-face difference quotient of sol, with optional neighbour blending.
 *
 * gx is zero where apx == 0 and dxi*(sol(i)-sol(i-1)) where apx == 1. On any
 * other face, and unless phi_on_centroid is set, the difference is blended
 * with the neighbouring faces in y and z: the sign of fcx(...,0)/fcx(...,1)
 * picks the neighbour and its magnitude is the weight, used only when ccm is
 * nonzero in one of the two cells adjacent to the neighbouring face.
 */
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void mlebabeclap_grad_x (Box const& box, Array4<Real> const& gx, Array4<Real const> const& sol,
                         Array4<Real const> const& apx, Array4<Real const> const& fcx,
                         Array4<int const> const& ccm,
                         Real dxi, int ncomp, bool phi_on_centroid) noexcept
{
    amrex::LoopConcurrent(box, ncomp, [=] (int i, int j, int k, int n) noexcept
    {
        const Real open = apx(i,j,k);
        if (open == Real(0.0)) {
            gx(i,j,k,n) = Real(0.0);
            return;
        }
        if (open == Real(1.0)) {
            gx(i,j,k,n) = dxi*(sol(i,j,k,n)-sol(i-1,j,k,n));
            return;
        }

        // Partially open face.
        Real diff = (sol(i,j,k,n) - sol(i-1,j,k,n));

        const Real offy = fcx(i,j,k,0);
        const Real offz = fcx(i,j,k,1);
        const int jj = j + static_cast<int>(std::copysign(Real(1.0), offy));
        const int kk = k + static_cast<int>(std::copysign(Real(1.0), offz));

        Real fracy = Real(0.0);
        if (ccm(i-1,jj,k) || ccm(i,jj,k)) { fracy = std::abs(offy); }
        Real fracz = Real(0.0);
        if (ccm(i-1,j,kk) || ccm(i,j,kk)) { fracz = std::abs(offz); }

        if (!phi_on_centroid)
        {
            const Real w00 = (Real(1.0)-fracy)*(Real(1.0)-fracz);
            const Real w10 = fracy*(Real(1.0)-fracz);
            const Real w01 = fracz*(Real(1.0)-fracy);
            const Real w11 = fracy*fracz;

            diff = w00*diff +
                   w10*(sol(i,jj,k ,n)-sol(i-1,jj,k ,n)) +
                   w01*(sol(i,j ,kk,n)-sol(i-1,j ,kk,n)) +
                   w11*(sol(i,jj,kk,n)-sol(i-1,jj,kk,n));
        }

        gx(i,j,k,n) = dxi*diff;
    });
}

/**
 * \brief y-face difference quotient of sol, with optional neighbour blending.
 *
 * gy is zero where apy == 0 and dyi*(sol(j)-sol(j-1)) where apy == 1. On any
 * other face, and unless phi_on_centroid is set, the difference is blended
 * with the neighbouring faces in x and z: the sign of fcy(...,0)/fcy(...,1)
 * picks the neighbour and its magnitude is the weight, used only when ccm is
 * nonzero in one of the two cells adjacent to the neighbouring face.
 */
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void mlebabeclap_grad_y (Box const& box, Array4<Real> const& gy, Array4<Real const> const& sol,
                         Array4<Real const> const& apy, Array4<Real const> const& fcy,
                         Array4<int const> const& ccm,
                         Real dyi, int ncomp, bool phi_on_centroid) noexcept
{
    amrex::LoopConcurrent(box, ncomp, [=] (int i, int j, int k, int n) noexcept
    {
        const Real open = apy(i,j,k);
        if (open == Real(0.0)) {
            gy(i,j,k,n) = Real(0.0);
            return;
        }
        if (open == Real(1.0)) {
            gy(i,j,k,n) = dyi*(sol(i,j,k,n)-sol(i,j-1,k,n));
            return;
        }

        // Partially open face.
        Real diff = (sol(i,j,k,n) - sol(i,j-1,k,n));

        const Real offx = fcy(i,j,k,0);
        const Real offz = fcy(i,j,k,1);
        const int ii = i + static_cast<int>(std::copysign(Real(1.0),offx));
        const int kk = k + static_cast<int>(std::copysign(Real(1.0),offz));

        Real fracx = Real(0.0);
        if (ccm(ii,j-1,k) || ccm(ii,j,k)) { fracx = std::abs(offx); }
        Real fracz = Real(0.0);
        if (ccm(i,j-1,kk) || ccm(i,j,kk)) { fracz = std::abs(offz); }

        if (!phi_on_centroid)
        {
            const Real w00 = (Real(1.0)-fracx)*(Real(1.0)-fracz);
            const Real w10 = fracx*(Real(1.0)-fracz);
            const Real w01 = fracz*(Real(1.0)-fracx);
            const Real w11 = fracx*fracz;

            diff = w00*diff +
                   w10*(sol(ii,j,k ,n)-sol(ii,j-1,k ,n)) +
                   w01*(sol(i ,j,kk,n)-sol(i ,j-1,kk,n)) +
                   w11*(sol(ii,j,kk,n)-sol(ii,j-1,kk,n));
        }

        gy(i,j,k,n) = dyi*diff;
    });
}

/**
 * \brief z-face difference quotient of sol, with optional neighbour blending.
 *
 * gz is zero where apz == 0 and dzi*(sol(k)-sol(k-1)) where apz == 1. On any
 * other face, and unless phi_on_centroid is set, the difference is blended
 * with the neighbouring faces in x and y: the sign of fcz(...,0)/fcz(...,1)
 * picks the neighbour and its magnitude is the weight, used only when ccm is
 * nonzero in one of the two cells adjacent to the neighbouring face.
 */
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void mlebabeclap_grad_z (Box const& box, Array4<Real> const& gz, Array4<Real const> const& sol,
                         Array4<Real const> const& apz, Array4<Real const> const& fcz,
                         Array4<int const> const& ccm,
                         Real dzi, int ncomp, bool phi_on_centroid) noexcept
{
    amrex::LoopConcurrent(box, ncomp, [=] (int i, int j, int k, int n) noexcept
    {
        const Real open = apz(i,j,k);
        if (open == Real(0.0)) {
            gz(i,j,k,n) = Real(0.0);
            return;
        }
        if (open == Real(1.0)) {
            gz(i,j,k,n) = dzi*(sol(i,j,k,n)-sol(i,j,k-1,n));
            return;
        }

        // Partially open face.
        Real diff = (sol(i,j,k,n) - sol(i,j,k-1,n));

        const Real offx = fcz(i,j,k,0);
        const Real offy = fcz(i,j,k,1);
        const int ii = i + static_cast<int>(std::copysign(Real(1.0),offx));
        const int jj = j + static_cast<int>(std::copysign(Real(1.0),offy));

        Real fracx = Real(0.0);
        if (ccm(ii,j,k-1) || ccm(ii,j,k)) { fracx = std::abs(offx); }
        Real fracy = Real(0.0);
        if (ccm(i,jj,k-1) || ccm(i,jj,k)) { fracy = std::abs(offy); }

        if (!phi_on_centroid)
        {
            const Real w00 = (Real(1.0)-fracx)*(Real(1.0)-fracy);
            const Real w10 = fracx*(Real(1.0)-fracy);
            const Real w01 = fracy*(Real(1.0)-fracx);
            const Real w11 = fracx*fracy;

            diff = w00*diff +
                   w10*(sol(ii,j ,k,n)-sol(ii,j ,k-1,n)) +
                   w01*(sol(i ,jj,k,n)-sol(i ,jj,k-1,n)) +
                   w11*(sol(ii,jj,k,n)-sol(ii,jj,k-1,n));
        }

        gz(i,j,k,n) = dzi*diff;
    });
}

/**
 * \brief x-face difference quotient of sol without neighbour blending.
 *
 * gx is zero where apx == 0 and dxi*(sol(i)-sol(i-1)) on every other face.
 */
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void mlebabeclap_grad_x_0 (Box const& box, Array4<Real> const& gx, Array4<Real const> const& sol,
                           Array4<Real const> const& apx, Real dxi, int ncomp) noexcept
{
    amrex::LoopConcurrent(box, ncomp, [=] (int i, int j, int k, int n) noexcept
    {
        const bool closed = (apx(i,j,k) == Real(0.0));
        gx(i,j,k,n) = closed ? Real(0.0)
                             : dxi*(sol(i,j,k,n)-sol(i-1,j,k,n));
    });
}

/**
 * \brief Two-point y-difference of sol scaled by dyi, with no centroid blending.
 *
 * For every (i,j,k,n) in box x [0,ncomp), gy is zero where apy(i,j,k) == 0
 * and dyi * (sol(i,j,k,n) - sol(i,j-1,k,n)) everywhere else.
 */
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void mlebabeclap_grad_y_0 (Box const& box, Array4<Real> const& gy, Array4<Real const> const& sol,
                           Array4<Real const> const& apy, Real dyi, int ncomp) noexcept
{
    amrex::LoopConcurrent(box, ncomp, [=] (int i, int j, int k, int n) noexcept
    {
        bool const zero_area = (apy(i,j,k) == Real(0.0));
        gy(i,j,k,n) = zero_area ? Real(0.0)
                                : dyi*(sol(i,j,k,n)-sol(i,j-1,k,n));
    });
}

/**
 * \brief Two-point z-difference of sol scaled by dzi, with no centroid blending.
 *
 * For every (i,j,k,n) in box x [0,ncomp), gz is zero where apz(i,j,k) == 0
 * and dzi * (sol(i,j,k,n) - sol(i,j,k-1,n)) everywhere else.
 */
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void mlebabeclap_grad_z_0 (Box const& box, Array4<Real> const& gz, Array4<Real const> const& sol,
                           Array4<Real const> const& apz, Real dzi, int ncomp) noexcept
{
    amrex::LoopConcurrent(box, ncomp, [=] (int i, int j, int k, int n) noexcept
    {
        bool const zero_area = (apz(i,j,k) == Real(0.0));
        gz(i,j,k,n) = zero_area ? Real(0.0)
                                : dzi*(sol(i,j,k,n)-sol(i,j,k-1,n));
    });
}

/**
 * \brief Divide phi in place by a per-cell, per-component coefficient.
 *
 * For cells flagged regular the divisor is
 *   alpha*a + dhx*(bX(i)+bX(i+1)) + dhy*(bY(j)+bY(j+1)) + dhz*(bZ(k)+bZ(k+1)).
 * For cells flagged single-valued the divisor (gamma) is assembled from the
 * area-fraction-weighted face coefficients divided by the volume fraction,
 * plus an extra EB contribution when is_dirichlet is true.
 * Cells that are neither regular nor single-valued are left untouched.
 *
 * \param box       index range that is looped over
 * \param phi       array modified in place (phi /= divisor)
 * \param alpha     scalar multiplying a(i,j,k)
 * \param a         cell coefficient; always read at component 0
 * \param dhx       scalar factor on the x-face terms (also used for the EB term)
 * \param dhy       scalar factor on the y-face terms
 * \param dhz       scalar factor on the z-face terms
 * \param bX        x-face coefficient, read at (i,j,k,n) and (i+1,j,k,n)
 * \param bY        y-face coefficient, read at (i,j,k,n) and (i,j+1,k,n)
 * \param bZ        z-face coefficient, read at (i,j,k,n) and (i,j,k+1,n)
 * \param ccm       integer mask; a nonzero entry in a neighbouring cell
 *                  enables the corresponding face-centroid weight
 * \param flag      cell flags selecting the regular / single-valued branch
 * \param vfrc      volume fraction; its reciprocal scales the cut-cell terms
 * \param apx       x-face area fraction
 * \param apy       y-face area fraction
 * \param apz       z-face area fraction
 * \param fcx       two-component offset used to weight bX on partial x-faces
 * \param fcy       two-component offset used to weight bY on partial y-faces
 * \param fcz       two-component offset used to weight bZ on partial z-faces
 * \param ba        scalar per cell multiplying the EB term
 *                  (presumably the EB boundary area -- TODO confirm)
 * \param bc        three-component per-cell vector used in the EB term
 *                  (presumably the EB boundary centroid -- TODO confirm)
 * \param beb       per-component coefficient multiplying the EB term
 * \param is_dirichlet     when true, the EB contribution is added to gamma
 * \param beta_on_centroid when true, the face coefficients are used as is
 *                         (no face-centroid weighting)
 * \param ncomp     number of components looped over
 */
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void mlebabeclap_normalize (Box const& box, Array4<Real> const& phi,
                            Real alpha, Array4<Real const> const& a,
                            Real dhx, Real dhy, Real dhz,
                            Array4<Real const> const& bX, Array4<Real const> const& bY,
                            Array4<Real const> const& bZ,
                            Array4<const int> const& ccm, Array4<EBCellFlag const> const& flag,
                            Array4<Real const> const& vfrc,
                            Array4<Real const> const& apx, Array4<Real const> const& apy,
                            Array4<Real const> const& apz,
                            Array4<Real const> const& fcx, Array4<Real const> const& fcy,
                            Array4<Real const> const& fcz,
                            Array4<Real const> const& ba, Array4<Real const> const& bc,
                            Array4<Real const> const& beb,
                            bool is_dirichlet, bool beta_on_centroid, int ncomp) noexcept
{
    amrex::Loop(box, ncomp, [=] (int i, int j, int k, int n) noexcept
    {
        if (flag(i,j,k).isRegular())
        {
            // Regular cell: divisor is alpha*a plus the sum of the six face coefficients.
            phi(i,j,k,n) /= alpha*a(i,j,k) + dhx*(bX(i,j,k,n) + bX(i+1,j,k,n))
                                           + dhy*(bY(i,j,k,n) + bY(i,j+1,k,n))
                                           + dhz*(bZ(i,j,k,n) + bZ(i,j,k+1,n));
        }
        else if (flag(i,j,k).isSingleValued())
        {
            // Cut cell: gather the volume fraction and the six face area fractions
            // (m = low face of this cell, p = high face).
            Real kappa = vfrc(i,j,k);
            Real apxm = apx(i,j,k);
            Real apxp = apx(i+1,j,k);
            Real apym = apy(i,j,k);
            Real apyp = apy(i,j+1,k);
            Real apzm = apz(i,j,k);
            Real apzp = apz(i,j,k+1);

            // Each s?? below starts from the face coefficient.  On a partial face
            // (area fraction neither 0 nor 1), and only when beta_on_centroid is
            // false, it is scaled by (1-frac_a)*(1-frac_b).  frac_a/frac_b are the
            // magnitudes of the two face-offset components, each zeroed when ccm
            // is zero in both cells adjacent to the face on the side the offset
            // points to.  High-face values carry a minus sign so that the
            // (ap?m*s?m - ap?p*s?p) terms in gamma add both faces.

            // Low x-face.
            Real sxm =  bX(i,j,k,n);
            if (apxm != Real(0.0) && apxm != Real(1.0) && !beta_on_centroid) {
                int jj = j + static_cast<int>(std::copysign(Real(1.0), fcx(i,j,k,0)));
                int kk = k + static_cast<int>(std::copysign(Real(1.0), fcx(i,j,k,1)));
                Real fracy = (ccm(i-1,jj,k) || ccm(i,jj,k))
                    ? std::abs(fcx(i,j,k,0)) : Real(0.0);
                Real fracz = (ccm(i-1,j,kk) || ccm(i,j,kk))
                    ? std::abs(fcx(i,j,k,1)) : Real(0.0);
                sxm = (Real(1.0)-fracy)*(Real(1.0)-fracz)*sxm;
            }

            // High x-face.
            Real sxp = -bX(i+1,j,k,n);
            if (apxp != Real(0.0) && apxp != Real(1.0) && !beta_on_centroid) {
                int jj = j + static_cast<int>(std::copysign(Real(1.0),fcx(i+1,j,k,0)));
                int kk = k + static_cast<int>(std::copysign(Real(1.0),fcx(i+1,j,k,1)));
                Real fracy = (ccm(i,jj,k) || ccm(i+1,jj,k))
                    ? std::abs(fcx(i+1,j,k,0)) : Real(0.0);
                Real fracz = (ccm(i,j,kk) || ccm(i+1,j,kk))
                    ? std::abs(fcx(i+1,j,k,1)) : Real(0.0);
                sxp = (Real(1.0)-fracy)*(Real(1.0)-fracz)*sxp;
            }

            // Low y-face.
            Real sym =  bY(i,j,k,n);
            if (apym != Real(0.0) && apym != Real(1.0) && !beta_on_centroid) {
                int ii = i + static_cast<int>(std::copysign(Real(1.0),fcy(i,j,k,0)));
                int kk = k + static_cast<int>(std::copysign(Real(1.0),fcy(i,j,k,1)));
                Real fracx = (ccm(ii,j-1,k) || ccm(ii,j,k))
                    ? std::abs(fcy(i,j,k,0)) : Real(0.0);
                Real fracz = (ccm(i,j-1,kk) || ccm(i,j,kk))
                    ? std::abs(fcy(i,j,k,1)) : Real(0.0);
                sym = (Real(1.0)-fracx)*(Real(1.0)-fracz)*sym;
            }

            // High y-face.
            Real syp = -bY(i,j+1,k,n);
            if (apyp != Real(0.0) && apyp != Real(1.0) && !beta_on_centroid) {
                int ii = i + static_cast<int>(std::copysign(Real(1.0),fcy(i,j+1,k,0)));
                int kk = k + static_cast<int>(std::copysign(Real(1.0),fcy(i,j+1,k,1)));
                Real fracx = (ccm(ii,j,k) || ccm(ii,j+1,k))
                    ? std::abs(fcy(i,j+1,k,0)) : Real(0.0);
                Real fracz = (ccm(i,j,kk) || ccm(i,j+1,kk))
                    ? std::abs(fcy(i,j+1,k,1)) : Real(0.0);
                syp = (Real(1.0)-fracx)*(Real(1.0)-fracz)*syp;
            }

            // Low z-face.
            Real szm =  bZ(i,j,k,n);
            if (apzm != Real(0.0) && apzm != Real(1.0) && !beta_on_centroid) {
                int ii = i + static_cast<int>(std::copysign(Real(1.0),fcz(i,j,k,0)));
                int jj = j + static_cast<int>(std::copysign(Real(1.0),fcz(i,j,k,1)));
                Real fracx = (ccm(ii,j,k-1) || ccm(ii,j,k))
                    ? std::abs(fcz(i,j,k,0)) : Real(0.0);
                Real fracy = (ccm(i,jj,k-1) || ccm(i,jj,k))
                    ? std::abs(fcz(i,j,k,1)) : Real(0.0);
                szm = (Real(1.0)-fracx)*(Real(1.0)-fracy)*szm;
            }

            // High z-face.
            Real szp = -bZ(i,j,k+1,n);
            if (apzp != Real(0.0) && apzp != Real(1.0) && !beta_on_centroid) {
                int ii = i + static_cast<int>(std::copysign(Real(1.0),fcz(i,j,k+1,0)));
                int jj = j + static_cast<int>(std::copysign(Real(1.0),fcz(i,j,k+1,1)));
                Real fracx = (ccm(ii,j,k) || ccm(ii,j,k+1))
                    ? std::abs(fcz(i,j,k+1,0)) : Real(0.0);
                Real fracy = (ccm(i,jj,k) || ccm(i,jj,k+1))
                    ? std::abs(fcz(i,j,k+1,1)) : Real(0.0);
                szp = (Real(1.0)-fracx)*(Real(1.0)-fracy)*szp;
            }

            // NOTE(review): no guard against kappa == 0; presumably a single-valued
            // cell always has a positive volume fraction -- confirm.
            Real vfrcinv = Real(1.0)/kappa;
            // Cut-cell divisor: area-weighted face terms scaled by 1/kappa.
            Real gamma = alpha*a(i,j,k) + vfrcinv *
                (dhx*(apxm*sxm-apxp*sxp) +
                 dhy*(apym*sym-apyp*syp) +
                 dhz*(apzm*szm-apzp*szp));

            if (is_dirichlet) {
                // Unit vector built from the low-minus-high area fraction
                // differences (presumably the EB surface normal -- confirm).
                // NOTE(review): anorm is not checked for zero before the division.
                Real dapx = apxm-apxp;
                Real dapy = apym-apyp;
                Real dapz = apzm-apzp;
                Real anorm = std::sqrt(dapx*dapx+dapy*dapy+dapz*dapz);
                Real anorminv = Real(1.0)/anorm;
                Real anrmx = dapx * anorminv;
                Real anrmy = dapy * anorminv;
                Real anrmz = dapz * anorminv;
                Real bctx = bc(i,j,k,0);
                Real bcty = bc(i,j,k,1);
                Real bctz = bc(i,j,k,2);
                // get_dx_eb is defined outside this chunk; only its use as a
                // length derived from the volume fraction is visible here.
                Real dx_eb = get_dx_eb(vfrc(i,j,k));

                // Stretch dx_eb by the largest |component| of the unit vector.
                Real dg = dx_eb / amrex::max(std::abs(anrmx),std::abs(anrmy),
                                             std::abs(anrmz));

                // Point at distance dg from bc along the negative unit vector,
                // then flip each coordinate by the sign of the matching component.
                Real gx = bctx - dg*anrmx;
                Real gy = bcty - dg*anrmy;
                Real gz = bctz - dg*anrmz;
                Real sx = std::copysign(Real(1.0),anrmx);
                Real sy = std::copysign(Real(1.0),anrmy);
                Real sz = std::copysign(Real(1.0),anrmz);

                gx *= sx;
                gy *= sy;
                gz *= sz;
                Real gxy = gx*gy;
                Real gxz = gx*gz;
                Real gyz = gy*gz;
                Real gxyz = gx*gy*gz;
                // Equals (1+gx)*(1+gy)*(1+gz) written out term by term.
                Real phig_gamma = (Real(1.0)+gx+gy+gz+gxy+gxz+gyz+gxyz);
                Real feb_gamma = -phig_gamma/dg * ba(i,j,k) * beb(i,j,k,n);
                // NOTE(review): only dhx is used for the EB term (dhy/dhz are not);
                // presumably this assumes equal spacing in all directions -- confirm.
                gamma += vfrcinv*(-dhx)*feb_gamma;
            }

            phi(i,j,k,n) /= gamma;
        }
    });
}

}

#endif
