! Copyright 2024
!
! For a comprehensive list of the developers that contributed to these codes
! see the UK-AMOR website.
!
! This file is part of UKRmol-out (UKRmol+ suite).
!
!     UKRmol-out is free software: you can redistribute it and/or modify
!     it under the terms of the GNU General Public License as published by
!     the Free Software Foundation, either version 3 of the License, or
!     (at your option) any later version.
!
!     UKRmol-out is distributed in the hope that it will be useful,
!     but WITHOUT ANY WARRANTY; without even the implied warranty of
!     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!     GNU General Public License for more details.
!
!     You should have received a copy of the GNU General Public License
!     along with  UKRmol-out (in source/COPYING). Alternatively, you can also visit
!     <https://www.gnu.org/licenses/>.
!
!> \brief   Asymptotic approximation of multi-photon matrix elements
!> \author  J Benda
!> \date    2024
!>
module multidip_asy

    use precisn_gbl, only: wp

    implicit none

contains

    !> \brief   Hydrogen 1s one-photon ionization
    !> \author  J Benda
    !> \date    2024
    !>
    !> Evaluate the radial factor in the one-photon ionization amplitude of H(1s). This can be expressed as
    !> \f[
    !>      M^{(1)} = \int_0^\infty F_l(k, r) r P_{10}(r) dr \,.
    !> \f]
    !>
    real(wp) function M1_H1s (l, k) result (M1)

        use multidip_params,  only: pi
        use multidip_special, only: complex_gamma

        integer,  intent(in) :: l
        real(wp), intent(in) :: k

        real(wp) :: Cl, nu

        nu = 2

        Cl = 2**l * exp(0.5*pi/k) * abs(complex_gamma(l, -1/k)) / real(complex_gamma(2*l + 1, 0._wp))
        M1 = 2*(nu - 1._wp/(l + 1))*gamma(2*l + 3._wp)*Cl*k**(l + 1)/(nu*nu + k*k)**(l + 2) * exp(-2/k * atan(k/nu))

    end function M1_H1s


    !> \brief   Near-field part of the continuum-continuum transition integral
    !> \authors J Benda
    !> \date    2024
    !>
    !> Numerically evaluates the integral
    !> \f[
    !>      I_{cc}(a) = \int\limits_0^a F_l(k,r) r^b H_\lambda^+(\kappa,r) dr \,.
    !> \f]
    !>
    !> This function splits the integral to two pieces at the classical turning point \f$ a_0 \f$ of the Coulomb functions.
    !> The non-oscillatory, classically forbidden part \f$ (0, a_0) \f$ is integrated using Romberg quadrature, while
    !> the remainder \f$ (a_0, a) \f$ is integrated using Levin quadrature.
    !>
    complex(wp) function Icca (Z, a, b, k, l, kappa, lambda) result (val)

        use multidip_levin,   only: levin_adapt
        use multidip_params,  only: imu, pi
        use multidip_special, only: coul

        integer,  intent(in) :: lambda, l, b
        real(wp), intent(in) :: Z, a, kappa, k

        integer,  parameter :: mmax = 15
        logical,  parameter :: use_levin = .true.
        real(wp), parameter :: epsrel = 1e-6

        integer     :: i, m, n
        real(wp)    :: Ff, Ffp, Gf, Gfp, Fn, Fnp, Gn, Gnp, r, dr, a0
        complex(wp) :: romb(0:mmax, 0:mmax), res

        a0 = a  ! Romberg quadrature endpoint
        val = 0 ! value to be returned

        ! optionally restrict Romberg to the classically forbidden region
        if (use_levin) then

            if (lambda == 0 .and. l == 0) then
                a0 = pi/(2*min(kappa, k))
            else
                a0 = (sqrt(kappa**2*max(lambda, l)*max(lambda + 1, l + 1) + Z**2) - Z)/kappa**2
            end if

            if (a0 >= a) then
                a0 = a
            else
                val = levin_adapt(Z, a0, a, 0._wp, b, 0, +1, l, lambda, cmplx(k, 0, wp), cmplx(kappa, 0, wp))
            end if

        end if

        ! use Romberg quadrature in the classically forbidden region (if any)
        if (a0 > 0) then

            ! evaluate Coulomb functions at r = a0
            call coul(Z, lambda, kappa**2/2, a0, Fn, Fnp, Gn, Gnp)
            call coul(Z, l, k**2/2, a0, Ff, Ffp, Gf, Gfp)

            ! initialize using endpoints (r = 0 and r = a0)
            res = Ff * a0**b * (Gn + imu*Fn)/2
            romb = 0
            romb(0, 0) = res*a0

            ! integrate numerically up to r = a0 using Romberg method
            richardson_iterations: do m = 1, mmax

                ! define grid spacing at this subdivision level
                n = 2**m
                dr = a0/n

                ! apply nested trapezoidal quadrature rule
                do i = 1, n, 2
                    r = i*dr
                    call coul(Z, lambda, kappa**2/2, r, Fn, Fnp, Gn, Gnp)
                    call coul(Z, l, k**2/2, r, Ff, Ffp, Gf, Gfp)
                    res = res + r**b * Ff * (Gn + imu*Fn)
                end do

                ! update Richardson extrapolation table
                romb(m, 0) = val + res*dr
                do n = 1, m
                    do i = n, m
                        romb(i, n) = (4**n*romb(i, n - 1) - romb(i - 1, n - 1))/(4**n - 1)
                    end do
                end do

                ! exit if converged
                if (m >= 5) then
                    if (abs(romb(m, m) - romb(m - 1, m - 1)) < epsrel*abs(romb(m, m))) then
                        val = romb(m, m)
                        exit richardson_iterations
                    end if
                end if

                ! exit if non-converged
                if (m == mmax) then
                    print '(a,i0,a)', 'Warning: Romberg quadrature failed to converge in ', mmax, ' subdivisions.'
                    val = romb(m, m)
                end if

            end do richardson_iterations

        end if

    end function Icca


    !> \brief   Near-field part of the double continuum-continuum transition integral
    !> \authors J Benda
    !> \date    2024
    !>
    !> Numerically evaluate the integral
    !> \f[
    !>      I = \int\limits_0^a \int\limits_0^a F_{l_f}(k_f,r) r g_{l_n}^{(+)}(k_n; r, r') r' H_{l_i}^+(r') dr' \,.
    !> \f]
    !>
    complex(wp) function I2cca (Z, a, kf, lf, kn, ln, ki, li) result (val)

        use multidip_special, only: coulH

        real(wp), intent(in) :: Z, a, ki, kn, kf
        integer,  intent(in) :: li, ln, lf

        real(wp), parameter :: c = 0        ! damping factor
        integer,  parameter :: mmax = 13    ! log2 of the maximal number of 2D quadrature points in each dimension

        real(wp),    allocatable :: Ff(:), Fn(:)
        complex(wp), allocatable :: Hn(:), Hi(:)

        integer     :: i, j, qi, qj, m, n, stride
        complex(wp) :: Hlf, Hln, Hli, romb(0:mmax, 0:mmax)
        real(wp)    :: dr, ri, rj, wt

        ! allocate workspaces for the double integral
        !   - the zeroth index corresponds to left boundary of the integration domain where the integrand is zero, so it is not used
        !     neither allocated
        !   - the last index corresponds to the right boundary
        allocate (Ff(2**mmax), Fn(2**mmax), Hn(2**mmax), Hi(2**mmax))

        ! obtain all needed function values
        do i = 1, 2**mmax
            ri = i*a/2**mmax
            Hlf = coulH(Z, +1, lf, kf*kf/2, ri)
            Hln = coulH(Z, +1, ln, kn*kn/2, ri)
            Hli = coulH(Z, +1, li, ki*ki/2, ri)
            Ff(i) = aimag(Hlf)
            Fn(i) = aimag(Hln)
            Hn(i) = Hln
            Hi(i) = Hli
        end do

        ! loop over all subdivision levels for the double integral
        val = 0
        romb = 0
        do m = 0, mmax

            ! define grid spacing at this subdivision level
            n = 2**m
            stride = 2**(mmax - m)
            dr = a/n

            ! apply nested 2D trapezoidal quadrature rule
            do i = 1, n
                ri = i*dr
                qi = i*stride
                do j = i, n
                    rj = j*dr
                    qj = j*stride
                    if (mod(i, 2) /= 0 .or. mod(j, 2) /= 0) then
                        wt = 1._wp/(1 + merge(1, 0, i == n) + merge(1, 0, j == n))
                        val = val + wt*Ff(qi)*ri*Fn(qi)*Hn(qj)*rj*Hi(qj)
                        if (i /= j) then
                            val = val + wt*Ff(qj)*rj*Hn(qj)*Fn(qi)*ri*Hi(qi)
                        end if
                    end if
                end do
            end do

            ! update Richardson extrapolation table
            romb(m, 0) = val*dr*dr
            do n = 1, m
                do i = n, m
                    romb(i, n) = (4**n*romb(i, n - 1) - romb(i - 1, n - 1))/(4**n - 1)
                end do
            end do

        end do

        val = -2/kn*romb(mmax, mmax)

    end function I2cca


    !> \brief   Far-field part of the continuum-continuum transition integral
    !> \authors J Benda
    !> \date    2024
    !>
    !> Asymptotically evaluates the integral
    !> \f[
    !>      A_{cc}(a) = \int\limits_a^{+\infty} F_l(k,r) r^b H_\lambda^+(\kappa,r) dr \,.
    !> \f]
    !>
    complex(wp) function Acca (Z, a, b, k, l, kappa, lambda) result (val)

        use multidip_integ,  only: nested_coul_integ
        use multidip_params, only: imu

        integer,  intent(in) :: lambda, l, b
        real(wp), intent(in) :: Z, a, kappa, k

        real(wp), parameter :: c = 0

        val = (nested_coul_integ(Z, a, c, 1, [ b ], [+1, +1], [l, lambda], [cmplx(k, 0, wp), cmplx(kappa, 0, wp)]) &
             - nested_coul_integ(Z, a, c, 1, [ b ], [-1, +1], [l, lambda], [cmplx(k, 0, wp), cmplx(kappa, 0, wp)])) / (2*imu)

    end function Acca


    !> \brief   Far-field part of the double continuum-continuum transition integral
    !> \authors J Benda
    !> \date    2024
    !>
    !> Asymptotically evaluates the integral
    !> \f[
    !>      A_{cc}(a) = \int\limits_a^{+\infty} \int\limits_a^{+\infty}
    !>                   F_{l_f}(k_f,r) r g_{l_n}^{(+)}(k_n; r, r') r' H_{l_i}^+(k_i,r') dr dr' \,.
    !> \f]
    !>
    complex(wp) function A2cca (Z, a, kf, lf, kn, ln, ki, li) result (val)

        use multidip_integ,  only: nested_cgreen_integ
        use multidip_params, only: imu

        integer,  intent(in) :: li, ln, lf
        real(wp), intent(in) :: Z, ki, kn, kf, a

        real(wp), parameter :: c = 0

        integer     :: ls(3)
        complex(wp) :: ks(3)

        ks(1) = cmplx(ki, 0, wp);  ls(1) = li
        ks(2) = cmplx(kn, 0, wp);  ls(2) = ln
        ks(3) = cmplx(kf, 0, wp);  ls(3) = lf

        val = (nested_cgreen_integ(Z, a, a, c, 2, +1, +1, [ 1, 1 ], ls, ks) &
             - nested_cgreen_integ(Z, a, a, c, 2, +1, -1, [ 1, 1 ], ls, ks)) / (2*imu)

    end function A2cca


    !> \brief   Continuum-continuum transition integral
    !> \authors J Benda
    !> \date    2024
    !>
    !> Calculate integral of regular Coulomb function times r^b times the outgoing Coulomb-Hankel function:
    !> \f[
    !>      I_{cc} = \int\limits_0^{+\infty} F_l(k,r) r^b H_\lambda^+(\kappa,r) dr \,.
    !> \f]
    !>
    complex(wp) function Icc (Z, b, k, l, kappa, lambda) result (val)

        use multidip_params, only: imu

        integer,  intent(in) :: lambda, l, b
        real(wp), intent(in) :: Z, kappa, k

        real(wp), parameter :: a = 200

        val = Icca(Z, a, b, k, l, kappa, lambda) + Acca(Z, a, b, k, l, kappa, lambda)

    end function Icc


    !> \brief   Two-dimensional continuum-continuum transition integral
    !> \authors J Benda
    !> \date    2024
    !>
    !> Calculate the integral
    !> \f[
    !>      I_{cc} = \int\limits_0^{+\infty}\int\limits_0^{+\infty}
    !>               F_{l_f}(k_f,r) r g_{l_n}^{(+)}(k_n;r,r') r' H_{l_i}^+(k_i,r') dr dr' = I_1 + I_2 + I_3 + I_4
    !> \f]
    !> by separation into four contributions. The near-field region (\f$ r < a, r' < a \f$) is integrated numerically,
    !> \f[
    !>      I_1 = \int\limits_0^a \int\limits_0^a F_{l_f}(k_f,r) r g_{l_n}^{(+)}(k_n;r,r') r' H_{l_i}^+(k_i,r') dr dr' \,,
    !> \f]
    !> the far-field region is integrated asymptotically,
    !> \f[
    !>      I_2 = \int\limits_a^{+\infty} \int\limits_a^{+\infty} F_{l_f}(k_f,r)rg_{l_n}^{(+)}(k_n;r,r')r'H_{l_i}^+(k_i,r') drdr'\,,
    !> \f]
    !> and the mixed regions factorize into one-dimensional integrals
    !> \f[
    !>      I_3 = -\frac{2}{k_n} \int\limits_a^{+\infty} F_{l_f}(k_f,r) r H_{l_n}^+(k_n,r) dr
    !>                           \int\limits_0^a         F_{l_n}(k_n,r') r' H_{l_i}^+(k_n,r') dr' \,,
    !> \f]
    !> \f[
    !>      I_4 = -\frac{2}{k_n} \int\limits_0^a         F_{l_f}(k_f,r) r F_{l_n}(k_n,r) dr
    !>                           \int\limits_0^{+\infty} H_{l_n}^+(k_n,r') r' H_{l_i}^+(k_n,r') dr' \,.
    !> \f]
    !>
    complex(wp) function I2cc (Z, kf, lf, kn, ln, ki, li) result (val)

        use multidip_integ,  only: nested_coul_integ
        use multidip_params, only: imu

        integer,  intent(in) :: li, ln, lf
        real(wp), intent(in) :: Z, ki, kn, kf

        real(wp), parameter :: a = 200, b = 250, c = 0
        logical,  parameter :: debug = .false.

        complex(wp) :: I2cca_fni, A2cca_fni, Icca_fn, Iccb_fn, Icca_ni, Iccb_ni, Acca_fn, Acca_ni, A2ccb_fni, Accb_fn, Accb_ni
        complex(wp) :: I1, I2, I3, I4

        I2cca_fni = I2cca(Z, a, kf, lf, kn, ln, ki, li)
        A2cca_fni = A2cca(Z, a, kf, lf, kn, ln, ki, li)

        Icca_fn = aimag(Icca(Z, a, 1, kf, lf, kn, ln))
        Icca_ni = Icca(Z, a, 1, kn, ln, ki, li)

        Acca_fn = (nested_coul_integ(Z, a, c, 1, [ 1 ], [+1, +1], [ln, lf], [cmplx(kn, 0, wp), cmplx(kf, 0, wp)]) &
                 - nested_coul_integ(Z, a, c, 1, [ 1 ], [+1, -1], [ln, lf], [cmplx(kn, 0, wp), cmplx(kf, 0, wp)])) / (2*imu)
        Acca_ni =  nested_coul_integ(Z, a, c, 1, [ 1 ], [+1, +1], [li, ln], [cmplx(ki, 0, wp), cmplx(kn, 0, wp)])

        I1 = I2cca_fni
        I2 = A2cca_fni
        I3 = -2/kn*Acca_fn*Icca_ni
        I4 = -2/kn*Icca_fn*Acca_ni

        if (debug) then
            Iccb_fn = aimag(Icca(Z, b, 1, kf, lf, kn, ln))
            Iccb_ni = Icca(Z, b, 1, kn, ln, ki, li)
            A2ccb_fni = A2cca(Z, b, kf, lf, kn, ln, ki, li)
            Accb_fn = (nested_coul_integ(Z, b, c, 1, [ 1 ], [+1, +1], [ln, lf], [cmplx(kn, 0, wp), cmplx(kf, 0, wp)]) &
                     - nested_coul_integ(Z, b, c, 1, [ 1 ], [+1, -1], [ln, lf], [cmplx(kn, 0, wp), cmplx(kf, 0, wp)])) / (2*imu)
            Accb_ni =  nested_coul_integ(Z, b, c, 1, [ 1 ], [+1, +1], [li, ln], [cmplx(ki, 0, wp), cmplx(kn, 0, wp)])
            I2 = A2cca_fni - A2ccb_fni + 2/kn*(Iccb_fn - Icca_fn)*Accb_ni + 2/kn*(Iccb_ni - Icca_ni)*Accb_fn
            I3 = -2/kn*(Acca_fn - Accb_fn)*Icca_ni
            I4 = -2/kn*Icca_fn*(Acca_ni - Accb_ni)
        end if

        val = I1 + I2 + I3 + I4

        if (debug) then
            print '(a)', 'Calculating I2cc'
            print '(4x,f7.4,i4)', ki, li
            print '(4x,f7.4,i4)', kn, ln
            print '(4x,f7.4,i4)', kf, lf
            print '(4x,a,*(e25.15))', 'I1     ', I1
            print '(4x,a,*(e25.15))', 'I2     ', I2
            print '(4x,a,*(e25.15))', 'I3     ', I3
            print '(4x,a,*(e25.15))', 'I4     ', I4
            print '(4x,a,*(e25.15))', 'val    ', val
        end if

    end function I2cc


    !> \brief   Partial-wave-dependent asymptotic correction
    !> \authors J Benda
    !> \date    2024
    !>
    !> Approximate correcting factor for calculation of two-photon matrix element from a one-photon matrix element.
    !>
    complex(wp) function Akkl (kappa, lambda, k, l, b)

        use multidip_params,  only: imu
        use multidip_special, only: cphase

        integer,  intent(in) :: lambda, l, b
        real(wp), intent(in) :: kappa, k

        real(wp) :: sigma, Z = 1

        sigma = cphase(Z, l, k) - cphase(Z, lambda, kappa)
        Akkl = 2/sqrt(kappa*k) * imu**(lambda - l) * exp(imu*sigma) * Icc(Z, b, k, l, kappa, lambda)

    end function Akkl


    !> \brief   Third-order asymptotic correction
    !> \authors J Benda
    !> \date    2024
    !>
    complex(wp) function Akkl3 (ki, li, kn, ln, kf, lf)

        use multidip_params,  only: imu
        use multidip_special, only: cphase

        integer,  intent(in) :: li, ln, lf
        real(wp), intent(in) :: ki, kn, kf

        real(wp) :: sigma, Z = 1

        sigma = cphase(Z, lf, kf) - cphase(Z, li, ki)
        Akkl3 = 2/sqrt(kf*ki) * imu**(li - lf) * exp(imu*sigma) * I2cc(Z, kf, lf, kn, ln, ki, li)

    end function Akkl3


    !> \brief   Prefactor in asymptotic 2-photon matrix element
    !> \author  J Benda
    !> \date    2021 - 2024
    !>
    !> Target-independent complex prefactor in the 2-photon ionization matrix element as obtained
    !> in the asymptotic theory of Dahlström et al (2013), including the long-range amplitude correction.
    !>
    complex(wp) function Akk (kappa, k, l, cc)

        use multidip_params,  only: imu, pi
        use multidip_special, only: complex_gamma, cphase

        real(wp), intent(in) :: kappa, k
        integer,  intent(in) :: l, cc

        complex(wp) :: G
        real(wp)    :: eta, Z = 1

        if (cc == 2) then
            ! hydrogen s-p-s correction (or s-p-p if l = 0)
            Akk = 2/kappa * M2_H1s(l, 1, kappa, 1 - l, k) / (exp(imu*cphase(Z, 1, kappa)) * M1_H1s(1, kappa))
            return
        end if

        if (cc == 3) then
            ! exact continuum-continuum transition integral for p-s pathway
            Akk = Akkl(kappa, 1, k, 0, l)
            return
        end if

        eta = Z/kappa - Z/k
        G = complex_gamma(l, eta)

        Akk = exp(-eta*pi/2)/sqrt(kappa*k) * (imu/(kappa - k))**(l + 1) &
            * (2*kappa)**(imu/kappa)/(2*k)**(imu/k) * G / (kappa - k)**(imu*eta)

        if (cc == 1) then
            ! long-range amplitude correction
            Akk = Akk * (1 + imu/2*(1/kappa**2 + 1/k**2)*(kappa - k)/cmplx(1, 1/kappa - 1/k, wp))
        end if

    end function Akk


    !> \brief   Two-photon ionization of hydrogen atom
    !> \authors J Benda
    !> \date    2024
    !>
    !> Evaluate the full partial amplitude of two-photon ionization of hydrogen atom from the ground state via the
    !> s→p→lf angular momentum pathway. The result is proportional to a double indefinite integral, but one of the
    !> integrations is effectively definite due to the exponential decay of the hydrogen bound state wave function.
    !> Disregarding some normalization factors we have
    !> \f[
    !>      M_l^{(2)} = i^{-l} e^{i\sigma_l} \int_0^\infty \int_0^\infty F_l(r) r^b F_1(r_<) H_1^+(r_>) r' P_{10}(r') dr dr'.
    !> \f]
    !> The integral can be decomposed into two contributions, the finite-range double integral
    !> \f[
    !>      I = \int_0^a \int_0^a F_l(r) r^b F_1(r_<) H_1^+(r_>) r' P_{10}(r') dr dr',
    !> \f]
    !> which is integrated by means of a two-dimensional Romberg quadrature, and the product of two one-dimensional
    !> integrals
    !> \f[
    !>      J = \int_a^\infty F_l(r) r^b H_1^+(r) dr  \int_0^\infty F_1(r) r P_{10}(r) dr ,
    !> \f]
    !> of which the first is integrated by asymptotic integration routine from MULTIDIP and the second has a closed
    !> form. The radius "a" has to be sufficiently large so that the electron density of the bound state vanishes and
    !> also to make sure that the asymptotic integration gives meaningful results. In the present implementation it
    !> is set to 100 atomic units.
    !>
    complex(wp) function M2_H1s (b, ln, kn, lf, kf) result (M2)

        use multidip_integ,   only: nested_coul_integ
        use multidip_params,  only: imu
        use multidip_special, only: coulH, cphase

        real(wp), intent(in) :: kn, kf
        integer,  intent(in) :: b, ln, lf

        real(wp), parameter :: a = 200      ! inner region size
        real(wp), parameter :: c = 0        ! damping factor
        integer,  parameter :: mmax = 13    ! log2 of the maximal number of 2D quadrature points in each dimension

        real(wp), allocatable :: Flf(:), F1n(:), G1n(:), P1s(:)

        integer     :: i, j, qi, qj, m, n, stride
        complex(wp) :: IA, II, IJ, Hlf, H1n, romb(0:mmax, 0:mmax)
        real(wp)    :: IB, dr, ri, rj, wt, nu, Z = 1

        nu = 2

        ! evaluate the separated part
        IA = (nested_coul_integ(Z, a, c, 1, [ b ], [+1, +1], [lf, ln], [cmplx(kf, 0, wp), cmplx(kn, 0, wp)]) &
            - nested_coul_integ(Z, a, c, 1, [ b ], [-1, +1], [lf, ln], [cmplx(kf, 0, wp), cmplx(kn, 0, wp)])) / (2*imu)
        IB = M1_H1s(ln, kn)
        IJ = IA*IB

        ! allocate workspaces for the double integral
        !   - the zeroth index corresponds to left boundary of the integration domain where the integrand is zero, so it is not used
        !     neither allocated
        !   - the last index corresponds to the right boundary
        allocate (Flf(2**mmax), F1n(2**mmax), G1n(2**mmax), P1s(2**mmax))

        ! obtain all needed function values
        do i = 1, 2**mmax
            ri = i*a/2**mmax
            Hlf = coulH(Z, +1, lf, kf*kf/2, ri)
            H1n = coulH(Z, +1, ln, kn*kn/2, ri)
            Flf(i) = aimag(Hlf)
            F1n(i) = aimag(H1n)
            G1n(i) = real(H1n)
            P1s(i) = 2 * ri**ln * exp(-nu*ri)
        end do

        ! loop over all subdivision levels for the double integral
        II = 0
        romb = 0
        do m = 0, mmax

            ! define grid spacing at this subdivision level
            n = 2**m
            stride = 2**(mmax - m)
            dr = a/n

            ! apply nested 2D trapezoidal quadrature rule
            do i = 1, n
                ri = i*dr
                qi = i*stride
                do j = i, n
                    rj = j*dr
                    qj = j*stride
                    if (mod(i, 2) /= 0 .or. mod(j, 2) /= 0) then
                        wt = 1._wp/(1 + merge(1, 0, i == n) + merge(1, 0, j == n))
                        II = II + wt*Flf(qi)*ri**b*F1n(qi)*(G1n(qj) + imu*F1n(qj))*rj*P1s(qj)
                        if (i /= j) then
                            II = II + wt*Flf(qj)*rj**b*(G1n(qj) + imu*F1n(qj))*F1n(qi)*ri*P1s(qi)
                        end if
                    end if
                end do
            end do

            ! update Richardson extrapolation table
            romb(m, 0) = II*dr*dr
            do n = 1, m
                do i = n, m
                    romb(i, n) = (4**n*romb(i, n - 1) - romb(i - 1, n - 1))/(4**n - 1)
                end do
            end do

        end do

        II = romb(mmax, mmax)
        M2 = imu**(-lf) * exp(imu*cphase(Z, lf, kf)) * (II + IJ)

    end function M2_H1s

end module multidip_asy
