! Copyright 2021
!
! For a comprehensive list of the developers that contributed to these codes
! see the UK-AMOR website.
!
! This file is part of UKRmol-out (UKRmol+ suite).
!
!     UKRmol-out is free software: you can redistribute it and/or modify
!     it under the terms of the GNU General Public License as published by
!     the Free Software Foundation, either version 3 of the License, or
!     (at your option) any later version.
!
!     UKRmol-out is distributed in the hope that it will be useful,
!     but WITHOUT ANY WARRANTY; without even the implied warranty of
!     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!     GNU General Public License for more details.
!
!     You should have received a copy of the GNU General Public License
!     along with  UKRmol-out (in source/COPYING). Alternatively, you can also visit
!     <https://www.gnu.org/licenses/>.
!
!> \brief   Levin quadrature for numerical integration
!> \author  J Benda
!> \date    2021
!>
!> This module implements the efficient Levin quadrature for numerical integration of oscillatory functions. It is used
!> by multidip_integ to integrate products of Coulomb-Hankel functions. Its advantage lies particularly in the fact that
!> it only requires evaluation of the Coulomb functions at the endpoints of the integration range; within the integration
!> range it works with recurrence formulas only. Given how expensive the evaluation of the Coulomb-Hankel function is,
!> using Levin integration in place of the straightforward Romberg integration speeds up the calculations by several
!> orders of magnitude.
!>
!> See the following papers for more details on the theory:
!> - D. Levin, *Fast integration of rapidly oscillatory functions*, J. Comput. Appl. Math. **67** (1996) 95-101.
!> - J. L. Powell, *Recurrence formulas for Coulomb functions*, Phys. Rev. **72** (1947) 626.
!>
module multidip_levin

    use precisn_gbl,    only: wp

    implicit none

contains

    !> \brief   Numerically correct asymptotic approximation of the Coulomb-Green integral (Levin integration)
    !> \author  J Benda
    !> \date    2021 - 2023
    !>
    !> This function computes the integral of the Coulomb-Green's integrand over Q = (a,+∞)^N \ (b,+∞)^N numerically.
    !> Currently, Levin integration based on the Chebyshev interpolation is used.
    !>
    !> \param Z  Residual ion charge
    !> \param a  Lower bound
    !> \param b  Upper bound
    !> \param c  Damping factor (additional exp(-c*r) added to all r^m functions)
    !> \param N  Dimension (number of integration variables)
    !> \param sa Sign of the right-most Coulomb-Hankel function
    !> \param sb Sign of the left-most Coulomb-Hankel function
    !> \param m  Array of integer powers of length N
    !> \param l  Array of angular momenta of length N + 1
    !> \param k  Array of linear momenta of length N + 1
    !> \param v  Value of the asymptotic integral to correct (updated on exit from this subroutine)
    !>
    !> \warning This is currently only implemented for one-dimensional case containing no
    !>          Green's function at all.
    !>
    !> \todo Generalize for arbitrary dimension!
    !>
    subroutine nested_cgreen_correct_levin (Z, a, b, c, N, sa, sb, m, l, k, v)

        integer,     intent(in)    :: N, sa, sb, m(:), l(:)
        real(wp),    intent(in)    :: Z, a, b, c
        complex(wp), intent(in)    :: k(:)
        complex(wp), intent(inout) :: v

        ! WARNING: Implemented for 1D integrals only
        if (N == 1) then
            v = v + levin_adapt(Z, a, b, c, m(1), sa, sb, l(1), l(2), k(1), k(2))
        end if

    end subroutine nested_cgreen_correct_levin


    !> \brief   One-dimensional adaptive Levin integration
    !> \author  J Benda
    !> \date    2021 - 2023
    !>
    !> As needed, recursively subdivide the complete integration range and apply a fixed-order Levin quadrature to the
    !> subintervals until further subdivision changes the resulting integral only a little (within tolerance).
    !>
    !> A scheme illustrating the indexing of nodes at different subdivision levels:
    !>
    !> \verbatim
    !>  (ra)                                    (rb)
    !>   1                                       0
    !>   |---------------------------------------|              (depth 0)
    !>
    !>   2                   3                   0
    !>   |-------------------|-------------------|              (depth 1)
    !>
    !>   4         5         6         7         0
    !>   |---------|---------|---------|---------|              (depth 2)
    !>
    !>   8    9   10   11   12   13   14   15    0
    !>   |----|----|----|----|----|----|----|----|              (depth 3)
    !>
    !>   ... etc ...
    !> \endverbatim
    !>
    !> The advantage of this scheme is that Nodes whose indices differ only by an even multiplicative factor are equivalent
    !> (collocated) and can be uniquely indexed by the further irreducible odd number obtained by repeated division by two.
    !> The repeated division by two can be efficiently implemented as a right bit shift (`shiftr`) by offset given by the number
    !> of trailing zeros (`trailz`) in the binary representation of the index, often together amounting to just two machine
    !> instructions.
    !>
    !> Intervals have the same index as their left point. That is, the top-level interval is the interval 1, consisting of two
    !> subintervals 2 and 3, each of which is further composed of further subintervals.
    !>
    !> The positive-energy Coulomb wave functions are evaluated only in nodes adjacent to intervals where the integral has not
    !> yet converged. The negative-energy Coulomb functions as well as the rest of the integrand is repeatedly evaluated in
    !> Chebyshev nodes within each subinterval, see levin_integrate_2x2 and levin_integrate_4x4.
    !>
    !> \param Z  Residual ion charge
    !> \param ra Lower bound
    !> \param rb Upper bound
    !> \param c  Damping factor (additional exp(-c*r) added to all r^m functions)
    !> \param m  Radial coordinate power
    !> \param s1 Sign of the first Coulomb-Hankel function
    !> \param s2 Sign of the second Coulomb-Hankel function
    !> \param l1 Angular momentum of the first Coulomb-Hankel function
    !> \param l2 Angular momentum of the second Coulomb-Hankel function
    !> \param k1 Complex momentum of the first Coulomb-Hankel function
    !> \param k2 Complex momentum of the second Coulomb-Hankel function
    !>
    complex(wp) function levin_adapt (Z, ra, rb, c, m, s1, s2, l1, l2, k1, k2) result (integ)

        use multidip_params, only: max_levin_level

        real(wp),    intent(in) :: Z, ra, rb, c
        complex(wp), intent(in) :: k1, k2
        integer,     intent(in) :: m, s1, s2, l1, l2

        integer, parameter :: max_intervals = 2**max_levin_level

        integer     :: depth
        logical     :: converged(max_intervals)
        complex(wp) :: estimates(max_intervals)
        complex(wp) :: Hl_buffer(2, 2, max_intervals + 1)

        ! at the beginning the integral over the whole integration range is definitely not converged
        converged(1) = .false.

        ! for all binary subdivision orders
        do depth = 0, max_levin_level - 1

            ! precalculate Coulomb function in subinterval endpoints for this subdivision of the integration interval
            call levin_prepare(Z, ra, rb, s1, s2, l1, l2, k1, k2, depth, converged, Hl_buffer)

            ! improve accuracy of not-yet-converged subintervals from previous subdivision
            call levin_improve(Z, ra, rb, c, m, l1, l2, k1, k2, 0, 1, depth, converged, Hl_buffer, estimates)

            ! abort on NaN
            if (.not. estimates(1) == estimates(1)) then
                print '(a)', 'WARNING: NaN encountered during Levin quadrature'
                exit
            end if

            ! if the top-level integral converged, terminate
            if (converged(1)) exit

        end do

        ! if full subdivision allowance was exhausted without convergence, complain
        if (depth == max_levin_level) then
            print '(A,I0,A)', 'WARNING: Adaptive Levin integration did not converge in ', max_levin_level, ' subdivisions'
        end if

        ! return the best stimate of the top-level integration interval
        integ = estimates(1)

    end function levin_adapt


    !> \brief   Precalculate Coulomb-Hankel functions for subsequent Levin integration
    !> \author  J Benda
    !> \date    2021 - 2023
    !>
    !> Set up the given subdivision depth for subsequent use. This involves evaluation of the Coulomb functions at the
    !> endpoints of the subintervals of the not-yet-converged parent intervals, as well as marking these subintervals
    !> of the not-yet-converged parent intervals as "not converged", while marking subintervals of the already converged
    !> parent intervals as "converged" (and not evaluating Coulomb functions for their sake).
    !>
    !> \param Z  Residual ion charge
    !> \param ra Lower bound
    !> \param rb Upper bound
    !> \param s1 Sign of the first Coulomb-Hankel function
    !> \param s2 Sign of the second Coulomb-Hankel function
    !> \param l1 Angular momentum of the first Coulomb-Hankel function
    !> \param l2 Angular momentum of the second Coulomb-Hankel function
    !> \param k1 Complex momentum of the first Coulomb-Hankel function
    !> \param k2 Complex momentum of the second Coulomb-Hankel function
    !> \param depth      Current subdivision depth resulting in 2**depth subintervals
    !> \param converged  Logical flags per interval at all subdivision depths indicating whether its value needs improving
    !> \param Hl_buffer  Coulomb-Hankel functions evaluated at unique subdivision nodes (for l1, l1+1, l2 and l2+1)
    !>
    subroutine levin_prepare (Z, ra, rb, s1, s2, l1, l2, k1, k2, depth, converged, Hl_buffer)

        integer,     intent(in)    :: s1, s2, l1, l2, depth
        real(wp),    intent(in)    :: Z, ra, rb
        complex(wp), intent(in)    :: k1, k2
        complex(wp), intent(inout) :: Hl_buffer(2, 2, *)
        logical,     intent(inout) :: converged(:)

        real(wp) :: r
        integer  :: num_intervals, first_interval_idx, interval_idx, parent_interval_idx, evaluation_point_idx, storage_point_idx

        num_intervals = 2**depth
        first_interval_idx = 2**depth

        ! when called for the first time, evaluate Coulomb functions at the end points of the total integration range
        if (depth == 0) then
            call levin_eval(Z, ra, s1, s2, l1, l2, k1, k2, Hl_buffer(:, :, 2))
            call levin_eval(Z, rb, s1, s2, l1, l2, k1, k2, Hl_buffer(:, :, 1))
            return
        end if

        ! evaluate Coulomb functions in all other needed points at this subdivision level
        do interval_idx = first_interval_idx, first_interval_idx + num_intervals - 1, 2

            ! check if the parent interval (if any) of this sub-interval is converged
            if (depth > 0) then

                parent_interval_idx = interval_idx / 2

                ! if yes, mark this sub-interval as converged, too, and move on
                if (converged(parent_interval_idx)) then
                    converged(interval_idx) = .true.
                    converged(interval_idx + 1) = .true.
                    cycle
                end if

            end if

            ! otherwise mark it as unconverged
            converged(interval_idx) = .false.
            converged(interval_idx + 1) = .false.

            ! only odd index (new in this subdivision level) needs to be precomputed, which is the right endpoint of this interval
            evaluation_point_idx = interval_idx + 1
            storage_point_idx = (evaluation_point_idx + 3)/2

            ! calculate position
            r = Ra + (evaluation_point_idx - first_interval_idx) * (Rb - Ra) / num_intervals

            ! perform the evaluation of Coulomb functions
            call levin_eval(Z, r, s1, s2, l1, l2, k1, k2, Hl_buffer(:, :, storage_point_idx))

        end do

    end subroutine levin_prepare


    !> \brief   Evaluate a pair of Coulomb-Hankel functions in given point
    !> \author  J Benda
    !> \date    2021 - 2024
    !>
    !> Evaluate elements of the Coulomb-Hankel functions matrix:
    !> \f[
    !>     \mathsf{H} = \left(\begin{matrix}
    !>          H_{l_1    }^{s_1}(-1/k_1, k_1 r)  &  H_{l_2    }^{s_2}(-1/k_2, k_2 r) \\
    !>          H_{l_1 + 1}^{s_1}(-1/k_1, k_1 r)  &  H_{l_2 + 1}^{s_2}(-1/k_2, k_2 r)
    !>     \end{matrix}\right)
    !> \f]
    !> This subroutine will not distinguish between positive- and negative-energy Coulomb functions (the input momenta
    !> are complex), but only the positive-energy values are actually later used in the rest of the code. If the sign
    !> s1 or s2 is zero, the standing regular Coulomb function F is assumed instead of the Coulomb-Hankel function.
    !>
    !> \param Z  Residual ion charge
    !> \param r  Evaluation radius
    !> \param s1 Sign of the first Coulomb-Hankel function
    !> \param s2 Sign of the second Coulomb-Hankel function
    !> \param l1 Angular momentum of the first Coulomb-Hankel function
    !> \param l2 Angular momentum of the second Coulomb-Hankel function
    !> \param k1 Complex momentum of the first Coulomb-Hankel function
    !> \param k2 Complex momentum of the second Coulomb-Hankel function
    !> \param Hl Coulomb-Hankel functions evaluated at the evaluation radius (for l1, l1+1, l2 and l2+1)
    !>
    subroutine levin_eval (Z, r, s1, s2, l1, l2, k1, k2, Hl)

        use multidip_special, only: coulH

        real(wp),    intent(in)    :: Z, r
        integer,     intent(in)    :: s1, s2, l1, l2
        complex(wp), intent(in)    :: k1, k2
        complex(wp), intent(inout) :: Hl(2, 2)

        real(wp) :: Ek1, Ek2

        Ek1 = real(k1**2, wp)/2
        Ek2 = real(k2**2, wp)/2

        if (s1 == 0) then
            Hl(1, 1) = aimag(coulH(Z, +1, l1,     Ek1, r))
            Hl(2, 1) = aimag(coulH(Z, +1, l1 + 1, Ek1, r))
        else
            Hl(1, 1) = coulH(Z, s1, l1,     Ek1, r)
            Hl(2, 1) = coulH(Z, s1, l1 + 1, Ek1, r)
        end if

        if (s2 == 0) then
            Hl(1, 2) = aimag(coulH(Z, +1, l2,     Ek2, r))
            Hl(2, 2) = aimag(coulH(Z, +1, l2 + 1, Ek2, r))
        else
            Hl(1, 2) = coulH(Z, s2, l2,     Ek2, r)
            Hl(2, 2) = coulH(Z, s2, l2 + 1, Ek2, r)
        end if

    end subroutine levin_eval


    !> \brief   Perform one subdivision iteration of adaptive Levin integration and update estimates
    !> \author  J Benda
    !> \date    2021 - 2023
    !>
    !> Recurse to the deepest target depth and evaluate integral estimates in all sub-intervals marked as "not converged".
    !> Use these new values to improve estimates of all parent intervals (on all parent levels). If the updated estimate of
    !> any interval changes within the builtin tolerance, mark it (and its subintervals) as converged.
    !>
    !> This subroutine checks the convergence using a relative and absolute tolerance. For convergence, either the relative
    !> change between the base and improved estimate has to meet the relative tolerance, or the absolute difference between
    !> these two estimates has to meet the absolute tolerance. The absolute tolerance is calculated as a product of the
    !> relative tolerance and the estimate of the top-level integral.
    !>
    !> \param Z  Residual ion charge
    !> \param ra Lower bound
    !> \param rb Upper bound
    !> \param c  Damping factor (additional exp(-c*r) added to all r^m functions)
    !> \param m  Radial coordinate power
    !> \param l1 Angular momentum of the first Coulomb-Hankel function
    !> \param l2 Angular momentum of the second Coulomb-Hankel function
    !> \param k1 Complex momentum of the first Coulomb-Hankel function
    !> \param k2 Complex momentum of the second Coulomb-Hankel function
    !> \param depth         Subdivision depth that gave rise to the current integration interval
    !> \param interval_idx  Index of the current integration interval
    !> \param tgt_depth     Current subdivision depth resulting in 2**depth subintervals
    !> \param converged     Logical flags per interval at all subdivision depths indicating whether its value needs improving
    !> \param Hl            Coulomb-Hankel functions evaluated at unique subdivision nodes (for l1, l1+1, l2 and l2+1)
    !> \param estimates     Integral estimates for all intervals in all subdivision levels
    !>
    recursive subroutine levin_improve (Z, ra, rb, c, m, l1, l2, k1, k2, depth, interval_idx, tgt_depth, converged, Hl, estimates)

        real(wp),    intent(in)    :: Z, ra, rb, c
        complex(wp), intent(in)    :: k1, k2
        integer,     intent(in)    :: m, l1, l2, depth, interval_idx, tgt_depth
        complex(wp), intent(in)    :: Hl(2, 2, *)
        logical,     intent(inout) :: converged(:)
        complex(wp), intent(inout) :: estimates(:)

        integer  :: num_local_intervals, first_interval_idx, max_global_point_idx, left_point_global_idx, right_point_global_idx
        integer  :: left_point_local_idx, right_point_local_idx, left_point_unique_idx, right_point_unique_idx
        integer  :: left_point_storage_idx, right_point_storage_idx, left_subinterval_idx, right_subinterval_idx
        real(wp) :: a, b, epsrel, epsabs, delta, reference

        complex(wp) :: best_estimate,improved_estimate

        ! at the target depth simply evaluate the integrals over the given subinterval and store the estimates
        if (depth == tgt_depth) then

            num_local_intervals = 2**depth
            first_interval_idx = 2**depth
            max_global_point_idx = 2**(depth + 1)

            left_point_global_idx = interval_idx;
            right_point_global_idx = left_point_global_idx + 1

            left_point_local_idx = interval_idx - first_interval_idx
            right_point_local_idx = left_point_local_idx + 1

            left_point_unique_idx = shiftr(left_point_global_idx, trailz(left_point_global_idx))
            right_point_unique_idx = shiftr(mod(right_point_global_idx, max_global_point_idx), trailz(right_point_global_idx))

            left_point_storage_idx = (left_point_unique_idx + 3)/2
            right_point_storage_idx = merge(1, (right_point_unique_idx + 3)/2, right_point_unique_idx == 0)

            a = ra + left_point_local_idx * (rb - ra) / num_local_intervals
            b = ra + right_point_local_idx * (rb - ra) / num_local_intervals

            estimates(interval_idx) = levin_integrate(Z, a, b, c, m, l1, l2, k1, k2, &
                                                      Hl(:, :, left_point_storage_idx), &
                                                      Hl(:, :, right_point_storage_idx))

            return

        end if

        ! get subinterval indices
        left_subinterval_idx = 2*interval_idx;
        right_subinterval_idx = 2*interval_idx + 1;

        ! update all non-converged sub-intervals
        if (.not. converged(left_subinterval_idx)) then
            call levin_improve(Z, ra, rb, c, m, l1, l2, k1, k2, &
                               depth + 1, left_subinterval_idx, tgt_depth, converged, Hl, estimates)
        end if
        if (.not. converged(right_subinterval_idx)) then
            call levin_improve(Z, ra, rb, c, m, l1, l2, k1, k2, &
                               depth + 1, right_subinterval_idx, tgt_depth, converged, Hl, estimates)
        end if

        ! check convergence by comparing estimate for this interval to the sum of the estimates for the two sub.intervals
        best_estimate = estimates(interval_idx)
        improved_estimate = estimates(left_subinterval_idx) + estimates(right_subinterval_idx)
        epsrel = 1e-7
        epsabs = epsrel * abs(estimates(1))  ! TODO ... take into account there are more unconverged subintervals in this depth
        delta = abs(best_estimate - improved_estimate)
        reference = 0.5 * (abs(best_estimate) + abs(improved_estimate))
        if (delta < epsabs * reference .or. delta < epsabs) then
            converged(interval_idx) = .true.
            converged(left_subinterval_idx) = .true.
            converged(right_subinterval_idx) = .true.
        end if

        ! update the estimate for this interval with the sum of estimates for the two subintervals
        estimates(interval_idx) = improved_estimate

    end subroutine levin_improve


    !> \brief   Fixed-order Levin integration
    !> \author  J Benda
    !> \date    2021 - 2024
    !>
    !> Integrate product of two Coulomb-Hankel functions, coordinate power and radial exponential,
    !> \f[
    !>      I = \int\limits_{r_a}^{r_b} H_{l_1}^{s_1}(\eta_1, k_1 r) r^{m}
    !>                 \mathrm{e}^{-cr} H_{l_2}^{s_2}(\eta_2, k_2 r) \mathrm{d}r
    !> \f]
    !> (accepting both positive- and negative-energy Coulomb functions) using the method from
    !> "D. Levin, Fast integration of rapidly oscillatory functions, J. Comput. Appl. Math. 67 (1996) 95-101".
    !>
    !> Coulomb functions at end points are not evaluated in this function and need to be provided via arguments `Hla` and `Hlb`,
    !> see levin_eval.
    !>
    !> \param Z  Residual ion charge
    !> \param ra Lower bound
    !> \param rb Upper bound
    !> \param c  Damping factor (additional exp(-c*r) added to all r^m functions)
    !> \param m  Radial coordinate power
    !> \param l1 Angular momentum of the first Coulomb-Hankel function
    !> \param l2 Angular momentum of the second Coulomb-Hankel function
    !> \param k1 Complex momentum of the first Coulomb-Hankel function
    !> \param k2 Complex momentum of the second Coulomb-Hankel function
    !> \param Hla Coulomb-Hankel functions evaluated at ra (for l1, l1+1, l2 and l2+1)
    !> \param Hlb Coulomb-Hankel functions evaluated at rb (for l1, l1+1, l2 and l2+1)
    !>
    complex(wp) function levin_integrate (Z, ra, rb, c, m, l1, l2, k1, k2, Hla, Hlb) result (integ)

        use mpi_gbl, only: mpi_xermsg

        real(wp),    intent(in)    :: Z, ra, rb, c
        complex(wp), intent(in)    :: k1, k2
        integer,     intent(in)    :: m, l1, l2
        complex(wp), intent(in)    :: Hla(2, 2), Hlb(2, 2)

        if (aimag(k1) == 0 .and. aimag(k2) == 0) then
            ! free-free transition, both Coulomb functions have positive energy and oscillate -> use 4-dimensional Levin integrator
            integ = levin_integrate_4x4(Z, ra, rb, c, m, l1, l2, real(k1, wp), real(k2, wp), Hla, Hlb)
        else if (aimag(k1) == 0 .and. aimag(k2) /= 0) then
            ! free-closed transition, only H1 has positive energy and oscillates -> use 2-dimensional Levin integrator
            integ = levin_integrate_2x2(Z, ra, rb, c, m, l2, l1, aimag(k2), real(k1, wp), Hla(:, 1), Hlb(:, 1))
        else if (aimag(k1) /= 0 .and. aimag(k2) == 0) then
            ! closed-free transition, only H2 has positive energy and oscillates -> use 2-dimensional Levin integrator
            integ = levin_integrate_2x2(Z, ra, rb, c, m, l1, l2, aimag(k1), real(k2, wp), Hla(:, 2), Hlb(:, 2))
        else
            call mpi_xermsg('multidip_levin', 'levin_integrate', 'Levin integrator cannot integrate closed-closed integrals.', 1, 1)
        end if

    end function levin_integrate


    !> \brief   Fixed-order Levin integration (dim 2)
    !> \author  J Benda
    !> \date    2021 - 2024
    !>
    !> Integrate product of a Coulomb-Hankel function, decreasing real Whittaker function, coordinate power and radial exponential,
    !> \f[
    !>      I = \int\limits_{r_a}^{r_b} H_{l_1}^{s_1}(\eta_1, k_1 r) r^{m}
    !>                 \mathrm{e}^{-cr} W_{1/|k_2|,l_2+1/2}(2 |k_2| r) \mathrm{d}r
    !> \f]
    !> using the method from
    !> "D. Levin, Fast integration of rapidly oscillatory functions, J. Comput. Appl. Math. 67 (1996) 95-101".
    !>
    !> The Levin's objective function \f$ \mathbf{w}(r) \f$ has the following two components, where the first of these
    !> it the one of interest:
    !> \f[
    !>     \mathbf{w}(r) = \left(\begin{matrix}
    !>          H_{l_1    }^{s_1}(\eta_1, k_1 r) \\
    !>          H_{l_1 + 1}^{s_1}(\eta_1, k_1 r)
    !>     \end{matrix}\right) .
    !> \f]
    !>
    !> The Levin matrix \f$ \mathsf{A}(r) = \mathsf{a}/r + \mathsf{b} \f$ is constructed from the known
    !> Coulomb function recursion relations,
    !> \f[
    !>      \frac{\mathrm{d}}{\mathrm{d}r} H_{l+1}(\eta, kr)
    !>           = k R_{l+1}(\eta) H_{l}(\eta, kr) - k S_{l+1}(\eta, kr) H_{l+1}(\eta, kr) \,,
    !> \f]
    !> \f[
    !>      \frac{\mathrm{d}}{\mathrm{d}r} H_{l}(\eta, kr)
    !>           = k S_{l+1}(\eta, kr) H_{l}(\eta, kr) - k R_{l+1}(\eta) H_{l+1}(\eta, kr) \,.
    !> \f]
    !> where
    !> \f[
    !>      R_{l}(\eta) = \sqrt{1 + \eta^2/l^2} \,, \qquad
    !>      S_{l}(\eta, \rho) = l/\rho + \eta/l \,.
    !> \f]
    !> See for example "J. L. Powell, Recurrence formulas for Coulomb functions, Phys. Rev. 72 (1947) 626" or DLMF §33.4.
    !>
    !> The two-component Levin auxiliary non-oscillatory function \f$ \mathbf{p}(r) \f$ is expanded in Chebyshev polynomials
    !> of order 5 and the expansion coefficients obtained from collocation equation of rank 6. Collocation points are chosen
    !> to be idential to Chebyshev nodes. This should result in the best possible interpolation of \f$ \mathbf{p}(r) \f$
    !> for given Chebyshev order.
    !>
    !> Coulomb functions at end points are not evaluated in this function and need to be provided via arguments `wa` and `wb`.
    !>
    !> \param Z  Residual ion charge
    !> \param ra Lower bound
    !> \param rb Upper bound
    !> \param c  Damping factor (additional exp(-c*r) added to all r^m functions)
    !> \param m  Radial coordinate power
    !> \param lc Angular momentum of the negative-energy Coulomb-Hankel function
    !> \param lo Angular momentum of the positive-energy Coulomb-Hankel function
    !> \param kc Magnitude of the imaginary momentum of the negative-energy Coulomb-Hankel function
    !> \param ko Magnitude of the real momentum of the positive-energy Coulomb-Hankel function
    !> \param wa Positive-energy Coulomb-Hankel function evaluated at ra (for lo and lo+1)
    !> \param wb Positive-energy Coulomb-Hankel function evaluated at rb (for lo and lo+1)
    !>
    complex(wp) function levin_integrate_2x2 (Z, ra, rb, c, m, lc, lo, kc, ko, wa, wb) result (integ)

        use blas_lapack_gbl,  only: blasint
        use mpi_gbl,          only: mpi_xermsg
        use multidip_params,  only: rone, rhalf, cheb_order
        use multidip_special, only: blas_dgetrf => dgetrf, blas_dgetrs => dgetrs, coulH
        use phys_const_gbl,   only: pi

        real(wp),    intent(in)    :: Z, ra, rb, c, kc, ko
        integer,     intent(in)    :: m, lc, lo
        complex(wp), intent(in)    :: wa(2), wb(2)

        integer, parameter :: levin_dim = 2
        integer, parameter :: mat_dim = levin_dim * (cheb_order + 1)

        integer  :: ipoly, inode, a, b, i, j, row, col, sgn, ifun

        real(wp) :: x, r, Rl, Wl, elem, cx
        real(wp) :: cheb_node(0 : cheb_order)
        real(wp) :: cheb_value(0 : cheb_order, 0 : cheb_order)
        real(wp) :: cheb_deriv(0 : cheb_order, 0 : cheb_order)
        real(wp) :: Aa(levin_dim, levin_dim)
        real(wp) :: Ab(levin_dim, levin_dim)
        real(wp) :: mat(mat_dim, mat_dim)
        real(wp) :: coef(mat_dim)
        real(wp) :: pa(levin_dim), pb(levin_dim)

        complex(wp) :: contrib_a, contrib_b, delta

        integer(blasint) :: n, pivots(mat_dim), info

        ! only allow positive non-empty intervals
        if (ra >= rb) then
            call mpi_xermsg('multidip_levin', 'levin_integrate_2x2', &
                            'Levin integration not implemented for non-positive intervals', 1, 1)
        end if

        ! populate split matrices of the recurrence relations (total matrix is LevinA = Aa/r + Ab)
        Rl = sqrt(ko*ko + Z*Z/((lo + 1)*(lo + 1)))

        Aa(1, 1) = + lo + 1;  Aa(1, 2) = 0
        Aa(2, 1) = 0;         Aa(2, 2) = - lo - 1

        Ab(1, 1) = -Z/(lo + 1);     Ab(1, 2) = -Rl
        Ab(2, 1) = +Rl;             Ab(2, 2) = +Z/(lo + 1)

        ! evaluate Chebyshev nodes as well as Chebyshev polynomials and their derivatives in these nodes
        do inode = 0, cheb_order

            x = cos((inode + rhalf) * pi / (cheb_order + 1))

            cheb_node(inode) = x

            cheb_value(inode, 0) = 1;   cheb_deriv(inode, 0) = 0
            cheb_value(inode, 1) = x;   cheb_deriv(inode, 1) = 1

            do ipoly = 1, cheb_order - 1
                cheb_value(inode, ipoly + 1) = 2*x*cheb_value(inode, ipoly) - cheb_value(inode, ipoly - 1)
                cheb_deriv(inode, ipoly + 1) = 2*(ipoly + 1)*cheb_value(inode, ipoly) &
                                                + (ipoly + 1)*cheb_deriv(inode, ipoly - 1) / max(1, ipoly - 1)
            end do

        end do

        ! construct the matrix of the equations
        do a = 1, levin_dim
            do i = 0, cheb_order
                row = (a - 1) * (cheb_order + 1) + i + 1
                do b = 1, levin_dim
                    do j = 0, cheb_order
                        col = (b - 1) * (cheb_order + 1) + j + 1
                        x = cheb_node(i)
                        r = ra + (x + 1) / 2 * (rb - ra)
                        elem = Aa(b, a)/r + Ab(b, a)
                        mat(row, col) = elem * cheb_value(i, j)
                        if (a == b) then
                            mat(row, col) = mat(row, col) + 2/(Rb - Ra) * cheb_deriv(i, j)
                        end if
                    end do
                end do
            end do
        end do

        ! construct the right-hand side (only populate elements pertaining to the first function)
        do a = 1, levin_dim
            do i = 0, cheb_order
                row = (a - 1) * (cheb_order + 1) + i + 1
                x = cheb_node(i)
                r = ra + (x + 1) * (rb - ra) / 2
                if (a == 1) then
                    Wl = real(coulH(Z, 0, lc, -kc*kc/2, r), wp)
                    coef(row) = r**m * exp(-c*r) * Wl
                else
                    coef(row) = 0
                end if
            end do
        end do

        ! solve the set of equations
        n = mat_dim
        call blas_dgetrf(n, n, mat, n, pivots, info)
        if (info /= 0) then
            call mpi_xermsg('multidip_levin', 'levin_integrate_2x2', &
                            'LU decomposition in Levin integration failed', int(info), 1)
        end if
        call blas_dgetrs('N', n, 1_blasint, mat, n, pivots, coef, n, info)
        if (info /= 0) then
            call mpi_xermsg('multidip_levin', 'levin_integrate_2x2', &
                            'LU backsubstitution in Levin integration failed', int(info), 1)
        end if

        ! evaluate the boundary terms
        do a = 1, levin_dim
            pa(a) = 0
            pb(a) = 0
            do i = 0, cheb_order
                row = (a - 1) * (cheb_order + 1) + i + 1
                cx = coef(row)
                sgn = merge(+1, -1, mod(i, 2) == 0)
                pa(a) = pa(a) + sgn*cx
                pb(a) = pb(a) + cx
            end do
        end do

        ! evaluate the integral
        integ = 0
        do ifun = 1, levin_dim
            contrib_a = wa(ifun) * pa(ifun)
            contrib_b = wb(ifun) * pb(ifun)
            delta = contrib_b - contrib_a
            integ = integ + delta
        end do

    end function levin_integrate_2x2


    !> \brief   Fixed-order Levin integration (dim 4)
    !> \author  J Benda
    !> \date    2021 - 2024
    !>
    !> Integrate product of two Coulomb-Hankel functions, coordinate power and radial exponential,
    !> \f[
    !>      I = \int\limits_{r_a}^{r_b} H_{l_1}^{s_1}(\eta_1, k_1 r) r^{m}
    !>                 \mathrm{e}^{-cr} H_{l_2}^{s_2}(\eta_2, k_2 r) \mathrm{d}r
    !> \f]
    !> using the method from
    !> "D. Levin, Fast integration of rapidly oscillatory functions, J. Comput. Appl. Math. 67 (1996) 95-101".
    !>
    !> The Levin's objective function \f$ \mathbf{w}(r) \f$ has the following four components, where the first of these
    !> it the one of interest:
    !> \f[
    !>     \mathbf{w}(r) = \left(\begin{matrix}
    !>          H_{l_1    }^{s_1}(\eta_1, k_1 r) H_{l_2    }^{s_2}(\eta_2, k_2 r) \\
    !>          H_{l_1    }^{s_1}(\eta_1, k_1 r) H_{l_2 + 1}^{s_2}(\eta_2, k_2 r) \\
    !>          H_{l_1 + 1}^{s_1}(\eta_1, k_1 r) H_{l_2    }^{s_2}(\eta_2, k_2 r) \\
    !>          H_{l_1 + 1}^{s_1}(\eta_1, k_1 r) H_{l_2 + 1}^{s_2}(\eta_2, k_2 r)
    !>     \end{matrix}\right) .
    !> \f]
    !>
    !> The Levin matrix \f$ \mathsf{A}(r) = \mathsf{a}/r + \mathsf{b} \f$ is constructed from the known
    !> Coulomb function recursion relations,
    !> \f[
    !>      \frac{\mathrm{d}}{\mathrm{d}r} H_{l+1}(\eta, kr)
    !>           = k R_{l+1}(\eta) H_{l}(\eta, kr) - k S_{l+1}(\eta, kr) H_{l+1}(\eta, kr) \,,
    !> \f]
    !> \f[
    !>      \frac{\mathrm{d}}{\mathrm{d}r} H_{l}(\eta, kr)
    !>           = k S_{l+1}(\eta, kr) H_{l}(\eta, kr) - k R_{l+1}(\eta) H_{l+1}(\eta, kr) \,.
    !> \f]
    !> where
    !> \f[
    !>      R_{l}(\eta) = \sqrt{1 + \eta^2/l^2} \,, \qquad
    !>      S_{l}(\eta, \rho) = l/\rho + \eta/l \,.
    !> \f]
    !> See for example "J. L. Powell, Recurrence formulas for Coulomb functions, Phys. Rev. 72 (1947) 626" or DLMF §33.4.
    !>
    !> The four-component Levin auxiliary non-oscillatory function \f$ \mathbf{p}(r) \f$ is expanded in Chebyshev polynomials
    !> of order 5 and the expansion coefficients obtained from collocation equation of rank 6. Collocation points are chosen
    !> to be idential to Chebyshev nodes. This should result in the best possible interpolation of \f$ \mathbf{p}(r) \f$
    !> for given Chebyshev order.
    !>
    !> Coulomb functions at end points are not evaluated in this function and need to be provided via arguments `Hla` and `Hlb`.
    !>
    !> \param Z  Residual ion charge.
    !> \param ra Lower bound
    !> \param rb Upper bound
    !> \param c  Damping factor (additional exp(-c*r) added to all r^m functions)
    !> \param m  Radial coordinate power
    !> \param l1 Angular momentum of the first Coulomb-Hankel function
    !> \param l2 Angular momentum of the second Coulomb-Hankel function
    !> \param k1 Complex momentum of the first Coulomb-Hankel function
    !> \param k2 Complex momentum of the second Coulomb-Hankel function
    !> \param Hla Coulomb-Hankel functions evaluated at ra  (for l1, l1+1, l2 and l2+1)
    !> \param Hlb Coulomb-Hankel functions evaluated at rb  (for l1, l1+1, l2 and l2+1)
    !>
    complex(wp) function levin_integrate_4x4 (Z, ra, rb, c, m, l1, l2, k1, k2, Hla, Hlb) result (integ)

        use blas_lapack_gbl,  only: blasint
        use mpi_gbl,          only: mpi_xermsg
        use multidip_params,  only: rone, rhalf, cheb_order
        use multidip_special, only: blas_dgetrf => dgetrf, blas_dgetrs => dgetrs
        use phys_const_gbl,   only: pi

        real(wp),    intent(in)    :: Z, ra, rb, c, k1, k2
        integer,     intent(in)    :: m, l1, l2
        complex(wp), intent(in)    :: Hla(2, 2), Hlb(2, 2)

        integer, parameter :: levin_dim = 4
        integer, parameter :: mat_dim = levin_dim * (cheb_order + 1)

        integer  :: ipoly, inode, a, b, i, j, row, col, sgn, ifun

        real(wp) :: x, r, R1, R2, elem, cx
        real(wp) :: cheb_node(0 : cheb_order)
        real(wp) :: cheb_value(0 : cheb_order, 0 : cheb_order)
        real(wp) :: cheb_deriv(0 : cheb_order, 0 : cheb_order)
        real(wp) :: Aa(levin_dim, levin_dim)
        real(wp) :: Ab(levin_dim, levin_dim)
        real(wp) :: mat(mat_dim, mat_dim)
        real(wp) :: coef(mat_dim)
        real(wp) :: pa(levin_dim), pb(levin_dim)

        complex(wp) :: Hln1a, Hln2a, Hln1b, Hln2b, Hlp1a, Hlp2a, Hlp1b, Hlp2b, wa(levin_dim), wb(levin_dim)
        complex(wp) :: contrib_a, contrib_b, delta

        integer(blasint) :: n, pivots(mat_dim), info

        ! only allow positive non-empty intervals
        if (ra >= rb) then
            call mpi_xermsg('multidip_levin', 'levin_integrate_4x4', &
                            'Levin integration not implemented for non-positive intervals', 1, 1)
        end if

        ! populate split matrices of the recurrence relations (total matrix is LevinA = Aa/r + Ab)
        Aa = 0
        Ab = 0

        R1 = sqrt(k1*k1 + Z/((l1 + 1)*(l1 + 1)))
        R2 = sqrt(k2*k2 + Z/((l2 + 1)*(l2 + 1)))

        Aa(1, 1) = + l1 + l2 + 2
        Aa(2, 2) = + l1 - l2
        Aa(3, 3) = - l1 + l2
        Aa(4, 4) = - l1 - l2 - 2

        Ab(1, 1) = - Z/(l1 + 1) - Z/(l2 + 1)
        Ab(2, 2) = - Z/(l1 + 1) + Z/(l2 + 1)
        Ab(3, 3) = + Z/(l1 + 1) - Z/(l2 + 1)
        Ab(4, 4) = + Z/(l1 + 1) + Z/(l2 + 1)

        Ab(1, 2) = -R2;  Ab(1, 3) = -R1
        Ab(2, 1) = +R2;  Ab(2, 4) = -R1
        Ab(3, 1) = +R1;  Ab(3, 4) = -R2
        Ab(4, 2) = +R1;  Ab(4, 3) = +R2

        ! evaluate Chebyshev nodes as well as Chebyshev polynomials and their derivatives in these nodes
        do inode = 0, cheb_order

            x = cos((inode + rhalf) * pi / (cheb_order + 1))

            cheb_node(inode) = x

            cheb_value(inode, 0) = 1;   cheb_deriv(inode, 0) = 0
            cheb_value(inode, 1) = x;   cheb_deriv(inode, 1) = 1

            do ipoly = 1, cheb_order - 1
                cheb_value(inode, ipoly + 1) = 2*x*cheb_value(inode, ipoly) - cheb_value(inode, ipoly - 1)
                cheb_deriv(inode, ipoly + 1) = 2*(ipoly + 1)*cheb_value(inode, ipoly) &
                                                + (ipoly + 1)*cheb_deriv(inode, ipoly - 1) / max(1, ipoly - 1)
            end do

        end do

        ! construct the matrix of the equations
        do a = 1, levin_dim
            do i = 0, cheb_order
                row = (a - 1) * (cheb_order + 1) + i + 1
                do b = 1, levin_dim
                    do j = 0, cheb_order
                        col = (b - 1) * (cheb_order + 1) + j + 1
                        x = cheb_node(i)
                        r = ra + (x + 1) / 2 * (rb - ra)
                        elem = Aa(b, a)/r + Ab(b, a)
                        mat(row, col) = elem * cheb_value(i, j)
                        if (a == b) then
                            mat(row, col) = mat(row, col) + 2/(Rb - Ra) * cheb_deriv(i, j)
                        end if
                    end do
                end do
                !print '(8x,SP,*(E10.3,1x))', real(mat(row, :), wp)
            end do
        end do

        ! construct the right-hand side (only populate elements pertaining to the first function)
        do a = 1, levin_dim
            do i = 0, cheb_order
                row = (a - 1) * (cheb_order + 1) + i + 1
                x = cheb_node(i)
                r = ra + (x + 1) * (rb - ra) / 2
                if (a == 1) then
                    coef(row) = r**m * exp(-c*r)
                else
                    coef(row) = 0
                end if
            end do
        end do

        ! solve the set of equations
        n = mat_dim
        call blas_dgetrf(n, n, mat, n, pivots, info)
        if (info /= 0) then
            call mpi_xermsg('multidip_levin', 'levin_integrate_4x4', &
                            'LU decomposition in Levin integration failed', int(info), 1)
        end if
        call blas_dgetrs('N', n, 1_blasint, mat, n, pivots, coef, n, info)
        if (info /= 0) then
            call mpi_xermsg('multidip_levin', 'levin_integrate_4x4', &
                            'LU backsubstitution in Levin integration failed', int(info), 1)
        end if

        Hln1a = Hla(1, 1); Hlp1a = Hla(2, 1)
        Hln2a = Hla(1, 2); Hlp2a = Hla(2, 2)
        Hln1b = Hlb(1, 1); Hlp1b = Hlb(2, 1)
        Hln2b = Hlb(1, 2); Hlp2b = Hlb(2, 2)

        wa = [ Hln1a * Hln2a, Hln1a * Hlp2a, Hlp1a * Hln2a, Hlp1a * Hlp2a ]
        wb = [ Hln1b * Hln2b, Hln1b * Hlp2b, Hlp1b * Hln2b, Hlp1b * Hlp2b ]

        ! evaluate the boundary terms
        do a = 1, levin_dim
            pa(a) = 0
            pb(a) = 0
            do i = 0, cheb_order
                row = (a - 1) * (cheb_order + 1) + i + 1
                cx = coef(row)
                sgn = merge(+1, -1, mod(i, 2) == 0)
                pa(a) = pa(a) + sgn*cx
                pb(a) = pb(a) + cx
            end do
        end do

        ! evaluate the integral
        integ = 0
        do ifun = 1, levin_dim
            contrib_a = wa(ifun) * pa(ifun)
            contrib_b = wb(ifun) * pb(ifun)
            delta = contrib_b - contrib_a
            integ = integ + delta
        end do

    end function levin_integrate_4x4

end module multidip_levin
