! Copyright 2020
!
! For a comprehensive list of the developers that contributed to these codes
! see the UK-AMOR website.
!
! This file is part of UKRmol-out (UKRmol+ suite).
!
!     UKRmol-out is free software: you can redistribute it and/or modify
!     it under the terms of the GNU General Public License as published by
!     the Free Software Foundation, either version 3 of the License, or
!     (at your option) any later version.
!
!     UKRmol-out is distributed in the hope that it will be useful,
!     but WITHOUT ANY WARRANTY; without even the implied warranty of
!     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!     GNU General Public License for more details.
!
!     You should have received a copy of the GNU General Public License
!     along with  UKRmol-out (in source/COPYING). Alternatively, you can also visit
!     <https://www.gnu.org/licenses/>.
!
!> \brief   Special functions and objects used by MULTIDIP
!> \author  J Benda
!> \date    2020
!>
module multidip_special

    use, intrinsic :: iso_c_binding,   only: c_double, c_int, c_f_pointer
    use, intrinsic :: iso_fortran_env, only: error_unit, input_unit, int32, int64, real64, output_unit

    use blas_lapack_gbl,  only: blasint
    use phys_const_gbl,   only: pi, imu
    use precisn_gbl,      only: wp

    implicit none

#ifdef WITH_GSL
    !> Derived type used by GSL for returning results
    type, bind(C) :: gsl_sf_result
        real(c_double) :: val
        real(c_double) :: err
    end type gsl_sf_result

    interface
        ! switch off the hard error handler
        function gsl_set_error_handler_off () bind(C, name='gsl_set_error_handler_off')
            use, intrinsic :: iso_c_binding, only: c_ptr
            type(c_ptr) :: gsl_set_error_handler_off
        end function gsl_set_error_handler_off
    end interface

    interface
        ! logarithm of the complex gamma function from GSL
        function gsl_sf_lngamma_complex_e (zr, zi, lnr, arg) bind(C, name='gsl_sf_lngamma_complex_e')
            use, intrinsic :: iso_c_binding, only: c_int, c_double
            import gsl_sf_result
            real(c_double), value :: zi, zr
            type(gsl_sf_result)   :: lnr, arg
            integer(c_int)        :: gsl_sf_lngamma_complex_e
        end function gsl_sf_lngamma_complex_e
    end interface

    interface
        ! Coulomb wave function from GSL
        function gsl_sf_coulomb_wave_FG_e (eta, rho, l, k, F, Fp, G, Gp, expF, expG) bind(C, name='gsl_sf_coulomb_wave_FG_e')
            use, intrinsic :: iso_c_binding, only: c_int, c_double
            import gsl_sf_result
            real(c_double), value :: eta, rho, l
            integer(c_int), value :: k
            type(gsl_sf_result)   :: F, Fp, G, Gp
            real(c_double)        :: expF, expG
            integer(c_int)        :: gsl_sf_coulomb_wave_FG_e
        end function gsl_sf_coulomb_wave_FG_e
    end interface

    interface
        ! irregular confluent hypergeometric function U
        pure function gsl_sf_hyperg_U (a, b, x) bind(C, name='gsl_sf_hyperg_U')
            use, intrinsic :: iso_c_binding, only: c_double
            real(c_double), value :: a, b, x
            real(c_double)        :: gsl_sf_hyperg_U
        end function gsl_sf_hyperg_U
    end interface
#endif

    interface
        ! Coulomb phase from UKRmol-out
        function cphaz (l, eta, iwrite)
            import wp
            integer  :: l, iwrite
            real(wp) :: eta, cphaz
        end function cphaz
    end interface

    interface
        ! Coulomb wave function from UKRmol-out
        subroutine coulfg (xx, eta1, xlmin, xlmax, fc, gc, fcp, gcp, mode1, kfn, ifail)
            import wp
            integer  :: mode1, kfn, ifail
            real(wp) :: xx, eta1, xlmin, xlmax, fc(*), gc(*), fcp(*), gcp(*)
        end subroutine coulfg
    end interface

    interface
        ! Decaying Whittaker function
        subroutine couln (l, Z, ERy, r, U, Up, acc, efx, ierr, nlast, fnorm)
            import wp
            integer  :: l, ierr, nlast
            real(wp) :: Z, Ery, r, U, Up, acc, efx, fnorm
        end subroutine couln
    end interface

    interface
        !Exponentially decaying solution with given (negative) energy and angular momentum
        subroutine decay (k, l, r, U, Up, iwrite)
            import wp
            integer  :: l, iwrite
            real(wp) :: k, r, U, Up
        end subroutine decay
    end interface

    interface
        ! real matrix-matrix multiplication from BLAS
        subroutine dgemm (transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc)
            import real64, blasint
            character(len=1) :: transa, transb
            integer(blasint) :: m, n, k, lda, ldb, ldc
            real(real64)     :: A(lda, *), B(ldb, *), C(ldc, *), alpha, beta
        end subroutine dgemm
    end interface

    interface
        ! real matrix-vector multiplication from BLAS
        subroutine dgemv (trans, m, n, alpha, A, lda, X, incx, beta, Y, incy)
            import real64, blasint
            character(len=1) :: trans
            integer(blasint) :: m, n, incx, incy, lda
            real(real64)     :: A(lda, *), X(*), Y(*), alpha, beta
        end subroutine dgemv
    end interface

    interface
        ! real symmetric matrix eigendecomposition from LAPACK
        subroutine dsyev (jobz, uplo, n, A, lda, eigs, work, lwork, info)
            import real64, blasint
            character(len=1) :: jobz, uplo
            integer(blasint) :: n, lda, lwork, info
            real(real64)     :: A(lda, *), eigs(*), work(*)
        end subroutine dsyev
    end interface

    interface
        ! real LU decomposition from LAPACK
        subroutine dgetrf (m, n, A, lda, ipiv, info)
            import real64, blasint
            integer(blasint) :: m, n, lda, ipiv(*), info
            real(real64)     :: A(lda, *)
        end subroutine dgetrf
    end interface

    interface
        ! real LU backsubstitution from LAPACK
        subroutine dgetrs (trans, n, nrhs, A, lda, ipiv, B, ldb, info)
            import real64, blasint
            character(len=1) :: trans
            integer(blasint) :: n, nrhs, lda, ldb, info, ipiv(*)
            real(real64)     :: A(lda, *), B(ldb, *)
        end subroutine dgetrs
    end interface

    interface
        ! complex matrix-matrix multiplication from BLAS
        subroutine zgemm (transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc)
            import real64, blasint
            character(len=1) :: transa, transb
            integer(blasint) :: m, n, k, lda, ldb, ldc
            complex(real64)  :: A(lda, *), B(ldb, *), C(ldc, *), alpha, beta
        end subroutine zgemm
    end interface

    interface
        ! complex matrix-vector multiplication from BLAS
        subroutine zgemv (trans, m, n, alpha, A, lda, X, incx, beta, Y, incy)
            import real64, blasint
            character(len=1) :: trans
            integer(blasint) :: m, n, incx, incy, lda
            complex(real64)  :: A(lda, *), X(*), Y(*), alpha, beta
        end subroutine zgemv
    end interface

    interface
        ! complex LU decomposition from LAPACK
        subroutine zgetrf (m, n, A, lda, ipiv, info)
            import real64, blasint
            integer(blasint) :: m, n, lda, ipiv(*), info
            complex(real64)  :: A(lda, *)
        end subroutine zgetrf
    end interface

    interface
        ! complex LU backsubstitution from LAPACK
        subroutine zgetrs (trans, n, nrhs, A, lda, ipiv, B, ldb, info)
            import real64, blasint
            character(len=1) :: trans
            integer(blasint) :: n, nrhs, lda, ldb, info, ipiv(*)
            complex(real64)  :: A(lda, *), B(ldb, *)
        end subroutine zgetrs
    end interface

    interface
        ! complex matrix inversion from LAPACK
        subroutine zgetri (n, A, lda, ipiv, work, lwork, info)
            import real64, blasint
            integer(blasint) :: n, lda, ipiv(*), lwork, info
            complex(real64)  :: A(lda, *), work(*)
        end subroutine zgetri
    end interface

contains

    !> \brief   Calculate scattering R-matrix
    !> \authors J Benda
    !> \date    2023
    !>
    !> R-matrix is calculated from boundary amplitudes and R-matrix poles provided using the formula
    !> \f[
    !>      R_{uv}(E) = \sum_{j} w_{uj}(a) \frac{1}{E_j - E} w_{vj}(a) \,.
    !> \f]
    !>
    !> \param[in] nchan   Number of partial wave channels in this symmetry.
    !> \param[in] nstat   Number of inner region states in this symmetry.
    !> \param[in] wmat    Boundary amplitudes for this symmetry.
    !> \param[in] eig     R-matrix poles for this symmetry.
    !> \param[in] Etot    Total energy of the target + electron system.
    !> \param[inout] Rmat R-matrix to calculate.
    !>
    subroutine calculate_R_matrix (nchan, nstat, wmat, eig, Etot, Rmat)

        integer,  intent(in)    :: nchan, nstat
        real(wp), intent(in)    :: wmat(:, :), eig(:), Etot
        real(wp), intent(inout) :: Rmat(:, :)
        real(wp), allocatable   :: E_wmat(:, :)
        integer(blasint)        :: lda, ldc, n, m, istat

        allocate (E_wmat(nchan, nstat))

        lda = size(wmat, 1)
        ldc = size(Rmat, 1)

        n = nchan
        m = nstat

        !$omp parallel do private(istat)
        do istat = 1, nstat
            E_wmat(1:nchan, istat) = wmat(1:nchan, istat) / (eig(istat) - Etot)
        end do

        call dgemm('N', 'T', n, n, m, 0.5_wp, wmat, lda, E_wmat, n, 0.0_wp, Rmat, ldc)

    end subroutine calculate_R_matrix


    !> \brief   Calculate T-matrix from K-matrix
    !> \authors J Benda
    !> \date    2024
    !>
    !> Use real algebra to obtain the scattering T-matrix defined as
    !> \f[
    !>     T = S - 1 = 2iK (1 - iK)^{-1} = 2i (1 - iK)^{-1} K = -2K (1 + K^2)^{-1} K + 2i (1 + K^2)^{-1} K
    !> \f]
    !> Only the open-open sub-block of the K-matrix is used.
    !>
    subroutine calculate_T_matrix (K, T, nopen)

        use multidip_params, only: rone, rzero

        real(wp),    intent(in)    :: K(:, :)
        complex(wp), intent(inout) :: T(:, :)
        integer,     intent(in)    :: nopen

        integer(blasint)              :: i, j, n, ldk, info
        integer(blasint), allocatable :: ipiv(:)
        real(wp),         allocatable :: A(:, :), ReT(:, :), ImT(:, :)

        allocate (A(nopen, nopen), ipiv(nopen), ReT(nopen, nopen), ImT(nopen, nopen))

        n = nopen
        ldk = size(K, 1)

        !$omp parallel do private(i, j)
        do j = 1, nopen
            do i = 1, nopen
                ImT(i, j) = 2*K(i, j)
            end do
        end do

        call dgemm('T', 'N', n, n, n, rone, K, ldk, K, ldk, rzero, A, n)

        !omp parallel do
        do i = 1, nopen
            A(i, i) = 1 + A(i, i)
        end do

        call dgetrf(n, n, A, n, ipiv, info)

        if (info /= 0) then
            print '(a)', 'ERROR: Failed to factorize 1 + K^2'
            stop 1
        end if

        call dgetrs('N', n, n, A, n, ipiv, ImT, n, info)

        if (info /= 0) then
            print '(a)', 'ERROR: Failed to back-substitute (1 + K^2) X = 2K'
            stop 1
        end if

        call dgemm('T', 'N', n, n, n, -rone, K, ldk, ImT, n, rzero, ReT, n)

        !$omp parallel do private (i, j)
        do j = 1, nopen
            do i = 1, nopen
                T(i, j) = cmplx(ReT(i, j), ImT(i, j), wp)
            end do
        end do

    end subroutine calculate_T_matrix


    !> \brief   Calculate S-matrix from T-matrix
    !> \author  J Benda
    !> \date    2020 - 2024
    !>
    !> Obtain the S-matrix from the definition formula
    !> \f[
    !>    S = (1 + iK) (1 - iK)^{-1} = 1 + T
    !> \f]
    !>
    subroutine calculate_S_matrix (T, S, nopen)

        use multidip_params, only: cone, czero

        complex(wp), intent(inout) :: S(:, :)
        complex(wp), intent(in)    :: T(:, :)
        integer,     intent(in)    :: nopen

        integer :: j

        !$omp parallel do
        do j = 1, nopen
            S(1:nopen, j) = T(1:nopen, j)
            S(j, j) = 1 + S(j, j)
        end do

    end subroutine calculate_S_matrix


    !> \brief   Photoionization coefficient
    !> \author  J Benda
    !> \date    2020
    !>
    !> Evaluates the wave function coefficient Ak for the final stationary photoionization wave.
    !>
    !> \param[in] rmatr  R-matrix radius for Coulomb wave matching
    !> \param[in] eig    Inner eigenenergies (R-matrix poles) for this irreducible representation
    !> \param[in] etot   Total energy of the system
    !> \param[in] w      Boundary amplitudes
    !> \param[in] Ek     Photoelectron kinetic energies in *all* channels (open and closed)
    !> \param[in] l      Photoelectron angular momentum in *all* channels (open and closed)
    !> \param[in] Km     K-matrix
    !> \param[inout] Ak  Wave function coefficients, needs to be allocated to neig x nopen and only that part will be written
    !>
    subroutine scatAk (rmatr, eig, etot, w, Ek, l, Km, Ak)

        use multidip_params, only: rzero, rone

        complex(wp), intent(inout) :: Ak(:,:)
        real(wp),    intent(in)    :: rmatr, eig(:), etot, Ek(:), w(:,:), Km(:,:)
        integer,     intent(in)    :: l(:)

        complex(wp), allocatable :: T(:,:)
        real(wp),    allocatable :: V(:,:), TT(:,:,:), Sp(:,:), Cp(:,:), P(:), XX(:,:,:), wFp(:,:,:), k(:)

        integer(blasint) :: i, nchan, nstat, nopen, nochf, nochf2, ldwmp
        real(wp)         :: F, Fp, G, Gp, Z = 1

        nchan = size(Km, 1)         ! number of channels
        nopen = count(Ek > 0)       ! number of open channels
        nochf = size(Ak, 2)         ! number of channels for which to calcualte the Ak coefficients
        nstat = size(eig)           ! number of inner eigenstates for this irreducible representation
        ldwmp = size(w, 1)          ! leading dimension of the matrix where the boundary amplitudes are stored
        k = sqrt(2*abs(Ek))         ! linear momentum of the photoelectron

        nopen = min(nopen, nchan)
        nochf = min(nopen, nochf)
        nochf2 = 2*nochf

        allocate (V(nchan, nchan), T(nopen, nopen), Sp(nchan, nchan), Cp(nchan, nchan), TT(nchan, 2, nopen), &
                  XX(nchan, 2, nochf), wFp(nstat, 2, nochf), P(nstat))

        T = 0; V = 0; Sp = 0; Cp = 0

        ! construct, factorize and invert for T = (1 + iK)⁻¹ (nopen-by-nopen)
        do i = 1, nopen
            T(i,i) = 1
        end do
        T = T + imu*Km(1:nopen, 1:nopen)
        call invert_matrix(T)
        TT(1:nopen, 1, 1:nochf) = real(T(1:nopen, 1:nochf), wp)
        TT(1:nopen, 2, 1:nochf) = aimag(T(1:nopen, 1:nochf))

        ! evaluate the Coulomb functions and the normalization factor (nchan-by-nchan)
        do i = 1, nchan
            call coul(Z, l(i), Ek(i), rmatr, F, Fp, G, Gp)
            V(i,i) = sqrt(2/(pi*k(i)))
            Sp(i,i) = Fp
            Cp(i,i) = Gp
        end do

        ! wave function coefficient (nstat-by-nochf)
        call dgemm('N', 'N', nchan, nopen,  nchan, rone, Cp, nchan, Km, nchan, rone,  Sp,  nchan)  ! S' + C' K
        call dgemm('N', 'N', nchan, nochf2, nopen, rone, Sp, nchan, TT, nchan, rzero, XX,  nchan)  ! (S' + C' K) T
        call dgemm('N', 'N', nchan, nochf2, nchan, rone, V,  nchan, XX, nchan, rzero, TT,  nchan)  ! V (S' + C' K) T
        call dgemm('T', 'N', nstat, nochf2, nchan, rone, w,  ldwmp, TT, nchan, rzero, wFp, nstat)  ! wT V (S' + C' K) T
        P = 0.5 / (eig - etot)
        do i = 1, nochf
            Ak(:, i) = P * cmplx(wFp(:, 1, i), wFp(:, 2, i), wp)
        end do

    end subroutine scatAk


    !> \brief   Coulomb phase
    !> \author  J Benda
    !> \date    2020 - 2023
    !>
    !> Return the Coulomb phase, arg Gamma(l + 1 - iZ/k). Uses GSL or the UKRmol-out
    !> library, depending on the configuration.
    !>
    real(wp) function cphase (Z, l, k) result (sigma)

        integer,  intent(in) :: l
        real(wp), intent(in) :: Z, k
#ifdef WITH_GSL
        type(gsl_sf_result) :: lnr, arg
        integer(c_int)      :: err
        real(c_double)      :: zr, zi

        zr = l + 1
        zi = -Z/k
        err = gsl_sf_lngamma_complex_e(zr, zi, lnr, arg)
        sigma = arg % val
#else
        sigma = cphaz(l, -Z/k, 0)
#endif

    end function cphase


    !> \brief   Complex Gamma function
    !> \author  J Benda
    !> \date    2021 - 2024
    !>
    !> Evaluate the complex Gamma function Γ(l + 1 + i*x), where l is integer.
    !>
    recursive complex(wp) function complex_gamma (l, x) result (G)

        real(wp), intent(in) :: x
        integer,  intent(in) :: l

        real(wp) :: absval, phase, Z = 1
        integer  :: k

        ! case without x reduces to l!
        if (x == 0) then
            G = 1
            do k = 2, l
                G = G * k
            end do
            return
        end if

        ! use reflection formula for negative l
        if (l < 0) then
            G = imu * (-1)**l * pi / (sinh(pi*x) * complex_gamma(-l - 1, -x))
            return
        end if

        ! absolute value from a closed formula from Wikipedia
        absval = 1
        do k = 1, l
            absval = absval * (k**2 + x**2)
        end do
        absval = absval * pi*x/sinh(pi*x)
        absval = sqrt(absval)

        ! phase is the Coulomb phase
        phase = cphase(Z, l, -1/x)

        ! final result
        G = absval * cmplx(cos(phase), sin(phase), wp)

    end function complex_gamma


    !> \brief   Coulomb functions
    !> \author  J Benda
    !> \date    2020 - 2023
    !>
    !> Evaluate the Coulomb wave (regular, irregular and derivatives). Uses GSL or the UKRmol-out
    !> library, depending on the configuration. For negative energies, evaluates the exponentially
    !> decreasing solution (into G and Gp) obtained from the Whittaker function (if charged) or
    !> from solution of the appropriate equation (if neutral).
    !>
    !> The derivatives returned are with respect to \f$ r \f$ already, so they should not be multiplied
    !> by the additional factor of \f$ k \f$.
    !>
    subroutine coul (Z, l, Ek, r, F, Fp, G, Gp)

        integer,  intent(in)    :: l
        real(wp), intent(in)    :: Z, Ek, r
        real(wp), intent(inout) :: F, Fp, G, Gp
#ifdef WITH_GSL
        call coul_gsl(Z, l, Ek, r, F, Fp, G, Gp)
#else
        call coul_ukrmol(Z, l, Ek, r, F, Fp, G, Gp)
#endif
    end subroutine coul


    !> \brief   Coulomb-Hankel function
    !> \author  J Benda
    !> \date    2021 - 2023
    !>
    !> See \ref coul for explanation of the arguments.
    !>
    complex(wp) function coulH (Z, s, l, Ek, r) result(H)

        integer,  intent(in)    :: s, l
        real(wp), intent(in)    :: Z, Ek, r

        real(wp) :: F, Fp, G, Gp

        call coul(Z, l, Ek, r, F, Fp, G, Gp)

        H = cmplx(G, s*F, wp)

    end function coulH


    !> \brief   Coulomb-Hankel function (asymptotic form)
    !> \author  J Benda
    !> \date    2021 - 2023
    !>
    !> Implements the asymptotic formula for Coulomb-Hankel function, DLMF §33.11.1.
    !> The number of terms is centrally controlled by a parameter in module multidip_params
    !> to be consistent with what is used in the integration routines.
    !>
    !> See \ref coul for explanation of the arguments.
    !>
    complex(wp) function coulH_asy (Z, s, l, Ek, r) result(H)

        use multidip_params, only: nTermsAsy

        integer,  intent(in) :: s, l
        real(wp), intent(in) :: Z, Ek, r

        integer     :: n
        complex(wp) :: a, b, term
        real(wp)    :: k

        k = sqrt(2*abs(Ek))

        if (Ek > 0) then
            a = l + 1 - Z*imu/k
            b = -l - Z*imu/k
        else
            a = l + 1 - Z/k
            b = -l - Z/k
        end if

        term = 1
        H = 1

        do n = 1, nTermsAsy
            if (Ek > 0) then
                term = term * (a + n - 1) * (b + n - 1) / (2*s*imu*n*k*r)
            else
                term = term * (a + n - 1) * (b + n - 1) / (-2*s*n*k*r)
            end if
            H = H + term
        end do

        if (Ek > 0) then
            H = H * exp(imu*s*(k*r + Z*log(2*k*r)/k - pi*l/2 + cphase(Z, l, k)))
        else
            H = H * exp(s*(-k*r + Z*log(2*k*r)/k))
        end if

    end function coulH_asy


    !> \brief   Coulomb functions (GSL)
    !> \author  J Benda
    !> \date    2020 - 2023
    !>
    !> Coulomb functions (positive- and negative-energy) calculated using GSL. The derivative is with respect to \f$ r \f$
    !> in the argument \f$ kr \f$, so the obtained derivatives contain an additional multiplication factor \f$ k \f$ compared to
    !> the standard normalization of the Coulomb functions.
    !>
    subroutine coul_gsl (Z, l, Ek, r, F, Fp, G, Gp)

        integer,  intent(in)    :: l
        real(wp), intent(in)    :: Z, Ek, r
        real(wp), intent(inout) :: F, Fp, G, Gp
#ifdef WITH_GSL
        type(gsl_sf_result) :: resF, resFp, resG, resGp
        real(c_double)      :: expF, expG, eta, rho, lc, k, a, b, x, U, Up
        integer(c_int)      :: err, zero = 0

        if (r <= 0) then
            F = 0;  Fp = 0
            G = 0;  Gp = 0
        else if (Ek > 0) then
            ! positive-energy solution
            k = sqrt(2*Ek)
            rho = k*r
            eta = -Z/k
            lc = l
            err = gsl_sf_coulomb_wave_FG_e(eta, rho, lc, zero, resF, resFp, resG, resGp, expF, expG)
            F  = resF  % val
            Fp = resFp % val * k
            G  = resG  % val
            Gp = resGp % val * k
        else
            ! negative-energy solution, Whittaker function W (WARNING: charged only)
            k = sqrt(-2*Ek)
            a = l + 1 - Z/k
            b = 2*l + 2
            x = 2*k*r
            U = gsl_sf_hyperg_U(a, b, x)
            Up = -2*k*a * gsl_sf_hyperg_U(a + 1, b + 1, x)  ! DLMF §13.3.22
            F = 0
            Fp = 0
            G = exp(-x/2) * x**(l + 1) * U  ! DLMF §13.14.3
            Gp = exp(-x/2) * x**(l + 1) * (-k * U + (l + 1)/r * U + Up)
        end if
#else
        F = 0; Fp = 0
        G = 0; Gp = 0
#endif
    end subroutine coul_gsl


    !> \brief   Coulomb functions (UKRmol)
    !> \author  J Benda
    !> \date    2020 - 2023
    !>
    !> Coulomb functions (positive- and negative-energy) calculated using UKRmol-out. The derivative is with respect to \f$ r \f$
    !> in the argument \f$ kr \f$, so the obtained derivatives contain an additional multiplication factor \f$ k \f$ compared to
    !> the standard normalization of the Coulomb functions.
    !>
    subroutine coul_ukrmol (Z, l, Ek, r, F, Fp, G, Gp)

        integer,  intent(in)    :: l
        real(wp), intent(in)    :: Z, Ek, r
        real(wp), intent(inout) :: F, Fp, G, Gp
        integer  :: ifail, mode = 1, kfn = 0, nterms, iwrite = 0
        real(wp) :: fc(l + 1), gc(l + 1), fcp(l + 1), gcp(l + 1), lc, efx, fnorm, acc = 1e-10_wp, k

        lc = l
        if (Ek > 0) then
            ! positive-energy solution
            k = sqrt(2*Ek)
            call coulfg(k*r, -Z/k, lc, lc, fc, gc, fcp, gcp, mode, kfn, ifail)
            F = fc(l + 1)
            G = gc(l + 1)
            Fp = fcp(l + 1) * k
            Gp = gcp(l + 1) * k
        else
            ! negative energy solution (charged or neutral)
            if (Z /= 0) then
                call couln(l, Z, 2*Ek, r, G, Gp, acc, efx, ifail, nterms, fnorm)
            else
                call decay(2*Ek, l, r, G, Gp, iwrite)
            end if
            F = 0
            Fp = 0
        end if

    end subroutine coul_ukrmol


    !> \brief   Diagonalize a real symmetric matrix
    !> \authors J Benda
    !> \date    2023
    !>
    !> Obtain eigenvectors and eigenvalues of a real symmetric matrix. The phase of the eigenvectors is fixed so that
    !> the element of each eigenvectors largest in magnitude is positive.
    !>
    !> \param[in]    M      Matrix to diagonalize (N × N)
    !> \param[inout] eigs   On exit the eigenvalues (N)
    !> \param[inout] eigv   On exit the eigenvectors (N × N)
    !> \param[in]    check  Optional threshold. If positive, check that the obtained eigenvectors are orthonormal to this accuracy.
    !>
    subroutine diagonalize_symm_matrix (M, eigs, eigv, check)

        real(wp), intent(in)           :: M(:, :)
        real(wp), intent(inout)        :: eigs(:), eigv(:, :)
        real(wp), intent(in), optional :: check

        integer(blasint)      :: i, n, info, lwork
        real(wp), allocatable :: work(:), tmp(:, :)
        real(wp)              :: diag, offdiag

        n = size(eigs)
        eigv = M
        allocate (work(1))
        call dsyev('V', 'U', n, eigv, n, eigs, work, -1_blasint, info)

        if (info /= 0) then
            write (error_unit, '(a,i0,a)') 'Error ', info, ' when preparing matrix diagonalization'
            stop 1
        end if

        lwork = work(1)
        deallocate (work)
        allocate (work(lwork))
        call dsyev('V', 'U', n, eigv, n, eigs, work, lwork, info)

        if (info /= 0) then
            write (error_unit, '(a,i0,a)') 'Error ', info, ' when diagonalizing matrix'
            stop 1
        end if

        ! fix phase of the eigenvectors (element largest in magnitude has to be positive)
        do i = 1, n
            if (eigv(maxloc(abs(eigv(:, i)), 1), i) < 0) then
                eigv(:, i) = -eigv(:, i)
            end if
        end do

        ! verify that the eigenvector matrix is orthogonal
        if (present(check)) then
            if (check > 0) then
                allocate (tmp(n, n))
                call dgemm('N', 'T', n, n, n, 1._wp, eigv, n, eigv, n, 0._wp, tmp, n)
                do i = 1, n
                    diag = tmp(i, i)
                    offdiag = sqrt(sum(abs(tmp(:, i))**2) - diag**2)
                    if (abs(diag - 1) > check .or.  abs(offdiag) > check) then
                        write (error_unit, '(a,i0,2e25.15)') 'WARNING: Orthogonality error in symmetric diagonalization ', &
                            i, diag, offdiag
                        stop 1
                    end if
                end do
            end if
        end if

    end subroutine diagonalize_symm_matrix


    !> \brief   Invert a complex matrix
    !> \author  J Benda
    !> \date    2020
    !>
    !> Calculate inverse of a complex matrix. Use the standard LAPACK decompose+solve sequence.
    !>
    subroutine invert_matrix (T)

        complex(wp), allocatable, intent(inout) :: T(:, :)

        complex(wp), allocatable :: work(:)
        complex(wp)              :: nwork(1)

        integer(blasint), allocatable :: ipiv(:)
        integer(blasint)              :: m, info, lwork

        m = size(T, 1)

        ! calculate LU decomposition
        allocate (ipiv(m))
        call zgetrf(m, m, T, m, ipiv, info)

        ! query for workspace size and allocate memory
        lwork = -1
        call zgetri(m, T, m, ipiv, nwork, lwork, info)
        lwork = int(nwork(1))
        allocate (work(lwork))

        ! compute the inverse
        call zgetri(m, T, m, ipiv, work, lwork, info)

    end subroutine invert_matrix


    !> \brief   Solve system of equations with complex matrix
    !> \author  J Benda
    !> \date    2020
    !>
    !> The matrix A is complex. The columns of matrices X and Y correspond to real and imaginary
    !> part of the right-hand side and of the solution, respectively.
    !>
    subroutine solve_complex_system (n, A, X, Y)

        complex(wp), allocatable, intent(inout) :: A(:, :)
        real(wp),    allocatable, intent(inout) :: X(:, :), Y(:, :)
        integer,                  intent(in)    :: n

        integer(blasint)              :: m, info, nrhs, lda
        integer(blasint), allocatable :: ipiv(:)
        complex(wp),      allocatable :: XX(:)

        allocate (ipiv(n))

        ! calculate LU decomposition
        m = n
        lda = size(A, 1)
        call zgetrf(m, m, A, lda, ipiv, info)

        ! combine real and imaginary part of the right-hand side
        XX = cmplx(X(1:n, 1), X(1:n, 2), wp)

        ! solve the equation
        nrhs = 1
        call zgetrs('N', m, nrhs, A, lda, ipiv, XX, m, info)

        Y(1:n, 1) = real(XX, wp)
        Y(1:n, 2) = aimag(XX)

    end subroutine solve_complex_system


    !> \brief   Construct or advance permutation
    !> \author  J Benda
    !> \date    2020
    !>
    !> If the given integer array contains a negative element, fill it with identical permutation (i.e.
    !> the sequence 1, 2, ..., N) and return TRUE. Otherwise attempt to generate the "next" permutation
    !> of the N values in the manner compatible with C++ `std::next_permutation` (lexicographically ordered
    !> sequences). When no further permutation is possible, return FALSE.
    !>
    !> The algorithm is shamelessly copied from the source of GCC's libstdc++ library and (except for the
    !> added initialization option) faithfully mimics the behaviour of the built-on C++ function `std::next_permutation`.
    !>
    logical function next_permutation (p) result (ok)

        integer, intent(inout) :: p(:)
        integer                :: i, j, k, n

        n = size(p)

        ! no permutation in empty array
        if (n == 0) then
            ok = .false.
            return
        end if

        ! initialize a new permutation
        if (any(p < 1)) then
            do i = 1, n
                p(i) = i
            end do
            ok = .true.
            return
        end if

        ! one-element permutation cannot be advanced further
        if (n == 1) then
            ok = .false.
            return
        end if

        i = n

        do
            k = i
            i = i - 1
            if (p(i) < p(k)) then
                j = n
                do while (p(i) >= p(j))
                    j = j - 1
                end do
                call swap(p(i), p(j))
                call reverse(p(k:n))
                ok = .true.
                return
            else if (i == 1) then
                ok = .false.
                return
            end if
        end do

    end function next_permutation


    !> \brief   Exchange value of two integers
    !> \author  J Benda
    !> \date    2020
    !>
    !> This subroutine mimics the behaviour of the built-in C++ function std::swap.
    !>
    subroutine swap (a, b)

        integer, intent(inout) :: a, b
        integer                :: c

        c = a
        a = b
        b = c

    end subroutine swap


    !> \brief   Reverse order of elements in array
    !> \author  J Benda
    !> \date    2020
    !>
    !> This subroutine mimics the behaviour of the built-in C++ function `std::reverse`.
    !>
    subroutine reverse (a)

        integer, intent(inout) :: a(:)
        integer                :: i, n

        n = size(a)

        do i = 1, n/2
            call swap(a(i), a(n + 1 - i))
        end do

    end subroutine reverse


    !> \brief   Compensated summation
    !> \author  J Benda
    !> \date    2020
    !>
    !> Add dX to X, keep track of numerical error. Uses Kahan's algorithm. This subroutine is in infinite
    !> precision equivalent to just
    !> ```
    !>     X = X + dX
    !>     err = 0
    !> ```
    !> but in the finite precision arithmetic it compensates the running numerical error. It mustn't be
    !> optimized away by the compiler! Flags like `-ffast-math` or `-Ofast` are detrimental here. Common
    !> optimization flags like `-O2` or `-O3` should be safe (but it may depend on the compiler).
    !>
    subroutine kahan_add (X, dX, err)

        complex(wp), intent(inout) :: X, err
        complex(wp), intent(in)    :: dX

        complex(wp) :: Y, Z

        Y = dX - err
        Z = X + Y
        err = (Z - X) - Y  ! the parenthesis must be obeyed, otherwise we get rubbish
        X = Z

    end subroutine kahan_add


    !> \brief   Angular part of the beta parameters
    !> \author  J Benda
    !> \date    2020
    !>
    !> Evaluates the contraction coefficient \f$ T_{m p_1 q_1 q_1'\dots p_n q_n q_n'}^{Jll'} \f$ in
    !> \f[
    !>     \frac{\mathrm{d}\sigma^{(p_1\dots p_n)}}{\mathrm{d}\Omega} = \frac{1}{4\pi} \sum_{J} b_J^{(p_1\dots p_n)} P_J(\cos\theta)
    !>         = \frac{1}{4\pi} \sum_{Jlml'm'\atop q_1 q_1' \dots q_n q_n'}
    !>         M_{mq_1\dots q_n}^{(l)} M_{m'q_1'\dots q_n'}^{(l')*} T_{m m', p_1 q_1 q_1'\dots p_n q_n q_n'}^{Jll'} P_J(\cos\theta),
    !> \f]
    !> which has the following explicit form:
    !> \f[
    !>     T_{m m', p_1 q_1 q_1'\dots p_n q_n q_n'}^{Jll'} = \sum_{\mu} (-1)^{\mu} (2J + 1) \sqrt{(2l + 1)(2l' + 1)}
    !>         \left(\begin{matrix} l & l' & J \\ 0 & 0 & 0 \end{matrix}\right)
    !>         \left(\begin{matrix} l & l' & J \\ \mu & -\mu & 0 \end{matrix}\right)
    !>         \frac{1}{8\pi^2} \int D_{\mu m}^{(l)} D_{\mu m'}^{(l')*} D_{p_1 q_1}^{(1)*} D_{p_1 q_1'}^{(1)} \dots
    !>            D_{p_n q_n}^{(1)*} D_{p_n q_n}^{(1)} \mathrm{d}\hat{R} .
    !> \f]
    !> The real averaging integral over orientations of the molecule is re-expressed as
    !> \f[
    !>     (-1)^{m + m'}
    !>     \frac{1}{8\pi^2} \int D_{-\mu, -m}^{(l)*} D_{-\mu, -m'}^{(l')} D_{p_1 q_1}^{(1)*} D_{p_1 q_1'}^{(1)} \dots
    !>            D_{p_n q_n}^{(1)*} D_{p_n q_n}^{(1)} \mathrm{d}\hat{R}
    !> \f]
    !> and then calculated in \ref wigner_D_multiint.
    !>
    !> \note In the present implementation, all laboratory-frame polarisations \f$ p_i \f$ are considered equal to zero, i.e.
    !>       the field is linearly polarized.
    !>
    real(wp) function beta_contraction_tensor (J, n, p, li, mi, qi, lj, mj, qj) result (T)

        use dipelm_special_functions, only: threej
        use multidip_params,          only: rone

        integer, intent(in) :: J, n, p(n), li, mi, qi(n), lj, mj, qj(n)
        integer :: mu, l(n + 1), lp(n + 1), a(n + 1), ap(n + 1), b(n + 1), bp(n + 1)
        real(wp) :: I, Q

        l(1) = li; l(2:) = 1
        lp(1) = lj; lp(2:) = 1

        b(1) = -mi; b(2:) = qi
        bp(1) = -mj; bp(2:) = qj

        T = 0

        do mu = -li, li

            a(1) = -mu; a(2:) = p
            ap(1) = -mu; ap(2:) = p

            I = (-1)**(mi + mj) * wigner_D_multiint(n + 1, l, a, b, lp, ap, bp)
            Q = (-1)**mu * (2*J + 1) * sqrt((2*li + rone)*(2*lj + rone))  &
                    * threej(2*li, 0, 2*lj, 0, 2*J, 0) * threej(2*li, 2*mu, 2*lj, -2*mu, 2*J, 0)

            T = T + I*Q

        end do

    end function beta_contraction_tensor


    !> \brief   Orientation averaging
    !> \author  J Benda
    !> \date    2020
    !>
    !> Calculate the value of the integral
    !> \f[
    !>    \frac{1}{8\pi^2} \int D_{a_1 b_1}^{(l_1)} D_{a_1' b_1'}^{(l_1')*} \dots
    !>                          D_{a_1 b_1}^{(l_n)} D_{a_n' b_n'}^{(l_n')*} \mathrm{d}\hat{R}
    !> \f]
    !> over all orientations (Euler angles) \f$ \hat{R} \f$. The calculation is done recursively using the formula
    !> \f[
    !>    D_{a_1 b_1}^{(l_1)} D_{a_2 b_2}^{(l_2)} = \sum_{juv} (-1)^{u - v} (2j + 1)
    !>     \left(\begin{matrix} l_1 & l_2 & j \\ a_1 & a_2 & -u \end{matrix}\right)
    !>     \left(\begin{matrix} l_1 & l_2 & j \\ b_1 & b_2 & -v \end{matrix}\right)
    !>     D_{u v}^{(j)}
    !> \f]
    !> that is applied to \f$ D_{p_1 q_1}^{(l_1)} D_{p_2 q_2}^{(l_2)} \f$ and to \f$ D_{p_1 q_1'}^{(l_1)} D_{p_2 q_2'}^{(l_2)} \f$,
    !> leading to the linear combination of shorter integrals
    !> \f[
    !>    \frac{1}{8\pi^2} \int D_{u v}^{(j)} D_{u' v'}^{(j')*} D_{a_3 b_3}^{(l_3)} D_{a_3' b_3'}^{(l_3)*}
    !>                          \dots D_{a_n b_n}^{(l_n)} D_{a_n' b_n'}^{(l_n)*} \mathrm{d}\hat{R}
    !> \f]
    !> The recursion terminates when only the pair \f$ D_{u v}^{(j)} D_{u' v'}^{(j')*} \f$ is left,
    !> by means of the orthogonality relation
    !> \f[
    !>     \frac{1}{8\pi^2} \int D_{u v}^{(j)} D_{u' v'}^{(j')*} \mathrm{d}\hat{R}
    !>     = \frac{1}{2j + 1} \delta_{jj'} \delta_{uu'} \delta_{vv'} \,.
    !> \f]
    !>
    recursive real(wp) function wigner_D_multiint (n, l, a, b, lp, ap, bp) result (K)

        use dipelm_special_functions, only: threej
        use multidip_params,          only: rone

        integer, intent(in) :: n
        integer, intent(in) :: l(n), a(n), b(n), lp(n), ap(n), bp(n)

        integer  :: u(n - 1), v(n - 1), up(n - 1), vp(n - 1), j(n - 1), jp(n - 1), j1, jp1
        real(wp) :: wa, wb, wap, wbp, W

        K = 0

        ! orthogonality relation
        if (n == 1) then
            if (l(1) == lp(1) .and. a(1) == ap(1) .and. b(1) == bp(1)) K = 1 / (2*l(1) + rone)
            return
        end if

        u(1) = a(1) + a(2); u(2:) = a(3:)
        v(1) = b(1) + b(2); v(2:) = b(3:)

        up(1) = ap(1) + ap(2); up(2:) = ap(3:)
        vp(1) = bp(1) + bp(2); vp(2:) = bp(3:)

        j(2:) = l(3:)
        jp(2:) = lp(3:)

        ! recursion: couple the first two angular momenta (both primed and unprimed)
        do j1 = max(abs(u(1)), max(abs(v(1)), abs(l(1) - l(2)))), l(1) + l(2)
            do jp1 = max(abs(up(1)), max(abs(vp(1)), abs(lp(1) - lp(2)))), lp(1) + lp(2)

                j(1) = j1
                jp(1) = jp1

                wa  = threej(2*l(1), 2*a(1), 2*l(2), 2*a(2), 2*j(1), -2*u(1))
                wb  = threej(2*l(1), 2*b(1), 2*l(2), 2*b(2), 2*j(1), -2*v(1))
                wap = threej(2*lp(1), 2*ap(1), 2*lp(2), 2*ap(2), 2*jp(1), -2*up(1))
                wbp = threej(2*lp(1), 2*bp(1), 2*lp(2), 2*bp(2), 2*jp(1), -2*vp(1))
                W = wigner_D_multiint(n - 1, j, u, v, jp, up, vp)
                K = K + (-1)**(u(1) - v(1) + up(1) - vp(1)) * (2*j1 + 1) * (2*jp1 + 1) * wa * wap * wb * wbp * W

            end do
        end do

    end function wigner_D_multiint

    !> \brief   Two-photon asymmetry parameter for the arbitrary polarized case
    !> \author  Z Masin
    !> \date    2023
    !>
    !> Explicit form of the two-photon asymmetry parameter from Ertel et al, Journal of Chemical Physics, submitted, (2023).
    !> This routine implements the most general case when the polarizations of all four photons involved are chosen arbitrarily.
    !> The resulting asymmetry parameter can have a non-zero M value corresponding to a spherical harmonic Y_{L,M}^{*}.
    !>
    !> The routine uses two different conventions for the asymmetry parameters. In case of M = 0 (no net difference between
    !> the polarizations in the two 2-photon arms) the routine uses the Legendre polynomials P_{L} as the basis:
    !> \f[
    !>    I = \frac{1}{4\pi}\sum_{L=0}^{4}b_{L}P_{L}(\cos\theta).
    !> \f]
    !> In the general case with M /= 0 the asymmetry parameters are coefficients in spherical harmonic expansion:
    !> \f[
    !>    I = \sum_{L,M}\hat{b}_{L,M}Y_{L,M}^{*}(\mathbf{k}).
    !> \f]
    !> Note the conjugation of the spherical harmonic of momentum.
    !>
    !> The input for this routine differs from that for beta_2p_demekhin and beta_contraction_tensor in the choice of the
    !> polarizations of the photons for the first (A) and second (B) 2-photon amplitudes by means of the arrays pA and pB.
    !> Each of them specifies the polarizations of the first and second photons.
    !>
    real(wp) function beta_2p_arb_pol_sph_harm (Lbig, Mbig, n, pB, lp, mp, qB, pA, l, m, qA) result (T)

        use dipelm_special_functions, only: threej
        use dipelm_defs,              only: pi
        use multidip_params,          only: rone

        integer, intent(in) :: Lbig, Mbig, n, pA(n), pB(n), lp, mp, qA(n), l, m, qB(n)
        integer :: K1, K2, M1, M2, Mp1, Mp2, mu
        real(wp) :: I, I_el, fac

        if (n /= 2) then
           print *,'Not implemented for orders other than 2', n
           stop 1
        endif

        Mp1 = -(-pA(1) + pB(1))
        Mp2 = -(-pA(2) + pB(2))
        T = 0

        ! selection rule from the photon-electron couplings
        if ( Mbig /= -(Mp1 + Mp2) ) return

        if (Mbig == 0) then
           ! to be used when the spherical harmonic Y_{L,0} is expressed as sqrt((2*L+1)/(4*pi))*P_{L} and the cross section as: 1/(4*pi)*\sum_{L}b_{L}P_{L}
           fac = sqrt((2*l + rone)*(2*lp + rone))*(2*Lbig + rone)
        else
           !to be used in all other cases when the cross section is expressed as: \sum_{L}b_{LM}Y_{L,M}^{*}
           fac = sqrt((2*l + rone)*(2*lp + rone)*(2*Lbig + rone) / (4*pi))
        endif

        ! electron-related couplings
        mu = -(m - mp)
        I_el = (-1)**(mp) * fac * threej(2*l, 2*m, 2*lp, -2*mp, 2*Lbig, 2*mu) * threej(2*l, 0, 2*lp, 0, 2*Lbig, 0)

        do K1 = 0, 2
           do K2 = 0, 2

              ! lab-frame polarization-related couplings
              I = (-1)**(pA(1) + pA(2)) * threej(2*1, 2*(-pA(1)), 2*1, 2*pB(1), 2*K1, 2*Mp1) * &
                                         &threej(2*1, 2*(-pA(2)), 2*1, 2*pB(2), 2*K2, 2*Mp2)

              ! photon-related couplings
              M1 = -(-qA(1) + qB(1))
              M2 = -(-qA(2) + qB(2))

              I = I * (2*K1 + 1) * (2*K2 + 1) * (-1)**(-qA(1)-qA(2)) * threej(2*1, 2*(-qA(1)), 2*1, 2*qB(1), 2*K1, 2*M1) * &
                                                                 threej(2*1, 2*(-qA(2)), 2*1, 2*qB(2), 2*K2, 2*M2)

              ! photon-electron couplings: Mbig = -(Mp1 + Mp2)
              I = I * I_el * threej(2*K1, 2*Mp1, 2*K2, 2*Mp2, 2*Lbig, 2*Mbig) * threej(2*K1, 2*M1, 2*K2, 2*M2, 2*Lbig, 2*mu)

              T = T + I

           enddo !K2
        enddo !K1

    end function beta_2p_arb_pol_sph_harm


    !> \brief   Two-photon asymmetry parameter
    !> \author  J Benda
    !> \date    2020
    !>
    !> Explicit form of the two-photon asymmetry parameter from Demekhin, Lagutin, Petrov, Phys. Rev. A 85 (2012) 023416.
    !>
    !> \note The explicit formula contains an apparent extra factor of (-1)^(L + lj), which is however fully absorbed
    !>       into the phase of the wave function in the present approach.
    !>
    real(wp) function beta_2p_demekhin (L, p1, p2, li, mi, q1i, q2i, lj, mj, q1j, q2j) result (b)

        use dipelm_special_functions, only: threej
        use multidip_params,          only: rone

        integer, intent(in) :: L, p1, p2, li, mi, lj, mj, q1i, q1j, q2i, q2j

        integer  :: M_L, J, M_J, K, M_K
        real(wp) :: w(6)

        b = 0
        M_L = -(mi - mj)

        do J = 0, 2
            do M_J = -J, +J
                do K = 0, 2
                    do M_K = -K, +K
                        w(1) = threej(2*1, 2*p1,  2*1, -2*p1,  2*J,    0)
                        w(2) = threej(2*1, 2*q1i, 2*1, -2*q1j, 2*J, -2*M_J)
                        w(3) = threej(2*1, 2*q2i, 2*1, -2*q2j, 2*K, -2*M_K)
                        w(4) = threej(2*1, 2*p2,  2*1, -2*p2,  2*K,    0)
                        w(5) = threej(2*J, 2*M_J, 2*K,  2*M_K, 2*L,  2*M_L)
                        w(6) = threej(2*J,   0,   2*K,    0,   2*L,    0)
                        b = b + (-1)**(q1j + q2j) * (2*J + 1) * (2*K + 1) * product(w(1:6))
                    end do
                end do
            end do
        end do

        w(1) = threej(2*li,    0,  2*lj,   0,  2*L,    0)
        w(2) = threej(2*li, -2*mi, 2*lj, 2*mj, 2*L, -2*M_L)

        b = b * (-1)**mi * (2*L + 1) * sqrt((2*li + rone)*(2*lj + rone)) * product(w(1:2))

    end function beta_2p_demekhin


    !> \brief   Return Cartesian invariant
    !> \author  J Benda
    !> \date    2022 - 2023
    !>
    !> Calculate angular average of a product of two sets of Cartesian components of a unit vector.
    !> The general formula is
    !> \f[
    !>      \frac{1}{4\pi} \int n_{i_1} \dots n_{i_N} = \frac{1}{(N + 1)!!} {\sum_\pi}'
    !>              \delta_{i_{\pi(1)} i_{\pi(2)}} \dots \delta_{i_{\pi(N-1)} i_{\pi(N)}} .
    !> \f]
    !> The prime denotes summation only over unique terms. Some explicit examples are:
    !> \f[
    !>      \frac{1}{4\pi} \int n_i n_j = \frac{1}{3} \delta_{ij} \,,
    !> \f]
    !> where i = q(1) and j = q(2), and
    !> \f[
    !>      \frac{1}{4\pi} \int n_i n_j n_k n_l =
    !>                     \frac{1}{15} (\delta_{ij}\delta_{kl} + \delta_{ik}\delta_{jl} + \delta_{il}\delta_{jk}) \,,
    !> \f]
    !> where i = q(1), j = q(2), k = q(3) and l = q(4).
    !>
    real(wp) recursive function cartesian_vector_component_product_average (q) result (A)

        integer, intent(in) :: q(:)
        integer             :: i, N, iq(size(q))

        N = size(q)

        ! set up an index array
        do i = 1, N
            iq(i) = i
        end do

        if (N == 0) then
            ! trivial integral of unity
            A = 1
        else if (mod(N, 2) == 1) then
            ! for odd number of indices the integral is zero
            A = 0
        else
            ! collect contributions from all permutations
            A = 0
            do i = 2, N
                if (q(1) == q(i)) then
                    A = A + cartesian_vector_component_product_average(pack(q, iq /= 1 .and. iq /= i))
                end if
            end do
        end if

        ! add the normalization factor
        if (A /= 0) then
            A = A / (N + 1)
        end if

    end function cartesian_vector_component_product_average

end module multidip_special
