! Copyright 2019
!
! For a comprehensive list of the developers that contributed to these codes
! see the UK-AMOR website.
!
! This file is part of UKRmol-in (UKRmol+ suite).
!
!     UKRmol-in is free software: you can redistribute it and/or modify
!     it under the terms of the GNU General Public License as published by
!     the Free Software Foundation, either version 3 of the License, or
!     (at your option) any later version.
!
!     UKRmol-in is distributed in the hope that it will be useful,
!     but WITHOUT ANY WARRANTY; without even the implied warranty of
!     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!     GNU General Public License for more details.
!
!     You should have received a copy of the GNU General Public License
!     along with  UKRmol-in (in source/COPYING). Alternatively, you can also visit
!     <https://www.gnu.org/licenses/>.

!> \brief   CSF module
!> \authors A Al-Refaie, J Benda
!> \date    2017 - 2019
!>
!> This module handles reading and storing of configuration state functions from CONGEN.
!> It provides a clean way of handling CSFs than multiple arrays! Hopefully!
!>
!> When MPI-3 shared memory is available, the configurations (\ref CSFManager::int64dtrs) and
!> coefficients (\ref CSFManager::coeffs) are stored in an array shared across all processes.
!>
!> \note 16/01/2019 - Jakub Benda: Unifom coding style and expanded documentation.
!>
module CSF_module

    use const_gbl,              only: stdout
    use mpi_memory_gbl,         only: mpi_memory_allocate_integer_2dim, mpi_memory_allocate_real, mpi_memory_synchronize, &
                                      mpi_memory_deallocate_integer_2dim, mpi_memory_deallocate_real, local_master
    use mpi_gbl,                only: mpi_mod_barrier, shared_enabled
    use precisn,                only: longint, wp
    use scatci_routines,        only: RDNFTO, RDWF, SETPOSWF
    use MemoryManager_module,   only: master_memory
    use Options_module,         only: Options
    use Orbital_module,         only: OrbitalTable
    use Parallelization_module, only: grid => process_grid

    implicit none

    private

    public CSFObject, CSFManager, CSFOrbital

    !> \brief   Configuration state function factory
    !> \authors A Al-Refaie, J Benda
    !> \date    2017 - 2019
    !>
    !> This is a central object that takes care of reading and storing all configurations state functions (CSFs)
    !> generated by CONGEN. The determinants forming the CSFs are compressed to a long bit array, which can be
    !> shared by all processes in a MPI group. The manager class creates instances of \ref CSFObject (individual
    !> CSFs) and \ref CSFOrbital (individual determinants). Both of them just effectively address into shared arrays
    !> manager by this object.
    !>
    type :: CSFManager
#ifdef mpithree
        integer(longint), pointer     :: int64dtrs(:,:)     !< Unpacked determinants as a bit array (spin-orbital occupation numbers).
        real(wp),         pointer     :: coeffdtrs(:)       !< Coefficients of determinants stored in int64dtrs.
#else
        integer(longint), allocatable :: int64dtrs(:,:)     !< Unpacked determinants as a bit array (spin-orbital occupation numbers).
        real(wp),         allocatable :: coeffdtrs(:)       !< Coefficients of determinants stored in int64dtrs.
#endif

        integer :: int64_win = -1                           !< MPI-3 shared memory window ID for int64dtrs.
        integer :: coeffs_win = -1                          !< MPI-3 shared memory window ID for coeffs.

        integer          :: megul                           !< CONGEN output unit with generated configuration state functions.
        integer          :: num_csfs                        !< Old name: NOCSF  - # of CSFs
        integer          :: length_coeff
        integer          :: length_dtrs
        integer, pointer :: num_dtr_csf_out(:)              !< Old Name: NODO   - Number of determinants in CSF in output

        !---------CSF body------------------------------!
        integer,  allocatable :: index_dtrs(:)              !< Old Name: INDO    - Index of determinants
        integer,  allocatable :: index_coeffs(:)            !< Old Name: ICDO    - index of coefficients
        integer,  allocatable :: packed_dtrs(:)             !< Old Name: NDO     - Packed slater determinants
        real(wp), allocatable :: coeffs(:)                  !< Old Name: NCO     - coeffcients

        class(Options), pointer :: option_ptr
        integer :: largest_num_configs

        !This is for the fast slater rules
        integer :: num_MO
        integer :: num_SO
        integer :: num_electrons
        integer :: Nints            !< Number of integers forming the shared determinant storage (bit array).

        integer, allocatable :: orbitals(:)
        integer, allocatable :: spin(:)
        integer, allocatable :: reference_dtrs(:)
        integer(longint), allocatable :: ref_int64dtrs(:)
        class(OrbitalTable), pointer  :: pt_orb
    contains
        procedure, public  :: initialize => read_csf_body
        procedure, public  :: print      => print_all_csfs
        procedure, public  :: create_csfs
        procedure, public  :: toggle_determinant
        procedure, public  :: finalize   => finalize_manager
    end type CSFManager


    !> \brief   Single determinant
    !> \authors A Al-Refaie, J Benda
    !> \date    2017 - 2019
    !>
    !> Class holding data for a single determinant that forms a part of a configuration state function (CSF).
    !> At the moment, it holds just offset in the shared contiguous determinant bit array, which is managed
    !> by \ref CSFManager type.
    !>
    type CSFOrbital
        type(CSFObject), pointer :: csf     !< Pointer to the CSFObject with CSF this determinant belongs to.
        integer                  :: id_dtr  !< Offset in the global determinant array.
    contains
        procedure, public :: compare_excitations_fast
        procedure, public :: get_determinants
        final             :: destroy_orbitals
    end type CSFOrbital


    !> \brief   Single configuration state function
    !> \authors A Al-Refaie, J Benda
    !> \date    2017 - 2019
    !>
    !> Set of determinants comprising a single configuration state function (CSF). The determinants are
    !> represented by instances of the \ref CSFOrbital type.
    !>
    type CSFObject
        type(CSFManager), pointer     :: manager    !< Pointer to the creator CSFManager object.
        type(CSFOrbital), allocatable :: orbital(:) !< List of determinants.
        integer :: num_orbitals
        integer :: id
        integer :: orbital_sequence_number      !< Pointer to mapping of energies.
        logical :: is_continuum                 !< Whether this is a continuum function or not.
        integer :: target_symmetry              !< The i discussed in the paper.
        integer :: continuum_symmetry           !< The gamma discussed in the paper.
        integer :: continuum_orbital_idx        !< The j = 1 or 2 discussed in the paper.
        integer :: target_configuration         !< The 'm' discussed in the paper.
    contains
        procedure, public :: print => print_csf
        procedure, public :: check_slater => does_it_obey_slater
    end type CSFObject

contains

    !> \brief   Reads the CSFs
    !> \authors A Al-Refaie
    !> \date    2017
    !>
    !> Reads all configurations from CONGEN output using \ref Legacy_SCATCI_module::RDWF
    !> and stores them in the CONGEN format. The translation from CONGEN format to MPI-SCATCI
    !> internal (bitset) representation is done in \ref create_csfs.
    !>
    subroutine read_csf_body (this, option, orbital)
        class(CSFManager),   intent(inout)      :: this
        class(Options),      intent(in)         :: option
        class(OrbitalTable), intent(in), target :: orbital

        integer :: ido, ifail, size_mem

        ! Store important values for the CSF generation
        this % num_csfs     = option % num_csfs
        this % length_dtrs  = option % length_dtrs
        this % length_coeff = option % length_coeff
        this % megul        = option % megul

        allocate(this % num_dtr_csf_out(this % num_csfs), stat = ifail)

        call master_memory % track_memory(kind(this % num_dtr_csf_out), &
                                          size(this % num_dtr_csf_out), ifail, 'CSFMANAGER::num_dtr_csr_out')

        this % num_dtr_csf_out = option  % num_dtr_csf_out
        this % num_electrons   = option  % num_electrons

        this % num_MO = orbital % total_num_orbitals
        this % num_SO = orbital % total_num_spin_orbitals

        allocate(this % orbitals(this % num_SO),              &
                 this % reference_dtrs(this % num_electrons), &
                 this % spin(this % num_SO), stat = ifail)

        call master_memory % track_memory(kind(this % orbitals), &
                                          size(this % orbitals),       ifail, 'CSFMANAGER::orbitals')
        call master_memory % track_memory(kind(this % reference_dtrs), &
                                          size(this % reference_dtrs), ifail, 'CSFMANAGER::reference_dtrs')
        call master_memory % track_memory(kind(this % spin), &
                                          size(this % spin),           ifail, 'CSFMANAGER::spin')

        do ido = 1, this % num_SO
            this % orbitals(ido) = orbital % get_orbital_number(ido)
            this % spin(ido) = orbital % get_spin(ido)
        end do

        this % reference_dtrs(:) = option % reference_dtrs(:)
        this % Nints = this % num_SO / 64 + 1

        allocate(this % ref_int64dtrs(this % Nints), stat = ifail)

        call master_memory % track_memory(kind(this % ref_int64dtrs), &
                                          size(this % ref_int64dtrs), ifail, 'CSFMANAGER::ref_int64dtrs')

        this % ref_int64dtrs(:) = 0
        write (stdout, "('Nints = ',i4)") this % Nints
        this % pt_orb => orbital

        do ido = 1, this % num_electrons
            call this % toggle_determinant(this % reference_dtrs(ido), this % ref_int64dtrs, this % Nints)
        end do

        if ((shared_enabled .and. grid % lrank == local_master) .or. .not. shared_enabled) then
            ! Allocate arrays for reading from congen output
            allocate(this % index_dtrs(this % num_csfs + 1),    &
                     this % index_coeffs(this % num_csfs + 1),  &
                     this % packed_dtrs(this % length_dtrs),    &
                     this % coeffs(this % length_coeff), stat = ifail)

            ! Rewind and reposition CONGEN file to the beginning of the wavefunction data
            call SETPOSWF(option % positron_flag,  &
                          option % num_continuum,  &
                          option % num_target_sym, &
                          this % megul)

            ! Read from congen
            call RDWF(this % num_csfs + 1, &
                      this % index_coeffs, &
                      this % index_dtrs,   &
                      this % packed_dtrs,  &
                      this % length_dtrs,  &
                      this % coeffs,       &
                      this % length_coeff, &
                      this % megul)
         end if

    end subroutine read_csf_body


    !> \brief   Release all memory used by the instance
    !> \authors J Benda
    !> \date    2019
    !>
    !> \param[inout] this Manager object to update.
    !>
    subroutine finalize_manager (this)
        class(CSFManager), intent(inout), target :: this

        call master_memory % free_memory(kind(this % num_dtr_csf_out), size(this % num_dtr_csf_out))
        call master_memory % free_memory(kind(this % orbitals),        size(this % orbitals))
        call master_memory % free_memory(kind(this % reference_dtrs),  size(this % reference_dtrs))
        call master_memory % free_memory(kind(this % spin),            size(this % spin))
        call master_memory % free_memory(kind(this % ref_int64dtrs),   size(this % ref_int64dtrs))

        if (associated(this % num_dtr_csf_out)) deallocate (this % num_dtr_csf_out)
        if (allocated(this % orbitals))         deallocate (this % orbitals)
        if (allocated(this % reference_dtrs))   deallocate (this % reference_dtrs)
        if (allocated(this % spin))             deallocate (this % spin)
        if (allocated(this % ref_int64dtrs))    deallocate (this % ref_int64dtrs)

        if (allocated(this % index_dtrs))       deallocate (this % index_dtrs)
        if (allocated(this % index_coeffs))     deallocate (this % index_coeffs)
        if (allocated(this % packed_dtrs))      deallocate (this % packed_dtrs)
        if (allocated(this % coeffs))           deallocate (this % coeffs)

        call mpi_memory_deallocate_integer_2dim(this % int64dtrs, size(this % int64dtrs), this % int64_win, grid % lcomm)
        call mpi_memory_deallocate_real(this % coeffdtrs, size(this % coeffdtrs), this % coeffs_win, grid % lcomm)

    end subroutine finalize_manager


    !> \brief   This subroutine seperates out the CSFs and creates individual objects out of them
    !> \authors A Al-Refaie, J Benda
    !> \date    2017 - 2019
    !>
    !> The unpacked determinants forming each CSF are stored as bit fields in \ref CSFManager::int64dtrs (potentially
    !> shared across the MPI processes participating in the diagonalization) of length equal to the number
    !> of spin-orbitals and values 0 and 1 representing absence or presence of an electron in the spin-orbital.
    !> The coefficients of these determinants are then adjusted to be compatible with ascending order
    !> of spin-orbitals in the determinant (may result in sign flip).
    !>
    !> \param[inout] this                  Manager object to update.
    !> \param[inout] CSF                   Configuration set to construct (will be allocated).
    !> \param[in]    orbital_sequence      Pointer to orbital sequence (KPT).
    !> \param[in]    num_ci_target_sym     Number of CI components of each target symmetry (NCTGT).
    !> \param[in]    continuum_projection  Lambda value of the continuum orbitals associated with each target state (MCONT).
    !>
    subroutine create_csfs (this, CSF, orbital_sequence, num_ci_target_sym, continuum_projection)

        class(CSFManager), intent(inout),              target :: this
        type(CSFObject),   intent(inout), allocatable, target :: CSF(:)
        integer,           intent(in)                         :: num_ci_target_sym(:), continuum_projection(:)

        integer :: orbital_sequence(this % num_csfs)
        integer :: csf_det(this % num_electrons)

        real(wp) :: phase_fold
        integer  :: ido, jdo, kdo, ifail, num_orbs, start_index_dtr, start_index_coeff, num_spin_orb
        integer  :: dtr_size, elec_idx, total_number_orbs, orb_counter, count_number, symmetry_idx

        symmetry_idx = 1
        count_number = 1
        this % largest_num_configs = 0

        write (stdout, "('Creating our CSFs.........')", advance = "no")

        total_number_orbs = 0
        orb_counter = 0
        do ido = 1, this % num_csfs
            total_number_orbs = total_number_orbs + this % num_dtr_csf_out(ido)
        end do

        write (stdout, "('Total number of determinants = ',i12)") total_number_orbs

        allocate(CSF(this % num_csfs))

        this % int64_win = mpi_memory_allocate_integer_2dim(this % int64dtrs, this % Nints, total_number_orbs, grid % lcomm)
        this % coeffs_win = mpi_memory_allocate_real(this % coeffdtrs, total_number_orbs, grid % lcomm)

        call mpi_mod_barrier(ifail, grid % gcomm)
        call mpi_memory_synchronize(this % int64_win, grid % lcomm)
        call mpi_memory_synchronize(this % coeffs_win, grid % lcomm)

        do ido = 1, this % num_csfs
            num_orbs = this % num_dtr_csf_out(ido)

            CSF(ido) % id = ido
            CSF(ido) % manager => this
            CSF(ido) % num_orbitals = num_orbs

            ! Store the orbital number from mkpt
            CSF(ido) % orbital_sequence_number = orbital_sequence(ido)

            this % largest_num_configs = max(this % largest_num_configs, num_orbs)

            if (CSF(ido) % orbital_sequence_number > 0) then

                CSF(ido) % is_continuum = .true.
                CSF(ido) % continuum_orbital_idx = mod(ido - 1, 2)
                CSF(ido) % target_symmetry = symmetry_idx
                CSF(ido) % target_configuration = ((count_number - 1) / 2) + 1
                CSF(ido) % continuum_symmetry = continuum_projection(symmetry_idx) + 1

                count_number = count_number + 1

                if (count_number > num_ci_target_sym(symmetry_idx) * 2) then
                    symmetry_idx = symmetry_idx + 1
                    count_number = 1
                end if

            else

                CSF(ido) % is_continuum = .false.

            end if

            orb_counter = orb_counter + 1
            allocate(CSF(ido) % orbital(num_orbs), stat = ifail)    ! Allocate our orbitals

            CSF(ido) % orbital(1) % id_dtr =  orb_counter
            CSF(ido) % orbital(1) % csf => CSF(ido)

            if ((shared_enabled .and. grid % lrank == local_master) .or. .not. shared_enabled) then
                !Assign the coefficient to the first
                start_index_dtr = this % index_dtrs(ido)
                start_index_coeff = this % index_coeffs(ido) - 1
                num_spin_orb = this % packed_dtrs(start_index_dtr)
                this % int64dtrs(:,orb_counter) = this % ref_int64dtrs(:)
                csf_det(:) = this % reference_dtrs(:)

                do kdo = 1, num_spin_orb
                    call this % toggle_determinant(this % packed_dtrs(start_index_dtr + kdo), &
                                                   this % int64dtrs(:,orb_counter), this % Nints)
                    call this % toggle_determinant(this % packed_dtrs(start_index_dtr + num_spin_orb + kdo), &
                                                   this % int64dtrs(:,orb_counter), this % Nints)
                    elec_idx = this % pt_orb % get_electron_number(this % packed_dtrs(start_index_dtr + kdo))
                    csf_det(elec_idx) = this % packed_dtrs(start_index_dtr + num_spin_orb + kdo)
                end do

                phase_fold = 1_wp
                call QsortDets(csf_det, phase_fold)
                this % coeffdtrs(orb_counter) = this % coeffs(start_index_coeff + 1) * phase_fold
                start_index_dtr = start_index_dtr + 2 * num_spin_orb + 1
            end if

            !Move to the next orbital

            !Do for the rest
            do jdo = 2, num_orbs
                orb_counter = orb_counter + 1
                CSF(ido) % orbital(jdo) % id_dtr = orb_counter
                CSF(ido) % orbital(jdo) % csf => CSF(ido)

                if ((shared_enabled .and. grid % lrank == local_master) .or. .not. shared_enabled) then
                    num_spin_orb = this % packed_dtrs(start_index_dtr)
                    this % int64dtrs(:,orb_counter) = this % ref_int64dtrs(:)
                    csf_det(:) = this % reference_dtrs(:)

                    do kdo = 1, num_spin_orb
                        call this % toggle_determinant(this % packed_dtrs(start_index_dtr + kdo), &
                                                       this % int64dtrs(:,orb_counter), this % Nints)
                        call this % toggle_determinant(this % packed_dtrs(start_index_dtr + num_spin_orb + kdo), &
                                                       this % int64dtrs(:,orb_counter), this % Nints)
                        elec_idx = this % pt_orb % get_electron_number(this % packed_dtrs(start_index_dtr + kdo))
                        csf_det(elec_idx) = this % packed_dtrs(start_index_dtr + num_spin_orb + kdo)
                    end do

                    phase_fold = 1_wp
                    call QsortDets(csf_det, phase_fold)
                    this % coeffdtrs(orb_counter) = this % coeffs(start_index_coeff + jdo) * phase_fold
                    start_index_dtr = start_index_dtr + 2 * num_spin_orb + 1
                end if
            end do
        end do

        write (stdout, "('success!')")
        write (stdout, "('Largest number of configurations in a single CSF is ',i10)") this % largest_num_configs

        call mpi_mod_barrier(ifail, grid % gcomm)
        call mpi_memory_synchronize(this % int64_win, grid % lcomm)
        call mpi_memory_synchronize(this % coeffs_win, grid % lcomm)
       !call master_memory % free_memory(kind(this % index_dtrs),     size(this % index_dtrs))
       !call master_memory % free_memory(kind(this % index_coeffs),   size(this % index_coeffs))
       !call master_memory % free_memory(kind(this % packed_dtrs),    size(this % packed_dtrs))
        call master_memory % free_memory(kind(this % num_dtr_csf_out),size(this % num_dtr_csf_out))

        ! We don't need them anymore
        if ((shared_enabled .and. grid % lrank == local_master) .or. .not. shared_enabled) then
            deallocate(this % index_dtrs, this % index_coeffs, this % packed_dtrs, this % coeffs)
        end if
        deallocate(this % num_dtr_csf_out)

    end subroutine create_csfs


    subroutine print_csf (this)
        class(CSFObject), intent(in) :: this
    end subroutine print_csf


    subroutine print_all_csfs (this, csf)
        class(CSFManager), intent(in) :: this
        class(CSFObject),  intent(in) :: csf(:)
        integer :: ido

        do ido = 1, this % num_csfs
            call CSF(ido) % print
        end do

    end subroutine print_all_csfs


    subroutine destroy_orbitals (this)
        type(CSFOrbital) :: this
    end subroutine destroy_orbitals


    subroutine compare_excitations_fast (this, that, num_electrons, coeff, num_excitations, output_dtrs)
        class(CSFOrbital), intent(in)  :: this, that
        class(CSFManager), pointer     :: manager
        integer,           intent(in)  :: num_electrons
        integer,           intent(out) :: num_excitations
        integer,           intent(out) :: output_dtrs(4)
        real(wp),          intent(out) :: coeff
        real(wp) :: phase

        phase = 1
        coeff = 0.0_wp
        output_dtrs = 0

        manager => this % csf % manager

        !DIR$ FORCEINLINE
        call get_excitation(manager % int64dtrs(:, this % id_dtr), &
                            manager % int64dtrs(:, that % id_dtr), &
                            output_dtrs, num_excitations, phase, manager % Nints)

        coeff = phase * manager % coeffdtrs(this % id_dtr) &
                      * manager % coeffdtrs(that % id_dtr)

    end subroutine compare_excitations_fast


    integer function does_it_obey_slater (this, that)
        class(CSFObject),  intent(in) :: this, that
        class(CSFManager), pointer    :: manager
        integer                       :: min_num_excitation, ido

        manager => this % manager

        does_it_obey_slater = pzero(manager % int64dtrs(:, this % orbital(1) % id_dtr), &
                                    manager % int64dtrs(:, that % orbital(1) % id_dtr), &
                                    manager % Nints)

    end function does_it_obey_slater


    subroutine toggle_determinant (this, det0, det1, Nints)
        class(CSFManager), intent(in) :: this
        integer,           intent(in) :: Nints
        integer          :: det0
        integer(longint) :: det1(Nints)
        integer(longint) :: bit_set
        integer          :: spin_position, array_position, local_orbital

        bit_set = 0
        array_position = (det0 - 1) / 64 + 1
        ! odd is alpha even is beta

        local_orbital = det0 - (array_position - 1) * 64 - 1
        bit_set = ibset(bit_set, local_orbital)

        det1(array_position) = ieor(det1(array_position), bit_set)

    end subroutine toggle_determinant


    integer function pzero (det1, det2, Nint)
        integer,          intent(in) :: Nint
        integer(longint), intent(in) :: det1(Nint), det2(Nint)
        integer(longint) :: xor1
        integer          :: l, nexcitations = 0

        xor1 = ieor(det1(1), det2(1))
        pzero = popcnt(ieor(get_beta(xor1), get_alpha(xor1)))

        do l = 2, Nint
            xor1 = ieor(det1(l), det2(l))
            pzero = pzero + popcnt(ieor(get_beta(xor1), get_alpha(xor1)))
        end do

    end function pzero


    ! \brief  Select only beta orbitals
    ! \date   2017 - 2020
    ! \author A Al-Refaie, J Benda
    !
    ! Given a section of a bit array representing the population of spin-orbitals, mask out the the alpha
    ! orbitals (odd bits), keeping only the even bits. On return, the even bits are shifted one bit back,
    ! to odd positions.
    !
    ! Note that rather than masking out odd bits and then shifting the result, we do it the other way round (first
    ! shift, then mask out *even* bits). This is because in some circumstances the leading non-zero bit is reserved
    ! for sign and constructing a mask that explicitly sets it may result in "Arithmetic Overflow" errors.
    !
    elemental integer function get_beta (det)
        integer(longint), intent(in) :: det
        integer(longint), parameter  :: mask = int(z'5555555555555555', longint)  ! = 0101...0101

        get_beta = iand(ishft(det, -1), mask)

    end function get_beta


    ! \brief  Select only alpha orbitals
    ! \date   2017 - 2020
    ! \author A Al-Refaie, J Benda
    !
    ! Given a section of a bit array representing the population of spin-orbitals, mask out the the beta
    ! orbitals (even bits), keeping only the odd bits.
    !
    elemental integer function get_alpha (det)
        integer(longint), intent(in) :: det
        integer(longint), parameter  :: mask = int(z'5555555555555555', longint)  ! = 0101...0101

        get_alpha = iand(det, mask)

    end function get_alpha


    integer function n_excitations (det1, det2, Nint)
        integer,          intent(in) :: Nint
        integer(longint), intent(in) :: det1(Nint), det2(Nint)

        integer :: l

        n_excitations = popcnt(ieor(det1(1), det2(1)))

        do l = 2, Nint
            n_excitations = n_excitations + popcnt(ieor(det1(l), det2(l)))
        end do

        n_excitations = ishft(n_excitations, -1)

    end function n_excitations


    subroutine get_excitation (det1, det2, exc, degree, phase, Nint)
        integer,          intent(in)    :: Nint
        integer(longint), intent(in)    :: det1(Nint,2), det2(Nint,2)
        integer,          intent(out)   :: exc(4)
        integer,          intent(out)   :: degree
        double precision, intent(inout) :: phase

        degree = n_excitations(det1, det2, Nint)

        select case (degree)
            case (3:)
                return
            case (2)
                call get_double_excitation(det1, det2, exc, phase, Nint)
                return
            case (1)
                call get_single_excitation(det1, det2, exc(3:4), phase, Nint)
                return
            case (0)
                return
        end select

    end subroutine get_excitation


    subroutine get_single_excitation (det1, det2, exc, phase, Nint)
        integer,          intent(in)    :: Nint
        integer(longint), intent(in)    :: det1(Nint)
        integer(longint), intent(in)    :: det2(Nint)
        integer,          intent(inout) :: exc(2)
        double precision, intent(out)   :: phase

        integer :: tz, l, ispin, ishift, nperm, i, j, k, m, n, high, low
        integer :: holes(1)
        integer :: particles(1)
        integer(longint) :: hole, particle, tmp, masklow, maskhigh
        double precision, parameter :: phase_dble(0:1) = (/ 1.d0, -1.d0 /)

        nperm = 0
        exc(:) = 0
        ishift = -63
        do l = 1, Nint
            ishift = ishift + 64
            if (det1(l) == det2(l)) cycle
            tmp = ieor(det1(l), det2(l))
            particle = iand(tmp, det2(l))
            hole     = iand(tmp, det1(l))
            if (particle /= 0_8) then
                tz = trailz(particle)
                exc(2) = tz + ishift
            end if
            if (hole /= 0_8) then
                tz = trailz(hole)
               !exc(0,1,ispin) = 1
                exc(1) = tz + ishift
            end if
            if (exc(1) /= 0 .and. exc(2) /=0 ) then
                low  = min(exc(1), exc(2))
                high = max(exc(1), exc(2))
                j = ishft(low - 1, -6) + 1
                n = iand(low - 1, 63)
                k = ishft(high - 1, -6) + 1
                m = iand(high - 1, 63)
                ! masklow = not(ishft(1_8,n+1))+1
                ! maskhigh = ishft(1_8,m)-1
                if (j == k) then
                    nperm = nperm + popcnt(iand(det1(j), iand(not(ishft(1_8, n + 1)) + 1, ishft(1_8, m) - 1)))
                else
                    nperm = nperm + popcnt(iand(det1(k), ishft(1_8, m) - 1)) &
                                + popcnt(iand(det1(j), not(ishft(1_8, n + 1)) + 1))
                    do i = j + 1, k - 1
                        nperm = nperm + popcnt(det1(i))
                    end do
                end if
                phase = phase_dble(iand(nperm, 1))
                return
            end if
        end do

    end subroutine get_single_excitation


    subroutine get_double_excitation (det1, det2, exc, phase, Nint)
        integer,          intent(in)  :: Nint
        integer(longint), intent(in)  :: det1(Nint), det2(Nint)
        integer,          intent(out) :: exc(4)
        double precision, intent(out) :: phase
        integer :: l, ispin, idx_hole, idx_particle, ishift
        integer :: i, j, k, m, n, high, low, a, b, c, d, nperm, tz, nexc, num_holes
        integer(longint) :: hole, particle, tmp, spin_total, masklow, maskhigh
        integer   :: holes(2), particles(2), comp(2)
        double precision, parameter :: phase_dble(0:1) = (/ 1.d0, -1.d0 /)

        exc(:) = 0
        nexc = 0
        nperm = 0
        idx_particle = 0
        idx_hole = 0
        ishift = -63
        num_holes = 0
        do l = 1, Nint
            ishift = ishift + 64
            if (det1(l) == det2(l)) cycle
            tmp = ieor(det1(l), det2(l))
            particle = iand(tmp, det2(l))
            hole     = iand(tmp, det1(l))
            do while (particle /= 0_8)
                tz = trailz(particle)
                nexc = nexc + 1
                idx_particle = idx_particle + 1
                !exc(2*idx_particle) = tz+ishift
                particles(idx_particle) = tz + ishift
                particle = iand(particle, particle - 1_8)
            end do
            do while (hole /= 0_8)
                tz = trailz(hole)
                nexc = nexc + 1
                idx_hole = idx_hole + 1
                holes(idx_hole) = tz + ishift
                num_holes = num_holes + 1
                !exc( idx_hole*2 -1) = tz+ishift
                hole = iand(hole, hole - 1_8)
            end do
            if (nexc == 4) exit
        end do

        !if(exc(4) < exc(2)) then
        !     tmp = exc(4)
        !     exc(4) = exc(2)
        !     exc(2) = tmp
        ! endif
        ! if(exc(3) < exc(1)) then
        !     tmp = exc(3)
        !     exc(1) = exc(3)
        !     exc(3) = tmp
        ! endif
        !if(holes(2) < holes(1)) then
        !    tmp = holes(1)
        !    holes(1) = holes(2)
        !    holes(2) = tmp
        !endif

        !write(stdout,*) particles

        !if(particles(2) < particles(1)) then
        !    tmp = particles(1)
        !    particles(1) = particles(2)
        !    particles(2) = tmp
        !endif
        ! write(stdout,*) particles

        do i = 1, num_holes
            low  = min(particles(i), holes(i))
            high = max(particles(i), holes(i))
            j = ishft(low - 1, -6) + 1
            n = iand(low - 1, 63)
            k = ishft(high - 1, -6) + 1
            m = iand(high - 1, 63)

            if (j == k) then
                nperm = nperm + popcnt(iand(det1(j), iand(not(ishft(1_8, n + 1)) + 1, ishft(1_8, m) - 1)))
            else
                nperm = nperm + popcnt(iand(det1(k), ishft(1_8, m) - 1)) &
                              + popcnt(iand(det1(j), not(ishft(1_8, n + 1)) + 1))
                do l = j + 1, k - 1
                    nperm = nperm + popcnt(det1(l))
                end do
            end if
        end do

        ! spin_total = iand(particles(1),1) + iand(particles(2),1) + iand(holes(1),1) + iand(holes(2),1)
        ! if (spin_total == 0 .or. spin_total == 4) then

        if (num_holes == 2) then
            a = min(holes(1), particles(1))
            b = max(holes(1), particles(1))
            c = min(holes(2), particles(2))
            d = max(holes(2), particles(2))
            if (c > a .and. c < b .and. d > b) nperm = nperm + 1
        end if

        ! end do
        !exc(1) = holes(1)
        !exc(2) = particles(1)
        !exc(3) = holes(2)
        !exc(4) = particles(2)
        !if(holes(1) > holes(2)) then
        !     comp(1) = particles(1)
        !     comp(2) = particles(2)
        ! else
        !      comp(1) = particles(2)
        !     comp(2) = particles(1)
        ! endif
        ! if(comp(1) > comp(2)) nperm = nperm + 1
        !  exit
        !  end if
        ! end do
        exc(1) = holes(1)
        exc(2) = particles(1)
        exc(3) = holes(2)
        exc(4) = particles(2)

        phase = phase_dble(iand(nperm, 1))

    end subroutine get_double_excitation


    subroutine assign_pointer (targ, point, Nints)
        integer,                   intent(in)  :: Nints
        integer(longint), target,  intent(in)  :: targ(Nints)
        integer(longint), pointer, intent(out) :: point(:)

        point(1:Nints) => targ(1:Nints)

    end subroutine assign_pointer


    subroutine get_determinants (this, dtrs, nelec)
        integer,           intent(in)  :: nelec
        class(CSFOrbital), intent(in)  :: this
        integer,           intent(out) :: dtrs(nelec)
        integer(longint) :: current_so
        integer          :: tz, ishift = -63, ints, idx_dtr, nexec

        idx_dtr = 0
        dtrs = 0
        ishift = -63

        do ints = 1, this % csf % manager % Nints
            ishift = ishift + 64
            current_so = this % csf % manager % int64dtrs(ints, this % id_dtr)
            do while (current_so /= 0)
                tz = trailz(current_so)
                idx_dtr = idx_dtr + 1
                dtrs(idx_dtr) = tz + ishift
                current_so = iand(current_so, current_so - 1)
            end do
        end do

    end subroutine get_determinants


    recursive subroutine QsortDets(A,phase)
        integer,  intent(inout), dimension(:) :: A
        real(wp), intent(inout)               :: phase
        integer :: iq

        if (size(A) > 1) then
            call Partition(A, iq, phase)
            call QsortDets(A(:iq-1), phase)
            call QsortDets(A(iq:), phase)
        end if

    end subroutine QsortDets


    subroutine Partition (A, marker, phase)
        integer,  intent(inout) :: A(:)
        integer,  intent(out)   :: marker
        real(wp), intent(inout) :: phase
        integer :: i, j
        integer :: temp
        integer :: x      ! pivot point

        x = A(1)
        i = 0
        j = size(A) + 1

        do
            j = j - 1
            do
                if (A(j) <= x) exit
                j = j-1
            end do
            i = i + 1
            do
                if (A(i) >= x) exit
                i = i + 1
            end do
            if (i < j) then
                ! exchange A(i) and A(j)
                temp = A(i)
                A(i) = A(j)
                A(j) = temp
                phase = -phase
            elseif (i == j) then
                marker = i + 1
                return
            else
                marker = i
                return
            end if
        end do

    end subroutine Partition

end module CSF_module
