! Copyright 2019
!
! Alex G. Harvey with ontributions from Danilo S. Brambila and Zdenek Masin.
!
! This file is part of UKRmol-out (UKRmol+ suite).
!
!     UKRmol-out is free software: you can redistribute it and/or modify
!     it under the terms of the GNU General Public License as published by
!     the Free Software Foundation, either version 3 of the License, or
!     (at your option) any later version.
!
!     UKRmol-out is distributed in the hope that it will be useful,
!     but WITHOUT ANY WARRANTY; without even the implied warranty of
!     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!     GNU General Public License for more details.
!
!     You should have received a copy of the GNU General Public License
!     along with  UKRmol-out (in source/COPYING). Alternatively, you can also visit
!     <https://www.gnu.org/licenses/>.
!
SUBROUTINE MPI_R_SOLVE(ifail)
! ZM 13/03/2015: converted to F90 and MPI-parallelized the loop over energies.
!    The parallelization is done cyclically redistributing the energy points
!    among all tasks. This achieves optimal load balancing. When run using >1
!    tasks the wavefunction coefficient output is not implemented. When run
!    using only 1 task the behavior is identical to the original RSOLVE. No
!    effort has been made to retain functionality of the non-adiabatic part of
!    the code. I've also removed allocation of some arrays that had not been
!    used. Replaced implicit variable declaration with explicit variable 
!    declaration. Dipoles for energies skipped during propagation are no 
!    longer saved.
! ZM 08/04/2015: added option to calculate and save T-matrices only (i.e.
! discarding the K-matrices). This is done via a new namelist variable
! calc_tmat. If set to .true. then only the T-matrices are calculated and saved.
! The other variables controling the T-matrix calculation through the RSLVIN namelist are:
! LUTMT,NTSET,MAXI,MAXF,ITFORM.
! It is not possible to declare the TMATIN namelist here and to read-in the
! input since apparently during linking the TMATIN namelist from TMATRX is
! used which is much larger and probably causes overwriting of some local
! variables and leads to incorrect propagation.
    USE photo_outerio, ONLY: read_transdip2, idp, writsh, writsc, write_pw_dipoles
    USE compak_procs, ONLY: compak
    USE blas_lapack_gbl, ONLY: blasint
    USE mpi_memory_gbl, ONLY: mpi_memory_allocate_real, mpi_memory_synchronize, local_master, mpi_memory_deallocate_real
    USE const_gbl
    USE mpi_gbl
    USE omp_lib
    USE linalg_cl, ONLY: linalg_cl_init, residr_cl, finalize_cl
    USE iso_c_binding, ONLY: c_int

    IMPLICIT NONE

!   ZM This common block is needed by WRITTH 
    COMMON/INTFS/ NCHAN,ION,NVIB,NDIS,IWRITE,MGVN,STOT,GUTOT
!
!***********************************************************************
!
!     MAXIMUM DIMENSIONS ARE SET BY THE FOLLOWING PARAMETER STATEMENT
!     VARIABLE DIMENSIONS ARE USED IN ALL LOWER LEVEL ROUTINES, EXCEPT
!     VIBINI 
!
    INTEGER, PARAMETER :: MAXPTS=30, MAXTGT=500, MAXENR=10
!
!     MAXPTS = MAXIMUM NUMBER OF GEOMETRIES
!     MAXTGT = MAXIMUM NUMBER OF TARGET ELECTRONIC STATES
!     MAXENR = MAXIMUM NUMBER OF INPUT SCATTERING ENERGY PAIRS (E0,DE)
!
    CHARACTER(1024) :: inpname
    CHARACTER(80) :: NAME
    CHARACTER(11) :: RFORM,CHFORM,WFORM,KFORM,NRFORM,VCFORM,MODDAT,TFORM
    CHARACTER(11) :: sform, form_pw_dipoles  ! -AlexH
    CHARACTER(9) :: FORM
    CHARACTER(1) :: IRFORM,ICFORM,IWFORM,IKFORM,INRFRM,IVCFRM,ITFORM
    CHARACTER(3) :: EUNIT(2)
    CHARACTER BLANK*8, daytim*20
    INTEGER :: LUCHAN, LURMT, LUVCHN, LUNRMT, LUKMT, LUWFN, IKTYPE, NPOLE, NDIS, NGEOM,&
               IWRITE, ISMAX, NESCAT(MAXENR), IPRNT(6), NCHSET(MAXPTS), NRMSET(MAXPTS), NVCSET, NNREST, &
               NKSET, NRQUAD, NERANG, NWSET, MGVN, IEUNIT, NNRSET, NVTARG(MAXTGT), MDMAX, NLPOLE, NBIGSET, NEWBUT, IFPROP2, &
               NSET_LU_PW_DIPOLES, IREAD, NKNOT, IVPROP, IDPROP, NVIB, NCOL, IBACK, I, IFAIL, NEXT, IG, NESC, IEN, ivt0(2),ivu0(2),&
               NCHANF, NVIB0, NDIS0, NTARG, ION, NFBUT, ISMX, NSTAT, NOCSF, NPLX, IEX, NCHANS, NDISS, NTARGV, NNBUT, ISMVX, NHD, &
               NPVEC, NCHAN, ISFMAX, NTV, NCF, IG1, IET, IEG, IWA, IBUT, ISF, IEC, IRC, NCHAN0, NTARG0, NCI, NVCHAN, NVIBD, NQUAD, &
               ITEMPD, ND2, NCHSQ, NVCHSQ, IES, NES, IE, NOPEN, NSTAT_NEUT, NBOUND_DIP, NVOPEN, NDOPEN, K, IJ, J, &
               IR, IQ, IP, no_scat_energies, nworkers
    DOUBLE PRECISION :: BBLOCH, BIGB, POLEPROX, ZERO, HALF, ONE, TWO, RYD, EMIN, EMAX, EMINR, EMAXR, EINC(2,MAXENR), &
                        EINR(2,MAXENR), RK(MAXPTS+4),R(MAXPTS),ezero(maxpts), vec(1), RMASS, RMATR, TWOM, RMATN, RR, E0, EBASE, &
                        KNOTS, DUM, DUM1(1), DUM2(1,1), RAFINV, SCALE, EMINM, EMAXM, RAFIND, ENRYD, DE, ETOT, ENTOP, E2M
    INTEGER :: STOT,GUTOT 
    INTEGER :: calcak, calcdip(3),lu_inner_dipoles, nset_lu_inner_dipoles, &
    lu_pw_dipoles, nset_pw_dipoles, nstat_dip, maxprop,&
    lmax_property, cnt, idat
    INTEGER(kind=mpiint) :: rank
    real(kind=idp) :: dummy_one(1)
    double precision :: smooth !AlexH 2012
    integer :: LUSCT,NSSET !-AlexH 17/11/10
    integer :: n_dip_el, nchan_ak, nchan_dip
    integer, allocatable :: ichord(:),ivtarg(:),ivnu(:),starg(:), &
    gtarg(:),mtarg(:),ivchl(:),lvchl(:),mvchl(:),ichl(:),ncsf(:), &
    dip_comp_present(:), istart(:)
    integer(blasint), allocatable :: ipiv(:)
    double precision, allocatable :: rvib(:),rmn(:),fx(:),fxp(:), &
    akmat(:),rres(:),ampn(:),eign(:),evib(:),etarg(:), &
    vibfn(:),rquad(:),qwts(:),evchl(:), &
    adm(:),fv(:),fvp(:),fd(:),fdp(:),crv(:),crd(:),bloch(:), &
    amc(:),adc(:), fkmat(:),  & !fkmat AlexH 11.11.10
    ar(:,:), ai(:,:),rvib2(:),  & !AlexH 17/11/10
    y_arr(:),dy_arr(:), &
    escat(:),&
    ampae(:,:),all_kmat(:),linear_kmat(:)
    double precision, allocatable :: buf_re(:), buf_im(:),AA(:,:),BB(:,:),TR(:),TI(:), workr(:,:), worki(:,:),ftmat(:,:,:,:)
    real(kind=idp), pointer :: cf(:), epole(:), wamp(:), butc(:), sfac(:), ecex(:), rcex(:)
    integer :: cf_win, epole_win, wamp_win, butc_win, sfac_win, ecex_win, rcex_win
    real(kind=idp), allocatable :: inner_dipoles_temp(:,:,:),  &
    inner_dipoles(:,:,:),  &
    re_pw_dipoles_temp(:,:,:), &
    im_pw_dipoles_temp(:,:,:), &
    re_pw_dipoles(:,:,:,:), &
    im_pw_dipoles(:,:,:,:), &
    bound_state_energies(:), &
    re_pw_dipoles_gathered(:,:,:,:), &
    im_pw_dipoles_gathered(:,:,:,:)
    real(kind=16), allocatable :: rmat_quad(:) !AH
    double precision :: dummy(2)
    integer :: ch_i, ch_f, owner
    logical :: master_writer, gpu
    double precision :: test, start_t, end_t
!    EXTERNAL POTL,DISPOT
    integer :: LUTMT, MAXCHI, MAXCHF, NTSET, NAPPR, MAXI, MAXF, GET_TKMAT
!
!***********************************************************************
!
!     BASIC DATA IS INPUT VIA NAMELIST /RSLVIN/
!     OTHER DATA IS INPUT VIA NAMELISTS IN ROUTINES VIBINI AND ASYM1
!
!      BBLOCH   = COEFFICIENT IN ELECTRONIC BLOCH OPERATOR
!      BIGB     = COEFFICIENT IN NUCLEAR BLOCH OPERATOR
!      EINC     = Scattering energies relative to lowest (vibrational)
!                 level of target 
!                 EINC(1,I) = initial energy in sub range I
!                 EINC(2,I) = energy increment in this subrange
!                 units are as specified by IEUNIT
!      GUTOT    = G/U SYMMETRY OF TOTAL SYSTEM +1=G, -1=U
!      ICFORM   = Formatted/unformatted switch for unit LUCHAN
!      IEUNIT   = UNITS IN WHICH INPUT SCATTERING ENERGIES ARE INPUT
!                 1= RYD, 2= EV
!      IKFORM   = Formatted/unformatted switch for unit LUKMT
!      IKTYPE   = Write open-open subset of K-matrix (= 0) or full (= 1)
!      INRFRM   = Formatted/unformatted switch for unit LUNRMT
!      IPRNT    = DEBUG PRINT SWITCHES
!                 (1) =1 Print all input data
!                 (2) =1 Print vibrational wavefuction data
!                 (3) =1 Debug output in dissociating channels
!                 (4)  not used
!                 (5) =1 Print R-matrices
!                 (6) =1 Print all output data
!      IRFORM   = Formatted/unformatted switch for unit LURMT
!      ISMAX    = Highest multipole to be used in asymptotic expansion
!                 of asymptotic potentials
!      IWFORM   = Formatted/unformatted switch for unit LUWFN
!      IWRITE   = Logical unit for printed output
!      LUCHAN   = Logical unit holding fixed nuclei channel and target 
!                 data
!      LUKMT    = LOGICAL UNIT FOR K-MATRIX OUTPUT 
!      LUWFN    = Logical unit for R-matrix and wavefunction output
!      LUNRMT   = Logical unit holding non adiabatic R-matrix data
!      LURMT    = Logical unit holding fixed nuclei R-matrix data
!      LUVCHN   = Logical unit holding vibrational/dissociating
!                 channel data
!      LUSCT	= Logical unit for A_k coefficient -AlexH 17/11/10
!      MDMAX    = maximum multipole to be retained in expansion of
!                 asymptotic internuclear (dissociation) potential
!      MGVN     = TOTAL SYMMETRY OF SYSTEM
!      NAME     = TITLE FOR OUTPUT
!      NCHSET   = Set numbers for input fixed nuclei channel/target
!                 data for each geometry
!      NDIS     = NUMBER OF DISSOCIATING CHANNELS
!      NERANG   = Number of subranges of scattering energies
!      NESCAT   = NUMBER OF INPUT SCATTERING ENERGIES in each subrange
!      NEWBUT   = switch on energy parameter in Buttle correction
!      NGEOM    = NUMBER OF GEOMETRIES
!      NKSET    = Set number for output K-matrices 
!      NNRSET   = Set number for input non-adiabatic R-matrix data
!      NPOLE    = NUMBER OF ELECTRONIC R-MATRIX POLES TO BE TREATED
!                 NON-ADIABATICALLY
!      NRMSET   = Set numbers for input fixed nuclei R-matrix data for
!                 each geometry
!      NRQUAD   = NUMBER OF QUADRATURE POINTS FOR INTEGRALS IN ADIABATIC
!                 APPROXIMATION ( IF =0 THEN CODE DECIDES)
!      NVCHSET  = Set number for input vibrational/dissociating channel
!                 data
!      NVTARG   = NUMBER OF VIBRATIONAL LEVELS FOR EACH TARGET STATE
!      NWSET    = Set number for output R-matrices and wavefunctions
!      R        = ARRAY HOLDING INTERNUCLEAR SEPARATIONS
!      STOT     = SPIN MULTIPLICITY 2*S+1 WHERE S = TOTAL SPIN OF SYSTEM
!
!      MASTER_WRITER = WHETHER TO DEDICATE THE MASTER PROCESS TO WRITING ONLY
!
    NAMELIST/RSLVIN/LUCHAN,LURMT,LUVCHN,LUNRMT,LUKMT,LUWFN,NPOLE,NDIS, &
    BBLOCH,BIGB,NGEOM,R,IWRITE,ISMAX,NAME,NESCAT,EINC,IKTYPE, &
    IPRNT,NCHSET,NRMSET,NVCSET,NNRSET,NKSET,NRQUAD, &
    ICFORM,IRFORM,IVCFRM,INRFRM,IKFORM,NERANG,NWSET, &
    MGVN,STOT,GUTOT,IEUNIT,NVTARG,MDMAX,IWFORM,NLPOLE, &
    nbigset,newbut,CALCAK,calcdip,smooth,master_writer,gpu, &
    lusct, nsset, sform, ifprop2,poleprox, & !-AlexH 17/11/10
    lu_inner_dipoles,nset_lu_inner_dipoles, lmax_property, &
    lu_pw_dipoles, nset_lu_pw_dipoles, form_pw_dipoles,&
    GET_TKMAT,LUTMT,NTSET,MAXI,MAXF,ITFORM !ZM added calc_tmat,LUTMT,NTSET,MAXI,MAXF,ITFORM to enable calculation of T-matrices
!
!***********************************************************************
!
    DATA IREAD,LUCHAN,LURMT,LUKMT,LUWFN,IKTYPE/5,10,21,19,0,0/, &
    LUVCHN,LUNRMT/28,29/,IPRNT/6*0/,NERANG/1/,IEUNIT/1/,newbut/1/, &
    NKNOT/0/,IVPROP/1/,IDPROP/1/,NPOLE/0/, &
    EINC/MAXENR*0.D0,MAXENR*0.D0/,NVCSET,NNRSET,NKSET,NWSET/4*1/, &
    ISMAX/-1/,BBLOCH,BIGB/2*0.D0/,NVTARG/MAXTGT*1/,NRQUAD/0/,NGEOM/1/ &
    ,NESCAT/MAXENR*10/,NLPOLE/1/,nbigset/1/,CALCAK/0/, &
    LUSCT/88/,NSSET/1/, ifprop2/1/,smooth/0.0/ !-AlexH 17/11/10
    DATA ZERO/0.D0/,HALF/0.5D0/,ONE/1.D0/,TWO/2.D0/,NCOL/6/,MDMAX/-1/
    DATA FORM,CHFORM,RFORM,VCFORM,NRFORM,KFORM,WFORM/7*'FORMATTED'/ &
    ,ICFORM,IRFORM,IVCFRM,INRFRM,IKFORM,IWFORM/6*'U'/,poleprox/0.D0/
    DATA EUNIT/'RYD','EV'/,RYD/0.073500D0/,BLANK/'        '/
    DATA IBACK/1/
    DATA MODDAT/'30-Jul-2021'/
    DATA sform/'UNFORMATTED'/ ! AlexH 17/11/10
    DATA form_pw_dipoles/'UNFORMATTED'/ ! AlexH 
    DATA LUTMT/12/,NTSET/1/,MAXI/1/,MAXF/0/,ITFORM/'U'/,TFORM/'FORMATTED'/ !ZM defaults set to save T-matrix for g.s. only

    ! INTFS COMMON block
    IWRITE=6
    NVIB=0
    NDIS=0
    LUSCT=88

    lu_inner_dipoles=624
    nset_lu_inner_dipoles=1
    calcdip=0
    lu_pw_dipoles=142
    nset_pw_dipoles=1
    lmax_property=1
    master_writer=.false.
    gpu=.false.
    GET_TKMAT = 2 !0 = no observables, 1 = T-matrices, 2 = K-matrices, 3 = both T- and K-matrices
!
!---- WRITE HEADER
!      WRITE(*,*)
!      WRITE(*,*) ' This is a modified version of the program that'
!      WRITE(*,*) 'allows to automatically propagate inwards or '
!      WRITE(*,*) 'outwards.'
!      WRITE(*,*) ' Modified by Jimena Gorfinkiel.'
!---- SET UP DEFAULT VALUES OF POINTERS NCHSET AND NRMSET 
    DO 111 I=1,MAXPTS
        NCHSET(I) = I
        NRMSET(I) = I
        r(i) = zero
    111 END DO
!
    IFAIL = 0
    NEXT = 1
    GUTOT = 0
!
!---- Read basic data via namelist /RSLVIN/
!     ZM read from the input file 'inp' and redirect all text output
!     into the unit STDOUT
    IF (COMMAND_ARGUMENT_COUNT() > 0) THEN
        CALL GET_COMMAND_ARGUMENT(1, inpname)
    ELSE
        inpname = 'inp'
    END IF
    OPEN(NEWUNIT=IDAT,FILE=inpname,FORM='FORMATTED',STATUS='UNKNOWN')
    READ(IDAT,RSLVIN)
    IWRITE = STDOUT
    IF(ICFORM == 'U') CHFORM='UN'//FORM
    IF(IRFORM == 'U') RFORM='UN'//FORM
    IF(INRFRM == 'U') NRFORM='UN'//FORM
    IF(IKFORM == 'U') KFORM='UN'//FORM
    IF(IWFORM == 'U') WFORM='UN'//FORM
    IF(IVCFRM == 'U') VCFORM='UN'//FORM
!
    IF(NGEOM > MAXPTS) GO TO 96
!
!---- Date stamp run and print title
    CALL DATEST(DAytim)
    NAME(61:) = DAytim
    WRITE(IWRITE,12)MODDAT,NAME,MGVN,STOT,GUTOT
    IF(NGEOM == 1) THEN
        WRITE(IWRITE,20) R(1)
    ELSE
        WRITE(IWRITE,10)NDIS,(R(I),I=1,NGEOM)
    ENDIF
    IF(NCHSET(NGEOM) == 0 .OR. NRMSET(NGEOM) == 0) GO TO 89
    WRITE(IWRITE,11)CHFORM,LUCHAN,(NCHSET(IG),IG=1,NGEOM)
    WRITE(IWRITE,33)RFORM,LURMT,(NRMSET(IG),IG=1,NGEOM)
    IF(NGEOM > 1) WRITE(IWRITE,21)VCFORM,LUVCHN,NVCSET,NRFORM,LUNRMT,NNRSET
    IF (IAND(GET_TKMAT, 1) /= 0) THEN
       write(IWRITE,'(/,1X,"T-matrices will be calculated and saved.",/)')
       IF(ITFORM == 'U') TFORM='UN'//FORM
       WRITE(IWRITE,49)TFORM,LUTMT,NTSET
    ELSEIF (IAND(GET_TKMAT, 2) /= 0) THEN
       write(IWRITE,'(/,1X,"K-matrices will be saved.",/)')
       WRITE(IWRITE,31)KFORM,LUKMT,NKSET
    ELSEIF (GET_TKMAT .eq. 0) THEN
       write(IWRITE,'(/,1X,"T-matrices will NOT be calculated and saved.")')
       write(IWRITE,'(  1X,"K-matrices will NOT be saved.",/)')
    ELSEIF (GET_TKMAT < 0 .OR. 3 < GET_TKMAT) THEN
       write(IWRITE,'(/,1X,"On input GET_TKMAT was out of range: 0,1,2,3.")')
       call mpi_xermsg('rsolve','main','Error in input: see output for details.',1,1)
    ENDIF
    IF(LUWFN /= 0) THEN
        WRITE(IWRITE,32) WFORM,LUWFN,NWSET
        call mpi_xermsg('rsolve','main','WF output not supported in MPI RSOLVE.',2,1)
    ENDIF
!
!---- Calculate total number of scattering energies, NESC and max and 
!     min energies EMIN and EMAX
    EMIN = EINC(1,1)
    EMAX = EMIN
    NESC = 0
    DO 9 IEN=1,NERANG
        NESC = NESC+NESCAT(IEN)
        EMIN = MIN(EMIN,EINC(1,IEN))
        EMAX = MAX(EMAX,EINC(1,IEN)+NESCAT(IEN)*EINC(2,IEN))
    9 END DO
    WRITE(IWRITE,13) NESC,EMIN,EMAX,EUNIT(IEUNIT)
!---- Convert scattering energies to Rydbergs
    IF(IEUNIT == 2) THEN
        EMINR = EMIN*RYD
        EMAXR = EMAX*RYD
        DO 36 IEN=1,NERANG
            EINR(1,IEN) = EINC(1,IEN)*RYD
            EINR(2,IEN) = EINC(2,IEN)*RYD
        36 END DO
    ELSE
        EMINR = EMIN
        EMAXR = EMAX
        DO 37 IEN=1,NERANG
            EINR(1,IEN) = EINC(1,IEN)
            EINR(2,IEN) = EINC(2,IEN)
        37 END DO
    ENDIF
!
!---- Find first fixed-nuclei R-matrix input set and read dimension 
!     information
    WRITE(IWRITE,17)
    CALL READRH(LURMT,NRMSET(nbigset),RFORM,MGVN,STOT,GUTOT,NCHANF,NVIB0,NDIS0,NTARG,ION,R(1),RMASS,RMATR,NFBUT,ISMX,nstat,NOCSF,&
                NPLX,ezero(1),iex,IWRITE,IPRNT(1),IFAIL)
    IF(IFAIL /= 0) RETURN
    TWOM = TWO*RMASS
!
!---- Read header on non-adiabatic R-matrix file
    IF(NGEOM > 1) THEN
        WRITE(IWRITE,19)
        CALL READRH(LUNRMT,NNRSET,NRFORM,MGVN,STOT,GUTOT,NCHANS,NVIB,NDISS,NTARGv,ION,ZERO,RMASS,RMATN,NNBUT,ISMVX,nstat,NHD,NPVEC,&
                    ezero(1),iex,IWRITE,IPRNT(1),IFAIL)
        IF(IFAIL /= 0) RETURN
        IF(NDISS /= NDIS .AND. NDIS > 0) THEN
            WRITE(IWRITE,23) NDIS,NDISS
            NDIS = NDISS
        ENDIF
        NCHAN = NCHANS
        RR = ZERO
    ELSE
        NVIB = 0
        NDIS = 0
        NCHAN = NCHANF
        ntargv = ntarg
        RR = R(1)
        ISMVX = ISMX
    ENDIF
    IF(ISMAX == -1 .OR. ISMAX > ISMX) THEN
        ISFMAX = ISMX
    ELSE
        ISFMAX = ISMAX
    ENDIF
    WRITE(IWRITE,34) ISFMAX
    IF(NDIS /= 0) THEN
        IF(MDMAX == -1 .OR. MDMAX > ISMVX) MDMAX=ISMVX
        WRITE(IWRITE,35) MDMAX
    ENDIF
    ISMAX = MAX(ISFMAX,MDMAX)
!
!---- Assign storage for fixed nuclei data
    ntv = max(ntarg,NTARGv)
    ncf = max(NCHAN,nchanf)
    allocate (etarg(ngeom*ntv),starg(ntv),mtarg(ntv),gtarg(ntv))
    allocate (evchl(ncf),lvchl(ncf),mvchl(ncf),ivchl(ncf))
    allocate (ichl(nchanf),amc(ISMAX*NCHAN*NCHAN),adc(MDMAX*NDIS*NDIS),adm(5*ndis),ncsf(ngeom))
    allocate (rmat_quad(nchan*nchan))
!
!---- Arrays read from LURMT are allocated only once per node (if shared memory is enabled) and read by local node master
    cf_win    = mpi_memory_allocate_real(cf,    ISMAX*ncf*(ncf+1)/2)
    epole_win = mpi_memory_allocate_real(epole, nstat*ngeom)
    wamp_win  = mpi_memory_allocate_real(wamp,  nstat*NCHANF*NGEOM)
    butc_win  = mpi_memory_allocate_real(butc,  3*NCHANF*NGEOM)
    if (abs(nfbut) > 1) then
        sfac_win = mpi_memory_allocate_real(sfac, nchanf*ngeom)
        ecex_win = mpi_memory_allocate_real(ecex, iex*ngeom)
        rcex_win = mpi_memory_allocate_real(rcex, iex*nchanf*ngeom)
    end if
!
!---- Set boundary amplitudes to zero before reading them in
    if (.not. shared_enabled .or. local_rank == local_master) then
        wamp = 0.0d0
    end if
    call mpi_memory_synchronize(wamp_win)
!
!---- LOOP OVER GEOMETRIES
    WRITE(IWRITE,17)
    DO 2 IG=1,NGEOM
        !
        !---- Storage allocation for current geometry
        IG1 = IG-1
        IET = 1+IG1*NTARG
        IEG = 1+IG1*nstat
        IWA = 1+IG1*nstat*NCHANF
        IBUT =1+IG1*3*NCHANF
        isf  =1+IG1*nchanf
        iec  =1+IG1*iex
        irc  =1+IG1*iex*nchanf
        !
        !
        !---- Read target and channel data 
        NCHAN0 = NCHANF
        NTARG0 = NTARG
        CALL READTC(LUCHAN,NCHSET(IG),NCHAN0,NVIB0,NDIS0,NTARG0,ION,IVT0,IVU0,ICHL,LVCHL,MVCHL,EVCHL, &
                    STARG,MTARG,GTARG,etarg(IET:),R(IG),RMASS,CHFORM,IWRITE,IPRNT(1),IFAIL)
        IF(NCHAN0 /= NCHANF .OR. NTARG0 /= NTARG) GO TO 92
        !
        !---- Read R-matrix header 
        CALL READRH(LURMT,NRMSET(IG),RFORM,MGVN,STOT,GUTOT,NCHAN0,NVIB0,NDIS0,NTARG0,ION,R(IG),RMASS, &
                    RMATR,NFBUT,ISMX,NCSF(ig),nci,NPLX,ezero(ig),iex,IWRITE,IPRNT(1),IFAIL)
        IF(NCHAN0 /= NCHANF .OR. NTARG0 /= NTARG) GO TO 92
        !
        !---- Read remainder of fixed nuclei R-matrix data
        !todo this can take majority of time when using many processes and a
        !large rmatrix file - change it so only master reads + broadcast!!!
        start_t = omp_get_wtime()
        if (shared_enabled .and. local_rank /= local_master) then
            ! dummy read: skip the records only
            CALL SKIPRM(LURMT, RFORM, NCHANF, NCSF(ig), nci, ISMX, isfmax, NPLX, 0, NFBUT, IFAIL)
        else
            ! actual read: local node master reads the data (or everyone if shared memory is disabled)
            if (abs(nfbut) > 1) then
                CALL READRM(LURMT, RFORM, NCHANF, NCSF(ig), nci, ISMX, isfmax, NPLX, 0, NFBUT, &
                            cf, epole(IEG:), wamp(IWA:), vec, butc(IBUT:), sfac(isf:), iex, ecex(iec:), rcex(irc:), IFAIL)
            else
                CALL READRM(LURMT, RFORM, NCHANF, NCSF(ig), nci, ISMX, isfmax, NPLX, 0, NFBUT, &
                            cf, epole(IEG:), wamp(IWA:), vec, butc(IBUT:), DUM1, iex, DUM1, DUM1, IFAIL)
            end if
        end if
        ! sync the shared memory among the tasks on a node
        call mpi_memory_synchronize(cf_win)
        call mpi_memory_synchronize(epole_win)
        call mpi_memory_synchronize(wamp_win)
        call mpi_memory_synchronize(butc_win)
        if (abs(nfbut) > 1) then
            call mpi_memory_synchronize(sfac_win)
            call mpi_memory_synchronize(ecex_win)
            call mpi_memory_synchronize(rcex_win)
        end if
        end_t = omp_get_wtime()
        write(iwrite,'("READRM took ",f15.4)') end_t-start_t
        !
        IF(IFAIL /= 0) RETURN
        !
    2 END DO
!
    IF(NGEOM == 1) THEN
        !
        !---- Set up fixed nuclei calculation
        e0 = etarg(1)
        NVCHAN = NCHAN
        !
    ELSE
        WRITE(IWRITE,22)
        NVCHAN = NCHAN-NDISS
        IF(NDIS == 0) NCHAN = NVCHAN
        !
        !---- Read vibrational channel data 
        NVIBD = NVIB+NDIS
        allocate (evib(ntargv),ivtarg(nvibd),ivnu(nvibd))
        !
        CALL READTC(LUVCHN,NVCSET,NCHAN,NVIB,NDIS,NTARGv,ION,IVTARG,IVNU,IVCHL,LVCHL,MVCHL,EVCHL,STARG,MTARG,GTARG,Evib,ZERO,RMASS,&
                    VCFORM,IWRITE,IPRNT(1),IFAIL)
        !
        deallocate(evib,ivtarg,ivnu)
        !
        !---- Initialize acquisition of vibrational functions
        EBASE = zero
        CALL VIBINI(IREAD,IWRITE,NTARG,NVTARG,RMASS,ebase,IPRNT(2))
        WRITE(IWRITE,47) EBASE
        !
        !---- Set up quadrature scheme for integrals in adiabatic nuclei approx
        IF(NRQUAD == 0) THEN
        !     THIS IS A BIT ARBITRARY AND NOT THOROUGHLY TESTED
            NQUAD = 15*NVIB+1
            IF(MOD(NQUAD,2) == 0) NQUAD=NQUAD+1
        ELSE
            NQUAD = NRQUAD
        ENDIF
        allocate (rquad(nquad),qwts(nquad))
        !
        CALL VMESH(R(1),R(NGEOM),NQUAD,RQUAD,QWTS)
        !
        !---- Initialize spline interpolation
        CALL SPLINI(NKNOT,KNOTS,RK,MAXPTS,NGEOM,R,IWRITE)
        !
        !----- GET TARGET VIBRATIONAL WAVEFUNCTIONS ON QUADRATURE MESH
        !
        allocate (evib(nvibd),ivtarg(nvibd),ivnu(nvibd),ichord(nvchan),vibfn(NVIB*NQUAD))
        !
        CALL RVIBR(NVIB,NQUAD,EVIB,IVTarg,IVnU,VIBFN,dum,RQUAD)
        e0 = evib(1)
        !
        !---- Set up pointer from VIBINI ordering to channel ordering
        if(ntarg > 1) call REORDI(nvchan,evchl,nvib,evib,ichord)
        !
        
        IF(IPRNT(2) > 0) CALL CHECKQ(NQUAD,NVIB,QWTS,VIBFN,IWRITE)
        !
        !---- Storage allocation for non-adiabatic data
        allocate (ampn(nstat*NCHANS),eign(nstat))
        !
        !---- Read rest of non-adiabatic R-matrix file
        CALL READRM(LUNRMT,NRFORM,NCHANS,nstat,NHD,ISMVX,ISMAX,0,0,0,cf,EIGn,AMPn,DUM2,DUM2,DUM1,0,DUM1,DUM1,IFAIL)
        !
    ENDIF
!
!---- Save multipole coefficients as square matrix
    IF(ISMAX > 0) THEN
        CALL SQUARM(NVCHAN,ISMAX,cf,AMC)
        IF(NDIS > 0 .AND. MDMAX > 0) THEN
            !---- Unpack dissociation potential data.  This code must match
            !     DISINI in VIBRMT
            ITEMPD = ISMAX*NVCHAN*(NVCHAN+1)/2+1
            ND2 = NDIS*(NDIS+1)/2+6*NDIS
            CALL SPLITM(NDIS,ND2,MDMAX,cf(ITEMPD:),ADC,ADM)
        ENDIF
    ENDIF
!      deallocate (cf)
!
!----- INITIALIZE ASYMPTOTIC ROUTINES FOR VIBRATIONAL CHANNELS
    IF(NVCHAN > 0) THEN
        RAFINV = RMATR
        SCALE = ONE
        CALL ASYM1(NVCHAN,LVCHL,ION,ISMAX,AMC,RMATR,RAFINV,SCALE,BBLOCH,EVCHL,EMINR,EMAXR,IVPROP,POTL,IWRITE,IDAT)
    ENDIF
!
!----- INITIALIZE ASYMPTOTIC ROUTINES FOR DISSOCIATING CHANNELS
    IF(NDIS > 0) THEN
        EMINM = TWOM*EMINR
        EMAXM = TWOM*EMAXR
        IF(IVPROP == 0) IDPROP=0
        RAFIND = RMATN
        SCALE = ONE/TWOM
        CALL ASYM1(NDIS,LVCHL(1+NVCHAN),0,MDMAX,ADC,RMATN,RAFIND,SCALE,BIGB,EVCHL(1+NVCHAN),EMINM,EMAXM,IDPROP,DISPOT,IWRITE,IDAT)
        IF(IDPROP /= IVPROP) GO TO 94
    ELSE
        IDPROP = 0
    ENDIF
!
!----- INITIALIZE OUTPUT OF T-MATRICES
    IF (IAND(GET_TKMAT, 1) /= 0) THEN
!
       if (maxi .le. 0) maxi = 1
       if (maxf .le. 0) maxf = ntarg
!
       allocate(ISTART(NTARG),stat=ifail)
       if (ifail .ne. 0) call mpi_xermsg('rsolve','main','istart memory allocation failed.',ifail,1)
       !
       !----- Find out how many channels (MAXCHI,MAXCHF) correspond to the subset of the T-matrix that we want to save.
       CALL CHSUB(NCHAN,NTARG,MAXI,MAXF,MAXCHI,MAXCHF,ICHL,ISTART)
!
       IF (myrank .eq. master) THEN
          !
          !---- Write header to T-matrix file
          NAPPR = 2
          CALL WRITTH(LUTMT,NAME,NTSET,MAXCHI,MAXCHF,NESC,NERANG,NESCAT,EINR,NAPPR,NTARG,ICHL,LVCHL,MVCHL,EVCHL,RR,TFORM,IPRNT(6),&
                      IFAIL)
          IF(IFAIL.NE.0) RETURN
       ENDIF
    END IF
    IF (IAND(GET_TKMAT, 2) /= 0) THEN
!
!----- INITIALIZE OUTPUT OF K-MATRICES 
       IF(LUKMT /= 0 .AND. myrank == master) THEN
         CALL WRITKH(LUKMT,NKSET,KFORM,NAME,MGVN,STOT,GUTOT,ION,RR,RMASS,NCHAN,NVIB,NDIS,NTARG,NERANG,NESCAT,EINR,NESC,IPRNT(6),&
                     IWRITE,IFAIL)
       ENDIF
    ENDIF
!
!----- Initialize output of R-matrices and wavefunctions
    IF(LUWFN /= 0 .AND. myrank == master) THEN
      CALL WRITWH(LUWFN,NWSET,WFORM,NAME,MGVN,STOT,GUTOT,ION,RR,RMASS,NCHAN,NVIB,NDIS,NTARG,NERANG,NESCAT,EINR,NESC,IPRNT(6),&
                  IWRITE,IFAIL)
    ENDIF
!
!---- Store Bloch coefficients
    allocate (bloch(nvchan+ndis))
    DO 4 I=1,NVCHAN
        BLOCH(I) = BBLOCH
    4 END DO
    DO 5 I=1,NDIS
        BLOCH(NVCHAN+I) = BIGB 
    5 END DO
!
!---- Storage allocation for energy loop
    NCHSQ= NCHAN*NCHAN
    NVCHSQ = NVCHAN*NVCHAN
    allocate (rvib(nchsq),fx(2*nchsq),fxp(2*nchsq),fv(2*nvchsq),fvp(2*nvchsq),fd(2*ndis*ndis),fdp(2*ndis*ndis),akmat(nchsq),&
              rres(NGEOM*NCHANF*(NCHANF+1)/2),crv(2*NVCHSQ+NVCHAN),fkmat(nchsq),crd(NDIS*(2*NDIS+1)),rvib2(nchsq)) !AlexH 23/11/10
    if(npole > 0) allocate (rmn(NCHAN*(NCHAN+1)/2))
!
!-----------------------------------------------------------------------
!
!     ENERGY LOOP
!
!----- ZM Calculate how many energies I am going to process
    write(stdout,'(/,"List of my energies to process (and open channels) follows:")')
    CNT = 0
    nworkers = merge(nprocs - 1_mpiint, nprocs, master_writer)
    DO IES=1,NERANG
        ENRYD = EINR(1,IES)
        DE = EINR(2,IES)
        NES = NESCAT(IES)
        !
        DO IE=1,NES
            CNT = CNT + 1
            owner = mod(CNT - 1, nworkers) + nprocs - nworkers
            !
            !----- Calculate the number of open channels
            NOPEN = 0
            DO I=1,NCHAN
                IF(ENRYD-EVCHL(I) > ZERO) NOPEN=NOPEN+1
            END DO
            !
            IF (owner == myrank) THEN
                WRITE(IWRITE,'(i4,e25.15,i4)') (CNT + nworkers - 1) / nworkers, ENRYD, NOPEN
            END IF
            ENRYD = ENRYD+DE
        END DO
    END DO
    WRITE(IWRITE,'(/,"Processing ",i4," energies")') merge(0, (CNT + nworkers - 1) / nworkers, master_writer .and. myrank == master)

    if (iand(GET_TKMAT, 1) /= 0) then
        ! Storage for the T-matrices as they are being calcualted for my set of energies. 
        ALLOCATE(TR(NCHAN * NCHAN),TI(NCHAN * NCHAN),linear_kmat(NCHAN*(NCHAN+1)/2),stat=ifail)
        if (ifail /= 0) call mpi_xermsg('rsolve','main','tr,ti memory allocation failed',ifail,1)
        TR = 0.0D0; TI = 0.0D0; linear_kmat = 0.0D0
    endif
!
    IEN = 0

    if (calcak /= 0 .and. calcdip(1) /= 0 .and. calcak /= calcdip(3)) then
        call mpi_xermsg('rsolve', 'main', 'CALCAK and CALCDIP(3) must be the same when calculating both', 1, 1)
    end if

    if (calcak /= 0) then
        !Determine number of channels to keep.
        nchan_ak = 0
        do i=1,nchan
            if(ICHL(i) > calcak) then
                nchan_ak = i - 1
                exit
            end if
        end do
        if (nchan_ak == 0) nchan_ak = nchan

        ! allocate storage for single energy
        allocate (ar(nocsf,nchan), ai(nocsf,nchan))
        ar = 0d0; ai = 0d0 !AlexH 17/11/10

        ! initialize the output file
        if (myrank == master) then
            call WRITSH(LUSCT,NSSET,SFORM,NAME,MGVN,STOT,GUTOT,nchan_ak,ICHL(1:nchan_ak),LVCHL(1:nchan_ak), &
                        MVCHL(1:nchan_ak),EVCHL(1:nchan_ak),NOCSF,NOCSF,NESC,0d0,IPRNT(6),IWRITE,IFAIL)
            if (ifail /= 0) then
                call mpi_xermsg('rsolve', 'main', 'Failed to initialize Ak-coeffs file.', ifail, 1)
            end if
        end if
    end if

!     Read inner region dipoles for constructing the 
!     partial wave dipoles.
!     ---------------------------------------------- 

    if (calcdip(1) /= 0) then
        !Determine number of channels to keep.
        nchan_dip = 0
        do i=1,nchan
            if(ICHL(i) > calcdip(3)) then
                nchan_dip = i - 1
                exit
            end if
            
        end do
        if (nchan_dip == 0) nchan_dip = nchan
        
        nstat_dip=nstat+calcdip(2)
        !ZM The number of neutral states for which I want the dipoles is given in calcdip(2) so I can use this to eliminate overallocation and to read-in only the dipoles that I want.
        nstat_neut = calcdip(2)
        
        maxprop=lmax_property**2+2*lmax_property
        allocate( dip_comp_present(maxprop),bound_state_energies(nstat_neut))
        call read_transdip2(iwrite, lu_inner_dipoles, nset_lu_inner_dipoles, nstat_neut, nstat_dip, lmax_property, &
                            inner_dipoles_temp, dip_comp_present, bound_state_energies, ifail)

        !Pickout the part that we want
        if (calcdip(1) == 1) then       
            allocate(inner_dipoles(nstat_neut,nstat,maxprop)) !No quads
            inner_dipoles=0
            nbound_dip=merge(nstat_dip-nstat,0,nstat_dip>0) ! No of bound states in the dipole file

            if (nbound_dip > 0) then
                inner_dipoles=inner_dipoles_temp( 1:nstat_neut,nbound_dip+1:nstat_dip,: )
            end if
            ! M inner_dipoles_temp can be large if nstat_neut is large so get rid of it as soon as possible
            deallocate(inner_dipoles_temp)
            
        else if (calcdip(1) == 2) then
            call mpi_xermsg('rsolve','main','calcdip(1) == 2: not implemented yet',2,1)
        end if

        allocate (re_pw_dipoles_temp(calcdip(2),nchan,maxprop), im_pw_dipoles_temp(calcdip(2),nchan,maxprop))
        allocate (re_pw_dipoles(calcdip(2),nchan_dip,maxprop,nesc), im_pw_dipoles(calcdip(2),nchan_dip,maxprop,nesc))
        re_pw_dipoles_temp=0d0; im_pw_dipoles_temp=0d0
        re_pw_dipoles=0d0; im_pw_dipoles=0d0
    end if
    
    allocate (escat(nesc))
    escat = -1.0_wp
!
    allocate(y_arr(nchan),dy_arr(nchan)) ! AlexH 23/11/10 for inwards prop.
    y_arr=0d0; dy_Arr=0d0
!
!----- ZM: ampae is the space for the energy-dependent auxiliary amplitude used by RESIDR.
!          AA,BB,IPIV are work arrays for KMAT_MKL.
    allocate(AMPAE(NSTAT,NCHANF),AA(NCHAN,NCHAN),BB(NCHAN,NCHAN),IPIV(nchan),stat=ifail)
    if (ifail /= 0) call mpi_xermsg('rsolve','main','AMPAE memory allocation error.',ifail,1)
    AMPAE(:,:) = 0.0D0
    if (gpu) then
        call linalg_cl_init(-1_c_int, merge(-1_c_int, int(myrank, c_int), nprocs == 1), iwrite)
        call residr_cl(0_c_int, int(nchanf, c_int), int(nstat, c_int), 1_c_int, 1._wp, epole, 0._wp, wamp, int(nchanf, c_int), rres)
    end if
!
    IEN = 0
    cnt = 0

    energy_range_loop: DO IES=1,NERANG

        NES = NESCAT(IES)
        ENRYD = EINR(1,IES)
        DE    = EINR(2,IES)

        energy_sample_loop: DO IE=1,NES

            cnt = cnt + 1
            owner = mod(cnt - 1, nworkers) + nprocs - nworkers

            my_energy_sample: if (owner == myrank) then

                ETOT  = e0+HALF*ENRYD
                IF(IPRNT(5) > 0 .OR. IPRNT(6) > 0) WRITE(IWRITE,28) ENRYD
                WRITE(IWRITE,'("Energy no. ",i4,e25.15)') (cnt + nworkers - 1) / nworkers, ENRYD

                NVOPEN = 0
                NDOPEN = 0
                ifail = 0

                !----- Calculate contribution to R-matrix from non-adiabatic poles
                IF(NPOLE > 0) THEN
                    CALL VRMAT2(NCHAN,NHD,RMN,ETOT,AMPn,EIGn,NLPOLE)
                    IF(IPRNT(5) /= 0) THEN
                        WRITE(IWRITE,24) NPOLE
                        CALL MATTPT(NCHAN,RMN,IWRITE)
                    ENDIF
                ENDIF

                !----- CALCULATE CONTRIBUTIONS TO FIXED NUCLEI R-MATRICES FROM HIGHER POLES
                start_t = omp_get_wtime()
                if(newbut == 0) nfbut=-nfbut
                if (abs(nfbut) > 1) then
                    CALL RESIDR(ETOT,NCHANF,NTARG,ETARG,NLPOLE,NPOLE,nstat,NGEOM,ncsf,ichl, &
                                AMPAE,WAMP,EPOLE,NFBUT,BUTC,RRES,ezero,sfac,&
                                iex,ecex,rcex,IWRITE,IFAIL,stdout)
                else if (gpu) then
                    call residr_cl(1_c_int, int(nchanf, c_int), int(nstat, c_int), 1_c_int, 1._wp, epole, etot, wamp, &
                                   int(nchanf, c_int), rres)
                else
                    CALL RESIDR(ETOT,NCHANF,NTARG,ETARG,NLPOLE,NPOLE,nstat,NGEOM,ncsf,ichl,&
                                AMPAE,WAMP,EPOLE,NFBUT,BUTC,RRES,ezero,&
                                dummy,iex,dummy,dummy,IWRITE,IFAIL,stdout)
                endif
                end_t = omp_get_wtime()
                write(iwrite,'("RESIDR took ",f15.4)') end_t-start_t
                IF(IFAIL /= 0) THEN
                    IF(IEUNIT == 2) Entop = Enryd/RYD
                    IF(IFAIL == 1) THEN
                        WRITE(IWRITE,39) ENtop,eunit(ieunit)
                        IFAIL = 0
                        CYCLE energy_range_loop
                    ELSE
                        WRITE(IWRITE,38) ENtop,eunit(ieunit)
                        IFAIL = 0
                        EXIT my_energy_sample
                    ENDIF
                ELSE
                    IEN = IEN+1
                ENDIF

                !----- CALCULATE RESIDUAL R-MATRIX IN THE ADIABATIC NUCLEI APPROX. AND
                !      ADD IT TO THE VIBRATIONAL R-MATRIX OBTAINED IN VRMAT2.  A SQUARE
                !      MATRIX IS OUTPUT FOR INPUT TO ASYMPTOTIC CODE.
                !
                IF(npole > 0) THEN
                    CALL ADNUC(NGEOM,R,NCHANF,ICHL,NTARG,NVTARG,VIBFN,KNOTS,RK,NVCHAN,RRES,RVIB,dum,1,NQUAD,RQUAD,QWTS)
                    !
                    !---- Reorder elements to match channel labels
                    if(ntarg > 1) call REORDV(nvchan,ichord,rvib)
                    !
                    !Add adiabatic component of R-matrix to the non-adiabatic
                    K = 0
                    IJ = 0
                    DO 71 I=1,NVCHAN
                        DO 7 J=1,I
                            K = K+1
                            IJ = IJ+1
                            rmn(K) = rmn(K)+rvib(IJ)
                        7 END DO
                    71 END DO
                    CALL SQUARM(NCHAN,1,RMN,RVIB)
                ELSE
                    CALL SQUARM(NCHAN,1,RRES,RVIB)
                ENDIF
                !
                IF(IPRNT(5) > 0) THEN
                    WRITE(IWRITE,14)
                    CALL WRECMT(RVIB,NCHAN,NCHAN,NCHAN,NCHAN,NCOL,IWRITE)
                ENDIF
                !
                !----- GET SOLUTIONS, DERIVATIVES AND GLOBAL X IN VIBRATIONAL
                !      CHANNELS AT R=RAFINV
                IF(NVCHAN > 0) THEN
                    CALL ASYM2(NVCHAN,NVOPEN,LVCHL,ION,ISMAX,AMC,CRV,RAFINV,EVCHL,ENRYD,FV,FVP,IVPROP,ifail)
                    if(ifail > 1) exit My_energy_sample
                    ifail = 0
                ENDIF

                !
                !----- GET SOLUTIONS, DERIVATIVES AND GLOBAL R-MATRIX IN DISSOCIATING
                !      CHANNELS AT R=RAFIND
                IF(NDIS > 0) THEN
                    E2M = RMASS*ENRYD
                    CALL ASYM2(NDIS,NDOPEN,LVCHL(1+NVCHAN),ION,MDMAX,ADC,CRD,RAFIND,EVCHL(1+NVCHAN),E2M,FD,FDP,IDPROP,ifail)
                    if(ifail > 1) exit my_energy_sample
                    ifail = 0
                    !
                    !----- IF NO PROPAGATION MUST USE NUMERICAL INTEGRATION
                    IF(IDPROP == 0 .AND. RAFIND > R(NGEOM)) THEN
                        CALL ASYMD(E2M,NDIS,TWOM,R(NGEOM),RAFIND,EVCHL(1+NVCHAN),FD,FDP,ADM,IWRITE,IPRNT(3))
                    ENDIF
                ENDIF
                !
                !----- MERGE SOLUTIONS AND DERIVATIVES
                NOPEN = NVOPEN+NDOPEN
                if (nopen == 0) exit My_energy_sample
                CALL MERG(NCHAN,NVCHAN,NDIS,NVOPEN,NDOPEN,FX,FXP,FV,FVP,FD,FDP)
                !
                rvib2=rvib !Save pre-propagation R-matrix AlexH 23/11/10
                if(RMATR > RAFINV) IBACK=-1
                !----- PROPAGATE R-MATRICES IF REQUIRED
                IF(IDPROP > 0 .OR. IVPROP > 0) THEN
                    start_t = omp_get_wtime()
                    CALL RPROPX(NCHAN,NVCHAN,NDIS,CRV,CRD,RVIB,IPRNT(5),IWRITE,IBACK)
                    end_t = omp_get_wtime()
                    write(iwrite,'("RPROPX took ",f15.4)') end_t-start_t
                ENDIF
                !
                !----- COMPUTE K-MATRIX
                start_t = omp_get_wtime()
                if (smooth > 1d-10) then
                    ! !     Assumes no propagation
                    !          call KMATREG(EPOLE,ENRYD,E0,NSTAT,NCHAN,BLOCH,NOPEN,WAMP,FX,
                    !      1             FXP,AKMAT,fkmat)
                else
                    !          call KMATREG(EPOLE,ENRYD,E0,NSTAT,NCHAN,BLOCH,NOPEN,WAMP,FX,
                    !      1             FXP,AKMAT,fkmat)
                        CALL KMAT_MKL(NCHAN,BLOCH,NOPEN,FX,FXP,RVIB,AKMAT,fkmat,AA,BB(:,1:NOPEN),IPIV)
                end if
                end_t = omp_get_wtime()
                write(iwrite,'("KMAT_MKL took ",f15.4)') end_t-start_t
                !
                !----- Calculate the T-matrix immediately if required
                if (iand(GET_TKMAT, 1) /= 0) then
                    !Put the K-matrix into a linear array as required by the TMAT routine.
                    DO J=1,NOPEN
                        DO I=1,J
                            IJ = I + NOPEN*(J-1)
                            LINEAR_KMAT(J*(J-1)/2+I) = AKMAT(IJ)
                        ENDDO
                    ENDDO
                    CALL TMAT(NOPEN,LINEAR_KMAT,TR,TI)
                endif
                !
                ! Calculate wavefunction coefficients if requested
                ! ------------------------------------------------
                if (CALCAK /= 0) then
                    start_t = omp_get_wtime()
                    if ((RAFINV > RMATR) .AND. (ifprop2 == 1)) then
                        !Back propagate the reduced radial functions
                        write(iwrite,*) "Use arbitrary precision version of rsolve "
                        write(iwrite,*) "for back propapagation (rsolve_multi_prec)"
                    else
                        !Calculate wave function coefficients
                        ar=0;ai=0;
                        call compak(NCHAN,NOPEN,FX,FXP,NOCSF,EPOLE,WAMP,IFAIL,AKMAT,fkmat,etarg,EVCHL,cnt,ENRYD,NTARG,RVIB2,RMATR,&
                                    ar,ai)
                    end if
                    end_t = omp_get_wtime()
                    write(iwrite,'("Ak coeffs took ",f15.4)') end_t-start_t
                end if
                !
                !Calculate partial wave dipoles if requested
                !-------------------------------------------
                if (CALCDIP(1) /= 0) then
                    start_t = omp_get_wtime()
                    if((RAFINV > RMATR) .AND. (ifprop2 == 1)) then
                        !Back propagate the reduced radial functions
                        write(iwrite,*) "Use arbitrary precion version of rsolve "
                        write(iwrite,*) "for back propapagation (rsolve_multi_prec)"
                    else
                        !Calculate partial wave dipoles
                        re_pw_dipoles_temp=0;im_pw_dipoles_temp=0;
                        call compak(NCHAN,NOPEN,FX,FXP,NOCSF,EPOLE,WAMP,IFAIL,AKMAT,fkmat,etarg,EVCHL,cnt,ENRYD,NTARG,RVIB2,RMATR,&
                                    inner_dipoles=inner_dipoles, re_pw_dipoles=re_pw_dipoles_temp, im_pw_dipoles=im_pw_dipoles_temp)
                    end if
                    end_t = omp_get_wtime()
                    write(iwrite,'("COMPAK took ",f15.4)') end_t-start_t
                end if

                !ZM we will only get here if the propagation has been successful,
                !i.e. if no cycle/exit has been called to skip this energy.
                escat(cnt) = half*enryd

            end if my_energy_sample

            start_t = omp_get_wtime()

            ! collect data from processes and write to disk
            if (iand(get_tkmat, 2) /= 0) then
                call gather_and_write_kmatrices(cnt, nworkers, escat, iktype, akmat, fkmat, nchan, ndis, nopen, ndopen, enryd)
            end if
            if (iand(get_tkmat, 1) /= 0) then
                call gather_and_write_tmatrices(cnt, nworkers, escat, tr, ti, maxchi, maxchf, nopen, ndis, enryd)
            end if
            if (calcak /= 0) then
                call gather_and_write_akcoeffs(cnt, nworkers, escat, lusct, sform, nocsf, nchan_ak, ar, ai)
            end if
            if (calcdip(1) /= 0) then
                call gather_pw_dipoles(cnt, nworkers, escat, re_pw_dipoles, re_pw_dipoles_temp, &
                                       im_pw_dipoles, im_pw_dipoles_temp, calcdip(2), nchan, nchan_dip, maxprop)
            end if

            end_t = omp_get_wtime()

            if (myrank == master .or. owner == myrank) then
                write (iwrite, '(a,i0,a,f15.4)') 'Save ', cnt, ' took ', end_t - start_t
            end if

            ENRYD = ENRYD+DE

        END DO energy_sample_loop

    END DO energy_range_loop

    if (gpu) then
        call residr_cl(2_c_int, int(nchanf, c_int), int(nstat, c_int), 1_c_int, 1._wp, epole, 0._wp, wamp, int(nchanf, c_int), rres)
        call finalize_cl
    end if

    IF(IEN <= 0) IFAIL=1
!
!     END OF ENERGY LOOP
!
!-----------------------------------------------------------------------

    if (calcdip(1) /= 0 .AND. myrank == master) then
        call compress_and_count_energies(escat, re_pw_dipoles, im_pw_dipoles, nesc)
        call write_pw_dipoles(lu_pw_dipoles, nset_pw_dipoles, form_pw_dipoles, name, mgvn, stot, gutot, &
                              starg(1:ichl(nchan_dip)),mtarg(1:ichl(nchan_dip)),gtarg(1:ichl(nchan_dip)), &
                              ichl(1:nchan_dip),lvchl(1:nchan_dip),mvchl(1:nchan_dip),evchl(1:nchan_dip), &
                              escat(1:nesc),lmax_property, dip_comp_present, bound_state_energies,etarg(1), &
                              re_pw_dipoles(:,:,:,1:nesc),im_pw_dipoles(:,:,:,1:nesc), 1, iwrite, ifail)
        deallocate (re_pw_dipoles, im_pw_dipoles)
        close (unit=lu_pw_dipoles)
    end if
!    
    IF (IFAIL == 0) WRITE(IWRITE,18)
!
    deallocate (rvib,fx,fxp,akmat,fkmat,bloch,fv,fvp,fd,fdp,rres,crv,crd)
    deallocate (etarg,starg,mtarg,gtarg,evchl,lvchl,mvchl,ivchl)
    deallocate (ichl,amc,adc,adm)

    call mpi_memory_deallocate_real(cf,    size(cf),    cf_win)
    call mpi_memory_deallocate_real(epole, size(epole), epole_win)
    call mpi_memory_deallocate_real(wamp,  size(wamp),  wamp_win)
    call mpi_memory_deallocate_real(butc,  size(butc),  butc_win)

    if (abs(nfbut) > 1) then
        call mpi_memory_deallocate_real(sfac, size(sfac), sfac_win)
        call mpi_memory_deallocate_real(ecex, size(ecex), ecex_win)
        call mpi_memory_deallocate_real(rcex, size(rcex), rcex_win)
    end if

    CLOSE(UNIT=LUCHAN)
    CLOSE(UNIT=LURMT)
    CLOSE(UNIT=LUKMT)
    IF(NGEOM > 1) THEN
        deallocate (ichord,rquad,qwts,evib,ivtarg,ivnu,vibfn,rmn,ampn,eign)     
        CLOSE(UNIT=LUVCHN)
        CLOSE(UNIT=LUNRMT)
    ENDIF
!
    RETURN
!
    89 WRITE(IWRITE,91) NGEOM,NCHSET(NGEOM),NRMSET(NGEOM),R(NGEOM)
    91 FORMAT(/' ERROR IN GEOMETRY RELATED DATA'/' NGEOM =',I3,'  NCHSET(NGEOM) =',I3,'  NRMSET(NGEOM) =',I3,'  R(NGEOM) =',F6.3)
    GO TO 90
    92 WRITE(IWRITE,93) NCHAN0,NCHAN,NTARG0,NTARG
    93 FORMAT(' INCONSISTENT DATA ON INPUT FILES'/' NCHAN0 =',I5,5X,'NCHAN =',I5,5X,'NTARG0 =',I5,5X,'NTARG =',I5)
    GO TO 90
    94 WRITE(IWRITE,95)IVPROP,IDPROP
    95 FORMAT(/' INCONSISTENT PROPAGATION FLAGS',2I5)
    GO TO 90
    96 WRITE(IWRITE,98) NTARG,NGEOM,MAXTGT,MAXPTS
    98 FORMAT(/' INPUT DATA WILL EXCEED FIXED DIMENSIONS'/' INPUT  ',2I5/' MAXIMA ',2I5)
    GO TO 90
    90 IFAIL = 1
    RETURN
!
    10 FORMAT(/' Vibrationally resolved calculation '//' Number of dissociating channels',I3//' Input geometries'/' R =',&
       7F10.5,(/4X,7F10.5))
    11 FORMAT(/' Input datasets:',33X,'Unit  Set numbers'/' Target and channel data     LUCHAN (',A11,')',I3,5X,30I3/(35X,30I3))
    12 FORMAT(//' Program RSOLVE  (last modified ',A,' )'//A//' Symmetry data  MGVN =',I2,' STOT =',I2,' GUTOT =',I2)
    13 FORMAT(/' K-matrices will be calculated for',I5,' energies in the range [',F8.4,',',F8.4,'] ',A)
    14 FORMAT(/' SUPER R-MATRIX')
    15 FORMAT(/' K-MATRIX')
    16 FORMAT(I3,12F10.5/(3X,12F10.5))
    17 FORMAT(/' *** FIXED NUCLEI DATA ***')
    18 FORMAT(/' *** Task successfully completed ***')
    19 FORMAT(/' *** NON-ADIABATIC DATA ***')
    20 FORMAT(/' Fixed nuclei calculation for R =',F6.3)
    21 FORMAT(/' Vibrational channel data    LUVCHN (',A11,')',I3,5X,I3 /' Non-adiabatic R-matrix data LUNRMT (',A11,')',I3,5X,I3)
    22 FORMAT(/' *** END OF FIXED NUCLEI DATA *** ')
    23 FORMAT(/' NDIS =',I2,' IS INCOMPATIBLE WITH DATA FROM VIBRMT',2X, 'CHANGED TO ',I2)
    24 FORMAT(/' CONTRIBUTION TO ELECTRONIC R-MATRIX FROM FIRST',I3, ' POLES')
    25 FORMAT(/' COUPLING R-MATRIX')
    26 FORMAT(/' NUCLEAR MOTION R-MATRIX')
    27 FORMAT(10A8)
    28 FORMAT(/100('-')//' INCIDENT ENERGY',F10.5,' RYD')
    31 FORMAT(/' Output datasets:',32X,'Unit  Set number'/ ' K-matrices',18X,'LUKMT  (',A11,')',I3,5X,I3)
    49 FORMAT(/' Output datasets:',32X,'Unit  Set number'/ ' T-matrices',18X,'LUTMT  (',A11,')',I3,5X,I3)
    32 FORMAT(' Wavefunction data           LUWFN  (',A11,')',I3,5X,30I3/(35X,30I3))
    33 FORMAT(' Fixed nuclei R-matrix data  LURMT  (',A11,')',I3,5X,30I3/(35X,30I3))
    34 FORMAT(/' Maximum multipole USED in asymptotic scattering potentials   ISMAX =',I3)
    35 FORMAT(/' Maximum multipole USED in asymptotic dissociating potentials MDMAX =',I3)
    38 FORMAT(/' Adiabatic approx. to contribution from higher',' poles failed at E =',F7.4,1x,a/&
               ' If higher energies are required, increase NPOLE')
    39 FORMAT(/' Adiabatic approx. to contribution from lower',' poles failed at E =',F7.4,1x,a/&
               ' If lower energies are required, decrease NPOLE')
    47 FORMAT(/' Base energy used in nuclear motion code  EBASE =',F11.5,' au')
    48 FORMAT(/'Number of energy points for which the wf. coefs. have actually been calculated: ', I6)
!
    CONTAINS
!    
    SUBROUTINE reordi(nchan,echl,ntarg,etarg,ichord)
    IMPLICIT DOUBLE PRECISION(A-H,O-Z)
!
!***********************************************************************
!
!---- Determine consistent ordering of channels to match target states
!
!***********************************************************************
!
    double precision :: etarg, echl
    integer :: ichord, istart, iend, itgord, it, ic, ifound, i, j, nchan, ntarg
    dimension istart(ntarg),ichord(nchan),itgord(ntarg), &
    etarg(ntarg),echl(nchan),iend(ntarg)
    DOUBLE PRECISION :: tol = 1.d-10, two = 2.d0
!
!---- Determine energy ordering of target states
    call SORT_OUTER(ntarg,istart,etarg)
    do 1 i=1,ntarg
        itgord(istart(i)) = i
    1 END DO
!
!---- Set pointer to first channel corresponding to each target state
    do 5 it = 1,ntarg
        et = two*(etarg(it)-etarg(1))
        ifound = 0
        do 4 i=1,nchan
            if(abs(et-echl(i)) > tol) go to 4
            ic = i
            if(ifound > 0) go to 4
            ifound = 1
            istart(it) = i
        4 END DO
        iend(it) = ic
    5 END DO
!
    j = 0
    do 21 it=1,ntarg
        do 2 i=istart(it),iend(it)
            j = j+1
            ichord(j) = i
        2 END DO
    21 END DO
!
    return
    END
    SUBROUTINE reordv(nchan,ichord,rm)
    IMPLICIT DOUBLE PRECISION(A-H,O-Z)
    integer :: ichord, nchan, i, j
    double precision :: rm, work
    dimension work(nchan*nchan),ichord(nchan),rm(nchan*nchan)
!
!***********************************************************************
!
!---- Reorder R-matrix elements
    do 6 i=1,nchan
        do 5 j=1,i
            work(nchan*(max(ichord(i),ichord(j))-1)+min(ichord(i),ichord(j))) &
            = rm(i*(i-1)/2+j)
        5 END DO
    6 END DO
    do 8 i=1,nchan
        do 7 j=1,i
            rm(i*(i-1)/2+j) = work(nchan*(i-1)+j)
        7 END DO
    8 END DO
!
    return
    END
    SUBROUTINE ASYM2(NCHAN,NOPEN,LCHL,ION,ISMAX,CF,CR,RAFIN,ETHR, &
    E,FX,FXP,IPROP,ifail)
!
!***********************************************************************
!
!     ASYM2 CONTAINS CALLS TO ENERGY DEPENDANT PARTS OF PROPAGATOR AND
!           ASYMPTOTIC PACKAGES
!
!***********************************************************************
!
    IMPLICIT DOUBLE PRECISION (A-H,O-Z)
    INTEGER :: NCHAN, ICF1, INC
    INTEGER :: NOPEN, LCHL, ION, ISMAX, IPROP, IFAIL, LAMAX, I, J, K, IJK
    INTEGER :: NSOL, NMX, NHD, IRAD, NLEG, NEIGEN, NRANGE, NAMPX, nleg0
    INTEGER :: IWRITE, IASY, IGAIL, IPFLG, IDUM, IWRON, ierror, MAXPTS
    DIMENSION LCHL(NCHAN),ETHR(NCHAN),CF(NCHAN,NCHAN,*), &
    FX(*),FXP(*),CR(*),y(nchan),dy(nchan),en(nchan),nleg0(1)
    COMMON/NAGPT/ cfnag,ennag,elnag,ZZNAG,LAMAX,ICF1,INC
    COMMON/ASYMS/DEGENY,EPS,EWRON,NLEG,IASY,IWRITE,IPFLG(10),IWRON, &
    RMATR,HX,TOL,MAXPTS,neigen,nampx,nrange,peigen,pampa,igail
    double precision, pointer :: cfnag(:),ennag(:),elnag(:),pampa, &
    peigen
    DOUBLE PRECISION :: ZERO = 0.D0
    INTEGER :: ICOL = 6
!
    NCHSQ = NCHAN*NCHAN
!
!---  CALCULATE CHANNEL ENERGIES
    NOPEN = 0
    ZZNAG = 2*ION
    LAMAX = ISMAX
    DO 31 I=1,NCHAN
        EN(I) = E-ETHR(I)
        IF(EN(I) > ZERO) NOPEN=NOPEN+1
    31 END DO
    NSOL = 2-(NCHAN-NOPEN)/NCHAN
!
    IF(IPROP > 0) THEN
        NMX = NCHAN
    !
    !       CALL CURLYR TO GENERATE GLOBAL PROPAGATOR UP TO RADIUS RAFIN
        NHD =    NLEG*NCHAN
        IF(NEIGEN /= NHD*NRANGE) GO TO 90
        IF(NAMPX /= 2*NEIGEN*NCHAN) GO TO 90
    !
        nleg0(1)=nleg
        CALL CURLYR(CR,E,NCHAN,NRANGE,NLEG0,pampa,peigen,dummy,ifail)
        if(ifail.gt.1) return
    !
    ENDIF
!
    IRAD = 0
    FX(1:2*NCHSQ)=0d0 !AlexH - Testing
    FXP(1:2*NCHSQ)=0d0 !AlexH - Testing
    CALL GAILIT(EN,LCHL,NCHAN,ION,CF,LAMAX,RAFIN,IWRITE,IRAD, &
    IASY,IGAIL,DEGENY,EPS,IPFLG,FX,FXP,dummy,idum,idum,ifail)
    
    if(ifail > 1) return
!
!     CHECK WRONSKIAN
!
    IF(IWRON /= 0) CALL WRONSK(NCHAN,NOPEN,FX,FXP,IWRITE,IPFLG(10), &
    EWRON)
!
    IF(IPFLG(9) /= 0) THEN
    !
    !----- PRINT SOLUTIONS AND DERIVATIVES
        DO 14 K=1,NSOL
            WRITE(IWRITE,17) RAFIN
            17 FORMAT(/' SOLUTIONS AT RAFIN=',F8.3)
            IJK=(K-1)*NCHSQ+1
            CALL WRECMT(FX(IJK),NCHAN,NCHAN,NCHAN,NCHAN,ICOL,IWRITE)
        14 END DO
        DO 15 K=1,NSOL
            WRITE(IWRITE,18)RAFIN
            18 FORMAT(/' DERIVATIVES AT RAFIN',F8.3)
            IJK=(K-1)*NCHSQ+1
            CALL WRECMT(FXP(IJK),NCHAN,NCHAN,NCHAN,NCHAN,ICOL,IWRITE)
        15 END DO
    ENDIF
!
    IF(IPROP >= 0) RETURN
!
!      if(igail == 1) then
    allocate (ennag(nchan),elnag(nchan))
    DO 32 I=1,NCHAN
        ennag(i) = en(i)
        elnag(i) = dble(LCHL(I)*(LCHL(I)+1))
    32 END DO
    allocate (cfnag(lamax*nchan*nchan),stat=ierror)
    if(ierror /= 0) then
        print *,' unable to allocate cfnag ',ierror
        stop
    endif
    ijk = 0
    do 42 k=1,lamax
        do 41 i=1,nchan
            do 40 j=1,nchan
                ijk = ijk+1
                cfnag(ijk) = cf(i,j,k)
            40 END DO
        41 END DO
    42 END DO
!      endif
!
!     FUNCTION PROPAGATION
!
    CALL INTIN(RMATR,RAFIN,FX,FXP,NCHAN,NOPEN,Y,DY,HX,MAXPTS,TOL, &
    IPFLG(10),IWRITE)
!
    IF(IPFLG(9) /= 0) THEN
    !----- PRINT SOLUTIONS AND DERIVATIVES
        DO 24 K=1,NSOL
            WRITE(IWRITE,27) RMATR
            27 FORMAT(/' SOLUTIONS AT RMATR =',F8.3)
            IJK=(K-1)*NCHSQ+1
            CALL WRECMT(FX(IJK),NCHAN,NCHAN,NCHAN,NCHAN,ICOL,IWRITE)
        24 END DO
        DO 25 K=1,NSOL
            WRITE(IWRITE,28)RMATR
            28 FORMAT(/' DERIVATIVES AT RMATR',F8.3)
            IJK=(K-1)*NCHSQ+1
            CALL WRECMT(FXP(IJK),NCHAN,NCHAN,NCHAN,NCHAN,ICOL,IWRITE)
        25 END DO
    ENDIF
!
    RETURN
    90 WRITE(IWRITE,91) NEIGEN,NAMPX,NLEG,NRANGE,NCHAN
    91 FORMAT(' INCONSISTENT DATA IN ASYM2',5I8)
    STOP
    END
    SUBROUTINE NAGRHS(NCHAN,R,Y,YDP)
    IMPLICIT DOUBLE PRECISION (A-H,O-Z)
!
!***********************************************************************
!
!     NAGRHS calculates the right hand sides of the asymptotic equations
!     in the form required by the NAG routine D02LAF
!
!     THE POTENTIALS ARE EXPANDED IN INVERSE POWERS OF THE RADIAL
!     DISTANCE R, WITH EXPANSION COEFFICIENTS GIVEN IN THE MATRIX CF
!
!***********************************************************************
!
    integer :: LAMAX,ICF1,INC,I,J,K,NCHAN
    DIMENSION Y(*),YDP(NCHAN)
    COMMON/NAGPT/ cfnag,ennag,elnag,ZZNAG,LAMAX,ICF1,INC
    double precision, pointer :: cfnag(:),ennag(:),elnag(:)
!
    double precision :: ZERO = 0.0D0, ONE = 1.0D0, RK, R
!
    DO 1 I=1,NCHAN
        YDP(I) = (-ENnag(I)+ELnag(I)/(R*R)-ZZNAG/R)*Y(I)
    1 END DO
    DO 4 I=1,NCHAN
        DO 3 K=1,LAMAX
            INC = (I-1+(K-1)*NCHAN)*NCHAN
            RK = ONE/R**(K+1)
            DO 2 J=1,NCHAN
                YDP(I) = YDP(I)+Y(J)*cfnag(INC+J)*RK
            2 END DO
        3 END DO
    4 END DO
!
    RETURN
    END
    SUBROUTINE NAGPOT(R,Y,DY)
    IMPLICIT DOUBLE PRECISION (A-H,O-Z)
!
!***********************************************************************
!
!     NAGPOT CALCULATES THE ASYMPTOTIC POTENTIAL IN THE DISSOCIATING
!     CHANNEL. CALLING SEQUENCE IS AS REQUIRED BY NAG ROUTINE D02BAF
!
!***********************************************************************
!
    COMMON/NAGPT/ cfnag,ennag,elnag,ZZNAG,LAMAX,ICF1,INC
    COMMON/DISPAR/DE,D,BETA,RE,TWOM,BIGKSQ,EPSD,RA
    double precision, pointer :: cfnag(:),ennag(:),elnag(:)
    DIMENSION Y(*),DY(*)
    double precision :: ZERO = 0.D0, ONE = 1.D0
    integer :: LAMAX, K, ICF1, INC
!
!----- GAILIT SHOULD BE USED FOR ENTIRE RANGE WHERE MULTIPOLE EXPANSION
!      IS VALID
    IF(R <= RA) THEN
    !
    !---- MORSE POTENTIAL
        V = D*(ONE-EXP(-BETA*(R-RE)))**2+DE
    !
    ELSE
    !
    !---- MULTIPOLE EXPANSION
        V = ZERO
        DO 1 K=1,LAMAX
            V = V+cfnag(ICF1+(K-1)*INC)/R**(K+1)
        1 END DO
    !
    ENDIF
!
    DY(1) = Y(2)
    DY(2) = (V-BIGKSQ)*Y(1)
!
    RETURN
    END
    SUBROUTINE ASYM1(NCHAN,LCHL,ION,LAMAX,CF,RMTR,RAFIN,SCALE,BLOCH, &
    ETHR,EMIN,EMAX,IPROP,POTL,IWR,IDAT)
!
!***********************************************************************
!
!     ASYM1 CARRIES OUT THE ENERGY INDEPENDANT INITIALIZATION OF THE
!      ASYMPTOTIC ROUTINES.  SINCE A MODIFIED VERSION OF CFASYM IS
!      USED IN THIS CODE, WHICH DOES NOT REQUIRE SEPARATE INITIALIZATION
!      THIS ROUTINE IS LARGELY CONCERNED WITH SETTING UP THE CALL TO
!      THE PROPAGATOR PACKAGE VIA RPROP1
!
!***********************************************************************
!
    IMPLICIT DOUBLE PRECISION(A-H,O-Z)
    CHARACTER(14) :: METHOD(3)
    EXTERNAL POTL
    COMMON/ASYMS/DEGENY,EPS,EWRON,NLEG,IASY,IWRITE,IPFLG(10),IWRON, &
    RMATR,HX,TOL,MAXPTS,neigen,nampx,nrange,peigen,pampa,igail
    INTEGER :: LCHL, LAMAX, IPROP, IWR, IDAT, ION, IWRITE
    INTEGER :: neigen,nampx,IASY,NCHAN
    DIMENSION LCHL(NCHAN),CF(NCHAN,NCHAN,*),ETHR(NCHAN), &
    nleg0(1)
    COMMON/NAGPT/ cfnag,ennag,elnag,ZZNAG,nAMAX,ICF1,INC
    double precision, pointer :: cfnag(:),ennag(:),elnag(:)
    double precision, allocatable :: work(:),vc(:)
    double precision, allocatable, target :: eigen(:),ampa(:)
    double precision, pointer :: peigen,pampa
    save eigen,ampa
    integer :: LBUG(6), IDISC = 0, IGAIL, NLEG, NRANGE, IPFLG, IWRON, lpr, nleg0
    integer :: IFPROP = 0, MAXPTS, MLEG = 10, ISPROP, namax, nmx, nbigvc, ICF1, INC
    double precision :: ZERO = 0.0D0, HALF = 0.5D0
!
!***********************************************************************
!
!     DATA RELATING TO THE ASYMPTOTIC PACKAGES RPROP AND CFASYM ARE
!     INPUT VIA NAMELIST /BPROP/
!
!     EBIG   = LARGEST VALUE OF |K**2| IN ANY CHANNEL IF DIFFERENT
!              FROM VALUE INPUT FROM CALLING PROGRAM
!     ESMALL = SMALLEST VALUE OF |K**2| IF DIFFERENT FROM INPUT
!     IDISC  = LOGICAL UNIT OF SCRATCH DISC IF REQUIRED
!     IGAIL  = TYPE OF ASYMPTOTIC EXPANSION, 0=BURKE+SCHEY, 1=GAILITIS,
!              2=BESSEL/COULOMB FUNCTIONS (DEFAULT =1)
!     IPFLG  = DEBUG PRINT SWITCHES FOR CFASYM (SEE CFASYM WRITE UP)
!     IWRON  = 0 WRONSKIAN OF SOLUTIONS IN NOT CHECKED
!     LBUG   = DEBUG PRINT SWITCHES FOR RPROP (SEE COMMENTS IN RPROP1)
!     NLEG   = NUMBER OF LEGENDRE POLYNOMIALS TO BE USED IN PROPAGATION
!              (MAXIMUM AND DEFAULT = 10)
!     NRANGE = NUMBER OF SUBRANGES IN PROPAGATION (DEFAULT= 0, CODE
!               DECIDES HOW MANY)
!     RAF    = RADIUS AT WHICH CONTINUED FRACTION METHOD CAN BE USED
!              (DEFAULT RAF=RMATR)
!
    NAMELIST/BPROP/LBUG,RAF,IDISC,IGAIL,NLEG,NRANGE,ESMALL,EBIG, &
    IPFLG,IWRON,IFPROP,HX,TOL,MAXPTS
!
!***** SOME CONVERGENCE CRITERIA HAVE BEEN RELAXED
!      FINAL EIGENPHASES ETC APPEAR GOOD TO 4 FIGURES
    DATA TINY/1.D-8/,DELTA/6.D0/
    DATA METHOD/'BURKE/SCHEY','GAILITIS','COULOMB/BESSEL'/
!
!     *******     ENERGY INDEPENDANT PART OF CALCULATION     *******
!
    HX = 0.2
    TOL = 1.D-10
    MAXPTS = 200
    RAF = ZERO
    RMATR = RMTR
    NLEG = 0
    NRANGE = 0
    IWRITE = IWR
    DEGENY = 1.D-5
    IWRON = 0
    IASY = 20
    ESMALL = 0.D0
    EBIG = 0.D0
    EPS = 5.D-5
    EWRON = 5.D-5
    IGAIL = 1
    DO 8 I=1,6
        LBUG(I) = 0
    8 END DO
    DO 9 I=1,10
        IPFLG(I) = 0
    9 END DO
    ISPROP = IPROP
!
    READ(IDAT,BPROP)
!
    IF(RAF > TINY) RAFIN = RAF
    IF(IPROP == 0) RETURN
!
    IF(NLEG > MLEG) GO TO 95
    IF(NLEG == 0) NLEG=MLEG
    IF(ESMALL < TINY) ESMALL = EMIN
    IF(EBIG < TINY) EBIG = EMAX
!
!     IF THERE ARE NO LONG RANGE POTENTIALS NO PROPAGATION IS REQUIRED
!     AND SIMPLE COULOMB WAVE FUNCTIONS MAY BE USED IN ASYMPTOTIC REGION
!
    CFMAX = ZERO
    DO 32 I=1,NCHAN
        E1 = ABS(EMIN-ETHR(I))
        E2 = EMAX-ETHR(I)
        ESMALL = MIN(ESMALL,E1,ABS(E2))
        EBIG = MAX(EBIG,E2)
        DO 31 J=1,NCHAN
            DO 30 K=1,LAMAX
                COEF = ABS(CF(I,J,K))
                IF(COEF > CFMAX) CFMAX=COEF
            30 END DO
        31 END DO
    32 END DO
    IF(CFMAX <= TINY) IGAIL=2
    namax = lamax
!
!     DETERMINE WHETHER PROPAGATION IS REQUIRED
!
    IF(NRANGE == 0) THEN
        IF(RAFIN == RMATR) THEN
            IPROP = 0
            RAFIN = RMATR
        ELSE
            IPROP = 1
            NRANGE = MAX(INT(SQRT(EBIG)*ABS(RAFIN-RMATR)/DELTA+HALF),1)
        ENDIF
    ELSE
        IF(RAFIN <= RMATR) RAFIN=RMATR+NRANGE*DELTA/SQRT(EBIG)
        IPROP = 1
    ENDIF
    IF(IFPROP /= 0 .OR. ISPROP < 0) IPROP=-IPROP
!
!     PRINT DETAILS OF OPTIONS SELECTED
!
    WRITE(IWRITE,51) METHOD(IGAIL+1),RAFIN
    IF(IPROP == 1) WRITE(IWRITE,52) NRANGE
    IF(IPROP == -1) WRITE(IWRITE,520) 
    IF(IPFLG(1) /= 0) WRITE(IWRITE,53)EMIN,IASY,DEGENY
!
!     ALLOCATE SPACE FROM DYNAMIC STORAGE TO PROPAGATOR PACKAGE
!
    IF(IPROP > 0) THEN
        NMX = NCHAN
        NHD = NCHAN*NLEG
        NBIGVC = NHD*NHD
        LPR = 10*NHD+NBIGVC
        NAMPX = 2*NHD*NCHAN*NRANGE
        NEIGEN= NHD*NRANGE
    !
        allocate (vc(nbigvc),work(nbigvc))
        if(allocated(eigen)) deallocate (eigen,ampa)
        allocate (eigen(neigen),ampa(nampx))
        peigen => eigen(1)
        pampa => ampa(1)
    !
        IF(IPFLG(1) /= 0) WRITE(IWRITE,8844)NRANGE,NCHAN,NLEG,IDISC, &
        LAMAX
        8844 FORMAT(' NRANGE  =',I10,' NCHAN  =',I10,' NLEG   =',I10,/, &
        ' IDISC  =',I2,' LAMAX  =',I10)
    !
    !     INITIALIZE PROPAGATOR PACKAGE
    !
        nleg0(1)=nleg
        CALL RPROP1_MPI(NCHAN,RMATR,RAFIN,ETHR,NMX,NRANGE,NLEG0,EMAX,SCALE, &
        LAMAX,ION,LCHL,CF,BLOCH,LBUG,IWRITE,IDISC,AMPA,EIGEN, &
        NAMPX,VC,WORK,POTL)
        deallocate (work,vc)
    !
    ENDIF
!
    RETURN
!
!     STORAGE OVERFLOW ... TERMINATE THE CALCULATION
!
    95 WRITE(IWRITE,96)NLEG,MLEG
    96 FORMAT(' TOO MANY BASIS FUNCTIONS, GIVEN',I3,' MAXIMUM',I3)
    STOP
    51 FORMAT(/' ASYMPTOTIC METHOD SELECTED : ',A/' EXPANSION USED AT RAD&
   &IUS =',F10.4)
    52 FORMAT(/' R-MATRIX WILL BE PROPAGATED ACROSS',I3,' SUBRANGES')
    520 FORMAT(/' SOLUTIONS WILL BE PROPAGATED USING NAG ROUTINE D02LAF')
    53 FORMAT(' CONVERGENCE RADIUS FOR ENERGY,     EMIN  =',D16.8,/, &
    ' TERMS RETAINED IN ASYMPTOTIC SERIES, IASY =',I16,/, &
    ' MINIMUM SEPARATION FOR NONDEGENERATE',/, &
    ' CHANNELS (RYDBERGS),               DEGENY =',D16.8,//)
    END
      SUBROUTINE RPROP1_MPI(NCH,A,B,ETHR,NMX,MRANGE,NBASIS,EBIG,SCALE, &
      LAMAX,ION,LCHL,CF,BBLOCH,LBUG,IWR,IDISK,AMPA,EIGEN,NAMPX,BIGVEC, &
      WORK,POTL)
      USE mpi_gbl
      USE omp_lib
!
!***********************************************************************
!
!     RPROP1 IS THE ENERGY INDEPENDANT ENTRY TO PROPAGATOR PACKAGE
!      (GLOBAL PROPAGATOR VERSION)
!
!     ZM: introduced a trivial MPI parallelization over the sectors.
!         The sectors are cyclically redistributed among the MPI tasks.
!         Using more MPI tasks than the number of sectors does not speed
!         up this routine further. The intention is that this routine is
!         used together with MPI_R_SOLVE which includes
!         parallelization over scattering energies.
!
!***********************************************************************
!
!      INPUT PARAMETERS ARE
!
!      NCH    = NUMBER OF COUPLED EQUATIONS
!      A      = STARTING RADIUS OF PROPAGATION
!      B      = FINAL RADIUS OF PROPAGATION
!      ETHR   = THRESHOLD ENERGY OF EACH CHANNEL IN RYDBERGS
!      NMX    = DIMENSION OF ARRAYS ETHR AND RMAT IN CALLING ROUTINE
!      MRANGE = NUMBER OF SUBRANGES TO BE USED IN PROPAGATION.
!               IF THIS IS SET TO ZERO THEN THE PROGRAM WILL CHOSE
!               AN APPROPRIATE VALUE BASED ON EBIG
!      NBASIS = ARRAY OF LENGTH MRANGE (OR 1 IF MRANGE=0) HOLDING
!               THE NUMBER OF BASIS FUNCTIONS TO BE USED IN EACH
!               SUBRANGE. IF ANY ELEMENT IS SET TO ZERO OR IS
!               GREATER THAN THE CURRENT MAXIMUM (MLEG SEE BELOW)
!               THEN IT IS SET TO MLEG. IF MRANGE=0 BUT NBASIS.NE.0 THEN
!               THE PROGRAM WILL USE NBASIS BASIS FUNCTIONS IN ALL
!               SUBRANGES.
!      EBIG   = LARGEST MODULUS OF ENERGY IN ANY CHANNEL OF CURRENT RUN
!               IN RYD. (USED TO DETERMINE NUMBER OF SUB-RANGES REQUIRED
!               IN PROPAGATION) ONLY REQUIRED IF MRANGE=0
!      BBLOCH = PARAMETER IN BLOCH OPERATOR (USUALLY SET TO ZERO)
!      LBUG   = ARRAY HOLDING DEBUG PRINT SWITCHES (SEE BELOW)
!      IWR    = LOGICAL UNIT NUMBER OF OUTPUT DEVICE
!      IDISK  = LOGICAL UNIT NUMBER OF SCRATCH DISC FOR TEMPORARY
!               STORAGE OF SURFACE AMPLITUDES (IF THIS IS NOT REQUIRED
!               SET IDISK=0)
!      AMPA   = ARRAY USED FOR STORAGE OF BOTH THE SURFACE AMPLITUDES
!               AND THE POTENTIAL MATRIX.
!      EIGEN  = ARRAY USED TO HOLD BOTH THE EIGENVALUES OF THE
!               HAMILTONIAN MATRICES AND THE RADII AT WHICH THE
!               POTENTIAL MATRIX IS EVALUATED.
!      NAMPX  = ACTUAL DIMENSION OF AMPA AS DECLARED IN CALLING PROGRAM
!               THIS SHOULD BE AS LARGE AS POSSIBLE AND SHOULD BE A
!               MULTIPLE OF 2*NCH*NCH
!      BIGVEC AND WORK ARE ARRAYS USED AS WORK SPACE, THEIR DIMENSIONS
!               ARE SPECIFIED BELOW
!      POTL   = NAME OF SUBPROGRAM WHICH GENERATES THE POTENTIAL
!              MATRIX. SEE BELOW.
!
!***********************************************************************
!
!      DEBUG PRINT SWITCHES ARE
!
!      LBUG(1)=1...FOR RADII AT WHICH THE POTENTIAL MATRIX IS EVALUATED
!                  AND NUMBER OF BASIS FUNCTIONS USED IN EACH SUBRANGE
!      LBUG(1)=2...AS ABOVE BUT ROUTINE EXITS WITH THE RADII STORED
!                  IN ARRAY EIGEN BUT WITHOUT CARRYING OUT ANY
!                  FURTHER CALCULATIONS.
!      LBUG(2)=1...FOR HAMILTONIAN MATRIX ELEMENTS IN SETMTR
!      LBUG(3)=1...FOR EIGENVALUES IN AMPLTD
!      LBUG(3)=2...FOR EIGENVALUES,EIGENVECTORS AND SURFACE AMPLITUDES
!                  IN AMPLTD
!      LBUG(4)=1...FOR PROPAGATED R-MATRIX AT END OF EACH SUB-RANGE.
!
!***********************************************************************
!
!      COMMON/RPROPS/ HOLDS VARIABLES LOCAL TO THIS PACKAGE, THESE ARE
!            NCHAN  = NCH
!            IWRITE = IWR
!            IDISC  = IDISK
!            IXMAX  = NUMBER OF WEIGHTS AND ABSCISSAE IN QUADRATURE
!            NHD    = DIMENSION OF HAMLITONIAN MATRIX IN  SUBRANGE
!            NHSIZE = NHD*(NHD+1)/2
!            NLEG   = NUMBER OF BASIS FUNCTIONS USED IN CURRENT SUBRANGE
!            MLEG   = MAXIMUM PERMITTED NUMBER OF BASIS FUNCTIONS
!
!***********************************************************************
!
!      PRESENT DIMENSIONS ARE SET FOR A 10-POINT GAUSS-LEGENDRE
!      QUADRATURE AND UP TO 10 BASIS FUNCTIONS (LEGENDRE POLYNOMIALS)
!      THE VARIABLE MLEG SPECIFIES THIS MAXIMUM AND IS SET BELOW
!
!***********************************************************************
!
!      DIMENSIONS OF THE ARRAYS ARE DEFINED AS FOLLOWS
!
!       ETHR(NMX),RMAT(NMX,NMX),NBASIS(MRANGE)
!       PL(MLEG,IXMAX),XI(IXMAX),WTS(IXMAX),
!       WORK(NCH*NLEG*10)
!       BIGVEC((NCH*NLEG)*(NCH*NLEG+1)/2),
!       AMPA(NAMPX),EIGEN(10*NCH*NRANG),
!       (NLEG= MAXIMUM NUMBER OF BASIS FUNCTIONS IN ANY SUBRANGE)
!         THE PROGRAM ATTEMPTS TO STORE ALL POTENTIALS AND AMPLITUDES
!         IN CORE. IT CAN DO THIS IF THE NUMBER OF SUBRANGES IN THE
!         PROPAGATION IS LESS THAN OR EQUAL TO
!             NAMPX/(4*IXMAX*NCH*NCH)
!         IF THIS IS NOT THE CASE THEN A SCRATCH FILE WILL BE REQUIRED
!         AND NAMPX SHOULD BE SET EQUAL TO 2*NCH*NCH*MAX(NLEG,IXMAX)
!         THE DIMENSION OF EIGEN CAN ALSO BE REDUCED TO 2*IXMAX*NCH.
!
!***********************************************************************
!
      COMMON/RPROPS/NCHAN,IWRITE,IDISC,IXMAX,NHSIZE,NHD,NLEG,MLEG,&
       IREV,INPR
      DIMENSION LBUG(3),NBASIS(*),LCHL(*)
      INTEGER(mpiint) :: I
      INTEGER, ALLOCATABLE :: RANGE_EIGEN(:,:)
      INTEGER, ALLOCATABLE :: RANGE_AMPA1(:,:),RANGE_AMPA2(:,:)
      DOUBLE PRECISION :: START_T, END_T, HALF, DELTA, EPS
      INTEGER :: NMX, MRANGE, NBASIS, LAMAX, ION, LCHL, LBUG, IWR,&
                 IDISK, NAMPX, NCHAN, IWRITE, IDISC, IXMAX, MLEG,&
                 NRANGE, NLEG, INPR, MHD, MINDIM, MAMP, NAMPR, MBUG,&
                 IREV, IR2, NRANG, NRAMP, NREIG, I1, I2, NHD, NHSIZE,&
                 NAMP, NPTS, NCH
      DOUBLE PRECISION :: XI(5),WTS(5),PL(10,5), RANGE, DR, RA2, RA1
      DOUBLE PRECISION :: A, B, ETHR(*), SCALE, CF(NCH,NCH,*), BBLOCH,&
                          AMPA(*),EIGEN(*),BIGVEC(*),WORK(*), EBIG
!
!      ABSCISSAE AND WEIGHTS FOR GAUSS-LEGENDRE QUADRATURE8
!
      DATA XI(1)/0.973906528517172d0 /,WTS(1)/0.066671344308688d0 /
      DATA XI(2)/0.865063366688985d0 /,WTS(2)/0.149451349150581d0 /
      DATA XI(3)/0.679409568299024d0 /,WTS(3)/0.219086362515982d0 /
      DATA XI(4)/0.433395394129247d0 /,WTS(4)/0.269266719309996d0 /
      DATA XI(5)/0.148874338981631d0 /,WTS(5)/0.295524224714753d0 /
!
      DATA HALF/0.5D0/,DELTA/6.D0/,EPS/1.D-10/
!
      IF (.NOT. mpi_started) THEN
         PRINT *,'MPI MODULE HAS NOT BEEN INITIALIZED'
         STOP
      ENDIF
!
      NCHAN = NCH
      IWRITE = IWR
      IDISC  = 0
      IXMAX = 5
      MLEG = 10
!
      RANGE =  ABS(B-A)
      IF(RANGE.LT.EPS) GO TO 75
!
!      CALCULATE THE NUMBER OF SUBRANGES IF NOT ALREADY SPECIFIED
!
      IF(MRANGE.EQ.0) THEN
        NRANGE = SQRT(EBIG)*RANGE/DELTA+HALF
        NRANGE = MAX(NRANGE,1)
      ELSE
        NRANGE = MRANGE
      ENDIF
!
      ALLOCATE(RANGE_EIGEN(2,NRANGE),&
               RANGE_AMPA1(2,NRANGE),RANGE_AMPA2(2,NRANGE))
!
!     CHECK NUMBER OF BASIS FUNCTIONS TO BE INCLUDED IN EACH SUBRANGE
!    THIS HAS BEEN MODIFIED TO PROHIBIT THE USE OF DIFFERENT NUMBERS
!   OF BASIS FUNCTIONS IN DIFFERENT SUBRANGES WHICH MAKES INTERFACING
!     MUCH EASIER
!
      IF(NBASIS(1).EQ.0) THEN
        NLEG = MLEG
        INPR = 0
!     ELSE IF (MRANGE.LE.1) THEN
      ELSE
        NLEG = NBASIS(1)
        INPR = 0
!     ELSE
!       NLEG = 0
!       DO 19 IR=1,NRANGE
!       IF(NBASIS(IR).LE.0) NBASIS(IR)=MLEG
!       IF(NBASIS(IR).GT.MLEG) NBASIS(IR)=MLEG
!       IF(NBASIS(IR).GT.NLEG) NLEG=NBASIS(IR)
!19     CONTINUE
!       INPR = 1
      ENDIF
!
!     THE FOLLOWING DIMENSIONS ARE REQUIRED BY POTL
      MHD = 2*NCHAN*IXMAX
      MINDIM = 2*NCHAN*NCHAN*MAX(NLEG,IXMAX)
      MAMP = NCHAN*MHD
      NAMPR = MAMP*NRANGE
!
!      CHECK THAT THERE IS SUFFICIENT SPACE TO STORE SURFACE AMPLITUDES
!      IF THERE IS NOT, CHECK THAT A SCRATCH DISC IS AVAILABLE.
!
      IF(2*NAMPR.GT.NAMPX) THEN
        IF(IDISK.EQ.0.OR.MINDIM.GT.NAMPX) GO TO 74
        NAMPR = MAMP
        IDISC = IDISK
        REWIND IDISC
      ENDIF
!
!      INITIALIZE DEBUG PRINTS
!
      MBUG = 0
      DO I=1,3
      MBUG = MAX(MBUG,LBUG(I))
      ENDDO   
      IF(MBUG.GT.0) WRITE(IWRITE,20)A,B,NRANGE
      IF(LBUG(1).NE.0.AND.INPR.EQ.0) WRITE(IWRITE,21)NLEG
      IF(LBUG(1).NE.0.AND.INPR.NE.0) WRITE(IWRITE,21)(NBASIS(IR),IR=1,NRANGE)
!
!     INITIALIZE FORWARD/BACKWARD PROPAGATION
!
      IF(B.GT.A) THEN
        IREV = 0
        IR2 = 1
      ELSE
        IREV = 1
        IR2 = 0
      ENDIF
      DR = RANGE/ FLOAT(NRANGE)
!
!      LEGNDR EVALUATES LEGENDRE POLYNOMIALS PL(X)
!      AT THE ABSCISSAE OF THE GAUSS-LEGENDRE QUADRATURE SCHEME
!      MLEG, THE MAXIMUM NUMBER OF POLYNOMIALS INCLUDED IN THE
!      EXPANSION IS EQUAL TO 2*IXMAX, THE NUMBER OF POINTS IN
!      THE QUADRATURE SCHEME.
!
      CALL LEGNDR(PL,XI,MLEG,IXMAX)
!
      RA2 = MIN(A,B)
      NRAMP = 1
      NREIG = 1
!
!      LOOP OVER SUBRANGES
!
      DO NRANG=1,NRANGE
      RA1 = RA2
      RA2 = RA2+DR
      I1 = NRAMP+IREV*NAMPR
      I2 = NRAMP+IR2*NAMPR
      IF(INPR.NE.0) NLEG=NBASIS(NRANG)
      NHD = NCHAN*NLEG
      NHSIZE = NHD*(NHD+1)/2
      NAMP = NCHAN*NHD
!
!     SET UP POTENTIAL MATRIX
!
!     THIS IS STORED AS AN (NCHAN*NCHAN*NPTS) ARRAY AND INCLUDES THE
!     CENTRIFUGAL TERM.
!     THE NUMBER OF MESH POINTS (NPTS) AND THEIR RADII ARE GENERATED
!     BY SUBROUTINE MESH. (THEY CANNOT BE CHOSEN BY THE MAIN PROGRAM)
!     IN ORDER TO SAVE SPACE THE MESH IS STORED IN ARRAY EIGEN AND
!     THE POTENTIALS IN AMPA, THEY WILL EVENTUALLY BE OVERWRITTEN.
!     THE MATRIX IS SET UP FOR THE ENTIRE RANGE (A,B) ON THE FIRST
!     PASS THROUGH THE SUBRANGE LOOP UNLESS A SCRATCH DISC IS BEING
!     USED TO STORE THE AMPLITUDES. IN THIS CASE THE POTENTIAL MUST
!     BE SET UP SEPARATELY FOR EACH SUBRANGE AS IT WOULD OTHERWISE
!     BE OVERWRITTEN.
!
      IF(IDISC.NE.0.OR.NRANG.EQ.1) THEN
!
        START_T = OMP_GET_WTIME()
!
!     MUST CALCULATE POTENTIALS
        IF(IDISC.NE.0) THEN
          NPTS = 2*IXMAX
          CALL MESH(RA1,RA2,1,IXMAX,XI,NPTS,EIGEN)
        ELSE
          NPTS = 2*IXMAX*NRANGE
          CALL MESH(A,B,NRANGE,IXMAX,XI,NPTS,EIGEN)
        ENDIF
        IF(LBUG(1).NE.0) WRITE(IWRITE,24)(EIGEN(I),I=1,NPTS)
        IF(LBUG(2).EQ.2) CYCLE
!
        CALL POTL(NCHAN,LAMAX,ION,LCHL,CF,NPTS,EIGEN,AMPA)
!
        END_T = OMP_GET_WTIME()
        WRITE(IWRITE,'("Potentials took:",f15.3,"s")') end_t-start_t
!
      ENDIF
!
!     REDISTRIBUTE CONSTRUCTION OF THE SECTOR AMPLITUDES AND EIGENVALUES
!     AMONG THE MPI TASKS.
!
      IF (MOD(NRANG,INT(NPROCS)) == MYRANK) THEN

      WRITE(IWRITE,'("Task ",i3," processing sector ",i3)') MYRANK, NRANG
!
!      SETMTR SETS UP MATRIX ELEMENTS OF THE HAMILTONIAN
!      ON THE SUBRANGE (RA1,RA2) IN ARRAY BIGVEC.
!
      START_T = OMP_GET_WTIME()

      CALL SETMTR(RA1,RA2,BIGVEC,WTS,PL,AMPA(NRAMP),ETHR,BBLOCH,SCALE,LBUG(2))

      END_T = OMP_GET_WTIME()
      WRITE(IWRITE,'("SETMTR took:",f15.3,"s")') end_t-start_t
!
!       AMPLTD DIAGONALIZES THE SUBRANGE HAMILTONIAN AND
!       EVALUATES THE SURFACE AMPLITUDES AT RA1 AND RA2, STORING
!      THEM IN MATRIX AMPA AND THE EIGENVALUES IN ARRAY EIGEN.
!
      START_T = OMP_GET_WTIME()

      CALL AMPLTD(RA1,RA2,BIGVEC,AMPA(I1),AMPA(I2),EIGEN(NREIG),WORK,LBUG(3))

      END_T = OMP_GET_WTIME()
      WRITE(IWRITE,'("AMPLTD took:",f15.3,"s")') end_t-start_t
!
!     END OF REDISTRIBUTION OVER MPI TASKS
!
      ENDIF
!
      RANGE_EIGEN(1,NRANG) = NREIG
      RANGE_EIGEN(2,NRANG) = NREIG+MHD-1
      RANGE_AMPA1(1,NRANG) = I1
      RANGE_AMPA1(2,NRANG) = I1+NAMP-1
      RANGE_AMPA2(1,NRANG) = I2
      RANGE_AMPA2(2,NRANG) = I2+NAMP-1
!
!      WRITE AMPLITUDES TO DISC IF NECESSARY
!
!
      IF(IDISC.EQ.0) THEN
        DO I=NREIG,NREIG+MHD-1
        EIGEN(I) = EIGEN(I)/SCALE
        ENDDO
        NRAMP = NRAMP+MAMP
        NREIG = NREIG+MHD
      ELSE
        DO I=1,NHD
        EIGEN(I) = EIGEN(I)/SCALE
        ENDDO
        WRITE(IDISC)(AMPA(I),I=1,2*NAMP),(EIGEN(I),I=1,NHD)
      ENDIF
!
      ENDDO !NRANG
!
!      END OF SUBRANGE LOOP
!

!
!      DISTRIBUTE THE CALCULATED BLOCKS TO OTHER TASKS
!
      DO NRANG=1,NRANGE
!
         DO I=0,NPROCS-1

            IF (MOD(NRANG,INT(NPROCS)) == I) THEN
               WRITE(IWRITE,'("Rank ",i3," is broadcasting range ",i3)') I,NRANG
!
               CALL MPI_MOD_BCAST(EIGEN(RANGE_EIGEN(1,NRANG):RANGE_EIGEN(2,NRANG)),I)
!
               CALL MPI_MOD_BCAST(AMPA(RANGE_AMPA1(1,NRANG):RANGE_AMPA1(2,NRANG)),I)
!
               CALL MPI_MOD_BCAST(AMPA(RANGE_AMPA2(1,NRANG):RANGE_AMPA2(2,NRANG)),I)
            ENDIF

         ENDDO
      ENDDO

      RETURN
!
!      ERROR MESSAGES
!
 74   WRITE(IWRITE,22)NCHAN,NLEG,NRANGE
      STOP
 75   WRITE(IWRITE,23)A,B,EBIG
      NRANGE = 0
      RETURN
!
!      FORMAT STATEMENTS
!
 20   FORMAT(//' R-MATRIX PROPAGATION  INITIAL RADIUS=',F10.5,3X,' FINAL RADIUS=',F10.5,3X,'NO. OF RANGES=',I2/)
 21   FORMAT(' THE NUMBER OF BASIS FUNCTIONS INCLUDED IN EACH SUBRANGE IS',8I5/(20I5)/)
 22   FORMAT(/' INSUFFICIENT SPACE IN AMPA AND EIGEN FOR NCHAN=',I2,3X,'NLEG=',I2,3X,'NRANGE=',I2/)
 23   FORMAT(/' NO PROPAGATION REQUIRED FOR A=',F14.7,3X,'B=',F14.7,3X,'EBIG=',F14.7/)
 24   FORMAT(/' THE POTENTIAL MATRIX IS EVALUATED AT THE FOLLOWING RADII'/(12F10.4))
      END
    SUBROUTINE RESIDR(ETOTR,NCHAN,NTARG,ETARG,NLPOLE,NUPOLE,nstat, &
    NGEOM,nocsf,ichl,AMPAE,WAMP,EPOLE,IBUTTL,BCOEF,RTEMP,ezero,sfac, &
    iex,ecex,rcex,IWRITE,IFAIL,stdout)
    use lapack95_compak
    use blas95_compak
    IMPLICIT DOUBLE PRECISION(A-H,O-Z)
!
!***********************************************************************
!
!      RESIDR SETS UP ELECTRONIC R-MATRIX, EXCLUDING POLES ALREADY DEALT
!      WITH, BUT INCLUDING BUTTLE TERM, AT EACH INTERNUCLEAR SEPARATION
!
!***********************************************************************
!
    integer :: nocsf, ichl, ibuttl, iwrite, ifail, nchan, nstat, ngeom, ntarg, iex
    DIMENSION EPOLE(nstat,NGEOM),WAMP(NCHAN,nstat,NGEOM),RTEMP(*), &
    BCOEF(3,NCHAN,NGEOM),ETARG(NTARG,NGEOM),nocsf(ngeom),ichl(nchan), &
    ezero(ngeom),sfac(nchan,ngeom),ecex(iex,ngeom), &
    rcex(NCHAN,iex,NGEOM),AMPAE(NSTAT,NCHAN)
    INTEGER :: stdout, NP1, np2, ipass
    
    double precision :: ZERO=0.D0,ONE=1.D0,TWO=2.D0
    DOUBLE PRECISION :: WEN
    double precision, allocatable :: r2dtemp(:,:)
    integer :: nlpole, nupole, IR, I, J, K, KL, M, itgt
!
    DO 1 K=1,NCHAN*(NCHAN+1)*NGEOM/2
        RTEMP(K) = ZERO
    1 END DO
    
    allocate(r2dtemp(NCHAN, NCHAN))
    r2dtemp=0.d0
!
!----- CONSTRUCT CONTRIBUTION TO R-MATRIX FROM REMAINING POLES
    NP1 = 1
    ipass = 1
!
!----- If non-adiabatic calculation, check that this energy is low
!      enough for R-matrix contribution from remaining poles to be
!      treated adiabatically
!
    20 if(ipass == 2 .OR. nlpole > 1) then
        EDIFF = EPOLE(NP1,1)-ETOTR
        DO 5 IG=2,NGEOM
            ENEXT = EPOLE(NP1,IG)-ETOTR
            IF(EDIFF*ENEXT < ZERO) GO TO 90
            EDIFF = ENEXT
        5 END DO
    endif
!
    K = 0
    DO 23 IR=1,NGEOM
        if(ipass == 1) then
            np2 = nlpole-1
        else
            np2 = nocsf(ir)
        endif
        if (abs(ibuttl) > 1) eps=one/(ezero(ir)-etotr)
        if (abs(ibuttl) > 2) stop "Buttle correction not implemented"
    !
    !---- ZM: Compute the energy-dependent amplitude in the transposed form. 
    !         This removes the floating point division in the R-matrix formula.
    !
        AMPAE(1:NP1-1,:) = 0.0D0
    !$OMP PARALLEL DEFAULT(NONE) PRIVATE(M,K,WEN) SHARED(NP1,NP2,NCHAN,EPOLE,ETOTR,IR,AMPAE,WAMP)
    !$OMP DO
        DO K=NP1,NP2
            WEN = 1.0D0/(EPOLE(K,IR)-ETOTR) ! for gemm
        !~          WEN = sqrt(1.0D0/(EPOLE(K,IR)-ETOTR)) ! for syrk
            DO M=1,NCHAN
                AMPAE(K,M) = WAMP(M,K,IR)*WEN 
            ENDDO
        ENDDO
    !$OMP END DO
    !$OMP END PARALLEL
    !----- AH: use mkl lapack functions for constructing R-matrix
    !      Try first with gemm then with syrk.
        
        if ((abs(ibuttl) < 2) .AND. (np2 /= 0)) then
            call gemm(WAMP(1:NCHAN,NP1:NP2,IR), AMPAE(NP1:NP2,1:NCHAN), r2dtemp(1:NCHAN,1:NCHAN))
        !~         call syrk(AMPAE(NP1:NP2,1:NCHAN), 
        !~      1             r2dtemp(1:NCHAN,1:NCHAN), trans='T', uplo='L')
        end if
        
        K = 0      
        do I=1,NCHAN
            do J=1,I
                K = K+1
                if (abs(ibuttl) < 2) then
                    RTEMP(K) = RTEMP(K) + r2dtemp(I,J)
                else
                    DO 221 KL=NP1,NP2
                        SUM = SUM+WAMP(I,KL,IR)*WAMP(J,KL,IR) &
                        *(one/(EPOLE(KL,IR)-ETOTR) - eps)
                    221 END DO
                    RTEMP(K) = RTEMP(K) + SUM         
                end if
                
            end do
        end do
    23 END DO
!
    IF(ipass == 1) THEN
        NP1 = NUPOLE+1
        ipass = 2
        GO TO 20
    ENDIF
!
!----- ADD BUTTLE CORRECTION
    IF(abs(IBUTTL) == 1) THEN
        K = 0
        DO 3 IR=1,NGEOM
            DO 4 I=1,NCHAN
                if(ibuttl < 0) then
                    itgt = 1
                else
                    itgt = ichl(i)
                endif
                E = TWO*(ETOTR-ETARG(itgt,IR))
                BUTTL = BCOEF(1,I,IR)+E*BCOEF(2,I,IR)+E*E*BCOEF(3,I,IR)
                K = K+I
                RTEMP(K) = RTEMP(K)+BUTTL
            4 END DO
        3 END DO
    ENDIF
!-----add higher poles contribution for partitioned R-matrix
    if (abs(IBUTTL) > 1) THEN
        K = 0
        DO 33 IR=1,NGEOM
            eps=one/(ezero(ir)-etotr)
            DO 34 I=1,NCHAN
                K = K+I
                RTEMP(K) = RTEMP(K)+sfac(i,ir)*eps
                DO 35 KL=1,iex
                    RTEMP(K) = RTEMP(K)+rcex(i,kl,ir) &
                    *(one/(ecex(KL,IR)-ETOTR) - eps)
                    
                35 END DO
            34 END DO
        33 END DO
    ENDIF
    RETURN
    90 IF(ipass == 1) THEN
        IFAIL = 1
    ELSE
        IFAIL = 2
    ENDIF
    RETURN
    END
    SUBROUTINE VRMAT2(NCHAN,NHD,RMATRX,ETOTR,AMPA,EIGEN,NPOLE)
    IMPLICIT DOUBLE PRECISION (A-H,O-Z)
!
!***********************************************************************
!
!      VRMAT2 IS ENERGY DEPENDANT ENTRY TO NUCLEAR R-MATRIX CODE
!
!***********************************************************************
!
!     INPUT PARAMETERS ARE
!
!      ETOTR  = ENERGY OF INCIDENT PARTICLE ON LOWEST ENERGY STATE IN
!               RYDBERGS
!      AMPA,EIGEN ARE AS DEFINED IN VRMAT1
!
!***********************************************************************
!
    INTEGER :: NPOLE, K, KL, M, N, NCHAN, NHD
    DIMENSION RMATRX(*),AMPA(NHD,NCHAN),EIGEN(NHD)
    DOUBLE PRECISION :: ZERO = 0.0D0, SUM, RMATRX,AMPA,EIGEN,ETOTR
!
    K = 0
    DO 40 M=1,NCHAN
        DO 4 N=1,M
            K = K+1
            SUM = ZERO
            DO 2 KL=NPOLE,NHD
                SUM=SUM+AMPA(KL,M)*AMPA(KL,N)/(EIGEN(KL)-ETOTR)
            2 END DO
            RMATRX(K) = SUM
        4 END DO
    40 END DO
!
    RETURN
!
    END
    SUBROUTINE REGRMAT(NCHAN,NSTAT,RMATRX,ETOTR,AMPA,EIGEN,NPOLE)
    IMPLICIT DOUBLE PRECISION (A-H,O-Z)
!
!***********************************************************************
!
!      REGRMAT creates regular part of the R-matrix
!
!***********************************************************************
!
!     INPUT PARAMETERS ARE
!
!      ETOTR  = ENERGY OF INCIDENT PARTICLE ON LOWEST ENERGY STATE IN
!               RYDBERGS
!      NPOLE= index of closest pole
!      AMPA,EIGEN ARE AS DEFINED IN VRMAT1
!
!***********************************************************************
!
    INTEGER :: NCHAN,NSTAT,NPOLE,K,M,N,KL
    DIMENSION RMATRX(*),AMPA(NCHAN,NSTAT,1),EIGEN(NSTAT,1)
    DOUBLE PRECISION :: ZERO = 0.0D0, RMATRX,ETOTR,AMPA,EIGEN,SUM
    print*, 'NPOLE=',NPOLE
    K = 0
    DO 40 M=1,NCHAN
        DO 4 N=1,M
            K = K+1
            SUM = ZERO
            DO 2 KL=1,NSTAT
                if (KL /= NPOLE) then 
                    SUM=SUM+AMPA(M,KL,1)*AMPA(N,KL,1)/(EIGEN(KL,1)-ETOTR)
                end if
            2 END DO
            RMATRX(K) = SUM
        4 END DO
    40 END DO
!
    RETURN
!
    END
    SUBROUTINE SINGRMAT(NCHAN,NSTAT,RMATRX,WAMP,NPOLE)
    IMPLICIT DOUBLE PRECISION (A-H,O-Z)
!
!***********************************************************************
!
!      SINGRMAT creates singular part of the R-matrix
!      (without dividing by the energy difference with the pole)
!
!***********************************************************************
!
!     INPUT PARAMETERS ARE
!
!   
!            
!      NPOLE= index of closets pole
!      
!
!***********************************************************************
!
    INTEGER :: NCHAN,NSTAT,NPOLE,K,M,N
    DIMENSION RMATRX(*),WAMP(NCHAN,nstat,1)
    DOUBLE PRECISION :: ZERO = 0.0D0,RMATRX,WAMP
!
    K = 0
    DO 40 M=1,NCHAN
        DO 4 N=1,M
            K = K+1
            RMATRX(K) = WAMP(M,NPOLE,1)*WAMP(N,NPOLE,1)
        4 END DO
    40 END DO
!
    RETURN
!
    END
    SUBROUTINE DISPOT(NDIS,LAMAX,ION,LCHL,CF,NPTS,RR,VM)
    IMPLICIT DOUBLE PRECISION (A-H,O-Z)
!
!***********************************************************************
!
!     DISPOT CALCULATES THE ASYMPTOTIC POTENTIAL IN THE DISSOCIATING
!     CHANNEL
!
!***********************************************************************
!
    INTEGER :: INC, NDIS,LAMAX,ION,LCHL,NPTS,IR,I,K,J
    DIMENSION RR(NPTS),VM(NDIS,NDIS,NPTS),CF(NDIS,NDIS,*),LCHL(*)
    DOUBLE PRECISION :: ZERO = 0.0D0,ONE=1.D0,RR,VM,CF,RA,R
!
    DO 10 IR=1,NPTS
        R = RR(IR)
    !
        INC = -5
        DO 1 I=1,NDIS
        !
        !----- GAILIT SHOULD BE USED FOR ENTIRE RANGE WHERE MULTIPOLE EXPANSION
        !      IS VALID
            INC = INC+5
            RA = CF(INC+1,1,LAMAX+1)
            IF(R <= RA) THEN
            !
            !---- MORSE POTENTIAL
                RE = CF(INC+2,1,LAMAX+1)
                D  = CF(INC+3,1,LAMAX+1)
                DE = CF(INC+4,1,LAMAX+1)
                BETA = CF(INC+5,1,LAMAX+1)
                V = D*(ONE-EXP(-BETA*(R-RE)))**2+DE
            !
            ELSE
            !
            !---- MULTIPOLE EXPANSION
                V = ZERO
                DO 2 K=1,LAMAX
                    V = V+CF(I,I,K)/R**(K+1)
                2 END DO
            !
            ENDIF
        !
            DO 3 J=1,NDIS
                VM(I,J,IR) = ZERO
            3 END DO
            VM(I,I,IR) = V
        1 END DO
    10 END DO
!
    RETURN
    END
    SUBROUTINE ASYMD(ETOT,NDIS,TWORM,AO,RAF,ETHR,F,DF,VM,IWRITE, &
    LBUG)
    IMPLICIT DOUBLE PRECISION (A-H,O-Z)
!
!***********************************************************************
!
!     ASYMD solves radial equation(s) in dissociating channel(s) for
!     R .ge. AO (assumed uncoupled)
!
!***********************************************************************
!
    EXTERNAL NAGPOT
    integer, parameter :: neq=2
    integer :: iwrite, lbug,NDIS,INC,ICF1,NK,LAMAX
    COMMON/DISPAR/DE,D,BETA,RE,TWOM,BIGKSQ,EPSD,RA
    COMMON/NAGPT/ cfnag,ennag,elnag,ZZNAG,LAMAX,ICF1,INC
    double precision, pointer :: cfnag(:),ennag(:),elnag(:)
    DIMENSION F(NDIS,NDIS,2),DF(NDIS,NDIS,2),G(2),ETHR(NDIS), &
    VM(5,NDIS),work(14*neq),thres(neq),gp(neq),gmax(neq)
    double precision :: ZERO = 0.0D0
!
    EPSD = 1.D-8
    do 10 i=1,neq
        thres(i) = epsd
    10 END DO
!
    TWOM = TWORM
    INC = NDIS*NDIS
    ICF1 = -NDIS
    DO 3 I=1,NDIS
        RA = VM(1,NDIS)
        RE = VM(2,NDIS)
        D  = VM(3,NDIS)
        DE = VM(4,NDIS)
        BETA = VM(5,NDIS)
        ICF1 = ICF1+NDIS+1
        BIGKSQ = ETOT-ETHR(I)
        IF(BIGKSQ < ZERO) THEN
            NK = 1
        ELSE
            NK = 2
        ENDIF
        DO 4 K=1,NK
            RAFIN = RAF
            G(1) = F(I,I,K)
            G(2) = DF(I,I,K)
            DO 5 J=1,NDIS
                F(I,J,K) = ZERO
                DF(I,J,K) = ZERO
            5 END DO
            IFAIL = 1
        !
        !      CALL D02BAF(RAFIN,AO,NEQ,G,EPSD,NAGPOT,work,IFAIL)
        !
        !    The following line is a warning that should be removed once D02PVF
        !    and D02PCF are replaced.
            WRITE(IWRITE,*) 'NAG routine not replaced. This part of the program &
           &does not work in this version of the code'
        ! V-03      CALL D02PVF(neq,RAFIN,G,AO,EPSD,thres,2,'usualtask',.false.,
        ! V-03     * zero,work,14*neq,IFAIL)
        ! V-03      CALL D02PCF(nagpot,AO,rafin,G,gp,gmax,work,IFAIL)
        !   
        !      RAFIN SHOULD EQUAL A0 ON EXIT IF NO ERRORS
            IF(IFAIL /= 0) WRITE(IWRITE,1) IFAIL,RAFIN
            1 FORMAT(/' D02BAF failed IFAIL=',I1,' at R=',F8.4)
        !
            F(I,I,K) = G(1)
            DF(I,I,K) = G(2)
        4 END DO
    3 END DO
!
    IF(LBUG >= 1) WRITE(IWRITE,2) RAFIN,BIGKSQ,((F(I,I,K),DF(I,I,K), &
    K=1,2),I=1,NDIS)
    2 FORMAT(/' SOLUTIONS AT R=',F6.3,'  KSQ=',F10.4,/(8D15.6))
    RETURN
    END
    SUBROUTINE SPLITM(NDIS,ND2,MDMAX,TRIANG,SQUARC,DISM)
    IMPLICIT DOUBLE PRECISION (A-H,O-Z)
!
!***********************************************************************
!
!     SPLITM unpacks data describing the potentials in the dissociating
!     channels. It must match DISINI in VIBRMT.
!
!***********************************************************************
!
    INTEGER :: MDMAX,NDIS,ND2
    DIMENSION SQUARC(NDIS,NDIS,*),DISM(5,NDIS),TRIANG(ND2,*)
    DOUBLE PRECISION :: EPS=1.D-4,ZERO=0.0D0
    INTEGER :: NDSQ,I,J,K,LAST,MDMIN,II,IPT,KK
!
    NDSQ = NDIS*(NDIS+1)/2
    DO 11 K=1,MDMAX
        DO 10 J=1,NDIS
            DO 1 I=1,NDIS
                SQUARC(I,J,K) = ZERO
            1 END DO
        10 END DO
    11 END DO
    LAST = NDSQ+6*(NDIS-1)+1
    MDMIN = INT(TRIANG(LAST,1)+EPS)
    II = 0
    IPT = NDSQ-5
    DO 20 I=1,NDIS
        II = II+I
        IPT = IPT+6
        KK = 0
        DO 3 K=MDMIN,MDMAX
            KK = KK+1
            SQUARC(I,I,K) = TRIANG(II,KK)
        3 END DO
        DO 2 K=1,5
            DISM(K,I) = TRIANG(IPT+K,1)
        2 END DO
    20 END DO
!
    RETURN
    END
    SUBROUTINE INTIN(RMATR,RAFIN,FX,FX1,NP,NA,Y,DY,HX,MAXPTS, &
    TOL,IBUG,IWRITE)
    IMPLICIT DOUBLE PRECISION(A-H,O-Z)
!
!     PERFORMS INWARD INTEGRATION FROM POINT RAFIN TO RMATR
!
    EXTERNAL NAGRHS
    LOGICAL :: START
    INTEGER :: NP, NA, IWRITE, MAXPTS, IBUG
    DIMENSION FX(NP,NP,2),FX1(NP,NP,2),Y(NP),DY(NP)
    double precision, allocatable :: fr(:),frm(:),yr(:),wk(:)
    logical :: ONESTP=.FALSE.,HIGH=.TRUE.
    integer :: IFAIL=0, lrwork, J, JA, I, nsucc,nfail,natt
    double precision :: ZERO = 0.0D0
!
!      LRWORK = 16+20*NP
    lrwork = 16+1000*np
    allocate (fr(np),frm(np),yr(np),wk(lrwork))
!
!     GENERATE (NP BY NP+NA)-DIMENSIONAL SOLUTION MATRIX IN REGION 2.
!     NP EQUATIONS ARE INTEGRATED INWARD NP+NA TIMES 
!
    DO 90 J=1,NP+NA
        JA = J - NP
    !
    !     SET THE BOUNDARY CONDITIONS-
    !
        IF(J <= NP) THEN
            DO 30 I=1,NP
                Y(I) = FX(I,J,1)
                DY(I) = FX1(I,J,1)
            30 END DO
        ELSE
            DO 40 I=1,NP
                Y(I) = FX(I,JA,2)
                DY(I) = FX1(I,JA,2)
            40 END DO
        ENDIF
    !
        START = .TRUE. 
        FR(1) = ZERO
        FRM(1) = ZERO
    !    The following line is a warning that should be removed once D02LXF
    !    is replaced.
    !      WRITE(IWRITE,*) 'NAG routine D02LXF not replaced. This part of the
    !     1 program does not work in this version of the code'
    ! V-03      CALL D02LXF(NP,HX,TOL,FR,FRM,MAXPTS,START,ONESTP,HIGH,WK,LRWORK,
    ! V-03      * IFAIL)
    !
    !      X = RAFIN
    !
    ! V-03 10   CALL D02LAF(NAGRHS,NP,X,RMATR,Y,DY,YR,WK,LRWORK,IFAIL)
    !
    !      IF(X.LT.RMATR) GO TO 10
    !    The following line is a warning that should be removed once D02LAF
    !    is replaced.
    !  10  write(IWRITE,*)'Replacement for NAG integration routines not
    !     1  implemented. This part of the  program does not work in
    !     2  this version of the code (NV, November 2003)'
    !
    !     OPTIONAL DIAGNOSTICS
    !      IF(IBUG.NE.0) THEN
    ! V-03        CALL D02LYF(NP,HNEXT,HUSED,HSTART,NSUCC,NFAIL,NATT,FR,
    ! V-03     1  FRM,WK,LRWORK,IFAIL)
    !    The following line is a warning that should be removed once D02LYF
    !    is replaced.
    !        write(IWRITE,*)'Replacement for NAG integration routines
    !     1  (diagnostics) not implemented. This part of the  program does
    !     2  not work in this version of the code (NV, November 2003)'
    ! V-03        WRITE(IWRITE,1001) HSTART,HUSED,HNEXT,NSUCC,NFAIL,NATT
    !      ENDIF
    !
    !  If Runge-Kutta-Nystrom integration is used to propagate wavefunction,
    !  following tens of statements labeled with 'CCC' have to be activated.
    !
    !      setup Runge-Kutta-Nystrom integration
        high = .FALSE. !AlexH
        call rknset(np,hx,tol,fr,frm,maxpts,start,onestp,high,lrwork,wk, &
        ifail)
        
        X = RAFIN
    !      do Runge-Kutta-Nystrom intergration
        10 call rknint(nagrhs,np,x,rmatr,y,dy,yr,wk,ifail)
    ! rint *, 'x=',X
    !      if rmat radius not reached, keep trying
    !      IF(X.LT.RMATR) GO TO 10
        if(x > rmatr) then
            ifail=0
            go to 10
        end if
    !      OPTIONAL DIAGNOSTICS
        IF(IBUG /= 0) THEN !recomment to turn off de bug AlexH
            call rkndia(np,hnext,hused,hstart,nsucc,nfail,natt,fr,frm,wk)
            WRITE(IWRITE,1001) HSTART,HUSED,HNEXT,NSUCC,NFAIL,NATT
            
        ENDIF
    !
    !     STORE THE VALUES OF THE FUNCTION AND DERIVATIVE AT THE BOUNDARY
    !
        IF(J <= NP) THEN
            DO 80 I=1,NP
                FX(I,J,1) = Y(I)
                FX1(I,J,1) = DY(I)
            80 END DO
        ELSE
            DO 81 I=1,NP
                FX(I,JA,2) = Y(I)
                FX1(I,JA,2) = DY(I)
            81 END DO
        ENDIF
    !
    90 END DO
    deallocate (fr,frm,yr,wk)
!
    RETURN
    1001 FORMAT(/'  D02LAF DIAGNOSTICS'//'   START MESH ',D12.4, &
    '   FINAL MESH ',D12.4,'    NEXT MESH ',D12.4/ &
    '   SUCCESSES',I5,'   FAILURES',I5,'    ATTEMPTS',I5)
    END
    SUBROUTINE MERG(NCHAN,NVCHAN,NDIS,NVOPEN,NDOPEN,FX,FXP,FV,FVP,FD, &
    FDP)
    IMPLICIT DOUBLE PRECISION(A-H,O-Z)
!
!***********************************************************************
!
!     MERG MERGES SOLUTIONS AND DERIVATIVES IN BOTH VIBRATIONAL AND
!      DISSOCIATING CHANNELS
!
!***********************************************************************
!
    INTEGER :: NDIS, NVOPEN, NDOPEN, KJ, NCHAN,NVCHAN
    DIMENSION FX(NCHAN,NCHAN,2),FXP(NCHAN,NCHAN,2), &
    FV(NVCHAN,NVCHAN,2),FVP(NVCHAN,NVCHAN,2),FD(*),FDP(*)
    double precision :: ZERO=0.0D0
    INTEGER :: I,J,K,NOPEN
!
    NOPEN = NVOPEN+NDOPEN
    DO 12 K=1,2
        DO 11 J=1,NCHAN
            DO 1 I=1,NCHAN
                FX(I,J,K) = ZERO
                FXP(I,J,K) = ZERO
            1 END DO
        11 END DO
    12 END DO
!
!---- OPEN VIBRATIONAL CHANNELS
    DO 22 K=1,2
        DO 21 J=1,NVOPEN
            DO 2 I=1,NVCHAN
                FX(I,J,K) = FV(I,J,K)
                FXP(I,J,K) = FVP(I,J,K)
            2 END DO
        21 END DO
    22 END DO
!
!---- PUT CLOSED CHANNEL FUNCTIONS IN CORRECT PLACES
    DO 31 J=1,NVCHAN-NVOPEN
        DO 3 I=1,NVCHAN
            FX(I,J+NOPEN,2) = FV(I,J+NVOPEN,1)
            FX(I,J+NOPEN,1) = ZERO
            FXP(I,J+NOPEN,2) = FVP(I,J+NVOPEN,1)
            FXP(I,J+NOPEN,1) = ZERO
        3 END DO
    31 END DO
!
!---- DISSOCIATING CHANNELS
    KJ = 0
    DO 41 K=1,2
        DO 4 J=1,NDIS
            DO 5 I=1,NDIS
                KJ = KJ+1
                IF(J > NDOPEN) THEN
                    IF(K == 1) THEN
                        FX(NVCHAN+I,NVCHAN+J,2) = FD(KJ)
                        FXP(NVCHAN+I,NVCHAN+J,2) = FDP(KJ)
                    ELSE
                        FX(NVCHAN+I,NVCHAN+J,1) = ZERO
                        FXP(NVCHAN+I,NVCHAN+J,1) = ZERO
                    ENDIF
                ELSE
                    FX(NVCHAN+I,NVOPEN+J,K) = FD(KJ)
                    FXP(NVCHAN+I,NVOPEN+J,K) = FDP(KJ)
                ENDIF
            5 END DO
        4 END DO
    41 END DO
!
    RETURN
    END
    SUBROUTINE POTL(NCHAN,LAMAX,ION,LCHL,CF,NBASIS,R,V)
    IMPLICIT DOUBLE PRECISION (A-H,O-Z)
!
!***********************************************************************
!
!     POTL CALCULATES THE VALUES OF THE ASYMPTOTIC POTENTIAL IN THE FORM
!     REQUIRED BY THE R-MATRIX PROPAGATOR ROUTINE RPROP.
!
!     THE POTENTIALS ARE EXPANDED IN INVERSE POWERS OF THE RADIAL
!     DISTANCE R, WITH EXPANSION COEFFICIENTS GIVEN IN THE MATRIX CF
!
!***********************************************************************
!
    INTEGER :: NCHAN,LAMAX,ION,LCHL,NBASIS
    DIMENSION R(NBASIS),V(NCHAN,NCHAN,NBASIS)
    DIMENSION CF(NCHAN,NCHAN,*),LCHL(NCHAN)
!
    DOUBLE PRECISION :: ZERO=0.0D0,ONE=1.0D0,TWO=2.0D0,RR
    INTEGER :: I,J,IR,K,EL2
!
    DO 4 I=1,NCHAN
        EL2 = LCHL(I)*(LCHL(I)+1)
        DO 3 J=1,NCHAN
            DO 1 IR=1,NBASIS
                VP = ZERO
                RR = ONE/R(IR)
                IF(I == J) VP=-TWO*Dble(ION)*RR+EL2*RR*RR
                DO 2 K=1,LAMAX
                    VP = VP+CF(I,J,K)*RR**(K+1)
                2 END DO
                V(I,J,IR) = VP
            1 END DO
        3 END DO
    4 END DO
!
    RETURN
    END
    SUBROUTINE SQUARM(NDIM,NMAT,TRIANG,SQUARE)
    IMPLICIT DOUBLE PRECISION (A-H,O-Z)
!
!***********************************************************************
!
!     SQUARM PUTS LOWER TRIANGLES BACK INTO SQUARE MATRICES)
!
!***********************************************************************
!
    INTEGER :: NDIM,NMAT,K,I,J,L
    DIMENSION SQUARE(NDIM,NDIM,NMAT),TRIANG(*)
!
    K = 0
    DO 3 L=1,NMAT
        DO 2 I=1,NDIM
            DO 1 J=1,I
                K = K+1
                SQUARE(I,J,L) = TRIANG(K)
                SQUARE(J,I,L) = TRIANG(K)
            1 END DO
        2 END DO
    3 END DO
!
    RETURN
    END
    SUBROUTINE KMAT_MKL(NCHAN,BSTO,NOPEN,F,FP,RMAT,AKMAT,fkmat,AA,BB,IPIV)
    use blas_lapack_gbl, only: blasint
    use lapack95_compak
    use blas95_compak
    IMPLICIT DOUBLE PRECISION(A-H,O-Z)
    INTEGER :: NCHAN,NOPEN
!
!***********************************************************************
!
!     K-MATRIX CALCULATION
!
!     NCHAN        NUMBER OF CHANNELS
!     BSTO         LOGARITHMIC DERIVATIVE/MATCHING RADIUS
!     NOPEN        NUMBER OF OPEN CHANNELS
!     F, FP        EXTERNAL REGION SOLUTIONS AND DERIVATIVES
!                  (ASSUMED IN CORRECT LOCATIONS)
!     RMAT         INTERNAL REGION R-MATRIX
!
!     OUTPUT :
!     AKMAT        K-MATRIX
!     fkmat        On exit, the full K-matrix including closed channels
!                  needed for compak - AlexH 11.11.10. Also used as
!                  workspace when calculating intermediate matrices.
!
!     ZM: these must be allocated externally and supplied as parameters
!         (this prevents segfaults for small stacks)
!     AA,BB        DP WORK SPACE, EACH OF LENGTH NCHAN*NCHAN
!     IPIV         INTEGER WORK SPACE, OF LENGTH NCHAN
!
!***********************************************************************
!
    DIMENSION RMAT(NCHAN,NCHAN),AKMAT(NOPEN,NOPEN),F(NCHAN,NCHAN,2), &
    FP(NCHAN,NCHAN,2),AA(NCHAN,NCHAN),BB(NCHAN,NOPEN), &
    BSTO(NCHAN), fkmat(nchan,nchan)
!
    INTEGER :: I,J
    INTEGER(blasint) :: IPIV(NCHAN)
!
    !$OMP PARALLEL DEFAULT(NONE) PRIVATE(I,J) SHARED(fkmat,AA,F,FP,BSTO,NCHAN)
    !$OMP DO
    DO J=1,NCHAN
        DO I=1,NCHAN
            AA(I,J) = F(I,J,2)
            fkmat(I,J) = FP(I,J,2)-BSTO(I)*F(I,J,2)
        END DO
    END DO
    !$OMP END DO
    !$OMP END PARALLEL
!
    call gemm(RMAT, fkmat, AA, 'N', 'N', -1.0d0, 1.0d0)
!
    !$OMP PARALLEL DEFAULT(NONE) PRIVATE(I,J) SHARED(fkmat,BB,F,FP,BSTO,NOPEN,NCHAN)
    !$OMP DO
    DO J=1,NOPEN
        DO I=1,NCHAN
            BB(I,J)=-F(I,J,1)
            fkmat(I,J) = FP(I,J,1)-BSTO(I)*F(I,J,1)
        END DO
    END DO
    !$OMP END DO
    !$OMP END PARALLEL
!
    call gemm(RMAT, fkmat, BB, 'N', 'N', +1.0d0, 1.0d0)
!
    IF(NCHAN == 1) THEN
        BB(1,1)=BB(1,1)/AA(1,1)
    ELSE
        ipiv=0
        call getrf(AA,ipiv)
        call getrs(AA,ipiv,BB)
    ENDIF
!
    !$OMP PARALLEL DEFAULT(NONE) PRIVATE(I,J) SHARED(BB,NOPEN,NCHAN,fkmat,akmat)
    !$OMP DO
    DO J=1,NCHAN
       IF (J <= NOPEN) THEN
          DO I=1,NOPEN
             AKMAT(I,J) = BB(I,J)
             fkmat(I,J) = BB(I,J)
          END DO
          DO I=NOPEN+1,NCHAN
             fkmat(I,J) = BB(I,J)
          END DO
       ELSE
          DO I=1,NCHAN
             fkmat(I,J) = 0
          END DO
       ENDIF
    ENDDO
    !$OMP END DO
    !$OMP END PARALLEL

    RETURN
    END
    SUBROUTINE RPROPX(NCHAN,NVCHAN,NDIS,CRV,CRD,RMAT,IPFLG,IWRITE,IBACK)

    IMPLICIT DOUBLE PRECISION(A-H,O-Z)
!
!***********************************************************************
!
!     RPROPX MERGES GLOBAL R-MATRICES INTO A SUPER GLOBAL R-MATRIX
!      THEN PROPAGATES THE INPUT SUPER R-MATRIX IN ONE STEP TO THE
!      REQUIRED RADII
!
!***********************************************************************
!
    INTEGER :: NCHAN
    DIMENSION RMAT(NCHAN,NCHAN),CRV(*),CRD(*),wk(nchan), &
    r11(nchan*(nchan+1)/2),r22(nchan*(nchan+1)/2), &
    r12(nchan*nchan)
    DOUBLE PRECISION :: ZERO=0.0D0
    INTEGER :: ICOL=6, NVCHAN, NDIS, IPFLG, IWRITE, IBACK
    INTEGER :: NCHSQ,NCH2,NVCHSQ,NVCH2,NDSQ,ND2,IV11,IV12,IV22,ID11,ID12,ID22
    INTEGER :: I,K,J,KD,KR
!
!     SIGN is set to 1 for propagating forward and -1 for 
!          propagating backwards
    SIGN=REAL(IBACK)
!
    NCHSQ = NCHAN*NCHAN
    NCH2 = (NCHSQ+NCHAN)/2
    NVCHSQ = NVCHAN*NVCHAN
    NVCH2 = (NVCHSQ+NVCHAN)/2
    NDSQ = NDIS*NDIS
    ND2 = (NDSQ+NDIS)/2
!
    IV11 = 1
    IV12 = IV11+NVCH2
    IV22 = IV12+NVCHSQ
    ID11 = 1
    ID12 = ID11+ND2
    ID22 = ID12+NDSQ
!
    DO 7 I=1,nch2
        r11(I) = ZERO
        r22(i) = zero
    7 END DO
    do 8 i=1,nchsq
        r12(i) = zero
    8 END DO
!
    K = 0
    DO 11 I=1,NVCHAN
        DO 1 J=1,I
            K = K+1
            R11(K) = CRV(IV11+K-1)
            R22(K) = CRV(IV22+K-1)
        1 END DO
    11 END DO
    KD = -1
    DO 21 I=1,NDIS
        K = K+NVCHAN
        DO 2 J=1,I
            KD = KD+1
            K = K+1
            R11(K) = CRD(ID11+KD)
            R22(K) = CRD(ID22+KD)
        2 END DO
    21 END DO
!
    KR = IV12-1
    K = -NCHAN
    DO 31 I=1,NVCHAN
        K = K+NCHAN
        DO 3 J=1,NVCHAN
            KR = KR+1
            R12(K+J) = CRV(KR)
        3 END DO
    31 END DO
    K = K+NCHAN
    KD = ID12-1
    DO 41 I=1,NDIS
        K = K+NVCHAN
        DO 4 J=1,NDIS
            K = K+1
            KD = KD+1
            R12(K) = CRD(KD)
        4 END DO
    41 END DO
!
!----- ADD LOWER TRIANGLE OF R-MULTIPLIED R-MATRIX TO R11
    K = 0
    DO 51 J=1,NCHAN
        DO 5 I=1,J
            K = K+1
            R11(K) = R11(K)+ SIGN*RMAT(I,J)
        5 END DO
    51 END DO
!
!----- EVALUATE MATRIX EXPRESSION TO GET LOWER TRIANGLE OF NEW
!      R-MULTIPLIED R-MATRIX
    CALL FACTOR(NCHAN,R11,WK)
    CALL MULTC(NCHAN,NCHAN,R11,R12)
    CALL MULTD(NCHAN,NCHAN,R12,R22,R11,WK)
!
!      UNPACK FINAL R-MATRIX
    K = 0
    DO 61 J=1,NCHAN
        DO 6 I=1,J
            K = K+1
            RMAT(I,J) = SIGN*R11(K)
            RMAT(J,I) = RMAT(I,J)
        6 END DO
    61 END DO
!
    IF(IPFLG > 0) THEN
        WRITE(IWRITE,1013)
        1013 FORMAT(/' FINAL R-MATRIX IS'/)
        CALL WRECMT(RMAT,NCHAN,NCHAN,NCHAN,NCHAN,ICOL,IWRITE)
    ENDIF
!
    RETURN
    END
    SUBROUTINE WRONSK(NCHAN,NOPEN,F,FP,IWRITE,IDIAG,EPS)
!
!***********************************************************************
!
!     WRONSK  PRINTS THE ASYMPTOTIC WAVEFUNCTIONS, F AND THEIR
!                    DERIVATIVES, FP, IF IDIAG IS NONZERO.
!
!             THE MULTICHANNEL WRONSKIAN RELATIONS ARE EVALUATED AND A
!             WARNING PRINTED IF THEY ARE VIOLATED BY MORE THAN EPS.
!
!             NCHAN     IS THE NUMBER OF CHANNELS
!             NOPEN     IS THE NUMBER OF OPEN CHANNELS ( ORDERED FIRST )
!             IWRITE    UNIT NUMBER OF THE PRINTER OR OUTPUT DEVICE
!
!             F,FP      ASYMPTOTIC FUNCTIONS AND DERIVATIVES
!
!***********************************************************************
!
    IMPLICIT DOUBLE PRECISION(A-H,O-Z)
!
    INTEGER :: NOPEN, IWRITE, IDIAG, NCHAN
    DIMENSION F(NCHAN,NCHAN,2),FP(NCHAN,NCHAN,2)
!
    DOUBLE PRECISION :: ZERO = 0.0D0,ONE=1.0D0
    INTEGER :: I,I0,J1,J2
!
    1000 FORMAT(1X,4D30.15)
    1010 FORMAT(/,' REGULAR FUNCTIONS')
    1020 FORMAT(/,' REGULAR DERIVATIVES')
    1030 FORMAT(/,' IRREGULAR FUNCTIONS')
    1040 FORMAT(/,' IRREGULAR DERIVATIVES')
    1050 FORMAT(/,' MULTICHANNEL WRONSKIAN RELATIONS :',/,' I0 =',I3,/)
    1060 FORMAT(' I1 =',I3,5X,' I2 =',I3,5X,'WRONSKIAN =',D30.17)
    1070 FORMAT(' I0 =',I3,5X,' I1 =',I3,5X,' I2 =',I3,5X,'WRONSKIAN =', &
    D30.17)
!
!     PRINT VALUES OF ASYMPTOTIC FUNCTIONS AND THEIR DERIVATIVES
!
    IF(IDIAG == 0) GO TO 50
    WRITE(IWRITE,1010)
    DO 10 I=1,NCHAN
        WRITE(IWRITE,1000) (F(I,J,1),J=1,NCHAN)
    10 END DO
    WRITE(IWRITE,1020)
    DO 20 I=1,NCHAN
        WRITE(IWRITE,1000) (FP(I,J,1),J=1,NCHAN)
    20 END DO
    WRITE(IWRITE,1030)
    DO 30 I=1,NCHAN
        WRITE(IWRITE,1000) (F(I,J,2),J=1,NCHAN)
    30 END DO
    WRITE(IWRITE,1040)
    DO  40  I=1,NCHAN
        WRITE(IWRITE,1000) ( FP(I,J,2),J=1,NCHAN)
    40 END DO
!
!     CHECK MULTICHANNEL WRONSKIAN RELATIONS FOR SOLUTIONS
!
    50 DO 82 I0=1,2
        IF(IDIAG /= 0) WRITE(IWRITE,1050) I0
    !
        DO 81 J1=1,NCHAN
            DO 80 J2=1,NCHAN
                SUM=ZERO
                TSUM=ZERO
                DO 60 I=1,NCHAN
                    SUM=SUM+FP(I,J1,1)*F(I,J2,I0)-F(I,J1,1)*FP(I,J2,I0)
                60 END DO
            !
                IF(IDIAG == 0) then
                    IF(I0 == 2 .AND. J1 == J2 .AND. J1 <= NOPEN) TSUM=ONE
                    IF(DABS(SUM-TSUM) > EPS) WRITE(IWRITE,1070)I0,J1,J2,SUM
                else
                    WRITE(IWRITE,1060)J1,J2,SUM
                endif
            !
            80 END DO
        81 END DO
    82 END DO
!
    RETURN
    END
    subroutine curlyr_back_prop_test(nchan,rmatr,rafinv,rmat,cr,rvib2)
!
!***********************************************************************
!
!     CURLYR_BACK_PROP PROPAGATES THE RADIAL FUNCTIONS FROM THE MATCHING  
!                      RADIUS TO THE R-MATRIX BOUNDARY USING THE GLOBAL
!                      R-MATRIX PROPAGATOR USED FOR THE FOWARD
!                      PROPAGATION.
!
!             THE MULTICHANNEL WRONSKIAN RELATIONS ARE EVALUATED AND A
!             WARNING PRINTED IF THEY ARE VIOLATED BY MORE THAN EPS.
!
!             INPUTS
!
!             NCHAN     IS THE NUMBER OF CHANNELS
!             CR        THE CURLY R MATRIX
!             RMAT      THE
!             IWRITE    UNIT NUMBER OF THE PRINTER OR OUTPUT DEVICE
!
!
!             F,FP      ASYMPTOTIC FUNCTIONS AND DERIVATIVES
!
!***********************************************************************
    use lapack95_compak
    implicit none
    
!     Argument variables
    integer :: nchan
    real(kind=8) :: rmatr,rafinv
    real(kind=16) :: rmat(nchan*nchan)
    real(kind=8) ::  cr(2*nchan*nchan+nchan),rvib2(nchan*nchan)
    
!     Local variables
    integer :: ir11,ir12,ir22, i,j,k,m,info
    integer, allocatable :: ipiv(:)
    real(kind=16), allocatable ::r11(:,:),r12(:,:),r21(:,:), &
    r22(:,:), rmat_test(:,:),matrix_identity(:,:),rmat_inverse(:,:),  &
    temp_matrix(:,:), r22_minus_rmat(:,:)
    real(kind=16) :: total_error
    
!         Test back propagation of the R-matrix
!         Expand and extract curly R-matrices
    print *, 'Before any allocation'
    allocate(r11(nchan,nchan),r12(nchan,nchan), &
    r21(nchan,nchan),r22(nchan,nchan), &
    r22_minus_rmat(nchan,nchan))
    r11=0.0d0;r12=0.0d0;r21=0.0d0;r22=0.0d0;r22_minus_rmat=0.0d0
    print *, 'After first allocation'
!         should be declared 
    ir11=1
    ir12=ir11+(nchan**2+nchan)/2
    ir22=ir12+nchan**2
    
    k=0;m=0
    do j=1,nchan
        do i=1,nchan
            r12(i,j)=cr(ir12+m)
            r21(j,i)=cr(ir12+m)
            m=m+1
        end do
    end do
    call quad_squarm(nchan,1,cr(ir11:ir12-1),r11)
    call quad_squarm(nchan,1,cr(ir22:),r22)
    
    
    
    k=1
    do j=1,nchan
        do i=1,nchan
            r22_minus_rmat(i,j)=r22(i,j)-rmat(k) ! Back propagation
        !               r11(i,j)=r11(i,j)+rvib2(k) ! Forward propagation
            k=k+1
        end do
    end do
    
!       Invert R-matrix
    allocate(matrix_identity(nchan,nchan))
    print *, 'After 2nd allocation'
    matrix_identity=0.0d0
    
    do i=1,nchan
        matrix_identity(i,i)=1.0d0
    end do
    
    info=0
    allocate(ipiv(nchan),temp_matrix(nchan,nchan))
    print *, 'After 3rd allocation'
    ipiv=0 
    temp_matrix=r22_minus_rmat  ! Back propagation
!           temp_matrix=r11  ! Forward propagation
    
    
    
    allocate(rmat_inverse(nchan,nchan))
    print *, 'After 4th allocation'
    rmat_inverse=matrix_identity
    
!     MKL LAPACK f95 routines
!       call getrf(temp_matrix,ipiv)
!       call getrs(temp_matrix,ipiv,rmat_inverse)
    
!     Quad precision version of MA01A
    
    call quad_MA01A_f95(temp_matrix,rmat_inverse,nchan,nchan)
    rmat_inverse=temp_matrix
    
    
    allocate(rmat_test(nchan,nchan))
    rmat_test=0.0d0        
    
    rmat_test=matmul(matmul(r12,rmat_inverse),r21)-r11!Back prop
!           rvib_test=r22-matmul(matmul(r21,rmat_inverse),r12) !Forward
    
    total_error=0.d0
    k=1
    do j=1,nchan
        do i=1,nchan
            total_error=total_error+rmat_test(i,j)-rvib2(k)
            write(11236,'(i5,i5,2d20.5)') i,j,rmat_test(i,j),rvib2(k)
            k=k+1
        end do
    end do
    write(11236,*) ""
    write(11235,'(/,"TOTAL ERROR", d20.5)')  total_error
    
!       write(11235,'(/,"R-matrix at r=a")') 
!        write(11235,*) rmat_test
    
    end subroutine curlyr_back_prop_test
    subroutine curlyr_forward_prop_test(nchan,rmatr,rafinv,rmat,cr, &
    rvib2,rmat_quad)
!
!***********************************************************************
!
! CURLYR_FORWARD_PROP PROPAGATES THE RADIAL FUNCTIONS FROM THE MATCHING  
!                      RADIUS TO THE R-MATRIX BOUNDARY USING THE GLOBAL
!                      R-MATRIX PROPAGATOR USED FOR THE FOWARD
!                      PROPAGATION.
!
!             THE MULTICHANNEL WRONSKIAN RELATIONS ARE EVALUATED AND A
!             WARNING PRINTED IF THEY ARE VIOLATED BY MORE THAN EPS.
!
!             INPUTS
!
!             NCHAN     IS THE NUMBER OF CHANNELS
!             CR        THE CURLY R MATRIX
!             RMAT      THE
!             IWRITE    UNIT NUMBER OF THE PRINTER OR OUTPUT DEVICE
!
!
!             F,FP      ASYMPTOTIC FUNCTIONS AND DERIVATIVES
!
!***********************************************************************
    use lapack95_compak
    implicit none
    
!     Argument variables
    integer :: nchan
    real(kind=8) :: rmatr,rafinv
    real(kind=8) :: rmat(nchan*nchan), cr(2*nchan*nchan+nchan), &
    rvib2(nchan*nchan)
    real(kind=16) :: rmat_quad(nchan*nchan)
!     Local variables
    integer :: ir11,ir12,ir22, i,j,k,m,info
    integer, allocatable :: ipiv(:)
    real(kind=16), allocatable ::r11(:,:),r12(:,:),r21(:,:), &
    r22(:,:), rmat_test(:,:),matrix_identity(:,:),rmat_inverse(:,:),  &
    temp_matrix(:,:), r11_plus_rmat(:,:)
    real(kind=16) :: total_error
    
!         Test back propagation of the R-matrix
!         Expand and extract curly R-matrices
    
    allocate(r11(nchan,nchan),r12(nchan,nchan), &
    r21(nchan,nchan),r22(nchan,nchan), &
    r11_plus_rmat(nchan,nchan))
    r11=0.0d0;r12=0.0d0;r21=0.0d0;r22=0.0d0;r11_plus_rmat=0.0d0
    
    ir11=1
    ir12=ir11+(nchan**2+nchan)/2
    ir22=ir12+nchan**2
    
    k=0;m=0
    do j=1,nchan
        do i=1,nchan
            r12(i,j)=cr(ir12+m)
            r21(j,i)=cr(ir12+m)
            m=m+1
        end do
    end do
!       call squarm(nchan,1,cr(ir11:ir12-1),r11)
!       call squarm(nchan,1,cr(ir22:),r22)
    call quad_squarm(nchan,1,cr(ir11:ir12-1),r11)
    call quad_squarm(nchan,1,cr(ir22:),r22)
    
    
    write(11237,*) 'R11,  RMAT'
    k=1
    do j=1,nchan
        do i=1,nchan
            write(11237,'(i5,i5,2d40.20)') i,j,r11(i,j),rvib2(k)
            k=k+1
        end do
    end do
    write(11237,*) ""
    
    k=1
    do j=1,nchan
        do i=1,nchan
            r11_plus_rmat(i,j)=r11(i,j)+rvib2(k) ! Forward propagation
            k=k+1
        end do
    end do
    
    write(11237,*) 'R11 + RMAT'
    do j=1,nchan
        do i=1,nchan
            write(11237,'(i5,i5,2d40.20)') i,j,r11_plus_rmat(i,j)
            
        end do
    end do
    write(11237,*) ""
    
!       Invert R-matrix
    allocate(matrix_identity(nchan,nchan))
    matrix_identity=0.0d0
    
    do i=1,nchan
        matrix_identity(i,i)=1.0d0
    end do
    
    info=0
    allocate(ipiv(nchan),temp_matrix(nchan,nchan))
    ipiv=0 
    temp_matrix=r11_plus_rmat
    
    allocate(rmat_inverse(nchan,nchan))
    rmat_inverse=matrix_identity
    
!     MKL LAPACK f95 routines
    
!       call getrf(temp_matrix,ipiv)
!       call getrs(temp_matrix,ipiv,rmat_inverse)
    
!     Quad precision version of MA01A
    
    call quad_MA01A_f95(temp_matrix,rmat_inverse,nchan,nchan)
    rmat_inverse=temp_matrix
    
    write(11237,*) 'R11 + RMAT INVERTED'
    do j=1,nchan
        do i=1,nchan
            write(11237,'(i5,i5,2d40.20)') i,j,rmat_inverse(i,j)
            
        end do
    end do
    write(11237,*) ""
    
    allocate(rmat_test(nchan,nchan))
    rmat_test=0.0d0
    
!       rmat_test=matmul(matmul(r12,rmat_inverse),r21)-r11!Back prop
    rmat_test=r22-matmul(matmul(r21,rmat_inverse),r12) !Forward
    
    total_error=0.d0
    k=1
    write(11237,*) 'RMATRIX COMPARISON'
    do j=1,nchan
        do i=1,nchan
            write(11237,'(i5,i5,2d40.20)') i,j,rmat_test(i,j),rmat(k)
            rmat_quad(k)=rmat_test(i,j)
            k=k+1
        end do
    end do
    write(11237,*) ""
    
    
    
!       write(11235,'(/,"R-matrix at r=a")') 
!        write(11235,*) rmat_test
    
    end subroutine curlyr_forward_prop_test
    subroutine curlyr_back_prop(nchan,rmatr,rafinv,rmat_at_a,rmat,cr, &
    fx,fxp)
!
!***********************************************************************
!
!     CURLYR_BACK_PROP PROPAGATES THE RADIAL FUNCTIONS FROM THE MATCHING  
!                      RADIUS TO THE R-MATRIX BOUNDARY USING THE GLOBAL
!                      R-MATRIX PROPAGATOR USED FOR THE FOWARD
!                      PROPAGATION.
!
!             THE MULTICHANNEL WRONSKIAN RELATIONS ARE EVALUATED AND A
!             WARNING PRINTED IF THEY ARE VIOLATED BY MORE THAN EPS.
!
!             INPUTS
!
!             NCHAN     IS THE NUMBER OF CHANNELS
!             CR        THE CURLY R MATRIX
!             RMAT      THE R-MATRIX
!             IWRITE    UNIT NUMBER OF THE PRINTER OR OUTPUT DEVICE
!
!
!             FX,FXP      ASYMPTOTIC FUNCTIONS AND DERIVATIVES
!
!***********************************************************************
    use blas_lapack_gbl, only: blasint
    use lapack95_compak
    implicit none
    
!     Argument variables
    integer :: nchan
    double precision :: rmatr,rafinv
    double precision :: rmat_at_a(nchan*nchan),rmat(nchan*nchan), &
    cr(2*nchan*nchan+nchan),fx(nchan,nchan,2),fxp(nchan,nchan,2)
    
!     Local variables
    integer :: ir11,ir12,ir22, i,j,k,m,info
    integer(blasint), allocatable :: ipiv(:)
    double precision, allocatable ::r11(:,:),r12(:,:),r21(:,:), &
    r22(:,:), rmat_prop(:,:),matrix_identity(:,:),rmat_inverse(:,:),  &
    temp_matrix(:,:),fx_prop(:,:,:),fxp_prop(:,:,:), &
    curlyr_inverse(:,:), r22_minus_rmat(:,:)
    
!     Expand and extract curly R-matrices
!     -----------------------------------
    allocate(r11(nchan,nchan),r12(nchan,nchan), &
    r21(nchan,nchan),r22(nchan,nchan), &
    r22_minus_rmat(nchan,nchan))
    r11=0.0d0;r12=0.0d0;r21=0.0d0;r22=0.0d0;r22_minus_rmat=0.0d0
    
!     Determine first index of each matrix
    ir11=1
    ir12=ir11+(nchan**2+nchan)/2
    ir22=ir12+nchan**2
    
    k=0;m=0
    do j=1,nchan
        do i=1,nchan
            r12(i,j)=cr(ir12+m)
            r21(j,i)=cr(ir12+m)
            m=m+1
        end do
    end do
    call squarm(nchan,1,cr(ir11:ir12-1),r11)
    call squarm(nchan,1,cr(ir22:),r22)
    
    k=1
    do j=1,nchan
        do i=1,nchan
            r22_minus_rmat(i,j)=r22(i,j)-rmat(k) ! Back propagation
        !               r11(i,j)=r11(i,j)+rvib2(k) ! Forward propagation
            k=k+1
        end do
    end do
    
!     Invert R-matrices
    allocate(matrix_identity(nchan,nchan))
    matrix_identity=0.0d0
    
    do i=1,nchan
        matrix_identity(i,i)=1.0d0
    end do
    
    info=0
    allocate(ipiv(nchan),temp_matrix(nchan,nchan))
    ipiv=0 
    temp_matrix=r21  ! Back propagation
    
    call getrf(temp_matrix,ipiv)
    
    allocate(curlyr_inverse(nchan,nchan))
    
    curlyr_inverse=matrix_identity
    
    call getrs(temp_matrix,ipiv,curlyr_inverse)
    
    ipiv=0; info=0;
    
    k=1
    do j=1,nchan
        do i=1,nchan
            temp_matrix(i,j)=rmat_at_a(k)
            k=k+1
        end do
    end do
    
    call getrf(temp_matrix,ipiv)
    
    allocate(rmat_inverse(nchan,nchan))
    
    rmat_inverse=matrix_identity
    
    call getrs(temp_matrix,ipiv,rmat_inverse)
    
    
    allocate(rmat_prop(nchan,nchan))
    rmat_prop=0.0d0        
    
    rmat_prop=r12-matmul(matmul(r11,curlyr_inverse),r22_minus_rmat)
    
    allocate(fx_prop(nchan,nchan,2),fxp_prop(nchan,nchan,2))
    
    fx_prop(:,:,1)=matmul(rmat_prop,fxp(:,:,1))
    fx_prop(:,:,2)=matmul(rmat_prop,fxp(:,:,2))
    
    
    fxp_prop(:,:,1)=matmul(curlyr_inverse, &
    matmul(r22,fxp(:,:,1))-fx(:,:,1))
    fxp_prop(:,:,2)=matmul(curlyr_inverse, &
    matmul(r22,fxp(:,:,2))-fx(:,:,2))
    
!       fxp_prop(:,:,1)=matmul(rmat_inverse,fx_prop(:,:,1))
!       fxp_prop(:,:,2)=matmul(rmat_inverse,fx_prop(:,:,2))
    
    fx=fx_prop    
    fxp=fxp_prop
    
    return
    end subroutine curlyr_back_prop
!
!
!     QUAD PRECISION VERSIONS OF SUBROUTINES - Alex H
!     Needed for back propagation
!
!
    subroutine quad_curlyr_back_prop(nchan,rmatr,rafinv,rmat_at_a, &
    rmat,cr,fx,fxp)
!
!***********************************************************************
!
!     CURLYR_BACK_PROP PROPAGATES THE RADIAL FUNCTIONS FROM THE MATCHING  
!                      RADIUS TO THE R-MATRIX BOUNDARY USING THE GLOBAL
!                      R-MATRIX PROPAGATOR USED FOR THE FOWARD
!                      PROPAGATION.
!
!             THE MULTICHANNEL WRONSKIAN RELATIONS ARE EVALUATED AND A
!             WARNING PRINTED IF THEY ARE VIOLATED BY MORE THAN EPS.
!
!             INPUTS
!
!             NCHAN     IS THE NUMBER OF CHANNELS
!             CR        THE CURLY R MATRIX
!             RMAT      THE R-MATRIX
!             IWRITE    UNIT NUMBER OF THE PRINTER OR OUTPUT DEVICE
!
!
!             FX,FXP      ASYMPTOTIC FUNCTIONS AND DERIVATIVES
!
!***********************************************************************
    use lapack95_compak
    implicit none
    
!     Argument variables
    integer :: nchan
    double precision :: rmatr,rafinv
    double precision :: rmat_at_a(nchan*nchan), &
    cr(2*nchan*nchan+nchan),fx(nchan,nchan,2),fxp(nchan,nchan,2)
    real(kind=16) :: rmat(nchan*nchan)
!     Local variables
    integer :: ir11,ir12,ir22, i,j,k,m,info
    integer, allocatable :: ipiv(:)
    real(kind=16), allocatable ::r11(:,:),r12(:,:),r21(:,:), &
    r22(:,:), rmat_prop(:,:),matrix_identity(:,:),rmat_inverse(:,:),  &
    temp_matrix(:,:),fx_prop(:,:,:),fxp_prop(:,:,:), &
    curlyr_inverse(:,:), r22_minus_rmat(:,:)
    
!     Expand and extract curly R-matrices
!     -----------------------------------
    allocate(r11(nchan,nchan),r12(nchan,nchan), &
    r21(nchan,nchan),r22(nchan,nchan), &
    r22_minus_rmat(nchan,nchan))
    r11=0.0d0;r12=0.0d0;r21=0.0d0;r22=0.0d0;r22_minus_rmat=0.0d0
    
!     Determine first index of each matrix
    ir11=1
    ir12=ir11+(nchan**2+nchan)/2
    ir22=ir12+nchan**2
    
    k=0;m=0
    do j=1,nchan
        do i=1,nchan
            r12(i,j)=cr(ir12+m)
            r21(j,i)=cr(ir12+m)
            m=m+1
        end do
    end do
    call quad_squarm(nchan,1,cr(ir11:ir12-1),r11)
    call quad_squarm(nchan,1,cr(ir22:),r22)
    
    k=1
    do j=1,nchan
        do i=1,nchan
            r22_minus_rmat(i,j)=r22(i,j)-rmat(k) ! Back propagation
        !               r11(i,j)=r11(i,j)+rvib2(k) ! Forward propagation
            k=k+1
        end do
    end do
    
!     Invert R-matrices
    allocate(matrix_identity(nchan,nchan))
    matrix_identity=0.0d0
    
    do i=1,nchan
        matrix_identity(i,i)=1.0d0
    end do
    
    info=0
    allocate(ipiv(nchan),temp_matrix(nchan,nchan))
    ipiv=0 
    temp_matrix=r21  ! Back propagation
    
    allocate(curlyr_inverse(nchan,nchan))      
    curlyr_inverse=matrix_identity
    
    
    call quad_MA01A_f95(temp_matrix,curlyr_inverse,nchan,nchan)
    curlyr_inverse=temp_matrix
    
!     LAPACK inversion (not quad prec.)
!       call getrf(temp_matrix,ipiv)
!       call getrs(temp_matrix,ipiv,curlyr_inverse)
    
    
    ipiv=0; info=0;
    
    k=1
    do j=1,nchan
        do i=1,nchan
            temp_matrix(i,j)=rmat_at_a(k)
            k=k+1
        end do
    end do
    
    allocate(rmat_inverse(nchan,nchan))
    rmat_inverse=matrix_identity
    
    call quad_MA01A_f95(temp_matrix,rmat_inverse,nchan,nchan)
    rmat_inverse=temp_matrix
    
!     LAPACK inversion (not quad prec.)
!       call getrf(temp_matrix,ipiv)
!       call getrs(temp_matrix,ipiv,rmat_inverse)
    
    
    allocate(rmat_prop(nchan,nchan))
    rmat_prop=0.0d0        
    
    rmat_prop=r12-matmul(matmul(r11,curlyr_inverse),r22_minus_rmat)
    
    allocate(fx_prop(nchan,nchan,2),fxp_prop(nchan,nchan,2))
    
    fx_prop(:,:,1)=matmul(rmat_prop,fxp(:,:,1))
    fx_prop(:,:,2)=matmul(rmat_prop,fxp(:,:,2))
    
    
    fxp_prop(:,:,1)=matmul(curlyr_inverse, &
    matmul(r22,fxp(:,:,1))-fx(:,:,1))
    fxp_prop(:,:,2)=matmul(curlyr_inverse, &
    matmul(r22,fxp(:,:,2))-fx(:,:,2))
    
!     Method 2 for propagating fxp (needs rmat_invers)
!       fxp_prop(:,:,1)=matmul(rmat_inverse,fx_prop(:,:,1))
!       fxp_prop(:,:,2)=matmul(rmat_inverse,fx_prop(:,:,2))
    
    fx=fx_prop    
    fxp=fxp_prop
    
    return
    end subroutine quad_curlyr_back_prop
!
    subroutine quad_curlyr_back_prop_no_inversion(nchan,rmatr, &
    rafinv,rmat_at_a,rmat,cr,fx,fxp)
!
!***********************************************************************
!
!     CURLYR_BACK_PROP PROPAGATES THE RADIAL FUNCTIONS FROM THE MATCHING  
!                      RADIUS TO THE R-MATRIX BOUNDARY USING THE GLOBAL
!                      R-MATRIX PROPAGATOR USED FOR THE FOWARD
!                      PROPAGATION.
!
!             THE MULTICHANNEL WRONSKIAN RELATIONS ARE EVALUATED AND A
!             WARNING PRINTED IF THEY ARE VIOLATED BY MORE THAN EPS.
!
!             INPUTS
!
!             NCHAN     IS THE NUMBER OF CHANNELS
!             CR        THE CURLY R MATRIX
!             RMAT      THE R-MATRIX
!             IWRITE    UNIT NUMBER OF THE PRINTER OR OUTPUT DEVICE
!
!
!             FX,FXP      ASYMPTOTIC FUNCTIONS AND DERIVATIVES
!
!***********************************************************************
    use blas_lapack_gbl, only: blasint
    use lapack95_compak
    implicit none
    
!     Argument variables
    integer :: nchan
    double precision :: rmatr,rafinv
    double precision :: rmat_at_a(nchan*nchan), &
    cr(2*nchan*nchan+nchan),fx(nchan,nchan,2),fxp(nchan,nchan,2)
    real(kind=16) :: rmat(nchan*nchan)
!     Local variables
    integer :: ir11,ir12,ir22, i,j,k,m,info
    integer(blasint), allocatable :: ipiv(:)
    real(kind=16), allocatable ::r11(:,:),r12(:,:),r21(:,:), &
    r22(:,:), rmat_prop(:,:),matrix_identity(:,:),rmat_inverse(:,:),  &
    temp_matrix(:,:),fx_prop(:,:,:),fxp_prop(:,:,:), &
    curlyr_inverse(:,:), r22_minus_rmat(:,:)
    
    real(kind=8), allocatable :: lhs_a(:,:),lhs_af(:,:), rhs(:,:), &
    sol_mat(:,:)
    
    real(kind=8) :: ferr(nchan),berr(nchan),anorm,anorm_temp(nchan)
    real(kind=8) :: rcond
!     Expand and extract curly R-matrices
!     -----------------------------------
    allocate(r11(nchan,nchan),r12(nchan,nchan), &
    r21(nchan,nchan),r22(nchan,nchan), &
    r22_minus_rmat(nchan,nchan))
    r11=0.0d0;r12=0.0d0;r21=0.0d0;r22=0.0d0;r22_minus_rmat=0.0d0
    
!     Determine first index of each matrix
    ir11=1
    ir12=ir11+(nchan**2+nchan)/2
    ir22=ir12+nchan**2
    
    k=0;m=0
    do j=1,nchan
        do i=1,nchan
            r12(i,j)=cr(ir12+m)
            r21(j,i)=cr(ir12+m)
            m=m+1
        end do
    end do
    call quad_squarm(nchan,1,cr(ir11:ir12-1),r11)
    call quad_squarm(nchan,1,cr(ir22:),r22)
    
    k=1
    do j=1,nchan
        do i=1,nchan
            r22_minus_rmat(i,j)=r22(i,j)-rmat(k) ! Back propagation
        !               r11(i,j)=r11(i,j)+rvib2(k) ! Forward propagation
            k=k+1
        end do
    end do
    
!     Invert R-matrices
    allocate(matrix_identity(nchan,nchan))
    matrix_identity=0.0d0
    
    do i=1,nchan
        matrix_identity(i,i)=1.0d0
    end do
    
    info=0
    allocate(ipiv(nchan),temp_matrix(nchan,nchan))
    ipiv=0 
    temp_matrix=r21  ! Back propagation
    
    
    allocate(fx_prop(nchan,nchan,2),fxp_prop(nchan,nchan,2))
    fxp_prop(:,:,1)=matmul(r22,fxp(:,:,1))-fx(:,:,1)
    fxp_prop(:,:,2)=matmul(r22,fxp(:,:,2))-fx(:,:,2)
    
!     TESTING ITERATIVE REFINEMENT
!     -----------------------------
    allocate(lhs_a(nchan,nchan),sol_mat(nchan,nchan), &
    lhs_af(nchan,nchan),rhs(nchan,nchan))
!     LAPACK inversion (not quad prec.)
    
    lhs_a=r21
    lhs_af=r21
    call getrf(lhs_af,ipiv)
    
!     compute the 1-norm for gettinf the condition
    anorm_temp=0
    do i=1,nchan
        anorm_temp=anorm_temp+abs(r21(i,:))
    end do
    anorm=maxval(anorm_temp) 
    
    call gecon( lhs_af, anorm, rcond)
    write(1003,*) 'anorm,rcond', anorm,rcond
    
    sol_mat=fxp_prop(:,:,1)
    call getrs(lhs_af,ipiv,sol_mat)
    rhs=fxp_prop(:,:,1)
    call gerfs( lhs_a, lhs_af, ipiv, rhs, sol_mat,'N',ferr,berr)
!       call gerfsx( lhs_a, lhs_af, ipiv, rhs, sol_mat) 
    fxp_prop(:,:,1)=sol_mat
    write(1001,*) 'FERR 1'
    write(1001,*) FERR
    write(1001,*) 'BERR 1'
    write(1001,*) BERR
    write(1001,*) ''
    
    sol_mat=fxp_prop(:,:,2)
    call getrs(lhs_af,ipiv,sol_mat)
    rhs=fxp_prop(:,:,2)
    call gerfs( lhs_a, lhs_af, ipiv, rhs, sol_mat,'N',ferr,berr)
!       call gerfsx( lhs_a, lhs_af, ipiv, rhs, sol_mat)
    fxp_prop(:,:,2)=sol_mat
    write(1002,*) 'FERR 2'
    write(1002,*) FERR
    write(1002,*) 'BERR 2'
    write(1002,*) BERR
    write(1002,*) ''
!     -----------------------------
    
!       call quad_MA01A_f95(temp_matrix,fxp_prop(:,:,1),nchan,nchan)
!       fxp_prop(:,:,1)=temp_matrix
! 
!       temp_matrix=r21
! 
!       call quad_MA01A_f95(temp_matrix,fxp_prop(:,:,2),nchan,nchan)
!       fxp_prop(:,:,2)=temp_matrix
! 
! !       allocate(rmat_prop(nchan,nchan))
! !       rmat_prop=0.0d0        
! ! 
! !       rmat_prop=r12-matmul(matmul(r11,curlyr_inverse),r22_minus_rmat)
    
    
    fx_prop(:,:,1)=matmul(r12,fxp(:,:,1))-matmul(r11,fxp_prop(:,:,1))
    fx_prop(:,:,2)=matmul(r12,fxp(:,:,2))-matmul(r11,fxp_prop(:,:,2))
    
    
    
    fx=fx_prop    
    fxp=fxp_prop
    
    return
    end subroutine quad_curlyr_back_prop_no_inversion
!
    subroutine quad_curlyr_back_prop_with_K(nchan,rmatr, &
    rafinv,rmat_at_a,rmat,cr,fx,fxp,fkmat)
!
!***********************************************************************
!
!     CURLYR_BACK_PROP PROPAGATES THE RADIAL FUNCTIONS FROM THE MATCHING  
!                      RADIUS TO THE R-MATRIX BOUNDARY USING THE GLOBAL
!                      R-MATRIX PROPAGATOR USED FOR THE FOWARD
!                      PROPAGATION.
!
!             THE MULTICHANNEL WRONSKIAN RELATIONS ARE EVALUATED AND A
!             WARNING PRINTED IF THEY ARE VIOLATED BY MORE THAN EPS.
!
!             INPUTS
!
!             NCHAN     IS THE NUMBER OF CHANNELS
!             CR        THE CURLY R MATRIX
!             RMAT      THE R-MATRIX
!             IWRITE    UNIT NUMBER OF THE PRINTER OR OUTPUT DEVICE
!
!
!             FX,FXP      ASYMPTOTIC FUNCTIONS AND DERIVATIVES
!
!***********************************************************************
    use blas_lapack_gbl, only: blasint
    use lapack95_compak
    implicit none
    
!     Argument variables
    integer :: nchan
    double precision :: rmatr,rafinv
    double precision :: rmat_at_a(nchan*nchan),fkmat(nchan,nchan), &
    cr(2*nchan*nchan+nchan),fx(nchan,nchan,2),fxp(nchan,nchan,2)
    real(kind=16) :: rmat(nchan*nchan)
!     Local variables
    integer :: ir11,ir12,ir22, i,j,k,m,info
    integer(blasint), allocatable :: ipiv(:)
    real(kind=16), allocatable ::r11(:,:),r12(:,:),r21(:,:), &
    r22(:,:), rmat_prop(:,:),matrix_identity(:,:),rmat_inverse(:,:),  &
    temp_matrix(:,:),fx_prop(:,:,:),fxp_prop(:,:,:), &
    curlyr_inverse(:,:), r22_minus_rmat(:,:), &
    fkmat_long(:,:), fx_with_K(:,:), fxp_with_K(:,:), &
    fx_with_K_prop(:,:),fxp_with_K_prop(:,:)
    
    real(kind=8), allocatable :: lhs_a(:,:),lhs_af(:,:), rhs(:,:), &
    sol_mat(:,:)
    
    real(kind=8) :: ferr(nchan),berr(nchan),anorm,anorm_temp(nchan)
    real(kind=8) :: rcond
!     Expand and extract curly R-matrices
!     -----------------------------------
    allocate(r11(nchan,nchan),r12(nchan,nchan), &
    r21(nchan,nchan),r22(nchan,nchan), &
    r22_minus_rmat(nchan,nchan))
    r11=0.0d0;r12=0.0d0;r21=0.0d0;r22=0.0d0;r22_minus_rmat=0.0d0
    
!     Determine first index of each matrix
    ir11=1
    ir12=ir11+(nchan**2+nchan)/2
    ir22=ir12+nchan**2
    
    k=0;m=0
    do j=1,nchan
        do i=1,nchan
            r12(i,j)=cr(ir12+m)
            r21(j,i)=cr(ir12+m)
            m=m+1
        end do
    end do
    call quad_squarm(nchan,1,cr(ir11:ir12-1),r11)
    call quad_squarm(nchan,1,cr(ir22:),r22)
    
    k=1
    do j=1,nchan
        do i=1,nchan
            r22_minus_rmat(i,j)=r22(i,j)-rmat(k) ! Back propagation
        !               r11(i,j)=r11(i,j)+rvib2(k) ! Forward propagation
            k=k+1
        end do
    end do
    
!     Invert R-matrices
    allocate(matrix_identity(nchan,nchan))
    matrix_identity=0.0d0
    
    do i=1,nchan
        matrix_identity(i,i)=1.0d0
    end do
    
    info=0
    allocate(ipiv(nchan),temp_matrix(nchan,nchan))
    ipiv=0 
    temp_matrix=r21  ! Back propagation
    
    allocate(fkmat_long(nchan,nchan), fx_with_K(nchan,nchan), &
    fxp_with_K(nchan,nchan))
    
    fkmat_long=fkmat
    fx_with_K=fx(:,:,1)+matmul(fx(:,:,2),fkmat_long)
    fxp_with_K=fxp(:,:,1)+matmul(fxp(:,:,2),fkmat_long)
    
    allocate(fx_with_K_prop(nchan,nchan), &
    fxp_with_K_prop(nchan,nchan))
    
    fxp_with_K_prop(:,:)=matmul(r22,fxp_with_K(:,:))-fx_with_K(:,:)
    
    
!     TESTING ITERATIVE REFINEMENT
!     -----------------------------
    allocate(lhs_a(nchan,nchan),sol_mat(nchan,nchan), &
    lhs_af(nchan,nchan),rhs(nchan,nchan))
!     LAPACK inversion (not quad prec.)
    
    lhs_a=r21
    lhs_af=r21
    call getrf(lhs_af,ipiv)
    
!     compute the 1-norm for gettinf the condition
    anorm_temp=0
    do i=1,nchan
        anorm_temp=anorm_temp+abs(r21(i,:))
    end do
    anorm=maxval(anorm_temp) 
    
    call gecon( lhs_af, anorm, rcond)
    write(1003,*) 'anorm,rcond', anorm,rcond
    
    sol_mat=fxp_with_K_prop(:,:)
    call getrs(lhs_af,ipiv,sol_mat)
    rhs=fxp_with_K_prop(:,:)
    call gerfs( lhs_a, lhs_af, ipiv, rhs, sol_mat,'N',ferr,berr)
!       call gerfsx( lhs_a, lhs_af, ipiv, rhs, sol_mat) 
    fxp_with_K_prop(:,:)=sol_mat
    write(1001,*) 'FERR 1'
    write(1001,*) FERR
    write(1001,*) 'BERR 1'
    write(1001,*) BERR
    write(1001,*) ''
    
!     -----------------------------
    
!       call quad_MA01A_f95(temp_matrix,fxp_prop(:,:,1),nchan,nchan)
!       fxp_prop(:,:,1)=temp_matrix
! 
!       temp_matrix=r21
! 
!       call quad_MA01A_f95(temp_matrix,fxp_prop(:,:,2),nchan,nchan)
!       fxp_prop(:,:,2)=temp_matrix
! 
! !       allocate(rmat_prop(nchan,nchan))
! !       rmat_prop=0.0d0        
! ! 
! !       rmat_prop=r12-matmul(matmul(r11,curlyr_inverse),r22_minus_rmat)
    
    
    fx_with_K_prop(:,:)=matmul(r12,fxp_with_K(:,:)) &
    -matmul(r11,fxp_with_K_prop(:,:))
    
    
    fx(:,:,1)=fx_with_K_prop
    fxp(:,:,1)=fxp_with_K_prop
    return
    end subroutine quad_curlyr_back_prop_with_K
    
    subroutine quad_MA01A_f95(A,B,M,N)
    implicit none
    
!     Arguments
    integer :: M, N
    real(kind=16), dimension(:,:) :: A(M,M),B(M,N)
    
!     Local
    integer :: M1,IAC,IBC
    integer,allocatable :: IND(:)
    real(kind=16), allocatable :: C(:)
    
!       M=size(A,1)
!       N=size(B,2)
    M1=1
    IAC=M
    IBC=N
    print*, 'M=',M
    print*, 'N=',N
    allocate(IND(2*2*M), C(2*2*M))
    
    call quad_MA01A(A,B,M,N,M1,IAC,IBC,C,IND)
    
!       CALL MA01A(CRCP_temp,CRCPINV,NCHAN,NCHAN,1,
!      X           NCHAN,NCHAN,X,X(NCHAN+1))
    deallocate(IND,C)
    end subroutine quad_MA01A_f95
!
    subroutine quad_MA01A(A,B,M,N,M1,IAC,IBC,C,IND)
    IMPLICIT real(kind=16) (A-H,O-Z)
!
!      SOLUTION OF SIMULTANEOUS EQUATIONS AND OR MATRIX INVERSION
!
!
!      A           THE M*M MATRIX OF LEFT HAND SIDES OR THE MATRIX BEING
!                  INVERTED. OVERWRITTEN ON EXIT BY THE INVERSE MATRIX
!
!      B           THE M*N MATRIX OF THE RIGHT HAND SIDES. OVERWRITTEN
!                  ON EXIT BY SOLUTIONS
!
!      M           THE ORDER OF THE A-MATRIX. THIS MUST BE GREATER
!                  THAN 1 AND NOT GREATER THAN 100.THE UPPER LIMIT
!                  CAN BE EXTENDED BY RECOMPILING WITH LARGER
!                  DIMENSIONS FOR THE PRIVATE ARRAYS C AND IND
!
!      N           THE NUMBER OF THE RIGHT HAND SIDES IN THE
!                  SIMULTANEOUS EQUATIONS
!
!      IAR,IAC     DEFINE THE DIMENSIONS OF THE ARRAY WHERE THE A-MATRIX
!                  IS STORED
!
!      IBR,IBC     DEFINE THE DIMENSIONS OF THE ARRAY WHERE THE B-MATRIX
!                  IS STORED
!
!
!      M1          =0 ONLY SIMULTANEOUS EQUATIONS ARE SOLVED IF N.GT.0
!                       IF N=0 A FURTHER ENTRY TO MA01A WITH M1.LT.0
!                       REQUIRED TO OBTAIN THE INVERSE OF A
!                  .GT.0 MATRIX INVERSION. IN ADDITION SIMULTANEOUS
!                       EQUATIONS ARE SOLVED IF N.GT.0
!                  .LT.0 ONLY USED IF PREVIOUS ENTRY TO MA01A
!                       WITH M1=0. IN THIS CASE THE MATRIX INVERSION IS
!                       COMPLETED
!
!
    INTEGER :: M,N,M1,IAC,IBC,IND
    DIMENSION A(IAC,IAC),B(IAC,IBC),C(*),IND(*)
    INTEGER :: I,I4,MM,ISTO,K,J1,I2,J2,I1
!
    IF(M1 < 0) GO TO 65
    AMAX=0.0
!
!      FIND THE FIRST PIVOTAL ELEMENT AND STORE THE CORRESPONDING ROW
!      NUMBER IN I4. IND DEFINES THE ORDER OF THE ROWS OF THE ORIGINAL
!      A-MATRIX BEFORE ROW INTERCHANGE
!
    DO I=1,M
        IND(I)=I
        IF(ABS (A(I,1)) <= AMAX) CYCLE
        AMAX=ABS (A(I,1))
        I4=I
    END DO
    MM=M-1
!
!      EACH TIME THROUGH THE FOLLOWING LOOP THE A-MATRIX IS
!      REDUCED BY ONE
!
    DO 111 J=1,MM
    !
    !      INTERCHANGE THE I4TH AND THE JTH ROWS OF THE A-MATRIX AND STORE
    !      ORDER IN IND IF I4 .NE.J
    !
        IF(I4 <= J)GO TO 6
        ISTO=IND(J)
        IND(J)=IND(I4)
        IND(I4)=ISTO
        DO 5 K=1,M
            STO=A(I4,K)
            A(I4,K)=A(J,K)
            A(J,K)=STO
        5 END DO
    !
    !      INTERCHANGE THE I4TH AND THE JTH ROWS OF THE B-MATRIX IF N.GT. 0
    !
        IF(N <= 0) GO TO 6
        DO K=1,N
            STO=B(I4,K)
            B(I4,K)=B(J,K)
            B(J,K)=STO
        END DO
    !
    !      THE JTH ROW NOW CONTAINS THE PIVOTAL ELEMENT IN THE JTH POSITION
    !      ELIMINATE THE JTH ELEMENT FROM EACH OF THE REMAINING ROWS OF THE
    !      A-MATRIX AND THE B-MATRIX AND STORE THE MULTIPLIERS IN THE LOWER
    !      TRIANGLE
    !
        6 AMAX=0.0
        J1=J+1
        DO 11 I=J1,M
            A(I,J)=A(I,J)/A(J,J)
            DO 10 K=J1,M
                A(I,K)=A(I,K)-A(I,J)*A(J,K)
                IF (K > J1) GO TO 10
            !
            !      FIND THE NEXT PIVOTAL ELEMENT AND STORE THE CORRESPONDING ROW
            !      NUMBER IN I4
            !
                IF(ABS (A(I,K)) <= AMAX) CYCLE
                AMAX=ABS (A(I,K))
                I4=I
            10 END DO
            9 IF(N <= 0) GO TO 11
            DO 13 K=1,N
                B(I,K)=B(I,K)-A(I,J)*B(J,K)
            13 END DO
        11 END DO
    111 END DO
!
!      THE ELIMINATION IS NOW COMPLETE AND THE A-MATRIX HAS BEEN
!      REDUCED TO THE PRODUCT OF AN UPPER AND LOWER TRIANGLE MATRIX
!
    IF(N <= 0) GO TO 18
!
!      NOW CARRY OUT THE BACK SUBSTITUTION AND STORE RESULT IN THE
!      B-MATRIX IF THERE IS AT LEAST ONE RIGHT HAND SIDE
!
    DO 127 I1=1,M
        DO 227 J=1,N
            I=M+1-I1
            IF(M <= I) GO TO 327
            I2=I+1
            DO 32 K=I2,M
                B(I,J)=B(I,J)-A(I,K)*B(K,J)
            32 END DO
            327 B(I,J)=B(I,J)/A(I,I)
        227 END DO
    127 END DO
    18 IF(M1 <= 0) GO TO 64
!
!      REPLACE THE A-MATRIX BY ITS INVERSE WHEN M1.NE. ZERO
!
!      FIRST INVERT THE LOWER TRIANGLE MATRIX AND STORE ON ITSELF
!
    65 DO 140 I1=1,MM
        I=M+1-I1
        I2=I-1
        DO 41 J1=1,I2
            J=I2+1-J1
            J2=J+1
            W1=-A(I,J)
            IF(I2 < J2) GO TO 141
            DO 42 K=J2,I2
                W1=W1-A(K,J)*C(K)
            42 END DO
            141 C(J)=W1
        41 END DO
        DO 40 K=1,I2
            A(I,K)=C(K)
        40 END DO
    140 END DO
!
!      NOW INVERT THE UPPER TRIANGLE MATRIX AND FORM THE ORIGINAL
!      A-MATRIX APART 6ROM COLUMN INTERCHANGE. THIS OVERWRITES THE
!      ORIGINAL A-MATRIX
!
    DO 150 I1=1,M
        I=M+1-I1
        I2=I+1
        W=1.0/A(I,I)
        DO 56 J=1,M
            IF (I < J) THEN
              GO TO 52
            ELSE IF (I == J) THEN
              GO TO 53
            ELSE
              GO TO 54
            END IF
            52 W1=0.0
            GO TO 55
            53 W1=1.0
            GO TO 55
            54 W1=A(I,J)
            55 IF(I1 <= 1) GO TO 156
            DO 58 K=I2,M
                W1=W1-A(I,K)*A(K,J)
            58 END DO
            156 C(J)=W1
        56 END DO
        DO 50 J=1,M
            A(I,J)=C(J)*W
        50 END DO
    150 END DO
!
!      RE-ORDER THE COLUMNS OF THE INVERSE A-MATRIX TO COINCIDE WITH
!      THE ORDER OF THE ROWS OF THE A-MATRIX ON INPUT
!
    DO 60 I=1,M
        63 IF(IND(I) == I) GO TO 60
        J=IND(I)
        DO 62 K=1,M
            STO=A(K,I)
            A(K,I)=A(K,J)
            A(K,J)=STO
        62 END DO
        ISTO=IND(J)
        IND(J)=J
        IND(I)=ISTO
        GO TO 63
    60 END DO
    64 RETURN
    END
    SUBROUTINE quad_SQUARM(NDIM,NMAT,TRIANG,SQUARE)
    IMPLICIT DOUBLE PRECISION (A-H,O-Z)
!
!***********************************************************************
!
!     SQUARM PUTS LOWER TRIANGLES BACK INTO SQUARE MATRICES)
!
!***********************************************************************
!
    real(kind=16) :: square
    INTEGER :: NDIM,NMAT,I,J,K,L
    DIMENSION SQUARE(NDIM,NDIM,NMAT),TRIANG(*)
!
    K = 0
    DO 3 L=1,NMAT
        DO 2 I=1,NDIM
            DO 1 J=1,I
                K = K+1
                SQUARE(I,J,L) = TRIANG(K)
                SQUARE(J,I,L) = TRIANG(K)
            1 END DO
        2 END DO
    3 END DO
!
    RETURN
    END
    SUBROUTINE quad_KMAT(NCHAN,BSTO,NOPEN,F,FP,rmat_quad,AKMAT,fkmat)
    use lapack95_compak
    implicit none
!
!***********************************************************************
!
!     K-MATRIX CALCULATION
!
!     NCHAN        NUMBER OF CHANNELS
!     BSTO         LOGARITHMIC DERIVATIVE/MATCHING RADIUS
!     NOPEN        NUMBER OF OPEN CHANNELS
!     F, FP        EXTERNAL REGION SOLUTIONS AND DERIVATIVES
!                  (ASSUMED IN CORRECT LOCATIONS)
!     RMAT         INTERNAL REGION R-MATRIX
!
!     OUTPUT :
!     AKMAT        K-MATRIX
!     fkmat          The full K-matrix including closed channels
!              needed for compak - AlexH 11.11.10
!
!     AA,BB        WORK SPACE, EACH OF LENGTH NCHAN*NCHAN
!     X            WORK SPACE, OF LENGTH 2*NCHAN
!
!***********************************************************************
    
    integer, parameter :: iqp=selected_real_kind(16)
    
!     Arguments
    integer ::       nchan,nopen
    real(kind=8)  :: akmat(nopen,nopen),fkmat(nchan,nchan),bsto(nchan) &
    ,f(nchan,nchan,2),fp(nchan,nchan,2)
    real(kind=16) :: rmat_quad(nchan,nchan)
    
!     Local variables
    integer :: i,j,k
    real(kind=16) :: df
    real(kind=16), allocatable :: quad_akmat(:,:),quad_fkmat(:,:), &
    X(:), AA(:,:),BB(:,:), &
    quad_f(:,:,:), quad_fp(:,:,:),    &
    quad_bsto(:)
    real(kind=8), allocatable :: rhs(:,:), lhs_a(:,:),lhs_af(:,:), &
    sol_mat(:,:)
    integer :: ninfo
    integer, allocatable :: ipiv(:), ind(:)
!
    allocate(quad_akmat(NOPEN,NOPEN),quad_fkmat(nchan,nchan), &
    X(2*nchan),AA(nchan,nchan),BB(nchan,nchan), &
    quad_f(nchan,nchan,2), quad_fp(nchan,nchan,2), &
    quad_bsto(nchan),ind(2*nchan))
    
    AKMAT=0.d0;fkmat=0.d0;AA=0._iqp;BB=0._iqp;X=0._iqp
    quad_f=real(f,kind=16)
    quad_fp=real(fp,kind=16)
    quad_bsto=real(bsto,kind=16)
    df=0._iqp
    
    do j=1,nchan
        do i=1,nchan
            AA(i,j) = quad_f(i,j,2)
        end do
    end do
!
    do j=1,nchan
        do k=1,nchan
            df =  quad_fp(k,j,2)- quad_bsto(k)* quad_f(k,j,2)
            do i=1,nchan
                AA(i,j) = AA(i,j)-rmat_quad(i,k)*df
            end do
        end do
    end do
!
    do j=1,nopen
        do i=1,nchan
            BB(i,j)=- quad_f(i,j,1)
        end do
    end do
!
    do j=1,nopen
        do k=1,nchan
            df =  quad_fp(k,j,1)- quad_bsto(k)* quad_f(k,j,1)
            do i=1,nchan
                BB(i,j)=BB(i,j)+rmat_quad(i,k)*df
            end do
        end do
    end do
!
    
    931 format(i5,3x,i5,3x,2(d15.5,3x))
    if(nchan == 1) then
        BB(1,1)=BB(1,1)/AA(1,1)
    else
        call quad_ma01a(aa,bb,nchan,nopen,0,nchan,nchan,x,ind(nchan+1)) !ZM changed x(nchan+1) to ind(nchan+1)
        
    ! !       Try with iterative refinement (lapack)
    ! !       ------------------------------------
    !         allocate(rhs(nchan,nchan), lhs_a(nchan,nchan), 
    !      *           lhs_af(nchan,nchan))
    !         allocate(sol_mat(nchan,nchan), ipiv(nchan))
    !         ipiv=0;lhs_af=0;sol_mat=0;rhs=0;lhs_a=0;lhs_af=0
    !         lhs_a=AA
    !         lhs_af=AA
    !         call getrf(lhs_af,ipiv)
    ! 
    ! ! !       compute the 1-norm for gettinf the condition
    ! !         anorm_temp=0
    ! !         do i=1,nchan
    ! !           anorm_temp=anorm_temp+abs(r21(i,:))
    ! !         end do
    ! !         anorm=maxval(anorm_temp) 
    ! 
    ! !         call gecon( lhs_af, anorm, rcond)
    ! !         write(1003,*) 'anorm,rcond', anorm,rcond
    ! 
    !         sol_mat=BB
    !         call getrs(lhs_af,ipiv,sol_mat)
    !         rhs=BB
    !         call gerfs( lhs_a, lhs_af, ipiv, rhs, sol_mat)
    ! !       ---------------------------------------
    ! 
    endif
! c
!       fkmat=sol_mat
!       do j=1,nopen
!          do i=1,nopen
!             akmat(i,j) = sol_mat(i,j)
!          end do
!       end do
    
    fkmat=bb
    do j=1,nopen
        do i=1,nopen
            akmat(i,j) = bb(i,j)
        end do
    end do
    
    
    
!
    
    end subroutine quad_KMAT
    
!     Does svd decomposition of A
!     returns singular values in B
    
    subroutine rsvd_decomposition(m,n,A)
    use lapack95_compak
    implicit none
    
!     Arguments
    integer :: m,n
    real(kind=8)  :: A(m,n)
    
!     Local
    integer :: min_dim,i,j,k
    real(kind=8), allocatable  ::AA(:,:), d(:), e(:)
    
    min_dim=min(m,n)
    allocate(AA(m,n))
    allocate(d(min_dim), e(min_dim-1))
    d=0;e=0
    
    
    AA=A
     

    call gebrd(AA,d,e)
    call bdsqr(d, e) 
    
    A=0
    do i=1,min_dim
        A(i,i)=d(i)
        print *, i, d(i)
    end do
    end subroutine rsvd_decomposition
    
    subroutine svd_truncate(m,n,A, svd_thresh)
    use lapack95_compak
    implicit none
    
!     Arguments
    integer :: m,n
    real(kind=8)  :: A(m,n),svd_thresh
    
!     Local
    integer :: min_dim,i,j,k
    real(kind=8)  :: u(m,m), vt(n,n)
    real(kind=8), allocatable  ::AA(:,:), s(:), sigma(:,:)
    
    min_dim=min(m,n)
    allocate(AA(m,n))
    allocate(s(min_dim))
    s=0
    AA=A
    
    call gesvd(AA,s, u, vt)
    
    allocate(sigma(m,n))
    sigma=0
    
    do i=1,min_dim
        if (s(i) > svd_thresh) then
            sigma(i,i)=s(i)
        end if
    end do
    
    A=matmul(u,matmul(sigma,vt))
    
    
    end subroutine svd_truncate
    
    subroutine tri_rsvd_decomposition(m,A,n,AAA)
    use lapack95_compak
    implicit none
    
!     Arguments
    integer :: m,n
    real(kind=8)  :: A(m,n)
    
    real(kind=8) ::AAA((m**2+m)/2)
!     Local
    integer :: min_dim,i,j,k
    real(kind=8), allocatable  ::AA(:,:), d(:), e(:)
    
    min_dim=min(m,n)
    allocate(AA(m,n))
    allocate(d(min_dim), e(min_dim-1))
    d=0;e=0
    
    
    k=1
    do i=1, m
        do j=1,i
            AA(i,j)=AAA(k)
            AA(j,i)=AAA(k)
            k=k+1
        end do
    end do
     

    call gebrd(AA,d,e)
    call bdsqr(d, e) 
    
    A=0
    do i=1,min_dim
        A(i,i)=d(i)
    end do     
    
    end subroutine tri_rsvd_decomposition
    
    subroutine transform_f(nchan,nopen,fx, fxp)
    use lapack95_compak
    implicit none
    
!     Arguments
    integer :: nchan,nopen
    real(kind=8)  :: fx(nchan,nchan,2), fxp(nchan,nchan,2)
    
!     Local
    integer :: i,j,k
    
    do i=nopen+1,nchan
        do j=1,nopen
            fx(i,j,1)=0
            fx(i,j,2)=0
            fxp(i,j,1)=0
            fxp(i,j,2)=0
        end do
    end do
    do i=nopen+1,nchan
        do j=1,nopen+1,nchan
            fx(i,j,2)=0
            fxp(i,j,2)=0
        end do
    end do     
    do i=nopen+1,nchan  
        fx(i,i,2)=1
        fxp(i,i,2)=1
    end do     
    
    end subroutine transform_f


    !> \brief  Collect partial wave dipoles from an energy iteration
    !> \author J Benda
    !> \date   2021
    !>
    !> This subroutine is called by all ranks every time they finish (successfully of unsuccessfully) a single iteration
    !> of the energy loop, if the calculation of photodipoles is requested. The rank-zero process will collect
    !> the photodipoles from all other ranks and write them to disk. Energies, for which the calculation failed, will
    !> be omitted in the file.
    !>
    subroutine gather_pw_dipoles (ie, nworkers, escat, re_pw_dipoles, re_pw_dipoles1, im_pw_dipoles, im_pw_dipoles1, &
                                  nneut, nchan, nchan_dip, maxprop)

        integer,               intent(in)    :: ie, nworkers, nneut, nchan, nchan_dip, maxprop
        real(wp), allocatable, intent(inout) :: escat(:), re_pw_dipoles1(:, :, :), im_pw_dipoles1(:, :, :)
        real(wp), allocatable, intent(inout) :: re_pw_dipoles(:, :, :, :), im_pw_dipoles(:, :, :, :)

        real(wp), allocatable :: buffer(:)
        integer(mpiint)       :: owner
        integer               :: tag, n

        tag = 0
        n = nneut * nchan * maxprop
        owner = mod(ie - 1, nworkers) + nprocs - nworkers

        ! save master's own data (if valid)
        if (owner == myrank .and. myrank == master .and. escat(ie) > 0) then
            re_pw_dipoles(:, 1:nchan_dip, :, ie) = re_pw_dipoles1(:, 1:nchan_dip, :)
            im_pw_dipoles(:, 1:nchan_dip, :, ie) = im_pw_dipoles1(:, 1:nchan_dip, :)
        end if

        ! each non-master process sends their data to master (if valid)
        if (owner == myrank .and. myrank /= master) then
            call mpi_mod_send(master, escat(ie:ie), tag, 1)
            if (escat(ie) > 0) then
                call mpi_mod_send(master, reshape(re_pw_dipoles1, [n]), tag, n)
                call mpi_mod_send(master, reshape(im_pw_dipoles1, [n]), tag, n)
                call mpi_mod_recv(master, tag, escat(ie:ie), 1)  ! wait for receipt (block when buffered)
            end if
        end if

        ! receive data from non-master processes (if valid)
        if (owner /= myrank .and. myrank == master) then
            allocate (buffer(n))
            call mpi_mod_recv(owner, tag, escat(ie:ie), 1)
            if (escat(ie) > 0) then
                call mpi_mod_recv(owner, tag, buffer, n);  re_pw_dipoles1(:,:,:) = reshape(buffer, [nneut, nchan, maxprop])
                call mpi_mod_recv(owner, tag, buffer, n);  im_pw_dipoles1(:,:,:) = reshape(buffer, [nneut, nchan, maxprop])
                call mpi_mod_send(owner, escat(ie:ie), tag, 1)  ! send back receipt
                re_pw_dipoles(:, 1:nchan_dip, :, ie) = re_pw_dipoles1(:, 1:nchan_dip, :)
                im_pw_dipoles(:, 1:nchan_dip, :, ie) = im_pw_dipoles1(:, 1:nchan_dip, :)
            end if
        end if

    end subroutine gather_pw_dipoles


    !> \brief  Collect Ak coefficients from an energy iteration
    !> \author J Benda
    !> \date   2021
    !>
    !> This subroutine is called by all ranks every time they finish (successfully of unsuccessfully) a single iteration
    !> of the energy loop, if the calculation of Ak-coefficients is requested. The rank-zero process will collect
    !> the Ak-coefficients from all other ranks and write them to disk. Energies, for which the calculation failed, will
    !> be omitted in the file.
    !>
    subroutine gather_and_write_akcoeffs (ie, nworkers, escat, lusct, sform, nocsf, nchan, ar, ai)

        real(wp), allocatable, intent(inout) :: escat(:), ar(:,:), ai(:,:)
        integer,               intent(in)    :: ie, nworkers, nchan, nocsf, lusct
        character(len=*),      intent(in)    :: sform

        real(wp), allocatable :: buffer(:)
        integer(mpiint)       :: owner
        integer               :: tag, n

        tag = 0
        n = nocsf * nchan
        owner = mod(ie - 1, nworkers) + nprocs - nworkers

        ! master writes its own data to file (if valid)
        if (owner == myrank .and. myrank == master .and. escat(ie) > 0) then
            call writsc(lusct, sform, escat(ie:ie), nchan, nocsf, 1, ar, ai)
        end if

        ! each non-master process sends their data to master synchronously (if valid)
        if (owner == myrank .and. myrank /= master) then
            call mpi_mod_send(master, escat(ie:ie), tag, 1)
            if (escat(ie) > 0) then
                call mpi_mod_send(master, reshape(ar(:, 1:nchan), [n]), tag, n)
                call mpi_mod_send(master, reshape(ai(:, 1:nchan), [n]), tag, n)
                call mpi_mod_recv(master, tag, escat(ie:ie), 1)  ! wait for receipt (block when buffered)
            end if
        end if

        ! receive data from non-master processes and write to disk (if valid)
        if (owner /= myrank .and. myrank == master) then
            allocate (buffer(n))
            call mpi_mod_recv(owner, tag, escat(ie:ie), 1)
            if (escat(ie) > 0) then
                call mpi_mod_recv(owner, tag, buffer, n);  ar(:, 1:nchan) = reshape(buffer, [nocsf, nchan])
                call mpi_mod_recv(owner, tag, buffer, n);  ai(:, 1:nchan) = reshape(buffer, [nocsf, nchan])
                call mpi_mod_send(owner, escat(ie:ie), tag, 1)  ! send back receipt
                call writsc(lusct, sform, escat(ie:ie), nchan, nocsf, 1, ar, ai)
            end if
        end if

    end subroutine gather_and_write_akcoeffs


    !> \brief  Collect K-matrices from an energy iteration
    !> \author J Benda
    !> \date   2021
    !>
    !> This subroutine is called by all ranks every time they finish (successfully of unsuccessfully) a single iteration
    !> of the energy loop, if the calculation of K-matrices is requested. The rank-zero process will collect
    !> the K-matrices from all other ranks and write them to disk. Energies, for which the calculation failed, will
    !> be omitted in the file.
    !>
    subroutine gather_and_write_kmatrices (ie, nworkers, escat, iktype, akmat, fkmat, nchan, ndis, nopen, ndopen, enryd)

        integer,               intent(in)    :: ie, nworkers, iktype, nchan, nopen, ndis, ndopen
        real(wp),              intent(in)    :: enryd
        real(wp), allocatable, intent(inout) :: escat(:), akmat(:), fkmat(:)

        integer(mpiint) :: owner

        real(wp) :: ery(1)
        integer  :: nch(2), tag
        logical  :: full

        tag = 0
        full = (iktype == 1)                ! determine whether we are working with full or open-open K-matrix
        owner = mod(ie - 1, nworkers) + nprocs - nworkers

        ery(1) = enryd                      ! scattering energy in Rydbergs needed by WRITKM
        nch(1) = merge(nchan, nopen, full)  ! number of vibronic channels
        nch(2) = merge(ndis, ndopen, full)  ! number of dissociation channels

        ! symmetrize the full K-matrix first: it contains lower triangle, but WRITKM uses the upper one
        if (full) then
            do i = 1, nchan
                do j = 1, i
                    fkmat(j+(i-1)*nchan) = fkmat(i+(j-1)*nchan)
                end do
            end do
        end if

        ! master writes its own data to file (if valid)
        if (owner == myrank .and. myrank == master .and. escat(ie) > 0) then
            select case (iktype)
                case (0); call writkm(nopen, ndopen, enryd, akmat)
                case (1); call writkm(nchan, ndis, enryd, fkmat)
            end select
        end if

        ! each non-master process sends their data to master (if valid)
        if (owner == myrank .and. myrank /= master) then
            call mpi_mod_send(master, escat(ie:ie), tag, 1)
            call mpi_mod_send(master, ery, tag, 1)
            if (escat(ie) > 0) then
                select case (iktype)
                    case (0)
                        call mpi_mod_send(master, nch, tag, 2)
                        call mpi_mod_send(master, akmat, tag, nopen*nopen)
                    case (1)
                        call mpi_mod_send(master, nch, tag, 2)
                        call mpi_mod_send(master, fkmat, tag, nchan*nchan)
                end select
                call mpi_mod_recv(master, tag, escat(ie:ie), 1)  ! wait for receipt (block when buffered)
            end if
        end if

        ! receive data from non-master processes and write to disk (if valid)
        if (owner /= myrank .and. myrank == master) then
            call mpi_mod_recv(owner, tag, escat(ie:ie), 1)
            call mpi_mod_recv(owner, tag, ery, 1)
            if (escat(ie) > 0) then
                select case (iktype)
                    case (0)
                        akmat = -1
                        call mpi_mod_recv(owner, tag, nch, 2)
                        call mpi_mod_recv(owner, tag, akmat, nch(1)*nch(1))
                        call writkm(nch(1), nch(2), ery(1), akmat)
                    case (1)
                        fkmat = -1
                        call mpi_mod_recv(owner, tag, nch, 2)
                        call mpi_mod_recv(owner, tag, fkmat, nch(1)*nch(1))
                        call writkm(nch(1), nch(2), ery(1), fkmat)
                end select
                call mpi_mod_send(owner, escat(ie:ie), tag, 1)  ! send back receipt
            end if
        end if

    end subroutine gather_and_write_kmatrices


    !> \brief  Collect T-matrices from an energy iteration
    !> \author J Benda
    !> \date   2021
    !>
    !> This subroutine is called by all ranks every time they finish (successfully of unsuccessfully) a single iteration
    !> of the energy loop, if the calculation of T-matrices is requested. The rank-zero process will collect
    !> the T-matrices from all other ranks and write them to disk. Energies, for which the calculation failed, will
    !> be omitted in the file.
    !>
    subroutine gather_and_write_tmatrices (ie, nworkers, escat, tr, ti, maxchi, maxchf, nopen, ndis, enryd)

        real(wp), allocatable, intent(inout) :: escat(:), tr(:), ti(:)
        real(wp),              intent(in)    :: enryd
        integer,               intent(in)    :: ie, nworkers, maxchi, maxchf, nopen, ndis

        real(wp), allocatable :: rtr(:,:), rti(:,:)  ! for reshaping of tr, ti to square matrices
        real(wp), allocatable :: wtr(:,:), wti(:,:)  ! for writing tr, ti to file

        real(wp)        :: ery(1)
        integer(mpiint) :: owner
        integer         :: tag, nelem, mvi(1), mvj(1), mvd(1), nop(1)

        tag = 0
        ery = enryd
        owner = mod(ie - 1, nworkers) + nprocs - nworkers
        nop = nopen
        mvi = min(nopen, maxchi)
        mvj = min(nopen, maxchf)
        mvd = 0

        allocate (wtr(maxchi, maxchf), wti(maxchi, maxchf))

        ! master writes its own data to file (if valid)
        if (owner == myrank .and. myrank == master .and. escat(ie) > 0) then
            ! reshape T-matrix to rectangle and cut out the requested subset
            allocate (rtr(nopen, nopen), rti(nopen, nopen))
            rtr = reshape(tr, [nopen, nopen])
            rti = reshape(ti, [nopen, nopen])
            wtr(1:mvi(1), 1:mvj(1)) = rtr(1:mvi(1), 1:mvj(1))
            wti(1:mvi(1), 1:mvj(1)) = rti(1:mvi(1), 1:mvj(1))
            call writet(lutmt, 1, maxchi, maxchf, mvi, mvj, mvd, wtr, wti, ery)
            deallocate (rtr, rti)
        end if

        ! each non-master process sends their data to master (if valid)
        if (owner == myrank .and. myrank /= master) then
            call mpi_mod_send(master, escat(ie:ie), tag, 1)
            if (escat(ie) > 0) then
                nelem = nopen**2
                call mpi_mod_send(master, ery, tag, 1)
                call mpi_mod_send(master, nop, tag, 1)
                call mpi_mod_send(master, tr(1:nelem), tag, nelem)
                call mpi_mod_send(master, ti(1:nelem), tag, nelem)
                call mpi_mod_recv(master, tag, escat(ie:ie), 1)  ! wait for receipt (block when buffered)
            end if
        end if

        ! receive data from non-master processes and write to disk (if valid)
        if (owner /= myrank .and. myrank == master) then
            call mpi_mod_recv(owner, tag, escat(ie:ie), 1)
            if (escat(ie) > 0) then
                call mpi_mod_recv(owner, tag, ery, 1)
                call mpi_mod_recv(owner, tag, nop, 1)
                nelem = nop(1)**2
                call mpi_mod_recv(owner, tag, tr, nelem)
                call mpi_mod_recv(owner, tag, ti, nelem)
                call mpi_mod_send(owner, escat(ie:ie), tag, 1)  ! send back receipt
                mvi = min(nop(1), maxchi)
                mvj = min(nop(1), maxchf)
                ! reshape T-matrix to rectangle and cut out the requested subset
                allocate (rtr(nop(1), nop(1)), rti(nop(1), nop(1)))
                rtr = reshape(tr, [nop(1), nop(1)])
                rti = reshape(ti, [nop(1), nop(1)])
                wtr(1:mvi(1), 1:mvj(1)) = rtr(1:mvi(1), 1:mvj(1))
                wti(1:mvi(1), 1:mvj(1)) = rti(1:mvi(1), 1:mvj(1))
                call writet(lutmt, 1, maxchi, maxchf, mvi, mvj, mvd, wtr, wti, ery)
                deallocate (rtr, rti)
            end if
        end if

    end subroutine gather_and_write_tmatrices


    !> \brief  Count valid energies and pack storage
    !> \author J Benda
    !> \date   2021
    !>
    !> The provided array of energies contains negative values indicated failed calculations. This subroutine calculates
    !> all the successfull calculations and then squeezes the energy storage so that the failed entries are removed.
    !>
    !> The same packing operation is performed also on the partial wave dipole storage.
    !>
    subroutine compress_and_count_energies (escat, re_pw_dipoles, im_pw_dipoles, nesc)

        real(wp), allocatable, intent(inout) :: escat(:), re_pw_dipoles(:,:,:,:), im_pw_dipoles(:,:,:,:)
        integer,               intent(out)   :: nesc

        integer :: ie

        nesc = 0
        do ie = 1, size(escat)
            if (escat(ie) > 0) then
                nesc = nesc + 1
                if (nesc /= ie) then
                    escat(nesc) = escat(ie)
                    re_pw_dipoles(:,:,:,nesc) = re_pw_dipoles(:,:,:,ie)
                    im_pw_dipoles(:,:,:,nesc) = im_pw_dipoles(:,:,:,ie)
                end if
            end if
        end do

    end subroutine compress_and_count_energies

END SUBROUTINE MPI_R_SOLVE
