!dirac_copyright_start
!      Copyright (c) by the authors of DIRAC.
!
!      This program is free software; you can redistribute it and/or
!      modify it under the terms of the GNU Lesser General Public
!      License version 2.1 as published by the Free Software Foundation.
!
!      This program is distributed in the hope that it will be useful,
!      but WITHOUT ANY WARRANTY; without even the implied warranty of
!      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
!      Lesser General Public License for more details.
!
!      If a copy of the GNU LGPL v2.1 was not distributed with this
!      code, you can obtain one at https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html.
!dirac_copyright_end

! define task symbols for CALL DIRAC_PARCTL( task )
#include "dirac_partask.h"
C&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
C/* Deck TRDR4T */
      SUBROUTINE TRDR4T(WORK,KFREE,LFREE,IPRINT,INTFLG,KGAB,KDRIJ,
     &                  NSTR,ANTIS,LMP2,KINDX,KQ,KE,KIBE,DINTSKP)
C***********************************************************************
C
C     Driver for scheme 4: (pq|rs) --> (ij|kl)
C     Loop over distributions (pq|**), transform 2 indices 
C     (pq|**) --> (ij|pq) and write half-transformed integrals 
C     (ij|pq) to disk in (pq)-batches.
C
C     Parallel scheme.
C
C     * VECTORS:  
C       Qi             - coefficients for index i
C       NSTR(ifrp,0,i) - total number of orbitals for index i
C       NSTR(ifrp,1,i) - number of electronic orbitals for index i
C       NSTR(ifrp,2,i) - number of positronic orbitals for index i
C     * AO-INTEGRALS:
C     Distributions (pq|**) are fetched by CALDIS and are packed on
C     boson irreps. The information necessary for symmetry packing
C     is provided by the integer array INDX (generated by NINSH):
C       INDX(1,INDA)   - Position of function in block
C       INDX(2,INDA)   - Irreducible representation of function
C       INDX(3,INDA)   - Position of function within this particular irrep
C                        and block
C                        (INDA refers to index of SO-orbital)
C     * CONTROL INFORMATION:
C       LMP2  = .TRUE. - evaluate MP2 energy
C       EIGn           - eigenvalues (for  MP2 calculation)
C       ANTIS = .TRUE. - anti-symmetrize integrals
C       INTFLG - flag of what integral types to transform
C
C     
C     Written by Luuk Visscher, Februari 1997
C     Screening added August 20 by T.Saue
C
C***********************************************************************
      use interface_to_mpi
#include "implicit.h"
#include "priunit.h"
      PARAMETER(D0=0.0D0)
#include "maxorb.h"
#include "aovec.h"
#if defined (VAR_MPI)
      INTEGER   ISTAT(df_MPI_STATUS_SIZE), IBUFMPI(5)
#endif
#include "infpar.h"
#include "dcbgen.h"
#include "dcborb.h"
#include "dcbtra.h"
#include "dcbtr3.h"
#include "dcbbas.h"
#include "dgroup.h"

#include "blocks.h"
      LOGICAL ANTIS,LMP2,TRIAN(2),LBIT
      DIMENSION NSTR(2,0:2,4),
     &          KQ(2,4),KE(2,4),KIBE(2,4),WORK(*),DINTSKP(*)
      CHARACTER*7 CIC(3)
      DATA CIC/'(LL|??)', '(SS|??)', '(LS|??)'/
C
      CALL QENTER('TRDR4T')
      KFRSAV = KFREE
C     Initialize the timing
      CALL XTIME(0,-1,'                             ')
C
C     Define the packing of the 4-index transformed integrals
C
      TRIAN(1) = ISAME(1).EQ.ISAME(2)
      TRIAN(2) = .FALSE.
      CALL PCK2IN(NSTR,TRIAN,IPRINT)
C
C     *****************************************
C     ****** P A R A L L E L    C O D E  ******
C     *****************************************
C
#if defined (VAR_MPI)
      IF (PARCAL) THEN
C
C       Get hold of the slaves
C       ( ITASK = 2 for parallel integral transformation )
C
        CALL DIRAC_PARCTL( MOLTRA_PAR )
C       
C       Initialize
C
        CALL TRAPARI(WORK,KFREE,LFREE,KQ,KE,KIBE,
     &       KINDX,KGAB,KDRIJ,INTFLG,NSTR,ANTIS,LMP2)
C
C       Initialize scratch file as permanent file for Master if NOMDCINT.
C
        IF (NOMDCINT) THEN
           CALL XTIME(1,1,' Initializing MS4IND file     ')
           CALL MDSCRI
           CALL XTIME(1,2,' Initializing MS4IND file     ')
        END IF
C
C     We send out all the tasks (shell pairs in this case) to the slaves
C     - reverse order of IC and ISHLA to get better load balancing
C       - S blocks are generally bigger than L blocks
C       - if IBSTEP is big the load is generally larger for bigger ISHLA
C         because ISHLB loop in TRDRV4 must end at ISHLA
C         
C       FIXME: calculate IBSTEP dynamically based on number of nodes
C       and size of shells.
C       (currently, it is just an input parameter for the wizards)
        MXIBSTEP = IPAR4BS
        IF (MXIBSTEP .LE. 0) THEN
C           ... determine MXIBSTEP based on number of nodes and
C               number of shell pairs to give optimal load balancing
C               (minimize N**5*nbstep in second half transformation
C                while having work for all nodes) /LV+HJAaJ Aug 2001
C
           MXIBSTEP = NTR4BSTEP(INTFLG,I2TYP,NODES)
           WRITE(LUPRI,'(/A,I4)')
     &     ' * Max no. of B shells in a task determined to be',
     &     MXIBSTEP
        END IF
C
        CALL GETTIM(CPUBEF,WALLBEF)
        DO IC = 2, 1, -1
CLV:Gaunt DO IC = 3, 1, -1
        IF(LBIT(INTFLG,IC)) THEN
           WRITE(LUPRI,'(A,I2,2A)') ' - Integral class',IC,' : ',CIC(IC)
           CALL SHRNGE (IC,INTFLG,I2TYP,IASTRT,IBSTRT,IASMAX,IBSMAX)
           IBUFMPI(1) = IC
           IBUFMPI(2) = I2TYP
           DO ISHLA = IASMAX, IASTRT, -1
              IBUFMPI(3) = ISHLA
              CALL GETTIM(CPUST,WALLST)
              WRITE(LUPRI,'(3X,A,I4,A,F14.2,A)')
     &           '- Starting shell A no. ',ISHLA,' after ',
     &           WALLST - WALLBEF,' seconds.'
              CALL FLSHFO(LUPRI)
              IF (IC.LE.2) THEN
                 IBLAST = ISHLA
              ELSE
                 IBLAST = IBSMAX
              ENDIF
C             Distribute number of B's evenly
              NBSTEP = (IBLAST-IBSTRT)/MXIBSTEP + 1
              IBSTEP = -(IBLAST-IBSTRT)/NBSTEP - 1
           DO IBEND = IBLAST, IBSTRT, IBSTEP
C
              ISHLB = MAX(IBEND+IBSTEP+1,IBSTRT)
              IBUFMPI(4) = ISHLB
              IBUFMPI(5) = IBEND
C
              CALL XTIME(2,1,' Slave requesting task from master')
              call interface_mpi_RECV(ITEST,1,df_MPI_ANY_SOURCE,20,
     &                      global_communicator,ISTAT)
              CALL XTIME(2,2,' Slave requesting task from master')
              NWHO = ISTAT(df_MPI_SOURCE)
              IF (ITEST .EQ. 0) THEN
C             ... slave wants new set of A,B shells
              CALL GETTIM(CPUST,WALLST)
              IF (IPRINT .GE. 1) THEN
                 WRITE(LUPRI,'(3X,4(A,I4),A,F10.2,A)') 
     &              '- Sending shells (',ISHLA,',',ISHLB,
     &              '-',IBEND,') to node',NWHO,
     &              ' after',WALLST-WALLBEF,' seconds'
                 CALL FLSHFO(LUPRI)
              END IF
              CALL XTIME(3,1,' Master and slave negotiating task')
              call interface_mpi_SEND(IBUFMPI,5,NWHO,30,
     &                      global_communicator)
              CALL XTIME(3,2,' Master and slave negotiating task')
              ELSE IF (ABS(ITEST) .LE. 2) THEN
C             ... slave wants to lock or free file
                 CALL QUIT('lock/free file not implemented yet')
              ELSE
                 CALL QUIT('Illegal ITEST')
              END IF
           ENDDO
           ENDDO
        ENDIF
        ENDDO
C
C       Get the slaves out of the transformation loop
C       and receive final results
C
        CALL TRARES(WORK,KFREE,LFREE,DINTSKP,LMP2,DUM,DUM,WALLBEF)
      ELSE
#endif
C
C     *************************************
C     ****** S E R I A L    C O D E  ******
C     *************************************
C
C
C     Initialize scratch output file for final integrals
C
        CALL XTIME(1,1,' Initializing MS4IND file     ')
        CALL MDSCRI
        CALL XTIME(1,2,' Initializing MS4IND file     ')
C
        CALL XTIME(5,1,' Computing+transform. integrals')
        CALL GETTIM(CPUBEF,WALLBEF)
        DO IC = 1, 2
CLV:Gaunt  DO IC = 1, 3
        IF(LBIT(INTFLG,IC)) THEN
           WRITE(LUPRI,'(A,I2,2A)') ' - Integral class',IC,' : ',CIC(IC)
           CALL SHRNGE (IC,INTFLG,I2TYP,IASTRT,IBSTRT,IASMAX,IBSMAX)
           DO ISHLA = IASTRT, IASMAX
              CALL GETTIM(CPUST,WALLST)
              IF (IC.LE.2) THEN
                 IBEND = ISHLA
                 WRITE(LUPRI,'(3X,2(A,I4),A,F8.0,A,F8.0,A)') 
     &              '- Beginning task',ISHLA,' of',IASMAX,
     &              ' after',WALLST-WALLBEF,' seconds and',
     &              CPUST-CPUBEF,' CPU-seconds'
              ELSE
                 IBEND = IBSMAX
                 WRITE(LUPRI,'(3X,2(A,I4),A,F8.0,A,F8.0,A)') 
     &              '- Beginning Gaunt task',ISHLA-IASTRT+1,
     &              ' of',IASMAX-IASTRT+1,
     &              ' after',WALLST-WALLBEF,' seconds and',
     &              CPUST-CPUBEF,' CPU-seconds'
              ENDIF
              CALL FLSHFO(LUPRI)
              CALL TRDRV4(WORK,KFREE,LFREE,IPRINT,INTFLG,
     &                   NSTR,ANTIS,LMP2,WORK(KINDX),TRIAN,
     &                   KQ,KE,KIBE,IC,I2TYP,IBSTRT,IBEND,
     &                   WORK(KGAB),WORK(KDRIJ),DINTSKP)
           ENDDO
        ENDIF
        ENDDO
        CALL XTIME(5,2,' Computing+transform. integrals')
#if defined (VAR_MPI)
      ENDIF
#endif
C
C     Screening statistics
C
      IF(SCRTRA.GT.D0) THEN
        CALL ST4SCR(DINTSKP)
      ENDIF
C
C     Initialize MOLFDIR integral file
C
      CALL FLSHFO(LUPRI)
      IF (.NOT. NOMDCINT) THEN
         CALL MDINTI(IPRINT)
      ELSE
         CALL INI4INDINFO
      END IF
C
C     Complete the integrals to the desired classes
C
      CALL MEMGET('INTE',KDXKR12,NFPCK12T*2,WORK,KFREE,LFREE)
      CALL MKINDXKR (NSTR,TRIAN(1),WORK(KDXKR12))
      CALL MEMGET('INTE',KDXB12,2*NFPCK12T,WORK,KFREE,LFREE)

!MI: fixes dues to out-of-bound checks
      IKIBE21=KIBE(2,1)
      IKIBE22=KIBE(2,2)
      IKIBE23=KIBE(2,3)
      IKIBE24=KIBE(2,4)
      IF (KIBE(2,1).LE.0) IKIBE21=1
      IF (KIBE(2,2).LE.0) IKIBE22=1
      IF (KIBE(2,3).LE.0) IKIBE23=1
      IF (KIBE(2,4).LE.0) IKIBE24=1

      CALL MKINDXB (NSTR,WORK(KIBE(1,1)),WORK(IKIBE21),
     &              WORK(KIBE(1,2)),WORK(IKIBE22),
     &              TRIAN(1),WORK(KDXB12))
      CALL MEMGET('INTE',KDXB34,2*NFPCK34T,WORK,KFREE,LFREE)
      CALL MKINDXB (NSTR,WORK(KIBE(1,3)),WORK(IKIBE23),
     &              WORK(KIBE(1,4)),WORK(IKIBE24),
     &              TRIAN(2),WORK(KDXB34))
C
      CALL GETTIM(CPUST,WALLST)
      CALL XTIME(4,1,' Symmetrizing MO integrals    ')
      WRITE(LUPRI,'(/A,F14.2,A)') 
     & ' - Starting symmetrization after ',
     &  WALLST-WALLBEF,' seconds'
      CALL FLSHFO(LUPRI)
C
C     Collect integrals from slaves
C     and write to MDCINT file if requested.
C
      NIJOFF = 0
      NKLOFF = 0
      DO IREPIJ = 1, NFSYM
         NIJ = NFPCK12(IREPIJ)
         NKL = NFPCK34(IREPIJ)
         CALL DRV4SYM(WORK,KFREE,LFREE,IPRINT,
     &        NSTR(1,0,3),NSTR(1,0,4),
     &        IREPIJ,NIJ,WORK(KDXKR12),NIJOFF,
     &        WORK(KDXB12),WORK(KDXB34),NKLOFF)
         NIJOFF = NIJOFF + NIJ
         NKLOFF = NKLOFF + NKL
      ENDDO
C        
      CALL XTIME(4,2,' Symmetrizing MO integrals    ')
      CALL GETTIM(CPUST,WALLST)
      WRITE(LUPRI,'(A,F14.2,A/)') 
     &     ' - Finished symmetrization after ',
     &     WALLST-WALLBEF,' seconds'
      CALL FLSHFO(LUPRI)
C
      IF (.NOT. NOMDCINT) THEN
C        
C        Close MOLFDIR integral file and
C        delete scratch file(s) (only opened on slaves, if parcal)
C        
         CALL MDINTF
         IF (.NOT.PARCAL) CALL MDSCRF
C
      ELSE
C
C        Write pointers etc. for 4IND*XXXX0 files to 4INDINFO file.
C
C        Number of IJ indices for each fermion symmetry
C        Number of KL indices for each fermion symmetry
C        Offsets
         WRITE(LUMLF2) NFPCK12,NFPCK12T,NFPCK34,NFPCK34T,NSTR
C        I and J indices
         CALL WRITI(LUMLF2,NFPCK12T*2,WORK(KDXKR12))
C        Boson irrep for IJ indices
         CALL WRITI(LUMLF2,NFPCK12T*2,WORK(KDXB12))
C        Boson irrep for KL indices
         CALL WRITI(LUMLF2,NFPCK34T*2,WORK(KDXB34))
C        Boson irreps for each index
         DO I = 1, 4
            DO J = 1, NFSYM
               CALL WRITI(LUMLF2,NSTR(J,0,I),WORK(KIBE(J,I)))
            END DO
         END DO
C
C        Close 4INDINFO and the direct access 4IND*XXXX0 files.
C
         CLOSE (LUMLF2, STATUS='KEEP')
         CALL MDSCRF
C
      END IF
C
C     Print timing report
C
      CALL XTIME(0,4,' integral transformation      ')
C
C     Release the slaves if we run in parallel.
C        ( ITASK = -1 )
C
      IF (PARCAL) CALL DIRAC_PARCTL( EXIT_NODEMENU )
C
#if defined (VAR_PFS)
C
C       close global file (opened in TRAPARI)
C
        CALL GLOBAL_FILE_CLOSE_READ
#endif
C
      CALL MEMREL('TRDR4T',WORK,KFRSAV,KFRSAV,KFREE,LFREE)
      CALL QEXIT('TRDR4T')
C
      RETURN
C
      END
C&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
C/* Deck TRDRV4 */
      SUBROUTINE TRDRV4(WORK,KFREE,LFREE,IPRINT,INTFLG,
     &                  NSTR,ANTIS,LMP2,INDX,TRIAN,KQ,KE,KIBE,
     &                  IC,I2TYP,IBSTRT,IBEND,GABRAO,DRIJ,DINTSKP)
C
C     Written by Luuk Visscher, december 1996
C     Screening by T.Saue Sep 8 1998
C
C&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
#include "implicit.h"
#include "maxaqn.h"
#include "priunit.h"
#include "iratdef.h"
#include "maxorb.h"
#include "aovec.h"
#include "mxcent.h"
C
#include "dcbgen.h"
#include "dcbtra.h"
#include "dcbtr3.h"
#include "dcbbas.h"
#include "dcborb.h"
#include "dgroup.h"
#include "twosta.h"
#include "blocks.h"
#include "nuclei.h"
C     MXGBFSZ determines direct access record lengths, can be used to tune I/O
      PARAMETER (MXGBFSZ=4096)
      LOGICAL ANTIS, LMP2, NOPV, NODV, TRIAN(2)
      CHARACTER SPDCAR*1,COMP(2)*1
      DIMENSION NSTR(2,0:2,4),INDX(3,*)
      DIMENSION KQ(2,4),KE(2,4),KIBE(2,4)
      DIMENSION IJP12(0:7)
      DIMENSION IHTM(0:7),MHTM(2),NHTM(2), IOFFB(0:7)
      DIMENSION IRECC(2),NBFSZ(2),NBUF(2),NIJBUF(2),NHTIM(2),IBOFF(2)
      DIMENSION WORK(*),GABRAO(*),DRIJ(*),DINTSKP(*)
C
      DATA COMP/'L','S'/
C
      CALL QENTER('TRDRV4')
      KFRSAV = KFREE
C
C     We transform all shells in the first halftransformation
C
      CALL ICOPY ( 32,NSPCK,1,NSPCK12,1)
      CALL ICOPY (196,ISPCK,1,ISPCK12,1)
      CALL ICOPY ( 24,NBBAS,1,NBBAS1, 1)
      CALL ICOPY ( 24,NBBAS,1,NBBAS2, 1)
      CALL ICOPY ( 24,IBBAS,1,IBBAS1, 1)
      CALL ICOPY ( 24,IBBAS,1,IBBAS2, 1)
      CALL ICOPY (  2,IBAS, 1,IBAS1,  1)
      CALL ICOPY (  2,IBAS, 1,IBAS2,  1)
C
C     Copy the first half of the NDMOQ and ICMOQ arrays
C
      CALL ICOPY (8,NDMOQR,1,NDMOQS,1)
      CALL ICOPY (8,NDMOQR,1,NDMOQT,1)
      CALL ICOPY (4,ICMOQR,1,ICMOQS,1)
      CALL ICOPY (4,ICMOQR,1,ICMOQT,1)
C
C     Set up symmetry packing for reduced shell space in the second
C     half transformation. The fourth index is taken completely, the
C     third has only one shell active.
C
C     Now with range for B index /may 2001 jth+hjaaj
C
      CALL GISPCK(NSPCK34,ISPCK34,ISHLA,IBSTRT,IBEND,
     &     NBBAS3,NBBAS4,IBBAS3,IBBAS4,IBAS3,IBAS4)
C
C     Determine the type of the transformed indices :
C     Coulomb is LL or SS, Gaunt is SL.
C
      IF (IC.LE.2) THEN
         IC3 = IC
         IC4 = IC
      ELSE
         IC3 = 2
         IC4 = 1
      ENDIF
C
C     Generate pointers for reduced coefficient matrices
C
C
      NQ3S = 0
      NQ4S = 0
      NQ3T = 0
      NQ4T = 0
      DO IFSYM = 1, NFSYM
         NROW3 = 0
         NROW4 = 0
         DO IREPR = 0, NBSYM-1 
            IF (IFSYM.EQ.JBTOF(IREPR,IC3)) THEN
               NROW3 = NROW3 + NBBAS3(IREPR,IC3)
            ENDIF
            IF (IFSYM.EQ.JBTOF(IREPR,IC4)) THEN
               NROW4 = NROW4 + NBBAS4(IREPR,IC4)
            ENDIF
         ENDDO
         NDMOQS(1,IFSYM,3) = NROW3
         NDMOQS(2,IFSYM,3) = NDMOQR(2,IFSYM,3)
C
         NDMOQS(1,IFSYM,4) = NROW4
         NDMOQS(2,IFSYM,4) = NDMOQR(2,IFSYM,4)
C
         NDMOQT(1,IFSYM,3) = NROW4
         NDMOQT(2,IFSYM,3) = NDMOQR(2,IFSYM,3)
C
         NDMOQT(1,IFSYM,4) = NROW3
         NDMOQT(2,IFSYM,4) = NDMOQR(2,IFSYM,4)
C
         ICMOQS(IFSYM,3) = NQ3S + 1
         ICMOQS(IFSYM,4) = NQ4S + 1
         ICMOQT(IFSYM,3) = NQ3T + 1
         ICMOQT(IFSYM,4) = NQ4T + 1
C
         NQ3S = NQ3S + NROW3 * NDMOQR(2,IFSYM,3) * NZ
         NQ4S = NQ4S + NROW4 * NDMOQR(2,IFSYM,4) * NZ
         NQ3T = NQ3T + NROW4 * NDMOQR(2,IFSYM,3) * NZ
         NQ4T = NQ4T + NROW3 * NDMOQR(2,IFSYM,4) * NZ
      ENDDO
C
C     Make index array for half-transformed integrals
C     For the Coulomb integrals we have a real operator
C     multiplied by one quaternion B-matrix (see 
C     L. Visscher, J. Comp Chem, 2002). For the Gaunt
C     integrals the operator is now q-imaginary, so we
C     need to store three times more. Note also
C     the change of parity of SL relative to LL or SS. 
C
      IF (IC.LE.2) THEN
        NHTMT = 0
        DO IREPIJ = 1, NFSYM
           MHTM(IREPIJ) = 0
           NHTM(IREPIJ) = 0
           DO IREPAB = 0, NBSYM-1
              IF (IREPIJ.EQ.JBTOF(IREPAB,1)) THEN
                 IHTM(IREPAB) = MHTM(IREPIJ) + NHTMT
                 MHTM(IREPIJ) = MHTM(IREPIJ) + NSPCK34(IREPAB,IC)*NZ
              ENDIF
            NHTM(IREPIJ) = MHTM(IREPIJ)*NFPCK12(IREPIJ)
           ENDDO
           NHTMT = NHTMT + NHTM(IREPIJ)
        ENDDO
      ELSE
        NHTMT = 0
        DO IREPIJ = 1, NFSYM
           MHTM(IREPIJ) = 0
           NHTM(IREPIJ) = 0
           DO IREPAB = 0, NBSYM-1
              IF (IREPIJ.EQ.JBTOF(IREPAB,2)) THEN
                 IHTM(IREPAB) = MHTM(IREPIJ) + NHTMT
                 MHTM(IREPIJ) = MHTM(IREPIJ) + NSPCK34(IREPAB,IC)*NZ*3
              ENDIF
            NHTM(IREPIJ) = MHTM(IREPIJ)*NFPCK12(IREPIJ)
           ENDDO
           NHTMT = NHTMT + NHTM(IREPIJ)
        ENDDO
      ENDIF
C
      NSIZET = 0
      NBUFM = 0
      DO IREPIJ = 1, NFSYM
C
         IF (NHTM(IREPIJ).EQ.0) THEN
            NIJBUF(IREPIJ) = 0 
            NHTIM (IREPIJ) = 0 
            NBUF  (IREPIJ) = 0
            NBFSZ (IREPIJ) = 0
            GOTO 90 
         ENDIF
C
C        Maximum size of the number of half-transformed integrals
C        that we can keep in core. Fix it to 1 million.
C
         NHTIM(IREPIJ) = 1 000 000
         NHTIM(IREPIJ) = MIN0(NHTM(IREPIJ),NHTIM(IREPIJ))
C
C        Calculate the maximum number of ij-pairs that we can handle
C
         NIJBUF(IREPIJ) = NHTIM(IREPIJ) / MHTM(IREPIJ)
C
C        We can now deduce the actual size of NHTIM(IREPIJ)
C
         NHTIM(IREPIJ) = NIJBUF(IREPIJ) * MHTM(IREPIJ)
C
C        Calculate the number of buffers
C
         NBUF(IREPIJ) = NFPCK12(IREPIJ) / NIJBUF(IREPIJ)
         IF (MOD(NFPCK12(IREPIJ),NIJBUF(IREPIJ)).NE.0) THEN
            NBUF(IREPIJ) = NBUF(IREPIJ) + 1
         ENDIF
         NBUFM = MAX0(NBUFM,NBUF(IREPIJ)+1)
C
C        Choose the size of the buffer array : it should be a multiple
C        (named NB) of the number of ij-pairs in one batch multiplied 
C        by NZ
C
         NSIZE = 100 000
         NSIZE = MIN0(NHTM(IREPIJ),NSIZE)
         NB = NSIZE / (NIJBUF(IREPIJ)*NBUF(IREPIJ)*NZ)
         IF (NB.LT.1) NB = 1
         NSIZE = NB * NIJBUF(IREPIJ) * NBUF(IREPIJ) * NZ
C
C        Pointer to the begin of buffer blocks for this irrep
C        All buffers are stored in one 1-dimensional array, but you may
C        think of it as NFSYM sets of 2-dimensional matrices, with the
C        first dimension the size of the buffer and the second the
C        number of buffers for this fermion irrep
C
         IBOFF(IREPIJ) = NSIZET+1
         NSIZET = NSIZET + NSIZE
         NBFSZ(IREPIJ) = NB * NIJBUF(IREPIJ) * NZ
  90     CONTINUE
         IF (IPRINT .GE. 2) WRITE (LUPRI,'(//,4(1X,A,I8,/))')
     &              'Buffer information for parity :',irepij,
     &              'Number of buffers :',nbuf(irepij),
     &              'Pairs in  buffers :',nijbuf(irepij),
     &              'Size   of buffers :',nbfsz(irepij) 
      ENDDO
C
C     Get the memory for the buffers
C
      CALL MEMGET('REAL',KJBOFF,2*NBUFM,WORK,KFREE,LFREE)
      CALL MEMGET('REAL',KRBUF,NSIZET,WORK,KFREE,LFREE)
      CALL MEMGET('INTE',KLBUF1,NSIZET,WORK,KFREE,LFREE)
C
      CALL INITBUF (IRECC,NBFSZ,NBUF,WORK(KJBOFF))
C
      IF (I2TYP.LT.0) THEN
         GOTO 999
      ELSEIF (I2TYP.EQ.1) THEN
         ICS12 = 1
         ICF12 = 1
      ELSEIF (I2TYP.EQ.2) THEN
         ICS12 = 2
         ICF12 = 2
      ELSEIF (I2TYP.EQ.4) THEN
         ICS12 = 3
         ICF12 = 3
      ELSE
         ICS12 = 1
         ICF12 = 2
      ENDIF
C
C     Memory requirements for HERMIT:
C
      MWHER = MXMEMHER(IC,I2TYP)
C
C     Loop over the shells
C
      CALL IZERO(IOFFB,8)
      DO ISHLB = IBSTRT, IBEND
C
         KFRSAV3 = KFREE
         NINSHA  = NINSH(ISHLA,-1,INDX,IPRINT)
         NINSHB  = NINSH(ISHLB,-1,INDX,IPRINT)
         IF (IPRINT .GE. 3) THEN
            NCENTA = NCNTSH(ISHLA)   
            NCENTB = NCNTSH(ISHLB)   
            ICA    = LCLASH(ISHLA)
            ICB    = LCLASH(ISHLB)

            WRITE(LUPRI,'(/A,2(I4,A1,A4,1X,3A1))')
     &      '    ---> TRDRV4 : integrals for shell A/B:',
     &        ISHLA,'(',NAMN(NCENTA),COMP(ICA),
     &        SPDCAR(NHKTSH(ISHLA)-1),')',
     &        ISHLB,'(',NAMN(NCENTB),COMP(ICB),
     &        SPDCAR(NHKTSH(ISHLB)-1),')'
         ENDIF
C
         LDXAB = 5*NINSHA*NINSHB
         CALL MEMGET('INTE',KDXAB  ,LDXAB  ,WORK,KFREE,LFREE)
C
C           Set up gather array for distributions, 
C           calculate memory requirements for this batch.
C
C           NOTE : The actual dimension of INDXAB is kept in the
C           common block dcbtra.h. When we want to parallelize it may
C           be better to allocate it always as MXINSH*MXINSH*4,
C           or (even better) to send it also down to HERMIT.
C           Luuk. TODO
C
C           First a dummy run to calculate the memory requirements
C           for GMAT. We assume that HMAT is not more than twice GMAT,
C           and reserve the same memory also for use in HERMIT.
C
C        The size of HMAT is approximately: 
C           NUMDIS * NFPCK12(IREPIJ) * NZ / NPASS
C
C        (1) overhead: Subtract 10% or 1MW from available memory 
C
         MFREE = LFREE - MIN(LFREE/10, 1 000 000)
C
C        (2) We must be able to hold
C            (a) GMAT + HERMIT
C            (b) GMAT + HMAT
C            simultaneously in memory.
C 
C
         IF (MWHER .GT. MFREE) CALL QUIT('Not enough memory for HERMIT')
C            
         MWHMAT = 0
         IT = 0
 2000    CONTINUE
            IT = IT + 1
            NSIZG = MFREE - MAX(MWHMAT,MWHER)
C
C           Calculate NUMDIS and NSIZG
C           --------------------------
C
            NUMDIS = NDISTRN(I2TYP,IPRINT,.FALSE.,.FALSE.,.FALSE.,
     &              .FALSE.,INDX,WORK(KFREE),IJP12,IDUM,NSIZG,IDUM)
C
C           Setup INDXAB & IJPASS
C           ---------------------
C
            CALL MEMGET('INTE',KIJPASS,8*NPASS,WORK,KFREE,LFREE)
C
            NUMDIS = NDISTRN(I2TYP,IPRINT,.TRUE.,.TRUE.,.FALSE.,.FALSE.,
     &                       INDX,WORK(KDXAB),IJP12,IDUM,
     &                       NSIZG,WORK(KIJPASS))
C
C           Calculate MWHMAT:
C           -----------------
C
            MWHMAT = MWHMATCALC(WORK(KIJPASS),NPASS,NFPCK12)
C
            CALL MEMREL('TRDRV4.mwhmat calc',WORK,1,KIJPASS,KFREE,LFREE)
C
C           Check that we fit in memory
C           ---------------------------
C
            IF (IPRINT .GE. 3) THEN
               WRITE(LUPRI,'(A,I3,A/4(A,I16))')
     &         '       - TRDRV4 memory estimate iteration no. ',IT,' :',
     &         '         Free: ',MFREE,', Hermit: ',MWHER,
     &         ', HMAT ',MWHMAT,', GMAT ',NSIZG
            END IF
            CALL FLSHFO(LUPRI)
            IF ( (NSIZG + MAX(MWHMAT,MWHER) ) .GT. MFREE ) THEN
               IF ( MWHMAT .GT. MFREE) THEN
                  MWHMAT = MFREE * 9 / 10
               END IF
               GOTO 2000
            END IF
C
C        Final calculation of INDXAB & IJPASS
C
         CALL MEMGET('INTE',KIJPASS,8*NPASS,WORK,KFREE,LFREE)
C
         NUMDIS = NDISTRN(I2TYP,IPRINT,.TRUE.,.TRUE.,.FALSE.,.FALSE.,
     &                    INDX,WORK(KDXAB),IJP12,IDUM,
     &                    NSIZG,WORK(KIJPASS))
C
         IF (IPRINT .GE. 3) THEN
            WRITE (LUPRI,'(//,4(1X,A,2I4,/))')
     &      ' Calculation of integral distribution set:',ISHLA,ISHLB
            WRITE (LUPRI,'(//,4(1X,A,I16,/))')
     &      ' Number of distributions in this set:     ',NUMDIS,
     &      ' Maximum size of scalar integral batch:   ',NSIZG,
     &      ' Number of passes :                       ',NPASS
         END IF
C
         IF (NPASS.GT.1) THEN
C
C           Initialize scalar integral buffer file and arrays
C           Use the space reserved for GMAT now as buffer storage
C
            LGFIL = LUTRA2
            NGBFSZ = (IRAT*NSIZG)/((1+IRAT)*NPASS)
C
C           The buffer size may give rise to too large record lengths, restrict it to
C           a size that gives an acceptable performance. Be ware that this is machine
C           dependent and may be used to tune I/O performance.
C
            NGBFSZ = MIN(NGBFSZ,MXGBFSZ)
C
C           Allocate the buffer arrays in WORK and set KGMAT to the
C           start of this allocation section. We now only need to
C           send down GMAT into HERMIT to find this section.
C
            CALL MEMGET('INTE',KALGREC,NPASS,WORK,KFREE,LFREE)
            CALL MEMGET('INTE',KALGBUF,NPASS,WORK,KFREE,LFREE)
            CALL MEMGET('INTE',KAIGBUF,NPASS*NGBFSZ,WORK,KFREE,LFREE)
            CALL MEMGET('REAL',KARGBUF,NPASS*NGBFSZ,WORK,KFREE,LFREE)
C
C           As we refer to the places inside the section we need
C           to subtract KGMAT.
C
            KGMAT  = KALGREC
            KLGREC = KALGREC - KGMAT + 1
            KLGBUF = KALGBUF - KGMAT + 1
            KIGBUF = KAIGBUF - KGMAT + 1
            KRGBUF = KARGBUF - KGMAT + 1
         ELSE
C
C           Write directly into the array GMAT
C
            CALL MEMGET('REAL',KGMAT,NSIZG,WORK,KFREE,LFREE)
            CALL DZERO(WORK(KGMAT),NSIZG)
            KLGBUF = 1
            KALGREC = KFREE
         ENDIF
C
C        Get distributions
C
         NODV = NASHT.EQ.0
         NOPV = NASHT.LT.2
         CALL CALDIS(I2TYP,WORK(KGMAT),INDX,WORK(KDXAB),
     &               NODV,NOPV,GABRAO,DRIJ,DINTSKP,SCRTRA,
     &               WORK(KFREE),LFREE,IPRINT)
C
C        Do the transformation of the first index pair and sort the
C        half-transformed integrals to file.
C
C        De-allocate buffers and allocate GMAT if necessary.
C
         IF (NPASS.GT.1) THEN
            CALL MEMREL('TRDRV4.1',WORK,1,KALGBUF,KFREE,LFREE)
            CALL MEMGET('REAL',KGMAT,NSIZG,WORK,KFREE,LFREE)
            CALL DZERO(WORK(KGMAT),NSIZG)
         ENDIF
C
         CALL MS4IN1D(WORK,KFREE,LFREE,IPRINT,.TRUE.,.TRUE.,IOFFB,
     &                WORK(KALGREC),ICS12,ICF12,IC,
     &                TRIAN(1),NSTR(1,0,1),NSTR(1,0,2),
     &                WORK(KIJPASS),WORK(KQ(1,1)),WORK(KQ(1,2)),
     &                WORK(KGMAT),NSIZG,
     &                MHTM,IHTM,INDX,WORK(KDXAB),IRECC,WORK(KJBOFF),
     &                IBOFF,NBUF,NIJBUF,NBFSZ,WORK(KRBUF),
     &                WORK(KLBUF1))
C
         CALL MEMREL('TRDRV4.2',WORK,1,KFRSAV3,KFREE,LFREE)
C
C        Delete scalar integral buffer file
C
         IF (NPASS.GT.1) THEN
            CALL DELGBUF(LGFIL)
         ENDIF
C
         DO IREPB = 0,NBSYM-1
            IOFFB(IREPB) = IOFFB(IREPB) + NINSH(ISHLB,IREPB,INDX,IPRINT)
         END DO
      ENDDO
C
C     Write out last set of buffers
C     De-allocate buffer space
C
      CALL FINABUF (IRECC,NBFSZ,NBUF,IBOFF,
     &              WORK(KJBOFF),WORK(KRBUF),WORK(KLBUF1))
      CALL MEMREL('TRDRV4.3',WORK,1,KJBOFF,KFREE,LFREE)
C
C     Second half-transformation
C
C
C     Get the reduced coefficient matrices (only active shells)
C
      CALL MEMGET('REAL',KQ3S,NQ3S,WORK,KFREE,LFREE)
      CALL MEMGET('REAL',KQ4S,NQ4S,WORK,KFREE,LFREE)
      CALL MEMGET('REAL',KQ3T,NQ3T,WORK,KFREE,LFREE)
      CALL MEMGET('REAL',KQ4T,NQ4T,WORK,KFREE,LFREE)
C
      DO IFSYM = 1, NFSYM
C       index A (3S and 4T)
        IF(NDMOQR(2,IFSYM,3)*NDMOQS(2,IFSYM,3).NE.0) THEN
          CALL QREDUC (ISHLA,ISHLA,IFSYM,WORK(KQ(IFSYM,3)),
     &                NDMOQR(1,IFSYM,3),NDMOQR(2,IFSYM,3),
     &                WORK(KQ3S+ICMOQS(IFSYM,3)-1),
     &                NDMOQS(1,IFSYM,3),NDMOQS(2,IFSYM,3))
        ENDIF
        IF(NDMOQR(2,IFSYM,4)*NDMOQT(2,IFSYM,4).NE.0) THEN
         CALL QREDUC (ISHLA,ISHLA,IFSYM,WORK(KQ(IFSYM,4)),
     &                NDMOQR(1,IFSYM,4),NDMOQR(2,IFSYM,4),
     &                WORK(KQ4T+ICMOQT(IFSYM,4)-1),
     &                NDMOQT(1,IFSYM,4),NDMOQT(2,IFSYM,4))
        ENDIF
C       index B (4S and 3T)
        IF(NDMOQR(2,IFSYM,4)*NDMOQS(2,IFSYM,4).NE.0) THEN
          CALL QREDUC (IBSTRT,IBEND,IFSYM,WORK(KQ(IFSYM,4)),
     &                NDMOQR(1,IFSYM,4),NDMOQR(2,IFSYM,4),
     &                WORK(KQ4S+ICMOQS(IFSYM,4)-1),
     &                NDMOQS(1,IFSYM,4),NDMOQS(2,IFSYM,4))
        ENDIF
        IF(NDMOQR(2,IFSYM,3)*NDMOQT(2,IFSYM,3).NE.0) THEN
         CALL QREDUC (IBSTRT,IBEND,IFSYM,WORK(KQ(IFSYM,3)),
     &                NDMOQR(1,IFSYM,3),NDMOQR(2,IFSYM,3),
     &                WORK(KQ3T+ICMOQT(IFSYM,3)-1),
     &                NDMOQT(1,IFSYM,3),NDMOQT(2,IFSYM,3))
        ENDIF
      ENDDO
C
C
      ICS34 = IC
      ICF34 = IC
C
      CALL MEMGET('INTE',KDXKR12,NFPCK12T*2,WORK,KFREE,LFREE)
      CALL MKINDXKR (NSTR,TRIAN(1),WORK(KDXKR12))
      CALL MEMGET('INTE',KDXB12,2*NFPCK12T,WORK,KFREE,LFREE)

!MI: fixes dues to out-of-bound checks
      IKIBE21=KIBE(2,1)
      IKIBE22=KIBE(2,2)
      IKIBE23=KIBE(2,3)
      IKIBE24=KIBE(2,4)
      IF (KIBE(2,1).LE.0) IKIBE21=1
      IF (KIBE(2,2).LE.0) IKIBE22=1
      IF (KIBE(2,3).LE.0) IKIBE23=1
      IF (KIBE(2,4).LE.0) IKIBE24=1

      CALL MKINDXB (NSTR,WORK(KIBE(1,1)),WORK(IKIBE21),
     &              WORK(KIBE(1,2)),WORK(IKIBE22),
     &              TRIAN(1),WORK(KDXB12))
      CALL MEMGET('INTE',KDXB34,2*NFPCK34T,WORK,KFREE,LFREE)
      CALL MKINDXB (NSTR,WORK(KIBE(1,3)),WORK(IKIBE23),
     &              WORK(KIBE(1,4)),WORK(IKIBE24),
     &              TRIAN(2),WORK(KDXB34))
C
      CALL MS4IN2D(WORK,KFREE,LFREE,IPRINT,ICS34,ICF34,
     &             NSTR(1,0,3),NSTR(1,0,4),
     &             WORK(KQ3S),WORK(KQ4S),
     &             WORK(KQ3T),WORK(KQ4T),
     &             IRECC,NBFSZ,NBUF,NIJBUF,MHTM,WORK(KDXKR12),
     &             WORK(KDXB12),WORK(KDXB34))
C
C     Delete half-transformed integral files and intermediary file
C
      CALL DELEBUF
C
C     Release all memory and exit
C
 999  CONTINUE
      CALL MEMREL('TRDRV4.5',WORK,1,KFRSAV,KFREE,LFREE)
      CALL QEXIT('TRDRV4')
C
      RETURN
C
c1000 FORMAT (/' Storing all half-transformed integrals to disk',
c    & /' Disk requirements ',F10.3,' Megabytes')
      END
C&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
C/* Deck DRV4SYM*/
      SUBROUTINE DRV4SYM(WORK,KFREE,LFREE,IPRINT,NSTR3,NSTR4,
     & IREPIJ,NIJ,INDXKR,NIJOFF,INDXB12,INDXB34,NKLOFF)
C&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
C
C     Written by Luuk Visscher January 1997.
C
C     PURPOSE : Do 4-index transformation to molecular spinor basis
C               Symmetrization of integrals (which are on local disks).
C
C     Input :
C
C     - IPRINT       Print flag
C     - NSTR3        Number of active spinors for index 3
C     - NSTR4        Number of active spinors for index 4
C     - IREPIJ       Fermion symmetry (parity)
C     - NIJ          Number of blocks for this fermion symmetry
C     - Q3           Coefficients for index 3
C     - Q4           Coefficients for index 4
C
C     Output is written directly in the MOLFDIR-type file MDCINT
C
      use interface_to_mpi
#include "implicit.h"
#include "priunit.h"
#include "maxorb.h"
#include "dcbgen.h"
#include "infpar.h"
      INTEGER   NSTR3(2),NSTR4(2)
      DIMENSION WORK(*)
      DIMENSION INDXKR(2,*),INDXB12(2,*),INDXB34(2,*)
#include "dgroup.h"
#include "dcbtra.h"
#include "dcbtr3.h"
C
      KFRSAV = KFREE
C
C     Number of boson symmetry for each fermion irrep (parity)
C
      NBSYMP = NBSYM/NFSYM
C
C     Size of integral batches.
C
      NKL = NFPCK34(IREPIJ)
      LHMAT = NKL*NZ*NZ*NBSYMP
      CALL MEMGET('REAL',KHMAT,LHMAT,WORK,KFREE,LFREE)
      CALL MEMGET('REAL',KHMATS,LHMAT,WORK,KFREE,LFREE)
      CALL DZERO (WORK(KHMAT),LHMAT)
C
#if defined (VAR_MPI)
      IF (PARCAL) THEN
        DO IJ = 1, NIJ
          IKR = INDXKR(1,IJ+NIJOFF)
          JKR = INDXKR(2,IJ+NIJOFF)
C
C         Read integrals on all slaves.
C
          IF (MYTID.NE.MPARID)
     &       CALL MDSCRR(IREPIJ,IJ,LHMAT,WORK(KHMAT))
C
C         Sum up all contribution on the master node.
C
          call interface_mpi_REDUCE(WORK(KHMAT),WORK(KHMATS),LHMAT,
     &                              op_MPI_SUM,MPARID,
     &                              global_communicator)
C
C         Put integrals on all nodes
C
          IF (MDCSCAT) THEN
C
             call interface_mpi_BCAST(WORK(KHMATS),LHMAT,
     &                                MPARID,global_communicator)
C
          END IF
C
          IF (.NOT. NOMDCINT) THEN
             IF (MYTID.EQ.MPARID .OR. MDCSCAT) THEN
                CALL SYMFINT(IPRINT,IREPIJ,
     &                       INDXB12(1,IJ+NIJOFF),INDXB34(1,1+NKLOFF),
     &                       IJ,IKR,JKR,NSTR3,NSTR4,NKL,WORK(KHMATS))
             END IF
          ELSE
             IF (MYTID .EQ. MPARID .OR. MDCSCAT) THEN
                CALL MDSCRW(IREPIJ,IJ,LHMAT,WORK(KHMATS))
             END IF
          END IF
        ENDDO
      ELSE
#endif
         IF (.NOT. NOMDCINT) THEN
            DO IJ = 1, NIJ
               IKR = INDXKR(1,IJ+NIJOFF)
               JKR = INDXKR(2,IJ+NIJOFF)
               CALL MDSCRR(IREPIJ,IJ,LHMAT,WORK(KHMAT))
               CALL SYMFINT(IPRINT,IREPIJ,
     &                      INDXB12(1,IJ+NIJOFF),INDXB34(1,1+NKLOFF),
     &                      IJ,IKR,JKR,NSTR3,NSTR4,NKL,WORK(KHMAT))
            END DO
         END IF
#if defined (VAR_MPI)
      ENDIF
#endif
      CALL MEMREL('DRV4SYM',WORK,1,KFRSAV,KFREE,LFREE)
C
      RETURN
      END
C&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
C  /* Deck qreduc */
      SUBROUTINE QREDUC (ISHELSTA,ISHELEND,IFSYM,
     &                   QM,NDIM1,NDIM2,QMR,NDIMR1,NDIMR2)
C
C     Takes out active rows of coefficient matrix for a given irrep.
C     NDIM and NDIMR is now NDIM1,NDIM2,NDIMR1 and NDIMR2
C
C     Luuk Visscher, 25-8-97
C     Modified for ISHEL range May 2001 /jth+hjaaj
C
#include "implicit.h"
#include "priunit.h"
#include "maxorb.h"
#include "aovec.h"
#include "blocks.h"
#include "dcbtra.h"
#include "dgroup.h"
#include "dcbbas.h"
#include "mxcent.h"
#include "maxaqn.h"
#include "symmet.h"
C
      DIMENSION QM(NDIM1,NDIM2,NZ),QMR(NDIMR1,NDIMR2,NZ)
C
#include "ibtfun.h"
C
      INDA2 = 0
      DO IREPA1 = 0, MAXREP
C
         DO ISHELA1 = ISHELSTA, ISHELEND
C
            ICA    = LCLASH(ISHELA1)
            NHKTA1 = NHKTSH(ISHELA1)
            KHKTA1 = KHKTSH(ISHELA1)
            MULA1  = ISTBSH(ISHELA1)
            NORBA1 = NORBSH(ISHELA1)
            NSTRA1 = IORBSB(IORBSH(ISHELA1,1))
C
C
C        Loop over components
C
         DO NA = 1,KHKTA1
            NSTRNA = NSTRA1 + NA
            ITYNA  = ISYMAO(NHKTA1,NA)
            IF ( JBTOF(IREPA1,ICA).EQ.IFSYM .AND.
     &           IBTAND(MULA1,IBTXOR(IREPA1,ITYNA)) .EQ. 0) THEN
C
C              Loop over contracted orbitals
C
               DO IA = 1,NORBA1
                  INDA = IPTSYM(NSTRNA + KHKTA1*(IA-1),IREPA1)
                  INDA1 = INDA - ICOS(IREPA1+1,ICA) 
     &                  + IBBAS(IREPA1,ICA) - IBAS(IFSYM)
                  INDA2 = INDA2 + 1
C
C                 This function is active : copy the row to the reduced
C                 matrix
C
                  DO IZ = 1, NZ
                     CALL DCOPY (NDIMR2,QM(INDA1,1,IZ),NDIM1,
     &                           QMR(INDA2,1,IZ),NDIMR1)
                  ENDDO
               ENDDO
            ENDIF
         ENDDO
C        ... end do NA
      ENDDO
C     ... end do IREPA1
      ENDDO
C     ... end do ISHELA1
      IF (INDA2 .NE. NDIMR1) THEN
         CALL QUIT('QREDUC: program error INDA2 .ne. NDIMR1')
      END IF
      RETURN
      END
C&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
C Start of parallel section : activitate compilation only when necessary
C&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
#if defined (VAR_MPI)
C&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
C/* Deck Tr4par */
      SUBROUTINE TR4PAR(WORK,KFREE,LFREE,IPRINT,KQ,KE,KIBE,
     &             KINDX,GABRAO,DRIJ,INTFLG,NSTR,ANTIS,
     &             LMP2,EMP2,DINTSKP)
C***********************************************************************
C
C     Slave driver for scheme 4.
C
C     Written by T.Saue Sep 17 1998
C     Revised May 15 2001 jth/hjaaj
C
C***********************************************************************
      use interface_to_mpi
#include "implicit.h"
#include "priunit.h"
C
#include "maxorb.h"
#include "infpar.h"
#include "dcbgen.h"
#include "dcborb.h"
#include "dcbtra.h"
#include "dcbtr3.h"
#include "dcbbas.h"
#include "dgroup.h"
      LOGICAL ANTIS,LMP2,TRIAN(2)
      DIMENSION KQ(2,4),KE(2,4),KIBE(2,4),NSTR(2,0:2,4)
      DIMENSION DRIJ(*),GABRAO(*)
      DIMENSION DINTSKP(*),WORK(*), IBUFMPI(5)
C
C     Define the packing of the 2-index transformed integrals
C
      KFRSAV = KFREE
      CALL XTIME(0,-1,'                             ')
      TRIAN(1) = ISAME(2).EQ.ISAME(1)
      TRIAN(2) = .FALSE.
      CALL PCK2IN(NSTR,TRIAN,IPRINT)
C
C     Initialize output file for final integrals
C
      CALL MDSCRI
C
C     Wait for tasks until we receive the end message
C
  100 CONTINUE
#if defined (VAR_MPI)
         ITEST = 0
C        ... ask for new set of A,B shells
         CALL XTIME(2,1,' Slave requesting task from master')
         call interface_mpi_SEND(ITEST,1,MPARID,20,
     &                 global_communicator)
         call interface_mpi_RECV(IBUFMPI,5,MPARID,30,
     &                 global_communicator)
         IC     = IBUFMPI(1)
         I2TYP  = IBUFMPI(2)
         ISHLA  = IBUFMPI(3)
         ISHLBS = IBUFMPI(4)
         ISHLBE = IBUFMPI(5)
         CALL XTIME(2,2,' Slave requesting task from master')
         IF (ISHLA.EQ.-1) GOTO 200
C
C        ISHLB range: ISHLBS to ISHLBE
C
#endif
         CALL XTIME(5,1,' Computing+transform. integrals')
         CALL TRDRV4(WORK,KFREE,LFREE,IPRINT,INTFLG,
     &               NSTR(1,0,1),ANTIS,LMP2,WORK(KINDX),TRIAN,
     &               KQ,KE,KIBE,IC,I2TYP,ISHLBS,ISHLBE,
     &               GABRAO,DRIJ,DINTSKP)
         CALL XTIME(5,2,' Computing+transform. integrals')
         GOTO 100
  200 CONTINUE
C
C     Send statistics (and EMP2 if LMP2)
C
      CALL TRARES(WORK,KFREE,LFREE,DINTSKP,LMP2,DUM,DUM,DUM)
C
C     Complete the integrals to the desired classes
C
      CALL MEMGET('INTE',KDXKR12,NFPCK12T*2,WORK,KFREE,LFREE)
      CALL MKINDXKR (NSTR,TRIAN(1),WORK(KDXKR12))
      CALL MEMGET('INTE',KDXB12,2*NFPCK12T,WORK,KFREE,LFREE)
      CALL MKINDXB (NSTR,WORK(KIBE(1,1)),WORK(KIBE(2,1)),
     &              WORK(KIBE(1,2)),WORK(KIBE(2,2)),
     &              TRIAN(1),WORK(KDXB12))
      CALL MEMGET('INTE',KDXB34,2*NFPCK34T,WORK,KFREE,LFREE)
      CALL MKINDXB (NSTR,WORK(KIBE(1,3)),WORK(KIBE(2,3)),
     &              WORK(KIBE(1,4)),WORK(KIBE(2,4)),
     &              TRIAN(2),WORK(KDXB34))
      IF (MDCSCAT) THEN
C
C        Initialize MOLFDIR integral file on slave
C
         CALL MDINTI(IPRINT)
      END IF
C
      CALL XTIME(4,1,' Symmetrizing MO integrals    ')
      NIJOFF = 0
      NKLOFF = 0
      DO IREPIJ = 1, NFSYM
         NIJ = NFPCK12(IREPIJ)
         NKL = NFPCK34(IREPIJ)
         CALL DRV4SYM(WORK,KFREE,LFREE,IPRINT,NSTR(1,0,3),NSTR(1,0,4),
     &                IREPIJ,NIJ,WORK(KDXKR12),NIJOFF,
     &                WORK(KDXB12),WORK(KDXB34),NKLOFF)
         NIJOFF = NIJOFF + NIJ
         NKLOFF = NKLOFF + NKL
      ENDDO
      CALL XTIME(4,2,' Symmetrizing MO integrals    ')
C
C     Close MOLFDIR integral file and delete direct access file
C
      IF (MDCSCAT) CALL MDINTF
C
 1000 CONTINUE
      CALL MDSCRF
      CALL XTIME(0,4,' MOLTRA integral transformation')
      CALL MEMREL('TR4PAR',WORK,KFRSAV,KFRSAV,KFREE,LFREE)
      RETURN
C
      END
C&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
C& End of parallel section : activitate compilation only when necessary
#endif
      FUNCTION MWHMATCALC(IJPASS,NPASS,NFPCK12)
#include "implicit.h"
#include "priunit.h"
C
#include "dgroup.h"
C
      DIMENSION IJPASS(0:7,NPASS)
      DIMENSION NFPCK12(2)
C
      MSIZH = 0
      DO IPASS = 1, NPASS
         NSIZH = 0
         DO IREPAB = 0, NBSYM - 1
            IREPIJ = JBTOF(IREPAB,1)
            NSIZH = NSIZH + IJPASS(IREPAB,IPASS)*NFPCK12(IREPIJ)
         END DO 
         MSIZH = MAX(MSIZH,NSIZH)
      END DO
      MWHMATCALC = NZ * MSIZH
      RETURN 
      END
      FUNCTION NTR4BSTEP(INTFLG,I2TYP,NODES)
C
C     HJAaJ+LV Aug 2001
C
C     Calculate max number of B shells for fair
C     load balancing. The tradeoff is between
C     work to all nodes, and that the second half
C     transformation will grow with a factor equal
C     to the number of blocks of B shells done for
C     a fixed A shell.
C
#include "implicit.h"
#include "priunit.h"
C
C     get NA1 = number of S A-shells and NA2 = number of L A-shells
C     NA3 = number of S A-shells for Gaunt when active; otherwise
C     it is zero
C
      CALL SHRNGE (3,INTFLG,I2TYP,IASTR3,IBSTR3,IASMA3,IBSMA3)
      CALL SHRNGE (2,INTFLG,I2TYP,IASTR2,IBSTR2,IASMA2,IBSMA2)
      CALL SHRNGE (1,INTFLG,I2TYP,IASTR1,IBSTR1,IASMA1,IBSMA1)
      NA3 = IASMA3-IASTR3 + 1
      NB3 = IBSMA3-IBSTR3 + 1
      NA2 = IASMA2-IASTR2 + 1
      NA1 = IASMA1-IASTR1 + 1
      NDTASK = 2*NODES
C     ... desired number of tasks
C         (we try 2 times the number of nodes)
      MXIBSTEP = MAX(NA1,NA2) + 1
   10 CONTINUE
         MXIBSTEP = MXIBSTEP - 1
         NTASKS = 0
C
C        For Coulomb type integrals the second shell index
C        should always be smaller than the first so the number
C        of B-shells depend on the value of A.
C
         DO I = 1,NA1
            NTASKS = NTASKS + (I-1)/MXIBSTEP + 1
         END DO
         DO I = 1,NA2
            NTASKS = NTASKS + (I-1)/MXIBSTEP + 1
         END DO
C
C        For Gaunt type integrals the second shell index
C        runs always over the full set of L-shells
C
         DO I = 1,NA3
            NTASKS = NTASKS + NB3/MXIBSTEP + 1
         END DO
      IF (NTASKS .LT. NDTASK .AND. MXIBSTEP.GT.1) GO TO 10
      NTR4BSTEP = MXIBSTEP
      RETURN
      END
