
module mod_comb
implicit none        

integer, parameter :: nbitsL=8, nbitsU=8 

type comb_type
 integer Nconf,jL(0:2**nbitsL-1),jU(0:2**nbitsU-1)
 integer, allocatable :: conf(:)
end type

type(comb_type), allocatable :: combs(:,:)

public

contains

 function Comb(n,k)
  integer n,k,Comb,j
  integer, parameter :: dp=selected_real_kind(15,30) 
  real(dp) lnComb

   if (k==0) then
    Comb=1
   else
    lnComb=0d0
    do j=1,k
     lnComb=lnComb+log(real(n-j+1,8))-log(real(j,8))
    end do
    Comb=nint(exp(lnComb))
   end if

 end function

 subroutine generate_basis_Lin_tables(n,k,ndim,basis,jL,jU)
  integer n,k,ndim,jL(0:2**nbitsL-1),jU(0:2**nbitsU-1), &
   indx(0:nbitsL+1),conf,i(0:nbitsL),nL,kL,kU,ipos,j
  integer, allocatable, dimension(:) :: basis,basisL

   nL=min(n,nbitsL) 

   ndim=Comb(n,k)

   allocate(basis(ndim),basisL(2**nL))

   indx(0)=1
   do kL=0,nL
    indx(kL+1)=indx(kL)+Comb(nL,kL)
   end do

   i=0
   do conf=0,2**nL-1
    kL=0
    do ipos=0,nL-1
     if (btest(conf,ipos)) kL=kL+1
    end do
    i(kL)=i(kL)+1
    jL(conf)=i(kL)
    basisL(indx(kL)+i(kL)-1)=conf
   end do

   j=0
   do conf=0,2**min(max(0,n-nbitsL),nbitsU)-1 
    kU=0
    do ipos=0,n-1
     if (btest(conf,ipos)) kU=kU+1
    end do
    if (kU>k .or. kU<k-nbitsL) cycle 
    kL=k-kU 
    jU(conf)=j
    j=j+Comb(nL,kL)
    basis(jU(conf)+1:j)=ishft(conf,nbitsL)+basisL(indx(kL):indx(kL+1)-1)
   end do

 end subroutine

 subroutine generate_combs(Nmax,verbose)
  integer Nmax,n,k,mem
  integer, optional :: verbose

   if (allocated(combs)) then
    if (ubound(combs,1)==Nmax) return
    call destroy_combs
   end if

   allocate(combs(0:Nmax,0:Nmax))

   mem=0
   do n=0,Nmax
    do k=0,n
     call generate_basis_Lin_tables(n,k,combs(n,k)%Nconf,combs(n,k)%conf,combs(n,k)%jL,combs(n,k)%jU)
     mem=mem+4*( 1 + 2**nbitsL + 2**nbitsU + combs(n,k)%Nconf )
    end do
    do k=n+1,Nmax
     mem=mem+4*( 1 + 2**nbitsL + 2**nbitsU )
    end do
   end do

   if (present(verbose)) then
    if (verbose>0) write(*,fmt="(a,i3,a,f8.2,a/)") "Nmax:",Nmax," mem:",mem/1024.**2,"MB"
   end if

 end subroutine

 subroutine destroy_combs
  integer n,k

   do n=0,ubound(combs,1)
    do k=0,n
     deallocate(combs(n,k)%conf)
    end do
   end do

   deallocate(combs)

 end subroutine

 subroutine test_Lin
  integer, allocatable :: basis_orig(:),indx(:)
  integer Nmax,n,k,ndim,errs,ib,jb,conf
  character(len=5) res
  real time1,time2

   Nmax=ubound(combs,1)

   do n=0,Nmax
    do k=0,n
     call CPU_time(time1)

     ndim=Comb(n,k)

     errs=0
     if (abs(combs(n,k)%Nconf-ndim)>0) errs=1

     allocate(basis_orig(ndim),indx(0:2**n-1))
     call generate_basis(n,k,basis_orig,indx)

     if (sum(abs(combs(n,k)%conf-basis_orig))/=0) errs=errs+1

     do ib=1,ndim
      conf=basis_orig(ib)
      jb=combs(n,k)%jL(ibits(conf,     0,nbitsL))+ &
         combs(n,k)%jU(ibits(conf,nbitsL,nbitsU))
      if (jb/=ib) errs=errs+1
     end do

     deallocate(basis_orig,indx)

     call CPU_time(time2)
     res="OK"
     if (errs>0) res="error"
     write(*,fmt="(2i4,2i12,3x,a,f5.2,a)") n,k,ndim,2**n,res,time2-time1,"s"

    end do
   end do

 contains

  subroutine generate_basis(n,k,basis,indx)
   integer n,k,basis(1:),indx(0:),i,j,cnt,ipos

    i=0
    do j=0,2**n-1
     cnt=0
     do ipos=0,n-1
      if (btest(j,ipos)) cnt=cnt+1
     end do
     if (cnt==k) then
      i=i+1
      basis(i)=j
      indx(j)=i
     end if
    end do

  end subroutine

 end subroutine

end module
module mod_common
implicit none      

integer, parameter :: base_kind=selected_int_kind(15) 

integer, parameter :: dp=selected_real_kind(15,30), & 
                      long=selected_int_kind(15)      

real(dp), parameter :: pi=3.141592653589793238d0

integer(long) rtime_vals(10)
real(dp) ctime_vals(10)
integer :: tindx=0

private rtime_vals,ctime_vals,tindx

contains

 subroutine timestamp
  integer dt(8)

   call date_and_time(values=dt)
   write(*,fmt="(/1x,2(i2.2,a),i4,3x,i2,2(a,i2.2)/)") &
    dt(3),".",dt(2),".",dt(1),dt(5),":",dt(6),":",dt(7)

 end subroutine

 subroutine real_time(secs)
  real(dp) secs
  integer dt(8),days,y,m
  integer, parameter :: mds(12)=[31,28,31,30,31,30,31,31,30,31,30,31]

   call date_and_time(values=dt)

   days=0

   do y=2024,dt(1)-1
    days=days+365
    if (mod(y,4)==0) days=days+1 
   end do

   do m=1,dt(2)-1
    days=days+mds(m)
    if (m==2 .and. mod(dt(1),4)==0) days=days+1 
   end do

   secs=days*86400.0_dp+(((dt(3)-1)*24+dt(5))*60+dt(6))*60+dt(7)+0.001_dp*dt(8)

 end subroutine

 subroutine stopwatch(flag,msg,rtime,ctime)
  integer flag,ndigr,ndigc
  character(len=*) msg
  real(dp) rtime,ctime,drtime,dctime
  optional msg,rtime,ctime
  integer(long) rtime2,rate
  character(len=80) timestr

   select case(flag)
   case(1)

    tindx=tindx+1

    call system_clock(rtime_vals(tindx))
    call CPU_time(ctime_vals(tindx))

    if (present(msg)) write(*,fmt="(a)") trim(msg)

   case(2:4)

    call system_clock(rtime2,rate)
    call CPU_time(dctime)

    drtime=(rtime2-rtime_vals(tindx))/real(rate,dp)
    dctime=dctime-ctime_vals(tindx)

    ndigr=max(1,ceiling(log10(drtime)))+6
    ndigc=max(1,ceiling(log10(dctime)))+6

    if (flag==2) &
     write(timestr,fmt="(a,"// &
       "f"//char(48+ndigr/10)//char(48+mod(ndigr,10))//".3,a,"// &
       "f"//char(48+ndigc/10)//char(48+mod(ndigc,10))//".3,a,f7.2)") &
       "   time:",drtime,"s   CPUtime:",dctime,"s   ratio:",dctime/drtime

    if (flag==3) &
     write(timestr,fmt="(3x,f"//char(48+ndigr/10)//char(48+mod(ndigr,10))//".3,a)") drtime,"s"

    if (flag/=4) then
     if (present(msg)) then
      write(*,fmt="(a)") trim(msg)//trim(timestr)
     else
      write(*,fmt="(a)") trim(timestr)
     end if
    end if

    if (present(rtime)) rtime=drtime
    if (present(ctime)) ctime=dctime

    tindx=tindx-1

   end select

 end subroutine

end module

module mod_lanczos
use mod_common
implicit none

integer :: mod_lanczos_verbose=0, mod_lanczos_include_negative_w=1

real(dp) :: mod_lanczos_degen_thrs1=1d-9, mod_lanczos_degen_thrs2=1d-2, mod_lanczos_degen_reqacc=1d-0

complex(dp), allocatable :: expfarr(:),expfarr2(:)
integer, allocatable :: btrev(:)
integer Nlog2

character(len=80) msg

private expfarr,expfarr2,btrev,Nlog2,msg

interface rndvec
 module procedure rndvec_real,rndvec_complex
end interface

contains

 subroutine eigen_RS(N,mtx,eigval,eigvec)
  integer N,info,lwork,liwork,i,j
  real(dp) mtx(N,N),eigval(N)
  real(dp), optional :: eigvec(N,N)
  real(dp), allocatable :: work(:),diag(:)
  integer, allocatable :: iwork(:)
  integer, parameter :: Nmax=32000 

   lwork=-1
   liwork=-1
   allocate(work(1),iwork(1))
   iwork(1)=1
   if (present(eigvec)) then
    if (N<=Nmax) call DSYEVD("V","L",N,eigvec,N,eigval,work,lwork,iwork,liwork,info)
    if (N>Nmax)  call DSYEV("V","L",N,eigvec,N,eigval,work,lwork,info)
   else
    if (N<=Nmax) call DSYEVD("N","L",N,mtx,N,eigval,work,lwork,iwork,liwork,info)
    if (N>Nmax)  call DSYEV("N","L",N,mtx,N,eigval,work,lwork,info)
   end if
   lwork=int(work(1))
   liwork=iwork(1)
   deallocate(work,iwork)

   allocate(work(lwork),iwork(liwork))
   if (present(eigvec)) then
    eigvec=mtx
    if (N<=Nmax) call DSYEVD("V","L",N,eigvec,N,eigval,work,lwork,iwork,liwork,info)
    if (N>Nmax)  call DSYEV("V","L",N,eigvec,N,eigval,work,lwork,info)
   else
    allocate(diag(N))
    do i=1,N
     diag(i)=mtx(i,i)
    end do
    if (N<=Nmax) call DSYEVD("N","L",N,mtx,N,eigval,work,lwork,iwork,liwork,info)
    if (N>Nmax)  call DSYEV("N","L",N,mtx,N,eigval,work,lwork,info)
    do i=1,N
     mtx(i,i)=diag(i)
     do j=i+1,N
      mtx(j,i)=mtx(i,j)
     end do
    end do
    deallocate(diag)
   end if
   deallocate(work,iwork)

   if (info/=0) write(*,*) 'DSYEVD returned INFO=',info

 end subroutine

 subroutine eigen_RST(N,diag,subdiag,eigval,eigvec)
  integer N,info,lwork,liwork
  real(dp) diag(N),subdiag(N-1),eigval(N),aux(1,1)
  real(dp), optional :: eigvec(N,N)
  real(dp), allocatable, dimension(:) :: work,subdiag2
  integer, allocatable :: iwork(:)

   lwork=-1
   liwork=-1
   allocate(work(1),iwork(1))
   iwork(1)=1
   if (present(eigvec)) then
    call DSTEVD("V",N,diag,subdiag,eigvec,N,work,lwork,iwork,liwork,info)
   else
    call DSTEVD("N",N,diag,subdiag,aux,1,work,lwork,iwork,liwork,info)
   end if
   lwork=int(work(1))
   liwork=iwork(1)
   deallocate(work,iwork)

   allocate(work(lwork),iwork(liwork),subdiag2(N-1))
   eigval=diag
   subdiag2=subdiag
   if (present(eigvec)) then
    call DSTEVD("V",N,eigval,subdiag2,eigvec,N,work,lwork,iwork,liwork,info)
   else
    call DSTEVD("N",N,eigval,subdiag2,aux,1,work,lwork,iwork,liwork,info)
   end if
   deallocate(work,iwork,subdiag2)

   if (info/=0) write(*,*) 'DSTEVD returned INFO=',info

 end subroutine

 subroutine eigen_CH(N,mtx,eigval,eigvec)
  integer N,info,lwork,lrwork,liwork,i,j
  complex(dp) mtx(N,N)
  real(dp) eigval(N)
  complex(dp), optional :: eigvec(N,N)
  complex(dp), allocatable :: work(:)
  real(dp), allocatable :: rwork(:),diag(:)
  integer, allocatable :: iwork(:)

   lwork=-1
   lrwork=-1
   liwork=-1
   allocate(work(1),rwork(1),iwork(1))
   if (present(eigvec)) then
    call ZHEEVD("V","L",N,eigvec,N,eigval,work,lwork,rwork,lrwork,iwork,liwork,info)
   else
    call ZHEEVD("N","L",N,mtx,N,eigval,work,lwork,rwork,lrwork,iwork,liwork,info)
   end if
   lwork=int(real(work(1)))
   lrwork=int(rwork(1))
   liwork=iwork(1)
   deallocate(work,rwork,iwork)

   allocate(work(lwork),rwork(lrwork),iwork(liwork))
   if (present(eigvec)) then
    eigvec=mtx
    call ZHEEVD("V","L",N,eigvec,N,eigval,work,lwork,rwork,lrwork,iwork,liwork,info)
   else
    allocate(diag(N))
    do i=1,N
     diag(i)=mtx(i,i)
    end do
    call ZHEEVD("N","L",N,mtx,N,eigval,work,lwork,rwork,lrwork,iwork,liwork,info)
    do i=1,N
     mtx(i,i)=diag(i)
     do j=i+1,N
      mtx(j,i)=conjg(mtx(i,j))
     end do
    end do
    deallocate(diag)
   end if
   deallocate(work,rwork,iwork)

   if (info/=0) write(*,*) 'ZHEEVD returned INFO=',info

 end subroutine

 subroutine FFT_init(N) 
  integer N,j,i

   if (allocated(btrev)) then
    if (N==2**Nlog2) return
    deallocate(btrev,expfarr,expfarr2)
   end if

   allocate(btrev(0:N-1),expfarr(0:N/2-1),expfarr2(0:N/2))

   Nlog2=nint(log(N*1d0)/log(2d0))

   do j=0,N-1
    btrev(j)=0
    do i=1,Nlog2
     if (btest(j,i-1)) btrev(j)=ibset(btrev(j),Nlog2-i)
    end do
   end do

   do j=0,N/2-1
    expfarr(j)=exp(-j*2*(0d0,pi)/N)
   end do

   expfarr2(0:N/2-1)=sqrt(expfarr)
   expfarr2(N/2)=(0d0,-1d0)

 end subroutine

 subroutine FFT(N,f,c,sgn) 
  integer N,sgn,lev,M,M2,i,j,indx1,indx2
  complex(dp) f(0:N-1),c(0:N-1),A,B,expf(0:N/2-1)

   call FFT_init(N) 

   c=f(btrev)     

   do i=0,N-1,2   
    A=c(i)
    B=c(i+1)
    c(i)=A+B
    c(i+1)=A-B
   end do

   M=2
   do lev=2,Nlog2 
    M2=M
    M=2*M
    if (sgn==-1) expf(0:M2-1)=expfarr(0::N/M)
    if (sgn==+1) expf(0:M2-1)=conjg(expfarr(0::N/M))
    do i=0,N-1,M
     do j=0,M2-1
      indx1=i+j
      indx2=indx1+M2
      A=c(indx1)
      B=expf(j)*c(indx2)
      c(indx1)=A+B
      c(indx2)=A-B
     end do
    end do
   end do

 end subroutine

 subroutine forwRFFT(N,f,c) 
  integer N,k
  real(dp) f(0:N-1)
  complex(dp) c(0:N/2),aux(0:N/2-1),G1,G2,A,B

   aux=cmplx(f(0::2),f(1::2),dp)
   call FFT(N/2,aux,c(0:N/2-1),-1)
   c(N/2)=c(0)

   do k=0,N/4
    G1=c(k)
    G2=conjg(c(N/2-k))
    A=(G1+G2)/2
    B=(G1-G2)*(0.0d0,-0.5d0)*expfarr2(k)
    c(k)=A+B
    c(N/2-k)=conjg(A-B)
   end do

 end subroutine

 subroutine backRFFT(N,f,c) 
  integer N,k
  real(dp) f(0:N-1)
  complex(dp) c(0:N/2),G1,G2,A,B
  complex(dp), dimension(0:N/2-1) :: aux,aux2

   call FFT_init(N/2)

   do k=0,N/4
    A=c(k)
    B=conjg(c(N/2-k))
    G1=(A+B)/2
    G2=(A-B)*(0.0d0,0.5d0)*conjg(expfarr2(k))
    aux(k)=G1+G2
    if (k>0) aux(N/2-k)=conjg(G1-G2)
   end do

   call FFT(N/2,aux,aux2,+1)
   f(0::2)=real(aux2)/(N/2)
   f(1::2)=aimag(aux2)/(N/2)

 end subroutine

 subroutine vecOMP_copy_real(x,y)
  real(dp) x(:),y(:)
  integer(base_kind) N,j

   N=ubound(x,1,base_kind)

   do j=1,N
    y(j)=x(j)
   end do

 end subroutine

 subroutine vecOMP_copy_complex(x,y)
  complex(dp) x(:),y(:)
  integer(base_kind) N,j

   N=ubound(x,1,base_kind)

   do j=1,N
    y(j)=x(j)
   end do

 end subroutine

 subroutine vecOMP_add_lincomb_real(y,a,x)
  real(dp) y(:),a,x(:)
  integer(base_kind) N,j

   N=ubound(y,1,base_kind)

   do j=1,N
    y(j)=y(j)+a*x(j)
   end do

 end subroutine

 subroutine vecOMP_add_lincomb_complex(y,a,x)
  complex(dp) y(:),x(:)
  real(dp) a
  integer(base_kind) N,j

   N=ubound(y,1,base_kind)

   do j=1,N
    y(j)=y(j)+a*x(j)
   end do

 end subroutine

 subroutine vecOMP_add_lincomb_complex2(y,a,x)
  complex(dp) y(:),x(:),a
  integer(base_kind) N,j

   N=ubound(y,1,base_kind)

   do j=1,N
    y(j)=y(j)+a*x(j)
   end do

 end subroutine

 subroutine vecOMP_add_lincomb2_real(y,a,x1,b,x2)
  real(dp) y(:),a,x1(:),b,x2(:)
  integer(base_kind) N,j

   N=ubound(y,1,base_kind)

   do j=1,N
    y(j)=y(j)+a*x1(j)+b*x2(j)
   end do

 end subroutine

 subroutine vecOMP_add_lincomb2_complex(y,a,x1,b,x2)
  complex(dp) y(:),x1(:),x2(:)
  real(dp) a,b
  integer(base_kind) N,j

   N=ubound(y,1,base_kind)

   do j=1,N
    y(j)=y(j)+a*x1(j)+b*x2(j)
   end do

 end subroutine

 subroutine vecOMP_dot_real(x,y,val)
  real(dp) x(:),y(:),val,aux
  integer(base_kind) N,j

   N=ubound(x,1,base_kind)

   aux=0d0
   do j=1,N
    aux=aux+x(j)*y(j)
   end do
   val=aux

 end subroutine

 subroutine vecOMP_dot_complex(x,y,val)
  complex(dp) x(:),y(:),aux
  real(dp) val
  integer(base_kind) N,j

   N=ubound(x,1,base_kind)

   aux=(0d0,0d0)
   do j=1,N
    aux=aux+conjg(x(j))*y(j)
   end do
   val=aux

 end subroutine

 subroutine vecOMP_dot_complex2(x,y,val)
  complex(dp) x(:),y(:),val,aux
  integer(base_kind) N,j

   N=ubound(x,1,base_kind)

   aux=(0d0,0d0)
   do j=1,N
    aux=aux+conjg(x(j))*y(j)
   end do
   val=aux

 end subroutine

 subroutine vecOMP_normalize_real(x,norm)
  real(dp) x(:),norm,aux
  integer(base_kind) N,j
  optional norm

   N=ubound(x,1,base_kind)

   aux=0d0
   do j=1,N
    aux=aux+x(j)**2
   end do

   aux=sqrt(aux)
   if (present(norm)) norm=aux

   aux=1/aux
   do j=1,N
    x(j)=x(j)*aux
   end do

 end subroutine

 subroutine vecOMP_normalize_complex(x,norm)
  complex(dp) x(:)
  real(dp) norm,aux
  integer(base_kind) N,j
  optional norm

   N=ubound(x,1,base_kind)

   aux=0d0
   do j=1,N
    aux=aux+abs(x(j))**2
   end do

   aux=sqrt(aux)
   if (present(norm)) norm=aux

   aux=1/aux
   do j=1,N
    x(j)=x(j)*aux
   end do

 end subroutine

 subroutine vecOMP_normalize_outofplace_real(x,y,norm)
  real(dp) x(:),y(:),norm,aux
  integer(base_kind) N,j

   N=ubound(x,1,base_kind)

   aux=0d0
   do j=1,N
    aux=aux+x(j)**2
   end do

   norm=sqrt(aux)

   aux=1/norm
   do j=1,N
    y(j)=x(j)*aux
   end do

 end subroutine

 subroutine vecOMP_normalize_outofplace_complex(x,y,norm)
  complex(dp) x(:),y(:)
  real(dp) norm,aux
  integer(base_kind) N,j

   N=ubound(x,1,base_kind)

   aux=0d0
   do j=1,N
    aux=aux+abs(x(j))**2
   end do

   norm=sqrt(aux)

   aux=1/norm
   do j=1,N
    y(j)=x(j)*aux
   end do

 end subroutine

 subroutine vecOMP_scale_outofplace_real(a,x,y)
  real(dp) a,x(:),y(:)
  integer(base_kind) N,j

   N=ubound(x,1,base_kind)

   do j=1,N
    y(j)=a*x(j)
   end do

 end subroutine

 subroutine vecOMP_scale_outofplace_complex(a,x,y)
  real(dp) a
  complex(dp) x(:),y(:)
  integer(base_kind) N,j

   N=ubound(x,1,base_kind)

   do j=1,N
    y(j)=a*x(j)
   end do

 end subroutine

 subroutine vecOMP_avgabsdiff_real(a,x,y,val)
  real(dp) a,x(:),y(:),aux,val
  integer(base_kind) N,j

   N=ubound(x,1,base_kind)

   aux=0d0
   do j=1,N
    aux=aux+abs(a*x(j)-y(j))
   end do
   val=aux/N

 end subroutine

 subroutine vecOMP_avgabsdiff_complex(a,x,y,val)
  real(dp) a,aux,val
  complex(dp) x(:),y(:)
  integer(base_kind) N,j

   N=ubound(x,1,base_kind)

   aux=0d0
   do j=1,N
    aux=aux+abs(a*x(j)-y(j))
   end do
   val=aux/N

 end subroutine

 subroutine rndvec_real(x)
  real(dp) x(:)
  integer(base_kind) N,j
  integer(base_kind), parameter :: Nchunk=2**25 

   if (mod_lanczos_verbose>0) call stopwatch(1)

   N=ubound(x,1,base_kind)

   if (N>Nchunk) then

    call random_number(x(1:Nchunk))
    x(1:Nchunk)=x(1:Nchunk)-0.5d0

    do j=1,N
     x(j)=x(mod(j,Nchunk)+1)
    end do

   else
    call random_number(x)
    x=x-0.5d0
   end if

   call vecOMP_normalize_real(x)

   if (mod_lanczos_verbose>0) then
    write(msg,fmt="(a,i12)") " generated real random vector of dimension ",N
    call stopwatch(2,msg)
   end if

 end subroutine

 subroutine rndvec_complex(x)
  complex(dp) x(:)
  integer(base_kind) N,j
  integer(base_kind), parameter :: Nchunk=2**25 
  real(dp) aux(2)

   if (mod_lanczos_verbose>0) call stopwatch(1)

   N=ubound(x,1,base_kind)

   do j=1,min(N,Nchunk)
    call random_number(aux)
    x(j)=cmplx(aux(1)-0.5d0,aux(2)-0.5d0,dp)
   end do

   if (N>Nchunk) then
    do j=1,N
     x(j)=x(mod(j,Nchunk)+1)
    end do
   end if

   call vecOMP_normalize_complex(x)

   if (mod_lanczos_verbose>0) then
    write(msg,fmt="(a,i12)") " generated complex random vector of dimension ",N
    call stopwatch(2,msg)
   end if

 end subroutine

 function test_cnvg(M,overlap) result(val)
  integer M,j
  real(dp) overlap(M)
  logical val
  integer, parameter :: Mwght=10
  real(dp) x(0:Mwght+1),wght(Mwght),aux(Mwght)

   val=.false.
   if (M<=Mwght+1) return

   do j=1,Mwght
    wght(j)=sqrt(real(j))
   end do
   wght=wght/sum(wght)

   x=overlap(M-Mwght-1:M)

   aux=0.0
   do j=1,Mwght
    if (min(x(j-1),x(j+1))>x(j)) aux(j)=1.0
    if (max(x(j-1),x(j+1))<x(j)) aux(j)=1.0
   end do

   if (sum(wght*aux)>=0.5 .and. abs(overlap(M))>1d-5) val=.true.

 end function

 subroutine iter_Lanczos1_REAL(Hmul,N,M,init,alpha,beta,tmpfile)
  interface
   subroutine Hmul(x,y)
    REAL(8) x(:),y(:) 
   end subroutine
  end interface
  integer(base_kind) N
  integer M,j,fid
  REAL(dp) init(N)
  real(dp) alpha(0:M),beta(1:M)
  REAL(dp), allocatable :: x1(:),x2(:),y(:)
  character(len=*), optional :: tmpfile 

   allocate(x1(N),x2(N),y(N))

   call vecOMP_copy_REAL(init,x1)

   call Hmul(x1,y)
   call vecOMP_dot_REAL(x1,y,alpha(0))

   do j=0,M-1
    if (mod_lanczos_verbose>0) call stopwatch(1)

    if (j==0) call vecOMP_add_lincomb_REAL(y,-alpha(j)  ,x1)
    if (j>=1) call vecOMP_add_lincomb2_REAL(y,-alpha(j)  ,x1,-beta(j)  ,x2)
    call vecOMP_copy_REAL(x1,x2)
    call vecOMP_normalize_outofplace_REAL(y,x1,beta(j+1))
    call Hmul(x1,y)
    call vecOMP_dot_REAL(x1,y,alpha(j+1))

    if (present(tmpfile)) then
     open(newunit=fid,file=tmpfile,form="unformatted")
     write(fid) M,j,alpha,beta
     close(fid)
    end if

    if (mod_lanczos_verbose>0) then
     write(msg,fmt="(a,i5,a,i5)") " Lanczos iteration ",j,"/",M-1
     call stopwatch(2,msg)
    end if
   end do

   deallocate(x1,x2,y)

 end subroutine

 subroutine iter_Lanczos2_REAL(Hmul,N,M,init,alpha,beta,psi)
  interface
   subroutine Hmul(x,y)
    REAL(8) x(:),y(:) 
   end subroutine
  end interface
  integer(base_kind) N
  integer M,j
  REAL(dp) init(N),psi(N,0:M)
  real(dp) alpha(0:M),beta(1:M)
  REAL(dp), allocatable :: y(:)

   allocate(y(N))

   call vecOMP_copy_REAL(init,psi(:,0))

   call Hmul(psi(:,0),y)
   call vecOMP_dot_REAL(psi(:,0),y,alpha(0))

   do j=0,M-1
    if (mod_lanczos_verbose>0) call stopwatch(1)

    if (j==0) call vecOMP_add_lincomb_REAL(y,-alpha(j)  ,psi(:,j))
    if (j>=1) call vecOMP_add_lincomb2_REAL(y,-alpha(j)  ,psi(:,j),-beta(j)  ,psi(:,j-1))
    call vecOMP_normalize_outofplace_REAL(y,psi(:,j+1),beta(j+1))
    call Hmul(psi(:,j+1),y)
    call vecOMP_dot_REAL(psi(:,j+1),y,alpha(j+1))

    if (mod_lanczos_verbose>0) then
     write(msg,fmt="(a,i5,a,i5)") " Lanczos iteration ",j,"/",M-1
     call stopwatch(2,msg)
    end if
   end do

   deallocate(y)

 end subroutine

 subroutine eigen_Lanczos1_REAL(Hmul,N,M,EGS,init,a,b)
  interface
   subroutine Hmul(x,y)
    REAL(8) x(:),y(:) 
   end subroutine
  end interface
  integer(base_kind) N
  integer M,j
  real(dp) EGS,a(0:M),b(1:M),alpha(0:M),beta(1:M),eigval(0:M)
  REAL(dp) init(N)
  REAL(dp), allocatable :: x1(:),x2(:),y(:)
  optional init,a,b

   allocate(x1(N),x2(N),y(N))

   if (present(init)) then
    call vecOMP_copy_REAL(init,x1)
   else
    call rndvec_REAL(x1)
   end if

   call Hmul(x1,y)
   call vecOMP_dot_REAL(x1,y,alpha(0))

   do j=0,M-1
    if (mod_lanczos_verbose>0) call stopwatch(1)

    if (j==0) call vecOMP_add_lincomb_REAL(y,-alpha(j)  ,x1)
    if (j>=1) call vecOMP_add_lincomb2_REAL(y,-alpha(j)  ,x1,-beta(j)  ,x2)
    call vecOMP_copy_REAL(x1,x2)
    call vecOMP_normalize_outofplace_REAL(y,x1,beta(j+1))
    call Hmul(x1,y)
    call vecOMP_dot_REAL(x1,y,alpha(j+1))

    if (mod_lanczos_verbose>0) then
     write(msg,fmt="(a,i5,a,i5)") " Lanczos iteration ",j,"/",M-1
     call stopwatch(2,msg)
    end if
   end do

   call eigen_RST(M+1,alpha,beta,eigval)

   EGS=eigval(0)

   if (present(a)) a=alpha
   if (present(b)) b=beta

   deallocate(x1,x2,y)

 end subroutine

 subroutine eigen_Lanczos2_REAL(Hmul,N,Mmin,Mmax,EGS,psiGS,acc,Mval,init,a,b,Emax)
  interface
   subroutine Hmul(x,y)
    REAL(8) x(:),y(:) 
   end subroutine
  end interface
  integer(base_kind) N
  integer Mmin,Mmax,Mval,M,j
  real(dp) EGS,acc,a(0:Mmax),b(1:Mmax),Emax, &
   alpha(0:Mmax),beta(1:Mmax),overlap(Mmax)
  real(dp), allocatable :: eigval(:),eigvec(:,:)
  REAL(dp) psiGS(N),init(N)
  REAL(dp), allocatable :: psi(:,:),y(:)
  optional acc,Mval,init,a,b,Emax

   allocate(psi(N,0:Mmax),y(N))

   if (present(init)) then
    call vecOMP_copy_REAL(init,psi(:,0))
   else
    call rndvec_REAL(psi(:,0))
   end if

   call Hmul(psi(:,0),y)
   call vecOMP_dot_REAL(psi(:,0),y,alpha(0))

   do j=0,Mmax-1
    if (mod_lanczos_verbose>0) call stopwatch(1)

    if (j==0) call vecOMP_add_lincomb_REAL(y,-alpha(j),psi(:,j))
    if (j>=1) call vecOMP_add_lincomb2_REAL(y,-alpha(j),psi(:,j),-beta(j),psi(:,j-1))
    call vecOMP_normalize_outofplace_REAL(y,psi(:,j+1),beta(j+1))
    call Hmul(psi(:,j+1),y)
    call vecOMP_dot_REAL(psi(:,j+1),y,alpha(j+1))
    M=j+1
    call vecOMP_dot_REAL(psi(:,0),psi(:,M),overlap(M))

    if (mod_lanczos_verbose>0) then
     write(msg,fmt="(a,i5,a,i5)") " Lanczos iteration ",j,"/",M-1
     call stopwatch(2,msg)
    end if

    if (M>=Mmin .and. test_cnvg(M,overlap(1:M))) exit
   end do

   allocate(eigval(0:M),eigvec(0:M,0:M))
   call eigen_RST(M+1,alpha(0:M),beta(1:M),eigval,eigvec)

   EGS=eigval(0)

   if (mod_lanczos_verbose>0) call stopwatch(1," combining Lanczos vectors...")
   psiGS=0d0
   do j=0,M
    call vecOMP_add_lincomb_REAL(psiGS,eigvec(j,0),psi(:,j))
   end do
   call vecOMP_normalize_REAL(psiGS)
   if (mod_lanczos_verbose>0) call stopwatch(2)

   if (present(a)) a=alpha
   if (present(b)) b=beta
   if (present(Emax)) Emax=eigval(M)

   if (present(acc)) then
    call Hmul(psiGS,y)
    call vecOMP_avgabsdiff_REAL(EGS,psiGS,y,acc)
   end if

   if (present(Mval)) Mval=M

   deallocate(psi,y,eigval,eigvec)

 end subroutine

 subroutine eigen_Lanczos3_REAL(Hmul,N,Mmin,Mmax,EGS,psiGS,acc,Mval,init,a,b,Emax)
  interface
   subroutine Hmul(x,y)
    REAL(8) x(:),y(:) 
   end subroutine
  end interface
  integer(base_kind) N
  integer Mmin,Mmax,Mval,M,j
  real(dp) EGS,acc,a(0:Mmax),b(1:Mmax),Emax, &
   alpha(0:Mmax),beta(1:Mmax),overlap(Mmax)
  real(dp), allocatable :: eigval(:),eigvec(:,:)
  REAL(dp) psiGS(N),init(N)
  REAL(dp), allocatable :: x1(:),x2(:),y(:)
  optional acc,Mval,init,a,b,Emax

   allocate(x1(N),x2(N),y(N))

   if (present(init)) then
    call vecOMP_copy_REAL(init,x1)
   else
    call rndvec_REAL(x1)
   end if
   call vecOMP_copy_REAL(x1,psiGS) 

   call Hmul(x1,y)
   call vecOMP_dot_REAL(x1,y,alpha(0))

   do j=0,Mmax-1
    if (mod_lanczos_verbose>0) call stopwatch(1)

    if (j==0) call vecOMP_add_lincomb_REAL(y,-alpha(j),x1)
    if (j>=1) call vecOMP_add_lincomb2_REAL(y,-alpha(j),x1,-beta(j),x2)
    call vecOMP_copy_REAL(x1,x2)
    call vecOMP_normalize_outofplace_REAL(y,x1,beta(j+1))
    call Hmul(x1,y)
    call vecOMP_dot_REAL(x1,y,alpha(j+1))
    M=j+1
    call vecOMP_dot_REAL(psiGS,x1,overlap(M))

    if (mod_lanczos_verbose>0) then
     write(msg,fmt="(a,i5,a,i5)") " Lanczos iteration A ",j,"/",Mmax-1
     call stopwatch(2,msg)
    end if

    if (M>=Mmin .and. test_cnvg(M,overlap(1:M))) exit
   end do

   allocate(eigval(0:M),eigvec(0:M,0:M))
   call eigen_RST(M+1,alpha(0:M),beta(1:M),eigval,eigvec)

   EGS=eigval(0)

   if (present(a)) a=alpha
   if (present(b)) b=beta
   if (present(Emax)) Emax=eigval(M)

   call vecOMP_copy_REAL(psiGS,x1)
   psiGS=eigvec(0,0)*x1
   do j=1,M
    if (mod_lanczos_verbose>0) call stopwatch(1)

    call Hmul(x1,y)
    if (j==1) call vecOMP_add_lincomb_REAL(y,-alpha(j-1),x1)
    if (j>=2) call vecOMP_add_lincomb2_REAL(y,-alpha(j-1),x1,-beta(j-1),x2)
    call vecOMP_copy_REAL(x1,x2)
    call vecOMP_scale_outofplace_REAL(1/beta(j),y,x1)
    call vecOMP_add_lincomb_REAL(psiGS,eigvec(j,0),x1)

    if (mod_lanczos_verbose>0) then
     write(msg,fmt="(a,i5,a,i5)") " Lanczos iteration B ",j,"/",M
     call stopwatch(2,msg)
    end if
   end do
   call vecOMP_normalize_REAL(psiGS)

   if (present(acc)) then
    call Hmul(psiGS,y)
    call vecOMP_avgabsdiff_REAL(EGS,psiGS,y,acc)
   end if

   if (present(Mval)) Mval=M

   deallocate(x1,x2,y,eigval,eigvec)

 end subroutine

 subroutine eigen_GSvec_REAL(Hmul,N,Mmin,Mmax,Ndegmax,Ndeg,EGS,psiGS,flag,acc,Mval)
  interface
   subroutine Hmul(x,y)
    REAL(8) x(:),y(:) 
   end subroutine
  end interface
  integer(base_kind) N
  integer Mmin,Mmax,Ndegmax,Ndeg,flag,Mval,M,Msum,i,j,jtry
  real(dp) EGS,acc,accj,EGSaux(Ndegmax),Seig(Ndegmax)
  REAL(dp), allocatable :: psiGS(:,:),psiGSaux(:,:)
  complex(dp), dimension(Ndegmax,Ndegmax) :: Smtx,Svec
  optional acc,Mval

   allocate(psiGSaux(N,Ndegmax))
   if (allocated(psiGS) .and. ubound(psiGS,1)/=N) deallocate(psiGS)
   if (.not.allocated(psiGS)) allocate(psiGS(N,1))

   Ndeg=Ndegmax
   Msum=0
   do j=1,Ndegmax
    if (mod_lanczos_verbose>0) write(*,fmt="(a,i5,a,i5)") " GS vector ",j,"/",Ndegmax
    do jtry=1,3
     if (flag==1) call eigen_Lanczos2_REAL(Hmul,N,Mmin,Mmax,EGSaux(j),psiGSaux(:,j),Mval=M)
     if (flag==2) call eigen_Lanczos3_REAL(Hmul,N,Mmin,Mmax,EGSaux(j),psiGSaux(:,j),Mval=M)
     if (mod_lanczos_degen_reqacc>1d-2) then
      exit 
     else
      call Hmul(psiGSaux(:,j),psiGS(:,1))
      call vecOMP_avgabsdiff_REAL(EGSaux(j),psiGSaux(:,j),psiGS(:,1),acc)
      write(*,*) "new vector acc:",acc
      if (acc<=mod_lanczos_degen_reqacc) exit
     end if
    end do

    Msum=Msum+M

    Smtx(j,j)=dot_product(psiGSaux(:,j),psiGSaux(:,j))
    if (j>1) then
     do i=1,j-1
     Smtx(i,j)=dot_product(psiGSaux(:,i),psiGSaux(:,j))
      Smtx(j,i)=conjg(Smtx(i,j))
     end do
    end if
    call eigen_CH(j,Smtx(1:j,1:j),Seig(1:j))
    do i=1,j
     write(*,*) i,Seig(i)
    end do
    write(*,*)

    if (minval(Seig(1:j))<mod_lanczos_degen_thrs1*maxval(Seig(1:j)) &
        .or. abs(Seig(1))<mod_lanczos_degen_thrs2*abs(Seig(2))) then
     Ndeg=j-1
     exit
    end if
   end do

   if (allocated(psiGS) .and. ubound(psiGS,2)/=Ndeg) deallocate(psiGS)
   if (.not.allocated(psiGS)) allocate(psiGS(N,Ndeg))

   if (Ndeg>1) then
    call eigen_CH(Ndeg,Smtx(1:Ndeg,1:Ndeg),Seig(1:Ndeg),Svec(1:Ndeg,1:Ndeg))
    do j=1,Ndeg
     psiGS(:,j)=0d0
     do i=1,Ndeg
      psiGS(:,j)=psiGS(:,j)+Svec(i,j)*psiGSaux(:,i)
     end do
     call vecOMP_normalize_REAL(psiGS(:,j))
    end do
    EGS=sum(EGSaux(1:Ndeg))/Ndeg
   else
    call vecOMP_copy_REAL(psiGSaux(:,1),psiGS(:,1))
    EGS=EGSaux(1)
   end if

   if (present(Mval)) Mval=Msum/min(Ndeg+1,Ndegmax)

   if (present(acc)) then
    acc=0d0
    do j=1,Ndeg
     call Hmul(psiGS(:,j),psiGSaux(:,j))
     call vecOMP_avgabsdiff_REAL(EGS,psiGS(:,j),psiGSaux(:,j),accj)
     acc=max(acc,accj)
    end do
   end if

   deallocate(psiGSaux)

 end subroutine

 subroutine iter_Lanczos1_complex(Hmul,N,M,init,alpha,beta,tmpfile)
  interface
   subroutine Hmul(x,y)
    complex(8) x(:),y(:) 
   end subroutine
  end interface
  integer(base_kind) N
  integer M,j,fid
  complex(dp) init(N)
  real(dp) alpha(0:M),beta(1:M)
  complex(dp), allocatable :: x1(:),x2(:),y(:)
  character(len=*), optional :: tmpfile 

   allocate(x1(N),x2(N),y(N))

   call vecOMP_copy_complex(init,x1)

   call Hmul(x1,y)
   call vecOMP_dot_complex(x1,y,alpha(0))

   do j=0,M-1
    if (mod_lanczos_verbose>0) call stopwatch(1)

    if (j==0) call vecOMP_add_lincomb_complex(y,-alpha(j)  ,x1)
    if (j>=1) call vecOMP_add_lincomb2_complex(y,-alpha(j)  ,x1,-beta(j)  ,x2)
    call vecOMP_copy_complex(x1,x2)
    call vecOMP_normalize_outofplace_complex(y,x1,beta(j+1))
    call Hmul(x1,y)
    call vecOMP_dot_complex(x1,y,alpha(j+1))

    if (present(tmpfile)) then
     open(newunit=fid,file=tmpfile,form="unformatted")
     write(fid) M,j,alpha,beta
     close(fid)
    end if

    if (mod_lanczos_verbose>0) then
     write(msg,fmt="(a,i5,a,i5)") " Lanczos iteration ",j,"/",M-1
     call stopwatch(2,msg)
    end if
   end do

   deallocate(x1,x2,y)

 end subroutine

 subroutine iter_Lanczos2_complex(Hmul,N,M,init,alpha,beta,psi)
  interface
   subroutine Hmul(x,y)
    complex(8) x(:),y(:) 
   end subroutine
  end interface
  integer(base_kind) N
  integer M,j
  complex(dp) init(N),psi(N,0:M)
  real(dp) alpha(0:M),beta(1:M)
  complex(dp), allocatable :: y(:)

   allocate(y(N))

   call vecOMP_copy_complex(init,psi(:,0))

   call Hmul(psi(:,0),y)
   call vecOMP_dot_complex(psi(:,0),y,alpha(0))

   do j=0,M-1
    if (mod_lanczos_verbose>0) call stopwatch(1)

    if (j==0) call vecOMP_add_lincomb_complex(y,-alpha(j)  ,psi(:,j))
    if (j>=1) call vecOMP_add_lincomb2_complex(y,-alpha(j)  ,psi(:,j),-beta(j)  ,psi(:,j-1))
    call vecOMP_normalize_outofplace_complex(y,psi(:,j+1),beta(j+1))
    call Hmul(psi(:,j+1),y)
    call vecOMP_dot_complex(psi(:,j+1),y,alpha(j+1))

    if (mod_lanczos_verbose>0) then
     write(msg,fmt="(a,i5,a,i5)") " Lanczos iteration ",j,"/",M-1
     call stopwatch(2,msg)
    end if
   end do

   deallocate(y)

 end subroutine

 subroutine eigen_Lanczos1_complex(Hmul,N,M,EGS,init,a,b)
  interface
   subroutine Hmul(x,y)
    complex(8) x(:),y(:) 
   end subroutine
  end interface
  integer(base_kind) N
  integer M,j
  real(dp) EGS,a(0:M),b(1:M),alpha(0:M),beta(1:M),eigval(0:M)
  complex(dp) init(N)
  complex(dp), allocatable :: x1(:),x2(:),y(:)
  optional init,a,b

   allocate(x1(N),x2(N),y(N))

   if (present(init)) then
    call vecOMP_copy_complex(init,x1)
   else
    call rndvec_complex(x1)
   end if

   call Hmul(x1,y)
   call vecOMP_dot_complex(x1,y,alpha(0))

   do j=0,M-1
    if (mod_lanczos_verbose>0) call stopwatch(1)

    if (j==0) call vecOMP_add_lincomb_complex(y,-alpha(j)  ,x1)
    if (j>=1) call vecOMP_add_lincomb2_complex(y,-alpha(j)  ,x1,-beta(j)  ,x2)
    call vecOMP_copy_complex(x1,x2)
    call vecOMP_normalize_outofplace_complex(y,x1,beta(j+1))
    call Hmul(x1,y)
    call vecOMP_dot_complex(x1,y,alpha(j+1))

    if (mod_lanczos_verbose>0) then
     write(msg,fmt="(a,i5,a,i5)") " Lanczos iteration ",j,"/",M-1
     call stopwatch(2,msg)
    end if
   end do

   call eigen_RST(M+1,alpha,beta,eigval)

   EGS=eigval(0)

   if (present(a)) a=alpha
   if (present(b)) b=beta

   deallocate(x1,x2,y)

 end subroutine

 subroutine eigen_Lanczos2_complex(Hmul,N,Mmin,Mmax,EGS,psiGS,acc,Mval,init,a,b,Emax)
  interface
   subroutine Hmul(x,y)
    complex(8) x(:),y(:) 
   end subroutine
  end interface
  integer(base_kind) N
  integer Mmin,Mmax,Mval,M,j
  real(dp) EGS,acc,a(0:Mmax),b(1:Mmax),Emax, &
   alpha(0:Mmax),beta(1:Mmax),overlap(Mmax)
  real(dp), allocatable :: eigval(:),eigvec(:,:)
  complex(dp) psiGS(N),init(N)
  complex(dp), allocatable :: psi(:,:),y(:)
  optional acc,Mval,init,a,b,Emax

   allocate(psi(N,0:Mmax),y(N))

   if (present(init)) then
    call vecOMP_copy_complex(init,psi(:,0))
   else
    call rndvec_complex(psi(:,0))
   end if

   call Hmul(psi(:,0),y)
   call vecOMP_dot_complex(psi(:,0),y,alpha(0))

   do j=0,Mmax-1
    if (mod_lanczos_verbose>0) call stopwatch(1)

    if (j==0) call vecOMP_add_lincomb_complex(y,-alpha(j),psi(:,j))
    if (j>=1) call vecOMP_add_lincomb2_complex(y,-alpha(j),psi(:,j),-beta(j),psi(:,j-1))
    call vecOMP_normalize_outofplace_complex(y,psi(:,j+1),beta(j+1))
    call Hmul(psi(:,j+1),y)
    call vecOMP_dot_complex(psi(:,j+1),y,alpha(j+1))
    M=j+1
    call vecOMP_dot_complex(psi(:,0),psi(:,M),overlap(M))

    if (mod_lanczos_verbose>0) then
     write(msg,fmt="(a,i5,a,i5)") " Lanczos iteration ",j,"/",M-1
     call stopwatch(2,msg)
    end if

    if (M>=Mmin .and. test_cnvg(M,overlap(1:M))) exit
   end do

   allocate(eigval(0:M),eigvec(0:M,0:M))
   call eigen_RST(M+1,alpha(0:M),beta(1:M),eigval,eigvec)

   EGS=eigval(0)

   if (mod_lanczos_verbose>0) call stopwatch(1," combining Lanczos vectors...")
   psiGS=0d0
   do j=0,M
    call vecOMP_add_lincomb_complex(psiGS,eigvec(j,0),psi(:,j))
   end do
   call vecOMP_normalize_complex(psiGS)
   if (mod_lanczos_verbose>0) call stopwatch(2)

   if (present(a)) a=alpha
   if (present(b)) b=beta
   if (present(Emax)) Emax=eigval(M)

   if (present(acc)) then
    call Hmul(psiGS,y)
    call vecOMP_avgabsdiff_complex(EGS,psiGS,y,acc)
   end if

   if (present(Mval)) Mval=M

   deallocate(psi,y,eigval,eigvec)

 end subroutine

 subroutine eigen_Lanczos3_complex(Hmul,N,Mmin,Mmax,EGS,psiGS,acc,Mval,init,a,b,Emax)
  interface
   subroutine Hmul(x,y)
    complex(8) x(:),y(:) 
   end subroutine
  end interface
  integer(base_kind) N
  integer Mmin,Mmax,Mval,M,j
  real(dp) EGS,acc,a(0:Mmax),b(1:Mmax),Emax, &
   alpha(0:Mmax),beta(1:Mmax),overlap(Mmax)
  real(dp), allocatable :: eigval(:),eigvec(:,:)
  complex(dp) psiGS(N),init(N)
  complex(dp), allocatable :: x1(:),x2(:),y(:)
  optional acc,Mval,init,a,b,Emax

   allocate(x1(N),x2(N),y(N))

   if (present(init)) then
    call vecOMP_copy_complex(init,x1)
   else
    call rndvec_complex(x1)
   end if
   call vecOMP_copy_complex(x1,psiGS) 

   call Hmul(x1,y)
   call vecOMP_dot_complex(x1,y,alpha(0))

   do j=0,Mmax-1
    if (mod_lanczos_verbose>0) call stopwatch(1)

    if (j==0) call vecOMP_add_lincomb_complex(y,-alpha(j),x1)
    if (j>=1) call vecOMP_add_lincomb2_complex(y,-alpha(j),x1,-beta(j),x2)
    call vecOMP_copy_complex(x1,x2)
    call vecOMP_normalize_outofplace_complex(y,x1,beta(j+1))
    call Hmul(x1,y)
    call vecOMP_dot_complex(x1,y,alpha(j+1))
    M=j+1
    call vecOMP_dot_complex(psiGS,x1,overlap(M))

    if (mod_lanczos_verbose>0) then
     write(msg,fmt="(a,i5,a,i5)") " Lanczos iteration A ",j,"/",Mmax-1
     call stopwatch(2,msg)
    end if

    if (M>=Mmin .and. test_cnvg(M,overlap(1:M))) exit
   end do

   allocate(eigval(0:M),eigvec(0:M,0:M))
   call eigen_RST(M+1,alpha(0:M),beta(1:M),eigval,eigvec)

   EGS=eigval(0)

   if (present(a)) a=alpha
   if (present(b)) b=beta
   if (present(Emax)) Emax=eigval(M)

   call vecOMP_copy_complex(psiGS,x1)
   psiGS=eigvec(0,0)*x1
   do j=1,M
    if (mod_lanczos_verbose>0) call stopwatch(1)

    call Hmul(x1,y)
    if (j==1) call vecOMP_add_lincomb_complex(y,-alpha(j-1),x1)
    if (j>=2) call vecOMP_add_lincomb2_complex(y,-alpha(j-1),x1,-beta(j-1),x2)
    call vecOMP_copy_complex(x1,x2)
    call vecOMP_scale_outofplace_complex(1/beta(j),y,x1)
    call vecOMP_add_lincomb_complex(psiGS,eigvec(j,0),x1)

    if (mod_lanczos_verbose>0) then
     write(msg,fmt="(a,i5,a,i5)") " Lanczos iteration B ",j,"/",M
     call stopwatch(2,msg)
    end if
   end do
   call vecOMP_normalize_complex(psiGS)

   if (present(acc)) then
    call Hmul(psiGS,y)
    call vecOMP_avgabsdiff_complex(EGS,psiGS,y,acc)
   end if

   if (present(Mval)) Mval=M

   deallocate(x1,x2,y,eigval,eigvec)

 end subroutine

 subroutine eigen_GSvec_complex(Hmul,N,Mmin,Mmax,Ndegmax,Ndeg,EGS,psiGS,flag,acc,Mval)
  interface
   subroutine Hmul(x,y)
    complex(8) x(:),y(:) 
   end subroutine
  end interface
  integer(base_kind) N
  integer Mmin,Mmax,Ndegmax,Ndeg,flag,Mval,M,Msum,i,j,jtry
  real(dp) EGS,acc,accj,EGSaux(Ndegmax),Seig(Ndegmax)
  complex(dp), allocatable :: psiGS(:,:),psiGSaux(:,:)
  complex(dp), dimension(Ndegmax,Ndegmax) :: Smtx,Svec
  optional acc,Mval

   allocate(psiGSaux(N,Ndegmax))
   if (allocated(psiGS) .and. ubound(psiGS,1)/=N) deallocate(psiGS)
   if (.not.allocated(psiGS)) allocate(psiGS(N,1))

   Ndeg=Ndegmax
   Msum=0
   do j=1,Ndegmax
    if (mod_lanczos_verbose>0) write(*,fmt="(a,i5,a,i5)") " GS vector ",j,"/",Ndegmax
    do jtry=1,3
     if (flag==1) call eigen_Lanczos2_complex(Hmul,N,Mmin,Mmax,EGSaux(j),psiGSaux(:,j),Mval=M)
     if (flag==2) call eigen_Lanczos3_complex(Hmul,N,Mmin,Mmax,EGSaux(j),psiGSaux(:,j),Mval=M)
     if (mod_lanczos_degen_reqacc>1d-2) then
      exit 
     else
      call Hmul(psiGSaux(:,j),psiGS(:,1))
      call vecOMP_avgabsdiff_complex(EGSaux(j),psiGSaux(:,j),psiGS(:,1),acc)
      write(*,*) "new vector acc:",acc
      if (acc<=mod_lanczos_degen_reqacc) exit
     end if
    end do

    Msum=Msum+M

    Smtx(j,j)=dot_product(psiGSaux(:,j),psiGSaux(:,j))
    if (j>1) then
     do i=1,j-1
     Smtx(i,j)=dot_product(psiGSaux(:,i),psiGSaux(:,j))
      Smtx(j,i)=conjg(Smtx(i,j))
     end do
    end if
    call eigen_CH(j,Smtx(1:j,1:j),Seig(1:j))
    do i=1,j
     write(*,*) i,Seig(i)
    end do
    write(*,*)

    if (minval(Seig(1:j))<mod_lanczos_degen_thrs1*maxval(Seig(1:j)) &
        .or. abs(Seig(1))<mod_lanczos_degen_thrs2*abs(Seig(2))) then
     Ndeg=j-1
     exit
    end if
   end do

   if (allocated(psiGS) .and. ubound(psiGS,2)/=Ndeg) deallocate(psiGS)
   if (.not.allocated(psiGS)) allocate(psiGS(N,Ndeg))

   if (Ndeg>1) then
    call eigen_CH(Ndeg,Smtx(1:Ndeg,1:Ndeg),Seig(1:Ndeg),Svec(1:Ndeg,1:Ndeg))
    do j=1,Ndeg
     psiGS(:,j)=0d0
     do i=1,Ndeg
      psiGS(:,j)=psiGS(:,j)+Svec(i,j)*psiGSaux(:,i)
     end do
     call vecOMP_normalize_complex(psiGS(:,j))
    end do
    EGS=sum(EGSaux(1:Ndeg))/Ndeg
   else
    call vecOMP_copy_complex(psiGSaux(:,1),psiGS(:,1))
    EGS=EGSaux(1)
   end if

   if (present(Mval)) Mval=Msum/min(Ndeg+1,Ndegmax)

   if (present(acc)) then
    acc=0d0
    do j=1,Ndeg
     call Hmul(psiGS(:,j),psiGSaux(:,j))
     call vecOMP_avgabsdiff_complex(EGS,psiGS(:,j),psiGSaux(:,j),accj)
     acc=max(acc,accj)
    end do
   end if

   deallocate(psiGSaux)

 end subroutine

 subroutine broaden(flag,Nw,w1,w2,Emin,Emax,peaktype,HWHM,NFT,wmin,chiaux,chi)
  integer flag,Nw,peaktype,NFT,j,j1,j2
  real(dp) w1,w2,Emin,Emax,HWHM,chi(Nw),wmin,wmax,dw,gval
  real(dp), allocatable :: chiaux(:)
  complex(dp), allocatable :: chiauxFT(:)

   dw=abs(w2-w1)/(Nw-1)

   select case(flag)
   case(1)

    wmin=min(w1,w2,Emin)-10*HWHM
    wmax=max(w1,w2,Emax)+10*HWHM

    NFT=1
    do j=1,20
     NFT=NFT*2
     if (dw*NFT >= wmax-wmin) exit
    end do
    allocate(chiaux(0:NFT-1))

    wmin=wmin-(dw*NFT-wmax+wmin)/2
    wmin=dw*nint((wmin-w1)/dw)+w1

   case(2)

    allocate(chiauxFT(0:NFT/2))

    if (peaktype>0) then
     call forwRFFT(NFT,chiaux,chiauxFT) 
     do j=0,NFT/2
      if (peaktype==1) gval=exp(-HWHM*j*2*pi/(dw*NFT))
      if (peaktype==2) gval=exp(-0.25d0/log(2d0)*(HWHM*j*2*pi/(dw*NFT))**2)
      chiauxFT(j)=chiauxFT(j)*gval
     end do
     call backRFFT(NFT,chiaux,chiauxFT) 
    end if

    j1=nint((w1-wmin)/dw)
    j2=nint((w2-wmin)/dw)
    if (j2>j1) chi=chiaux(j1:j2)
    if (j2<j1) chi=chiaux(j1:j2:-1)

   end select

 end subroutine

 function voigt(x,x0,fwhmL,fwhmG)
  real(dp) x,x0,fwhmL,fwhmG,GammaL,GammaG
  complex(dp) z1,z2,wz1,wz2,voigt

   GammaL=2*fwhmL/2
   GammaG=sqrt(2d0)*fwhmG/(2*sqrt(2*log(2d0)))

   z1=cmplx(x-x0,GammaL/2,dp)/GammaG
   z2=cmplx(x+x0,GammaL/2,dp)/GammaG
   call humlicek(aimag(z1),real(z1),wz1)
   call humlicek(aimag(z2),real(z2),wz2)

   if (mod_lanczos_include_negative_w==1) then
    voigt=(0d0,1d0)*(wz1-wz2)/(GammaG*sqrt(pi))
   else
    voigt=(0d0,1d0)*(wz1)/(GammaG*sqrt(pi))
   end if

 end function

 subroutine humlicek(a,v,W)
  real(dp) a,v,s
  complex(dp) W,z,u

   z=cmplx(a,-v,dp)
   s=abs(v)+a

   if (s >= 15d0) then
    W=(z*0.5641896d0)/(0.5d0+z*z)
   else if (s>=5.5d0) then
    u=z*z
    W=(z*(1.410474d0+u*0.5641896d0))/(0.75d0+(u*(3d0+u)))
   else if (a>=0.195d0*abs(v)-0.176d0) then
    W=(16.4955d0+z*(20.20933d0+z*(11.96482d0+z*(3.778987d0+0.5642236d0*z)))) / &
      (16.4955d0+z*(38.82363d0+z*(39.27121d0+z*(21.69274d0+z*(6.699398d0+z)))))
   else
    u=z*z
    W=exp(u)-(z*(36183.31d0-u*(3321.99d0-u*(1540.787d0-u*(219.031d0-u*(35.7668d0-u*(1.320522d0-u*0.56419d0)))))) / &
             (32066.6d0-u*(24322.84d0-u*(9022.228d0-u*(2186.181d0-u*(364.2191d0-u*(61.57037d0-u*(1.841439d0-u))))))))
   endif

 end subroutine

 subroutine resol_Lanczos1(M,alpha,beta,norm2,EGS,Nw,w1,w2,delta,imresol)
  integer M,Nw,j,n
  real(dp) alpha(0:M),beta(1:M),norm2,EGS,w1,w2,delta,imresol(Nw)
  complex(dp) z,R

   do j=1,Nw
    z=cmplx((w1*(Nw-j)+w2*(j-1))/(Nw-1)+EGS,delta,dp)

    R=1/(z-alpha(M))
    do n=M-1,0,-1
     R=1/(z-alpha(n)-beta(n+1)**2*R)
    end do

    imresol(j)=-aimag(R)*norm2
   end do

 end subroutine

 subroutine resol_Lanczos2(M,alpha,beta,norm2,EGS,Nw,w1,w2,peaktype,HWHM,chi)
  integer M,Nw,peaktype
  real(dp) alpha(0:M),beta(1:M),norm2,EGS,w1,w2,HWHM,chi(Nw)
  real(dp) delta_pos(M+1),delta_wght(M+1)

   call resol_Lanczos2A(M,alpha,beta,norm2,EGS,delta_pos,delta_wght)
   call resol_Lanczos2B(M,delta_pos,delta_wght,Nw,w1,w2,peaktype,HWHM,chi)

 end subroutine

 subroutine resol_Lanczos2A(M,alpha,beta,norm2,EGS,delta_pos,delta_wght)
  integer M,j
  real(dp) alpha(0:M),beta(1:M),norm2,EGS, &
   delta_pos(M+1),delta_wght(M+1),eigval(0:M)
  real(dp), allocatable :: eigvec(:,:)

   allocate(eigvec(0:M,0:M))

   call eigen_RST(M+1,alpha,beta,eigval,eigvec)

   do j=0,M
    delta_pos(j+1)=eigval(j)-EGS
    delta_wght(j+1)=norm2*pi*eigvec(0,j)**2
   end do

 end subroutine

 subroutine resol_Lanczos2B(M,delta_pos,delta_wght,Nw,w1,w2,peaktype,HWHM,chi)
  integer M,Nw,peaktype,NFT,j,jw
  real(dp) delta_pos(M+1),delta_wght(M+1),w1,w2,HWHM,chi(Nw),dw,wmin,pos,wght
  real(dp), allocatable :: chiaux(:)

   call broaden(1,Nw,w1,w2,delta_pos(1), &
    delta_pos(M+1),peaktype,HWHM,NFT,wmin,chiaux,chi)

   dw=abs(w2-w1)/(Nw-1)

   chiaux=0d0
   do j=1,M+1
    pos=delta_pos(j)-wmin
    wght=delta_wght(j)

    jw=int(pos/dw)
    pos=pos-dw*jw

    chiaux(jw)=chiaux(jw)+(1-pos)*wght/dw
    chiaux(jw+1)=chiaux(jw+1)+pos*wght/dw
   end do

   call broaden(2,Nw,w1,w2,delta_pos(1), &
    delta_pos(M+1),peaktype,HWHM,NFT,wmin,chiaux,chi)

   deallocate(chiaux)

 end subroutine

 subroutine resol_Lanczos3(M,alpha,beta,norm2,EGS,Nw,w1,w2,peaktype,HWHM,chi,fwhmL,fwhmG)
  integer M,Nw,peaktype
  real(dp) alpha(0:M),beta(1:M),norm2,EGS,w1,w2,HWHM,fwhmL,fwhmG
  real(dp) delta_pos(M+1),delta_wght(M+1)
  complex(dp) chi(Nw)
  optional fwhmL,fwhmG

   call resol_Lanczos2A(M,alpha,beta,norm2,EGS,delta_pos,delta_wght)
   if (peaktype<=2) then
    call resol_Lanczos3B(M,delta_pos,delta_wght,Nw,w1,w2,peaktype,HWHM,chi)
   else
    if (present(fwhmL).and.present(fwhmG)) then
     call resol_Lanczos3B(M,delta_pos,delta_wght,Nw,w1,w2,peaktype,HWHM,chi,fwhmL_in=fwhmL,fwhmG_in=fwhmG)
    else
     call resol_Lanczos3B(M,delta_pos,delta_wght,Nw,w1,w2,peaktype,HWHM,chi,fwhmL_in=HWHM,fwhmG_in=HWHM)
    end if
   end if

 end subroutine

 subroutine resol_Lanczos3B(M,delta_pos,delta_wght,Nw,w1,w2,peaktype,HWHM,chi,fwhmL_in,fwhmG_in)
  integer M,Nw,peaktype,j,jw
  real(dp) delta_pos(M+1),delta_wght(M+1),w1,w2,HWHM,fwhmL_in,fwhmG_in,w,fwhmL,fwhmG
  real(dp), allocatable :: chiaux(:)
  complex(dp) chi(Nw)
  optional fwhmL_in,fwhmG_in 

   select case(peaktype)
   case(1)
    fwhmL=2*HWHM
    fwhmG=1d-3*fwhmL
   case(2)
    fwhmG=2*HWHM
    fwhmL=1d-3*fwhmG
   case(3)
    fwhmL=fwhmL_in
    fwhmG=fwhmG_in
   end select

   do jw=1,Nw
    w=(w1*(Nw-jw)+w2*(jw-1))/(Nw-1)
    chi(jw)=0
    do j=1,M+1
     chi(jw)=chi(jw)+delta_wght(j)*voigt(w,delta_pos(j),fwhmL,fwhmG)
    end do
   end do

 end subroutine

end module

module mod_Hubb_eg
use mod_comb
use mod_common
use mod_lanczos
implicit none

integer Nsite,Nbnd
real(dp), allocatable, dimension(:,:) :: Rvec
integer, allocatable, dimension(:) :: j1bnd,j2bnd,bndtype 

real(dp) :: Ud=0, JH=0, Delta=0, t=0, Bfield=0

integer nup_H,ndn_H

character(len=80) selops_lbl(50) 
logical selops_active(50)

contains

 subroutine load_cluster(fname)
  character(len=*) fname
  integer i,s,b

   if (allocated(Rvec)) deallocate(Rvec,j1bnd,j2bnd,bndtype)

   open(1,file=fname)

   read(1,*) Nsite
   allocate(Rvec(2,Nsite))
   do i=1,Nsite
    read(1,*) s,Rvec(:,i)
   end do

   read(1,*) Nbnd
   allocate(j1bnd(Nbnd),j2bnd(Nbnd),bndtype(Nbnd))
   do i=1,Nbnd
    read(1,*) b,j1bnd(i),j2bnd(i),bndtype(i)
   end do

   close(1)

 end subroutine

 subroutine Hmul(x,y)
  real(dp) x(:),y(:)

   call apply_Hamiltonian(nup_H,ndn_H,x,y)

 end subroutine

 subroutine apply_Hamiltonian(nup,ndn,psi,Hpsi)
  integer nup,ndn,Ndim1,Ndim2,jb,jb1,jb2,b1,b2,jsite,nxup,nxdn,nzup,nzdn,mask, jbond, b1new,b2new,jb1new,jb2new,jbnew
  real(dp) psi(:),Hpsi(:),Hdiag,Hval
  integer, allocatable, dimension(:) :: basis1,basis2
  integer, dimension(0:2**nbitsL-1) :: jL1,jL2
  integer, dimension(0:2**nbitsU-1) :: jU1,jU2

  integer Nelem,jelem
  integer, allocatable :: hopjb_ini(:),hopjb_fin(:),hopjb_offs(:)
  real(dp), allocatable :: hop_tval(:)

   call generate_basis_Lin_tables(2*Nsite,nup,Ndim1,basis1,jL1,jU1)
   call generate_basis_Lin_tables(2*Nsite,ndn,Ndim2,basis2,jL2,jU2)

   do jb1=1,Ndim1
    do jb2=1,Ndim2
     jb=Ndim2*(jb1-1)+jb2
     Hpsi(jb)=0 

     b1=basis1(jb1)
     b2=basis2(jb2)

     Hdiag=0

     do jsite=1,Nsite

      nxup=0; if (btest(b1,2*jsite-1)) nxup=1
      nzup=0; if (btest(b1,2*jsite-2)) nzup=1
      nxdn=0; if (btest(b2,2*jsite-1)) nxdn=1
      nzdn=0; if (btest(b2,2*jsite-2)) nzdn=1

      Hdiag=Hdiag + Ud*(nxup*nxdn + nzup*nzdn)        &
                  + (Ud-3*JH)*(nxup*nzup + nxdn*nzdn) &
                  + (Ud-2*JH)*(nxup*nzdn + nxdn*nzup) &
                  + Delta/2 * (nxup + nxdn - nzup - nzdn) &
                  - Bfield * (nxup + nzup - nxdn - nzdn)/2

      if ( (btest(b1,2*jsite-1).neqv.btest(b1,2*jsite-2)) .and. &
           (btest(b2,2*jsite-1).neqv.btest(b2,2*jsite-2)) ) then

       mask=ibset(ibset(0,2*jsite-1),2*jsite-2)
       b1new=ieor(b1,mask)
       b2new=ieor(b2,mask)

       jb1new=jL1(ibits(b1new,0,nbitsL))+jU1(ibits(b1new,nbitsL,nbitsU))
       jb2new=jL2(ibits(b2new,0,nbitsL))+jU2(ibits(b2new,nbitsL,nbitsU))
       jbnew=Ndim2*(jb1new-1)+jb2new

       Hpsi(jb)=Hpsi(jb)+JH*psi(jbnew)
      end if
     end do

     Hpsi(jb)=Hpsi(jb)+Hdiag*psi(jb)

    end do
   end do

   call gen_Hhop(Ndim1,basis1,jL1,jU1)

   do jb1=1,Ndim1
    do jelem=hopjb_offs(jb1)+1,hopjb_offs(jb1+1)
     jb1new=hopjb_fin(jelem)
     Hval=hop_tval(jelem)
     do jb2=1,Ndim2
      jb=Ndim2*(jb1-1)+jb2
      jbnew=Ndim2*(jb1new-1)+jb2
      Hpsi(jb)=Hpsi(jb)+Hval*psi(jbnew)
     end do
    end do
   end do

   call gen_Hhop(Ndim2,basis2,jL2,jU2)

   do jb1=1,Ndim1
    do jelem=1,Nelem
     jb2=hopjb_ini(jelem)
     jb2new=hopjb_fin(jelem)
     Hval=hop_tval(jelem)
     jb=Ndim2*(jb1-1)+jb2
     jbnew=Ndim2*(jb1-1)+jb2new
     Hpsi(jb)=Hpsi(jb)+Hval*psi(jbnew)
    end do
   end do

 contains

  subroutine gen_Hhop(Ndim,basis,jL,jU)
   integer Ndim,basis(Ndim),jL(0:2**nbitsL-1),jU(0:2**nbitsU-1)
   integer round,jbond,i,j,sgn,jb,jbnew,b,bnew, term,pos1,pos2,popbeg,poplen
   real(dp) val

    do round=1,2

     if (round==2) then
      if (allocated(hop_tval)) deallocate(hopjb_ini,hopjb_fin,hop_tval,hopjb_offs)
      allocate(hopjb_ini(Nelem),hopjb_fin(Nelem),hop_tval(Nelem),hopjb_offs(Ndim+1))
     end if

     Nelem=0

     do jb=1,Ndim
      if (round==2) hopjb_offs(jb)=Nelem

      b=basis(jb)

      do jbond=1,Nbnd

       i=min(j1bnd(jbond),j2bnd(jbond))
       j=max(j1bnd(jbond),j2bnd(jbond))
       if (bndtype(jbond)==0) sgn=-1 
       if (bndtype(jbond)==1) sgn=+1 

       do term=1,8
        select case(term)
         case(1); pos1=2*i-2; pos2=2*j-2; val=1; popbeg=2*i-1; poplen=2*(j-i)-1 
         case(2); pos1=2*j-2; pos2=2*i-2; val=1; popbeg=2*i-1; poplen=2*(j-i)-1 
         case(3); pos1=2*i-1; pos2=2*j-1; val=3; popbeg=2*i;   poplen=2*(j-i)-1 
         case(4); pos1=2*j-1; pos2=2*i-1; val=3; popbeg=2*i;   poplen=2*(j-i)-1 
         case(5); pos1=2*i-2; pos2=2*j-1; val=sgn*sqrt(3.0_dp); popbeg=2*i-1; poplen=2*(j-i)   
         case(6); pos1=2*i-1; pos2=2*j-2; val=sgn*sqrt(3.0_dp); popbeg=2*i;   poplen=2*(j-i)-2 
         case(7); pos1=2*j-2; pos2=2*i-1; val=sgn*sqrt(3.0_dp); popbeg=2*i;   poplen=2*(j-i)-2 
         case(8); pos1=2*j-1; pos2=2*i-2; val=sgn*sqrt(3.0_dp); popbeg=2*i-1; poplen=2*(j-i)   
        end select

        if (btest(b,pos1) .and. .not.btest(b,pos2)) then
         Nelem=Nelem+1
         if (round==2) then
          bnew=ibset(ibclr(b,pos1),pos2)
          jbnew=jL(ibits(bnew,0,nbitsL))+jU(ibits(bnew,nbitsL,nbitsU))
          hopjb_ini(Nelem)=jb
          hopjb_fin(Nelem)=jbnew
          hop_tval(Nelem)= -t/3*val * (1-2*poppar(ibits(b,popbeg,poplen)))
         end if
        end if

       end do
      end do
     end do
    end do     

    hopjb_offs(Ndim+1)=Nelem

  end subroutine

 end subroutine

 subroutine init_selops
  integer j

   selops_lbl=""

   selops_lbl( 1) = "n  - number of electrons at the selected site (0..4)"
   selops_lbl( 2) = "n↑ - number of ↑ electrons"
   selops_lbl( 3) = "n↓ - number of ↓ electrons"
   selops_lbl( 4) = "nz - number of z electrons"
   selops_lbl( 5) = "nx - number of x electrons"
   selops_lbl( 6) = "P0 - projection to 0 electrons"
   selops_lbl( 7) = "P1 - projection to 1 electrons"
   selops_lbl( 8) = "P2 - projection to 2 electrons"
   selops_lbl( 9) = "P3 - projection to 3 electrons"
   selops_lbl(10) = "P4 - projection to 4 electrons"

   selops_lbl(11) = "local Sz^2"
   selops_lbl(12) = "local Sz - on-site spin z-projection"
   selops_lbl(13) = "Sz.Sz correlations on NN bond"
   selops_lbl(14) = "Sz.Sz correlations on 2nd NN bond"

   selops_lbl(21) = "Ps   - projection to singlet |s> = (z↑+ z↓+)|>"
   selops_lbl(22) = "Psxz - projection to singlet |sxz> = (x↑+ z↓+ - x↓+ z↑+)/sqrt2 |>"
   selops_lbl(23) = "Psxx - projection to singlet |sxx> = (x↑+ x↓+)|>"
   selops_lbl(24) = "PA   - projection to singlet |sA> - exact ionic GS made of zz,xx"
   selops_lbl(25) = "PB   - projection to singlet |sB> - orthogonal combination of zz,xx"

   selops_lbl(26) = "PT(+1) - projection to |T+1> = (x↑+ z↑+)|>"
   selops_lbl(27) = "PT(0)  - projection to |T0>  = (x↑+ z↓+ + x↓+ z↑+)/sqrt2 |>"
   selops_lbl(28) = "PT(-1) - projection to |T-1> = (x↓+ z↓+)|>"
   selops_lbl(29) = "nT"

   selops_lbl(30) = "Pf↑  - projection to |f↑> = (z↑+)|>"
   selops_lbl(31) = "Pf↓  - projection to |f↓> = (z↓+)|>"
   selops_lbl(32) = "Pfx↑ - projection to |fx↑> = (x↑+)|>"
   selops_lbl(33) = "Pfx↓ - projection to |fx↓> = (x↓+)|>"

   selops_active=.false.
   do j=1,size(selops_lbl)
    if (len(trim(selops_lbl(j)))>1) selops_active(j)=.true.
   end do

 end subroutine

 subroutine apply_sel_oper(nup,ndn,psi,oper,Opsi)
  integer nup,ndn,site,oper,site1,site2,j
  real(dp) psi(:),Opsi(:),dist
  real(dp), allocatable :: aux(:)
  logical bond_found

   if (oper==29) then

    allocate(aux(ubound(psi,dim=1)))
    call apply_local_oper(nup,ndn,psi,1,26,Opsi)               
    call apply_local_oper(nup,ndn,psi,1,27,aux); Opsi=Opsi+aux 
    call apply_local_oper(nup,ndn,psi,1,28,aux); Opsi=Opsi+aux 

   else if (oper==13 .or. oper==14) then

    bond_found=.false.
    do j=0,Nsite**2-1
     site1=j/Nsite+1
     site2=mod(j,Nsite)+1
     dist=sqrt(sum( (Rvec(:,site1)-Rvec(:,site2))**2 ))
     if ( (oper==13 .and. abs(dist-1.0      )<1e-3) .or. &
          (oper==14 .and. abs(dist-sqrt(2.0))<1e-3) ) then
      bond_found=.true.
      exit
     end if
    end do

    if (bond_found) then
     allocate(aux(ubound(psi,dim=1)))
     call apply_local_oper(nup,ndn,psi,site1,12,aux) 
     call apply_local_oper(nup,ndn,aux,site2,12,Opsi)
    else
     Opsi=0
    end if

   else

    call apply_local_oper(nup,ndn,psi,1,oper,Opsi)

   end if

 end subroutine

 subroutine apply_local_oper(nup,ndn,psi,site,oper,Opsi,Oavg)
  integer nup,ndn,site,oper
  real(dp) psi(:),Opsi(:),Oavg
  optional Opsi,Oavg

  integer, allocatable, dimension(:) :: basis1,basis2
  integer, dimension(0:2**nbitsL-1) :: jL1,jL2
  integer, dimension(0:2**nbitsU-1) :: jU1,jU2
  integer Ndim1,Ndim2,jb1,jb2,jb,b1,b2, &
   nzup,nzdn,nxup,nxdn,ntot,s1,s2,b1flip,b2flip,jb1flip,jb2flip,jbflip,sgn
  real(dp) val,avg,coef,theta,cthe,sthe

   call generate_basis_Lin_tables(2*Nsite,nup,Ndim1,basis1,jL1,jU1)
   call generate_basis_Lin_tables(2*Nsite,ndn,Ndim2,basis2,jL2,jU2)

   theta=pi/4-atan(Delta/JH)/2
   cthe=cos(theta)
   sthe=sin(theta)

   avg=0

   do jb1=1,Ndim1
    do jb2=1,Ndim2
     jb=Ndim2*(jb1-1)+jb2

     b1=basis1(jb1)
     b2=basis2(jb2)

     if (any(oper==[22,24,25,27])) then

      if (oper==22 .or. oper==27) then

       s1=ibits(b1,2*site-2,2)
       s2=ibits(b2,2*site-2,2)

       if (s1==2 .and. s2==1) then 

        b1flip=ibset(ibclr(b1,2*site-1),2*site-2)
        b2flip=ibset(ibclr(b2,2*site-2),2*site-1)

        jb1flip=jL1(ibits(b1flip,0,nbitsL))+jU1(ibits(b1flip,nbitsL,nbitsU))
        jb2flip=jL2(ibits(b2flip,0,nbitsL))+jU2(ibits(b2flip,nbitsL,nbitsU))
        jbflip=Ndim2*(jb1flip-1)+jb2flip

        if (oper==27) sgn=+1 
        if (oper==22) sgn=-1 

        coef=(psi(jb)-sgn*psi(jbflip))/2 

        if (present(Opsi)) then
         Opsi(jb)=coef
         Opsi(jbflip)=-sgn*coef
        end if

        if (present(Oavg)) then
         avg=avg+coef*(psi(jb)-sgn*psi(jbflip))
        end if

       end if

       if (present(Opsi) .and. &
            .not.((s1==2 .and. s2==1) .or. (s1==1 .and. s2==2))) Opsi(jb)=0 

      end if

      if (oper==24 .or. oper==25) then

       s1=ibits(b1,2*site-2,2)
       s2=ibits(b2,2*site-2,2)

       if (s1==1 .and. s2==1) then 

        b1flip=ibset(ibclr(b1,2*site-2),2*site-1)
        b2flip=ibset(ibclr(b2,2*site-2),2*site-1)

        jb1flip=jL1(ibits(b1flip,0,nbitsL))+jU1(ibits(b1flip,nbitsL,nbitsU))
        jb2flip=jL2(ibits(b2flip,0,nbitsL))+jU2(ibits(b2flip,nbitsL,nbitsU))
        jbflip=Ndim2*(jb1flip-1)+jb2flip

        if (oper==24) then 
         coef=psi(jb)*cthe-psi(jbflip)*sthe

         if (present(Opsi)) then
          Opsi(jb)=coef*cthe
          Opsi(jbflip)=-coef*sthe
         end if

         if (present(Oavg)) avg=avg+coef**2
        end if

        if (oper==25) then
         coef=psi(jb)*sthe+psi(jbflip)*cthe

         if (present(Opsi)) then
          Opsi(jb)=coef*sthe
          Opsi(jbflip)=coef*cthe
         end if

         if (present(Oavg)) avg=avg+coef**2
        end if

       end if

       if (present(Opsi) .and. &
            .not.((s1==1 .and. s2==1) .or. (s1==2 .and. s2==2))) Opsi(jb)=0 

      end if

     else

      val=0

      nzup=0; if (btest(b1,2*site-2)) nzup=1
      nzdn=0; if (btest(b2,2*site-2)) nzdn=1
      nxup=0; if (btest(b1,2*site-1)) nxup=1
      nxdn=0; if (btest(b2,2*site-1)) nxdn=1

      ntot=nzup+nzdn+nxup+nxdn

      s1=ibits(b1,2*site-2,2)
      s2=ibits(b2,2*site-2,2)

      select case(oper)

        case(1); val=ntot       
        case(2); val=nzup+nxup  
        case(3); val=nzdn+nxdn  
        case(4); val=nzup+nzdn  
        case(5); val=nxup+nxdn  

        case( 6); if (ntot==0) val=1  
        case( 7); if (ntot==1) val=1  
        case( 8); if (ntot==2) val=1  
        case( 9); if (ntot==3) val=1  
        case(10); if (ntot==4) val=1  

        case(11); val=0.25_dp*(nzup-nzdn+nxup-nxdn)**2  
        case(12); val=0.5_dp*(nzup-nzdn+nxup-nxdn)      

        case(21); if (s1==1 .and. s2==1) val=1  
        case(23); if (s1==2 .and. s2==2) val=1  
        case(26); if (s1==3 .and. s2==0) val=1  
        case(28); if (s1==0 .and. s2==3) val=1  

        case(30); if (s1==1 .and. s2==0) val=1  
        case(31); if (s1==0 .and. s2==1) val=1  
        case(32); if (s1==2 .and. s2==0) val=1  
        case(33); if (s1==0 .and. s2==2) val=1  

       end select

      if (present(Opsi)) Opsi(jb)=val*psi(jb)
      if (present(Oavg)) avg=avg+val*psi(jb)**2

     end if

    end do
   end do

   if (present(Oavg)) Oavg=avg

 end subroutine

 subroutine eval_corrs_d8_degen(nup,ndn,Ndeg,psi_set,corrmtx)
  integer nup,ndn,Ndeg
  real(dp) psi_set(:,:),corrmtx(Ndeg,Ndeg,6,6,Nsite,6,6,Nsite)

  integer d8(0:3,0:3),Dnup(6,6),Dndn(6,6), &
   bup(6),bdn(6),magic_up(6,6,Nsite),magic_dn(6,6,Nsite),sgn_table(6,6)

  integer, allocatable, dimension(:) :: basis1,basis2
  integer, dimension(0:2**nbitsL-1) :: jL1,jL2
  integer, dimension(0:2**nbitsU-1) :: jU1,jU2

  real(dp), allocatable :: corrmtx_aux(:,:,:,:,:,:,:,:)

  integer Ndim1,Ndim2,jb1,jb2,jb,jb1fin,jb2fin,jbfin, b1ini,b2ini,b1fin,b2fin, &
   site,site1,site2, state(Nsite), alpha,beta,gama,delta, beta_min,beta_max, sgn, i,j

   d8=0
   d8(1,1)=1
   d8(3,0)=2
   d8(2,1)=3
   d8(1,2)=4
   d8(0,3)=5
   d8(2,2)=6

   Dnup=spread([1,2,1,1,0,1],dim=2,ncopies=6)-spread([1,2,1,1,0,1],dim=1,ncopies=6)
   Dndn=spread([1,0,1,1,2,1],dim=2,ncopies=6)-spread([1,0,1,1,2,1],dim=1,ncopies=6)

   bup=[1,3,2,1,0,2]
   bdn=[1,0,1,2,3,2]

   do alpha=1,6
    do beta=1,6
     do site=1,Nsite
      magic_up(alpha,beta,site)=ishft(bup(alpha),2*site-2)-ishft(bup(beta),2*site-2)
      magic_dn(alpha,beta,site)=ishft(bdn(alpha),2*site-2)-ishft(bdn(beta),2*site-2)
     end do
    end do
   end do

   sgn_table(1,:)=[1,3,1,2,3,1]
   sgn_table(2,:)=[4,1,4,3,1,4]
   sgn_table(3,:)=[1,3,1,2,3,1]
   sgn_table(4,:)=[2,4,2,1,4,2]
   sgn_table(5,:)=[4,1,4,3,1,4]
   sgn_table(6,:)=[1,3,1,2,3,1]

   call generate_basis_Lin_tables(2*Nsite,nup,Ndim1,basis1,jL1,jU1)
   call generate_basis_Lin_tables(2*Nsite,ndn,Ndim2,basis2,jL2,jU2)

   allocate(corrmtx_aux(Ndeg,Ndeg,6,6,Nsite,6,6,Nsite))
   corrmtx_aux=0

   call stopwatch(1,"evaluating d8 correlations ...")
   do jb1=1,Ndim1
    do jb2=1,Ndim2
     jb=Ndim2*(jb1-1)+jb2

     b1ini=basis1(jb1)
     b2ini=basis2(jb2)

     do site=1,Nsite
      state(site)=d8(ibits(b1ini,2*site-2,2),ibits(b2ini,2*site-2,2))
     end do

     do site1=1,Nsite
      if (state(site1)>0) then
       do site2=1,Nsite
        if (state(site2)>0) then

         delta=state(site2)

         if (site1==site2) then
          beta_min=1 
          beta_max=6
         else
          beta_min=state(site1)
          beta_max=state(site1)
         end if

         do alpha=1,6
          do beta=beta_min,beta_max
           do gama=1,6

            if (Dnup(alpha,beta)+Dnup(gama,delta)==0 .and. Dndn(alpha,beta)+Dndn(gama,delta)==0) then

             sgn=0

             if (site1==site2) then
              if (beta==gama) then
               sgn=sgn_perm(sgn_table(alpha,delta),b1ini,b2ini,site1)
               b1fin=b1ini+magic_up(alpha,delta,site1)
               b2fin=b2ini+magic_dn(alpha,delta,site1)
              end if
             else

              sgn=sgn_perm(sgn_table(gama,delta),b1ini,b2ini,site2)
              b1fin=b1ini+magic_up(gama,delta,site2)
              b2fin=b2ini+magic_dn(gama,delta,site2)

              sgn=sgn*sgn_perm(sgn_table(alpha,beta),b1fin,b2fin,site1)
              b1fin=b1fin+magic_up(alpha,beta,site1)
              b2fin=b2fin+magic_dn(alpha,beta,site1)
             end if

             if (sgn/=0) then
              jb1fin=jL1(ibits(b1fin,0,nbitsL))+jU1(ibits(b1fin,nbitsL,nbitsU))
              jb2fin=jL2(ibits(b2fin,0,nbitsL))+jU2(ibits(b2fin,nbitsL,nbitsU))
              jbfin=Ndim2*(jb1fin-1)+jb2fin

              do i=1,Ndeg
               do j=1,Ndeg
                corrmtx_aux(i,j,alpha,beta,site1,gama,delta,site2)= &
                 corrmtx_aux(i,j,alpha,beta,site1,gama,delta,site2) &
                  +sgn*psi_set(jbfin,i)*psi_set(jb,j)
               end do
              end do
             end if

            end if
           end do
          end do
         end do

        end if
       end do
      end if
     end do

    end do
   end do
   call stopwatch(2)

   corrmtx=corrmtx_aux

 contains

  function sgn_perm(stab,b1,b2,site) result(sgn)
   integer stab,b1,b2,site,sgn,cnt

    sgn=+1

    if (stab==3 .or. stab==4) then
     cnt=popcnt(ibits(b1,0,2*Nsite)) 
     if (site<Nsite) cnt=cnt+popcnt(ibits(b1,2*site,2*(Nsite-site))) &
                            +popcnt(ibits(b2,2*site,2*(Nsite-site)))
     sgn=1-2*mod(cnt,2)
    end if

    if (stab==2 .or. stab==4) sgn=-sgn

  end function

 end subroutine

 subroutine extract_corrsq_d8_degen(Ndeg,corrmtx,Omtx,qvec,OOval,include_onsite)
  integer Ndeg,i,j,a,b,c,d
  real(dp) corrmtx(Ndeg,Ndeg,6,6,Nsite,6,6,Nsite),qvec(2),OOmtx(Ndeg,Ndeg)
  complex(dp) Omtx(6,6),OOval,coefs(6,6,Nsite,6,6,Nsite)
  integer, optional :: include_onsite
  logical onsite
  complex(dp), parameter :: im=(0,1)

   onsite=.false.
   if (present(include_onsite)) then
    if (include_onsite==1) onsite=.true.
   end if

   do i=1,Nsite
    do j=1,Nsite
     do a=1,6
     do b=1,6
     do c=1,6
     do d=1,6
      coefs(a,b,i,c,d,j)=Omtx(a,b)*Omtx(c,d) &
       *exp(im*dot_product(qvec,Rvec(:,i)-Rvec(:,j))) / Nsite
      if (.not.onsite .and. i==j) coefs(a,b,i,c,d,j)=0 
     end do
     end do
     end do
     end do
    end do
   end do

   do i=1,Ndeg
    do j=1,Ndeg
     OOmtx(i,j)=sum(corrmtx(Ndeg,Ndeg,:,:,:,:,:,:)*coefs)
    end do
   end do

   OOval=0
   do i=1,Ndeg
    OOval=OOval+OOmtx(i,i)/Ndeg
   end do

 end subroutine

 function convert_Omtx_sT_d8(Omtx_sTxyz) result(Omtx6)
  complex(dp) Omtx_sTxyz(4,4),Omtx6(6,6),Tmtx(4,6)
  complex(dp), parameter :: im=(0,1)

   Tmtx=0

   Tmtx(1,:)=[1,0,0,0,0,0]                   
   Tmtx(2,:)=-im/sqrt(2.0_dp)*[0,1,0,0,-1,0] 
   Tmtx(3,:)=+1/sqrt(2.0_dp)*[0,1,0,0,1,0]   
   Tmtx(4,:)=+im/sqrt(2.0_dp)*[0,0,1,1,0,0]  

   Omtx6=matmul(conjg(transpose(Tmtx)),matmul(Omtx_sTxyz,Tmtx))

 end function

 subroutine eval_corrs_sTf_degen(nup,ndn,Ndeg,psi_set,avgmtx,corrmtx)
  integer nup,ndn,Ndeg
  real(dp) psi_set(:,:),avgmtx(Ndeg,Ndeg,6,Nsite),corrmtx(Ndeg,Ndeg,6,Nsite,6,Nsite)

  integer(base_kind) Ndim
  real(dp), allocatable :: aux(:,:,:,:)
  integer i,i1,i2,jsite,jsite1,jsite2,n,n1,n2 

   Ndim=ubound(psi_set,dim=1)

   allocate(aux(Ndim,6,Nsite,Ndeg))

   call stopwatch(1,"evaluating s-T-f correlations ...")
   call stopwatch(1,"applying operators...")
   do i=1,Ndeg
    do jsite=1,Nsite
     call apply_local_oper(nup,ndn,psi_set(:,i),jsite,oper=31,Opsi=aux(:,1,jsite,i)) 
     call apply_local_oper(nup,ndn,psi_set(:,i),jsite,oper=32,Opsi=aux(:,2,jsite,i)) 
     call apply_local_oper(nup,ndn,psi_set(:,i),jsite,oper=33,Opsi=aux(:,3,jsite,i)) 
     call apply_local_oper(nup,ndn,psi_set(:,i),jsite,oper=34,Opsi=aux(:,4,jsite,i)) 
     call apply_local_oper(nup,ndn,psi_set(:,i),jsite,oper=21,Opsi=aux(:,5,jsite,i)) 
     call apply_local_oper(nup,ndn,psi_set(:,i),jsite,oper=22,Opsi=aux(:,6,jsite,i)) 
    end do
   end do
   call stopwatch(2)

   call stopwatch(1,"evaluating scalar products 1 ...")
   do i2=1,Ndeg
    do jsite=1,Nsite
     do n=1,6
      do i1=1,Ndeg
       call vecOMP_dot_real(psi_set(:,i1),aux(:,n,jsite,i2),avgmtx(i1,i2,n,jsite))
      end do
     end do
    end do
   end do
   call stopwatch(2)

   call stopwatch(1,"evaluating scalar products 2 ...")
   do i2=1,Ndeg
    do jsite2=1,Nsite
     do n2=1,6
      do i1=1,Ndeg
       do jsite1=1,Nsite
        do n1=1,6
         call vecOMP_dot_real(aux(:,n1,jsite1,i1),aux(:,n2,jsite2,i2),corrmtx(i1,i2,n1,jsite1,n2,jsite2))
        end do
       end do
      end do
     end do
    end do
   end do
   call stopwatch(2)
   call stopwatch(2)

 end subroutine

 subroutine apply_s_T0_sxz(nup,ndn,psi,sgn,kx,ky,Opsi)
  integer nup,ndn,sgn
  real(dp) psi(:),kx,ky,Opsi(:)

  integer, allocatable, dimension(:) :: basis1,basis2
  integer, dimension(0:2**nbitsL-1) :: jL1,jL2
  integer, dimension(0:2**nbitsU-1) :: jU1,jU2
  integer Ndim1,Ndim2,jb1,jb2,jb,b1,b2,jsite,s1,s2,b1new,b2new,jb1new,jb2new,jbnew
  real(dp) blochfac(Nsite)

   blochfac=exp((0,1)*(kx*Rvec(1,:)+ky*Rvec(2,:)))/sqrt(real(Nsite,dp))

   call generate_basis_Lin_tables(2*Nsite,nup,Ndim1,basis1,jL1,jU1)
   call generate_basis_Lin_tables(2*Nsite,ndn,Ndim2,basis2,jL2,jU2)

   do jb1=1,Ndim1
    do jb2=1,Ndim2
     jb=Ndim2*(jb1-1)+jb2

     b1=basis1(jb1)
     b2=basis2(jb2)

     Opsi(jb)=0 

     do jsite=1,Nsite
      s1=ibits(b1,2*jsite-2,2)
      s2=ibits(b2,2*jsite-2,2)

      if ( s1==2 .and. s2==1 ) then

       b1new = ibset(ibclr(b1,2*jsite-1),2*jsite-2)
       jb1new=jL1(ibits(b1new,0,nbitsL))+jU1(ibits(b1new,nbitsL,nbitsU))

       jbnew=Ndim2*(jb1new-1)+jb2
       Opsi(jb)=Opsi(jb)+blochfac(jsite)/sqrt(2.0_dp)*psi(jbnew)
      end if

      if ( s1==1 .and. s2==2 ) then

       b2new = ibset(ibclr(b2,2*jsite-1),2*jsite-2)
       jb2new=jL2(ibits(b2new,0,nbitsL))+jU2(ibits(b2new,nbitsL,nbitsU))

       jbnew=Ndim2*(jb1-1)+jb2new
       Opsi(jb)=Opsi(jb)-sgn*blochfac(jsite)/sqrt(2.0_dp)*psi(jbnew)
      end if
     end do

    end do
   end do

 end subroutine

 subroutine apply_xz_annihil(nup,ndn,psi,flag_xz,kx,ky,site,fpsi)
  integer nup,ndn,flag_xz,site
  real(dp) psi(:),kx,ky,fpsi(:)
  optional kx,ky,site

  integer, allocatable, dimension(:) :: basis1,basis1f,basis2
  integer, dimension(0:2**nbitsL-1) :: jL1,jL1f,jL2
  integer, dimension(0:2**nbitsU-1) :: jU1,jU1f,jU2
  integer Ndim1,Ndim2,Ndim1f,jb,jb1,jb2,b1,jsite,b1new,jb1new,jbnew, sgn
  real(dp) blochfac(Nsite)

   blochfac=0

   if (present(kx).and.present(ky)) &
    blochfac=exp((0,1)*(kx*Rvec(1,:)+ky*Rvec(2,:)))/sqrt(real(Nsite,dp))

   if (present(site)) &
    blochfac(site)=1

   call generate_basis_Lin_tables(2*Nsite,nup,Ndim1,basis1,jL1,jU1)
   call generate_basis_Lin_tables(2*Nsite,ndn,Ndim2,basis2,jL2,jU2)

   call generate_basis_Lin_tables(2*Nsite,nup-1,Ndim1f,basis1f,jL1f,jU1f)

   do jb1=1,Ndim1f

    do jb2=1,Ndim2
     jb=Ndim2*(jb1-1)+jb2
     fpsi(jb)=0 
    end do

    b1=basis1f(jb1) 

    do jsite=1,Nsite

     b1new=0
     sgn=1

     select case(flag_xz)
     case(1)
      if (.not.btest(b1,2*jsite-1)) then
       b1new=ibset(b1,2*jsite-1)
       if (jsite<Nsite) sgn=1-2*poppar(ibits(b1,2*jsite,2*(Nsite-jsite)))
      end if
     case(2) 
      if (.not.btest(b1,2*jsite-2)) then
       b1new=ibset(b1,2*jsite-2)
       sgn=1-2*poppar(ibits(b1,2*jsite-1,2*(Nsite-jsite)+1))
      end if
     end select

     if (b1new>0) then
      jb1new=jL1(ibits(b1new,0,nbitsL))+jU1(ibits(b1new,nbitsL,nbitsU))
      do jb2=1,Ndim2
       jb=Ndim2*(jb1-1)+jb2
       jbnew=Ndim2*(jb1new-1)+jb2
       fpsi(jb)=fpsi(jb)+sgn*blochfac(jsite)*psi(jbnew)
      end do
     end if

    end do
   end do

 end subroutine

end module

module mod_Hubb_eg_tasks
use mod_Hubb_eg
implicit none

integer :: NLancz_min=50, NLancz_max=500, flag_RAM=0, NLancz_maxtrials=2
real(dp) :: acc_thrs=1e-12, deg_thrs=1e-6

real(dp) :: time_abort=1e100_dp

type parpoint
 real(dp) Ud,JH,Delta,thop,Bfield
 character(len=200) id,input
end type

type(parpoint), allocatable :: parpoint_list(:)

contains

 subroutine benchmark
  real(dp), allocatable :: x(:),y(:)
  real(dp) time,val
  integer :: nup,ndn,Ndim,j
  integer, parameter :: Noper=24,oper(Noper)=[1,2,3,4,5,6,7,10,11,12,13,14,21,22,23,24,31,32,33,34,35,36,37,38]
  character(len=2) idstr

   Ud=5.0_dp; JH=0.6_dp; Delta=2.0_dp; t=0.3_dp; Bfield=0

   call load_cluster("cluster08A")

   nup=Nsite; ndn=Nsite; Ndim=Comb(2*Nsite,nup)*Comb(2*Nsite,ndn)

   write(*,*) "dimension:",Ndim
   allocate(x(Ndim),y(Ndim))

   call rndvec(x)

   call stopwatch(1)
   do j=1,10
    call stopwatch(1,"multiplication by H...")
    nup_H=nup
    ndn_H=ndn
    if (mod(j,2)==1) call Hmul(x,y)
    if (mod(j,2)==0) call Hmul(y,x)
    call stopwatch(2)
   end do
   call stopwatch(2,rtime=time)
   write(*,fmt="(f12.3,a)") time/10,"s per multiplication"

   do j=1,Noper
    write(idstr,fmt="(i2.2)") oper(j)
    call stopwatch(1)
    call apply_local_oper(nup,ndn,x,1,oper(j),Opsi=y)
    call stopwatch(2,"local operator "//idstr//" Opsi:")
    call stopwatch(1)
    call apply_local_oper(nup,ndn,x,1,oper(j),Oavg=val)
    call stopwatch(2,"local operator "//idstr//" Oavg:")
   end do

 end subroutine

 subroutine tests_N4

   call load_cluster("cluster04")

   Ud=5.0_dp; JH=0.6_dp; Delta=1.9_dp; t=0.3_dp; Bfield=1e-6_dp

   call test_N4_Eeig(nel=8,fname_ref="xref_N4_nel8_U5.0_JH0.6_D1.9_t0.3_B1e-6_Eeig.bin")
   call test_N4_Eeig(nel=7,fname_ref="xref_N4_nel7_U5.0_JH0.6_D1.9_t0.3_B1e-6_Eeig.bin")
   call test_N4_Eeig(nel=9,fname_ref="xref_N4_nel9_U5.0_JH0.6_D1.9_t0.3_B1e-6_Eeig.bin")

   call test_N4_GS_corrs_d8(4,4,"xref_N4_nel8_U5.0_JH0.6_D1.9_t0.3_B1e-6_corrs_d8.bin")
   call test_N4_GS_corrs_d8(4,3,"xref_N4_nel7_U5.0_JH0.6_D1.9_t0.3_B1e-6_corrs_d8.bin")
   call test_N4_GS_corrs_d8(5,4,"xref_N4_nel9_U5.0_JH0.6_D1.9_t0.3_B1e-6_corrs_d8.bin")

 end subroutine

 subroutine full_diag(nup,ndn,N,EGS,psiGS,eigvals,eigvecs)
  integer nup,ndn,N,i
  real(dp), optional :: EGS,psiGS(N),eigvals(N),eigvecs(N,N)
  real(dp), allocatable :: Hmtx(:,:),eigval(:),eigvec(:,:),auxvec(:)

   nup_H=nup
   ndn_H=ndn

   allocate(Hmtx(N,N),eigval(N),eigvec(N,N),auxvec(N))

   do i=1,N
    auxvec=0
    auxvec(i)=1
    call Hmul(auxvec,Hmtx(:,i))
   end do

   call eigen_RS(N,Hmtx,eigval,eigvec)

   if (present(EGS)) EGS=eigval(1)
   if (present(psiGS)) psiGS=eigvec(:,1)
   if (present(eigvals)) eigvals=eigval
   if (present(eigvecs)) eigvecs=eigvec

 end subroutine

 subroutine test_N4_Eeig(nel,fname_ref)
  integer nel,Ndim_tot,nel_up,nel_dn,Ndim,pos
  character(len=*) fname_ref
  real(dp), allocatable :: Eeig(:),Eeig_ref(:)
  real(dp) avgdiff,maxdiff

   write(*,fmt="(/a/)") "checking energies against "//fname_ref

   Ndim_tot=Comb(4*Nsite,nel)
   allocate(Eeig(Ndim_tot),Eeig_ref(Ndim_tot))

   pos=0

   do nel_up=0,nel
    nel_dn=nel-nel_up
    if (max(nel_up,nel_dn)>2*Nsite) cycle

    Ndim=Comb(2*Nsite,nel_up)*Comb(2*Nsite,nel_dn)
    write(*,fmt="(a,i1,a,i1,a,i6)") "nup=",nel_up," ndn=",nel_dn,"   subspace dimension:",Ndim

    call full_diag(nel_up,nel_dn,Ndim,eigvals=Eeig(pos+1:pos+Ndim))

    pos=pos+Ndim
   end do

   call simple_sort(Ndim_tot,Eeig)

   open(1,file=fname_ref,form="unformatted")
   read(1) Eeig_ref
   close(1)

   avgdiff=sum(abs(Eeig-Eeig_ref))/Ndim_tot
   maxdiff=maxval(abs(Eeig-Eeig_ref))

   write(*,fmt="(/a,2g12.3)") "difference - avg,max:",avgdiff,maxdiff

 contains

  subroutine simple_sort(N,vals)
   integer N,i,j
   real(dp) vals(N),aux

    do i=1,N
     j=sum(minloc(vals(i:N)))+i-1
     if (j/=i) then
      aux=vals(i)
      vals(i)=vals(j)
      vals(j)=aux
     end if
    end do

  end subroutine

 end subroutine

 subroutine test_N4_GS_chars(nel_up,nel_dn,fname_ref)
  integer nel_up,nel_dn,Ndim
  character(len=*) fname_ref
  real(dp), allocatable :: psiGS(:,:)
  real(dp) avgdiff,maxdiff

   write(*,fmt="(/a/)") "checking GS characteristics against "//fname_ref

   Ndim=Comb(2*Nsite,nel_up)*Comb(2*Nsite,nel_dn)
   write(*,fmt="(a,i1,a,i1,a,i6)") "nup=",nel_up," ndn=",nel_dn,"   subspace dimension:",Ndim

   allocate(psiGS(Ndim,1))

   call full_diag(nel_up,nel_dn,Ndim,psiGS=psiGS(:,1))

 end subroutine

 subroutine test_N4_GS_corrs_d8(nel_up,nel_dn,fname_ref)
  integer nel_up,nel_dn,Ndim
  character(len=*) fname_ref
  real(dp), allocatable :: psiGS(:,:)
  real(dp) corrmtx_d8(1,1,6,6,4,6,6,4),corrmtx_d8_ref(6,6,4,6,6,4) 
  real(dp) avgdiff,maxdiff

   write(*,fmt="(/a/)") "checking d8 correlations against "//fname_ref

   Ndim=Comb(2*Nsite,nel_up)*Comb(2*Nsite,nel_dn)
   write(*,fmt="(a,i1,a,i1,a,i6)") "nup=",nel_up," ndn=",nel_dn,"   subspace dimension:",Ndim

   allocate(psiGS(Ndim,1))

   call full_diag(nel_up,nel_dn,Ndim,psiGS=psiGS(:,1))

   call eval_corrs_d8_degen(nel_up,nel_dn,Ndeg=1,psi_set=psiGS,corrmtx=corrmtx_d8)

   open(1,file=fname_ref,form="unformatted")
   read(1) corrmtx_d8_ref
   close(1)

   avgdiff=sum(abs(corrmtx_d8(1,1,:,:,:,:,:,:)-corrmtx_d8_ref))/size(corrmtx_d8)
   maxdiff=maxval(abs(corrmtx_d8(1,1,:,:,:,:,:,:)-corrmtx_d8_ref))

   write(*,fmt="(/a,2g12.3)") "difference - avg,max:",avgdiff,maxdiff

 end subroutine

 subroutine test_N4_GS_corrs_sTf(nel_up,nel_dn,fname_ref)
  integer nel_up,nel_dn,Ndim
  character(len=*) fname_ref
  real(dp), allocatable :: psiGS(:,:)
  real(dp) corrmtx_sTf(1,1,6,4,6,4),avgmtx(1,1,6,4),corrmtx_sTf_ref(6,4,6,4) 
  real(dp) avgdiff,maxdiff

   write(*,fmt="(/a/)") "checking s-T-f correlations against "//fname_ref

   Ndim=Comb(2*Nsite,nel_up)*Comb(2*Nsite,nel_dn)
   write(*,fmt="(a,i1,a,i1,a,i6)") "nup=",nel_up," ndn=",nel_dn,"   subspace dimension:",Ndim

   allocate(psiGS(Ndim,1))

   call full_diag(nel_up,nel_dn,Ndim,psiGS=psiGS(:,1))

   call eval_corrs_sTf_degen(nel_up,nel_dn,Ndeg=1,psi_set=psiGS,avgmtx=avgmtx,corrmtx=corrmtx_sTf)

   open(1,file=fname_ref,form="unformatted")
   read(1) corrmtx_sTf_ref
   close(1)

   avgdiff=sum(abs(corrmtx_sTf(1,1,:,:,:,:)-corrmtx_sTf_ref))/size(corrmtx_sTf)
   maxdiff=maxval(abs(corrmtx_sTf(1,1,:,:,:,:)-corrmtx_sTf_ref))

   write(*,fmt="(/a,2g12.3)") "difference - avg,max:",avgdiff,maxdiff

 end subroutine

 subroutine prepare_GSset(nel_up,nel_dn,Ndim, Ndeg_max, Ndeg,EGS,psiGS,NLancz,acc, fname_bin)
  integer nel_up,nel_dn, Ndeg_max, Ndeg,NLancz, NL(Ndeg_max),n,i,ibest,fid, Ntrial
  integer(base_kind) Ndim
  integer(kind=8) fsize
  real(dp) EGS,psiGS(Ndim,Ndeg_max),acc(Ndeg_max), &
   E(Ndeg_max),Smtx(Ndeg_max,Ndeg_max),Seigval(Ndeg_max),Seigvec(Ndeg_max,Ndeg_max),time1,time2
  character(len=*), optional :: fname_bin
  real(dp), allocatable :: aux1(:),aux2(:),HpsiGS(:)
  logical fex

   if (present(fname_bin)) then

    inquire(file=fname_bin,exist=fex,size=fsize)
    if (fex .and. fsize>0) then

     write(*,fmt="(a)") "loading ",fname_bin

     call real_time(time1)
     open(newunit=fid,file=fname_bin,form="unformatted")
     read(fid) Ndeg,EGS,NLancz
     read(fid) acc(1:Ndeg)
     do n=1,Ndeg
      read(fid) psiGS(:,n)
     end do
     close(fid)
     call real_time(time2)

     write(*,fmt="(f8.2,a)") (fsize/(time2-time1))/1024.0**2,"MB/s"

     return
    end if

   end if

   nup_H=nel_up
   ndn_H=nel_dn

   EGS=0
   NLancz=0

   Ndeg=Ndeg_max

   do n=1,Ndeg_max

    call stopwatch(1,"calculating ground state...")
    Ntrial=min(NLancz_maxtrials,Ndeg_max-n+1)
    do i=0,Ntrial-1

     call real_time(time1)
     if (time1>time_abort) stop 

     if (flag_RAM==1) &
      call eigen_Lanczos2_REAL(Hmul,Ndim,NLancz_min,NLancz_max,E(n+i),psiGS(:,n+i),acc(n+i),NL(n+i))

     if (flag_RAM==0) &
      call eigen_Lanczos3_REAL(Hmul,Ndim,NLancz_min,NLancz_max,E(n+i),psiGS(:,n+i),acc(n+i),NL(n+i))

     if (acc(n+i)<=acc_thrs) then
      if (i>0) then
       call vecOMP_copy_real(psiGS(:,n+i),psiGS(:,n))
       E(n)=E(n+i)
       acc(n)=acc(n+i)
       NL(n)=NL(n+i)
      end if
      exit
     else
      write(*,fmt="(a,g10.3,a)") "   acc ",acc(n+i)," too large"
      if (i==Ntrial-1) then
       ibest=sum(minloc(acc(n:n+i)))-1
       call vecOMP_copy_real(psiGS(:,n+ibest),psiGS(:,n))
       E(n)=E(n+ibest)
       acc(n)=acc(n+ibest)
       NL(n)=NL(n+ibest)
      end if
     end if

    end do
    write(*,fmt="(a,i5,g12.3)") "   NLancz,acc:",NL(n),acc(n)
    call stopwatch(2)

    call vecOMP_dot_real(psiGS(:,n),psiGS(:,n),Smtx(n,n))
    if (n>1) then
     do i=1,n-1
      call vecOMP_dot_real(psiGS(:,i),psiGS(:,n),Smtx(i,n))
      Smtx(n,i)=Smtx(i,n)
     end do
    end if

    call eigen_RS(n,Smtx(1:n,1:n),Seigval(1:n),Seigvec(1:n,1:n))

    if (any(abs(Seigval(1:n))<maxval(deg_thrs*Seigval(1:n)))) then
     write(*,fmt="(a)") "overlap eigenvalues:"
     do i=n,1,-1
      write(*,fmt="(f23.18)") Seigval(i)
     end do
     Ndeg=n-1
     write(*,fmt="(a,i5)") "degeneracy:",Ndeg
     exit
    end if

   end do

   EGS=sum(E(1:Ndeg))/Ndeg     
   NLancz=sum(NL(1:Ndeg))/Ndeg 

   call stopwatch(1,"orthogonalizing ground states...")

   call eigen_RS(Ndeg,Smtx(1:Ndeg,1:Ndeg),Seigval(1:Ndeg),Seigvec(1:Ndeg,1:Ndeg))

   allocate(aux1(Ndeg),aux2(Ndeg))

   do i=1,Ndim
    aux1=psiGS(i,1:Ndeg)
    do n=1,Ndeg
     aux2(n)=sum(Seigvec(1:Ndeg,n)*aux1)
    end do
    psiGS(i,:)=aux2
   end do

   call stopwatch(2)

   call stopwatch(1,"finalizing ground states...")

   allocate(HpsiGS(Ndim))

   do n=1,Ndeg
    call vecOMP_normalize_REAL(psiGS(:,n))
    call Hmul(psiGS(:,n),HpsiGS)
    call vecOMP_avgabsdiff_REAL(EGS,psiGS(:,n),HpsiGS,acc(n))
    write(*,fmt="(i4,a,g10.3)") n,"  acc:",acc(n)
   end do

   call stopwatch(2)

   if (present(fname_bin)) then
    open(newunit=fid,file=fname_bin,form="unformatted")
    write(fid) Ndeg,EGS,NLancz
    write(fid) acc(1:Ndeg)
    do n=1,Ndeg
     write(fid) psiGS(:,n)
    end do
    close(fid)
   end if

 end subroutine

 subroutine eval_GSchars_degen(nup,ndn,Ndeg,psi_set,GSchars,GSchars_mtx)
  integer nup,ndn,Ndeg,oper,i,j
  real(dp) psi_set(:,:),GSchars(3,50),GSchars_mtx(Ndeg,Ndeg,50),eigval(Ndeg)
  integer(base_kind) Ndim
  real(dp), allocatable :: aux(:) 

   Ndim=ubound(psi_set,dim=1)
   allocate(aux(Ndim))

   GSchars=0
   GSchars_mtx=0

   call stopwatch(1,"evaluating GS characteristics ...")
   do oper=1,size(selops_active)
    if (.not.selops_active(oper)) cycle

    do j=1,Ndeg
     call apply_sel_oper(nup,ndn,psi_set(:,j),oper,aux)
     do i=1,Ndeg
      call vecOMP_dot_real(psi_set(:,i),aux,GSchars_mtx(i,j,oper))
     end do
    end do

    call eigen_RS(Ndeg,GSchars_mtx(:,:,oper),eigval)
    GSchars(:,oper)=[ sum(eigval)/Ndeg, minval(eigval), maxval(eigval) ]
   end do
   call stopwatch(2)

 end subroutine

 subroutine load_parpoint_list(fname)
  character(len=*) fname
  character(len=200) input
  integer fid,ierr,Npt,jpt,i
  real(dp) aux(5)
  type(parpoint) p

   if (allocated(parpoint_list)) deallocate(parpoint_list)

   open(newunit=fid,file=trim(fname))

   Npt=0
   do
    read(fid,fmt="(a)",iostat=ierr) input
    if (ierr/=0) exit
    read(input,fmt=*,iostat=ierr) aux
    if (ierr/=0) exit
    Npt=Npt+1
   end do

   rewind(fid)

   allocate(parpoint_list(Npt))
   do jpt=1,Npt
    read(fid,fmt="(a)") input
    read(input,*) p%Ud,p%JH,p%Delta,p%thop,p%Bfield

    i=index(trim(input)," ",back=.true.)
    p%id=trim(input(i+1:))

    p%input=input

    parpoint_list(jpt)=p
   end do

   close(fid)

 end subroutine

 subroutine set_parpoint(jpt,id,input)
  integer jpt
  character(len=*) id,input
  optional input

   Ud     = parpoint_list(jpt)%Ud
   JH     = parpoint_list(jpt)%JH
   Delta  = parpoint_list(jpt)%Delta
   t      = parpoint_list(jpt)%thop
   Bfield = parpoint_list(jpt)%Bfield

   id=parpoint_list(jpt)%id

   if (present(input)) input=parpoint_list(jpt)%input

 end subroutine

 subroutine task_prepare_GS_single
  character(len=80) fname_clust,fname_bin
  integer nup,ndn,Ndeg_max,Ndeg,NLancz
  integer(base_kind) Ndim
  real(dp) EGS
  real(dp), allocatable :: psiGS(:,:),acc(:)

   write(*,fmt="(a)") "cluster file:"
   read(*,*) fname_clust

   write(*,fmt="(a)") "n↑,n↓:"
   read(*,*) nup,ndn

   write(*,fmt="(a)") "U,JH,Delta,t,B:"
   read(*,*) Ud,JH,Delta,t,Bfield

   write(*,fmt="(a)") "NLancz - min,max, keep in memory (0/1), maximum deg, required acc:"
   read(*,*) NLancz_min,NLancz_max,flag_RAM,Ndeg_max,acc_thrs

   write(*,fmt="(a)") "binary output file:"
   read(*,*) fname_bin

   call load_cluster(trim(fname_clust))

   Ndim=Comb(2*Nsite,nup)*Comb(2*Nsite,ndn)
   write(*,fmt="(/a,i12/)") "dimension:",Ndim

   allocate(psiGS(Ndim,Ndeg_max),acc(Ndeg_max))

   call prepare_GSset(nup,ndn,Ndim, Ndeg_max, Ndeg,EGS,psiGS,NLancz,acc,trim(fname_bin))

   write(*,fmt="(/a,f25.16,i6,g12.3)") "EGS, degeneracy, worst acc:",EGS,Ndeg,maxval(acc(1:Ndeg))

 end subroutine

 subroutine task_GS_set
  real(dp), allocatable :: psiGS(:,:),acc(:),GSchars(:,:),GSchars_mtx(:,:,:), &
   corrmtx_d8(:,:,:,:,:,:,:,:),avgmtx(:,:,:,:),corrmtx_sTf(:,:,:,:,:,:)
  real(dp) time_max,rtime_beg,rtime_curr,time_est,EGS
  integer nup,ndn,Ndeg_max,flag_actions,Ndeg,NLancz,Npt,Npt_calc,jpt,fid_bin,prep_GS_with_file
  integer(base_kind) Ndim
  integer(kind=8) fsize1
  logical fex1,fex2,fex3,fex4,comp_GS,comp_GSchar,comp_corrs_d8,comp_corrs_sTf
  character(len=200) fname_clust,fname_list,fname,id,fname_bin1,fname_bin2,fname_bin3,fname_bin4

   write(*,fmt="(a)") "cluster file:"
   read(*,*) fname_clust

   write(*,fmt="(a)") "n↑,n↓:"
   read(*,*) nup,ndn

   write(*,fmt="(a)") "file containing a list of parameter points:"
   read(*,*) fname_list
   call load_parpoint_list(trim(fname_list))

   write(*,fmt="(a)") "filename prefix:"
   read(*,*) fname

   write(*,fmt="(a)") "NLancz - min,max, keep in memory (0/1), maximum deg, required acc:"
   read(*,*) NLancz_min,NLancz_max,flag_RAM,Ndeg_max,acc_thrs

   write(*,fmt="(a)") "available time [h]"
   read(*,*) time_max
   time_max=time_max*3600

   write(*,fmt="(a)") "actions:  +1 - store GS in binary file"
   write(*,fmt="(a)") "          +2 - GS characteristics"
   write(*,fmt="(a)") "          +4 - d8 correlations"
   write(*,fmt="(a)") "          +8 - s-T-f correlations"
   read(*,*) flag_actions

   call load_cluster(trim(fname_clust))

   Ndim=Comb(2*Nsite,nup)*Comb(2*Nsite,ndn)
   write(*,fmt="(/a,i12/)") "dimension:",Ndim

   allocate(psiGS(Ndim,Ndeg_max),acc(Ndeg_max))

   call real_time(rtime_beg)

   time_abort=rtime_beg+time_max

   Npt=size(parpoint_list)
   Npt_calc=0

   do jpt=1,Npt
    call set_parpoint(jpt,id)

    if (Npt_calc>0) then
     call real_time(rtime_curr)
     time_est=(Npt_calc+1)*(rtime_curr-rtime_beg)/Npt_calc 
     if (time_est>time_max) exit 
    end if

    fname_bin1=trim(fname)//trim(id)//".bin_GS"
    fname_bin2=trim(fname)//trim(id)//".bin_GSchar"
    fname_bin3=trim(fname)//trim(id)//".bin_corrs_d8"
    fname_bin4=trim(fname)//trim(id)//".bin_corrs_sTf"

    inquire(file=trim(fname_bin1),exist=fex1,size=fsize1)
    inquire(file=trim(fname_bin2),exist=fex2)
    inquire(file=trim(fname_bin3),exist=fex3)
    inquire(file=trim(fname_bin4),exist=fex4)

    comp_GS=.false.
    comp_GSchar=.false.
    comp_corrs_d8=.false.
    comp_corrs_sTf=.false.

    if (fex1 .and. fsize1==0) cycle

    prep_GS_with_file=0

    if (fex1 .and. fsize1>0) prep_GS_with_file=1

    if (.not.fex1 .and. mod(flag_actions/1,2)==1) then
     prep_GS_with_file=1
     comp_GS=.true.
     open(newunit=fid_bin,file=trim(fname_bin1)) 
     close(fid_bin)
    end if

    if (.not.fex2 .and. mod(flag_actions/2,2)==1) then
     comp_GSchar=.true.
     open(newunit=fid_bin,file=trim(fname_bin2)) 
     close(fid_bin)
    end if

    if (.not.fex3 .and. mod(flag_actions/4,2)==1) then
     comp_corrs_d8=.true.
     open(newunit=fid_bin,file=trim(fname_bin3)) 
     close(fid_bin)
    end if

    if (.not.fex4 .and. mod(flag_actions/8,2)==1) then
     comp_corrs_sTf=.true.
     open(newunit=fid_bin,file=trim(fname_bin4)) 
     close(fid_bin)
    end if

    if (comp_GSchar .or. comp_corrs_d8 .or. comp_corrs_sTf) comp_GS=.true. 

    if (any([comp_GS,comp_GSchar,comp_corrs_d8,comp_corrs_sTf])) then

     write(*,fmt="(/a/)") trim(fname)//trim(id)

     if (prep_GS_with_file==1) then
      call prepare_GSset(nup,ndn,Ndim, Ndeg_max, Ndeg,EGS,psiGS,NLancz,acc, trim(fname_bin1))
     else
      call prepare_GSset(nup,ndn,Ndim, Ndeg_max, Ndeg,EGS,psiGS,NLancz,acc)
     end if
     write(*,fmt="(/a,f25.16,i6,g12.3)") "EGS, degeneracy, worst acc:",EGS,Ndeg,maxval(acc(1:Ndeg))

     if (comp_GSchar) then
      allocate(GSchars(3,50),GSchars_mtx(Ndeg,Ndeg,50))

      call eval_GSchars_degen(nup,ndn,Ndeg,psiGS,GSchars,GSchars_mtx)

      open(newunit=fid_bin,file=trim(fname_bin2),form="unformatted")
      write(fid_bin) Ndeg,EGS
      write(fid_bin) GSchars,GSchars_mtx
      write(fid_bin) acc(1:Ndeg)
      close(fid_bin)

      deallocate(GSchars,GSchars_mtx)
     end if

     if (comp_corrs_d8) then
      allocate(corrmtx_d8(Ndeg,Ndeg,6,6,Nsite,6,6,Nsite))

      call eval_corrs_d8_degen(nup,ndn,Ndeg,psi_set=psiGS,corrmtx=corrmtx_d8)

      open(newunit=fid_bin,file=trim(fname_bin3),form="unformatted")
      write(fid_bin) Ndeg
      write(fid_bin) corrmtx_d8
      write(fid_bin) acc(1:Ndeg)
      close(fid_bin)

      deallocate(corrmtx_d8)
     end if

     if (comp_corrs_sTf) then
      allocate(avgmtx(Ndeg,Ndeg,6,Nsite),corrmtx_sTf(Ndeg,Ndeg,6,Nsite,6,Nsite))

      call eval_corrs_sTf_degen(nup,ndn,Ndeg,psiGS,avgmtx,corrmtx_sTf)

      open(newunit=fid_bin,file=trim(fname_bin4),form="unformatted")
      write(fid_bin) Ndeg
      write(fid_bin) avgmtx,corrmtx_sTf
      write(fid_bin) acc(1:Ndeg)
      close(fid_bin)

      deallocate(avgmtx,corrmtx_sTf)
     end if

     Npt_calc=Npt_calc+1
    end if

   end do

 end subroutine

 subroutine task_GS_set_extract
  real(dp), allocatable :: GSchars(:,:),GSchars_mtx(:,:,:),corrmtx_d8(:,:,:,:,:,:,:,:)
  real(dp) EGS,acc(100),corrs_q00(9),corrs_qpp(9)
  integer flag_outputs,Ndeg,fid_bin,fid_out_GSch,fid_out_cd8,jpt,jop,valid
  complex(dp) OOval
  character(len=200) fname,fname_out,id,fname_bin
  logical fex

   write(*,fmt="(a)") "cluster file:"
   read(*,*) fname
   call load_cluster(trim(fname))

   write(*,fmt="(a)") "file containing a list of parameter points:"
   read(*,*) fname
   call load_parpoint_list(trim(fname))

   write(*,fmt="(a)") "filename prefix:"
   read(*,*) fname

   write(*,fmt="(a)") "actions:  +1 - GS characteristics"
   write(*,fmt="(a)") "          +2 - selected d8 correlations"
   read(*,*) flag_outputs

   write(*,fmt="(a)") "output file:"
   read(*,*) fname_out

   if (mod(flag_outputs/1,2)==1) then
    open(newunit=fid_out_GSch,file=trim(fname_out)//"_GSchars")
    write(fid_out_GSch,fmt="(a)") "# 1:U  2:JH  3:Δ  4:t  5:B  6:valid(1/0)  7:degeneracy  8:acc"
    write(fid_out_GSch,fmt="(a)") "#"
    do jop=1,50
     if (selops_active(jop)) &
      write(fid_out_GSch,fmt="(a,3i4,a)") "#",8+jop,58+jop,108+jop," avg/min/max - "//trim(selops_lbl(jop))
    end do
    write(fid_out_GSch,fmt="(a)") "#"
   end if

   if (mod(flag_outputs/2,2)==1) then
    open(newunit=fid_out_cd8,file=trim(fname_out)//"_corrs_d8")
    write(fid_out_cd8,fmt="(a)") "# 1:U  2:JH  3:Δ  4:t  5:B  6:valid(1/0)"
    write(fid_out_cd8,fmt="(a)") "#"
    write(fid_out_cd8,fmt="(a)") "# q=(0,0) correlations" 
    write(fid_out_cd8,fmt="(a)") "#  7, 8, 9: S~α.S~α corrs, α=x,y,z"
    write(fid_out_cd8,fmt="(a)") "# 10,11,12: Sα.Sα corrs, α=x,y,z"
    write(fid_out_cd8,fmt="(a)") "# 13,14,15: Q0Q0, Q2Q2, and QxyQxy corrs"
    write(fid_out_cd8,fmt="(a)") "#"
    write(fid_out_cd8,fmt="(a)") "# q=(π,π) correlations"
    write(fid_out_cd8,fmt="(a)") "# 16,17,18: S~α.S~α corrs, α=x,y,z"
    write(fid_out_cd8,fmt="(a)") "# 19,20,21: Sα.Sα corrs, α=x,y,z"
    write(fid_out_cd8,fmt="(a)") "# 22,23,24: Q0Q0, Q2Q2, and QxyQxy corrs"
    write(fid_out_cd8,fmt="(a)") "#"
   end if

   do jpt=1,size(parpoint_list)
    call set_parpoint(jpt,id)

    if (mod(flag_outputs/1,2)==1) then

     fname_bin=trim(fname)//trim(id)//".bin_GSchar"
     inquire(file=trim(fname_bin),exist=fex)

     if (fex) then
      valid=1
      open(newunit=fid_bin,file=trim(fname_bin),form="unformatted")
      read(fid_bin) Ndeg,EGS
      allocate(GSchars(3,50),GSchars_mtx(Ndeg,Ndeg,50))
      read(fid_bin) GSchars,GSchars_mtx
      read(fid_bin) acc(1:Ndeg)
      close(fid_bin)
     else
      valid=0
      Ndeg=1; EGS=0
      allocate(GSchars(3,50),GSchars_mtx(Ndeg,Ndeg,50))
      GSchars=0; GSchars=0
      acc=1
     end if

     write(fid_out_GSch,fmt="(5g25.15,i3,i5,g10.3,150g25.15)") &
      Ud,JH,Delta,t,Bfield,valid,Ndeg,maxval(acc(1:Ndeg)),GSchars(1,:),GSchars(2,:),GSchars(3,:)

     deallocate(GSchars,GSchars_mtx)
    end if

    if (mod(flag_outputs/2,2)==1) then

     fname_bin=trim(fname)//trim(id)//".bin_corrs_d8"
     inquire(file=trim(fname_bin),exist=fex)

     if (fex) then
      valid=1
      open(newunit=fid_bin,file=trim(fname_bin),form="unformatted")
      read(fid_bin) Ndeg
      allocate(corrmtx_d8(Ndeg,Ndeg,6,6,Nsite,6,6,Nsite))
      read(fid_bin) corrmtx_d8
      read(fid_bin) acc(1:Ndeg)
      close(fid_bin)

      do jop=1,9
       call extract_corrsq_d8_degen(Ndeg,corrmtx_d8, &
        convert_Omtx_sT_d8(Omtx_sT(jop)), [0.0_dp,0.0_dp], OOval, include_onsite=0)

       corrs_q00(jop)=OOval

       call extract_corrsq_d8_degen(Ndeg,corrmtx_d8, &
        convert_Omtx_sT_d8(Omtx_sT(jop)), [pi,pi], OOval, include_onsite=0)

       corrs_qpp(jop)=OOval
      end do

      deallocate(corrmtx_d8)
     else
      valid=0
      Ndeg=0; acc=1; corrs_q00=0; corrs_qpp=0
     end if

     write(fid_out_cd8,fmt="(5g25.15,i3,18g25.15)") Ud,JH,Delta,t,Bfield,valid, corrs_q00,corrs_qpp
    end if

   end do

   if (mod(flag_outputs/1,2)==1) close(fid_out_GSch)
   if (mod(flag_outputs/2,2)==1) close(fid_out_cd8)

 contains

  function Omtx_sT(flag_oper) result(O)
   integer flag_oper
   complex(dp) O(4,4)
   complex(dp), parameter :: im=(0,1)

    select case(flag_oper)
    case(1)
      O(1,:)=[0,1,0,0]*(-im)
      O(2,:)=[1,0,0,0]*(+im)
      O(3,:)=[0,0,0,0]
      O(4,:)=[0,0,0,0]
    case(2)
      O(1,:)=[0,0,1,0]*(-im)
      O(2,:)=[0,0,0,0]
      O(3,:)=[1,0,0,0]*(+im)
      O(4,:)=[0,0,0,0]
    case(3)
      O(1,:)=[0,0,0,1]*(-im)
      O(2,:)=[0,0,0,0]
      O(3,:)=[0,0,0,0]
      O(4,:)=[1,0,0,0]*(+im)
    case(4)
      O(1,:)=[0,0,0,0]
      O(2,:)=[0,0,0,0]
      O(3,:)=[0,0,0,1]*(-im)
      O(4,:)=[0,0,1,0]*(+im)
    case(5)
      O(1,:)=[0,0,0,0]
      O(2,:)=[0,0,0,1]*(+im)
      O(3,:)=[0,0,0,0]
      O(4,:)=[0,1,0,0]*(-im)
    case(6)
      O(1,:)=[0,0,0,0]
      O(2,:)=[0,0,1,0]*(-im)
      O(3,:)=[0,1,0,0]*(+im)
      O(4,:)=[0,0,0,0]
    case(7)
      O(1,:)=[0,0,0,0]
      O(2,:)=[0, 1, 0, 0]/sqrt(3.0_dp)
      O(3,:)=[0, 0, 1, 0]/sqrt(3.0_dp)
      O(4,:)=[0, 0, 0,-2]/sqrt(3.0_dp)
    case(8)
      O(1,:)=[0,0,0,0]
      O(2,:)=[0,-1, 0, 0]
      O(3,:)=[0, 0, 1, 0]
      O(4,:)=[0, 0, 0, 0]
    case(9)
      O(1,:)=[0,0,0,0]
      O(2,:)=[0, 0,-1, 0]
      O(3,:)=[0,-1, 0, 0]
      O(4,:)=[0, 0, 0, 0]
     end select

  end function

 end subroutine

 subroutine task_dynamic_corrs
  character(len=80) fname_clust,fname_GSbin,fname_out,fname_bin
  integer nup,ndn,Ndeg_max,Ndeg,NLancz,NLancz_resp,flag_resp,flag_qvec,Nw,flag_peak,jq,jdeg,jw
  integer(base_kind) Ndim,Ndim2
  real(dp) wmin,wmax,HWHM,EGS,qx,qy
  real(dp), allocatable :: norm2(:),alpha(:,:),beta(:,:),spect(:,:),psiGS(:,:),acc(:),psiexc(:)
  character(len=1) qvec_id
  logical fex

   write(*,fmt="(a)") "cluster file:"
   read(*,*) fname_clust

   write(*,fmt="(a)") "n↑,n↓:"
   read(*,*) nup,ndn

   write(*,fmt="(a)") "U,JH,Delta,t,B:"
   read(*,*) Ud,JH,Delta,t,Bfield

   write(*,fmt="(a)") "GS calculation: NLancz - min,max, keep in memory (0/1), maximum deg, required acc:"
   read(*,*) NLancz_min,NLancz_max,flag_RAM,Ndeg_max,acc_thrs

   write(*,fmt="(a)") "GS binary file:"
   read(*,*) fname_GSbin

   write(*,fmt="(a)") "response function: 1 - z↑ hole spect. fun."
   write(*,fmt="(a)") "                   2 - x↑ hole spect. fun."
   write(*,fmt="(a)") "                   3 - s->T0 excitation"
   write(*,fmt="(a)") "                   4 - s->Sxz excitation"
   write(*,fmt="(a)") "                   5 - z↑ hole LDOS"
   write(*,fmt="(a)") "                   6 - x↑ hole LDOS"
   read(*,*) flag_resp

   if (flag_resp<=4) then
    write(*,fmt="(a)") "q-vectors: 0:all 1:Γ 2:M 3:X"
    read(*,*) flag_qvec
   else
    flag_qvec=1 
   end if

   write(*,fmt="(a)") "response calculation: NLancz:"
   read(*,*) NLancz_resp

   write(*,fmt="(a)") "wmin, wmax, Nw, HWHM, peak type (1-lorentz,2-gauss):"
   read(*,*) wmin,wmax,Nw,HWHM,flag_peak

   write(*,fmt="(a)") "output file name:"
   read(*,*) fname_out

   mod_lanczos_verbose=1

   call load_cluster(trim(fname_clust))

   allocate(norm2(Ndeg_max),alpha(0:NLancz_resp,Ndeg_max),beta(1:NLancz_resp,Ndeg_max),spect(Nw,Ndeg_max))

   do jq=1,3
    if (flag_qvec/=0 .and. flag_qvec/=jq) cycle

    select case(jq)
    case(1)
     qx=0 ; qy=0 ; qvec_id="G"
    case(2)
     qx=pi; qy=pi; qvec_id="M"
    case(3)
     qx=pi; qy=0 ; qvec_id="X"
    end select

    if (flag_resp<=4) fname_bin=trim(fname_out)//"_"//qvec_id//".bin"
    if (flag_resp>=5) fname_bin=trim(fname_out)//".bin"

    inquire(file=trim(fname_bin),exist=fex)
    write(*,fmt="(l1,1x,a)") fex,trim(fname_bin)

    if (fex) then

     open(1,file=trim(fname_bin),form="unformatted")
     read(1) Ndeg
     read(1) EGS,norm2(1:Ndeg),alpha(:,1:Ndeg),beta(:,1:Ndeg)
     close(1)

    else

     if (.not. allocated(psiGS)) then

      Ndim=Comb(2*Nsite,nup)*Comb(2*Nsite,ndn)
      allocate(psiGS(Ndim,Ndeg_max),acc(Ndeg_max))

      call prepare_GSset(nup,ndn,Ndim, Ndeg_max, Ndeg,EGS,psiGS,NLancz,acc,trim(fname_GSbin))

      write(*,fmt="(/a,f25.16,i6,g12.3)") "EGS, degeneracy, worst acc:",EGS,Ndeg,maxval(acc(1:Ndeg))
     end if

     select case(flag_resp)
      case(1:2); nup_H=nup-1; ndn_H=ndn 
      case(3:4); nup_H=nup;   ndn_H=ndn 
      case(5:6); nup_H=nup-1; ndn_H=ndn 
     end select

     Ndim2=Comb(2*Nsite,nup_H)*Comb(2*Nsite,ndn_H)
     write(*,fmt="(a,2i12)") "dimensions:",Ndim,Ndim2

     if (.not.allocated(psiexc)) allocate(psiexc(Ndim2))

     do jdeg=1,Ndeg

      call stopwatch(1,"applying excitation operator to GS...")
      select case(flag_resp)
      case(1); call apply_xz_annihil(nup,ndn,psiGS(:,jdeg),2,qx,qy,fpsi=psiexc) 
      case(2); call apply_xz_annihil(nup,ndn,psiGS(:,jdeg),1,qx,qy,fpsi=psiexc) 
      case(3); call apply_s_T0_Sxz(nup,ndn,psiGS(:,jdeg),+1,qx,qy,psiexc)  
      case(4); call apply_s_T0_Sxz(nup,ndn,psiGS(:,jdeg),-1,qx,qy,psiexc)  
      case(5); call apply_xz_annihil(nup,ndn,psiGS(:,jdeg),2,site=1,fpsi=psiexc) 
      case(6); call apply_xz_annihil(nup,ndn,psiGS(:,jdeg),1,site=1,fpsi=psiexc) 
      end select
      call stopwatch(2)

      norm2(jdeg)=dot_product(psiexc,psiexc)
      psiexc=psiexc/sqrt(norm2(jdeg))

      call stopwatch(1,"Lanczos iterations...")
      call iter_Lanczos1_real(Hmul,Ndim2,NLancz_resp,psiexc,alpha(:,jdeg),beta(:,jdeg))
      call stopwatch(2)

     end do

     open(1,file=trim(fname_bin),form="unformatted")
     write(1) Ndeg
     write(1) EGS,norm2(1:Ndeg),alpha(:,1:Ndeg),beta(:,1:Ndeg)
     close(1)

    end if

    do jdeg=1,Ndeg
     call resol_Lanczos2(NLancz_resp, &
      alpha(:,jdeg),beta(:,jdeg),norm2(jdeg),EGS,Nw,wmin,wmax,flag_peak,HWHM,spect(:,jdeg))
    end do

    if (flag_resp<=4) open(1,file=trim(fname_out)//"_"//qvec_id)
    if (flag_resp>=5) open(1,file=trim(fname_out))
    do jw=1,Nw
     write(1,fmt="(5g15.5)") (wmin*(Nw-jw)+wmax*(jw-1))/(Nw-1),sum(spect(jw,:))/Ndeg
    end do
    close(1)

   end do

 end subroutine

 subroutine task_check_GS_set
  character(len=200) fname,fname_out,id,input,fname_bin
  integer Npt,Nmiss,Nfail,fid_out,fid_miss,fid_fail,fid_bin,Ndeg,NLancz,jpt
  real(dp) EGS,acc(100)
  logical fex
  integer(kind=8) fsize

   write(*,fmt="(a)") "file containing a list of parameter points:"
   read(*,*) fname
   call load_parpoint_list(trim(fname))

   write(*,fmt="(a)") "filename prefix:"
   read(*,*) fname

   write(*,fmt="(a)") "required acc:"
   read(*,*) acc_thrs

   write(*,fmt="(a)") "output file:"
   read(*,*) fname_out

   Npt=size(parpoint_list)
   Nmiss=0
   Nfail=0

   open(newunit=fid_out,file=trim(fname_out))
   write(fid_out,fmt="(a)") "# 1-6:input  7:EGS  8:degeneracy  9-11:acc(avg/min/max)  12:NLancz"
   open(newunit=fid_miss,file=trim(fname_out)//"_missing")
   open(newunit=fid_fail,file=trim(fname_out)//"_failed")
   do jpt=1,Npt
    call set_parpoint(jpt,id,input)

    fname_bin=trim(fname)//trim(id)//".bin_GS"
    inquire(file=trim(fname_bin),exist=fex,size=fsize)

    if (fex .and. fsize>0) then
     open(newunit=fid_bin,file=trim(fname_bin),form="unformatted")
     read(fid_bin) Ndeg,EGS,NLancz
     read(fid_bin) acc(1:Ndeg)
     close(fid_bin)

     if (any(acc(1:Ndeg)>acc_thrs)) then
      Nfail=Nfail+1
      write(fid_fail,fmt="(a)") trim(input)
     end if

    else
     Nmiss=Nmiss+1
     write(fid_miss,fmt="(a)") trim(input)
     Ndeg=1; EGS=0; NLancz=0; acc=1
    end if

    write(fid_out,fmt="(a,f25.15,i4,3g10.3,i6)") &
     trim(input),EGS,Ndeg,sum(acc(1:Ndeg))/Ndeg,minval(acc(1:Ndeg)),maxval(acc(1:Ndeg)),NLancz

   end do
   close(fid_out)
   close(fid_miss)
   close(fid_fail)

   write(*,*)
   write(*,fmt="(i6,1x,a)") Npt,"points"
   write(*,fmt="(i6,1x,a)") Nmiss,"missing"
   write(*,fmt="(i6,1x,a)") Nfail,"failed"

 end subroutine

 subroutine Hmul_new(x,y,a,b,c)
  REAL(dp) x(:),y(:),a,b,c
  real(dp), allocatable :: Hx(:)
  integer(base_kind) N,j

   N=ubound(x,dim=1,kind=base_kind)

   allocate(Hx(N))

   call Hmul(x,Hx)

   do j=1,N
    y(j)=a*Hx(j)+b*x(j)+c*y(j)
   end do

 end subroutine

 subroutine CPGF_comp_Hrng(N,Mmax,Emin,Emax,tol,a,b)
  integer(base_kind) N
  integer Mmax,j,i
  real(dp) Emin,Emax,tol,a,b,acoef,bcoef,alpha(0:Mmax),beta(1:Mmax),eigval(0:Mmax),time
  optional a,b
  REAL(dp), allocatable :: x1(:),x2(:),y(:)
  logical converged

   Emin=+1e12
   Emax=-1e12

   acoef=0.0_dp
   if (present(a)) acoef=a

   bcoef=1.0_dp
   if (present(b)) bcoef=b

   allocate(x1(N),x2(N),y(N))

   call rndvec_REAL(x1)

   call Hmul_new(x1,y,1/bcoef,-acoef/bcoef,0.0_dp)

   call vecOMP_dot_REAL(x1,y,alpha(0)) 

   do j=0,Mmax-1
    call stopwatch(1)

    if (j==0) call vecOMP_add_lincomb_REAL(y,-alpha(j),x1)
    if (j>=1) call vecOMP_add_lincomb2_REAL(y,-alpha(j),x1,-beta(j),x2)
    call vecOMP_copy_REAL(x1,x2)

    call vecOMP_normalize_outofplace_REAL(y,x1,beta(j+1))

    call Hmul_new(x1,y,1/bcoef,-acoef/bcoef,0.0_dp)

    call vecOMP_dot_REAL(x1,y,alpha(j+1))

    call eigen_RST(j+2,alpha(0:j+1),beta(1:j+1),eigval(0:j+1))

    converged=.false.
    if (max( abs(eigval(0)-Emin), abs(eigval(j+1)-Emax) )<=tol) converged=.true.

    Emin=eigval(0)
    Emax=eigval(j+1)

    call stopwatch(4,rtime=time)
    write(*,fmt="(a,i5,f10.3,a,2f20.10)") &
     "Lanczos iteration ",j,time,"s   Emin,Emax:",Emin,Emax

    if (converged) exit
   end do

 end subroutine

 subroutine CPGF_comp_Cheb_coef(N,Niter,psiexc,a,b,coef)
  integer(base_kind) N
  integer Niter,m
  real(dp) psiexc(N),a,b,coef(0:2*Niter),aux
  REAL(dp), allocatable :: x1(:),x2(:)

   allocate(x1(N),x2(N))

   call vecOMP_copy_real(psiexc,x1)

   call vecOMP_dot_REAL(x1,x1,coef(0))
   write(*,fmt="(i6,g25.15)") 0,coef(0)

   call Hmul_new(x1,x2,1/b,-a/b,0.0_dp)

   call vecOMP_dot_REAL(x1,x2,coef(1))
   call vecOMP_dot_REAL(x2,x2,aux)
   coef(2)=2*aux-coef(0)

   write(*,fmt="(i6,g25.15)") 1,coef(1)
   write(*,fmt="(i6,g25.15)") 2,coef(2)

   do m=2,Niter

    if (mod(m,2)==0) then

     call Hmul_new(x2,x1,2/b,-2*a/b,-1.0_dp)

     call vecOMP_dot_REAL(x1,x2,aux)
     coef(2*m-1)=2*aux-coef(1)

     call vecOMP_dot_REAL(x1,x1,aux)
     coef(2*m)=2*aux-coef(0)
    end if

    if (mod(m,2)==1) then

     call Hmul_new(x1,x2,2/b,-2*a/b,-1.0_dp)

     call vecOMP_dot_REAL(x2,x1,aux)
     coef(2*m-1)=2*aux-coef(1)

     call vecOMP_dot_REAL(x2,x2,aux)
     coef(2*m)=2*aux-coef(0)
    end if

    write(*,fmt="(i6,g25.15)") 2*m-1,coef(2*m-1)
    write(*,fmt="(i6,g25.15)") 2*m  ,coef(2*m)
   end do

 end subroutine

 subroutine CPGF_comp_Cheb_eval(Emin,Emax,NE,delta,EGS,a,b,N,coef,fname)
  real(dp) Emin,Emax,delta,EGS,a,b,coef(0:),E
  integer NE,N,i,j
  character(len=*) fname
  complex(dp) z,gn(0:N),G

   open(1,file=fname)
   do i=1,NE
    E=(Emin*(NE-i)+Emax*(i-1))/(NE-1)
    z=cmplx(E+EGS-a,delta)/b

    do j=0,N
     gn(j)=(0,-2)*(z-(0,1)*sqrt(1-z**2))**j/sqrt(1-z**2)
    end do
    gn(0)=gn(0)/2

    G=sum(gn*coef(0:N))

    write(1,*) E,-aimag(G)/b/pi
   end do
   close(1)

 end subroutine

 subroutine devel_CPGF(flag_comp)
  integer flag_comp,nup,ndn,Ndeg,NLancz,Niter,j,NE
  integer(base_kind) Ndim,Ndim2
  real(dp) EGS,acc(1),Emin,Emax,a,b,HWHM
  real(dp), allocatable :: psiGS(:,:),psiexc(:),coef(:)
  character(len=1) lbl
  integer, allocatable :: Ncoef(:)
  character(len=5) str

   Ud=5.0_dp; JH=0.6_dp; Delta=2.0_dp; t=0.3_dp; Bfield=0

   call load_cluster("cluster08A")

   nup=8; ndn=8

   Ndim=Comb(2*Nsite,nup)*Comb(2*Nsite,ndn)
   Ndim2=Comb(2*Nsite,nup-1)*Comb(2*Nsite,ndn)

   allocate(psiGS(Ndim,1),psiexc(Ndim2))

   NLancz_min=50; NLancz_max=300; flag_RAM=1; mod_lanczos_verbose=1
   call prepare_GSset(nup,ndn,Ndim,1,Ndeg,EGS,psiGS,NLancz,acc,"xGS.bin")
   write(*,fmt="(/a,i12,f25.16,g12.3)") "dimension,EGS, acc:",Ndim,EGS,acc

   nup_H=nup-1
   ndn_H=ndn

   Emin=18.3225991543
   Emax=85.5041396289

   a=(Emin+Emax)/2
   b=1.01*(Emax-Emin)/2

   Niter=10000
   allocate(coef(0:2*Niter))

   select case(flag_comp)
   case(1)
    call apply_xz_annihil(nup,ndn,psiGS(:,1),2,site=1,fpsi=psiexc) 
    lbl="z"
   case(2)
    call apply_xz_annihil(nup,ndn,psiGS(:,1),1,site=1,fpsi=psiexc)
    lbl="x"
   end select

   call CPGF_comp_Cheb_coef(Ndim2,Niter,psiexc,a,b,coef)

   open(1,file="xcoef_LDOS"//lbl//".bin",form="unformatted")
   write(1) coef
   close(1)

   EGS=22.8121257336750247_dp

   open(1,file="xcoef_LDOS"//lbl//".bin",form="unformatted")
   read(1) coef
   close(1)

   Emin=-8.0_dp
   Emax=+12.0_dp
   HWHM=0.02_dp
   NE=20001

   Ncoef=[500,1000,2000,4000,8000,12000,16000,20000]

   do j=1,size(Ncoef)
    write(str,fmt="(i5.5)") Ncoef(j)
    call CPGF_comp_Cheb_eval(Emin,Emax,NE,HWHM,EGS,a,b, &
     Ncoef(j),coef(0:Ncoef(j)),"xLDOS"//lbl//"_N"//str)
   end do

 end subroutine

end module

program prg_Hubb_eg
use mod_Hubb_eg_tasks
implicit none

integer task

 call init_selops

 call devel_CPGF(1); call devel_CPGF(2); stop

 write(*,fmt="(a)") "task - 1: single GS preparation"
 write(*,fmt="(a)") "       2: preparation of a set of GS + chars/corrs"
 write(*,fmt="(a)") "       3: extraction of chars/corrs"
 write(*,fmt="(a)") "       4: dynamic correlations"
 write(*,fmt="(a)") "TODO   5: energy spectrum"
 write(*,fmt="(a)") "      10: check GS set (completeness + accuracy)"
 read(*,*) task

 select case(task)
 case(1)
  call task_prepare_GS_single
 case(2)
  call task_GS_set
 case(3)
  call task_GS_set_extract
 case(4)
  call task_dynamic_corrs
 case(10)
  call task_check_GS_set
 end select

end program

