function [Z,D,Y,res,niter,timings,out]=lr_adi_sylv(A,B,M,C,F,G,pA,pB,maxiter,rtol,opts)
% function [Z,D,Y,res,niter,timings,out]=lr_adi_sylv(A,B,M,C,F,G,pA,pB,maxiter,rtol,opts)
%
% Generate a low rank matrices Z,Y and diagonal D such that X=Z*D*Y' solves the
% Sylvester equation:
%
% A*X*C + M*X*B + F*G'=0                              (1)
%
% The function implements the (inexact) low rank ADI method based on [1-3]
%
% Inputs:
%
% A,B,E,C,F,G      Coefficient matrices in the above euqation.
%  pA,pB       vectors with pairs of shift parameters
% maxiter  maximum iteration number.
% rtol     tolerance for the residual norm based stopping criterion
% opts - settings structure for INNER LINEAR SOLVES (A-systems and B-systems):
%        inner_tol: inner fixed residual accuracy (default: tol(1)/10)
%        maxit_inner: max inner iters (default 100)
%        linsolverA,B: 'exact'=\, 'gmres', 'minres', 'bicgstab', 'bicgstabl',
%        'tfqmr',... (default: 'exact')
%        intolstrat: fixed inner tol: 'fixed'
%                    relaxed tols: 'relax',
%                    balanc. relax 'bal_relax',
%        bal_q: for 'bal_relax'-mode: 0 for A-system, 1 for B-system
%        itolmax, itolmin: max/min inner tols (1e-2,1e-12)
%        rgap_update: cheap estimate of norm(res.gap) (default: 1, only needed when backlook=1, only set to 0 for small problems) ),
%        backlook: back-looking (incorporate previous inner residuals (backlooking strategy, default: 1)
%        savg: safeguard constant (default 1)
%        debug: default 0, debug/dev-mode: calculate+shows various further (expensive) quantities
%        MtypeA,MtypeB: type of precondititioning of A/B systems: 'right', 'left', 'center' (default right)
%        M1A,M2A,M1B,M2B: preconditioners of A/B systems: M=M1*M2, matrices or function handles
%        cycl: subspace dim for restarted gmres (default [])
%       ----------------
%       debug: 1 switches on estimation of res.norm via lanzcos for cross-checking
%              2 switches on expensive debug mode for various extra
%              quantities (only use for small examples!!!)
%       -------- experimental settings ----
%       updprec 0 / 1: update preconditioner in every step
%       updprec.precfunA,B - function handle to prec. comp. for A/B sys.
%       updprec.poptsA,B - options for precfun

% Outputs:
%
% Z,D,Y     low-rank solution factors Z,D,Y such that approx. Z*D*Y'=X solves (1).
% res       Sylvester residuals: 1st row - computed residuals
%                              2nd row - true residuals (estim. by lanzcos)
% niter     the number of iteration steps taken
% timings   2 x niter matrix with timings, row 1: time of whole ADI step
%                                          row 2,3: time for residual comp.
% out       various extra output data
%                   matvec: vector with numbers of matrix-vector products per step
%                   itol: vector with generated inner tolerances per step
%
% [1] Benner/Li/Truhar: On the ADI method for Sylvester equations,
%                           J. Comput. Appl. Math., 2009
% [2] Benner/Kuerschner: Computing Real Low-rank Solutions of Sylvester equations by the Factored ADI
%                       Method, Comput. Math. Appl., 2014
% [3] Kuerschner: Inexact linear solves in the low-rank ADI iteration for
%      large Sylvester equations, ETNA, 2024

% Patrick Kuerschner 2022-24

% All rights reserved.
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Redistribution and use in source and binary forms, with or without
% modification, are permitted provided that the following conditions are
% met:
%
% 1. Redistributions of source code must retain the above copyright notice,
%    this list of conditions and the following disclaimer.
%
% 2. Redistributions in binary form must reproduce the above copyright
%    notice, this list of conditions and the following disclaimer in the
%    documentation and/or other materials provided with the distribution.
%
% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
% IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
% THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
% PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
% CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
% EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
% PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
% PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
% LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
% NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% input parameters are not fully checked!

if nargin < 11 || isempty(opts), % default opts for lin.sys.
    inner_tol=rtol(1)/10;
    itolmax=1e-2;
    itolmin=1e-12;
    maxit_inner=100;
    linsolverA='exact';
    linsolverB='exact';
    intolstrat='relax';
    rgap_update = 1;
    backlook=1;
    debug=0;
    symA=0;
    symB=0;
    savg=1;
    opts.M1A=[];
    opts.M2A=[];
    opts.M1B=[];
    opts.M2B=[];
    opts.MtypeA=[]; opts.MtypeB=[];
else    % provided opts.
    inner_tol=opts.inner_tol;
    itolmax=opts.itolmax;
    itolmin=opts.itolmin;
    maxit_inner=opts.maxit_inner;
    linsolverA=opts.linsolverA;
    linsolverB=opts.linsolverB;
    intolstrat=opts.intolstrat;
    rgap_update= opts.rgap_update;
    backlook=opts.backlook;
    debug=opts.debug;
    q=opts.bal_q;
    symA=opts.symA;
    symB=opts.symB;
    savg=opts.savg;
end

% dims
n=size(A,1);
m=size(B,1);
r=size(F,2);
lA=length(pA);
lB=length(pB);
Ir=speye(r);

if (any(strfind(intolstrat,'relax')) && backlook) || debug>0,
    u=0;
end

% spectral constants
cA=2; cB=2;
if ~symA, cA=cA+sqrt(2); end
if ~symB, cB=cB+sqrt(2); end
ck=max(cB,cA);
e=0;


tadi=tic; % measure time
% norm rhs
[~,Rg]=qr(F,0);
[~,Rf]=qr(G,0);
res0=norm(Rf*Rg');
timings(1,1)=toc(tadi);

% initialize various quantities
ac=pA(1);
bc=pB(1);
Vt=F; Wt=G;
Z=[];Y=[];D=[];
res=zeros(2,maxiter);
% settings for eigs
eigsopts.isreal=false;
eigsopts.issym=true;
timings=zeros(2,maxiter);
rA=zeros(n,maxiter*r);
rB=zeros(m,maxiter*r);
% debug quatities
sumgap=0;sumg1=0; sumg2=0;
nsumg1=0; nsumg2=0;sumappr=0;sumappr2=0;

for i=1:maxiter
    tadi=tic;
    % shifts (currently only precomputed and cyclic usage)
    ipA=mod(i+lA-1,lA)+1;
    ipB=mod(i+lB-1,lB)+1;
    ac=pA(ipA);
    bc=pB(ipB);
    w=norm(Vt);
    t=norm(Wt);
    % choose the inner solve tolerance
    switch(intolstrat)
        case 'fixed' % fixed tol
            inner_tol(1:2,:)=[opts.inner_tol/w;opts.inner_tol/t];
        case {'relax','bal_relax'}
            if i>1 && backlook %estimate the  residual gap
                if rgap_update, %cheap update via triangular inequality
                    u = u + norm(reA)*norm(gCW)+norm(reB)*norm(gEV);
                else %from scratch (not recommended)
                    Rgap=-M*Z*D*rB(:,1:i*r-r)'-rA(:,1:i*r-r)*D*Y'*C;
                    u = norm(Rgap);
                end
            end
            e=savg*rtol*res0/(2*ck*maxiter);
            if i>1 && backlook
                e = abs(i*e-u)/ck;
            end
            if ~strcmp(linsolverA,'exact') && ~strcmp(linsolverB,'exact')
                %        both solves iterative
                b1=e/t;
                b2=e/w;
                if strcmp(intolstrat,'bal_relax') % favor either A or B systems
                    if  opts.bal_q==0,
                        s1=[max(min([b1,itolmax]),itolmin)];
                        s2=max(min([(e-s1*t)/(2*s1+w),itolmax]),itolmin);
                    elseif opts.bal_q==1,
                        s2=max(min([b2,itolmax]),itolmin);
                        s1=max(min([(e-s2*w)/(2*s2+t),itolmax]),itolmin);
                    end
                else % standard midpoints approach
                    Pc=0.5*[max(min([b1,itolmax]),itolmin);max(min([b2,itolmax]),itolmin)];
                    s1=max(min([Pc(1),itolmax]),itolmin);
                    s2=max(min([(e-s1*t)/(2*s1+w),itolmax]),itolmin);
                end
                % only one lin.sys. iterative, the other one direct
            elseif strcmp(linsolverA,'exact') && ~strcmp(linsolverB,'exact')
                s1=1e-15;
                s2=max(min([(e-s1*t)/(2*s1+w),itolmax]),itolmin);
            else
                s2 = 1e-15;
                s1=max(min([(e-s2*w)/(2*s2+t),itolmax]),itolmin);
            end

            inner_tol(1)=(s1)/w;
            inner_tol(2)=(s2)/t;
            %      inner_tol boundaries
            if inner_tol(1)<=itolmin,
                inner_tol(1)=itolmin;
            elseif inner_tol(1)>=itolmax,
                inner_tol(1)=itolmax;
            end
            if inner_tol(2)<=itolmin,
                inner_tol(2)=itolmin;
            elseif inner_tol(2)>=itolmax,
                inner_tol(2)=itolmax;
            end
            % store generated tolerances
            out.itol(1:2,i)=inner_tol;
            out.itol(3:4,i)=[w;t];

    end
    % preconditioner updating (experimental)
    if isfield(opts,'updprec') && i>1,
        if ~strcmp(linsolverA,'exact')
            [opts.M1A]=opts.updprec.precfunA(-(A+bc*M),opts.updprec.poptsA);
            opts.M2A=opts.M1A';
        end
        if ~strcmp(linsolverB,'exact')
            [opts.M1B]=opts.updprec.precfunB(-(B'+conj(ac)*C'),opts.updprec.poptsB);
            opts.M2B=opts.M1B';
        end
    end
    % solve system  V1=(A+bc*E)\Vt;
    [V1,relres,out.nrit(1,i),reA]=solve_LS(@(x) MV(A,x)+bc*MV(M,x),Vt,inner_tol(1),maxit_inner,linsolverA,[],opts.M1A,opts.M2A,opts.MtypeA);
    if ~rgap_update && backlook, rA(:,(i-1)*r+1:i*r)=reA; end
    out.inres(1,i)=norm(reA)/w;

    %  solve system   W1=(B+ac*C)'\Wt;
    [W1,relresB,out.nrit(2,i),reB]=solve_LS(@(x) MV(B',x)+conj(ac)*MV(C',x),Wt,inner_tol(2),maxit_inner,linsolverB,[],opts.M1B,opts.M2B,opts.MtypeB);
    if ~rgap_update && backlook, rB(:,(i-1)*r+1:i*r)=reB; end
    out.inres(2,i)=norm(reB)/t;

    % assemble Low-rank factors
    Z=[Z V1];
    Y=[Y W1];
    gam=-(bc+ac);
    D=blkdiag(D,gam*Ir);
    if debug>0, Vto=Vt; Wto=Wt; w0=w; t0=t; end
    gEV=gam*MV(M,V1); gCW=conj(gam)*MV(C',W1);
    % residual factors
    Vt=Vt+gEV; Wt=Wt+gCW;

    denomR=res0;

    restime=tic;
    [~,Rf]=qr(Vt,0);
    [~,Rg]=qr(Wt,0);

    res(1,i)=norm(Rf*Rg')/denomR;
    timings(2,i)=toc(restime);


    if opts.debug  % debug mode: estimate res.norm via Lanczos
        restime=tic;
        res(2,i)=syl_r_norm(A,B,F,G,Z,D,Y,m,M,C,[],[])/denomR;
        timings(3,i)=toc(restime);
        fprintf(1,['step: %4d  n.resLR: %2.4e resLan: %2.4e\n'],i,res(1,i),res(2,i));
    else
        fprintf(1,['step: %4d  n.resLR: %2.4e\n'],i,res(1,i));
    end

    % fprintf(1,['inresA %2.4e\t inresB %2.4e\n'],norm(reA)/w,norm(reB)/t);
    % fprintf(1,['initerA %i\t initerB %i\n'],out.nrit(1,i),out.nrit(2,i));

    if opts.debug>1 % debug mode (expensive)
        fprintf(1,['nW/nWo= %2.4e\t nT/nTo= %2.4e\t prod=%2.4e\n'],norm(Vt)/norm(Vto),norm(Wt)/norm(Wto),norm(Vt)/norm(Vto)*norm(Wt)/norm(Wto));
        R1=A*Z*D*Y'*C+M*Z*D*Y'*B+F*G';
        Rgap=-M*Z*D*rB(:,1:i*r)'-rA(:,1:i*r)*D*Y'*C;
        R2=Vt*Wt'+Rgap;
        fprintf(1,['R1-R2: %2.4e\t Rgap: %2.4e\t R1-Rc: %2.4e\n'],...
            norm(R1-R2),norm(Rgap),norm(R1-Vt*Wt'));
        ngEV= norm(gam*M*V1-gam*M*(((A+bc*M)\(Vto-reA))));
        ngCW= norm(gam*C'*W1-gam*C'*(((B+ac*C)'\(Wto-reB))));
        fprintf(1,['g*E*V1= %2.4e\t ngCW= %2.4e\n'],ngEV,ngCW);
    end



    if res(1,i)<rtol % sylv. res. small enough?
        fprintf('\n\n');
        timings(1,i)=timings(1,i)+toc(tadi);
        break;
    end
    timings(1,i)=timings(1,i)+toc(tadi);
end
niter=i;
timings=timings(:,1:niter);
res=res(:,1:niter);