function [X_B,X_L]=Lyap_Banded(A,B,param)
% Approximation to the solution matrix of the Lyapunov equation 
%
% A*X+X*A=B,    A SPD and banded, ill-conditioned, B symm. and banded
%
% by lyap_banded.
%
% INPUT
% A coeff matrix  n x n  spd banded
% B known data matrix ("rhs" matrix)  n x n  sym banded
% param : struct containing all the parameters and thresholds of the algorithm.
% In particular,
% param.resrel: threshold for the relative residual norm
% param.maxit: maximum number of iterations allowed for the iterative computation of the low-rank part
% param.epsilon_quad: threshold for the adaptive Gauss-Lobatto quadrature formula
% param.nu: number of terms in the rational Chebyshev expansion
% param.epsilon_tau: threshold for the computation of tau
% param.bandexp: maximum bandwidth allowed for exp(-tau*A), then
%                bandwidth(X_B) <= 2*param.bandexp+bandwidth(B) 
% param.epsilon_sparseinv: truncation tolerance for the sparse
%                          approximate inverse algorithm
%
% OUTPUT
% X_B, X_L: approx solution obtained as X = X_B + X_L * X_L'
%
% NOTE: A and B are divided by the estimated smallest eigenvalue of A before
% the two parts of the solution are computed; the optional true residual at
% the end is evaluated with the original (unscaled) A and B.
%
% The function prints progress information and interactively asks whether
% the true residual should be computed.
%
% Reference manuscript:
%
% Davide Palitta and Valeria Simoncini
% NUMERICAL METHODS FOR LARGE-SCALE LYAPUNOV EQUATIONS WITH SYMMETRIC BANDED DATA
% July 2018, SISC
%
% REQUIREMENT: the subroutines TruncSubGen_mex and lowrank_normF which make use of LAPACK and C-BLAS subroutines. 
% 
%
% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 
% FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 
% COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 
% IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 
% CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 

% scale the problem
n=size(A,1);
Borig=B;
Aorig=A;
opts.tol=1e-4;
a = eigs(A,1,'sm',opts);
A = A/a;
B = B/a;
% after the scaling the smallest eigenvalue estimate of A is 1
a = 1;
b = eigs(A,1,'lm',opts);
condA=b/a;
fprintf('Estimated condition number of A : %10.5e\n',condA)    

% Compute tau: we approximate for which tau, the entries in position (1,band_exp) of 
% exp(-tau*A) is less than epsilon. 
rho=(b-a)/4;
epsilon_tau=param.epsilon_tau;
band_Exp=param.bandexp;
tau=(-5*rho*log(epsilon_tau/10)-sqrt(25*(rho*log(epsilon_tau/10))^2-20*rho*band_Exp^2*a))/(10*rho*a);
fprintf('Computed tau: %10.5e\n',tau)

fprintf('Approx solution X = X_B + X_L*X_Lt:\n')
fprintf('\n')
% Compute the banded part of the solution (integral over [0,tau])
tic
X_B=AdaptLob_MatrixExp(A,B,0,tau,a,b,param,0);
timeY=toc;
fprintf('Bandwidth(X_B)=%3d\n',bandwidth(X_B))
fprintf('Time to compute X_B: %10.5e\n',timeY)
fprintf('\n')

% Compute the low-rank part of the solution; the Krylov subspace is started
% from a random vector
tic
[X_L,rel_res]=iterativeZ(A,B,X_B,tau,randn(n,1),param);
timeZ=toc;

fprintf('Rank(X_L)=%3d\n',size(X_L,2))
fprintf('Time to compute X_L: %10.5e\n',timeZ)
fprintf('\n')
fprintf('Total exec time: %10.5e\n',timeY+timeZ)
fprintf('Relative residual norm: %10.5e\n',rel_res)

% real relative residual norm. It shouldn't be computed for large scale
% problems
reply=input('Do you want to compute the true residual F-norm? ,Y/N [N]:','s');
% An empty answer (default) or any answer starting with N/n skips the
% computation. The answer itself must be tested here: comparing the literal
% 's' with 'N' is always false and would compute the residual even for "N".
skipTrueResidual = isempty(reply) || strncmpi(strtrim(reply),'N',1);
if ~skipTrueResidual
    wrk=Aorig*X_L;
    % no trailing semicolon on purpose: the value is displayed to the user
    true_rel_res=norm(Aorig*X_B+wrk*X_L'+X_B*Aorig'+X_L*wrk'-Borig,'fro')/norm(Borig,'fro')
end
end


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function Q=AdaptLob_MatrixExp(A,B,a,b,lambda_min,lambda_max,param,trace)
% function Q=AdaptLob_MatrixExp(A,B,a,b,lambda_min,lambda_max,param,trace)
% Function that approximates the integral 
%
% \int_a^b \exp(-tA)*B*exp(-tA) dt
%
% with an adaptive Gauss-Lobatto quadrature formula up to the tolerance
% param.epsilon_quad. We have modified the algorithm presented in
%
% W. Gander and W. Gautschi, Adaptive quadrature—revisited, BIT, 40 (2000),
% pp. 84–101,
% 
% for the case of the matrix-valued function \exp(-tA)*B*exp(-tA).
% Notice that a truncation of the smallest entries of the approximations to
% exp(-tA) is performed (entries not larger than param.epsilon_quad in
% absolute value are dropped).
%
% INPUT
% A, B: coefficient matrices
% a, b: interval of integration
% lambda_min, lambda_max: smallest and largest eigenvalues of A
% param struct containing all the parameters and thresholds of the algorithm.
% In particular,
% param.resrel: threshold for the relative residual norm
% param.maxit: maximum number of iterations allowed for the iterative computation of the low-rank part
% param.epsilon_quad: threshold for the adaptive Gauss-Lobatto quadrature formula
% param.nu: number of terms in the rational Chebyshev expansion
% param.epsilon_tau: threshold for computing tau
% param.bandexp: maximum bandwidth allowed for exp(-tau*A), then
%                bandwidth(X_B) <= 2*param.bandexp+bandwidth(B) 
% param.epsilon_sparseinv: truncation tolerance for the sparse
%                          approximate inverse algorithm
% trace: 1 to print some info of the integration process
%
% OUTPUT:
% Q: matrix, approximating \int_a^b \exp(-tA)*B*exp(-tA) dt
%
% SIDE EFFECTS: sets the global variables termination2, omega, xi and
% counter, which are read by AdaptLobStep.
%
% Reference manuscript:
%
% Davide Palitta and Valeria Simoncini
% NUMERICAL METHODS FOR LARGE-SCALE LYAPUNOV EQUATIONS WITH SYMMETRIC BANDED DATA
% July 2018, SISC
%
% REQUIREMENT: the subroutines TruncSubGen_mex which makes use of LAPACK subroutines. 
% 
%
% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 
% FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 
% COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 
% IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 
% CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 

global termination2 omega xi counter

% compute the nodes and weights of the rational Chebyshev expansion once
% for all as they don't depend on t
%[omega,xi]=rational(6,1);
nu=param.nu; [bb,aa]=cheby(nu);  [omega,xi,~]=residue(bb(nu+1:-1:1),aa(nu+1:-1:1));
tol=param.epsilon_quad;

counter=1;
termination2=0;
m=(a+b)/2;
h=(b-a)/2;
alpha=sqrt(2/3);
beta=1/sqrt(5);
x1=.942882415695480;
x2=.641853342345781;
x3=.236383199662150;
% the 13 quadrature nodes used for the initial estimates
x=[a, m-x1*h, m-alpha*h, m-x2*h, m-beta*h, m-x3*h, m, m+x3*h, m+beta*h, ...
    m+x2*h, m+alpha*h, m+x1*h, b];

% one slot per node (cell(13) would allocate a 13-by-13 cell array)
local=cell(1,13);
n=size(A,1);
% the entry-dropping threshold keeps the original quadrature tolerance even
% if tol is relaxed below
threshold=tol;
for j=1:13
    if x(j)==0
        % exp(-0*A) is the identity
        expA=speye(n);
    else
        expA=ExpRationalApprox(A,x(j),omega,xi,lambda_min,lambda_max,param);
    end
    % truncate the smallest entries of expA (mask computed only once)
    keep=abs(expA)>threshold;
    [rows,columns]=find(keep);
    expA=sparse(rows,columns,expA(keep),n,n);
    % ExpRationalApprox approximates only the lower triangular part of
    % exp(-t*A) exploiting symmetry. However we need the full matrix to
    % perform the matrix-matrix multiplication
    expA=tril(expA)+tril(expA,-1).';
    local{j}=expA*B*expA; 
end

fa=local{1};
fb=local{13};
% 4-point, 7-point and 13-point estimates of the integral
i2=(h/6)*(local{1}+local{13}+5*(local{5}+local{9}));
i1=(h/1470)*(77*(local{1}+local{13})+432*(local{3}+local{11})+625*(local{5}+local{9})+672*local{7});
is=h*(.0158271919734802*(local{1}+local{13})+.0942738402188500*(local{2}+local{12})+...
    .155071987336585*(local{3}+local{11})+.188821573960182*(local{4}+local{10})+...
    .199773405226859*(local{5}+local{9})+.224926465333340*(local{6}+local{8})+.242611071901408*local{7});

erri1=norm(i1-is,'fro');
erri2=norm(i2-is,'fro');
% if erri2 is zero R is Inf or NaN and the tolerance is left unchanged
R=erri1/erri2;
if(R>0 && R<1), tol=tol/R; end
% reference magnitude used by AdaptLobStep in its termination test
is=norm(is,'fro')*tol/eps;
if(is==0), is=b-a; end
Q=AdaptLobStep(A,B,a,b,fa,fb,lambda_min,lambda_max,is,trace,threshold,param);
end

% ----------------------------------------------------------

function Q=AdaptLobStep(A,B,a,b,fa,fb,lambda_min,lambda_max,is,trace,threshold,param)
% Recursive step of the adaptive Gauss-Lobatto quadrature for the
% matrix-valued integrand exp(-tA)*B*exp(-tA) on the interval [a,b].
%
% INPUT
% A, B: coefficient matrices
% a, b: end points of the current subinterval
% fa, fb: integrand already evaluated at a and b
% lambda_min, lambda_max: extreme eigenvalues of A (passed on to
%                         ExpRationalApprox)
% is: reference magnitude for the termination test; the step is accepted
%     when adding the difference of the two local estimates to is does not
%     change is in floating point arithmetic
% trace: if nonzero, [a b-a counter] is displayed for every accepted interval
% threshold: entries of the approximation to exp(-tA) whose absolute value
%            does not exceed threshold are dropped
% param: struct of algorithm parameters (passed on to ExpRationalApprox)
%
% OUTPUT
% Q: approximation of the integral over [a,b]
%
% The global variables omega and xi (weights and poles of the rational
% Chebyshev expansion) are read; termination2 and counter are updated.

global termination2 omega xi counter
m=(a+b)/2;
h=(b-a)/2;
alpha=sqrt(2/3);
beta=1/sqrt(5);
mll=m-alpha*h;
ml=m-beta*h;
mrr=m+alpha*h;
mr=m+beta*h;
% the five interior nodes of the current interval
x=[mll,ml,m,mr,mrr];

% one slot per node (cell(5) would allocate a 5-by-5 cell array)
local=cell(1,5);
n=size(A,1);

for j=1:5
    if x(j)==0
        % exp(-0*A) is the identity
        expA=speye(n);
    else
        expA=ExpRationalApprox(A,x(j),omega,xi,lambda_min,lambda_max,param);
    end
    % truncate the smallest entries of expA (mask computed only once)
    keep=abs(expA)>threshold;
    [rows,columns]=find(keep);
    expA=sparse(rows,columns,expA(keep),n,n);
    % ExpRationalApprox approximates only the lower triangular part of
    % exp(-t*A) exploiting symmetry. However we need the full matrix to
    % perform the matrix-matrix multiplication
    expA=tril(expA)+tril(expA,-1).';
    local{j}=expA*B*expA; 
end

fmll=local{1};
fml=local{2};
fm=local{3};
fmr=local{4};
fmrr=local{5};
% 4-point and 7-point estimates on [a,b]
i2=(h/6)*(fa+fb+5*(fml+fmr));
i1=(h/1470)*(77*(fa+fb)+432*(fmll+fmrr)+625*(fml+fmr)+672*fm);

if(is+norm(i1-i2,'fro')==is) || (mll<=a) || (b<=mrr)
    if ((m<=a) || (b<=m)) && (termination2==0)
        % warning converts escape sequences such as \n only when it is
        % called with more than one input argument, hence the identifier
        warning('Lyap_Banded:AdaptLobStep:noMachineNumber', ...
            'Interval contains no more machine number\nRequired tolerance may not be met');
        % emit the warning only once per integration
        termination2=1;
    end
    Q=i1;
    if(trace), disp([a b-a counter]), counter=counter+1; end;
else
    % refine: recurse on the six subintervals defined by the interior nodes,
    % reusing the integrand values already computed at their end points
    Q=AdaptLobStep(A,B,a,mll,fa,fmll,lambda_min,lambda_max,is,trace,threshold,param)+...
        AdaptLobStep(A,B,mll,ml,fmll,fml,lambda_min,lambda_max,is,trace,threshold,param)+...
        AdaptLobStep(A,B,ml,m,fml,fm,lambda_min,lambda_max,is,trace,threshold,param)+...
        AdaptLobStep(A,B,m,mr,fm,fmr,lambda_min,lambda_max,is,trace,threshold,param)+...
        AdaptLobStep(A,B,mr,mrr,fmr,fmrr,lambda_min,lambda_max,is,trace,threshold,param)+...
        AdaptLobStep(A,B,mrr,b,fmrr,fb,lambda_min,lambda_max,is,trace,threshold,param);
end
end


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [Z,res]=iterativeZ(A,B,X_B,tau,v0,param)
% function [Z,res]=iterativeZ(A,B,X_B,tau,v0,param)
% Function that approximates the low-rank part of the solution X of the
% Lyapunov equation 
%
% A*X+X*A=B,    A SPD and banded, B symm. and banded.
%
% To this end the Krylov subspace K_m(A^{-1},v0) is constructed.
%
% INPUT
% A, B: data of the Lyapunov equation
% X_B: banded part of the solution X (already computed)
% tau: scalar used in the factors exp(-tau*lambda) that scale the projected
%      solution (lambda being the eigenvalues of the projected A)
% v0: starting vector for the Krylov subspace
% param struct containing all the parameters and thresholds of the algorithm.
% In particular,
% param.resrel: threshold for the relative residual norm
% param.maxit: maximum number of iterations allowed for the iterative computation of the low-rank part
% param.epsilon_quad: threshold for the adaptive Gauss-Lobatto quadrature
%                     formula; here it is also the threshold on the
%                     relative change of the low-rank factor
% param.nu: number of terms in the rational Chebyshev expansion
% param.epsilon_tau: threshold for computing tau
% param.bandexp: maximum bandwidth allowed for exp(-tau*A), then
%                bandwidth(X_B) <= 2*param.bandexp+bandwidth(B) 
% param.epsilon_sparseinv: truncation tolerance for the sparse
%                          approximate inverse algorithm
%
% OUTPUT
% Z: low-rank part of the solution
% res: last computed relative residual norm (1 if the residual has never
%      been evaluated; it is evaluated every 10 iterations)
%
% Reference manuscript:
%
% Davide Palitta and Valeria Simoncini
% NUMERICAL METHODS FOR LARGE-SCALE LYAPUNOV EQUATIONS WITH SYMMETRIC BANDED DATA
% July 2018, SISC
% 
% REQUIREMENT: the subroutine lowrank_normF which makes use of LAPACK and C-BLAS subroutines. 
%
% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 
% FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 
% COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 
% IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 
% CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 


% Computes the Cholesky factor of A once for all
L=chol(A,'lower');

maxit=param.maxit;
tol=param.resrel;
tol2=param.epsilon_quad;

% Preallocation
n=size(A,1);
normB=norm(B,'fro');
delta=0;
V=zeros(n,maxit+1); 
H=zeros(maxit+1,maxit);
i=1;
k=1;
res=zeros(1,maxit);
res(1)=1;
diffZ=1;
% Compute the first basis vector
V(1:n,1) = v0;  
V(1:n,1)=V(1:n,1)/norm(V(1:n,1));

% start projecting A, B and A*X_B+X_B*A-B onto the subspace
C=A*X_B+X_B*A-B;
norm0=norm(C,'fro');
gamma=norm0;
K=V(1:n,1)'*(A*V(1:n,1));
P=V(1:n,1)'*(B*V(1:n,1));
S=V(1:n,1)'*(C*V(1:n,1));


% start the loop
while res(k)>tol && diffZ>tol2 && i<maxit 

  % Build the space: next direction is A^{-1}*V(:,i) via the Cholesky factor
  i1=i+1;
  V(:,i1) = L'\(L\V(:,i));
  
  % Orthogonalization by modified Gram-Schmidt (performed twice)
  for l=1:2
      for j=1:i
        t = V(1:n,j)'*V(1:n,i1);
        H(j,i)=H(j,i)+t;
        V(1:n,i1)=V(1:n,i1)-t*V(1:n,j);
      end
  end
  
  t = norm(V(1:n,i1));
  H(i1,i)=t;
  if (t ~= 0.)
    t=1.0/t; V(1:n,i1)=V(1:n,i1)*t;
  end
  
  % extend K, P and S with the new basis vector
  k_new=V(1:n,1:i1)'*(A*V(1:n,i1));
  K=[K,k_new(1:end-1);k_new'];
  p_new=V(1:n,1:i1)'*(B*V(1:n,i1));
  P=[P,p_new(1:end-1);p_new'];
  s_new=V(1:n,1:i1)'*(C*V(1:n,i1));
  S=[S,s_new(1:end-1);s_new'];
  
  % Compute the residual norm every 10 iterations.
  % See Proposition 3.6 in 
  % Davide Palitta and Valeria Simoncini
  % NUMERICAL METHODS FOR LARGE-SCALE LYAPUNOV EQUATIONS WITH SYMMETRIC BANDED DATA
  % July 2018, SISC
  % for details about the residual norm computation
  if mod(i,10)==0
      k=k+1;
      % eigenvectors of the projected A and factor of the projected solution
      [Ritz,Z0]=iterativeZ_projectedFactor(K(1:i,1:i),P(1:i,1:i),tau);
      dold=delta;
      delta = Ritz*Z0;   
      % Monitor relative change in the low-rank factor, to prevent stagnation
      if k>2
          normddT=lowrank_normF(full(delta'),full((delta'*delta)*delta'));
          err_it=norm([delta,-[dold;zeros(size(delta,1)-size(dold,1),size(dold,2))]]*[delta,[dold;zeros(size(delta,1)-size(dold,1),size(dold,2))]]','fro');
          diffZ=err_it/normddT;
      end
      vhat=A*V(:,i1); hhat=V(:,1:i)'*vhat; vhat=vhat - V(:,1:i)*hhat; beta=norm(vhat);vhat=vhat/beta;
      Hinv=inv(H(1:i,1:i));
      G1=[eye(i) hhat; zeros(1,i) beta]*[Hinv; -H(i1,i)*Hinv(i,:)];
      G=[eye(i+1,i),G1];
      J=[zeros(i,i),delta*delta'; delta*delta',zeros(i,i)];
      J1 = G*J*G';
      normJ = norm(J1,'fro');
      ss_new=[V(:,1:i),vhat]'*(C*vhat);
      SS=[S(1:i,1:i),ss_new(1:end-1);ss_new'];
      trace_extra = trace(J1'*SS);

      % relative residual norm
      res(k)=sqrt(gamma^2+normJ^2+2*trace_extra)/normB;
  end
  i=i1;
  
end    
i=i-1;

% Compute Z
if i==maxit-1
    % if we stopped because the maximum number of iterations has been reached,
    % then we have to compute Z0 first. The same helper as inside the loop
    % is used so that both paths order the eigenvalues by absolute value;
    % sorting the signed eigenvalues here could produce a complex or empty
    % factor when some of them are negative.
    [Ritz,Z0]=iterativeZ_projectedFactor(K(1:i,1:i),P(1:i,1:i),tau);
end
Z = V(1:n,1:i)*Ritz*Z0;
  
res=res(k);

end

function [Ritz,Z0]=iterativeZ_projectedFactor(Ki,Pi,tau)
% Helper of iterativeZ: eigenvectors Ritz of the projected matrix Ki and a
% truncated factor Z0 of the projected low-rank term expressed in that
% eigenbasis. Eigenvalues of the projected term are ordered by decreasing
% absolute value and those with sqrt(|s|/max|s|) <= 1e-12 are dropped.
m=size(Ki,1);
[Ritz,Lambda]=eig(full(Ki));
lambda=diag(Lambda);
ZZ=diag(exp(-tau*lambda))*((Ritz'*Pi*Ritz)./(lambda*ones(1,m)+ones(m,1)*lambda'))*diag(exp(-tau*lambda));
[uZ,sZ]=eig(ZZ); [sZ,id]=sort(abs(diag(sZ)));
sZ=flipud(sZ); uZ=uZ(:,id(end:-1:1));
rk=sum(sqrt(abs(sZ)/max(sZ))>1e-12);
Z0 = uZ(:,1:rk)*diag(sqrt(sZ(1:rk)));
end



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [p,q]=cheby(nu)
% Coefficients of the Chebyshev rational approximation to e^{-x} on
% [0,+\infty) for a given number of terms.
%
% INPUT
% nu: integer between 1 and 14
%
% OUTPUT
% p, q: column vectors with nu+1 entries each; q(1) is always 1.
%       (presumably numerator and denominator coefficients in ascending
%       powers of x - verify against the reference below)
%
% An error with identifier 'cheby:invalidOrder' is raised when nu is not a
% supported value, instead of leaving the outputs unassigned.
%
% Ref: 
%Cody, W.J., Meinardus, G. and Varga, R. S., 
%Chebyshev rational approximations to e^{-x} in [0,+\infty) and applications to heat-conduction problems 
%(1969).

if ~(isnumeric(nu) && isscalar(nu))
    error('cheby:invalidOrder','nu must be an integer between 1 and 14');
end

switch nu
    case 1
        p=[1.0669
            -1.1535e-1];
        q=[1
            1.7275];

    case 2
        p=[9.92641e-1
            -1.88332e-1
            4.21096e-3];
        q=[1
            6.69295e-1
            5.72258e-1];

    case 3
        p=[1.000799
            -2.236578e-1
            1.249962e-2
            -9.981009e-5];
        q=[1
            7.982923e-1
            2.204115e-1
            1.248580e-1];

    case 4
        p=[9.9991347e-1
            -2.4025373e-1
            1.8400509e-2
            -4.4981230e-4
            1.6765142e-6];
        q=[1
            7.5668322e-1
            2.9175468e-1
            4.5750212e-2
            1.9376980e-2];

    case 5
        p=[1.00000935
            -2.50230902e-1
            2.24805919e-2
            -8.33629264e-4
            1.07797622e-5
            -2.19125327e-8];
        q=[1
            7.50174555e-1
            2.69910157e-1
            6.76687392e-2
            6.93457968e-3
            2.34468866e-3];

    case 6
        p=[9.99998991e-1
            -2.56774988e-1
            2.53896499e-2
            -1.17690441e-3
            2.48209105e-5
            -1.90699255e-7
            2.34264503e-10];
        q=[1
            7.43173208e-1
            2.68982436e-1
            6.15930326e-2
            1.13649362e-2
            8.25674222e-4
            2.32303566e-4];

    case 7
        p=[1.000000109
            -2.613998104e-01
            2.754893180e-02
            -1.467589943e-03
            4.060544787e-05
            -5.370676308e-07
            2.653910891e-09
            -2.118933743e-012];
        q=[1
            7.38606624e-01
            2.660947238e-01
            6.221006831e-02
            1.022960372e-02
            1.487848134e-03
            8.088769796e-05
            1.948334848e-05];

    case 8
        p=[9.9999998828e-1
            -2.6483406521e-1
            2.9206990785e-2
            -1.7107669530e-3
            5.6307621623e-5
            -1.0147731374e-6
            9.0012946140e-9
            -3.0312244065e-11
            1.6607892788e-14];
        q=[1
            7.3516514516e-1
            2.6438063240e-1
            6.1718804886e-2
            1.0520836925e-2
            1.3283462347e-3
            1.5910392054e-4
            6.7270200039e-6
            1.4167726615e-6];

    case 9
        p=[1.00000000126
            -2.67485669919e-1
            3.05175283666e-2
            -1.91477639225e-3
            7.11036342529e-5
            -1.56780173525e-6
            1.95356666454e-8
            -1.22095569141e-10
            2.92870663734e-13
            -1.14850409022e-16];
        q=[1
            7.32514425277e-1
            2.63030802595e-1
            6.15308528374e-2
            1.04926248194e-2
            1.39500667140e-3
            1.41160606892e-4
            1.43514073911e-5
            4.85965892273e-7
            9.09160466590e-8];

    case 10
        p=[9.999999998639e-1
            -2.695935538219e-1
            3.157786404717e-2
            -2.087230287556e-3
            8.469462611579e-5
            -2.152957893424e-6
            3.359540105285e-8
            -3.024379165793e-10
            1.383512200113e-12
            -2.447944782724e-15
            7.105957443307e-19];
        q=[1
            7.304064348333e-01
            2.619844511628e-01
            6.135987852481e-02
            1.052222566719e-02
            1.400441832430e-03
            1.516294286389e-04
            1.267060421895e-05
            1.117985726664e-06
            3.102408125045e-08
            5.220777185774e-09];

    case 11
        p=[1.0000000000147
            -2.7130869737149e-1
            3.2452583980923e-2
            -2.2343438385897e-3
            9.7032753192328e-5
            -2.7417669166461e-6
            5.0236265041453e-8
            -5.7754991658630e-10
            3.8861942441125e-12
            -1.3413312302919e-14
            1.8009807948555e-17
            -3.9776294455404e-21];
        q=[1
            7.2869130396820e-1
            2.6114386818246e-1
            6.1230652273991e-2
            1.0537296909746e-2
            1.4150145103337e-3
            1.5333152911587e-4
            1.3934862063194e-5
            9.8315736139162e-7
            7.6574419206722e-8
            1.7740034787683e-9
            2.7128051110139e-10];

    case 12
        p=[9.99999999998420e-1
            -2.72732010381007e-1
            3.31862748878945e-2
            -2.36102860933434e-3
            1.08182047214783e-4
            -3.31706704552847e-6
            6.85640066472736e-8
            -9.40255674650549e-10
            8.21592178522494e-12
            -4.24605372941828e-14
            1.13357453225507e-16
            -1.18241932729819e-19
            2.03287742523846e-23];
        q=[1
            7.27267989462094e-1
            2.60454266874069e-1
            6.11258943974162e-2
            1.05516595641775e-2
            1.42483770484151e-3
            1.56277807995398e-4
            1.41984908194392e-5
            1.10762177820598e-6
            6.71223492994341e-8
            4.67724663200848e-9
            9.18591910070282e-11
            1.28708777575584e-11];

    case 13
        p=[1.000000000000170
            -2.739314032102750e-1
            3.381011641046875e-2
            -2.471069318770823e-3
            1.182461339791637e-4
            -3.869175393266464e-6
            8.784673485471303e-8
            -1.376678957647893e-9
            1.454609363079049e-11
            -9.904112443378351e-14
            4.020148351552472e-16
            -8.475382786761699e-19
            7.005053668034527e-22
            -9.558067295074149e-26];
        q=[1
            7.260685968079845e-1
            2.598787129003795e-1
            6.104001400483738e-2
            1.056366214946976e-2
            1.433396498147585e-3
            1.583680794313270e-4
            1.460945210319625e-5
            1.137269821971446e-6
            7.748287423893217e-8
            4.088647564162874e-9
            2.576669260640894e-10
            4.345519072416265e-12
            5.618520584448164e-13];

    case 14
        p=[9.9999999999998168e-01
            -2.7495604296300043e-01
            3.4346984175671475e-02
            -2.5674439819028618e-03
            1.2734070715233181e-04
            -4.3932808492511236e-06
            1.0753202054485227e-07
            -1.8710255961089453e-09
            2.2849515765300155e-11
            -1.9038640942835345e-13
            1.0310151365350495e-15
            -3.3490280333667533e-18
            5.6743825539523501e-21
            -3.7794523874503295e-24
            4.1609826642376613e-28];
        q=[1.
            7.2504395703488666e-01
            2.5939094125018012e-01
            6.0968185127283595e-02
            1.0574049161691156e-02
            1.4405527154587316e-03
            1.6019211440612190e-04
            1.4908234724800242e-05
            1.1820291576355772e-06
            8.0163997982357503e-08
            4.8361800878648281e-09
            2.2472030400428529e-10
            1.2922802705792779e-11
            1.8921838854022449e-13
            2.2710680218891295e-14];

    otherwise
        error('cheby:invalidOrder','nu must be an integer between 1 and 14');
end
end


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [X]=ExpRationalApprox(A,t,omega,xi,lambda_min,lambda_max,param)
% function [X]=ExpRationalApprox(A,t,omega,xi,lambda_min,lambda_max,param)
% Approximates the matrix exponential exp(-tA) by the nu=length(omega) terms
% of the rational Chebyshev function
%
% exp(-t*A) \approx 
%         \sum_{i=1, i odd}^{nu-1} 2*real(omega(i)*inv(t*A-xi(i)*I)) + omega(nu)*inv(t*A-xi(nu)*I)
% 
% where the last term is present only when the number of poles is odd.
% Each inv(t*A-xi(i)*I) is replaced by the banded approximation returned by
% the subroutine SparseInvLU. The approximations are computed one after the
% other, although they are independent of each other and could be computed
% in parallel by a simple parfor.
%
% INPUT:
% A: coefficient matrix
% t: parameter
% omega, xi: weights and nodes (poles) of the rational Chebyshev function.
%            The code uses the entries 1,3,5,... as representatives of
%            complex conjugate pairs and, for an odd number of poles, the
%            last entry as the unpaired one.
% lambda_min,lambda_max: extreme eigenvalues of A
% param: struct containing all the parameters and thresholds of the
%        algorithm; only param.epsilon_sparseinv (truncation tolerance for
%        the sparse approximate inverse algorithm) is read here.
%
% OUTPUT:
% X: computed approximation X \approx exp(-tA)
%
% Reference manuscript:
%
% Davide Palitta and Valeria Simoncini
% NUMERICAL METHODS FOR LARGE-SCALE LYAPUNOV EQUATIONS WITH SYMMETRIC BANDED DATA
% July 2018, SISC
%
% REQUIREMENT: the subroutines TruncSubGen_mex which makes use of LAPACK subroutines. 
% 
%
% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 
% FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 
% COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 
% IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 
% CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 

dim=size(A,1);
invTol=param.epsilon_sparseinv;
nPoles=length(xi);

if mod(nPoles,2)==1
    % odd number of poles: start the sum from the term of the last pole
    X=omega(end)*SparseInvLU(A,t,xi(end),lambda_max,lambda_min,invTol);
    nPaired=nPoles-1;
else
    % even number of poles: start the sum from an empty sparse matrix
    X=spalloc(dim,dim,1);
    nPaired=nPoles;
end

% the poles come in complex conjugate pairs: one inverse per pair suffices
for idx=1:2:nPaired
    pairInv=SparseInvLU(A,t,xi(idx),lambda_max,lambda_min,invTol);
    X=X+2*real(omega(idx)*pairInv);
end

end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function AsparseInv=SparseInvLU(A,t,xi,lambda_max,lambda_min,tol)
% function AsparseInv=SparseInvLU(A,t,xi,lambda_max,lambda_min,tol)
% Computes a sparse approximate inverse of the matrix
%                 t*A-xi*I
% where t is a real scalar while xi is complex. A is supposed to be banded.
%
% INPUT:
% A: coefficient matrix
% t: scaling parameter
% xi: complex shift
% lambda_max,lambda_min: extreme eigenvalues of A
% tol: truncation tolerance; the a-priori decay bound on the entries of the
%      inverse is compared with tol to select the bandwidth handed to
%      TruncSubGen_mex
%
% OUTPUT:
% AsparseInv: sparse matrix approximating inv(t*A-xi*I), assembled from the
%             triplets returned by TruncSubGen_mex
%
% Reference manuscript:
%
% Davide Palitta and Valeria Simoncini
% NUMERICAL METHODS FOR LARGE-SCALE LYAPUNOV EQUATIONS WITH SYMMETRIC BANDED DATA
% July 2018, SISC
%
% REQUIREMENT: the subroutines TruncSubGen_mex which makes use of LAPACK subroutines. 
% 
%
% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 
% FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 
% COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 
% IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 
% CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 


dim=size(A,1);
% scaled and shifted matrix
shifted=t*A-xi*speye(dim);
% extreme eigenvalues of t*A-xi*I
mu_hi = t*lambda_max-xi;
mu_lo = t*lambda_min-xi;

% bounds on the entries of inv(t*A-xi*I)
% from R. Freund, On polynomial approximations to f_a(z)=(z-a)^{-1} with complex a and some
% applications to certain non-Hermitian matrices, Approx. Theory Appl., 5 (1989), pp. 15–31.
centre=(mu_lo+mu_hi)/(mu_lo-mu_hi);
alpha=(abs(mu_hi)+abs(mu_lo))/abs(mu_lo-mu_hi);
R=alpha+sqrt(alpha^2-1);
alphaR=0.5*(R+1/R);
betaR=0.5*(R-1/R);
cosPsi=real(centre)/alphaR;
rootTerm=sqrt(alphaR^2-cosPsi^2);
boundConst=R/(betaR*rootTerm*(alphaR+rootTerm));
bandShifted=bandwidth(shifted);
boundScale=2*R/abs(mu_hi-mu_lo)*boundConst;
% bound for the entries at distance 0,1,...,dim-1 from the diagonal
decayBound=boundScale*(1/R).^((0:dim-1)/bandShifted);
% we want to compute betaA such that the entry in position (betaA,1) of
% the banded approximation of inv(t*A-xi*I) is larger than tol while 
% the one in position (betaA+1,1) is smaller
betaA=find(abs(decayBound)>tol,1,'last')-1;   

% Compute the LU factorization of the shifted matrix. Here the LDLt
% factorization would be preferable in order to exploit symmetry but the
% matlab ldl does not work with complex matrices. One can do
% [Y]=lu(AA);
% L=tril(Y,-1)+speye(n);
% D=diag(diag(Y));
% but the LU factorization would be computed anyhow
[Lfac,Ufac]=lu(shifted,0);


% store the off-diagonals of the factors column by column: column d holds
% the d-th subdiagonal of L (bottom aligned) and the d-th superdiagonal of
% U (top aligned)
n_band=bandwidth(Lfac);
lowDiags=zeros(dim-1,n_band);
upDiags=lowDiags;
for d=1:n_band
    lowDiags(d:end,d)=full(diag(Lfac,-d));
    upDiags(1:end-d+1,d)=full(diag(Ufac,d));
end
mainU=full(diag(Ufac));


% compute AsparseInv column-wise by computing only the necessary entries within the estimated bandwidth. 
% We perform a "truncated" forward and backward substitution.
% Real and imaginary parts are passed separately to the mex routine.
[y1,y2,rows,columns]=TruncSubGen_mex(real(lowDiags),imag(lowDiags),real(upDiags),imag(upDiags),real(mainU),imag(mainU),betaA,n_band);

% Assemble the sparse matrix from the returned (row, column, value) triplets
AsparseInv=sparse(rows,columns,y1+1i*y2,dim,dim);

end

















