/* -- MAGMA (version 1.12.0) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver @date @precisions normal z -> s d c @author Stan Tomov @author Mark Gates */ #include "common_magmamagma_internal.h" /** Purpose ------- ZLAHRU is an auxiliary MAGMA routine that is used in ZGEHRD to update the trailing sub-matrices after the reductions of the corresponding panels. See further details below. Arguments --------- @param[in] n INTEGER The order of the matrix A. N >= 0. @param[in] ihi INTEGER Last row to update. Same as IHI in zgehrd. @param[in] k INTEGER Number of rows of the matrix Am (see details below) @param[in] nb INTEGER Block size @param[out] A COMPLEX_16 array, dimension (LDA,N-K) On entry, the N-by-(N-K) general matrix to be updated. The computation is done on the GPU. After Am is updated on the GPU only Am(1:NB) is transferred to the CPU - to update the corresponding Am matrix. See Further Details below. @param[in] lda INTEGER The leading dimension of the array A. LDA >= max(1,N). @param[in,out] dA COMPLEX_16 array on the GPU, dimension (LDDA,N-K). On entry, the N-by-(N-K) general matrix to be updated. On exit, the 1st K rows (matrix Am) of A are updated by applying an orthogonal transformation from the right Am = Am (I-V T V'), and sub-matrix Ag is updated by Ag = (I - V T V') Ag (I - V T V(NB+1:)' ) where Q = I - V T V' represent the orthogonal matrix (as a product of elementary reflectors V) used to reduce the current panel of A to upper Hessenberg form. After Am is updated Am(:,1:NB) is sent to the CPU. See Further Details below. @param[in] ldda INTEGER The leading dimension of the array dA. LDDA >= max(1,N). @param[in,out] dY (workspace) COMPLEX_16 array on the GPU, dimension (LDDY, NB). On entry the (N-K)-by-NB Y = A V. It is used internally as workspace, so its value is changed on exit. @param[in] lddy INTEGER The leading dimension of the array dY. LDDY >= max(1,N). @param[in,out] dV (workspace) COMPLEX_16 array on the GPU, dimension (LDDV, NB). On entry the (N-K)-by-NB matrix V of elementary reflectors used to reduce the current panel of A to upper Hessenberg form. The rest K-by-NB part is used as workspace. V is unchanged on exit. @param[in] lddv INTEGER The leading dimension of the array dV. LDDV >= max(1,N). @param[in] dT COMPLEX_16 array on the GPU, dimension (NB, NB). On entry the NB-by-NB upper trinagular matrix defining the orthogonal Hessenberg reduction transformation matrix for the current panel. The lower triangular part are 0s. @param dwork (workspace) COMPLEX_16 array on the GPU, dimension N*NB. Further Details --------------- This implementation follows the algorithm and notations described in: S. Tomov and J. Dongarra, "Accelerating the reduction to upper Hessenberg form through hybrid GPU-based computing," University of Tennessee Computer Science Technical Report, UT-CS-09-642 (also LAPACK Working Note 219), May 24, 2009. The difference is that here Am is computed on the GPU. M is renamed Am, G is renamed Ag. @ingroup magma_zgeev_aux ********************************************************************/ extern "C" magma_int_t magma_zlahru( magma_int_t n, magma_int_t ihi, magma_int_t k, magma_int_t nb, magmaDoubleComplex *A, magma_int_t lda, magmaDoubleComplex_ptr dA, magma_int_t ldda, magmaDoubleComplex_ptr dY, magma_int_t lddy, magmaDoubleComplex_ptr dV, magma_int_t lddv, magmaDoubleComplex_ptr dT, magmaDoubleComplex_ptr dwork, magma_queue_t queue ) { #define dA(i_,j_) (dA + (i_) + (j_)*ldda) magmaDoubleComplex c_zero = MAGMA_Z_ZERO; magmaDoubleComplex c_one = MAGMA_Z_ONE; magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; magmaDoubleComplex *dYm = dV + ihi - k; magma_int_t info = 0; if (n < 0) { info = -1; } else if (ihi < 0 || ihi > n) { info = -2; } else if (k < 0 || k > n) { info = -3; } else if (nb < 1 || nb > n) { info = -4; } else if (lda < max(1,n)) { info = -6; } else if (ldda < max(1,n)) { info = -8; } else if (lddy < max(1,n)) { info = -10; } else if (lddv < max(1,n)) { info = -12; } if (info != 0) { magma_xerbla( __func__, -(info) ); return info; } // top part of Y, above panel, hasn't been computed yet, so do that now // Ym = Am V = A(0:k-1, 0:ihi-k-1) * V(0:ihi-k-1, 0:nb-1) magma_zgemm( MagmaNoTrans, MagmaNoTrans, k, nb, ihi-k, c_one, dA, ldda, dV, lddv, c_zero, dYm, ldda, queue ); // ----- // on right, A := A Q = A - A V T V' // Update Am = Am - Am V T V' = Am - Ym W', with W = V T' // W = V T' = V(0:ihi-k-1, 0:nb-1) * T(0:nb-1, 0:nb-1)' magma_zgemm( MagmaNoTrans, MagmaConjTrans, ihi-k, nb, nb, c_one, dV, lddv, dT, nb, c_zero, dwork, ldda, queue ); // Am = Am - Ym W' = A(0:k-1, 0:ihi-k-1) - Ym(0:k-1, 0:nb-1) * W(0:ihi-k-1, 0:nb-1)' magma_zgemm( MagmaNoTrans, MagmaConjTrans, k, ihi-k, nb, c_neg_one, dYm, ldda, dwork, ldda, c_one, dA, ldda, queue ); // copy first nb columns of Am, A(0:k-1, 0:nb-1), to host magma_zgetmatrix( k, nb, dA, ldda, A, lda, queue ); // ----- // on right, A := A Q = A - A V T V' // Update Ag = Ag - Ag V T V' = Ag - Y W' // Ag = Ag - Y W' = A(k:ihi-1, nb:ihi-k-1) - Y(0:ihi-k-1, 0:nb-1) * W(nb:ihi-k-1, 0:nb-1)' magma_zgemm( MagmaNoTrans, MagmaConjTrans, ihi-k, ihi-k-nb, nb, c_neg_one, dY, ldda, dwork + nb, ldda, c_one, dA(k,nb), ldda, queue ); // ----- // on left, A := Q' A = A - V T' V' A // Ag2 = Ag2 - V T' V' Ag2 = W Yg, with W = V T' and Yg = V' Ag2 // Note that Ag is A(k:ihi, nb+1:ihi-k) // while Ag2 is A(k:ihi, nb+1: n -k) // Z = V(0:ihi-k-1, 0:nb-1)' * A(k:ihi-1, nb:n-k-1); Z is stored over Y magma_zgemm( MagmaConjTrans, MagmaNoTrans, nb, n-k-nb, ihi-k, c_one, dV, lddv, dA(k,nb), ldda, c_zero, dY, nb, queue ); // Ag2 = Ag2 - W Z = A(k:ihi-1, nb:n-k-1) - W(nb:n-k-1, 0:nb-1) * Z(0:nb-1, nb:n-k-1) magma_zgemm( MagmaNoTrans, MagmaNoTrans, ihi-k, n-k-nb, nb, c_neg_one, dwork, ldda, dY, nb, c_one, dA(k,nb), ldda, queue ); return info; }