PaStiX Handbook: Blas-Lapack like kernels

PastixComplex32 BLAS kernels
void	core_cplrnt (int m, int n, pastix_complex32_t *A, int lda, int gM, int m0, int n0, unsigned long long int seed)
	Generate a random tile. More...

void	core_cgetmo (int m, int n, const pastix_complex32_t A, int lda, pastix_complex32_t B, int ldb)
	Transposes a m-by-n matrix out of place using an extra workspace of size m-by-n. More...

int	core_cgeadd (pastix_trans_t trans, pastix_int_t M, pastix_int_t N, pastix_complex32_t alpha, const pastix_complex32_t A, pastix_int_t LDA, pastix_complex32_t beta, pastix_complex32_t B, pastix_int_t LDB)
	Add two matrices together. More...

int	core_cgemdm (pastix_trans_t transA, pastix_trans_t transB, int M, int N, int K, pastix_complex32_t alpha, const pastix_complex32_t A, int LDA, const pastix_complex32_t B, int LDB, pastix_complex32_t beta, pastix_complex32_t C, int LDC, const pastix_complex32_t D, int incD, pastix_complex32_t *WORK, int LWORK)
	Perform one of the following matrix-matrix operations. More...

int	core_cpqrcp (float tol, pastix_int_t maxrank, int full_update, pastix_int_t nb, pastix_int_t m, pastix_int_t n, pastix_complex32_t A, pastix_int_t lda, pastix_int_t jpvt, pastix_complex32_t tau, pastix_complex32_t work, pastix_int_t lwork, float *rwork)
	Compute a rank-reavealing QR factorization. More...

int	core_crqrcp (float tol, pastix_int_t maxrank, int refine, pastix_int_t nb, pastix_int_t m, pastix_int_t n, pastix_complex32_t A, pastix_int_t lda, pastix_int_t jpvt, pastix_complex32_t tau, pastix_complex32_t work, pastix_int_t lwork, float *rwork)
	Compute a randomized QR factorization. More...

int	core_crqrrt (float tol, pastix_int_t maxrank, pastix_int_t nb, pastix_int_t m, pastix_int_t n, pastix_complex32_t A, pastix_int_t lda, pastix_complex32_t tau, pastix_complex32_t B, pastix_int_t ldb, pastix_complex32_t tau_b, pastix_complex32_t *work, pastix_int_t lwork, float normA)
	Compute a randomized QR factorization with rotation technique. More...

int	core_ctqrcp (float tol, pastix_int_t maxrank, int unused, pastix_int_t nb, pastix_int_t m, pastix_int_t n, pastix_complex32_t A, pastix_int_t lda, pastix_int_t jpvt, pastix_complex32_t tau, pastix_complex32_t work, pastix_int_t lwork, float *rwork)
	Compute a randomized QR factorization with truncated updates. More...

int	core_ctradd (pastix_uplo_t uplo, pastix_trans_t trans, pastix_int_t M, pastix_int_t N, pastix_complex32_t alpha, const pastix_complex32_t A, pastix_int_t LDA, pastix_complex32_t beta, pastix_complex32_t B, pastix_int_t LDB)
	Add two triangular matrices together as in PBLAS pctradd. More...

int	core_cscalo (pastix_trans_t trans, pastix_int_t M, pastix_int_t N, const pastix_complex32_t A, pastix_int_t lda, const pastix_complex32_t D, pastix_int_t ldd, pastix_complex32_t *B, pastix_int_t ldb)
	Scale a matrix by a diagonal out of place. More...

PastixComplex32 Othogonalization kernels for low-rank updates
pastix_fixdbl_t	core_clrorthu_fullqr (pastix_int_t M, pastix_int_t N, pastix_int_t rank, pastix_complex32_t U, pastix_int_t ldu, pastix_complex32_t V, pastix_int_t ldv)
	Try to orthognalize the u part of the low-rank form, and update the v part accordingly using full QR. More...

pastix_fixdbl_t	core_clrorthu_partialqr (pastix_int_t M, pastix_int_t N, pastix_int_t r1, pastix_int_t r2ptr, pastix_int_t offx, pastix_int_t offy, pastix_complex32_t U, pastix_int_t ldu, pastix_complex32_t *V, pastix_int_t ldv)
	Try to orthognalize the U part of the low-rank form, and update the V part accordingly using partial QR. More...

pastix_fixdbl_t	core_clrorthu_cgs (pastix_int_t M1, pastix_int_t N1, pastix_int_t M2, pastix_int_t N2, pastix_int_t r1, pastix_int_t r2ptr, pastix_int_t offx, pastix_int_t offy, pastix_complex32_t U, pastix_int_t ldu, pastix_complex32_t *V, pastix_int_t ldv)
	Try to orthognalize the U part of the low-rank form, and update the V part accordingly using CGS. More...

PastixComplex32 LAPACK kernels
void	core_cpotrfsp (pastix_int_t n, pastix_complex32_t A, pastix_int_t lda, pastix_int_t nbpivots, float criterion)
	Compute the block static pivoting Cholesky factorization of the matrix n-by-n A = L * L^t . More...

void	core_cpxtrfsp (pastix_int_t n, pastix_complex32_t A, pastix_int_t lda, pastix_int_t nbpivots, float criterion)
	Compute the block static pivoting LL^t factorization of the matrix n-by-n A = L * L^t . More...

void	core_cgetrfsp (pastix_int_t n, pastix_complex32_t A, pastix_int_t lda, pastix_int_t nbpivots, float criterion)
	Compute the block static pivoting LU factorization of the matrix m-by-n A = L * U. More...

void	core_csytrfsp (pastix_int_t n, pastix_complex32_t A, pastix_int_t lda, pastix_int_t nbpivots, float criterion)
	Compute the block static pivoting factorization of the symmetric matrix n-by-n A such that A = L * D * L^t. More...

PastixFloat BLAS kernels
void	core_splrnt (int m, int n, float *A, int lda, int gM, int m0, int n0, unsigned long long int seed)
	Generate a random tile. More...

void	core_sgetmo (int m, int n, const float A, int lda, float B, int ldb)
	Transposes a m-by-n matrix out of place using an extra workspace of size m-by-n. More...

int	core_sgeadd (pastix_trans_t trans, pastix_int_t M, pastix_int_t N, float alpha, const float A, pastix_int_t LDA, float beta, float B, pastix_int_t LDB)
	Add two matrices together. More...

int	core_sgemdm (pastix_trans_t transA, pastix_trans_t transB, int M, int N, int K, float alpha, const float A, int LDA, const float B, int LDB, float beta, float C, int LDC, const float D, int incD, float *WORK, int LWORK)
	Perform one of the following matrix-matrix operations. More...

int	core_spqrcp (float tol, pastix_int_t maxrank, int full_update, pastix_int_t nb, pastix_int_t m, pastix_int_t n, float A, pastix_int_t lda, pastix_int_t jpvt, float tau, float work, pastix_int_t lwork, float *rwork)
	Compute a rank-reavealing QR factorization. More...

int	core_srqrcp (float tol, pastix_int_t maxrank, int refine, pastix_int_t nb, pastix_int_t m, pastix_int_t n, float A, pastix_int_t lda, pastix_int_t jpvt, float tau, float work, pastix_int_t lwork, float *rwork)
	Compute a randomized QR factorization. More...

int	core_srqrrt (float tol, pastix_int_t maxrank, pastix_int_t nb, pastix_int_t m, pastix_int_t n, float A, pastix_int_t lda, float tau, float B, pastix_int_t ldb, float tau_b, float *work, pastix_int_t lwork, float normA)
	Compute a randomized QR factorization with rotation technique. More...

int	core_stqrcp (float tol, pastix_int_t maxrank, int unused, pastix_int_t nb, pastix_int_t m, pastix_int_t n, float A, pastix_int_t lda, pastix_int_t jpvt, float tau, float work, pastix_int_t lwork, float *rwork)
	Compute a randomized QR factorization with truncated updates. More...

int	core_stradd (pastix_uplo_t uplo, pastix_trans_t trans, pastix_int_t M, pastix_int_t N, float alpha, const float A, pastix_int_t LDA, float beta, float B, pastix_int_t LDB)
	Add two triangular matrices together as in PBLAS pstradd. More...

int	core_sscalo (pastix_trans_t trans, pastix_int_t M, pastix_int_t N, const float A, pastix_int_t lda, const float D, pastix_int_t ldd, float *B, pastix_int_t ldb)
	Scale a matrix by a diagonal out of place. More...

PastixFloat Othogonalization kernels for low-rank updates
pastix_fixdbl_t	core_slrorthu_fullqr (pastix_int_t M, pastix_int_t N, pastix_int_t rank, float U, pastix_int_t ldu, float V, pastix_int_t ldv)
	Try to orthognalize the u part of the low-rank form, and update the v part accordingly using full QR. More...

pastix_fixdbl_t	core_slrorthu_partialqr (pastix_int_t M, pastix_int_t N, pastix_int_t r1, pastix_int_t r2ptr, pastix_int_t offx, pastix_int_t offy, float U, pastix_int_t ldu, float *V, pastix_int_t ldv)
	Try to orthognalize the U part of the low-rank form, and update the V part accordingly using partial QR. More...

pastix_fixdbl_t	core_slrorthu_cgs (pastix_int_t M1, pastix_int_t N1, pastix_int_t M2, pastix_int_t N2, pastix_int_t r1, pastix_int_t r2ptr, pastix_int_t offx, pastix_int_t offy, float U, pastix_int_t ldu, float *V, pastix_int_t ldv)
	Try to orthognalize the U part of the low-rank form, and update the V part accordingly using CGS. More...

PastixFloat LAPACK kernels
void	core_spotrfsp (pastix_int_t n, float A, pastix_int_t lda, pastix_int_t nbpivots, float criterion)
	Compute the block static pivoting Cholesky factorization of the matrix n-by-n A = L * L^t . More...

void	core_sgetrfsp (pastix_int_t n, float A, pastix_int_t lda, pastix_int_t nbpivots, float criterion)
	Compute the block static pivoting LU factorization of the matrix m-by-n A = L * U. More...

void	core_ssytrfsp (pastix_int_t n, float A, pastix_int_t lda, pastix_int_t nbpivots, float criterion)
	Compute the block static pivoting factorization of the symmetric matrix n-by-n A such that A = L * D * L^t. More...

PastixDouble BLAS kernels
void	core_dplrnt (int m, int n, double *A, int lda, int gM, int m0, int n0, unsigned long long int seed)
	Generate a random tile. More...

void	core_dgetmo (int m, int n, const double A, int lda, double B, int ldb)
	Transposes a m-by-n matrix out of place using an extra workspace of size m-by-n. More...

int	core_dgeadd (pastix_trans_t trans, pastix_int_t M, pastix_int_t N, double alpha, const double A, pastix_int_t LDA, double beta, double B, pastix_int_t LDB)
	Add two matrices together. More...

int	core_dgemdm (pastix_trans_t transA, pastix_trans_t transB, int M, int N, int K, double alpha, const double A, int LDA, const double B, int LDB, double beta, double C, int LDC, const double D, int incD, double *WORK, int LWORK)
	Perform one of the following matrix-matrix operations. More...

int	core_dpqrcp (double tol, pastix_int_t maxrank, int full_update, pastix_int_t nb, pastix_int_t m, pastix_int_t n, double A, pastix_int_t lda, pastix_int_t jpvt, double tau, double work, pastix_int_t lwork, double *rwork)
	Compute a rank-reavealing QR factorization. More...

int	core_drqrcp (double tol, pastix_int_t maxrank, int refine, pastix_int_t nb, pastix_int_t m, pastix_int_t n, double A, pastix_int_t lda, pastix_int_t jpvt, double tau, double work, pastix_int_t lwork, double *rwork)
	Compute a randomized QR factorization. More...

int	core_drqrrt (double tol, pastix_int_t maxrank, pastix_int_t nb, pastix_int_t m, pastix_int_t n, double A, pastix_int_t lda, double tau, double B, pastix_int_t ldb, double tau_b, double *work, pastix_int_t lwork, double normA)
	Compute a randomized QR factorization with rotation technique. More...

int	core_dtqrcp (double tol, pastix_int_t maxrank, int unused, pastix_int_t nb, pastix_int_t m, pastix_int_t n, double A, pastix_int_t lda, pastix_int_t jpvt, double tau, double work, pastix_int_t lwork, double *rwork)
	Compute a randomized QR factorization with truncated updates. More...

int	core_dtradd (pastix_uplo_t uplo, pastix_trans_t trans, pastix_int_t M, pastix_int_t N, double alpha, const double A, pastix_int_t LDA, double beta, double B, pastix_int_t LDB)
	Add two triangular matrices together as in PBLAS pdtradd. More...

int	core_dscalo (pastix_trans_t trans, pastix_int_t M, pastix_int_t N, const double A, pastix_int_t lda, const double D, pastix_int_t ldd, double *B, pastix_int_t ldb)
	Scale a matrix by a diagonal out of place. More...

PastixDouble Othogonalization kernels for low-rank updates
pastix_fixdbl_t	core_dlrorthu_fullqr (pastix_int_t M, pastix_int_t N, pastix_int_t rank, double U, pastix_int_t ldu, double V, pastix_int_t ldv)
	Try to orthognalize the u part of the low-rank form, and update the v part accordingly using full QR. More...

pastix_fixdbl_t	core_dlrorthu_partialqr (pastix_int_t M, pastix_int_t N, pastix_int_t r1, pastix_int_t r2ptr, pastix_int_t offx, pastix_int_t offy, double U, pastix_int_t ldu, double *V, pastix_int_t ldv)
	Try to orthognalize the U part of the low-rank form, and update the V part accordingly using partial QR. More...

pastix_fixdbl_t	core_dlrorthu_cgs (pastix_int_t M1, pastix_int_t N1, pastix_int_t M2, pastix_int_t N2, pastix_int_t r1, pastix_int_t r2ptr, pastix_int_t offx, pastix_int_t offy, double U, pastix_int_t ldu, double *V, pastix_int_t ldv)
	Try to orthognalize the U part of the low-rank form, and update the V part accordingly using CGS. More...

PastixDouble LAPACK kernels
void	core_dpotrfsp (pastix_int_t n, double A, pastix_int_t lda, pastix_int_t nbpivots, double criterion)
	Compute the block static pivoting Cholesky factorization of the matrix n-by-n A = L * L^t . More...

void	core_dgetrfsp (pastix_int_t n, double A, pastix_int_t lda, pastix_int_t nbpivots, double criterion)
	Compute the block static pivoting LU factorization of the matrix m-by-n A = L * U. More...

void	core_dsytrfsp (pastix_int_t n, double A, pastix_int_t lda, pastix_int_t nbpivots, double criterion)
	Compute the block static pivoting factorization of the symmetric matrix n-by-n A such that A = L * D * L^t. More...

PastixComplex64 BLAS kernels
void	core_zplrnt (int m, int n, pastix_complex64_t *A, int lda, int gM, int m0, int n0, unsigned long long int seed)
	Generate a random tile. More...

void	core_zgetmo (int m, int n, const pastix_complex64_t A, int lda, pastix_complex64_t B, int ldb)
	Transposes a m-by-n matrix out of place using an extra workspace of size m-by-n. More...

int	core_zgeadd (pastix_trans_t trans, pastix_int_t M, pastix_int_t N, pastix_complex64_t alpha, const pastix_complex64_t A, pastix_int_t LDA, pastix_complex64_t beta, pastix_complex64_t B, pastix_int_t LDB)
	Add two matrices together. More...

int	core_zgemdm (pastix_trans_t transA, pastix_trans_t transB, int M, int N, int K, pastix_complex64_t alpha, const pastix_complex64_t A, int LDA, const pastix_complex64_t B, int LDB, pastix_complex64_t beta, pastix_complex64_t C, int LDC, const pastix_complex64_t D, int incD, pastix_complex64_t *WORK, int LWORK)
	Perform one of the following matrix-matrix operations. More...

int	core_zpqrcp (double tol, pastix_int_t maxrank, int full_update, pastix_int_t nb, pastix_int_t m, pastix_int_t n, pastix_complex64_t A, pastix_int_t lda, pastix_int_t jpvt, pastix_complex64_t tau, pastix_complex64_t work, pastix_int_t lwork, double *rwork)
	Compute a rank-reavealing QR factorization. More...

int	core_zrqrcp (double tol, pastix_int_t maxrank, int refine, pastix_int_t nb, pastix_int_t m, pastix_int_t n, pastix_complex64_t A, pastix_int_t lda, pastix_int_t jpvt, pastix_complex64_t tau, pastix_complex64_t work, pastix_int_t lwork, double *rwork)
	Compute a randomized QR factorization. More...

int	core_zrqrrt (double tol, pastix_int_t maxrank, pastix_int_t nb, pastix_int_t m, pastix_int_t n, pastix_complex64_t A, pastix_int_t lda, pastix_complex64_t tau, pastix_complex64_t B, pastix_int_t ldb, pastix_complex64_t tau_b, pastix_complex64_t *work, pastix_int_t lwork, double normA)
	Compute a randomized QR factorization with rotation technique. More...

int	core_ztqrcp (double tol, pastix_int_t maxrank, int unused, pastix_int_t nb, pastix_int_t m, pastix_int_t n, pastix_complex64_t A, pastix_int_t lda, pastix_int_t jpvt, pastix_complex64_t tau, pastix_complex64_t work, pastix_int_t lwork, double *rwork)
	Compute a randomized QR factorization with truncated updates. More...

int	core_ztradd (pastix_uplo_t uplo, pastix_trans_t trans, pastix_int_t M, pastix_int_t N, pastix_complex64_t alpha, const pastix_complex64_t A, pastix_int_t LDA, pastix_complex64_t beta, pastix_complex64_t B, pastix_int_t LDB)
	Add two triangular matrices together as in PBLAS pztradd. More...

int	core_zscalo (pastix_trans_t trans, pastix_int_t M, pastix_int_t N, const pastix_complex64_t A, pastix_int_t lda, const pastix_complex64_t D, pastix_int_t ldd, pastix_complex64_t *B, pastix_int_t ldb)
	Scale a matrix by a diagonal out of place. More...

PastixComplex64 Othogonalization kernels for low-rank updates
pastix_fixdbl_t	core_zlrorthu_fullqr (pastix_int_t M, pastix_int_t N, pastix_int_t rank, pastix_complex64_t U, pastix_int_t ldu, pastix_complex64_t V, pastix_int_t ldv)
	Try to orthognalize the u part of the low-rank form, and update the v part accordingly using full QR. More...

pastix_fixdbl_t	core_zlrorthu_partialqr (pastix_int_t M, pastix_int_t N, pastix_int_t r1, pastix_int_t r2ptr, pastix_int_t offx, pastix_int_t offy, pastix_complex64_t U, pastix_int_t ldu, pastix_complex64_t *V, pastix_int_t ldv)
	Try to orthognalize the U part of the low-rank form, and update the V part accordingly using partial QR. More...

pastix_fixdbl_t	core_zlrorthu_cgs (pastix_int_t M1, pastix_int_t N1, pastix_int_t M2, pastix_int_t N2, pastix_int_t r1, pastix_int_t r2ptr, pastix_int_t offx, pastix_int_t offy, pastix_complex64_t U, pastix_int_t ldu, pastix_complex64_t *V, pastix_int_t ldv)
	Try to orthognalize the U part of the low-rank form, and update the V part accordingly using CGS. More...

PastixComplex64 LAPACK kernels
void	core_zpotrfsp (pastix_int_t n, pastix_complex64_t A, pastix_int_t lda, pastix_int_t nbpivots, double criterion)
	Compute the block static pivoting Cholesky factorization of the matrix n-by-n A = L * L^t . More...

void	core_zpxtrfsp (pastix_int_t n, pastix_complex64_t A, pastix_int_t lda, pastix_int_t nbpivots, double criterion)
	Compute the block static pivoting LL^t factorization of the matrix n-by-n A = L * L^t . More...

void	core_zgetrfsp (pastix_int_t n, pastix_complex64_t A, pastix_int_t lda, pastix_int_t nbpivots, double criterion)
	Compute the block static pivoting LU factorization of the matrix m-by-n A = L * U. More...

void	core_zsytrfsp (pastix_int_t n, pastix_complex64_t A, pastix_int_t lda, pastix_int_t nbpivots, double criterion)
	Compute the block static pivoting factorization of the symmetric matrix n-by-n A such that A = L * D * L^t. More...

Detailed Description

This module contains all the BLAS and LAPACK-like kernels that are working on lapack layout matrices.

Function Documentation

◆ core_cplrnt()

void core_cplrnt	(	int	m,
		int	n,
		pastix_complex32_t *	A,
		int	lda,
		int	gM,
		int	m0,
		int	n0,
		unsigned long long int	seed
	)

Generate a random tile.

Parameters

[in]	m	The number of rows of the tile A. m >= 0.
[in]	n	The number of columns of the tile A. n >= 0.
[in,out]	A	On entry, the m-by-n tile to be initialized. On exit, the tile initialized in the mtxtype format.
[in]	lda	The leading dimension of the tile A. lda >= max(1,m).
[in]	gM	The global number of rows of the full matrix, A is belonging to. gM >= (m0+M).
[in]	m0	The index of the first row of tile A in the full matrix. m0 >= 0.
[in]	n0	The index of the first column of tile A in the full matrix. n0 >= 0.
[in]	seed	The seed used for random generation. Must be the same for all tiles initialized with this routine.

Definition at line 91 of file core_cplrnt.c.

◆ core_cgetmo()

void core_cgetmo	(	int	m,
		int	n,
		const pastix_complex32_t *	A,
		int	lda,
		pastix_complex32_t *	B,
		int	ldb
	)

Transposes a m-by-n matrix out of place using an extra workspace of size m-by-n.

Parameters

[in]	m	Number of rows of A.
[in]	n	Number of columns of A.
[in]	A	Matrix to be transposed.
[in]	lda	Leading dimension of matrix A.
[in,out]	B	On exit B = trans(A).
[in]	ldb	Leading dimension of matrix B.

Definition at line 49 of file core_cgetmo.c.

Referenced by core_clr2ge().

◆ core_cgeadd()

int core_cgeadd	(	pastix_trans_t	trans,
		pastix_int_t	M,
		pastix_int_t	N,
		pastix_complex32_t	alpha,
		const pastix_complex32_t *	A,
		pastix_int_t	LDA,
		pastix_complex32_t	beta,
		pastix_complex32_t *	B,
		pastix_int_t	LDB
	)

Add two matrices together.

Perform the operation: B <- alpha * op(A) + B

Parameters

[in]	trans	PastixNoTrans: No transpose, op( A ) = A; PastixTrans: Transpose, op( A ) = A'; PastixConjTrans: Conjugate Transpose, op( A ) = conjf(A').
[in]	M	Number of rows of the matrix B. Number of rows of the matrix A, if trans == PastixNoTrans, number of columns of A otherwise.
[in]	N	Number of columns of the matrix B. Number of columns of the matrix A, if trans == PastixNoTrans, number of rows of A otherwise.
[in]	alpha	Scalar factor of A.
[in]	A	Matrix of size LDA-by-N, if trans == PastixNoTrans, LDA-by-M, otherwise.
[in]	LDA	Leading dimension of the array A. LDA >= max(1,K). K = M if trans == PastixNoTrans, K = N otherwise.
[in]	beta	Scalar factor of B.
[in,out]	B	Matrix of size LDB-by-N.
[in]	LDB	Leading dimension of the array B. LDB >= max(1,M)

Return values

PASTIX_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
1,not	yet implemented

Definition at line 78 of file core_cgeadd.c.

References PastixConjTrans, and PastixNoTrans.

Referenced by core_clrconcatenate_v(), core_clrcpy(), core_ctradd(), cpublok_cadd_frfr(), cpucblk_cadd_frfr(), cpucblk_cdiff(), cpucblk_cgeaddsp1d(), cpucblk_cgetrfsp1d_getrf(), cpucblk_cgetschur_fr(), and cpucblk_cgetschur_lr().

◆ core_cgemdm()

int core_cgemdm	(	pastix_trans_t	transA,
		pastix_trans_t	transB,
		int	M,
		int	N,
		int	K,
		pastix_complex32_t	alpha,
		const pastix_complex32_t *	A,
		int	LDA,
		const pastix_complex32_t *	B,
		int	LDB,
		pastix_complex32_t	beta,
		pastix_complex32_t *	C,
		int	LDC,
		const pastix_complex32_t *	D,
		int	incD,
		pastix_complex32_t *	WORK,
		int	LWORK
	)

Perform one of the following matrix-matrix operations.

  C := alpha*op( A )*D*op( B ) + beta*C,

where op( X ) is one of

  op( X ) = X   or   op( X ) = X',

alpha and beta are scalars, and A, B, C and D are matrices, with

  op( A ) an m by k matrix,
  op( B ) an k by n matrix,
  C an m by n matrix and
  D an k by k matrix.

Parameters

[in]	transA	PastixNoTrans : No transpose, op( A ) = A; PastixConjTrans : Transpose, op( A ) = A'.
[in]	transB	PastixNoTrans : No transpose, op( B ) = B; PastixConjTrans : Transpose, op( B ) = B'.
[in]	M	The number of rows of the matrix op( A ) and of the matrix C. M must be at least zero.
[in]	N	The number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero.
[in]	K	The number of columns of the matrix op( A ), the number of rows of the matrix op( B ), and the number of rows and columns of matrix D. K must be at least zero.
[in]	alpha	On entry, alpha specifies the scalar alpha.
[in]	A	pastix_complex32_t array of DIMENSION ( LDA, ka ), where ka is k when transA = PastixTrans, and is m otherwise. Before entry with transA = PastixTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A.
[in]	LDA	On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. When transA = PastixTrans then LDA must be at least max( 1, m ), otherwise LDA must be at least max( 1, k ).
[in]	B	pastix_complex32_t array of DIMENSION ( LDB, kb ), where kb is n when transB = PastixTrans, and is k otherwise. Before entry with transB = PastixTrans, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B.
[in]	LDB	On entry, LDB specifies the first dimension of B as declared in the calling (sub) program. When transB = PastixTrans then LDB must be at least max( 1, k ), otherwise LDB must be at least max( 1, n ).
[in]	beta	On entry, beta specifies the scalar beta. When beta is supplied as zero then C need not be set on input.
[in]	C	pastix_complex32_t array of DIMENSION ( LDC, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the m by n matrix ( alphaop( A )Dop( B ) + betaC ).
[in]	LDC	On entry, LDC specifies the first dimension of C as declared in the calling (sub) program. LDC must be at least max( 1, m ).
[in]	D	pastix_complex32_t array of DIMENSION ( LDD, k ). Before entry, the leading k by k part of the array D must contain the matrix D.
[in]	incD	On entry, LDD specifies the first dimension of D as declared in the calling (sub) program. LDD must be at least max( 1, k ).
[in]	WORK	pastix_complex32_t array, dimension (MAX(1,LWORK))
[in]	LWORK	The length of WORK. On entry, if transA = PastixTrans and transB = PastixTrans then LWORK >= max(1, KN). Otherwise LWORK >= max(1, MK).

Return values

PASTIX_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

Definition at line 139 of file core_cgemdm.c.

References PASTIX_SUCCESS, PastixConjTrans, and PastixNoTrans.

Referenced by core_chetrfsp1d_gemm(), and core_csytrfsp1d_gemm().

◆ core_cpqrcp()

int core_cpqrcp	(	float	tol,
		pastix_int_t	maxrank,
		int	full_update,
		pastix_int_t	nb,
		pastix_int_t	m,
		pastix_int_t	n,
		pastix_complex32_t *	A,
		pastix_int_t	lda,
		pastix_int_t *	jpvt,
		pastix_complex32_t *	tau,
		pastix_complex32_t *	work,
		pastix_int_t	lwork,
		float *	rwork
	)

Compute a rank-reavealing QR factorization.

This routine is originated from the LAPACK kernels cgeqp3/claqps and was modified by A. Buttari for MUMPS-BLR. In this version the stopping criterion is based on the frobenius norm of the residual, and not on the estimate of the two-norm making it more restrictive. Thus, the returned ranks are larger, but this gives a better accuracy.

Parameters

[in]	tol	The relative tolerance criterion. Computations are stopped when the frobenius norm of the residual matrix is lower than tol. If tol < 0, then maxrank reflectors are computed.
[in]	maxrank	Maximum number of reflectors computed. Computations are stopped when the rank exceeds maxrank. If maxrank < 0, all reflectors are computed or up to the tolerance criterion.
[in]	full_update	If true, all the trailing submatrix is updated, even if maxrank is reached. If false, the trailing submatrix is not updated as soon as it is not worth it. (Unused for now but kept to match API of RQRCP and TQRCP)
[in]	nb	Tuning parameter for the GEMM blocking size. if nb < 0, nb is set to 32.
[in]	m	Number of rows of the matrix A.
[in]	n	Number of columns of the matrix A.
[in]	A	The matrix of dimension lda-by-n that needs to be compressed.
[in]	lda	The leading dimension of the matrix A. lda >= max(1, m).
[out]	jpvt	The array that describes the permutation of A.
[out]	tau	Contains scalar factors of the elementary reflectors for the matrix Q.
[in]	work	Workspace array of size lwork.
[in]	lwork	The dimension of the work area. lwork >= (nb * n + max(n, m) ) If lwork == -1, the functions returns immediately and work[0] contains the optimal size of work.
[in]	rwork	Workspace array used to store partial and exact column norms (2-by-n)

Returns: This routine will return the rank of A (>=0) or -1 if it didn't manage to compress within the margins of tolerance and maximum rank.

If maximum rank is 0, then either the matrix norm is below the tolerance, and we can return a null rank matrix, or it is not and we need to return a full rank matrix.

Definition at line 105 of file core_cpqrcp.c.

References pastix_int_t.

Referenced by core_cge2lr_pqrcp(), core_crqrcp(), core_crradd_pqrcp(), and core_ctqrcp().

◆ core_crqrcp()

int core_crqrcp	(	float	tol,
		pastix_int_t	maxrank,
		int	refine,
		pastix_int_t	nb,
		pastix_int_t	m,
		pastix_int_t	n,
		pastix_complex32_t *	A,
		pastix_int_t	lda,
		pastix_int_t *	jpvt,
		pastix_complex32_t *	tau,
		pastix_complex32_t *	work,
		pastix_int_t	lwork,
		float *	rwork
	)

Compute a randomized QR factorization.

This kernel implements the algorithm described in: Fast Parallel Randomized QR with Column Pivoting Algorithms for Reliable Low-rank Matrix Approximations. Jianwei Xiao, Ming Gu, and Julien Langou

The main difference in this algorithm relies in two points: 1) First, we stop the iterative porcess based on a tolerance criterion forwarded to the QR with column pivoting kernel, while they have a fixed number of iterations defined by parameter. 2) Second, we perform an extra PQRCP call on the trailing submatrix to finalize the computations, while in the paper above they use a spectrum revealing algorithm to refine the solution.

More information can also be found in Finding Structure with Randomness: Probabilistic Algorithms for Constructing Approximate Matrix Decompositions. N. Halko, P. G. Martinsson, and J. A. Tropp

Based on this paper, we set the p parameter to 5, as it seems sufficient, and because we iterate multiple times to get the final rank.

Parameters

[in]	tol	The relative tolerance criterion. Computations are stopped when the frobenius norm of the residual matrix is lower than tol. If tol < 0, then maxrank reflectors are computed.
[in]	maxrank	Maximum number of reflectors computed. Computations are stopped when the rank exceeds maxrank. If maxrank < 0, all reflectors are computed or up to the tolerance criterion.
[in]	refine	Enable/disable the extra refinement step that performs an additional PQRCP on the trailing submatrix.
[in]	nb	Tuning parameter for the GEMM blocking size. if nb < 0, nb is set to 32.
[in]	m	Number of rows of the matrix A.
[in]	n	Number of columns of the matrix A.
[in]	A	The matrix of dimension lda-by-n that needs to be compressed.
[in]	lda	The leading dimension of the matrix A. lda >= max(1, m).
[out]	jpvt	The array that describes the permutation of A.
[out]	tau	Contains scalar factors of the elementary reflectors for the matrix Q.
[in]	work	Workspace array of size lwork.
[in]	lwork	The dimension of the work area. lwork >= (nb * n + max(n, m) ) If lwork == -1, the functions returns immediately and work[0] contains the optimal size of work.
[in]	rwork	Workspace array used to store partial and exact column norms (2-by-n)

Returns: This routine will return the rank of A (>=0) or -1 if it didn't manage to compress within the margins of tolerance and maximum rank.

If maximum rank is 0, then either the matrix norm is below the tolerance, and we can return a null rank matrix, or it is not and we need to return a full rank matrix.

Definition at line 114 of file core_crqrcp.c.

References core_cpqrcp(), and pastix_int_t.

Referenced by core_cge2lr_rqrcp(), and core_crradd_rqrcp().

◆ core_crqrrt()

int core_crqrrt	(	float	tol,
		pastix_int_t	maxrank,
		pastix_int_t	nb,
		pastix_int_t	m,
		pastix_int_t	n,
		pastix_complex32_t *	A,
		pastix_int_t	lda,
		pastix_complex32_t *	tau,
		pastix_complex32_t *	B,
		pastix_int_t	ldb,
		pastix_complex32_t *	tau_b,
		pastix_complex32_t *	work,
		pastix_int_t	lwork,
		float	normA
	)

Compute a randomized QR factorization with rotation technique.

This kernel implements the second method (he did not write a pseudocode for the second method) described in :

Blocked rank-revealing QR factorizations: How randomized sampling can be used to avoid single-vector pivoting. P. G. Martinsson

Note that we only use the trailing matrix for resampling. We don't use power of it for getting better results, since it would be too costly.

Difference from randomized QRCP is : 1) resampling is used instead of sample update formula 2) Instead of pivoting A, rotation is applied on it 3) Instead of working with B and omega, directly their transposes are handled for simplicity

The main difference in this algorithm compared to figure 5 in the Martinsson's paper: 1) First, we stop the iterative process based on a tolerance criterion 2) Second, we do not apply SVD for gathering the mass on the diagonal blocks, so our method is less costly but we expect it to be less close to SVD result 3) Third, we do not apply the power iteration for resampling for a closer result to SVD, since it is too costly

Parameters

[in]	tol	The relative tolerance criterion. Computations are stopped when the frobenius norm of the residual matrix is lower than tol. If tol < 0, then maxrank reflectors are computed.
[in]	maxrank	Maximum number of reflectors computed. Computations are stopped when the rank exceeds maxrank. If maxrank < 0, all reflectors are computed or up to the tolerance criterion.
[in]	nb	Tuning parameter for the GEMM blocking size. if nb < 0, nb is set to 32.
[in]	m	Number of rows of the matrix A.
[in]	n	Number of columns of the matrix A.
[in,out]	A	The matrix of dimension lda-by-n that needs to be compressed. On output, the A matrix is computed up to the founded rank k (k first columns and rows). Everything else, should be dropped.
[in]	lda	The leading dimension of the matrix A. lda >= max(1, m).
[out]	tau	Contains scalar factors of the elementary reflectors for the matrix A.
[out]	B	The matrix of dimension ldb-by-maxrank that will holds the partial projection of the matrix A. On output, each block of 32 columns can be used to computed the reverse rotation of the R part of A.
[in]	ldb	The leading dimension of the matrix B. ldb >= max(1, n).
[out]	tau_b	Contains scalar factors of the elementary reflectors for the matrix B.
[in]	work	Workspace array of size lwork.
[in]	lwork	The dimension of the work area. lwork >= (nb * max(n,n)) If lwork == -1, the function returns immediately and work[0] contains the optimal size of work.
[in]	normA	The norm of the input matrixA. If negative, the norm is computed by the kernel.

Returns: This routine will return the rank of A (>=0) or -1 if it didn't manage to compress within the margins of tolerance and maximum rank.

If maximum rank is 0, then either the matrix norm is below the tolerance, and we can return a null rank matrix, or it is not and we need to return a full rank matrix.

Definition at line 126 of file core_crqrrt.c.

References pastix_int_t.

Referenced by core_cge2lr_rqrrt().

◆ core_ctqrcp()

int core_ctqrcp	(	float	tol,
		pastix_int_t	maxrank,
		int	refine,
		pastix_int_t	nb,
		pastix_int_t	m,
		pastix_int_t	n,
		pastix_complex32_t *	A,
		pastix_int_t	lda,
		pastix_int_t *	jpvt,
		pastix_complex32_t *	tau,
		pastix_complex32_t *	work,
		pastix_int_t	lwork,
		float *	rwork
	)

Compute a randomized QR factorization with truncated updates.

This routine is derivated from "Randomized QR with Column Pivoting", J. A. Duersch and M. Gu, SIAM Journal on Scientific Computing, vol. 39, no. 4, pp. C263-C291, 2017.

Parameters

[in]	tol	The relative tolerance criterion. Computations are stopped when the frobenius norm of the residual matrix is lower than tol. If tol < 0, then maxrank reflectors are computed.
[in]	maxrank	Maximum number of reflectors computed. Computations are stopped when the rank exceeds maxrank. If maxrank < 0, all reflectors are computed or up to the tolerance criterion.
[in]	refine	TODO
[in]	nb	Tuning parameter for the GEMM blocking size. if nb < 0, nb is set to 32.
[in]	m	Number of rows of the matrix A.
[in]	n	Number of columns of the matrix A.
[in]	A	The matrix of dimension lda-by-n that needs to be compressed.
[in]	lda	The leading dimension of the matrix A. lda >= max(1, m).
[out]	jpvt	The array that describes the permutation of A.
[out]	tau	Contains scalar factors of the elementary reflectors for the matrix Q.
[in]	work	Workspace array of size lwork.
[in]	lwork	The dimension of the work area. lwork >= (nb * n + max(n, m) ) If lwork == -1, the functions returns immediately and work[0] contains the optimal size of work.
[in]	rwork	Workspace array used to store partial and exact column norms (2-by-n)

Returns: This routine will return the rank of A (>=0) or -1 if it didn't manage to compress within the margins of tolerance and maximum rank.

If maximum rank is 0, then either the matrix norm is below the tolerance, and we can return a null rank matrix, or it is not and we need to return a full rank matrix.

Definition at line 99 of file core_ctqrcp.c.

References core_cpqrcp(), and pastix_int_t.

Referenced by core_cge2lr_tqrcp(), and core_crradd_tqrcp().

◆ core_ctradd()

int core_ctradd	(	pastix_uplo_t	uplo,
		pastix_trans_t	trans,
		pastix_int_t	M,
		pastix_int_t	N,
		pastix_complex32_t	alpha,
		const pastix_complex32_t *	A,
		pastix_int_t	LDA,
		pastix_complex32_t	beta,
		pastix_complex32_t *	B,
		pastix_int_t	LDB
	)

Add two triangular matrices together as in PBLAS pctradd.

B <- alpha * op(A) + beta * B,

where op(X) = X, X', or conjf(X')

Parameters

[in]	uplo	Specifies the shape of A and B matrices: PastixUpperLower: A and B are general matrices. PastixUpper: op(A) and B are upper trapezoidal matrices. PastixLower: op(A) and B are lower trapezoidal matrices.
[in]	trans	Specifies whether the matrix A is non-transposed, transposed, or conjugate transposed PastixNoTrans: op(A) = A PastixTrans: op(A) = A' PastixConjTrans: op(A) = conjf(A')
[in]	M	Number of rows of the matrices op(A) and B.
[in]	N	Number of columns of the matrices op(A) and B.
[in]	alpha	Scalar factor of A.
[in]	A	Matrix of size LDA-by-N, if trans = PastixNoTrans, LDA-by-M otherwise.
[in]	LDA	Leading dimension of the array A. LDA >= max(1,M) if trans = PastixNoTrans, LDA >= max(1,N) otherwise.
[in]	beta	Scalar factor of B.
[in,out]	B	Matrix of size LDB-by-N. On exit, B = alpha * op(A) + beta * B
[in]	LDB	Leading dimension of the array B. LDB >= max(1,M)

Return values

PASTIX_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

PastixLower

PastixUpper

Definition at line 79 of file core_ctradd.c.

References core_cgeadd(), PASTIX_SUCCESS, PastixConjTrans, PastixLower, PastixNoTrans, PastixUpper, and PastixUpperLower.

◆ core_cscalo()

int core_cscalo	(	pastix_trans_t	trans,
		pastix_int_t	M,
		pastix_int_t	N,
		const pastix_complex32_t *	A,
		pastix_int_t	lda,
		const pastix_complex32_t *	D,
		pastix_int_t	ldd,
		pastix_complex32_t *	B,
		pastix_int_t	ldb
	)

Scale a matrix by a diagonal out of place.

Perform the operation: B <- op(A) * D, where A is a general matrix, and D a diagonal matrix.

Parameters

[in]	trans	PastixNoTrans: No transpose, op( A ) = A; PastixTrans: Transpose, op( A ) = A; PastixConjTrans: Conjugate Transpose, op( A ) = conjf(A).
[in]	M	Number of rows of the matrix B. Number of rows of the matrix A.
[in]	N	Number of columns of the matrix B. Number of columns of the matrix A.
[in]	A	Matrix of size lda-by-N.
[in]	lda	Leading dimension of the array A. lda >= max(1,M).
[in]	D	Diagonal matrix of size ldd-by-N.
[in]	ldd	Leading dimension of the array D. ldd >= 1.
[in,out]	B	Matrix of size LDB-by-N.
[in]	ldb	Leading dimension of the array B. ldb >= max(1,M)

Return values

PASTIX_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
1,not	yet implemented

Definition at line 74 of file core_cscalo.c.

References pastix_int_t, PastixConjTrans, and PastixNoTrans.

Referenced by cpublok_cscalo(), and cpucblk_cscalo().

◆ core_clrorthu_fullqr()

pastix_fixdbl_t core_clrorthu_fullqr	(	pastix_int_t	M,
		pastix_int_t	N,
		pastix_int_t	rank,
		pastix_complex32_t *	U,
		pastix_int_t	ldu,
		pastix_complex32_t *	V,
		pastix_int_t	ldv
	)

Try to orthognalize the u part of the low-rank form, and update the v part accordingly using full QR.

This function considers a low-rank matrix resulting from the addition of two matrices B += A, with A of smaller or equal size to B. The product has the form: U * V^t

The U part of the low-rank form must be orthognalized to get the smaller possible rank during the rradd operation. This function perfoms this by applying a full QR factorization on the U part.

U = Q R, then U' = Q, and V' = R * V

Parameters

[in]	M	The number of rows of the u1u2 matrix.
[in]	N	The number of columns of the v1v2 matrix.
[in]	rank	The number of columns of the U matrix, and the number of rows of the V part in the v1v2 matrix.
[in,out]	U	The U matrix of size ldu -by- rank. On exit, Q from U = Q R.
[in]	ldu	The leading dimension of the U matrix. ldu >= max(1, M)
[in,out]	V	The V matrix of size ldv -by- N. On exit, R * V, with R from U = Q R.
[in]	ldv	The leading dimension of the V matrix. ldv >= max(1, rank)

Returns: The number of flops required to perform the operation.

Definition at line 83 of file core_clrothu.c.

References pastix_int_t, and PastixLeft.

◆ core_clrorthu_partialqr()

pastix_fixdbl_t core_clrorthu_partialqr	(	pastix_int_t	M,
		pastix_int_t	N,
		pastix_int_t	r1,
		pastix_int_t *	r2ptr,
		pastix_int_t	offx,
		pastix_int_t	offy,
		pastix_complex32_t *	U,
		pastix_int_t	ldu,
		pastix_complex32_t *	V,
		pastix_int_t	ldv
	)

Try to orthognalize the U part of the low-rank form, and update the V part accordingly using partial QR.

This function considers a low-rank matrix resulting from the addition of two matrices B += A, with A of smaller or equal size to B. The product has the form: U * V^t

The U part of the low-rank form must be orthognalized to get the smaller possible rank during the rradd operation. This function perfoms this by applying a full QR factorization on the U part.

In that case, it takes benefit from the fact that U = [ u1, u2 ], and V = [ v1, v2 ] with u2 and v2 wich are matrices of respective size M2-by-r2, and r2-by-N2, offset by offx and offy

The steps are:

Scaling of u2 with removal of the null columns
Orthogonalization of u2 relatively to u1
Application of the update to v2
orthogonalization through QR of u2
Update of V

Parameters

[in]	M	The number of rows of the u1u2 matrix.
[in]	N	The number of columns of the v1v2 matrix.
[in]	r1	The number of columns of the U matrix in the u1 part, and the number of rows of the V part in the v1 part.
[in,out]	r2ptr	The number of columns of the U matrix in the u2 part, and the number of rows of the V part in the v2 part. On exit, this rank is reduced y the number of null columns found in U.
[in]	offx	The row offset of the matrix u2 in U.
[in]	offy	The column offset of the matrix v2 in V.
[in,out]	U	The U matrix of size ldu -by- rank. On exit, the orthogonalized U.
[in]	ldu	The leading dimension of the U matrix. ldu >= max(1, M)
[in,out]	V	The V matrix of size ldv -by- N. On exit, the updated V matrix.
[in]	ldv	The leading dimension of the V matrix. ldv >= max(1, rank)

Returns: The number of flops required to perform the operation.

Definition at line 197 of file core_clrothu.c.

References core_clrdbg_check_orthogonality_AB(), pastix_int_t, and PastixLeft.

◆ core_clrorthu_cgs()

pastix_fixdbl_t core_clrorthu_cgs	(	pastix_int_t	M1,
		pastix_int_t	N1,
		pastix_int_t	M2,
		pastix_int_t	N2,
		pastix_int_t	r1,
		pastix_int_t *	r2ptr,
		pastix_int_t	offx,
		pastix_int_t	offy,
		pastix_complex32_t *	U,
		pastix_int_t	ldu,
		pastix_complex32_t *	V,
		pastix_int_t	ldv
	)

Try to orthognalize the U part of the low-rank form, and update the V part accordingly using CGS.

This function considers a low-rank matrix resulting from the addition of two matrices B += A, with A of smaller or equal size to B. The product has the form: U * V^t

The U part of the low-rank form must be orthognalized to get the smaller possible rank during the rradd operation. This function perfoms this by applying a full QR factorization on the U part.

In that case, it takes benefit from the fact that U = [ u1, u2 ], and V = [ v1, v2 ] with u2 and v2 wich are matrices of respective size M2-by-r2, and r2-by-N2, offset by offx and offy

The steps are:

for each column of u2
- Scaling of u2 with removal of the null columns
- Orthogonalization of u2 relatively to u1
- Remove the column if null

Parameters

[in]	M1	The number of rows of the U matrix.
[in]	N1	The number of columns of the U matrix.
[in]	M2	The number of rows of the u2 part of the U matrix.
[in]	N2	The number of columns of the v2 part of the V matrix.
[in]	r1	The number of columns of the U matrix in the u1 part, and the number of rows of the V part in the v1 part.
[in,out]	r2ptr	The number of columns of the U matrix in the u2 part, and the number of rows of the V part in the v2 part. On exit, this rank is reduced y the number of null columns found in U.
[in]	offx	The row offset of the matrix u2 in U.
[in]	offy	The column offset of the matrix v2 in V.
[in,out]	U	The U matrix of size ldu -by- rank. On exit, the orthogonalized U.
[in]	ldu	The leading dimension of the U matrix. ldu >= max(1, M)
[in,out]	V	The V matrix of size ldv -by- N. On exit, the updated V matrix.
[in]	ldv	The leading dimension of the V matrix. ldv >= max(1, rank)

Returns: The number of flops required to perform the operation.

Definition at line 430 of file core_clrothu.c.

References core_clrdbg_check_orthogonality_AB(), and pastix_int_t.

◆ core_cpotrfsp()

void core_cpotrfsp	(	pastix_int_t	n,
		pastix_complex32_t *	A,
		pastix_int_t	lda,
		pastix_int_t *	nbpivots,
		float	criterion
	)

Compute the block static pivoting Cholesky factorization of the matrix n-by-n A = L * L^t .

Parameters

[in]	n	The number of rows and columns of the matrix A.
[in,out]	A	The matrix A to factorize with Cholesky factorization. The matrix is of size lda -by- n.
[in]	lda	The leading dimension of the matrix A.
[in,out]	nbpivots	Pointer to the number of piovting operations made during factorization. It is updated during this call
[in]	criterion	Threshold use for static pivoting. If diagonal value is under this threshold, its value is replaced by the threshold and the nu,ber of pivots is incremented.

Warning: This routine will fail if it discovers a null or negative value on the diagonal during factorization.

Definition at line 144 of file core_cpotrfsp.c.

References core_cpotf2sp(), and pastix_int_t.

◆ core_cpxtrfsp()

void core_cpxtrfsp	(	pastix_int_t	n,
		pastix_complex32_t *	A,
		pastix_int_t	lda,
		pastix_int_t *	nbpivots,
		float	criterion
	)

Compute the block static pivoting LL^t factorization of the matrix n-by-n A = L * L^t .

Parameters

[in]	n	The number of rows and columns of the matrix A.
[in,out]	A	The matrix A to factorize with LL^t factorization. The matrix is of size lda -by- n.
[in]	lda	The leading dimension of the matrix A.
[in,out]	nbpivots	Pointer to the number of piovting operations made during factorization. It is updated during this call
[in]	criterion	Threshold use for static pivoting. If diagonal value is under this threshold, its value is replaced by the threshold and the nu,ber of pivots is incremented.

Warning: This routine will fail if it discovers a null or negative value on the diagonal during factorization.

Definition at line 136 of file core_cpxtrfsp.c.

References core_cpxtf2sp(), and pastix_int_t.

◆ core_cgetrfsp()

void core_cgetrfsp	(	pastix_int_t	n,
		pastix_complex32_t *	A,
		pastix_int_t	lda,
		pastix_int_t *	nbpivots,
		float	criterion
	)

Compute the block static pivoting LU factorization of the matrix m-by-n A = L * U.

Parameters

[in]	n	The number of rows and columns of the matrix A.
[in,out]	A	The matrix A to factorize with LU factorization. The matrix is of size lda -by- n.
[in]	lda	The leading dimension of the matrix A.
[in,out]	nbpivots	Pointer to the number of piovting operations made during factorization. It is updated during this call
[in]	criterion	Threshold use for static pivoting. If diagonal value is under this threshold, its value is replaced by the threshold and the number of pivots is incremented.

Definition at line 138 of file core_cgetrfsp.c.

References core_cgetf2sp(), and pastix_int_t.

◆ core_csytrfsp()

void core_csytrfsp	(	pastix_int_t	n,
		pastix_complex32_t *	A,
		pastix_int_t	lda,
		pastix_int_t *	nbpivots,
		float	criterion
	)

Compute the block static pivoting factorization of the symmetric matrix n-by-n A such that A = L * D * L^t.

Parameters

[in]	n	The number of rows and columns of the matrix A.
[in,out]	A	The matrix A to factorize with LDL^t factorization. The matrix is of size lda -by- n.
[in]	lda	The leading dimension of the matrix A.
[in,out]	nbpivots	Pointer to the number of piovting operations made during factorization. It is updated during this call
[in]	criterion	Threshold use for static pivoting. If diagonal value is under this threshold, its value is replaced by the threshold and the nu,ber of pivots is incremented.

Definition at line 143 of file core_csytrfsp.c.

References core_csytf2sp(), and pastix_int_t.

◆ core_splrnt()

void core_splrnt	(	int	m,
		int	n,
		float *	A,
		int	lda,
		int	gM,
		int	m0,
		int	n0,
		unsigned long long int	seed
	)

Generate a random tile.

Parameters

[in]	m	The number of rows of the tile A. m >= 0.
[in]	n	The number of columns of the tile A. n >= 0.
[in,out]	A	On entry, the m-by-n tile to be initialized. On exit, the tile initialized in the mtxtype format.
[in]	lda	The leading dimension of the tile A. lda >= max(1,m).
[in]	gM	The global number of rows of the full matrix, A is belonging to. gM >= (m0+M).
[in]	m0	The index of the first row of tile A in the full matrix. m0 >= 0.
[in]	n0	The index of the first column of tile A in the full matrix. n0 >= 0.
[in]	seed	The seed used for random generation. Must be the same for all tiles initialized with this routine.

Definition at line 91 of file core_splrnt.c.

◆ core_sgetmo()

void core_sgetmo	(	int	m,
		int	n,
		const float *	A,
		int	lda,
		float *	B,
		int	ldb
	)

Transposes a m-by-n matrix out of place using an extra workspace of size m-by-n.

Parameters

[in]	m	Number of rows of A.
[in]	n	Number of columns of A.
[in]	A	Matrix to be transposed.
[in]	lda	Leading dimension of matrix A.
[in,out]	B	On exit B = trans(A).
[in]	ldb	Leading dimension of matrix B.

Definition at line 49 of file core_sgetmo.c.

Referenced by core_slr2ge().

◆ core_sgeadd()

int core_sgeadd	(	pastix_trans_t	trans,
		pastix_int_t	M,
		pastix_int_t	N,
		float	alpha,
		const float *	A,
		pastix_int_t	LDA,
		float	beta,
		float *	B,
		pastix_int_t	LDB
	)

Add two matrices together.

Perform the operation: B <- alpha * op(A) + B

Parameters

[in]	trans	PastixNoTrans: No transpose, op( A ) = A; PastixTrans: Transpose, op( A ) = A'; PastixTrans: Conjugate Transpose, op( A ) = (A').
[in]	M	Number of rows of the matrix B. Number of rows of the matrix A, if trans == PastixNoTrans, number of columns of A otherwise.
[in]	N	Number of columns of the matrix B. Number of columns of the matrix A, if trans == PastixNoTrans, number of rows of A otherwise.
[in]	alpha	Scalar factor of A.
[in]	A	Matrix of size LDA-by-N, if trans == PastixNoTrans, LDA-by-M, otherwise.
[in]	LDA	Leading dimension of the array A. LDA >= max(1,K). K = M if trans == PastixNoTrans, K = N otherwise.
[in]	beta	Scalar factor of B.
[in,out]	B	Matrix of size LDB-by-N.
[in]	LDB	Leading dimension of the array B. LDB >= max(1,M)

Return values

PASTIX_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
1,not	yet implemented

Definition at line 78 of file core_sgeadd.c.

References PASTIX_SUCCESS, PastixNoTrans, and PastixTrans.

Referenced by core_slrconcatenate_v(), core_slrcpy(), core_stradd(), cpublok_sadd_frfr(), cpucblk_sadd_frfr(), cpucblk_sdiff(), cpucblk_sgeaddsp1d(), cpucblk_sgetrfsp1d_getrf(), cpucblk_sgetschur_fr(), and cpucblk_sgetschur_lr().

◆ core_sgemdm()

int core_sgemdm	(	pastix_trans_t	transA,
		pastix_trans_t	transB,
		int	M,
		int	N,
		int	K,
		float	alpha,
		const float *	A,
		int	LDA,
		const float *	B,
		int	LDB,
		float	beta,
		float *	C,
		int	LDC,
		const float *	D,
		int	incD,
		float *	WORK,
		int	LWORK
	)

Perform one of the following matrix-matrix operations.

  C := alpha*op( A )*D*op( B ) + beta*C,

where op( X ) is one of

  op( X ) = X   or   op( X ) = X',

alpha and beta are scalars, and A, B, C and D are matrices, with

  op( A ) an m by k matrix,
  op( B ) an k by n matrix,
  C an m by n matrix and
  D an k by k matrix.

Parameters

[in]	transA	PastixNoTrans : No transpose, op( A ) = A; PastixTrans : Transpose, op( A ) = A'.
[in]	transB	PastixNoTrans : No transpose, op( B ) = B; PastixTrans : Transpose, op( B ) = B'.
[in]	M	The number of rows of the matrix op( A ) and of the matrix C. M must be at least zero.
[in]	N	The number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero.
[in]	K	The number of columns of the matrix op( A ), the number of rows of the matrix op( B ), and the number of rows and columns of matrix D. K must be at least zero.
[in]	alpha	On entry, alpha specifies the scalar alpha.
[in]	A	float array of DIMENSION ( LDA, ka ), where ka is k when transA = PastixTrans, and is m otherwise. Before entry with transA = PastixTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A.
[in]	LDA	On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. When transA = PastixTrans then LDA must be at least max( 1, m ), otherwise LDA must be at least max( 1, k ).
[in]	B	float array of DIMENSION ( LDB, kb ), where kb is n when transB = PastixTrans, and is k otherwise. Before entry with transB = PastixTrans, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B.
[in]	LDB	On entry, LDB specifies the first dimension of B as declared in the calling (sub) program. When transB = PastixTrans then LDB must be at least max( 1, k ), otherwise LDB must be at least max( 1, n ).
[in]	beta	On entry, beta specifies the scalar beta. When beta is supplied as zero then C need not be set on input.
[in]	C	float array of DIMENSION ( LDC, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the m by n matrix ( alphaop( A )Dop( B ) + betaC ).
[in]	LDC	On entry, LDC specifies the first dimension of C as declared in the calling (sub) program. LDC must be at least max( 1, m ).
[in]	D	float array of DIMENSION ( LDD, k ). Before entry, the leading k by k part of the array D must contain the matrix D.
[in]	incD	On entry, LDD specifies the first dimension of D as declared in the calling (sub) program. LDD must be at least max( 1, k ).
[in]	WORK	float array, dimension (MAX(1,LWORK))
[in]	LWORK	The length of WORK. On entry, if transA = PastixTrans and transB = PastixTrans then LWORK >= max(1, KN). Otherwise LWORK >= max(1, MK).

Return values

PASTIX_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

Definition at line 139 of file core_sgemdm.c.

References PASTIX_SUCCESS, PastixNoTrans, and PastixTrans.

Referenced by core_ssytrfsp1d_gemm().

◆ core_spqrcp()

int core_spqrcp	(	float	tol,
		pastix_int_t	maxrank,
		int	full_update,
		pastix_int_t	nb,
		pastix_int_t	m,
		pastix_int_t	n,
		float *	A,
		pastix_int_t	lda,
		pastix_int_t *	jpvt,
		float *	tau,
		float *	work,
		pastix_int_t	lwork,
		float *	rwork
	)

Compute a rank-reavealing QR factorization.

This routine is originated from the LAPACK kernels sgeqp3/slaqps and was modified by A. Buttari for MUMPS-BLR. In this version the stopping criterion is based on the frobenius norm of the residual, and not on the estimate of the two-norm making it more restrictive. Thus, the returned ranks are larger, but this gives a better accuracy.

Parameters

[in]	tol	The relative tolerance criterion. Computations are stopped when the frobenius norm of the residual matrix is lower than tol. If tol < 0, then maxrank reflectors are computed.
[in]	maxrank	Maximum number of reflectors computed. Computations are stopped when the rank exceeds maxrank. If maxrank < 0, all reflectors are computed or up to the tolerance criterion.
[in]	full_update	If true, all the trailing submatrix is updated, even if maxrank is reached. If false, the trailing submatrix is not updated as soon as it is not worth it. (Unused for now but kept to match API of RQRCP and TQRCP)
[in]	nb	Tuning parameter for the GEMM blocking size. if nb < 0, nb is set to 32.
[in]	m	Number of rows of the matrix A.
[in]	n	Number of columns of the matrix A.
[in]	A	The matrix of dimension lda-by-n that needs to be compressed.
[in]	lda	The leading dimension of the matrix A. lda >= max(1, m).
[out]	jpvt	The array that describes the permutation of A.
[out]	tau	Contains scalar factors of the elementary reflectors for the matrix Q.
[in]	work	Workspace array of size lwork.
[in]	lwork	The dimension of the work area. lwork >= (nb * n + max(n, m) ) If lwork == -1, the functions returns immediately and work[0] contains the optimal size of work.
[in]	rwork	Workspace array used to store partial and exact column norms (2-by-n)

Returns: This routine will return the rank of A (>=0) or -1 if it didn't manage to compress within the margins of tolerance and maximum rank.

If maximum rank is 0, then either the matrix norm is below the tolerance, and we can return a null rank matrix, or it is not and we need to return a full rank matrix.

Definition at line 105 of file core_spqrcp.c.

References pastix_int_t.

Referenced by core_sge2lr_pqrcp(), core_srqrcp(), core_srradd_pqrcp(), and core_stqrcp().

◆ core_srqrcp()

int core_srqrcp	(	float	tol,
		pastix_int_t	maxrank,
		int	refine,
		pastix_int_t	nb,
		pastix_int_t	m,
		pastix_int_t	n,
		float *	A,
		pastix_int_t	lda,
		pastix_int_t *	jpvt,
		float *	tau,
		float *	work,
		pastix_int_t	lwork,
		float *	rwork
	)

Compute a randomized QR factorization.

This kernel implements the algorithm described in: Fast Parallel Randomized QR with Column Pivoting Algorithms for Reliable Low-rank Matrix Approximations. Jianwei Xiao, Ming Gu, and Julien Langou

More information can also be found in Finding Structure with Randomness: Probabilistic Algorithms for Constructing Approximate Matrix Decompositions. N. Halko, P. G. Martinsson, and J. A. Tropp

Based on this paper, we set the p parameter to 5, as it seems sufficient, and because we iterate multiple times to get the final rank.

Parameters

[in]	tol	The relative tolerance criterion. Computations are stopped when the frobenius norm of the residual matrix is lower than tol. If tol < 0, then maxrank reflectors are computed.
[in]	maxrank	Maximum number of reflectors computed. Computations are stopped when the rank exceeds maxrank. If maxrank < 0, all reflectors are computed or up to the tolerance criterion.
[in]	refine	Enable/disable the extra refinement step that performs an additional PQRCP on the trailing submatrix.
[in]	nb	Tuning parameter for the GEMM blocking size. if nb < 0, nb is set to 32.
[in]	m	Number of rows of the matrix A.
[in]	n	Number of columns of the matrix A.
[in]	A	The matrix of dimension lda-by-n that needs to be compressed.
[in]	lda	The leading dimension of the matrix A. lda >= max(1, m).
[out]	jpvt	The array that describes the permutation of A.
[out]	tau	Contains scalar factors of the elementary reflectors for the matrix Q.
[in]	work	Workspace array of size lwork.
[in]	lwork	The dimension of the work area. lwork >= (nb * n + max(n, m) ) If lwork == -1, the functions returns immediately and work[0] contains the optimal size of work.
[in]	rwork	Workspace array used to store partial and exact column norms (2-by-n)

Returns: This routine will return the rank of A (>=0) or -1 if it didn't manage to compress within the margins of tolerance and maximum rank.

If maximum rank is 0, then either the matrix norm is below the tolerance, and we can return a null rank matrix, or it is not and we need to return a full rank matrix.

Definition at line 114 of file core_srqrcp.c.

References core_spqrcp(), and pastix_int_t.

Referenced by core_sge2lr_rqrcp(), and core_srradd_rqrcp().

◆ core_srqrrt()

int core_srqrrt	(	float	tol,
		pastix_int_t	maxrank,
		pastix_int_t	nb,
		pastix_int_t	m,
		pastix_int_t	n,
		float *	A,
		pastix_int_t	lda,
		float *	tau,
		float *	B,
		pastix_int_t	ldb,
		float *	tau_b,
		float *	work,
		pastix_int_t	lwork,
		float	normA
	)

Compute a randomized QR factorization with rotation technique.

This kernel implements the second method (he did not write a pseudocode for the second method) described in :

Blocked rank-revealing QR factorizations: How randomized sampling can be used to avoid single-vector pivoting. P. G. Martinsson

Note that we only use the trailing matrix for resampling. We don't use power of it for getting better results, since it would be too costly.

Parameters

[in]	tol	The relative tolerance criterion. Computations are stopped when the frobenius norm of the residual matrix is lower than tol. If tol < 0, then maxrank reflectors are computed.
[in]	maxrank	Maximum number of reflectors computed. Computations are stopped when the rank exceeds maxrank. If maxrank < 0, all reflectors are computed or up to the tolerance criterion.
[in]	nb	Tuning parameter for the GEMM blocking size. if nb < 0, nb is set to 32.
[in]	m	Number of rows of the matrix A.
[in]	n	Number of columns of the matrix A.
[in,out]	A	The matrix of dimension lda-by-n that needs to be compressed. On output, the A matrix is computed up to the founded rank k (k first columns and rows). Everything else, should be dropped.
[in]	lda	The leading dimension of the matrix A. lda >= max(1, m).
[out]	tau	Contains scalar factors of the elementary reflectors for the matrix A.
[out]	B	The matrix of dimension ldb-by-maxrank that will holds the partial projection of the matrix A. On output, each block of 32 columns can be used to computed the reverse rotation of the R part of A.
[in]	ldb	The leading dimension of the matrix B. ldb >= max(1, n).
[out]	tau_b	Contains scalar factors of the elementary reflectors for the matrix B.
[in]	work	Workspace array of size lwork.
[in]	lwork	The dimension of the work area. lwork >= (nb * max(n,n)) If lwork == -1, the function returns immediately and work[0] contains the optimal size of work.
[in]	normA	The norm of the input matrixA. If negative, the norm is computed by the kernel.

Returns: This routine will return the rank of A (>=0) or -1 if it didn't manage to compress within the margins of tolerance and maximum rank.

If maximum rank is 0, then either the matrix norm is below the tolerance, and we can return a null rank matrix, or it is not and we need to return a full rank matrix.

Definition at line 126 of file core_srqrrt.c.

References pastix_int_t.

Referenced by core_sge2lr_rqrrt().

◆ core_stqrcp()

int core_stqrcp	(	float	tol,
		pastix_int_t	maxrank,
		int	refine,
		pastix_int_t	nb,
		pastix_int_t	m,
		pastix_int_t	n,
		float *	A,
		pastix_int_t	lda,
		pastix_int_t *	jpvt,
		float *	tau,
		float *	work,
		pastix_int_t	lwork,
		float *	rwork
	)

Compute a randomized QR factorization with truncated updates.

This routine is derivated from "Randomized QR with Column Pivoting", J. A. Duersch and M. Gu, SIAM Journal on Scientific Computing, vol. 39, no. 4, pp. C263-C291, 2017.

Parameters

[in]	tol	The relative tolerance criterion. Computations are stopped when the frobenius norm of the residual matrix is lower than tol. If tol < 0, then maxrank reflectors are computed.
[in]	maxrank	Maximum number of reflectors computed. Computations are stopped when the rank exceeds maxrank. If maxrank < 0, all reflectors are computed or up to the tolerance criterion.
[in]	refine	TODO
[in]	nb	Tuning parameter for the GEMM blocking size. if nb < 0, nb is set to 32.
[in]	m	Number of rows of the matrix A.
[in]	n	Number of columns of the matrix A.
[in]	A	The matrix of dimension lda-by-n that needs to be compressed.
[in]	lda	The leading dimension of the matrix A. lda >= max(1, m).
[out]	jpvt	The array that describes the permutation of A.
[out]	tau	Contains scalar factors of the elementary reflectors for the matrix Q.
[in]	work	Workspace array of size lwork.
[in]	lwork	The dimension of the work area. lwork >= (nb * n + max(n, m) ) If lwork == -1, the functions returns immediately and work[0] contains the optimal size of work.
[in]	rwork	Workspace array used to store partial and exact column norms (2-by-n)

Returns: This routine will return the rank of A (>=0) or -1 if it didn't manage to compress within the margins of tolerance and maximum rank.

If maximum rank is 0, then either the matrix norm is below the tolerance, and we can return a null rank matrix, or it is not and we need to return a full rank matrix.

Definition at line 99 of file core_stqrcp.c.

References core_spqrcp(), and pastix_int_t.

Referenced by core_sge2lr_tqrcp(), and core_srradd_tqrcp().

◆ core_stradd()

int core_stradd	(	pastix_uplo_t	uplo,
		pastix_trans_t	trans,
		pastix_int_t	M,
		pastix_int_t	N,
		float	alpha,
		const float *	A,
		pastix_int_t	LDA,
		float	beta,
		float *	B,
		pastix_int_t	LDB
	)

Add two triangular matrices together as in PBLAS pstradd.

B <- alpha * op(A) + beta * B,

where op(X) = X, X', or (X')

Parameters

[in]	uplo	Specifies the shape of A and B matrices: PastixUpperLower: A and B are general matrices. PastixUpper: op(A) and B are upper trapezoidal matrices. PastixLower: op(A) and B are lower trapezoidal matrices.
[in]	trans	Specifies whether the matrix A is non-transposed, transposed, or conjugate transposed PastixNoTrans: op(A) = A PastixTrans: op(A) = A' PastixTrans: op(A) = (A')
[in]	M	Number of rows of the matrices op(A) and B.
[in]	N	Number of columns of the matrices op(A) and B.
[in]	alpha	Scalar factor of A.
[in]	A	Matrix of size LDA-by-N, if trans = PastixNoTrans, LDA-by-M otherwise.
[in]	LDA	Leading dimension of the array A. LDA >= max(1,M) if trans = PastixNoTrans, LDA >= max(1,N) otherwise.
[in]	beta	Scalar factor of B.
[in,out]	B	Matrix of size LDB-by-N. On exit, B = alpha * op(A) + beta * B
[in]	LDB	Leading dimension of the array B. LDB >= max(1,M)

Return values

PASTIX_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

PastixLower

PastixUpper

Definition at line 79 of file core_stradd.c.

References core_sgeadd(), PASTIX_SUCCESS, PastixLower, PastixNoTrans, PastixTrans, PastixUpper, and PastixUpperLower.

◆ core_sscalo()

int core_sscalo	(	pastix_trans_t	trans,
		pastix_int_t	M,
		pastix_int_t	N,
		const float *	A,
		pastix_int_t	lda,
		const float *	D,
		pastix_int_t	ldd,
		float *	B,
		pastix_int_t	ldb
	)

Scale a matrix by a diagonal out of place.

Perform the operation: B <- op(A) * D, where A is a general matrix, and D a diagonal matrix.

Parameters

[in]	trans	PastixNoTrans: No transpose, op( A ) = A; PastixTrans: Transpose, op( A ) = A; PastixTrans: Conjugate Transpose, op( A ) = (A).
[in]	M	Number of rows of the matrix B. Number of rows of the matrix A.
[in]	N	Number of columns of the matrix B. Number of columns of the matrix A.
[in]	A	Matrix of size lda-by-N.
[in]	lda	Leading dimension of the array A. lda >= max(1,M).
[in]	D	Diagonal matrix of size ldd-by-N.
[in]	ldd	Leading dimension of the array D. ldd >= 1.
[in,out]	B	Matrix of size LDB-by-N.
[in]	ldb	Leading dimension of the array B. ldb >= max(1,M)

Return values

PASTIX_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
1,not	yet implemented

Definition at line 74 of file core_sscalo.c.

References pastix_int_t, PASTIX_SUCCESS, PastixNoTrans, and PastixTrans.

Referenced by cpublok_sscalo(), and cpucblk_sscalo().

◆ core_slrorthu_fullqr()

pastix_fixdbl_t core_slrorthu_fullqr	(	pastix_int_t	M,
		pastix_int_t	N,
		pastix_int_t	rank,
		float *	U,
		pastix_int_t	ldu,
		float *	V,
		pastix_int_t	ldv
	)

Try to orthognalize the u part of the low-rank form, and update the v part accordingly using full QR.

This function considers a low-rank matrix resulting from the addition of two matrices B += A, with A of smaller or equal size to B. The product has the form: U * V^t

The U part of the low-rank form must be orthognalized to get the smaller possible rank during the rradd operation. This function perfoms this by applying a full QR factorization on the U part.

U = Q R, then U' = Q, and V' = R * V

Parameters

[in]	M	The number of rows of the u1u2 matrix.
[in]	N	The number of columns of the v1v2 matrix.
[in]	rank	The number of columns of the U matrix, and the number of rows of the V part in the v1v2 matrix.
[in,out]	U	The U matrix of size ldu -by- rank. On exit, Q from U = Q R.
[in]	ldu	The leading dimension of the U matrix. ldu >= max(1, M)
[in,out]	V	The V matrix of size ldv -by- N. On exit, R * V, with R from U = Q R.
[in]	ldv	The leading dimension of the V matrix. ldv >= max(1, rank)

Returns: The number of flops required to perform the operation.

Definition at line 83 of file core_slrothu.c.

References pastix_int_t, and PastixLeft.

◆ core_slrorthu_partialqr()

pastix_fixdbl_t core_slrorthu_partialqr	(	pastix_int_t	M,
		pastix_int_t	N,
		pastix_int_t	r1,
		pastix_int_t *	r2ptr,
		pastix_int_t	offx,
		pastix_int_t	offy,
		float *	U,
		pastix_int_t	ldu,
		float *	V,
		pastix_int_t	ldv
	)

Try to orthognalize the U part of the low-rank form, and update the V part accordingly using partial QR.

This function considers a low-rank matrix resulting from the addition of two matrices B += A, with A of smaller or equal size to B. The product has the form: U * V^t

The U part of the low-rank form must be orthognalized to get the smaller possible rank during the rradd operation. This function perfoms this by applying a full QR factorization on the U part.

In that case, it takes benefit from the fact that U = [ u1, u2 ], and V = [ v1, v2 ] with u2 and v2 wich are matrices of respective size M2-by-r2, and r2-by-N2, offset by offx and offy

The steps are:

Scaling of u2 with removal of the null columns
Orthogonalization of u2 relatively to u1
Application of the update to v2
orthogonalization through QR of u2
Update of V

Parameters

[in]	M	The number of rows of the u1u2 matrix.
[in]	N	The number of columns of the v1v2 matrix.
[in]	r1	The number of columns of the U matrix in the u1 part, and the number of rows of the V part in the v1 part.
[in,out]	r2ptr	The number of columns of the U matrix in the u2 part, and the number of rows of the V part in the v2 part. On exit, this rank is reduced y the number of null columns found in U.
[in]	offx	The row offset of the matrix u2 in U.
[in]	offy	The column offset of the matrix v2 in V.
[in,out]	U	The U matrix of size ldu -by- rank. On exit, the orthogonalized U.
[in]	ldu	The leading dimension of the U matrix. ldu >= max(1, M)
[in,out]	V	The V matrix of size ldv -by- N. On exit, the updated V matrix.
[in]	ldv	The leading dimension of the V matrix. ldv >= max(1, rank)

Returns: The number of flops required to perform the operation.

Definition at line 197 of file core_slrothu.c.

References core_slrdbg_check_orthogonality_AB(), pastix_int_t, and PastixLeft.

◆ core_slrorthu_cgs()

pastix_fixdbl_t core_slrorthu_cgs	(	pastix_int_t	M1,
		pastix_int_t	N1,
		pastix_int_t	M2,
		pastix_int_t	N2,
		pastix_int_t	r1,
		pastix_int_t *	r2ptr,
		pastix_int_t	offx,
		pastix_int_t	offy,
		float *	U,
		pastix_int_t	ldu,
		float *	V,
		pastix_int_t	ldv
	)

Try to orthognalize the U part of the low-rank form, and update the V part accordingly using CGS.

This function considers a low-rank matrix resulting from the addition of two matrices B += A, with A of smaller or equal size to B. The product has the form: U * V^t

The U part of the low-rank form must be orthognalized to get the smaller possible rank during the rradd operation. This function perfoms this by applying a full QR factorization on the U part.

In that case, it takes benefit from the fact that U = [ u1, u2 ], and V = [ v1, v2 ] with u2 and v2 wich are matrices of respective size M2-by-r2, and r2-by-N2, offset by offx and offy

The steps are:

for each column of u2
- Scaling of u2 with removal of the null columns
- Orthogonalization of u2 relatively to u1
- Remove the column if null

Parameters

[in]	M1	The number of rows of the U matrix.
[in]	N1	The number of columns of the U matrix.
[in]	M2	The number of rows of the u2 part of the U matrix.
[in]	N2	The number of columns of the v2 part of the V matrix.
[in]	r1	The number of columns of the U matrix in the u1 part, and the number of rows of the V part in the v1 part.
[in,out]	r2ptr	The number of columns of the U matrix in the u2 part, and the number of rows of the V part in the v2 part. On exit, this rank is reduced y the number of null columns found in U.
[in]	offx	The row offset of the matrix u2 in U.
[in]	offy	The column offset of the matrix v2 in V.
[in,out]	U	The U matrix of size ldu -by- rank. On exit, the orthogonalized U.
[in]	ldu	The leading dimension of the U matrix. ldu >= max(1, M)
[in,out]	V	The V matrix of size ldv -by- N. On exit, the updated V matrix.
[in]	ldv	The leading dimension of the V matrix. ldv >= max(1, rank)

Returns: The number of flops required to perform the operation.

Definition at line 430 of file core_slrothu.c.

References core_slrdbg_check_orthogonality_AB(), and pastix_int_t.

◆ core_spotrfsp()

void core_spotrfsp	(	pastix_int_t	n,
		float *	A,
		pastix_int_t	lda,
		pastix_int_t *	nbpivots,
		float	criterion
	)

Compute the block static pivoting Cholesky factorization of the matrix n-by-n A = L * L^t .

Parameters

[in]	n	The number of rows and columns of the matrix A.
[in,out]	A	The matrix A to factorize with Cholesky factorization. The matrix is of size lda -by- n.
[in]	lda	The leading dimension of the matrix A.
[in,out]	nbpivots	Pointer to the number of piovting operations made during factorization. It is updated during this call
[in]	criterion	Threshold use for static pivoting. If diagonal value is under this threshold, its value is replaced by the threshold and the nu,ber of pivots is incremented.

Warning: This routine will fail if it discovers a null or negative value on the diagonal during factorization.

Definition at line 144 of file core_spotrfsp.c.

References core_spotf2sp(), and pastix_int_t.

◆ core_sgetrfsp()

void core_sgetrfsp	(	pastix_int_t	n,
		float *	A,
		pastix_int_t	lda,
		pastix_int_t *	nbpivots,
		float	criterion
	)

Compute the block static pivoting LU factorization of the matrix m-by-n A = L * U.

Parameters

[in]	n	The number of rows and columns of the matrix A.
[in,out]	A	The matrix A to factorize with LU factorization. The matrix is of size lda -by- n.
[in]	lda	The leading dimension of the matrix A.
[in,out]	nbpivots	Pointer to the number of piovting operations made during factorization. It is updated during this call
[in]	criterion	Threshold use for static pivoting. If diagonal value is under this threshold, its value is replaced by the threshold and the number of pivots is incremented.

Definition at line 138 of file core_sgetrfsp.c.

References core_sgetf2sp(), and pastix_int_t.

◆ core_ssytrfsp()

void core_ssytrfsp	(	pastix_int_t	n,
		float *	A,
		pastix_int_t	lda,
		pastix_int_t *	nbpivots,
		float	criterion
	)

Compute the block static pivoting factorization of the symmetric matrix n-by-n A such that A = L * D * L^t.

Parameters

[in]	n	The number of rows and columns of the matrix A.
[in,out]	A	The matrix A to factorize with LDL^t factorization. The matrix is of size lda -by- n.
[in]	lda	The leading dimension of the matrix A.
[in,out]	nbpivots	Pointer to the number of piovting operations made during factorization. It is updated during this call
[in]	criterion	Threshold use for static pivoting. If diagonal value is under this threshold, its value is replaced by the threshold and the nu,ber of pivots is incremented.

Definition at line 143 of file core_ssytrfsp.c.

References core_ssytf2sp(), and pastix_int_t.

◆ core_dplrnt()

void core_dplrnt	(	int	m,
		int	n,
		double *	A,
		int	lda,
		int	gM,
		int	m0,
		int	n0,
		unsigned long long int	seed
	)

Generate a random tile.

Parameters

[in]	m	The number of rows of the tile A. m >= 0.
[in]	n	The number of columns of the tile A. n >= 0.
[in,out]	A	On entry, the m-by-n tile to be initialized. On exit, the tile initialized in the mtxtype format.
[in]	lda	The leading dimension of the tile A. lda >= max(1,m).
[in]	gM	The global number of rows of the full matrix, A is belonging to. gM >= (m0+M).
[in]	m0	The index of the first row of tile A in the full matrix. m0 >= 0.
[in]	n0	The index of the first column of tile A in the full matrix. n0 >= 0.
[in]	seed	The seed used for random generation. Must be the same for all tiles initialized with this routine.

Definition at line 91 of file core_dplrnt.c.

◆ core_dgetmo()

void core_dgetmo	(	int	m,
		int	n,
		const double *	A,
		int	lda,
		double *	B,
		int	ldb
	)

Transposes a m-by-n matrix out of place using an extra workspace of size m-by-n.

Parameters

[in]	m	Number of rows of A.
[in]	n	Number of columns of A.
[in]	A	Matrix to be transposed.
[in]	lda	Leading dimension of matrix A.
[in,out]	B	On exit B = trans(A).
[in]	ldb	Leading dimension of matrix B.

Definition at line 49 of file core_dgetmo.c.

Referenced by core_dlr2ge().

◆ core_dgeadd()

int core_dgeadd	(	pastix_trans_t	trans,
		pastix_int_t	M,
		pastix_int_t	N,
		double	alpha,
		const double *	A,
		pastix_int_t	LDA,
		double	beta,
		double *	B,
		pastix_int_t	LDB
	)

Add two matrices together.

Perform the operation: B <- alpha * op(A) + B

Parameters

[in]	trans	PastixNoTrans: No transpose, op( A ) = A; PastixTrans: Transpose, op( A ) = A'; PastixTrans: Conjugate Transpose, op( A ) = (A').
[in]	M	Number of rows of the matrix B. Number of rows of the matrix A, if trans == PastixNoTrans, number of columns of A otherwise.
[in]	N	Number of columns of the matrix B. Number of columns of the matrix A, if trans == PastixNoTrans, number of rows of A otherwise.
[in]	alpha	Scalar factor of A.
[in]	A	Matrix of size LDA-by-N, if trans == PastixNoTrans, LDA-by-M, otherwise.
[in]	LDA	Leading dimension of the array A. LDA >= max(1,K). K = M if trans == PastixNoTrans, K = N otherwise.
[in]	beta	Scalar factor of B.
[in,out]	B	Matrix of size LDB-by-N.
[in]	LDB	Leading dimension of the array B. LDB >= max(1,M)

Return values

PASTIX_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
1,not	yet implemented

Definition at line 78 of file core_dgeadd.c.

References PASTIX_SUCCESS, PastixNoTrans, and PastixTrans.

Referenced by core_dlrconcatenate_v(), core_dlrcpy(), core_dtradd(), cpublok_dadd_frfr(), cpucblk_dadd_frfr(), cpucblk_ddiff(), cpucblk_dgeaddsp1d(), cpucblk_dgetrfsp1d_getrf(), cpucblk_dgetschur_fr(), and cpucblk_dgetschur_lr().

◆ core_dgemdm()

int core_dgemdm	(	pastix_trans_t	transA,
		pastix_trans_t	transB,
		int	M,
		int	N,
		int	K,
		double	alpha,
		const double *	A,
		int	LDA,
		const double *	B,
		int	LDB,
		double	beta,
		double *	C,
		int	LDC,
		const double *	D,
		int	incD,
		double *	WORK,
		int	LWORK
	)

Perform one of the following matrix-matrix operations.

  C := alpha*op( A )*D*op( B ) + beta*C,

where op( X ) is one of

  op( X ) = X   or   op( X ) = X',

alpha and beta are scalars, and A, B, C and D are matrices, with

  op( A ) an m by k matrix,
  op( B ) an k by n matrix,
  C an m by n matrix and
  D an k by k matrix.

Parameters

[in]	transA	PastixNoTrans : No transpose, op( A ) = A; PastixTrans : Transpose, op( A ) = A'.
[in]	transB	PastixNoTrans : No transpose, op( B ) = B; PastixTrans : Transpose, op( B ) = B'.
[in]	M	The number of rows of the matrix op( A ) and of the matrix C. M must be at least zero.
[in]	N	The number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero.
[in]	K	The number of columns of the matrix op( A ), the number of rows of the matrix op( B ), and the number of rows and columns of matrix D. K must be at least zero.
[in]	alpha	On entry, alpha specifies the scalar alpha.
[in]	A	double array of DIMENSION ( LDA, ka ), where ka is k when transA = PastixTrans, and is m otherwise. Before entry with transA = PastixTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A.
[in]	LDA	On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. When transA = PastixTrans then LDA must be at least max( 1, m ), otherwise LDA must be at least max( 1, k ).
[in]	B	double array of DIMENSION ( LDB, kb ), where kb is n when transB = PastixTrans, and is k otherwise. Before entry with transB = PastixTrans, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B.
[in]	LDB	On entry, LDB specifies the first dimension of B as declared in the calling (sub) program. When transB = PastixTrans then LDB must be at least max( 1, k ), otherwise LDB must be at least max( 1, n ).
[in]	beta	On entry, beta specifies the scalar beta. When beta is supplied as zero then C need not be set on input.
[in]	C	double array of DIMENSION ( LDC, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the m by n matrix ( alphaop( A )Dop( B ) + betaC ).
[in]	LDC	On entry, LDC specifies the first dimension of C as declared in the calling (sub) program. LDC must be at least max( 1, m ).
[in]	D	double array of DIMENSION ( LDD, k ). Before entry, the leading k by k part of the array D must contain the matrix D.
[in]	incD	On entry, LDD specifies the first dimension of D as declared in the calling (sub) program. LDD must be at least max( 1, k ).
[in]	WORK	double array, dimension (MAX(1,LWORK))
[in]	LWORK	The length of WORK. On entry, if transA = PastixTrans and transB = PastixTrans then LWORK >= max(1, KN). Otherwise LWORK >= max(1, MK).

Return values

PASTIX_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

Definition at line 139 of file core_dgemdm.c.

References PASTIX_SUCCESS, PastixNoTrans, and PastixTrans.

Referenced by core_dsytrfsp1d_gemm().

◆ core_dpqrcp()

int core_dpqrcp	(	double	tol,
		pastix_int_t	maxrank,
		int	full_update,
		pastix_int_t	nb,
		pastix_int_t	m,
		pastix_int_t	n,
		double *	A,
		pastix_int_t	lda,
		pastix_int_t *	jpvt,
		double *	tau,
		double *	work,
		pastix_int_t	lwork,
		double *	rwork
	)

Compute a rank-reavealing QR factorization.

This routine is originated from the LAPACK kernels dgeqp3/dlaqps and was modified by A. Buttari for MUMPS-BLR. In this version the stopping criterion is based on the frobenius norm of the residual, and not on the estimate of the two-norm making it more restrictive. Thus, the returned ranks are larger, but this gives a better accuracy.

Parameters

[in]	tol	The relative tolerance criterion. Computations are stopped when the frobenius norm of the residual matrix is lower than tol. If tol < 0, then maxrank reflectors are computed.
[in]	maxrank	Maximum number of reflectors computed. Computations are stopped when the rank exceeds maxrank. If maxrank < 0, all reflectors are computed or up to the tolerance criterion.
[in]	full_update	If true, all the trailing submatrix is updated, even if maxrank is reached. If false, the trailing submatrix is not updated as soon as it is not worth it. (Unused for now but kept to match API of RQRCP and TQRCP)
[in]	nb	Tuning parameter for the GEMM blocking size. if nb < 0, nb is set to 32.
[in]	m	Number of rows of the matrix A.
[in]	n	Number of columns of the matrix A.
[in]	A	The matrix of dimension lda-by-n that needs to be compressed.
[in]	lda	The leading dimension of the matrix A. lda >= max(1, m).
[out]	jpvt	The array that describes the permutation of A.
[out]	tau	Contains scalar factors of the elementary reflectors for the matrix Q.
[in]	work	Workspace array of size lwork.
[in]	lwork	The dimension of the work area. lwork >= (nb * n + max(n, m) ) If lwork == -1, the functions returns immediately and work[0] contains the optimal size of work.
[in]	rwork	Workspace array used to store partial and exact column norms (2-by-n)

Returns: This routine will return the rank of A (>=0) or -1 if it didn't manage to compress within the margins of tolerance and maximum rank.

If maximum rank is 0, then either the matrix norm is below the tolerance, and we can return a null rank matrix, or it is not and we need to return a full rank matrix.

Definition at line 105 of file core_dpqrcp.c.

References pastix_int_t.

Referenced by core_dge2lr_pqrcp(), core_drqrcp(), core_drradd_pqrcp(), and core_dtqrcp().

◆ core_drqrcp()

int core_drqrcp	(	double	tol,
		pastix_int_t	maxrank,
		int	refine,
		pastix_int_t	nb,
		pastix_int_t	m,
		pastix_int_t	n,
		double *	A,
		pastix_int_t	lda,
		pastix_int_t *	jpvt,
		double *	tau,
		double *	work,
		pastix_int_t	lwork,
		double *	rwork
	)

Compute a randomized QR factorization.

This kernel implements the algorithm described in: Fast Parallel Randomized QR with Column Pivoting Algorithms for Reliable Low-rank Matrix Approximations. Jianwei Xiao, Ming Gu, and Julien Langou

More information can also be found in Finding Structure with Randomness: Probabilistic Algorithms for Constructing Approximate Matrix Decompositions. N. Halko, P. G. Martinsson, and J. A. Tropp

Based on this paper, we set the p parameter to 5, as it seems sufficient, and because we iterate multiple times to get the final rank.

Parameters

[in]	tol	The relative tolerance criterion. Computations are stopped when the frobenius norm of the residual matrix is lower than tol. If tol < 0, then maxrank reflectors are computed.
[in]	maxrank	Maximum number of reflectors computed. Computations are stopped when the rank exceeds maxrank. If maxrank < 0, all reflectors are computed or up to the tolerance criterion.
[in]	refine	Enable/disable the extra refinement step that performs an additional PQRCP on the trailing submatrix.
[in]	nb	Tuning parameter for the GEMM blocking size. if nb < 0, nb is set to 32.
[in]	m	Number of rows of the matrix A.
[in]	n	Number of columns of the matrix A.
[in]	A	The matrix of dimension lda-by-n that needs to be compressed.
[in]	lda	The leading dimension of the matrix A. lda >= max(1, m).
[out]	jpvt	The array that describes the permutation of A.
[out]	tau	Contains scalar factors of the elementary reflectors for the matrix Q.
[in]	work	Workspace array of size lwork.
[in]	lwork	The dimension of the work area. lwork >= (nb * n + max(n, m) ) If lwork == -1, the functions returns immediately and work[0] contains the optimal size of work.
[in]	rwork	Workspace array used to store partial and exact column norms (2-by-n)

Returns: This routine will return the rank of A (>=0) or -1 if it didn't manage to compress within the margins of tolerance and maximum rank.

If maximum rank is 0, then either the matrix norm is below the tolerance, and we can return a null rank matrix, or it is not and we need to return a full rank matrix.

Definition at line 114 of file core_drqrcp.c.

References core_dpqrcp(), and pastix_int_t.

Referenced by core_dge2lr_rqrcp(), and core_drradd_rqrcp().

◆ core_drqrrt()

int core_drqrrt	(	double	tol,
		pastix_int_t	maxrank,
		pastix_int_t	nb,
		pastix_int_t	m,
		pastix_int_t	n,
		double *	A,
		pastix_int_t	lda,
		double *	tau,
		double *	B,
		pastix_int_t	ldb,
		double *	tau_b,
		double *	work,
		pastix_int_t	lwork,
		double	normA
	)

Compute a randomized QR factorization with rotation technique.

This kernel implements the second method (he did not write a pseudocode for the second method) described in :

Blocked rank-revealing QR factorizations: How randomized sampling can be used to avoid single-vector pivoting. P. G. Martinsson

Note that we only use the trailing matrix for resampling. We don't use power of it for getting better results, since it would be too costly.

Parameters

[in]	tol	The relative tolerance criterion. Computations are stopped when the frobenius norm of the residual matrix is lower than tol. If tol < 0, then maxrank reflectors are computed.
[in]	maxrank	Maximum number of reflectors computed. Computations are stopped when the rank exceeds maxrank. If maxrank < 0, all reflectors are computed or up to the tolerance criterion.
[in]	nb	Tuning parameter for the GEMM blocking size. if nb < 0, nb is set to 32.
[in]	m	Number of rows of the matrix A.
[in]	n	Number of columns of the matrix A.
[in,out]	A	The matrix of dimension lda-by-n that needs to be compressed. On output, the A matrix is computed up to the founded rank k (k first columns and rows). Everything else, should be dropped.
[in]	lda	The leading dimension of the matrix A. lda >= max(1, m).
[out]	tau	Contains scalar factors of the elementary reflectors for the matrix A.
[out]	B	The matrix of dimension ldb-by-maxrank that will holds the partial projection of the matrix A. On output, each block of 32 columns can be used to computed the reverse rotation of the R part of A.
[in]	ldb	The leading dimension of the matrix B. ldb >= max(1, n).
[out]	tau_b	Contains scalar factors of the elementary reflectors for the matrix B.
[in]	work	Workspace array of size lwork.
[in]	lwork	The dimension of the work area. lwork >= (nb * max(n,n)) If lwork == -1, the function returns immediately and work[0] contains the optimal size of work.
[in]	normA	The norm of the input matrixA. If negative, the norm is computed by the kernel.

Returns: This routine will return the rank of A (>=0) or -1 if it didn't manage to compress within the margins of tolerance and maximum rank.

If maximum rank is 0, then either the matrix norm is below the tolerance, and we can return a null rank matrix, or it is not and we need to return a full rank matrix.

Definition at line 126 of file core_drqrrt.c.

References pastix_int_t.

Referenced by core_dge2lr_rqrrt().

◆ core_dtqrcp()

int core_dtqrcp	(	double	tol,
		pastix_int_t	maxrank,
		int	refine,
		pastix_int_t	nb,
		pastix_int_t	m,
		pastix_int_t	n,
		double *	A,
		pastix_int_t	lda,
		pastix_int_t *	jpvt,
		double *	tau,
		double *	work,
		pastix_int_t	lwork,
		double *	rwork
	)

Compute a randomized QR factorization with truncated updates.

This routine is derivated from "Randomized QR with Column Pivoting", J. A. Duersch and M. Gu, SIAM Journal on Scientific Computing, vol. 39, no. 4, pp. C263-C291, 2017.

Parameters

[in]	tol	The relative tolerance criterion. Computations are stopped when the frobenius norm of the residual matrix is lower than tol. If tol < 0, then maxrank reflectors are computed.
[in]	maxrank	Maximum number of reflectors computed. Computations are stopped when the rank exceeds maxrank. If maxrank < 0, all reflectors are computed or up to the tolerance criterion.
[in]	refine	TODO
[in]	nb	Tuning parameter for the GEMM blocking size. if nb < 0, nb is set to 32.
[in]	m	Number of rows of the matrix A.
[in]	n	Number of columns of the matrix A.
[in]	A	The matrix of dimension lda-by-n that needs to be compressed.
[in]	lda	The leading dimension of the matrix A. lda >= max(1, m).
[out]	jpvt	The array that describes the permutation of A.
[out]	tau	Contains scalar factors of the elementary reflectors for the matrix Q.
[in]	work	Workspace array of size lwork.
[in]	lwork	The dimension of the work area. lwork >= (nb * n + max(n, m) ) If lwork == -1, the functions returns immediately and work[0] contains the optimal size of work.
[in]	rwork	Workspace array used to store partial and exact column norms (2-by-n)

Returns: This routine will return the rank of A (>=0) or -1 if it didn't manage to compress within the margins of tolerance and maximum rank.

If maximum rank is 0, then either the matrix norm is below the tolerance, and we can return a null rank matrix, or it is not and we need to return a full rank matrix.

Definition at line 99 of file core_dtqrcp.c.

References core_dpqrcp(), and pastix_int_t.

Referenced by core_dge2lr_tqrcp(), and core_drradd_tqrcp().

◆ core_dtradd()

int core_dtradd	(	pastix_uplo_t	uplo,
		pastix_trans_t	trans,
		pastix_int_t	M,
		pastix_int_t	N,
		double	alpha,
		const double *	A,
		pastix_int_t	LDA,
		double	beta,
		double *	B,
		pastix_int_t	LDB
	)

Add two triangular matrices together as in PBLAS pdtradd.

B <- alpha * op(A) + beta * B,

where op(X) = X, X', or (X')

Parameters

[in]	uplo	Specifies the shape of A and B matrices: PastixUpperLower: A and B are general matrices. PastixUpper: op(A) and B are upper trapezoidal matrices. PastixLower: op(A) and B are lower trapezoidal matrices.
[in]	trans	Specifies whether the matrix A is non-transposed, transposed, or conjugate transposed PastixNoTrans: op(A) = A PastixTrans: op(A) = A' PastixTrans: op(A) = (A')
[in]	M	Number of rows of the matrices op(A) and B.
[in]	N	Number of columns of the matrices op(A) and B.
[in]	alpha	Scalar factor of A.
[in]	A	Matrix of size LDA-by-N, if trans = PastixNoTrans, LDA-by-M otherwise.
[in]	LDA	Leading dimension of the array A. LDA >= max(1,M) if trans = PastixNoTrans, LDA >= max(1,N) otherwise.
[in]	beta	Scalar factor of B.
[in,out]	B	Matrix of size LDB-by-N. On exit, B = alpha * op(A) + beta * B
[in]	LDB	Leading dimension of the array B. LDB >= max(1,M)

Return values

PASTIX_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

PastixLower

PastixUpper

Definition at line 79 of file core_dtradd.c.

References core_dgeadd(), PASTIX_SUCCESS, PastixLower, PastixNoTrans, PastixTrans, PastixUpper, and PastixUpperLower.

◆ core_dscalo()

int core_dscalo	(	pastix_trans_t	trans,
		pastix_int_t	M,
		pastix_int_t	N,
		const double *	A,
		pastix_int_t	lda,
		const double *	D,
		pastix_int_t	ldd,
		double *	B,
		pastix_int_t	ldb
	)

Scale a matrix by a diagonal out of place.

Perform the operation: B <- op(A) * D, where A is a general matrix, and D a diagonal matrix.

Parameters

[in]	trans	PastixNoTrans: No transpose, op( A ) = A; PastixTrans: Transpose, op( A ) = A; PastixTrans: Conjugate Transpose, op( A ) = (A).
[in]	M	Number of rows of the matrix B. Number of rows of the matrix A.
[in]	N	Number of columns of the matrix B. Number of columns of the matrix A.
[in]	A	Matrix of size lda-by-N.
[in]	lda	Leading dimension of the array A. lda >= max(1,M).
[in]	D	Diagonal matrix of size ldd-by-N.
[in]	ldd	Leading dimension of the array D. ldd >= 1.
[in,out]	B	Matrix of size LDB-by-N.
[in]	ldb	Leading dimension of the array B. ldb >= max(1,M)

Return values

PASTIX_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
1,not	yet implemented

Definition at line 74 of file core_dscalo.c.

References pastix_int_t, PASTIX_SUCCESS, PastixNoTrans, and PastixTrans.

Referenced by cpublok_dscalo(), and cpucblk_dscalo().

◆ core_dlrorthu_fullqr()

pastix_fixdbl_t core_dlrorthu_fullqr	(	pastix_int_t	M,
		pastix_int_t	N,
		pastix_int_t	rank,
		double *	U,
		pastix_int_t	ldu,
		double *	V,
		pastix_int_t	ldv
	)

Try to orthognalize the u part of the low-rank form, and update the v part accordingly using full QR.

This function considers a low-rank matrix resulting from the addition of two matrices B += A, with A of smaller or equal size to B. The product has the form: U * V^t

The U part of the low-rank form must be orthognalized to get the smaller possible rank during the rradd operation. This function perfoms this by applying a full QR factorization on the U part.

U = Q R, then U' = Q, and V' = R * V

Parameters

[in]	M	The number of rows of the u1u2 matrix.
[in]	N	The number of columns of the v1v2 matrix.
[in]	rank	The number of columns of the U matrix, and the number of rows of the V part in the v1v2 matrix.
[in,out]	U	The U matrix of size ldu -by- rank. On exit, Q from U = Q R.
[in]	ldu	The leading dimension of the U matrix. ldu >= max(1, M)
[in,out]	V	The V matrix of size ldv -by- N. On exit, R * V, with R from U = Q R.
[in]	ldv	The leading dimension of the V matrix. ldv >= max(1, rank)

Returns: The number of flops required to perform the operation.

Definition at line 83 of file core_dlrothu.c.

References pastix_int_t, and PastixLeft.

◆ core_dlrorthu_partialqr()

pastix_fixdbl_t core_dlrorthu_partialqr	(	pastix_int_t	M,
		pastix_int_t	N,
		pastix_int_t	r1,
		pastix_int_t *	r2ptr,
		pastix_int_t	offx,
		pastix_int_t	offy,
		double *	U,
		pastix_int_t	ldu,
		double *	V,
		pastix_int_t	ldv
	)

Try to orthognalize the U part of the low-rank form, and update the V part accordingly using partial QR.

This function considers a low-rank matrix resulting from the addition of two matrices B += A, with A of smaller or equal size to B. The product has the form: U * V^t

The U part of the low-rank form must be orthognalized to get the smaller possible rank during the rradd operation. This function perfoms this by applying a full QR factorization on the U part.

In that case, it takes benefit from the fact that U = [ u1, u2 ], and V = [ v1, v2 ] with u2 and v2 wich are matrices of respective size M2-by-r2, and r2-by-N2, offset by offx and offy

The steps are:

Scaling of u2 with removal of the null columns
Orthogonalization of u2 relatively to u1
Application of the update to v2
orthogonalization through QR of u2
Update of V

Parameters

[in]	M	The number of rows of the u1u2 matrix.
[in]	N	The number of columns of the v1v2 matrix.
[in]	r1	The number of columns of the U matrix in the u1 part, and the number of rows of the V part in the v1 part.
[in,out]	r2ptr	The number of columns of the U matrix in the u2 part, and the number of rows of the V part in the v2 part. On exit, this rank is reduced y the number of null columns found in U.
[in]	offx	The row offset of the matrix u2 in U.
[in]	offy	The column offset of the matrix v2 in V.
[in,out]	U	The U matrix of size ldu -by- rank. On exit, the orthogonalized U.
[in]	ldu	The leading dimension of the U matrix. ldu >= max(1, M)
[in,out]	V	The V matrix of size ldv -by- N. On exit, the updated V matrix.
[in]	ldv	The leading dimension of the V matrix. ldv >= max(1, rank)

Returns: The number of flops required to perform the operation.

Definition at line 197 of file core_dlrothu.c.

References core_dlrdbg_check_orthogonality_AB(), pastix_int_t, and PastixLeft.

◆ core_dlrorthu_cgs()

pastix_fixdbl_t core_dlrorthu_cgs	(	pastix_int_t	M1,
		pastix_int_t	N1,
		pastix_int_t	M2,
		pastix_int_t	N2,
		pastix_int_t	r1,
		pastix_int_t *	r2ptr,
		pastix_int_t	offx,
		pastix_int_t	offy,
		double *	U,
		pastix_int_t	ldu,
		double *	V,
		pastix_int_t	ldv
	)

Try to orthognalize the U part of the low-rank form, and update the V part accordingly using CGS.

This function considers a low-rank matrix resulting from the addition of two matrices B += A, with A of smaller or equal size to B. The product has the form: U * V^t

The U part of the low-rank form must be orthognalized to get the smaller possible rank during the rradd operation. This function perfoms this by applying a full QR factorization on the U part.

In that case, it takes benefit from the fact that U = [ u1, u2 ], and V = [ v1, v2 ] with u2 and v2 wich are matrices of respective size M2-by-r2, and r2-by-N2, offset by offx and offy

The steps are:

for each column of u2
- Scaling of u2 with removal of the null columns
- Orthogonalization of u2 relatively to u1
- Remove the column if null

Parameters

[in]	M1	The number of rows of the U matrix.
[in]	N1	The number of columns of the U matrix.
[in]	M2	The number of rows of the u2 part of the U matrix.
[in]	N2	The number of columns of the v2 part of the V matrix.
[in]	r1	The number of columns of the U matrix in the u1 part, and the number of rows of the V part in the v1 part.
[in,out]	r2ptr	The number of columns of the U matrix in the u2 part, and the number of rows of the V part in the v2 part. On exit, this rank is reduced y the number of null columns found in U.
[in]	offx	The row offset of the matrix u2 in U.
[in]	offy	The column offset of the matrix v2 in V.
[in,out]	U	The U matrix of size ldu -by- rank. On exit, the orthogonalized U.
[in]	ldu	The leading dimension of the U matrix. ldu >= max(1, M)
[in,out]	V	The V matrix of size ldv -by- N. On exit, the updated V matrix.
[in]	ldv	The leading dimension of the V matrix. ldv >= max(1, rank)

Returns: The number of flops required to perform the operation.

Definition at line 430 of file core_dlrothu.c.

References core_dlrdbg_check_orthogonality_AB(), and pastix_int_t.

◆ core_dpotrfsp()

void core_dpotrfsp	(	pastix_int_t	n,
		double *	A,
		pastix_int_t	lda,
		pastix_int_t *	nbpivots,
		double	criterion
	)

Compute the block static pivoting Cholesky factorization of the matrix n-by-n A = L * L^t .

Parameters

[in]	n	The number of rows and columns of the matrix A.
[in,out]	A	The matrix A to factorize with Cholesky factorization. The matrix is of size lda -by- n.
[in]	lda	The leading dimension of the matrix A.
[in,out]	nbpivots	Pointer to the number of piovting operations made during factorization. It is updated during this call
[in]	criterion	Threshold use for static pivoting. If diagonal value is under this threshold, its value is replaced by the threshold and the nu,ber of pivots is incremented.

Warning: This routine will fail if it discovers a null or negative value on the diagonal during factorization.

Definition at line 144 of file core_dpotrfsp.c.

References core_dpotf2sp(), and pastix_int_t.

◆ core_dgetrfsp()

void core_dgetrfsp	(	pastix_int_t	n,
		double *	A,
		pastix_int_t	lda,
		pastix_int_t *	nbpivots,
		double	criterion
	)

Compute the block static pivoting LU factorization of the matrix m-by-n A = L * U.

Parameters

[in]	n	The number of rows and columns of the matrix A.
[in,out]	A	The matrix A to factorize with LU factorization. The matrix is of size lda -by- n.
[in]	lda	The leading dimension of the matrix A.
[in,out]	nbpivots	Pointer to the number of piovting operations made during factorization. It is updated during this call
[in]	criterion	Threshold use for static pivoting. If diagonal value is under this threshold, its value is replaced by the threshold and the number of pivots is incremented.

Definition at line 138 of file core_dgetrfsp.c.

References core_dgetf2sp(), and pastix_int_t.

◆ core_dsytrfsp()

void core_dsytrfsp	(	pastix_int_t	n,
		double *	A,
		pastix_int_t	lda,
		pastix_int_t *	nbpivots,
		double	criterion
	)

Compute the block static pivoting factorization of the symmetric matrix n-by-n A such that A = L * D * L^t.

Parameters

[in]	n	The number of rows and columns of the matrix A.
[in,out]	A	The matrix A to factorize with LDL^t factorization. The matrix is of size lda -by- n.
[in]	lda	The leading dimension of the matrix A.
[in,out]	nbpivots	Pointer to the number of piovting operations made during factorization. It is updated during this call
[in]	criterion	Threshold use for static pivoting. If diagonal value is under this threshold, its value is replaced by the threshold and the nu,ber of pivots is incremented.

Definition at line 143 of file core_dsytrfsp.c.

References core_dsytf2sp(), and pastix_int_t.

◆ core_zplrnt()

void core_zplrnt	(	int	m,
		int	n,
		pastix_complex64_t *	A,
		int	lda,
		int	gM,
		int	m0,
		int	n0,
		unsigned long long int	seed
	)

Generate a random tile.

Parameters

[in]	m	The number of rows of the tile A. m >= 0.
[in]	n	The number of columns of the tile A. n >= 0.
[in,out]	A	On entry, the m-by-n tile to be initialized. On exit, the tile initialized in the mtxtype format.
[in]	lda	The leading dimension of the tile A. lda >= max(1,m).
[in]	gM	The global number of rows of the full matrix, A is belonging to. gM >= (m0+M).
[in]	m0	The index of the first row of tile A in the full matrix. m0 >= 0.
[in]	n0	The index of the first column of tile A in the full matrix. n0 >= 0.
[in]	seed	The seed used for random generation. Must be the same for all tiles initialized with this routine.

Definition at line 91 of file core_zplrnt.c.

◆ core_zgetmo()

void core_zgetmo	(	int	m,
		int	n,
		const pastix_complex64_t *	A,
		int	lda,
		pastix_complex64_t *	B,
		int	ldb
	)

Transposes a m-by-n matrix out of place using an extra workspace of size m-by-n.

Parameters

[in]	m	Number of rows of A.
[in]	n	Number of columns of A.
[in]	A	Matrix to be transposed.
[in]	lda	Leading dimension of matrix A.
[in,out]	B	On exit B = trans(A).
[in]	ldb	Leading dimension of matrix B.

Definition at line 49 of file core_zgetmo.c.

Referenced by core_zlr2ge().

◆ core_zgeadd()

int core_zgeadd	(	pastix_trans_t	trans,
		pastix_int_t	M,
		pastix_int_t	N,
		pastix_complex64_t	alpha,
		const pastix_complex64_t *	A,
		pastix_int_t	LDA,
		pastix_complex64_t	beta,
		pastix_complex64_t *	B,
		pastix_int_t	LDB
	)

Add two matrices together.

Perform the operation: B <- alpha * op(A) + B

Parameters

[in]	trans	PastixNoTrans: No transpose, op( A ) = A; PastixTrans: Transpose, op( A ) = A'; PastixConjTrans: Conjugate Transpose, op( A ) = conj(A').
[in]	M	Number of rows of the matrix B. Number of rows of the matrix A, if trans == PastixNoTrans, number of columns of A otherwise.
[in]	N	Number of columns of the matrix B. Number of columns of the matrix A, if trans == PastixNoTrans, number of rows of A otherwise.
[in]	alpha	Scalar factor of A.
[in]	A	Matrix of size LDA-by-N, if trans == PastixNoTrans, LDA-by-M, otherwise.
[in]	LDA	Leading dimension of the array A. LDA >= max(1,K). K = M if trans == PastixNoTrans, K = N otherwise.
[in]	beta	Scalar factor of B.
[in,out]	B	Matrix of size LDB-by-N.
[in]	LDB	Leading dimension of the array B. LDB >= max(1,M)

Return values

PASTIX_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
1,not	yet implemented

Definition at line 78 of file core_zgeadd.c.

References PastixConjTrans, and PastixNoTrans.

Referenced by core_zlrconcatenate_v(), core_zlrcpy(), core_ztradd(), cpublok_zadd_frfr(), cpucblk_zadd_frfr(), cpucblk_zdiff(), cpucblk_zgeaddsp1d(), cpucblk_zgetrfsp1d_getrf(), cpucblk_zgetschur_fr(), and cpucblk_zgetschur_lr().

◆ core_zgemdm()

int core_zgemdm	(	pastix_trans_t	transA,
		pastix_trans_t	transB,
		int	M,
		int	N,
		int	K,
		pastix_complex64_t	alpha,
		const pastix_complex64_t *	A,
		int	LDA,
		const pastix_complex64_t *	B,
		int	LDB,
		pastix_complex64_t	beta,
		pastix_complex64_t *	C,
		int	LDC,
		const pastix_complex64_t *	D,
		int	incD,
		pastix_complex64_t *	WORK,
		int	LWORK
	)

Perform one of the following matrix-matrix operations.

  C := alpha*op( A )*D*op( B ) + beta*C,

where op( X ) is one of

  op( X ) = X   or   op( X ) = X',

alpha and beta are scalars, and A, B, C and D are matrices, with

  op( A ) an m by k matrix,
  op( B ) an k by n matrix,
  C an m by n matrix and
  D an k by k matrix.

Parameters

[in]	transA	PastixNoTrans : No transpose, op( A ) = A; PastixConjTrans : Transpose, op( A ) = A'.
[in]	transB	PastixNoTrans : No transpose, op( B ) = B; PastixConjTrans : Transpose, op( B ) = B'.
[in]	M	The number of rows of the matrix op( A ) and of the matrix C. M must be at least zero.
[in]	N	The number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero.
[in]	K	The number of columns of the matrix op( A ), the number of rows of the matrix op( B ), and the number of rows and columns of matrix D. K must be at least zero.
[in]	alpha	On entry, alpha specifies the scalar alpha.
[in]	A	pastix_complex64_t array of DIMENSION ( LDA, ka ), where ka is k when transA = PastixTrans, and is m otherwise. Before entry with transA = PastixTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A.
[in]	LDA	On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. When transA = PastixTrans then LDA must be at least max( 1, m ), otherwise LDA must be at least max( 1, k ).
[in]	B	pastix_complex64_t array of DIMENSION ( LDB, kb ), where kb is n when transB = PastixTrans, and is k otherwise. Before entry with transB = PastixTrans, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B.
[in]	LDB	On entry, LDB specifies the first dimension of B as declared in the calling (sub) program. When transB = PastixTrans then LDB must be at least max( 1, k ), otherwise LDB must be at least max( 1, n ).
[in]	beta	On entry, beta specifies the scalar beta. When beta is supplied as zero then C need not be set on input.
[in]	C	pastix_complex64_t array of DIMENSION ( LDC, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the m by n matrix ( alphaop( A )Dop( B ) + betaC ).
[in]	LDC	On entry, LDC specifies the first dimension of C as declared in the calling (sub) program. LDC must be at least max( 1, m ).
[in]	D	pastix_complex64_t array of DIMENSION ( LDD, k ). Before entry, the leading k by k part of the array D must contain the matrix D.
[in]	incD	On entry, LDD specifies the first dimension of D as declared in the calling (sub) program. LDD must be at least max( 1, k ).
[in]	WORK	pastix_complex64_t array, dimension (MAX(1,LWORK))
[in]	LWORK	The length of WORK. On entry, if transA = PastixTrans and transB = PastixTrans then LWORK >= max(1, KN). Otherwise LWORK >= max(1, MK).

Return values

PASTIX_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

Definition at line 139 of file core_zgemdm.c.

References PASTIX_SUCCESS, PastixConjTrans, and PastixNoTrans.

Referenced by core_zhetrfsp1d_gemm(), and core_zsytrfsp1d_gemm().

◆ core_zpqrcp()

int core_zpqrcp	(	double	tol,
		pastix_int_t	maxrank,
		int	full_update,
		pastix_int_t	nb,
		pastix_int_t	m,
		pastix_int_t	n,
		pastix_complex64_t *	A,
		pastix_int_t	lda,
		pastix_int_t *	jpvt,
		pastix_complex64_t *	tau,
		pastix_complex64_t *	work,
		pastix_int_t	lwork,
		double *	rwork
	)

Compute a rank-reavealing QR factorization.

This routine is originated from the LAPACK kernels zgeqp3/zlaqps and was modified by A. Buttari for MUMPS-BLR. In this version the stopping criterion is based on the frobenius norm of the residual, and not on the estimate of the two-norm making it more restrictive. Thus, the returned ranks are larger, but this gives a better accuracy.

Parameters

[in]	tol	The relative tolerance criterion. Computations are stopped when the frobenius norm of the residual matrix is lower than tol. If tol < 0, then maxrank reflectors are computed.
[in]	maxrank	Maximum number of reflectors computed. Computations are stopped when the rank exceeds maxrank. If maxrank < 0, all reflectors are computed or up to the tolerance criterion.
[in]	full_update	If true, all the trailing submatrix is updated, even if maxrank is reached. If false, the trailing submatrix is not updated as soon as it is not worth it. (Unused for now but kept to match API of RQRCP and TQRCP)
[in]	nb	Tuning parameter for the GEMM blocking size. if nb < 0, nb is set to 32.
[in]	m	Number of rows of the matrix A.
[in]	n	Number of columns of the matrix A.
[in]	A	The matrix of dimension lda-by-n that needs to be compressed.
[in]	lda	The leading dimension of the matrix A. lda >= max(1, m).
[out]	jpvt	The array that describes the permutation of A.
[out]	tau	Contains scalar factors of the elementary reflectors for the matrix Q.
[in]	work	Workspace array of size lwork.
[in]	lwork	The dimension of the work area. lwork >= (nb * n + max(n, m) ) If lwork == -1, the functions returns immediately and work[0] contains the optimal size of work.
[in]	rwork	Workspace array used to store partial and exact column norms (2-by-n)

Returns: This routine will return the rank of A (>=0) or -1 if it didn't manage to compress within the margins of tolerance and maximum rank.

If maximum rank is 0, then either the matrix norm is below the tolerance, and we can return a null rank matrix, or it is not and we need to return a full rank matrix.

Definition at line 105 of file core_zpqrcp.c.

References pastix_int_t.

Referenced by core_zge2lr_pqrcp(), and core_zrradd_pqrcp().

◆ core_zrqrcp()

int core_zrqrcp	(	double	tol,
		pastix_int_t	maxrank,
		int	refine,
		pastix_int_t	nb,
		pastix_int_t	m,
		pastix_int_t	n,
		pastix_complex64_t *	A,
		pastix_int_t	lda,
		pastix_int_t *	jpvt,
		pastix_complex64_t *	tau,
		pastix_complex64_t *	work,
		pastix_int_t	lwork,
		double *	rwork
	)

Compute a randomized QR factorization.

This kernel implements the algorithm described in: Fast Parallel Randomized QR with Column Pivoting Algorithms for Reliable Low-rank Matrix Approximations. Jianwei Xiao, Ming Gu, and Julien Langou

More information can also be found in Finding Structure with Randomness: Probabilistic Algorithms for Constructing Approximate Matrix Decompositions. N. Halko, P. G. Martinsson, and J. A. Tropp

Based on this paper, we set the p parameter to 5, as it seems sufficient, and because we iterate multiple times to get the final rank.

Parameters

[in]	tol	The relative tolerance criterion. Computations are stopped when the frobenius norm of the residual matrix is lower than tol. If tol < 0, then maxrank reflectors are computed.
[in]	maxrank	Maximum number of reflectors computed. Computations are stopped when the rank exceeds maxrank. If maxrank < 0, all reflectors are computed or up to the tolerance criterion.
[in]	refine	Enable/disable the extra refinement step that performs an additional PQRCP on the trailing submatrix.
[in]	nb	Tuning parameter for the GEMM blocking size. if nb < 0, nb is set to 32.
[in]	m	Number of rows of the matrix A.
[in]	n	Number of columns of the matrix A.
[in]	A	The matrix of dimension lda-by-n that needs to be compressed.
[in]	lda	The leading dimension of the matrix A. lda >= max(1, m).
[out]	jpvt	The array that describes the permutation of A.
[out]	tau	Contains scalar factors of the elementary reflectors for the matrix Q.
[in]	work	Workspace array of size lwork.
[in]	lwork	The dimension of the work area. lwork >= (nb * n + max(n, m) ) If lwork == -1, the functions returns immediately and work[0] contains the optimal size of work.
[in]	rwork	Workspace array used to store partial and exact column norms (2-by-n)

Returns: This routine will return the rank of A (>=0) or -1 if it didn't manage to compress within the margins of tolerance and maximum rank.

If maximum rank is 0, then either the matrix norm is below the tolerance, and we can return a null rank matrix, or it is not and we need to return a full rank matrix.

Definition at line 114 of file core_zrqrcp.c.

References pastix_int_t.

Referenced by core_zge2lr_rqrcp(), and core_zrradd_rqrcp().

◆ core_zrqrrt()

int core_zrqrrt	(	double	tol,
		pastix_int_t	maxrank,
		pastix_int_t	nb,
		pastix_int_t	m,
		pastix_int_t	n,
		pastix_complex64_t *	A,
		pastix_int_t	lda,
		pastix_complex64_t *	tau,
		pastix_complex64_t *	B,
		pastix_int_t	ldb,
		pastix_complex64_t *	tau_b,
		pastix_complex64_t *	work,
		pastix_int_t	lwork,
		double	normA
	)

Compute a randomized QR factorization with rotation technique.

This kernel implements the second method (he did not write a pseudocode for the second method) described in :

Blocked rank-revealing QR factorizations: How randomized sampling can be used to avoid single-vector pivoting. P. G. Martinsson

Note that we only use the trailing matrix for resampling. We don't use power of it for getting better results, since it would be too costly.

Parameters

[in]	tol	The relative tolerance criterion. Computations are stopped when the frobenius norm of the residual matrix is lower than tol. If tol < 0, then maxrank reflectors are computed.
[in]	maxrank	Maximum number of reflectors computed. Computations are stopped when the rank exceeds maxrank. If maxrank < 0, all reflectors are computed or up to the tolerance criterion.
[in]	nb	Tuning parameter for the GEMM blocking size. if nb < 0, nb is set to 32.
[in]	m	Number of rows of the matrix A.
[in]	n	Number of columns of the matrix A.
[in,out]	A	The matrix of dimension lda-by-n that needs to be compressed. On output, the A matrix is computed up to the founded rank k (k first columns and rows). Everything else, should be dropped.
[in]	lda	The leading dimension of the matrix A. lda >= max(1, m).
[out]	tau	Contains scalar factors of the elementary reflectors for the matrix A.
[out]	B	The matrix of dimension ldb-by-maxrank that will holds the partial projection of the matrix A. On output, each block of 32 columns can be used to computed the reverse rotation of the R part of A.
[in]	ldb	The leading dimension of the matrix B. ldb >= max(1, n).
[out]	tau_b	Contains scalar factors of the elementary reflectors for the matrix B.
[in]	work	Workspace array of size lwork.
[in]	lwork	The dimension of the work area. lwork >= (nb * max(n,n)) If lwork == -1, the function returns immediately and work[0] contains the optimal size of work.
[in]	normA	The norm of the input matrixA. If negative, the norm is computed by the kernel.

Returns: This routine will return the rank of A (>=0) or -1 if it didn't manage to compress within the margins of tolerance and maximum rank.

If maximum rank is 0, then either the matrix norm is below the tolerance, and we can return a null rank matrix, or it is not and we need to return a full rank matrix.

Definition at line 126 of file core_zrqrrt.c.

References pastix_int_t.

Referenced by core_zge2lr_rqrrt().

◆ core_ztqrcp()

int core_ztqrcp	(	double	tol,
		pastix_int_t	maxrank,
		int	refine,
		pastix_int_t	nb,
		pastix_int_t	m,
		pastix_int_t	n,
		pastix_complex64_t *	A,
		pastix_int_t	lda,
		pastix_int_t *	jpvt,
		pastix_complex64_t *	tau,
		pastix_complex64_t *	work,
		pastix_int_t	lwork,
		double *	rwork
	)

Compute a randomized QR factorization with truncated updates.

This routine is derivated from "Randomized QR with Column Pivoting", J. A. Duersch and M. Gu, SIAM Journal on Scientific Computing, vol. 39, no. 4, pp. C263-C291, 2017.

Parameters

[in]	tol	The relative tolerance criterion. Computations are stopped when the frobenius norm of the residual matrix is lower than tol. If tol < 0, then maxrank reflectors are computed.
[in]	maxrank	Maximum number of reflectors computed. Computations are stopped when the rank exceeds maxrank. If maxrank < 0, all reflectors are computed or up to the tolerance criterion.
[in]	refine	TODO
[in]	nb	Tuning parameter for the GEMM blocking size. if nb < 0, nb is set to 32.
[in]	m	Number of rows of the matrix A.
[in]	n	Number of columns of the matrix A.
[in]	A	The matrix of dimension lda-by-n that needs to be compressed.
[in]	lda	The leading dimension of the matrix A. lda >= max(1, m).
[out]	jpvt	The array that describes the permutation of A.
[out]	tau	Contains scalar factors of the elementary reflectors for the matrix Q.
[in]	work	Workspace array of size lwork.
[in]	lwork	The dimension of the work area. lwork >= (nb * n + max(n, m) ) If lwork == -1, the functions returns immediately and work[0] contains the optimal size of work.
[in]	rwork	Workspace array used to store partial and exact column norms (2-by-n)

Returns: This routine will return the rank of A (>=0) or -1 if it didn't manage to compress within the margins of tolerance and maximum rank.

If maximum rank is 0, then either the matrix norm is below the tolerance, and we can return a null rank matrix, or it is not and we need to return a full rank matrix.

Definition at line 99 of file core_ztqrcp.c.

References pastix_int_t.

Referenced by core_zge2lr_tqrcp(), and core_zrradd_tqrcp().

◆ core_ztradd()

int core_ztradd	(	pastix_uplo_t	uplo,
		pastix_trans_t	trans,
		pastix_int_t	M,
		pastix_int_t	N,
		pastix_complex64_t	alpha,
		const pastix_complex64_t *	A,
		pastix_int_t	LDA,
		pastix_complex64_t	beta,
		pastix_complex64_t *	B,
		pastix_int_t	LDB
	)

Add two triangular matrices together as in PBLAS pztradd.

B <- alpha * op(A) + beta * B,

where op(X) = X, X', or conj(X')

Parameters

[in]	uplo	Specifies the shape of A and B matrices: PastixUpperLower: A and B are general matrices. PastixUpper: op(A) and B are upper trapezoidal matrices. PastixLower: op(A) and B are lower trapezoidal matrices.
[in]	trans	Specifies whether the matrix A is non-transposed, transposed, or conjugate transposed PastixNoTrans: op(A) = A PastixTrans: op(A) = A' PastixConjTrans: op(A) = conj(A')
[in]	M	Number of rows of the matrices op(A) and B.
[in]	N	Number of columns of the matrices op(A) and B.
[in]	alpha	Scalar factor of A.
[in]	A	Matrix of size LDA-by-N, if trans = PastixNoTrans, LDA-by-M otherwise.
[in]	LDA	Leading dimension of the array A. LDA >= max(1,M) if trans = PastixNoTrans, LDA >= max(1,N) otherwise.
[in]	beta	Scalar factor of B.
[in,out]	B	Matrix of size LDB-by-N. On exit, B = alpha * op(A) + beta * B
[in]	LDB	Leading dimension of the array B. LDB >= max(1,M)

Return values

PASTIX_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

PastixLower

PastixUpper

Definition at line 79 of file core_ztradd.c.

References core_zgeadd(), PASTIX_SUCCESS, PastixConjTrans, PastixLower, PastixNoTrans, PastixUpper, and PastixUpperLower.

◆ core_zscalo()

int core_zscalo	(	pastix_trans_t	trans,
		pastix_int_t	M,
		pastix_int_t	N,
		const pastix_complex64_t *	A,
		pastix_int_t	lda,
		const pastix_complex64_t *	D,
		pastix_int_t	ldd,
		pastix_complex64_t *	B,
		pastix_int_t	ldb
	)

Scale a matrix by a diagonal out of place.

Perform the operation: B <- op(A) * D, where A is a general matrix, and D a diagonal matrix.

Parameters

[in]	trans	PastixNoTrans: No transpose, op( A ) = A; PastixTrans: Transpose, op( A ) = A; PastixConjTrans: Conjugate Transpose, op( A ) = conj(A).
[in]	M	Number of rows of the matrix B. Number of rows of the matrix A.
[in]	N	Number of columns of the matrix B. Number of columns of the matrix A.
[in]	A	Matrix of size lda-by-N.
[in]	lda	Leading dimension of the array A. lda >= max(1,M).
[in]	D	Diagonal matrix of size ldd-by-N.
[in]	ldd	Leading dimension of the array D. ldd >= 1.
[in,out]	B	Matrix of size LDB-by-N.
[in]	ldb	Leading dimension of the array B. ldb >= max(1,M)

Return values

PASTIX_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
1,not	yet implemented

Definition at line 74 of file core_zscalo.c.

References pastix_int_t, PastixConjTrans, and PastixNoTrans.

Referenced by cpublok_zscalo(), and cpucblk_zscalo().

◆ core_zlrorthu_fullqr()

pastix_fixdbl_t core_zlrorthu_fullqr	(	pastix_int_t	M,
		pastix_int_t	N,
		pastix_int_t	rank,
		pastix_complex64_t *	U,
		pastix_int_t	ldu,
		pastix_complex64_t *	V,
		pastix_int_t	ldv
	)

Try to orthognalize the u part of the low-rank form, and update the v part accordingly using full QR.

This function considers a low-rank matrix resulting from the addition of two matrices B += A, with A of smaller or equal size to B. The product has the form: U * V^t

The U part of the low-rank form must be orthognalized to get the smaller possible rank during the rradd operation. This function perfoms this by applying a full QR factorization on the U part.

U = Q R, then U' = Q, and V' = R * V

Parameters

[in]	M	The number of rows of the u1u2 matrix.
[in]	N	The number of columns of the v1v2 matrix.
[in]	rank	The number of columns of the U matrix, and the number of rows of the V part in the v1v2 matrix.
[in,out]	U	The U matrix of size ldu -by- rank. On exit, Q from U = Q R.
[in]	ldu	The leading dimension of the U matrix. ldu >= max(1, M)
[in,out]	V	The V matrix of size ldv -by- N. On exit, R * V, with R from U = Q R.
[in]	ldv	The leading dimension of the V matrix. ldv >= max(1, rank)

Returns: The number of flops required to perform the operation.

Definition at line 83 of file core_zlrothu.c.

References pastix_int_t, and PastixLeft.

◆ core_zlrorthu_partialqr()

pastix_fixdbl_t core_zlrorthu_partialqr	(	pastix_int_t	M,
		pastix_int_t	N,
		pastix_int_t	r1,
		pastix_int_t *	r2ptr,
		pastix_int_t	offx,
		pastix_int_t	offy,
		pastix_complex64_t *	U,
		pastix_int_t	ldu,
		pastix_complex64_t *	V,
		pastix_int_t	ldv
	)

Try to orthognalize the U part of the low-rank form, and update the V part accordingly using partial QR.

This function considers a low-rank matrix resulting from the addition of two matrices B += A, with A of smaller or equal size to B. The product has the form: U * V^t

The U part of the low-rank form must be orthognalized to get the smaller possible rank during the rradd operation. This function perfoms this by applying a full QR factorization on the U part.

In that case, it takes benefit from the fact that U = [ u1, u2 ], and V = [ v1, v2 ] with u2 and v2 wich are matrices of respective size M2-by-r2, and r2-by-N2, offset by offx and offy

The steps are:

Scaling of u2 with removal of the null columns
Orthogonalization of u2 relatively to u1
Application of the update to v2
orthogonalization through QR of u2
Update of V

Parameters

[in]	M	The number of rows of the u1u2 matrix.
[in]	N	The number of columns of the v1v2 matrix.
[in]	r1	The number of columns of the U matrix in the u1 part, and the number of rows of the V part in the v1 part.
[in,out]	r2ptr	The number of columns of the U matrix in the u2 part, and the number of rows of the V part in the v2 part. On exit, this rank is reduced y the number of null columns found in U.
[in]	offx	The row offset of the matrix u2 in U.
[in]	offy	The column offset of the matrix v2 in V.
[in,out]	U	The U matrix of size ldu -by- rank. On exit, the orthogonalized U.
[in]	ldu	The leading dimension of the U matrix. ldu >= max(1, M)
[in,out]	V	The V matrix of size ldv -by- N. On exit, the updated V matrix.
[in]	ldv	The leading dimension of the V matrix. ldv >= max(1, rank)

Returns: The number of flops required to perform the operation.

Definition at line 197 of file core_zlrothu.c.

References core_zlrdbg_check_orthogonality_AB(), pastix_int_t, and PastixLeft.

◆ core_zlrorthu_cgs()

pastix_fixdbl_t core_zlrorthu_cgs	(	pastix_int_t	M1,
		pastix_int_t	N1,
		pastix_int_t	M2,
		pastix_int_t	N2,
		pastix_int_t	r1,
		pastix_int_t *	r2ptr,
		pastix_int_t	offx,
		pastix_int_t	offy,
		pastix_complex64_t *	U,
		pastix_int_t	ldu,
		pastix_complex64_t *	V,
		pastix_int_t	ldv
	)

Try to orthognalize the U part of the low-rank form, and update the V part accordingly using CGS.

This function considers a low-rank matrix resulting from the addition of two matrices B += A, with A of smaller or equal size to B. The product has the form: U * V^t

The U part of the low-rank form must be orthognalized to get the smaller possible rank during the rradd operation. This function perfoms this by applying a full QR factorization on the U part.

In that case, it takes benefit from the fact that U = [ u1, u2 ], and V = [ v1, v2 ] with u2 and v2 wich are matrices of respective size M2-by-r2, and r2-by-N2, offset by offx and offy

The steps are:

for each column of u2
- Scaling of u2 with removal of the null columns
- Orthogonalization of u2 relatively to u1
- Remove the column if null

Parameters

[in]	M1	The number of rows of the U matrix.
[in]	N1	The number of columns of the U matrix.
[in]	M2	The number of rows of the u2 part of the U matrix.
[in]	N2	The number of columns of the v2 part of the V matrix.
[in]	r1	The number of columns of the U matrix in the u1 part, and the number of rows of the V part in the v1 part.
[in,out]	r2ptr	The number of columns of the U matrix in the u2 part, and the number of rows of the V part in the v2 part. On exit, this rank is reduced y the number of null columns found in U.
[in]	offx	The row offset of the matrix u2 in U.
[in]	offy	The column offset of the matrix v2 in V.
[in,out]	U	The U matrix of size ldu -by- rank. On exit, the orthogonalized U.
[in]	ldu	The leading dimension of the U matrix. ldu >= max(1, M)
[in,out]	V	The V matrix of size ldv -by- N. On exit, the updated V matrix.
[in]	ldv	The leading dimension of the V matrix. ldv >= max(1, rank)

Returns: The number of flops required to perform the operation.

Definition at line 430 of file core_zlrothu.c.

References core_zlrdbg_check_orthogonality_AB(), and pastix_int_t.

◆ core_zpotrfsp()

void core_zpotrfsp	(	pastix_int_t	n,
		pastix_complex64_t *	A,
		pastix_int_t	lda,
		pastix_int_t *	nbpivots,
		double	criterion
	)

Compute the block static pivoting Cholesky factorization of the matrix n-by-n A = L * L^t .

Parameters

[in]	n	The number of rows and columns of the matrix A.
[in,out]	A	The matrix A to factorize with Cholesky factorization. The matrix is of size lda -by- n.
[in]	lda	The leading dimension of the matrix A.
[in,out]	nbpivots	Pointer to the number of piovting operations made during factorization. It is updated during this call
[in]	criterion	Threshold use for static pivoting. If diagonal value is under this threshold, its value is replaced by the threshold and the nu,ber of pivots is incremented.

Warning: This routine will fail if it discovers a null or negative value on the diagonal during factorization.

Definition at line 144 of file core_zpotrfsp.c.

References core_zpotf2sp(), and pastix_int_t.

◆ core_zpxtrfsp()

void core_zpxtrfsp	(	pastix_int_t	n,
		pastix_complex64_t *	A,
		pastix_int_t	lda,
		pastix_int_t *	nbpivots,
		double	criterion
	)

Compute the block static pivoting LL^t factorization of the matrix n-by-n A = L * L^t .

Parameters

[in]	n	The number of rows and columns of the matrix A.
[in,out]	A	The matrix A to factorize with LL^t factorization. The matrix is of size lda -by- n.
[in]	lda	The leading dimension of the matrix A.
[in,out]	nbpivots	Pointer to the number of piovting operations made during factorization. It is updated during this call
[in]	criterion	Threshold use for static pivoting. If diagonal value is under this threshold, its value is replaced by the threshold and the nu,ber of pivots is incremented.

Warning: This routine will fail if it discovers a null or negative value on the diagonal during factorization.

Definition at line 136 of file core_zpxtrfsp.c.

References core_zpxtf2sp(), and pastix_int_t.

◆ core_zgetrfsp()

void core_zgetrfsp	(	pastix_int_t	n,
		pastix_complex64_t *	A,
		pastix_int_t	lda,
		pastix_int_t *	nbpivots,
		double	criterion
	)

Compute the block static pivoting LU factorization of the matrix m-by-n A = L * U.

Parameters

[in]	n	The number of rows and columns of the matrix A.
[in,out]	A	The matrix A to factorize with LU factorization. The matrix is of size lda -by- n.
[in]	lda	The leading dimension of the matrix A.
[in,out]	nbpivots	Pointer to the number of piovting operations made during factorization. It is updated during this call
[in]	criterion	Threshold use for static pivoting. If diagonal value is under this threshold, its value is replaced by the threshold and the number of pivots is incremented.

Definition at line 138 of file core_zgetrfsp.c.

References core_zgetf2sp(), and pastix_int_t.

◆ core_zsytrfsp()

void core_zsytrfsp	(	pastix_int_t	n,
		pastix_complex64_t *	A,
		pastix_int_t	lda,
		pastix_int_t *	nbpivots,
		double	criterion
	)

Compute the block static pivoting factorization of the symmetric matrix n-by-n A such that A = L * D * L^t.

Parameters

[in]	n	The number of rows and columns of the matrix A.
[in,out]	A	The matrix A to factorize with LDL^t factorization. The matrix is of size lda -by- n.
[in]	lda	The leading dimension of the matrix A.
[in,out]	nbpivots	Pointer to the number of piovting operations made during factorization. It is updated during this call
[in]	criterion	Threshold use for static pivoting. If diagonal value is under this threshold, its value is replaced by the threshold and the nu,ber of pivots is incremented.

Definition at line 143 of file core_zsytrfsp.c.

References core_zsytf2sp(), and pastix_int_t.

PastixComplex32 BLAS kernels

PastixComplex32 Othogonalization kernels for low-rank updates

PastixComplex32 LAPACK kernels

PastixFloat BLAS kernels

PastixFloat Othogonalization kernels for low-rank updates

PastixFloat LAPACK kernels

PastixDouble BLAS kernels

PastixDouble Othogonalization kernels for low-rank updates

PastixDouble LAPACK kernels

PastixComplex64 BLAS kernels

PastixComplex64 Othogonalization kernels for low-rank updates

PastixComplex64 LAPACK kernels

Detailed Description

Function Documentation

◆ core_cplrnt()

◆ core_cgetmo()

◆ core_cgeadd()

◆ core_cgemdm()

◆ core_cpqrcp()

◆ core_crqrcp()

◆ core_crqrrt()

◆ core_ctqrcp()

◆ core_ctradd()

◆ core_cscalo()

◆ core_clrorthu_fullqr()

◆ core_clrorthu_partialqr()

◆ core_clrorthu_cgs()

◆ core_cpotrfsp()

◆ core_cpxtrfsp()

◆ core_cgetrfsp()

◆ core_csytrfsp()

◆ core_splrnt()

◆ core_sgetmo()

◆ core_sgeadd()

◆ core_sgemdm()

◆ core_spqrcp()

◆ core_srqrcp()

◆ core_srqrrt()

◆ core_stqrcp()

◆ core_stradd()

◆ core_sscalo()

◆ core_slrorthu_fullqr()

◆ core_slrorthu_partialqr()

◆ core_slrorthu_cgs()

◆ core_spotrfsp()

◆ core_sgetrfsp()

◆ core_ssytrfsp()

◆ core_dplrnt()

◆ core_dgetmo()

◆ core_dgeadd()

◆ core_dgemdm()

◆ core_dpqrcp()

◆ core_drqrcp()

◆ core_drqrrt()

◆ core_dtqrcp()

◆ core_dtradd()

◆ core_dscalo()

◆ core_dlrorthu_fullqr()

◆ core_dlrorthu_partialqr()

◆ core_dlrorthu_cgs()

◆ core_dpotrfsp()

◆ core_dgetrfsp()

◆ core_dsytrfsp()

◆ core_zplrnt()

◆ core_zgetmo()

◆ core_zgeadd()

◆ core_zgemdm()

◆ core_zpqrcp()

◆ core_zrqrcp()

◆ core_zrqrrt()

◆ core_ztqrcp()

◆ core_ztradd()

◆ core_zscalo()

◆ core_zlrorthu_fullqr()

◆ core_zlrorthu_partialqr()

◆ core_zlrorthu_cgs()

◆ core_zpotrfsp()

◆ core_zpxtrfsp()

◆ core_zgetrfsp()

◆ core_zsytrfsp()