28 #ifndef DOXYGEN_SHOULD_SKIP_THIS
113 int SEED[4] = {26, 67, 52, 197};
120 pastix_int_t size_B, size_O, size_W, size_Y, size_A, size_T, sublw;
124 float tolB = sqrtf( (
float)(bp) ) * tol;
127 minMN = pastix_imin(m, n);
131 maxrank = pastix_imin( maxrank, minMN );
138 size_W = n * maxrank;
144 sublw = n * bp + pastix_imax( bp, n );
145 sublw = pastix_imax( sublw, size_O );
146 sublw = pastix_imax( sublw, b * maxrank );
148 lwkopt = size_A + size_Y + size_W
149 + size_T + size_B + n + sublw;
162 if (lda < pastix_imax(1, m)) {
165 if( lwork < lwkopt ) {
175 if ( maxrank == 0 ) {
180 norm = LAPACKE_clange_work( LAPACK_COL_MAJOR,
'f', m, n,
200 ret = LAPACKE_claset_work( LAPACK_COL_MAJOR,
'A', b, b,
208 ret = LAPACKE_clacpy_work( LAPACK_COL_MAJOR,
'A', m, n,
213 for (j=0; j<n; j++) jpvt[j] = j;
218 ret = LAPACKE_clarnv_work(3, SEED, size_O, omega);
220 cblas_cgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
222 CBLAS_SADDR(cone), omega, bp,
224 CBLAS_SADDR(czero), B, ldb );
230 ib = pastix_imin( b, minMN-rk );
235 subw, sublw, rwork );
249 if ( (rk + d) > maxrank ) {
255 for (j = rk; j < rk + d; j++) {
256 if (jpvt_b[j] >= 0) {
261 jpvt_b[k] = - jpvt_b[k] - 1;
263 while( jpvt_b[in] >= 0 ) {
266 cblas_cswap( m, A + k * lda, 1,
268 cblas_cswap( m, AP + k * m, 1,
276 cblas_cswap( rk, WT + k * ldw, 1,
281 jpvt_b[in] = - jpvt_b[in] - 1;
290 cblas_cgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
292 CBLAS_SADDR(mcone), A + rk, lda,
294 CBLAS_SADDR(cone), A + rk * lda + rk, lda );
300 ret = LAPACKE_cgeqrf_work( LAPACK_COL_MAJOR, m-rk, d,
301 A + rk * lda + rk, lda, tau + rk,
305 ret = LAPACKE_clarft_work( LAPACK_COL_MAJOR,
'F',
'C', m-rk, d,
306 A + rk * lda + rk, lda, tau + rk, T, b );
319 ret = LAPACKE_clacpy_work( LAPACK_COL_MAJOR,
'L', d-1, d-1,
320 A + lda * rk + rk + 1, lda,
325 cblas_cgemm( CblasColMajor, CblasConjTrans, CblasNoTrans,
327 CBLAS_SADDR(cone), Y, ldy,
329 CBLAS_SADDR(czero), WT + rk, ldw );
333 cblas_cgemm( CblasColMajor, CblasConjTrans, CblasNoTrans,
335 CBLAS_SADDR(cone), A + rk * lda + rk + d, lda,
337 CBLAS_SADDR(cone), WT + rk, ldw );
347 cblas_cgemm( CblasColMajor, CblasConjTrans, CblasNoTrans,
349 CBLAS_SADDR(cone), Y, ldy,
351 CBLAS_SADDR(czero), subw, d );
355 cblas_cgemm( CblasColMajor, CblasConjTrans, CblasNoTrans,
357 CBLAS_SADDR(cone), A + rk * lda + rk + d, lda,
359 CBLAS_SADDR(cone), subw, d );
362 cblas_cgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
364 CBLAS_SADDR(mcone), subw, d,
366 CBLAS_SADDR(cone), WT + rk, ldw );
373 cblas_ctrmm( CblasColMajor, CblasLeft, CblasUpper, CblasConjTrans, CblasNonUnit,
374 d, n, CBLAS_SADDR(cone),
380 cblas_cgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
382 CBLAS_SADDR(mcone), A + rk, lda,
383 WT + (rk+d)*ldw, ldw,
384 CBLAS_SADDR(cone), A + rk + (rk+d)*lda, lda );
386 cblas_cgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
388 CBLAS_SADDR(mcone), Y, ldy,
389 WT + rk + (rk+d)*ldw, ldw,
390 CBLAS_SADDR(cone), A + rk + (rk+d)*lda, lda );
393 if ( loop && (rk+d < maxrank) ) {
398 ret = LAPACKE_claset_work( LAPACK_COL_MAJOR,
'L', d-1, d-1,
399 0, 0, B + rk*ldb + 1, ldb );
404 cblas_ctrsm( CblasColMajor, CblasRight, CblasUpper,
405 CblasNoTrans, CblasNonUnit,
407 CBLAS_SADDR(cone), A + rk*lda + rk, lda,
411 cblas_cgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
413 CBLAS_SADDR(mcone), B + rk *ldb, ldb,
414 A + (rk+d)*lda + rk, lda,
415 CBLAS_SADDR(cone), B + (rk+d)*ldb, ldb );
537 const void *alphaptr,
548 M1, N1, A, M2, N2, B, offx, offy );
Manage nancheck for lowrank kernels. This header describes all the LAPACKE functions used for low-ran...
BEGIN_C_DECLS typedef int pastix_int_t
float _Complex pastix_complex32_t
int core_ctqrcp(float tol, pastix_int_t maxrank, int refine, pastix_int_t nb, pastix_int_t m, pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *jpvt, pastix_complex32_t *tau, pastix_complex32_t *work, pastix_int_t lwork, float *rwork)
Compute a randomized QR factorization with truncated updates.
int core_cpqrcp(float tol, pastix_int_t maxrank, int full_update, pastix_int_t nb, pastix_int_t m, pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *jpvt, pastix_complex32_t *tau, pastix_complex32_t *work, pastix_int_t lwork, float *rwork)
Compute a rank-revealing QR factorization.
Structure to define the type of function to use for the low-rank kernels and their parameters.
The block low-rank structure to hold a matrix in low-rank form.
pastix_fixdbl_t core_cge2lr_tqrcp(int use_reltol, pastix_fixdbl_t tol, pastix_int_t rklimit, pastix_int_t m, pastix_int_t n, const void *A, pastix_int_t lda, pastix_lrblock_t *Alr)
Convert a full-rank matrix into a low-rank matrix, using TQRCP.
pastix_fixdbl_t core_crradd_qr(core_crrqr_cp_t rrqrfct, const pastix_lr_t *lowrank, pastix_trans_t transA1, const void *alphaptr, pastix_int_t M1, pastix_int_t N1, const pastix_lrblock_t *A, pastix_int_t M2, pastix_int_t N2, pastix_lrblock_t *B, pastix_int_t offx, pastix_int_t offy)
Template to perform the addition of two low-rank structures with compression kernel based on QR decom...
pastix_fixdbl_t core_cge2lr_qrcp(core_crrqr_cp_t rrqrfct, int use_reltol, pastix_fixdbl_t tol, pastix_int_t rklimit, pastix_int_t m, pastix_int_t n, const void *Avoid, pastix_int_t lda, pastix_lrblock_t *Alr)
Template to convert a full rank matrix into a low rank matrix through QR decompositions.
pastix_fixdbl_t core_crradd_tqrcp(const pastix_lr_t *lowrank, pastix_trans_t transA1, const void *alphaptr, pastix_int_t M1, pastix_int_t N1, const pastix_lrblock_t *A, pastix_int_t M2, pastix_int_t N2, pastix_lrblock_t *B, pastix_int_t offx, pastix_int_t offy)
Add two LR structures A=(-u1) v1^T and B=u2 v2^T into u2 v2^T.
enum pastix_trans_e pastix_trans_t
Transposition.