27 #ifndef DOXYGEN_SHOULD_SKIP_THIS
28 static float msone = -1.0;
29 static float sone = 1.0;
30 static float szero = 0.0;
128 int SEED[4] = {26, 67, 52, 197};
140 float tolB = sqrtf( (
float)(bp) ) * tol;
143 float *tau_b = B + size_B;
144 float *omega = tau_b + n;
145 float *subw = tau_b + n;
147 sublw = pastix_imax( sublw, size_O );
150 #if defined(PRECISION_c) || defined(PRECISION_z)
156 lwkopt = size_B + n + sublw;
158 work[0] = (float)lwkopt;
168 if (lda < pastix_imax(1, m)) {
171 if( lwork < lwkopt ) {
176 minMN = pastix_imin(m, n);
180 maxrank = pastix_imin( maxrank, minMN );
187 if ( maxrank == 0 ) {
192 norm = LAPACKE_slange_work( LAPACK_COL_MAJOR,
'f', m, n,
200 #if defined(PASTIX_DEBUG_LR)
201 B = malloc( size_B *
sizeof(
float) );
202 tau_b = malloc( n *
sizeof(
float) );
203 omega = malloc( size_O *
sizeof(
float) );
204 subw = malloc( sublw *
sizeof(
float) );
208 for (j=0; j<n; j++) jpvt[j] = j;
211 ret = LAPACKE_slarnv_work(3, SEED, size_O, omega);
215 cblas_sgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
224 ib = pastix_imin( b, minMN-rk );
245 if ( (rk + d) > maxrank ) {
251 for (j = rk; j < rk + d; j++) {
252 if (jpvt_b[j] >= 0) {
257 jpvt_b[k] = - jpvt_b[k] - 1;
259 while( jpvt_b[in] >= 0 ) {
262 cblas_sswap( m, A + k * lda, 1,
270 jpvt_b[in] = - jpvt_b[in] - 1;
280 ret = LAPACKE_sgeqrf_work( LAPACK_COL_MAJOR, m-rk, d,
281 A + rk*lda + rk, lda, tau + rk,
290 ret = LAPACKE_sormqr_work( LAPACK_COL_MAJOR,
'L', trans,
292 A + rk *lda + rk, lda, tau + rk,
293 A + (rk+d)*lda + rk, lda,
301 ret = LAPACKE_slaset_work( LAPACK_COL_MAJOR,
'L', d-1, d-1,
302 0, 0, B + rk*ldb + 1, ldb );
308 cblas_strsm( CblasColMajor, CblasRight, CblasUpper,
309 CblasNoTrans, CblasNonUnit,
311 (sone), A + rk*lda + rk, lda,
314 cblas_sgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
316 (msone), B + rk *ldb, ldb,
317 A + (rk+d)*lda + rk, lda,
318 (sone), B + (rk+d)*ldb, ldb );
325 if ( refine && !loop && (rk < maxrank) ) {
332 A + rk * lda + rk, lda,
334 work, lwork, rwork );
337 for (j=0; j<d; j++) {
338 if (jpvt_b[j] >= 0) {
343 jpvt_b[k] = - jpvt_b[k] - 1;
345 while( jpvt_b[in] >= 0 ) {
348 cblas_sswap( rk, A + (rk + k ) * lda, 1,
349 A + (rk + in) * lda, 1 );
352 jpvt[rk + k] = jpvt[rk + in];
353 jpvt[rk + in] = itmp;
356 jpvt_b[in] = - jpvt_b[in] - 1;
364 if ( rk > maxrank ) {
371 #if defined(PASTIX_DEBUG_LR)
493 const void *alphaptr,
504 M1, N1, A, M2, N2, B, offx, offy );
BEGIN_C_DECLS typedef int pastix_int_t
int core_spqrcp(float tol, pastix_int_t maxrank, int full_update, pastix_int_t nb, pastix_int_t m, pastix_int_t n, float *A, pastix_int_t lda, pastix_int_t *jpvt, float *tau, float *work, pastix_int_t lwork, float *rwork)
Compute a rank-reavealing QR factorization.
int core_srqrcp(float tol, pastix_int_t maxrank, int refine, pastix_int_t nb, pastix_int_t m, pastix_int_t n, float *A, pastix_int_t lda, pastix_int_t *jpvt, float *tau, float *work, pastix_int_t lwork, float *rwork)
Compute a randomized QR factorization.
Structure to define the type of function to use for the low-rank kernels and their parameters.
The block low-rank structure to hold a matrix in low-rank form.
pastix_fixdbl_t core_sge2lr_rqrcp(int use_reltol, pastix_fixdbl_t tol, pastix_int_t rklimit, pastix_int_t m, pastix_int_t n, const void *A, pastix_int_t lda, pastix_lrblock_t *Alr)
Convert a full rank matrix in a low rank matrix, using RQRCP.
pastix_fixdbl_t core_sge2lr_qrcp(core_srrqr_cp_t rrqrfct, int use_reltol, pastix_fixdbl_t tol, pastix_int_t rklimit, pastix_int_t m, pastix_int_t n, const void *Avoid, pastix_int_t lda, pastix_lrblock_t *Alr)
Template to convert a full rank matrix into a low rank matrix through QR decompositions.
pastix_fixdbl_t core_srradd_qr(core_srrqr_cp_t rrqrfct, const pastix_lr_t *lowrank, pastix_trans_t transA1, const void *alphaptr, pastix_int_t M1, pastix_int_t N1, const pastix_lrblock_t *A, pastix_int_t M2, pastix_int_t N2, pastix_lrblock_t *B, pastix_int_t offx, pastix_int_t offy)
Template to perform the addition of two low-rank structures with compression kernel based on QR decom...
pastix_fixdbl_t core_srradd_rqrcp(const pastix_lr_t *lowrank, pastix_trans_t transA1, const void *alphaptr, pastix_int_t M1, pastix_int_t N1, const pastix_lrblock_t *A, pastix_int_t M2, pastix_int_t N2, pastix_lrblock_t *B, pastix_int_t offx, pastix_int_t offy)
Add two LR structures A=(-u1) v1^T and B=u2 v2^T into u2 v2^T.
enum pastix_trans_e pastix_trans_t
Transpostion.
Manage nancheck for lowrank kernels. This header describes all the LAPACKE functions used for low-ran...