25 #ifndef DOXYGEN_SHOULD_SKIP_THIS
105 cblas_cgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transB,
107 CBLAS_SADDR(cone), A->u, ldau,
109 CBLAS_SADDR(czero), AB->
u, M );
110 flops = FLOPS_CGEMM( M, N, K );
163 ldbv = ( B->rk == -1 ) ? -1 : B->rkmax;
168 if ( B->rk > Brkmin ) {
173 pastix_fixdbl_t flops1 = FLOPS_CGEMM( M, B->rk, K ) + FLOPS_CGEMM( M, N, B->rk );
174 pastix_fixdbl_t flops2 = FLOPS_CGEMM( K, N, B->rk ) + FLOPS_CGEMM( M, N, K );
181 if ( flops1 <= flops2 ) {
194 cblas_cgemm( CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
196 CBLAS_SADDR(cone), A->u, ldau,
198 CBLAS_SADDR(czero), tmp, M );
200 cblas_cgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transB,
202 CBLAS_SADDR(cone), tmp, M,
204 CBLAS_SADDR(czero), AB->
u, M );
221 cblas_cgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
223 CBLAS_SADDR(cone), B->u, ldbu,
225 CBLAS_SADDR(czero), tmp, K );
227 cblas_cgemm( CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
229 CBLAS_SADDR(cone), A->u, ldau,
231 CBLAS_SADDR(czero), AB->
u, M );
241 AB->
rkmax = B->rkmax;
251 cblas_cgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transB,
253 CBLAS_SADDR(cone), A->u, ldau,
255 CBLAS_SADDR(czero), AB->
u, M );
256 flops = FLOPS_CGEMM( M, B->rk, K );
308 ldav = ( A->rk == -1 ) ? -1 : A->rkmax;
314 if ( A->rk > Arkmin ) {
319 pastix_fixdbl_t flops1 = FLOPS_CGEMM( A->rk, N, K ) + FLOPS_CGEMM( M, N, A->rk );
320 pastix_fixdbl_t flops2 = FLOPS_CGEMM( M, K, A->rk ) + FLOPS_CGEMM( M, N, K );
327 if ( flops1 <= flops2 ) {
340 cblas_cgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transB,
342 CBLAS_SADDR(cone), A->v, ldav,
344 CBLAS_SADDR(czero), tmp, A->rk );
346 cblas_cgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
348 CBLAS_SADDR(cone), A->u, ldau,
350 CBLAS_SADDR(czero), AB->
u, M );
367 cblas_cgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
369 CBLAS_SADDR(cone), A->u, ldau,
371 CBLAS_SADDR(czero), tmp, M );
373 cblas_cgemm( CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
375 CBLAS_SADDR(cone), tmp, M,
377 CBLAS_SADDR(czero), AB->
u, M );
397 cblas_cgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transB,
399 CBLAS_SADDR(cone), A->v, ldav,
401 CBLAS_SADDR(czero), AB->
v, AB->
rkmax );
403 flops = FLOPS_CGEMM( A->rk, N, K );
453 assert( A->rk <= A->rkmax && A->rk > 0 );
454 assert( B->rk <= B->rkmax && B->rk > 0 );
459 ldau = (A->rk == -1) ? A->rkmax : M;
461 ldbu = (B->rk == -1) ? B->rkmax : N;
472 cblas_cgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transB,
474 CBLAS_SADDR(cone), A->v, ldav,
476 CBLAS_SADDR(czero), work2, A->rk );
477 flops = FLOPS_CGEMM( A->rk, B->rk, K );
482 flops += lowrank->core_ge2lr( lowrank->use_reltol, lowrank->tolerance, -1, A->rk, B->rk, work2, A->rk, &rArB );
487 if ( rArB.
rk == -1 ) {
488 if ( A->rk <= B->rk ) {
504 cblas_cgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transB,
506 CBLAS_SADDR(cone), work2, A->rk,
508 CBLAS_SADDR(czero), AB->
v, AB->
rkmax );
509 flops += FLOPS_CGEMM( A->rk, N, B->rk );
526 cblas_cgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
528 CBLAS_SADDR(cone), A->u, ldau,
530 CBLAS_SADDR(czero), AB->
u, M );
531 flops += FLOPS_CGEMM( M, B->rk, A->rk );
536 else if ( rArB.
rk == 0 ) {
555 AB->
v = work + M * rArB.
rk;
558 cblas_cgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
560 CBLAS_SADDR(cone), A->u, ldau,
562 CBLAS_SADDR(czero), AB->
u, M );
564 cblas_cgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transB,
566 CBLAS_SADDR(cone), rArB.
v, rArB.
rkmax,
568 CBLAS_SADDR(czero), AB->
v, rArB.
rk );
570 flops += FLOPS_CGEMM( M, rArB.
rk, A->rk ) + FLOPS_CGEMM( rArB.
rk, N, B->rk );
BEGIN_C_DECLS typedef int pastix_int_t
float _Complex pastix_complex32_t
#define PASTE_CORE_CLRMM_PARAMS(_a_)
Initialize all the parameters of the core_clrmm family functions to ease the access.
static pastix_complex32_t * core_clrmm_getws(core_clrmm_t *params, ssize_t newsize)
Function to get a workspace pointer if space is available in the one provided.
pastix_fixdbl_t core_clrfr2lr(core_clrmm_t *params, pastix_lrblock_t *AB, int *infomask, pastix_int_t Arkmin)
Perform the operation AB = op(A) * op(B), with B full-rank and A and AB low-rank.
#define PASTE_CORE_CLRMM_VOID
Void all the parameters of the core_clrmm family functions to silence warnings.
pastix_fixdbl_t core_cfrlr2lr(core_clrmm_t *params, pastix_lrblock_t *AB, int *infomask, pastix_int_t Brkmin)
Perform the operation AB = op(A) * op(B), with A full-rank and B and AB low-rank.
pastix_fixdbl_t core_cfrfr2lr(core_clrmm_t *params, pastix_lrblock_t *AB, int *infomask, pastix_int_t Kmax)
Perform the operation AB = op(A) * op(B), with A and B full-rank and AB low-rank.
pastix_fixdbl_t core_clrlr2lr(core_clrmm_t *params, pastix_lrblock_t *AB, int *infomask)
Perform the operation AB = op(A) * op(B), with A, B, and AB low-rank.
Structure to store all the parameters of the core_clrmm family functions.
#define PASTIX_LRM3_ALLOCV
Macro to specify if the V part of a low-rank matrix has been allocated and needs to be freed or not (U...
#define PASTIX_LRM3_TRANSB
Macro to specify if the operator on B still needs to be applied to the V part of the low-rank ma...
#define PASTIX_LRM3_ALLOCU
Macro to specify if the U part of a low-rank matrix has been allocated and needs to be freed or not (U...
#define PASTIX_LRM3_ORTHOU
Macro to specify if the U part of a low-rank matrix is orthogonal or not (Used in LRMM functions).
The block low-rank structure to hold a matrix in low-rank form.
void core_clrfree(pastix_lrblock_t *A)
Free a low-rank matrix.