25 #ifndef DOXYGEN_SHOULD_SKIP_THIS
26 static pastix_complex64_t zone = 1.0;
27 static pastix_complex64_t zzero = 0.0;
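These two constants supply the alpha = 1 and beta = 0 scalars of every cblas_zgemm call in the listing below; CBLAS takes complex scalars by address, so they are passed through CBLAS_SADDR, which in PaStiX is expected to expand to the address of its argument.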
97 work = malloc( M * N * sizeof(pastix_complex64_t) );
105 cblas_zgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transB,
107 CBLAS_SADDR(zone), A->u, ldau,
109 CBLAS_SADDR(zzero), AB->u, M );
110 flops = FLOPS_ZGEMM( M, N, K );
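Lines 106 and 108 of the call above (the GEMM dimensions and the second operand) are not part of this excerpt. A plausible reconstruction, taking the dimensions from FLOPS_ZGEMM( M, N, K ) and assuming the dense block B->u with leading dimension ldbu as the missing operand, is:

cblas_zgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transB,
             M, N, K,                         /* assumed: elided line 106 */
             CBLAS_SADDR(zone),  A->u,  ldau,
                                 B->u,  ldbu, /* assumed: elided line 108 */
             CBLAS_SADDR(zzero), AB->u, M );

Both operands being full rank, the product is formed directly as a dense M-by-N block in AB->u.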
163 ldbv = ( B->rk == -1 ) ? -1 : B->rkmax;
168 if ( B->rk > Brkmin ) {
173 pastix_fixdbl_t flops1 = FLOPS_ZGEMM( M, B->rk, K ) + FLOPS_ZGEMM( M, N, B->rk );
174 pastix_fixdbl_t flops2 = FLOPS_ZGEMM( K, N, B->rk ) + FLOPS_ZGEMM( M, N, K );
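/* flops1 is the cost of keeping B in low-rank form: an M-by-B->rk product with
 * inner dimension K followed by an M-by-N product with inner dimension B->rk.
 * flops2 is the cost of first expanding op(B) into a dense K-by-N matrix and
 * then performing the plain M-by-N GEMM with inner dimension K. The cheaper
 * ordering is selected below. */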
175 pastix_complex64_t *tmp;
181 if ( flops1 <= flops2 ) {
183 work = malloc( (M * B->rk + M * N) * sizeof(pastix_complex64_t) );
194 cblas_zgemm( CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
196 CBLAS_SADDR(zone), A->u, ldau,
198 CBLAS_SADDR(zzero), tmp, M );
200 cblas_zgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transB,
202 CBLAS_SADDR(zone), tmp, M,
204 CBLAS_SADDR(zzero), AB->u, M );
210 work = malloc( (K * N + M * N) * sizeof(pastix_complex64_t) );
221 cblas_zgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
223 CBLAS_SADDR(zone), B->u, ldbu,
225 CBLAS_SADDR(zzero), tmp, K );
227 cblas_zgemm( CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
229 CBLAS_SADDR(zone), A->u, ldau,
231 CBLAS_SADDR(zzero), AB->u, M );
241 AB->rkmax = B->rkmax;
246 work = malloc( M * B->rk * sizeof(pastix_complex64_t) );
251 cblas_zgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transB,
253 CBLAS_SADDR(zone), A->u, ldau,
255 CBLAS_SADDR(zzero), AB->u, M );
256 flops = FLOPS_ZGEMM( M, B->rk, K );
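When B->rk is small enough (the other side of the test at line 168), the product stays in low-rank form: AB inherits B's rkmax, a new U factor of size M-by-B->rk is computed into AB->u, and B's V side can presumably be reused as AB's V factor, which is what the PASTIX_LRM3_ALLOCV and PASTIX_LRM3_TRANSB bits of infomask (described at the end of this page) are there to record.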
308 ldav = ( A->rk == -1 ) ? -1 : A->rkmax;
314 if ( A->rk > Arkmin ) {
319 pastix_fixdbl_t flops1 = FLOPS_ZGEMM( A->rk, N, K ) + FLOPS_ZGEMM( M, N, A->rk );
320 pastix_fixdbl_t flops2 = FLOPS_ZGEMM( M, K, A->rk ) + FLOPS_ZGEMM( M, N, K );
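/* Symmetric cost comparison with A as the low-rank operand: flops1 contracts
 * through A->rk first (an A->rk-by-N then an M-by-N product), while flops2
 * first expands A into a dense M-by-K block and then applies the full dense
 * GEMM. */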
321 pastix_complex64_t *tmp;
327 if ( flops1 <= flops2 ) {
329 work = malloc( (A->rk * N + M * N) * sizeof(pastix_complex64_t) );
340 cblas_zgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transB,
342 CBLAS_SADDR(zone), A->v, ldav,
344 CBLAS_SADDR(zzero), tmp, A->rk );
346 cblas_zgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
348 CBLAS_SADDR(zone), A->u, ldau,
350 CBLAS_SADDR(zzero), AB->u, M );
356 work = malloc( (M * K + M * N) * sizeof(pastix_complex64_t) );
367 cblas_zgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
369 CBLAS_SADDR(zone), A->u, ldau,
371 CBLAS_SADDR(zzero), tmp, M );
373 cblas_zgemm( CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
375 CBLAS_SADDR(zone), tmp, M,
377 CBLAS_SADDR(zzero), AB->u, M );
392 work = malloc( A->rk * N * sizeof(pastix_complex64_t) );
397 cblas_zgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transB,
399 CBLAS_SADDR(zone), A->v, ldav,
401 CBLAS_SADDR(zzero), AB->v, AB->rkmax );
403 flops = FLOPS_ZGEMM( A->rk, N, K );
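As in the previous kernel, a small A->rk keeps the result low-rank: the GEMM at line 397 forms the new V factor of size A->rk-by-N in AB->v, and A->u can presumably be reused as the U factor. Since B is full rank here, the elided arguments are fairly unambiguous; a sketch of the complete call, with the reconstructed parts marked as assumptions, is:

cblas_zgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transB,
             A->rk, N, K,                   /* assumed: dimensions from the flop count */
             CBLAS_SADDR(zone),  A->v,  ldav,
                                 B->u,  ldbu, /* assumed: dense B operand */
             CBLAS_SADDR(zzero), AB->v, AB->rkmax );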
448 pastix_complex64_t *work2;
453 assert( A->rk <= A->rkmax && A->rk > 0 );
454 assert( B->rk <= B->rkmax && B->rk > 0 );
459 ldau = (A->rk == -1) ? A->rkmax : M;
461 ldbu = (B->rk == -1) ? B->rkmax : N;
465 work2 = malloc( A->rk * B->rk * sizeof(pastix_complex64_t) );
472 cblas_zgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transB,
474 CBLAS_SADDR(zone), A->v, ldav,
476 CBLAS_SADDR(zzero), work2, A->rk );
477 flops = FLOPS_ZGEMM( A->rk, B->rk, K );
482 flops += lowrank->core_ge2lr( lowrank->use_reltol, lowrank->tolerance, -1, A->rk, B->rk, work2, A->rk, &rArB );
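/* work2 now holds the small A->rk-by-B->rk core of op(A) * op(B); the ge2lr
 * kernel selected in the lowrank structure recompresses it into rArB with the
 * configured tolerance (absolute or relative depending on use_reltol). The
 * branches below handle the possible outcomes: rk == -1 (no compression gain,
 * the core is folded into one of the two sides), rk == 0 (numerically
 * negligible product), and the general low-rank case. */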
487 if ( rArB.rk == -1 ) {
488 if ( A->rk <= B->rk ) {
494 work = malloc( A->rk * N * sizeof(pastix_complex64_t) );
504 cblas_zgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transB,
506 CBLAS_SADDR(zone), work2, A->rk,
508 CBLAS_SADDR(zzero), AB->v, AB->rkmax );
509 flops += FLOPS_ZGEMM( A->rk, N, B->rk );
517 work = malloc( B->rk * M * sizeof(pastix_complex64_t) );
526 cblas_zgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
528 CBLAS_SADDR(zone), A->u, ldau,
530 CBLAS_SADDR(zzero), AB->u, M );
531 flops += FLOPS_ZGEMM( M, B->rk, A->rk );
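/* When the recompression does not pay off (rArB.rk == -1), the dense core in
 * work2 is folded into whichever side has the smaller rank: for A->rk <= B->rk
 * it forms a new V factor of rank A->rk (A->u presumably kept as U), otherwise
 * a new U factor of rank B->rk, so AB ends up with rank min( A->rk, B->rk )
 * either way. */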
536 else if ( rArB.rk == 0 ) {
548 work = malloc( (M + N) * rArB.rk * sizeof(pastix_complex64_t) );
555 AB->v = work + M * rArB.rk;
558 cblas_zgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
560 CBLAS_SADDR(zone), A->u, ldau,
562 CBLAS_SADDR(zzero), AB->u, M );
564 cblas_zgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transB,
566 CBLAS_SADDR(zone), rArB.v, rArB.rkmax,
568 CBLAS_SADDR(zzero), AB->v, rArB.rk );
570 flops += FLOPS_ZGEMM( M, rArB.rk, A->rk ) + FLOPS_ZGEMM( rArB.rk, N, B->rk );
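In the general case, a single workspace of (M + N) * rArB.rk elements holds both factors of the result: AB->u = work and AB->v = work + M * rArB.rk. The first GEMM presumably multiplies A->u by rArB.u to build the M-by-rArB.rk U factor, and the second multiplies rArB.v by B's factor (with op(B) applied) to build the rArB.rk-by-N V factor, consistent with the flop count on line 570.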
typedef int pastix_int_t
#define PASTE_CORE_ZLRMM_PARAMS(_a_)
Initialize all the parameters of the core_zlrmm family functions to ease access to them.
static pastix_complex64_t * core_zlrmm_getws(core_zlrmm_t *params, ssize_t newsize)
Function to get a workspace pointer if space is available in the one provided.
pastix_fixdbl_t core_zfrlr2lr(core_zlrmm_t *params, pastix_lrblock_t *AB, int *infomask, pastix_int_t Brkmin)
Perform the operation AB = op(A) * op(B), with A full-rank and B and AB low-rank.
#define PASTE_CORE_ZLRMM_VOID
Void all the parameters of the core_zlrmm family functions to silence compiler warnings.
pastix_fixdbl_t core_zlrfr2lr(core_zlrmm_t *params, pastix_lrblock_t *AB, int *infomask, pastix_int_t Arkmin)
Perform the operation AB = op(A) * op(B), with B full-rank and A and AB low-rank.
pastix_fixdbl_t core_zfrfr2lr(core_zlrmm_t *params, pastix_lrblock_t *AB, int *infomask, pastix_int_t Kmax)
Perform the operation AB = op(A) * op(B), with A and B full-rank and AB low-rank.
pastix_fixdbl_t core_zlrlr2lr(core_zlrmm_t *params, pastix_lrblock_t *AB, int *infomask)
Perform the operation AB = op(A) * op(B), with A, B, and AB low-rank.
Structure to store all the parameters of the core_zlrmm family functions.
#define PASTIX_LRM3_ALLOCV
Macro to specify if the V part of a low-rank matrix has been allocated and needs to be freed or not (Used in LRMM functions).
#define PASTIX_LRM3_TRANSB
Macro to specify if the operator on B still needs to be applied to the V part of the low-rank matrix or not (Used in LRMM functions).
#define PASTIX_LRM3_ALLOCU
Macro to specify if the U part of a low-rank matrix has been allocated and needs to be freed or not (Used in LRMM functions).
#define PASTIX_LRM3_ORTHOU
Macro to specify if the U part of a low-rank matrix is orthogonal or not (Used in LRMM functions).
The block low-rank structure to hold a matrix in low-rank form.
void core_zlrfree(pastix_lrblock_t *A)
Free a low-rank matrix.
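Taken together, the four kernels documented above cover every dense/low-rank combination of the two operands. A minimal sketch of how a caller might select among them (this is not the PaStiX call site; A, B, AB, params, infomask and the rank thresholds Kmax, Brkmin and Arkmin are assumed to be already set up, and rk == -1 denotes a dense block, as in the code above):

if ( A->rk == -1 ) {
    if ( B->rk == -1 )
        flops = core_zfrfr2lr( params, &AB, &infomask, Kmax );   /* dense A, dense B     */
    else
        flops = core_zfrlr2lr( params, &AB, &infomask, Brkmin ); /* dense A, low-rank B  */
}
else {
    if ( B->rk == -1 )
        flops = core_zlrfr2lr( params, &AB, &infomask, Arkmin ); /* low-rank A, dense B  */
    else
        flops = core_zlrlr2lr( params, &AB, &infomask );         /* low-rank A and B     */
}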