163 ldbv = ( B->rk == -1 ) ? -1 : B->rkmax;
168 if ( B->rk > Brkmin ) {
173 pastix_fixdbl_t flops1 = FLOPS_SGEMM( M, B->rk, K ) + FLOPS_SGEMM( M, N, B->rk );
174 pastix_fixdbl_t flops2 = FLOPS_SGEMM( K, N, B->rk ) + FLOPS_SGEMM( M, N, K );
181 if ( flops1 <= flops2 ) {
183 work = malloc( (M * B->rk + M * N) *
sizeof(
float) );
194 cblas_sgemm( CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
200 cblas_sgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transB,
210 work = malloc( (K * N + M * N) *
sizeof(
float) );
221 cblas_sgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
227 cblas_sgemm( CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
241 AB->
rkmax = B->rkmax;
246 work = malloc( M * B->rk *
sizeof(
float) );
251 cblas_sgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transB,
256 flops = FLOPS_SGEMM( M, B->rk, K );
308 ldav = ( A->rk == -1 ) ? -1 : A->rkmax;
314 if ( A->rk > Arkmin ) {
319 pastix_fixdbl_t flops1 = FLOPS_SGEMM( A->rk, N, K ) + FLOPS_SGEMM( M, N, A->rk );
320 pastix_fixdbl_t flops2 = FLOPS_SGEMM( M, K, A->rk ) + FLOPS_SGEMM( M, N, K );
327 if ( flops1 <= flops2 ) {
329 work = malloc( (A->rk * N + M * N) *
sizeof(
float) );
340 cblas_sgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transB,
344 (szero), tmp, A->rk );
346 cblas_sgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
356 work = malloc( (M * K + M * N) *
sizeof(
float) );
367 cblas_sgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
373 cblas_sgemm( CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
392 work = malloc( A->rk * N *
sizeof(
float) );
397 cblas_sgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transB,
401 (szero), AB->
v, AB->
rkmax );
403 flops = FLOPS_SGEMM( A->rk, N, K );
453 assert( A->rk <= A->rkmax && A->rk > 0 );
454 assert( B->rk <= B->rkmax && B->rk > 0 );
459 ldau = (A->rk == -1) ? A->rkmax : M;
461 ldbu = (B->rk == -1) ? B->rkmax : N;
465 work2 = malloc( A->rk * B->rk *
sizeof(
float) );
472 cblas_sgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transB,
476 (szero), work2, A->rk );
477 flops = FLOPS_SGEMM( A->rk, B->rk, K );
482 flops += lowrank->core_ge2lr( lowrank->use_reltol, lowrank->tolerance, -1, A->rk, B->rk, work2, A->rk, &rArB );
487 if ( rArB.
rk == -1 ) {
488 if ( A->rk <= B->rk ) {
494 work = malloc( A->rk * N *
sizeof(
float) );
504 cblas_sgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transB,
506 (sone), work2, A->rk,
508 (szero), AB->
v, AB->
rkmax );
509 flops += FLOPS_SGEMM( A->rk, N, B->rk );
517 work = malloc( B->rk * M *
sizeof(
float) );
526 cblas_sgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
531 flops += FLOPS_SGEMM( M, B->rk, A->rk );
536 else if ( rArB.
rk == 0 ) {
548 work = malloc( (M + N) * rArB.
rk *
sizeof(
float) );
555 AB->
v = work + M * rArB.
rk;
558 cblas_sgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
564 cblas_sgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transB,
566 (sone), rArB.
v, rArB.
rkmax,
568 (szero), AB->
v, rArB.
rk );
570 flops += FLOPS_SGEMM( M, rArB.
rk, A->rk ) + FLOPS_SGEMM( rArB.
rk, N, B->rk );
pastix_fixdbl_t core_slrlr2lr(core_slrmm_t *params, pastix_lrblock_t *AB, int *infomask)
Perform the operation AB = op(A) * op(B), with A, B, and AB low-rank.
pastix_fixdbl_t core_sfrlr2lr(core_slrmm_t *params, pastix_lrblock_t *AB, int *infomask, pastix_int_t Brkmin)
Perform the operation AB = op(A) * op(B), with A full-rank and B and AB low-rank.
pastix_fixdbl_t core_slrfr2lr(core_slrmm_t *params, pastix_lrblock_t *AB, int *infomask, pastix_int_t Arkmin)
Perform the operation AB = op(A) * op(B), with B full-rank and A and AB low-rank.
pastix_fixdbl_t core_sfrfr2lr(core_slrmm_t *params, pastix_lrblock_t *AB, int *infomask, pastix_int_t Kmax)
Perform the operation AB = op(A) * op(B), with A and B full-rank and AB low-rank.