22 #include "kernels_trace.h"
23 #ifndef DOXYGEN_SHOULD_SKIP_THIS
24 static float sone = 1.0;
25 static float szero = 0.0;
50 pastix_int_t ldau, ldbu, ldcu;
52 pastix_fixdbl_t flops;
59 Cptr += ldcu * offy + offx;
61 pastix_atomic_lock( lock );
62 assert( C->rk == -1 );
67 cblas_sgemm( CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
72 flops = FLOPS_SGEMM( M, N, K );
74 pastix_atomic_unlock( lock );
105 pastix_int_t ldau, ldbu, ldbv, ldcu;
106 pastix_fixdbl_t flops1 = FLOPS_SGEMM( M, B->rk, K ) + FLOPS_SGEMM( M, N, B->rk );
107 pastix_fixdbl_t flops2 = FLOPS_SGEMM( K, N, B->rk ) + FLOPS_SGEMM( M, N, K );
108 pastix_fixdbl_t flops;
113 ldbv = ( B->rk == -1 ) ? -1 : B->rkmax;
117 Cptr += ldcu * offy + offx;
122 if ( flops1 <= flops2 ) {
124 work = malloc( M * B->rk *
sizeof(
float) );
131 cblas_sgemm( CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
137 pastix_atomic_lock( lock );
138 assert( C->rk == -1 );
139 cblas_sgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transB,
143 (beta), Cptr, ldcu );
145 pastix_atomic_unlock( lock );
149 work = malloc( K * N *
sizeof(
float) );
156 cblas_sgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
162 pastix_atomic_lock( lock );
163 assert( C->rk == -1 );
164 cblas_sgemm( CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
168 (beta), Cptr, ldcu );
171 pastix_atomic_unlock( lock );
206 pastix_int_t ldau, ldav, ldbu, ldcu;
207 pastix_fixdbl_t flops1 = FLOPS_SGEMM( A->rk, N, K ) + FLOPS_SGEMM( M, N, A->rk );
208 pastix_fixdbl_t flops2 = FLOPS_SGEMM( M, K, A->rk ) + FLOPS_SGEMM( M, N, K );
209 pastix_fixdbl_t flops;
213 ldav = ( A->rk == -1 ) ? -1 : A->rkmax;
218 Cptr += ldcu * offy + offx;
223 if ( flops1 <= flops2 ) {
225 work = malloc( A->rk * N *
sizeof(
float) );
232 cblas_sgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transB,
236 (szero), work, A->rk );
238 pastix_atomic_lock( lock );
239 assert( C->rk == -1 );
240 cblas_sgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
244 (beta), Cptr, ldcu );
247 pastix_atomic_unlock( lock );
251 work = malloc( M * K *
sizeof(
float) );
258 cblas_sgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
264 pastix_atomic_lock( lock );
265 assert( C->rk == -1 );
266 cblas_sgemm( CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
270 (beta), Cptr, ldcu );
273 pastix_atomic_unlock( lock );
312 pastix_fixdbl_t flops;
316 Cptr += ldcu * offy + offx;
319 assert( AB.
rk != -1 );
320 assert( AB.
rkmax != -1 );
329 pastix_atomic_lock( lock );
330 assert( C->rk == -1 );
332 cblas_sgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)trans,
336 (beta), Cptr, ldcu );
337 flops = FLOPS_SGEMM( M, N, AB.
rk );
338 pastix_atomic_unlock( lock );
#define PASTE_CORE_SLRMM_PARAMS(_a_)
Initialize all the parameters of the core_slrmm family functions to ease the access.
pastix_fixdbl_t core_sfrfr2fr(core_slrmm_t *params)
Perform the full-rank operation C = alpha * op(A) * op(B) + beta C.
pastix_fixdbl_t core_slrfr2fr(core_slrmm_t *params)
Perform the operation C = alpha * op(A) * op(B) + beta C, with B and C full-rank and A low-rank.
pastix_fixdbl_t core_slrlr2fr(core_slrmm_t *params)
Perform the operation C = alpha * op(A) * op(B) + beta C, with A and B low-rank and C full-rank.
static float * core_slrmm_getws(core_slrmm_t *params, ssize_t newsize)
Function to get a workspace pointer if space is available in the one provided.
pastix_fixdbl_t core_slrlr2lr(core_slrmm_t *params, pastix_lrblock_t *AB, int *infomask)
Perform the operation AB = op(A) * op(B), with A, B, and AB low-rank.
#define PASTE_CORE_SLRMM_VOID
Void all the parameters of the core_slrmm family functions to silence warnings.
pastix_fixdbl_t core_sfrlr2fr(core_slrmm_t *params)
Perform the operation C = alpha * op(A) * op(B) + beta C, with A and C full-rank and B low-rank.
Structure to store all the parameters of the core_slrmm family functions.
#define PASTIX_LRM3_ALLOCV
Macro to specify if the U part of a low-rank matrix has been allocated and needs to be freed or not (U...
#define PASTIX_LRM3_TRANSB
Macro to specify if the operator on B still needs to be applied to the V part of the low-rank ma...
#define PASTIX_LRM3_ALLOCU
Macro to specify if the U part of a low-rank matrix has been allocated and needs to be freed or not (U...
The block low-rank structure to hold a matrix in low-rank form.
enum pastix_trans_e pastix_trans_t
Transposition.