25 #ifndef DOXYGEN_SHOULD_SKIP_THIS
26 static float sone = 1.0;
27 static float szero = 0.0;
69 Cfr += Cm * offy + offx;
71 assert( C->rk == -1 );
76 kernel_trace_start_lvl2( PastixKernelLvl2_LR_add2C_updateCfr );
80 kernel_trace_stop_lvl2( flops );
83 flops = FLOPS_SGEMM( M, N, AB->
rk );
84 kernel_trace_start_lvl2( PastixKernelLvl2_LR_add2C_updateCfr );
85 cblas_sgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transV,
87 (alpha), AB->
u, ldabu,
90 kernel_trace_stop_lvl2( flops );
139 assert( (C->rk >= 0) && (C->rk <= C->rkmax) );
144 if ( (C->rk + rAB) > rklimit )
149 Cfr = malloc( Cm * Cn *
sizeof(
float) );
152 Coff = Cfr + Cm * offy + offx;
154 kernel_trace_start_lvl2( PastixKernelLvl2_LR_add2C_uncompress );
155 cblas_sgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
160 flops = FLOPS_SGEMM( Cm, Cn, C->rk );
163 if ( AB->
rk == -1 ) {
167 flops += (2. * M * N);
170 cblas_sgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transV,
172 (alpha), AB->
u, ldabu,
175 flops += FLOPS_SGEMM( M, N, AB->
rk );
177 kernel_trace_stop_lvl2( flops );
178 total_flops += flops;
181 kernel_trace_start_lvl2( PastixKernelLvl2_LR_add2C_recompress );
183 flops = lowrank->core_ge2lr( lowrank->use_reltol, lowrank->tolerance, -1, Cm, Cn, Cfr, Cm, C );
184 kernel_trace_stop_lvl2_rank( flops, C->rk );
185 total_flops += flops;
195 total_flops += lowrank->core_rradd( lowrank, transV, &alpha,
254 assert( C->rk == 0 );
256 if ( AB->
rk > rklimit ) {
259 Cfr = malloc( Cm * Cn *
sizeof(
float) );
262 Coff = Cfr + Cm * offy + offx;
265 if ( (M != Cm) || (N != Cn) ) {
266 memset( Cfr, 0, Cm * Cn *
sizeof(
float) );
270 flops = FLOPS_SGEMM( M, N, AB->
rk );
271 kernel_trace_start_lvl2( PastixKernelLvl2_LR_add2C_uncompress );
272 cblas_sgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transV,
274 (alpha), AB->
u, ldabu,
277 kernel_trace_stop_lvl2( flops );
278 total_flops += flops;
281 kernel_trace_start_lvl2( PastixKernelLvl2_LR_add2C_recompress );
282 flops = lowrank->core_ge2lr( lowrank->use_reltol, lowrank->tolerance, -1, Cm, Cn, Cfr, Cm, C );
283 kernel_trace_stop_lvl2_rank( flops, C->rk );
284 total_flops += flops;
300 kernel_trace_start_lvl2( PastixKernelLvl2_LR_add2C_orthou );
304 ABfr = malloc( M * N *
sizeof(
float) );
308 cblas_sgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transV,
310 (sone), AB->
u, ldabu,
313 flops = FLOPS_SGEMM( M, N, AB->
rk );
320 flops += lowrank->core_ge2lr( lowrank->use_reltol, lowrank->tolerance, rklimit,
321 M, N, ABfr, M, &backup );
324 M, N, &backup, Cm, Cn, C,
327 kernel_trace_stop_lvl2( flops );
329 total_flops += flops;
392 pastix_atomic_lock( params->
lock );
415 pastix_atomic_unlock( params->
lock );
static pastix_fixdbl_t core_slr2fr(core_slrmm_t *params, const pastix_lrblock_t *AB, pastix_trans_t transV)
Perform the addition of the low-rank matrix AB and the full-rank matrix C.
static pastix_fixdbl_t core_slr2lr(core_slrmm_t *params, const pastix_lrblock_t *AB, pastix_trans_t transV)
Perform the addition of the low-rank matrix AB and the low-rank matrix C.
static pastix_fixdbl_t core_slr2null(core_slrmm_t *params, const pastix_lrblock_t *AB, pastix_trans_t transV, int infomask)
Perform the addition of the low-rank matrix AB into the null matrix C.
BEGIN_C_DECLS typedef int pastix_int_t
int core_sgeadd(pastix_trans_t trans, pastix_int_t M, pastix_int_t N, float alpha, const float *A, pastix_int_t LDA, float beta, float *B, pastix_int_t LDB)
Add two matrices together.
pastix_atomic_lock_t * lock
#define PASTE_CORE_SLRMM_PARAMS(_a_)
Initialize all the parameters of the core_slrmm family functions to ease the access.
static float * core_slrmm_getws(core_slrmm_t *params, ssize_t newsize)
Function to get a workspace pointer if space is available in the one provided.
#define PASTE_CORE_SLRMM_VOID
Void all the parameters of the core_slrmm family functions to silence warnings.
pastix_fixdbl_t core_slradd(core_slrmm_t *params, const pastix_lrblock_t *A, pastix_trans_t transV, int infomask)
Perform the addition of two low-rank matrices.
Structure to store all the parameters of the core_slrmm family functions.
pastix_int_t(* core_get_rklimit)(pastix_int_t, pastix_int_t)
Compute the maximal rank accepted for a given matrix size. The pointer is set according to the low-ra...
#define PASTIX_LRM3_ORTHOU
Macro to specify if the U part of a low-rank matrix is orthogonal or not (Used in LRMM functions).
The block low-rank structure to hold a matrix in low-rank form.
void core_slrcpy(const pastix_lr_t *lowrank, pastix_trans_t transAv, float alpha, pastix_int_t M1, pastix_int_t N1, const pastix_lrblock_t *A, pastix_int_t M2, pastix_int_t N2, pastix_lrblock_t *B, pastix_int_t offx, pastix_int_t offy)
Copy a small low-rank structure into a large one.
void core_slrfree(pastix_lrblock_t *A)
Free a low-rank matrix.
enum pastix_trans_e pastix_trans_t
Transposition.