19 #include "common/common.h"
21 #include "kernels_trace.h"
65 static inline pastix_fixdbl_t
70 pastix_complex32_t *work,
78 pastix_complex32_t *A;
80 pastix_fixdbl_t flops = 0.;
84 assert( !(cblkA->
cblktype & CBLK_COMPRESSED) );
85 assert( cblkB->
cblktype & CBLK_COMPRESSED );
86 assert( cblkA->
cblktype & CBLK_LAYOUT_2D );
103 params.
alpha = alpha;
108 params.
lwork = lwork;
120 for (; blokA < lblokA; blokA++) {
184 static inline pastix_fixdbl_t
189 pastix_complex32_t *work,
198 pastix_fixdbl_t flops = 0.;
201 assert( (cblkA->
cblktype & CBLK_COMPRESSED) );
202 assert( (cblkB->
cblktype & CBLK_COMPRESSED) );
210 params.
alpha = alpha;
215 params.
lwork = lwork;
224 for (; blokA < lblokA; blokA++) {
274 static inline pastix_fixdbl_t
280 pastix_complex32_t *A, *B;
282 pastix_int_t m = cblkA->
stride;
283 pastix_fixdbl_t flops = m * n;
285 assert( !(cblkA->
cblktype & CBLK_COMPRESSED) );
286 assert( !(cblkB->
cblktype & CBLK_COMPRESSED) );
297 assert( (A != NULL) && (B != NULL) );
303 pastix_cblk_lock( cblkB );
307 pastix_cblk_unlock( cblkB );
310 pastix_complex32_t *bA, *bB;
315 pastix_int_t lda, ldb;
318 assert( cblkA->
cblktype & CBLK_LAYOUT_2D );
319 assert( cblkB->
cblktype & CBLK_LAYOUT_2D );
321 for (; blokA < lblokA; blokA++) {
338 pastix_cblk_lock( cblkB );
342 pastix_cblk_unlock( cblkB );
385 pastix_ktype_t ktype = PastixKernelGEADDCblkFRFR;
386 pastix_fixdbl_t time, flops = 0.0;
387 pastix_int_t m = cblkA->
stride;
394 if ( cblkB->
cblktype & CBLK_COMPRESSED ) {
395 if ( cblkA->
cblktype & CBLK_COMPRESSED ) {
396 ktype = PastixKernelGEADDCblkLRLR;
397 time = kernel_trace_start( ktype );
402 ktype = PastixKernelGEADDCblkFRLR;
403 time = kernel_trace_start( ktype );
409 if ( cblkA->
cblktype & CBLK_COMPRESSED ) {
411 time = kernel_trace_start( ktype );
414 ktype = PastixKernelGEADDCblkFRFR;
415 time = kernel_trace_start( ktype );
420 kernel_trace_stop( cblkB->
fblokptr->
inlast, ktype, m, n, 0, flops, time );
static pastix_fixdbl_t cpucblk_cadd_lrlr(pastix_coefside_t side, pastix_int_t alpha, const SolverCblk *cblkA, SolverCblk *cblkB, pastix_complex32_t *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Add two column blocks in low-rank format.
static pastix_fixdbl_t cpucblk_cadd_frfr(pastix_coefside_t side, pastix_int_t alpha, const SolverCblk *cblkA, SolverCblk *cblkB)
Add two column blocks in full-rank format.
static pastix_fixdbl_t cpucblk_cadd_frlr(pastix_coefside_t side, pastix_int_t alpha, const SolverCblk *cblkA, SolverCblk *cblkB, pastix_complex32_t *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Add a full-rank column block to a low-rank column block.
int core_cgeadd(pastix_trans_t trans, pastix_int_t M, pastix_int_t N, pastix_complex32_t alpha, const pastix_complex32_t *A, pastix_int_t LDA, pastix_complex32_t beta, pastix_complex32_t *B, pastix_int_t LDB)
Add two matrices together.
void cpucblk_cadd(pastix_coefside_t side, float alpha, const SolverCblk *cblkA, SolverCblk *cblkB, const pastix_lr_t *lowrank)
Add two column blocks, selecting the kernel according to whether each one is stored in full-rank or low-rank format.
pastix_atomic_lock_t * lock
const pastix_lr_t * lowrank
const pastix_lrblock_t * B
const pastix_lrblock_t * A
pastix_complex32_t * work
pastix_fixdbl_t core_clradd(core_clrmm_t *params, const pastix_lrblock_t *A, pastix_trans_t transV, int infomask)
Perform the addition of two low-rank matrices.
Structure to store all the parameters of the core_clrmm family functions.
#define PASTIX_LRM3_ORTHOU
Macro to specify if the U part of a low-rank matrix is orthogonal or not (Used in LRMM functions).
Structure to define the type of function to use for the low-rank kernels and their parameters.
The block low-rank structure to hold a matrix in low-rank form.
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
static pastix_int_t blok_rownbr(const SolverBlok *blok)
Compute the number of rows of a block.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
static int is_block_inside_fblock(const SolverBlok *blok, const SolverBlok *fblok)
Check if a block is included inside another one.
pastix_atomic_lock_t lock
pastix_lrblock_t * LRblock[2]
Solver column block structure.