27 #ifndef DOXYGEN_SHOULD_SKIP_THIS
28 static float sone = 1.0;
91 assert( fblok + 1 < cblk[1].fblokptr );
93 assert(!(cblk->
cblktype & CBLK_LAYOUT_2D));
98 kernel_trace_start_lvl2( PastixKernelLvl2_FR_TRSM );
99 cblas_strsm(CblasColMajor,
100 (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag,
104 kernel_trace_stop_lvl2( FLOPS_STRSM( side, M, N ) );
167 assert( cblk->
cblktype & CBLK_LAYOUT_2D );
169 for (blok=fblok+1; blok<lblok; blok++) {
175 kernel_trace_start_lvl2( PastixKernelLvl2_FR_TRSM );
176 cblas_strsm(CblasColMajor,
177 (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag,
181 kernel_trace_stop_lvl2( FLOPS_STRSM( side, M, N ) );
256 assert( lrA->
rk == -1 );
258 assert( cblk->
cblktype & CBLK_COMPRESSED );
259 assert( cblk->
cblktype & CBLK_LAYOUT_2D );
262 for (blok=fblok+1; blok<lblok; blok++, lrC++) {
285 if ( lrC->
rk != 0 ) {
286 if ( lrC->
rk != -1 ) {
287 kernel_trace_start_lvl2( PastixKernelLvl2_LR_TRSM );
288 cblas_strsm(CblasColMajor,
289 (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag,
293 flops_c = FLOPS_STRSM( side, lrC->
rk, N );
294 kernel_trace_stop_lvl2( flops_c );
297 kernel_trace_start_lvl2( PastixKernelLvl2_FR_TRSM );
298 cblas_strsm(CblasColMajor,
299 (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag,
303 flops_c = FLOPS_STRSM( side, M, N );
304 kernel_trace_stop_lvl2( flops_c );
308 flops += flops_lr + flops_c;
365 if ( cblk[0].fblokptr + 1 < cblk[1].fblokptr )
372 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
377 cblk, A, C, lowrank );
380 if ( cblk->
cblktype & CBLK_LAYOUT_2D ) {
473 assert( cblk->
cblktype & CBLK_LAYOUT_2D );
475 blok = fblok + blok_m;
480 for (; (blok < lblok) && (blok->
fcblknm == cblk_m); blok++) {
482 Cptr = C + blok->
coefind - offset;
486 cblas_strsm( CblasColMajor,
487 (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag,
492 flops += FLOPS_STRSM( side, M, N );
497 full_m, N, 0, flops, time );
577 assert( cblk->
cblktype & CBLK_COMPRESSED );
578 assert( cblk->
cblktype & CBLK_LAYOUT_2D );
581 assert( lrA->
rk == -1 );
583 blok = fblok + blok_m;
588 for (; (blok < lblok) && (blok->
fcblknm == cblk_m); blok++, lrC++) {
608 if ( lrC->
rk != 0 ) {
609 if ( lrC->
rk != -1 ) {
610 cblas_strsm(CblasColMajor,
611 (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag,
616 flops += FLOPS_STRSM( side, lrC->
rk, N );
620 cblas_strsm(CblasColMajor,
621 (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag,
626 flops += FLOPS_STRSM( side, M, N );
634 full_m, N, full_n, flops, time );
700 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
702 cblk, blok_m, A, C, lowrank );
706 cblk, blok_m, A, C );
BEGIN_C_DECLS typedef int pastix_int_t
enum pastix_ktype_e pastix_ktype_t
List of the Level 1 events that may be traced in PaStiX.
static void kernel_trace_stop(int8_t inlast, pastix_ktype_t ktype, int m, int n, int k, double flops, double starttime)
Stop the trace of a single kernel.
static double kernel_trace_start(pastix_ktype_t ktype)
Start the trace of a single kernel.
static pastix_fixdbl_t core_strsmsp_lr(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const pastix_lrblock_t *lrA, pastix_lrblock_t *lrC, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block between two cblk stored in low-rank format.
static void core_strsmsp_2d(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const float *A, float *C)
Compute the updates associated to one off-diagonal block between two cblk stored in 2D.
static pastix_fixdbl_t core_strsmsp_2dsub(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, pastix_int_t blok_m, const float *A, float *C)
Compute the updates associated to one off-diagonal block between two cblk stored in 2D.
static pastix_fixdbl_t core_strsmsp_lrsub(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, pastix_int_t blok_m, const pastix_lrblock_t *lrA, pastix_lrblock_t *lrC, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block between two cblk stored in low-rank format.
static void core_strsmsp_1d(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const float *A, float *C)
Apply all the trsm updates on a panel stored in 1D layout.
void cpucblk_strsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to a column of off-diagonal blocks.
pastix_fixdbl_t cpublok_strsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, pastix_int_t blok_m, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
pastix_fixdbl_t cpublok_scompress(const pastix_lr_t *lowrank, pastix_int_t M, pastix_int_t N, pastix_lrblock_t *lrA)
Compress a single block from full-rank to low-rank format.
pastix_int_t compress_min_width
pastix_int_t compress_min_height
Structure to define the type of function to use for the low-rank kernels and their parameters.
The block low-rank structure to hold a matrix in low-rank form.
enum pastix_diag_e pastix_diag_t
Diagonal.
enum pastix_uplo_e pastix_uplo_t
Upper/Lower part.
enum pastix_side_e pastix_side_t
Side of the operation.
enum pastix_trans_e pastix_trans_t
Transposition.
static pastix_int_t blok_rownbr(const SolverBlok *blok)
Compute the number of rows of a block.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Solver column block structure.