27#ifndef DOXYGEN_SHOULD_SKIP_THIS
28static pastix_complex64_t zone = 1.0;
77 const pastix_complex64_t *A,
78 pastix_complex64_t *C )
91 assert( fblok + 1 < cblk[1].fblokptr );
93 assert(!(cblk->
cblktype & CBLK_LAYOUT_2D));
98 kernel_trace_start_lvl2( PastixKernelLvl2_FR_TRSM );
99 cblas_ztrsm(CblasColMajor,
100 (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag,
102 CBLAS_SADDR(zone), A, lda,
104 kernel_trace_stop_lvl2( FLOPS_ZTRSM( side, M, N ) );
154 const pastix_complex64_t *A,
155 pastix_complex64_t *C )
159 pastix_complex64_t *blokC;
167 assert( cblk->
cblktype & CBLK_LAYOUT_2D );
169 for (blok=fblok+1; blok<lblok; blok++) {
175 kernel_trace_start_lvl2( PastixKernelLvl2_FR_TRSM );
176 cblas_ztrsm(CblasColMajor,
177 (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag,
179 CBLAS_SADDR(zone), A, lda,
181 kernel_trace_stop_lvl2( FLOPS_ZTRSM( side, M, N ) );
244 pastix_complex64_t *A;
256 assert( lrA->
rk == -1 );
258 assert( cblk->
cblktype & CBLK_COMPRESSED );
259 assert( cblk->
cblktype & CBLK_LAYOUT_2D );
262 for (blok=fblok+1; blok<lblok; blok++, lrC++) {
285 if ( lrC->
rk != 0 ) {
286 if ( lrC->
rk != -1 ) {
287 kernel_trace_start_lvl2( PastixKernelLvl2_LR_TRSM );
288 cblas_ztrsm(CblasColMajor,
289 (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag,
291 CBLAS_SADDR(zone), A, lda,
293 flops_c = FLOPS_ZTRSM( side, lrC->
rk, N );
294 kernel_trace_stop_lvl2( flops_c );
297 kernel_trace_start_lvl2( PastixKernelLvl2_FR_TRSM );
298 cblas_ztrsm(CblasColMajor,
299 (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag,
301 CBLAS_SADDR(zone), A, lda,
303 flops_c = FLOPS_ZTRSM( side, M, N );
304 kernel_trace_stop_lvl2( flops_c );
308 flops += flops_lr + flops_c;
365 if ( cblk[0].fblokptr + 1 < cblk[1].fblokptr )
372 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
377 cblk, A, C, lowrank );
380 if ( cblk->
cblktype & CBLK_LAYOUT_2D ) {
458 const pastix_complex64_t *A,
459 pastix_complex64_t *C )
463 pastix_complex64_t *Cptr;
473 assert( cblk->
cblktype & CBLK_LAYOUT_2D );
475 blok = fblok + blok_m;
480 for (; (blok < lblok) && (blok->
fcblknm == cblk_m); blok++) {
482 Cptr = C + blok->
coefind - offset;
486 cblas_ztrsm( CblasColMajor,
487 (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag,
489 CBLAS_SADDR(zone), A, lda,
492 flops += FLOPS_ZTRSM( side, M, N );
497 full_m, N, 0, flops, time );
566 pastix_complex64_t *A;
577 assert( cblk->
cblktype & CBLK_COMPRESSED );
578 assert( cblk->
cblktype & CBLK_LAYOUT_2D );
581 assert( lrA->
rk == -1 );
583 blok = fblok + blok_m;
588 for (; (blok < lblok) && (blok->
fcblknm == cblk_m); blok++, lrC++) {
608 if ( lrC->
rk != 0 ) {
609 if ( lrC->
rk != -1 ) {
610 cblas_ztrsm(CblasColMajor,
611 (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag,
613 CBLAS_SADDR(zone), A, lda,
616 flops += FLOPS_ZTRSM( side, lrC->
rk, N );
620 cblas_ztrsm(CblasColMajor,
621 (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag,
623 CBLAS_SADDR(zone), A, lda,
626 flops += FLOPS_ZTRSM( side, M, N );
634 full_m, N, full_n, flops, time );
700 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
702 cblk, blok_m, A, C, lowrank );
706 cblk, blok_m, A, C );
BEGIN_C_DECLS typedef int pastix_int_t
enum pastix_ktype_e pastix_ktype_t
List of the Level 1 events that may be traced in PaStiX.
static void kernel_trace_stop(int8_t inlast, pastix_ktype_t ktype, int m, int n, int k, double flops, double starttime)
Stop the trace of a single kernel.
static double kernel_trace_start(pastix_ktype_t ktype)
Start the trace of a single kernel.
static pastix_fixdbl_t core_ztrsmsp_lrsub(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, pastix_int_t blok_m, const pastix_lrblock_t *lrA, pastix_lrblock_t *lrC, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block between two cblk stored in low-rank format.
static void core_ztrsmsp_1d(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const pastix_complex64_t *A, pastix_complex64_t *C)
Apply all the trsm updates on a panel stored in 1D layout.
static pastix_fixdbl_t core_ztrsmsp_2dsub(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, pastix_int_t blok_m, const pastix_complex64_t *A, pastix_complex64_t *C)
Compute the updates associated to one off-diagonal block between two cblk stored in 2D.
static void core_ztrsmsp_2d(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const pastix_complex64_t *A, pastix_complex64_t *C)
Apply the trsm updates to all the off-diagonal blocks of a panel stored in 2D layout.
static pastix_fixdbl_t core_ztrsmsp_lr(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const pastix_lrblock_t *lrA, pastix_lrblock_t *lrC, const pastix_lr_t *lowrank)
Apply the trsm updates to all the off-diagonal blocks of a panel stored in low-rank format.
void cpucblk_ztrsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to a column of off-diagonal blocks.
pastix_fixdbl_t cpublok_zcompress(const pastix_lr_t *lowrank, pastix_int_t M, pastix_int_t N, pastix_lrblock_t *lrA)
Compress a single block from full-rank to low-rank format.
pastix_fixdbl_t cpublok_ztrsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, pastix_int_t blok_m, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
pastix_int_t compress_min_width
pastix_int_t compress_min_height
Structure to define the type of function to use for the low-rank kernels and their parameters.
The block low-rank structure to hold a matrix in low-rank form.
enum pastix_diag_e pastix_diag_t
Diagonal.
enum pastix_uplo_e pastix_uplo_t
Upper/Lower part.
enum pastix_side_e pastix_side_t
Side of the operation.
enum pastix_trans_e pastix_trans_t
Transposition.
static pastix_int_t blok_rownbr(const SolverBlok *blok)
Compute the number of rows of a block.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Solver column block structure.