29 #ifndef DOXYGEN_SHOULD_SKIP_THIS
92 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
94 assert( lrA->
rk == -1 );
104 CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo,
105 (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag,
107 CBLAS_SADDR(cone), A, lda,
203 assert( (blok > bowner[0].fblokptr) &&
204 (blok < bowner[1].fblokptr) );
206 if ( bowner->
cblktype & CBLK_COMPRESSED ) {
214 pastix_cblk_lock( fcbk );
216 CblasColMajor, (CBLAS_TRANSPOSE)trans, CblasNoTrans,
218 CBLAS_SADDR(mcone), lrA->
u, lda,
220 CBLAS_SADDR(cone), C + offC, ldc );
221 pastix_cblk_unlock( fcbk );
227 CblasColMajor, (CBLAS_TRANSPOSE)trans, CblasNoTrans,
229 CBLAS_SADDR(cone), lrA->
v, lrA->
rkmax,
231 CBLAS_SADDR(czero), tmp, lrA->
rk );
233 pastix_cblk_lock( fcbk );
235 CblasColMajor, (CBLAS_TRANSPOSE)trans, CblasNoTrans,
237 CBLAS_SADDR(mcone), lrA->
u, lda,
239 CBLAS_SADDR(cone), C + offC, ldc );
240 pastix_cblk_unlock( fcbk );
244 CblasColMajor, (CBLAS_TRANSPOSE)trans, CblasNoTrans,
246 CBLAS_SADDR(cone), lrA->
u, lda,
248 CBLAS_SADDR(czero), tmp, lrA->
rk );
250 pastix_cblk_lock( fcbk );
252 CblasColMajor, (CBLAS_TRANSPOSE)trans, CblasNoTrans,
254 CBLAS_SADDR(mcone), lrA->
v, lrA->
rkmax,
256 CBLAS_SADDR(cone), C + offC, ldc );
257 pastix_cblk_unlock( fcbk );
267 pastix_cblk_lock( fcbk );
269 CblasColMajor, (CBLAS_TRANSPOSE)trans, CblasNoTrans,
271 CBLAS_SADDR(mcone), A, lda,
273 CBLAS_SADDR(cone), C + offC, ldc );
274 pastix_cblk_unlock( fcbk );
311 const void *dataA = NULL;
348 #if defined(PRECISION_z) || defined(PRECISION_c)
362 assert( !( cblk->
cblktype & (CBLK_FANIN|CBLK_RECV) ) );
364 if ( (cblk->
cblktype & CBLK_IN_SCHUR) && (mode != PastixSolvModeSchur) ) {
374 flops_lvl2 = FLOPS_CTRSM( side, k, rhsb->
n );
375 kernel_trace_start_lvl2( PastixKernelLvl2_FR_TRSM );
377 tA, diag, cblk, rhsb->
n,
380 kernel_trace_stop_lvl2( flops_lvl2 );
381 flops_lvl1 += flops_lvl2;
384 for (blok = cblk[0].fblokptr+1; blok < cblk[1].
fblokptr; blok++ ) {
387 if ( (fcbk->
cblktype & CBLK_IN_SCHUR) && (mode == PastixSolvModeLocal) ) {
390 assert( !(fcbk->
cblktype & CBLK_RECV) );
396 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
410 if ( fcbk->
cblktype & CBLK_FANIN ) {
415 if ( !pastix_atomic_cas_xxb( &(rhsb->
cblkb[ - fcbk->
bcscnum - 1 ]),
416 (uint64_t)NULL, (uint64_t)C,
sizeof(
void*) ) )
429 flops_lvl2 = FLOPS_CGEMM(
blok_rownbr( blok ), rhsb->
n, k );
430 kernel_trace_start_lvl2( PastixKernelLvl2_FR_GEMM );
433 dataA, B, ldb, C, ldc );
434 kernel_trace_stop_lvl2( flops_lvl2 );
435 flops_lvl1 += flops_lvl2;
478 const void *dataA = NULL;
535 if ( (cblk->
cblktype & CBLK_IN_SCHUR) && (mode == PastixSolvModeLocal) ) {
536 for (j = cblk[0].brownum; j < cblk[1].
brownum; j++ ) {
540 if ( fcbk->
cblktype & CBLK_IN_SCHUR ) {
552 assert( !(cblk->
cblktype & CBLK_RECV) );
553 if ( cblk->
cblktype & CBLK_FANIN ) {
564 if ( !(cblk->
cblktype & (CBLK_FANIN|CBLK_RECV) ) &&
565 (!(cblk->
cblktype & CBLK_IN_SCHUR) || (mode == PastixSolvModeSchur)) )
568 flops_lvl2 = FLOPS_CTRSM( side, k, rhsb->
n );
569 kernel_trace_start_lvl2( PastixKernelLvl2_FR_TRSM );
574 kernel_trace_stop_lvl2( flops_lvl2 );
575 flops_lvl1 += flops_lvl2;
579 for (j = cblk[1].brownum-1; j>=cblk[0].
brownum; j-- ) {
583 if ( (fcbk->
cblktype & CBLK_IN_SCHUR) && (mode == PastixSolvModeInterface) ) {
588 #if defined( PASTIX_WITH_MPI )
591 assert( datacode->
reqtab != NULL );
592 cpucblk_cisend_rhs_bwd( datacode, rhsb, fcbk );
597 assert( !(fcbk->
cblktype & CBLK_FANIN) );
603 if ( fcbk->
cblktype & CBLK_COMPRESSED ) {
621 flops_lvl2 = FLOPS_CGEMM(
blok_rownbr( blok ), rhsb->
n, k );
622 kernel_trace_start_lvl2( PastixKernelLvl2_FR_GEMM );
625 dataA, B, ldb, C, ldc );
626 kernel_trace_stop_lvl2( flops_lvl2 );
627 flops_lvl1 += flops_lvl2;
633 if ( cblk->
cblktype & CBLK_FANIN ) {
679 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
682 assert( lrA->
rkmax == lda );
692 for (j=0; j<tempn; j++, b++, A+=lda) {
699 if ( work == NULL ) {
702 cblas_ccopy( tempn, A, lda, tmp, 1 );
705 for (k=0; k<nrhs; k++, b+=ldb)
707 for (j=0; j<tempn; j++) {
712 if ( work == NULL ) {
BEGIN_C_DECLS typedef int pastix_int_t
float _Complex pastix_complex32_t
static void kernel_trace_stop(int8_t inlast, pastix_ktype_t ktype, int m, int n, int k, double flops, double starttime)
Stop the trace of a single kernel.
static double kernel_trace_start(pastix_ktype_t ktype)
Start the trace of a single kernel.
void cpucblk_crelease_rhs_fwd_deps(const args_solve_t *enums, SolverMatrix *solvmtx, pastix_rhs_t rhsb, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
void cpucblk_crelease_rhs_bwd_deps(const args_solve_t *enums, SolverMatrix *solvmtx, pastix_rhs_t rhsb, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
The block low-rank structure to hold a matrix in low-rank form.
void solve_cblk_ctrsmsp_backward(const args_solve_t *enums, SolverMatrix *datacode, SolverCblk *cblk, pastix_rhs_t rhsb)
Apply a backward solve related to one cblk to all the right hand side.
void solve_cblk_cdiag(const SolverCblk *cblk, const void *dataA, int nrhs, pastix_complex32_t *b, int ldb, pastix_complex32_t *work)
Apply the diagonal solve related to one cblk to all the right hand side.
void solve_blok_ctrsm(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, int nrhs, const void *dataA, pastix_complex32_t *b, int ldb)
Apply a solve trsm update related to a diagonal block of the matrix A.
void solve_cblk_ctrsmsp_forward(const args_solve_t *enums, SolverMatrix *datacode, const SolverCblk *cblk, pastix_rhs_t rhsb)
Apply a forward solve related to one cblk to all the right hand side.
void solve_blok_cgemm(pastix_side_t side, pastix_trans_t trans, pastix_int_t nrhs, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcbk, const void *dataA, const pastix_complex32_t *B, pastix_int_t ldb, pastix_complex32_t *C, pastix_int_t ldc)
Apply a solve gemm update related to a single block of the matrix A.
enum pastix_diag_e pastix_diag_t
Diagonal.
enum pastix_solv_mode_e pastix_solv_mode_t
Solve Schur modes.
enum pastix_uplo_e pastix_uplo_t
Upper/Lower part.
enum pastix_side_e pastix_side_t
Side of the operation.
enum pastix_trans_e pastix_trans_t
Transposition.
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
Main PaStiX RHS structure.
static pastix_int_t blok_rownbr(const SolverBlok *blok)
Compute the number of rows of a block.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
SolverBlok *restrict bloktab
static pastix_int_t cblk_rownbr(const SolverCblk *cblk)
Compute the number of rows of a column block.
static void * cblk_getdata(const SolverCblk *cblk, pastix_coefside_t side)
Get the pointer to the data associated to the side part of the cblk.
pastix_int_t *restrict browtab
SolverCblk *restrict cblktab
Solver column block structure.
Solver column block structure.