29 #ifndef DOXYGEN_SHOULD_SKIP_THIS
30 static double dzero = 0.0;
31 static double done = 1.0;
32 static double mdone = -1.0;
92 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
94 assert( lrA->
rk == -1 );
99 A = (
const double *)dataA;
104 CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo,
105 (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag,
203 assert( (blok > bowner[0].fblokptr) &&
204 (blok < bowner[1].fblokptr) );
206 if ( bowner->
cblktype & CBLK_COMPRESSED ) {
214 pastix_cblk_lock( fcbk );
216 CblasColMajor, (CBLAS_TRANSPOSE)trans, CblasNoTrans,
218 (mdone), lrA->
u, lda,
220 (done), C + offC, ldc );
221 pastix_cblk_unlock( fcbk );
224 MALLOC_INTERN( tmp, lrA->
rk * nrhs,
double);
227 CblasColMajor, (CBLAS_TRANSPOSE)trans, CblasNoTrans,
229 (done), lrA->
v, lrA->
rkmax,
231 (dzero), tmp, lrA->
rk );
233 pastix_cblk_lock( fcbk );
235 CblasColMajor, (CBLAS_TRANSPOSE)trans, CblasNoTrans,
237 (mdone), lrA->
u, lda,
239 (done), C + offC, ldc );
240 pastix_cblk_unlock( fcbk );
244 CblasColMajor, (CBLAS_TRANSPOSE)trans, CblasNoTrans,
248 (dzero), tmp, lrA->
rk );
250 pastix_cblk_lock( fcbk );
252 CblasColMajor, (CBLAS_TRANSPOSE)trans, CblasNoTrans,
254 (mdone), lrA->
v, lrA->
rkmax,
256 (done), C + offC, ldc );
257 pastix_cblk_unlock( fcbk );
264 const double *A = dataA;
267 pastix_cblk_lock( fcbk );
269 CblasColMajor, (CBLAS_TRANSPOSE)trans, CblasNoTrans,
273 (done), C + offC, ldc );
274 pastix_cblk_unlock( fcbk );
311 const void *dataA = NULL;
348 #if defined(PRECISION_z) || defined(PRECISION_c)
362 assert( !( cblk->
cblktype & (CBLK_FANIN|CBLK_RECV) ) );
364 if ( (cblk->
cblktype & CBLK_IN_SCHUR) && (mode != PastixSolvModeSchur) ) {
374 flops_lvl2 = FLOPS_DTRSM( side, k, rhsb->
n );
375 kernel_trace_start_lvl2( PastixKernelLvl2_FR_TRSM );
377 tA, diag, cblk, rhsb->
n,
380 kernel_trace_stop_lvl2( flops_lvl2 );
381 flops_lvl1 += flops_lvl2;
384 for (blok = cblk[0].fblokptr+1; blok < cblk[1].
fblokptr; blok++ ) {
387 if ( (fcbk->
cblktype & CBLK_IN_SCHUR) && (mode == PastixSolvModeLocal) ) {
390 assert( !(fcbk->
cblktype & CBLK_RECV) );
396 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
410 if ( fcbk->
cblktype & CBLK_FANIN ) {
414 C = calloc( ldc * rhsb->
n,
sizeof(
double ) );
415 if ( !pastix_atomic_cas_xxb( &(rhsb->
cblkb[ - fcbk->
bcscnum - 1 ]),
416 (uint64_t)NULL, (uint64_t)C,
sizeof(
void*) ) )
429 flops_lvl2 = FLOPS_DGEMM(
blok_rownbr( blok ), rhsb->
n, k );
430 kernel_trace_start_lvl2( PastixKernelLvl2_FR_GEMM );
433 dataA, B, ldb, C, ldc );
434 kernel_trace_stop_lvl2( flops_lvl2 );
435 flops_lvl1 += flops_lvl2;
478 const void *dataA = NULL;
535 if ( (cblk->
cblktype & CBLK_IN_SCHUR) && (mode == PastixSolvModeLocal) ) {
536 for (j = cblk[0].brownum; j < cblk[1].
brownum; j++ ) {
540 if ( fcbk->
cblktype & CBLK_IN_SCHUR ) {
552 assert( !(cblk->
cblktype & CBLK_RECV) );
553 if ( cblk->
cblktype & CBLK_FANIN ) {
564 if ( !(cblk->
cblktype & (CBLK_FANIN|CBLK_RECV) ) &&
565 (!(cblk->
cblktype & CBLK_IN_SCHUR) || (mode == PastixSolvModeSchur)) )
568 flops_lvl2 = FLOPS_DTRSM( side, k, rhsb->
n );
569 kernel_trace_start_lvl2( PastixKernelLvl2_FR_TRSM );
574 kernel_trace_stop_lvl2( flops_lvl2 );
575 flops_lvl1 += flops_lvl2;
579 for (j = cblk[1].brownum-1; j>=cblk[0].
brownum; j-- ) {
583 if ( (fcbk->
cblktype & CBLK_IN_SCHUR) && (mode == PastixSolvModeInterface) ) {
588 #if defined( PASTIX_WITH_MPI )
589 if ( datacode->
reqtab != NULL ) {
590 cpucblk_disend_rhs_bwd( datacode, rhsb, fcbk );
595 assert( !(fcbk->
cblktype & CBLK_FANIN) );
601 if ( fcbk->
cblktype & CBLK_COMPRESSED ) {
619 flops_lvl2 = FLOPS_DGEMM(
blok_rownbr( blok ), rhsb->
n, k );
620 kernel_trace_start_lvl2( PastixKernelLvl2_FR_GEMM );
623 dataA, B, ldb, C, ldc );
624 kernel_trace_stop_lvl2( flops_lvl2 );
625 flops_lvl1 += flops_lvl2;
631 if ( cblk->
cblktype & CBLK_FANIN ) {
677 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
680 assert( lrA->
rkmax == lda );
683 A = (
const double*)dataA;
690 for (j=0; j<tempn; j++, b++, A+=lda) {
697 if ( work == NULL ) {
698 MALLOC_INTERN( tmp, tempn,
double );
700 cblas_dcopy( tempn, A, lda, tmp, 1 );
703 for (k=0; k<nrhs; k++, b+=ldb)
705 for (j=0; j<tempn; j++) {
710 if ( work == NULL ) {
BEGIN_C_DECLS typedef int pastix_int_t
static void kernel_trace_stop(int8_t inlast, pastix_ktype_t ktype, int m, int n, int k, double flops, double starttime)
Stop the trace of a single kernel.
static double kernel_trace_start(pastix_ktype_t ktype)
Start the trace of a single kernel.
void cpucblk_drelease_rhs_fwd_deps(const args_solve_t *enums, SolverMatrix *solvmtx, pastix_rhs_t rhsb, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
void cpucblk_drelease_rhs_bwd_deps(const args_solve_t *enums, SolverMatrix *solvmtx, pastix_rhs_t rhsb, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
The block low-rank structure to hold a matrix in low-rank form.
void solve_blok_dgemm(pastix_side_t side, pastix_trans_t trans, pastix_int_t nrhs, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcbk, const void *dataA, const double *B, pastix_int_t ldb, double *C, pastix_int_t ldc)
Apply a solve gemm update related to a single block of the matrix A.
void solve_cblk_dtrsmsp_forward(const args_solve_t *enums, SolverMatrix *datacode, const SolverCblk *cblk, pastix_rhs_t rhsb)
Apply a forward solve related to one cblk to all the right hand side.
void solve_blok_dtrsm(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, int nrhs, const void *dataA, double *b, int ldb)
Apply a solve trsm update related to a diagonal block of the matrix A.
void solve_cblk_dtrsmsp_backward(const args_solve_t *enums, SolverMatrix *datacode, SolverCblk *cblk, pastix_rhs_t rhsb)
Apply a backward solve related to one cblk to all the right hand side.
void solve_cblk_ddiag(const SolverCblk *cblk, const void *dataA, int nrhs, double *b, int ldb, double *work)
Apply the diagonal solve related to one cblk to all the right hand side.
enum pastix_diag_e pastix_diag_t
Diagonal.
enum pastix_solv_mode_e pastix_solv_mode_t
Solve Schur modes.
enum pastix_uplo_e pastix_uplo_t
Upper/Lower part.
enum pastix_side_e pastix_side_t
Side of the operation.
enum pastix_trans_e pastix_trans_t
Transposition.
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
Main PaStiX RHS structure.
static pastix_int_t blok_rownbr(const SolverBlok *blok)
Compute the number of rows of a block.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
SolverBlok *restrict bloktab
static pastix_int_t cblk_rownbr(const SolverCblk *cblk)
Compute the number of rows of a column block.
static void * cblk_getdata(const SolverCblk *cblk, pastix_coefside_t side)
Get the pointer to the data associated to the side part of the cblk.
pastix_int_t *restrict browtab
SolverCblk *restrict cblktab
Solver column block structure.
Solver column block structure.