24 #include "kernels_trace.h"
28 #ifndef DOXYGEN_SHOULD_SKIP_THIS
29 static float szero = 0.0;
30 static float sone = 1.0;
31 static float msone = -1.0;
91 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
93 assert( lrA->
rk == -1 );
103 CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo,
104 (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag,
171 pastix_int_t m, n, lda;
172 pastix_int_t offB, offC;
202 assert( (blok > bowner[0].fblokptr) &&
203 (blok < bowner[1].fblokptr) );
205 if ( bowner->
cblktype & CBLK_COMPRESSED ) {
213 pastix_cblk_lock( fcbk );
215 CblasColMajor, (CBLAS_TRANSPOSE)trans, CblasNoTrans,
217 (msone), lrA->
u, lda,
219 (sone), C + offC, ldc );
220 pastix_cblk_unlock( fcbk );
223 MALLOC_INTERN( tmp, lrA->
rk * nrhs,
float);
226 CblasColMajor, (CBLAS_TRANSPOSE)trans, CblasNoTrans,
228 (sone), lrA->
v, lrA->
rkmax,
230 (szero), tmp, lrA->
rk );
232 pastix_cblk_lock( fcbk );
234 CblasColMajor, (CBLAS_TRANSPOSE)trans, CblasNoTrans,
236 (msone), lrA->
u, lda,
238 (sone), C + offC, ldc );
239 pastix_cblk_unlock( fcbk );
243 CblasColMajor, (CBLAS_TRANSPOSE)trans, CblasNoTrans,
247 (szero), tmp, lrA->
rk );
249 pastix_cblk_lock( fcbk );
251 CblasColMajor, (CBLAS_TRANSPOSE)trans, CblasNoTrans,
253 (msone), lrA->
v, lrA->
rkmax,
255 (sone), C + offC, ldc );
256 pastix_cblk_unlock( fcbk );
263 const float *A = dataA;
266 pastix_cblk_lock( fcbk );
268 CblasColMajor, (CBLAS_TRANSPOSE)trans, CblasNoTrans,
272 (sone), C + offC, ldc );
273 pastix_cblk_unlock( fcbk );
302 SolverMatrix *datacode,
310 const void *dataA = NULL;
314 pastix_int_t ldb, ldc;
342 #if defined(PRECISION_z) || defined(PRECISION_c)
356 assert( !( cblk->
cblktype & (CBLK_FANIN|CBLK_RECV) ) );
358 if ( (cblk->
cblktype & CBLK_IN_SCHUR) && (mode != PastixSolvModeSchur) ) {
368 tA, diag, cblk, rhsb->n,
373 for (blok = cblk[0].fblokptr+1; blok < cblk[1].
fblokptr; blok++ ) {
374 fcbk = datacode->cblktab + blok->
fcblknm;
376 if ( (fcbk->
cblktype & CBLK_IN_SCHUR) && (mode == PastixSolvModeLocal) ) {
379 assert( !(fcbk->
cblktype & CBLK_RECV) );
385 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
399 if ( fcbk->
cblktype & CBLK_FANIN ) {
400 C = rhsb->cblkb[ - fcbk->
bcscnum - 1 ];
403 C = calloc( ldc * rhsb->n,
sizeof(
float ) );
404 if ( !pastix_atomic_cas_xxb( &(rhsb->cblkb[ - fcbk->
bcscnum - 1 ]),
405 (uint64_t)NULL, (uint64_t)C,
sizeof(
void*) ) )
408 C = rhsb->cblkb[ - fcbk->
bcscnum - 1 ];
420 dataA, B, ldb, C, ldc );
451 SolverMatrix *datacode,
460 const void *dataA = NULL;
464 pastix_int_t ldb, ldc;
513 if ( (cblk->
cblktype & CBLK_IN_SCHUR) && (mode == PastixSolvModeLocal) ) {
514 for (j = cblk[0].brownum; j < cblk[1].
brownum; j++ ) {
515 blok = datacode->bloktab + datacode->browtab[j];
516 fcbk = datacode->cblktab + blok->
lcblknm;
518 if ( fcbk->
cblktype & CBLK_IN_SCHUR ) {
530 assert( !(cblk->
cblktype & CBLK_RECV) );
531 if ( cblk->
cblktype & CBLK_FANIN ) {
532 B = rhsb->cblkb[ - cblk->
bcscnum - 1 ];
541 if ( !(cblk->
cblktype & (CBLK_FANIN|CBLK_RECV) ) &&
542 (!(cblk->
cblktype & CBLK_IN_SCHUR) || (mode == PastixSolvModeSchur)) )
553 for (j = cblk[1].brownum-1; j>=cblk[0].
brownum; j-- ) {
554 blok = datacode->bloktab + datacode->browtab[j];
555 fcbk = datacode->cblktab + blok->
lcblknm;
557 if ( (fcbk->
cblktype & CBLK_IN_SCHUR) && (mode == PastixSolvModeInterface) ) {
562 #if defined( PASTIX_WITH_MPI )
563 if ( datacode->reqtab != NULL ) {
564 cpucblk_sisend_rhs_bwd( datacode, rhsb, fcbk );
569 assert( !(fcbk->
cblktype & CBLK_FANIN) );
575 if ( fcbk->
cblktype & CBLK_COMPRESSED ) {
595 dataA, B, ldb, C, ldc );
600 if ( cblk->
cblktype & CBLK_FANIN ) {
601 memFree_null( rhsb->cblkb[ - cblk->
bcscnum - 1 ] );
637 pastix_int_t k, j, tempn, lda;
643 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
655 for (j=0; j<tempn; j++, b++, A+=lda) {
662 if ( work == NULL ) {
663 MALLOC_INTERN( tmp, tempn,
float );
665 cblas_scopy( tempn, A, lda, tmp, 1 );
668 for (k=0; k<nrhs; k++, b+=ldb)
670 for (j=0; j<tempn; j++) {
675 if ( work == NULL ) {
void cpucblk_srelease_rhs_bwd_deps(const args_solve_t *enums, SolverMatrix *solvmtx, pastix_rhs_t rhsb, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
void cpucblk_srelease_rhs_fwd_deps(const args_solve_t *enums, SolverMatrix *solvmtx, pastix_rhs_t rhsb, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
The block low-rank structure to hold a matrix in low-rank form.
void solve_blok_strsm(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, int nrhs, const void *dataA, float *b, int ldb)
Apply a solve trsm update related to a diagonal block of the matrix A.
void solve_cblk_strsmsp_backward(const args_solve_t *enums, SolverMatrix *datacode, SolverCblk *cblk, pastix_rhs_t rhsb)
Apply a backward solve related to one cblk to all the right hand side.
void solve_cblk_strsmsp_forward(const args_solve_t *enums, SolverMatrix *datacode, const SolverCblk *cblk, pastix_rhs_t rhsb)
Apply a forward solve related to one cblk to all the right hand side.
void solve_blok_sgemm(pastix_side_t side, pastix_trans_t trans, pastix_int_t nrhs, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcbk, const void *dataA, const float *B, pastix_int_t ldb, float *C, pastix_int_t ldc)
Apply a solve gemm update related to a single block of the matrix A.
void solve_cblk_sdiag(const SolverCblk *cblk, int nrhs, float *b, int ldb, float *work)
Apply the diagonal solve related to one cblk to all the right hand side.
enum pastix_diag_e pastix_diag_t
Diagonal.
enum pastix_solv_mode_e pastix_solv_mode_t
Solve Schur modes.
enum pastix_uplo_e pastix_uplo_t
Upper/Lower part.
enum pastix_side_e pastix_side_t
Side of the operation.
enum pastix_trans_e pastix_trans_t
Transposition.
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
static pastix_int_t blok_rownbr(const SolverBlok *blok)
Compute the number of rows of a block.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
static void * cblk_getdata(const SolverCblk *cblk, pastix_coefside_t side)
Get the pointer to the data associated to the side part of the cblk.
pastix_lrblock_t * LRblock[2]
Solver column block structure.