25 #include "sopalin/sopalin_data.h"
28 #if defined(PASTIX_WITH_STARPU)
32 #if defined(PASTIX_WITH_MPI)
43 sopalin_data_t *sopalin_data;
45 volatile int32_t taskcnt;
72 sopalin_data_t *sopalin_data,
80 if ( enums->solve_step == PastixSolveBackward ) {
81 cblknbr = (enums->mode == PastixSolvModeLocal) ? datacode->
cblkschur : datacode->
cblknbr;
83 cblk = datacode->
cblktab + cblknbr - 1;
84 for (i=0; i<cblknbr; i++, cblk--){
99 pastix_complex64_t *work;
100 MALLOC_INTERN( work, datacode->colmax * rhsb->
n, pastix_complex64_t );
102 cblknbr = (enums->mode == PastixSolvModeSchur) ? datacode->
cblknbr : datacode->
cblkschur;
104 for (i=0; i<cblknbr; i++, cblk++){
125 for( i=0; i<nbbuffers; i++ ) {
126 assert( rhsb->
cblkb[i] == NULL );
152 struct args_ztrsm_t *arg = (
struct args_ztrsm_t*)args;
153 sopalin_data_t *sopalin_data = arg->sopalin_data;
166 cblkfirst = (datacode->
cblknbr / thrd_size ) * thrd_rank;
167 cblklast = (datacode->
cblknbr / thrd_size ) * (thrd_rank + 1);
168 if ( thrd_rank == (thrd_size-1) ) {
172 tasknbr = datacode->ttsknbr[thrd_rank];
173 tasktab = datacode->ttsktab[thrd_rank];
176 if ( enums->solve_step == PastixSolveBackward ) {
178 cblk = datacode->
cblktab + cblkfirst;
179 for (ii=cblkfirst; ii<cblklast; ii++, cblk++) {
180 if ( (cblk->
cblktype & CBLK_IN_SCHUR) && (enums->mode != PastixSolvModeSchur) ) {
187 isched_barrier_wait( &(ctx->global_ctx->barrier) );
189 for (ii=tasknbr-1; ii>=0; ii--) {
191 t = datacode->tasktab + i;
206 cblk = datacode->
cblktab + cblkfirst;
207 for (ii=cblkfirst; ii<cblklast; ii++, cblk++) {
210 isched_barrier_wait( &(ctx->global_ctx->barrier) );
212 for (ii=0; ii<tasknbr; ii++) {
214 t = datacode->tasktab + i;
217 if ( (cblk->
cblktype & CBLK_IN_SCHUR) &&
218 (enums->mode != PastixSolvModeSchur) ) {
224 datacode, cblk, rhsb ) ) {
257 sopalin_data_t *sopalin_data,
260 struct args_ztrsm_t args_ztrsm = { pastix_data, enums, sopalin_data, rhsb, 0 };
283 struct args_ztrsm_t *arg = (
struct args_ztrsm_t*)args;
285 sopalin_data_t *sopalin_data = arg->sopalin_data;
291 int32_t local_taskcnt = 0;
299 cblkfirst = (datacode->
cblknbr / thrd_size ) * thrd_rank;
300 cblklast = (datacode->
cblknbr / thrd_size ) * (thrd_rank + 1);
301 if ( thrd_rank == (thrd_size-1) ) {
305 MALLOC_INTERN( datacode->computeQueue[thrd_rank], 1,
pastix_queue_t );
307 tasknbr = datacode->ttsknbr[thrd_rank];
308 computeQueue = datacode->computeQueue[thrd_rank];
312 if ( enums->solve_step == PastixSolveBackward ) {
314 cblk = datacode->
cblktab + cblkfirst;
315 for (ii=cblkfirst; ii<cblklast; ii++, cblk++) {
316 if ( (cblk->
cblktype & CBLK_IN_SCHUR) && (enums->mode != PastixSolvModeSchur) ) {
322 if ( !(cblk->
ctrbcnt) && !(cblk->
cblktype & (CBLK_FANIN | CBLK_RECV)) ) {
326 isched_barrier_wait( &(ctx->global_ctx->barrier) );
328 while( arg->taskcnt > 0 ) {
331 #if defined(PASTIX_WITH_MPI)
334 cpucblk_zmpi_rhs_bwd_progress( enums, datacode, rhsb, thrd_rank );
340 if ( cblknum == -1 ) {
341 if ( local_taskcnt ) {
342 pastix_atomic_sub_32b( &(arg->taskcnt), local_taskcnt );
345 cblknum =
stealQueue( datacode, thrd_rank, thrd_size );
349 if ( cblknum == -1 ) {
353 cblk = datacode->
cblktab + cblknum;
364 cblk = datacode->
cblktab + cblkfirst;
365 for (ii=cblkfirst; ii<cblklast; ii++, cblk++) {
368 if (!(cblk->
cblktype & (CBLK_FANIN|CBLK_RECV)) ) {
373 isched_barrier_wait( &(ctx->global_ctx->barrier) );
375 while( arg->taskcnt > 0 ) {
378 #if defined(PASTIX_WITH_MPI)
381 cpucblk_zmpi_rhs_fwd_progress( enums, datacode, rhsb, thrd_rank );
387 if ( cblknum == -1 ) {
388 if ( local_taskcnt ) {
389 pastix_atomic_sub_32b( &(arg->taskcnt), local_taskcnt );
397 if ( cblknum == -1 ) {
401 cblk = datacode->
cblktab + cblknum;
404 if ( (cblk->
cblktype & CBLK_IN_SCHUR) &&
405 (enums->mode != PastixSolvModeSchur) ) {
415 isched_barrier_wait( &(ctx->global_ctx->barrier) );
416 assert( computeQueue->
used == 0 );
418 memFree_null( computeQueue );
447 sopalin_data_t *sopalin_data,
451 int32_t taskcnt = datacode->tasknbr - (datacode->
cblknbr - datacode->
cblkschur);
452 struct args_ztrsm_t args_ztrsm = { pastix_data, enums, sopalin_data, rhsb, taskcnt };
455 if ( enums->solve_step == PastixSolveBackward ) {
460 MALLOC_INTERN( datacode->computeQueue,
465 memFree_null( datacode->computeQueue );
468 #if defined(PASTIX_WITH_MPI)
493 sopalin_data_t *sopalin_data,
501 coeftab_gather( datacode, datacode->solv_comm, 0, PastixComplex64 );
503 if ( sopalin_data->solvmtx->clustnum == 0 ) {
506 if ( enums->solve_step == PastixSolveBackward ) {
507 cblknbr = (enums->mode == PastixSolvModeLocal) ? datacode->
cblkschur : datacode->
cblknbr;
509 cblk = datacode->
cblktab + cblknbr - 1;
510 for ( i=0; i<cblknbr; i++, cblk-- ) {
511 assert( !(cblk->
cblktype & (CBLK_FANIN | CBLK_RECV)) );
517 cblknbr = (enums->mode == PastixSolvModeSchur) ? datacode->
cblknbr : datacode->
cblkschur;
519 for (i=0; i<cblknbr; i++, cblk++){
525 coeftab_nullify( datacode );
528 memset( rhsb->
b, 0, rhsb->
ld * rhsb->
n *
sizeof(pastix_complex64_t) );
535 #ifndef DOXYGEN_SHOULD_SKIP_THIS
541 #if defined(PASTIX_WITH_PARSEC)
546 #if defined(PASTIX_WITH_STARPU)
596 sopalin_data_t *sopalin_data,
601 sopalin_data_t *,
pastix_rhs_t ) = ztrsm_table[ sched ];
605 enum_list->solve_step = solve_step;
607 enum_list->side = side;
608 enum_list->uplo = uplo;
609 enum_list->trans = trans;
610 enum_list->diag = diag;
618 #if defined ( PASTIX_WITH_MPI )
621 ztrsm = runtime_ztrsm;
633 enum_list->sched = sched;
634 ztrsm( pastix_data, enum_list, sopalin_data, rhsb );
639 if ( solve_step == PastixSolveForward ) {
649 #if defined(PASTIX_WITH_MPI)
BEGIN_C_DECLS typedef int pastix_int_t
void bvec_zallreduce(const pastix_data_t *pastix_data, pastix_complex64_t *y)
Apply an all reduce of the vector on all nodes.
volatile pastix_int_t used
static void pqueuePush1(pastix_queue_t *q, pastix_int_t elt, double key1)
Push an element with a single key.
void pqueueExit(pastix_queue_t *)
Free the structure associated to the queue.
static pastix_int_t pqueuePop(pastix_queue_t *q)
Pop the head of the queue without returning the keys.
int pqueueInit(pastix_queue_t *, pastix_int_t)
Initialize the queue structure with an initial space to store the elements.
void solverRequestExit(SolverMatrix *solvmtx)
Free the arrays related to the requests.
void solverRhsRecvExit(SolverMatrix *solvmtx)
Frees the array linked to pending reception.
void solverRhsRecvInit(solve_step_t solve_step, SolverMatrix *solvmtx, pastix_coeftype_t flttype, pastix_rhs_t rhsb)
Allocates the reception buffer and initiates the first persistent reception.
void solverRequestInit(solve_step_t solve_step, SolverMatrix *solvmtx)
Instantiate the arrays for the requests according to the scheduler.
void cpucblk_zrequest_rhs_bwd_cleanup(const args_solve_t *enums, pastix_int_t sched, SolverMatrix *solvmtx, pastix_rhs_t rhsb)
Waitall routine for current cblk request.
void cpucblk_zsend_rhs_backward(const SolverMatrix *solvmtx, SolverCblk *cblk, pastix_rhs_t rhsb)
Send the rhs associated with a cblk->lcolidx to the remote node.
int cpucblk_zincoming_rhs_fwd_deps(int rank, const args_solve_t *enums, SolverMatrix *solvmtx, SolverCblk *cblk, pastix_rhs_t rhsb)
Wait for incoming dependencies, and return when cblk->ctrbcnt has reached 0.
void cpucblk_zrequest_rhs_fwd_cleanup(const args_solve_t *enums, pastix_int_t sched, SolverMatrix *solvmtx, pastix_rhs_t rhsb)
Waitall routine for current cblk request.
void cpucblk_zsend_rhs_forward(const SolverMatrix *solvmtx, SolverCblk *cblk, pastix_rhs_t rhsb)
Send the rhs associated with a cblk->lcolidx to the remote node.
int cpucblk_zincoming_rhs_bwd_deps(int rank, const args_solve_t *enums, SolverMatrix *solvmtx, SolverCblk *cblk, pastix_rhs_t rhsb)
Wait for incoming dependencies, and return when cblk->ctrbcnt has reached 0.
void cpucblk_zrecv_rhs_backward(const SolverMatrix *solvmtx, SolverCblk *cblk, pastix_rhs_t rhsb)
Receive the rhs associated with a cblk->lcolidx from the remote node.
void cpucblk_zrecv_rhs_forward(const SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex64_t *work, pastix_rhs_t rhsb)
Receive the rhs associated with a cblk->lcolidx from the remote node.
void solve_cblk_ztrsmsp_forward(const args_solve_t *enums, SolverMatrix *datacode, const SolverCblk *cblk, pastix_rhs_t rhsb)
Apply a forward solve related to one cblk to all the right hand side.
void solve_cblk_ztrsmsp_backward(const args_solve_t *enums, SolverMatrix *datacode, SolverCblk *cblk, pastix_rhs_t rhsb)
Apply a backward solve related to one cblk to all the right hand side.
enum pastix_diag_e pastix_diag_t
Diagonal.
enum pastix_uplo_e pastix_uplo_t
Upper/Lower part.
enum pastix_side_e pastix_side_t
Side of the operation.
enum pastix_trans_e pastix_trans_t
Transposition.
PASTIX_Comm inter_node_comm
Main PaStiX data structure.
Main PaStiX RHS structure.
void starpu_ztrsm(pastix_data_t *pastix_data, const args_solve_t *enums, sopalin_data_t *sopalin_data, pastix_rhs_t rhsb)
Apply the TRSM solve (StarPU version).
void thread_ztrsm_dynamic(isched_thread_t *ctx, void *args)
Applies the Dynamic Forward or Backward solve.
void thread_ztrsm_static(isched_thread_t *ctx, void *args)
Applies the Static Forward or Backward solve.
void dynamic_ztrsm(pastix_data_t *pastix_data, const args_solve_t *enums, sopalin_data_t *sopalin_data, pastix_rhs_t rhsb)
Applies the Dynamic Forward or Backward solve.
void static_ztrsm(pastix_data_t *pastix_data, const args_solve_t *enums, sopalin_data_t *sopalin_data, pastix_rhs_t rhsb)
Applies the Static Forward or Backward solve.
void sequential_ztrsm(pastix_data_t *pastix_data, const args_solve_t *enums, sopalin_data_t *sopalin_data, pastix_rhs_t rhsb)
Applies the Sequential Forward or Backward solve.
void sopalin_ztrsm(pastix_data_t *pastix_data, pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, sopalin_data_t *sopalin_data, pastix_rhs_t rhsb)
Calls the sequential, static, dynamic or runtime solve according to scheduler.
static pastix_int_t stealQueue(SolverMatrix *solvmtx, int rank, int nbthreads)
Task stealing method.
static solve_step_t compute_solve_step(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans)
Computes the current solve step.
SolverCblk *restrict cblktab
enum solve_step_e solve_step_t
Tags used in MPI communications.
Solver column block structure.
Solver column block structure.
The task structure for the numerical factorization.