#include "common/common.h"
/* cpucblk_dsend_rhs_forward(): blocking send, during the forward solve, of the
 * RHS contribution attached to a CBLK_FANIN column block. The buffer is already
 * contiguous and is freed once the send completes. */
#if defined(PASTIX_WITH_MPI)
    assert( colnbr <= solvmtx->colmax );
    assert( cblk->cblktype & CBLK_FANIN );
    assert( rhsb->cblkb[ idx ] != NULL );

#if defined(PASTIX_DEBUG_MPI)
    fprintf( stderr, "[%2d] RHS Fwd: Send cblk %ld to %2d at index %ld of size %ld\n",
             /* ... */ (long)cblk->lcolidx, (long)colnbr );
#endif

    b = (double*)(rhsb->cblkb[ idx ]);

    rc = MPI_Send( b, size, PASTIX_MPI_DOUBLE, /* ... */ );
    assert( rc == MPI_SUCCESS );

    memFree_null( rhsb->cblkb[ idx ] );
/* cpucblk_dsend_rhs_backward(): blocking send, during the backward solve, of the
 * RHS block attached to a CBLK_RECV column block. The block is first packed into
 * a contiguous temporary buffer with LAPACKE_dlacpy_work(). */
#if defined(PASTIX_WITH_MPI)
    assert( colnbr <= solvmtx->colmax );
    assert( cblk->cblktype & CBLK_RECV );
    assert( rhsb->cblkb[ idx ] == NULL );

#if defined(PASTIX_DEBUG_MPI)
    fprintf( stderr, "[%2d] RHS Bwd: Send cblk %ld to %2d at index %ld of size %ld\n",
             /* ... */ (long)cblk->lcolidx, (long)colnbr );
#endif

    rhsb->cblkb[ idx ] = malloc( colnbr * rhsb->n * sizeof(double) );
    rc = LAPACKE_dlacpy_work( LAPACK_COL_MAJOR, 'A', colnbr, rhsb->n,
                              b, rhsb->ld, rhsb->cblkb[ idx ], colnbr );
    b = rhsb->cblkb[ idx ];

    rc = MPI_Send( b, colnbr * rhsb->n, PASTIX_MPI_DOUBLE, /* ... */ );
    assert( rc == MPI_SUCCESS );

    memFree_null( rhsb->cblkb[ idx ] );
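The backward send above packs the column block into a contiguous scratch buffer with LAPACKE_dlacpy_work() before the blocking MPI_Send(). Below is a minimal stand-alone sketch of that pack-and-send step using only standard LAPACKE and MPI calls; the function name, the dest/tag parameters and the error handling are illustrative, not PaStiX code.

#include <stdlib.h>
#include <lapacke.h>
#include <mpi.h>

/* Pack an m-by-n block stored with leading dimension ld into a contiguous
 * m-by-n buffer, then send it as a single MPI message.
 * 'dest' and 'tag' are placeholders for the receiver rank and message tag. */
static int pack_and_send( const double *b, int m, int n, int ld,
                          int dest, int tag, MPI_Comm comm )
{
    double *tmp = malloc( (size_t)m * (size_t)n * sizeof(double) );
    if ( tmp == NULL ) {
        return -1;
    }

    /* 'A' copies the full rectangle; the output leading dimension is m,
     * so the block becomes contiguous in memory. */
    LAPACKE_dlacpy_work( LAPACK_COL_MAJOR, 'A', m, n, b, ld, tmp, m );

    int rc = MPI_Send( tmp, m * n, MPI_DOUBLE, dest, tag, comm );

    free( tmp );
    return ( rc == MPI_SUCCESS ) ? 0 : -1;
}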
/* cpucblk_drecv_rhs_backward(): blocking receive, during the backward solve, of
 * the RHS block of a CBLK_FANIN column block into rhsb->cblkb[ idx ]. */
#if defined(PASTIX_WITH_MPI)
    assert( colnbr <= solvmtx->colmax );
    assert( cblk->cblktype & CBLK_FANIN );

#if defined(PASTIX_DEBUG_MPI)
    fprintf( stderr, "[%2d] RHS Bwd: Recv cblk %ld from %ld at index %ld of size %ld\n",
             /* ... */ (long)cblk->lcolidx, (long)colnbr );
#endif

    assert( rhsb->cblkb[ idx ] == NULL );
    rhsb->cblkb[ idx ] = malloc( colnbr * rhsb->n * sizeof(double) );
    rc = MPI_Recv( rhsb->cblkb[ idx ], colnbr * rhsb->n, PASTIX_MPI_DOUBLE, /* ... */ );
    assert( rc == MPI_SUCCESS );

#if defined(PASTIX_DEBUG_MPI)
    fprintf( stderr, "[%2d] RHS Bwd: Received cblk %ld from %2d\n",
             solvmtx->clustnum, (long)cblk->gcblknum, status.MPI_SOURCE );
#endif
/* cpucblk_drecv_rhs_forward(): blocking receive, during the forward solve, of the
 * RHS contribution of a CBLK_RECV column block into the caller-provided work buffer. */
#if defined(PASTIX_WITH_MPI)
    assert( colnbr <= solvmtx->colmax );
    assert( cblk->cblktype & CBLK_RECV );

#if defined(PASTIX_DEBUG_MPI)
    fprintf( stderr, "[%2d] RHS Fwd: Recv cblk %ld from %ld at index %ld of size %ld\n",
             /* ... */ (long)cblk->lcolidx, (long)colnbr );
#endif

    rc = MPI_Recv( work, colnbr * rhsb->n, PASTIX_MPI_DOUBLE, /* ... */ );
    assert( rc == MPI_SUCCESS );

#if defined(PASTIX_DEBUG_MPI)
    fprintf( stderr, "[%2d] RHS Fwd: Received cblk %ld from %2d\n",
             solvmtx->clustnum, (long)cblk->gcblknum, status.MPI_SOURCE );
#endif
/* cpucblk_disend_rhs_fwd(): post a non-blocking send of a CBLK_FANIN RHS
 * contribution. The request is then registered under solvmtx->reqlock so that
 * the communication progress loop can complete it later. */
#if defined( PASTIX_WITH_MPI )
    assert( colnbr <= solvmtx->colmax );
    assert( cblk->cblktype & CBLK_FANIN );
    assert( rhsb->cblkb[ idx ] != NULL );

#if defined(PASTIX_DEBUG_MPI)
    fprintf( stderr, "[%2d] RHS Fwd: Post Isend cblk %ld to %2d at index %ld of size %ld\n",
             /* ... */ (long)cblk->lcolidx, (long)(size * sizeof(double)) );
#endif

    b = (double*)(rhsb->cblkb[ idx ]);

    rc = MPI_Isend( b, size, PASTIX_MPI_DOUBLE, cblk->ownerid, cblk->gcblknum,
                    solvmtx->solv_comm, &request );
    assert( rc == MPI_SUCCESS );

    solverCommMatrixAdd( solvmtx, cblk->ownerid, size * sizeof(double) );

    pastix_atomic_lock( &(solvmtx->reqlock) );
    /* ... */
    assert( solvmtx->reqnum >= 0 );
    /* ... */
    pastix_atomic_unlock( &(solvmtx->reqlock) );
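The non-blocking variant above posts an MPI_Isend() and then records the request, under solvmtx->reqlock, so that a progress loop can complete it later. The sketch below shows that "post and remember" pattern, assuming a plain mutex-protected fixed-size table in place of the solver's reqtab/reqnum bookkeeping; req_table_t, REQ_MAX and post_isend are illustrative names.

#include <pthread.h>
#include <mpi.h>

#define REQ_MAX 128

/* Illustrative request table; the real solver uses solvmtx->reqtab/reqnum
 * guarded by a pastix_atomic_lock_t. */
typedef struct {
    pthread_mutex_t lock;
    MPI_Request     reqtab[REQ_MAX];
    int             reqnum;
} req_table_t;

/* Post a non-blocking send and record the request so that a progress loop
 * can later complete it with MPI_Testsome() or MPI_Wait(). */
static int post_isend( req_table_t *tab, const double *buf, int count,
                       int dest, int tag, MPI_Comm comm )
{
    MPI_Request request;
    int rc = MPI_Isend( buf, count, MPI_DOUBLE, dest, tag, comm, &request );
    if ( rc != MPI_SUCCESS ) {
        return -1;
    }

    pthread_mutex_lock( &tab->lock );
    if ( tab->reqnum < REQ_MAX ) {
        tab->reqtab[ tab->reqnum++ ] = request; /* remember it for later progress */
    }
    /* A real implementation would handle table overflow instead of dropping. */
    pthread_mutex_unlock( &tab->lock );
    return 0;
}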
/* Completion handler called once the Isend of a FANIN RHS buffer is done:
 * the buffer can then be released. */
cpucblk_drequest_rhs_fwd_handle_send( const args_solve_t *enums,
                                      /* ... */ )
{
    assert( cblk->cblktype & CBLK_FANIN );
    assert( enums->solve_step == PastixSolveForward );

#if defined(PASTIX_DEBUG_MPI)
    size_t cblksize = cblk_colnbr( cblk ) * rhsb->n * sizeof(double);

    fprintf( stderr, "[%2d] RHS Fwd: Isend for cblk %ld toward %2d ( %ld Bytes ) (DONE)\n",
             solvmtx->clustnum, (long)cblk->gcblknum, cblk->ownerid, (long)cblksize );
#endif

    memFree_null( rhsb->cblkb[ idx ] );
/* Completion handler for an RHS reception: identify the matching CBLK_RECV
 * column block from the message source and tag, accumulate the contribution
 * into the local cblk (elided here), and release the reception buffer. */
cpucblk_drequest_rhs_fwd_handle_recv( const args_solve_t *enums,
                                      /* ... */
                                      const MPI_Status *status,
                                      /* ... */ )
{
    int src = status->MPI_SOURCE;
    int tag = status->MPI_TAG;

    assert( ( 0 <= src ) && ( src < solvmtx->clustnbr ) );
    assert( ( 0 <= tag ) && ( tag < solvmtx->gcblknbr ) );

    /* Look for the reception cblk that matches the source of the message. */
    while( cblk->ownerid != src ) {
        /* ... */
        assert( cblk >= solvmtx->cblktab );
        /* ... */
        assert( cblk->cblktype & CBLK_RECV );
    }

#if defined(PASTIX_DEBUG_MPI)
    rc = MPI_Get_count( status, MPI_CHAR, &count );
    assert( rc == MPI_SUCCESS );
    assert( count == size );

    fprintf( stderr, "[%2d] RHS Fwd : recv of size %d/%ld for cblk %ld (DONE)\n",
             solvmtx->clustnum, count, (long)size, (long)cblk->gcblknum );
#endif

    pastix_cblk_lock( fcbk );
    /* ... */
    pastix_cblk_unlock( fcbk );

    memFree_null( recvbuf );
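The debug path above cross-checks the received message length with MPI_Get_count() counted in MPI_CHAR, i.e. in bytes. The small helper below shows the same check in isolation; check_recv_size is an illustrative name.

#include <stddef.h>
#include <mpi.h>

/* Return non-zero if the message described by 'status' carries exactly
 * 'expected_bytes' bytes. Counting in MPI_CHAR yields the size in bytes. */
static int check_recv_size( const MPI_Status *status, size_t expected_bytes )
{
    int count = 0;
    if ( MPI_Get_count( status, MPI_CHAR, &count ) != MPI_SUCCESS ) {
        return 0;
    }
    return (size_t)count == expected_bytes;
}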
/* Dispatch the requests completed by MPI_Testsome(): reqidx[reqid] == -1
 * identifies a reception, anything else a FANIN send whose buffer can now
 * be released. */
cpucblk_drequest_rhs_fwd_handle( const args_solve_t *enums,
                                 /* ... */
                                 const MPI_Status *statuses )
{
    int nbrequest = outcount;

    for ( i = 0; i < outcount; i++ ) {
        /* ... */

        /* A reception: copy the payload out of the reception buffer before
         * handling the message. */
        if ( solvmtx->reqidx[reqid] == -1 ) {
            /* ... */
            memcpy( &status, statuses + i, sizeof(MPI_Status) );
            MPI_Get_count( &status, PASTIX_MPI_DOUBLE, &size );

            MALLOC_INTERN( recvbuf, size, double );
            memcpy( recvbuf, solvmtx->rcoeftab, size * sizeof(double) );

            /* ... */
            assert( solvmtx->recvcnt >= 0 );

            /* Either re-arm the persistent reception request, or free it
             * when no further receptions are expected. */
            /* ... */
            MPI_Start( solvmtx->reqtab + reqid );
            /* ... */
            MPI_Request_free( solvmtx->reqtab + reqid );
            solvmtx->reqtab[reqid] = MPI_REQUEST_NULL;
            /* ... */

            cpucblk_drequest_rhs_fwd_handle_recv( enums, solvmtx, rhsb,
                                                  threadid, &status, recvbuf );
        }
        /* A send: the FANIN buffer can now be released and the slot freed. */
        else {
            /* ... */
            assert( cblk->cblktype & CBLK_FANIN );
            /* ... */
            cpucblk_drequest_rhs_fwd_handle_send( enums, solvmtx, rhsb, cblk );
            /* ... */
            solvmtx->reqidx[ reqid ] = -1;
        }
    }
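The reception branch above copies the payload out of solvmtx->rcoeftab and then calls MPI_Start() on the same request slot, which is the usual way of re-arming a persistent receive. The sketch below illustrates that persistent-receive cycle under the assumption that the request was created with MPI_Recv_init(); the buffer management and the Testsome-based completion of the real code are simplified to a single MPI_Wait().

#include <stdlib.h>
#include <string.h>
#include <mpi.h>

/* Illustrative persistent-receive loop: initialise the request once, then
 * copy the payload out of the fixed reception buffer and restart the same
 * request after every completion. */
static void persistent_recv_loop( double *rcoeftab, int maxcount,
                                  MPI_Comm comm, int iterations )
{
    MPI_Request req;
    MPI_Status  status;

    /* Match any source/tag; a real solver would identify the column block
     * from the tag afterwards. */
    MPI_Recv_init( rcoeftab, maxcount, MPI_DOUBLE,
                   MPI_ANY_SOURCE, MPI_ANY_TAG, comm, &req );

    for ( int i = 0; i < iterations; i++ ) {
        MPI_Start( &req );          /* arm the persistent receive            */
        MPI_Wait( &req, &status );  /* or complete it via MPI_Testsome()     */

        int count = 0;
        MPI_Get_count( &status, MPI_DOUBLE, &count );

        double *copy = malloc( (size_t)count * sizeof(double) );
        if ( copy != NULL ) {
            memcpy( copy, rcoeftab, (size_t)count * sizeof(double) ); /* detach payload */
            /* hand 'copy' to the computation, then: */
            free( copy );
        }
    }

    MPI_Request_free( &req );
}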
/* Progress the pending RHS communications: a single elected thread tests the
 * outstanding requests and dispatches the completed ones. */
cpucblk_dmpi_rhs_fwd_progress( const args_solve_t *enums,
                               /* ... */ )
{
    pthread_t  tid = pthread_self();
    int        nbrequest, nbfree;
    int        indexes [ solvmtx->reqnbr ];
    MPI_Status statuses[ solvmtx->reqnbr ];

    /* Only one thread at a time is allowed to progress the communications. */
    pthread_mutex_lock( &pastix_comm_lock );
    if ( pastix_comm_tid == (pthread_t)-1 ) {
        pastix_comm_tid = tid;
    }
    pthread_mutex_unlock( &pastix_comm_lock );

    if ( tid != pastix_comm_tid ) {
        /* ... */
    }

    pastix_atomic_lock( &(solvmtx->reqlock) );
    nbrequest = solvmtx->reqnum;
    pastix_atomic_unlock( &(solvmtx->reqlock) );

    while( (outcount > 0) && (nbrequest > 0) )
    {
        MPI_Testsome( nbrequest, solvmtx->reqtab, &outcount, indexes, statuses );
        /* ... */
        if ( outcount > 0 ) {
            nbfree = cpucblk_drequest_rhs_fwd_handle( enums, solvmtx, rhsb, threadid,
                                                      outcount, indexes, statuses );
        }
        /* ... */
        pastix_atomic_lock( &(solvmtx->reqlock) );
        /* ... */
        cpucblk_dupdate_reqtab( solvmtx );
        /* ... */
        nbrequest = solvmtx->reqnum;
        pastix_atomic_unlock( &(solvmtx->reqlock) );
    }

    pastix_comm_tid = (pthread_t)-1;
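The progress routine drains completed requests with MPI_Testsome() until nothing more finishes. Below is a reduced sketch of that loop, without the locking, the request-table compaction and the send/receive dispatch of the real code; progress_requests and handle_one are illustrative names.

#include <mpi.h>

/* Test all outstanding requests at once and process whatever has completed.
 * 'handle_one' is a placeholder for the per-request completion handler. */
static void progress_requests( MPI_Request *reqtab, int reqnum,
                               void (*handle_one)( int index, const MPI_Status *status ) )
{
    if ( reqnum <= 0 ) {
        return;
    }

    int        outcount = 1;
    int        indexes [ reqnum ];
    MPI_Status statuses[ reqnum ];

    while ( outcount > 0 ) {
        /* Completed non-persistent requests are set to MPI_REQUEST_NULL. */
        MPI_Testsome( reqnum, reqtab, &outcount, indexes, statuses );

        if ( outcount == MPI_UNDEFINED ) { /* no active requests left */
            break;
        }
        for ( int i = 0; i < outcount; i++ ) {
            handle_one( indexes[i], statuses + i );
        }
    }
}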
/* cpucblk_dincoming_rhs_fwd_deps(): progress the communications until every
 * contribution to the cblk has arrived, i.e. cblk->ctrbcnt reaches 0. */
#if defined(PASTIX_WITH_MPI)
    if ( cblk->cblktype & CBLK_FANIN ) {
    /* ... */
    cpucblk_dmpi_rhs_fwd_progress( enums, solvmtx, rhsb, rank );
    /* ... */
    assert( !(cblk->cblktype & (CBLK_FANIN | CBLK_RECV)) );
    do { pastix_yield(); } while( cblk->ctrbcnt > 0 );
/* cpucblk_drelease_rhs_fwd_deps(): decrement the contribution counter of the
 * target cblk and, for a FANIN block in the forward step, post the
 * non-blocking send once all contributions are in. */
    ctrbcnt = pastix_atomic_dec_32b( &(fcbk->ctrbcnt) );
    /* ... */
#if defined(PASTIX_WITH_MPI)
    if ( ( fcbk->cblktype & CBLK_FANIN ) &&
         ( enums->solve_step == PastixSolveForward ) ) {
        cpucblk_disend_rhs_fwd( solvmtx, rhsb, fcbk );
    /* ... */
    if ( solvmtx->computeQueue ) {
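Dependency tracking in the two excerpts above boils down to an atomic contribution counter: each contributor decrements fcbk->ctrbcnt, and the consumer yields until it reaches zero. The sketch below shows that counter with C11 atomics and sched_yield(); it is a generic illustration, not the PaStiX atomic API.

#include <stdatomic.h>
#include <sched.h>

/* Illustrative dependency counter: pastix_atomic_dec_32b() and
 * pastix_yield() play these roles in the real code. */
typedef struct {
    atomic_int ctrbcnt; /* number of contributions still expected */
} dep_counter_t;

/* Called by a contributor once its update has been applied.
 * Returns the counter value after the decrement. */
static int dep_release_one( dep_counter_t *dc )
{
    return atomic_fetch_sub_explicit( &dc->ctrbcnt, 1, memory_order_acq_rel ) - 1;
}

/* Called by the consumer before using the column block. */
static void dep_wait_all( dep_counter_t *dc )
{
    while ( atomic_load_explicit( &dc->ctrbcnt, memory_order_acquire ) > 0 ) {
        sched_yield(); /* stand-in for pastix_yield() */
    }
}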
/* cpucblk_drequest_rhs_fwd_cleanup(): wait for all pending communications
 * before leaving the forward solve. */
#if defined(PASTIX_WITH_MPI)
    int reqnbr = solvmtx->reqnum;

#if defined(PASTIX_DEBUG_MPI)
    fprintf( stderr, "[%2d] Wait for all pending communications\n",
             /* ... */ );
#endif

    for ( i=0; i<reqnbr; i++ ) {
        if ( solvmtx->reqtab[i] == MPI_REQUEST_NULL ) {
            /* ... */
        }

        assert( solvmtx->reqidx[i] != -1 );
        /* ... */
        rc = MPI_Wait( solvmtx->reqtab + i, &status );
        assert( rc == MPI_SUCCESS );
        /* ... */
        assert( cblk->cblktype & CBLK_FANIN );
        /* ... */
        cpucblk_drequest_rhs_fwd_handle_send( enums, solvmtx, rhsb, cblk );
        /* ... */
    }
    /* ... */
    assert( solvmtx->reqnum == 0 );
Referenced symbols:

typedef int pastix_int_t

static void pqueuePush1( pastix_queue_t *q, pastix_int_t elt, double key1 )
    Push an element with a single key.

int core_dgeadd( pastix_trans_t trans, pastix_int_t M, pastix_int_t N, double alpha, const double *A, pastix_int_t LDA, double beta, double *B, pastix_int_t LDB )
    Add two matrices together (a sketch of the assumed operation follows this list).

int cpucblk_dincoming_rhs_fwd_deps( int rank, const args_solve_t *enums, SolverMatrix *solvmtx, SolverCblk *cblk, pastix_rhs_t rhsb )
    Wait for incoming dependencies, and return when cblk->ctrbcnt has reached 0.

void cpucblk_dsend_rhs_backward( const SolverMatrix *solvmtx, SolverCblk *cblk, pastix_rhs_t rhsb )
    Send the rhs associated with a cblk->lcolidx to the remote node.

void cpucblk_drecv_rhs_forward( const SolverMatrix *solvmtx, SolverCblk *cblk, double *work, pastix_rhs_t rhsb )
    Receive the rhs associated with a cblk->lcolidx from the remote node.

void cpucblk_drecv_rhs_backward( const SolverMatrix *solvmtx, SolverCblk *cblk, pastix_rhs_t rhsb )
    Receive the rhs associated with a cblk->lcolidx from the remote node.

void cpucblk_dsend_rhs_forward( const SolverMatrix *solvmtx, SolverCblk *cblk, pastix_rhs_t rhsb )
    Send the rhs associated with a cblk->lcolidx to the remote node.

void cpucblk_drelease_rhs_fwd_deps( const args_solve_t *enums, SolverMatrix *solvmtx, pastix_rhs_t rhsb, const SolverCblk *cblk, SolverCblk *fcbk )
    Release the dependencies of the given cblk after an update.

void cpucblk_drequest_rhs_fwd_cleanup( const args_solve_t *enums, pastix_int_t sched, SolverMatrix *solvmtx, pastix_rhs_t rhsb )
    Waitall routine for current cblk request.

pastix_rhs_t
    Main PaStiX RHS structure.

pastix_atomic_lock_t reqlock

static pastix_int_t cblk_colnbr( const SolverCblk *cblk )
    Compute the number of columns in a column block.

SolverCblk *restrict cblktab
    Solver column block structure.
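As a reading aid, the matrix addition performed by core_dgeadd() corresponds, in the non-transposed case and assuming the usual geadd semantics, to B <- alpha * A + beta * B on column-major blocks. The loop below only illustrates that assumed operation; it is not the PaStiX implementation.

/* Assumed semantics of the non-transposed case:
 *     B <- alpha * A + beta * B
 * with A of leading dimension LDA and B of leading dimension LDB,
 * both column-major. Illustrative only. */
static void geadd_notrans( int M, int N, double alpha, const double *A, int LDA,
                           double beta, double *B, int LDB )
{
    for ( int j = 0; j < N; j++ ) {
        for ( int i = 0; i < M; i++ ) {
            B[ j * LDB + i ] = alpha * A[ j * LDA + i ] + beta * B[ j * LDB + i ];
        }
    }
}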