18 #include "common/common.h"
27 #if defined( PASTIX_WITH_MPI )
58 assert( colnbr <= solvmtx->colmax );
59 assert( cblk->
cblktype & CBLK_RECV );
61 #if defined (PASTIX_DEBUG_MPI)
62 fprintf( stderr,
"[%2d] RHS Bwd: Post Isend cblk %ld to %2d at index %ld of size %ld\n",
64 (
long)cblk->
lcolidx, (
long)colnbr );
72 assert( rhsb->
cblkb[ idx ] == NULL );
74 rhsb->
cblkb[ idx ] = malloc( size *
sizeof(
double) );
76 rc = LAPACKE_dlacpy_work( LAPACK_COL_MAJOR,
'A', colnbr, rhsb->
n,
77 b, rhsb->
ld, rhsb->
cblkb[ idx ], colnbr );
80 b = rhsb->
cblkb[ idx ];
83 rc = MPI_Isend( b, size, PASTIX_MPI_DOUBLE,
85 assert( rc == MPI_SUCCESS );
87 solverCommMatrixAdd( solvmtx, cblk->
ownerid, size *
sizeof(
double) );
90 pastix_atomic_lock( &(solvmtx->
reqlock) );
93 assert( solvmtx->
reqnum >= 0 );
100 pastix_atomic_unlock( &(solvmtx->
reqlock) );
127 cpucblk_drequest_rhs_bwd_handle_send(
const args_solve_t *enums,
134 assert( cblk->
cblktype & CBLK_RECV );
135 assert( enums->solve_step == PastixSolveBackward );
137 #if defined(PASTIX_DEBUG_MPI)
139 size_t cblksize =
cblk_colnbr( cblk ) * rhsb->
n *
sizeof(double);
141 fprintf( stderr,
"[%2d] RHS Bwd: Isend for cblk %ld toward %2d ( %ld Bytes ) (DONE)\n",
142 solvmtx->clustnum, (
long)cblk->
gcblknum, cblk->
ownerid, (
long)cblksize );
146 if ( rhsb->
cblkb[ idx ] ) {
147 memFree_null( rhsb->
cblkb[ idx ] );
181 cpucblk_drequest_rhs_bwd_handle_recv(
const args_solve_t *enums,
185 const MPI_Status *status,
189 int src = status->MPI_SOURCE;
190 int tag = status->MPI_TAG;
193 assert( ( 0 <= src ) && ( src < solvmtx->clustnbr ) );
194 assert( ( 0 <= tag ) && ( tag < solvmtx->gcblknbr ) );
200 assert( cblk->
cblktype & CBLK_FANIN );
202 #if defined(PASTIX_DEBUG_MPI)
210 size = colnbr * rhsb->
n *
sizeof(double);
212 rc = MPI_Get_count( status, MPI_CHAR, &count );
213 assert( rc == MPI_SUCCESS );
214 assert( count == size );
217 fprintf( stderr,
"[%2d] RHS Bwd : recv of size %d/%ld for cblk %ld (DONE)\n",
218 solvmtx->clustnum, count, (
long)size, (
long)cblk->
gcblknum );
223 rhsb->
cblkb[ idx ] = recvbuf;
226 if ( solvmtx->computeQueue ) {
236 assert( rhsb->
cblkb[ idx ] == NULL );
280 cpucblk_drequest_rhs_bwd_handle(
const args_solve_t *enums,
286 const MPI_Status *statuses )
289 int nbrequest = outcount;
291 for ( i = 0; i < outcount; i++ ) {
297 if ( solvmtx->
reqidx[reqid] == -1 ) {
303 memcpy( &status, statuses + i,
sizeof(MPI_Status) );
304 MPI_Get_count( &status, PASTIX_MPI_DOUBLE, &size );
306 MALLOC_INTERN( recvbuf, size,
double );
307 memcpy( recvbuf, solvmtx->
rcoeftab, size *
sizeof(
double) );
314 MPI_Start( solvmtx->
reqtab + reqid );
318 MPI_Request_free( solvmtx->
reqtab + reqid );
319 solvmtx->
reqtab[reqid] = MPI_REQUEST_NULL;
322 cpucblk_drequest_rhs_bwd_handle_recv( enums, solvmtx, rhsb,
323 threadid, &status, recvbuf );
330 assert( cblk->
cblktype & CBLK_RECV );
332 cpucblk_drequest_rhs_bwd_handle_send( enums, solvmtx, rhsb, cblk );
335 solvmtx->
reqidx[ reqid ] = -1;
371 cpucblk_dmpi_rhs_bwd_progress(
const args_solve_t *enums,
376 pthread_t tid = pthread_self();
378 int nbrequest, nbfree;
379 int indexes[ solvmtx->
reqnbr ];
380 MPI_Status statuses[ solvmtx->
reqnbr ];
383 pthread_mutex_lock( &pastix_comm_lock );
384 if ( pastix_comm_tid == (pthread_t)-1 ) {
385 pastix_comm_tid = tid;
387 pthread_mutex_unlock( &pastix_comm_lock );
389 if ( tid != pastix_comm_tid ) {
403 pastix_atomic_lock( &(solvmtx->
reqlock) );
404 nbrequest = solvmtx->
reqnum;
405 pastix_atomic_unlock( &(solvmtx->
reqlock) );
407 while( (outcount > 0) && (nbrequest > 0) )
409 MPI_Testsome( nbrequest, solvmtx->
reqtab, &outcount, indexes, statuses );
413 if ( outcount > 0 ) {
414 nbfree = cpucblk_drequest_rhs_bwd_handle( enums, solvmtx, rhsb, threadid,
415 outcount, indexes, statuses );
422 pastix_atomic_lock( &(solvmtx->
reqlock) );
424 cpucblk_dupdate_reqtab( solvmtx );
426 nbrequest = solvmtx->
reqnum;
427 pastix_atomic_unlock( &(solvmtx->
reqlock) );
430 pastix_comm_tid = (pthread_t)-1;
471 #if defined(PASTIX_WITH_MPI)
472 if ( cblk->
cblktype & CBLK_FANIN ) {
487 cpucblk_dmpi_rhs_bwd_progress( enums, solvmtx, rhsb, rank );
490 assert( !(cblk->
cblktype & (CBLK_FANIN | CBLK_RECV)) );
491 do { pastix_yield(); }
while( cblk->
ctrbcnt > 0 );
537 ctrbcnt = pastix_atomic_dec_32b( &(fcbk->
ctrbcnt) );
539 #if defined(PASTIX_WITH_MPI)
541 cpucblk_disend_rhs_bwd( solvmtx, rhsb, fcbk );
547 if ( solvmtx->computeQueue ) {
596 #if defined(PASTIX_WITH_MPI)
600 int reqnbr = solvmtx->
reqnum;
603 #if defined(PASTIX_DEBUG_MPI)
604 fprintf( stderr,
"[%2d] Wait for all pending communications\n",
608 for ( i=0; i<reqnbr; i++ ) {
609 if ( solvmtx->
reqtab[i] == MPI_REQUEST_NULL ) {
616 assert( solvmtx->
reqidx[i] != -1 );
618 rc = MPI_Wait( solvmtx->
reqtab + i, &status );
619 assert( rc == MPI_SUCCESS );
624 assert( cblk->
cblktype & CBLK_RECV );
626 cpucblk_drequest_rhs_bwd_handle_send( enums, solvmtx, rhsb, cblk );
630 assert( solvmtx->
reqnum == 0 );
BEGIN_C_DECLS typedef int pastix_int_t
static void pqueuePush1(pastix_queue_t *q, pastix_int_t elt, double key1)
Push an element with a single key.
void cpucblk_drequest_rhs_bwd_cleanup(const args_solve_t *enums, pastix_int_t sched, SolverMatrix *solvmtx, pastix_rhs_t rhsb)
Waitall routine for current cblk request.
int cpucblk_dincoming_rhs_bwd_deps(int rank, const args_solve_t *enums, SolverMatrix *solvmtx, SolverCblk *cblk, pastix_rhs_t rhsb)
Wait for incoming dependencies, and return when cblk->ctrbcnt has reached 0.
void cpucblk_drelease_rhs_bwd_deps(const args_solve_t *enums, SolverMatrix *solvmtx, pastix_rhs_t rhsb, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
void solve_cblk_dtrsmsp_backward(const args_solve_t *enums, SolverMatrix *datacode, SolverCblk *cblk, pastix_rhs_t rhsb)
Apply a backward solve related to one cblk to all the right hand side.
Main PaStiX RHS structure.
pastix_atomic_lock_t reqlock
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
SolverBlok *restrict bloktab
pastix_int_t *restrict browtab
SolverCblk *restrict cblktab
Solver column block structure.
Solver column block structure.