Source excerpts from the single-complex ("c") RHS communication kernels used in the PaStiX forward and backward solves. The original source line numbers are kept in front of each excerpted line; gaps in the numbering mark omitted code.

20 #include "common/common.h"

Blocking forward send of a fan-in block's RHS contribution (cf. cpucblk_csend_rhs_forward in the symbol index below):

52 #if defined(PASTIX_WITH_MPI)
59     assert( colnbr <= solvmtx->colmax );
60     assert( cblk->cblktype & CBLK_FANIN );
61     assert( rhsb->cblkb[ idx ] != NULL );
63 #if defined (PASTIX_DEBUG_MPI)
64     fprintf( stderr, "[%2d] RHS Fwd: Send cblk %ld to %2d at index %ld of size %ld\n",
66              (long)cblk->lcolidx, (long)colnbr );
72     rc = MPI_Send( b, size, PASTIX_MPI_COMPLEX32,
74     assert( rc == MPI_SUCCESS );
76     memFree_null( rhsb->cblkb[ idx ] );

Blocking backward send of a CBLK_RECV block's RHS, packed into a contiguous buffer before the MPI_Send (cf. cpucblk_csend_rhs_backward):

109 #if defined(PASTIX_WITH_MPI)
115     assert( colnbr <= solvmtx->colmax );
116     assert( cblk->cblktype & CBLK_RECV );
117     assert( rhsb->cblkb[ idx ] == NULL );
119 #if defined (PASTIX_DEBUG_MPI)
120     fprintf( stderr, "[%2d] RHS Bwd: Send cblk %ld to %2d at index %ld of size %ld\n",
122              (long)cblk->lcolidx, (long)colnbr );
128     rc = LAPACKE_clacpy_work( LAPACK_COL_MAJOR, 'A', colnbr, rhsb->n, b, rhsb->ld, rhsb->cblkb[ idx ], colnbr );
130     b = rhsb->cblkb[ idx ];
133     rc = MPI_Send( b, colnbr * rhsb->n, PASTIX_MPI_COMPLEX32,
135     assert( rc == MPI_SUCCESS );
138     memFree_null( rhsb->cblkb[ idx ] );
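
As a standalone illustration of the pack-and-send pattern used above, the following sketch copies a column block of leading dimension ld into a contiguous buffer with LAPACKE_clacpy_work and ships it with a blocking MPI_Send. It is not the PaStiX routine: the name send_packed_block, the plain malloc/free, and the standard MPI_C_FLOAT_COMPLEX datatype (in place of PASTIX_MPI_COMPLEX32) are illustrative choices.

#include <stdlib.h>
#include <mpi.h>
#include <lapacke.h>

/* Hypothetical helper: pack a colnbr x nrhs block stored with leading
 * dimension ld into a contiguous buffer, then send it to rank dest. */
static int
send_packed_block( const lapack_complex_float *b, int colnbr, int nrhs, int ld,
                   int dest, int tag, MPI_Comm comm )
{
    lapack_complex_float *tmp = malloc( (size_t)colnbr * nrhs * sizeof(*tmp) );
    int rc;

    if ( tmp == NULL ) {
        return -1;
    }

    /* Copy the full block ('A') from b (lda = ld) into tmp (lda = colnbr). */
    LAPACKE_clacpy_work( LAPACK_COL_MAJOR, 'A', colnbr, nrhs, b, ld, tmp, colnbr );

    /* Blocking send of the packed coefficients. */
    rc = MPI_Send( tmp, colnbr * nrhs, MPI_C_FLOAT_COMPLEX, dest, tag, comm );

    free( tmp );
    return ( rc == MPI_SUCCESS ) ? 0 : -1;
}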

Blocking backward receive into the fan-in block's RHS buffer (cf. cpucblk_crecv_rhs_backward):

172 #if defined(PASTIX_WITH_MPI)
178     assert( colnbr <= solvmtx->colmax );
179     assert( cblk->cblktype & CBLK_FANIN );
181 #if defined (PASTIX_DEBUG_MPI)
182     fprintf( stderr, "[%2d] RHS Bwd: Recv cblk %ld from %ld at index %ld of size %ld\n",
184              (long)cblk->lcolidx, (long)colnbr );
187     assert( rhsb->cblkb[ idx ] == NULL );
190     rc = MPI_Recv( rhsb->cblkb[ idx ], colnbr * rhsb->n, PASTIX_MPI_COMPLEX32,
192     assert( rc == MPI_SUCCESS );
194 #if defined (PASTIX_DEBUG_MPI)
195     fprintf( stderr, "[%2d] RHS Bwd: Received cblk %ld from %2d\n",
196              solvmtx->clustnum, (long)cblk->gcblknum, status.MPI_SOURCE );

Blocking forward receive into a caller-provided workspace (cf. cpucblk_crecv_rhs_forward):

234 #if defined(PASTIX_WITH_MPI)
240     assert( colnbr <= solvmtx->colmax );
241     assert( cblk->cblktype & CBLK_RECV );
243 #if defined (PASTIX_DEBUG_MPI)
244     fprintf( stderr, "[%2d] RHS Fwd: Recv cblk %ld from %ld at index %ld of size %ld\n",
246              (long)cblk->lcolidx, (long)colnbr );
249     rc = MPI_Recv( work, colnbr * rhsb->n, PASTIX_MPI_COMPLEX32,
251     assert( rc == MPI_SUCCESS );
253 #if defined (PASTIX_DEBUG_MPI)
254     fprintf( stderr, "[%2d] RHS Fwd: Received cblk %ld from %2d\n",
255              solvmtx->clustnum, (long)cblk->gcblknum, status.MPI_SOURCE );
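
A matching receive-side sketch, again outside PaStiX: it posts a blocking MPI_Recv into a caller-provided workspace and cross-checks the message length with MPI_Get_count, as the debug path above does. The wildcard source/tag and the MPI_C_FLOAT_COMPLEX datatype are assumptions made to keep the example self-contained.

#include <assert.h>
#include <complex.h>
#include <mpi.h>

/* Hypothetical helper: receive colnbr x nrhs packed coefficients into work
 * and return the rank that sent them. */
static int
recv_packed_block( float _Complex *work, int colnbr, int nrhs, MPI_Comm comm )
{
    MPI_Status status;
    int        rc, count;

    rc = MPI_Recv( work, colnbr * nrhs, MPI_C_FLOAT_COMPLEX,
                   MPI_ANY_SOURCE, MPI_ANY_TAG, comm, &status );
    assert( rc == MPI_SUCCESS );

    /* The sender must have shipped exactly colnbr * nrhs values. */
    rc = MPI_Get_count( &status, MPI_C_FLOAT_COMPLEX, &count );
    assert( rc == MPI_SUCCESS );
    assert( count == colnbr * nrhs );
    (void)rc;

    return status.MPI_SOURCE;
}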

Non-blocking forward send: the MPI_Isend request is registered in the solver's request table under reqlock so the progress engine can complete it later (cf. cpucblk_cisend_rhs_fwd, called at line 777 below):

272 #if defined( PASTIX_WITH_MPI )
303     assert( colnbr <= solvmtx->colmax );
304     assert( cblk->cblktype & CBLK_FANIN );
305     assert( rhsb->cblkb[ idx ] != NULL );
307 #if defined(PASTIX_DEBUG_MPI)
308     fprintf( stderr, "[%2d] RHS Fwd: Post Isend cblk %ld to %2d at index %ld of size %ld\n",
316     rc = MPI_Isend( b, size, PASTIX_MPI_COMPLEX32, cblk->ownerid, cblk->gcblknum,
317                     solvmtx->solv_comm, &request );
318     assert( rc == MPI_SUCCESS );
323     pastix_atomic_lock( &(solvmtx->reqlock) );
326     assert( solvmtx->reqnum >= 0 );
333     pastix_atomic_unlock( &(solvmtx->reqlock) );
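
A minimal sketch of that post-and-register pattern, with a pthread mutex and a fixed-size array standing in for PaStiX's atomic lock and request table; all names here are illustrative, not the PaStiX API.

#include <assert.h>
#include <pthread.h>
#include <mpi.h>

#define REQTAB_SIZE 128

typedef struct req_table_s {
    pthread_mutex_t lock;                 /* protects reqtab and reqnum      */
    MPI_Request     reqtab[REQTAB_SIZE];  /* pending communication requests  */
    int             reqnum;               /* number of registered requests   */
} req_table_t;

/* Post a non-blocking send and register the request for later completion. */
static void
post_and_register_isend( req_table_t *rt, const void *buf, int count,
                         MPI_Datatype dtype, int dest, int tag, MPI_Comm comm )
{
    MPI_Request request;
    int rc = MPI_Isend( buf, count, dtype, dest, tag, comm, &request );
    assert( rc == MPI_SUCCESS );
    (void)rc;

    pthread_mutex_lock( &(rt->lock) );
    assert( rt->reqnum < REQTAB_SIZE );
    rt->reqtab[ rt->reqnum ] = request;
    rt->reqnum++;
    pthread_mutex_unlock( &(rt->lock) );
}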

Completion handler for a forward Isend: once the send has finished, the staged contribution is released:

360 cpucblk_crequest_rhs_fwd_handle_send( const args_solve_t *enums,
367     assert( cblk->cblktype & CBLK_FANIN );
368     assert( enums->solve_step == PastixSolveForward );
370 #if defined(PASTIX_DEBUG_MPI)
374     fprintf( stderr, "[%2d] RHS Fwd: Isend for cblk %ld toward %2d ( %ld Bytes ) (DONE)\n",
375              solvmtx->clustnum, (long)cblk->gcblknum, cblk->ownerid, (long)cblksize );
379     memFree_null( rhsb->cblkb[ idx ] );

Completion handler for an asynchronous receive: the message tag identifies the global cblk and the source rank selects the matching CBLK_RECV block; the payload is then processed under the destination block's lock and the receive buffer is freed:

412 cpucblk_crequest_rhs_fwd_handle_recv( const args_solve_t *enums,
416                                       const MPI_Status   *status,
421     int src = status->MPI_SOURCE;
422     int tag = status->MPI_TAG;
425     assert( ( 0 <= src ) && ( src < solvmtx->clustnbr ) );
426     assert( ( 0 <= tag ) && ( tag < solvmtx->gcblknbr ) );
435     while( cblk->ownerid != src ) {
437         assert( cblk >= solvmtx->cblktab );
439     assert( cblk->cblktype & CBLK_RECV );
444 #if defined(PASTIX_DEBUG_MPI)
450     rc = MPI_Get_count( status, MPI_CHAR, &count );
451     assert( rc == MPI_SUCCESS );
452     assert( count == size );
455     fprintf( stderr, "[%2d] RHS Fwd : recv of size %d/%ld for cblk %ld (DONE)\n",
456              solvmtx->clustnum, count, (long)size, (long)cblk->gcblknum );
466     pastix_cblk_lock( fcbk );
470     pastix_cblk_unlock( fcbk );
477     memFree_null( recvbuf );

Dispatch of completed requests: reqidx[reqid] == -1 marks the persistent receive request, which is either restarted (MPI_Start) or released (MPI_Request_free) before the payload is handled; any other entry is a fan-in Isend whose send handler is invoked:

518 cpucblk_crequest_rhs_fwd_handle( const args_solve_t *enums,
524                                  const MPI_Status   *statuses )
527     int nbrequest = outcount;
529     for ( i = 0; i < outcount; i++ ) {
535         if ( solvmtx->reqidx[reqid] == -1 ) {
541             memcpy( &status, statuses + i, sizeof(MPI_Status) );
542             MPI_Get_count( &status, PASTIX_MPI_COMPLEX32, &size );
550             assert( solvmtx->recvcnt >= 0 );
552                 MPI_Start( solvmtx->reqtab + reqid );
556                 MPI_Request_free( solvmtx->reqtab + reqid );
557                 solvmtx->reqtab[reqid] = MPI_REQUEST_NULL;
560             cpucblk_crequest_rhs_fwd_handle_recv( enums, solvmtx, rhsb,
561                                                   threadid, &status, recvbuf );
568             assert( cblk->cblktype & CBLK_FANIN );
570             cpucblk_crequest_rhs_fwd_handle_send( enums, solvmtx, rhsb, cblk );
573         solvmtx->reqidx[ reqid ] = -1;
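
The sketch below isolates that restart-or-retire choice for a persistent receive; more_expected is a hypothetical flag where the real code consults its receive counter (recvcnt).

#include <mpi.h>

/* Re-arm or retire a persistent receive once MPI_Testsome reported it done. */
static void
restart_or_free_recv( MPI_Request *req, int more_expected )
{
    if ( more_expected ) {
        MPI_Start( req );          /* post the persistent receive again      */
    }
    else {
        MPI_Request_free( req );   /* no further message expected            */
        *req = MPI_REQUEST_NULL;   /* redundant (MPI nulls the handle), kept
                                      explicit as in the excerpt above       */
    }
}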

Communication progress engine: a single thread elects itself as the communication thread, then polls the request table with MPI_Testsome, handles completions, and compacts the table until no request is left:

609 cpucblk_cmpi_rhs_fwd_progress( const args_solve_t *enums,
614     pthread_t tid = pthread_self();
616     int nbrequest, nbfree;
617     int indexes[ solvmtx->reqnbr ];
618     MPI_Status statuses[ solvmtx->reqnbr ];
621     pthread_mutex_lock( &pastix_comm_lock );
622     if ( pastix_comm_tid == (pthread_t)-1 ) {
623         pastix_comm_tid = tid;
625     pthread_mutex_unlock( &pastix_comm_lock );
627     if ( tid != pastix_comm_tid ) {
641     pastix_atomic_lock( &(solvmtx->reqlock) );
642     nbrequest = solvmtx->reqnum;
643     pastix_atomic_unlock( &(solvmtx->reqlock) );
645     while( (outcount > 0) && (nbrequest > 0) )
647         MPI_Testsome( nbrequest, solvmtx->reqtab, &outcount, indexes, statuses );
651         if ( outcount > 0 ) {
652             nbfree = cpucblk_crequest_rhs_fwd_handle( enums, solvmtx, rhsb, threadid,
653                                                       outcount, indexes, statuses );
660         pastix_atomic_lock( &(solvmtx->reqlock) );
662         cpucblk_cupdate_reqtab( solvmtx );
664         nbrequest = solvmtx->reqnum;
665         pastix_atomic_unlock( &(solvmtx->reqlock) );
668     pastix_comm_tid = (pthread_t)-1;
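
For reference, here is a generic MPI_Testsome polling loop with the same shape as the progress engine above, stripped of the PaStiX-specific locking, thread election and table compaction. handle_completion is a hypothetical callback.

#include <mpi.h>

/* Poll a request table until no completion is returned, calling a
 * user-supplied handler for every finished request. */
static void
progress_requests( MPI_Request *reqtab, int reqnbr,
                   void (*handle_completion)( int index, const MPI_Status *st ) )
{
    if ( reqnbr <= 0 ) {
        return;
    }

    int        indexes[ reqnbr ];
    MPI_Status statuses[ reqnbr ];
    int        outcount;

    do {
        MPI_Testsome( reqnbr, reqtab, &outcount, indexes, statuses );
        if ( ( outcount == MPI_UNDEFINED ) || ( outcount == 0 ) ) {
            break;  /* no active request left, or nothing has completed yet */
        }
        for ( int i = 0; i < outcount; i++ ) {
            handle_completion( indexes[i], statuses + i );
        }
    } while ( 1 );
}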

Waiting for incoming contributions: remote dependencies are progressed through cpucblk_cmpi_rhs_fwd_progress, and once the block carries neither the FANIN nor the RECV flag the thread spins with pastix_yield() until ctrbcnt drops to zero (cf. cpucblk_cincoming_rhs_fwd_deps):

709 #if defined(PASTIX_WITH_MPI)
710     if ( cblk->cblktype & CBLK_FANIN ) {
725     cpucblk_cmpi_rhs_fwd_progress( enums, solvmtx, rhsb, rank );
728     assert( !(cblk->cblktype & (CBLK_FANIN | CBLK_RECV)) );
729     do { pastix_yield(); } while( cblk->ctrbcnt > 0 );

Releasing a dependency: the contribution counter of the target block is decremented atomically and, when the target is a fan-in block during the forward solve, its RHS is sent asynchronously (cf. cpucblk_crelease_rhs_fwd_deps):

772     ctrbcnt = pastix_atomic_dec_32b( &(fcbk->ctrbcnt) );
774 #if defined(PASTIX_WITH_MPI)
775     if ( ( fcbk->cblktype & CBLK_FANIN ) &&
776          ( enums->solve_step == PastixSolveForward ) ) {
777         cpucblk_cisend_rhs_fwd( solvmtx, rhsb, fcbk );
784     if ( solvmtx->computeQueue ) {
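
The release above relies on an atomic decrement of the contribution counter; the thread that performs the final 1 -> 0 transition is the one that triggers the send. A sketch of that idea with C11 atomics standing in for pastix_atomic_dec_32b (which, per the excerpt, returns the new value, whereas atomic_fetch_sub returns the previous one):

#include <stdatomic.h>
#include <stdbool.h>

/* Decrement the contribution counter; return true iff this call performed
 * the final 1 -> 0 transition and the block is now ready to be sent. */
static bool
release_one_contribution( atomic_uint *ctrbcnt )
{
    unsigned int previous = atomic_fetch_sub( ctrbcnt, 1u );
    return ( previous == 1u );
}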

Cleanup: every request still pending in the table is waited for and its send handler invoked, after which the request count must be zero (cf. cpucblk_crequest_rhs_fwd_cleanup):

832 #if defined(PASTIX_WITH_MPI)
836     int reqnbr = solvmtx->reqnum;
839 #if defined(PASTIX_DEBUG_MPI)
840     fprintf( stderr, "[%2d] Wait for all pending communications\n",
844     for ( i=0; i<reqnbr; i++ ) {
845         if ( solvmtx->reqtab[i] == MPI_REQUEST_NULL ) {
852         assert( solvmtx->reqidx[i] != -1 );
854         rc = MPI_Wait( solvmtx->reqtab + i, &status );
855         assert( rc == MPI_SUCCESS );
860         assert( cblk->cblktype & CBLK_FANIN );
862         cpucblk_crequest_rhs_fwd_handle_send( enums, solvmtx, rhsb, cblk );
866     assert( solvmtx->reqnum == 0 );
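
Seen in isolation, and without the per-request completion handling of the real routine, that final drain reduces to the following sketch; the function name is illustrative.

#include <assert.h>
#include <mpi.h>

/* Wait for every request still registered in the table, skipping slots that
 * have already been released (MPI_REQUEST_NULL). */
static void
wait_all_pending( MPI_Request *reqtab, int reqnbr )
{
    for ( int i = 0; i < reqnbr; i++ ) {
        MPI_Status status;
        int        rc;

        if ( reqtab[i] == MPI_REQUEST_NULL ) {
            continue;
        }
        rc = MPI_Wait( reqtab + i, &status );
        assert( rc == MPI_SUCCESS );
        (void)rc;
    }
}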

Referenced symbols and routines:

typedef int pastix_int_t
typedef float _Complex pastix_complex32_t

static void pqueuePush1( pastix_queue_t *q, pastix_int_t elt, double key1 )
    Push an element with a single key.

int core_cgeadd( pastix_trans_t trans, pastix_int_t M, pastix_int_t N, pastix_complex32_t alpha, const pastix_complex32_t *A, pastix_int_t LDA, pastix_complex32_t beta, pastix_complex32_t *B, pastix_int_t LDB )
    Add two matrices together.

void cpucblk_crelease_rhs_fwd_deps( const args_solve_t *enums, SolverMatrix *solvmtx, pastix_rhs_t rhsb, const SolverCblk *cblk, SolverCblk *fcbk )
    Release the dependencies of the given cblk after an update.

void cpucblk_crequest_rhs_fwd_cleanup( const args_solve_t *enums, pastix_int_t sched, SolverMatrix *solvmtx, pastix_rhs_t rhsb )
    Waitall routine for the pending cblk requests.

int cpucblk_cincoming_rhs_fwd_deps( int rank, const args_solve_t *enums, SolverMatrix *solvmtx, SolverCblk *cblk, pastix_rhs_t rhsb )
    Wait for incoming dependencies, and return when cblk->ctrbcnt has reached 0.

void cpucblk_crecv_rhs_forward( const SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex32_t *work, pastix_rhs_t rhsb )
    Receive the rhs associated with a cblk->lcolidx from the remote node.

void cpucblk_csend_rhs_backward( const SolverMatrix *solvmtx, SolverCblk *cblk, pastix_rhs_t rhsb )
    Send the rhs associated with a cblk->lcolidx to the remote node.

void cpucblk_csend_rhs_forward( const SolverMatrix *solvmtx, SolverCblk *cblk, pastix_rhs_t rhsb )
    Send the rhs associated with a cblk->lcolidx to the remote node.

void cpucblk_crecv_rhs_backward( const SolverMatrix *solvmtx, SolverCblk *cblk, pastix_rhs_t rhsb )
    Receive the rhs associated with a cblk->lcolidx from the remote node.

pastix_rhs_t
    Main PaStiX RHS structure.

pastix_atomic_lock_t reqlock

static pastix_int_t cblk_colnbr( const SolverCblk *cblk )
    Compute the number of columns in a column block.

SolverCblk *restrict cblktab

SolverCblk
    Solver column block structure.