20 #include "common/common.h"
29 #if defined( PASTIX_WITH_MPI )
52 SolverMatrix *solvmtx,
60 assert( cblk->
cblktype & CBLK_FANIN );
66 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
76 #if defined(PASTIX_DEBUG_MPI)
77 fprintf( stderr,
"[%2d] Post Isend for cblk %ld toward %2d ( %ld Bytes )\n",
81 rc = MPI_Isend( buffer, bufsize, MPI_CHAR,
83 assert( rc == MPI_SUCCESS );
85 solverCommMatrixAdd( solvmtx, cblk->
ownerid, bufsize );
88 pastix_atomic_lock( &(solvmtx->reqlock) );
90 assert( solvmtx->reqidx[ solvmtx->reqnum ] == -1 );
91 assert( solvmtx->reqnum >= 0 );
92 assert( solvmtx->reqnum < solvmtx->reqnbr );
94 solvmtx->reqtab[ solvmtx->reqnum ] = request;
95 solvmtx->reqidx[ solvmtx->reqnum ] = cblk - solvmtx->cblktab;
98 pastix_atomic_unlock( &(solvmtx->reqlock) );
125 const SolverMatrix *solvmtx,
128 assert( cblk->
cblktype & CBLK_FANIN );
130 #if defined(PASTIX_DEBUG_MPI)
133 cblksize = cblksize *
cblk_colnbr( cblk ) * cblk->
stride *
sizeof(pastix_complex64_t);
135 fprintf( stderr,
"[%2d] Isend for cblk %ld toward %2d ( %ld Bytes ) (DONE)\n",
136 solvmtx->clustnum, (
long)cblk->
gcblknum, cblk->
ownerid, (
long)cblksize );
140 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
185 SolverMatrix *solvmtx,
187 const MPI_Status *status,
191 int src = status->MPI_SOURCE;
192 int tag = status->MPI_TAG;
194 assert( ( 0 <= src ) && ( src < solvmtx->clustnbr ) );
195 assert( ( 0 <= tag ) && ( tag < solvmtx->gcblknbr ) );
200 fcbk = solvmtx->cblktab + solvmtx->gcbl2loc[ tag ];
204 while( cblk->
ownerid != src ) {
206 assert( cblk >= solvmtx->cblktab );
208 assert( cblk->
cblktype & CBLK_RECV );
212 #if defined(PASTIX_DEBUG_MPI)
216 pastix_int_t size = (
cblk_colnbr(cblk) * cblk->
stride) *
sizeof(pastix_complex64_t);
222 rc = MPI_Get_count( status, MPI_CHAR, &count );
223 assert( rc == MPI_SUCCESS );
224 assert( (cblk->
cblktype & CBLK_COMPRESSED) ||
225 (!(cblk->
cblktype & CBLK_COMPRESSED) && (count == size)) );
228 fprintf( stderr,
"[%2d] Irecv of size %d/%ld for cblk %ld (DONE)\n",
229 solvmtx->clustnum, count, (
long)size, (
long)cblk->
gcblknum );
291 SolverMatrix *solvmtx,
295 const MPI_Status *statuses )
297 pastix_int_t i, reqid;
298 int nbrequest = outcount;
300 for( i = 0; i < outcount; i++ ){
306 if ( solvmtx->reqidx[reqid] == -1 ) {
312 memcpy( &status, statuses + i,
sizeof(MPI_Status) );
313 MPI_Get_count( &status, MPI_CHAR, &size );
315 MALLOC_INTERN( recvbuf, size,
char );
316 memcpy( recvbuf, solvmtx->rcoeftab, size );
321 assert( solvmtx->recvcnt >= 0 );
322 if ( solvmtx->recvcnt > 0 ) {
323 MPI_Start( solvmtx->reqtab + reqid );
327 MPI_Request_free( solvmtx->reqtab + reqid );
328 solvmtx->reqtab[reqid] = MPI_REQUEST_NULL;
331 cpucblk_zrequest_handle_recv( side, solvmtx, threadid,
338 SolverCblk *cblk = solvmtx->cblktab + solvmtx->reqidx[ reqid ];
339 assert( cblk->
cblktype & CBLK_FANIN );
341 cpucblk_zrequest_handle_fanin( side, solvmtx, cblk );
344 solvmtx->reqidx[ reqid ] = -1;
365 cpucblk_zupdate_reqtab( SolverMatrix *solvmtx )
368 MPI_Request *outrequest = solvmtx->reqtab;
369 pastix_int_t *outreqloc = solvmtx->reqidx;
373 MPI_Request *inrequest = solvmtx->reqtab;
374 pastix_int_t *inreqloc = solvmtx->reqidx;
378 while( (outreqnbr < solvmtx->reqnum) &&
379 (*outrequest != MPI_REQUEST_NULL) )
386 inrequest = outrequest;
387 inreqloc = outreqloc;
388 inreqnbr = outreqnbr;
389 for( ; inreqnbr < solvmtx->reqnum;
390 inreqnbr++, inrequest++, inreqloc++ )
392 if ( *inrequest == MPI_REQUEST_NULL )
398 *outrequest = *inrequest;
399 *outreqloc = *inreqloc;
409 memset( outreqloc, 0xff, (solvmtx->reqnbr - outreqnbr) *
sizeof(pastix_int_t) );
412 #if defined(PASTIX_DEBUG_MPI)
414 for( i = outreqnbr; i < solvmtx->reqnbr; i++ )
416 solvmtx->reqtab[i] = MPI_REQUEST_NULL;
419 assert( outreqnbr < solvmtx->reqnum );
420 solvmtx->reqnum = outreqnbr;
450 SolverMatrix *solvmtx,
453 pthread_t tid = pthread_self();
455 int nbrequest, nbfree;
456 int indexes[ solvmtx->reqnbr ];
457 MPI_Status statuses[ solvmtx->reqnbr ];
460 pthread_mutex_lock( &pastix_comm_lock );
461 if ( pastix_comm_tid == (pthread_t)-1 ) {
462 pastix_comm_tid = tid;
464 pthread_mutex_unlock( &pastix_comm_lock );
466 if ( tid != pastix_comm_tid ) {
480 pastix_atomic_lock( &(solvmtx->reqlock) );
481 nbrequest = solvmtx->reqnum;
482 pastix_atomic_unlock( &(solvmtx->reqlock) );
484 while( (outcount > 0) && (nbrequest > 0) )
486 MPI_Testsome( nbrequest, solvmtx->reqtab, &outcount, indexes, statuses );
490 if ( outcount > 0 ) {
491 nbfree = cpucblk_zrequest_handle( side, solvmtx, threadid,
492 outcount, indexes, statuses );
499 pastix_atomic_lock( &(solvmtx->reqlock) );
501 cpucblk_zupdate_reqtab( solvmtx );
503 nbrequest = solvmtx->reqnum;
504 pastix_atomic_unlock( &(solvmtx->reqlock) );
507 pastix_comm_tid = -1;
543 SolverMatrix *solvmtx,
546 #if defined(PASTIX_WITH_MPI)
547 if ( cblk->
cblktype & CBLK_FANIN ) {
562 cpucblk_zmpi_progress( side, solvmtx, rank );
565 assert( !(cblk->
cblktype & (CBLK_FANIN | CBLK_RECV)) );
566 do { pastix_yield(); }
while( cblk->
ctrbcnt > 0 );
601 SolverMatrix *solvmtx,
606 ctrbcnt = pastix_atomic_dec_32b( &(fcbk->
ctrbcnt) );
608 #if defined(PASTIX_WITH_MPI)
609 if ( fcbk->
cblktype & CBLK_FANIN ) {
610 cpucblk_zisend( side, solvmtx, fcbk );
616 if ( solvmtx->computeQueue ) {
653 SolverMatrix *solvmtx )
661 #if defined(PASTIX_WITH_MPI)
665 int reqnbr = solvmtx->reqnum;
668 #if defined(PASTIX_DEBUG_MPI)
669 fprintf( stderr,
"[%2d] Wait for all pending communications\n",
673 for( i=0; i<reqnbr; i++ )
675 if ( solvmtx->reqtab[i] == MPI_REQUEST_NULL ) {
682 assert( solvmtx->reqidx[i] != -1 );
684 rc = MPI_Wait( solvmtx->reqtab + i, &status );
685 assert( rc == MPI_SUCCESS );
687 cblk = solvmtx->cblktab + solvmtx->reqidx[i];
690 assert( cblk->
cblktype & CBLK_FANIN );
692 cpucblk_zrequest_handle_fanin( side, solvmtx, cblk );
696 assert( solvmtx->reqnum == 0 );
size_t cpucblk_zcompute_size(pastix_coefside_t side, const SolverCblk *cblk)
Compute the size of the buffer to send.
void cpucblk_zunpack(pastix_coefside_t side, SolverCblk *cblk, void *buffer)
Unpack data and fill the column block concerned by the computation.
void * cpucblk_zpack(pastix_coefside_t side, SolverCblk *cblk, size_t size)
Pack a column block (full rank or low rank).
static void pqueuePush1(pastix_queue_t *q, pastix_int_t elt, double key1)
Push an element with a single key.
void cpucblk_zadd(pastix_coefside_t side, double alpha, const SolverCblk *cblkA, SolverCblk *cblkB, const pastix_lr_t *lowrank)
Add two column blocks in full rank format.
int cpucblk_zincoming_deps(int rank, pastix_coefside_t side, SolverMatrix *solvmtx, SolverCblk *cblk)
Wait for incoming dependencies, and return when cblk->ctrbcnt has reached 0.
void cpucblk_zrelease_deps(pastix_coefside_t side, SolverMatrix *solvmtx, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
void cpucblk_zfree(pastix_coefside_t side, SolverCblk *cblk)
Free the cblk structure that store the coefficient.
void cpucblk_zrequest_cleanup(pastix_coefside_t side, pastix_int_t sched, SolverMatrix *solvmtx)
Waitall routine for current cblk request.
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Solver column block structure.