#include "common/common.h"

#if defined( PASTIX_WITH_MPI )
void
cpucblk_zisend( pastix_coefside_t side,
                SolverMatrix     *solvmtx,
                SolverCblk       *cblk )
{
    MPI_Request request;
    size_t      bufsize;
    void       *buffer;
    int         rc;

    assert( cblk->cblktype & CBLK_FANIN );

    /* Pack the data to send */
    if ( cblk->cblktype & CBLK_COMPRESSED ) {
        bufsize = cpucblk_zcompute_size( side, cblk );
        buffer  = cpucblk_zpack( side, cblk, bufsize );
    }
    else {
        /* ... */
    }
#if defined(PASTIX_DEBUG_MPI)
    fprintf( stderr, "[%2d] Post Isend for cblk %ld toward %2d ( %ld Bytes )\n",
             solvmtx->clustnum, (long)cblk->gcblknum, cblk->ownerid, (long)bufsize );
#endif

    rc = MPI_Isend( buffer, bufsize, MPI_CHAR, cblk->ownerid, cblk->gcblknum,
                    solvmtx->solv_comm, &request );
    assert( rc == MPI_SUCCESS );
    solverCommMatrixAdd( solvmtx, cblk->ownerid, bufsize );

    /* Register the request so that the progress engine can complete it */
    pastix_atomic_lock( &(solvmtx->reqlock) );

    assert( solvmtx->reqnum >= 0 );
    solvmtx->reqtab[ solvmtx->reqnum ] = request;
    solvmtx->reqidx[ solvmtx->reqnum ] = cblk - solvmtx->cblktab;
    solvmtx->reqnum++;

    pastix_atomic_unlock( &(solvmtx->reqlock) );
}
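/*
 * Illustration only -- a minimal, standalone sketch of the post-and-register
 * pattern used above: post a non-blocking send, then append the request to a
 * shared table under a lock so that a progress engine can test and compact it
 * later. The names req_table_t and reqtab_push are hypothetical, not part of
 * the PaStiX API; a plain pthread mutex stands in for pastix_atomic_lock.
 */
#include <mpi.h>
#include <pthread.h>
#include <assert.h>

#define MAXREQ 128

typedef struct req_table_s {
    pthread_mutex_t lock;            /* protects reqtab and reqnum           */
    MPI_Request     reqtab[MAXREQ];  /* active requests, packed at the front */
    int             reqnum;          /* number of active requests            */
} req_table_t;

/* Post an Isend and register the resulting request for later progression. */
static void
reqtab_push( req_table_t *rt, const void *buf, int size, int dest, int tag )
{
    MPI_Request request;
    MPI_Isend( buf, size, MPI_CHAR, dest, tag, MPI_COMM_WORLD, &request );

    pthread_mutex_lock( &(rt->lock) );
    assert( rt->reqnum < MAXREQ );
    rt->reqtab[ rt->reqnum++ ] = request;
    pthread_mutex_unlock( &(rt->lock) );
}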
static void
cpucblk_zrequest_handle_fanin( pastix_coefside_t  side,
                               SolverMatrix      *solvmtx,
                               SolverCblk        *cblk )
{
    assert( cblk->cblktype & CBLK_FANIN );

#if defined(PASTIX_DEBUG_MPI)
    size_t cblksize = ( side == PastixLUCoef ) ? 2 : 1;
    cblksize = cblksize * cblk_colnbr( cblk ) * cblk->stride * sizeof(pastix_complex64_t);

    fprintf( stderr, "[%2d] Isend for cblk %ld toward %2d ( %ld Bytes ) (DONE)\n",
             solvmtx->clustnum, (long)cblk->gcblknum, cblk->ownerid, (long)cblksize );
#endif

    if ( cblk->cblktype & CBLK_COMPRESSED ) {
        /* ... release the packed copy that was sent ... */
static void
cpucblk_zrequest_handle_recv( pastix_coefside_t  side,
                              SolverMatrix      *solvmtx,
                              int                threadid,
                              const MPI_Status  *status,
                              char              *recvbuf )
{
    SolverCblk *cblk;
    int src = status->MPI_SOURCE;
    int tag = status->MPI_TAG;

    assert( ( 0 <= src ) && ( src < solvmtx->clustnbr ) );
    assert( ( 0 <= tag ) && ( tag < solvmtx->gcblknbr ) );

    /* ... initialize cblk from the local target of the message (elided),
       then walk back to the CBLK_RECV owned by src ... */
    while( cblk->ownerid != src ) {
        cblk--;
        assert( cblk >= solvmtx->cblktab );
        assert( cblk->cblktype & CBLK_RECV );
    }
#if defined(PASTIX_DEBUG_MPI)
    {
        int          rc;
        int          count = 0;
        pastix_int_t size;

        /* ... compute the expected message size in bytes (elided) ... */

        rc = MPI_Get_count( status, MPI_CHAR, &count );
        assert( rc == MPI_SUCCESS );
        assert( (cblk->cblktype & CBLK_COMPRESSED) ||
                (!(cblk->cblktype & CBLK_COMPRESSED) && (count == size)) );

        fprintf( stderr, "[%2d] Irecv of size %d/%ld for cblk %ld (DONE)\n",
                 solvmtx->clustnum, count, (long)size, (long)cblk->gcblknum );
    }
#endif
static int
cpucblk_zrequest_handle( pastix_coefside_t  side,
                         SolverMatrix      *solvmtx,
                         int                threadid,
                         int                outcount,
                         const int         *indexes,
                         const MPI_Status  *statuses )
{
    pastix_int_t i, reqid;
    int          nbrequest = outcount;

    for( i = 0; i < outcount; i++ ){
        reqid = indexes[ i ];

        /* Reception: the request slot is not bound to a cblk */
        if ( solvmtx->reqidx[reqid] == -1 ) {
            char       *recvbuf;
            MPI_Status  status;
            int         size;

            memcpy( &status, statuses + i, sizeof(MPI_Status) );
            MPI_Get_count( &status, MPI_CHAR, &size );

            /* Copy the message out of the persistent reception buffer */
            MALLOC_INTERN( recvbuf, size, char );
            memcpy( recvbuf, solvmtx->rcoeftab, size );

            solvmtx->recvcnt--;
            assert( solvmtx->recvcnt >= 0 );

            if ( solvmtx->recvcnt > 0 ) {
                /* Re-arm the persistent reception: the slot stays active */
                MPI_Start( solvmtx->reqtab + reqid );
                nbrequest--;
            }
            else {
                MPI_Request_free( solvmtx->reqtab + reqid );
                solvmtx->reqtab[reqid] = MPI_REQUEST_NULL;
            }

            cpucblk_zrequest_handle_recv( side, solvmtx, threadid,
                                          &status, recvbuf );
        }
        /* Emission: release the fanin cblk bound to the request */
        else {
            SolverCblk *cblk = solvmtx->cblktab + solvmtx->reqidx[ reqid ];
            assert( cblk->cblktype & CBLK_FANIN );

            cpucblk_zrequest_handle_fanin( side, solvmtx, cblk );

            solvmtx->reqidx[ reqid ] = -1;
        }
    }

    /* Return the number of request slots actually freed */
    return nbrequest;
}
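/*
 * Illustration only -- the reception branch above relies on MPI persistent
 * requests: a receive posted once is re-armed with MPI_Start while more
 * messages are expected, and released with MPI_Request_free once the counter
 * reaches zero. A minimal sketch of that life cycle, with a hypothetical
 * fixed-size buffer:
 */
#include <mpi.h>

#define RECV_BUFSIZE 4096

static void
recv_n_messages( int nbmsg, char *rcoeftab )
{
    MPI_Request req;
    MPI_Status  status;

    /* Post the persistent receive once... */
    MPI_Recv_init( rcoeftab, RECV_BUFSIZE, MPI_CHAR, MPI_ANY_SOURCE,
                   MPI_ANY_TAG, MPI_COMM_WORLD, &req );
    MPI_Start( &req );

    while ( nbmsg > 0 ) {
        MPI_Wait( &req, &status );
        nbmsg--;

        /* ...re-arm it while more messages are expected... */
        if ( nbmsg > 0 ) {
            MPI_Start( &req );
        }
    }

    /* ...and free it once the last expected message has arrived. */
    MPI_Request_free( &req );
}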
static void
cpucblk_zupdate_reqtab( SolverMatrix *solvmtx )
{
    /* Pointers to the head of the packed (output) arrays */
    MPI_Request  *outrequest = solvmtx->reqtab;
    pastix_int_t *outreqloc  = solvmtx->reqidx;
    int           outreqnbr  = 0;

    /* Pointers used to scan the input arrays */
    MPI_Request  *inrequest = solvmtx->reqtab;
    pastix_int_t *inreqloc  = solvmtx->reqidx;
    int           inreqnbr  = 0;

    /* Skip the leading block of still-active requests */
    while( (outreqnbr < solvmtx->reqnum) &&
           (*outrequest != MPI_REQUEST_NULL) )
    {
        outrequest++;
        outreqloc++;
        outreqnbr++;
    }

    /* Scan the tail and pack every active request down to the front */
    inrequest = outrequest;
    inreqloc  = outreqloc;
    inreqnbr  = outreqnbr;
    for( ; inreqnbr < solvmtx->reqnum;
         inreqnbr++, inrequest++, inreqloc++ )
    {
        if ( *inrequest == MPI_REQUEST_NULL )
        {
            continue;
        }

        *outrequest = *inrequest;
        *outreqloc  = *inreqloc;

        outrequest++;
        outreqloc++;
        outreqnbr++;
    }

#if defined(PASTIX_DEBUG_MPI)
    {
        /* Reset the tail of the array to MPI_REQUEST_NULL */
        int i;
        for( i = outreqnbr; i < solvmtx->reqnbr; i++ ) {
            solvmtx->reqtab[i] = MPI_REQUEST_NULL;
        }
    }
#endif

    assert( outreqnbr < solvmtx->reqnum );
    solvmtx->reqnum = outreqnbr;
}
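/*
 * Illustration only -- the routine above is an in-place two-pointer
 * compaction: the out pointer tracks the next free slot, the in pointer scans
 * the tail, and every live entry is copied down so that active requests stay
 * contiguous at the front of the array. The same technique on a plain
 * pointer array:
 */
#include <stddef.h>

/* Pack non-NULL entries at the front; return the new count of live entries. */
static int
compact_front( void **tab, int nb )
{
    int in, out = 0;

    /* Skip the leading run that is already packed. */
    while ( (out < nb) && (tab[out] != NULL) ) {
        out++;
    }

    /* Copy each remaining live entry down to the next free slot. */
    for ( in = out; in < nb; in++ ) {
        if ( tab[in] != NULL ) {
            tab[out++] = tab[in];
        }
    }
    return out;
}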
void
cpucblk_zmpi_progress( pastix_coefside_t side,
                       SolverMatrix     *solvmtx,
                       int               threadid )
{
    pthread_t  tid = pthread_self();
    int        outcount = 1;
    int        nbrequest, nbfree;
    int        indexes[ solvmtx->reqnbr ];
    MPI_Status statuses[ solvmtx->reqnbr ];

    /* Only one thread at a time is allowed to progress the communications */
    pthread_mutex_lock( &pastix_comm_lock );
    if ( pastix_comm_tid == (pthread_t)-1 ) {
        pastix_comm_tid = tid;
    }
    pthread_mutex_unlock( &pastix_comm_lock );

    if ( tid != pastix_comm_tid ) {
        pastix_yield();
        return;
    }

    /* Snapshot the number of active requests under the lock */
    pastix_atomic_lock( &(solvmtx->reqlock) );
    nbrequest = solvmtx->reqnum;
    pastix_atomic_unlock( &(solvmtx->reqlock) );

    while( (outcount > 0) && (nbrequest > 0) )
    {
        /* Test the first nbrequest requests; new ones may be appended
         * behind them while we work on this snapshot */
        MPI_Testsome( nbrequest, solvmtx->reqtab, &outcount, indexes, statuses );
        nbfree = 0;

        /* Handle all the completed requests */
        if ( outcount > 0 ) {
            nbfree = cpucblk_zrequest_handle( side, solvmtx, threadid,
                                              outcount, indexes, statuses );
        }

        /* Pack the request arrays and refresh the number of active requests */
        pastix_atomic_lock( &(solvmtx->reqlock) );
        if ( nbfree > 0 ) {
            cpucblk_zupdate_reqtab( solvmtx );
        }
        nbrequest = solvmtx->reqnum;
        pastix_atomic_unlock( &(solvmtx->reqlock) );
    }

    /* Release the communication token */
    pastix_comm_tid = (pthread_t)-1;
}
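/*
 * Illustration only -- the entry of the routine above elects a single
 * communicating thread: the first thread to find the slot free takes it, and
 * every other thread yields instead. A standalone sketch of that election,
 * using pthread_equal in place of the direct pthread_t comparison above;
 * comm_taken, try_elect_comm_thread and release_comm_thread are hypothetical
 * names:
 */
#include <pthread.h>

static pthread_mutex_t comm_lock  = PTHREAD_MUTEX_INITIALIZER;
static int             comm_taken = 0;
static pthread_t       comm_tid;

/* Returns 1 iff the calling thread holds the right to progress comms. */
static int
try_elect_comm_thread( void )
{
    pthread_t tid = pthread_self();
    int       won;

    pthread_mutex_lock( &comm_lock );
    if ( !comm_taken ) {
        comm_taken = 1;
        comm_tid   = tid;
    }
    won = pthread_equal( tid, comm_tid );
    pthread_mutex_unlock( &comm_lock );

    return won;
}

/* The elected thread releases the slot once it is done progressing. */
static void
release_comm_thread( void )
{
    pthread_mutex_lock( &comm_lock );
    comm_taken = 0;
    pthread_mutex_unlock( &comm_lock );
}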
int
cpucblk_zincoming_deps( int               rank,
                        pastix_coefside_t side,
                        SolverMatrix     *solvmtx,
                        SolverCblk       *cblk )
{
#if defined(PASTIX_WITH_MPI)
    if ( cblk->cblktype & CBLK_FANIN ) {
        /* ... */
    }
    /* ... */

    /* Progress the pending communications while waiting */
    cpucblk_zmpi_progress( side, solvmtx, rank );
    /* ... */
#endif

    assert( !(cblk->cblktype & (CBLK_FANIN | CBLK_RECV)) );
    do { pastix_yield(); } while( cblk->ctrbcnt > 0 );
    /* ... */
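/*
 * Illustration only -- the wait above is a cooperative spin: the thread
 * yields between checks of the contribution counter instead of blocking, so
 * the scheduler can run other tasks (or the MPI progress loop) in between.
 * A standalone sketch with C11 atomics; sched_yield stands in for
 * pastix_yield:
 */
#include <stdatomic.h>
#include <sched.h>

/* Spin cooperatively until all expected contributions have been released. */
static void
wait_contributions( atomic_uint *ctrbcnt )
{
    while ( atomic_load( ctrbcnt ) > 0 ) {
        sched_yield();
    }
}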
void
cpucblk_zrelease_deps( pastix_coefside_t  side,
                       SolverMatrix      *solvmtx,
                       const SolverCblk  *cblk,
                       SolverCblk        *fcbk )
{
    int32_t ctrbcnt;
    /* ... */

    ctrbcnt = pastix_atomic_dec_32b( &(fcbk->ctrbcnt) );
    if ( !ctrbcnt ) {
#if defined(PASTIX_WITH_MPI)
        if ( fcbk->cblktype & CBLK_FANIN ) {
            cpucblk_zisend( side, solvmtx, fcbk );
            return;
        }
#endif
        if ( solvmtx->computeQueue ) {
            /* ... */
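/*
 * Illustration only -- the release path above hinges on an atomic decrement:
 * every contributor decrements the counter, and exactly one caller observes
 * zero and triggers the send. A portable sketch with C11 atomics, where
 * pastix_atomic_dec_32b plays the role of atomic_fetch_sub; fanin_t and
 * release_contribution are hypothetical names:
 */
#include <stdatomic.h>

typedef struct fanin_s {
    atomic_uint ctrbcnt;   /* remaining contributions before emission */
} fanin_t;

/* Each contributor calls this once; exactly one of them sees zero. */
static void
release_contribution( fanin_t *fcbk, void (*send_cb)( fanin_t * ) )
{
    /* atomic_fetch_sub returns the previous value, so 1 means "last one". */
    if ( atomic_fetch_sub( &(fcbk->ctrbcnt), 1 ) == 1 ) {
        send_cb( fcbk );
    }
}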
void
cpucblk_zrequest_cleanup( pastix_coefside_t side,
                          pastix_int_t      sched,
                          SolverMatrix     *solvmtx )
{
#if defined(PASTIX_WITH_MPI)
    pastix_int_t i;
    int          rc;
    SolverCblk  *cblk;
    int          reqnbr = solvmtx->reqnum;
    MPI_Status   status;

#if defined(PASTIX_DEBUG_MPI)
    fprintf( stderr, "[%2d] Wait for all pending communications\n",
             solvmtx->clustnum );
#endif

    for( i=0; i<reqnbr; i++ )
    {
        if ( solvmtx->reqtab[i] == MPI_REQUEST_NULL ) {
            solvmtx->reqnum--;
            continue;
        }

        /* Make sure that we do not wait on an already handled request */
        assert( solvmtx->reqidx[i] != -1 );

        rc = MPI_Wait( solvmtx->reqtab + i, &status );
        assert( rc == MPI_SUCCESS );

        cblk = solvmtx->cblktab + solvmtx->reqidx[i];

        /* Only fanin emissions are left at this point */
        assert( cblk->cblktype & CBLK_FANIN );

        cpucblk_zrequest_handle_fanin( side, solvmtx, cblk );

        solvmtx->reqnum--;
    }
    assert( solvmtx->reqnum == 0 );
#endif
}