22 #include "common/common.h"
31 #if defined( PASTIX_WITH_MPI )
62 assert( cblk->
cblktype & CBLK_FANIN );
68 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
78 #if defined(PASTIX_DEBUG_MPI)
79 fprintf( stderr,
"[%2d] Post Isend for cblk %ld toward %2d ( %ld Bytes )\n",
83 rc = MPI_Isend( buffer, bufsize, MPI_CHAR,
85 assert( rc == MPI_SUCCESS );
87 solverCommMatrixAdd( solvmtx, cblk->
ownerid, bufsize );
90 pastix_atomic_lock( &(solvmtx->
reqlock) );
93 assert( solvmtx->
reqnum >= 0 );
100 pastix_atomic_unlock( &(solvmtx->
reqlock) );
130 assert( cblk->
cblktype & CBLK_FANIN );
132 #if defined(PASTIX_DEBUG_MPI)
137 fprintf( stderr,
"[%2d] Isend for cblk %ld toward %2d ( %ld Bytes ) (DONE)\n",
138 solvmtx->clustnum, (
long)cblk->
gcblknum, cblk->
ownerid, (
long)cblksize );
142 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
189 const MPI_Status *status,
193 int src = status->MPI_SOURCE;
194 int tag = status->MPI_TAG;
196 assert( ( 0 <= src ) && ( src < solvmtx->clustnbr ) );
197 assert( ( 0 <= tag ) && ( tag < solvmtx->gcblknbr ) );
206 while( cblk->
ownerid != src ) {
208 assert( cblk >= solvmtx->
cblktab );
210 assert( cblk->
cblktype & CBLK_RECV );
214 #if defined(PASTIX_DEBUG_MPI)
224 rc = MPI_Get_count( status, MPI_CHAR, &count );
225 assert( rc == MPI_SUCCESS );
226 assert( (cblk->
cblktype & CBLK_COMPRESSED) ||
227 (!(cblk->
cblktype & CBLK_COMPRESSED) && (count == size)) );
230 fprintf( stderr,
"[%2d] Irecv of size %d/%ld for cblk %ld (DONE)\n",
231 solvmtx->clustnum, count, (
long)size, (
long)cblk->
gcblknum );
297 const MPI_Status *statuses )
300 int nbrequest = outcount;
302 for( i = 0; i < outcount; i++ ){
308 if ( solvmtx->
reqidx[reqid] == -1 ) {
314 memcpy( &status, statuses + i,
sizeof(MPI_Status) );
315 MPI_Get_count( &status, MPI_CHAR, &size );
317 MALLOC_INTERN( recvbuf, size,
char );
318 memcpy( recvbuf, solvmtx->
rcoeftab, size );
323 assert( solvmtx->
recvcnt >= 0 );
325 MPI_Start( solvmtx->
reqtab + reqid );
329 MPI_Request_free( solvmtx->
reqtab + reqid );
330 solvmtx->
reqtab[reqid] = MPI_REQUEST_NULL;
333 cpucblk_srequest_handle_recv( side, solvmtx, threadid,
341 assert( cblk->
cblktype & CBLK_FANIN );
343 cpucblk_srequest_handle_fanin( side, solvmtx, cblk );
346 solvmtx->
reqidx[ reqid ] = -1;
370 MPI_Request *outrequest = solvmtx->
reqtab;
375 MPI_Request *inrequest = solvmtx->
reqtab;
380 while( (outreqnbr < solvmtx->reqnum) &&
381 (*outrequest != MPI_REQUEST_NULL) )
388 inrequest = outrequest;
389 inreqloc = outreqloc;
390 inreqnbr = outreqnbr;
391 for( ; inreqnbr < solvmtx->
reqnum;
392 inreqnbr++, inrequest++, inreqloc++ )
394 if ( *inrequest == MPI_REQUEST_NULL )
400 *outrequest = *inrequest;
401 *outreqloc = *inreqloc;
414 #if defined(PASTIX_DEBUG_MPI)
416 for( i = outreqnbr; i < solvmtx->
reqnbr; i++ )
418 solvmtx->
reqtab[i] = MPI_REQUEST_NULL;
421 assert( outreqnbr < solvmtx->reqnum );
422 solvmtx->
reqnum = outreqnbr;
455 pthread_t tid = pthread_self();
457 int nbrequest, nbfree;
458 int indexes[ solvmtx->
reqnbr ];
459 MPI_Status statuses[ solvmtx->
reqnbr ];
462 pthread_mutex_lock( &pastix_comm_lock );
463 if ( pastix_comm_tid == (pthread_t)-1 ) {
464 pastix_comm_tid = tid;
466 pthread_mutex_unlock( &pastix_comm_lock );
468 if ( tid != pastix_comm_tid ) {
482 pastix_atomic_lock( &(solvmtx->
reqlock) );
483 nbrequest = solvmtx->
reqnum;
484 pastix_atomic_unlock( &(solvmtx->
reqlock) );
486 while( (outcount > 0) && (nbrequest > 0) )
488 MPI_Testsome( nbrequest, solvmtx->
reqtab, &outcount, indexes, statuses );
492 if ( outcount > 0 ) {
493 nbfree = cpucblk_srequest_handle( side, solvmtx, threadid,
494 outcount, indexes, statuses );
501 pastix_atomic_lock( &(solvmtx->
reqlock) );
503 cpucblk_supdate_reqtab( solvmtx );
505 nbrequest = solvmtx->
reqnum;
506 pastix_atomic_unlock( &(solvmtx->
reqlock) );
509 pastix_comm_tid = (pthread_t)-1;
548 #if defined(PASTIX_WITH_MPI)
549 if ( cblk->
cblktype & CBLK_FANIN ) {
564 cpucblk_smpi_progress( side, solvmtx, rank );
567 assert( !(cblk->
cblktype & (CBLK_FANIN | CBLK_RECV)) );
568 do { pastix_yield(); }
while( cblk->
ctrbcnt > 0 );
608 ctrbcnt = pastix_atomic_dec_32b( &(fcbk->
ctrbcnt) );
610 #if defined(PASTIX_WITH_MPI)
611 if ( fcbk->
cblktype & CBLK_FANIN ) {
612 cpucblk_sisend( side, solvmtx, fcbk );
618 if ( solvmtx->computeQueue ) {
664 #if defined(PASTIX_WITH_MPI)
668 int reqnbr = solvmtx->
reqnum;
671 #if defined(PASTIX_DEBUG_MPI)
672 fprintf( stderr,
"[%2d] Wait for all pending communications\n",
676 for( i=0; i<reqnbr; i++ )
678 if ( solvmtx->
reqtab[i] == MPI_REQUEST_NULL ) {
685 assert( solvmtx->
reqidx[i] != -1 );
687 rc = MPI_Wait( solvmtx->
reqtab + i, &status );
688 assert( rc == MPI_SUCCESS );
693 assert( cblk->
cblktype & CBLK_FANIN );
695 cpucblk_srequest_handle_fanin( side, solvmtx, cblk );
699 assert( solvmtx->
reqnum == 0 );
void cpucblk_sunpack(pastix_coefside_t side, SolverCblk *cblk, void *buffer)
Unpack data and fill the column block concerned by the computation.
void * cpucblk_spack(pastix_coefside_t side, SolverCblk *cblk, size_t size)
Pack a column block (Full rank or low rank).
size_t cpucblk_scompute_size(pastix_coefside_t side, const SolverCblk *cblk)
Compute the size of the buffer to send.
BEGIN_C_DECLS typedef int pastix_int_t
static void pqueuePush1(pastix_queue_t *q, pastix_int_t elt, double key1)
Push an element with a single key.
pastix_fixdbl_t cpucblk_sadd(float alpha, const SolverCblk *cblkA, SolverCblk *cblkB, const void *A, void *B, float *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Add two column blocks in full rank format.
void cpucblk_srequest_cleanup(pastix_coefside_t side, pastix_int_t sched, SolverMatrix *solvmtx)
Waitall routine for current cblk request.
void cpucblk_sfree(pastix_coefside_t side, SolverCblk *cblk)
Free the cblk structure that stores the coefficients.
int cpucblk_sincoming_deps(int rank, pastix_coefside_t side, SolverMatrix *solvmtx, SolverCblk *cblk)
Wait for incoming dependencies, and return when cblk->ctrbcnt has reached 0.
void cpucblk_srelease_deps(pastix_coefside_t side, SolverMatrix *solvmtx, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
pastix_atomic_lock_t reqlock
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
static void * cblk_getdataU(const SolverCblk *cblk)
Get the pointer to the data associated to the upper part of the cblk.
static void * cblk_getdataL(const SolverCblk *cblk)
Get the pointer to the data associated to the lower part of the cblk.
SolverCblk *restrict cblktab
Solver column block structure.
Solver column block structure.