22 #if defined(PASTIX_WITH_PARSEC)
26 #if defined(PASTIX_WITH_STARPU)
65 #if defined(PASTIX_WITH_PARSEC)
66 if ( solvptr->parsec_desc ) {
70 #if defined(PASTIX_WITH_STARPU)
71 if ( solvptr->starpu_desc ) {
84 if ( solvptr->tasktab ) {
85 mem += solvptr->tasknbr *
sizeof(
Task);
87 if ( solvptr->ttsknbr ) {
92 for( i=0; i<solvptr->thrdnbr; i++ ) {
151 memFree_null(solvmtx->
cblktab);
154 memFree_null(solvmtx->
bloktab);
157 memFree_null(solvmtx->
browtab);
162 if(solvmtx->tasktab) {
163 memFree_null(solvmtx->tasktab);
165 memFree_null(solvmtx->ttsknbr);
166 for (i=0;i<solvmtx->bublnbr;i++)
168 if (solvmtx->ttsktab[i] != NULL) {
169 memFree_null(solvmtx->ttsktab[i]);
172 memFree_null(solvmtx->ttsktab);
173 #if defined(PASTIX_WITH_STARPU)
175 if ( solvmtx->starpu_desc_rhs != NULL ) {
177 free( solvmtx->starpu_desc_rhs );
179 solvmtx->starpu_desc_rhs = NULL;
200 size_t memstruct, memcoef;
205 int64_t fcol[3], lcol[3];
207 int64_t width[3] = { 0, 0, 0 };
208 int64_t height[3] = { 0, 0, 0 };
210 int64_t nbcblk[3] = { 0, 0, 0 };
211 int64_t nbblok[3] = { 0, 0, 0 };
212 int64_t fblok[3], lblok[3];
214 int64_t nbpartblok[3] = { 0, 0, 0 };
217 int64_t gemm_dense = 0;
218 int64_t gemm_nopart_full2 = 0;
219 int64_t gemm_nopart_hybrid = 0;
220 int64_t gemm_parsec_full2 = 0;
221 int64_t gemm_parsec_hybrid = 0;
222 int64_t gemm_starpu_full2 = 0;
223 int64_t gemm_starpu_hybrid = 0;
224 int64_t gemm_full1 = 0;
229 for(itercblk=0; itercblk<cblknbr; itercblk++, cblk++)
234 int64_t brow_size[3];
240 assert( brow_size[0] == (brow_size[1] + brow_size[2]) );
242 memcoef += colnbr * rownbr;
245 gemm_nopart_full2 += brow_size[0] * bcol_size;
246 gemm_nopart_hybrid += brow_size[1] + (brow_size[2] * bcol_size);
254 int64_t brow_csze[3] = { 0, 0, 0 };
256 for ( b = cblk[0].brownum; b < cblk[1].
brownum; b++, browptr++ ) {
257 blok = solvptr->
bloktab + (*browptr);
258 if ( blok->
lcblknm != lcblk ) {
262 if ( (solvptr->
cblktab + lcblk)->cblktype & CBLK_TASKS_2D ) {
270 assert( brow_csze[0] == (brow_csze[1] + brow_csze[2]) );
271 assert( brow_csze[0] <= brow_size[0] );
272 assert( brow_csze[1] <= brow_size[1] );
273 assert( brow_csze[2] <= brow_size[2] );
283 while( blok < cblk[1].fblokptr ) {
284 while( (blok < cblk[1].fblokptr-1) &&
285 (blok[0].fcblknm == blok[1].fcblknm) &&
286 (blok[0].lcblknm == blok[1].lcblknm) )
295 gemm_parsec_full2 += (nbpblok+1) * brow_size[0];
296 gemm_parsec_hybrid += ((nbpblok+1) * brow_size[2]) + brow_size[1];
299 gemm_starpu_full2 += (nbpblok * (nbpblok+1)) / 2;
301 if (cblk->
cblktype & CBLK_TASKS_2D) {
302 gemm_starpu_hybrid += (nbpblok * (nbpblok+1)) / 2;
304 nbpartblok[2] += nbpblok;
306 height[2] += rownbr - colnbr;
309 gemm_starpu_hybrid += bcol_size - 1;
311 nbpartblok[1] += nbpblok;
313 height[1] += rownbr - colnbr;
316 nbpartblok[0] += nbpblok;
318 height[0] += rownbr - colnbr;
321 assert( (width[1] + width[2]) == solvptr->
nodenbr );
322 assert( (width[1] + width[2]) == width[0] );
323 assert( (height[1] + height[2]) == height[0] );
327 gemm_dense = (cblknbr * ( cblknbr * cblknbr - 1 )) / 6;
339 nbcblk[1] = (cblknbr - solvptr->
nb2dcblk);
342 nbblok[0] = solvptr->
bloknbr - cblknbr;
355 " Solver Matrix statistics: | %-12s | %-12s | %-12s |\n"
356 " --------------------------------------------------------------------------------\n"
357 " Number of cblk | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
358 " Number of block | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
359 " Number of block (diag part.) | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
360 " Cblk: first | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
361 " last | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
362 " Block: first | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
363 " last | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
364 " rownum: first | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
365 " last | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
366 " Average width | %12.2lf | %12.2lf | %12.2lf |\n"
367 " Average height | %12.2lf | %12.2lf | %12.2lf |\n"
368 " Structure memory space %11.2lf %co\n"
369 " Number of coeficients stored %10ld\n",
373 nbcblk[0], nbcblk[1], nbcblk[2],
375 nbblok[0], nbblok[1], nbblok[2],
380 nbpartblok[0], nbpartblok[1], nbpartblok[2],
382 (int64_t)0, (int64_t)0, (int64_t)(solvptr->
cblkmin2d),
383 (int64_t)(cblknbr), (int64_t)(solvptr->
cblkmax1d + 1), (int64_t)(cblknbr),
385 fblok[0], fblok[1], fblok[2],
386 lblok[0], lblok[1], lblok[2],
388 fcol[0], fcol[1], fcol[2],
389 lcol[0], lcol[1], lcol[2],
391 (
double)(width[0]) / (
double)(nbcblk[0]),
392 (
double)(width[1]) / (
double)(nbcblk[1]),
393 (
double)(width[2]) / (
double)(nbcblk[2]),
395 (
double)(height[0]) / (
double)(nbblok[0]),
396 (
double)(height[1]) / (
double)(nbblok[1]),
397 (
double)(height[2]) / (
double)(nbblok[2]),
399 pastix_print_value( memstruct ),
400 pastix_print_unit( memstruct ),
405 " Number of GEMM tasks: | %-12s | %-12s | %-12s | %-12s |\n"
406 " - All blocks | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
407 " - PaRSEC | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
408 " - StarPU | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n",
409 "Dense",
"Full2d",
"Hybrid",
"Full1d",
410 gemm_dense, gemm_nopart_full2, gemm_nopart_hybrid, gemm_full1,
411 gemm_dense, gemm_parsec_full2, gemm_parsec_hybrid, gemm_full1,
412 gemm_dense, gemm_starpu_full2, gemm_starpu_hybrid, gemm_full1 );
436 MPI_Request *request;
440 if ( solve_step == PastixSolveBackward ) {
452 solvmtx->
reqlock = PASTIX_ATOMIC_UNLOCKED;
454 MALLOC_INTERN( solvmtx->
reqtab, reqnbr, MPI_Request );
457 request = solvmtx->
reqtab;
458 reqindx = solvmtx->
reqidx;
459 for ( i = 0; i < reqnbr; i++, request++, reqindx++ )
461 *request = MPI_REQUEST_NULL;
483 assert( solvmtx->
reqnum == 0 );
484 assert( solvmtx->
reqlock == PASTIX_ATOMIC_UNLOCKED );
487 memFree_null( solvmtx->
reqtab );
490 memFree_null( solvmtx->
reqidx );
534 assert( solvmtx->
maxrecv > 0 );
537 MALLOC_INTERN( solvmtx->
rcoeftab, size,
char );
538 MPI_Recv_init( solvmtx->
rcoeftab, size,
539 MPI_CHAR, MPI_ANY_SOURCE, MPI_ANY_TAG,
540 solvmtx->solv_comm, solvmtx->
reqtab );
541 MPI_Start( solvmtx->
reqtab );
543 assert( solvmtx->
reqnum == 0 );
545 #if defined(PASTIX_DEBUG_MPI)
546 fprintf( stderr,
"[%2d] Start persistant recv from any source\n",
566 assert( solvmtx->
reqtab == NULL );
595 for ( k = 0; k < cblknbr; k++, cblk++ ) {
596 if ( cblk->
cblktype & (CBLK_RECV | CBLK_FANIN) ) {
641 if ( ( ( solve_step == PastixSolveForward ) && ( solvmtx->
recvnbr == 0 ) ) ||
642 ( ( solve_step == PastixSolveBackward ) && ( solvmtx->
faninnbr == 0 ) ) )
650 MALLOC_INTERN( solvmtx->
rcoeftab, size,
char );
651 MPI_Recv_init( solvmtx->
rcoeftab, size,
652 MPI_CHAR, MPI_ANY_SOURCE, MPI_ANY_TAG,
653 solvmtx->solv_comm, solvmtx->
reqtab );
654 MPI_Start( solvmtx->
reqtab );
656 assert( solvmtx->
reqnum == 0 );
658 #if defined(PASTIX_DEBUG_MPI)
659 fprintf( stderr,
"[%2d] Start persistant recv from any source (max = %ld B)\n",
660 solvmtx->clustnum, (
long)size );
BEGIN_C_DECLS typedef int pastix_int_t
void solverRecvExit(SolverMatrix *solvmtx)
Free the array linked to pending reception.
void solverRequestExit(SolverMatrix *solvmtx)
Free the arrays related to the requests.
static pastix_int_t solverRhsRecvMax(SolverMatrix *solvmtx)
Computes the max size of recv cblk.
void solverRhsRecvExit(SolverMatrix *solvmtx)
Frees the array linked to pending reception.
void solverRecvInit(pastix_coefside_t side, SolverMatrix *solvmtx, pastix_coeftype_t flttype)
Allocate the reception buffer, and initiate the first persistent reception.
void solverRhsRecvInit(solve_step_t solve_step, SolverMatrix *solvmtx, pastix_coeftype_t flttype, pastix_rhs_t rhsb)
Allocates the reception buffer, and initiates the first persistent reception.
void solverPrintStats(const SolverMatrix *solvptr)
Print statistical information about the solver matrix structure.
void solverRequestInit(solve_step_t solve_step, SolverMatrix *solvmtx)
Instantiate the arrays for the requests according to the scheduler.
void solverInit(SolverMatrix *solvmtx)
Initialize the solver structure.
void solverExit(SolverMatrix *solvmtx)
Free the content of the solver matrix structure.
void coeftabExit(SolverMatrix *solvmtx)
Free the solver matrix structure.
spm_coeftype_t pastix_coeftype_t
Arithmetic types.
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
struct parsec_sparse_matrix_desc_s parsec_sparse_matrix_desc_t
PaRSEC descriptor structure for the sparse matrix.
void starpu_dense_matrix_destroy(starpu_dense_matrix_desc_t *desc)
Free the StarPU descriptor of the dense matrix.
struct starpu_sparse_matrix_desc_s starpu_sparse_matrix_desc_t
StarPU descriptor structure for the sparse matrix.
Main PaStiX RHS structure.
static size_t solver_size(const SolverMatrix *solvptr)
Compute the memory size used by the solver structure itself.
pastix_atomic_lock_t reqlock
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
struct task_s Task
The task structure for the numerical factorization.
SolverBlok *restrict bloktab
struct solver_cblk_s SolverCblk
Solver column block structure.
struct solver_blok_s SolverBlok
Solver block structure.
pastix_int_t *restrict browtab
SolverCblk *restrict cblktab
enum solve_step_e solve_step_t
Tags used in MPI communications.
Solver column block structure.
Solver column block structure.
void solverComMatrixExit(SolverMatrix *solvmtx)
Free the communication matrix.
void solverComMatrixGather(SolverMatrix *solvmtx)
Gather the volume of communication and save it as a csv.
void solverComMatrixInit(SolverMatrix *solvmtx)
Initialize the communication matrix.