22 #if defined(PASTIX_WITH_PARSEC)
26 #if defined(PASTIX_WITH_STARPU)
53 size_t mem =
sizeof(SolverMatrix);
56 if ( solvptr->cblktab ) {
57 mem += solvptr->cblknbr *
sizeof(
SolverCblk );
59 if ( solvptr->bloktab ) {
60 mem += solvptr->bloknbr *
sizeof(
SolverBlok );
62 if ( solvptr->browtab ) {
63 mem += solvptr->brownbr *
sizeof( pastix_int_t );
65 #if defined(PASTIX_WITH_PARSEC)
66 if ( solvptr->parsec_desc ) {
70 #if defined(PASTIX_WITH_STARPU)
71 if ( solvptr->starpu_desc ) {
84 if ( solvptr->tasktab ) {
85 mem += solvptr->tasknbr *
sizeof(
Task);
87 if ( solvptr->ttsknbr ) {
89 mem += solvptr->thrdnbr *
sizeof(pastix_int_t);
90 mem += solvptr->thrdnbr *
sizeof(pastix_int_t*);
92 for( i=0; i<solvptr->thrdnbr; i++ ) {
93 mem += solvptr->ttsknbr[i] *
sizeof(pastix_int_t);
120 memset(solvmtx, 0,
sizeof (SolverMatrix));
121 solvmtx->cblkmax1d = -1;
122 solvmtx->cblkmaxblk = 1;
150 if(solvmtx->cblktab) {
151 memFree_null(solvmtx->cblktab);
153 if(solvmtx->bloktab) {
154 memFree_null(solvmtx->bloktab);
156 if(solvmtx->browtab) {
157 memFree_null(solvmtx->browtab);
159 if(solvmtx->gcbl2loc) {
160 memFree_null(solvmtx->gcbl2loc);
162 if(solvmtx->tasktab) {
163 memFree_null(solvmtx->tasktab);
165 memFree_null(solvmtx->ttsknbr);
166 for (i=0;i<solvmtx->bublnbr;i++)
168 if (solvmtx->ttsktab[i] != NULL) {
169 memFree_null(solvmtx->ttsktab[i]);
172 memFree_null(solvmtx->ttsktab);
173 #if defined(PASTIX_WITH_STARPU)
175 if ( solvmtx->starpu_desc_rhs != NULL ) {
177 free( solvmtx->starpu_desc_rhs );
179 solvmtx->starpu_desc_rhs = NULL;
200 size_t memstruct, memcoef;
201 pastix_int_t itercblk;
205 int64_t fcol[3], lcol[3];
207 int64_t width[3] = { 0, 0, 0 };
208 int64_t height[3] = { 0, 0, 0 };
210 int64_t nbcblk[3] = { 0, 0, 0 };
211 int64_t nbblok[3] = { 0, 0, 0 };
212 int64_t fblok[3], lblok[3];
214 int64_t nbpartblok[3] = { 0, 0, 0 };
217 int64_t gemm_dense = 0;
218 int64_t gemm_nopart_full2 = 0;
219 int64_t gemm_nopart_hybrid = 0;
220 int64_t gemm_parsec_full2 = 0;
221 int64_t gemm_parsec_hybrid = 0;
222 int64_t gemm_starpu_full2 = 0;
223 int64_t gemm_starpu_hybrid = 0;
224 int64_t gemm_full1 = 0;
226 cblknbr = solvptr->cblknbr;
227 cblk = solvptr->cblktab;
229 for(itercblk=0; itercblk<cblknbr; itercblk++, cblk++)
232 pastix_int_t rownbr = cblk->
stride;
234 pastix_int_t brow_size[3];
235 pastix_int_t brow_csze[3] = { 0, 0, 0 };
236 pastix_int_t nbpblok = 0;
241 assert( brow_size[0] == (brow_size[1] + brow_size[2]) );
243 memcoef += colnbr * rownbr;
246 gemm_nopart_full2 += brow_size[0] * bcol_size;
247 gemm_nopart_hybrid += brow_size[1] + (brow_size[2] * bcol_size);
251 pastix_int_t b, lcblk = -1;
252 pastix_int_t *browptr = solvptr->browtab + cblk[0].
brownum;
253 for ( b = cblk[0].brownum; b < cblk[1].
brownum; b++, browptr++ ) {
254 blok = solvptr->bloktab + (*browptr);
255 if ( blok->
lcblknm != lcblk ) {
259 if ( (solvptr->cblktab + lcblk)->cblktype & CBLK_TASKS_2D ) {
267 assert( brow_csze[0] == (brow_csze[1] + brow_csze[2]) );
268 assert( brow_csze[0] <= brow_size[0] );
269 assert( brow_csze[1] <= brow_size[1] );
270 assert( brow_csze[2] <= brow_size[2] );
279 while( blok < cblk[1].fblokptr ) {
280 while( (blok < cblk[1].fblokptr-1) &&
281 (blok[0].fcblknm == blok[1].fcblknm) &&
282 (blok[0].lcblknm == blok[1].lcblknm) )
291 gemm_parsec_full2 += (nbpblok+1) * brow_size[0];
292 gemm_parsec_hybrid += ((nbpblok+1) * brow_size[2]) + brow_size[1];
295 gemm_starpu_full2 += (nbpblok * (nbpblok+1)) / 2;
297 if (cblk->
cblktype & CBLK_TASKS_2D) {
298 gemm_starpu_hybrid += (nbpblok * (nbpblok+1)) / 2;
300 nbpartblok[2] += nbpblok;
302 height[2] += rownbr - colnbr;
305 gemm_starpu_hybrid += bcol_size - 1;
307 nbpartblok[1] += nbpblok;
309 height[1] += rownbr - colnbr;
312 nbpartblok[0] += nbpblok;
314 height[0] += rownbr - colnbr;
317 assert( (width[1] + width[2]) == solvptr->nodenbr );
318 assert( (width[1] + width[2]) == width[0] );
319 assert( (height[1] + height[2]) == height[0] );
323 gemm_dense = (cblknbr * ( cblknbr * cblknbr - 1 )) / 6;
324 gemm_full1 = solvptr->bloknbr - solvptr->cblknbr;
328 fcol[2] = (solvptr->cblktab + solvptr->cblkmin2d)->fcolnum;
330 lcol[0] = (solvptr->cblktab + solvptr->cblknbr )->fcolnum;
331 lcol[1] = (solvptr->cblktab + solvptr->cblkmax1d)->lcolnum + 1;
332 lcol[2] = (solvptr->cblktab + solvptr->cblknbr )->fcolnum;
335 nbcblk[1] = (cblknbr - solvptr->nb2dcblk);
336 nbcblk[2] = solvptr->nb2dcblk;
338 nbblok[0] = solvptr->bloknbr - cblknbr;
339 nbblok[1] = (solvptr->bloknbr - cblknbr) - (solvptr->nb2dblok - solvptr->nb2dcblk);
340 nbblok[2] = solvptr->nb2dblok - solvptr->nb2dcblk;
344 fblok[2] = ((solvptr->cblktab + solvptr->cblkmin2d)->fblokptr - solvptr->bloktab);
346 lblok[0] = solvptr->bloknbr;
347 lblok[1] = (solvptr->cblktab + solvptr->cblkmax1d + 1)->fblokptr - solvptr->bloktab;
348 lblok[2] = solvptr->bloknbr;
351 " Solver Matrix statistics: | %-12s | %-12s | %-12s |\n"
352 " --------------------------------------------------------------------------------\n"
353 " Number of cblk | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
354 " Number of block | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
355 " Number of block (diag part.) | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
356 " Cblk: first | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
357 " last | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
358 " Block: first | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
359 " last | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
360 " rownum: first | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
361 " last | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
362 " Average width | %12.2lf | %12.2lf | %12.2lf |\n"
363 " Average height | %12.2lf | %12.2lf | %12.2lf |\n"
364 " Structure memory space %11.2lf %co\n"
365 " Number of coeficients stored %10ld\n",
369 nbcblk[0], nbcblk[1], nbcblk[2],
371 nbblok[0], nbblok[1], nbblok[2],
376 nbpartblok[0], nbpartblok[1], nbpartblok[2],
378 (int64_t)0, (int64_t)0, (int64_t)(solvptr->cblkmin2d),
379 (int64_t)(cblknbr), (int64_t)(solvptr->cblkmax1d + 1), (int64_t)(cblknbr),
381 fblok[0], fblok[1], fblok[2],
382 lblok[0], lblok[1], lblok[2],
384 fcol[0], fcol[1], fcol[2],
385 lcol[0], lcol[1], lcol[2],
387 (
double)(width[0]) / (
double)(nbcblk[0]),
388 (
double)(width[1]) / (
double)(nbcblk[1]),
389 (
double)(width[2]) / (
double)(nbcblk[2]),
391 (
double)(height[0]) / (
double)(nbblok[0]),
392 (
double)(height[1]) / (
double)(nbblok[1]),
393 (
double)(height[2]) / (
double)(nbblok[2]),
395 pastix_print_value( memstruct ),
396 pastix_print_unit( memstruct ),
401 " Number of GEMM tasks: | %-12s | %-12s | %-12s | %-12s |\n"
402 " - All blocks | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
403 " - PaRSEC | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
404 " - StarPU | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n",
405 "Dense",
"Full2d",
"Hybrid",
"Full1d",
406 gemm_dense, gemm_nopart_full2, gemm_nopart_hybrid, gemm_full1,
407 gemm_dense, gemm_parsec_full2, gemm_parsec_hybrid, gemm_full1,
408 gemm_dense, gemm_starpu_full2, gemm_starpu_hybrid, gemm_full1 );
425 MPI_Request *request;
426 pastix_int_t *reqindx;
427 pastix_int_t i, reqnbr;
429 reqnbr = solvmtx->faninnbr + 1;
430 solvmtx->reqnbr = reqnbr;
431 solvmtx->reqlock = PASTIX_ATOMIC_UNLOCKED;
433 MALLOC_INTERN( solvmtx->reqtab, reqnbr, MPI_Request );
434 MALLOC_INTERN( solvmtx->reqidx, reqnbr, pastix_int_t );
436 request = solvmtx->reqtab;
437 reqindx = solvmtx->reqidx;
438 for ( i = 0; i < reqnbr; i++, request++, reqindx++ )
440 *request = MPI_REQUEST_NULL;
461 assert( solvmtx->reqnum == 0 );
462 assert( solvmtx->reqlock == PASTIX_ATOMIC_UNLOCKED );
464 if( solvmtx->reqtab ) {
465 memFree_null( solvmtx->reqtab );
467 if( solvmtx->reqidx ) {
468 memFree_null( solvmtx->reqidx );
501 SolverMatrix *solvmtx,
505 pastix_int_t size = pastix_size_of(flttype) * solvmtx->maxrecv;
508 if( solvmtx->recvnbr == 0 ) {
512 assert( solvmtx->maxrecv > 0 );
515 MALLOC_INTERN( solvmtx->rcoeftab, size,
char );
516 MPI_Recv_init( solvmtx->rcoeftab, size,
517 MPI_CHAR, MPI_ANY_SOURCE, MPI_ANY_TAG,
518 solvmtx->solv_comm, solvmtx->reqtab );
519 MPI_Start( solvmtx->reqtab );
522 #if defined(PASTIX_DEBUG_MPI)
523 fprintf( stderr,
"[%2d] Start persistant recv from any source\n",
543 assert( solvmtx->reqtab == NULL );
544 if( solvmtx->rcoeftab ) {
545 memFree_null( solvmtx->rcoeftab );