191 size_t memstruct, memcoef;
196 int64_t fcol[3], lcol[3];
198 int64_t width[3] = { 0, 0, 0 };
199 int64_t height[3] = { 0, 0, 0 };
201 int64_t nbcblk[3] = { 0, 0, 0 };
202 int64_t nbblok[3] = { 0, 0, 0 };
203 int64_t fblok[3], lblok[3];
205 int64_t nbpartblok[3] = { 0, 0, 0 };
208 int64_t gemm_dense = 0;
209 int64_t gemm_nopart_full2 = 0;
210 int64_t gemm_nopart_hybrid = 0;
211 int64_t gemm_parsec_full2 = 0;
212 int64_t gemm_parsec_hybrid = 0;
213 int64_t gemm_starpu_full2 = 0;
214 int64_t gemm_starpu_hybrid = 0;
215 int64_t gemm_full1 = 0;
220 for(itercblk=0; itercblk<cblknbr; itercblk++, cblk++)
225 int64_t brow_size[3];
231 assert( brow_size[0] == (brow_size[1] + brow_size[2]) );
233 memcoef += colnbr * rownbr;
236 gemm_nopart_full2 += brow_size[0] * bcol_size;
237 gemm_nopart_hybrid += brow_size[1] + (brow_size[2] * bcol_size);
245 int64_t brow_csze[3] = { 0, 0, 0 };
247 for ( b = cblk[0].brownum; b < cblk[1].
brownum; b++, browptr++ ) {
248 blok = solvptr->
bloktab + (*browptr);
249 if ( blok->
lcblknm != lcblk ) {
253 if ( (solvptr->
cblktab + lcblk)->cblktype & CBLK_TASKS_2D ) {
261 assert( brow_csze[0] == (brow_csze[1] + brow_csze[2]) );
262 assert( brow_csze[0] <= brow_size[0] );
263 assert( brow_csze[1] <= brow_size[1] );
264 assert( brow_csze[2] <= brow_size[2] );
274 while( blok < cblk[1].fblokptr ) {
275 while( (blok < cblk[1].fblokptr-1) &&
276 (blok[0].fcblknm == blok[1].fcblknm) &&
277 (blok[0].lcblknm == blok[1].lcblknm) )
286 gemm_parsec_full2 += (nbpblok+1) * brow_size[0];
287 gemm_parsec_hybrid += ((nbpblok+1) * brow_size[2]) + brow_size[1];
290 gemm_starpu_full2 += (nbpblok * (nbpblok+1)) / 2;
292 if (cblk->
cblktype & CBLK_TASKS_2D) {
293 gemm_starpu_hybrid += (nbpblok * (nbpblok+1)) / 2;
295 nbpartblok[2] += nbpblok;
297 height[2] += rownbr - colnbr;
300 gemm_starpu_hybrid += bcol_size - 1;
302 nbpartblok[1] += nbpblok;
304 height[1] += rownbr - colnbr;
307 nbpartblok[0] += nbpblok;
309 height[0] += rownbr - colnbr;
312 assert( (width[1] + width[2]) == solvptr->
nodenbr );
313 assert( (width[1] + width[2]) == width[0] );
314 assert( (height[1] + height[2]) == height[0] );
318 gemm_dense = (cblknbr * ( cblknbr * cblknbr - 1 )) / 6;
330 nbcblk[1] = (cblknbr - solvptr->
nb2dcblk);
333 nbblok[0] = solvptr->
bloknbr - cblknbr;
346 " Solver Matrix statistics: | %-12s | %-12s | %-12s |\n"
347 " --------------------------------------------------------------------------------\n"
348 " Number of cblk | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
349 " Number of block | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
350 " Number of block (diag part.) | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
351 " Cblk: first | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
352 " last | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
353 " Block: first | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
354 " last | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
355 " rownum: first | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
356 " last | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
357 " Average width | %12.2lf | %12.2lf | %12.2lf |\n"
358 " Average height | %12.2lf | %12.2lf | %12.2lf |\n"
359 " Structure memory space %11.2lf %co\n"
360 " Number of coeficients stored %10ld\n",
364 nbcblk[0], nbcblk[1], nbcblk[2],
366 nbblok[0], nbblok[1], nbblok[2],
371 nbpartblok[0], nbpartblok[1], nbpartblok[2],
373 (int64_t)0, (int64_t)0, (int64_t)(solvptr->
cblkmin2d),
374 (int64_t)(cblknbr), (int64_t)(solvptr->
cblkmax1d + 1), (int64_t)(cblknbr),
376 fblok[0], fblok[1], fblok[2],
377 lblok[0], lblok[1], lblok[2],
379 fcol[0], fcol[1], fcol[2],
380 lcol[0], lcol[1], lcol[2],
382 (
double)(width[0]) / (
double)(nbcblk[0]),
383 (
double)(width[1]) / (
double)(nbcblk[1]),
384 (
double)(width[2]) / (
double)(nbcblk[2]),
386 (
double)(height[0]) / (
double)(nbblok[0]),
387 (
double)(height[1]) / (
double)(nbblok[1]),
388 (
double)(height[2]) / (
double)(nbblok[2]),
390 pastix_print_value( memstruct ),
391 pastix_print_unit( memstruct ),
396 " Number of GEMM tasks: | %-12s | %-12s | %-12s | %-12s |\n"
397 " - All blocks | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
398 " - PaRSEC | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n"
399 " - StarPU | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" | %12" PRIi64
" |\n",
400 "Dense",
"Full2d",
"Hybrid",
"Full1d",
401 gemm_dense, gemm_nopart_full2, gemm_nopart_hybrid, gemm_full1,
402 gemm_dense, gemm_parsec_full2, gemm_parsec_hybrid, gemm_full1,
403 gemm_dense, gemm_starpu_full2, gemm_starpu_hybrid, gemm_full1 );