PaStiX Handbook 6.4.0
Loading...
Searching...
No Matches
symbol_cost.c
Go to the documentation of this file.
1/**
2 *
3 * @file symbol_cost.c
4 *
5 * PaStiX symbol structure cost functions
6 *
7 * @copyright 1999-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8 * Univ. Bordeaux. All rights reserved.
9 *
10 * @version 6.4.0
11 * @author Pascal Henon
12 * @author Pierre Ramet
13 * @author Mathieu Faverge
14 * @date 2024-07-05
15 *
16 **/
17#include <stdio.h>
18#include <string.h>
19#include <strings.h>
20#include <assert.h>
21
22#include "common.h"
23#include "symbol/symbol.h"
24#include "symbol_cost.h"
25
26/**
27 *******************************************************************************
28 *
29 * @ingroup symbol_dev_cost
30 *
31 * @brief Template function to compute cost on a column-block based approach
32 * with a single update per column block
33 *
34 *******************************************************************************
35 *
36 * @param[in] fptr
37 * The structure that contains the cost functions (diag, trsm and
38 * update are used)
39 *
40 * @param[in] symbmtx
41 * The symbolic matrix structure on which to compute the costs.
42 *
43 * @param[in] cblknum
44 * The index of the column-block for which the cost will be computed
45 *
46 *******************************************************************************
47 *
48 * @return The cost associated to the cblk of index cblknum and evaluated with
49 * the set of given functions
50 *
51 *******************************************************************************/
52static double
54 const symbol_matrix_t *symbmtx,
55 pastix_int_t cblknum )
56{
57 symbol_cblk_t *cblk = symbmtx->cblktab + cblknum;
58 pastix_int_t M, N, k;
59 double nbops = 0.;
60 double dof = (double)(symbmtx->dof);
61
62 /*
63 * Size of the factorization kernel (square)
64 */
65 N = (cblk->lcolnum - cblk->fcolnum + 1);
66
67 /*
68 * Height of the TRSM to which apply the TRSM
69 */
70 M = 0;
71 for(k = cblk[0].bloknum+1; k < cblk[1].bloknum; k++)
72 {
73 M += (symbmtx->bloktab[k].lrownum - symbmtx->bloktab[k].frownum + 1);
74 }
75
76 if ( dof > 0. ) {
77 N *= dof;
78 M *= dof;
79 }
80
81 nbops = fptr->diag( N );
82 if( M > 0 ) {
83 nbops += fptr->trsm( M, N );
84 nbops += fptr->update( N, M );
85 }
86
87 return nbops;
88}
89
90/**
91 *******************************************************************************
92 *
93 * @ingroup symbol_dev_cost
94 *
95 * @brief Template function to compute cost on block based approach.
96 *
97 * As opposed to sum1d(), the updates are split in one per off-diagonal block
98 * making it more precise to evaluate the performance cost of the GEMMs, for
99 * example, as it exactly follow the 1D scheme used in the static scheduler of
100 * PaStiX.
101 *
102 *******************************************************************************
103 *
104 * @param[in] fptr
105 * The structure that contains the cost functions (diag, trsm and
106 * blkupdate are used)
107 *
108 * @param[in] symbmtx
109 * The symbolic matrix structure on which to compute the costs.
110 *
111 * @param[in] cblknum
112 * The index of the column-block for which the cost will be computed
113 *
114 *******************************************************************************
115 *
116 * @return The cost associated to the cblk of index cblknum and evaluated with
117 * the set of given functions
118 *
119 *******************************************************************************/
120static double
122 const symbol_matrix_t *symbmtx,
123 pastix_int_t cblknum )
124{
125 symbol_cblk_t *cblk = symbmtx->cblktab + cblknum;
126 pastix_int_t M, N, K, l;
127 double nbops = 0.;
128 double dof = (double)(symbmtx->dof);
129
130 /*
131 * Size of the factorization kernel (square)
132 */
133 N = (cblk->lcolnum - cblk->fcolnum + 1);
134
135 /*
136 * Height of the TRSM to which apply the TRSM
137 */
138 M = 0;
139 for(l = cblk[0].bloknum+1; l < cblk[1].bloknum; l++)
140 {
141 M += (symbmtx->bloktab[l].lrownum - symbmtx->bloktab[l].frownum + 1);
142 }
143
144 N *= dof;
145 M *= dof;
146
147 nbops = fptr->diag( N );
148 if( M > 0 ) {
149 nbops += fptr->trsm( M, N );
150 }
151
152 /*
153 * Compute the cost of each GEMM
154 */
155 K = N;
156 for(l = cblk[0].bloknum+1; l < cblk[1].bloknum; l++)
157 {
158 N = (symbmtx->bloktab[l].lrownum - symbmtx->bloktab[l].frownum + 1);
159 N *= dof;
160
161 nbops += fptr->blkupdate( K, M, N );
162
163 M -= N;
164 }
165 return nbops;
166}
167
168/**
169 *******************************************************************************
170 *
171 * @ingroup symbol_dev_cost
172 *
173 * @brief Template function to compute cost on block based approach which keeps
174 * the cost per block.
175 *
176 * As opposed to sum2d(), the cost of each update per block is stored in the
177 * blokcost array. Despite this storage, the function is completely identical.
178 *
179 *******************************************************************************
180 *
181 * @param[in] fptr
182 * The structure that contains the cost functions (diag, trsm and
183 * blkupdate are used)
184 *
185 * @param[in] symbmtx
186 * The symbolic matrix structure on which to compute the costs.
187 *
188 * @param[in] cblknum
189 * The index of the column-block for which the cost will be computed
190 *
191 * @param[inout] blokcost
192 * An array of size the number of blocks in the cblknum column-block in
193 * which to store the cost per block of each updates.
194 *
195 *******************************************************************************
196 *
197 * @return The cost associated to the cblk of index cblknum and evaluated with
198 * the set of given functions
199 *
200 *******************************************************************************/
201static double
203 const symbol_matrix_t *symbmtx,
204 pastix_int_t cblknum,
205 double *blokcost )
206{
207 symbol_cblk_t *cblk = symbmtx->cblktab + cblknum;
208 pastix_int_t M, N, K, l;
209 double nbops = 0.;
210 double dof = (double)(symbmtx->dof);
211
212 /*
213 * Size of the factorization kernel (square)
214 */
215 N = (cblk->lcolnum - cblk->fcolnum + 1);
216
217 /*
218 * Height of the TRSM to which apply the TRSM
219 */
220 M = 0;
221 for(l = cblk[0].bloknum+1; l < cblk[1].bloknum; l++)
222 {
223 M += (symbmtx->bloktab[l].lrownum - symbmtx->bloktab[l].frownum + 1);
224 }
225
226 N *= dof;
227 M *= dof;
228
229 nbops = fptr->diag( N );
230 if( M > 0 ) {
231 nbops += fptr->trsm( M, N );
232 }
233 *blokcost = nbops;
234 blokcost++;
235
236 /*
237 * Compute the cost of each GEMM
238 */
239 K = N;
240 for(l = cblk[0].bloknum+1; l < cblk[1].bloknum; l++, blokcost++)
241 {
242 N = (symbmtx->bloktab[l].lrownum - symbmtx->bloktab[l].frownum + 1);
243 N *= dof;
244
245 *blokcost = fptr->blkupdate( K, M, N );
246 nbops += *blokcost;
247
248 M -= N;
249 }
250 return nbops;
251}
252
253/**
254 *******************************************************************************
255 *
256 * @ingroup symbol_dev_cost
257 *
258 * @brief Recursive function to compute the cost of the full symbolic structure
259 * with either sum1d(), sum2d(), or sum2dext().
260 *
261 *******************************************************************************
262 *
263 * @param[in] a
264 * The first column-block index of the range to address
265 *
266 * @param[in] b
267 * The last column-block index of the range to address (inclusive)
268 *
269 * @param[in] fval
270 * The function to use to compute the cost. This can be sum1d(),
271 * sum2d(), or sum2dext()
272 *
273 * @param[in] fptr
274 * The set of functions that will be applied. It can be size function,
275 * floating point operation, performance models, ...
276 *
277 * @param[in] symbmtx
278 * The symbol matrix on which to compute the wanted information.
279 *
280 *******************************************************************************
281 *
282 * @return The cost associated to the integral of the symbol matrix structure
283 * and evaluated with the set of given functions
284 *
285 *******************************************************************************/
286static double
288 double (*fval)(const symbol_function_t *, const symbol_matrix_t *, pastix_int_t),
289 const symbol_function_t *fptr,
290 const symbol_matrix_t *symbmtx )
291{
292 if(a != b)
293 return recursive_sum( a, (a+b)/2, fval, fptr, symbmtx)
294 + recursive_sum((a+b)/2+1, b, fval, fptr, symbmtx);
295
296 return fval(fptr, symbmtx, a);
297}
298
299/**
300 * @addtogroup pastix_symbol
301 * @{
302 */
303
304/**
305 *******************************************************************************
306 *
307 * @brief Computes the number of non-zero elements in L.
308 *
309 * This computes the number of non-zero elements stored in the symbol matrix in
310 * order to compute the fill-in. This routines returns the number of non-zero of
311 * the strictly lower part of the matrix.
312 *
313 *******************************************************************************
314 *
315 * @param[in] symbptr
316 * The symbol structure to study.
317 *
318 *******************************************************************************
319 *
320 * @return The number of non zero elements in the strictly lower part of the
321 * full symbol matrix.
322 *
323 *******************************************************************************/
324size_t
326{
327 symbol_cblk_t *cblk;
328 symbol_blok_t *blok;
329 pastix_int_t itercblk;
330 pastix_int_t cblknbr;
331 size_t nnz = 0;
332 pastix_int_t dof = symbptr->dof;
333 const pastix_int_t *dofs = symbptr->dofs;
334
335 cblknbr = symbptr->cblknbr;
336 cblk = symbptr->cblktab;
337 blok = symbptr->bloktab;
338
339 if ( dof > 0 ) {
340 for(itercblk=0; itercblk<cblknbr; itercblk++, cblk++)
341 {
342 pastix_int_t iterblok = cblk[0].bloknum + 1;
343 pastix_int_t lbloknum = cblk[1].bloknum;
344
345 pastix_int_t colnbr = dof * (cblk->lcolnum - cblk->fcolnum + 1);
346
347 /* Diagonal block */
348 blok++;
349 nnz += ( colnbr * (colnbr+1) ) / 2 - colnbr;
350
351 /* Off-diagonal blocks */
352 for( ; iterblok < lbloknum; iterblok++, blok++)
353 {
354 pastix_int_t rownbr = (blok->lrownum - blok->frownum + 1) * dof;
355
356 nnz += rownbr * colnbr;
357 }
358 }
359 }
360 else {
361 assert( symbptr->baseval == 0 );
362
363 for(itercblk=0; itercblk<cblknbr; itercblk++, cblk++)
364 {
365 pastix_int_t iterblok = cblk[0].bloknum + 1;
366 pastix_int_t lbloknum = cblk[1].bloknum;
367 pastix_int_t colnbr = dofs[ cblk->lcolnum + 1 ] - dofs[ cblk->fcolnum ];
368
369 /* Diagonal block */
370 blok++;
371 nnz += ( colnbr * (colnbr+1) ) / 2 - colnbr;
372
373 /* Off-diagonal blocks */
374 for( ; iterblok < lbloknum; iterblok++, blok++)
375 {
376 pastix_int_t rownbr = dofs[ blok->lrownum + 1 ] - dofs[ blok->frownum ];
377
378 nnz += rownbr * colnbr;
379 }
380 }
381 }
382
383 return nnz;
384}
385
386/**
387 *******************************************************************************
388 *
389 * @brief Computes the number of theoretical and real flops.
390 *
391 * Given a symbolic factorization structure, the type of factorization: Cholesky
392 * or LU, and the arithmetic, this function will return the total number of
393 * floating point operation that will be performed during the numerical
394 * factorization.
395 *
396 *******************************************************************************
397 *
398 * @param[in] symbmtx
399 * The symbol structure to study.
400 *
401 * @param[in] flttype
402 * The floating type of the elements in the matrix.
403 * PastixPattern, PastixFloat, PastixDouble, PastixComplex32 or
404 * PastixComplex64. In case of PastixPattern, values for PastixDouble
405 * are returned.
406 *
407 * @param[in] factotype
408 * The factorization algorithm to perform: PastixFactLLT,
409 * PastixFactLDLT, PastixFactLLH, PastixFactLDLH or PastixFactLU.
410 *
411 * @param[out] thflops
412 * Returns the number of theoretical flops to perform.
413 * NULL if not asked.
414 *
415 * @param[out] rlflops
416 * Returns the number of real flops to perform, taking into account
417 * copies and scatter operations.
418 * NULL if not asked.
419 *
420 *******************************************************************************/
421void
423 pastix_coeftype_t flttype,
424 pastix_factotype_t factotype,
425 double *thflops,
426 double *rlflops )
427{
428 int iscomplex = ((flttype == PastixComplex32) || (flttype == PastixComplex64)) ? 1 : 0;
429
430 /* Compute theoretical flops */
431 if ( thflops != NULL ) {
432 *thflops = recursive_sum(0, symbmtx->cblknbr-1, sum1d,
433 &(flopstable[iscomplex][factotype]),
434 symbmtx);
435 }
436
437 /* Compute performed flops */
438 if ( rlflops != NULL ) {
439 *rlflops = recursive_sum(0, symbmtx->cblknbr-1, sum2d,
440 &(flopstable[iscomplex][factotype]),
441 symbmtx);
442 }
443}
444
445/**
446 *******************************************************************************
447 *
448 * @brief Computes the cost of structure for the costMatrixBuild() function.
449 *
450 * This function iterates on the column-blocks and blocks to compute the cost of
451 * the operation performed on each of those elements for the costMatrixBuild()
452 * function that is used in the simulation for the data mapping. It returns an
453 * array of cost for each type of element.
454 *
455 *******************************************************************************
456 *
457 * @param[in] symbmtx
458 * The symbol structure to study.
459 *
460 * @param[in] flttype
461 * The floating type of the elements in the matrix.
462 * PastixPattern, PastixFloat, PastixDouble, PastixComplex32 or
463 * PastixComplex64. In case of PastixPattern, values for PastixDouble
464 * are returned.
465 *
466 * @param[in] factotype
467 * The factorization algorithm to perform: PastixFactLLT,
468 * PastixFactLDLT, PastixFactLLH, PastixFactLDLH or PastixFactLU.
469 *
470 * @param[inout] cblkcost
471 * An allocated array of size cblknbr that will holds the cost per cblk
472 * on exit.
473 *
474 * @param[inout] blokcost
475 * An allocated array of size bloknbr that will holds the cost per blok
476 * on exit.
477 *
478 *******************************************************************************/
479void
481 pastix_coeftype_t flttype,
482 pastix_factotype_t factotype,
483 double *cblkcost,
484 double *blokcost )
485{
487 double *cblkptr, *blokptr;
488 pastix_int_t i;
489 int iscomplex = ((flttype == PastixComplex32) || (flttype == PastixComplex64)) ? 1 : 0;
490 f = &(perfstable[iscomplex][factotype]);
491
492 /* Initialize costs */
493 cblkptr = cblkcost;
494 blokptr = blokcost;
495
496 for(i=0; i<symbmtx->cblknbr; i++, cblkptr++) {
497 *cblkptr = sum2dext( f, symbmtx, i, blokptr );
498
499 blokptr += symbmtx->cblktab[i+1].bloknum
500 - symbmtx->cblktab[i ].bloknum;
501 }
502
503 assert( ( cblkptr - cblkcost ) == symbmtx->cblknbr );
504 assert( ( blokptr - blokcost ) == symbmtx->bloknbr );
505}
506
507/**
508 * @}
509 */
BEGIN_C_DECLS typedef int pastix_int_t
Definition datatypes.h:51
spm_coeftype_t pastix_coeftype_t
Arithmetic types.
Definition api.h:294
enum pastix_factotype_e pastix_factotype_t
Factorization algorithms available for IPARM_FACTORIZATION parameter.
pastix_int_t frownum
Definition symbol.h:60
pastix_int_t lrownum
Definition symbol.h:61
pastix_int_t bloknbr
Definition symbol.h:80
pastix_int_t baseval
Definition symbol.h:78
symbol_cblk_t * cblktab
Definition symbol.h:83
pastix_int_t bloknum
Definition symbol.h:48
pastix_int_t fcolnum
Definition symbol.h:46
pastix_int_t lcolnum
Definition symbol.h:47
pastix_int_t dof
Definition symbol.h:87
symbol_blok_t * bloktab
Definition symbol.h:84
pastix_int_t * dofs
Definition symbol.h:89
pastix_int_t cblknbr
Definition symbol.h:79
size_t pastixSymbolGetNNZ(const symbol_matrix_t *symbptr)
Computes the number of non-zero elements in L.
void pastixSymbolGetTimes(const symbol_matrix_t *symbmtx, pastix_coeftype_t flttype, pastix_factotype_t factotype, double *cblkcost, double *blokcost)
Computes the cost of structure for the costMatrixBuild() function.
void pastixSymbolGetFlops(const symbol_matrix_t *symbmtx, pastix_coeftype_t flttype, pastix_factotype_t factotype, double *thflops, double *rlflops)
Computes the number of theoretical and real flops.
Symbol block structure.
Definition symbol.h:59
Symbol column block structure.
Definition symbol.h:45
Symbol matrix structure.
Definition symbol.h:77
double(* update)(pastix_int_t, pastix_int_t)
Definition symbol_cost.h:31
double(* trsm)(pastix_int_t, pastix_int_t)
Definition symbol_cost.h:29
double(* diag)(pastix_int_t)
Definition symbol_cost.h:28
double(* blkupdate)(pastix_int_t, pastix_int_t, pastix_int_t)
Definition symbol_cost.h:33
static double sum2dext(const symbol_function_t *fptr, const symbol_matrix_t *symbmtx, pastix_int_t cblknum, double *blokcost)
Template function to compute cost on block based approach which keeps the cost per block.
symbol_function_t flopstable[2][5]
array of pointer to the flops functions per factorization and arithmetic
static double recursive_sum(pastix_int_t a, pastix_int_t b, double(*fval)(const symbol_function_t *, const symbol_matrix_t *, pastix_int_t), const symbol_function_t *fptr, const symbol_matrix_t *symbmtx)
Recursive function to compute the cost of the full symbolic structure with either sum1d(),...
symbol_function_t perfstable[2][5]
array of pointer to the performance functions per factorization and arithmetic
static double sum2d(const symbol_function_t *fptr, const symbol_matrix_t *symbmtx, pastix_int_t cblknum)
Template function to compute cost on block based approach.
static double sum1d(const symbol_function_t *fptr, const symbol_matrix_t *symbmtx, pastix_int_t cblknum)
Template function to compute cost on a column-block based approach with a single update per column bl...
Definition symbol_cost.c:53
Cost functions to compute statistics on the symbolic structure.
Definition symbol_cost.h:27