PaStiX Handbook  6.3.2
symbol_cost.c
Go to the documentation of this file.
1 /**
2  *
3  * @file symbol_cost.c
4  *
5  * PaStiX symbol structure cost functions
6  *
7  * @copyright 1999-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.3.2
11  * @author Pascal Henon
12  * @author Pierre Ramet
13  * @author Mathieu Faverge
14  * @date 2023-07-21
15  *
16  **/
17 #include <stdio.h>
18 #include <string.h>
19 #include <strings.h>
20 #include <assert.h>
21 
22 #include "common.h"
23 #include "symbol/symbol.h"
24 #include "symbol_cost.h"
25 
26 /**
27  *******************************************************************************
28  *
29  * @ingroup symbol_dev_cost
30  *
31  * @brief Template function to compute cost on a column-block based approach
32  * with a single update per column block
33  *
34  *******************************************************************************
35  *
36  * @param[in] fptr
37  * The structure that contains the cost functions (diag, trsm and
38  * update are used)
39  *
40  * @param[in] symbmtx
41  * The symbolic matrix structure on which to compute the costs.
42  *
43  * @param[in] cblknum
44  * The index of the column-block for which the cost will be computed
45  *
46  *******************************************************************************
47  *
48  * @return The cost associated to the cblk of index cblknum and evaluated with
49  * the set of given functions
50  *
51  *******************************************************************************/
52 static double
53 sum1d( const symbol_function_t *fptr,
54  const symbol_matrix_t *symbmtx,
55  pastix_int_t cblknum )
56 {
57  symbol_cblk_t *cblk = symbmtx->cblktab + cblknum;
58  pastix_int_t M, N, k;
59  double nbops = 0.;
60  double dof = (double)(symbmtx->dof);
61 
62  /*
63  * Size of the factorization kernel (square)
64  */
65  N = (cblk->lcolnum - cblk->fcolnum + 1);
66 
67  /*
68  * Height of the TRSM to which apply the TRSM
69  */
70  M = 0;
71  for(k = cblk[0].bloknum+1; k < cblk[1].bloknum; k++)
72  {
73  M += (symbmtx->bloktab[k].lrownum - symbmtx->bloktab[k].frownum + 1);
74  }
75 
76  if ( dof > 0. ) {
77  N *= dof;
78  M *= dof;
79  }
80 
81  nbops = fptr->diag( N );
82  if( M > 0 ) {
83  nbops += fptr->trsm( M, N );
84  nbops += fptr->update( N, M );
85  }
86 
87  return nbops;
88 }
89 
90 /**
91  *******************************************************************************
92  *
93  * @ingroup symbol_dev_cost
94  *
95  * @brief Template function to compute cost on block based approach.
96  *
97  * As opposed to sum1d(), the updates are split in one per off-diagonal block
98  * making it more precise to evaluate the performance cost of the GEMMs, for
99  * example, as it exactly follow the 1D scheme used in the static scheduler of
100  * PaStiX.
101  *
102  *******************************************************************************
103  *
104  * @param[in] fptr
105  * The structure that contains the cost functions (diag, trsm and
106  * blkupdate are used)
107  *
108  * @param[in] symbmtx
109  * The symbolic matrix structure on which to compute the costs.
110  *
111  * @param[in] cblknum
112  * The index of the column-block for which the cost will be computed
113  *
114  *******************************************************************************
115  *
116  * @return The cost associated to the cblk of index cblknum and evaluated with
117  * the set of given functions
118  *
119  *******************************************************************************/
120 static double
122  const symbol_matrix_t *symbmtx,
123  pastix_int_t cblknum )
124 {
125  symbol_cblk_t *cblk = symbmtx->cblktab + cblknum;
126  pastix_int_t M, N, K, l;
127  double nbops = 0.;
128  double dof = (double)(symbmtx->dof);
129 
130  /*
131  * Size of the factorization kernel (square)
132  */
133  N = (cblk->lcolnum - cblk->fcolnum + 1);
134 
135  /*
136  * Height of the TRSM to which apply the TRSM
137  */
138  M = 0;
139  for(l = cblk[0].bloknum+1; l < cblk[1].bloknum; l++)
140  {
141  M += (symbmtx->bloktab[l].lrownum - symbmtx->bloktab[l].frownum + 1);
142  }
143 
144  N *= dof;
145  M *= dof;
146 
147  nbops = fptr->diag( N );
148  if( M > 0 ) {
149  nbops += fptr->trsm( M, N );
150  }
151 
152  /*
153  * Compute the cost of each GEMM
154  */
155  K = N;
156  for(l = cblk[0].bloknum+1; l < cblk[1].bloknum; l++)
157  {
158  N = (symbmtx->bloktab[l].lrownum - symbmtx->bloktab[l].frownum + 1);
159  N *= dof;
160 
161  nbops += fptr->blkupdate( K, M, N );
162 
163  M -= N;
164  }
165  return nbops;
166 }
167 
168 /**
169  *******************************************************************************
170  *
171  * @ingroup symbol_dev_cost
172  *
173  * @brief Template function to compute cost on block based approach which keeps
174  * the cost per block.
175  *
176  * As opposed to sum2d(), the cost of each update per block is stored in the
177  * blokcost array. Despite this storage, the function is completely identical.
178  *
179  *******************************************************************************
180  *
181  * @param[in] fptr
182  * The structure that contains the cost functions (diag, trsm and
183  * blkupdate are used)
184  *
185  * @param[in] symbmtx
186  * The symbolic matrix structure on which to compute the costs.
187  *
188  * @param[in] cblknum
189  * The index of the column-block for which the cost will be computed
190  *
191  * @param[inout] blokcost
192  * An array of size the number of blocks in the cblknum column-block in
193  * which to store the cost per block of each updates.
194  *
195  *******************************************************************************
196  *
197  * @return The cost associated to the cblk of index cblknum and evaluated with
198  * the set of given functions
199  *
200  *******************************************************************************/
201 static double
203  const symbol_matrix_t *symbmtx,
204  pastix_int_t cblknum,
205  double *blokcost )
206 {
207  symbol_cblk_t *cblk = symbmtx->cblktab + cblknum;
208  pastix_int_t M, N, K, l;
209  double nbops = 0.;
210  double dof = (double)(symbmtx->dof);
211 
212  /*
213  * Size of the factorization kernel (square)
214  */
215  N = (cblk->lcolnum - cblk->fcolnum + 1);
216 
217  /*
218  * Height of the TRSM to which apply the TRSM
219  */
220  M = 0;
221  for(l = cblk[0].bloknum+1; l < cblk[1].bloknum; l++)
222  {
223  M += (symbmtx->bloktab[l].lrownum - symbmtx->bloktab[l].frownum + 1);
224  }
225 
226  N *= dof;
227  M *= dof;
228 
229  nbops = fptr->diag( N );
230  if( M > 0 ) {
231  nbops += fptr->trsm( M, N );
232  }
233  *blokcost = nbops;
234  blokcost++;
235 
236  /*
237  * Compute the cost of each GEMM
238  */
239  K = N;
240  for(l = cblk[0].bloknum+1; l < cblk[1].bloknum; l++, blokcost++)
241  {
242  N = (symbmtx->bloktab[l].lrownum - symbmtx->bloktab[l].frownum + 1);
243  N *= dof;
244 
245  *blokcost = fptr->blkupdate( K, M, N );
246  nbops += *blokcost;
247 
248  M -= N;
249  }
250  return nbops;
251 }
252 
253 /**
254  *******************************************************************************
255  *
256  * @ingroup symbol_dev_cost
257  *
258  * @brief Recursive function to compute the cost of the full symbolic structure
259  * with either sum1d(), sum2d(), or sum2dext().
260  *
261  *******************************************************************************
262  *
263  * @param[in] a
264  * The first column-block index of the range to address
265  *
266  * @param[in] b
267  * The last column-block index of the range to address (inclusive)
268  *
269  * @param[in] fval
270  * The function to use to compute the cost. This can be sum1d(),
271  * sum2d(), or sum2dext()
272  *
273  * @param[in] fptr
274  * The set of functions that will be applied. It can be size function,
275  * floating point operation, performance models, ...
276  *
277  * @param[in] symbmtx
278  * The symbol matrix on which to compute the wanted information.
279  *
280  *******************************************************************************
281  *
282  * @return The cost associated to the integral of the symbol matrix structure
283  * and evaluated with the set of given functions
284  *
285  *******************************************************************************/
286 static double
288  double (*fval)(const symbol_function_t *, const symbol_matrix_t *, pastix_int_t),
289  const symbol_function_t *fptr,
290  const symbol_matrix_t *symbmtx )
291 {
292  if(a != b)
293  return recursive_sum( a, (a+b)/2, fval, fptr, symbmtx)
294  + recursive_sum((a+b)/2+1, b, fval, fptr, symbmtx);
295 
296  return fval(fptr, symbmtx, a);
297 }
298 
299 /**
300  * @addtogroup pastix_symbol
301  * @{
302  */
303 
304 /**
305  *******************************************************************************
306  *
307  * @brief Computes the number of non-zero elements in L.
308  *
309  * This computes the number of non-zero elements stored in the symbol matrix in
310  * order to compute the fill-in. This routines returns the number of non-zero of
311  * the strictly lower part of the matrix.
312  *
313  *******************************************************************************
314  *
315  * @param[in] symbptr
316  * The symbol structure to study.
317  *
318  *******************************************************************************
319  *
320  * @return The number of non zero elements in the strictly lower part of the
321  * full symbol matrix.
322  *
323  *******************************************************************************/
324 size_t
326 {
327  symbol_cblk_t *cblk;
328  symbol_blok_t *blok;
329  pastix_int_t itercblk;
330  pastix_int_t cblknbr;
331  size_t nnz = 0;
332  pastix_int_t dof = symbptr->dof;
333  const pastix_int_t *dofs = symbptr->dofs;
334 
335  cblknbr = symbptr->cblknbr;
336  cblk = symbptr->cblktab;
337  blok = symbptr->bloktab;
338 
339  if ( dof > 0 ) {
340  for(itercblk=0; itercblk<cblknbr; itercblk++, cblk++)
341  {
342  pastix_int_t iterblok = cblk[0].bloknum + 1;
343  pastix_int_t lbloknum = cblk[1].bloknum;
344 
345  pastix_int_t colnbr = dof * (cblk->lcolnum - cblk->fcolnum + 1);
346 
347  /* Diagonal block */
348  blok++;
349  nnz += ( colnbr * (colnbr+1) ) / 2 - colnbr;
350 
351  /* Off-diagonal blocks */
352  for( ; iterblok < lbloknum; iterblok++, blok++)
353  {
354  pastix_int_t rownbr = (blok->lrownum - blok->frownum + 1) * dof;
355 
356  nnz += rownbr * colnbr;
357  }
358  }
359  }
360  else {
361  assert( symbptr->baseval == 0 );
362 
363  for(itercblk=0; itercblk<cblknbr; itercblk++, cblk++)
364  {
365  pastix_int_t iterblok = cblk[0].bloknum + 1;
366  pastix_int_t lbloknum = cblk[1].bloknum;
367  pastix_int_t colnbr = dofs[ cblk->lcolnum + 1 ] - dofs[ cblk->fcolnum ];
368 
369  /* Diagonal block */
370  blok++;
371  nnz += ( colnbr * (colnbr+1) ) / 2 - colnbr;
372 
373  /* Off-diagonal blocks */
374  for( ; iterblok < lbloknum; iterblok++, blok++)
375  {
376  pastix_int_t rownbr = dofs[ blok->lrownum + 1 ] - dofs[ blok->frownum ];
377 
378  nnz += rownbr * colnbr;
379  }
380  }
381  }
382 
383  return nnz;
384 }
385 
386 /**
387  *******************************************************************************
388  *
389  * @brief Computes the number of theoretical and real flops.
390  *
391  * Given a symbolic factorization structure, the type of factorization: Cholesky
392  * or LU, and the arithmetic, this function will return the total number of
393  * floating point operation that will be performed during the numerical
394  * factorization.
395  *
396  *******************************************************************************
397  *
398  * @param[in] symbmtx
399  * The symbol structure to study.
400  *
401  * @param[in] flttype
402  * The floating type of the elements in the matrix.
403  * PastixPattern, PastixFloat, PastixDouble, PastixComplex32 or
404  * PastixComplex64. In case of PastixPattern, values for PastixDouble
405  * are returned.
406  *
407  * @param[in] factotype
408  * The factorization algorithm to perform: PastixFactLLT,
409  * PastixFactLDLT, PastixFactLLH, PastixFactLDLH or PastixFactLU.
410  *
411  * @param[out] thflops
412  * Returns the number of theoretical flops to perform.
413  * NULL if not asked.
414  *
415  * @param[out] rlflops
416  * Returns the number of real flops to perform, taking into account
417  * copies and scatter operations.
418  * NULL if not asked.
419  *
420  *******************************************************************************/
421 void
423  pastix_coeftype_t flttype,
424  pastix_factotype_t factotype,
425  double *thflops,
426  double *rlflops )
427 {
428  int iscomplex = ((flttype == PastixComplex32) || (flttype == PastixComplex64)) ? 1 : 0;
429 
430  /* Compute theoretical flops */
431  if ( thflops != NULL ) {
432  *thflops = recursive_sum(0, symbmtx->cblknbr-1, sum1d,
433  &(flopstable[iscomplex][factotype]),
434  symbmtx);
435  }
436 
437  /* Compute performed flops */
438  if ( rlflops != NULL ) {
439  *rlflops = recursive_sum(0, symbmtx->cblknbr-1, sum2d,
440  &(flopstable[iscomplex][factotype]),
441  symbmtx);
442  }
443 }
444 
445 /**
446  *******************************************************************************
447  *
448  * @brief Computes the cost of structure for the costMatrixBuild() function.
449  *
450  * This function iterates on the column-blocks and blocks to compute the cost of
451  * the operation performed on each of those elements for the costMatrixBuild()
452  * function that is used in the simulation for the data mapping. It returns an
453  * array of cost for each type of element.
454  *
455  *******************************************************************************
456  *
457  * @param[in] symbmtx
458  * The symbol structure to study.
459  *
460  * @param[in] flttype
461  * The floating type of the elements in the matrix.
462  * PastixPattern, PastixFloat, PastixDouble, PastixComplex32 or
463  * PastixComplex64. In case of PastixPattern, values for PastixDouble
464  * are returned.
465  *
466  * @param[in] factotype
467  * The factorization algorithm to perform: PastixFactLLT,
468  * PastixFactLDLT, PastixFactLLH, PastixFactLDLH or PastixFactLU.
469  *
470  * @param[inout] cblkcost
471  * An allocated array of size cblknbr that will holds the cost per cblk
472  * on exit.
473  *
474  * @param[inout] blokcost
475  * An allocated array of size bloknbr that will holds the cost per blok
476  * on exit.
477  *
478  *******************************************************************************/
479 void
481  pastix_coeftype_t flttype,
482  pastix_factotype_t factotype,
483  double *cblkcost,
484  double *blokcost )
485 {
487  double *cblkptr, *blokptr;
488  pastix_int_t i;
489  int iscomplex = ((flttype == PastixComplex32) || (flttype == PastixComplex64)) ? 1 : 0;
490  f = &(perfstable[iscomplex][factotype]);
491 
492  /* Initialize costs */
493  cblkptr = cblkcost;
494  blokptr = blokcost;
495 
496  for(i=0; i<symbmtx->cblknbr; i++, cblkptr++) {
497  *cblkptr = sum2dext( f, symbmtx, i, blokptr );
498 
499  blokptr += symbmtx->cblktab[i+1].bloknum
500  - symbmtx->cblktab[i ].bloknum;
501  }
502 
503  assert( ( cblkptr - cblkcost ) == symbmtx->cblknbr );
504  assert( ( blokptr - blokcost ) == symbmtx->bloknbr );
505 }
506 
507 /**
508  * @}
509  */
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
spm_coeftype_t pastix_coeftype_t
Arithmetic types.
Definition: api.h:294
enum pastix_factotype_e pastix_factotype_t
Factorization algorithms available for IPARM_FACTORIZATION parameter.
pastix_int_t frownum
Definition: symbol.h:60
pastix_int_t lrownum
Definition: symbol.h:61
pastix_int_t bloknbr
Definition: symbol.h:80
pastix_int_t baseval
Definition: symbol.h:78
symbol_cblk_t * cblktab
Definition: symbol.h:83
pastix_int_t bloknum
Definition: symbol.h:48
pastix_int_t fcolnum
Definition: symbol.h:46
pastix_int_t lcolnum
Definition: symbol.h:47
pastix_int_t dof
Definition: symbol.h:87
symbol_blok_t * bloktab
Definition: symbol.h:84
pastix_int_t * dofs
Definition: symbol.h:89
pastix_int_t cblknbr
Definition: symbol.h:79
size_t pastixSymbolGetNNZ(const symbol_matrix_t *symbptr)
Computes the number of non-zero elements in L.
Definition: symbol_cost.c:325
void pastixSymbolGetTimes(const symbol_matrix_t *symbmtx, pastix_coeftype_t flttype, pastix_factotype_t factotype, double *cblkcost, double *blokcost)
Computes the cost of structure for the costMatrixBuild() function.
Definition: symbol_cost.c:480
void pastixSymbolGetFlops(const symbol_matrix_t *symbmtx, pastix_coeftype_t flttype, pastix_factotype_t factotype, double *thflops, double *rlflops)
Computes the number of theoretical and real flops.
Definition: symbol_cost.c:422
Symbol block structure.
Definition: symbol.h:59
Symbol column block structure.
Definition: symbol.h:45
Symbol matrix structure.
Definition: symbol.h:77
double(* update)(pastix_int_t, pastix_int_t)
Definition: symbol_cost.h:31
double(* trsm)(pastix_int_t, pastix_int_t)
Definition: symbol_cost.h:29
double(* diag)(pastix_int_t)
Definition: symbol_cost.h:28
double(* blkupdate)(pastix_int_t, pastix_int_t, pastix_int_t)
Definition: symbol_cost.h:33
static double sum2dext(const symbol_function_t *fptr, const symbol_matrix_t *symbmtx, pastix_int_t cblknum, double *blokcost)
Template function to compute cost on block based approach which keeps the cost per block.
Definition: symbol_cost.c:202
symbol_function_t flopstable[2][5]
array of pointer to the flops functions per factorization and arithmetic
static double recursive_sum(pastix_int_t a, pastix_int_t b, double(*fval)(const symbol_function_t *, const symbol_matrix_t *, pastix_int_t), const symbol_function_t *fptr, const symbol_matrix_t *symbmtx)
Recursive function to compute the cost of the full symbolic structure with either sum1d(),...
Definition: symbol_cost.c:287
symbol_function_t perfstable[2][5]
array of pointer to the performance functions per factorization and arithmetic
static double sum2d(const symbol_function_t *fptr, const symbol_matrix_t *symbmtx, pastix_int_t cblknum)
Template function to compute cost on block based approach.
Definition: symbol_cost.c:121
static double sum1d(const symbol_function_t *fptr, const symbol_matrix_t *symbmtx, pastix_int_t cblknum)
Template function to compute cost on a column-block based approach with a single update per column bl...
Definition: symbol_cost.c:53
Cost functions to compute statistics on the symbolic structure.
Definition: symbol_cost.h:27