PaStiX Handbook  6.2.1
symbol_cost.c
Go to the documentation of this file.
1 /**
2  *
3  * @file symbol_cost.c
4  *
5  * PaStiX symbol structure cost functions
6  *
7  * @copyright 1999-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.2.0
11  * @author Pascal Henon
12  * @author Pierre Ramet
13  * @author Mathieu Faverge
14  * @date 2021-01-03
15  *
16  **/
17 #include <stdio.h>
18 #include <string.h>
19 #include <strings.h>
20 #include <assert.h>
21 
22 #include "common.h"
23 #include "symbol/symbol.h"
24 #include "symbol_cost.h"
25 
26 /**
27  *******************************************************************************
28  *
29  * @ingroup symbol_dev_cost
30  *
31  * @brief Template function to compute cost on a column-block based approach
32  * with a single update per column block
33  *
34  *******************************************************************************
35  *
36  * @param[in] fptr
37  * The structure that contains the cost functions (diag, trsm and
38  * update are used)
39  *
40  * @param[in] symbmtx
41  * The symbolic matrix structure on which to compute the costs.
42  *
43  * @param[in] cblknum
44  * The index of the column-block for which the cost will be computed
45  *
46  *******************************************************************************
47  *
48  * @return The cost associated to the cblk of index cblknum and evaluated with
49  * the set of given functions
50  *
51  *******************************************************************************/
52 static double
53 sum1d( const symbol_function_t *fptr,
54  const symbol_matrix_t *symbmtx,
55  pastix_int_t cblknum )
56 {
57  symbol_cblk_t *cblk = symbmtx->cblktab + cblknum;
58  pastix_int_t M, N, k;
59  double nbops = 0.;
60  double dof = (double)(symbmtx->dof);
61 
62  /*
63  * Size of the factorization kernel (square)
64  */
65  N = (cblk->lcolnum - cblk->fcolnum + 1);
66 
67  /*
68  * Height of the TRSM to which apply the TRSM
69  */
70  M = 0;
71  for(k = cblk[0].bloknum+1; k < cblk[1].bloknum; k++)
72  {
73  M += (symbmtx->bloktab[k].lrownum - symbmtx->bloktab[k].frownum + 1);
74  }
75 
76  if ( dof > 0. ) {
77  N *= dof;
78  M *= dof;
79  }
80 
81  nbops = fptr->diag( N );
82  if( M > 0 ) {
83  nbops += fptr->trsm( M, N );
84  nbops += fptr->update( N, M );
85  }
86 
87  return nbops;
88 }
89 
90 /**
91  *******************************************************************************
92  *
93  * @ingroup symbol_dev_cost
94  *
95  * @brief Template function to compute cost on block based approach.
96  *
97  * As opposed to sum1d(), the updates are split in one per off-diagonal block
98  * making it more precise to evaluate the performance cost of the GEMMs, for
99  * example, as it exactly follow the 1D scheme used in the static scheduler of
100  * PaStiX.
101  *
102  *******************************************************************************
103  *
104  * @param[in] fptr
105  * The structure that contains the cost functions (diag, trsm and
106  * blkupdate are used)
107  *
108  * @param[in] symbmtx
109  * The symbolic matrix structure on which to compute the costs.
110  *
111  * @param[in] cblknum
112  * The index of the column-block for which the cost will be computed
113  *
114  *******************************************************************************
115  *
116  * @return The cost associated to the cblk of index cblknum and evaluated with
117  * the set of given functions
118  *
119  *******************************************************************************/
120 static double
122  const symbol_matrix_t *symbmtx,
123  pastix_int_t cblknum )
124 {
125  symbol_cblk_t *cblk = symbmtx->cblktab + cblknum;
126  pastix_int_t M, N, K, l;
127  double nbops = 0.;
128  double dof = (double)(symbmtx->dof);
129 
130  /*
131  * Size of the factorization kernel (square)
132  */
133  N = (cblk->lcolnum - cblk->fcolnum + 1);
134 
135  /*
136  * Height of the TRSM to which apply the TRSM
137  */
138  M = 0;
139  for(l = cblk[0].bloknum+1; l < cblk[1].bloknum; l++)
140  {
141  M += (symbmtx->bloktab[l].lrownum - symbmtx->bloktab[l].frownum + 1);
142  }
143 
144  N *= dof;
145  M *= dof;
146 
147  nbops = fptr->diag( N );
148  if( M > 0 ) {
149  nbops += fptr->trsm( M, N );
150  }
151 
152  /*
153  * Compute the cost of each GEMM
154  */
155  K = N;
156  for(l = cblk[0].bloknum+1; l < cblk[1].bloknum; l++)
157  {
158  N = (symbmtx->bloktab[l].lrownum - symbmtx->bloktab[l].frownum + 1);
159  N *= dof;
160 
161  nbops += fptr->blkupdate( K, M, N );
162 
163  M -= N;
164  }
165  return nbops;
166 }
167 
168 /**
169  *******************************************************************************
170  *
171  * @ingroup symbol_dev_cost
172  *
173  * @brief Template function to compute cost on block based approach which keeps
174  * the cost per block.
175  *
176  * As opposed to sum2d(), the cost of each update per block is stored in the
177  * blokcost array. Despite this storage, the function is completely identical.
178  *
179  *******************************************************************************
180  *
181  * @param[in] fptr
182  * The structure that contains the cost functions (diag, trsm and
183  * blkupdate are used)
184  *
185  * @param[in] symbmtx
186  * The symbolic matrix structure on which to compute the costs.
187  *
188  * @param[in] cblknum
189  * The index of the column-block for which the cost will be computed
190  *
191  * @param[inout] blokcost
192  * An array of size the number of blocks in the cblknum column-block in
193  * which to store the cost per block of each updates.
194  *
195  *******************************************************************************
196  *
197  * @return The cost associated to the cblk of index cblknum and evaluated with
198  * the set of given functions
199  *
200  *******************************************************************************/
201 static double
203  const symbol_matrix_t *symbmtx,
204  pastix_int_t cblknum,
205  double *blokcost )
206 {
207  symbol_cblk_t *cblk = symbmtx->cblktab + cblknum;
208  pastix_int_t M, N, K, l;
209  double nbops = 0.;
210  double dof = (double)(symbmtx->dof);
211 
212  /*
213  * Size of the factorization kernel (square)
214  */
215  N = (cblk->lcolnum - cblk->fcolnum + 1);
216 
217  /*
218  * Height of the TRSM to which apply the TRSM
219  */
220  M = 0;
221  for(l = cblk[0].bloknum+1; l < cblk[1].bloknum; l++)
222  {
223  M += (symbmtx->bloktab[l].lrownum - symbmtx->bloktab[l].frownum + 1);
224  }
225 
226  N *= dof;
227  M *= dof;
228 
229  nbops = fptr->diag( N );
230  if( M > 0 ) {
231  nbops += fptr->trsm( M, N );
232  }
233  *blokcost = nbops;
234  blokcost++;
235 
236  /*
237  * Compute the cost of each GEMM
238  */
239  K = N;
240  for(l = cblk[0].bloknum+1; l < cblk[1].bloknum; l++, blokcost++)
241  {
242  N = (symbmtx->bloktab[l].lrownum - symbmtx->bloktab[l].frownum + 1);
243  N *= dof;
244 
245  *blokcost = fptr->blkupdate( K, M, N );
246  nbops += *blokcost;
247 
248  M -= N;
249  }
250  return nbops;
251 }
252 
253 /**
254  *******************************************************************************
255  *
256  * @ingroup symbol_dev_cost
257  *
258  * @brief Recursive function to compute the cost of the full symbolic structure
259  * with either sum1d(), sum2d(), or sum2dext().
260  *
261  *******************************************************************************
262  *
263  * @param[in] a
264  * The first column-block index of the range to address
265  *
266  * @param[in] b
267  * The last column-block index of the range to address (inclusive)
268  *
269  * @param[in] fval
270  * The function to use to compute the cost. This can be sum1d(),
271  * sum2d(), or sum2dext()
272  *
273  * @param[in] fptr
274  * The set of functions that will be applied. It can be size function,
275  * floating point operation, performance models, ...
276  *
277  * @param[in] symbmtx
278  * The symbol matrix on which to compute the wanted information.
279  *
280  *******************************************************************************
281  *
282  * @return The cost associated to the integral of the symbol matrix structure
283  * and evaluated with the set of given functions
284  *
285  *******************************************************************************/
286 static double
287 recursive_sum( pastix_int_t a, pastix_int_t b,
288  double (*fval)(const symbol_function_t *, const symbol_matrix_t *, pastix_int_t),
289  const symbol_function_t *fptr,
290  const symbol_matrix_t *symbmtx )
291 {
292  if(a != b)
293  return recursive_sum( a, (a+b)/2, fval, fptr, symbmtx)
294  + recursive_sum((a+b)/2+1, b, fval, fptr, symbmtx);
295 
296  return fval(fptr, symbmtx, a);
297 }
298 /**
299  * @}
300  *
301  * @addtogroup pastix_symbol
302  * @{
303  */
304 
305 
306 /**
307  *******************************************************************************
308  *
309  * @brief Computes the number of non-zero elements in L.
310  *
311  * This computes the number of non-zero elements stored in the symbol matrix in
312  * order to compute the fill-in. This routines returns the number of non-zero of
313  * the strictly lower part of the matrix.
314  *
315  *******************************************************************************
316  *
317  * @param[in] symbptr
318  * The symbol structure to study.
319  *
320  *******************************************************************************
321  *
322  * @return The number of non zero elements in the strictly lower part of the
323  * full symbol matrix.
324  *
325  *******************************************************************************/
326 pastix_int_t
328 {
329  symbol_cblk_t *cblk;
330  symbol_blok_t *blok;
331  pastix_int_t itercblk;
332  pastix_int_t cblknbr;
333  pastix_int_t nnz = 0;
334  pastix_int_t dof = symbptr->dof;
335  const pastix_int_t *dofs = symbptr->dofs;
336 
337  cblknbr = symbptr->cblknbr;
338  cblk = symbptr->cblktab;
339  blok = symbptr->bloktab;
340 
341  if ( dof > 0 ) {
342  for(itercblk=0; itercblk<cblknbr; itercblk++, cblk++)
343  {
344  pastix_int_t iterblok = cblk[0].bloknum + 1;
345  pastix_int_t lbloknum = cblk[1].bloknum;
346 
347  pastix_int_t colnbr = dof * (cblk->lcolnum - cblk->fcolnum + 1);
348 
349  /* Diagonal block */
350  blok++;
351  nnz += ( colnbr * (colnbr+1) ) / 2 - colnbr;
352 
353  /* Off-diagonal blocks */
354  for( ; iterblok < lbloknum; iterblok++, blok++)
355  {
356  pastix_int_t rownbr = (blok->lrownum - blok->frownum + 1) * dof;
357 
358  nnz += rownbr * colnbr;
359  }
360  }
361  }
362  else {
363  assert( symbptr->baseval == 0 );
364 
365  for(itercblk=0; itercblk<cblknbr; itercblk++, cblk++)
366  {
367  pastix_int_t iterblok = cblk[0].bloknum + 1;
368  pastix_int_t lbloknum = cblk[1].bloknum;
369  pastix_int_t colnbr = dofs[ cblk->lcolnum + 1 ] - dofs[ cblk->fcolnum ];
370 
371  /* Diagonal block */
372  blok++;
373  nnz += ( colnbr * (colnbr+1) ) / 2 - colnbr;
374 
375  /* Off-diagonal blocks */
376  for( ; iterblok < lbloknum; iterblok++, blok++)
377  {
378  pastix_int_t rownbr = dofs[ blok->lrownum + 1 ] - dofs[ blok->frownum ];
379 
380  nnz += rownbr * colnbr;
381  }
382  }
383  }
384 
385  return nnz;
386 }
387 
388 /**
389  *******************************************************************************
390  *
391  * @brief Computes the number of theoretical and real flops.
392  *
393  * Given a symbolic factorization structure, the type of factorization: Cholesky
394  * or LU, and the arithmetic, this function will return the total number of
395  * floating point operation that will be performed during the numerical
396  * factorization.
397  *
398  *******************************************************************************
399  *
400  * @param[in] symbmtx
401  * The symbol structure to study.
402  *
403  * @param[in] flttype
404  * The floating type of the elements in the matrix.
405  * PastixPattern, PastixFloat, PastixDouble, PastixComplex32 or
406  * PastixComplex64. In case of PastixPattern, values for PastixDouble
407  * are returned.
408  *
409  * @param[in] factotype
410  * The factorization algorithm to perform: PastixFactLLT,
411  * PastixFactLDLT, PastixFactLLH, PastixFactLDLH or PastixFactLU.
412  *
413  * @param[out] thflops
414  * Returns the number of theoretical flops to perform.
415  * NULL if not asked.
416  *
417  * @param[out] rlflops
418  * Returns the number of real flops to perform, taking into account
419  * copies and scatter operations.
420  * NULL if not asked.
421  *
422  *******************************************************************************/
423 void
425  pastix_coeftype_t flttype,
426  pastix_factotype_t factotype,
427  double *thflops,
428  double *rlflops )
429 {
430  int iscomplex = ((flttype == PastixComplex32) || (flttype == PastixComplex64)) ? 1 : 0;
431 
432  /* Compute theoretical flops */
433  if ( thflops != NULL ) {
434  *thflops = recursive_sum(0, symbmtx->cblknbr-1, sum1d,
435  &(flopstable[iscomplex][factotype]),
436  symbmtx);
437  }
438 
439  /* Compute performed flops */
440  if ( rlflops != NULL ) {
441  *rlflops = recursive_sum(0, symbmtx->cblknbr-1, sum2d,
442  &(flopstable[iscomplex][factotype]),
443  symbmtx);
444  }
445 }
446 
447 /**
448  *******************************************************************************
449  *
450  * @brief Computes the cost of structure for the costMatrixBuild() function.
451  *
452  * This function iterates on the column-blocks and blocks to compute the cost of
453  * the operation performed on each of those elements for the costMatrixBuild()
454  * function that is used in the simulation for the data mapping. It returns an
455  * array of cost for each type of element.
456  *
457  *******************************************************************************
458  *
459  * @param[in] symbmtx
460  * The symbol structure to study.
461  *
462  * @param[in] flttype
463  * The floating type of the elements in the matrix.
464  * PastixPattern, PastixFloat, PastixDouble, PastixComplex32 or
465  * PastixComplex64. In case of PastixPattern, values for PastixDouble
466  * are returned.
467  *
468  * @param[in] factotype
469  * The factorization algorithm to perform: PastixFactLLT,
470  * PastixFactLDLT, PastixFactLLH, PastixFactLDLH or PastixFactLU.
471  *
472  * @param[inout] cblkcost
473  * An allocated array of size cblknbr that will holds the cost per cblk
474  * on exit.
475  *
476  * @param[inout] blokcost
477  * An allocated array of size bloknbr that will holds the cost per blok
478  * on exit.
479  *
480  *******************************************************************************/
481 void
483  pastix_coeftype_t flttype,
484  pastix_factotype_t factotype,
485  double *cblkcost,
486  double *blokcost )
487 {
489  double *cblkptr, *blokptr;
490  pastix_int_t i;
491  int iscomplex = ((flttype == PastixComplex32) || (flttype == PastixComplex64)) ? 1 : 0;
492  f = &(perfstable[iscomplex][factotype]);
493 
494  /* Initialize costs */
495  cblkptr = cblkcost;
496  blokptr = blokcost;
497 
498  for(i=0; i<symbmtx->cblknbr; i++, cblkptr++) {
499  *cblkptr = sum2dext( f, symbmtx, i, blokptr );
500 
501  blokptr += symbmtx->cblktab[i+1].bloknum
502  - symbmtx->cblktab[i ].bloknum;
503  }
504 
505  assert( ( cblkptr - cblkcost ) == symbmtx->cblknbr );
506  assert( ( blokptr - blokcost ) == symbmtx->bloknbr );
507 }
508 
509 /**
510  * @}
511  */
symbol_matrix_s
Symbol matrix structure.
Definition: symbol.h:75
perfstable
symbol_function_t perfstable[2][5]
array of pointer to the performance functions per factorization and arithmetic
Definition: symbol_cost_perfs.c:252
symbol_matrix_s::bloktab
symbol_blok_t * bloktab
Definition: symbol.h:82
pastixSymbolGetNNZ
pastix_int_t pastixSymbolGetNNZ(const symbol_matrix_t *symbptr)
Computes the number of non-zero elements in L.
Definition: symbol_cost.c:327
symbol_matrix_s::dofs
pastix_int_t * dofs
Definition: symbol.h:87
symbol_function_s::blkupdate
double(* blkupdate)(pastix_int_t, pastix_int_t, pastix_int_t)
Definition: symbol_cost.h:33
symbol_function_s::trsm
double(* trsm)(pastix_int_t, pastix_int_t)
Definition: symbol_cost.h:29
symbol_function_s::update
double(* update)(pastix_int_t, pastix_int_t)
Definition: symbol_cost.h:31
symbol_cblk_s::fcolnum
pastix_int_t fcolnum
Definition: symbol.h:44
sum1d
static double sum1d(const symbol_function_t *fptr, const symbol_matrix_t *symbmtx, pastix_int_t cblknum)
Template function to compute cost on a column-block based approach with a single update per column bl...
Definition: symbol_cost.c:53
symbol_matrix_s::cblknbr
pastix_int_t cblknbr
Definition: symbol.h:77
symbol_matrix_s::baseval
pastix_int_t baseval
Definition: symbol.h:76
symbol_matrix_s::dof
pastix_int_t dof
Definition: symbol.h:85
symbol_matrix_s::bloknbr
pastix_int_t bloknbr
Definition: symbol.h:78
symbol_cblk_s::bloknum
pastix_int_t bloknum
Definition: symbol.h:46
pastix_factotype_t
enum pastix_factotype_e pastix_factotype_t
Factorization algorithms available for IPARM_FACTORIZATION parameter.
pastixSymbolGetFlops
void pastixSymbolGetFlops(const symbol_matrix_t *symbmtx, pastix_coeftype_t flttype, pastix_factotype_t factotype, double *thflops, double *rlflops)
Computes the number of theoretical and real flops.
Definition: symbol_cost.c:424
symbol_function_s
Cost functions to compute statistics on the symbolic structure.
Definition: symbol_cost.h:27
symbol.h
sum2dext
static double sum2dext(const symbol_function_t *fptr, const symbol_matrix_t *symbmtx, pastix_int_t cblknum, double *blokcost)
Template function to compute cost on block based approach which keeps the cost per block.
Definition: symbol_cost.c:202
symbol_cblk_s::lcolnum
pastix_int_t lcolnum
Definition: symbol.h:45
symbol_blok_s::frownum
pastix_int_t frownum
Definition: symbol.h:58
symbol_cost.h
pastixSymbolGetTimes
void pastixSymbolGetTimes(const symbol_matrix_t *symbmtx, pastix_coeftype_t flttype, pastix_factotype_t factotype, double *cblkcost, double *blokcost)
Computes the cost of structure for the costMatrixBuild() function.
Definition: symbol_cost.c:482
symbol_matrix_s::cblktab
symbol_cblk_t * cblktab
Definition: symbol.h:81
symbol_blok_s
Symbol block structure.
Definition: symbol.h:57
symbol_cblk_s
Symbol column block structure.
Definition: symbol.h:43
recursive_sum
static double recursive_sum(pastix_int_t a, pastix_int_t b, double(*fval)(const symbol_function_t *, const symbol_matrix_t *, pastix_int_t), const symbol_function_t *fptr, const symbol_matrix_t *symbmtx)
Recursive function to compute the cost of the full symbolic structure with either sum1d(),...
Definition: symbol_cost.c:287
pastix_coeftype_t
#define pastix_coeftype_t
Arithmetic types.
Definition: api.h:283
symbol_blok_s::lrownum
pastix_int_t lrownum
Definition: symbol.h:59
symbol_function_s::diag
double(* diag)(pastix_int_t)
Definition: symbol_cost.h:28
flopstable
symbol_function_t flopstable[2][5]
array of pointer to the flops functions per factorization and arithmetic
Definition: symbol_cost_flops.c:291
sum2d
static double sum2d(const symbol_function_t *fptr, const symbol_matrix_t *symbmtx, pastix_int_t cblknum)
Template function to compute cost on block based approach.
Definition: symbol_cost.c:121