PaStiX Handbook  6.4.0
symbol_cost_flops.c
Go to the documentation of this file.
1 /**
2  *
3  * @file symbol_cost_flops.c
4  *
5  * PaStiX symbol functions to compute the number of flops induced by the chosen
6  * symbolic structure.
7  *
8  * @copyright 1999-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
9  * Univ. Bordeaux. All rights reserved.
10  *
11  * @version 6.4.0
12  * @author David Goudin
13  * @author Francois Pellegrini
14  * @author Mathieu Faverge
15  * @author Pascal Henon
16  * @author Pierre Ramet
17  * @date 2024-07-05
18  *
19  * @addtogroup symbol_dev_cost
20  * @{
21  *
22  **/
23 #include "common.h"
24 #include "symbol_cost.h"
25 #include "flops.h"
26 
27 /**
28  * @name Flops functions set
29  * @{
30  *
31  */
32 
33 /**
34  * @brief Computations flops of diagonal blocks
35  * @param[in] N Size of the matrix block
36  * @return Returns the number of flops for the given arithmetic and factorization
37  * @details Cholesky complex case
38  */
39 static inline double
41  return FLOPS_ZPOTRF( N );
42 }
43 
44 /**
45  * @copydoc flops_zpotrf_diag
46  * @details Cholesky real case
47  */
48 static inline double
50  return FLOPS_DPOTRF( N );
51 }
52 
53 /**
54  * @copydoc flops_zpotrf_diag
55  * @details LU complex case
56  */
57 static double
59  return FLOPS_ZGETRF( N, N );
60 }
61 
62 /**
63  * @copydoc flops_zpotrf_diag
64  * @details LU real case
65  */
66 static inline double
68  return FLOPS_DGETRF( N, N );
69 }
70 
71 /**
72  * @copydoc flops_zpotrf_diag
73  * @details LDL^t complex case
74  */
75 static inline double
77  return FLOPS_ZSYTRF( N );
78 }
79 
80 /**
81  * @copydoc flops_zpotrf_diag
82  * @details LDL^t real case
83  */
84 static inline double
86  return FLOPS_DSYTRF( N );
87 }
88 
89 /**
90  * @brief Computations flops of the solve step
91  * @param[in] M Number of rows of the B matrix in the TRSM, and size of the matrix A
92  * @param[in] N Number of columns of the B matrix in the TRSM
93  * @return Returns the number of flops for the given arithmetic and factorization
94  * @details Cholesky complex case
95  */
96 static inline double
98  return FLOPS_ZTRSM( PastixRight, M, N );
99 }
100 
101 /**
102  * @copydoc flops_zpotrf_trsm
103  * @details Cholesky real case
104  */
105 static inline double
107  return FLOPS_DTRSM( PastixRight, M, N );
108 }
109 
110 /**
111  * @copydoc flops_zpotrf_trsm
112  * @details LU complex case
113  */
114 static double
116  return 2. * FLOPS_ZTRSM( PastixRight, M, N );
117 }
118 
119 /**
120  * @copydoc flops_zpotrf_trsm
121  * @details LU real case
122  */
123 static inline double
125  return 2. * FLOPS_DTRSM( PastixRight, M, N );
126 }
127 
128 /**
129  * @copydoc flops_zpotrf_trsm
130  * @details LDL^t complex case
131  */
132 static inline double
134  return FLOPS_ZTRSM( PastixRight, M, N ) + 6. * (double)N * (double)M;
135 }
136 
137 /**
138  * @copydoc flops_zpotrf_trsm
139  * @details LDL^t real case
140  */
141 static inline double
143  return FLOPS_DTRSM( PastixRight, M, N ) + (double)N * (double)M;
144 }
145 
146 /**
147  * @brief Theroretical computation flops of the update step per coumn block (see sum1d())
148  * @param[in] K Number of columns of A, and rows of B in the GEMM operation
149  * @param[in] M Dimension of all other sizes in the GEMM
150  * @return Returns the number of flops for the given arithmetic and factorization
151  * @details Cholesky complex case
152  */
153 static inline double
155  return FLOPS_ZHERK( K, M );
156 }
157 
158 /**
159  * @copydoc flops_zpotrf_update
160  * @details Cholesky real case
161  */
162 static inline double
164  return FLOPS_DSYRK( K, M );
165 }
166 
167 /**
168  * @copydoc flops_zpotrf_update
169  * @details LU complex case
170  */
171 static double
173  return FLOPS_ZGEMM( M, M, K );
174 }
175 
176 /**
177  * @copydoc flops_zpotrf_update
178  * @details LU real case
179  */
180 static inline double
182  return FLOPS_DGEMM( M, M, K );
183 }
184 
185 /**
186  * @copydoc flops_zpotrf_update
187  * @details LDL^t complex case
188  */
189 static inline double
191  return FLOPS_ZSYRK( K, M ) + 6. * (double)M * (double)M;
192 }
193 
194 /**
195  * @copydoc flops_zpotrf_update
196  * @details LDL^t real case
197  */
198 static inline double
200  return FLOPS_DSYRK( K, M ) + (double)M * (double)M;
201 }
202 
203 /**
204  * @brief Computes the theoretical number of flops of the update step per block (see sum2d())
205  * @param[in] M Number of rows of the A and C matrices in the GEMM
206  * @param[in] N Number of columns of the B and C matrices in the GEMM
207  * @param[in] K Number of columns of the A matrix, and rows of the B matrix in the GEMM
208  * @return Returns the number of flops for the given arithmetic and factorization
209  * @details Cholesky complex case
210  */
211 static inline double
213 {
214  return FLOPS_ZGEMM( M, N, K ) + 2. * (double)M * (double)N;
215 }
216 
217 /**
218  * @copydoc flops_zpotrf_blkupdate
219  * @details Cholesky real case
220  */
221 static inline double
223 {
224  return FLOPS_DGEMM( M, N, K ) + (double)M * (double)N;
225 }
226 
227 /**
228  * @copydoc flops_zpotrf_blkupdate
229  * @details LU complex case
230  */
231 static double
233 {
234  return FLOPS_ZGEMM( M, N, K ) + FLOPS_ZGEMM( M-N, N, K )
235  + 2. * (double)M * (double)N + 2. * (double)(M-N) * (double)(N); /* Add step */
236 }
237 
238 /**
239  * @copydoc flops_zpotrf_blkupdate
240  * @details LU real case
241  */
242 static inline double
244 {
245  return FLOPS_DGEMM( M, N, K ) + FLOPS_DGEMM( M-N, N, K )
246  + (double)M * (double)N + (double)(M-N) * (double)(N); /* Add step */
247 }
248 
249 /**
250  * @copydoc flops_zpotrf_blkupdate
251  * @details LDL^t complex case
252  */
253 static inline double
255 {
256  /* If we consider that we stored the D * A^t somewhere */
257 #if 0
258  return FLOPS_ZGEMM( M, N, K )
259  + 2. * (double)M * (double)N;
260 #else
261  /* If not, as it is the case in the runtime */
262  return FLOPS_ZGEMM( M, N, K )
263  + 2. * (double)M * (double)N /* Add step */
264  + 6. * (double)M * (double)N; /* Scale step */
265 #endif
266 }
267 
268 /**
269  * @copydoc flops_zpotrf_blkupdate
270  * @details LDL^t real case
271  */
272 static inline double
274 {
275  /* If we consider that we stored the D * A^t somewhere */
276 #if 0
277  return FLOPS_DGEMM( M, N, K )
278  + (double)M * (double)N;
279 #else
280  /* If not, as it is the case in the runtime */
281  return FLOPS_DGEMM( M, N, K )
282  + (double)M * (double)N /* Add step */
283  + (double)M * (double)N; /* Scale step */
284 #endif
285 }
286 
287 /**
288  * @}
289  */
290 
292  {
298  },
299  {
305  }
306 };
307 
308 /**
309  * @}
310  */
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
@ PastixRight
Definition: api.h:496
static double flops_dgetrf_update(pastix_int_t K, pastix_int_t M)
Theroretical computation flops of the update step per coumn block (see sum1d())
symbol_function_t flopstable[2][5]
array of pointer to the flops functions per factorization and arithmetic
static double flops_dgetrf_trsm(pastix_int_t M, pastix_int_t N)
Computations flops of the solve step.
static double flops_zgetrf_diag(pastix_int_t N)
Computations flops of diagonal blocks.
static double flops_zsytrf_trsm(pastix_int_t M, pastix_int_t N)
Computations flops of the solve step.
static double flops_dsytrf_update(pastix_int_t K, pastix_int_t M)
Theroretical computation flops of the update step per coumn block (see sum1d())
static double flops_zgetrf_update(pastix_int_t K, pastix_int_t M)
Theroretical computation flops of the update step per coumn block (see sum1d())
static double flops_dpotrf_trsm(pastix_int_t M, pastix_int_t N)
Computations flops of the solve step.
static double flops_zpotrf_diag(pastix_int_t N)
Computations flops of diagonal blocks.
static double flops_zpotrf_blkupdate(pastix_int_t M, pastix_int_t N, pastix_int_t K)
Computes the theoretical number of flops of the update step per block (see sum2d())
static double flops_dsytrf_diag(pastix_int_t N)
Computations flops of diagonal blocks.
static double flops_zsytrf_blkupdate(pastix_int_t M, pastix_int_t N, pastix_int_t K)
Computes the theoretical number of flops of the update step per block (see sum2d())
static double flops_dpotrf_blkupdate(pastix_int_t M, pastix_int_t N, pastix_int_t K)
Computes the theoretical number of flops of the update step per block (see sum2d())
static double flops_zpotrf_trsm(pastix_int_t M, pastix_int_t N)
Computations flops of the solve step.
static double flops_zsytrf_update(pastix_int_t K, pastix_int_t M)
Theroretical computation flops of the update step per coumn block (see sum1d())
static double flops_dpotrf_diag(pastix_int_t N)
Computations flops of diagonal blocks.
static double flops_zpotrf_update(pastix_int_t K, pastix_int_t M)
Theroretical computation flops of the update step per coumn block (see sum1d())
static double flops_zgetrf_trsm(pastix_int_t M, pastix_int_t N)
Computations flops of the solve step.
static double flops_dgetrf_blkupdate(pastix_int_t M, pastix_int_t N, pastix_int_t K)
Computes the theoretical number of flops of the update step per block (see sum2d())
static double flops_zsytrf_diag(pastix_int_t N)
Computations flops of diagonal blocks.
static double flops_dpotrf_update(pastix_int_t K, pastix_int_t M)
Theroretical computation flops of the update step per coumn block (see sum1d())
static double flops_dsytrf_blkupdate(pastix_int_t M, pastix_int_t N, pastix_int_t K)
Computes the theoretical number of flops of the update step per block (see sum2d())
static double flops_dsytrf_trsm(pastix_int_t M, pastix_int_t N)
Computations flops of the solve step.
static double flops_dgetrf_diag(pastix_int_t N)
Computations flops of diagonal blocks.
static double flops_zgetrf_blkupdate(pastix_int_t M, pastix_int_t N, pastix_int_t K)
Computes the theoretical number of flops of the update step per block (see sum2d())
Cost functions to compute statistics on the symbolic structure.
Definition: symbol_cost.h:27