PaStiX Handbook  6.3.2
symbol_cost_perfs.c
Go to the documentation of this file.
1 /**
2  *
3  * @file symbol_cost_perfs.c
4  *
5  * PaStiX symbol functions to compute the computational time induced by the chosen
6  * symbolic structure with a given performance model.
7  *
8  * @copyright 1999-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
9  * Univ. Bordeaux. All rights reserved.
10  *
11  * @version 6.3.2
12  * @author David Goudin
13  * @author Francois Pellegrini
14  * @author Mathieu Faverge
15  * @author Pascal Henon
16  * @author Pierre Ramet
17  * @date 2023-07-21
18  *
19  * @addtogroup symbol_dev_cost
20  * @{
21  *
22  **/
23 #include "common.h"
24 #include "symbol/symbol_cost.h"
25 #include "blend/perf.h"
26 
27 /**
28  * @name Performance functions set
29  * @{
30  *
31  */
32 
33 /**
34  * @brief Time model of the computation of the diagonal block
35  * @param[in] N Size of the matrix block
36  * @return Returns the time cost for the given arithmetic and factorization
37  * @details Cholesky complex case
38  */
39 static inline double
41  double total = PERF_POTRF(N);
42  assert( N > 0 );
43  return (total > 0.) ? total : 0.;
44 }
45 
46 /**
47  * @copydoc perfs_zpotrf_diag
48  * @details Cholesky real case
49  */
50 static inline double
52  double total = PERF_POTRF(N);
53  assert( N > 0 );
54  return (total > 0.) ? total : 0.;
55 }
56 
57 /**
58  * @copydoc perfs_zpotrf_diag
59  * @details LU complex case
60  */
61 static inline double
63  /* Approximate to 2 times potrf */
64  return 2. * perfs_zpotrf_diag( N );
65 }
66 
67 /**
68  * @copydoc perfs_zpotrf_diag
69  * @details LU real case
70  */
71 static inline double
73  /* Approximate to 2 times potrf */
74  return 2. * perfs_dpotrf_diag( N );
75 }
76 
77 /**
78  * @copydoc perfs_zpotrf_diag
79  * @details LDL^t complex case
80  */
81 static inline double
83  double total = PERF_SYTRF(N) + (double)N * PERF_COPY(N);
84  assert( N > 0 );
85  return (total > 0.) ? total : 0.;
86 }
87 
88 /**
89  * @copydoc perfs_zpotrf_diag
90  * @details LDL^t real case
91  */
92 static inline double
94  double total = PERF_SYTRF(N) + (double)N * PERF_COPY(N);
95  assert( N > 0 );
96  return (total > 0.) ? total : 0.;
97 }
98 
99 /**
100  * @brief Time performance model of the solve step
101  * @param[in] M Number of rows of the B matrix in the TRSM, and size of the matrix A
102  * @param[in] N Number of columns of the B matrix in the TRSM
103  * @return Returns the time cost for the given arithmetic and factorization
104  * @details Cholesky complex case
105  */
106 static inline double
108  double total = PERF_TRSM( N, M );
109  assert( (M > 0) && (N > 0) );
110  return (total > 0.) ? total : 0.;
111 }
112 
113 /**
114  * @copydoc perfs_zpotrf_trsm
115  * @details Cholesky real case
116  */
117 static inline double
119  double total = PERF_TRSM( N, M );
120  assert( (M > 0) && (N > 0) );
121  return (total > 0.) ? total : 0.;
122 }
123 
124 /**
125  * @copydoc perfs_zpotrf_trsm
126  * @details LU complex case
127  */
128 static inline double
130  double total = 2. * PERF_TRSM( N, M );
131  assert( (M > 0) && (N > 0) );
132  return (total > 0.) ? total : 0.;
133 }
134 
135 /**
136  * @copydoc perfs_zpotrf_trsm
137  * @details LU real case
138  */
139 static inline double
141  double total = 2. * PERF_TRSM( N, M );
142  assert( (M > 0) && (N > 0) );
143  return (total > 0.) ? total : 0.;
144 }
145 
146 /**
147  * @copydoc perfs_zpotrf_trsm
148  * @details LDL^t complex case
149  */
150 static inline double
152  double total = PERF_TRSM( N, M )
153  + (double)N * (PERF_SCAL(M) + PERF_COPY(M));
154  assert( (M > 0) && (N > 0) );
155  return (total > 0.) ? total : 0.;
156 }
157 
158 /**
159  * @copydoc perfs_zpotrf_trsm
160  * @details LDL^t real case
161  */
162 static inline double
164  double total = PERF_TRSM( N, M )
165  + (double)N * (PERF_SCAL(M) + PERF_COPY(M));
166  assert( (M > 0) && (N > 0) );
167  return (total > 0.) ? total : 0.;
168 }
169 
170 /**
171  * @brief Time performance model of the update step per block (see sum2d())
172  * @param[in] M Number of rows of the A and C matrices in the GEMM
173  * @param[in] N Number of columns of the B and C matrices in the GEMM
174  * @param[in] K Number of columns of the A matrix, and rows of the B matrix in the GEMM
175  * @return Returns the time cost for the given arithmetic and factorization
176  * @details Cholesky complex case
177  */
178 static inline double
180 {
181  double total = PERF_GEMM( M, N, K ) + PERF_GEAM( M, N );
182  assert( (M > 0) && (N > 0) && (K > 0) );
183  return (total > 0.) ? total : 0.;
184 }
185 
186 /**
187  * @copydoc flops_zpotrf_blkupdate
188  * @details Cholesky real case
189  */
190 static inline double
192 {
193  double total = PERF_GEMM( M, N, K ) + PERF_GEAM( M, N );
194  assert( (M > 0) && (N > 0) && (K > 0) );
195  return (total > 0.) ? total : 0.;
196 }
197 
198 /**
199  * @copydoc flops_zpotrf_blkupdate
200  * @details LU complex case
201  */
202 static inline double
204 {
205  double total = PERF_GEMM( M, N, K ) + PERF_GEAM( M, N )
206  + PERF_GEMM( (M-N), N, K ) + PERF_GEAM( (M-N), N );
207  assert( (M > 0) && (N > 0) && (K > 0) );
208  return (total > 0.) ? total : 0.;
209 }
210 
211 /**
212  * @copydoc flops_zpotrf_blkupdate
213  * @details LU real case
214  */
215 static inline double
217 {
218  double total = PERF_GEMM( M, N, K ) + PERF_GEAM( M, N )
219  + PERF_GEMM( (M-N), N, K ) + PERF_GEAM( (M-N), N );
220  assert( (M > 0) && (N > 0) && (K > 0) );
221  return (total > 0.) ? total : 0.;
222 }
223 
224 /**
225  * @copydoc flops_zpotrf_blkupdate
226  * @details LDL^t complex case
227  */
228 static inline double
230 {
231  double total = PERF_GEMM( M, N, K ) + PERF_GEAM( M, N );
232  assert( (M > 0) && (N > 0) && (K > 0) );
233  return (total > 0.) ? total : 0.;
234 }
235 
236 /**
237  * @copydoc flops_zpotrf_blkupdate
238  * @details LDL^t real case
239  */
240 static inline double
242 {
243  double total = PERF_GEMM( M, N, K ) + PERF_GEAM( M, N );
244  assert( (M > 0) && (N > 0) && (K > 0) );
245  return (total > 0.) ? total : 0.;
246 }
247 
248 /**
249  * @}
250  */
251 
253  {
259  },
260  {
266  }
267 };
268 
269 /**
270  * @}
271  */
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
static double perfs_zpotrf_diag(pastix_int_t N)
Time model of the computation of the diagonal block.
static double perfs_zgetrf_blkupdate(pastix_int_t M, pastix_int_t N, pastix_int_t K)
Computes the theoretical number of flops of the update step per block (see sum2d())
static double perfs_zsytrf_diag(pastix_int_t N)
Time model of the computation of the diagonal block.
static double perfs_zgetrf_diag(pastix_int_t N)
Time model of the computation of the diagonal block.
static double perfs_zsytrf_trsm(pastix_int_t M, pastix_int_t N)
Time performance model of the solve step.
static double perfs_dsytrf_trsm(pastix_int_t M, pastix_int_t N)
Time performance model of the solve step.
static double perfs_dgetrf_blkupdate(pastix_int_t M, pastix_int_t N, pastix_int_t K)
Computes the theoretical number of flops of the update step per block (see sum2d())
static double perfs_zpotrf_blkupdate(pastix_int_t M, pastix_int_t N, pastix_int_t K)
Time performance model of the update step per block (see sum2d())
static double perfs_zgetrf_trsm(pastix_int_t M, pastix_int_t N)
Time performance model of the solve step.
symbol_function_t perfstable[2][5]
array of pointer to the performance functions per factorization and arithmetic
static double perfs_dpotrf_blkupdate(pastix_int_t M, pastix_int_t N, pastix_int_t K)
Computes the theoretical number of flops of the update step per block (see sum2d())
static double perfs_dgetrf_trsm(pastix_int_t M, pastix_int_t N)
Time performance model of the solve step.
static double perfs_dgetrf_diag(pastix_int_t N)
Time model of the computation of the diagonal block.
static double perfs_dpotrf_trsm(pastix_int_t M, pastix_int_t N)
Time performance model of the solve step.
static double perfs_zpotrf_trsm(pastix_int_t M, pastix_int_t N)
Time performance model of the solve step.
static double perfs_dsytrf_diag(pastix_int_t N)
Time model of the computation of the diagonal block.
static double perfs_dpotrf_diag(pastix_int_t N)
Time model of the computation of the diagonal block.
static double perfs_dsytrf_blkupdate(pastix_int_t M, pastix_int_t N, pastix_int_t K)
Computes the theoretical number of flops of the update step per block (see sum2d())
static double perfs_zsytrf_blkupdate(pastix_int_t M, pastix_int_t N, pastix_int_t K)
Computes the theoretical number of flops of the update step per block (see sum2d())
Cost functions to compute statistics on the symbolic structure.
Definition: symbol_cost.h:27