PaStiX Handbook 6.4.0
Loading...
Searching...
No Matches
symbol_cost_perfs.c
Go to the documentation of this file.
1/**
2 *
3 * @file symbol_cost_perfs.c
4 *
5 * PaStiX symbol functions to compute the computational time induced by the chosen
6 * symbolic structure with a given performance model.
7 *
8 * @copyright 1999-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
9 * Univ. Bordeaux. All rights reserved.
10 *
11 * @version 6.4.0
12 * @author David Goudin
13 * @author Francois Pellegrini
14 * @author Mathieu Faverge
15 * @author Pascal Henon
16 * @author Pierre Ramet
17 * @date 2024-07-05
18 *
19 * @addtogroup symbol_dev_cost
20 * @{
21 *
22 **/
23#include "common.h"
24#include "symbol/symbol_cost.h"
25#include "blend/perf.h"
26
27/**
28 * @name Performance functions set
29 * @{
30 *
31 */
32
33/**
34 * @brief Time model of the computation of the diagonal block
35 * @param[in] N Size of the matrix block
36 * @return Returns the time cost for the given arithmetic and factorization
37 * @details Cholesky complex case
38 */
39static inline double
41 double total = PERF_POTRF(N);
42 assert( N > 0 );
43 return (total > 0.) ? total : 0.;
44}
45
46/**
47 * @copydoc perfs_zpotrf_diag
48 * @details Cholesky real case
49 */
50static inline double
52 double total = PERF_POTRF(N);
53 assert( N > 0 );
54 return (total > 0.) ? total : 0.;
55}
56
57/**
58 * @copydoc perfs_zpotrf_diag
59 * @details LU complex case
60 */
61static inline double
63 /* Approximate to 2 times potrf */
64 return 2. * perfs_zpotrf_diag( N );
65}
66
67/**
68 * @copydoc perfs_zpotrf_diag
69 * @details LU real case
70 */
71static inline double
73 /* Approximate to 2 times potrf */
74 return 2. * perfs_dpotrf_diag( N );
75}
76
77/**
78 * @copydoc perfs_zpotrf_diag
79 * @details LDL^t complex case
80 */
81static inline double
83 double total = PERF_SYTRF(N) + (double)N * PERF_COPY(N);
84 assert( N > 0 );
85 return (total > 0.) ? total : 0.;
86}
87
88/**
89 * @copydoc perfs_zpotrf_diag
90 * @details LDL^t real case
91 */
92static inline double
94 double total = PERF_SYTRF(N) + (double)N * PERF_COPY(N);
95 assert( N > 0 );
96 return (total > 0.) ? total : 0.;
97}
98
99/**
100 * @brief Time performance model of the solve step
101 * @param[in] M Number of rows of the B matrix in the TRSM, and size of the matrix A
102 * @param[in] N Number of columns of the B matrix in the TRSM
103 * @return Returns the time cost for the given arithmetic and factorization
104 * @details Cholesky complex case
105 */
106static inline double
108 double total = PERF_TRSM( N, M );
109 assert( (M > 0) && (N > 0) );
110 return (total > 0.) ? total : 0.;
111}
112
113/**
114 * @copydoc perfs_zpotrf_trsm
115 * @details Cholesky real case
116 */
117static inline double
119 double total = PERF_TRSM( N, M );
120 assert( (M > 0) && (N > 0) );
121 return (total > 0.) ? total : 0.;
122}
123
124/**
125 * @copydoc perfs_zpotrf_trsm
126 * @details LU complex case
127 */
128static inline double
130 double total = 2. * PERF_TRSM( N, M );
131 assert( (M > 0) && (N > 0) );
132 return (total > 0.) ? total : 0.;
133}
134
135/**
136 * @copydoc perfs_zpotrf_trsm
137 * @details LU real case
138 */
139static inline double
141 double total = 2. * PERF_TRSM( N, M );
142 assert( (M > 0) && (N > 0) );
143 return (total > 0.) ? total : 0.;
144}
145
146/**
147 * @copydoc perfs_zpotrf_trsm
148 * @details LDL^t complex case
149 */
150static inline double
152 double total = PERF_TRSM( N, M )
153 + (double)N * (PERF_SCAL(M) + PERF_COPY(M));
154 assert( (M > 0) && (N > 0) );
155 return (total > 0.) ? total : 0.;
156}
157
158/**
159 * @copydoc perfs_zpotrf_trsm
160 * @details LDL^t real case
161 */
162static inline double
164 double total = PERF_TRSM( N, M )
165 + (double)N * (PERF_SCAL(M) + PERF_COPY(M));
166 assert( (M > 0) && (N > 0) );
167 return (total > 0.) ? total : 0.;
168}
169
170/**
171 * @brief Time performance model of the update step per block (see sum2d())
172 * @param[in] M Number of rows of the A and C matrices in the GEMM
173 * @param[in] N Number of columns of the B and C matrices in the GEMM
174 * @param[in] K Number of columns of the A matrix, and rows of the B matrix in the GEMM
175 * @return Returns the time cost for the given arithmetic and factorization
176 * @details Cholesky complex case
177 */
178static inline double
180{
181 double total = PERF_GEMM( M, N, K ) + PERF_GEAM( M, N );
182 assert( (M > 0) && (N > 0) && (K > 0) );
183 return (total > 0.) ? total : 0.;
184}
185
186/**
187 * @copydoc flops_zpotrf_blkupdate
188 * @details Cholesky real case
189 */
190static inline double
192{
193 double total = PERF_GEMM( M, N, K ) + PERF_GEAM( M, N );
194 assert( (M > 0) && (N > 0) && (K > 0) );
195 return (total > 0.) ? total : 0.;
196}
197
198/**
199 * @copydoc flops_zpotrf_blkupdate
200 * @details LU complex case
201 */
202static inline double
204{
205 double total = PERF_GEMM( M, N, K ) + PERF_GEAM( M, N )
206 + PERF_GEMM( (M-N), N, K ) + PERF_GEAM( (M-N), N );
207 assert( (M > 0) && (N > 0) && (K > 0) );
208 return (total > 0.) ? total : 0.;
209}
210
211/**
212 * @copydoc flops_zpotrf_blkupdate
213 * @details LU real case
214 */
215static inline double
217{
218 double total = PERF_GEMM( M, N, K ) + PERF_GEAM( M, N )
219 + PERF_GEMM( (M-N), N, K ) + PERF_GEAM( (M-N), N );
220 assert( (M > 0) && (N > 0) && (K > 0) );
221 return (total > 0.) ? total : 0.;
222}
223
224/**
225 * @copydoc flops_zpotrf_blkupdate
226 * @details LDL^t complex case
227 */
228static inline double
230{
231 double total = PERF_GEMM( M, N, K ) + PERF_GEAM( M, N );
232 assert( (M > 0) && (N > 0) && (K > 0) );
233 return (total > 0.) ? total : 0.;
234}
235
236/**
237 * @copydoc flops_zpotrf_blkupdate
238 * @details LDL^t real case
239 */
240static inline double
242{
243 double total = PERF_GEMM( M, N, K ) + PERF_GEAM( M, N );
244 assert( (M > 0) && (N > 0) && (K > 0) );
245 return (total > 0.) ? total : 0.;
246}
247
248/**
249 * @}
250 */
251
268
269/**
270 * @}
271 */
BEGIN_C_DECLS typedef int pastix_int_t
Definition datatypes.h:51
static double perfs_zpotrf_diag(pastix_int_t N)
Time model of the computation of the diagonal block.
static double perfs_zgetrf_blkupdate(pastix_int_t M, pastix_int_t N, pastix_int_t K)
Computes the theoretical number of flops of the update step per block (see sum2d())
static double perfs_zsytrf_diag(pastix_int_t N)
Time model of the computation of the diagonal block.
static double perfs_zgetrf_diag(pastix_int_t N)
Time model of the computation of the diagonal block.
static double perfs_zsytrf_trsm(pastix_int_t M, pastix_int_t N)
Time performance model of the solve step.
static double perfs_dsytrf_trsm(pastix_int_t M, pastix_int_t N)
Time performance model of the solve step.
static double perfs_dgetrf_blkupdate(pastix_int_t M, pastix_int_t N, pastix_int_t K)
Computes the theoretical number of flops of the update step per block (see sum2d())
static double perfs_zpotrf_blkupdate(pastix_int_t M, pastix_int_t N, pastix_int_t K)
Time performance model of the update step per block (see sum2d())
static double perfs_zgetrf_trsm(pastix_int_t M, pastix_int_t N)
Time performance model of the solve step.
symbol_function_t perfstable[2][5]
array of pointer to the performance functions per factorization and arithmetic
static double perfs_dpotrf_blkupdate(pastix_int_t M, pastix_int_t N, pastix_int_t K)
Computes the theoretical number of flops of the update step per block (see sum2d())
static double perfs_dgetrf_trsm(pastix_int_t M, pastix_int_t N)
Time performance model of the solve step.
static double perfs_dgetrf_diag(pastix_int_t N)
Time model of the computation of the diagonal block.
static double perfs_dpotrf_trsm(pastix_int_t M, pastix_int_t N)
Time performance model of the solve step.
static double perfs_zpotrf_trsm(pastix_int_t M, pastix_int_t N)
Time performance model of the solve step.
static double perfs_dsytrf_diag(pastix_int_t N)
Time model of the computation of the diagonal block.
static double perfs_dpotrf_diag(pastix_int_t N)
Time model of the computation of the diagonal block.
static double perfs_dsytrf_blkupdate(pastix_int_t M, pastix_int_t N, pastix_int_t K)
Computes the theoretical number of flops of the update step per block (see sum2d())
static double perfs_zsytrf_blkupdate(pastix_int_t M, pastix_int_t N, pastix_int_t K)
Computes the theoretical number of flops of the update step per block (see sum2d())
Cost functions to compute statistics on the symbolic structure.
Definition symbol_cost.h:27