PaStiX Handbook  6.3.2
perf.h
Go to the documentation of this file.
1 /**
2  *
3  * @file perf.h
4  *
5  * PaStiX header of the performance model.
6  *
7  * @copyright 2004-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.3.2
11  * @author Pierre Ramet
12  * @author Pascal Henon
13  * @author Mathieu Faverge
14  * @date 2023-07-21
15  *
16  * @addtogroup blend_dev_cost
17  * @{
18  *
19  **/
20 #ifndef _perf_h_
21 #define _perf_h_
22 
23 #ifndef DOXYGEN_SHOULD_SKIP_THIS
24 
/* Label of the performance model described by this header
 * (presumably the platform/BLAS the coefficients below were fitted on -- confirm). */
#define PERF_MODEL "AMD 6180 MKL"
26 
/**GEMM**/
/*
 * Coefficients of the GEMM cost polynomial
 *   A*i*j*k + B*i*j + C*j*k + D*i + E*j + F
 * The negative coefficient is parenthesized so that the macro expands safely
 * in any expression context.
 */
#define GEMM_A 2.429169e-10
#define GEMM_B 2.724804e-10
#define GEMM_C 1.328900e-09
#define GEMM_D 1.148989e-07
#define GEMM_E (-2.704179e-10)
#define GEMM_F 1.216278e-06

/*
 * Private helper: evaluates the GEMM cost polynomial for dimensions already
 * converted to double. The terms are summed in the same order as the
 * historical macro expansion so the result is unchanged.
 * (Unit of the returned cost is presumably seconds -- confirm.)
 */
static inline double
perf_model_gemm( double i, double j, double k )
{
    return GEMM_A * i * j * k
        +  GEMM_B * i * j
        +  GEMM_C * j * k
        +  GEMM_D * i
        +  GEMM_E * j
        +  GEMM_F;
}

/*
 * PERF_GEMM(i,j,k): modelled cost of a GEMM call.
 * Each argument is converted to double and evaluated exactly once, so
 * arguments with side effects (e.g. n++) are safe.
 */
#define PERF_GEMM(i,j,k) perf_model_gemm( (double)(i), (double)(j), (double)(k) )
35 
36 
/**GEAM**/
/*
 * Coefficients of the GEAM cost polynomial
 *   A*i*j + B*i + C*j + D
 * Negative coefficients are parenthesized so that the macros expand safely
 * in any expression context.
 */
#define GEAM_A 1.358111e-09
#define GEAM_B (-4.416379e-09)
#define GEAM_C 2.270780e-08
#define GEAM_D (-3.335563e-07)

/*
 * Private helper: evaluates the GEAM cost polynomial for dimensions already
 * converted to double, summing the terms in the same order as the historical
 * macro expansion.
 */
static inline double
perf_model_geam( double i, double j )
{
    return GEAM_A * i * j
        +  GEAM_B * i
        +  GEAM_C * j
        +  GEAM_D;
}

/*
 * PERF_GEAM(i,j): modelled cost of a GEAM call.
 * Each argument is converted to double and evaluated exactly once.
 */
#define PERF_GEAM(i,j) perf_model_geam( (double)(i), (double)(j) )
43 
/**TRSM (works only for the right case) **/
/*
 * Coefficients of the TRSM cost polynomial
 *   A*i*i*j + B*i + C
 */
#define TRSM_A 2.626177e-10
#define TRSM_B 3.976198e-08
#define TRSM_C 3.255168e-06

/*
 * Private helper: evaluates the TRSM cost polynomial for dimensions already
 * converted to double, summing the terms in the same order as the historical
 * macro expansion.
 */
static inline double
perf_model_trsm( double i, double j )
{
    return TRSM_A * i * i * j
        +  TRSM_B * i
        +  TRSM_C;
}

/*
 * PERF_TRSM(i,j): modelled cost of a TRSM call.
 * Each argument is converted to double and evaluated exactly once.
 */
#define PERF_TRSM( i, j ) perf_model_trsm( (double)(i), (double)(j) )
49 
/**POTRF**/
/*
 * Coefficients of the POTRF cost polynomial
 *   A*i^3 + B*i^2 + C*i + D
 * The negative coefficient is parenthesized so that the macro expands safely
 * in any expression context.
 */
#define POTRF_A 2.439599e-11
#define POTRF_B 1.707006e-08
#define POTRF_C (-1.469893e-07)
#define POTRF_D 4.071507e-07

/*
 * Private helper: evaluates the POTRF cost polynomial for a size already
 * converted to double, summing the terms in the same order as the historical
 * macro expansion.
 */
static inline double
perf_model_potrf( double i )
{
    return POTRF_A * i * i * i
        +  POTRF_B * i * i
        +  POTRF_C * i
        +  POTRF_D;
}

/*
 * PERF_POTRF(i): modelled cost of a POTRF call.
 * The argument is converted to double and evaluated exactly once (the
 * previous expansion repeated it six times).
 */
#define PERF_POTRF(i) perf_model_potrf( (double)(i) )
56 
/**PPF**/
/*
 * Coefficients of the cost polynomial used by PERF_SYTRF
 *   A*i^3 + B*i^2 + C*i + D
 * The constants keep their historical PPF_ prefix although the macro is named
 * PERF_SYTRF. Their values are identical to the POTRF_* coefficients.
 * The negative coefficient is parenthesized so that the macro expands safely
 * in any expression context.
 */
#define PPF_A 2.439599e-11
#define PPF_B 1.707006e-08
#define PPF_C (-1.469893e-07)
#define PPF_D 4.071507e-07

/*
 * Private helper: evaluates the SYTRF cost polynomial for a size already
 * converted to double, summing the terms in the same order as the historical
 * macro expansion.
 */
static inline double
perf_model_sytrf( double i )
{
    return PPF_A * i * i * i
        +  PPF_B * i * i
        +  PPF_C * i
        +  PPF_D;
}

/*
 * PERF_SYTRF(i): modelled cost of a SYTRF call.
 * The argument is converted to double and evaluated exactly once.
 */
#define PERF_SYTRF(i) perf_model_sytrf( (double)(i) )
63 
/**SCAL**/
/* Linear cost model for SCAL: SCAL_A is the slope, SCAL_B the constant term. */
#define SCAL_B 2.052399e-07
#define SCAL_A 4.371793e-10
/* PERF_SCAL(i): modelled cost of a SCAL call; the argument is used once. */
#define PERF_SCAL(i) ( SCAL_B + SCAL_A * (double)(i) )
68 
/**COPY**/
/* Linear cost model for COPY: COPY_A is the slope, COPY_B the constant term. */
#define COPY_B 2.266129e-07
#define COPY_A 9.177969e-10
/* PERF_COPY(i): modelled cost of a COPY call; the argument is used once. */
#define PERF_COPY(i) ( COPY_B + COPY_A * (double)(i) )
73 
/**AXPY**/
/* Linear cost model for AXPY: AXPY_A is the slope, AXPY_B the constant term. */
#define AXPY_B 2.101008e-07
#define AXPY_A 4.620143e-10
/* PERF_AXPY(i): modelled cost of an AXPY call; the argument is used once. */
#define PERF_AXPY(i) ( AXPY_B + AXPY_A * (double)(i) )
78 
/**GEMV**/
/*
 * Coefficients of the GEMV cost polynomial
 *   A*i*j + B*i + C*j + D
 * The negative coefficient is parenthesized so that the macro expands safely
 * in any expression context.
 */
#define GEMV_A 6.192657e-10
#define GEMV_B (-2.884799e-09)
#define GEMV_C 7.594831e-10
#define GEMV_D 3.575035e-07

/*
 * Private helper: evaluates the GEMV cost polynomial for dimensions already
 * converted to double, summing the terms in the same order as the historical
 * macro expansion.
 */
static inline double
perf_model_gemv( double i, double j )
{
    return GEMV_A * i * j
        +  GEMV_B * i
        +  GEMV_C * j
        +  GEMV_D;
}

/*
 * PERF_GEMV(i,j): modelled cost of a GEMV call.
 * Each argument is converted to double and evaluated exactly once.
 */
#define PERF_GEMV(i,j) perf_model_gemv( (double)(i), (double)(j) )
85 
/**TRSV**/
/*
 * Coefficients of the TRSV cost polynomial
 *   A*i^2 + B*i + C
 */
#define TRSV_A 3.224536e-10
#define TRSV_B 1.709178e-08
#define TRSV_C 1.947268e-07

/*
 * Private helper: evaluates the TRSV cost polynomial for a size already
 * converted to double, summing the terms in the same order as the historical
 * macro expansion.
 */
static inline double
perf_model_trsv( double i )
{
    return TRSV_A * i * i
        +  TRSV_B * i
        +  TRSV_C;
}

/*
 * PERF_TRSV(i): modelled cost of a TRSV call.
 * The argument is converted to double and evaluated exactly once.
 */
#define PERF_TRSV(i) perf_model_trsv( (double)(i) )
91 
/*
 * Communication model constants (sizes in bytes), in three families:
 *   TIME    : between threads
 *   CLUSTER : between nodes
 *   SHARED  : between shared MPI
 *
 * Each family provides a *_BANDWIDTH_<n> and a *_STARTUP_<n> value for
 * <n> in {1, 2, 4, 8}.
 * NOTE(review): the meaning of the <n> suffix is not stated here
 * (presumably a process/thread count) -- confirm against the users of
 * these constants.
 */

/* Old version, kept for reference only:
#define TIME_BANDWIDTH 1.5e-9
#define TIME_STARTUP 5.2e-6
#define CLUSTER_BANDWIDTH 5.9e-10
#define CLUSTER_STARTUP 3.9e-6
 end old */

/* Variant 1 */
#define TIME_BANDWIDTH_1    0.0
#define TIME_STARTUP_1      1e-8
#define SHARED_BANDWIDTH_1  1.0e-10
#define SHARED_STARTUP_1    0.2e-6
#define CLUSTER_BANDWIDTH_1 3.0e-10
#define CLUSTER_STARTUP_1   3.0e-6

/* Variant 2 */
#define TIME_BANDWIDTH_2    0.0
#define TIME_STARTUP_2      1e-8
#define SHARED_BANDWIDTH_2  3.0e-10
#define SHARED_STARTUP_2    0.4e-6
#define CLUSTER_BANDWIDTH_2 6.0e-10
#define CLUSTER_STARTUP_2   6.0e-6

/* Variant 4 */
#define TIME_BANDWIDTH_4    0.0
#define TIME_STARTUP_4      1e-8
#define SHARED_BANDWIDTH_4  6.0e-10
#define SHARED_STARTUP_4    0.8e-6
#define CLUSTER_BANDWIDTH_4 9.0e-10
#define CLUSTER_STARTUP_4   9.0e-6

/* Variant 8 */
#define TIME_BANDWIDTH_8    0.0
#define TIME_STARTUP_8      1e-8
#define SHARED_BANDWIDTH_8  6.0e-10
#define SHARED_STARTUP_8    0.8e-6
#define CLUSTER_BANDWIDTH_8 9.0e-10
/* NOTE(review): zero here breaks the 3.0e-6 / 6.0e-6 / 9.0e-6 progression of
 * the other variants -- confirm that it is intentional. */
#define CLUSTER_STARTUP_8   0.0e-6

/* Additional penalty terms, both currently zero. */
#define PENALTY_STARTUP   0.0
#define PENALTY_BANDWIDTH 0.0
138 
139 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
140 
141 #endif /* _perf_h_ */
142 
143 /**
144  * @}
145  */