19 #include "common/common.h"
22 #include "kernels_trace.h"
/*
 * Flop counters, one slot per level-1 kernel identifier
 * (PastixKernelLvl1Nbr entries). The whole array is cleared with memset()
 * by the trace initialisation code later in this file.
 */
32 volatile double kernels_flops[PastixKernelLvl1Nbr];
/*
 * Trace start counter. It is incremented with pastix_atomic_inc_32b() and
 * decremented with pastix_atomic_dec_32b() while lock_flops is held; the
 * stop path asserts that it is strictly positive before decrementing.
 */
34 volatile int32_t kernels_trace_started = 0;
36 #if defined(PASTIX_WITH_EZTRACE)
/*
 * EZTrace level, 1 by default. Overwritten with the integer value of the
 * PASTIX_EZTRACE_LEVEL environment variable (parsed with atoi()).
 * NOTE(review): the meaning of each level is not visible here - confirm
 * against the header.
 */
38 int pastix_eztrace_level = 1;
42 #if defined(PASTIX_GENERATE_MODEL)
/*
 * Table of model entries. Allocated with malloc() for
 * total_number_of_tasks entries, dumped to "model.csv", then released with
 * free() and reset to NULL.
 */
44 pastix_model_entry_t *model_entries = NULL;
/*
 * Reset to -1 after the table is freed. The dump loop iterates with
 * i <= model_entries_nbr, so this presumably holds the index of the last
 * recorded entry rather than a count - TODO confirm where it is incremented.
 */
45 volatile int32_t model_entries_nbr = -1;
/* Capacity of model_entries: set to total_number_of_tasks at allocation. */
46 int32_t model_size = 0;
/*
 * Lock taken around the updates of kernels_trace_started and around the
 * reset of the counters in this file.
 */
50 pastix_atomic_lock_t lock_flops = PASTIX_ATOMIC_UNLOCKED;
/*
 * Three flop totals, reset to 0.0 at initialisation. Their sum is stored in
 * dparm[DPARM_FACT_RLFLOPS]; with PASTIX_SUPERNODE_STATS they are printed
 * individually as [0] "POTRF(A11) + TRSM(A11, A21)", [1] "HERK(A21, A22)"
 * and [2] "POTRF(A22)".
 */
51 double overall_flops[3] = { 0.0, 0.0, 0.0 };
68 const SolverMatrix *solvmtx = pastix_data->solvmatr;
71 pastix_atomic_lock( &lock_flops );
72 nbstart = pastix_atomic_inc_32b( &(kernels_trace_started) );
74 pastix_atomic_unlock( &lock_flops );
78 #if defined(PASTIX_WITH_EZTRACE)
80 char *level = pastix_getenv(
"PASTIX_EZTRACE_LEVEL");
82 pastix_eztrace_level = atoi(level);
83 pastix_cleanenv(level);
86 if ( pastix_data->dir_global != NULL ) {
87 pastix_setenv(
"EZTRACE_TRACE_DIR", pastix_data->dir_global, 1 );
93 #if defined(PASTIX_GENERATE_MODEL)
95 pastix_int_t cblknbr = solvmtx->cblknbr;
96 pastix_int_t cblkmin2d = solvmtx->cblkmin2d;
97 pastix_int_t total_number_of_tasks = 0;
98 pastix_int_t nbfact, nbtrsm, nbgemm;
106 nbtrsm = cblkmin2d + (cblknbr - cblkmin2d) * solvmtx->cblkmaxblk;
112 nbgemm = solvmtx->bloknbr - cblknbr;
117 cblk = solvmtx->cblktab+cblkmin2d;
118 for(cblknum = cblkmin2d; cblknum < cblknbr; cblknum++, cblk++ ) {
122 nbgemm += nbodb * nbodb;
125 nbgemm += (nbodb * (nbodb-1)) / 2;
129 total_number_of_tasks = nbfact + nbtrsm + nbgemm;
130 model_entries = malloc( total_number_of_tasks *
sizeof(pastix_model_entry_t) );
131 model_size = total_number_of_tasks;
135 memset( (
void*)kernels_flops, 0, PastixKernelLvl1Nbr *
sizeof(
double) );
137 overall_flops[0] = 0.0;
138 overall_flops[1] = 0.0;
139 overall_flops[2] = 0.0;
140 kernels_trace_started = 1;
143 pastix_atomic_unlock( &lock_flops );
162 double total_flops = 0.0;
165 assert( kernels_trace_started > 0 );
166 pastix_atomic_lock( &lock_flops );
167 nbstart = pastix_atomic_dec_32b( &(kernels_trace_started) );
169 pastix_atomic_unlock( &lock_flops );
173 #if defined(PASTIX_WITH_EZTRACE)
177 #if defined(PASTIX_GENERATE_MODEL)
179 char *prec_names[4] = {
180 "s - single real",
"d - double real",
181 "c - single complex",
"z - double complex"
183 pastix_model_entry_t *entry = model_entries;
184 pastix_int_t i, gpucase;
187 f = fopen(
"model.csv",
"w" );
194 fprintf(f,
"# GPU Model data\n");
197 fprintf(f,
"# CPU Model data\n");
200 fprintf( f,
"# Precision: %d - %s\n", pastix_data->bcsc->flttype - 2, prec_names[ pastix_data->bcsc->flttype - 2 ] );
201 fprintf( f,
"Kernel;M;N;K;Time\n" );
203 for(i=0; i <= model_entries_nbr; i++, entry++ ) {
204 switch( entry->ktype ) {
205 case PastixKernelGETRF: pastix_attr_fallthrough;
206 case PastixKernelHETRF: pastix_attr_fallthrough;
207 case PastixKernelPOTRF: pastix_attr_fallthrough;
208 case PastixKernelPXTRF: pastix_attr_fallthrough;
209 case PastixKernelSYTRF: pastix_attr_fallthrough;
210 case PastixKernelSCALOCblk: pastix_attr_fallthrough;
211 case PastixKernelSCALOBlok: pastix_attr_fallthrough;
212 case PastixKernelTRSMCblk1d: pastix_attr_fallthrough;
213 case PastixKernelTRSMCblk2d: pastix_attr_fallthrough;
214 case PastixKernelTRSMCblkLR: pastix_attr_fallthrough;
215 case PastixKernelTRSMBlokLR: pastix_attr_fallthrough;
216 case PastixKernelGEMMCblk1d1d: pastix_attr_fallthrough;
217 case PastixKernelGEMMCblkFRLR: pastix_attr_fallthrough;
218 case PastixKernelGEMMCblkLRLR: pastix_attr_fallthrough;
219 case PastixKernelGEMMBlokLRLR:
224 pastix_attr_fallthrough;
226 fprintf( f,
"%d;%d;%d;%d;%e\n",
227 entry->ktype, entry->m, entry->n, entry->k, entry->time );
233 free( model_entries );
236 model_entries = NULL;
237 model_entries_nbr = -1;
244 pastix_data->dparm[
DPARM_FACT_RLFLOPS] = overall_flops[0] + overall_flops[1] + overall_flops[2];
246 #if defined(PASTIX_SUPERNODE_STATS)
249 " Details of the number of operations:\n"
250 " - POTRF(A11) + TRSM(A11, A21): %6.2lf %cFlops\n"
251 " - HERK(A21, A22) : %6.2lf %cFlops\n"
252 " - POTRF(A22) : %6.2lf %cFlops\n"
253 " Total : %6.2lf %cFlops\n",
254 pastix_print_value( overall_flops[0] ), pastix_print_unit( overall_flops[0] ),
255 pastix_print_value( overall_flops[1] ), pastix_print_unit( overall_flops[1] ),
256 pastix_print_value( overall_flops[2] ), pastix_print_unit( overall_flops[2] ),
262 kernels_trace_started = 0;
263 pastix_atomic_unlock( &lock_flops );