17 #ifndef DOXYGEN_SHOULD_SKIP_THIS
22 #include "common/common.h"
48 #ifndef DOXYGEN_SHOULD_SKIP_THIS
51 volatile int32_t kernels_trace_started = 0;
53 #if defined(PASTIX_WITH_EZTRACE)
55 int pastix_eztrace_level = 1;
61 typedef enum trace_status_e {
62 PastixTraceInitialised = 1,
63 PastixTraceRunning = 2
66 static trace_status_t trace_status = 0;
70 #if defined(PASTIX_GENERATE_MODEL)
72 pastix_model_entry_t *model_entries = NULL;
73 volatile int32_t model_entries_nbr = -1;
74 int32_t model_size = 0;
78 pastix_atomic_lock_t
lock_flops = PASTIX_ATOMIC_UNLOCKED;
92 #if defined(PASTIX_WITH_EZTRACE)
97 if ( ( trace_status & PastixTraceInitialised ) &&
98 ! ( trace_status & PastixTraceRunning ) ) {
99 trace_status |= PastixTraceRunning;
114 #if defined(PASTIX_WITH_EZTRACE)
119 if ( ( trace_status & PastixTraceInitialised ) &&
120 ( trace_status & PastixTraceRunning ) ) {
121 trace_status &= ~PastixTraceRunning;
154 nbstart = pastix_atomic_inc_32b( &(kernels_trace_started) );
160 #if defined(PASTIX_WITH_EZTRACE)
167 ! ( trace_status & PastixTraceInitialised ) ) {
168 char *level = pastix_getenv(
"PASTIX_EZTRACE_LEVEL");
170 pastix_eztrace_level = atoi(level);
171 pastix_cleanenv(level);
175 pastix_setenv(
"EZTRACE_TRACE_DIR", pastix_data->
dir_global, 1 );
186 trace_status |= PastixTraceInitialised;
191 #if defined(PASTIX_GENERATE_MODEL)
204 nbtrsm = cblkmin2d + (cblknbr - cblkmin2d) * solvmtx->
cblkmaxblk;
210 nbgemm = solvmtx->
bloknbr - cblknbr;
215 cblk = solvmtx->
cblktab+cblkmin2d;
216 for(cblknum = cblkmin2d; cblknum < cblknbr; cblknum++, cblk++ ) {
220 nbgemm += nbodb * nbodb;
223 nbgemm += (nbodb * (nbodb-1)) / 2;
227 total_number_of_tasks = nbfact + nbtrsm + nbgemm;
228 model_entries = malloc( total_number_of_tasks *
sizeof(pastix_model_entry_t) );
229 model_size = total_number_of_tasks;
233 memset( (
void*)
kernels_flops, 0, PastixKernelLvl1Nbr *
sizeof(
double) );
238 kernels_trace_started = 1;
264 nbstart = pastix_atomic_dec_32b( &(kernels_trace_started) );
267 assert( nbstart >= 0 );
270 assert( nbstart == 0 );
272 #if defined(PASTIX_WITH_EZTRACE)
276 if ( ( trace_status & PastixTraceInitialised ) ) {
282 #if defined(PASTIX_GENERATE_MODEL)
284 char *prec_names[4] = {
285 "s - single real",
"d - double real",
286 "c - single complex",
"z - double complex"
288 pastix_model_entry_t *entry = model_entries;
292 f = fopen(
"model.csv",
"w" );
299 fprintf(f,
"# GPU Model data\n");
302 fprintf(f,
"# CPU Model data\n");
305 fprintf( f,
"# Precision: %d - %s\n", pastix_data->
bcsc->flttype - 2, prec_names[ pastix_data->
bcsc->flttype - 2 ] );
306 fprintf( f,
"Kernel;M;N;K;Time\n" );
308 for(i=0; i <= model_entries_nbr; i++, entry++ ) {
309 switch( entry->ktype ) {
329 pastix_attr_fallthrough;
331 fprintf( f,
"%d;%d;%d;%d;%e\n",
332 entry->ktype, entry->m, entry->n, entry->k, entry->time );
338 free( model_entries );
341 model_entries = NULL;
342 model_entries_nbr = -1;
351 #if defined(PASTIX_SUPERNODE_STATS)
354 " Details of the number of operations:\n"
355 " - POTRF(A11) + TRSM(A11, A21): %6.2lf %cFlops\n"
356 " - HERK(A21, A22) : %6.2lf %cFlops\n"
357 " - POTRF(A22) : %6.2lf %cFlops\n"
358 " Total : %6.2lf %cFlops\n",
367 kernels_trace_started = 0;
BEGIN_C_DECLS typedef int pastix_int_t
double overall_flops[3]
Overall number of flops.
void kernelsTraceStart()
Resumes the trace module.
pastix_atomic_lock_t lock_flops
Lock to accumulate flops.
void kernelsTraceFinalize(const pastix_data_t *pastix_data)
Stops the trace module.
volatile double kernels_flops[PastixKernelLvl1Nbr]
Global array to store the number of flops executed per kernel.
void kernelsTraceStop()
Pauses the trace module.
void kernelsTraceInit(const pastix_data_t *pastix_data, pastix_trace_t trace)
Starts the trace module.
@ PastixKernelGEMMCblkFRLR
@ PastixKernelGEMMBlokLRLR
@ PastixKernelGEMMCblkLRLR
@ PastixKernelGEMMCblk1d1d
static pastix_int_t core_get_rklimit_end(pastix_int_t M, pastix_int_t N)
Compute the maximal rank accepted for a given matrix size for Just-In-Time strategy.
pastix_int_t(* core_get_rklimit)(pastix_int_t M, pastix_int_t N)
Compute the maximal rank accepted for a given matrix size. The pointer is set according to the low-rank strategy used.
enum pastix_trace_e pastix_trace_t
Steps to trace.
Main PaStiX data structure.
pastix_factotype_t factotype
SolverCblk *restrict cblktab
Solver column block structure.
Solver column block structure.