PaStiX Handbook  6.4.0
kernels_trace.c
Go to the documentation of this file.
1 /**
2  *
3  * @file kernels_trace.c
4  *
5  * @copyright 2004-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
6  * Univ. Bordeaux. All rights reserved.
7  *
8  * PaStiX trace and modelling routines
9  *
10  * @version 6.4.0
11  * @author Gregoire Pichon
12  * @author Mathieu Faverge
13  * @author Alycia Lisito
14  * @date 2024-07-09
15  *
16  **/
17 #ifndef DOXYGEN_SHOULD_SKIP_THIS
18 #ifndef _GNU_SOURCE
19 #define _GNU_SOURCE 1
20 #endif
21 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
22 #include "common/common.h"
23 #include "bcsc/bcsc.h"
24 #include "blend/solver.h"
25 #include "kernels_trace.h"
26 
27 /**
28  *******************************************************************************
29  *
30  * @brief Compute the maximal rank accepted for a given matrix size. The pointer
31  * is set according to the low-rank strategy used.
32  *
33  *******************************************************************************
34  *
35  * @param[in] M
36  * The number of rows of the matrix
37  *
38  * @param[in] N
39  * The number of columns of the matrix
40  *
41  *******************************************************************************
42  *
43  * @return The maximal rank accepted for this matrix size.
44  *
45  *******************************************************************************/
47 
48 #ifndef DOXYGEN_SHOULD_SKIP_THIS
49 volatile double kernels_flops[PastixKernelLvl1Nbr]; /* Number of flops executed per kernel; zeroed by kernelsTraceInit() */
50 
51 volatile int32_t kernels_trace_started = 0; /* Incremented by kernelsTraceInit(), decremented by kernelsTraceFinalize() */
52 
53 #if defined(PASTIX_WITH_EZTRACE)
54 
55 int pastix_eztrace_level = 1; /* Overridden by the PASTIX_EZTRACE_LEVEL environment variable in kernelsTraceInit() */
56 
57 /**
58  * @brief Status of the trace.
59  * @warning This is a set of bits for bitmask operations.
60  */
61 typedef enum trace_status_e {
62  PastixTraceInitialised = 1, /* Set by kernelsTraceInit() once eztrace_start() has been called */
63  PastixTraceRunning = 2 /* Set by kernelsTraceStart(), cleared by kernelsTraceStop() */
64 } trace_status_t;
65 
66 static trace_status_t trace_status = 0; /* Bitmask of trace_status_t flags; 0 means neither initialised nor running */
67 
68 #endif
69 
70 #if defined(PASTIX_GENERATE_MODEL)
71 
72 pastix_model_entry_t *model_entries = NULL; /* Allocated by kernelsTraceInit(); dumped to model.csv and freed by kernelsTraceFinalize() */
73 volatile int32_t model_entries_nbr = -1; /* -1 when empty; kernelsTraceFinalize() dumps entries 0..model_entries_nbr inclusive */
74 int32_t model_size = 0; /* Number of entries allocated in model_entries */
75 
76 #endif
77 
78 pastix_atomic_lock_t lock_flops = PASTIX_ATOMIC_UNLOCKED; /* Lock to accumulate flops; also held by kernelsTraceInit()/kernelsTraceFinalize() */
79 double overall_flops[3] = { 0.0, 0.0, 0.0 }; /* Overall number of flops, in three parts; reset by kernelsTraceInit() */
80 
81 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
82 
/**
 *******************************************************************************
 *
 * @brief Resumes the trace module
 *
 * This function does nothing unless PaStiX is compiled with
 * PASTIX_WITH_EZTRACE. In that case, the EZTrace recording is resumed only
 * if the trace has been initialised and is not already running.
 *
 *******************************************************************************/
void
kernelsTraceStart( void )
{
#if defined(PASTIX_WITH_EZTRACE)
    /*
     * Resume only if the trace has been initialised (PastixTraceInitialised)
     * and is not currently running (!PastixTraceRunning).
     */
    if ( ( trace_status & PastixTraceInitialised ) &&
         ! ( trace_status & PastixTraceRunning ) ) {
        trace_status |= PastixTraceRunning;
        eztrace_resume ();
    }
#endif
}
104 
/**
 *******************************************************************************
 *
 * @brief Pauses the trace module
 *
 * This function does nothing unless PaStiX is compiled with
 * PASTIX_WITH_EZTRACE. In that case, the EZTrace recording is paused only
 * if the trace has been initialised and is currently running.
 *
 *******************************************************************************/
void
kernelsTraceStop( void )
{
#if defined(PASTIX_WITH_EZTRACE)
    /*
     * Pause only if the trace has been initialised (PastixTraceInitialised)
     * and is currently running (PastixTraceRunning).
     */
    if ( ( trace_status & PastixTraceInitialised ) &&
         ( trace_status & PastixTraceRunning ) ) {
        trace_status &= ~PastixTraceRunning;
        eztrace_pause ();
    }
#endif
}
126 
127 /**
128  *******************************************************************************
129  *
130  * @brief Starts the trace module
131  *
132  *******************************************************************************
133  *
134  * @param[in] pastix_data
135  * The pastix_data structure of the problem to give input information
136  * to the different trace modes.
137  *
138  * @param[in] trace
139  * Value of iparm_trace :
140  * - PastixTraceNot = no trace
141  * - PastixTraceNumfact = only traces the facto
142  * - PastixTraceSolve = only traces the solve
143  * - PastixTraceFactAndSolve = traces facto and solve
144  *
145  *******************************************************************************/
146 void
147 kernelsTraceInit( const pastix_data_t *pastix_data,
148  pastix_trace_t trace )
149 {
150  const SolverMatrix *solvmtx = pastix_data->solvmatr;
151  int32_t nbstart;
152 
153  pastix_atomic_lock( &lock_flops );
154  nbstart = pastix_atomic_inc_32b( &(kernels_trace_started) );
155  if ( nbstart > 1 ) {
156  pastix_atomic_unlock( &lock_flops );
157  return;
158  }
159 
160 #if defined(PASTIX_WITH_EZTRACE)
161  {
162  /*
163  * If the trace is not activated (!PastixTraceNot)
164  * If the trace has not already been initialised (!PastixTraceInitialised)
165  */
166  if ( ( trace != PastixTraceNot ) &&
167  ! ( trace_status & PastixTraceInitialised ) ) {
168  char *level = pastix_getenv("PASTIX_EZTRACE_LEVEL");
169  if (level != NULL) {
170  pastix_eztrace_level = atoi(level);
171  pastix_cleanenv(level);
172  }
173 
174  if ( pastix_data->dir_global != NULL ) {
175  pastix_setenv( "EZTRACE_TRACE_DIR", pastix_data->dir_global, 1 );
176  }
177 
178  /* Starts at the initialisation */
179  eztrace_start ();
180 
181  /*
182  * Pauses at the initialisation, will resume at factorisation
183  * and/or solve.
184  */
185  eztrace_pause ();
186  trace_status |= PastixTraceInitialised;
187  }
188  }
189 #endif /* defined(PASTIX_WITH_EZTRACE) */
190 
191 #if defined(PASTIX_GENERATE_MODEL)
192  {
193  pastix_int_t cblknbr = solvmtx->cblknbr;
194  pastix_int_t cblkmin2d = solvmtx->cblkmin2d;
195  pastix_int_t total_number_of_tasks = 0;
196  pastix_int_t nbfact, nbtrsm, nbgemm;
197  pastix_int_t cblknum;
198  SolverCblk *cblk;
199 
200  /* Factorization kernels */
201  nbfact = cblknbr;
202 
203  /* TRSM kernels */
204  nbtrsm = cblkmin2d + (cblknbr - cblkmin2d) * solvmtx->cblkmaxblk;
205  if ( solvmtx->factotype == PastixFactLU ) {
206  nbtrsm *= 2;
207  }
208 
209  /* GEMM kernels */
210  nbgemm = solvmtx->bloknbr - cblknbr;
211  if ( solvmtx->factotype == PastixFactLU ) {
212  nbgemm *= 2;
213  }
214 
215  cblk = solvmtx->cblktab+cblkmin2d;
216  for(cblknum = cblkmin2d; cblknum < cblknbr; cblknum++, cblk++ ) {
217  pastix_int_t nbodb = (cblk[1].fblokptr - cblk[0].fblokptr) - 1;
218 
219  if ( solvmtx->factotype == PastixFactLU ) {
220  nbgemm += nbodb * nbodb;
221  }
222  else {
223  nbgemm += (nbodb * (nbodb-1)) / 2;
224  }
225  }
226 
227  total_number_of_tasks = nbfact + nbtrsm + nbgemm;
228  model_entries = malloc( total_number_of_tasks * sizeof(pastix_model_entry_t) );
229  model_size = total_number_of_tasks;
230  }
231 #endif
232 
233  memset( (void*)kernels_flops, 0, PastixKernelLvl1Nbr * sizeof(double) );
234 
235  overall_flops[0] = 0.0;
236  overall_flops[1] = 0.0;
237  overall_flops[2] = 0.0;
238  kernels_trace_started = 1;
239 
240  (void)solvmtx;
241  (void)trace;
242  pastix_atomic_unlock( &lock_flops );
243  return;
244 }
245 
246 /**
247  *******************************************************************************
248  *
249  * @brief Stops the trace module
250  *
251  *******************************************************************************
252  *
253  * @param[in] pastix_data
254  * The pastix_data structure of the problem to get input information
255  * for the different trace modes, and store output statistics.
256  *
257  *******************************************************************************/
258 void
259 kernelsTraceFinalize( const pastix_data_t *pastix_data )
260 {
261  int32_t nbstart;
262 
263  pastix_atomic_lock( &lock_flops );
264  nbstart = pastix_atomic_dec_32b( &(kernels_trace_started) );
265  if ( nbstart > 0 ) {
266  pastix_atomic_unlock( &lock_flops );
267  assert( nbstart >= 0 );
268  return;
269  }
270  assert( nbstart == 0 );
271 
272 #if defined(PASTIX_WITH_EZTRACE)
273  /*
274  * If the trace has been initialised (PastixTraceInitialised)
275  */
276  if ( ( trace_status & PastixTraceInitialised ) ) {
277  eztrace_stop ();
278  trace_status = 0;
279  }
280 #endif
281 
282 #if defined(PASTIX_GENERATE_MODEL)
283  {
284  char *prec_names[4] = {
285  "s - single real", "d - double real",
286  "c - single complex", "z - double complex"
287  };
288  pastix_model_entry_t *entry = model_entries;
289  pastix_int_t i, gpucase;
290  FILE *f;
291 
292  f = fopen( "model.csv", "w" );
293  if ( f == NULL ) {
294  goto end_model;
295  }
296 
297  gpucase = pastix_data->iparm[IPARM_GPU_NBR];
298  if ( gpucase ) {
299  fprintf(f, "# GPU Model data\n");
300  }
301  else {
302  fprintf(f, "# CPU Model data\n");
303  }
304 
305  fprintf( f, "# Precision: %d - %s\n", pastix_data->bcsc->flttype - 2, prec_names[ pastix_data->bcsc->flttype - 2 ] );
306  fprintf( f, "Kernel;M;N;K;Time\n" );
307 
308  for(i=0; i <= model_entries_nbr; i++, entry++ ) {
309  switch( entry->ktype ) {
310  case PastixKernelGETRF: pastix_attr_fallthrough;
311  case PastixKernelHETRF: pastix_attr_fallthrough;
312  case PastixKernelPOTRF: pastix_attr_fallthrough;
313  case PastixKernelPXTRF: pastix_attr_fallthrough;
314  case PastixKernelSYTRF: pastix_attr_fallthrough;
315  case PastixKernelSCALOCblk: pastix_attr_fallthrough;
316  case PastixKernelSCALOBlok: pastix_attr_fallthrough;
317  case PastixKernelTRSMCblk1d: pastix_attr_fallthrough;
318  case PastixKernelTRSMCblk2d: pastix_attr_fallthrough;
319  case PastixKernelTRSMCblkLR: pastix_attr_fallthrough;
320  case PastixKernelTRSMBlokLR: pastix_attr_fallthrough;
321  case PastixKernelGEMMCblk1d1d: pastix_attr_fallthrough;
322  case PastixKernelGEMMCblkFRLR: pastix_attr_fallthrough;
323  case PastixKernelGEMMCblkLRLR: pastix_attr_fallthrough;
325  if ( gpucase ) {
326  continue;
327  }
328 
329  pastix_attr_fallthrough;
330  default:
331  fprintf( f, "%d;%d;%d;%d;%e\n",
332  entry->ktype, entry->m, entry->n, entry->k, entry->time );
333  }
334  }
335 
336  fclose( f );
337 
338  free( model_entries );
339 
340  /* Reinitialize values */
341  model_entries = NULL;
342  model_entries_nbr = -1;
343  model_size = 0;
344  }
345  end_model:
346 #endif
347 
348  /* Update the real number of Flops performed */
350 
351 #if defined(PASTIX_SUPERNODE_STATS)
352  if (pastix_data->iparm[IPARM_VERBOSE] > PastixVerboseNot) {
353  fprintf( stdout,
354  " Details of the number of operations:\n"
355  " - POTRF(A11) + TRSM(A11, A21): %6.2lf %cFlops\n"
356  " - HERK(A21, A22) : %6.2lf %cFlops\n"
357  " - POTRF(A22) : %6.2lf %cFlops\n"
358  " Total : %6.2lf %cFlops\n",
359  pastix_print_value( overall_flops[0] ), pastix_print_unit( overall_flops[0] ),
360  pastix_print_value( overall_flops[1] ), pastix_print_unit( overall_flops[1] ),
361  pastix_print_value( overall_flops[2] ), pastix_print_unit( overall_flops[2] ),
362  pastix_print_value( pastix_data->dparm[DPARM_FACT_RLFLOPS] ),
363  pastix_print_unit( pastix_data->dparm[DPARM_FACT_RLFLOPS] ) );
364  }
365 #endif /* defined(PASTIX_SUPERNODE_STATS) */
366 
367  kernels_trace_started = 0;
368  pastix_atomic_unlock( &lock_flops );
369  (void)pastix_data;
370 }
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
double overall_flops[3]
Overall number of flops.
void kernelsTraceStart()
Resumes the trace module.
Definition: kernels_trace.c:90
pastix_atomic_lock_t lock_flops
Lock to accumulate flops.
void kernelsTraceFinalize(const pastix_data_t *pastix_data)
Stops the trace module.
volatile double kernels_flops[PastixKernelLvl1Nbr]
Global array to store the number of flops executed per kernel.
void kernelsTraceStop()
Pauses the trace module.
void kernelsTraceInit(const pastix_data_t *pastix_data, pastix_trace_t trace)
Starts the trace module.
@ PastixKernelSCALOCblk
Definition: kernels_enums.h:53
@ PastixKernelGEMMCblkFRLR
Definition: kernels_enums.h:65
@ PastixKernelTRSMBlokLR
Definition: kernels_enums.h:61
@ PastixKernelTRSMCblk2d
Definition: kernels_enums.h:58
@ PastixKernelSYTRF
Definition: kernels_enums.h:52
@ PastixKernelSCALOBlok
Definition: kernels_enums.h:54
@ PastixKernelGEMMBlokLRLR
Definition: kernels_enums.h:68
@ PastixKernelPOTRF
Definition: kernels_enums.h:50
@ PastixKernelTRSMCblk1d
Definition: kernels_enums.h:57
@ PastixKernelHETRF
Definition: kernels_enums.h:49
@ PastixKernelPXTRF
Definition: kernels_enums.h:51
@ PastixKernelGETRF
Definition: kernels_enums.h:48
@ PastixKernelGEMMCblkLRLR
Definition: kernels_enums.h:66
@ PastixKernelGEMMCblk1d1d
Definition: kernels_enums.h:62
@ PastixKernelTRSMCblkLR
Definition: kernels_enums.h:59
static pastix_int_t core_get_rklimit_end(pastix_int_t M, pastix_int_t N)
Compute the maximal rank accepted for a given matrix size for Just-In-Time strategy.
pastix_int_t(* core_get_rklimit)(pastix_int_t M, pastix_int_t N)
Compute the maximal rank accepted for a given matrix size. The pointer is set according to the low-ra...
Definition: kernels_trace.c:46
enum pastix_trace_e pastix_trace_t
Steps to trace.
@ PastixFactLU
Definition: api.h:317
@ DPARM_FACT_RLFLOPS
Definition: api.h:173
@ IPARM_GPU_NBR
Definition: api.h:123
@ IPARM_VERBOSE
Definition: api.h:36
@ PastixVerboseNot
Definition: api.h:220
@ PastixTraceNot
Definition: api.h:210
SolverMatrix * solvmatr
Definition: pastixdata.h:103
pastix_int_t * iparm
Definition: pastixdata.h:70
double * dparm
Definition: pastixdata.h:71
pastix_bcsc_t * bcsc
Definition: pastixdata.h:102
char * dir_global
Definition: pastixdata.h:110
Main PaStiX data structure.
Definition: pastixdata.h:68
pastix_int_t cblkmin2d
Definition: solver.h:219
pastix_int_t cblknbr
Definition: solver.h:211
pastix_int_t cblkmaxblk
Definition: solver.h:220
pastix_factotype_t factotype
Definition: solver.h:237
SolverBlok * fblokptr
Definition: solver.h:168
pastix_int_t bloknbr
Definition: solver.h:224
SolverCblk *restrict cblktab
Definition: solver.h:228
Solver column block structure.
Definition: solver.h:161
Solver matrix structure.
Definition: solver.h:203