PaStiX Handbook  6.3.0
kernels_trace.c
Go to the documentation of this file.
1 /**
2  *
3  * @file kernels_trace.c
4  *
5  * @copyright 2004-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
6  * Univ. Bordeaux. All rights reserved.
7  *
8  * PaStiX trace and modelling routines
9  *
10  * @version 6.2.0
11  * @author Gregoire Pichon
12  * @author Mathieu Faverge
13  * @date 2021-01-03
14  *
15  **/
16 #ifndef DOXYGEN_SHOULD_SKIP_THIS
17 #ifndef _GNU_SOURCE
18 #define _GNU_SOURCE 1
19 #endif
20 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
21 #include "common/common.h"
22 #include "bcsc/bcsc.h"
23 #include "blend/solver.h"
24 #include "kernels_trace.h"
25 
26 /**
27  *******************************************************************************
28  *
29  * @brief Compute the maximal rank accepted for a given matrix size. The pointer
30  * is set according to the low-rank strategy used.
31  *
32  *******************************************************************************
33  *
34  * @param[in] M
35  * The number of rows of the matrix
36  *
37  * @param[in] N
38  * The number of columns of the matrix
39  *
40  *******************************************************************************
41  *
42  * @return The maximal rank accepted for this matrix size.
43  *
44  *******************************************************************************/
pastix_int_t (*core_get_rklimit)( pastix_int_t M, pastix_int_t N ) = core_get_rklimit_end;

#ifndef DOXYGEN_SHOULD_SKIP_THIS
/*
 * One double per level-1 kernel type. The whole array is zeroed by
 * kernelsTraceStart(); presumably it accumulates the flops of each kernel
 * type -- confirm against the recording helpers in kernels_trace.h.
 */
volatile double kernels_flops[PastixKernelLvl1Nbr];

/*
 * Number of active kernelsTraceStart() calls: incremented by
 * kernelsTraceStart() and decremented by kernelsTraceStop(), both while
 * holding lock_flops.
 */
volatile int32_t kernels_trace_started = 0;

#if defined(PASTIX_WITH_EZTRACE)

/*
 * EZTrace level (1 by default). kernelsTraceStart() overwrites it with the
 * value of the PASTIX_EZTRACE_LEVEL environment variable when it is set.
 */
int pastix_eztrace_level = 1;

#endif

#if defined(PASTIX_GENERATE_MODEL)

/* Buffer of model entries: allocated by kernelsTraceStart(), written to model.csv and freed by kernelsTraceStop(). */
pastix_model_entry_t *model_entries = NULL;
/* Index of the last entry dumped by kernelsTraceStop(); -1 means that no entry is stored. */
volatile int32_t model_entries_nbr = -1;
/* Number of entries allocated in model_entries. */
int32_t model_size = 0;

#endif

/* Lock taken by kernelsTraceStart() and kernelsTraceStop() while they update the trace state. */
pastix_atomic_lock_t lock_flops = PASTIX_ATOMIC_UNLOCKED;
/*
 * Three flop counters reset by kernelsTraceStart(); kernelsTraceStop() stores
 * their sum in dparm[DPARM_FACT_RLFLOPS].
 */
double overall_flops[3] = { 0.0, 0.0, 0.0 };
#endif /* DOXYGEN_SHOULD_SKIP_THIS */
69 
70 /**
71  *******************************************************************************
72  *
73  * @brief Start the trace module
74  *
75  *******************************************************************************
76  *
77  * @param[in] pastix_data
78  * The pastix_data structure of the problem to give input information
79  * to the different trace modes.
80  *
81  *******************************************************************************/
82 void
83 kernelsTraceStart( const pastix_data_t *pastix_data )
84 {
85  const SolverMatrix *solvmtx = pastix_data->solvmatr;
86  int32_t nbstart;
87 
88  pastix_atomic_lock( &lock_flops );
89  nbstart = pastix_atomic_inc_32b( &(kernels_trace_started) );
90  if ( nbstart > 1 ) {
91  pastix_atomic_unlock( &lock_flops );
92  return;
93  }
94 
95 #if defined(PASTIX_WITH_EZTRACE)
96  {
97  char *level = pastix_getenv("PASTIX_EZTRACE_LEVEL");
98  if (level != NULL) {
99  pastix_eztrace_level = atoi(level);
100  pastix_cleanenv(level);
101  }
102 
103  if ( pastix_data->dir_global != NULL ) {
104  pastix_setenv( "EZTRACE_TRACE_DIR", pastix_data->dir_global, 1 );
105  }
106  eztrace_start ();
107  }
108 #endif /* defined(PASTIX_WITH_EZTRACE) */
109 
110 #if defined(PASTIX_GENERATE_MODEL)
111  {
112  pastix_int_t cblknbr = solvmtx->cblknbr;
113  pastix_int_t cblkmin2d = solvmtx->cblkmin2d;
114  pastix_int_t total_number_of_tasks = 0;
115  pastix_int_t nbfact, nbtrsm, nbgemm;
116  pastix_int_t cblknum;
117  SolverCblk *cblk;
118 
119  /* Factorization kernels */
120  nbfact = cblknbr;
121 
122  /* TRSM kernels */
123  nbtrsm = cblkmin2d + (cblknbr - cblkmin2d) * solvmtx->cblkmaxblk;
124  if ( solvmtx->factotype == PastixFactLU ) {
125  nbtrsm *= 2;
126  }
127 
128  /* GEMM kernels */
129  nbgemm = solvmtx->bloknbr - cblknbr;
130  if ( solvmtx->factotype == PastixFactLU ) {
131  nbgemm *= 2;
132  }
133 
134  cblk = solvmtx->cblktab+cblkmin2d;
135  for(cblknum = cblkmin2d; cblknum < cblknbr; cblknum++, cblk++ ) {
136  pastix_int_t nbodb = (cblk[1].fblokptr - cblk[0].fblokptr) - 1;
137 
138  if ( solvmtx->factotype == PastixFactLU ) {
139  nbgemm += nbodb * nbodb;
140  }
141  else {
142  nbgemm += (nbodb * (nbodb-1)) / 2;
143  }
144  }
145 
146  total_number_of_tasks = nbfact + nbtrsm + nbgemm;
147  model_entries = malloc( total_number_of_tasks * sizeof(pastix_model_entry_t) );
148  model_size = total_number_of_tasks;
149  }
150 #endif
151 
152  memset( (void*)kernels_flops, 0, PastixKernelLvl1Nbr * sizeof(double) );
153 
154  overall_flops[0] = 0.0;
155  overall_flops[1] = 0.0;
156  overall_flops[2] = 0.0;
157  kernels_trace_started = 1;
158 
159  (void)solvmtx;
160  pastix_atomic_unlock( &lock_flops );
161  return;
162 }
163 
164 /**
165  *******************************************************************************
166  *
167  * @brief Stop the trace module
168  *
169  *******************************************************************************
170  *
171  * @param[in] pastix_data
172  * The pastix_data structure of the problem to get input information
173  * for the different trace modes, and store output statistics.
174  *
175  *******************************************************************************
176  *
177  * @return TODO
178  *
179  *******************************************************************************/
180 double
181 kernelsTraceStop( const pastix_data_t *pastix_data )
182 {
183  double total_flops = 0.0;
184  int32_t nbstart;
185 
186  assert( kernels_trace_started > 0 );
187  pastix_atomic_lock( &lock_flops );
188  nbstart = pastix_atomic_dec_32b( &(kernels_trace_started) );
189  if ( nbstart > 0 ) {
190  pastix_atomic_unlock( &lock_flops );
191  return total_flops;
192  }
193 
194 #if defined(PASTIX_WITH_EZTRACE)
195  eztrace_stop ();
196 #endif
197 
198 #if defined(PASTIX_GENERATE_MODEL)
199  {
200  char *prec_names[4] = {
201  "s - single real", "d - double real",
202  "c - single complex", "z - double complex"
203  };
204  pastix_model_entry_t *entry = model_entries;
205  pastix_int_t i, gpucase;
206  FILE *f;
207 
208  f = fopen( "model.csv", "w" );
209  if ( f == NULL ) {
210  goto end_model;
211  }
212 
213  gpucase = pastix_data->iparm[IPARM_GPU_NBR];
214  if ( gpucase ) {
215  fprintf(f, "# GPU Model data\n");
216  }
217  else {
218  fprintf(f, "# CPU Model data\n");
219  }
220 
221  fprintf( f, "# Precision: %d - %s\n", pastix_data->bcsc->flttype - 2, prec_names[ pastix_data->bcsc->flttype - 2 ] );
222  fprintf( f, "Kernel;M;N;K;Time\n" );
223 
224  for(i=0; i <= model_entries_nbr; i++, entry++ ) {
225  switch( entry->ktype ) {
226  case PastixKernelGETRF: pastix_attr_fallthrough;
227  case PastixKernelHETRF: pastix_attr_fallthrough;
228  case PastixKernelPOTRF: pastix_attr_fallthrough;
229  case PastixKernelPXTRF: pastix_attr_fallthrough;
230  case PastixKernelSYTRF: pastix_attr_fallthrough;
231  case PastixKernelSCALOCblk: pastix_attr_fallthrough;
232  case PastixKernelSCALOBlok: pastix_attr_fallthrough;
233  case PastixKernelTRSMCblk1d: pastix_attr_fallthrough;
234  case PastixKernelTRSMCblk2d: pastix_attr_fallthrough;
235  case PastixKernelTRSMCblkLR: pastix_attr_fallthrough;
236  case PastixKernelTRSMBlokLR: pastix_attr_fallthrough;
237  case PastixKernelGEMMCblk1d1d: pastix_attr_fallthrough;
238  case PastixKernelGEMMCblkFRLR: pastix_attr_fallthrough;
239  case PastixKernelGEMMCblkLRLR: pastix_attr_fallthrough;
240  case PastixKernelGEMMBlokLRLR:
241  if ( gpucase ) {
242  continue;
243  }
244 
245  pastix_attr_fallthrough;
246  default:
247  fprintf( f, "%d;%d;%d;%d;%e\n",
248  entry->ktype, entry->m, entry->n, entry->k, entry->time );
249  }
250  }
251 
252  fclose( f );
253 
254  free( model_entries );
255 
256  /* Reinitialize values */
257  model_entries = NULL;
258  model_entries_nbr = -1;
259  model_size = 0;
260  }
261  end_model:
262 #endif
263 
264  /* Update the real number of Flops performed */
265  pastix_data->dparm[DPARM_FACT_RLFLOPS] = overall_flops[0] + overall_flops[1] + overall_flops[2];
266 
267 #if defined(PASTIX_SUPERNODE_STATS)
268  if (pastix_data->iparm[IPARM_VERBOSE] > PastixVerboseNot) {
269  fprintf( stdout,
270  " Details of the number of operations:\n"
271  " - POTRF(A11) + TRSM(A11, A21): %6.2lf %cFlops\n"
272  " - HERK(A21, A22) : %6.2lf %cFlops\n"
273  " - POTRF(A22) : %6.2lf %cFlops\n"
274  " Total : %6.2lf %cFlops\n",
275  pastix_print_value( overall_flops[0] ), pastix_print_unit( overall_flops[0] ),
276  pastix_print_value( overall_flops[1] ), pastix_print_unit( overall_flops[1] ),
277  pastix_print_value( overall_flops[2] ), pastix_print_unit( overall_flops[2] ),
278  pastix_print_value( pastix_data->dparm[DPARM_FACT_RLFLOPS] ),
279  pastix_print_unit( pastix_data->dparm[DPARM_FACT_RLFLOPS] ) );
280  }
281 #endif /* defined(PASTIX_SUPERNODE_STATS) */
282 
283  kernels_trace_started = 0;
284  pastix_atomic_unlock( &lock_flops );
285  (void)pastix_data;
286  return total_flops;
287 }
static pastix_int_t core_get_rklimit_end(pastix_int_t M, pastix_int_t N)
Compute the maximal rank accepted for a given matrix size for Just-In-Time strategy.
pastix_int_t(* core_get_rklimit)(pastix_int_t M, pastix_int_t N)
Compute the maximal rank accepted for a given matrix size. The pointer is set according to the low-ra...
Definition: kernels_trace.c:45
@ PastixFactLU
Definition: api.h:316
@ DPARM_FACT_RLFLOPS
Definition: api.h:172
@ IPARM_GPU_NBR
Definition: api.h:122
@ IPARM_VERBOSE
Definition: api.h:36
@ PastixVerboseNot
Definition: api.h:215
double kernelsTraceStop(const pastix_data_t *pastix_data)
Stop the trace module.
void kernelsTraceStart(const pastix_data_t *pastix_data)
Start the trace module.
Definition: kernels_trace.c:83
SolverBlok * fblokptr
Definition: solver.h:163
Solver column block structure.
Definition: solver.h:156