PaStiX Handbook  6.2.1
kernels_trace.c
Go to the documentation of this file.
1 /**
2  *
3  * @file kernels_trace.c
4  *
5  * @copyright 2004-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
6  * Univ. Bordeaux. All rights reserved.
7  *
8  * PaStiX trace and modelling routines
9  *
10  * @version 6.2.0
11  * @author Gregoire Pichon
12  * @author Mathieu Faverge
13  * @date 2021-01-03
14  *
15  **/
16 #ifndef _GNU_SOURCE
17 #define _GNU_SOURCE 1
18 #endif
19 #include "common/common.h"
20 #include "bcsc/bcsc.h"
21 #include "blend/solver.h"
22 #include "kernels_trace.h"
23 
24 /**
25  * @brief Compute the maximal rank accepted for a given matrix size. The pointer is set according to the low-rank strategy used.
26  * @param[in] M The number of rows of the matrix
27  * @param[in] N The number of columns of the matrix
28  * @return The maximal rank accepted for this matrix size.
29  */
30 pastix_int_t (*core_get_rklimit)( pastix_int_t, pastix_int_t ) = core_get_rklimit_end;
31 
32 volatile double kernels_flops[PastixKernelLvl1Nbr];
33 
34 volatile int32_t kernels_trace_started = 0;
35 
36 #if defined(PASTIX_WITH_EZTRACE)
37 
38 int pastix_eztrace_level = 1;
39 
40 #endif
41 
42 #if defined(PASTIX_GENERATE_MODEL)
43 
44 pastix_model_entry_t *model_entries = NULL;
45 volatile int32_t model_entries_nbr = -1;
46 int32_t model_size = 0;
47 
48 #endif
49 
50 pastix_atomic_lock_t lock_flops = PASTIX_ATOMIC_UNLOCKED;
51 double overall_flops[3] = { 0.0, 0.0, 0.0 };
52 
53 /**
54  *******************************************************************************
55  *
56  * @brief Start the trace module
57  *
58  *******************************************************************************
59  *
60  * @param[in] pastix_data
61  * The pastix_data structure of the problem to give input information
62  * to the different trace modes.
63  *
64  *******************************************************************************/
65 void
66 kernelsTraceStart( const pastix_data_t *pastix_data )
67 {
68  const SolverMatrix *solvmtx = pastix_data->solvmatr;
69  int32_t nbstart;
70 
71  pastix_atomic_lock( &lock_flops );
72  nbstart = pastix_atomic_inc_32b( &(kernels_trace_started) );
73  if ( nbstart > 1 ) {
74  pastix_atomic_unlock( &lock_flops );
75  return;
76  }
77 
78 #if defined(PASTIX_WITH_EZTRACE)
79  {
80  char *level = pastix_getenv("PASTIX_EZTRACE_LEVEL");
81  if (level != NULL) {
82  pastix_eztrace_level = atoi(level);
83  pastix_cleanenv(level);
84  }
85 
86  if ( pastix_data->dir_global != NULL ) {
87  pastix_setenv( "EZTRACE_TRACE_DIR", pastix_data->dir_global, 1 );
88  }
89  eztrace_start ();
90  }
91 #endif /* defined(PASTIX_WITH_EZTRACE) */
92 
93 #if defined(PASTIX_GENERATE_MODEL)
94  {
95  pastix_int_t cblknbr = solvmtx->cblknbr;
96  pastix_int_t cblkmin2d = solvmtx->cblkmin2d;
97  pastix_int_t total_number_of_tasks = 0;
98  pastix_int_t nbfact, nbtrsm, nbgemm;
99  pastix_int_t cblknum;
100  SolverCblk *cblk;
101 
102  /* Factorization kernels */
103  nbfact = cblknbr;
104 
105  /* TRSM kernels */
106  nbtrsm = cblkmin2d + (cblknbr - cblkmin2d) * solvmtx->cblkmaxblk;
107  if ( solvmtx->factotype == PastixFactLU ) {
108  nbtrsm *= 2;
109  }
110 
111  /* GEMM kernels */
112  nbgemm = solvmtx->bloknbr - cblknbr;
113  if ( solvmtx->factotype == PastixFactLU ) {
114  nbgemm *= 2;
115  }
116 
117  cblk = solvmtx->cblktab+cblkmin2d;
118  for(cblknum = cblkmin2d; cblknum < cblknbr; cblknum++, cblk++ ) {
119  pastix_int_t nbodb = (cblk[1].fblokptr - cblk[0].fblokptr) - 1;
120 
121  if ( solvmtx->factotype == PastixFactLU ) {
122  nbgemm += nbodb * nbodb;
123  }
124  else {
125  nbgemm += (nbodb * (nbodb-1)) / 2;
126  }
127  }
128 
129  total_number_of_tasks = nbfact + nbtrsm + nbgemm;
130  model_entries = malloc( total_number_of_tasks * sizeof(pastix_model_entry_t) );
131  model_size = total_number_of_tasks;
132  }
133 #endif
134 
135  memset( (void*)kernels_flops, 0, PastixKernelLvl1Nbr * sizeof(double) );
136 
137  overall_flops[0] = 0.0;
138  overall_flops[1] = 0.0;
139  overall_flops[2] = 0.0;
140  kernels_trace_started = 1;
141 
142  (void)solvmtx;
143  pastix_atomic_unlock( &lock_flops );
144  return;
145 }
146 
147 /**
148  *******************************************************************************
149  *
150  * @brief Stop the trace module
151  *
152  *******************************************************************************
153  *
154  * @param[in] pastix_data
155  * The pastix_data structure of the problem to get input information
156  * for the different trace modes, and store output statistics.
157  *
158  *******************************************************************************/
159 double
160 kernelsTraceStop( const pastix_data_t *pastix_data )
161 {
162  double total_flops = 0.0;
163  int32_t nbstart;
164 
165  assert( kernels_trace_started > 0 );
166  pastix_atomic_lock( &lock_flops );
167  nbstart = pastix_atomic_dec_32b( &(kernels_trace_started) );
168  if ( nbstart > 0 ) {
169  pastix_atomic_unlock( &lock_flops );
170  return total_flops;
171  }
172 
173 #if defined(PASTIX_WITH_EZTRACE)
174  eztrace_stop ();
175 #endif
176 
177 #if defined(PASTIX_GENERATE_MODEL)
178  {
179  char *prec_names[4] = {
180  "s - single real", "d - double real",
181  "c - single complex", "z - double complex"
182  };
183  pastix_model_entry_t *entry = model_entries;
184  pastix_int_t i, gpucase;
185  FILE *f;
186 
187  f = fopen( "model.csv", "w" );
188  if ( f == NULL ) {
189  goto end_model;
190  }
191 
192  gpucase = pastix_data->iparm[IPARM_GPU_NBR];
193  if ( gpucase ) {
194  fprintf(f, "# GPU Model data\n");
195  }
196  else {
197  fprintf(f, "# CPU Model data\n");
198  }
199 
200  fprintf( f, "# Precision: %d - %s\n", pastix_data->bcsc->flttype - 2, prec_names[ pastix_data->bcsc->flttype - 2 ] );
201  fprintf( f, "Kernel;M;N;K;Time\n" );
202 
203  for(i=0; i <= model_entries_nbr; i++, entry++ ) {
204  switch( entry->ktype ) {
205  case PastixKernelGETRF: pastix_attr_fallthrough;
206  case PastixKernelHETRF: pastix_attr_fallthrough;
207  case PastixKernelPOTRF: pastix_attr_fallthrough;
208  case PastixKernelPXTRF: pastix_attr_fallthrough;
209  case PastixKernelSYTRF: pastix_attr_fallthrough;
210  case PastixKernelSCALOCblk: pastix_attr_fallthrough;
211  case PastixKernelSCALOBlok: pastix_attr_fallthrough;
212  case PastixKernelTRSMCblk1d: pastix_attr_fallthrough;
213  case PastixKernelTRSMCblk2d: pastix_attr_fallthrough;
214  case PastixKernelTRSMCblkLR: pastix_attr_fallthrough;
215  case PastixKernelTRSMBlokLR: pastix_attr_fallthrough;
216  case PastixKernelGEMMCblk1d1d: pastix_attr_fallthrough;
217  case PastixKernelGEMMCblkFRLR: pastix_attr_fallthrough;
218  case PastixKernelGEMMCblkLRLR: pastix_attr_fallthrough;
219  case PastixKernelGEMMBlokLRLR:
220  if ( gpucase ) {
221  continue;
222  }
223 
224  pastix_attr_fallthrough;
225  default:
226  fprintf( f, "%d;%d;%d;%d;%e\n",
227  entry->ktype, entry->m, entry->n, entry->k, entry->time );
228  }
229  }
230 
231  fclose( f );
232 
233  free( model_entries );
234 
235  /* Reinitialize values */
236  model_entries = NULL;
237  model_entries_nbr = -1;
238  model_size = 0;
239  }
240  end_model:
241 #endif
242 
243  /* Update the real number of Flops performed */
244  pastix_data->dparm[DPARM_FACT_RLFLOPS] = overall_flops[0] + overall_flops[1] + overall_flops[2];
245 
246 #if defined(PASTIX_SUPERNODE_STATS)
247  if (pastix_data->iparm[IPARM_VERBOSE] > PastixVerboseNot) {
248  fprintf( stdout,
249  " Details of the number of operations:\n"
250  " - POTRF(A11) + TRSM(A11, A21): %6.2lf %cFlops\n"
251  " - HERK(A21, A22) : %6.2lf %cFlops\n"
252  " - POTRF(A22) : %6.2lf %cFlops\n"
253  " Total : %6.2lf %cFlops\n",
254  pastix_print_value( overall_flops[0] ), pastix_print_unit( overall_flops[0] ),
255  pastix_print_value( overall_flops[1] ), pastix_print_unit( overall_flops[1] ),
256  pastix_print_value( overall_flops[2] ), pastix_print_unit( overall_flops[2] ),
257  pastix_print_value( pastix_data->dparm[DPARM_FACT_RLFLOPS] ),
258  pastix_print_unit( pastix_data->dparm[DPARM_FACT_RLFLOPS] ) );
259  }
260 #endif /* defined(PASTIX_SUPERNODE_STATS) */
261 
262  kernels_trace_started = 0;
263  pastix_atomic_unlock( &lock_flops );
264  (void)pastix_data;
265  return total_flops;
266 }
solver.h
core_get_rklimit
pastix_int_t(* core_get_rklimit)(pastix_int_t, pastix_int_t)
Compute the maximal rank accepted for a given matrix size. The pointer is set according to the low-ra...
Definition: kernels_trace.c:30
solver_cblk_s::fblokptr
SolverBlok * fblokptr
Definition: solver.h:134
core_get_rklimit_end
static pastix_int_t core_get_rklimit_end(pastix_int_t M, pastix_int_t N)
Compute the maximal rank accepted for a given matrix size for Just-In-Time strategy.
Definition: pastix_lowrank.h:84
solver_cblk_s
Solver column block structure.
Definition: solver.h:127
kernelsTraceStart
void kernelsTraceStart(const pastix_data_t *pastix_data)
Start the trace module.
Definition: kernels_trace.c:66
bcsc.h
PastixVerboseNot
@ PastixVerboseNot
Definition: api.h:209
IPARM_GPU_NBR
@ IPARM_GPU_NBR
Definition: api.h:121
IPARM_VERBOSE
@ IPARM_VERBOSE
Definition: api.h:36
PastixFactLU
@ PastixFactLU
Definition: api.h:304
DPARM_FACT_RLFLOPS
@ DPARM_FACT_RLFLOPS
Definition: api.h:166
kernelsTraceStop
double kernelsTraceStop(const pastix_data_t *pastix_data)
Stop the trace module.
Definition: kernels_trace.c:160