PaStiX Handbook  6.2.1
models.c
Go to the documentation of this file.
1 /**
2  *
3  * @file models.c
4  *
5  * PaStiX performance models routines
6  *
7  * @copyright 2004-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.2.0
11  * @author Mathieu Faverge
12  * @date 2020-03-02
13  *
14  **/
15 #include "common.h"
16 #include "models.h"
17 
18 /**
19  *******************************************************************************
20  *
21  * @ingroup pastix_internal
22  *
23  * @brief Convert a kernel string name found in a model file to its kernel Id
24  *
25  *******************************************************************************
26  *
27  * @param[in] kernelstr
28  * The kernel string name
29  *
30  * @param[out] nbcoef
31  * The number of coefficient that this kernel will use. Set to 0 on
32  * failure.
33  *
34  *******************************************************************************
35  *
36  * @retval The kernel Id on success
37  * @retval -1 on failure
38  *
39  *******************************************************************************/
40 int
41 modelsGetKernelId( const char *kernelstr,
42  int *nbcoef )
43 {
44  if(0 == strcasecmp("getrf", kernelstr)) { *nbcoef = 4; return PastixKernelGETRF; }
45  if(0 == strcasecmp("hetrf", kernelstr)) { *nbcoef = 4; return PastixKernelHETRF; }
46  if(0 == strcasecmp("potrf", kernelstr)) { *nbcoef = 4; return PastixKernelPOTRF; }
47  if(0 == strcasecmp("pxtrf", kernelstr)) { *nbcoef = 4; return PastixKernelPXTRF; }
48  if(0 == strcasecmp("sytrf", kernelstr)) { *nbcoef = 4; return PastixKernelSYTRF; }
49 
50  if(0 == strcasecmp("trsmcblk1d", kernelstr)) { *nbcoef = 6; return PastixKernelTRSMCblk1d; }
51  if(0 == strcasecmp("trsmcblk2d", kernelstr)) { *nbcoef = 6; return PastixKernelTRSMCblk2d; }
52  if(0 == strcasecmp("trsmcblklr", kernelstr)) { *nbcoef = 6; return PastixKernelTRSMCblkLR; }
53 
54  if(0 == strcasecmp("trsmblok2d", kernelstr)) { *nbcoef = 6; return PastixKernelTRSMBlok2d; }
55  if(0 == strcasecmp("trsmbloklr", kernelstr)) { *nbcoef = 6; return PastixKernelTRSMBlokLR; }
56 
57  if(0 == strcasecmp("gemmcblk1d1d", kernelstr)) { *nbcoef = 8; return PastixKernelGEMMCblk1d1d; }
58  if(0 == strcasecmp("gemmcblk1d2d", kernelstr)) { *nbcoef = 8; return PastixKernelGEMMCblk1d2d; }
59  if(0 == strcasecmp("gemmcblk2d2d", kernelstr)) { *nbcoef = 8; return PastixKernelGEMMCblk2d2d; }
60  if(0 == strcasecmp("gemmcblkfrlr", kernelstr)) { *nbcoef = 8; return PastixKernelGEMMCblkFRLR; }
61  if(0 == strcasecmp("gemmcblklrlr", kernelstr)) { *nbcoef = 8; return PastixKernelGEMMCblkLRLR; }
62 
63  if(0 == strcasecmp("gemmblok2d2d", kernelstr)) { *nbcoef = 8; return PastixKernelGEMMBlok2d2d; }
64  if(0 == strcasecmp("gemmbloklrlr", kernelstr)) { *nbcoef = 8; return PastixKernelGEMMBlokLRLR; }
65 
66  *nbcoef = 0;
67  return -1;
68 }
69 
70 /**
71  *******************************************************************************
72  *
73  * @ingroup pastix_internal
74  *
75  * @brief Propagate a given model to all other similare cases to be sure
76  * everything is initialized.
77  *
78  * The given model coefficients defined by the couple (arithm, kernelid) is
79  * first extended to all the kernels of the same family in the same arithmetic,
80  * and it is then propagated to the other arithmetic by applying a computation
81  * ratio on the coefficients.
82  * - Single real costs 1
83  * - Double real costs 2
84  * - Single complex costs 3
85  * - Double complex costs 4
86  *
87  *******************************************************************************
88  *
89  * @param[inout] model
90  * The pointer to the allocated model to complete.
91  *
92  * @param[in] arithm
93  * The arithmetic of the initial coefficients to replicate.
94  *
95  * @param[in] kernelid
96  * The kernel Id of the initial coefficients to replicate.
97  *
98  *******************************************************************************/
99 void
100 modelsPropagate( pastix_model_t *model,
101  int arithm, pastix_ktype_t kernelid )
102 {
103  double *coefs0 = model->coefficients[arithm][kernelid];
104  double ratio;
105  int a, k;
106  int kstart = 0;
107  int kend = -1;
108 
109  /* Look for loaded information about factorization kernels */
110  if ( kernelid < PastixKernelSCALOCblk ) {
111  for( k=PastixKernelGETRF; k<=PastixKernelSYTRF; k++) {
112  if ( (k == (int)kernelid) || (model->coefficients[arithm][k][0] != 0xdeadbeef) ) {
113  continue;
114  }
115 
116  ratio = (( k == (int)PastixKernelGETRF ) ? 2. : 1. ) / (( kernelid == PastixKernelGETRF ) ? 2. : 1. );
117 
118  model->coefficients[arithm][k][0] = coefs0[0];
119  model->coefficients[arithm][k][1] = coefs0[1];
120  model->coefficients[arithm][k][2] = ratio * coefs0[2];
121  model->coefficients[arithm][k][3] = ratio * coefs0[3];
122  }
123 
124  for( a=0; a<4; a++) {
125  if (a == arithm) {
126  continue;
127  }
128  ratio = (0.5 * a + 0.5) / (0.5 * arithm + 0.5);
129 
130  for( k=PastixKernelGETRF; k<=PastixKernelSYTRF; k++) {
131  if ( model->coefficients[a][k][0] != 0xdeadbeef ) {
132  continue;
133  }
134 
135  model->coefficients[a][k][0] = ratio * model->coefficients[arithm][k][0];
136  model->coefficients[a][k][1] = ratio * model->coefficients[arithm][k][1];
137  model->coefficients[a][k][2] = ratio * model->coefficients[arithm][k][2];
138  model->coefficients[a][k][3] = ratio * model->coefficients[arithm][k][3];
139  }
140  }
141  }
142  else if ( kernelid < PastixKernelTRSMCblk1d ) {
143  }
144  else if ( kernelid < PastixKernelGEMMCblk1d1d ) {
145  kstart = PastixKernelTRSMCblk1d;
146  kend = PastixKernelTRSMBlok2d;
147  }
148  else {
149  kstart = PastixKernelGEMMCblk1d1d;
150  kend = PastixKernelGEMMBlok2d2d;
151  }
152 
153  /*
154  * Propagate to other kernels of the same arithmetic
155  */
156  for( k=kstart; k<=kend; k++) {
157  if ( (k == (int)kernelid) || (model->coefficients[arithm][k][0] != 0xdeadbeef) ) {
158  continue;
159  }
160 
161  model->coefficients[arithm][k][0] = coefs0[0];
162  model->coefficients[arithm][k][1] = coefs0[1];
163  model->coefficients[arithm][k][2] = coefs0[2];
164  model->coefficients[arithm][k][3] = coefs0[3];
165  model->coefficients[arithm][k][4] = coefs0[4];
166  model->coefficients[arithm][k][5] = coefs0[5];
167  model->coefficients[arithm][k][6] = coefs0[6];
168  model->coefficients[arithm][k][7] = coefs0[7];
169  }
170 
171  /*
172  * Propagate to other arithmetics
173  */
174  for( a=0; a<4; a++) {
175  if (a == arithm) {
176  continue;
177  }
178  ratio = (0.5 * a + 0.5) / (0.5 * arithm + 0.5);
179 
180  for( k=kstart; k<=kend; k++) {
181  if ( model->coefficients[a][k][0] != 0xdeadbeef ) {
182  continue;
183  }
184 
185  model->coefficients[a][k][0] = ratio * model->coefficients[arithm][k][0];
186  model->coefficients[a][k][1] = ratio * model->coefficients[arithm][k][1];
187  model->coefficients[a][k][2] = ratio * model->coefficients[arithm][k][2];
188  model->coefficients[a][k][3] = ratio * model->coefficients[arithm][k][3];
189  model->coefficients[a][k][4] = ratio * model->coefficients[arithm][k][4];
190  model->coefficients[a][k][5] = ratio * model->coefficients[arithm][k][5];
191  model->coefficients[a][k][6] = ratio * model->coefficients[arithm][k][6];
192  model->coefficients[a][k][7] = ratio * model->coefficients[arithm][k][7];
193  }
194  }
195 }
196 
197 /**
198  *******************************************************************************
199  *
200  * @ingroup pastix_internal
201  *
202  * @brief Initialize the given model with the file given in parameters.
203  *
204  *******************************************************************************
205  *
206  * @param[inout] model
207  * The pointer to the allocated model to initialize.
208  *
209  * @param[in] modelfilename
210  * The name of the file in which the coefficient values are stored.
211  *
212  *******************************************************************************
213  *
214  * @return 0 on success.
215  * @return -1 on failure.
216  *
217  *******************************************************************************/
218 int
219 modelsRead( pastix_model_t *model,
220  const char *modelfilename )
221 {
222  FILE *f = pastix_fopen( modelfilename );
223  char *str, *strcoef;
224  char kernelstr[13];
225  int rc, arithm, nbcoef;
226  size_t strsize = 256;
227  pastix_ktype_t kernelid;
228  double *coefs;
229 
230  if ( f == NULL ) {
231  fprintf(stderr, "Can't open model file\n");
232  return -1;
233  }
234 
235  str = malloc( strsize * sizeof(char) );
236  do {
237  rc = getline( &str, &strsize, f );
238  if ( rc == -1 ) {
239  perror( "modelsRead(getline header)" );
240  return -1;
241  }
242  }
243  while( str[0] == '#' );
244 
245  /* Read the model name */
246  model->name = strdup( str );
247 
248  /* Read the model values */
249  while( getline( &str, &strsize, f ) != -1 ) {
250 
251  /* Skip commented lines */
252  if ( str[0] == '#' ) {
253  continue;
254  }
255 
256  /* Read the arithmetic, and the kernel name */
257  if ( sscanf( str, "%d;%12[a-z0-9];", &arithm, kernelstr ) != 2 ) {
258  fprintf(stderr, "modelRead: %s - Error reading line (%s)\n", model->name, str );
259  continue;
260  }
261 
262  if ( (arithm < 0) || (arithm > 3) ) {
263  fprintf(stderr, "modelRead: %s - Incorrect arithmetic %d in line:\n\t%s\n",
264  model->name, arithm, str );
265  continue;
266  }
267 
268  kernelid = modelsGetKernelId( kernelstr, &nbcoef );
269  if ( (int)kernelid == -1 ) {
270  fprintf(stderr, "modelRead: %s - Incorrect kernel type %s in line:\n\t%s\n",
271  model->name, kernelstr, str );
272  continue;
273  }
274 
275  /* Read the corrrect number of coefficients and store them */
276  coefs = model->coefficients[arithm][kernelid];
277  strcoef = str + 3 + strlen( kernelstr );
278 
279  switch ( nbcoef ) {
280  case 4:
281  if ( sscanf( strcoef, "%le;%le;%le;%le",
282  coefs, coefs+1, coefs+2, coefs+3 ) != 4 )
283  {
284  fprintf(stderr, "modelRead: %s - Pb reading the 4 coefficients in line:\n\t%s\n", model->name, str );
285  continue;
286  }
287  break;
288  case 6:
289  if ( sscanf( strcoef, "%le;%le;%le;%le;%le;%le",
290  coefs, coefs+1, coefs+2,
291  coefs+3, coefs+4, coefs+5 ) != 6 )
292  {
293  fprintf(stderr, "modelRead: %s - Pb reading the 6 coefficients in line:\n\t%s\n", model->name, str );
294  continue;
295  }
296  break;
297  case 8:
298  if ( sscanf( strcoef, "%le;%le;%le;%le;%le;%le;%le;%le",
299  coefs, coefs+1, coefs+2, coefs+3,
300  coefs+4, coefs+5, coefs+6, coefs+7 ) != 8 )
301  {
302  fprintf(stderr, "modelRead: %s - Pb reading the 8 coefficients in line:\n\t%s\n", model->name, str );
303  continue;
304  }
305  break;
306  default:
307  ;
308  }
309 
310  modelsPropagate( model, arithm, kernelid );
311  }
312 
313  fclose(f);
314  free(str);
315 
316  return 0;
317 }
318 
319 /**
320  *******************************************************************************
321  *
322  * @ingroup pastix_internal
323  *
324  * @brief Initialize the CPU model with default values.
325  *
326  *******************************************************************************
327  *
328  * @param[inout] model
329  * The pointer to the allocated model to initialize.
330  *
331  *******************************************************************************
332  *
333  * @return 0 on success.
334  *
335  *******************************************************************************/
336 int
337 modelsInitDefaultCPU( pastix_model_t *model )
338 {
339  int a = 1; /* Real double */
340  int ktype;
341  double *coefs;
342 
343  assert( model != NULL );
344 
345  /*
346  * All coefficiensts given are for double real arithmetic
347  */
348  model->name = strdup("AMD Opteron 6180 - Intel MKL");
349 
350  /* POTRF */
351  ktype = PastixKernelPOTRF;
352  coefs = &(model->coefficients[a][ktype][0]);
353  coefs[0] = 4.071507e-07;
354  coefs[1] = -1.469893e-07;
355  coefs[2] = 1.707006e-08;
356  coefs[3] = 2.439599e-11;
357  modelsPropagate( model, a, ktype );
358 
359  /* TRSM Cblk */
360  ktype = PastixKernelTRSMCblk2d;
361  coefs = &(model->coefficients[a][ktype][0]);
362  coefs[0] = 3.255168e-06;
363  coefs[1] = 3.976198e-08;
364  coefs[2] = 0.;
365  coefs[3] = 0.;
366  coefs[4] = 0.;
367  coefs[5] = 2.626177e-10;
368  modelsPropagate( model, a, ktype );
369 
370  /* TRSM Blok */
371  /*
372  * We don't have a TRSM blok model for this old architecture, so we use the
373  * TRSM Cblk
374  */
375 
376  /* GEMM Cblk */
377  ktype = PastixKernelGEMMCblk2d2d;
378  coefs = &(model->coefficients[a][ktype][0]);
379  coefs[0] = 1.216278e-06;
380  coefs[1] = 0.;
381  coefs[2] = -2.704179e-10;
382  coefs[3] = 1.148989e-07;
383  coefs[4] = 2.724804e-10;
384  coefs[5] = 1.328900e-09;
385  coefs[6] = 0.;
386  coefs[7] = 2.429169e-10;
387  modelsPropagate( model, a, ktype );
388 
389  /* GEMM Blok */
390  ktype = PastixKernelGEMMBlok2d2d;
391  coefs = &(model->coefficients[a][ktype][0]);
392  coefs[0] = 0.0;
393  coefs[1] = 0.0;
394  coefs[2] = 0.0;
395  coefs[3] = 0.0;
396  coefs[4] = 0.0;
397  coefs[5] = 0.0;
398  coefs[6] = 0.0;
399  coefs[7] = 2. / 24.e9;
400  modelsPropagate( model, a, ktype );
401 
402  return 0;
403 }
404 
405 /**
406  *******************************************************************************
407  *
408  * @ingroup pastix_internal
409  *
410  * @brief Initialize the GPU model with default values.
411  *
412  *******************************************************************************
413  *
414  * @param[inout] model
415  * The pointer to the allocated model to initialize.
416  *
417  *******************************************************************************
418  *
419  * @return 0 on success.
420  *
421  *******************************************************************************/
422 int
423 modelsInitDefaultGPU( pastix_model_t *model )
424 {
425  int a = 1; /* Real double */
426  int ktype;
427  double *coefs;
428 
429  assert( model != NULL );
430 
431  /*
432  * All coefficiensts given are for double real arithmetic
433  */
434  model->name = strdup("Nvidia K40 GK1108L - CUDA 8.0");
435 
436  /* TRSM Blok */
437  ktype = PastixKernelTRSMBlok2d;
438  coefs = &(model->coefficients[a][ktype][0]);
439  coefs[0] = -3.16663635648446e-05;
440  coefs[1] = 2.63809317549331e-06;
441  coefs[2] = 5.86447245256688e-07;
442  coefs[3] = -1.57859559108480e-09;
443  coefs[4] = -4.74303242824929e-09;
444  coefs[5] = 5.36284121953867e-12;
445  modelsPropagate( model, a, ktype );
446 
447  /* GEMM Cblk */
448  ktype = PastixKernelGEMMCblk2d2d;
449  coefs = &(model->coefficients[a][ktype][0]);
450  coefs[0] = 1.216278e-06;
451  coefs[1] = 0.;
452  coefs[2] = -2.704179e-10;
453  coefs[3] = 1.148989e-07;
454  coefs[4] = 2.724804e-10;
455  coefs[5] = 1.328900e-09;
456  coefs[6] = 0.;
457  coefs[7] = 2.429169e-10;
458  modelsPropagate( model, a, ktype );
459 
460  /* GEMM Blok */
461  ktype = PastixKernelGEMMBlok2d2d;
462  coefs = &(model->coefficients[a][ktype][0]);
463  coefs[0] = 0.0;
464  coefs[1] = 0.0;
465  coefs[2] = 0.0;
466  coefs[3] = 0.0;
467  coefs[4] = 0.0;
468  coefs[5] = 0.0;
469  coefs[6] = 0.0;
470  coefs[7] = 2. / 1.2e12;
471  modelsPropagate( model, a, ktype );
472 
473  return 0;
474 }
475 
476 /**
477  *******************************************************************************
478  *
479  * @ingroup pastix_api
480  *
481  * @brief Create a new model data structure and initialize the values to their
482  * default.
483  *
484  *******************************************************************************
485  *
486  * @return The pointer to the allocated and initialized data structure.
487  *
488  *******************************************************************************/
489 pastix_model_t *
491 {
492  pastix_model_t *model = malloc(sizeof(pastix_model_t));
493 
494  int a, k;
495 
496  memset( model, 0, sizeof( pastix_model_t ) );
497 
498  for(a=0; a<4; a++) {
499  for(k=0; k<PastixKernelLvl1Nbr; k++) {
500  model->coefficients[a][k][0] = 0xdeadbeef;
501  }
502  }
503  return model;
504 }
505 
506 /**
507  *******************************************************************************
508  *
509  * @ingroup pastix_api
510  *
511  * @brief Free a model data structure.
512  *
513  *******************************************************************************
514  *
515  * @param[inout] model
516  * The model structure to free.
517  *
518  *******************************************************************************/
519 void
520 pastixModelsFree( pastix_model_t *model )
521 {
522  if ( model != NULL ) {
523  if ( model->name != NULL ) {
524  free(model->name);
525  }
526  free(model);
527  }
528 }
529 
530 /**
531  *******************************************************************************
532  *
533  * @ingroup pastix_api
534  *
535  * @brief Load the performance models that will be used by the solver
536  *
537  * This function initializes the model coefficients with the values stored in
538  * the files defined by the environment variables PASTIX_MODELS_CPU and
539  * PASTIX_MODELS_GPU. If they are not defined, models are initialized with the
540  * embedded default models.
541  *
542  *******************************************************************************
543  *
544  * @param[inout] pastix_data
545  * The pastix_data structure in which to store the CPU and GPU models.
546  *
547  *******************************************************************************/
548 void
549 pastixModelsLoad( pastix_data_t *pastix_data )
550 {
551  char *filename = NULL;
552  int rc = 0;
553 
554  /*
555  * Get the model filename for the CPUs
556  */
557  pastix_data->cpu_models = pastixModelsNew();
558  filename = pastix_getenv( "PASTIX_MODELS_CPU" );
559 
560  if ( filename == NULL ) {
561  rc = modelsInitDefaultCPU( pastix_data->cpu_models );
562  }
563  else {
564  rc = modelsRead( pastix_data->cpu_models,
565  filename );
566  pastix_cleanenv( filename );
567  }
568  if ( rc == -1 ) {
569  pastixModelsFree( pastix_data->cpu_models );
570  pastix_data->cpu_models = NULL;
571  }
572 
573  /*
574  * Get the model filename for the GPUs
575  */
576  pastix_data->gpu_models = pastixModelsNew();
577  filename = pastix_getenv( "PASTIX_MODELS_GPU" );
578 
579  if ( filename == NULL ) {
580  rc = modelsInitDefaultGPU( pastix_data->gpu_models );
581  }
582  else {
583  rc = modelsRead( pastix_data->gpu_models,
584  filename );
585  pastix_cleanenv( filename );
586  }
587  if ( rc == -1 ) {
588  pastixModelsFree( pastix_data->gpu_models );
589  pastix_data->gpu_models = NULL;
590  }
591 }
modelsInitDefaultCPU
int modelsInitDefaultCPU(pastix_model_t *model)
Initialize the CPU model with default values.
Definition: models.c:337
pastixModelsFree
void pastixModelsFree(pastix_model_t *model)
Free a model data structure.
Definition: models.c:520
modelsInitDefaultGPU
int modelsInitDefaultGPU(pastix_model_t *model)
Initialize the GPU model with default values.
Definition: models.c:423
modelsPropagate
void modelsPropagate(pastix_model_t *model, int arithm, pastix_ktype_t kernelid)
Propagate a given model to all other similar cases to be sure everything is initialized.
Definition: models.c:100
modelsRead
int modelsRead(pastix_model_t *model, const char *modelfilename)
Initialize the given model with the file given in parameters.
Definition: models.c:219
pastixModelsNew
pastix_model_t * pastixModelsNew()
Create a new model data structure and initialize the values to their default.
Definition: models.c:490
modelsGetKernelId
int modelsGetKernelId(const char *kernelstr, int *nbcoef)
Convert a kernel string name found in a model file to its kernel Id.
Definition: models.c:41
pastix_fopen
FILE * pastix_fopen(const char *filename)
Open a file in the current directory in read only mode.
Definition: api.c:279
pastixModelsLoad
void pastixModelsLoad(pastix_data_t *pastix_data)
Load the performance models that will be used by the solver.
Definition: models.c:549