PaStiX Handbook 6.4.0
Loading...
Searching...
No Matches
models.c
Go to the documentation of this file.
1/**
2 *
3 * @file models.c
4 *
5 * PaStiX performance models routines
6 *
7 * @copyright 2004-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8 * Univ. Bordeaux. All rights reserved.
9 *
10 * @version 6.4.0
11 * @author Mathieu Faverge
12 * @date 2024-07-05
13 *
14 **/
15#include "common.h"
16#include "models.h"
17
18/**
19 *******************************************************************************
20 *
21 * @ingroup pastix_internal
22 *
23 * @brief Convert a kernel string name found in a model file to its kernel Id
24 *
25 *******************************************************************************
26 *
27 * @param[in] kernelstr
28 * The kernel string name
29 *
30 * @param[out] nbcoef
31 * The number of coefficient that this kernel will use. Set to 0 on
32 * failure.
33 *
34 *******************************************************************************
35 *
36 * @retval The kernel Id on success
37 * @retval -1 on failure
38 *
39 *******************************************************************************/
40int
41modelsGetKernelId( const char *kernelstr,
42 int *nbcoef )
43{
44 if(0 == strcasecmp("getrf", kernelstr)) { *nbcoef = 4; return PastixKernelGETRF; }
45 if(0 == strcasecmp("hetrf", kernelstr)) { *nbcoef = 4; return PastixKernelHETRF; }
46 if(0 == strcasecmp("potrf", kernelstr)) { *nbcoef = 4; return PastixKernelPOTRF; }
47 if(0 == strcasecmp("pxtrf", kernelstr)) { *nbcoef = 4; return PastixKernelPXTRF; }
48 if(0 == strcasecmp("sytrf", kernelstr)) { *nbcoef = 4; return PastixKernelSYTRF; }
49
50 if(0 == strcasecmp("trsmcblk1d", kernelstr)) { *nbcoef = 6; return PastixKernelTRSMCblk1d; }
51 if(0 == strcasecmp("trsmcblk2d", kernelstr)) { *nbcoef = 6; return PastixKernelTRSMCblk2d; }
52 if(0 == strcasecmp("trsmcblklr", kernelstr)) { *nbcoef = 6; return PastixKernelTRSMCblkLR; }
53
54 if(0 == strcasecmp("trsmblok2d", kernelstr)) { *nbcoef = 6; return PastixKernelTRSMBlok2d; }
55 if(0 == strcasecmp("trsmbloklr", kernelstr)) { *nbcoef = 6; return PastixKernelTRSMBlokLR; }
56
57 if(0 == strcasecmp("gemmcblk1d1d", kernelstr)) { *nbcoef = 8; return PastixKernelGEMMCblk1d1d; }
58 if(0 == strcasecmp("gemmcblk1d2d", kernelstr)) { *nbcoef = 8; return PastixKernelGEMMCblk1d2d; }
59 if(0 == strcasecmp("gemmcblk2d2d", kernelstr)) { *nbcoef = 8; return PastixKernelGEMMCblk2d2d; }
60 if(0 == strcasecmp("gemmcblkfrlr", kernelstr)) { *nbcoef = 8; return PastixKernelGEMMCblkFRLR; }
61 if(0 == strcasecmp("gemmcblklrlr", kernelstr)) { *nbcoef = 8; return PastixKernelGEMMCblkLRLR; }
62
63 if(0 == strcasecmp("gemmblok2d2d", kernelstr)) { *nbcoef = 8; return PastixKernelGEMMBlok2d2d; }
64 if(0 == strcasecmp("gemmbloklrlr", kernelstr)) { *nbcoef = 8; return PastixKernelGEMMBlokLRLR; }
65
66 *nbcoef = 0;
67 return -1;
68}
69
70/**
71 *******************************************************************************
72 *
73 * @ingroup pastix_internal
74 *
75 * @brief Propagate a given model to all other similar cases to be sure
76 * everything is initialized.
77 *
78 * The given model coefficients defined by the couple (arithm, kernelid) is
79 * first extended to all the kernels of the same family in the same arithmetic,
80 * and it is then propagated to the other arithmetic by applying a computation
81 * ratio on the coefficients.
82 * - Single real costs 1
83 * - Double real costs 2
84 * - Single complex costs 3
85 * - Double complex costs 4
86 *
87 *******************************************************************************
88 *
89 * @param[inout] model
90 * The pointer to the allocated model to complete.
91 *
92 * @param[in] arithm
93 * The arithmetic of the initial coefficients to replicate.
94 *
95 * @param[in] kernelid
96 * The kernel Id of the initial coefficients to replicate.
97 *
98 *******************************************************************************/
/*
 * Replicate the coefficients stored at (arithm, kernelid) into the slots of
 * the same kernel family whose first coefficient still holds the 0xdeadbeef
 * marker: first for the other kernels in the same arithmetic, then for the
 * other arithmetics (a = 0..3) with the coefficients scaled by
 * (0.5 * a + 0.5) / (0.5 * arithm + 0.5). Slots that no longer hold the
 * marker are left untouched.
 */
99void
100modelsPropagate( pastix_model_t *model,
101 int arithm, pastix_ktype_t kernelid )
102{
103 double *coefs0 = model->coefficients[arithm][kernelid];
104 double ratio;
105 int a, k;
/* Default range is empty (kstart > kend): the generic loops below do nothing
 * unless a branch sets both bounds. */
106 int kstart = 0;
107 int kend = -1;
108
109 /* Look for loaded information about factorization kernels */
110 if ( kernelid < PastixKernelSCALOCblk ) {
/* Factorization family (GETRF..SYTRF): only 4 coefficients are copied */
111 for( k=PastixKernelGETRF; k<=PastixKernelSYTRF; k++) {
112 if ( (k == (int)kernelid) || (model->coefficients[arithm][k][0] != 0xdeadbeef) ) {
113 continue;
114 }
115
/* GETRF is weighted 2 and every other factorization 1; the ratio between
 * destination and source weights is applied to coefficients 2 and 3 only. */
116 ratio = (( k == (int)PastixKernelGETRF ) ? 2. : 1. ) / (( kernelid == PastixKernelGETRF ) ? 2. : 1. );
117
118 model->coefficients[arithm][k][0] = coefs0[0];
119 model->coefficients[arithm][k][1] = coefs0[1];
120 model->coefficients[arithm][k][2] = ratio * coefs0[2];
121 model->coefficients[arithm][k][3] = ratio * coefs0[3];
122 }
123
/* Same family, other arithmetics: all 4 coefficients are scaled */
124 for( a=0; a<4; a++) {
125 if (a == arithm) {
126 continue;
127 }
128 ratio = (0.5 * a + 0.5) / (0.5 * arithm + 0.5);
129
130 for( k=PastixKernelGETRF; k<=PastixKernelSYTRF; k++) {
131 if ( model->coefficients[a][k][0] != 0xdeadbeef ) {
132 continue;
133 }
134
135 model->coefficients[a][k][0] = ratio * model->coefficients[arithm][k][0];
136 model->coefficients[a][k][1] = ratio * model->coefficients[arithm][k][1];
137 model->coefficients[a][k][2] = ratio * model->coefficients[arithm][k][2];
138 model->coefficients[a][k][3] = ratio * model->coefficients[arithm][k][3];
139 }
140 }
141 }
/* Kernels in [PastixKernelSCALOCblk, PastixKernelTRSMCblk1d): nothing is
 * propagated, the range stays empty. */
142 else if ( kernelid < PastixKernelTRSMCblk1d ) {
143 }
144 else if ( kernelid < PastixKernelGEMMCblk1d1d ) {
145 kstart = PastixKernelTRSMCblk1d;
/* NOTE(review): the listing numbering jumps from 145 to 147, so one line is
 * missing here - presumably the matching `kend` assignment for the TRSM
 * family. Confirm against the upstream file. */
147 }
148 else {
/* NOTE(review): the listing numbering jumps from 148 to 151, so two lines are
 * missing here - presumably the `kstart`/`kend` assignments for the GEMM
 * family. Confirm against the upstream file. */
151 }
152
153 /*
154 * Propagate to other kernels of the same arithmetic
155 */
/* All 8 coefficient slots are copied, whatever the kernel family */
156 for( k=kstart; k<=kend; k++) {
157 if ( (k == (int)kernelid) || (model->coefficients[arithm][k][0] != 0xdeadbeef) ) {
158 continue;
159 }
160
161 model->coefficients[arithm][k][0] = coefs0[0];
162 model->coefficients[arithm][k][1] = coefs0[1];
163 model->coefficients[arithm][k][2] = coefs0[2];
164 model->coefficients[arithm][k][3] = coefs0[3];
165 model->coefficients[arithm][k][4] = coefs0[4];
166 model->coefficients[arithm][k][5] = coefs0[5];
167 model->coefficients[arithm][k][6] = coefs0[6];
168 model->coefficients[arithm][k][7] = coefs0[7];
169 }
170
171 /*
172 * Propagate to other arithmetics
173 */
174 for( a=0; a<4; a++) {
175 if (a == arithm) {
176 continue;
177 }
/* Equivalent to (a + 1) / (arithm + 1) */
178 ratio = (0.5 * a + 0.5) / (0.5 * arithm + 0.5);
179
180 for( k=kstart; k<=kend; k++) {
181 if ( model->coefficients[a][k][0] != 0xdeadbeef ) {
182 continue;
183 }
184
185 model->coefficients[a][k][0] = ratio * model->coefficients[arithm][k][0];
186 model->coefficients[a][k][1] = ratio * model->coefficients[arithm][k][1];
187 model->coefficients[a][k][2] = ratio * model->coefficients[arithm][k][2];
188 model->coefficients[a][k][3] = ratio * model->coefficients[arithm][k][3];
189 model->coefficients[a][k][4] = ratio * model->coefficients[arithm][k][4];
190 model->coefficients[a][k][5] = ratio * model->coefficients[arithm][k][5];
191 model->coefficients[a][k][6] = ratio * model->coefficients[arithm][k][6];
192 model->coefficients[a][k][7] = ratio * model->coefficients[arithm][k][7];
193 }
194 }
195}
196
197/**
198 *******************************************************************************
199 *
200 * @ingroup pastix_internal
201 *
202 * @brief Initialize the given model with the file given in parameters.
203 *
204 *******************************************************************************
205 *
206 * @param[inout] model
207 * The pointer to the allocated model to initialize.
208 *
209 * @param[in] modelfilename
210 * The name of the file in which the coefficient values are stored.
211 *
212 *******************************************************************************
213 *
214 * @return 0 on success.
215 * @return -1 on failure.
216 *
217 *******************************************************************************/
218int
219modelsRead( pastix_model_t *model,
220 const char *modelfilename )
221{
222 FILE *f = pastix_fopen( modelfilename );
223 char *str, *strcoef;
224 char kernelstr[13];
225 int rc, arithm, nbcoef;
226 size_t strsize = 256;
227 pastix_ktype_t kernelid;
228 double *coefs;
229
230 if ( f == NULL ) {
231 fprintf(stderr, "Can't open model file\n");
232 return -1;
233 }
234
235 str = malloc( strsize * sizeof(char) );
236 do {
237 rc = getline( &str, &strsize, f );
238 if ( rc == -1 ) {
239 perror( "modelsRead(getline header)" );
240 return -1;
241 }
242 }
243 while( str[0] == '#' );
244
245 /* Read the model name */
246 model->name = strdup( str );
247
248 /* Read the model values */
249 while( getline( &str, &strsize, f ) != -1 ) {
250
251 /* Skip commented lines */
252 if ( str[0] == '#' ) {
253 continue;
254 }
255
256 /* Read the arithmetic, and the kernel name */
257 if ( sscanf( str, "%d;%12[a-z0-9];", &arithm, kernelstr ) != 2 ) {
258 fprintf(stderr, "modelRead: %s - Error reading line (%s)\n", model->name, str );
259 continue;
260 }
261
262 if ( (arithm < 0) || (arithm > 3) ) {
263 fprintf(stderr, "modelRead: %s - Incorrect arithmetic %d in line:\n\t%s\n",
264 model->name, arithm, str );
265 continue;
266 }
267
268 kernelid = modelsGetKernelId( kernelstr, &nbcoef );
269 if ( (int)kernelid == -1 ) {
270 fprintf(stderr, "modelRead: %s - Incorrect kernel type %s in line:\n\t%s\n",
271 model->name, kernelstr, str );
272 continue;
273 }
274
275 /* Read the corrrect number of coefficients and store them */
276 coefs = model->coefficients[arithm][kernelid];
277 strcoef = str + 3 + strlen( kernelstr );
278
279 switch ( nbcoef ) {
280 case 4:
281 if ( sscanf( strcoef, "%le;%le;%le;%le",
282 coefs, coefs+1, coefs+2, coefs+3 ) != 4 )
283 {
284 fprintf(stderr, "modelRead: %s - Pb reading the 4 coefficients in line:\n\t%s\n", model->name, str );
285 continue;
286 }
287 break;
288 case 6:
289 if ( sscanf( strcoef, "%le;%le;%le;%le;%le;%le",
290 coefs, coefs+1, coefs+2,
291 coefs+3, coefs+4, coefs+5 ) != 6 )
292 {
293 fprintf(stderr, "modelRead: %s - Pb reading the 6 coefficients in line:\n\t%s\n", model->name, str );
294 continue;
295 }
296 break;
297 case 8:
298 if ( sscanf( strcoef, "%le;%le;%le;%le;%le;%le;%le;%le",
299 coefs, coefs+1, coefs+2, coefs+3,
300 coefs+4, coefs+5, coefs+6, coefs+7 ) != 8 )
301 {
302 fprintf(stderr, "modelRead: %s - Pb reading the 8 coefficients in line:\n\t%s\n", model->name, str );
303 continue;
304 }
305 break;
306 default:
307 ;
308 }
309
310 modelsPropagate( model, arithm, kernelid );
311 }
312
313 fclose(f);
314 free(str);
315
316 return 0;
317}
318
319/**
320 *******************************************************************************
321 *
322 * @ingroup pastix_internal
323 *
324 * @brief Initialize the CPU model with default values.
325 *
326 *******************************************************************************
327 *
328 * @param[inout] model
329 * The pointer to the allocated model to initialize.
330 *
331 *******************************************************************************
332 *
333 * @return 0 on success.
334 *
335 *******************************************************************************/
/*
 * Fill the model with embedded default coefficients, given for arithmetic
 * index 1, and call modelsPropagate() after each set. Always returns 0.
 */
336int
337modelsInitDefaultCPU( pastix_model_t *model )
338{
339 int a = 1; /* Real double */
340 int ktype;
341 double *coefs;
342
343 assert( model != NULL );
344
345 /*
346 * All coefficients given are for double real arithmetic
347 */
348 model->name = strdup("AMD Opteron 6180 - Intel MKL");
349
350 /* POTRF */
351 ktype = PastixKernelPOTRF;
352 coefs = &(model->coefficients[a][ktype][0]);
353 coefs[0] = 4.071507e-07;
354 coefs[1] = -1.469893e-07;
355 coefs[2] = 1.707006e-08;
356 coefs[3] = 2.439599e-11;
357 modelsPropagate( model, a, ktype );
358
359 /* TRSM Cblk */
/* NOTE(review): the listing numbering jumps from 359 to 361, so one line is
 * missing here - presumably the `ktype` assignment selecting a TRSM Cblk
 * kernel. Confirm against the upstream file. */
361 coefs = &(model->coefficients[a][ktype][0]);
362 coefs[0] = 3.255168e-06;
363 coefs[1] = 3.976198e-08;
364 coefs[2] = 0.;
365 coefs[3] = 0.;
366 coefs[4] = 0.;
367 coefs[5] = 2.626177e-10;
368 modelsPropagate( model, a, ktype );
369
370 /* TRSM Blok */
371 /*
372 * We don't have a TRSM blok model for this old architecture, so we use the
373 * TRSM Cblk
374 */
375
376 /* GEMM Cblk */
/* NOTE(review): listing numbering jumps from 376 to 378 - presumably the
 * `ktype` assignment selecting a GEMM Cblk kernel is missing. Confirm. */
378 coefs = &(model->coefficients[a][ktype][0]);
379 coefs[0] = 1.216278e-06;
380 coefs[1] = 0.;
381 coefs[2] = -2.704179e-10;
382 coefs[3] = 1.148989e-07;
383 coefs[4] = 2.724804e-10;
384 coefs[5] = 1.328900e-09;
385 coefs[6] = 0.;
386 coefs[7] = 2.429169e-10;
387 modelsPropagate( model, a, ktype );
388
389 /* GEMM Blok */
/* NOTE(review): listing numbering jumps from 389 to 391 - presumably the
 * `ktype` assignment selecting a GEMM Blok kernel is missing. Confirm. */
391 coefs = &(model->coefficients[a][ktype][0]);
392 coefs[0] = 0.0;
393 coefs[1] = 0.0;
394 coefs[2] = 0.0;
395 coefs[3] = 0.0;
396 coefs[4] = 0.0;
397 coefs[5] = 0.0;
398 coefs[6] = 0.0;
/* Only the last coefficient is non zero for this kernel */
399 coefs[7] = 2. / 24.e9;
400 modelsPropagate( model, a, ktype );
401
402 return 0;
403}
404
405/**
406 *******************************************************************************
407 *
408 * @ingroup pastix_internal
409 *
410 * @brief Initialize the GPU model with default values.
411 *
412 *******************************************************************************
413 *
414 * @param[inout] model
415 * The pointer to the allocated model to initialize.
416 *
417 *******************************************************************************
418 *
419 * @return 0 on success.
420 *
421 *******************************************************************************/
/*
 * Fill the model with embedded default coefficients, given for arithmetic
 * index 1, and call modelsPropagate() after each set. Always returns 0.
 */
422int
423modelsInitDefaultGPU( pastix_model_t *model )
424{
425 int a = 1; /* Real double */
426 int ktype;
427 double *coefs;
428
429 assert( model != NULL );
430
431 /*
432 * All coefficients given are for double real arithmetic
433 */
434 model->name = strdup("Nvidia K40 GK1108L - CUDA 8.0");
435
436 /* TRSM Blok */
/* NOTE(review): the listing numbering jumps from 436 to 438, so one line is
 * missing here - presumably the first `ktype` assignment (a TRSM Blok
 * kernel); as displayed, ktype would be read before being set. Confirm
 * against the upstream file. */
438 coefs = &(model->coefficients[a][ktype][0]);
439 coefs[0] = -3.16663635648446e-05;
440 coefs[1] = 2.63809317549331e-06;
441 coefs[2] = 5.86447245256688e-07;
442 coefs[3] = -1.57859559108480e-09;
443 coefs[4] = -4.74303242824929e-09;
444 coefs[5] = 5.36284121953867e-12;
445 modelsPropagate( model, a, ktype );
446
447 /* GEMM Cblk */
/* NOTE(review): listing numbering jumps from 447 to 449 - presumably the
 * `ktype` assignment selecting a GEMM Cblk kernel is missing. Confirm. */
449 coefs = &(model->coefficients[a][ktype][0]);
450 coefs[0] = 1.216278e-06;
451 coefs[1] = 0.;
452 coefs[2] = -2.704179e-10;
453 coefs[3] = 1.148989e-07;
454 coefs[4] = 2.724804e-10;
455 coefs[5] = 1.328900e-09;
456 coefs[6] = 0.;
457 coefs[7] = 2.429169e-10;
458 modelsPropagate( model, a, ktype );
459
460 /* GEMM Blok */
/* NOTE(review): listing numbering jumps from 460 to 462 - presumably the
 * `ktype` assignment selecting a GEMM Blok kernel is missing. Confirm. */
462 coefs = &(model->coefficients[a][ktype][0]);
463 coefs[0] = 0.0;
464 coefs[1] = 0.0;
465 coefs[2] = 0.0;
466 coefs[3] = 0.0;
467 coefs[4] = 0.0;
468 coefs[5] = 0.0;
469 coefs[6] = 0.0;
/* Only the last coefficient is non zero for this kernel */
470 coefs[7] = 2. / 1.2e12;
471 modelsPropagate( model, a, ktype );
472
473 return 0;
474}
475
476/**
477 *******************************************************************************
478 *
479 * @ingroup pastix_api
480 *
481 * @brief Create a new model data structure and initialize the values to their
482 * default.
483 *
484 *******************************************************************************
485 *
486 * @return The pointer to the allocated and initialized data structure.
487 *
488 *******************************************************************************/
489pastix_model_t *
491{
492 pastix_model_t *model = malloc(sizeof(pastix_model_t));
493
494 int a, k;
495
496 memset( model, 0, sizeof( pastix_model_t ) );
497
498 for(a=0; a<4; a++) {
499 for(k=0; k<PastixKernelLvl1Nbr; k++) {
500 model->coefficients[a][k][0] = 0xdeadbeef;
501 }
502 }
503 return model;
504}
505
506/**
507 *******************************************************************************
508 *
509 * @ingroup pastix_api
510 *
511 * @brief Free a model data structure.
512 *
513 *******************************************************************************
514 *
515 * @param[inout] model
516 * The model structure to free.
517 *
518 *******************************************************************************/
519void
520pastixModelsFree( pastix_model_t *model )
521{
522 if ( model != NULL ) {
523 if ( model->name != NULL ) {
524 free(model->name);
525 }
526 free(model);
527 }
528}
529
530/**
531 *******************************************************************************
532 *
533 * @ingroup pastix_api
534 *
535 * @brief Load the performance models that will be used by the solver
536 *
537 * This function initializes the model coefficients with the values stored in
538 * the files defined by the environment variables PASTIX_MODELS_CPU and
539 * PASTIX_MODELS_GPU. If they are not defined, models are initialized with the
540 * embedded default models.
541 *
542 *******************************************************************************
543 *
544 * @param[inout] pastix_data
545 * The pastix_data structure in which to store the CPU and GPU models.
546 *
547 *******************************************************************************/
548void
550{
551 char *filename = NULL;
552 int rc = 0;
553
554 /*
555 * Get the model filename for the CPUs
556 */
557 pastix_data->cpu_models = pastixModelsNew();
558 filename = pastix_getenv( "PASTIX_MODELS_CPU" );
559
560 if ( filename == NULL ) {
561 rc = modelsInitDefaultCPU( pastix_data->cpu_models );
562 }
563 else {
564 rc = modelsRead( pastix_data->cpu_models,
565 filename );
566 pastix_cleanenv( filename );
567 }
568 if ( rc == -1 ) {
569 pastixModelsFree( pastix_data->cpu_models );
570 pastix_data->cpu_models = NULL;
571 }
572
573 /*
574 * Get the model filename for the GPUs
575 */
576 pastix_data->gpu_models = pastixModelsNew();
577 filename = pastix_getenv( "PASTIX_MODELS_GPU" );
578
579 if ( filename == NULL ) {
580 rc = modelsInitDefaultGPU( pastix_data->gpu_models );
581 }
582 else {
583 rc = modelsRead( pastix_data->gpu_models,
584 filename );
585 pastix_cleanenv( filename );
586 }
587 if ( rc == -1 ) {
588 pastixModelsFree( pastix_data->gpu_models );
589 pastix_data->gpu_models = NULL;
590 }
591}
enum pastix_ktype_e pastix_ktype_t
List of the Level 1 events that may be traced in PaStiX.
@ PastixKernelSCALOCblk
@ PastixKernelGEMMCblkFRLR
@ PastixKernelTRSMBlokLR
@ PastixKernelTRSMCblk2d
@ PastixKernelSYTRF
@ PastixKernelGEMMBlokLRLR
@ PastixKernelGEMMCblk1d2d
@ PastixKernelPOTRF
@ PastixKernelTRSMCblk1d
@ PastixKernelHETRF
@ PastixKernelPXTRF
@ PastixKernelGETRF
@ PastixKernelGEMMCblkLRLR
@ PastixKernelTRSMBlok2d
@ PastixKernelGEMMCblk1d1d
@ PastixKernelTRSMCblkLR
@ PastixKernelGEMMCblk2d2d
@ PastixKernelGEMMBlok2d2d
pastix_model_t * pastixModelsNew(void)
Create a new model data structure and initialize the values to their default.
Definition models.c:490
void pastixModelsFree(pastix_model_t *model)
Free a model data structure.
Definition models.c:520
void pastixModelsLoad(pastix_data_t *pastix_data)
Load the performance models that will be used by the solver.
Definition models.c:549
FILE * pastix_fopen(const char *filename)
Open a file in the current directory in read only mode.
Definition api.c:298
pastix_model_t * cpu_models
Definition pastixdata.h:107
pastix_model_t * gpu_models
Definition pastixdata.h:108
Main PaStiX data structure.
Definition pastixdata.h:68
int modelsInitDefaultCPU(pastix_model_t *model)
Initialize the CPU model with default values.
Definition models.c:337
int modelsRead(pastix_model_t *model, const char *modelfilename)
Initialize the given model with the file given in parameters.
Definition models.c:219
void modelsPropagate(pastix_model_t *model, int arithm, pastix_ktype_t kernelid)
Propagate a given model to all other similar cases to be sure everything is initialized.
Definition models.c:100
int modelsInitDefaultGPU(pastix_model_t *model)
Initialize the GPU model with default values.
Definition models.c:423
int modelsGetKernelId(const char *kernelstr, int *nbcoef)
Convert a kernel string name found in a model file to its kernel Id.
Definition models.c:41