44 if(0 == strcasecmp(
"getrf", kernelstr)) { *nbcoef = 4;
return PastixKernelGETRF; }
45 if(0 == strcasecmp(
"hetrf", kernelstr)) { *nbcoef = 4;
return PastixKernelHETRF; }
46 if(0 == strcasecmp(
"potrf", kernelstr)) { *nbcoef = 4;
return PastixKernelPOTRF; }
47 if(0 == strcasecmp(
"pxtrf", kernelstr)) { *nbcoef = 4;
return PastixKernelPXTRF; }
48 if(0 == strcasecmp(
"sytrf", kernelstr)) { *nbcoef = 4;
return PastixKernelSYTRF; }
103 double *coefs0 = model->coefficients[arithm][kernelid];
112 if ( (k == (
int)kernelid) || (model->coefficients[arithm][k][0] != 0xdeadbeef) ) {
118 model->coefficients[arithm][k][0] = coefs0[0];
119 model->coefficients[arithm][k][1] = coefs0[1];
120 model->coefficients[arithm][k][2] = ratio * coefs0[2];
121 model->coefficients[arithm][k][3] = ratio * coefs0[3];
124 for( a=0; a<4; a++) {
128 ratio = (0.5 * a + 0.5) / (0.5 * arithm + 0.5);
131 if ( model->coefficients[a][k][0] != 0xdeadbeef ) {
135 model->coefficients[a][k][0] = ratio * model->coefficients[arithm][k][0];
136 model->coefficients[a][k][1] = ratio * model->coefficients[arithm][k][1];
137 model->coefficients[a][k][2] = ratio * model->coefficients[arithm][k][2];
138 model->coefficients[a][k][3] = ratio * model->coefficients[arithm][k][3];
156 for( k=kstart; k<=kend; k++) {
157 if ( (k == (
int)kernelid) || (model->coefficients[arithm][k][0] != 0xdeadbeef) ) {
161 model->coefficients[arithm][k][0] = coefs0[0];
162 model->coefficients[arithm][k][1] = coefs0[1];
163 model->coefficients[arithm][k][2] = coefs0[2];
164 model->coefficients[arithm][k][3] = coefs0[3];
165 model->coefficients[arithm][k][4] = coefs0[4];
166 model->coefficients[arithm][k][5] = coefs0[5];
167 model->coefficients[arithm][k][6] = coefs0[6];
168 model->coefficients[arithm][k][7] = coefs0[7];
174 for( a=0; a<4; a++) {
178 ratio = (0.5 * a + 0.5) / (0.5 * arithm + 0.5);
180 for( k=kstart; k<=kend; k++) {
181 if ( model->coefficients[a][k][0] != 0xdeadbeef ) {
185 model->coefficients[a][k][0] = ratio * model->coefficients[arithm][k][0];
186 model->coefficients[a][k][1] = ratio * model->coefficients[arithm][k][1];
187 model->coefficients[a][k][2] = ratio * model->coefficients[arithm][k][2];
188 model->coefficients[a][k][3] = ratio * model->coefficients[arithm][k][3];
189 model->coefficients[a][k][4] = ratio * model->coefficients[arithm][k][4];
190 model->coefficients[a][k][5] = ratio * model->coefficients[arithm][k][5];
191 model->coefficients[a][k][6] = ratio * model->coefficients[arithm][k][6];
192 model->coefficients[a][k][7] = ratio * model->coefficients[arithm][k][7];
220 const char *modelfilename )
225 int rc, arithm, nbcoef;
226 size_t strsize = 256;
231 fprintf(stderr,
"Can't open model file\n");
235 str = malloc( strsize *
sizeof(
char) );
237 rc = getline( &str, &strsize, f );
239 perror(
"modelsRead(getline header)" );
243 while( str[0] ==
'#' );
246 model->name = strdup( str );
249 while( getline( &str, &strsize, f ) != -1 ) {
252 if ( str[0] ==
'#' ) {
257 if ( sscanf( str,
"%d;%12[a-z0-9];", &arithm, kernelstr ) != 2 ) {
258 fprintf(stderr,
"modelRead: %s - Error reading line (%s)\n", model->name, str );
262 if ( (arithm < 0) || (arithm > 3) ) {
263 fprintf(stderr,
"modelRead: %s - Incorrect arithmetic %d in line:\n\t%s\n",
264 model->name, arithm, str );
269 if ( (
int)kernelid == -1 ) {
270 fprintf(stderr,
"modelRead: %s - Incorrect kernel type %s in line:\n\t%s\n",
271 model->name, kernelstr, str );
276 coefs = model->coefficients[arithm][kernelid];
277 strcoef = str + 3 + strlen( kernelstr );
281 if ( sscanf( strcoef,
"%le;%le;%le;%le",
282 coefs, coefs+1, coefs+2, coefs+3 ) != 4 )
284 fprintf(stderr,
"modelRead: %s - Pb reading the 4 coefficients in line:\n\t%s\n", model->name, str );
289 if ( sscanf( strcoef,
"%le;%le;%le;%le;%le;%le",
290 coefs, coefs+1, coefs+2,
291 coefs+3, coefs+4, coefs+5 ) != 6 )
293 fprintf(stderr,
"modelRead: %s - Pb reading the 6 coefficients in line:\n\t%s\n", model->name, str );
298 if ( sscanf( strcoef,
"%le;%le;%le;%le;%le;%le;%le;%le",
299 coefs, coefs+1, coefs+2, coefs+3,
300 coefs+4, coefs+5, coefs+6, coefs+7 ) != 8 )
302 fprintf(stderr,
"modelRead: %s - Pb reading the 8 coefficients in line:\n\t%s\n", model->name, str );
343 assert( model != NULL );
348 model->name = strdup(
"AMD Opteron 6180 - Intel MKL");
352 coefs = &(model->coefficients[a][ktype][0]);
353 coefs[0] = 4.071507e-07;
354 coefs[1] = -1.469893e-07;
355 coefs[2] = 1.707006e-08;
356 coefs[3] = 2.439599e-11;
361 coefs = &(model->coefficients[a][ktype][0]);
362 coefs[0] = 3.255168e-06;
363 coefs[1] = 3.976198e-08;
367 coefs[5] = 2.626177e-10;
378 coefs = &(model->coefficients[a][ktype][0]);
379 coefs[0] = 1.216278e-06;
381 coefs[2] = -2.704179e-10;
382 coefs[3] = 1.148989e-07;
383 coefs[4] = 2.724804e-10;
384 coefs[5] = 1.328900e-09;
386 coefs[7] = 2.429169e-10;
391 coefs = &(model->coefficients[a][ktype][0]);
399 coefs[7] = 2. / 24.e9;
429 assert( model != NULL );
434 model->name = strdup(
"Nvidia K40 GK1108L - CUDA 8.0");
438 coefs = &(model->coefficients[a][ktype][0]);
439 coefs[0] = -3.16663635648446e-05;
440 coefs[1] = 2.63809317549331e-06;
441 coefs[2] = 5.86447245256688e-07;
442 coefs[3] = -1.57859559108480e-09;
443 coefs[4] = -4.74303242824929e-09;
444 coefs[5] = 5.36284121953867e-12;
449 coefs = &(model->coefficients[a][ktype][0]);
450 coefs[0] = 1.216278e-06;
452 coefs[2] = -2.704179e-10;
453 coefs[3] = 1.148989e-07;
454 coefs[4] = 2.724804e-10;
455 coefs[5] = 1.328900e-09;
457 coefs[7] = 2.429169e-10;
462 coefs = &(model->coefficients[a][ktype][0]);
470 coefs[7] = 2. / 1.2e12;
492 pastix_model_t *model = malloc(
sizeof(pastix_model_t));
496 memset( model, 0,
sizeof( pastix_model_t ) );
499 for(k=0; k<PastixKernelLvl1Nbr; k++) {
500 model->coefficients[a][k][0] = 0xdeadbeef;
522 if ( model != NULL ) {
523 if ( model->name != NULL ) {
551 char *filename = NULL;
558 filename = pastix_getenv(
"PASTIX_MODELS_CPU" );
560 if ( filename == NULL ) {
566 pastix_cleanenv( filename );
577 filename = pastix_getenv(
"PASTIX_MODELS_GPU" );
579 if ( filename == NULL ) {
585 pastix_cleanenv( filename );
enum pastix_ktype_e pastix_ktype_t
List of the Level 1 events that may be traced in PaStiX.
@ PastixKernelGEMMCblkFRLR
@ PastixKernelGEMMBlokLRLR
@ PastixKernelGEMMCblk1d2d
@ PastixKernelGEMMCblkLRLR
@ PastixKernelGEMMCblk1d1d
@ PastixKernelGEMMCblk2d2d
@ PastixKernelGEMMBlok2d2d
void pastixModelsFree(pastix_model_t *model)
Free a model data structure.
void pastixModelsLoad(pastix_data_t *pastix_data)
Load the performance models that will be used by the solver.
pastix_model_t * pastixModelsNew()
Create a new model data structure and initialize the values to their default.
FILE * pastix_fopen(const char *filename)
Open a file in the current directory in read only mode.
pastix_model_t * cpu_models
pastix_model_t * gpu_models
Main PaStiX data structure.
int modelsInitDefaultCPU(pastix_model_t *model)
Initialize the CPU model with default values.
int modelsRead(pastix_model_t *model, const char *modelfilename)
Initialize the given model with the file given in parameters.
void modelsPropagate(pastix_model_t *model, int arithm, pastix_ktype_t kernelid)
Propagate a given model to all other similar cases to be sure everything is initialized.
int modelsInitDefaultGPU(pastix_model_t *model)
Initialize the GPU model with default values.
int modelsGetKernelId(const char *kernelstr, int *nbcoef)
Convert a kernel string name found in a model file to its kernel Id.