28 #include "sopalin/sopalin_data.h"
34 #include "kernels/kernels_trace.h"
36 #if defined(PASTIX_WITH_PARSEC)
40 #if defined(PASTIX_WITH_STARPU)
44 static void (*sopalinFacto[5][4])(pastix_data_t *, sopalin_data_t*) =
46 { sopalin_spotrf, sopalin_dpotrf, sopalin_cpotrf, sopalin_zpotrf },
47 { sopalin_ssytrf, sopalin_dsytrf, sopalin_csytrf, sopalin_zsytrf },
48 { sopalin_sgetrf, sopalin_dgetrf, sopalin_cgetrf, sopalin_zgetrf },
49 { sopalin_spotrf, sopalin_dpotrf, sopalin_cpxtrf, sopalin_zpxtrf },
50 { sopalin_ssytrf, sopalin_dsytrf, sopalin_chetrf, sopalin_zhetrf }
88 spmatrix_t *spmg = NULL;
94 if (pastix_data == NULL) {
95 pastix_print_error(
"pastix_subtask_spm2bcsc: wrong pastix_data parameter" );
99 pastix_print_error(
"pastix_subtask_spm2bcsc: wrong spm parameter" );
102 if ( !(pastix_data->steps & STEP_ANALYSE) ) {
103 pastix_print_error(
"pastix_subtask_spm2bcsc: All steps from pastix_task_init() to pastix_task_blend() have to be called before calling this function" );
111 pastix_data->dparm[
DPARM_A_NORM ] = spmNorm( SpmFrobeniusNorm, spm );
113 pastix_print( pastix_data->inter_node_procnum, 0,
128 if ( ( isSchedRuntime( isched ) ) &&
134 if ( isSchedRuntime( isched ) ) {
135 pastix_data->solvmatr = pastix_data->solvglob;
138 assert( isSchedPthread( isched ) );
139 pastix_data->solvmatr = pastix_data->solvloc;
141 pastix_data->sched = isched;
148 if ( pastix_data->bcsc != NULL )
151 memFree_null( pastix_data->bcsc );
156 if ( spm->loc2glob == NULL ) {
159 #if defined(PASTIX_WITH_MPI)
162 pastix_print(pastix_data->procnum, 0,
"bcscInit: the SPM has to be centralized for the moment\n");
164 spmg = malloc(
sizeof(spmatrix_t) );
165 spmGather( spm, -1, spmg );
170 pastix_data->ordemesh,
171 pastix_data->solvmatr,
178 memFree_null( spmg );
182 pastix_print( pastix_data->inter_node_procnum, 0, OUT_BCSC_TIME, time );
192 pastix_data->steps &= ~STEP_BCSC2CTAB;
193 pastix_data->steps |= STEP_CSC2BCSC;
236 if (pastix_data == NULL) {
237 pastix_print_error(
"pastix_subtask_bcsc2ctab: wrong pastix_data parameter" );
240 if ( !(pastix_data->steps & STEP_CSC2BCSC) ) {
241 pastix_print_error(
"pastix_subtask_bcsc2ctab: All steps from pastix_task_init() to pastix_stask_blend() have to be called before calling this function" );
244 if (pastix_data->bcsc == NULL) {
245 pastix_print_error(
"pastix_subtask_bcsc2ctab: wrong pastix_data->bcsc parameter" );
250 pastix_check_and_correct_scheduler( pastix_data );
252 clockSyncStart( timer, pastix_data->inter_node_comm );
255 lr = &(pastix_data->solvmatr->lowrank);
268 bcsc = pastix_data->bcsc;
299 #if defined(PASTIX_WITH_MPI)
302 ( pastix_data->procnbr > 1 ) )
304 pastix_print_error(
"pastix_task_sopalin: Low-Rank with MPI communication is not available yet with PaRSEC\n" );
313 if (pastix_data->bcsc != NULL)
337 #if defined(PASTIX_WITH_PARSEC)
341 if (pastix_data->parsec == NULL) {
347 pastix_size_of( bcsc->
flttype ), mtxtype,
348 pastix_data->inter_node_procnbr,
349 pastix_data->inter_node_procnum );
353 #if defined(PASTIX_WITH_STARPU)
359 pastix_data->inter_node_procnbr,
360 pastix_data->inter_node_procnum,
361 pastix_data->bcsc->flttype );
365 clockSyncStop( timer, pastix_data->inter_node_comm );
367 pastix_print( pastix_data->inter_node_procnum, 0, OUT_COEFTAB_TIME,
372 pastix_data->steps &= ~STEP_NUMFACT;
373 pastix_data->steps |= STEP_BCSC2CTAB;
408 sopalin_data_t sopalin_data;
409 SolverBackup_t *sbackup;
411 PASTIX_Comm pastix_comm;
418 if (pastix_data == NULL) {
419 pastix_print_error(
"pastix_subtask_sopalin: wrong pastix_data parameter" );
422 if ( !(pastix_data->steps & STEP_ANALYSE) ) {
423 pastix_print_error(
"pastix_subtask_sopalin: All steps from pastix_task_init() to pastix_task_analyze() have to be called before calling this function" );
426 if ( !(pastix_data->steps & STEP_CSC2BCSC) ) {
427 pastix_print_error(
"pastix_subtask_sopalin: All steps from pastix_task_init() to pastix_task_analyze() have to be called before calling this function" );
430 if ( !(pastix_data->steps & STEP_BCSC2CTAB) ) {
431 pastix_print_error(
"pastix_subtask_sopalin: All steps from pastix_task_init() to pastix_task_analyze() have to be called before calling this function" );
435 bcsc = pastix_data->bcsc;
437 pastix_print_error(
"pastix_subtask_sopalin: wrong pastix_data_bcsc parameter" );
442 pastix_check_and_correct_scheduler( pastix_data );
444 pastix_comm = pastix_data->inter_node_comm;
445 iparm = pastix_data->iparm;
446 dparm = pastix_data->dparm;
452 sopalin_data.solvmtx = pastix_data->solvmatr;
467 if ( (bcsc->
flttype == PastixFloat) || (bcsc->
flttype == PastixComplex32) ) {
468 eps = LAPACKE_slamch_work(
'e' );
471 eps = LAPACKE_dlamch_work(
'e' );
479 sopalin_data.solvmtx->diagthreshold = threshold;
480 sopalin_data.solvmtx->nbpivots = 0;
482 sopalin_data.cpu_coefs = &(pastix_data->cpu_models->coefficients[bcsc->
flttype-2]);
483 sopalin_data.gpu_coefs = &(pastix_data->gpu_models->coefficients[bcsc->
flttype-2]);
484 sopalin_data.cpu_models = pastix_data->cpu_models;
485 sopalin_data.gpu_models = pastix_data->gpu_models;
489 pastix_data->solvmatr->restore = 2;
491 void (*factofct)( pastix_data_t *, sopalin_data_t *);
492 double timer, timer_local, flops, flops_g = 0.;
498 clockSyncStart( timer, pastix_comm );
499 clockStart(timer_local);
501 factofct( pastix_data, &sopalin_data );
503 clockStop(timer_local);
504 clockSyncStop( timer, pastix_comm );
515 MPI_Reduce( &flops_l, &flops_g, 1, MPI_DOUBLE, MPI_SUM, 0, pastix_data->inter_node_comm );
520 int nbpivot_l = sopalin_data.solvmtx->nbpivots;
522 MPI_Allreduce( &nbpivot_l, &nbpivot_g, 1, MPI_INT, MPI_SUM, pastix_data->inter_node_comm );
527 pastix_print( pastix_data->inter_node_procnum, 0, OUT_SOPALIN_TIME,
529 pastix_print_value( flops ),
530 pastix_print_unit( flops ),
531 pastix_print_value( flops_g ),
532 pastix_print_unit( flops_g ),
536 #if defined(PASTIX_WITH_PARSEC) && defined(PASTIX_DEBUG_PARSEC)
539 fprintf(stderr,
"-- Check status of PaRSEC nbtasks counters\n" );
540 for (i=0; i<32; i++) {
541 if ( parsec_nbtasks_on_gpu[i] != 0 ) {
542 fprintf(stderr,
"Device %d => %d\n",
543 i, parsec_nbtasks_on_gpu[i] );
545 parsec_nbtasks_on_gpu[i] = 0;
553 #if defined(PASTIX_NUMFACT_DUMP_SOLVER)
558 stream =
pastix_fopenw( pastix_data->dir_local,
"solver.eps",
"w" );
562 pastix_data->dir_local );
573 #if defined(PASTIX_WITH_MPI)
574 MPI_Allreduce( MPI_IN_PLACE, pastix_data->dparm +
DPARM_MEM_FR, 1, MPI_DOUBLE, MPI_SUM, pastix_data->inter_node_comm );
575 MPI_Allreduce( MPI_IN_PLACE, pastix_data->dparm +
DPARM_MEM_LR, 1, MPI_DOUBLE, MPI_SUM, pastix_data->inter_node_comm );
580 pastix_data->steps &= ~( STEP_BCSC2CTAB |
583 pastix_data->steps |= STEP_NUMFACT;
627 pastix_int_t procnum;
634 if (pastix_data == NULL) {
635 pastix_print_error(
"pastix_task_sopalin: wrong pastix_data parameter" );
639 pastix_print_error(
"pastix_task_sopalin: wrong spm parameter" );
642 if ( !(pastix_data->steps & STEP_ANALYSE) ) {
643 pastix_print_error(
"pastix_task_sopalin: All steps from pastix_task_init() to pastix_task_blend() have to be called before calling this function" );
647 iparm = pastix_data->iparm;
648 procnum = pastix_data->inter_node_procnum;
651 pastix_print( procnum, 0, OUT_STEP_SOPALIN,
656 steps = ~(STEP_CSC2BCSC |
661 pastix_data->steps &= steps;