25 #include "sopalin/sopalin_data.h"
64 if ( cblk->
cblktype & CBLK_TASKS_2D ) {
65 for ( blokA = blokB; blokA < lblk; blokA++ ) {
66 if ( blokA == blokB ) {
68 cblk, fcblk, blokB, blokB, prio );
71 while ( ( blokA < lblk ) &&
72 ( blokA[0].fcblknm == blokA[1].fcblknm ) &&
73 ( blokA[0].lcblknm == blokA[1].lcblknm ) )
80 cblk, fcblk, blokA, blokB, prio );
83 cblk, fcblk, blokA, blokB, prio );
86 while ( ( blokA < lblk ) &&
87 ( blokA[0].fcblknm == blokA[1].fcblknm ) &&
88 ( blokA[0].lcblknm == blokA[1].lcblknm ) )
97 cblk, blokB, fcblk, prio );
100 if ( blokB+1 < lblk ) {
102 cblk, blokB, fcblk, prio );
132 if ( cblk->
cblktype & CBLK_TASKS_2D ) {
136 for ( blok = cblk->
fblokptr + 1, m = 0; blok < lblk; blok++, m++ ) {
147 while ( ( blok < lblk ) &&
148 ( blok[0].fcblknm == blok[1].fcblknm ) &&
149 ( blok[0].lcblknm == blok[1].lcblknm ) )
193 for (k=0; k<solvmtx->
cblknbr; k++, cblk++){
195 if ( cblk->
cblktype & CBLK_IN_SCHUR ) {
206 for(m=0; blok < lblk; blok++, m++ )
209 cblk_n = fcblk - solvmtx->
cblktab;
214 cblknbr - pastix_imin( k + m, cblk_n ) );
217 if ( blok+1 < lblk ) {
220 cblknbr - pastix_imin( k + m, cblk_n ) );
261 for (k=0; k<solvmtx->
cblknbr; k++, cblk++){
263 for ( m = cblk[0].brownum; m < cblk[1].
brownum; m++ ) {
267 if ( lcblk->
cblktype & CBLK_IN_SCHUR ) {
272 cblk_n = fcblk - solvmtx->
cblktab;
274 assert( fcblk == cblk );
279 cblknbr - pastix_imin( k + m, cblk_n ) );
284 if ( blok+1 < lblk ) {
287 cblknbr - pastix_imin( k + m, cblk_n ) );
291 if ( cblk->
cblktype & CBLK_IN_SCHUR ) {
300 for ( k = 0; k < solvmtx->
cblknbr; k++, cblk++ ) {
341 for ( k = 0; k <= solvmtx->
cblkmax1d; k++, cblk++ ) {
343 if ( cblk->
cblktype & CBLK_IN_SCHUR ) {
347 if ( cblk->
cblktype & CBLK_TASKS_2D ) {
357 for(m=0; blok < lblk; blok++, m++ )
360 cblk_n = fcblk - solvmtx->
cblktab;
363 cblknbr - pastix_imin( k + m, cblk_n ) );
370 for ( k = solvmtx->
cblkmin2d; k < solvmtx->cblknbr; k++, cblk++ ) {
372 if ( cblk->
cblktype & CBLK_IN_SCHUR ) {
376 if ( ! ( cblk->
cblktype & CBLK_TASKS_2D ) ) {
384 for ( blok = cblk->
fblokptr + 1, m = 0; blok < lblk; blok++, m++ ) {
389 cblknbr - pastix_imin( k + m, cblk_n ) );
392 while( ( blok < lblk ) &&
394 ( blok[0].lcblknm == blok[1].lcblknm ) )
440 for ( k = 0; k < cblknbr; k++, fcblk++ ) {
442 for ( m = fcblk[0].brownum; m < fcblk[1].
brownum; m++ ) {
447 if ( cblk->
cblktype & CBLK_IN_SCHUR ) {
451 if( ( cblk->
cblktype & CBLK_TASKS_2D ) &&
452 ( blok_prev != NULL ) &&
463 if ( fcblk->
cblktype & CBLK_IN_SCHUR ) {
471 for ( k = 0; k < solvmtx->
cblknbr; k++, cblk++ ) {
506 sopalin_data_t *sopalin_data )
515 if (pastix_data->
starpu == NULL) {
520 if ( sdesc == NULL ) {
527 sdesc = sopalin_data->
solvmtx->starpu_desc;
530 starpu_profiling_status_set(STARPU_PROFILING_ENABLE);
531 #if defined(STARPU_USE_FXT)
533 starpu_fxt_start_profiling();
536 #if defined(PASTIX_STARPU_STATS)
564 #if defined(PASTIX_STARPU_STATS)
569 starpu_task_wait_for_all();
570 #if defined(PASTIX_WITH_MPI)
571 starpu_mpi_wait_for_all( pastix_data->
pastix_comm );
575 #if defined(STARPU_USE_FXT)
577 starpu_fxt_stop_profiling();
580 starpu_profiling_status_set(STARPU_PROFILING_DISABLE);
581 #if defined(PASTIX_STARPU_STATS)
583 print_stats( sub, com, pastix_data->
solvmatr );
BEGIN_C_DECLS typedef int pastix_int_t
void starpu_sparse_matrix_getoncpu(starpu_sparse_matrix_desc_t *desc)
Submit asynchronous calls to retrieve the data on main memory.
void starpu_sparse_matrix_init(SolverMatrix *solvmtx, pastix_mtxtype_t mtxtype, int nodes, int myrank, pastix_coeftype_t flttype)
Generate the StarPU descriptor of the sparse matrix.
void pastix_starpu_init(pastix_data_t *pastix, int *argc, char **argv[], const int *bindtab)
Startup the StarPU runtime system.
void starpu_task_blok_sgemmsp(sopalin_data_t *sopalin_data, pastix_coefside_t sideA, pastix_coefside_t sideB, pastix_trans_t trans, SolverCblk *cblk, SolverCblk *fcblk, const SolverBlok *blokA, const SolverBlok *blokB, int prio)
StarPU GPU implementation.
void starpu_task_cblk_sgetrfsp(sopalin_data_t *sopalin_data, SolverCblk *cblk, int prio)
TODO.
void starpu_task_blok_strsmsp(sopalin_data_t *sopalin_data, pastix_coefside_t coef, pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, SolverBlok *blok, int prio)
StarPU GPU implementation.
void starpu_sparse_cblk_wont_use(pastix_coefside_t coef, SolverCblk *cblk)
Submit asynchronous calls to retrieve the data on main memory.
void starpu_task_cblk_sgemmsp(sopalin_data_t *sopalin_data, pastix_coefside_t sideA, pastix_coefside_t sideB, pastix_trans_t trans, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, int prio)
StarPU GPU implementation.
void starpu_task_blok_sgetrf(sopalin_data_t *sopalin_data, SolverCblk *cblk, int prio)
TODO.
StarPU descriptor structure for the sparse matrix.
PASTIX_Comm inter_node_comm
Main PaStiX data structure.
void starpu_sgetrf_sp2d_ll(sopalin_data_t *sopalin_data, starpu_sparse_matrix_desc_t *desc)
Perform a sparse LU factorization with 1D and 2D kernels.
void starpu_sgetrf_sp1dplus_rl(sopalin_data_t *sopalin_data, starpu_sparse_matrix_desc_t *desc)
Perform a sparse LU factorization with 1D kernels.
void starpu_sgetrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
Perform a sparse LU factorization using StarPU runtime.
void starpu_task_sgetrfsp(sopalin_data_t *sopalin_data, SolverCblk *cblk, int prio)
TODO.
void starpu_task_getrf_sgemmsp(sopalin_data_t *sopalin_data, SolverCblk *cblk, const SolverBlok *blokB, SolverCblk *fcblk, int prio)
TODO.
void starpu_sgetrf_sp2d_rl(sopalin_data_t *sopalin_data, starpu_sparse_matrix_desc_t *desc)
Perform a sparse LU factorization with 1D and 2D kernels.
void starpu_sgetrf_sp1dplus_ll(sopalin_data_t *sopalin_data, starpu_sparse_matrix_desc_t *desc)
Perform a sparse LU factorization with 1D kernels.
SolverBlok *restrict bloktab
pastix_int_t *restrict browtab
SolverCblk *restrict cblktab
Solver column block structure.
Solver column block structure.