25 #include "sopalin/sopalin_data.h"
56 if ( cblk->
cblktype & CBLK_TASKS_2D ) {
60 for ( blok = cblk->
fblokptr + 1, m = 0; blok < lblk; blok++, m++ ) {
67 while ( ( blok < lblk ) &&
68 ( blok[0].fcblknm == blok[1].fcblknm ) &&
69 ( blok[0].lcblknm == blok[1].lcblknm ) )
113 if ( cblk->
cblktype & CBLK_TASKS_2D ) {
117 for ( blokA = blokB; blokA < lblk; blokA++ ) {
119 cblk, fcblk, blokA, blokB, prio );
122 while ( ( blokA < lblk ) &&
123 ( blokA[0].fcblknm == blokA[1].fcblknm ) &&
124 ( blokA[0].lcblknm == blokA[1].lcblknm ) )
131 starpu_data_unregister_submit( blokB->
handler[1] );
136 cblk, blokB, fcblk, prio );
173 for ( k = 0; k < solvmtx->
cblknbr; k++, cblk++ ) {
175 if ( cblk->
cblktype & CBLK_IN_SCHUR ) {
182 if ( cblk->
cblktype & CBLK_FANIN ) {
201 for ( m = 0; blok < lblk; blok++, m++ ) {
203 cblk_n = fcblk - solvmtx->
cblktab;
208 cblknbr - pastix_imin( k + m, cblk_n ) );
212 if ( cblk->
handler[1] != NULL ) {
213 starpu_data_unregister_submit( cblk->
handler[1] );
255 for ( k = 0; k < solvmtx->
cblknbr; k++, fcblk++ ) {
259 for ( m = fcblk[0].brownum; m < fcblk[1].
brownum; m++ ) {
263 if ( cblk->
cblktype & CBLK_IN_SCHUR ) {
266 assert( !( cblk->
cblktype & CBLK_FANIN ) );
277 cblk, blok, fcblk, prio);
280 if ( fcblk->
cblktype & ( CBLK_IN_SCHUR | CBLK_RECV ) ) {
285 if ( fcblk->
cblktype & CBLK_FANIN ) {
295 for ( k = 0; k < solvmtx->
cblknbr; k++, cblk++ ) {
297 if ( cblk->
handler[1] != NULL ) {
298 starpu_data_unregister_submit( cblk->
handler[1] );
302 if ( !(cblk->
cblktype & (CBLK_FANIN|CBLK_RECV)) ) {
344 for ( k = 0; k <= solvmtx->
cblkmax1d; k++, cblk++ ) {
346 if ( cblk->
cblktype & CBLK_IN_SCHUR ) {
350 if ( cblk->
cblktype & CBLK_TASKS_2D ) {
357 if ( cblk->
cblktype & CBLK_FANIN ) {
376 for ( m = 0; blok < lblk; blok++, m++ ) {
378 cblk_n = fcblk - solvmtx->
cblktab;
381 cblknbr - pastix_imin( k + m, cblk_n ) );
385 if ( cblk->
handler[1] != NULL ) {
386 starpu_data_unregister_submit( cblk->
handler[1] );
395 for ( k = solvmtx->
cblkmin2d; k < solvmtx->cblknbr; k++, cblk++ ) {
397 if ( cblk->
cblktype & CBLK_IN_SCHUR ) {
401 if ( ! ( cblk->
cblktype & CBLK_TASKS_2D ) ) {
408 if ( cblk->
cblktype & CBLK_FANIN ) {
425 for ( m = 0; blok < lblk; blok++, m++ ) {
430 cblknbr - pastix_imin( k + m, cblk_n ) );
433 while ( ( blok < lblk ) &&
434 ( blok[0].fcblknm == blok[1].fcblknm ) &&
435 ( blok[0].lcblknm == blok[1].lcblknm ) )
482 for ( k = 0; k < cblknbr; k++, fcblk++ ) {
486 for ( m = fcblk[0].brownum; m < fcblk[1].
brownum; m++ ) {
491 if ( cblk->
cblktype & CBLK_IN_SCHUR ) {
494 assert( !(cblk->
cblktype & CBLK_FANIN ) );
496 if( ( cblk->
cblktype & CBLK_TASKS_2D ) &&
497 ( blok_prev != NULL ) &&
514 if ( fcblk->
cblktype & ( CBLK_IN_SCHUR | CBLK_RECV ) ) {
518 if ( fcblk->
cblktype & CBLK_FANIN ) {
528 for ( k = 0; k < solvmtx->
cblknbr; k++, cblk++ ) {
530 if ( cblk->
handler[1] != NULL ) {
531 starpu_data_unregister_submit( cblk->
handler[1] );
535 if ( !(cblk->
cblktype & (CBLK_FANIN|CBLK_RECV)) ) {
571 sopalin_data_t *sopalin_data )
580 if (pastix_data->
starpu == NULL) {
585 if ( sdesc == NULL ) {
592 sdesc = sopalin_data->
solvmtx->starpu_desc;
595 starpu_profiling_status_set(STARPU_PROFILING_ENABLE);
596 #if defined(STARPU_USE_FXT)
598 starpu_fxt_start_profiling();
601 #if defined(PASTIX_STARPU_STATS)
629 #if defined(PASTIX_STARPU_STATS)
634 starpu_task_wait_for_all();
635 #if defined(PASTIX_WITH_MPI)
636 starpu_mpi_wait_for_all( pastix_data->
pastix_comm );
640 #if defined(STARPU_USE_FXT)
642 starpu_fxt_stop_profiling();
645 starpu_profiling_status_set(STARPU_PROFILING_DISABLE);
646 #if defined(PASTIX_STARPU_STATS)
648 print_stats( sub, com, pastix_data->
solvmatr );
BEGIN_C_DECLS typedef int pastix_int_t
void starpu_sparse_matrix_getoncpu(starpu_sparse_matrix_desc_t *desc)
Submit asynchronous calls to retrieve the data on main memory.
void starpu_sparse_matrix_init(SolverMatrix *solvmtx, pastix_mtxtype_t mtxtype, int nodes, int myrank, pastix_coeftype_t flttype)
Generate the StarPU descriptor of the sparse matrix.
void starpu_task_blok_cscalo(sopalin_data_t *sopalin_data, pastix_trans_t trans, const SolverCblk *cblk, SolverBlok *blok, int prio)
TODO.
void pastix_starpu_init(pastix_data_t *pastix, int *argc, char **argv[], const int *bindtab)
Startup the StarPU runtime system.
void starpu_task_cblk_csytrfsp(sopalin_data_t *sopalin_data, SolverCblk *cblk, int prio)
TODO.
void starpu_task_blok_ctrsmsp(sopalin_data_t *sopalin_data, pastix_coefside_t coef, pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, SolverBlok *blok, int prio)
StarPU GPU implementation.
void starpu_task_cblk_cgemmsp(sopalin_data_t *sopalin_data, pastix_coefside_t sideA, pastix_coefside_t sideB, pastix_trans_t trans, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, int prio)
StarPU GPU implementation.
void starpu_task_blok_csytrf(sopalin_data_t *sopalin_data, SolverCblk *cblk, int prio)
TODO.
void starpu_sparse_cblk_wont_use(pastix_coefside_t coef, SolverCblk *cblk)
Submit asynchronous calls to retrieve the data on main memory.
void starpu_task_blok_cgemmsp(sopalin_data_t *sopalin_data, pastix_coefside_t sideA, pastix_coefside_t sideB, pastix_trans_t trans, SolverCblk *cblk, SolverCblk *fcblk, const SolverBlok *blokA, const SolverBlok *blokB, int prio)
StarPU GPU implementation.
StarPU descriptor structure for the sparse matrix.
PASTIX_Comm inter_node_comm
Main PaStiX data structure.
void starpu_task_sytrf_cgemmsp(sopalin_data_t *sopalin_data, SolverCblk *cblk, SolverBlok *blokB, SolverCblk *fcblk, int prio)
Submits starpu cgemmsp cblk or blok task.
void starpu_csytrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
Perform a sparse LDL^t factorization using StarPU runtime.
void starpu_csytrf_sp2d_rl(sopalin_data_t *sopalin_data, starpu_sparse_matrix_desc_t *desc)
Perform a sparse LDL^t factorization with 1D and 2D kernels.
void starpu_csytrf_sp1dplus_ll(sopalin_data_t *sopalin_data, starpu_sparse_matrix_desc_t *desc)
Perform a sparse LDL^t factorization with 1D kernels.
void starpu_csytrf_sp1dplus_rl(sopalin_data_t *sopalin_data, starpu_sparse_matrix_desc_t *desc)
Perform a sparse LDL^t factorization with 1D kernels.
void starpu_task_csytrfsp(sopalin_data_t *sopalin_data, SolverCblk *cblk, int prio)
Submits starpu sytrfsp cblk or blok task.
void starpu_csytrf_sp2d_ll(sopalin_data_t *sopalin_data, starpu_sparse_matrix_desc_t *desc)
Perform a sparse LDL^t factorization with 1D and 2D kernels.
void starpu_task_cadd_recv(sopalin_data_t *sopalin_data, pastix_coefside_t side, const SolverCblk *cblk, int prio)
Submits starpu zadd task to send the recv cblk.
void starpu_task_cadd_1dp_recv(sopalin_data_t *sopalin_data, pastix_coefside_t side, const SolverCblk *cblk, int prio)
Submits starpu zadd task to receive and add the recv cblk.
void starpu_task_cadd_fanin(sopalin_data_t *sopalin_data, pastix_coefside_t side, const SolverCblk *cblk, int prio)
Submits starpu zadd task to send the fanin cblk.
void starpu_task_cadd_2d_fanin(sopalin_data_t *sopalin_data, pastix_coefside_t side, const SolverCblk *cblk, int prio)
Submits starpu zadd task to send the fanin block.
void starpu_task_cadd_1dp_fanin(sopalin_data_t *sopalin_data, pastix_coefside_t side, const SolverCblk *cblk, int prio)
Submits starpu zadd task to send the fanin cblk.
void starpu_task_cadd_2d_recv(sopalin_data_t *sopalin_data, pastix_coefside_t side, const SolverCblk *cblk, int prio)
Submits starpu zadd task to receive and add the recv block.
SolverBlok *restrict bloktab
pastix_int_t *restrict browtab
SolverCblk *restrict cblktab
Solver column block structure.
Solver column block structure.