25 #include "sopalin/sopalin_data.h"
56 if ( cblk->
cblktype & CBLK_TASKS_2D ) {
60 for ( blok = cblk->
fblokptr + 1, m = 0; blok < lblk; blok++, m++ ) {
71 while ( ( blok < lblk ) &&
72 ( blok[0].fcblknm == blok[1].fcblknm ) &&
73 ( blok[0].lcblknm == blok[1].lcblknm ) )
117 if ( cblk->
cblktype & CBLK_TASKS_2D ) {
118 for ( blokA = blokB; blokA < lblk; blokA++ ) {
119 if ( blokA == blokB ) {
121 cblk, fcblk, blokB, blokB, prio );
124 while ( ( blokA < lblk ) &&
125 ( blokA[0].fcblknm == blokA[1].fcblknm ) &&
126 ( blokA[0].lcblknm == blokA[1].lcblknm ) )
133 cblk, fcblk, blokA, blokB, prio );
136 cblk, fcblk, blokA, blokB, prio );
139 while ( ( blokA < lblk ) &&
140 ( blokA[0].fcblknm == blokA[1].fcblknm ) &&
141 ( blokA[0].lcblknm == blokA[1].lcblknm ) )
150 cblk, blokB, fcblk, prio );
153 if ( blokB+1 < lblk ) {
155 cblk, blokB, fcblk, prio );
193 for ( k = 0; k < solvmtx->
cblknbr; k++, cblk++ ) {
195 if ( cblk->
cblktype & CBLK_IN_SCHUR ) {
202 if ( cblk->
cblktype & CBLK_FANIN ) {
223 for ( m = 0; blok < lblk; blok++, m++ ) {
225 cblk_n = fcblk - solvmtx->
cblktab;
226 prio = cblknbr - pastix_imin( k + m, cblk_n );
230 cblk, blok, fcblk, prio );
233 if ( blok+1 < lblk ) {
235 cblk, blok, fcblk, prio );
276 for ( k = 0; k < solvmtx->
cblknbr; k++, fcblk++ ) {
280 for ( m = fcblk[0].brownum; m < fcblk[1].
brownum; m++ ) {
284 if ( cblk->
cblktype & CBLK_IN_SCHUR ) {
287 assert( !( cblk->
cblktype & CBLK_FANIN ) );
299 cblk, blok, fcblk, prio );
304 if ( blok+1 < lblk ) {
306 cblk, blok, fcblk, prio);
310 if ( fcblk->
cblktype & ( CBLK_IN_SCHUR | CBLK_RECV ) ) {
315 if ( fcblk->
cblktype & CBLK_FANIN ) {
326 for ( k = 0; k < solvmtx->
cblknbr; k++, cblk++ ) {
327 if ( !(cblk->
cblktype & (CBLK_FANIN|CBLK_RECV)) ) {
369 for ( k = 0; k <= solvmtx->
cblkmax1d; k++, cblk++ ) {
371 if ( cblk->
cblktype & CBLK_IN_SCHUR ) {
375 if ( cblk->
cblktype & CBLK_TASKS_2D ) {
382 if ( cblk->
cblktype & CBLK_FANIN ) {
403 for ( m = 0; blok < lblk; blok++, m++ ) {
405 cblk_n = fcblk - solvmtx->
cblktab;
408 cblknbr - pastix_imin( k + m, cblk_n ) );
415 for ( k = solvmtx->
cblkmin2d; k < solvmtx->cblknbr; k++, cblk++ ) {
417 if ( cblk->
cblktype & CBLK_IN_SCHUR ) {
421 if ( ! ( cblk->
cblktype & CBLK_TASKS_2D ) ) {
428 if ( cblk->
cblktype & CBLK_FANIN ) {
448 for ( m = 0; blok < lblk; blok++, m++ ) {
453 cblknbr - pastix_imin( k + m, cblk_n ) );
456 while( ( blok < lblk ) &&
457 ( blok[0].fcblknm == blok[1].fcblknm ) &&
458 ( blok[0].lcblknm == blok[1].lcblknm ) )
503 for ( k = 0; k < cblknbr; k++, fcblk++ ) {
507 for ( m = fcblk[0].brownum; m < fcblk[1].
brownum; m++ ) {
512 if ( cblk->
cblktype & CBLK_IN_SCHUR ) {
515 assert( !(cblk->
cblktype & CBLK_FANIN ) );
517 if( ( cblk->
cblktype & CBLK_TASKS_2D ) &&
518 ( blok_prev != NULL ) &&
536 if ( fcblk->
cblktype & ( CBLK_IN_SCHUR | CBLK_RECV ) ) {
540 if ( fcblk->
cblktype & CBLK_FANIN ) {
551 for ( k = 0; k < solvmtx->
cblknbr; k++, cblk++ ) {
552 if ( !(cblk->
cblktype & (CBLK_FANIN|CBLK_RECV)) ) {
588 sopalin_data_t *sopalin_data )
597 if (pastix_data->
starpu == NULL) {
602 if ( sdesc == NULL ) {
609 sdesc = sopalin_data->
solvmtx->starpu_desc;
612 starpu_profiling_status_set(STARPU_PROFILING_ENABLE);
613 #if defined(STARPU_USE_FXT)
615 starpu_fxt_start_profiling();
618 #if defined(PASTIX_STARPU_STATS)
646 #if defined(PASTIX_STARPU_STATS)
651 starpu_task_wait_for_all();
652 #if defined(PASTIX_WITH_MPI)
653 starpu_mpi_wait_for_all( pastix_data->
pastix_comm );
657 #if defined(STARPU_USE_FXT)
659 starpu_fxt_stop_profiling();
662 starpu_profiling_status_set(STARPU_PROFILING_DISABLE);
663 #if defined(PASTIX_STARPU_STATS)
665 print_stats( sub, com, pastix_data->
solvmatr );
BEGIN_C_DECLS typedef int pastix_int_t
void starpu_sparse_matrix_getoncpu(starpu_sparse_matrix_desc_t *desc)
Submit asynchronous calls to retrieve the data on main memory.
void starpu_sparse_matrix_init(SolverMatrix *solvmtx, pastix_mtxtype_t mtxtype, int nodes, int myrank, pastix_coeftype_t flttype)
Generate the StarPU descriptor of the sparse matrix.
void pastix_starpu_init(pastix_data_t *pastix, int *argc, char **argv[], const int *bindtab)
Startup the StarPU runtime system.
void starpu_task_blok_sgemmsp(sopalin_data_t *sopalin_data, pastix_coefside_t sideA, pastix_coefside_t sideB, pastix_trans_t trans, SolverCblk *cblk, SolverCblk *fcblk, const SolverBlok *blokA, const SolverBlok *blokB, int prio)
StarPU GPU implementation.
void starpu_task_cblk_sgetrfsp(sopalin_data_t *sopalin_data, SolverCblk *cblk, int prio)
TODO.
void starpu_task_blok_strsmsp(sopalin_data_t *sopalin_data, pastix_coefside_t coef, pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, SolverBlok *blok, int prio)
StarPU GPU implementation.
void starpu_sparse_cblk_wont_use(pastix_coefside_t coef, SolverCblk *cblk)
Submit asynchronous calls to retrieve the data on main memory.
void starpu_task_cblk_sgemmsp(sopalin_data_t *sopalin_data, pastix_coefside_t sideA, pastix_coefside_t sideB, pastix_trans_t trans, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, int prio)
StarPU GPU implementation.
void starpu_task_blok_sgetrf(sopalin_data_t *sopalin_data, SolverCblk *cblk, int prio)
TODO.
StarPU descriptor structure for the sparse matrix.
PASTIX_Comm inter_node_comm
Main PaStiX data structure.
void starpu_sgetrf_sp2d_ll(sopalin_data_t *sopalin_data, starpu_sparse_matrix_desc_t *desc)
Perform a sparse LU factorization with 1D and 2D kernels.
void starpu_sgetrf_sp1dplus_rl(sopalin_data_t *sopalin_data, starpu_sparse_matrix_desc_t *desc)
Perform a sparse LU factorization with 1D kernels.
void starpu_sgetrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
Perform a sparse LU factorization using StarPU runtime.
void starpu_task_sgetrfsp(sopalin_data_t *sopalin_data, SolverCblk *cblk, int prio)
Submits starpu getrfsp cblk or blok task.
void starpu_task_getrf_sgemmsp(sopalin_data_t *sopalin_data, SolverCblk *cblk, const SolverBlok *blokB, SolverCblk *fcblk, int prio)
Submits starpu sgemmsp cblk or blok task.
void starpu_sgetrf_sp2d_rl(sopalin_data_t *sopalin_data, starpu_sparse_matrix_desc_t *desc)
Perform a sparse LU factorization with 1D and 2D kernels.
void starpu_sgetrf_sp1dplus_ll(sopalin_data_t *sopalin_data, starpu_sparse_matrix_desc_t *desc)
Perform a sparse LU factorization with 1D kernels.
void starpu_task_sadd_1dp_recv(sopalin_data_t *sopalin_data, pastix_coefside_t side, const SolverCblk *cblk, int prio)
Submits starpu zadd task to receive and add the recv cblk.
void starpu_task_sadd_1dp_fanin(sopalin_data_t *sopalin_data, pastix_coefside_t side, const SolverCblk *cblk, int prio)
Submits starpu zadd task to send the fanin cblk.
void starpu_task_sadd_fanin(sopalin_data_t *sopalin_data, pastix_coefside_t side, const SolverCblk *cblk, int prio)
Submits starpu zadd task to send the fanin cblk.
void starpu_task_sadd_recv(sopalin_data_t *sopalin_data, pastix_coefside_t side, const SolverCblk *cblk, int prio)
Submits starpu zadd task to receive and add the recv cblk.
void starpu_task_sadd_2d_fanin(sopalin_data_t *sopalin_data, pastix_coefside_t side, const SolverCblk *cblk, int prio)
Submits starpu zadd task to send the fanin block.
void starpu_task_sadd_2d_recv(sopalin_data_t *sopalin_data, pastix_coefside_t side, const SolverCblk *cblk, int prio)
Submits starpu zadd task to receive and add the recv block.
SolverBlok *restrict bloktab
pastix_int_t *restrict browtab
SolverCblk *restrict cblktab
Solver column block structure.
Solver column block structure.