25 #ifndef DOXYGEN_SHOULD_SKIP_THIS
30 #include "sopalin/sopalin_data.h"
/* Profiling support: this section is compiled only when PASTIX_STARPU_PROFILING is defined. */
36 #if defined( PASTIX_STARPU_PROFILING )
/* Profile record of this codelet; its name field is set to solve_blok_zgemm. */
/* NOTE(review): the end of this initializer is not visible in this excerpt. */
40 starpu_profile_t solve_blok_zgemm_profile = {
42 .name =
"solve_blok_zgemm"
/* Prototype carrying the GCC/Clang constructor attribute, which makes the */
/* function run automatically at load time, before main(). */
48 void solve_blok_zgemm_profile_register(
void ) __attribute__( ( constructor ) );
/* Definition: hands the address of the profile record to profiling_register_cl(). */
/* NOTE(review): return type and braces are on lines missing from this excerpt. */
50 solve_blok_zgemm_profile_register(
void )
52 profiling_register_cl( &solve_blok_zgemm_profile );
56 #ifndef DOXYGEN_SHOULD_SKIP_THIS
/* Argument bundle passed to the codelet as cl_arg. */
/* Its member list is not visible in this excerpt; the fields referenced */
/* elsewhere in this file are profile_data, trans, side, cblk, blok and fcbk. */
57 struct cl_solve_blok_zgemm_args_s {
/* File-local StarPU performance model of type STARPU_HISTORY_BASED, */
/* registered under the symbol solve_blok_zgemm. */
/* NOTE(review): the end of this initializer is not visible in this excerpt. */
66 static struct starpu_perfmodel starpu_solve_blok_zgemm_model = {
67 .type = STARPU_HISTORY_BASED,
68 .symbol =
"solve_blok_zgemm",
/* CPU implementation of the codelet; left out when PASTIX_STARPU_SIMULATION is defined. */
71 #if !defined(PASTIX_STARPU_SIMULATION)
/*
 * descr  : StarPU buffer descriptors. descr[0] is read through
 *          pastix_starpu_cblk_get_ptr() into dataA; descr[1] and descr[2]
 *          are read as matrix buffers into B and C.
 * cl_arg : pointer reinterpreted as struct cl_solve_blok_zgemm_args_s.
 * NOTE(review): return type, braces and the declarations of lrA, ldb and
 * ldc are on lines missing from this excerpt.
 */
73 fct_solve_blok_zgemm_cpu(
void *descr[],
void *cl_arg )
75 const void *dataA = NULL;
77 const pastix_complex64_t *A;
78 pastix_complex64_t *B, *C;
80 struct cl_solve_blok_zgemm_args_s *args = (
struct cl_solve_blok_zgemm_args_s *)cl_arg;
/* Fetch the raw pointers behind the three StarPU handles. */
82 dataA = pastix_starpu_cblk_get_ptr( descr[0] );
83 B = (pastix_complex64_t *)STARPU_MATRIX_GET_PTR( descr[1] );
86 C = (pastix_complex64_t *)STARPU_MATRIX_GET_PTR( descr[2] );
/* Left side with a compressed cblk: advance lrA by the position of blok */
/* relative to the first block of cblk (blok - fblokptr). */
92 if ( (args->side ==
PastixLeft) && (args->cblk->cblktype & CBLK_COMPRESSED) ) {
94 lrA += (args->blok - args->cblk->fblokptr);
/* Right side with a compressed fcbk: same offset, relative to the first block of fcbk. */
97 else if ( (args->side ==
PastixRight) && (args->fcbk->cblktype & CBLK_COMPRESSED) ) {
99 lrA += (args->blok - args->fcbk->fblokptr);
/* Advance A by the coefind offset of blok. */
/* NOTE(review): presumably the branch taken for uncompressed storage - confirm against the full file. */
104 A += args->blok->coefind;
/* Trailing arguments of the kernel call. */
/* NOTE(review): the callee name and leading arguments are on a missing line; presumably solve_blok_zgemm - confirm. */
109 args->cblk, args->blok, args->fcbk, dataA, B, ldb, C, ldc );
/* Body fragment of the task-submission routine; its header is not visible in this excerpt. */
/* cl_arg is the heap-allocated argument bundle handed to the codelet. */
167 struct cl_solve_blok_zgemm_args_s *cl_arg;
171 starpu_data_handle_t handle;
172 #if defined(PASTIX_DEBUG_STARPU)
/* MPI build only: the visible tests check whether cblk, then fcbk, is a */
/* fan-in block or is owned by the local cluster (ownerid == clustnum). */
/* The statements they guard are on lines missing from this excerpt. */
179 #if defined(PASTIX_WITH_MPI)
182 if ( (cblk->
cblktype & CBLK_FANIN) ||
183 (cblk->
ownerid == sopalin_data->solvmtx->clustnum) )
187 if ( (fcbk->
cblktype & CBLK_FANIN) ||
188 (fcbk->
ownerid == sopalin_data->solvmtx->clustnum) )
/* Queries StarPU-MPI about the receive cache for the handle of fcbk. */
192 if ( starpu_mpi_cached_receive( rhsb->starpu_desc->handletab[fcbknum] ) ) {
/* NOTE(review): presumably leaves early when nothing has to be submitted locally - confirm. */
195 if ( !need_submit ) {
/* Debug trace printed to stderr. */
/* NOTE(review): the last argument multiplies by sizeof, so it presumably has */
/* type size_t, for which the matching conversion is zu rather than ld. */
/* NOTE(review): this trace reads solvmtx and solvmtx->starpu_desc_rhs while the */
/* rest of the fragment uses sopalin_data->solvmtx and rhsb->starpu_desc; confirm */
/* that it still builds with PASTIX_DEBUG_STARPU enabled. */
201 #if defined(PASTIX_DEBUG_STARPU)
202 fprintf( stderr,
"[%2d][%s] cblk = %d, fcblk = %d, ownerid = %d, handler = %p, size = %ld\n",
203 solvmtx->clustnum, __func__, cblk->
gcblknum, fcbk->
gcblknum, cblk->
ownerid, solvmtx->starpu_desc_rhs->handletab[cblknum],
204 cblk_colnbr( cblk ) *
sizeof(pastix_complex64_t) );
/* Allocate the codelet argument bundle. */
/* NOTE(review): no NULL check is visible between this malloc and the first */
/* dereference of cl_arg just below. */
210 cl_arg = malloc(
sizeof(
struct cl_solve_blok_zgemm_args_s) );
/* Profiling build: share the measures of the codelet profile and start flops at NAN. */
211 #if defined(PASTIX_STARPU_PROFILING)
212 cl_arg->profile_data.measures = solve_blok_zgemm_profile.measures;
213 cl_arg->profile_data.flops = NAN;
216 cl_arg->trans = trans;
/* Debug build: format a task name from the codelet name and three indices; */
/* the first index is fcbknum for PastixRight and cblknum otherwise. */
/* NOTE(review): the return value of asprintf is ignored, so a failed */
/* allocation would leave task_name unusable. */
221 #if defined(PASTIX_DEBUG_STARPU)
222 asprintf( &task_name,
"%s( %ld, %ld, %ld )",
223 cl_solve_blok_zgemm_cpu.name,
224 (
long)( ( side ==
PastixRight ) ? fcbknum : cblknum ),
/* Submit the task: cl_arg goes in through STARPU_CL_ARGS, the handle of cblk */
/* is accessed read-only (STARPU_R) and the handle of fcbk read-write (STARPU_RW). */
/* Profiling callback, task name and heteroprio bucket are added only under */
/* their respective build flags. */
/* NOTE(review): ownership of cl_arg presumably passes to StarPU via STARPU_CL_ARGS - confirm in the StarPU documentation. */
236 pastix_starpu_insert_task(
237 &cl_solve_blok_zgemm_cpu,
238 STARPU_CL_ARGS, cl_arg,
sizeof(
struct cl_solve_blok_zgemm_args_s ),
239 #
if defined(PASTIX_STARPU_PROFILING)
240 STARPU_CALLBACK_WITH_ARG_NFREE, cl_profiling_callback, cl_arg,
243 STARPU_R, rhsb->starpu_desc->handletab[cblknum],
244 STARPU_RW, rhsb->starpu_desc->handletab[fcbknum],
245 #
if defined(PASTIX_DEBUG_STARPU)
246 STARPU_NAME, task_name,
248 #
if defined(PASTIX_STARPU_HETEROPRIO)
249 STARPU_PRIORITY, BucketSolveGEMM,
BEGIN_C_DECLS typedef int pastix_int_t
The block low-rank structure to hold a matrix in low-rank form.
void solve_blok_zgemm(pastix_side_t side, pastix_trans_t trans, pastix_int_t nrhs, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcbk, const void *dataA, const pastix_complex64_t *B, pastix_int_t ldb, pastix_complex64_t *C, pastix_int_t ldc)
Apply a solve gemm update related to a single block of the matrix A.
enum pastix_side_e pastix_side_t
Side of the operation.
enum pastix_trans_e pastix_trans_t
Transposition.
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
void starpu_stask_blok_zgemm(sopalin_data_t *sopalin_data, pastix_rhs_t rhsb, pastix_coefside_t coef, pastix_side_t side, pastix_trans_t trans, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcbk, pastix_int_t prio)
Submit a task to perform a gemm.
Base structure to all codelet arguments that include the profiling data.
Main PaStiX RHS structure.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
SolverCblk *restrict cblktab
Solver column block structure.
Solver column block structure.