21 #ifndef DOXYGEN_SHOULD_SKIP_THIS
26 #include "sopalin/sopalin_data.h"
28 #if defined(PASTIX_WITH_CUDA)
/*
 * Argument structure handed to the blok_cadd codelet and to its profiling callback.
 * Only the first member is visible in this listing; the rest of the file also accesses
 * profile_data, cblk, fcblk, blok_m and fblok_m through pointers to this structure.
 */
38 struct cl_blok_cadd_args_s {
/* Solver context; the kernels reach the low-rank parameters via sopalin_data->solvmtx->lowrank. */
40 sopalin_data_t *sopalin_data;
48 #if defined( PASTIX_STARPU_PROFILING )
/* Profiling record of the blok_cadd codelet (its initializer is not visible in this listing). */
56 starpu_profile_t blok_cadd_profile = {
/*
 * Declared with the GCC/Clang constructor attribute, so the function is called
 * automatically before main() is entered.
 */
64 void blok_cadd_profile_register(
void ) __attribute__( ( constructor ) );
/* Passes the address of blok_cadd_profile to profiling_register_cl(). */
66 blok_cadd_profile_register(
void )
68 profiling_register_cl( &blok_cadd_profile );
71 #ifndef DOXYGEN_SHOULD_SKIP_THIS
72 #if defined(PASTIX_STARPU_PROFILING_LOG)
/*
 * StarPU callback compiled when PASTIX_STARPU_PROFILING_LOG is defined: it first runs
 * the generic cl_profiling_callback(), then logs one "blok_cadd" record for the
 * current task. callback_arg is used as a struct cl_blok_cadd_args_s pointer.
 */
74 cl_profiling_cb_blok_cadd(
void *callback_arg )
76 cl_profiling_callback( callback_arg );
/* The timing data is read from the profiling info of the task StarPU reports as current. */
78 struct starpu_task *task = starpu_task_get_current();
79 struct starpu_profiling_task_info *info = task->profiling_info;
86 struct cl_blok_cadd_args_s *args = (
struct cl_blok_cadd_args_s *) callback_arg;
/* Delay between the recorded start and end times, in microseconds (per the _us suffix). */
88 pastix_fixdbl_t duration = starpu_timing_timespec_delay_us( &info->start_time, &info->end_time );
/* Block handled by the task: blok_m is its offset from the first block of cblk. */
91 const SolverBlok *blok = args->cblk->fblokptr + args->blok_m;
/* M, N, flops and speed are computed on lines that are not part of this listing. */
95 cl_profiling_log_register( task->name,
"blok_cadd", M, N, 0, flops, speed );
99 #if defined(PASTIX_STARPU_PROFILING_LOG)
/*
 * Callback pointer attached to the submitted tasks (see STARPU_CALLBACK_WITH_ARG_NFREE
 * in the task insertion). The first definition selects the logging callback and sits
 * under PASTIX_STARPU_PROFILING_LOG; the second selects the generic profiling callback
 * and is presumably the #else branch - the #else/#endif lines are not visible here.
 */
100 static void (*blok_cadd_callback)(
void*) = cl_profiling_cb_blok_cadd;
102 static void (*blok_cadd_callback)(
void*) = cl_profiling_callback;
/*
 * Fragment of fct_blok_cadd_cost(): cost estimate of a blok_cadd task for a given
 * architecture. M, N and coefs are set on lines that are not part of this listing.
 */
135 struct starpu_perfmodel_arch *arch,
/* The codelet arguments are recovered from the task itself. */
138 struct cl_blok_cadd_args_s *args = (
struct cl_blok_cadd_args_s *)(task->cl_arg);
/* Block handled by the task: blok_m is its offset from the first block of cblk. */
142 const SolverBlok *blok = args->cblk->fblokptr + args->blok_m;
/* Dispatch on the worker type of the architecture (presumably to pick coefs; the case bodies are not visible). */
146 switch( arch->devices->type ) {
147 case STARPU_CPU_WORKER:
150 case STARPU_CUDA_WORKER:
/* Two-parameter model evaluated on (M, N); the 1e6 factor presumably converts seconds to microseconds - TODO confirm. */
159 cost = modelsGetCost2Param( coefs, M, N ) * 1e6;
165 #ifndef DOXYGEN_SHOULD_SKIP_THIS
/*
 * StarPU performance model of the blok_cadd codelet, registered under the symbol
 * "blok_cadd". The type is STARPU_PER_ARCH when PASTIX_STARPU_COST_PER_ARCH is defined;
 * STARPU_HISTORY_BASED is presumably the #else branch (the #else line is not visible).
 */
166 static struct starpu_perfmodel starpu_blok_cadd_model = {
167 #if defined( PASTIX_STARPU_COST_PER_ARCH )
168 .type = STARPU_PER_ARCH,
171 .type = STARPU_HISTORY_BASED,
173 .symbol =
"blok_cadd",
177 #if !defined(PASTIX_STARPU_SIMULATION)
/*
 * Body fragment of the CPU kernel fct_blok_cadd_cpu( descr, cl_arg ): cl_arg carries
 * the codelet arguments and descr[] the StarPU data handles of the two blocks.
 */
195 struct cl_blok_cadd_args_s *args = (
struct cl_blok_cadd_args_s *)cl_arg;
/* Buffers behind the first and second data handles of the task. */
199 A = pastix_starpu_blok_get_ptr( descr[0] );
200 B = pastix_starpu_blok_get_ptr( descr[1] );
/* Both column blocks must carry the CBLK_LAYOUT_2D flag. */
202 assert( args->cblk->cblktype & CBLK_LAYOUT_2D );
203 assert( args->fcblk->cblktype & CBLK_LAYOUT_2D );
/*
 * Run the kernel with alpha = 1 and keep its return value as the flop count in the
 * profiling data. One argument line of the call is missing from this listing.
 */
205 args->profile_data.flops =
cpublok_cadd( 1., args->cblk, args->fcblk, args->blok_m, args->fblok_m,
207 &( args->sopalin_data->solvmtx->lowrank ) );
210 #if defined(PASTIX_WITH_CUDA) && 0
/*
 * CUDA counterpart of the CPU kernel. It is currently compiled out: the enclosing
 * preprocessor guard ends in "&& 0". cl_arg carries the codelet arguments and descr[]
 * the StarPU data handles of the two blocks.
 */
226 fct_blok_cadd_gpu(
void *descr[],
void *cl_arg )
/*
 * Use the argument structure this codelet is actually submitted with
 * (struct cl_blok_cadd_args_s, see STARPU_CL_ARGS at task insertion), as the CPU
 * kernel does, instead of the cl_template_args_s left over from the codelet template.
 */
228 struct cl_blok_cadd_args_s *args = (
struct cl_blok_cadd_args_s *)cl_arg;
/* Buffers behind the first and second data handles of the task. */
232 A = pastix_starpu_blok_get_ptr( descr[0] );
233 B = pastix_starpu_blok_get_ptr( descr[1] );
/* NOTE(review): the CPU kernel asserts CBLK_LAYOUT_2D here, this one CBLK_TASKS_2D - confirm which flag is intended before re-enabling. */
235 assert( args->cblk->cblktype & CBLK_TASKS_2D );
236 assert( args->fcblk->cblktype & CBLK_TASKS_2D );
/*
 * Run the kernel with alpha = 1 on the worker's local CUDA stream and keep its return
 * value as the flop count. One argument line of the call is missing from this listing.
 */
238 args->profile_data.flops = gpublok_cadd( 1., args->cblk, args->fcblk, args->blok_m, args->fblok_m,
240 &( args->sopalin_data->solvmtx->lowrank ),
241 starpu_cuda_get_local_stream() );
247 #ifndef DOXYGEN_SHOULD_SKIP_THIS
/*
 * Declares the blok_cadd codelet through the CODELETS_CPU macro. The second argument (2)
 * is presumably the number of data buffers, and the cl_blok_cadd_cpu object used at task
 * insertion presumably comes from this expansion - TODO confirm against the macro definition.
 */
248 CODELETS_CPU( blok_cadd, 2 );
/*
 * Body fragment of starpu_task_blok_cadd_recv(): fills the codelet arguments and submits
 * the task that adds block `blok` of the RECV column block `cblk` into block `fblok` of
 * the column block `fcblk`. Several lines of the function are missing from this listing.
 */
294 struct cl_blok_cadd_args_s *cl_arg = NULL;
295 #if defined(PASTIX_DEBUG_STARPU)
/*
 * Heap allocation: the buffer is handed to StarPU through STARPU_CL_ARGS below.
 * NOTE(review): the result of malloc is dereferenced without a NULL check.
 */
304 cl_arg = malloc(
sizeof(
struct cl_blok_cadd_args_s) );
305 cl_arg->sopalin_data = sopalin_data;
306 #if defined(PASTIX_STARPU_PROFILING)
/* Profiling data: share the measures of the codelet profile; flops stays NAN until the kernel stores its result. */
307 cl_arg->profile_data.measures = blok_cadd_profile.measures;
308 cl_arg->profile_data.flops = NAN;
312 cl_arg->fcblk = fcblk;
/* Blocks are stored as offsets from the first block of their column block; the kernels add the offset back to fblokptr. */
313 cl_arg->blok_m = blok - cblk->
fblokptr;
314 cl_arg->fblok_m = fblok - fcblk->
fblokptr;
316 #if defined(PASTIX_DEBUG_STARPU)
/*
 * Debug task name: "<codelet name>( <index of cblk in solvmtx->cblktab> )".
 * NOTE(review): the return value of asprintf is ignored, so a failed allocation goes unnoticed.
 */
318 asprintf( &task_name,
"%s( %ld )",
319 cl_blok_cadd_cpu.name,
320 (
long)(cblk - sopalin_data->solvmtx->cblktab) );
/* The source must be a RECV column block; the target must be neither RECV nor FANIN. */
323 assert( cblk->
cblktype & CBLK_RECV );
324 assert( !(fcblk->
cblktype & (CBLK_RECV|CBLK_FANIN)) );
326 pastix_starpu_insert_task(
/* Codelet arguments are passed by pointer together with their size. */
328 STARPU_CL_ARGS, cl_arg,
sizeof(
struct cl_blok_cadd_args_s ),
/* The task is executed on the node that owns the target column block. */
329 STARPU_EXECUTE_ON_NODE, fcblk->
ownerid,
/* With profiling enabled, the callback receives the same cl_arg pointer (NFREE variant). */
330 #
if defined(PASTIX_STARPU_PROFILING)
331 STARPU_CALLBACK_WITH_ARG_NFREE, blok_cadd_callback, cl_arg,
/* The target block is accessed in read-write mode. */
334 STARPU_RW, fblok->
handler[side],
335 #
if defined(PASTIX_DEBUG_STARPU)
336 STARPU_NAME, task_name,
/* Priority: BucketFacto1D under PASTIX_STARPU_HETEROPRIO; prio is presumably the #else branch (not visible here). */
338 #
if defined(PASTIX_STARPU_HETEROPRIO)
339 STARPU_PRIORITY, BucketFacto1D,
341 STARPU_PRIORITY, prio,
/*
 * Body fragment of starpu_task_blok_cadd_fanin(): submits the task for a block of a
 * FANIN column block. Most arguments of the insertion call are missing from this listing.
 */
/* The column block must carry the CBLK_FANIN flag. */
384 assert( cblk->
cblktype & CBLK_FANIN );
386 pastix_starpu_insert_task(
/* The task is executed on the node recorded as owner of the fanin column block. */
388 STARPU_EXECUTE_ON_NODE, cblk->
ownerid,
/* Priority: BucketFacto1D under PASTIX_STARPU_HETEROPRIO; prio is presumably the #else branch (not visible here). */
390 #
if defined(PASTIX_STARPU_HETEROPRIO)
391 STARPU_PRIORITY, BucketFacto1D,
393 STARPU_PRIORITY, prio,
BEGIN_C_DECLS typedef int pastix_int_t
@ PastixKernelGEADDCblkFRFR
pastix_fixdbl_t cpublok_cadd(pastix_complex32_t alpha, const SolverCblk *cblkA, SolverCblk *cblkB, pastix_int_t blokA_m, pastix_int_t blokB_m, const void *A, void *B, pastix_complex32_t *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Add two bloks.
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
void starpu_task_blok_cadd_recv(sopalin_data_t *sopalin_data, pastix_coefside_t side, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, SolverBlok *fblok, int prio)
Insert the task to add a fanin cblk on the receiver side (The fanin is seen on this side as the RECV ...
void starpu_task_blok_cadd_fanin(sopalin_data_t *sopalin_data, pastix_coefside_t side, const SolverCblk *cblk, const SolverBlok *blok, int prio)
Insert the task to add a fanin cblk on the emitter side. Note that this task is submitted only to emi...
static pastix_fixdbl_t fct_blok_cadd_cost(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl)
Cost model function.
static void fct_blok_cadd_cpu(void *descr[], void *cl_arg)
StarPU CPU implementation.
Base structure to all codelet arguments that include the profiling data.
static double cost(symbol_cblk_t *cblk)
Computes the cost of a cblk.
static pastix_int_t blok_rownbr(const SolverBlok *blok)
Compute the number of rows of a block.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Solver column block structure.