21 #ifndef DOXYGEN_SHOULD_SKIP_THIS
26 #include "sopalin/sopalin_data.h"
28 #if defined(PASTIX_WITH_CUDA)
38 struct cl_cblk_sadd_args_s {
40 sopalin_data_t *sopalin_data;
46 #if defined( PASTIX_STARPU_PROFILING )
54 starpu_profile_t cblk_sadd_profile = {
62 void cblk_sadd_profile_register(
void ) __attribute__( ( constructor ) );
64 cblk_sadd_profile_register(
void )
66 profiling_register_cl( &cblk_sadd_profile );
69 #ifndef DOXYGEN_SHOULD_SKIP_THIS
70 #if defined(PASTIX_STARPU_PROFILING_LOG)
72 cl_profiling_cb_cblk_sadd(
void *callback_arg )
74 cl_profiling_callback( callback_arg );
76 struct starpu_task *task = starpu_task_get_current();
77 struct starpu_profiling_task_info *info = task->profiling_info;
84 struct cl_cblk_sadd_args_s *args = (
struct cl_cblk_sadd_args_s *) callback_arg;
86 pastix_fixdbl_t duration = starpu_timing_timespec_delay_us( &info->start_time, &info->end_time );
92 cl_profiling_log_register( task->name,
"cblk_sadd", M, N, 0, flops, speed );
96 #if defined(PASTIX_STARPU_PROFILING_LOG)
97 static void (*cblk_sadd_callback)(
void*) = cl_profiling_cb_cblk_sadd;
99 static void (*cblk_sadd_callback)(
void*) = cl_profiling_callback;
131 struct starpu_perfmodel_arch *arch,
134 struct cl_cblk_sadd_args_s *args = (
struct cl_cblk_sadd_args_s *)(task->cl_arg);
141 switch( arch->devices->type ) {
142 case STARPU_CPU_WORKER:
145 case STARPU_CUDA_WORKER:
154 cost = modelsGetCost2Param( coefs, M, N ) * 1e6;
160 #ifndef DOXYGEN_SHOULD_SKIP_THIS
161 static struct starpu_perfmodel starpu_cblk_sadd_model = {
162 #if defined( PASTIX_STARPU_COST_PER_ARCH )
163 .type = STARPU_PER_ARCH,
166 .type = STARPU_HISTORY_BASED,
168 .symbol =
"cblk_sadd",
172 #if !defined(PASTIX_STARPU_SIMULATION)
190 struct cl_cblk_sadd_args_s *args = (
struct cl_cblk_sadd_args_s *)cl_arg;
194 A = pastix_starpu_cblk_get_ptr( descr[0] );
195 B = pastix_starpu_cblk_get_ptr( descr[1] );
197 assert( args->cblk->cblktype & CBLK_LAYOUT_2D );
198 assert( args->fcblk->cblktype & CBLK_LAYOUT_2D );
200 args->profile_data.flops =
cpucblk_sadd( 1., args->cblk, args->fcblk, A, B, NULL, 0,
201 &( args->sopalin_data->solvmtx->lowrank ) );
204 #if defined(PASTIX_WITH_CUDA) && 0
220 fct_cblk_sadd_gpu(
void *descr[],
void *cl_arg )
222 struct cl_template_args_s *args = (
struct cl_template_args_s *)cl_arg;
226 A = pastix_starpu_cblk_get_ptr( descr[0] );
227 B = pastix_starpu_cblk_get_ptr( descr[1] );
229 assert( args->cblk->cblktype & CBLK_TASKS_2D );
230 assert( args->fcblk->cblktype & CBLK_TASKS_2D );
232 args->profile_data.flops = gpucblk_sadd( 1., args->cblk, args->fcblk, 1, B,
233 &( args->sopalin_data->solvmtx->lowrank ),
234 starpu_cuda_get_local_stream() );
240 #ifndef DOXYGEN_SHOULD_SKIP_THIS
241 CODELETS_CPU( cblk_sadd, 2 );
279 struct cl_cblk_sadd_args_s *cl_arg = NULL;
280 #if defined(PASTIX_DEBUG_STARPU)
287 cl_arg = malloc(
sizeof(
struct cl_cblk_sadd_args_s) );
288 cl_arg->sopalin_data = sopalin_data;
289 #if defined(PASTIX_STARPU_PROFILING)
290 cl_arg->profile_data.measures = cblk_sadd_profile.measures;
291 cl_arg->profile_data.flops = NAN;
295 cl_arg->fcblk = fcblk;
297 #if defined(PASTIX_DEBUG_STARPU)
299 asprintf( &task_name,
"%s( %ld )",
300 cl_cblk_sadd_cpu.name,
301 (
long)(cblk - sopalin_data->solvmtx->cblktab) );
304 assert( cblk->
cblktype & CBLK_RECV );
305 assert( !(fcblk->
cblktype & (CBLK_RECV|CBLK_FANIN)) );
307 pastix_starpu_insert_task(
309 STARPU_CL_ARGS, cl_arg,
sizeof(
struct cl_cblk_sadd_args_s ),
310 STARPU_EXECUTE_ON_NODE, fcblk->
ownerid,
311 #
if defined(PASTIX_STARPU_PROFILING)
312 STARPU_CALLBACK_WITH_ARG_NFREE, cblk_sadd_callback, cl_arg,
315 STARPU_RW, fcblk->
handler[side],
316 #
if defined(PASTIX_DEBUG_STARPU)
317 STARPU_NAME, task_name,
319 #
if defined(PASTIX_STARPU_HETEROPRIO)
320 STARPU_PRIORITY, BucketFacto1D,
322 STARPU_PRIORITY, prio,
361 assert( cblk->
cblktype & CBLK_FANIN );
363 pastix_starpu_insert_task(
365 STARPU_EXECUTE_ON_NODE, cblk->
ownerid,
367 #
if defined(PASTIX_STARPU_HETEROPRIO)
368 STARPU_PRIORITY, BucketFacto1D,
370 STARPU_PRIORITY, prio,
BEGIN_C_DECLS typedef int pastix_int_t
@ PastixKernelGEADDCblkFRFR
pastix_fixdbl_t cpucblk_sadd(float alpha, const SolverCblk *cblkA, SolverCblk *cblkB, const void *A, void *B, float *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Add two column blocks in full rank format.
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
static void fct_cblk_sadd_cpu(void *descr[], void *cl_arg)
StarPU CPU implementation.
static pastix_fixdbl_t fct_cblk_sadd_cost(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl)
Cost model function.
void starpu_task_cblk_sadd_fanin(sopalin_data_t *sopalin_data, pastix_coefside_t side, const SolverCblk *cblk, int prio)
Insert the task to add a fanin cblk on the emitter side. Note that this task is submitted only to emi...
void starpu_task_cblk_sadd_recv(sopalin_data_t *sopalin_data, pastix_coefside_t side, const SolverCblk *cblk, SolverCblk *fcblk, int prio)
Insert the task to add a fanin cblk on the receiver side (The fanin is seen on this side as the RECV ...
Base structure to all codelet arguments that include the profiling data.
static double cost(symbol_cblk_t *cblk)
Computes the cost of a cblk.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Solver column block structure.