21#ifndef DOXYGEN_SHOULD_SKIP_THIS
26#include "sopalin/sopalin_data.h"
28#if defined(PASTIX_WITH_CUDA)
38struct cl_cblk_sadd_args_s {
40 sopalin_data_t *sopalin_data;
46#if defined( PASTIX_STARPU_PROFILING )
54starpu_profile_t cblk_sadd_profile = {
62void cblk_sadd_profile_register(
void ) __attribute__( ( constructor ) );
64cblk_sadd_profile_register(
void )
66 profiling_register_cl( &cblk_sadd_profile );
69#ifndef DOXYGEN_SHOULD_SKIP_THIS
70#if defined(PASTIX_STARPU_PROFILING_LOG)
72cl_profiling_cb_cblk_sadd(
void *callback_arg )
74 cl_profiling_callback( callback_arg );
76 struct starpu_task *task = starpu_task_get_current();
77 struct starpu_profiling_task_info *info = task->profiling_info;
84 struct cl_cblk_sadd_args_s *args = (
struct cl_cblk_sadd_args_s *) callback_arg;
86 pastix_fixdbl_t duration = starpu_timing_timespec_delay_us( &info->start_time, &info->end_time );
92 cl_profiling_log_register( task->name,
"cblk_sadd", M, N, 0, flops, speed );
96#if defined(PASTIX_STARPU_PROFILING_LOG)
97static void (*cblk_sadd_callback)(
void*) = cl_profiling_cb_cblk_sadd;
99static void (*cblk_sadd_callback)(
void*) = cl_profiling_callback;
105#if defined(PASTIX_STARPU_COST_PER_ARCH)
131fct_cblk_sadd_cost(
struct starpu_task *task,
132 struct starpu_perfmodel_arch *arch,
135 struct cl_cblk_sadd_args_s *args = (
struct cl_cblk_sadd_args_s *)(task->cl_arg);
142 switch( arch->devices->type ) {
143 case STARPU_CPU_WORKER:
146 case STARPU_CUDA_WORKER:
155 cost = modelsGetCost2Param( coefs, M, N ) * 1e6;
162#ifndef DOXYGEN_SHOULD_SKIP_THIS
163static struct starpu_perfmodel starpu_cblk_sadd_model = {
164#if defined( PASTIX_STARPU_COST_PER_ARCH )
165 .type = STARPU_PER_ARCH,
166 .arch_cost_function = fct_cblk_sadd_cost,
168 .type = STARPU_HISTORY_BASED,
170 .symbol =
"cblk_sadd",
174#if !defined(PASTIX_STARPU_SIMULATION)
192 struct cl_cblk_sadd_args_s *args = (
struct cl_cblk_sadd_args_s *)cl_arg;
196 A = pastix_starpu_cblk_get_ptr( descr[0] );
197 B = pastix_starpu_cblk_get_ptr( descr[1] );
199 assert( args->cblk->cblktype & CBLK_LAYOUT_2D );
200 assert( args->fcblk->cblktype & CBLK_LAYOUT_2D );
202 args->profile_data.flops =
cpucblk_sadd( 1., args->cblk, args->fcblk, A, B, NULL, 0,
203 &( args->sopalin_data->solvmtx->lowrank ) );
206#if defined(PASTIX_WITH_CUDA) && 0
222fct_cblk_sadd_gpu(
void *descr[],
void *cl_arg )
224 struct cl_template_args_s *args = (
struct cl_template_args_s *)cl_arg;
228 A = pastix_starpu_cblk_get_ptr( descr[0] );
229 B = pastix_starpu_cblk_get_ptr( descr[1] );
231 assert( args->cblk->cblktype & CBLK_TASKS_2D );
232 assert( args->fcblk->cblktype & CBLK_TASKS_2D );
234 args->profile_data.flops = gpucblk_sadd( 1., args->cblk, args->fcblk, 1, B,
235 &( args->sopalin_data->solvmtx->lowrank ),
236 starpu_cuda_get_local_stream() );
242#ifndef DOXYGEN_SHOULD_SKIP_THIS
243CODELETS_CPU( cblk_sadd, 2 );
281 struct cl_cblk_sadd_args_s *cl_arg = NULL;
282#if defined(PASTIX_DEBUG_STARPU)
289 cl_arg = malloc(
sizeof(
struct cl_cblk_sadd_args_s) );
290 cl_arg->sopalin_data = sopalin_data;
291#if defined(PASTIX_STARPU_PROFILING)
292 cl_arg->profile_data.measures = cblk_sadd_profile.measures;
293 cl_arg->profile_data.flops = NAN;
297 cl_arg->fcblk = fcblk;
299#if defined(PASTIX_DEBUG_STARPU)
301 asprintf( &task_name,
"%s( %ld )",
302 cl_cblk_sadd_cpu.name,
303 (
long)(cblk - sopalin_data->solvmtx->cblktab) );
306 assert( cblk->
cblktype & CBLK_RECV );
307 assert( !(fcblk->
cblktype & (CBLK_RECV|CBLK_FANIN)) );
309 pastix_starpu_insert_task(
311 STARPU_CL_ARGS, cl_arg,
sizeof(
struct cl_cblk_sadd_args_s ),
312 STARPU_EXECUTE_ON_NODE, fcblk->
ownerid,
313#
if defined(PASTIX_STARPU_PROFILING)
314 STARPU_CALLBACK_WITH_ARG_NFREE, cblk_sadd_callback, cl_arg,
317 STARPU_RW, fcblk->
handler[side],
318#
if defined(PASTIX_DEBUG_STARPU)
319 STARPU_NAME, task_name,
321#
if defined(PASTIX_STARPU_HETEROPRIO)
322 STARPU_PRIORITY, BucketFacto1D,
324 STARPU_PRIORITY, prio,
363 assert( cblk->
cblktype & CBLK_FANIN );
365 pastix_starpu_insert_task(
367 STARPU_EXECUTE_ON_NODE, cblk->
ownerid,
369#
if defined(PASTIX_STARPU_HETEROPRIO)
370 STARPU_PRIORITY, BucketFacto1D,
372 STARPU_PRIORITY, prio,
BEGIN_C_DECLS typedef int pastix_int_t
@ PastixKernelGEADDCblkFRFR
pastix_fixdbl_t cpucblk_sadd(float alpha, const SolverCblk *cblkA, SolverCblk *cblkB, const void *A, void *B, float *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Add two column blocks in full rank format.
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
static void fct_cblk_sadd_cpu(void *descr[], void *cl_arg)
StarPU CPU implementation.
void starpu_task_cblk_sadd_fanin(sopalin_data_t *sopalin_data, pastix_coefside_t side, const SolverCblk *cblk, int prio)
Insert the task to add a fanin cblk on the emitter side. Note that this task is submitted only to emi...
void starpu_task_cblk_sadd_recv(sopalin_data_t *sopalin_data, pastix_coefside_t side, const SolverCblk *cblk, SolverCblk *fcblk, int prio)
Insert the task to add a fanin cblk on the receiver side (The fanin is seen on this side as the RECV ...
Base structure to all codelet arguments that include the profiling data.
static double cost(symbol_cblk_t *cblk)
Computes the cost of a cblk.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Solver column block structure.