21 #ifndef DOXYGEN_SHOULD_SKIP_THIS
26 #include "sopalin/sopalin_data.h"
28 #if defined(PASTIX_WITH_CUDA)
38 struct cl_cblk_cadd_args_s {
40 sopalin_data_t *sopalin_data;
46 #if defined( PASTIX_STARPU_PROFILING )
54 starpu_profile_t cblk_cadd_profile = {
62 void cblk_cadd_profile_register(
void ) __attribute__( ( constructor ) );
64 cblk_cadd_profile_register(
void )
66 profiling_register_cl( &cblk_cadd_profile );
69 #ifndef DOXYGEN_SHOULD_SKIP_THIS
70 #if defined(PASTIX_STARPU_PROFILING_LOG)
72 cl_profiling_cb_cblk_cadd(
void *callback_arg )
74 cl_profiling_callback( callback_arg );
76 struct starpu_task *task = starpu_task_get_current();
77 struct starpu_profiling_task_info *info = task->profiling_info;
84 struct cl_cblk_cadd_args_s *args = (
struct cl_cblk_cadd_args_s *) callback_arg;
86 pastix_fixdbl_t duration = starpu_timing_timespec_delay_us( &info->start_time, &info->end_time );
92 cl_profiling_log_register( task->name,
"cblk_cadd", M, N, 0, flops, speed );
96 #if defined(PASTIX_STARPU_PROFILING_LOG)
97 static void (*cblk_cadd_callback)(
void*) = cl_profiling_cb_cblk_cadd;
99 static void (*cblk_cadd_callback)(
void*) = cl_profiling_callback;
131 struct starpu_perfmodel_arch *arch,
134 struct cl_cblk_cadd_args_s *args = (
struct cl_cblk_cadd_args_s *)(task->cl_arg);
141 switch( arch->devices->type ) {
142 case STARPU_CPU_WORKER:
145 case STARPU_CUDA_WORKER:
154 cost = modelsGetCost2Param( coefs, M, N ) * 1e6;
160 #ifndef DOXYGEN_SHOULD_SKIP_THIS
161 static struct starpu_perfmodel starpu_cblk_cadd_model = {
162 #if defined( PASTIX_STARPU_COST_PER_ARCH )
163 .type = STARPU_PER_ARCH,
166 .type = STARPU_HISTORY_BASED,
168 .symbol =
"cblk_cadd",
172 #if !defined(PASTIX_STARPU_SIMULATION)
190 struct cl_cblk_cadd_args_s *args = (
struct cl_cblk_cadd_args_s *)cl_arg;
194 A = pastix_starpu_cblk_get_ptr( descr[0] );
195 B = pastix_starpu_cblk_get_ptr( descr[1] );
197 assert( args->cblk->cblktype & CBLK_LAYOUT_2D );
198 assert( args->fcblk->cblktype & CBLK_LAYOUT_2D );
200 args->profile_data.flops =
cpucblk_cadd( 1., args->cblk, args->fcblk, A, B, NULL, 0,
201 &( args->sopalin_data->solvmtx->lowrank ) );
204 #if defined(PASTIX_WITH_CUDA) && 0
220 fct_cblk_cadd_gpu(
void *descr[],
void *cl_arg )
222 struct cl_template_args_s *args = (
struct cl_template_args_s *)cl_arg;
226 A = pastix_starpu_cblk_get_ptr( descr[0] );
227 B = pastix_starpu_cblk_get_ptr( descr[1] );
229 assert( args->cblk->cblktype & CBLK_TASKS_2D );
230 assert( args->fcblk->cblktype & CBLK_TASKS_2D );
232 args->profile_data.flops = gpucblk_cadd( 1., args->cblk, args->fcblk, 1, B,
233 &( args->sopalin_data->solvmtx->lowrank ),
234 starpu_cuda_get_local_stream() );
240 #ifndef DOXYGEN_SHOULD_SKIP_THIS
241 CODELETS_CPU( cblk_cadd, 2 );
279 struct cl_cblk_cadd_args_s *cl_arg = NULL;
280 #if defined(PASTIX_DEBUG_STARPU)
287 cl_arg = malloc(
sizeof(
struct cl_cblk_cadd_args_s) );
288 cl_arg->sopalin_data = sopalin_data;
289 #if defined(PASTIX_STARPU_PROFILING)
290 cl_arg->profile_data.measures = cblk_cadd_profile.measures;
291 cl_arg->profile_data.flops = NAN;
295 cl_arg->fcblk = fcblk;
297 #if defined(PASTIX_DEBUG_STARPU)
299 asprintf( &task_name,
"%s( %ld )",
300 cl_cblk_cadd_cpu.name,
301 (
long)(cblk - sopalin_data->solvmtx->cblktab) );
304 assert( cblk->
cblktype & CBLK_RECV );
305 assert( !(fcblk->
cblktype & (CBLK_RECV|CBLK_FANIN)) );
307 pastix_starpu_insert_task(
309 STARPU_CL_ARGS, cl_arg,
sizeof(
struct cl_cblk_cadd_args_s ),
310 STARPU_EXECUTE_ON_NODE, fcblk->
ownerid,
311 #
if defined(PASTIX_STARPU_PROFILING)
312 STARPU_CALLBACK_WITH_ARG_NFREE, cblk_cadd_callback, cl_arg,
315 STARPU_RW, fcblk->
handler[side],
316 #
if defined(PASTIX_DEBUG_STARPU)
317 STARPU_NAME, task_name,
319 #
if defined(PASTIX_STARPU_HETEROPRIO)
320 STARPU_PRIORITY, BucketFacto1D,
322 STARPU_PRIORITY, prio,
361 assert( cblk->
cblktype & CBLK_FANIN );
363 pastix_starpu_insert_task(
365 STARPU_EXECUTE_ON_NODE, cblk->
ownerid,
367 #
if defined(PASTIX_STARPU_HETEROPRIO)
368 STARPU_PRIORITY, BucketFacto1D,
370 STARPU_PRIORITY, prio,
BEGIN_C_DECLS typedef int pastix_int_t
@ PastixKernelGEADDCblkFRFR
pastix_fixdbl_t cpucblk_cadd(pastix_complex32_t alpha, const SolverCblk *cblkA, SolverCblk *cblkB, const void *A, void *B, pastix_complex32_t *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Add two column blocks in full rank format.
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
void starpu_task_cblk_cadd_recv(sopalin_data_t *sopalin_data, pastix_coefside_t side, const SolverCblk *cblk, SolverCblk *fcblk, int prio)
Insert the task to add a fanin cblk on the receiver side (The fanin is seen on this side as the RECV ...
static void fct_cblk_cadd_cpu(void *descr[], void *cl_arg)
StarPU CPU implementation.
static pastix_fixdbl_t fct_cblk_cadd_cost(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl)
Cost model function.
void starpu_task_cblk_cadd_fanin(sopalin_data_t *sopalin_data, pastix_coefside_t side, const SolverCblk *cblk, int prio)
Insert the task to add a fanin cblk on the emitter side. Note that this task is submitted only to emi...
Base structure to all codelet arguments that include the profiling data.
static double cost(symbol_cblk_t *cblk)
Computes the cost of a cblk.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Solver column block structure.