21#ifndef DOXYGEN_SHOULD_SKIP_THIS
26#include "sopalin/sopalin_data.h"
28#if defined(PASTIX_WITH_CUDA)
38struct cl_cblk_cadd_args_s {
40 sopalin_data_t *sopalin_data;
46#if defined( PASTIX_STARPU_PROFILING )
54starpu_profile_t cblk_cadd_profile = {
62void cblk_cadd_profile_register(
void ) __attribute__( ( constructor ) );
64cblk_cadd_profile_register(
void )
66 profiling_register_cl( &cblk_cadd_profile );
69#ifndef DOXYGEN_SHOULD_SKIP_THIS
70#if defined(PASTIX_STARPU_PROFILING_LOG)
72cl_profiling_cb_cblk_cadd(
void *callback_arg )
74 cl_profiling_callback( callback_arg );
76 struct starpu_task *task = starpu_task_get_current();
77 struct starpu_profiling_task_info *info = task->profiling_info;
84 struct cl_cblk_cadd_args_s *args = (
struct cl_cblk_cadd_args_s *) callback_arg;
86 pastix_fixdbl_t duration = starpu_timing_timespec_delay_us( &info->start_time, &info->end_time );
92 cl_profiling_log_register( task->name,
"cblk_cadd", M, N, 0, flops, speed );
96#if defined(PASTIX_STARPU_PROFILING_LOG)
97static void (*cblk_cadd_callback)(
void*) = cl_profiling_cb_cblk_cadd;
99static void (*cblk_cadd_callback)(
void*) = cl_profiling_callback;
105#if defined(PASTIX_STARPU_COST_PER_ARCH)
131fct_cblk_cadd_cost(
struct starpu_task *task,
132 struct starpu_perfmodel_arch *arch,
135 struct cl_cblk_cadd_args_s *args = (
struct cl_cblk_cadd_args_s *)(task->cl_arg);
142 switch( arch->devices->type ) {
143 case STARPU_CPU_WORKER:
146 case STARPU_CUDA_WORKER:
155 cost = modelsGetCost2Param( coefs, M, N ) * 1e6;
162#ifndef DOXYGEN_SHOULD_SKIP_THIS
163static struct starpu_perfmodel starpu_cblk_cadd_model = {
164#if defined( PASTIX_STARPU_COST_PER_ARCH )
165 .type = STARPU_PER_ARCH,
166 .arch_cost_function = fct_cblk_cadd_cost,
168 .type = STARPU_HISTORY_BASED,
170 .symbol =
"cblk_cadd",
174#if !defined(PASTIX_STARPU_SIMULATION)
192 struct cl_cblk_cadd_args_s *args = (
struct cl_cblk_cadd_args_s *)cl_arg;
196 A = pastix_starpu_cblk_get_ptr( descr[0] );
197 B = pastix_starpu_cblk_get_ptr( descr[1] );
199 assert( args->cblk->cblktype & CBLK_LAYOUT_2D );
200 assert( args->fcblk->cblktype & CBLK_LAYOUT_2D );
202 args->profile_data.flops =
cpucblk_cadd( 1., args->cblk, args->fcblk, A, B, NULL, 0,
203 &( args->sopalin_data->solvmtx->lowrank ) );
206#if defined(PASTIX_WITH_CUDA) && 0
222fct_cblk_cadd_gpu(
void *descr[],
void *cl_arg )
224 struct cl_template_args_s *args = (
struct cl_template_args_s *)cl_arg;
228 A = pastix_starpu_cblk_get_ptr( descr[0] );
229 B = pastix_starpu_cblk_get_ptr( descr[1] );
231 assert( args->cblk->cblktype & CBLK_TASKS_2D );
232 assert( args->fcblk->cblktype & CBLK_TASKS_2D );
234 args->profile_data.flops = gpucblk_cadd( 1., args->cblk, args->fcblk, 1, B,
235 &( args->sopalin_data->solvmtx->lowrank ),
236 starpu_cuda_get_local_stream() );
242#ifndef DOXYGEN_SHOULD_SKIP_THIS
243CODELETS_CPU( cblk_cadd, 2 );
281 struct cl_cblk_cadd_args_s *cl_arg = NULL;
282#if defined(PASTIX_DEBUG_STARPU)
289 cl_arg = malloc(
sizeof(
struct cl_cblk_cadd_args_s) );
290 cl_arg->sopalin_data = sopalin_data;
291#if defined(PASTIX_STARPU_PROFILING)
292 cl_arg->profile_data.measures = cblk_cadd_profile.measures;
293 cl_arg->profile_data.flops = NAN;
297 cl_arg->fcblk = fcblk;
299#if defined(PASTIX_DEBUG_STARPU)
301 asprintf( &task_name,
"%s( %ld )",
302 cl_cblk_cadd_cpu.name,
303 (
long)(cblk - sopalin_data->solvmtx->cblktab) );
306 assert( cblk->
cblktype & CBLK_RECV );
307 assert( !(fcblk->
cblktype & (CBLK_RECV|CBLK_FANIN)) );
309 pastix_starpu_insert_task(
311 STARPU_CL_ARGS, cl_arg,
sizeof(
struct cl_cblk_cadd_args_s ),
312 STARPU_EXECUTE_ON_NODE, fcblk->
ownerid,
313#
if defined(PASTIX_STARPU_PROFILING)
314 STARPU_CALLBACK_WITH_ARG_NFREE, cblk_cadd_callback, cl_arg,
317 STARPU_RW, fcblk->
handler[side],
318#
if defined(PASTIX_DEBUG_STARPU)
319 STARPU_NAME, task_name,
321#
if defined(PASTIX_STARPU_HETEROPRIO)
322 STARPU_PRIORITY, BucketFacto1D,
324 STARPU_PRIORITY, prio,
363 assert( cblk->
cblktype & CBLK_FANIN );
365 pastix_starpu_insert_task(
367 STARPU_EXECUTE_ON_NODE, cblk->
ownerid,
369#
if defined(PASTIX_STARPU_HETEROPRIO)
370 STARPU_PRIORITY, BucketFacto1D,
372 STARPU_PRIORITY, prio,
BEGIN_C_DECLS typedef int pastix_int_t
@ PastixKernelGEADDCblkFRFR
pastix_fixdbl_t cpucblk_cadd(pastix_complex32_t alpha, const SolverCblk *cblkA, SolverCblk *cblkB, const void *A, void *B, pastix_complex32_t *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Add two column blocks in full rank format.
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
void starpu_task_cblk_cadd_recv(sopalin_data_t *sopalin_data, pastix_coefside_t side, const SolverCblk *cblk, SolverCblk *fcblk, int prio)
Insert the task to add a fanin cblk on the receiver side (The fanin is seen on this side as the RECV ...
static void fct_cblk_cadd_cpu(void *descr[], void *cl_arg)
StarPU CPU implementation.
void starpu_task_cblk_cadd_fanin(sopalin_data_t *sopalin_data, pastix_coefside_t side, const SolverCblk *cblk, int prio)
Insert the task to add a fanin cblk on the emitter side. Note that this task is submitted only to emi...
Base structure to all codelet arguments that include the profiling data.
static double cost(symbol_cblk_t *cblk)
Computes the cost of a cblk.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Solver column block structure.