PaStiX Handbook 6.4.0
Loading...
Searching...
No Matches
codelet_cblk_cadd.c
Go to the documentation of this file.
1/**
2 *
3 * @file codelet_cblk_cadd.c
4 *
5 * StarPU codelet to sum fanin cblk together.
6 *
7 * @copyright 2016-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8 * Univ. Bordeaux. All rights reserved.
9 *
10 * @version 6.4.0
11 * @author Alycia Lisito
12 * @author Mathieu Faverge
13 * @date 2024-07-05
14 *
15 * @generated from /builds/7cspzRFxD/0/solverstack/pastix/sopalin/starpu/codelet_cblk_zadd.c, normal z -> c, Wed May 20 09:17:58 2026
16 *
17 * @addtogroup pastix_starpu
18 * @{
19 *
20 **/
21#ifndef DOXYGEN_SHOULD_SKIP_THIS
22#define _GNU_SOURCE
23#endif /* DOXYGEN_SHOULD_SKIP_THIS */
24#include "common.h"
25#include "blend/solver.h"
26#include "sopalin/sopalin_data.h"
27#include "pastix_ccores.h"
28#if defined(PASTIX_WITH_CUDA)
29#include "pastix_ccuda.h"
30#endif
31#include "pastix_starpu.h"
32#include "pastix_cstarpu.h"
33#include "codelets.h"
34
35/**
36 * @brief Main structure for all tasks of cblk_cadd type
37 */
38struct cl_cblk_cadd_args_s {
39 profile_data_t profile_data;
40 sopalin_data_t *sopalin_data;
42 const SolverCblk *cblk;
43 SolverCblk *fcblk;
44};
45
46#if defined( PASTIX_STARPU_PROFILING )
47/**
48 * @brief Functions to profile the codelet
49 *
50 * Two levels of profiling are available:
51 * 1) A generic one that returns the flops per worker
52 * 2) A more detailed one that generate logs of the performance for each kernel
53 */
54starpu_profile_t cblk_cadd_profile = {
55 .next = NULL,
56 .name = "cblk_cadd"
57};
58
59/**
60 * @brief Profiling registration function
61 */
62void cblk_cadd_profile_register( void ) __attribute__( ( constructor ) );
63void
64cblk_cadd_profile_register( void )
65{
66 profiling_register_cl( &cblk_cadd_profile );
67}
68
69#ifndef DOXYGEN_SHOULD_SKIP_THIS
70#if defined(PASTIX_STARPU_PROFILING_LOG)
71static void
72cl_profiling_cb_cblk_cadd( void *callback_arg )
73{
74 cl_profiling_callback( callback_arg );
75
76 struct starpu_task *task = starpu_task_get_current();
77 struct starpu_profiling_task_info *info = task->profiling_info;
78
79 /* Quick return */
80 if ( info == NULL ) {
81 return;
82 }
83
84 struct cl_cblk_cadd_args_s *args = (struct cl_cblk_cadd_args_s *) callback_arg;
85 pastix_fixdbl_t flops = args->profile_data.flops;
86 pastix_fixdbl_t duration = starpu_timing_timespec_delay_us( &info->start_time, &info->end_time );
87 pastix_fixdbl_t speed = flops / ( 1000.0 * duration );
88
89 pastix_int_t M = args->cblk->stride;
90 pastix_int_t N = cblk_colnbr( args->cblk );
91
92 cl_profiling_log_register( task->name, "cblk_cadd", M, N, 0, flops, speed );
93}
94#endif
95
96#if defined(PASTIX_STARPU_PROFILING_LOG)
97static void (*cblk_cadd_callback)(void*) = cl_profiling_cb_cblk_cadd;
98#else
99static void (*cblk_cadd_callback)(void*) = cl_profiling_callback;
100#endif
101#endif /* DOXYGEN_SHOULD_SKIP_THIS */
102
103#endif /* defined( PASTIX_STARPU_PROFILING ) */
104
#if defined(PASTIX_STARPU_COST_PER_ARCH)
/**
 *******************************************************************************
 *
 * @brief Cost model function
 *
 * The user can switch from the pastix static model to an history based model
 * computed automatically.
 *
 *******************************************************************************
 *
 * @param[in] task
 *          The StarPU task to estimate. Its cl_arg field is read as a
 *          struct cl_cblk_cadd_args_s to get the column block dimensions and
 *          the performance models.
 *
 * @param[in] arch
 *          The StarPU architecture descriptor. The type of its first device
 *          selects the CPU or the GPU (CUDA) model coefficients.
 *
 * @param[in] nimpl
 *          Unused.
 *
 *******************************************************************************
 *
 * @retval The estimated cost of the task in microseconds, or 0. if the worker
 *         type is neither a CPU nor a CUDA worker.
 *
 *******************************************************************************/
static inline pastix_fixdbl_t
fct_cblk_cadd_cost( struct starpu_task           *task,
                    struct starpu_perfmodel_arch *arch,
                    unsigned                      nimpl )
{
    struct cl_cblk_cadd_args_s *args = (struct cl_cblk_cadd_args_s *)(task->cl_arg);

    pastix_fixdbl_t  cost;
    pastix_fixdbl_t *coefs;
    pastix_int_t     M = args->cblk->stride;
    pastix_int_t     N = cblk_colnbr( args->cblk );

    switch( arch->devices->type ) {
    case STARPU_CPU_WORKER:
        coefs = &(args->sopalin_data->cpu_models->coefficients[PastixComplex32-2][PastixKernelGEADDCblkFRFR][0]);
        break;
    case STARPU_CUDA_WORKER:
        coefs = &(args->sopalin_data->gpu_models->coefficients[PastixComplex32-2][PastixKernelGEADDCblkFRFR][0]);
        break;
    default:
        /* No model is available for any other kind of worker */
        assert(0);
        return 0.;
    }

    /* Get cost in us */
    cost = modelsGetCost2Param( coefs, M, N ) * 1e6;

    (void)nimpl;
    return cost;
}
#endif
161
162#ifndef DOXYGEN_SHOULD_SKIP_THIS
163static struct starpu_perfmodel starpu_cblk_cadd_model = {
164#if defined( PASTIX_STARPU_COST_PER_ARCH )
165 .type = STARPU_PER_ARCH,
166 .arch_cost_function = fct_cblk_cadd_cost,
167#else
168 .type = STARPU_HISTORY_BASED,
169#endif
170 .symbol = "cblk_cadd",
171};
172#endif /* DOXYGEN_SHOULD_SKIP_THIS */
173
174#if !defined(PASTIX_STARPU_SIMULATION)
175/**
176 *******************************************************************************
177 *
178 * @brief StarPU CPU implementation
179 *
180 *******************************************************************************
181 *
182 * @param[in] descr
183 * TODO
184 *
185 * @param[in] cl_arg
186 * TODO
187 *
188 *******************************************************************************/
189static void
190fct_cblk_cadd_cpu( void *descr[], void *cl_arg )
191{
192 struct cl_cblk_cadd_args_s *args = (struct cl_cblk_cadd_args_s *)cl_arg;
193 const void *A;
194 void *B;
195
196 A = pastix_starpu_cblk_get_ptr( descr[0] );
197 B = pastix_starpu_cblk_get_ptr( descr[1] );
198
199 assert( args->cblk->cblktype & CBLK_LAYOUT_2D );
200 assert( args->fcblk->cblktype & CBLK_LAYOUT_2D );
201
202 args->profile_data.flops = cpucblk_cadd( 1., args->cblk, args->fcblk, A, B, NULL, 0,
203 &( args->sopalin_data->solvmtx->lowrank ) );
204}
205
#if defined(PASTIX_WITH_CUDA) && 0
/**
 *******************************************************************************
 *
 * @brief StarPU GPU implementation
 *
 * This implementation is currently compiled out (see the "&& 0" in the
 * enclosing preprocessor condition).
 *
 *******************************************************************************
 *
 * @param[in] descr
 *          The StarPU data descriptors of the task: descr[0] gives the source
 *          column block data, descr[1] the destination column block data.
 *
 * @param[in] cl_arg
 *          The task arguments, read as a struct cl_cblk_cadd_args_s.
 *
 *******************************************************************************/
static void
fct_cblk_cadd_gpu( void *descr[], void *cl_arg )
{
    struct cl_cblk_cadd_args_s *args = (struct cl_cblk_cadd_args_s *)cl_arg;
    const void                 *A;
    void                       *B;

    A = pastix_starpu_cblk_get_ptr( descr[0] );
    B = pastix_starpu_cblk_get_ptr( descr[1] );

    /* NOTE(review): the CPU version checks CBLK_LAYOUT_2D instead - confirm which flag is intended */
    assert( args->cblk->cblktype & CBLK_TASKS_2D );
    assert( args->fcblk->cblktype & CBLK_TASKS_2D );

    /* NOTE(review): A is fetched but the literal 1 is passed where the CPU version passes A - confirm against gpucblk_cadd */
    args->profile_data.flops = gpucblk_cadd( 1., args->cblk, args->fcblk, 1, B,
                                             &( args->sopalin_data->solvmtx->lowrank ),
                                             starpu_cuda_get_local_stream() );

}
#endif /* defined(PASTIX_WITH_CUDA) && 0 */
240#endif /* !defined(PASTIX_STARPU_SIMULATION) */
241
242#ifndef DOXYGEN_SHOULD_SKIP_THIS
243CODELETS_CPU( cblk_cadd, 2 );
244#endif /* DOXYGEN_SHOULD_SKIP_THIS */
245
246/**
247 *******************************************************************************
248 *
249 * @brief Insert the task to add a fanin cblk on the receiver side (The fanin is
250 * seen on this side as the RECV cblk). Note that the caller always execute the
251 * task.
252 *
253 *******************************************************************************
254 *
255 * @param[in] sopalin_data
256 * Solver matrix information structure that will guide the algorithm.
257 *
258 * @param[in] side
259 * Define which side of the cblk must be tested.
260 * @arg PastixLCoef if lower part only
261 * @arg PastixUCoef if upper part only
262 * @arg PastixLUCoef if both sides.
263 *
264 * @param[in] cblk
265 * The column block of the matrix.
266 *
267 * @param[in] fcblk
268 * The facing column block of the matrix.
269 *
270 * @param[in] prio
271 * The task priority.
272 *
273 *******************************************************************************/
274void
275starpu_task_cblk_cadd_recv( sopalin_data_t *sopalin_data,
277 const SolverCblk *cblk,
278 SolverCblk *fcblk,
279 int prio )
280{
281 struct cl_cblk_cadd_args_s *cl_arg = NULL;
282#if defined(PASTIX_DEBUG_STARPU)
283 char *task_name;
284#endif
285
286 /*
287 * Create the arguments array
288 */
289 cl_arg = malloc( sizeof( struct cl_cblk_cadd_args_s) );
290 cl_arg->sopalin_data = sopalin_data;
291#if defined(PASTIX_STARPU_PROFILING)
292 cl_arg->profile_data.measures = cblk_cadd_profile.measures;
293 cl_arg->profile_data.flops = NAN;
294#endif
295 cl_arg->side = side;
296 cl_arg->cblk = cblk;
297 cl_arg->fcblk = fcblk;
298
299#if defined(PASTIX_DEBUG_STARPU)
300 /* This actually generates a memory leak */
301 asprintf( &task_name, "%s( %ld )",
302 cl_cblk_cadd_cpu.name,
303 (long)(cblk - sopalin_data->solvmtx->cblktab) );
304#endif
305
306 assert( cblk->cblktype & CBLK_RECV );
307 assert( !(fcblk->cblktype & (CBLK_RECV|CBLK_FANIN)) );
308
309 pastix_starpu_insert_task(
310 &cl_cblk_cadd_cpu,
311 STARPU_CL_ARGS, cl_arg, sizeof( struct cl_cblk_cadd_args_s ),
312 STARPU_EXECUTE_ON_NODE, fcblk->ownerid,
313#if defined(PASTIX_STARPU_PROFILING)
314 STARPU_CALLBACK_WITH_ARG_NFREE, cblk_cadd_callback, cl_arg,
315#endif
316 STARPU_R, cblk->handler[side],
317 STARPU_RW, fcblk->handler[side],
318#if defined(PASTIX_DEBUG_STARPU)
319 STARPU_NAME, task_name,
320#endif
321#if defined(PASTIX_STARPU_HETEROPRIO)
322 STARPU_PRIORITY, BucketFacto1D,
323#else
324 STARPU_PRIORITY, prio,
325#endif
326 0);
327
328 (void)prio;
329}
330
331/**
332 *******************************************************************************
333 *
334 * @brief Insert the task to add a fanin cblk on the emitter side. Note that
335 * this task is submitted only to emit a send to the owner of the associated
336 * recv cblk that will perform the add. Thus, the task is always submitted but
337 * never executed.
338 *
339 *******************************************************************************
340 *
341 * @param[in] sopalin_data
342 * Solver matrix information structure that will guide the algorithm.
343 *
344 * @param[in] side
345 * Define which side of the cblk must be tested.
346 * @arg PastixLCoef if lower part only
347 * @arg PastixUCoef if upper part only
348 * @arg PastixLUCoef if both sides.
349 *
350 * @param[in] cblk
351 * The column block of the matrix.
352 *
353 * @param[in] prio
354 * The task priority.
355 *
356 *******************************************************************************/
357void
358starpu_task_cblk_cadd_fanin( sopalin_data_t *sopalin_data,
360 const SolverCblk *cblk,
361 int prio )
362{
363 assert( cblk->cblktype & CBLK_FANIN );
364
365 pastix_starpu_insert_task(
366 NULL,
367 STARPU_EXECUTE_ON_NODE, cblk->ownerid,
368 STARPU_R, cblk->handler[side],
369#if defined(PASTIX_STARPU_HETEROPRIO)
370 STARPU_PRIORITY, BucketFacto1D,
371#else
372 STARPU_PRIORITY, prio,
373#endif
374 0);
375
376 (void)sopalin_data;
377 (void)prio;
378}
379
380/**
381 * @}
382 */
BEGIN_C_DECLS typedef int pastix_int_t
Definition datatypes.h:51
double pastix_fixdbl_t
Definition datatypes.h:65
@ PastixKernelGEADDCblkFRFR
pastix_fixdbl_t cpucblk_cadd(pastix_complex32_t alpha, const SolverCblk *cblkA, SolverCblk *cblkB, const void *A, void *B, pastix_complex32_t *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Add two column blocks in full rank format.
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
void starpu_task_cblk_cadd_recv(sopalin_data_t *sopalin_data, pastix_coefside_t side, const SolverCblk *cblk, SolverCblk *fcblk, int prio)
Insert the task to add a fanin cblk on the receiver side (The fanin is seen on this side as the RECV ...
static void fct_cblk_cadd_cpu(void *descr[], void *cl_arg)
StarPU CPU implementation.
void starpu_task_cblk_cadd_fanin(sopalin_data_t *sopalin_data, pastix_coefside_t side, const SolverCblk *cblk, int prio)
Insert the task to add a fanin cblk on the emitter side. Note that this task is submitted only to emi...
Base structure to all codelet arguments that include the profiling data.
static double cost(symbol_cblk_t *cblk)
Computes the cost of a cblk.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition solver.h:329
void * handler[2]
Definition solver.h:179
int8_t cblktype
Definition solver.h:164
Solver column block structure.
Definition solver.h:161