PaStiX Handbook 6.4.0
Loading...
Searching...
No Matches
codelet_cblk_sadd.c
Go to the documentation of this file.
1/**
2 *
3 * @file codelet_cblk_sadd.c
4 *
5 * StarPU codelet to sum fanin cblk together.
6 *
7 * @copyright 2016-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8 * Univ. Bordeaux. All rights reserved.
9 *
10 * @version 6.4.0
11 * @author Alycia Lisito
12 * @author Mathieu Faverge
13 * @date 2024-07-05
14 *
15 * @generated from /builds/2mk6rsew/0/solverstack/pastix/sopalin/starpu/codelet_cblk_zadd.c, normal z -> s, Tue Feb 25 14:35:22 2025
16 *
17 * @addtogroup pastix_starpu
18 * @{
19 *
20 **/
21#ifndef DOXYGEN_SHOULD_SKIP_THIS
22#define _GNU_SOURCE
23#endif /* DOXYGEN_SHOULD_SKIP_THIS */
24#include "common.h"
25#include "blend/solver.h"
26#include "sopalin/sopalin_data.h"
27#include "pastix_scores.h"
28#if defined(PASTIX_WITH_CUDA)
29#include "pastix_scuda.h"
30#endif
31#include "pastix_starpu.h"
32#include "pastix_sstarpu.h"
33#include "codelets.h"
34
35/**
36 * @brief Main structure for all tasks of cblk_sadd type
37 */
38struct cl_cblk_sadd_args_s {
39 profile_data_t profile_data;
40 sopalin_data_t *sopalin_data;
42 const SolverCblk *cblk;
43 SolverCblk *fcblk;
44};
45
46#if defined( PASTIX_STARPU_PROFILING )
47/**
48 * @brief Functions to profile the codelet
49 *
50 * Two levels of profiling are available:
51 * 1) A generic one that returns the flops per worker
52 * 2) A more detailed one that generate logs of the performance for each kernel
53 */
54starpu_profile_t cblk_sadd_profile = {
55 .next = NULL,
56 .name = "cblk_sadd"
57};
58
59/**
60 * @brief Profiling registration function
61 */
62void cblk_sadd_profile_register( void ) __attribute__( ( constructor ) );
63void
64cblk_sadd_profile_register( void )
65{
66 profiling_register_cl( &cblk_sadd_profile );
67}
68
69#ifndef DOXYGEN_SHOULD_SKIP_THIS
70#if defined(PASTIX_STARPU_PROFILING_LOG)
71static void
72cl_profiling_cb_cblk_sadd( void *callback_arg )
73{
74 cl_profiling_callback( callback_arg );
75
76 struct starpu_task *task = starpu_task_get_current();
77 struct starpu_profiling_task_info *info = task->profiling_info;
78
79 /* Quick return */
80 if ( info == NULL ) {
81 return;
82 }
83
84 struct cl_cblk_sadd_args_s *args = (struct cl_cblk_sadd_args_s *) callback_arg;
85 pastix_fixdbl_t flops = args->profile_data.flops;
86 pastix_fixdbl_t duration = starpu_timing_timespec_delay_us( &info->start_time, &info->end_time );
87 pastix_fixdbl_t speed = flops / ( 1000.0 * duration );
88
89 pastix_int_t M = args->cblk->stride;
90 pastix_int_t N = cblk_colnbr( args->cblk );
91
92 cl_profiling_log_register( task->name, "cblk_sadd", M, N, 0, flops, speed );
93}
94#endif
95
96#if defined(PASTIX_STARPU_PROFILING_LOG)
97static void (*cblk_sadd_callback)(void*) = cl_profiling_cb_cblk_sadd;
98#else
99static void (*cblk_sadd_callback)(void*) = cl_profiling_callback;
100#endif
101#endif /* DOXYGEN_SHOULD_SKIP_THIS */
102
103#endif /* defined( PASTIX_STARPU_PROFILING ) */
104
105#if defined(PASTIX_STARPU_COST_PER_ARCH)
106/**
107 *******************************************************************************
108 *
109 * @brief Cost model function
110 *
111 * The user can switch from the pastix static model to an history based model
112 * computed automatically.
113 *
114 *******************************************************************************
115 *
116 * @param[in] task
117 * TODO
118 *
119 * @param[in] arch
120 * TODO
121 *
122 * @param[in] nimpl
123 * TODO
124 *
125 *******************************************************************************
126 *
127 * @retval TODO
128 *
129 *******************************************************************************/
130static inline pastix_fixdbl_t
131fct_cblk_sadd_cost( struct starpu_task *task,
132 struct starpu_perfmodel_arch *arch,
133 unsigned nimpl )
134{
135 struct cl_cblk_sadd_args_s *args = (struct cl_cblk_sadd_args_s *)(task->cl_arg);
136
138 pastix_fixdbl_t *coefs;
139 pastix_int_t M = args->cblk->stride;
140 pastix_int_t N = cblk_colnbr( args->cblk );
141
142 switch( arch->devices->type ) {
143 case STARPU_CPU_WORKER:
144 coefs = &(args->sopalin_data->cpu_models->coefficients[PastixFloat-2][PastixKernelGEADDCblkFRFR][0]);
145 break;
146 case STARPU_CUDA_WORKER:
147 coefs = &(args->sopalin_data->gpu_models->coefficients[PastixFloat-2][PastixKernelGEADDCblkFRFR][0]);
148 break;
149 default:
150 assert(0);
151 return 0.;
152 }
153
154 /* Get cost in us */
155 cost = modelsGetCost2Param( coefs, M, N ) * 1e6;
156
157 (void)nimpl;
158 return cost;
159}
160#endif
161
162#ifndef DOXYGEN_SHOULD_SKIP_THIS
163static struct starpu_perfmodel starpu_cblk_sadd_model = {
164#if defined( PASTIX_STARPU_COST_PER_ARCH )
165 .type = STARPU_PER_ARCH,
166 .arch_cost_function = fct_cblk_sadd_cost,
167#else
168 .type = STARPU_HISTORY_BASED,
169#endif
170 .symbol = "cblk_sadd",
171};
172#endif /* DOXYGEN_SHOULD_SKIP_THIS */
173
174#if !defined(PASTIX_STARPU_SIMULATION)
175/**
176 *******************************************************************************
177 *
178 * @brief StarPU CPU implementation
179 *
180 *******************************************************************************
181 *
182 * @param[in] descr
183 * TODO
184 *
185 * @param[in] cl_arg
186 * TODO
187 *
188 *******************************************************************************/
189static void
190fct_cblk_sadd_cpu( void *descr[], void *cl_arg )
191{
192 struct cl_cblk_sadd_args_s *args = (struct cl_cblk_sadd_args_s *)cl_arg;
193 const void *A;
194 void *B;
195
196 A = pastix_starpu_cblk_get_ptr( descr[0] );
197 B = pastix_starpu_cblk_get_ptr( descr[1] );
198
199 assert( args->cblk->cblktype & CBLK_LAYOUT_2D );
200 assert( args->fcblk->cblktype & CBLK_LAYOUT_2D );
201
202 args->profile_data.flops = cpucblk_sadd( 1., args->cblk, args->fcblk, A, B, NULL, 0,
203 &( args->sopalin_data->solvmtx->lowrank ) );
204}
205
206#if defined(PASTIX_WITH_CUDA) && 0
207/**
208 *******************************************************************************
209 *
210 * @brief StarPU GPU implementation
211 *
212 *******************************************************************************
213 *
214 * @param[in] descr
215 * TODO
216 *
217 * @param[in] cl_arg
218 * TODO
219 *
220 *******************************************************************************/
221static void
222fct_cblk_sadd_gpu( void *descr[], void *cl_arg )
223{
224 struct cl_template_args_s *args = (struct cl_template_args_s *)cl_arg;
225 const void *A;
226 void *B;
227
228 A = pastix_starpu_cblk_get_ptr( descr[0] );
229 B = pastix_starpu_cblk_get_ptr( descr[1] );
230
231 assert( args->cblk->cblktype & CBLK_TASKS_2D );
232 assert( args->fcblk->cblktype & CBLK_TASKS_2D );
233
234 args->profile_data.flops = gpucblk_sadd( 1., args->cblk, args->fcblk, 1, B,
235 &( args->sopalin_data->solvmtx->lowrank ),
236 starpu_cuda_get_local_stream() );
237
238}
239#endif /* defined(PASTIX_WITH_CUDA) */
240#endif /* !defined(PASTIX_STARPU_SIMULATION) */
241
242#ifndef DOXYGEN_SHOULD_SKIP_THIS
243CODELETS_CPU( cblk_sadd, 2 );
244#endif /* DOXYGEN_SHOULD_SKIP_THIS */
245
246/**
247 *******************************************************************************
248 *
249 * @brief Insert the task to add a fanin cblk on the receiver side (The fanin is
250 * seen on this side as the RECV cblk). Note that the caller always execute the
251 * task.
252 *
253 *******************************************************************************
254 *
255 * @param[in] sopalin_data
256 * Solver matrix information structure that will guide the algorithm.
257 *
258 * @param[in] side
259 * Define which side of the cblk must be tested.
260 * @arg PastixLCoef if lower part only
261 * @arg PastixUCoef if upper part only
262 * @arg PastixLUCoef if both sides.
263 *
264 * @param[in] cblk
265 * The column block of the matrix.
266 *
267 * @param[in] fcblk
268 * The facing column block of the matrix.
269 *
270 * @param[in] prio
271 * The task priority.
272 *
273 *******************************************************************************/
274void
275starpu_task_cblk_sadd_recv( sopalin_data_t *sopalin_data,
277 const SolverCblk *cblk,
278 SolverCblk *fcblk,
279 int prio )
280{
281 struct cl_cblk_sadd_args_s *cl_arg = NULL;
282#if defined(PASTIX_DEBUG_STARPU)
283 char *task_name;
284#endif
285
286 /*
287 * Create the arguments array
288 */
289 cl_arg = malloc( sizeof( struct cl_cblk_sadd_args_s) );
290 cl_arg->sopalin_data = sopalin_data;
291#if defined(PASTIX_STARPU_PROFILING)
292 cl_arg->profile_data.measures = cblk_sadd_profile.measures;
293 cl_arg->profile_data.flops = NAN;
294#endif
295 cl_arg->side = side;
296 cl_arg->cblk = cblk;
297 cl_arg->fcblk = fcblk;
298
299#if defined(PASTIX_DEBUG_STARPU)
300 /* This actually generates a memory leak */
301 asprintf( &task_name, "%s( %ld )",
302 cl_cblk_sadd_cpu.name,
303 (long)(cblk - sopalin_data->solvmtx->cblktab) );
304#endif
305
306 assert( cblk->cblktype & CBLK_RECV );
307 assert( !(fcblk->cblktype & (CBLK_RECV|CBLK_FANIN)) );
308
309 pastix_starpu_insert_task(
310 &cl_cblk_sadd_cpu,
311 STARPU_CL_ARGS, cl_arg, sizeof( struct cl_cblk_sadd_args_s ),
312 STARPU_EXECUTE_ON_NODE, fcblk->ownerid,
313#if defined(PASTIX_STARPU_PROFILING)
314 STARPU_CALLBACK_WITH_ARG_NFREE, cblk_sadd_callback, cl_arg,
315#endif
316 STARPU_R, cblk->handler[side],
317 STARPU_RW, fcblk->handler[side],
318#if defined(PASTIX_DEBUG_STARPU)
319 STARPU_NAME, task_name,
320#endif
321#if defined(PASTIX_STARPU_HETEROPRIO)
322 STARPU_PRIORITY, BucketFacto1D,
323#else
324 STARPU_PRIORITY, prio,
325#endif
326 0);
327
328 (void)prio;
329}
330
331/**
332 *******************************************************************************
333 *
334 * @brief Insert the task to add a fanin cblk on the emitter side. Note that
335 * this task is submitted only to emit a send to the owner of the associated
336 * recv cblk that will perform the add. Thus, the task is always submitted but
337 * never executed.
338 *
339 *******************************************************************************
340 *
341 * @param[in] sopalin_data
342 * Solver matrix information structure that will guide the algorithm.
343 *
344 * @param[in] side
345 * Define which side of the cblk must be tested.
346 * @arg PastixLCoef if lower part only
347 * @arg PastixUCoef if upper part only
348 * @arg PastixLUCoef if both sides.
349 *
350 * @param[in] cblk
351 * The column block of the matrix.
352 *
353 * @param[in] prio
354 * The task priority.
355 *
356 *******************************************************************************/
357void
358starpu_task_cblk_sadd_fanin( sopalin_data_t *sopalin_data,
360 const SolverCblk *cblk,
361 int prio )
362{
363 assert( cblk->cblktype & CBLK_FANIN );
364
365 pastix_starpu_insert_task(
366 NULL,
367 STARPU_EXECUTE_ON_NODE, cblk->ownerid,
368 STARPU_R, cblk->handler[side],
369#if defined(PASTIX_STARPU_HETEROPRIO)
370 STARPU_PRIORITY, BucketFacto1D,
371#else
372 STARPU_PRIORITY, prio,
373#endif
374 0);
375
376 (void)sopalin_data;
377 (void)prio;
378}
379
380/**
381 * @}
382 */
BEGIN_C_DECLS typedef int pastix_int_t
Definition datatypes.h:51
double pastix_fixdbl_t
Definition datatypes.h:65
@ PastixKernelGEADDCblkFRFR
pastix_fixdbl_t cpucblk_sadd(float alpha, const SolverCblk *cblkA, SolverCblk *cblkB, const void *A, void *B, float *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Add two column blocks in full rank format.
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
static void fct_cblk_sadd_cpu(void *descr[], void *cl_arg)
StarPU CPU implementation.
void starpu_task_cblk_sadd_fanin(sopalin_data_t *sopalin_data, pastix_coefside_t side, const SolverCblk *cblk, int prio)
Insert the task to add a fanin cblk on the emitter side. Note that this task is submitted only to emi...
void starpu_task_cblk_sadd_recv(sopalin_data_t *sopalin_data, pastix_coefside_t side, const SolverCblk *cblk, SolverCblk *fcblk, int prio)
Insert the task to add a fanin cblk on the receiver side (The fanin is seen on this side as the RECV ...
Base structure to all codelet arguments that include the profiling data.
static double cost(symbol_cblk_t *cblk)
Computes the cost of a cblk.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition solver.h:329
void * handler[2]
Definition solver.h:179
int8_t cblktype
Definition solver.h:164
Solver column block structure.
Definition solver.h:161