PaStiX Handbook 6.4.0
Loading...
Searching...
No Matches
codelet_blok_strsmsp.c
Go to the documentation of this file.
1/**
2 *
3 * @file codelet_blok_strsmsp.c
4 *
5 * StarPU codelets for blas-like functions
6 *
7 * @copyright 2016-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8 * Univ. Bordeaux. All rights reserved.
9 *
10 * @version 6.4.0
11 * @author Mathieu Faverge
12 * @author Pierre Ramet
13 * @author Ian Masliah
14 * @author Tom Moenne-Loccoz
15 * @date 2024-07-05
16 *
17 * @generated from /builds/2mk6rsew/0/solverstack/pastix/sopalin/starpu/codelet_blok_ztrsmsp.c, normal z -> s, Tue Feb 25 14:35:20 2025
18 *
19 * @addtogroup pastix_starpu
20 * @{
21 *
22 **/
23#ifndef DOXYGEN_SHOULD_SKIP_THIS
24#define _GNU_SOURCE
25#endif /* DOXYGEN_SHOULD_SKIP_THIS */
26#include "common.h"
27#include "blend/solver.h"
28#include "sopalin/sopalin_data.h"
29#include "pastix_scores.h"
30#if defined(PASTIX_WITH_CUDA)
31#include "pastix_scuda.h"
32#endif
33#include "pastix_starpu.h"
34#include "pastix_sstarpu.h"
35#include "codelets.h"
36
37/**
38 * @brief Main structure for all tasks of blok_strsmsp type
39 */
40struct cl_blok_strsmsp_args_s {
41 profile_data_t profile_data;
42 sopalin_data_t *sopalin_data;
43 pastix_side_t side;
44 pastix_uplo_t uplo;
45 pastix_trans_t trans;
46 pastix_diag_t diag;
47 const SolverCblk *cblk;
48 pastix_int_t blok_m;
49};
50
51#if defined( PASTIX_STARPU_PROFILING )
52/**
53 * @brief Functions to profile the codelet
54 *
55 * Two levels of profiling are available:
56 * 1) A generic one that returns the flops per worker
57 * 2) A more detailed one that generate logs of the performance for each kernel
58 */
59starpu_profile_t blok_strsmsp_profile = {
60 .next = NULL,
61 .name = "blok_strsmsp"
62};
63
64/**
65 * @brief Profiling registration function
66 */
67void blok_strsmsp_profile_register( void ) __attribute__( ( constructor ) );
68void
69blok_strsmsp_profile_register( void )
70{
71 profiling_register_cl( &blok_strsmsp_profile );
72}
73
74#ifndef DOXYGEN_SHOULD_SKIP_THIS
75#if defined(PASTIX_STARPU_PROFILING_LOG)
76static void
77cl_profiling_cb_blok_strsmsp( void *callback_arg )
78{
79 cl_profiling_callback( callback_arg );
80
81 struct starpu_task *task = starpu_task_get_current();
82 struct starpu_profiling_task_info *info = task->profiling_info;
83
84 /* Quick return */
85 if ( info == NULL ) {
86 return;
87 }
88
89 struct cl_blok_strsmsp_args_s *args = (struct cl_blok_strsmsp_args_s *) callback_arg;
90 pastix_fixdbl_t flops = args->profile_data.flops;
91 pastix_fixdbl_t duration = starpu_timing_timespec_delay_us( &info->start_time, &info->end_time );
92 pastix_fixdbl_t speed = flops / ( 1000.0 * duration );
93
94 pastix_int_t M = blok_rownbr_ext( args->cblk->fblokptr + args->blok_m );
95 pastix_int_t N = cblk_colnbr( args->cblk );
96
97 cl_profiling_log_register( task->name, "blok_strsmsp", M, N, 0, flops, speed );
98}
99#endif
100
101#if defined(PASTIX_STARPU_PROFILING_LOG)
102static void (*blok_strsmsp_callback)(void*) = cl_profiling_cb_blok_strsmsp;
103#else
104static void (*blok_strsmsp_callback)(void*) = cl_profiling_callback;
105#endif
106#endif /* DOXYGEN_SHOULD_SKIP_THIS */
107
108#endif /* defined( PASTIX_STARPU_PROFILING ) */
109
110#if defined(PASTIX_STARPU_COST_PER_ARCH)
111/**
112 *******************************************************************************
113 *
114 * @brief Cost model function
115 *
116 * The user can switch from the pastix static model to an history based model
117 * computed automatically.
118 *
119 *******************************************************************************
120 *
121 * @param[in] task
122 * TODO
123 *
124 * @param[in] arch
125 * TODO
126 *
127 * @param[in] nimpl
128 * TODO
129 *
130 *******************************************************************************
131 *
132 * @retval TODO
133 *
134 *******************************************************************************/
135static inline pastix_fixdbl_t
136fct_blok_strsmsp_cost( struct starpu_task *task,
137 struct starpu_perfmodel_arch *arch,
138 unsigned nimpl )
139{
140 struct cl_blok_strsmsp_args_s *args = (struct cl_blok_strsmsp_args_s *)(task->cl_arg);
141
143 pastix_fixdbl_t *coefs;
144 pastix_int_t M = blok_rownbr_ext( args->cblk->fblokptr + args->blok_m );
145 pastix_int_t N = cblk_colnbr( args->cblk );
146
147 switch( arch->devices->type ) {
148 case STARPU_CPU_WORKER:
149 coefs = &(args->sopalin_data->cpu_models->coefficients[PastixFloat-2][PastixKernelTRSMBlok2d][0]);
150 break;
151 case STARPU_CUDA_WORKER:
152 coefs = &(args->sopalin_data->gpu_models->coefficients[PastixFloat-2][PastixKernelTRSMBlok2d][0]);
153 break;
154 default:
155 assert(0);
156 return 0.;
157 }
158
159 /* Get cost in us */
160 cost = modelsGetCost2Param( coefs, M, N ) * 1e6;
161
162 (void)nimpl;
163 return cost;
164}
165#endif
166
167#ifndef DOXYGEN_SHOULD_SKIP_THIS
168static struct starpu_perfmodel starpu_blok_strsmsp_model = {
169#if defined(PASTIX_STARPU_COST_PER_ARCH)
170 .type = STARPU_PER_ARCH,
171 .arch_cost_function = fct_blok_strsmsp_cost,
172#else
173 .type = STARPU_HISTORY_BASED,
174#endif
175 .symbol = "blok_strsmsp",
176};
177#endif /* DOXYGEN_SHOULD_SKIP_THIS */
178
179#if !defined(PASTIX_STARPU_SIMULATION)
180/**
181 *******************************************************************************
182 *
183 * @brief StarPU CPU implementation
184 *
185 *******************************************************************************
186 *
187 * @param[in] descr
188 * TODO
189 *
190 * @param[in] cl_arg
191 * TODO
192 *
193 *******************************************************************************/
194static void
195fct_blok_strsmsp_cpu( void *descr[], void *cl_arg )
196{
197 struct cl_blok_strsmsp_args_s *args = (struct cl_blok_strsmsp_args_s *)cl_arg;
198 const void *A;
199 void *C;
200
201 A = pastix_starpu_blok_get_ptr( descr[0] );
202 C = pastix_starpu_blok_get_ptr( descr[1] );
203
204 assert( args->cblk->cblktype & CBLK_TASKS_2D );
205
206 args->profile_data.flops = cpublok_strsmsp( args->side, args->uplo,
207 args->trans, args->diag,
208 args->cblk, args->blok_m, A, C,
209 &(args->sopalin_data->solvmtx->lowrank) );
210}
211
212/**
213 * @brief StarPU GPU implementation
214 */
215#if defined(PASTIX_WITH_CUDA)
216static void
217fct_blok_strsmsp_gpu( void *descr[], void *cl_arg )
218{
219 struct cl_blok_strsmsp_args_s *args = (struct cl_blok_strsmsp_args_s *)cl_arg;
220 const void *A;
221 void *C;
222
223 A = pastix_starpu_blok_get_ptr( descr[0] );
224 C = pastix_starpu_blok_get_ptr( descr[1] );
225
226 assert( args->cblk->cblktype & CBLK_TASKS_2D );
227
228 args->profile_data.flops = gpublok_strsmsp( args->side, args->uplo,
229 args->trans, args->diag,
230 args->cblk, args->blok_m, A, C,
231 &(args->sopalin_data->solvmtx->lowrank),
232 starpu_cuda_get_local_stream() );
233}
234#endif /* defined(PASTIX_WITH_CUDA) */
235#endif /* !defined(PASTIX_STARPU_SIMULATION) */
236
237#ifndef DOXYGEN_SHOULD_SKIP_THIS
/*
 * NOTE(review): presumably expands to the definition of cl_blok_strsmsp_any,
 * the codelet used by starpu_task_blok_strsmsp(), built from the CPU/GPU
 * implementations and the performance model of this file; the 2 would then be
 * the number of data handles of the task - confirm against codelets.h.
 */
238CODELETS_GPU( blok_strsmsp, 2, STARPU_CUDA_ASYNC );
239#endif /* DOXYGEN_SHOULD_SKIP_THIS */
240
241/**
242 *******************************************************************************
243 *
244 * @brief TODO
245 *
246 *******************************************************************************
247 *
248 * @param[in] sopalin_data
249 * TODO
250 *
251 * @param[in] coef
252 * TODO
253 *
254 * @param[in] side
255 * TODO
256 *
257 * @param[in] uplo
258 * TODO
259 *
260 * @param[in] trans
261 * TODO
262 *
263 * @param[in] diag
264 * TODO
265 *
266 * @param[in] cblk
267 * TODO
268 *
269 * @param[in] blok
270 * TODO
271 *
272 * @param[in] prio
273 * TODO
274 *
275 *******************************************************************************/
276void
277starpu_task_blok_strsmsp( sopalin_data_t *sopalin_data,
279 pastix_side_t side,
280 pastix_uplo_t uplo,
281 pastix_trans_t trans,
282 pastix_diag_t diag,
283 const SolverCblk *cblk,
284 SolverBlok *blok,
285 int prio )
286{
287 struct cl_blok_strsmsp_args_s *cl_arg = NULL;
288 long long execute_where = cl_blok_strsmsp_any.where;
289 int need_exec = 1;
290#if defined(PASTIX_DEBUG_STARPU) || defined(PASTIX_STARPU_PROFILING_LOG)
291 char *task_name;
292#endif
293
294 pastix_int_t blok_m = blok - cblk->fblokptr;
295
296 /*
297 * Check if it needs to be submitted
298 */
299#if defined(PASTIX_WITH_MPI)
300 {
301 int need_submit = 0;
302 if ( cblk->ownerid == sopalin_data->solvmtx->clustnum ) {
303 need_submit = 1;
304 }
305 else {
306 need_exec = 0;
307 }
308 if ( starpu_mpi_cached_receive( blok->handler[coef] ) ) {
309 need_submit = 1;
310 }
311 if ( !need_submit ) {
312 return;
313 }
314 }
315#endif
316
317 /*
318 * Create the arguments array
319 */
320 if ( need_exec ) {
321 cl_arg = malloc( sizeof( struct cl_blok_strsmsp_args_s ) );
322 cl_arg->sopalin_data = sopalin_data;
323#if defined(PASTIX_STARPU_PROFILING)
324 cl_arg->profile_data.measures = blok_strsmsp_profile.measures;
325 cl_arg->profile_data.flops = NAN;
326#endif
327 cl_arg->side = side;
328 cl_arg->uplo = uplo;
329 cl_arg->trans = trans;
330 cl_arg->diag = diag;
331 cl_arg->cblk = cblk;
332 cl_arg->blok_m = blok_m;
333
334#if defined(PASTIX_WITH_CUDA)
335 if ( (cblk->cblktype & CBLK_COMPRESSED) ) {
336 execute_where &= (~STARPU_CUDA);
337 }
338#endif
339 }
340
341#if defined(PASTIX_DEBUG_STARPU) || defined(PASTIX_STARPU_PROFILING_LOG)
342 /* This actually generates a memory leak */
343 asprintf( &task_name, "%s( %ld, %ld, %ld )",
344 cl_blok_strsmsp_any.name,
345 (long)(cblk - sopalin_data->solvmtx->cblktab),
346 (long)(blok - sopalin_data->solvmtx->bloktab),
347 (long)coef );
348#endif
349
350 pastix_starpu_insert_task(
351 &cl_blok_strsmsp_any,
352 STARPU_CL_ARGS, cl_arg, sizeof( struct cl_blok_strsmsp_args_s ),
353 STARPU_EXECUTE_WHERE, execute_where,
354#if defined(PASTIX_STARPU_PROFILING)
355 STARPU_CALLBACK_WITH_ARG_NFREE, blok_strsmsp_callback, cl_arg,
356#endif
357 STARPU_R, cblk->fblokptr->handler[coef],
358 STARPU_RW, blok->handler[coef],
359#if defined(PASTIX_DEBUG_STARPU) || defined(PASTIX_STARPU_PROFILING_LOG)
360 STARPU_NAME, task_name,
361#endif
362#if defined(PASTIX_STARPU_HETEROPRIO)
363 STARPU_PRIORITY, BucketTRSM2D,
364#else
365 STARPU_PRIORITY, prio,
366#endif
367 0);
368 (void)prio;
369}
370
371/**
372 * @}
373 */
BEGIN_C_DECLS typedef int pastix_int_t
Definition datatypes.h:51
double pastix_fixdbl_t
Definition datatypes.h:65
@ PastixKernelTRSMBlok2d
pastix_fixdbl_t cpublok_strsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, pastix_int_t blok_m, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
enum pastix_diag_e pastix_diag_t
Diagonal.
enum pastix_uplo_e pastix_uplo_t
Upper/Lower part.
enum pastix_side_e pastix_side_t
Side of the operation.
enum pastix_trans_e pastix_trans_t
Transposition.
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
static void fct_blok_strsmsp_cpu(void *descr[], void *cl_arg)
StarPU CPU implementation.
void starpu_task_blok_strsmsp(sopalin_data_t *sopalin_data, pastix_coefside_t coef, pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, SolverBlok *blok, int prio)
Submit the blok_strsmsp task to StarPU.
Base structure to all codelet arguments that include the profiling data.
static double cost(symbol_cblk_t *cblk)
Computes the cost of a cblk.
static pastix_int_t blok_rownbr_ext(const SolverBlok *blok)
Compute the number of rows of a contiguous block in front of the same cblk.
Definition solver.h:407
void * handler[2]
Definition solver.h:142
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition solver.h:329
SolverBlok * fblokptr
Definition solver.h:168
int8_t cblktype
Definition solver.h:164
Solver block structure.
Definition solver.h:141
Solver column block structure.
Definition solver.h:161