PaStiX Handbook 6.4.0
Loading...
Searching...
No Matches
codelet_blok_sgetrfsp.c
Go to the documentation of this file.
1/**
2 *
3 * @file codelet_blok_sgetrfsp.c
4 *
5 * StarPU codelets for LU functions
6 *
7 * @copyright 2016-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8 * Univ. Bordeaux. All rights reserved.
9 *
10 * @version 6.4.0
11 * @author Mathieu Faverge
12 * @author Pierre Ramet
13 * @author Ian Masliah
14 * @author Tom Moenne-Loccoz
15 * @date 2024-07-05
16 *
17 * @generated from /builds/2mk6rsew/0/solverstack/pastix/sopalin/starpu/codelet_blok_zgetrfsp.c, normal z -> s, Tue Feb 25 14:35:20 2025
18 *
19 * @addtogroup pastix_starpu
20 * @{
21 *
22 **/
23#ifndef DOXYGEN_SHOULD_SKIP_THIS
24#define _GNU_SOURCE
25#endif /* DOXYGEN_SHOULD_SKIP_THIS */
#include "common.h"
#include "blend/solver.h"
#include "sopalin/sopalin_data.h"
#include "pastix_scores.h"
#include "pastix_starpu.h"
#include "pastix_sstarpu.h"
#include "codelets.h"
/* Listed after the project headers so the include order seen by them is unchanged */
#include <stdio.h>
#include <stdlib.h>
33
34/**
35 * @brief Main structure for all tasks of blok_sgetrfsp type
36 */
37struct cl_blok_sgetrfsp_args_s {
38 profile_data_t profile_data;
39 sopalin_data_t *sopalin_data;
40 SolverCblk *cblk;
41};
42
43#if defined(PASTIX_STARPU_PROFILING)
44/**
45 * @brief Functions to profile the codelet
46 *
47 * Two levels of profiling are available:
48 * 1) A generic one that returns the flops per worker
49 * 2) A more detailed one that generate logs of the performance for each kernel
50 */
51starpu_profile_t blok_sgetrfsp_profile = {
52 .next = NULL,
53 .name = "blok_sgetrfsp"
54};
55
56/**
57 * @brief Profiling registration function
58 */
59void blok_sgetrfsp_profile_register( void ) __attribute__( ( constructor ) );
60void
61blok_sgetrfsp_profile_register( void )
62{
63 profiling_register_cl( &blok_sgetrfsp_profile );
64}
65
66#ifndef DOXYGEN_SHOULD_SKIP_THIS
67#if defined(PASTIX_STARPU_PROFILING_LOG)
68static void
69cl_profiling_cb_blok_sgetrfsp( void *callback_arg )
70{
71 cl_profiling_callback( callback_arg );
72
73 struct starpu_task *task = starpu_task_get_current();
74 struct starpu_profiling_task_info *info = task->profiling_info;
75
76 /* Quick return */
77 if ( info == NULL ) {
78 return;
79 }
80
81 struct cl_blok_sgetrfsp_args_s *args = (struct cl_blok_sgetrfsp_args_s *) callback_arg;
82 pastix_fixdbl_t flops = args->profile_data.flops;
83 pastix_fixdbl_t duration = starpu_timing_timespec_delay_us( &info->start_time, &info->end_time );
84 pastix_fixdbl_t speed = flops / ( 1000.0 * duration );
85
86 pastix_int_t N = cblk_colnbr( args->cblk );
87
88 cl_profiling_log_register( task->name, "blok_sgetrfsp", N, 0, 0, flops, speed );
89}
90#endif
91
92#if defined(PASTIX_STARPU_PROFILING_LOG)
93static void (*blok_sgetrfsp_callback)(void*) = cl_profiling_cb_blok_sgetrfsp;
94#else
95static void (*blok_sgetrfsp_callback)(void*) = cl_profiling_callback;
96#endif
97#endif /* DOXYGEN_SHOULD_SKIP_THIS */
98
99#endif /* defined(PASTIX_STARPU_PROFILING) */
100
#if defined(PASTIX_STARPU_COST_PER_ARCH)
/**
 *******************************************************************************
 *
 * @brief Cost model function
 *
 * The user can switch from the pastix static model to an history based model
 * computed automatically.
 *
 * The cost is evaluated with modelsGetCost1Param() from the GETRF coefficients
 * of the CPU or GPU model, and from the number of columns of the column block
 * stored in the task arguments.
 *
 *******************************************************************************
 *
 * @param[in] task
 *          The StarPU task to evaluate. Its cl_arg field must point to the
 *          cl_blok_sgetrfsp_args_s structure of the task.
 *
 * @param[in] arch
 *          The architecture on which the cost is requested. Only the type of
 *          the device pointed to by arch->devices is considered:
 *          STARPU_CPU_WORKER and STARPU_CUDA_WORKER are supported.
 *
 * @param[in] nimpl
 *          Unused.
 *
 *******************************************************************************
 *
 * @retval The estimated cost of the task in us, or 0. for an unsupported type
 *         of worker (which also triggers an assert in debug builds).
 *
 *******************************************************************************/
static inline pastix_fixdbl_t
fct_blok_sgetrfsp_cost( struct starpu_task           *task,
                        struct starpu_perfmodel_arch *arch,
                        unsigned                      nimpl )
{
    struct cl_blok_sgetrfsp_args_s *args = (struct cl_blok_sgetrfsp_args_s *)(task->cl_arg);

    /* Local result: it was assigned and returned below without being declared */
    pastix_fixdbl_t  cost;
    pastix_fixdbl_t *coefs;
    pastix_int_t     N = cblk_colnbr( args->cblk );

    /* NOTE(review): PastixFloat-2 presumably maps the arithmetic to a 0-based
     * index in the coefficients table - confirm against the models definition */
    switch( arch->devices->type ) {
    case STARPU_CPU_WORKER:
        coefs = &(args->sopalin_data->cpu_models->coefficients[PastixFloat-2][PastixKernelGETRF][0]);
        break;
    case STARPU_CUDA_WORKER:
        coefs = &(args->sopalin_data->gpu_models->coefficients[PastixFloat-2][PastixKernelGETRF][0]);
        break;
    default:
        assert(0);
        return 0.;
    }

    /* Get cost in us */
    cost = modelsGetCost1Param( coefs, N );

    (void)nimpl;
    return cost;
}
#endif
156
157#ifndef DOXYGEN_SHOULD_SKIP_THIS
158static struct starpu_perfmodel starpu_blok_sgetrfsp_model = {
159#if defined(PASTIX_STARPU_COST_PER_ARCH)
160 .type = STARPU_PER_ARCH,
161 .arch_cost_function = fct_blok_sgetrfsp_cost,
162#else
163 .type = STARPU_HISTORY_BASED,
164#endif
165 .symbol = "blok_sgetrfsp",
166};
167#endif /* DOXYGEN_SHOULD_SKIP_THIS */
168
169
170#if !defined(PASTIX_STARPU_SIMULATION)
171/**
172 *******************************************************************************
173 *
174 * @brief StarPU CPU implementation
175 *
176 *******************************************************************************
177 *
178 * @param[in] descr
179 * TODO
180 *
181 * @param[in] cl_arg
182 * TODO
183 *
184 *******************************************************************************/
185static void
186fct_blok_sgetrfsp_cpu( void *descr[], void *cl_arg )
187{
188 struct cl_blok_sgetrfsp_args_s *args = (struct cl_blok_sgetrfsp_args_s *)cl_arg;
189 void *L, *U;
190
191 L = pastix_starpu_blok_get_ptr( descr[0] );
192 U = pastix_starpu_blok_get_ptr( descr[1] );
193
194 assert( args->cblk->cblktype & CBLK_TASKS_2D );
195
196 cpucblk_sgetrfsp1d_getrf( args->sopalin_data->solvmtx, args->cblk, L, U );
197}
198#endif /* !defined(PASTIX_STARPU_SIMULATION) */
199
200#ifndef DOXYGEN_SHOULD_SKIP_THIS
201CODELETS_CPU( blok_sgetrfsp, 2 );
202#endif /* DOXYGEN_SHOULD_SKIP_THIS */
203
204/**
205 *******************************************************************************
206 *
207 * @brief TODO
208 *
209 *******************************************************************************
210 *
211 * @param[in] sopalin_data
212 * TODO
213 *
214 * @param[in] cblk
215 * TODO
216 *
217 * @param[in] prio
218 * TODO
219 *
220 *******************************************************************************/
221void
222starpu_task_blok_sgetrf( sopalin_data_t *sopalin_data,
223 SolverCblk *cblk,
224 int prio )
225{
226 struct cl_blok_sgetrfsp_args_s *cl_arg = NULL;
227 int need_exec = 1;
228#if defined(PASTIX_DEBUG_STARPU)
229 char *task_name;
230#endif
231
232 /*
233 * Check if it needs to be submitted
234 */
235#if defined(PASTIX_WITH_MPI)
236 {
237 int need_submit = 0;
238 if ( cblk->ownerid == sopalin_data->solvmtx->clustnum ) {
239 need_submit = 1;
240 }
241 else {
242 need_exec = 0;
243 }
244 if ( starpu_mpi_cached_receive( cblk->fblokptr->handler[0] ) ) {
245 need_submit = 1;
246 }
247 if ( starpu_mpi_cached_receive( cblk->fblokptr->handler[1] ) ) {
248 need_submit = 1;
249 }
250 if ( !need_submit ) {
251 return;
252 }
253 }
254#endif
255
256 /*
257 * Create the arguments array
258 */
259 if ( need_exec ) {
260 cl_arg = malloc( sizeof( struct cl_blok_sgetrfsp_args_s) );
261 cl_arg->sopalin_data = sopalin_data;
262#if defined(PASTIX_STARPU_PROFILING)
263 cl_arg->profile_data.measures = blok_sgetrfsp_profile.measures;
264 cl_arg->profile_data.flops = NAN;
265#endif
266 cl_arg->cblk = cblk;
267 }
268
269#if defined(PASTIX_DEBUG_STARPU)
270 /* This actually generates a memory leak */
271 asprintf( &task_name, "%s( %ld )",
272 cl_blok_sgetrfsp_cpu.name,
273 (long)(cblk - sopalin_data->solvmtx->cblktab) );
274#endif
275
276 pastix_starpu_insert_task(
277 &cl_blok_sgetrfsp_cpu,
278 STARPU_CL_ARGS, cl_arg, sizeof( struct cl_blok_sgetrfsp_args_s ),
279#if defined(PASTIX_STARPU_PROFILING)
280 STARPU_CALLBACK_WITH_ARG_NFREE, blok_sgetrfsp_callback, cl_arg,
281#endif
282 STARPU_RW, cblk->fblokptr->handler[0],
283 STARPU_RW, cblk->fblokptr->handler[1],
284#if defined(PASTIX_DEBUG_STARPU)
285 STARPU_NAME, task_name,
286#endif
287#if defined(PASTIX_STARPU_HETEROPRIO)
288 STARPU_PRIORITY, BucketFacto2D,
289#else
290 STARPU_PRIORITY, prio,
291#endif
292 0);
293 (void)prio;
294}
295
296/**
297 * @}
298 */
BEGIN_C_DECLS typedef int pastix_int_t
Definition datatypes.h:51
double pastix_fixdbl_t
Definition datatypes.h:65
@ PastixKernelGETRF
int cpucblk_sgetrfsp1d_getrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL, void *dataU)
Compute the LU factorization of the diagonal block in a panel.
static void fct_blok_sgetrfsp_cpu(void *descr[], void *cl_arg)
StarPU CPU implementation.
void starpu_task_blok_sgetrf(sopalin_data_t *sopalin_data, SolverCblk *cblk, int prio)
TODO.
Base structure to all codelet arguments that include the profiling data.
static double cost(symbol_cblk_t *cblk)
Computes the cost of a cblk.
void * handler[2]
Definition solver.h:142
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition solver.h:329
SolverBlok * fblokptr
Definition solver.h:168
Solver column block structure.
Definition solver.h:161