PaStiX Handbook 6.4.0
Loading...
Searching...
No Matches
codelet_blok_chetrfsp.c
Go to the documentation of this file.
1/**
2 *
3 * @file codelet_blok_chetrfsp.c
4 *
5 * StarPU codelets for LDL^h functions
6 *
7 * @copyright 2016-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8 * Univ. Bordeaux. All rights reserved.
9 *
10 * @version 6.4.0
11 * @author Mathieu Faverge
12 * @author Pierre Ramet
13 * @author Ian Masliah
14 * @author Tom Moenne-Loccoz
15 * @date 2024-07-05
16 *
17 * @generated from /builds/2mk6rsew/0/solverstack/pastix/sopalin/starpu/codelet_blok_zhetrfsp.c, normal z -> c, Tue Feb 25 14:35:18 2025
18 *
19 * @addtogroup pastix_starpu
20 * @{
21 *
22 **/
23#ifndef DOXYGEN_SHOULD_SKIP_THIS
24#define _GNU_SOURCE
25#endif /* DOXYGEN_SHOULD_SKIP_THIS */
26#include "common.h"
27#include "blend/solver.h"
28#include "sopalin/sopalin_data.h"
29#include "pastix_ccores.h"
30#include "pastix_starpu.h"
31#include "pastix_cstarpu.h"
32#include "codelets.h"
33
34/**
35 * @brief Main structure for all tasks of blok_chetrfsp type
36 */
37struct cl_blok_chetrfsp_args_s {
38 profile_data_t profile_data;
39 sopalin_data_t *sopalin_data;
40 SolverCblk *cblk;
41};
42
43/**
44 * @brief Functions to profile the codelet
45 *
46 * Two levels of profiling are available:
47 * 1) A generic one that returns the flops per worker
48 * 2) A more detailed one that generates logs of the performance for each kernel
49 */
50#if defined(PASTIX_STARPU_PROFILING)
51starpu_profile_t blok_chetrfsp_profile = {
52 .next = NULL,
53 .name = "blok_chetrfsp"
54};
55
56/**
57 * @brief Profiling registration function
58 */
59void blok_chetrfsp_profile_register( void ) __attribute__( ( constructor ) );
60void
61blok_chetrfsp_profile_register( void )
62{
63 profiling_register_cl( &blok_chetrfsp_profile );
64}
65
66#ifndef DOXYGEN_SHOULD_SKIP_THIS
67#if defined(PASTIX_STARPU_PROFILING_LOG)
68static void
69cl_profiling_cb_blok_chetrfsp( void *callback_arg )
70{
71 cl_profiling_callback( callback_arg );
72
73 struct starpu_task *task = starpu_task_get_current();
74 struct starpu_profiling_task_info *info = task->profiling_info;
75
76 /* Quick return */
77 if ( info == NULL ) {
78 return;
79 }
80
81 struct cl_blok_chetrfsp_args_s *args = (struct cl_blok_chetrfsp_args_s *) callback_arg;
82 pastix_fixdbl_t flops = args->profile_data.flops;
83 pastix_fixdbl_t duration = starpu_timing_timespec_delay_us( &info->start_time, &info->end_time );
84 pastix_fixdbl_t speed = flops / ( 1000.0 * duration );
85
86 pastix_int_t N = cblk_colnbr( args->cblk );
87
88 cl_profiling_log_register( task->name, "blok_chetrfsp", N, 0, 0, flops, speed );
89}
90#endif
91
92#if defined(PASTIX_STARPU_PROFILING_LOG)
93static void (*blok_chetrfsp_callback)(void*) = cl_profiling_cb_blok_chetrfsp;
94#else
95static void (*blok_chetrfsp_callback)(void*) = cl_profiling_callback;
96#endif
97#endif /* DOXYGEN_SHOULD_SKIP_THIS */
98
99#endif /* defined(PASTIX_STARPU_PROFILING) */
100
101#if defined(PASTIX_STARPU_COST_PER_ARCH)
102/**
103 *******************************************************************************
104 *
105 * @brief Cost model function
106 *
107 * The user can switch from the pastix static model to an history based model
108 * computed automatically.
109 *
110 *******************************************************************************
111 *
112 * @param[in] task
113 * TODO
114 *
115 * @param[in] arch
116 * TODO
117 *
118 * @param[in] nimpl
119 * TODO
120 *
121 *******************************************************************************
122 *
123 * @retval TODO
124 *
125 *******************************************************************************/
126static inline pastix_fixdbl_t
127fct_blok_chetrfsp_cost( struct starpu_task *task,
128 struct starpu_perfmodel_arch *arch,
129 unsigned nimpl )
130{
131 struct cl_blok_chetrfsp_args_s *args = (struct cl_blok_chetrfsp_args_s *)(task->cl_arg);
132
134 pastix_fixdbl_t *coefs;
135 pastix_int_t N = cblk_colnbr( args->cblk );
136
137 switch( arch->devices->type ) {
138 case STARPU_CPU_WORKER:
139 coefs = &(args->sopalin_data->cpu_models->coefficients[PastixComplex32-2][PastixKernelHETRF][0]);
140 break;
141 case STARPU_CUDA_WORKER:
142 coefs = &(args->sopalin_data->gpu_models->coefficients[PastixComplex32-2][PastixKernelHETRF][0]);
143 break;
144 default:
145 assert(0);
146 return 0.;
147 }
148
149 /* Get cost in us */
150 cost = modelsGetCost1Param( coefs, N );
151
152 (void)nimpl;
153 return cost;
154}
155#endif
156
157#ifndef DOXYGEN_SHOULD_SKIP_THIS
158static struct starpu_perfmodel starpu_blok_chetrfsp_model = {
159#if defined(PASTIX_STARPU_COST_PER_ARCH)
160 .type = STARPU_PER_ARCH,
161 .arch_cost_function = fct_blok_chetrfsp_cost,
162#else
163 .type = STARPU_HISTORY_BASED,
164#endif
165 .symbol = "blok_chetrfsp",
166};
167#endif /* DOXYGEN_SHOULD_SKIP_THIS */
168
169#if !defined(PASTIX_STARPU_SIMULATION)
170/**
171 *******************************************************************************
172 *
173 * @brief StarPU CPU implementation
174 *
175 *******************************************************************************
176 *
177 * @param[in] descr
178 * TODO
179 *
180 * @param[in] cl_arg
181 * TODO
182 *
183 *******************************************************************************/
184static void
185fct_blok_chetrfsp_cpu( void *descr[], void *cl_arg )
186{
187 struct cl_blok_chetrfsp_args_s *args = (struct cl_blok_chetrfsp_args_s *)cl_arg;
188 void *L;
189
190 L = pastix_starpu_blok_get_ptr( descr[0] );
191
192 assert( args->cblk->cblktype & CBLK_TASKS_2D );
193
194 cpucblk_chetrfsp1d_hetrf( args->sopalin_data->solvmtx, args->cblk, L );
195}
196#endif /* !defined(PASTIX_STARPU_SIMULATION) */
197
198#ifndef DOXYGEN_SHOULD_SKIP_THIS
/*
 * NOTE(review): presumably expands to the definition of the
 * cl_blok_chetrfsp_cpu codelet used by starpu_task_blok_chetrf(), with 1 as
 * its number of data buffers -- confirm against the CODELETS_CPU macro.
 */
199CODELETS_CPU( blok_chetrfsp, 1 );
200#endif /* DOXYGEN_SHOULD_SKIP_THIS */
201
202/**
203 *******************************************************************************
204 *
205 * @brief TODO
206 *
207 *******************************************************************************
208 *
209 * @param[in] sopalin_data
210 * TODO
211 *
212 * @param[in] cblk
213 * TODO
214 *
215 * @param[in] prio
216 * TODO
217 *
218 *******************************************************************************/
219void
220starpu_task_blok_chetrf( sopalin_data_t *sopalin_data,
221 SolverCblk *cblk,
222 int prio )
223{
224 struct cl_blok_chetrfsp_args_s *cl_arg = NULL;
225 int need_exec = 1;
226#if defined(PASTIX_DEBUG_STARPU)
227 char *task_name;
228#endif
229
230 /*
231 * Check if it needs to be submitted
232 */
233#if defined(PASTIX_WITH_MPI)
234 {
235 int need_submit = 0;
236 if ( cblk->ownerid == sopalin_data->solvmtx->clustnum ) {
237 need_submit = 1;
238 }
239 else {
240 need_exec = 0;
241 }
242 if ( starpu_mpi_cached_receive( cblk->fblokptr->handler[0] ) ) {
243 need_submit = 1;
244 }
245 if ( !need_submit ) {
246 return;
247 }
248 }
249#endif
250
251 /*
252 * Create the arguments array
253 */
254 if ( need_exec ) {
255 cl_arg = malloc( sizeof( struct cl_blok_chetrfsp_args_s) );
256 cl_arg->sopalin_data = sopalin_data;
257#if defined(PASTIX_STARPU_PROFILING)
258 cl_arg->profile_data.measures = blok_chetrfsp_profile.measures;
259 cl_arg->profile_data.flops = NAN;
260#endif
261 cl_arg->cblk = cblk;
262 }
263
264#if defined(PASTIX_DEBUG_STARPU)
265 /* This actually generates a memory leak */
266 asprintf( &task_name, "%s( %ld )",
267 cl_blok_chetrfsp_cpu.name,
268 (long)(cblk - sopalin_data->solvmtx->cblktab) );
269#endif
270
271 pastix_starpu_insert_task(
272 &cl_blok_chetrfsp_cpu,
273 STARPU_CL_ARGS, cl_arg, sizeof( struct cl_blok_chetrfsp_args_s ),
274#if defined(PASTIX_STARPU_PROFILING)
275 STARPU_CALLBACK_WITH_ARG_NFREE, blok_chetrfsp_callback, cl_arg,
276#endif
277 STARPU_RW, cblk->fblokptr->handler[0],
278#if defined(PASTIX_DEBUG_STARPU)
279 STARPU_NAME, task_name,
280#endif
281#if defined(PASTIX_STARPU_HETEROPRIO)
282 STARPU_PRIORITY, BucketFacto2D,
283#else
284 STARPU_PRIORITY, prio,
285#endif
286 0);
287 (void)prio;
288}
289
290/**
291 * @}
292 */
int cpucblk_chetrfsp1d_hetrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Computes the LDL^h factorization of the diagonal block in a panel.
BEGIN_C_DECLS typedef int pastix_int_t
Definition datatypes.h:51
double pastix_fixdbl_t
Definition datatypes.h:65
@ PastixKernelHETRF
void starpu_task_blok_chetrf(sopalin_data_t *sopalin_data, SolverCblk *cblk, int prio)
TODO.
static void fct_blok_chetrfsp_cpu(void *descr[], void *cl_arg)
Functions to profile the codelet.
Base structure to all codelet arguments that include the profiling data.
static double cost(symbol_cblk_t *cblk)
Computes the cost of a cblk.
void * handler[2]
Definition solver.h:142
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition solver.h:329
SolverBlok * fblokptr
Definition solver.h:168
Solver column block structure.
Definition solver.h:161