PaStiX Handbook 6.4.0
Loading...
Searching...
No Matches
codelet_blok_csytrfsp.c
Go to the documentation of this file.
1/**
2 *
3 * @file codelet_blok_csytrfsp.c
4 *
5 * StarPU codelets for LDL^t functions
6 *
7 * @copyright 2016-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8 * Univ. Bordeaux. All rights reserved.
9 *
10 * @version 6.4.0
11 * @author Mathieu Faverge
12 * @author Pierre Ramet
13 * @author Tom Moenne-Loccoz
14 * @date 2024-07-05
15 *
16 * @generated from /builds/2mk6rsew/0/solverstack/pastix/sopalin/starpu/codelet_blok_zsytrfsp.c, normal z -> c, Tue Feb 25 14:35:19 2025
17 *
18 * @addtogroup pastix_starpu
19 * @{
20 *
21 **/
22#ifndef DOXYGEN_SHOULD_SKIP_THIS
23#define _GNU_SOURCE
24#endif /* DOXYGEN_SHOULD_SKIP_THIS */
25#include "common.h"
26#include "blend/solver.h"
27#include "sopalin/sopalin_data.h"
28#include "pastix_ccores.h"
29#include "pastix_starpu.h"
30#include "pastix_cstarpu.h"
31#include "codelets.h"
32
33/**
34 * @brief Main structure for all tasks of blok_csytrfsp type
35 */
36struct cl_blok_csytrfsp_args_s {
37 profile_data_t profile_data;
38 sopalin_data_t *sopalin_data;
39 SolverCblk *cblk;
40};
41
42#if defined(PASTIX_STARPU_PROFILING)
43/**
44 * @brief Functions to profile the codelet
45 *
46 * Two levels of profiling are available:
47 * 1) A generic one that returns the flops per worker
48 * 2) A more detailed one that generate logs of the performance for each kernel
49 */
50starpu_profile_t blok_csytrfsp_profile = {
51 .next = NULL,
52 .name = "blok_csytrfsp"
53};
54
55/**
56 * @brief Profiling registration function
57 */
58void blok_csytrfsp_profile_register( void ) __attribute__( ( constructor ) );
59void
60blok_csytrfsp_profile_register( void )
61{
62 profiling_register_cl( &blok_csytrfsp_profile );
63}
64
65#ifndef DOXYGEN_SHOULD_SKIP_THIS
66#if defined(PASTIX_STARPU_PROFILING_LOG)
67static void
68cl_profiling_cb_blok_csytrfsp( void *callback_arg )
69{
70 cl_profiling_callback( callback_arg );
71
72 struct starpu_task *task = starpu_task_get_current();
73 struct starpu_profiling_task_info *info = task->profiling_info;
74
75 /* Quick return */
76 if ( info == NULL ) {
77 return;
78 }
79
80 struct cl_blok_csytrfsp_args_s *args = (struct cl_blok_csytrfsp_args_s *) callback_arg;
81 pastix_fixdbl_t flops = args->profile_data.flops;
82 pastix_fixdbl_t duration = starpu_timing_timespec_delay_us( &info->start_time, &info->end_time );
83 pastix_fixdbl_t speed = flops / ( 1000.0 * duration );
84
85 pastix_int_t N = cblk_colnbr( args->cblk );
86
87 cl_profiling_log_register( task->name, "blok_csytrfsp", N, 0, 0, flops, speed );
88}
89#endif
90
91#if defined(PASTIX_STARPU_PROFILING_LOG)
92static void (*blok_csytrfsp_callback)(void*) = cl_profiling_cb_blok_csytrfsp;
93#else
94static void (*blok_csytrfsp_callback)(void*) = cl_profiling_callback;
95#endif
96#endif /* DOXYGEN_SHOULD_SKIP_THIS */
97
98#endif /* defined(PASTIX_STARPU_PROFILING) */
99
100#ifndef DOXYGEN_SHOULD_SKIP_THIS
101
102#if defined(PASTIX_STARPU_COST_PER_ARCH)
103/**
104 *******************************************************************************
105 *
106 * @brief Cost model function
107 *
108 * The user can switch from the pastix static model to an history based model
109 * computed automatically.
110 *
111 *******************************************************************************
112 *
113 * @param[in] task
114 * TODO
115 *
116 * @param[in] arch
117 * TODO
118 *
119 * @param[in] nimpl
120 * TODO
121 *
122 *******************************************************************************
123 *
124 * @retval TODO
125 *
126 *******************************************************************************/
127static inline pastix_fixdbl_t
128fct_blok_csytrfsp_cost( struct starpu_task *task,
129 struct starpu_perfmodel_arch *arch,
130 unsigned nimpl )
131{
132 struct cl_blok_csytrfsp_args_s *args = (struct cl_blok_csytrfsp_args_s *)(task->cl_arg);
133
135 pastix_fixdbl_t *coefs;
136 pastix_int_t N = cblk_colnbr( args->cblk );
137
138 switch( arch->devices->type ) {
139 case STARPU_CPU_WORKER:
140 coefs = &(args->sopalin_data->cpu_models->coefficients[PastixComplex32-2][PastixKernelSYTRF][0]);
141 break;
142 case STARPU_CUDA_WORKER:
143 coefs = &(args->sopalin_data->gpu_models->coefficients[PastixComplex32-2][PastixKernelSYTRF][0]);
144 break;
145 default:
146 assert(0);
147 return 0.;
148 }
149
150 /* Get cost in us */
151 cost = modelsGetCost1Param( coefs, N );
152
153 (void)nimpl;
154 return cost;
155}
156#endif
157
158static struct starpu_perfmodel starpu_blok_csytrfsp_model = {
159#if defined(PASTIX_STARPU_COST_PER_ARCH)
160 .type = STARPU_PER_ARCH,
161 .arch_cost_function = fct_blok_csytrfsp_cost,
162#else
163 .type = STARPU_HISTORY_BASED,
164#endif
165 .symbol = "blok_csytrfsp",
166};
167
168#if !defined(PASTIX_STARPU_SIMULATION)
169/**
170 *******************************************************************************
171 *
172 * @brief StarPU CPU implementation
173 *
174 *******************************************************************************
175 *
176 * @param[in] descr
177 * TODO
178 *
179 * @param[in] cl_arg
180 * TODO
181 *
182 *******************************************************************************/
183static void
184fct_blok_csytrfsp_cpu( void *descr[], void *cl_arg )
185{
186 struct cl_blok_csytrfsp_args_s *args = (struct cl_blok_csytrfsp_args_s *)cl_arg;
187 void *L;
188
189 L = pastix_starpu_blok_get_ptr( descr[0] );
190
191 assert( args->cblk->cblktype & CBLK_TASKS_2D );
192
193 cpucblk_csytrfsp1d_sytrf( args->sopalin_data->solvmtx, args->cblk, L );
194}
195#endif /* !defined(PASTIX_STARPU_SIMULATION) */
196
/*
 * Instantiate the StarPU codelet of the blok_csytrfsp kernel.
 * NOTE(review): the submission routine of this file refers to
 * cl_blok_csytrfsp_cpu, which is presumably the object this macro defines,
 * and the second argument is presumably the number of data handles of the
 * task -- confirm both against the definition of CODELETS_CPU in codelets.h.
 */
197CODELETS_CPU( blok_csytrfsp, 1 );
198#endif /* DOXYGEN_SHOULD_SKIP_THIS */
199
200/**
201 *******************************************************************************
202 *
203 * @brief TODO
204 *
205 *******************************************************************************
206 *
207 * @param[in] sopalin_data
208 * TODO
209 *
210 * @param[in] cblk
211 * TODO
212 *
213 * @param[in] prio
214 * TODO
215 *
216 *******************************************************************************/
217void
218starpu_task_blok_csytrf( sopalin_data_t *sopalin_data,
219 SolverCblk *cblk,
220 int prio )
221{
222 struct cl_blok_csytrfsp_args_s *cl_arg = NULL;
223 int need_exec = 1;
224#if defined(PASTIX_DEBUG_STARPU)
225 char *task_name;
226#endif
227
228 /*
229 * Check if it needs to be submitted
230 */
231#if defined(PASTIX_WITH_MPI)
232 {
233 int need_submit = 0;
234 if ( cblk->ownerid == sopalin_data->solvmtx->clustnum ) {
235 need_submit = 1;
236 }
237 else {
238 need_exec = 0;
239 }
240 if ( starpu_mpi_cached_receive( cblk->fblokptr->handler[0] ) ) {
241 need_submit = 1;
242 }
243 if ( !need_submit ) {
244 return;
245 }
246 }
247#endif
248
249 /*
250 * Create the arguments array
251 */
252 if ( need_exec ) {
253 cl_arg = malloc( sizeof( struct cl_blok_csytrfsp_args_s) );
254 cl_arg->sopalin_data = sopalin_data;
255#if defined(PASTIX_STARPU_PROFILING)
256 cl_arg->profile_data.measures = blok_csytrfsp_profile.measures;
257 cl_arg->profile_data.flops = NAN;
258#endif
259 cl_arg->cblk = cblk;
260 }
261
262#if defined(PASTIX_DEBUG_STARPU)
263 /* This actually generates a memory leak */
264 asprintf( &task_name, "%s( %ld )",
265 cl_blok_csytrfsp_cpu.name,
266 (long)(cblk - sopalin_data->solvmtx->cblktab) );
267#endif
268
269 pastix_starpu_insert_task(
270 &cl_blok_csytrfsp_cpu,
271 STARPU_CL_ARGS, cl_arg, sizeof( struct cl_blok_csytrfsp_args_s ),
272#if defined(PASTIX_STARPU_PROFILING)
273 STARPU_CALLBACK_WITH_ARG_NFREE, blok_csytrfsp_callback, cl_arg,
274#endif
275 STARPU_RW, cblk->fblokptr->handler[0],
276#if defined(PASTIX_DEBUG_STARPU)
277 STARPU_NAME, task_name,
278#endif
279#if defined(PASTIX_STARPU_HETEROPRIO)
280 STARPU_PRIORITY, BucketFacto2D,
281#else
282 STARPU_PRIORITY, prio,
283#endif
284 0);
285 (void)prio;
286}
287
288/**
289 * @}
290 */
BEGIN_C_DECLS typedef int pastix_int_t
Definition datatypes.h:51
double pastix_fixdbl_t
Definition datatypes.h:65
@ PastixKernelSYTRF
int cpucblk_csytrfsp1d_sytrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Computes the LDL^t factorization of the diagonal block in a panel.
void starpu_task_blok_csytrf(sopalin_data_t *sopalin_data, SolverCblk *cblk, int prio)
TODO.
Base structure to all codelet arguments that include the profiling data.
static double cost(symbol_cblk_t *cblk)
Computes the cost of a cblk.
void * handler[2]
Definition solver.h:142
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition solver.h:329
SolverBlok * fblokptr
Definition solver.h:168
Solver column block structure.
Definition solver.h:161