PaStiX Handbook 6.4.0
Loading...
Searching...
No Matches
codelet_cblk_ssytrfsp.c
Go to the documentation of this file.
1/**
2 *
3 * @file codelet_cblk_ssytrfsp.c
4 *
5 * StarPU codelets for LDL^t functions
6 *
7 * @copyright 2016-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8 * Univ. Bordeaux. All rights reserved.
9 *
10 * @version 6.4.0
11 * @author Mathieu Faverge
12 * @author Pierre Ramet
13 * @author Tom Moenne-Loccoz
14 * @author Alycia Lisito
15 * @author Nolan Bredel
16 * @date 2024-07-05
17 *
18 * @generated from /builds/2mk6rsew/0/solverstack/pastix/sopalin/starpu/codelet_cblk_zsytrfsp.c, normal z -> s, Tue Feb 25 14:35:23 2025
19 *
20 * @addtogroup pastix_starpu
21 * @{
22 *
23 **/
24#ifndef DOXYGEN_SHOULD_SKIP_THIS
25#define _GNU_SOURCE
26#endif /* DOXYGEN_SHOULD_SKIP_THIS */
27#include "common.h"
28#include "blend/solver.h"
29#include "sopalin/sopalin_data.h"
30#include "pastix_scores.h"
31#include "pastix_starpu.h"
32#include "pastix_sstarpu.h"
33#include "codelets.h"
34
35/**
36 * @brief Main structure for all tasks of cblk_ssytrfsp type
37 */
38struct cl_cblk_ssytrfsp_args_s {
39 profile_data_t profile_data;
40 sopalin_data_t *sopalin_data;
41 SolverCblk *cblk;
42};
43
44#if defined(PASTIX_STARPU_PROFILING)
45/**
46 * @brief Functions to profile the codelet
47 *
48 * Two levels of profiling are available:
49 * 1) A generic one that returns the flops per worker
50 * 2) A more detailed one that generates logs of the performance for each kernel
51 */
52starpu_profile_t cblk_ssytrfsp_profile = {
53 .next = NULL,
54 .name = "cblk_ssytrfsp"
55};
56
57/**
58 * @brief Profiling registration function
59 */
60void cblk_ssytrfsp_profile_register( void ) __attribute__( ( constructor ) );
61void
62cblk_ssytrfsp_profile_register( void )
63{
64 profiling_register_cl( &cblk_ssytrfsp_profile );
65}
66
67#ifndef DOXYGEN_SHOULD_SKIP_THIS
68#if defined(PASTIX_STARPU_PROFILING_LOG)
69static void
70cl_profiling_cb_cblk_ssytrfsp( void *callback_arg )
71{
72 cl_profiling_callback( callback_arg );
73
74 struct starpu_task *task = starpu_task_get_current();
75 struct starpu_profiling_task_info *info = task->profiling_info;
76
77 /* Quick return */
78 if ( info == NULL ) {
79 return;
80 }
81
82 struct cl_cblk_ssytrfsp_args_s *args = (struct cl_cblk_ssytrfsp_args_s *) callback_arg;
83 pastix_fixdbl_t flops = args->profile_data.flops;
84 pastix_fixdbl_t duration = starpu_timing_timespec_delay_us( &info->start_time, &info->end_time );
85 pastix_fixdbl_t speed = flops / ( 1000.0 * duration );
86
87 pastix_int_t M = args->cblk->stride;
88 pastix_int_t N = cblk_colnbr( args->cblk );
89 M -= N;
90
91 cl_profiling_log_register( task->name, "cblk_ssytrfsp", M, N, 0, flops, speed );
92}
93#endif
94
95#if defined(PASTIX_STARPU_PROFILING_LOG)
96static void (*cblk_ssytrfsp_callback)(void*) = cl_profiling_cb_cblk_ssytrfsp;
97#else
98static void (*cblk_ssytrfsp_callback)(void*) = cl_profiling_callback;
99#endif
100#endif /* DOXYGEN_SHOULD_SKIP_THIS */
101
102#endif /* defined(PASTIX_STARPU_PROFILING) */
103
104#if defined(PASTIX_STARPU_COST_PER_ARCH)
105/**
106 *******************************************************************************
107 *
108 * @brief Cost model function
109 *
110 * The user can switch from the pastix static model to an history based model
111 * computed automatically.
112 *
113 *******************************************************************************
114 *
115 * @param[in] task
116 * TODO
117 *
118 * @param[in] arch
119 * TODO
120 *
121 * @param[in] nimpl
122 * TODO
123 *
124 *******************************************************************************
125 *
126 * @retval TODO
127 *
128 *******************************************************************************/
129static inline pastix_fixdbl_t
130fct_cblk_ssytrfsp_cost( struct starpu_task *task,
131 struct starpu_perfmodel_arch *arch,
132 unsigned nimpl )
133{
134 struct cl_cblk_ssytrfsp_args_s *args = (struct cl_cblk_ssytrfsp_args_s *)(task->cl_arg);
135
137 pastix_fixdbl_t *coefs1, *coefs2;
138 pastix_int_t M = args->cblk->stride;
139 pastix_int_t N = cblk_colnbr( args->cblk );
140 M -= N;
141
142 switch( arch->devices->type ) {
143 case STARPU_CPU_WORKER:
144 coefs1 = &(args->sopalin_data->cpu_models->coefficients[PastixFloat-2][PastixKernelSYTRF][0]);
145 coefs2 = &(args->sopalin_data->cpu_models->coefficients[PastixFloat-2][PastixKernelTRSMCblk2d][0]);
146 break;
147 case STARPU_CUDA_WORKER:
148 coefs1 = &(args->sopalin_data->gpu_models->coefficients[PastixFloat-2][PastixKernelSYTRF][0]);
149 coefs2 = &(args->sopalin_data->gpu_models->coefficients[PastixFloat-2][PastixKernelTRSMCblk2d][0]);
150 break;
151 default:
152 assert(0);
153 return 0.;
154 }
155
156 /* Get cost in us */
157 cost = modelsGetCost1Param( coefs1, N );
158 cost += modelsGetCost2Param( coefs2, M, N );
159
160 (void)nimpl;
161 return cost;
162}
163#endif
164
165#ifndef DOXYGEN_SHOULD_SKIP_THIS
166static struct starpu_perfmodel starpu_cblk_ssytrfsp_model = {
167#if defined( PASTIX_STARPU_COST_PER_ARCH )
168 .type = STARPU_PER_ARCH,
169 .arch_cost_function = cblk_sytrf_cost,
170#else
171 .type = STARPU_HISTORY_BASED,
172#endif
173 .symbol = "cblk_ssytrfsp",
174};
175
176#if !defined(PASTIX_STARPU_SIMULATION)
177/**
178 *******************************************************************************
179 *
180 * @brief StarPU CPU implementation
181 *
182 *******************************************************************************
183 *
184 * @param[in] descr
185 * TODO
186 *
187 * @param[in] cl_arg
188 * TODO
189 *
190 *******************************************************************************/
191static void
192fct_cblk_ssytrfsp_cpu( void *descr[], void *cl_arg )
193{
194 struct cl_cblk_ssytrfsp_args_s *args = (struct cl_cblk_ssytrfsp_args_s *)cl_arg;
195 void *L;
196 void *DL;
197
198 L = pastix_starpu_cblk_get_ptr( descr[0] );
199 DL = pastix_starpu_cblk_get_ptr( descr[1] );
200
201 if ( (args->cblk->cblktype & CBLK_COMPRESSED) && (DL != NULL) ) {
202 char *ws = DL;
203 ws += (args->cblk[1].fblokptr - args->cblk[0].fblokptr) * sizeof( pastix_lrblock_t );
204 cpucblk_salloc_lrws( args->cblk, DL, (float*)ws );
205 }
206 cpucblk_ssytrfsp1d_panel( args->sopalin_data->solvmtx, args->cblk, L, DL );
207}
208#endif /* !defined(PASTIX_STARPU_SIMULATION) */
209
210CODELETS_CPU( cblk_ssytrfsp, 2 );
211#endif /* DOXYGEN_SHOULD_SKIP_THIS */
212
213/**
214 *******************************************************************************
215 *
216 * @brief TODO
217 *
218 *******************************************************************************
219 *
220 * @param[in] sopalin_data
221 * TODO
222 *
223 * @param[in] cblk
224 * TODO
225 *
226 * @param[in] prio
227 * TODO
228 *
229 *******************************************************************************/
230void
231starpu_task_cblk_ssytrfsp( sopalin_data_t *sopalin_data,
232 SolverCblk *cblk,
233 int prio )
234{
235 struct cl_cblk_ssytrfsp_args_s *cl_arg = NULL;
236 int need_exec = 1;
237#if defined(PASTIX_DEBUG_STARPU)
238 char *task_name;
239#endif
240
241 starpu_data_handle_t *handler = (starpu_data_handle_t *)( cblk->handler );
242
243 pastix_starpu_register_ws( handler + 1, cblk, PastixFloat );
244
245#if defined(PASTIX_WITH_MPI)
246 {
247 int64_t tag_desc = sopalin_data->solvmtx->starpu_desc->mpitag;
248 int64_t tag_cblk = 2 * cblk->gcblknum + 1;
249
250 starpu_mpi_data_register( *(handler + 1),
251 tag_desc + tag_cblk,
252 cblk->ownerid );
253 }
254#endif /* PASTIX_WITH_MPI */
255
256 /*
257 * Check if it needs to be submitted
258 */
259#if defined(PASTIX_WITH_MPI)
260 {
261 int need_submit = 0;
262 if ( cblk->ownerid == sopalin_data->solvmtx->clustnum ) {
263 need_submit = 1;
264 }
265 else {
266 need_exec = 0;
267 }
268 if ( starpu_mpi_cached_receive( cblk->handler[0] ) ) {
269 need_submit = 1;
270 }
271 if ( !need_submit ) {
272 return;
273 }
274 }
275#endif
276
277 /*
278 * Create the arguments array
279 */
280 if ( need_exec ) {
281 cl_arg = malloc( sizeof( struct cl_cblk_ssytrfsp_args_s) );
282 cl_arg->sopalin_data = sopalin_data;
283#if defined(PASTIX_STARPU_PROFILING)
284 cl_arg->profile_data.measures = cblk_ssytrfsp_profile.measures;
285 cl_arg->profile_data.flops = NAN;
286#endif
287 cl_arg->cblk = cblk;
288 }
289
290#if defined(PASTIX_DEBUG_STARPU)
291 /* This actually generates a memory leak */
292 asprintf( &task_name, "%s( %ld )",
293 cl_cblk_ssytrfsp_cpu.name,
294 (long)(cblk - sopalin_data->solvmtx->cblktab) );
295#endif
296
297 pastix_starpu_insert_task(
298 &cl_cblk_ssytrfsp_cpu,
299 STARPU_CL_ARGS, cl_arg, sizeof( struct cl_cblk_ssytrfsp_args_s ),
300#if defined(PASTIX_STARPU_PROFILING)
301 STARPU_CALLBACK_WITH_ARG_NFREE, cblk_ssytrfsp_callback, cl_arg,
302#endif
303 STARPU_RW, cblk->handler[0],
304 STARPU_W, cblk->handler[1],
305#if defined(PASTIX_DEBUG_STARPU)
306 STARPU_NAME, task_name,
307#endif
308#if defined(PASTIX_STARPU_HETEROPRIO)
309 STARPU_PRIORITY, BucketFacto1D,
310#else
311 STARPU_PRIORITY, prio,
312#endif
313 0);
314 (void)prio;
315}
316
317/**
318 * @}
319 */
BEGIN_C_DECLS typedef int pastix_int_t
Definition datatypes.h:51
double pastix_fixdbl_t
Definition datatypes.h:65
@ PastixKernelTRSMCblk2d
@ PastixKernelSYTRF
void cpucblk_salloc_lrws(const SolverCblk *cblk, pastix_lrblock_t *lrblok, float *ws)
Initialize lrblock structure from a workspace for all blocks of the cblk associated.
int cpucblk_ssytrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *DLt)
Compute the LDL^t factorization of one panel.
The block low-rank structure to hold a matrix in low-rank form.
void starpu_task_cblk_ssytrfsp(sopalin_data_t *sopalin_data, SolverCblk *cblk, int prio)
TODO.
Base structure to all codelet arguments that include the profiling data.
static double cost(symbol_cblk_t *cblk)
Computes the cost of a cblk.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition solver.h:329
pastix_int_t gcblknum
Definition solver.h:174
void * handler[2]
Definition solver.h:179
Solver column block structure.
Definition solver.h:161