PaStiX Handbook 6.4.0
Loading...
Searching...
No Matches
codelet_cblk_cpxtrfsp.c
Go to the documentation of this file.
1/**
2 *
3 * @file codelet_cblk_cpxtrfsp.c
4 *
5 * StarPU codelets for complex LL^t functions
6 *
7 * @copyright 2016-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8 * Univ. Bordeaux. All rights reserved.
9 *
10 * @version 6.4.0
11 * @author Mathieu Faverge
12 * @author Pierre Ramet
13 * @author Tom Moenne-Loccoz
14 * @date 2024-07-05
15 *
16 * @generated from /builds/2mk6rsew/0/solverstack/pastix/sopalin/starpu/codelet_cblk_zpxtrfsp.c, normal z -> c, Tue Feb 25 14:35:22 2025
17 *
18 * @addtogroup pastix_starpu
19 * @{
20 *
21 **/
22#ifndef DOXYGEN_SHOULD_SKIP_THIS
23#define _GNU_SOURCE
24#endif /* DOXYGEN_SHOULD_SKIP_THIS */
25#include "common.h"
26#include "blend/solver.h"
27#include "sopalin/sopalin_data.h"
28#include "pastix_ccores.h"
29#include "pastix_starpu.h"
30#include "pastix_cstarpu.h"
31#include "codelets.h"
32
33/**
34 * @brief Main structure for all tasks of cblk_cpxtrfsp type
35 */
36struct cl_cblk_cpxtrfsp_args_s {
37 profile_data_t profile_data; /**< Profiling data; its measures and flops fields are initialized at task creation when PASTIX_STARPU_PROFILING is defined */
38 sopalin_data_t *sopalin_data; /**< Solver data; gives access to the solver matrix (solvmtx) and to the cpu/gpu cost model coefficients */
39 SolverCblk *cblk; /**< Column block factorized by the task */
40};
41
42#if defined(PASTIX_STARPU_PROFILING)
43/**
44 * @brief Functions to profile the codelet
45 *
46 * Two levels of profiling are available:
47 * 1) A generic one that returns the flops per worker
48 * 2) A more detailed one that generate logs of the performance for each kernel
49 */
50starpu_profile_t cblk_cpxtrfsp_profile = {
51 .next = NULL,
52 .name = "cblk_cpxtrfsp"
53};
54
55/**
56 * @brief Profiling registration function
57 */
58void cblk_cpxtrfsp_profile_register( void ) __attribute__( ( constructor ) );
59void
60cblk_cpxtrfsp_profile_register( void )
61{
62 profiling_register_cl( &cblk_cpxtrfsp_profile );
63}
64
65#ifndef DOXYGEN_SHOULD_SKIP_THIS
66#if defined(PASTIX_STARPU_PROFILING_LOG)
67static void
68cl_profiling_cb_cblk_cpxtrfsp( void *callback_arg )
69{
70 cl_profiling_callback( callback_arg );
71
72 struct starpu_task *task = starpu_task_get_current();
73 struct starpu_profiling_task_info *info = task->profiling_info;
74
75 /* Quick return */
76 if ( info == NULL ) {
77 return;
78 }
79
80 struct cl_cblk_cpxtrfsp_args_s *args = (struct cl_cblk_cpxtrfsp_args_s *) callback_arg;
81 pastix_fixdbl_t flops = args->profile_data.flops;
82 pastix_fixdbl_t duration = starpu_timing_timespec_delay_us( &info->start_time, &info->end_time );
83 pastix_fixdbl_t speed = flops / ( 1000.0 * duration );
84
85 pastix_int_t M = args->cblk->stride;
86 pastix_int_t N = cblk_colnbr( args->cblk );
87 M -= N;
88
89 cl_profiling_log_register( task->name, "cblk_cpxtrfsp", M, N, 0, flops, speed );
90}
91#endif
92
93#if defined(PASTIX_STARPU_PROFILING_LOG)
94static void (*cblk_cpxtrfsp_callback)(void*) = cl_profiling_cb_cblk_cpxtrfsp;
95#else
96static void (*cblk_cpxtrfsp_callback)(void*) = cl_profiling_callback;
97#endif
98#endif /* DOXYGEN_SHOULD_SKIP_THIS */
99
100#endif /* defined(PASTIX_STARPU_PROFILING) */
101
102#if defined(PASTIX_STARPU_COST_PER_ARCH)
103/**
104 *******************************************************************************
105 *
106 * @brief Cost model function
107 *
108 * The user can switch from the pastix static model to an history based model
109 * computed automatically.
110 *
111 *******************************************************************************
112 *
113 * @param[in] task
114 * TODO
115 *
116 * @param[in] arch
117 * TODO
118 *
119 * @param[in] nimpl
120 * TODO
121 *
122 *******************************************************************************
123 *
124 * @retval TODO
125 *
126 *******************************************************************************/
127static inline pastix_fixdbl_t
128fct_cblk_cpxtrfsp_cost( struct starpu_task *task,
129 struct starpu_perfmodel_arch *arch,
130 unsigned nimpl )
131{
132 struct cl_cblk_cpxtrfsp_args_s *args = (struct cl_cblk_cpxtrfsp_args_s *)(task->cl_arg);
133
135 pastix_fixdbl_t *coefs1, *coefs2;
136 pastix_int_t M = args->cblk->stride;
137 pastix_int_t N = cblk_colnbr( args->cblk );
138 M -= N;
139
140 switch( arch->devices->type ) {
141 case STARPU_CPU_WORKER:
142 coefs1 = &(args->sopalin_data->cpu_models->coefficients[PastixComplex32-2][PastixKernelPXTRF][0]);
143 coefs2 = &(args->sopalin_data->cpu_models->coefficients[PastixComplex32-2][PastixKernelTRSMCblk2d][0]);
144 break;
145 case STARPU_CUDA_WORKER:
146 coefs1 = &(args->sopalin_data->gpu_models->coefficients[PastixComplex32-2][PastixKernelPXTRF][0]);
147 coefs2 = &(args->sopalin_data->gpu_models->coefficients[PastixComplex32-2][PastixKernelTRSMCblk2d][0]);
148 break;
149 default:
150 assert(0);
151 return 0.;
152 }
153
154 /* Get cost in us */
155 cost = modelsGetCost1Param( coefs1, N );
156 cost += modelsGetCost2Param( coefs2, M, N );
157
158 (void)nimpl;
159 return cost;
160}
161#endif
162
163#ifndef DOXYGEN_SHOULD_SKIP_THIS
164static struct starpu_perfmodel starpu_cblk_cpxtrfsp_model = {
165#if defined( PASTIX_STARPU_COST_PER_ARCH )
166 .type = STARPU_PER_ARCH,
167 .arch_cost_function = cblk_pxtrf_cost,
168#else
169 .type = STARPU_HISTORY_BASED,
170#endif
171 .symbol = "cblk_cpxtrfsp",
172};
173
174#if !defined(PASTIX_STARPU_SIMULATION)
175/**
176 *******************************************************************************
177 *
178 * @brief StarPU CPU implementation
179 *
180 *******************************************************************************
181 *
182 * @param[in] descr
183 * TODO
184 *
185 * @param[in] cl_arg
186 * TODO
187 *
188 *******************************************************************************/
189static void
190fct_cblk_cpxtrfsp_cpu( void *descr[], void *cl_arg )
191{
192 struct cl_cblk_cpxtrfsp_args_s *args = (struct cl_cblk_cpxtrfsp_args_s *)cl_arg;
193 void *L;
194
195 L = pastix_starpu_cblk_get_ptr( descr[0] );
196
197 cpucblk_cpxtrfsp1d_panel( args->sopalin_data->solvmtx, args->cblk, L );
198}
199#endif /* !defined(PASTIX_STARPU_SIMULATION) */
200
/* Presumably defines the CPU-only codelet that the task submission function of this file references as cl_cblk_cpxtrfsp_cpu; the second argument looks like the number of data buffers (one handle is submitted) -- TODO confirm against the CODELETS_CPU definition. */
201CODELETS_CPU( cblk_cpxtrfsp, 1 );
202#endif /* DOXYGEN_SHOULD_SKIP_THIS */
203
204/**
205 *******************************************************************************
206 *
207 * @brief TODO
208 *
209 *******************************************************************************
210 *
211 * @param[in] sopalin_data
212 * TODO
213 *
214 * @param[in] cblk
215 * TODO
216 *
217 * @param[in] prio
218 * TODO
219 *
220 *******************************************************************************/
221void
222starpu_task_cblk_cpxtrfsp( sopalin_data_t *sopalin_data,
223 SolverCblk *cblk,
224 int prio )
225{
226 struct cl_cblk_cpxtrfsp_args_s *cl_arg = NULL;
227 int need_exec = 1;
228#if defined(PASTIX_DEBUG_STARPU)
229 char *task_name;
230#endif
231
232 /*
233 * Check if it needs to be submitted
234 */
235#if defined(PASTIX_WITH_MPI)
236 {
237 int need_submit = 0;
238 if ( cblk->ownerid == sopalin_data->solvmtx->clustnum ) {
239 need_submit = 1;
240 }
241 else {
242 need_exec = 0;
243 }
244 if ( starpu_mpi_cached_receive( cblk->handler[0] ) ) {
245 need_submit = 1;
246 }
247 if ( !need_submit ) {
248 return;
249 }
250 }
251#endif
252
253 /*
254 * Create the arguments array
255 */
256 if ( need_exec ) {
257 cl_arg = malloc( sizeof( struct cl_cblk_cpxtrfsp_args_s) );
258 cl_arg->sopalin_data = sopalin_data;
259#if defined(PASTIX_STARPU_PROFILING)
260 cl_arg->profile_data.measures = cblk_cpxtrfsp_profile.measures;
261 cl_arg->profile_data.flops = NAN;
262#endif
263 cl_arg->cblk = cblk;
264 }
265
266#if defined(PASTIX_DEBUG_STARPU)
267 /* This actually generates a memory leak */
268 asprintf( &task_name, "%s( %ld )",
269 cl_cblk_cpxtrfsp_cpu.name,
270 (long)(cblk - sopalin_data->solvmtx->cblktab) );
271#endif
272
273 pastix_starpu_insert_task(
274 &cl_cblk_cpxtrfsp_cpu,
275 STARPU_CL_ARGS, cl_arg, sizeof( struct cl_cblk_cpxtrfsp_args_s ),
276#if defined(PASTIX_STARPU_PROFILING)
277 STARPU_CALLBACK_WITH_ARG_NFREE, cblk_cpxtrfsp_callback, cl_arg,
278#endif
279 STARPU_RW, cblk->handler[0],
280#if defined(PASTIX_DEBUG_STARPU)
281 STARPU_NAME, task_name,
282#endif
283#if defined(PASTIX_STARPU_HETEROPRIO)
284 STARPU_PRIORITY, BucketFacto1D,
285#else
286 STARPU_PRIORITY, prio,
287#endif
288 0);
289 (void)prio;
290}
291
292/**
293 * @}
294 */
int cpucblk_cpxtrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L)
Compute the LL^t factorization of one panel.
BEGIN_C_DECLS typedef int pastix_int_t
Definition datatypes.h:51
double pastix_fixdbl_t
Definition datatypes.h:65
@ PastixKernelTRSMCblk2d
@ PastixKernelPXTRF
void starpu_task_cblk_cpxtrfsp(sopalin_data_t *sopalin_data, SolverCblk *cblk, int prio)
TODO.
Base structure to all codelet arguments that include the profiling data.
static double cost(symbol_cblk_t *cblk)
Computes the cost of a cblk.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition solver.h:329
void * handler[2]
Definition solver.h:179
Solver column block structure.
Definition solver.h:161