PaStiX Handbook  6.4.0
codelet_cblk_csytrfsp.c
Go to the documentation of this file.
1 /**
2  *
3  * @file codelet_cblk_csytrfsp.c
4  *
5  * StarPU codelets for LDL^t functions
6  *
7  * @copyright 2016-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.4.0
11  * @author Mathieu Faverge
12  * @author Pierre Ramet
13  * @author Tom Moenne-Loccoz
14  * @author Alycia Lisito
15  * @author Nolan Bredel
16  * @date 2024-07-05
17  *
18  * @generated from /builds/solverstack/pastix/sopalin/starpu/codelet_cblk_zsytrfsp.c, normal z -> c, Tue Oct 8 14:17:34 2024
19  *
20  * @addtogroup pastix_starpu
21  * @{
22  *
23  **/
24 #ifndef DOXYGEN_SHOULD_SKIP_THIS
25 #define _GNU_SOURCE
26 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
27 #include "common.h"
28 #include "blend/solver.h"
29 #include "sopalin/sopalin_data.h"
30 #include "pastix_ccores.h"
31 #include "pastix_starpu.h"
32 #include "pastix_cstarpu.h"
33 #include "codelets.h"
34 
35 /**
36  * @brief Main structure for all tasks of cblk_csytrfsp type
37  */
38 struct cl_cblk_csytrfsp_args_s {
39  profile_data_t profile_data; /* Profiling record (measures, flops); the same pointer is also handed to cl_profiling_callback(), so this presumably has to stay the first field - confirm */
40  sopalin_data_t *sopalin_data; /* Solver data: gives access to the solver matrix and to the cpu/gpu cost models */
41  SolverCblk *cblk; /* Column block factorized by the task */
42 };
43 
44 #if defined(PASTIX_STARPU_PROFILING)
45 /**
46  * @brief Functions to profile the codelet
47  *
48  * Two levels of profiling are available:
49  * 1) A generic one that returns the flops per worker
50  * 2) A more detailed one that generates logs of the performance for each kernel
51  */
52 starpu_profile_t cblk_csytrfsp_profile = {
53  .next = NULL, /* Not linked yet; presumably chained by profiling_register_cl() - confirm */
54  .name = "cblk_csytrfsp" /* Codelet name attached to this profiling record */
55 };
56 
57 /**
58  * @brief Profiling registration function
59  */
60 void cblk_csytrfsp_profile_register( void ) __attribute__( ( constructor ) ); /* constructor attribute: invoked automatically at load time, before main() */
61 void
62 cblk_csytrfsp_profile_register( void )
63 {
64  profiling_register_cl( &cblk_csytrfsp_profile ); /* Hand the profiling record of this codelet to the profiling module */
65 }
66 
67 #ifndef DOXYGEN_SHOULD_SKIP_THIS
68 #if defined(PASTIX_STARPU_PROFILING_LOG)
69 static void
70 cl_profiling_cb_cblk_csytrfsp( void *callback_arg )
71 {
72  cl_profiling_callback( callback_arg );
73 
74  struct starpu_task *task = starpu_task_get_current();
75  struct starpu_profiling_task_info *info = task->profiling_info;
76 
77  /* Quick return */
78  if ( info == NULL ) {
79  return;
80  }
81 
82  struct cl_cblk_csytrfsp_args_s *args = (struct cl_cblk_csytrfsp_args_s *) callback_arg;
83  pastix_fixdbl_t flops = args->profile_data.flops;
84  pastix_fixdbl_t duration = starpu_timing_timespec_delay_us( &info->start_time, &info->end_time );
85  pastix_fixdbl_t speed = flops / ( 1000.0 * duration );
86 
87  pastix_int_t M = args->cblk->stride;
88  pastix_int_t N = cblk_colnbr( args->cblk );
89  M -= N;
90 
91  cl_profiling_log_register( task->name, "cblk_csytrfsp", M, N, 0, flops, speed );
92 }
93 #endif
94 
95 #if defined(PASTIX_STARPU_PROFILING_LOG)
96 static void (*cblk_csytrfsp_callback)(void*) = cl_profiling_cb_cblk_csytrfsp; /* Detailed variant: generic callback plus one log entry per task */
97 #else
98 static void (*cblk_csytrfsp_callback)(void*) = cl_profiling_callback; /* Generic profiling callback only */
99 #endif
100 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
101 
102 #endif /* defined(PASTIX_STARPU_PROFILING) */
103 
104 /**
105  *******************************************************************************
106  *
107  * @brief Cost model function
108  *
109  * The user can switch from the pastix static model to an history based model
110  * computed automatically.
111  *
112  *******************************************************************************
113  *
114  * @param[in] task
115  * TODO
116  *
117  * @param[in] arch
118  * TODO
119  *
120  * @param[in] nimpl
121  * TODO
122  *
123  *******************************************************************************
124  *
125  * @retval TODO
126  *
127  *******************************************************************************/
128 static inline pastix_fixdbl_t
129 fct_cblk_csytrfsp_cost( struct starpu_task *task,
130  struct starpu_perfmodel_arch *arch,
131  unsigned nimpl )
132 {
133  struct cl_cblk_csytrfsp_args_s *args = (struct cl_cblk_csytrfsp_args_s *)(task->cl_arg);
134 
135  pastix_fixdbl_t cost = 0.;
136  pastix_fixdbl_t *coefs1, *coefs2;
137  pastix_int_t M = args->cblk->stride;
138  pastix_int_t N = cblk_colnbr( args->cblk );
139  M -= N;
140 
141  switch( arch->devices->type ) {
142  case STARPU_CPU_WORKER:
143  coefs1 = &(args->sopalin_data->cpu_models->coefficients[PastixComplex32-2][PastixKernelSYTRF][0]);
144  coefs2 = &(args->sopalin_data->cpu_models->coefficients[PastixComplex32-2][PastixKernelTRSMCblk2d][0]);
145  break;
146  case STARPU_CUDA_WORKER:
147  coefs1 = &(args->sopalin_data->gpu_models->coefficients[PastixComplex32-2][PastixKernelSYTRF][0]);
148  coefs2 = &(args->sopalin_data->gpu_models->coefficients[PastixComplex32-2][PastixKernelTRSMCblk2d][0]);
149  break;
150  default:
151  assert(0);
152  return 0.;
153  }
154 
155  /* Get cost in us */
156  cost = modelsGetCost1Param( coefs1, N );
157  cost += modelsGetCost2Param( coefs2, M, N );
158 
159  (void)nimpl;
160  return cost;
161 }
162 
163 #ifndef DOXYGEN_SHOULD_SKIP_THIS
164 static struct starpu_perfmodel starpu_cblk_csytrfsp_model = {
165 #if defined( PASTIX_STARPU_COST_PER_ARCH )
166  .type = STARPU_PER_ARCH,
167  .arch_cost_function = cblk_sytrf_cost,
168 #else
169  .type = STARPU_HISTORY_BASED,
170 #endif
171  .symbol = "cblk_csytrfsp",
172 };
173 
174 #if !defined(PASTIX_STARPU_SIMULATION)
175 /**
176  *******************************************************************************
177  *
178  * @brief StarPU CPU implementation
179  *
180  *******************************************************************************
181  *
182  * @param[in] descr
183  * TODO
184  *
185  * @param[in] cl_arg
186  * TODO
187  *
188  *******************************************************************************/
189 static void
190 fct_cblk_csytrfsp_cpu( void *descr[], void *cl_arg )
191 {
192  struct cl_cblk_csytrfsp_args_s *args = (struct cl_cblk_csytrfsp_args_s *)cl_arg;
193  void *L;
194  void *DL;
195 
196  L = pastix_starpu_cblk_get_ptr( descr[0] );
197  DL = pastix_starpu_cblk_get_ptr( descr[1] );
198 
199  if ( (args->cblk->cblktype & CBLK_COMPRESSED) && (DL != NULL) ) {
200  char *ws = DL;
201  ws += (args->cblk[1].fblokptr - args->cblk[0].fblokptr) * sizeof( pastix_lrblock_t );
202  cpucblk_calloc_lrws( args->cblk, DL, (pastix_complex32_t*)ws );
203  }
204  cpucblk_csytrfsp1d_panel( args->sopalin_data->solvmtx, args->cblk, L, DL );
205 }
206 #endif /* !defined(PASTIX_STARPU_SIMULATION) */
207 
208 CODELETS_CPU( cblk_csytrfsp, 2 ); /* Presumably defines the cl_cblk_csytrfsp_cpu codelet (2 buffers) used by starpu_task_cblk_csytrfsp() - confirm against the macro definition */
209 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
210 
211 /**
212  *******************************************************************************
213  *
214  * @brief TODO
215  *
216  *******************************************************************************
217  *
218  * @param[in] sopalin_data
219  * TODO
220  *
221  * @param[in] cblk
222  * TODO
223  *
224  * @param[in] prio
225  * TODO
226  *
227  *******************************************************************************/
228 void
229 starpu_task_cblk_csytrfsp( sopalin_data_t *sopalin_data,
230  SolverCblk *cblk,
231  int prio )
232 {
233  struct cl_cblk_csytrfsp_args_s *cl_arg = NULL;
234  int need_exec = 1;
235 #if defined(PASTIX_DEBUG_STARPU)
236  char *task_name;
237 #endif
238 
239  starpu_data_handle_t *handler = (starpu_data_handle_t *)( cblk->handler );
240 
241  pastix_starpu_register_ws( handler + 1, cblk, PastixComplex32 );
242 
243 #if defined(PASTIX_WITH_MPI)
244  {
245  int64_t tag_desc = sopalin_data->solvmtx->starpu_desc->mpitag;
246  int64_t tag_cblk = 2 * cblk->gcblknum + 1;
247 
248  starpu_mpi_data_register( *(handler + 1),
249  tag_desc + tag_cblk,
250  cblk->ownerid );
251  }
252 #endif /* PASTIX_WITH_MPI */
253 
254  /*
255  * Check if it needs to be submitted
256  */
257 #if defined(PASTIX_WITH_MPI)
258  {
259  int need_submit = 0;
260  if ( cblk->ownerid == sopalin_data->solvmtx->clustnum ) {
261  need_submit = 1;
262  }
263  else {
264  need_exec = 0;
265  }
266  if ( starpu_mpi_cached_receive( cblk->handler[0] ) ) {
267  need_submit = 1;
268  }
269  if ( !need_submit ) {
270  return;
271  }
272  }
273 #endif
274 
275  /*
276  * Create the arguments array
277  */
278  if ( need_exec ) {
279  cl_arg = malloc( sizeof( struct cl_cblk_csytrfsp_args_s) );
280  cl_arg->sopalin_data = sopalin_data;
281 #if defined(PASTIX_STARPU_PROFILING)
282  cl_arg->profile_data.measures = cblk_csytrfsp_profile.measures;
283  cl_arg->profile_data.flops = NAN;
284 #endif
285  cl_arg->cblk = cblk;
286  }
287 
288 #if defined(PASTIX_DEBUG_STARPU)
289  /* This actually generates a memory leak */
290  asprintf( &task_name, "%s( %ld )",
291  cl_cblk_csytrfsp_cpu.name,
292  (long)(cblk - sopalin_data->solvmtx->cblktab) );
293 #endif
294 
295  pastix_starpu_insert_task(
296  &cl_cblk_csytrfsp_cpu,
297  STARPU_CL_ARGS, cl_arg, sizeof( struct cl_cblk_csytrfsp_args_s ),
298 #if defined(PASTIX_STARPU_PROFILING)
299  STARPU_CALLBACK_WITH_ARG_NFREE, cblk_csytrfsp_callback, cl_arg,
300 #endif
301  STARPU_RW, cblk->handler[0],
302  STARPU_W, cblk->handler[1],
303 #if defined(PASTIX_DEBUG_STARPU)
304  STARPU_NAME, task_name,
305 #endif
306 #if defined(PASTIX_STARPU_HETEROPRIO)
307  STARPU_PRIORITY, BucketFacto1D,
308 #else
309  STARPU_PRIORITY, prio,
310 #endif
311  0);
312  (void)prio;
313 }
314 
315 /**
316  * @}
317  */
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
float _Complex pastix_complex32_t
Definition: datatypes.h:76
double pastix_fixdbl_t
Definition: datatypes.h:65
@ PastixKernelTRSMCblk2d
Definition: kernels_enums.h:58
@ PastixKernelSYTRF
Definition: kernels_enums.h:52
void cpucblk_calloc_lrws(const SolverCblk *cblk, pastix_lrblock_t *lrblok, pastix_complex32_t *ws)
Initialize lrblock structure from a workspace for all blocks of the cblk associated.
Definition: cpucblk_cinit.c:98
int cpucblk_csytrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *DLt)
Compute the LDL^t factorization of one panel.
The block low-rank structure to hold a matrix in low-rank form.
static pastix_fixdbl_t fct_cblk_csytrfsp_cost(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl)
Cost model function.
void starpu_task_cblk_csytrfsp(sopalin_data_t *sopalin_data, SolverCblk *cblk, int prio)
TODO.
Base structure to all codelet arguments that include the profiling data.
static double cost(symbol_cblk_t *cblk)
Computes the cost of a cblk.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition: solver.h:329
pastix_int_t gcblknum
Definition: solver.h:174
void * handler[2]
Definition: solver.h:179
int ownerid
Definition: solver.h:181
Solver column block structure.
Definition: solver.h:161