PaStiX Handbook  6.3.2
codelet_blok_csytrfsp.c
Go to the documentation of this file.
1 /**
2  *
3  * @file codelet_blok_csytrfsp.c
4  *
5  * StarPU codelets for LDL^t functions
6  *
7  * @copyright 2016-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.3.2
11  * @author Mathieu Faverge
12  * @author Pierre Ramet
13  * @author Tom Moenne-Loccoz
14  * @date 2023-07-21
15  *
16  * @generated from /builds/solverstack/pastix/sopalin/starpu/codelet_blok_zsytrfsp.c, normal z -> c, Wed Dec 13 12:09:26 2023
17  *
18  * @addtogroup pastix_starpu
19  * @{
20  *
21  **/
22 #ifndef DOXYGEN_SHOULD_SKIP_THIS
23 #define _GNU_SOURCE
24 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
25 #include "common.h"
26 #include "blend/solver.h"
27 #include "sopalin/sopalin_data.h"
28 #include "pastix_ccores.h"
29 #include "pastix_starpu.h"
30 #include "pastix_cstarpu.h"
31 #include "codelets.h"
32 
33 /**
34  * @brief Main structure for all tasks of blok_csytrfsp type
35  */
36 struct cl_blok_csytrfsp_args_s {
37  profile_data_t profile_data;
38  sopalin_data_t *sopalin_data;
39  SolverCblk *cblk;
40 };
41 
42 #if defined(PASTIX_STARPU_PROFILING)
43 /**
44  * @brief Functions to profile the codelet
45  *
46  * Two levels of profiling are available:
47  * 1) A generic one that returns the flops per worker
48  * 2) A more detailed one that generate logs of the performance for each kernel
49  */
50 starpu_profile_t blok_csytrfsp_profile = {
51  .next = NULL,
52  .name = "blok_csytrfsp"
53 };
54 
55 /**
56  * @brief Profiling registration function
57  */
58 void blok_csytrfsp_profile_register( void ) __attribute__( ( constructor ) );
59 void
60 blok_csytrfsp_profile_register( void )
61 {
62  profiling_register_cl( &blok_csytrfsp_profile );
63 }
64 
65 #ifndef DOXYGEN_SHOULD_SKIP_THIS
66 #if defined(PASTIX_STARPU_PROFILING_LOG)
67 static void
68 cl_profiling_cb_blok_csytrfsp( void *callback_arg )
69 {
70  cl_profiling_callback( callback_arg );
71 
72  struct starpu_task *task = starpu_task_get_current();
73  struct starpu_profiling_task_info *info = task->profiling_info;
74 
75  /* Quick return */
76  if ( info == NULL ) {
77  return;
78  }
79 
80  struct cl_blok_csytrfsp_args_s *args = (struct cl_blok_csytrfsp_args_s *) callback_arg;
81  pastix_fixdbl_t flops = args->profile_data.flops;
82  pastix_fixdbl_t duration = starpu_timing_timespec_delay_us( &info->start_time, &info->end_time );
83  pastix_fixdbl_t speed = flops / ( 1000.0 * duration );
84 
85  pastix_int_t N = cblk_colnbr( args->cblk );
86 
87  cl_profiling_log_register( task->name, "blok_csytrfsp", N, 0, 0, flops, speed );
88 }
89 #endif
90 
91 #if defined(PASTIX_STARPU_PROFILING_LOG)
92 static void (*blok_csytrfsp_callback)(void*) = cl_profiling_cb_blok_csytrfsp;
93 #else
94 static void (*blok_csytrfsp_callback)(void*) = cl_profiling_callback;
95 #endif
96 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
97 
98 #endif /* defined(PASTIX_STARPU_PROFILING) */
99 
100 #ifndef DOXYGEN_SHOULD_SKIP_THIS
101 
102 /**
103  *******************************************************************************
104  *
105  * @brief Cost model function
106  *
107  * The user can switch from the pastix static model to an history based model
108  * computed automatically.
109  *
110  *******************************************************************************
111  *
112  * @param[in] task
113  * TODO
114  *
115  * @param[in] arch
116  * TODO
117  *
118  * @param[in] nimpl
119  * TODO
120  *
121  *******************************************************************************
122  *
123  * @retval TODO
124  *
125  *******************************************************************************/
126 static inline pastix_fixdbl_t
127 fct_blok_csytrfsp_cost( struct starpu_task *task,
128  struct starpu_perfmodel_arch *arch,
129  unsigned nimpl )
130 {
131  struct cl_blok_csytrfsp_args_s *args = (struct cl_blok_csytrfsp_args_s *)(task->cl_arg);
132 
133  pastix_fixdbl_t cost = 0.;
134  pastix_fixdbl_t *coefs;
135  pastix_int_t N = cblk_colnbr( args->cblk );
136 
137  switch( arch->devices->type ) {
138  case STARPU_CPU_WORKER:
139  coefs = &(args->sopalin_data->cpu_models->coefficients[PastixComplex32-2][PastixKernelSYTRF][0]);
140  break;
141  case STARPU_CUDA_WORKER:
142  coefs = &(args->sopalin_data->gpu_models->coefficients[PastixComplex32-2][PastixKernelSYTRF][0]);
143  break;
144  default:
145  assert(0);
146  return 0.;
147  }
148 
149  /* Get cost in us */
150  cost = modelsGetCost1Param( coefs, N );
151 
152  (void)nimpl;
153  return cost;
154 }
155 
156 static struct starpu_perfmodel starpu_blok_csytrfsp_model = {
157 #if defined(PASTIX_STARPU_COST_PER_ARCH)
158  .type = STARPU_PER_ARCH,
159  .arch_cost_function = fct_blok_csytrfsp_cost,
160 #else
161  .type = STARPU_HISTORY_BASED,
162 #endif
163  .symbol = "blok_csytrfsp",
164 };
165 
166 #if !defined(PASTIX_STARPU_SIMULATION)
167 /**
168  *******************************************************************************
169  *
170  * @brief StarPU CPU implementation
171  *
172  *******************************************************************************
173  *
174  * @param[in] descr
175  * TODO
176  *
177  * @param[in] cl_arg
178  * TODO
179  *
180  *******************************************************************************/
181 static void
182 fct_blok_csytrfsp_cpu( void *descr[], void *cl_arg )
183 {
184  struct cl_blok_csytrfsp_args_s *args = (struct cl_blok_csytrfsp_args_s *)cl_arg;
185  void *L;
186 
187  L = pastix_starpu_blok_get_ptr( descr[0] );
188 
189  assert( args->cblk->cblktype & CBLK_TASKS_2D );
190 
191  cpucblk_csytrfsp1d_sytrf( args->sopalin_data->solvmtx, args->cblk, L );
192 }
193 #endif /* !defined(PASTIX_STARPU_SIMULATION) */
194 
195 CODELETS_CPU( blok_csytrfsp, 1 );
196 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
197 
198 /**
199  *******************************************************************************
200  *
201  * @brief TODO
202  *
203  *******************************************************************************
204  *
205  * @param[in] sopalin_data
206  * TODO
207  *
208  * @param[in] cblk
209  * TODO
210  *
211  * @param[in] prio
212  * TODO
213  *
214  *******************************************************************************/
215 void
216 starpu_task_blok_csytrf( sopalin_data_t *sopalin_data,
217  SolverCblk *cblk,
218  int prio )
219 {
220  struct cl_blok_csytrfsp_args_s *cl_arg = NULL;
221  int need_exec = 1;
222 #if defined(PASTIX_DEBUG_STARPU)
223  char *task_name;
224 #endif
225 
226  /*
227  * Check if it needs to be submitted
228  */
229 #if defined(PASTIX_WITH_MPI)
230  {
231  int need_submit = 0;
232  if ( cblk->ownerid == sopalin_data->solvmtx->clustnum ) {
233  need_submit = 1;
234  }
235  else {
236  need_exec = 0;
237  }
238  if ( starpu_mpi_cached_receive( cblk->fblokptr->handler[0] ) ) {
239  need_submit = 1;
240  }
241  if ( !need_submit ) {
242  return;
243  }
244  }
245 #endif
246 
247  /*
248  * Create the arguments array
249  */
250  if ( need_exec ) {
251  cl_arg = malloc( sizeof( struct cl_blok_csytrfsp_args_s) );
252  cl_arg->sopalin_data = sopalin_data;
253 #if defined(PASTIX_STARPU_PROFILING)
254  cl_arg->profile_data.measures = blok_csytrfsp_profile.measures;
255  cl_arg->profile_data.flops = NAN;
256 #endif
257  cl_arg->cblk = cblk;
258  }
259 
260 #if defined(PASTIX_DEBUG_STARPU)
261  /* This actually generates a memory leak */
262  asprintf( &task_name, "%s( %ld )",
263  cl_blok_csytrfsp_cpu.name,
264  (long)(cblk - sopalin_data->solvmtx->cblktab) );
265 #endif
266 
267  pastix_starpu_insert_task(
268  &cl_blok_csytrfsp_cpu,
269  STARPU_CL_ARGS, cl_arg, sizeof( struct cl_blok_csytrfsp_args_s ),
270 #if defined(PASTIX_STARPU_PROFILING)
271  STARPU_CALLBACK_WITH_ARG_NFREE, blok_csytrfsp_callback, cl_arg,
272 #endif
273  STARPU_RW, cblk->fblokptr->handler[0],
274 #if defined(PASTIX_DEBUG_STARPU)
275  STARPU_NAME, task_name,
276 #endif
277 #if defined(PASTIX_STARPU_HETEROPRIO)
278  STARPU_PRIORITY, BucketFacto2D,
279 #else
280  STARPU_PRIORITY, prio,
281 #endif
282  0);
283  (void)prio;
284 }
285 
286 /**
287  * @}
288  */
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
double pastix_fixdbl_t
Definition: datatypes.h:65
@ PastixKernelSYTRF
Definition: kernels_enums.h:52
int cpucblk_csytrfsp1d_sytrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Computes the LDL^t factorization of the diagonal block in a panel.
void starpu_task_blok_csytrf(sopalin_data_t *sopalin_data, SolverCblk *cblk, int prio)
TODO.
Base structure to all codelet arguments that include the profiling data.
static double cost(symbol_cblk_t *cblk)
Computes the cost of a cblk.
void * handler[2]
Definition: solver.h:138
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition: solver.h:324
SolverBlok * fblokptr
Definition: solver.h:163
int ownerid
Definition: solver.h:175
Solver column block structure.
Definition: solver.h:156