PaStiX Handbook  6.3.2
codelet_cblk_cpotrfsp.c
Go to the documentation of this file.
1 /**
2  *
3  * @file codelet_cblk_cpotrfsp.c
4  *
5  * StarPU codelets for Cholesky functions
6  *
7  * @copyright 2016-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.3.2
11  * @author Mathieu Faverge
12  * @author Pierre Ramet
13  * @author Ian Masliah
14  * @author Tom Moenne-Loccoz
15  * @date 2023-07-21
16  *
17  * @generated from /builds/solverstack/pastix/sopalin/starpu/codelet_cblk_zpotrfsp.c, normal z -> c, Wed Dec 13 12:09:27 2023
18  *
19  * @addtogroup pastix_starpu
20  * @{
21  *
22  **/
23 #ifndef DOXYGEN_SHOULD_SKIP_THIS
24 #define _GNU_SOURCE
25 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
26 #include "common.h"
27 #include "blend/solver.h"
28 #include "sopalin/sopalin_data.h"
29 #include "pastix_ccores.h"
30 #include "pastix_starpu.h"
31 #include "pastix_cstarpu.h"
32 #include "codelets.h"
33 
34 /**
35  * @brief Main structure for all tasks of cblk_cgemmsp type
36  */
37 struct cl_cblk_cpotrfsp_args_s {
38  profile_data_t profile_data;
39  sopalin_data_t *sopalin_data;
40  SolverCblk *cblk;
41 };
42 
43 #if defined(PASTIX_STARPU_PROFILING)
44 /**
45  * @brief Functions to profile the codelet
46  *
47  * Two levels of profiling are available:
48  * 1) A generic one that returns the flops per worker
49  * 2) A more detailed one that generate logs of the performance for each kernel
50  */
51 starpu_profile_t cblk_cpotrfsp_profile = {
52  .next = NULL,
53  .name = "cblk_cpotrfsp"
54 };
55 
56 /**
57  * @brief Profiling registration function
58  */
59 void cblk_cpotrfsp_profile_register( void ) __attribute__( ( constructor ) );
60 void
61 cblk_cpotrfsp_profile_register( void )
62 {
63  profiling_register_cl( &cblk_cpotrfsp_profile );
64 }
65 
66 #ifndef DOXYGEN_SHOULD_SKIP_THIS
67 #if defined(PASTIX_STARPU_PROFILING_LOG)
68 static void
69 cl_profiling_cb_cblk_cpotrfsp( void *callback_arg )
70 {
71  cl_profiling_callback( callback_arg );
72 
73  struct starpu_task *task = starpu_task_get_current();
74  struct starpu_profiling_task_info *info = task->profiling_info;
75 
76  /* Quick return */
77  if ( info == NULL ) {
78  return;
79  }
80 
81  struct cl_cblk_cpotrfsp_args_s *args = (struct cl_cblk_cpotrfsp_args_s *) callback_arg;
82  pastix_fixdbl_t flops = args->profile_data.flops;
83  pastix_fixdbl_t duration = starpu_timing_timespec_delay_us( &info->start_time, &info->end_time );
84  pastix_fixdbl_t speed = flops / ( 1000.0 * duration );
85 
86  pastix_int_t M = args->cblk->stride;
87  pastix_int_t N = cblk_colnbr( args->cblk );
88  M -= N;
89 
90  cl_profiling_log_register( task->name, "cblk_cpotrfsp", M, N, 0, flops, speed );
91 }
92 #endif
93 
94 #if defined(PASTIX_STARPU_PROFILING_LOG)
95 static void (*cblk_cpotrfsp_callback)(void*) = cl_profiling_cb_cblk_cpotrfsp;
96 #else
97 static void (*cblk_cpotrfsp_callback)(void*) = cl_profiling_callback;
98 #endif
99 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
100 
101 #endif /* defined(PASTIX_STARPU_PROFILING) */
102 
103 /**
104  *******************************************************************************
105  *
106  * @brief Cost model function
107  *
108  * The user can switch from the pastix static model to an history based model
109  * computed automatically.
110  *
111  *******************************************************************************
112  *
113  * @param[in] task
114  * TODO
115  *
116  * @param[in] arch
117  * TODO
118  *
119  * @param[in] nimpl
120  * TODO
121  *
122  *******************************************************************************
123  *
124  * @retval TODO
125  *
126  *******************************************************************************/
127 static inline pastix_fixdbl_t
128 fct_cblk_cpotrfsp_cost( struct starpu_task *task,
129  struct starpu_perfmodel_arch *arch,
130  unsigned nimpl )
131 {
132  struct cl_cblk_cpotrfsp_args_s *args = (struct cl_cblk_cpotrfsp_args_s *)(task->cl_arg);
133 
134  pastix_fixdbl_t cost = 0.;
135  pastix_fixdbl_t *coefs1, *coefs2;
136  pastix_int_t M = args->cblk->stride;
137  pastix_int_t N = cblk_colnbr( args->cblk );
138  M -= N;
139 
140  switch( arch->devices->type ) {
141  case STARPU_CPU_WORKER:
142  coefs1 = &(args->sopalin_data->cpu_models->coefficients[PastixComplex32-2][PastixKernelPOTRF][0]);
143  coefs2 = &(args->sopalin_data->cpu_models->coefficients[PastixComplex32-2][PastixKernelTRSMCblk2d][0]);
144  break;
145  case STARPU_CUDA_WORKER:
146  coefs1 = &(args->sopalin_data->gpu_models->coefficients[PastixComplex32-2][PastixKernelPOTRF][0]);
147  coefs2 = &(args->sopalin_data->gpu_models->coefficients[PastixComplex32-2][PastixKernelTRSMCblk2d][0]);
148  break;
149  default:
150  assert(0);
151  return 0.;
152  }
153 
154  /* Get cost in us */
155  cost = modelsGetCost1Param( coefs1, N );
156  cost += modelsGetCost2Param( coefs2, M, N );
157 
158  (void)nimpl;
159  return cost;
160 }
161 
162 #ifndef DOXYGEN_SHOULD_SKIP_THIS
163 static struct starpu_perfmodel starpu_cblk_cpotrfsp_model = {
164 #if defined( PASTIX_STARPU_COST_PER_ARCH )
165  .type = STARPU_PER_ARCH,
166  .arch_cost_function = cblk_potrf_cost,
167 #else
168  .type = STARPU_HISTORY_BASED,
169 #endif
170  .symbol = "cblk_cpotrfsp",
171 };
172 
173 #if !defined(PASTIX_STARPU_SIMULATION)
174 /**
175  *******************************************************************************
176  *
177  * @brief StarPU CPU implementation
178  *
179  *******************************************************************************
180  *
181  * @param[in] descr
182  * TODO
183  *
184  * @param[in] cl_arg
185  * TODO
186  *
187  *******************************************************************************/
188 static void
189 fct_cblk_cpotrfsp_cpu( void *descr[], void *cl_arg )
190 {
191  struct cl_cblk_cpotrfsp_args_s *args = (struct cl_cblk_cpotrfsp_args_s *)cl_arg;
192  void *L;
193 
194  L = pastix_starpu_cblk_get_ptr( descr[0] );
195 
196  cpucblk_cpotrfsp1d_panel( args->sopalin_data->solvmtx, args->cblk, L );
197 }
198 #endif /* !defined(PASTIX_STARPU_SIMULATION) */
199 
200 CODELETS_CPU( cblk_cpotrfsp, 1 );
201 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
202 
203 /**
204  *******************************************************************************
205  *
206  * @brief TODO
207  *
208  *******************************************************************************
209  *
210  * @param[in] sopalin_data
211  * TODO
212  *
213  * @param[in] cblk
214  * TODO
215  *
216  * @param[in] prio
217  * TODO
218  *
219  *******************************************************************************/
220 void
221 starpu_task_cblk_cpotrfsp( sopalin_data_t *sopalin_data,
222  SolverCblk *cblk,
223  int prio )
224 {
225  struct cl_cblk_cpotrfsp_args_s *cl_arg = NULL;
226  int need_exec = 1;
227 #if defined(PASTIX_DEBUG_STARPU)
228  char *task_name;
229 #endif
230 
231  /*
232  * Check if it needs to be submitted
233  */
234 #if defined(PASTIX_WITH_MPI)
235  {
236  int need_submit = 0;
237  if ( cblk->ownerid == sopalin_data->solvmtx->clustnum ) {
238  need_submit = 1;
239  }
240  else {
241  need_exec = 0;
242  }
243  if ( starpu_mpi_cached_receive( cblk->handler[0] ) ) {
244  need_submit = 1;
245  }
246  if ( !need_submit ) {
247  return;
248  }
249  }
250 #endif
251 
252  /*
253  * Create the arguments array
254  */
255  if ( need_exec ) {
256  cl_arg = malloc( sizeof( struct cl_cblk_cpotrfsp_args_s) );
257  cl_arg->sopalin_data = sopalin_data;
258 #if defined(PASTIX_STARPU_PROFILING)
259  cl_arg->profile_data.measures = cblk_cpotrfsp_profile.measures;
260  cl_arg->profile_data.flops = NAN;
261 #endif
262  cl_arg->cblk = cblk;
263  }
264 
265 #if defined(PASTIX_DEBUG_STARPU)
266  /* This actually generates a memory leak */
267  asprintf( &task_name, "%s( %ld )",
268  cl_cblk_cpotrfsp_cpu.name,
269  (long)(cblk - sopalin_data->solvmtx->cblktab) );
270 #endif
271 
272  pastix_starpu_insert_task(
273  &cl_cblk_cpotrfsp_cpu,
274  STARPU_CL_ARGS, cl_arg, sizeof( struct cl_cblk_cpotrfsp_args_s ),
275 #if defined(PASTIX_STARPU_PROFILING)
276  STARPU_CALLBACK_WITH_ARG_NFREE, cblk_cpotrfsp_callback, cl_arg,
277 #endif
278  STARPU_RW, cblk->handler[0],
279 #if defined(PASTIX_DEBUG_STARPU)
280  STARPU_NAME, task_name,
281 #endif
282 #if defined(PASTIX_STARPU_HETEROPRIO)
283  STARPU_PRIORITY, BucketFacto1D,
284 #else
285  STARPU_PRIORITY, prio,
286 #endif
287  0);
288  (void)prio;
289 }
290 
291 /**
292  * @}
293  */
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
double pastix_fixdbl_t
Definition: datatypes.h:65
@ PastixKernelTRSMCblk2d
Definition: kernels_enums.h:58
@ PastixKernelPOTRF
Definition: kernels_enums.h:50
int cpucblk_cpotrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L)
Compute the Cholesky factorization of one panel.
static pastix_fixdbl_t fct_cblk_cpotrfsp_cost(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl)
Cost model function.
void starpu_task_cblk_cpotrfsp(sopalin_data_t *sopalin_data, SolverCblk *cblk, int prio)
TODO.
Base structure to all codelet arguments that include the profiling data.
static double cost(symbol_cblk_t *cblk)
Computes the cost of a cblk.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition: solver.h:324
void * handler[2]
Definition: solver.h:173
int ownerid
Definition: solver.h:175
Solver column block structure.
Definition: solver.h:156