PaStiX Handbook  6.4.0
codelet_blok_sgetrfsp.c
Go to the documentation of this file.
1 /**
2  *
3  * @file codelet_blok_sgetrfsp.c
4  *
5  * StarPU codelets for LU functions
6  *
7  * @copyright 2016-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.4.0
11  * @author Mathieu Faverge
12  * @author Pierre Ramet
13  * @author Ian Masliah
14  * @author Tom Moenne-Loccoz
15  * @date 2024-07-05
16  *
17  * @generated from /builds/solverstack/pastix/sopalin/starpu/codelet_blok_zgetrfsp.c, normal z -> s, Fri Jul 12 15:09:58 2024
18  *
19  * @addtogroup pastix_starpu
20  * @{
21  *
22  **/
23 #ifndef DOXYGEN_SHOULD_SKIP_THIS
24 #define _GNU_SOURCE
25 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
#include <stdio.h>
#include <stdlib.h>
#include "common.h"
#include "blend/solver.h"
#include "sopalin/sopalin_data.h"
#include "pastix_scores.h"
#include "pastix_starpu.h"
#include "pastix_sstarpu.h"
#include "codelets.h"
33 
34 /**
35  * @brief Main structure for all tasks of blok_sgetrfsp type
36  */
37 struct cl_blok_sgetrfsp_args_s {
38  profile_data_t profile_data;
39  sopalin_data_t *sopalin_data;
40  SolverCblk *cblk;
41 };
42 
43 #if defined(PASTIX_STARPU_PROFILING)
44 /**
45  * @brief Functions to profile the codelet
46  *
47  * Two levels of profiling are available:
48  * 1) A generic one that returns the flops per worker
49  * 2) A more detailed one that generate logs of the performance for each kernel
50  */
51 starpu_profile_t blok_sgetrfsp_profile = {
52  .next = NULL,
53  .name = "blok_sgetrfsp"
54 };
55 
56 /**
57  * @brief Profiling registration function
58  */
59 void blok_sgetrfsp_profile_register( void ) __attribute__( ( constructor ) );
60 void
61 blok_sgetrfsp_profile_register( void )
62 {
63  profiling_register_cl( &blok_sgetrfsp_profile );
64 }
65 
66 #ifndef DOXYGEN_SHOULD_SKIP_THIS
67 #if defined(PASTIX_STARPU_PROFILING_LOG)
68 static void
69 cl_profiling_cb_blok_sgetrfsp( void *callback_arg )
70 {
71  cl_profiling_callback( callback_arg );
72 
73  struct starpu_task *task = starpu_task_get_current();
74  struct starpu_profiling_task_info *info = task->profiling_info;
75 
76  /* Quick return */
77  if ( info == NULL ) {
78  return;
79  }
80 
81  struct cl_blok_sgetrfsp_args_s *args = (struct cl_blok_sgetrfsp_args_s *) callback_arg;
82  pastix_fixdbl_t flops = args->profile_data.flops;
83  pastix_fixdbl_t duration = starpu_timing_timespec_delay_us( &info->start_time, &info->end_time );
84  pastix_fixdbl_t speed = flops / ( 1000.0 * duration );
85 
86  pastix_int_t N = cblk_colnbr( args->cblk );
87 
88  cl_profiling_log_register( task->name, "blok_sgetrfsp", N, 0, 0, flops, speed );
89 }
90 #endif
91 
92 #if defined(PASTIX_STARPU_PROFILING_LOG)
93 static void (*blok_sgetrfsp_callback)(void*) = cl_profiling_cb_blok_sgetrfsp;
94 #else
95 static void (*blok_sgetrfsp_callback)(void*) = cl_profiling_callback;
96 #endif
97 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
98 
99 #endif /* defined(PASTIX_STARPU_PROFILING) */
100 
101 /**
102  *******************************************************************************
103  *
104  * @brief Cost model function
105  *
106  * The user can switch from the pastix static model to an history based model
107  * computed automatically.
108  *
109  *******************************************************************************
110  *
111  * @param[in] task
112  * TODO
113  *
114  * @param[in] arch
115  * TODO
116  *
117  * @param[in] nimpl
118  * TODO
119  *
120  *******************************************************************************
121  *
122  * @retval TODO
123  *
124  *******************************************************************************/
125 static inline pastix_fixdbl_t
126 fct_blok_sgetrfsp_cost( struct starpu_task *task,
127  struct starpu_perfmodel_arch *arch,
128  unsigned nimpl )
129 {
130  struct cl_blok_sgetrfsp_args_s *args = (struct cl_blok_sgetrfsp_args_s *)(task->cl_arg);
131 
132  pastix_fixdbl_t cost = 0.;
133  pastix_fixdbl_t *coefs;
134  pastix_int_t N = cblk_colnbr( args->cblk );
135 
136  switch( arch->devices->type ) {
137  case STARPU_CPU_WORKER:
138  coefs = &(args->sopalin_data->cpu_models->coefficients[PastixFloat-2][PastixKernelGETRF][0]);
139  break;
140  case STARPU_CUDA_WORKER:
141  coefs = &(args->sopalin_data->gpu_models->coefficients[PastixFloat-2][PastixKernelGETRF][0]);
142  break;
143  default:
144  assert(0);
145  return 0.;
146  }
147 
148  /* Get cost in us */
149  cost = modelsGetCost1Param( coefs, N );
150 
151  (void)nimpl;
152  return cost;
153 }
154 
155 #ifndef DOXYGEN_SHOULD_SKIP_THIS
156 static struct starpu_perfmodel starpu_blok_sgetrfsp_model = {
157 #if defined(PASTIX_STARPU_COST_PER_ARCH)
158  .type = STARPU_PER_ARCH,
159  .arch_cost_function = fct_blok_sgetrfsp_cost,
160 #else
161  .type = STARPU_HISTORY_BASED,
162 #endif
163  .symbol = "blok_sgetrfsp",
164 };
165 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
166 
167 
168 #if !defined(PASTIX_STARPU_SIMULATION)
169 /**
170  *******************************************************************************
171  *
172  * @brief StarPU CPU implementation
173  *
174  *******************************************************************************
175  *
176  * @param[in] descr
177  * TODO
178  *
179  * @param[in] cl_arg
180  * TODO
181  *
182  *******************************************************************************/
183 static void
184 fct_blok_sgetrfsp_cpu( void *descr[], void *cl_arg )
185 {
186  struct cl_blok_sgetrfsp_args_s *args = (struct cl_blok_sgetrfsp_args_s *)cl_arg;
187  void *L, *U;
188 
189  L = pastix_starpu_blok_get_ptr( descr[0] );
190  U = pastix_starpu_blok_get_ptr( descr[1] );
191 
192  assert( args->cblk->cblktype & CBLK_TASKS_2D );
193 
194  cpucblk_sgetrfsp1d_getrf( args->sopalin_data->solvmtx, args->cblk, L, U );
195 }
196 #endif /* !defined(PASTIX_STARPU_SIMULATION) */
197 
198 #ifndef DOXYGEN_SHOULD_SKIP_THIS
199 CODELETS_CPU( blok_sgetrfsp, 2 );
200 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
201 
202 /**
203  *******************************************************************************
204  *
205  * @brief TODO
206  *
207  *******************************************************************************
208  *
209  * @param[in] sopalin_data
210  * TODO
211  *
212  * @param[in] cblk
213  * TODO
214  *
215  * @param[in] prio
216  * TODO
217  *
218  *******************************************************************************/
219 void
220 starpu_task_blok_sgetrf( sopalin_data_t *sopalin_data,
221  SolverCblk *cblk,
222  int prio )
223 {
224  struct cl_blok_sgetrfsp_args_s *cl_arg = NULL;
225  int need_exec = 1;
226 #if defined(PASTIX_DEBUG_STARPU)
227  char *task_name;
228 #endif
229 
230  /*
231  * Check if it needs to be submitted
232  */
233 #if defined(PASTIX_WITH_MPI)
234  {
235  int need_submit = 0;
236  if ( cblk->ownerid == sopalin_data->solvmtx->clustnum ) {
237  need_submit = 1;
238  }
239  else {
240  need_exec = 0;
241  }
242  if ( starpu_mpi_cached_receive( cblk->fblokptr->handler[0] ) ) {
243  need_submit = 1;
244  }
245  if ( starpu_mpi_cached_receive( cblk->fblokptr->handler[1] ) ) {
246  need_submit = 1;
247  }
248  if ( !need_submit ) {
249  return;
250  }
251  }
252 #endif
253 
254  /*
255  * Create the arguments array
256  */
257  if ( need_exec ) {
258  cl_arg = malloc( sizeof( struct cl_blok_sgetrfsp_args_s) );
259  cl_arg->sopalin_data = sopalin_data;
260 #if defined(PASTIX_STARPU_PROFILING)
261  cl_arg->profile_data.measures = blok_sgetrfsp_profile.measures;
262  cl_arg->profile_data.flops = NAN;
263 #endif
264  cl_arg->cblk = cblk;
265  }
266 
267 #if defined(PASTIX_DEBUG_STARPU)
268  /* This actually generates a memory leak */
269  asprintf( &task_name, "%s( %ld )",
270  cl_blok_sgetrfsp_cpu.name,
271  (long)(cblk - sopalin_data->solvmtx->cblktab) );
272 #endif
273 
274  pastix_starpu_insert_task(
275  &cl_blok_sgetrfsp_cpu,
276  STARPU_CL_ARGS, cl_arg, sizeof( struct cl_blok_sgetrfsp_args_s ),
277 #if defined(PASTIX_STARPU_PROFILING)
278  STARPU_CALLBACK_WITH_ARG_NFREE, blok_sgetrfsp_callback, cl_arg,
279 #endif
280  STARPU_RW, cblk->fblokptr->handler[0],
281  STARPU_RW, cblk->fblokptr->handler[1],
282 #if defined(PASTIX_DEBUG_STARPU)
283  STARPU_NAME, task_name,
284 #endif
285 #if defined(PASTIX_STARPU_HETEROPRIO)
286  STARPU_PRIORITY, BucketFacto2D,
287 #else
288  STARPU_PRIORITY, prio,
289 #endif
290  0);
291  (void)prio;
292 }
293 
294 /**
295  * @}
296  */
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
double pastix_fixdbl_t
Definition: datatypes.h:65
@ PastixKernelGETRF
Definition: kernels_enums.h:48
int cpucblk_sgetrfsp1d_getrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL, void *dataU)
Compute the LU factorization of the diagonal block in a panel.
static pastix_fixdbl_t fct_blok_sgetrfsp_cost(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl)
Cost model function.
static void fct_blok_sgetrfsp_cpu(void *descr[], void *cl_arg)
StarPU CPU implementation.
void starpu_task_blok_sgetrf(sopalin_data_t *sopalin_data, SolverCblk *cblk, int prio)
TODO.
Base structure to all codelet arguments that include the profiling data.
static double cost(symbol_cblk_t *cblk)
Computes the cost of a cblk.
void * handler[2]
Definition: solver.h:142
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition: solver.h:329
SolverBlok * fblokptr
Definition: solver.h:168
int ownerid
Definition: solver.h:181
Solver column block structure.
Definition: solver.h:161