PaStiX Handbook  6.4.0
codelet_cblk_dgetrfsp.c
Go to the documentation of this file.
1 /**
2  *
3  * @file codelet_cblk_dgetrfsp.c
4  *
5  * StarPU codelets for LU functions
6  *
7  * @copyright 2016-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.4.0
11  * @author Mathieu Faverge
12  * @author Pierre Ramet
13  * @author Ian Masliah
14  * @author Tom Moenne-Loccoz
15  * @date 2024-07-05
16  *
17  * @generated from /builds/solverstack/pastix/sopalin/starpu/codelet_cblk_zgetrfsp.c, normal z -> d, Tue Oct 8 14:17:34 2024
18  *
19  * @addtogroup pastix_starpu
20  * @{
21  *
22  **/
23 #ifndef DOXYGEN_SHOULD_SKIP_THIS
24 #define _GNU_SOURCE
25 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
26 #include "common.h"
27 #include "blend/solver.h"
28 #include "sopalin/sopalin_data.h"
29 #include "pastix_dcores.h"
30 #include "pastix_starpu.h"
31 #include "pastix_dstarpu.h"
32 #include "codelets.h"
33 
34 /**
35  * @brief Main structure for all tasks of cblk_dgemmsp type
36  */
37 struct cl_cblk_dgetrfsp_args_s {
38  profile_data_t profile_data;
39  sopalin_data_t *sopalin_data;
40  SolverCblk *cblk;
41 };
42 
43 #if defined(PASTIX_STARPU_PROFILING)
44 /**
45  * @brief Functions to profile the codelet
46  *
47  * Two levels of profiling are available:
48  * 1) A generic one that returns the flops per worker
49  * 2) A more detailed one that generate logs of the performance for each kernel
50  */
51 starpu_profile_t cblk_dgetrfsp_profile = {
52  .next = NULL,
53  .name = "cblk_dgetrfsp"
54 };
55 
56 /**
57  * @brief Profiling registration function
58  */
59 void cblk_dgetrfsp_profile_register( void ) __attribute__( ( constructor ) );
60 void
61 cblk_dgetrfsp_profile_register( void )
62 {
63  profiling_register_cl( &cblk_dgetrfsp_profile );
64 }
65 
66 #ifndef DOXYGEN_SHOULD_SKIP_THIS
67 #if defined(PASTIX_STARPU_PROFILING_LOG)
68 static void
69 cl_profiling_cb_cblk_dgetrfsp( void *callback_arg )
70 {
71  cl_profiling_callback( callback_arg );
72 
73  struct starpu_task *task = starpu_task_get_current();
74  struct starpu_profiling_task_info *info = task->profiling_info;
75 
76  /* Quick return */
77  if ( info == NULL ) {
78  return;
79  }
80 
81  struct cl_cblk_dgetrfsp_args_s *args = (struct cl_cblk_dgetrfsp_args_s *) callback_arg;
82  pastix_fixdbl_t flops = args->profile_data.flops;
83  pastix_fixdbl_t duration = starpu_timing_timespec_delay_us( &info->start_time, &info->end_time );
84  pastix_fixdbl_t speed = flops / ( 1000.0 * duration );
85 
86  pastix_int_t M = args->cblk->stride;
87  pastix_int_t N = cblk_colnbr( args->cblk );
88  M -= N;
89 
90  cl_profiling_log_register( task->name, "cblk_dgetrfsp", M, N, 0, flops, speed );
91 }
92 #endif
93 
94 #if defined(PASTIX_STARPU_PROFILING_LOG)
95 static void (*cblk_dgetrfsp_callback)(void*) = cl_profiling_cb_cblk_dgetrfsp;
96 #else
97 static void (*cblk_dgetrfsp_callback)(void*) = cl_profiling_callback;
98 #endif
99 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
100 
101 #endif /* defined(PASTIX_STARPU_PROFILING) */
102 
103 /**
104  *******************************************************************************
105  *
106  * @brief Cost model function
107  *
108  * The user can switch from the pastix static model to an history based model
109  * computed automatically.
110  *
111  *******************************************************************************
112  *
113  * @param[in] task
114  * TODO
115  *
116  * @param[in] arch
117  * TODO
118  *
119  * @param[in] nimpl
120  * TODO
121  *
122  *******************************************************************************
123  *
124  * @retval TODO
125  *
126  *******************************************************************************/
127 static inline pastix_fixdbl_t
128 fct_cblk_dgetrfsp_cost( struct starpu_task *task,
129  struct starpu_perfmodel_arch *arch,
130  unsigned nimpl )
131 {
132  struct cl_cblk_dgetrfsp_args_s *args = (struct cl_cblk_dgetrfsp_args_s *)(task->cl_arg);
133 
134  pastix_fixdbl_t cost = 0.;
135  pastix_fixdbl_t *coefs1, *coefs2;
136  pastix_int_t M = args->cblk->stride;
137  pastix_int_t N = cblk_colnbr( args->cblk );
138  M -= N;
139 
140  switch( arch->devices->type ) {
141  case STARPU_CPU_WORKER:
142  coefs1 = &(args->sopalin_data->cpu_models->coefficients[PastixDouble-2][PastixKernelGETRF][0]);
143  coefs2 = &(args->sopalin_data->cpu_models->coefficients[PastixDouble-2][PastixKernelTRSMCblk2d][0]);
144  break;
145  case STARPU_CUDA_WORKER:
146  coefs1 = &(args->sopalin_data->gpu_models->coefficients[PastixDouble-2][PastixKernelGETRF][0]);
147  coefs2 = &(args->sopalin_data->gpu_models->coefficients[PastixDouble-2][PastixKernelTRSMCblk2d][0]);
148  break;
149  default:
150  assert(0);
151  return 0.;
152  }
153 
154  /* Get cost in us */
155  cost = modelsGetCost1Param( coefs1, N );
156  cost += modelsGetCost2Param( coefs2, M, N ) * 2.;
157 
158  (void)nimpl;
159  return cost;
160 }
161 
162 #ifndef DOXYGEN_SHOULD_SKIP_THIS
163 static struct starpu_perfmodel starpu_cblk_dgetrfsp_model = {
164 #if defined( PASTIX_STARPU_COST_PER_ARCH )
165  .type = STARPU_PER_ARCH,
166  .arch_cost_function = cblk_getrf_cost,
167 #else
168  .type = STARPU_HISTORY_BASED,
169 #endif
170  .symbol = "cblk_dgetrfsp",
171 };
172 
173 #if !defined(PASTIX_STARPU_SIMULATION)
174 /**
175  *******************************************************************************
176  *
177  * @brief StarPU CPU implementation
178  *
179  *******************************************************************************
180  *
181  * @param[in] descr
182  * TODO
183  *
184  * @param[in] cl_arg
185  * TODO
186  *
187  *******************************************************************************/
188 static void
189 fct_cblk_dgetrfsp_cpu( void *descr[], void *cl_arg )
190 {
191  struct cl_cblk_dgetrfsp_args_s *args = (struct cl_cblk_dgetrfsp_args_s *)cl_arg;
192  void *L;
193  void *U;
194 
195  L = pastix_starpu_cblk_get_ptr( descr[0] );
196  U = pastix_starpu_cblk_get_ptr( descr[1] );
197 
198  cpucblk_dgetrfsp1d_panel( args->sopalin_data->solvmtx, args->cblk, L, U );
199 }
200 #endif /* !defined(PASTIX_STARPU_SIMULATION) */
201 
202 CODELETS_CPU( cblk_dgetrfsp, 2 );
203 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
204 
205 /**
206  *******************************************************************************
207  *
208  * @brief TODO
209  *
210  *******************************************************************************
211  *
212  * @param[in] sopalin_data
213  * TODO
214  *
215  * @param[in] cblk
216  * TODO
217  *
218  * @param[in] prio
219  * TODO
220  *
221  *******************************************************************************/
222 void
223 starpu_task_cblk_dgetrfsp( sopalin_data_t *sopalin_data,
224  SolverCblk *cblk,
225  int prio )
226 {
227  struct cl_cblk_dgetrfsp_args_s *cl_arg = NULL;
228  int need_exec = 1;
229 #if defined(PASTIX_DEBUG_STARPU)
230  char *task_name;
231 #endif
232 
233  /*
234  * Check if it needs to be submitted
235  */
236 #if defined(PASTIX_WITH_MPI)
237  {
238  int need_submit = 0;
239  if ( cblk->ownerid == sopalin_data->solvmtx->clustnum ) {
240  need_submit = 1;
241  }
242  else {
243  need_exec = 0;
244  }
245  if ( starpu_mpi_cached_receive( cblk->handler[0] ) ) {
246  need_submit = 1;
247  }
248  if ( starpu_mpi_cached_receive( cblk->handler[1] ) ) {
249  need_submit = 1;
250  }
251  if ( !need_submit ) {
252  return;
253  }
254  }
255 #endif
256 
257  /*
258  * Create the arguments array
259  */
260  if ( need_exec ) {
261  cl_arg = malloc( sizeof( struct cl_cblk_dgetrfsp_args_s) );
262  cl_arg->sopalin_data = sopalin_data;
263 #if defined(PASTIX_STARPU_PROFILING)
264  cl_arg->profile_data.measures = cblk_dgetrfsp_profile.measures;
265  cl_arg->profile_data.flops = NAN;
266 #endif
267  cl_arg->cblk = cblk;
268  }
269 
270 #if defined(PASTIX_DEBUG_STARPU)
271  /* This actually generates a memory leak */
272  asprintf( &task_name, "%s( %ld )",
273  cl_cblk_dgetrfsp_cpu.name,
274  (long)(cblk - sopalin_data->solvmtx->cblktab) );
275 #endif
276 
277  pastix_starpu_insert_task(
278  &cl_cblk_dgetrfsp_cpu,
279  STARPU_CL_ARGS, cl_arg, sizeof( struct cl_cblk_dgetrfsp_args_s ),
280 #if defined(PASTIX_STARPU_PROFILING)
281  STARPU_CALLBACK_WITH_ARG_NFREE, cblk_dgetrfsp_callback, cl_arg,
282 #endif
283  STARPU_RW, cblk->handler[0],
284  STARPU_RW, cblk->handler[1],
285 #if defined(PASTIX_DEBUG_STARPU)
286  STARPU_NAME, task_name,
287 #endif
288 #if defined(PASTIX_STARPU_HETEROPRIO)
289  STARPU_PRIORITY, BucketFacto1D,
290 #else
291  STARPU_PRIORITY, prio,
292 #endif
293  0);
294  (void)prio;
295 }
296 
297 /**
298  * @}
299  */
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
double pastix_fixdbl_t
Definition: datatypes.h:65
@ PastixKernelTRSMCblk2d
Definition: kernels_enums.h:58
@ PastixKernelGETRF
Definition: kernels_enums.h:48
int cpucblk_dgetrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *U)
Compute the LU factorization of one panel.
static pastix_fixdbl_t fct_cblk_dgetrfsp_cost(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl)
Cost model function.
void starpu_task_cblk_dgetrfsp(sopalin_data_t *sopalin_data, SolverCblk *cblk, int prio)
TODO.
Base structure for all codelet arguments that include the profiling data.
static double cost(symbol_cblk_t *cblk)
Computes the cost of a cblk.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition: solver.h:329
void * handler[2]
Definition: solver.h:179
int ownerid
Definition: solver.h:181
Solver column block structure.
Definition: solver.h:161