PaStiX Handbook  6.4.0
codelet_cblk_zpxtrfsp.c
Go to the documentation of this file.
1 /**
2  *
3  * @file codelet_cblk_zpxtrfsp.c
4  *
5  * StarPU codelets for complex LL^t functions
6  *
7  * @copyright 2016-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.4.0
11  * @author Mathieu Faverge
12  * @author Pierre Ramet
13  * @author Tom Moenne-Loccoz
14  * @date 2024-07-05
15  *
16  * @generated from /builds/solverstack/pastix/sopalin/starpu/codelet_cblk_zpxtrfsp.c, normal z -> z, Thu Aug 29 14:20:33 2024
17  *
18  * @addtogroup pastix_starpu
19  * @{
20  *
21  **/
22 #ifndef DOXYGEN_SHOULD_SKIP_THIS
23 #define _GNU_SOURCE
24 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
25 #include "common.h"
26 #include "blend/solver.h"
27 #include "sopalin/sopalin_data.h"
28 #include "pastix_zcores.h"
29 #include "pastix_starpu.h"
30 #include "pastix_zstarpu.h"
31 #include "codelets.h"
32 
33 /**
34  * @brief Main structure for all tasks of cblk_zpxtrfsp type
35  *
 * One instance is allocated per submitted task by starpu_task_cblk_zpxtrfsp()
 * and handed to the codelet as its cl_arg.
 */
36 struct cl_cblk_zpxtrfsp_args_s {
37  profile_data_t profile_data; /**< Profiling data; its measures and flops fields are filled at submission when PASTIX_STARPU_PROFILING is defined */
38  sopalin_data_t *sopalin_data; /**< Solver data; read to reach solvmtx and the cpu_models/gpu_models cost coefficients */
39  SolverCblk *cblk; /**< Column block the task works on */
40 };
41 
42 #if defined(PASTIX_STARPU_PROFILING)
43 /**
44  * @brief Functions to profile the codelet
45  *
46  * Two levels of profiling are available:
47  * 1) A generic one that returns the flops per worker
48  * 2) A more detailed one that generates logs of the performance for each kernel
 *
 * The generic level is compiled when PASTIX_STARPU_PROFILING is defined; the
 * detailed one additionally requires PASTIX_STARPU_PROFILING_LOG.
49  */
50 starpu_profile_t cblk_zpxtrfsp_profile = {
51  .next = NULL,
52  .name = "cblk_zpxtrfsp"
53 };
54 
55 /**
56  * @brief Profiling registration function
57  *
 * Passes the address of cblk_zpxtrfsp_profile to profiling_register_cl().
 * The prototype carries the GCC `constructor` attribute, so with compilers
 * honouring it the function is expected to run automatically before main();
 * no explicit call appears in this file.
 */
58 void cblk_zpxtrfsp_profile_register( void ) __attribute__( ( constructor ) );
59 void
60 cblk_zpxtrfsp_profile_register( void )
61 {
62  profiling_register_cl( &cblk_zpxtrfsp_profile );
63 }
64 
65 #ifndef DOXYGEN_SHOULD_SKIP_THIS
66 #if defined(PASTIX_STARPU_PROFILING_LOG)
67 static void
68 cl_profiling_cb_cblk_zpxtrfsp( void *callback_arg )
69 {
70  cl_profiling_callback( callback_arg );
71 
72  struct starpu_task *task = starpu_task_get_current();
73  struct starpu_profiling_task_info *info = task->profiling_info;
74 
75  /* Quick return */
76  if ( info == NULL ) {
77  return;
78  }
79 
80  struct cl_cblk_zpxtrfsp_args_s *args = (struct cl_cblk_zpxtrfsp_args_s *) callback_arg;
81  pastix_fixdbl_t flops = args->profile_data.flops;
82  pastix_fixdbl_t duration = starpu_timing_timespec_delay_us( &info->start_time, &info->end_time );
83  pastix_fixdbl_t speed = flops / ( 1000.0 * duration );
84 
85  pastix_int_t M = args->cblk->stride;
86  pastix_int_t N = cblk_colnbr( args->cblk );
87  M -= N;
88 
89  cl_profiling_log_register( task->name, "cblk_zpxtrfsp", M, N, 0, flops, speed );
90 }
91 #endif
92 
93 #if defined(PASTIX_STARPU_PROFILING_LOG)
94 static void (*cblk_zpxtrfsp_callback)(void*) = cl_profiling_cb_cblk_zpxtrfsp;
95 #else
96 static void (*cblk_zpxtrfsp_callback)(void*) = cl_profiling_callback;
97 #endif
98 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
99 
100 #endif /* defined(PASTIX_STARPU_PROFILING) */
101 
102 /**
103  *******************************************************************************
104  *
105  * @brief Cost model function
106  *
107  * The user can switch from the pastix static model to an history based model
108  * computed automatically.
109  *
110  *******************************************************************************
111  *
112  * @param[in] task
113  * TODO
114  *
115  * @param[in] arch
116  * TODO
117  *
118  * @param[in] nimpl
119  * TODO
120  *
121  *******************************************************************************
122  *
123  * @retval TODO
124  *
125  *******************************************************************************/
126 static inline pastix_fixdbl_t
127 fct_cblk_zpxtrfsp_cost( struct starpu_task *task,
128  struct starpu_perfmodel_arch *arch,
129  unsigned nimpl )
130 {
131  struct cl_cblk_zpxtrfsp_args_s *args = (struct cl_cblk_zpxtrfsp_args_s *)(task->cl_arg);
132 
133  pastix_fixdbl_t cost = 0.;
134  pastix_fixdbl_t *coefs1, *coefs2;
135  pastix_int_t M = args->cblk->stride;
136  pastix_int_t N = cblk_colnbr( args->cblk );
137  M -= N;
138 
139  switch( arch->devices->type ) {
140  case STARPU_CPU_WORKER:
141  coefs1 = &(args->sopalin_data->cpu_models->coefficients[PastixComplex64-2][PastixKernelPXTRF][0]);
142  coefs2 = &(args->sopalin_data->cpu_models->coefficients[PastixComplex64-2][PastixKernelTRSMCblk2d][0]);
143  break;
144  case STARPU_CUDA_WORKER:
145  coefs1 = &(args->sopalin_data->gpu_models->coefficients[PastixComplex64-2][PastixKernelPXTRF][0]);
146  coefs2 = &(args->sopalin_data->gpu_models->coefficients[PastixComplex64-2][PastixKernelTRSMCblk2d][0]);
147  break;
148  default:
149  assert(0);
150  return 0.;
151  }
152 
153  /* Get cost in us */
154  cost = modelsGetCost1Param( coefs1, N );
155  cost += modelsGetCost2Param( coefs2, M, N );
156 
157  (void)nimpl;
158  return cost;
159 }
160 
161 #ifndef DOXYGEN_SHOULD_SKIP_THIS
162 static struct starpu_perfmodel starpu_cblk_zpxtrfsp_model = {
163 #if defined( PASTIX_STARPU_COST_PER_ARCH )
164  .type = STARPU_PER_ARCH,
165  .arch_cost_function = cblk_pxtrf_cost,
166 #else
167  .type = STARPU_HISTORY_BASED,
168 #endif
169  .symbol = "cblk_zpxtrfsp",
170 };
171 
172 #if !defined(PASTIX_STARPU_SIMULATION)
173 /**
174  *******************************************************************************
175  *
176  * @brief StarPU CPU implementation
177  *
178  *******************************************************************************
179  *
180  * @param[in] descr
181  * TODO
182  *
183  * @param[in] cl_arg
184  * TODO
185  *
186  *******************************************************************************/
187 static void
188 fct_cblk_zpxtrfsp_cpu( void *descr[], void *cl_arg )
189 {
190  struct cl_cblk_zpxtrfsp_args_s *args = (struct cl_cblk_zpxtrfsp_args_s *)cl_arg;
191  void *L;
192 
193  L = pastix_starpu_cblk_get_ptr( descr[0] );
194 
195  cpucblk_zpxtrfsp1d_panel( args->sopalin_data->solvmtx, args->cblk, L );
196 }
197 #endif /* !defined(PASTIX_STARPU_SIMULATION) */
198 
/* NOTE(review): CODELETS_CPU is not defined in this file (presumably it comes from codelets.h). It is expected to provide the cl_cblk_zpxtrfsp_cpu codelet used by starpu_task_cblk_zpxtrfsp(); the meaning of the second argument should be confirmed against the macro definition. */
199 CODELETS_CPU( cblk_zpxtrfsp, 1 );
200 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
201 
202 /**
203  *******************************************************************************
204  *
205  * @brief TODO
206  *
207  *******************************************************************************
208  *
209  * @param[in] sopalin_data
210  * TODO
211  *
212  * @param[in] cblk
213  * TODO
214  *
215  * @param[in] prio
216  * TODO
217  *
218  *******************************************************************************/
219 void
220 starpu_task_cblk_zpxtrfsp( sopalin_data_t *sopalin_data,
221  SolverCblk *cblk,
222  int prio )
223 {
224  struct cl_cblk_zpxtrfsp_args_s *cl_arg = NULL;
225  int need_exec = 1;
226 #if defined(PASTIX_DEBUG_STARPU)
227  char *task_name;
228 #endif
229 
230  /*
231  * Check if it needs to be submitted
232  */
233 #if defined(PASTIX_WITH_MPI)
234  {
235  int need_submit = 0;
236  if ( cblk->ownerid == sopalin_data->solvmtx->clustnum ) {
237  need_submit = 1;
238  }
239  else {
240  need_exec = 0;
241  }
242  if ( starpu_mpi_cached_receive( cblk->handler[0] ) ) {
243  need_submit = 1;
244  }
245  if ( !need_submit ) {
246  return;
247  }
248  }
249 #endif
250 
251  /*
252  * Create the arguments array
253  */
254  if ( need_exec ) {
255  cl_arg = malloc( sizeof( struct cl_cblk_zpxtrfsp_args_s) );
256  cl_arg->sopalin_data = sopalin_data;
257 #if defined(PASTIX_STARPU_PROFILING)
258  cl_arg->profile_data.measures = cblk_zpxtrfsp_profile.measures;
259  cl_arg->profile_data.flops = NAN;
260 #endif
261  cl_arg->cblk = cblk;
262  }
263 
264 #if defined(PASTIX_DEBUG_STARPU)
265  /* This actually generates a memory leak */
266  asprintf( &task_name, "%s( %ld )",
267  cl_cblk_zpxtrfsp_cpu.name,
268  (long)(cblk - sopalin_data->solvmtx->cblktab) );
269 #endif
270 
271  pastix_starpu_insert_task(
272  &cl_cblk_zpxtrfsp_cpu,
273  STARPU_CL_ARGS, cl_arg, sizeof( struct cl_cblk_zpxtrfsp_args_s ),
274 #if defined(PASTIX_STARPU_PROFILING)
275  STARPU_CALLBACK_WITH_ARG_NFREE, cblk_zpxtrfsp_callback, cl_arg,
276 #endif
277  STARPU_RW, cblk->handler[0],
278 #if defined(PASTIX_DEBUG_STARPU)
279  STARPU_NAME, task_name,
280 #endif
281 #if defined(PASTIX_STARPU_HETEROPRIO)
282  STARPU_PRIORITY, BucketFacto1D,
283 #else
284  STARPU_PRIORITY, prio,
285 #endif
286  0);
287  (void)prio;
288 }
289 
290 /**
291  * @}
292  */
int cpucblk_zpxtrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L)
Compute the LL^t factorization of one panel.
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
double pastix_fixdbl_t
Definition: datatypes.h:65
@ PastixKernelTRSMCblk2d
Definition: kernels_enums.h:58
@ PastixKernelPXTRF
Definition: kernels_enums.h:51
void starpu_task_cblk_zpxtrfsp(sopalin_data_t *sopalin_data, SolverCblk *cblk, int prio)
TODO.
static pastix_fixdbl_t fct_cblk_zpxtrfsp_cost(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl)
Cost model function.
Base structure to all codelet arguments that include the profiling data.
static double cost(symbol_cblk_t *cblk)
Computes the cost of a cblk.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition: solver.h:329
void * handler[2]
Definition: solver.h:179
int ownerid
Definition: solver.h:181
Solver column block structure.
Definition: solver.h:161