PaStiX Handbook  6.3.2
codelet_blok_sscalo.c
Go to the documentation of this file.
1 /**
2  *
3  * @file codelet_blok_sscalo.c
4  *
5  * StarPU codelets for blas-like functions
6  *
7  * @copyright 2016-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.3.2
11  * @author Mathieu Faverge
12  * @author Pierre Ramet
13  * @author Ian Masliah
14  * @author Alycia Lisito
15  * @author Nolan Bredel
16  * @author Tom Moenne-Loccoz
17  * @date 2023-11-07
18  *
19  * @generated from /builds/solverstack/pastix/sopalin/starpu/codelet_blok_zscalo.c, normal z -> s, Wed Dec 13 12:09:26 2023
20  *
21  * @addtogroup pastix_starpu
22  * @{
23  *
24  **/
25 #ifndef DOXYGEN_SHOULD_SKIP_THIS
26 #define _GNU_SOURCE
27 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
28 #include "common.h"
29 #include "blend/solver.h"
30 #include "sopalin/sopalin_data.h"
31 #include "pastix_scores.h"
32 #include "pastix_starpu.h"
33 #include "pastix_sstarpu.h"
34 #include "codelets.h"
35 
36 /**
37  * @brief Main structure for all tasks of blok_sscalo type
38  */
39 struct cl_blok_sscalo_args_s {
40  profile_data_t profile_data;
41  sopalin_data_t *sopalin_data;
42  pastix_trans_t trans;
43  const SolverCblk *cblk;
44  pastix_int_t blok_m;
45 };
46 
47 /**
48  * @brief Functions to profile the codelet
49  *
50  * Two levels of profiling are available:
51  * 1) A generic one that returns the flops per worker
52  * 2) A more detailed one that generates logs of the performance for each kernel
53  */
#if defined( PASTIX_STARPU_PROFILING )
/**
 * @brief Profiling record of the blok_sscalo codelet.
 *
 * It is handed to profiling_register_cl() by the constructor below, and its
 * measures are referenced by every submitted blok_sscalo task.
 */
starpu_profile_t blok_sscalo_profile = {
    .next = NULL,
    .name = "blok_sscalo"
};

/**
 * @brief Profiling registration function
 *
 * Declared with the constructor attribute so that the record is registered
 * without any explicit call from the rest of the library.
 */
void blok_sscalo_profile_register( void ) __attribute__( ( constructor ) );
void
blok_sscalo_profile_register( void )
{
    profiling_register_cl( &blok_sscalo_profile );
}

/*
 * Completion callback passed to StarPU by starpu_task_blok_sscalo().
 * The identifier must be blok_sscalo_callback: this is the name used at the
 * task submission site, and any other name leaves it undefined when
 * PASTIX_STARPU_PROFILING is enabled.
 */
static void (*blok_sscalo_callback)(void*) = cl_profiling_callback;

#endif /* defined( PASTIX_STARPU_PROFILING ) */
73 
74 /**
75  * @brief Cost model function
76  *
77  * The user can switch from the pastix static model to an history based model
78  * computed automatically.
79  */
80 static inline pastix_fixdbl_t
81 fct_blok_sscalo_cost( struct starpu_task *task,
82  struct starpu_perfmodel_arch *arch,
83  unsigned nimpl )
84 {
85  struct cl_blok_sscalo_args_s *args = (struct cl_blok_sscalo_args_s *)(task->cl_arg);
86 
87  pastix_fixdbl_t cost = 0.;
88  pastix_fixdbl_t *coefs;
89  pastix_int_t M = blok_rownbr_ext( args->cblk->fblokptr + args->blok_m );
90  pastix_int_t N = cblk_colnbr( args->cblk );
91 
92  switch( arch->devices->type ) {
93  case STARPU_CPU_WORKER:
94  coefs = &(args->sopalin_data->cpu_models->coefficients[PastixFloat-2][PastixKernelSCALOBlok][0]);
95  break;
96  case STARPU_CUDA_WORKER:
97  coefs = &(args->sopalin_data->gpu_models->coefficients[PastixFloat-2][PastixKernelSCALOBlok][0]);
98  break;
99  default:
100  assert(0);
101  return 0.;
102  }
103 
104  /* Get cost in us */
105  cost = modelsGetCost2Param( coefs, M, N ) * 1e6;
106 
107  (void)nimpl;
108  return cost;
109 }
110 
111 static struct starpu_perfmodel starpu_blok_sscalo_model = {
112 #if defined(PASTIX_STARPU_COST_PER_ARCH)
113  .type = STARPU_PER_ARCH,
114  .arch_cost_function = fct_blok_sscalo_cost,
115 #else
116  .type = STARPU_HISTORY_BASED,
117 #endif
118  .symbol = "blok_sscalo",
119 };
120 
121 #if !defined(PASTIX_STARPU_SIMULATION)
122 /**
123  * @brief StarPU CPU implementation
124  */
125 static void
126 fct_blok_sscalo_cpu( void *descr[], void *cl_arg )
127 {
128  struct cl_blok_sscalo_args_s *args = (struct cl_blok_sscalo_args_s *)cl_arg;
129  const void *A;
130  const void *D;
131  void *B;
132 
133  A = pastix_starpu_blok_get_ptr( descr[0] );
134  D = pastix_starpu_blok_get_ptr( descr[1] );
135  B = pastix_starpu_blok_get_ptr( descr[2] );
136 
137  assert( args->cblk->cblktype & CBLK_TASKS_2D );
138 
139  cpublok_sscalo( args->trans, args->cblk, args->blok_m, A, D, B );
140 }
141 #endif /* !defined(PASTIX_STARPU_SIMULATION) */
142 
143 CODELETS_CPU( blok_sscalo, 3 );
144 
145 /**
146  *******************************************************************************
147  *
148  * @brief TODO
149  *
150  *******************************************************************************
151  *
152  * @param[in] sopalin_data
153  * TODO
154  *
155  * @param[in] trans
156  * TODO
157  *
158  * @param[in] cblk
159  * TODO
160  *
161  * @param[in] blok
162  * TODO
163  *
164  * @param[in] prio
165  * TODO
166  *
167  *******************************************************************************/
168 void
169 starpu_task_blok_sscalo( sopalin_data_t *sopalin_data,
170  pastix_trans_t trans,
171  const SolverCblk *cblk,
172  SolverBlok *blok,
173  int prio )
174 {
175  struct cl_blok_sscalo_args_s *cl_arg = NULL;
176  int need_exec = 1;
177 #if defined(PASTIX_DEBUG_STARPU) || defined(PASTIX_STARPU_PROFILING_LOG)
178  char *task_name;
179 #endif
180 
181  starpu_data_handle_t *handler = (starpu_data_handle_t *)( blok->handler );
182  pastix_int_t blok_m = blok - cblk->fblokptr;
183 
184  pastix_starpu_register_blok( handler+1, cblk, blok, PastixFloat );
185 
186 #if defined(PASTIX_WITH_MPI)
187  {
188  int64_t tag_desc = sopalin_data->solvmtx->starpu_desc->mpitag;
189  int64_t tag_cblk = 2 * sopalin_data->solvmtx->gcblknbr;
190  int64_t tag_blok = 2 * (blok - sopalin_data->solvmtx->bloktab) + 1;
191 
192  starpu_mpi_data_register( *(handler + 1),
193  tag_desc + tag_cblk + tag_blok,
194  cblk->ownerid );
195  }
196 #endif /* PASTIX_WITH_MPI */
197 
198  /*
199  * Check if it needs to be submitted
200  */
201 #if defined(PASTIX_WITH_MPI)
202  {
203  int need_submit = 0;
204  if ( cblk->ownerid == sopalin_data->solvmtx->clustnum ) {
205  need_submit = 1;
206  }
207  else {
208  need_exec = 0;
209  }
210  if ( !need_submit ) {
211  return;
212  }
213  }
214 #endif
215 
216  /*
217  * Create the arguments array
218  */
219  if ( need_exec ) {
220  cl_arg = malloc( sizeof(struct cl_blok_sscalo_args_s) );
221  cl_arg->sopalin_data = sopalin_data;
222 #if defined(PASTIX_STARPU_PROFILING)
223  cl_arg->profile_data.measures = blok_sscalo_profile.measures;
224  cl_arg->profile_data.flops = NAN;
225 #endif
226  cl_arg->trans = trans;
227  cl_arg->cblk = cblk;
228  cl_arg->blok_m = blok_m;
229  }
230 
231 #if defined(PASTIX_DEBUG_STARPU) || defined(PASTIX_STARPU_PROFILING_LOG)
232  /* This actually generates a memory leak */
233  asprintf( &task_name, "%s( %ld, %ld )",
234  cl_blok_sscalo_cpu.name,
235  (long)(cblk - sopalin_data->solvmtx->cblktab),
236  (long)(blok - sopalin_data->solvmtx->bloktab) );
237 #endif
238 
239  pastix_starpu_insert_task(
240  &cl_blok_sscalo_cpu,
241  STARPU_CL_ARGS, cl_arg, sizeof( struct cl_blok_sscalo_args_s ),
242 #if defined(PASTIX_STARPU_PROFILING)
243  STARPU_CALLBACK_WITH_ARG_NFREE, blok_sscalo_callback, cl_arg,
244 #endif
245  STARPU_R, blok->handler[0],
246  STARPU_R, cblk->fblokptr->handler[0],
247  STARPU_W, blok->handler[1],
248 #if defined(PASTIX_DEBUG_STARPU) || defined(PASTIX_STARPU_PROFILING_LOG)
249  STARPU_NAME, task_name,
250 #endif
251 #if defined(PASTIX_STARPU_HETEROPRIO)
252  STARPU_PRIORITY, BucketScalo,
253 #else
254  STARPU_PRIORITY, prio,
255 #endif
256  0);
257  (void)prio;
258 }
259 /**
260  * @}
261  */
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
double pastix_fixdbl_t
Definition: datatypes.h:65
@ PastixKernelSCALOBlok
Definition: kernels_enums.h:54
void cpublok_sscalo(pastix_trans_t trans, const SolverCblk *cblk, pastix_int_t blok_m, const void *dataA, const void *dataD, void *dataB)
Copy the lower terms of the block with scaling for the two-terms algorithm.
Definition: core_sscalo.c:316
enum pastix_trans_e pastix_trans_t
Transposition.
void starpu_task_blok_sscalo(sopalin_data_t *sopalin_data, pastix_trans_t trans, const SolverCblk *cblk, SolverBlok *blok, int prio)
TODO.
static pastix_fixdbl_t fct_blok_sscalo_cost(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl)
Cost model function.
static void fct_blok_sscalo_cpu(void *descr[], void *cl_arg)
StarPU CPU implementation.
Base structure to all codelet arguments that include the profiling data.
static double cost(symbol_cblk_t *cblk)
Computes the cost of a cblk.
static pastix_int_t blok_rownbr_ext(const SolverBlok *blok)
Compute the number of rows of a contiguous block in front of the same cblk.
Definition: solver.h:402
void * handler[2]
Definition: solver.h:138
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition: solver.h:324
SolverBlok * fblokptr
Definition: solver.h:163
int ownerid
Definition: solver.h:175
Solver block structure.
Definition: solver.h:137
Solver column block structure.
Definition: solver.h:156