PaStiX Handbook 6.4.0
Loading...
Searching...
No Matches
codelet_blok_sscalo.c
Go to the documentation of this file.
1/**
2 *
3 * @file codelet_blok_sscalo.c
4 *
5 * StarPU codelets for blas-like functions
6 *
7 * @copyright 2016-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8 * Univ. Bordeaux. All rights reserved.
9 *
10 * @version 6.4.0
11 * @author Mathieu Faverge
12 * @author Pierre Ramet
13 * @author Ian Masliah
14 * @author Alycia Lisito
15 * @author Nolan Bredel
16 * @author Tom Moenne-Loccoz
17 * @date 2024-07-05
18 *
19 * @generated from /builds/2mk6rsew/0/solverstack/pastix/sopalin/starpu/codelet_blok_zscalo.c, normal z -> s, Tue Feb 25 14:35:20 2025
20 *
21 * @addtogroup pastix_starpu
22 * @{
23 *
24 **/
25#ifndef DOXYGEN_SHOULD_SKIP_THIS
26#define _GNU_SOURCE
27#endif /* DOXYGEN_SHOULD_SKIP_THIS */
#include "common.h"
#include "blend/solver.h"
#include "sopalin/sopalin_data.h"
#include "pastix_scores.h"
#include "pastix_starpu.h"
#include "pastix_sstarpu.h"
#include "codelets.h"
#include <stdio.h>
#include <stdlib.h>
35
36/**
37 * @brief Main structure for all tasks of blok_sscalo type
38 */
39struct cl_blok_sscalo_args_s {
40 profile_data_t profile_data;
41 sopalin_data_t *sopalin_data;
42 pastix_trans_t trans;
43 const SolverCblk *cblk;
44 pastix_int_t blok_m;
45};
46
47/**
48 * @brief Functions to profile the codelet
49 *
50 * Two levels of profiling are available:
51 * 1) A generic one that returns the flops per worker
52 * 2) A more detailed one that generate logs of the performance for each kernel
53 */
#if defined( PASTIX_STARPU_PROFILING )
/**
 * @brief Profiling record of the blok_sscalo codelet
 */
starpu_profile_t blok_sscalo_profile = {
    .next = NULL,
    .name = "blok_sscalo"
};

/**
 * @brief Profiling registration function
 *
 * Declared with the constructor attribute, so it is run at load time and
 * hands blok_sscalo_profile to profiling_register_cl().
 */
void blok_sscalo_profile_register( void ) __attribute__( ( constructor ) );
void
blok_sscalo_profile_register( void )
{
    profiling_register_cl( &blok_sscalo_profile );
}

/*
 * Completion callback given to StarPU at task submission.
 *
 * The pointer must be named blok_sscalo_callback: this is the symbol that
 * starpu_task_blok_sscalo() passes with STARPU_CALLBACK_WITH_ARG_NFREE. It
 * used to be declared as blok_strsmsp_callback, which left the symbol used at
 * submission undeclared and this pointer unused.
 */
static void (*blok_sscalo_callback)(void*) = cl_profiling_callback;

#endif /* defined( PASTIX_STARPU_PROFILING ) */
73
#if defined(PASTIX_STARPU_COST_PER_ARCH)
/**
 * @brief Cost model function
 *
 * The user can switch from the pastix static model to an history based model
 * computed automatically.
 *
 * @param[in] task
 *          Task to estimate. Its cl_arg is read as a cl_blok_sscalo_args_s.
 *
 * @param[in] arch
 *          Architecture to estimate for. Only the type of its first device is
 *          inspected, to choose between the CPU and the GPU coefficients.
 *
 * @param[in] nimpl
 *          Unused.
 *
 * @return The value of modelsGetCost2Param() for the block dimensions,
 *         multiplied by 1e6 (microseconds, assuming the model returns
 *         seconds), or 0 for a device type that is neither a CPU nor a CUDA
 *         worker.
 */
static inline pastix_fixdbl_t
fct_blok_sscalo_cost( struct starpu_task           *task,
                      struct starpu_perfmodel_arch *arch,
                      unsigned                      nimpl )
{
    struct cl_blok_sscalo_args_s *args = (struct cl_blok_sscalo_args_s *)(task->cl_arg);

    pastix_fixdbl_t  cost;
    pastix_fixdbl_t *coefs;
    /* M: rows of the processed block, N: columns of its column block */
    pastix_int_t     M = blok_rownbr_ext( args->cblk->fblokptr + args->blok_m );
    pastix_int_t     N = cblk_colnbr( args->cblk );

    switch( arch->devices->type ) {
    case STARPU_CPU_WORKER:
        coefs = &(args->sopalin_data->cpu_models->coefficients[PastixFloat-2][PastixKernelSCALOBlok][0]);
        break;
    case STARPU_CUDA_WORKER:
        coefs = &(args->sopalin_data->gpu_models->coefficients[PastixFloat-2][PastixKernelSCALOBlok][0]);
        break;
    default:
        /* No model for this kind of worker */
        assert(0);
        return 0.;
    }

    /* Get cost in us */
    cost = modelsGetCost2Param( coefs, M, N ) * 1e6;

    (void)nimpl;
    return cost;
}
#endif
112
113static struct starpu_perfmodel starpu_blok_sscalo_model = {
114#if defined(PASTIX_STARPU_COST_PER_ARCH)
115 .type = STARPU_PER_ARCH,
116 .arch_cost_function = fct_blok_sscalo_cost,
117#else
118 .type = STARPU_HISTORY_BASED,
119#endif
120 .symbol = "blok_sscalo",
121};
122
123#if !defined(PASTIX_STARPU_SIMULATION)
124/**
125 * @brief StarPU CPU implementation
126 */
127static void
128fct_blok_sscalo_cpu( void *descr[], void *cl_arg )
129{
130 struct cl_blok_sscalo_args_s *args = (struct cl_blok_sscalo_args_s *)cl_arg;
131 const void *A;
132 const void *D;
133 void *B;
134
135 A = pastix_starpu_blok_get_ptr( descr[0] );
136 D = pastix_starpu_blok_get_ptr( descr[1] );
137 B = pastix_starpu_blok_get_ptr( descr[2] );
138
139 assert( args->cblk->cblktype & CBLK_TASKS_2D );
140
141 cpublok_sscalo( args->trans, args->cblk, args->blok_m, A, D, B );
142}
143#endif /* !defined(PASTIX_STARPU_SIMULATION) */
144
145CODELETS_CPU( blok_sscalo, 3 );
146
147/**
148 *******************************************************************************
149 *
150 * @brief TODO
151 *
152 *******************************************************************************
153 *
154 * @param[in] sopalin_data
155 * TODO
156 *
157 * @param[in] trans
158 * TODO
159 *
160 * @param[in] cblk
161 * TODO
162 *
163 * @param[in] blok
164 * TODO
165 *
166 * @param[in] prio
167 * TODO
168 *
169 *******************************************************************************/
170void
171starpu_task_blok_sscalo( sopalin_data_t *sopalin_data,
172 pastix_trans_t trans,
173 const SolverCblk *cblk,
174 SolverBlok *blok,
175 int prio )
176{
177 struct cl_blok_sscalo_args_s *cl_arg = NULL;
178 int need_exec = 1;
179#if defined(PASTIX_DEBUG_STARPU) || defined(PASTIX_STARPU_PROFILING_LOG)
180 char *task_name;
181#endif
182
183 starpu_data_handle_t *handler = (starpu_data_handle_t *)( blok->handler );
184 pastix_int_t blok_m = blok - cblk->fblokptr;
185
186 pastix_starpu_register_blok( handler+1, cblk, blok, PastixFloat );
187
188#if defined(PASTIX_WITH_MPI)
189 {
190 int64_t tag_desc = sopalin_data->solvmtx->starpu_desc->mpitag;
191 int64_t tag_cblk = 2 * sopalin_data->solvmtx->gcblknbr;
192 int64_t tag_blok = 2 * (blok - sopalin_data->solvmtx->bloktab) + 1;
193
194 starpu_mpi_data_register( *(handler + 1),
195 tag_desc + tag_cblk + tag_blok,
196 cblk->ownerid );
197 }
198#endif /* PASTIX_WITH_MPI */
199
200 /*
201 * Check if it needs to be submitted
202 */
203#if defined(PASTIX_WITH_MPI)
204 {
205 int need_submit = 0;
206 if ( cblk->ownerid == sopalin_data->solvmtx->clustnum ) {
207 need_submit = 1;
208 }
209 else {
210 need_exec = 0;
211 }
212 if ( !need_submit ) {
213 return;
214 }
215 }
216#endif
217
218 /*
219 * Create the arguments array
220 */
221 if ( need_exec ) {
222 cl_arg = malloc( sizeof(struct cl_blok_sscalo_args_s) );
223 cl_arg->sopalin_data = sopalin_data;
224#if defined(PASTIX_STARPU_PROFILING)
225 cl_arg->profile_data.measures = blok_sscalo_profile.measures;
226 cl_arg->profile_data.flops = NAN;
227#endif
228 cl_arg->trans = trans;
229 cl_arg->cblk = cblk;
230 cl_arg->blok_m = blok_m;
231 }
232
233#if defined(PASTIX_DEBUG_STARPU) || defined(PASTIX_STARPU_PROFILING_LOG)
234 /* This actually generates a memory leak */
235 asprintf( &task_name, "%s( %ld, %ld )",
236 cl_blok_sscalo_cpu.name,
237 (long)(cblk - sopalin_data->solvmtx->cblktab),
238 (long)(blok - sopalin_data->solvmtx->bloktab) );
239#endif
240
241 pastix_starpu_insert_task(
242 &cl_blok_sscalo_cpu,
243 STARPU_CL_ARGS, cl_arg, sizeof( struct cl_blok_sscalo_args_s ),
244#if defined(PASTIX_STARPU_PROFILING)
245 STARPU_CALLBACK_WITH_ARG_NFREE, blok_sscalo_callback, cl_arg,
246#endif
247 STARPU_R, blok->handler[0],
248 STARPU_R, cblk->fblokptr->handler[0],
249 STARPU_W, blok->handler[1],
250#if defined(PASTIX_DEBUG_STARPU) || defined(PASTIX_STARPU_PROFILING_LOG)
251 STARPU_NAME, task_name,
252#endif
253#if defined(PASTIX_STARPU_HETEROPRIO)
254 STARPU_PRIORITY, BucketScalo,
255#else
256 STARPU_PRIORITY, prio,
257#endif
258 0);
259 (void)prio;
260}
261/**
262 * @}
263 */
BEGIN_C_DECLS typedef int pastix_int_t
Definition datatypes.h:51
double pastix_fixdbl_t
Definition datatypes.h:65
@ PastixKernelSCALOBlok
void cpublok_sscalo(pastix_trans_t trans, const SolverCblk *cblk, pastix_int_t blok_m, const void *dataA, const void *dataD, void *dataB)
Copy the lower terms of the block with scaling for the two-terms algorithm.
enum pastix_trans_e pastix_trans_t
Transpostion.
static struct starpu_perfmodel starpu_blok_sscalo_model
Functions to profile the codelet.
void starpu_task_blok_sscalo(sopalin_data_t *sopalin_data, pastix_trans_t trans, const SolverCblk *cblk, SolverBlok *blok, int prio)
TODO.
static void fct_blok_sscalo_cpu(void *descr[], void *cl_arg)
StarPU CPU implementation.
Base structure to all codelet arguments that include the profiling data.
static double cost(symbol_cblk_t *cblk)
Computes the cost of a cblk.
static pastix_int_t blok_rownbr_ext(const SolverBlok *blok)
Compute the number of rows of a contiguous block in front of the same cblk.
Definition solver.h:407
void * handler[2]
Definition solver.h:142
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition solver.h:329
SolverBlok * fblokptr
Definition solver.h:168
Solver block structure.
Definition solver.h:141
Solver column block structure.
Definition solver.h:161