PaStiX Handbook  6.4.0
pastix_starpu.h
Go to the documentation of this file.
1 /**
2  *
3  * @file pastix_starpu.h
4  *
5  * StarPU support for the numerical factorization and solve of PaStiX.
6  *
7  * @copyright 2016-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.4.0
11  * @author Mathieu Faverge
12  * @author Pierre Ramet
13  * @author Alycia Lisito
14  * @author Florent Pruvost
15  * @author Nolan Bredel
16  * @author Tom Moenne-Loccoz
17  * @date 2024-07-05
18  *
19  * @addtogroup pastix_starpu
20  * @{
21  * This module describes the functionality provided by the runtime system
22  * StarPU for the numerical factorization and solve.
23  *
24  **/
25 #ifndef _pastix_starpu_h_
26 #define _pastix_starpu_h_
27 
28 #include "common.h"
29 #include "blend/solver.h"
30 
31 #if defined(PASTIX_WITH_MPI)
32 #include <starpu_mpi.h>
33 #else
34 #include <starpu.h>
35 #endif
36 
37 #include <starpu_profiling.h>
38 
39 #if defined(PASTIX_WITH_CUDA) && !defined(PASTIX_STARPU_SIMULATION)
40 #include <starpu_scheduler.h>
41 #include <starpu_cuda.h>
42 
43 #include <cublas.h>
44 #include <starpu_cublas.h>
45 #if defined(PASTIX_WITH_CUBLAS_V2)
46 #include <cublas_v2.h>
47 #include <starpu_cublas_v2.h>
48 #endif
49 #endif
50 
51 #ifndef DOXYGEN_SHOULD_SKIP_THIS
52 typedef struct starpu_conf starpu_conf_t;
53 
/*
 * pastix_starpu_insert_task( codelet, ... )
 *
 * Single entry point used to submit a task, whatever the build options:
 *   - with PASTIX_WITH_MPI, the call is forwarded to starpu_mpi_insert_task()
 *     on the communicator sopalin_data->solvmtx->solv_comm. The macro reads
 *     `sopalin_data` by name, so a variable with that name must be in scope
 *     wherever the macro is used;
 *   - otherwise, the call is forwarded to starpu_insert_task();
 *   - with PASTIX_STARPU_SYNC, the pair `STARPU_TASK_SYNCHRONOUS, 1` is
 *     inserted right after the codelet, before the caller's own arguments.
 *
 * `, ##__VA_ARGS__` is the GNU extension that drops the preceding comma when
 * no variadic argument is supplied.
 */
#if defined(PASTIX_STARPU_SYNC)

#  if defined(PASTIX_WITH_MPI)
#    define pastix_starpu_insert_task( _codelet_, ... ) \
        starpu_mpi_insert_task( sopalin_data->solvmtx->solv_comm, _codelet_, STARPU_TASK_SYNCHRONOUS, 1, ##__VA_ARGS__ )
#  else
#    define pastix_starpu_insert_task( _codelet_, ... ) \
        starpu_insert_task( _codelet_, STARPU_TASK_SYNCHRONOUS, 1, ##__VA_ARGS__ )
#  endif

#else /* !PASTIX_STARPU_SYNC */

#  if defined(PASTIX_WITH_MPI)
#    define pastix_starpu_insert_task( _codelet_, ... ) \
        starpu_mpi_insert_task( sopalin_data->solvmtx->solv_comm, _codelet_, ##__VA_ARGS__ )
#  else
#    define pastix_starpu_insert_task( _codelet_, ... ) \
        starpu_insert_task( _codelet_, ##__VA_ARGS__ )
#  endif

#endif /* PASTIX_STARPU_SYNC */
75 
76 #if defined( PASTIX_STARPU_HETEROPRIO )
/**
 * @brief Bucket index associated with each family of tasks.
 *
 * Several task families deliberately share the same bucket index, so the
 * enumerators are grouped here by value. BucketNumber is left implicit and
 * must stay directly after the enumerator holding the highest index, so that
 * it evaluates to that index plus one (4).
 *
 * NOTE(review): presumably consumed by the StarPU heteroprio scheduler setup,
 * since the type is only compiled under PASTIX_STARPU_HETEROPRIO - confirm.
 */
typedef enum heteroprio_bucket_order_e {
    /* Bucket 0 */
    BucketSolveDiag = 0,
    BucketSolveGEMM = 0,
    BucketSolveTRSM = 0,
    BucketFacto1D   = 0,
    BucketFacto2D   = 0,
    BucketScalo     = 0,
    /* Bucket 1 */
    BucketGEMM1D    = 1,
    /* Bucket 2 */
    BucketTRSM1D    = 2,
    BucketTRSM2D    = 2,
    /* Bucket 3 */
    BucketGEMM2D    = 3,
    /* Highest bucket index + 1 */
    BucketNumber
} heteroprio_bucket_order_t;
90 #endif
91 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
92 
93 /**
94  * @brief Additional StarPU handlers for a column-block when using 2D kernels.
95  *
96  * Handle requirements for contiguous allocation of the block handlers when
97  * using StarPU data partitioning.
98  */
99 typedef struct starpu_cblk_s {
100  pastix_int_t handlenbr; /**< Number of 2D block handlers in the column-block */
101  starpu_data_handle_t *handletab; /**< Array of 2D block handlers for the column-block */
103 
104 /**
105  * @brief StarPU descriptor structure for the sparse matrix.
106  */
108  int64_t mpitag; /**< MPI id of StarPU */
109  int typesze; /**< Arithmetic size */
110  pastix_mtxtype_t mtxtype; /**< Matrix structure: PastixGeneral, PastixSymmetric or PastixHermitian. */
111  SolverMatrix *solvmtx; /**< Solver matrix structure that describes the problem and stores the original data */
112  starpu_cblk_t *cblktab_handle; /**< Array of 2D column-block handlers (NULL when using 1D kernels only) */
113  void **gpu_blocktab; /**< Pointer to GPU arrays that contains frownum,lrownum of each block for Fermi (NULL otherwise) */
115 
116 /**
117  * @brief StarPU descriptor for the vectors linked to a given sparse matrix.
118  */
119 typedef struct starpu_rhs_desc_s {
120  int64_t mpitag; /**< MPI id of StarPU */
121  int ncol; /**< Number of columns of the matrix */
122  int typesze; /**< Arithmetic size */
123  SolverMatrix *solvmtx; /**< Solver matrix structure that describes the problem and stores the original data */
124  starpu_data_handle_t *handletab; /**< Array of handlers for the blocks */
125  void *dataptr; /**< Store the main data pointer to check that the descriptor matches the reference */
127 
129  pastix_mtxtype_t mtxtype,
130  int nodes,
131  int myrank,
132  pastix_coeftype_t flttype );
135 
136 void starpu_rhs_init( SolverMatrix *solvmtx,
137  pastix_rhs_t rhsb,
138  int typesze,
139  int nodes,
140  int myrank );
143 
146  int *argc,
147  char **argv[],
148  const int *bindtab );
150 
151 /**
152  * MPI tag management
153  */
/**
 * @brief Book a range of StarPU unique tags of size nbtags.
 *
 * @param nbtags  Number of tags requested.
 * @return A 64-bit value; NOTE(review): presumably the first tag of the booked
 *         range, to be handed back to pastix_starpu_tag_release() - confirm.
 */
int64_t pastix_starpu_tag_book( int64_t nbtags );

/**
 * @brief Release the set of tags starting by min.
 *
 * @param min  First tag of the set to release.
 */
void pastix_starpu_tag_release( int64_t min );
157 
158 #ifndef DOXYGEN_SHOULD_SKIP_THIS
/**
 * @brief Accumulator for a series of measurements.
 *
 * NOTE(review): the field names suggest a running sum, a running sum of
 * squares and a sample count; confirm against the profiling implementation.
 */
typedef struct measure_s {
    double sum;
    double sum2;
    long   n;
} measure_t;
167 
168 /**
169  * @brief Helper functions and variables for testing
170  */
171 struct starpu_profile_s;
172 typedef struct starpu_profile_s starpu_profile_t;
173 
174 /**
175  * @brief Profiling data structure to register a codelet to profile
176  */
177 struct starpu_profile_s {
178  starpu_profile_t *next; /**< Link to the next implementation */
179  const char *name; /**< Short name of the function */
180  measure_t measures[STARPU_NMAXWORKERS]; /**< Pointer to the array of measures */
181 };
182 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
183 
184 /**
185  * @brief Base structure to all codelet arguments that include the profiling data
186  */
187 typedef struct profile_data_s {
188 #if defined( PASTIX_STARPU_PROFILING )
189  measure_t *measures;
190 #endif
191  double flops;
193 
194 #if defined( PASTIX_STARPU_PROFILING )
195 void cl_profiling_callback( void *callback_arg );
196 void profiling_register_cl( starpu_profile_t *codelet );
198 #else
/**
 *******************************************************************************
 *
 * @brief Displays all profiling data collected into all measurement tables of
 * the profile_list.
 *
 * This definition is the variant compiled when PASTIX_STARPU_PROFILING is not
 * defined: its body is empty, so calling it does nothing.
 *
 * The parameter list is spelled `( void )` so that the function has a real
 * prototype and a call with arguments is rejected at compile time.
 *
 ******************************************************************************/
static inline void profiling_display_allinfo( void ) {}
207 #endif
208 
209 #if defined( PASTIX_STARPU_PROFILING_LOG )
/*
 * Profiling-log API, declared when PASTIX_STARPU_PROFILING_LOG is defined.
 *
 * NOTE(review): the roles of the arguments are not visible from this header;
 * the comments below only restate names and types - confirm against the
 * implementation.
 */

/* Set up the profiling log; `dirname` is a directory name. */
void profiling_log_init( const char *dirname );

/* Record one entry described by a task name, a codelet name, three integer
 * dimensions (m, n, k), a flop count and a speed. */
void cl_profiling_log_register( const char *task_name, const char *cl_name,
                                int m, int n, int k, double flops, double speed );

/* Counterpart of profiling_log_init(). Declared with `( void )` so that the
 * declaration is a full prototype. */
void profiling_log_fini( void );
215 #else
216 #ifndef DOXYGEN_SHOULD_SKIP_THIS
/* No-op replacement compiled when PASTIX_STARPU_PROFILING_LOG is not defined:
 * the directory name is accepted and ignored. */
static inline void
profiling_log_init( const char *dirname )
{
    (void)dirname; /* silence the unused-parameter warning */
}
/* No-op replacement compiled when PASTIX_STARPU_PROFILING_LOG is not defined.
 * `( void )` gives the function a real prototype, so a call with arguments is
 * rejected at compile time. */
static inline void profiling_log_fini( void ) {}
221 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
222 #endif
223 
224 #ifndef DOXYGEN_SHOULD_SKIP_THIS
225 #ifdef PASTIX_STARPU_STATS
226 static inline void
227 print_stats( double sub, double com, __attribute__((unused)) SolverMatrix *solvmtx )
228 {
229  int src = 0;
230  MPI_Comm_rank( solvmtx->solv_comm, &src );
231  fprintf( stderr, " Time to submit tasks on node %d %e s\n", src, clockVal( sub ) );
232  fprintf( stderr, " Time to execute tasks on node %d %e s\n", src, clockVal( com ) );
233  fprintf( stderr, " Total time on node %d %e s\n", src, clockVal( sub ) + clockVal( com ) );
234 }
235 #endif
236 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
237 
238 /**
239  * @brief StarPU Interface to handle cblks and bloks
240  */
241 extern struct starpu_data_interface_ops pastix_starpu_interface_ops;
242 
243 /**
244  * @brief Alias to get the Interface id
245  */
246 #define PASTIX_STARPU_INTERFACE_ID pastix_starpu_interface_ops.interfaceid
247 
248 /**
249  * @brief Interface data structure to register the pieces of data in StarPU
250  */
252  enum starpu_data_interface_id id; /**< Identifier of the interface */
253  pastix_coeftype_t flttype; /**< Floating type of the elements */
254  int offset; /**< -1 for cblk, blok offset for the subdatas */
255  int nbblok; /**< Number of blocks */
256  size_t allocsize; /**< size currently allocated */
257  const SolverCblk *cblk; /**< Internal structure used to store the cblk */
258  void *dataptr; /**< Pointer on data */
260 
261 #ifndef DOXYGEN_SHOULD_SKIP_THIS
262 static inline void *
263 pastix_starpu_cblk_get_ptr( void *interf ) {
264  return ((pastix_starpu_interface_t *)interf)->dataptr;
265 }
266 
267 static inline void *
268 pastix_starpu_blok_get_ptr( void *interf ) {
269  return ((pastix_starpu_interface_t *)interf)->dataptr;
270 }
271 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
272 
273 /**
274  * @brief Register a cblk at the StarPU level
275  *
276  * @param[out] handleptr
277  * The StarPU data handle to the registered data. Space must be allocated on call.
278  *
279  * @note A `home_node` argument (the StarPU memory node where the initial data
280  * is located: -1 if not local, STARPU_MAIN_RAM if local) was documented here,
281  * but it is not part of the prototype below. TODO(review): confirm and drop.
282  *
283  * @param[in] cblk
284  * The cblk to register
285  *
286  * @param[in] side
287  * Specify which part of the cblk (Upper or Lower) to register
288  *
289  * @param[in] flttype
290  * Specify the arithmetic floating type of the coefficients
291  */
292 void pastix_starpu_register( starpu_data_handle_t *handleptr,
293  const SolverCblk *cblk,
294  pastix_coefside_t side,
295  pastix_coeftype_t flttype );
296 
297 void pastix_starpu_register_ws( starpu_data_handle_t *handleptr,
298  const SolverCblk *cblk,
299  pastix_coeftype_t flttype );
300 
301 void pastix_starpu_register_blok( starpu_data_handle_t *handleptr,
302  const SolverCblk *cblk,
303  const SolverBlok *blok,
304  pastix_coeftype_t flttype );
305 
306 /**
307  * @brief Initialize the interface ID
308  */
310 
311 /**
312  * @brief Finalize the interface and reset the ID
313  */
315 
316 /**
317  * @brief Main structure for all tasks of fanin_init type
318  */
319 extern struct starpu_codelet cl_fanin_init_cpu;
320 
321 /**
322  * @brief Main structure for all tasks of rhs_init type
323  */
324 extern struct starpu_codelet cl_rhs_init_cpu;
325 
326 #endif /* _pastix_starpu_h_ */
327 
328 /**
329  * @}
330  */
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
spm_coeftype_t pastix_coeftype_t
Arithmetic types.
Definition: api.h:294
spm_mtxtype_t pastix_mtxtype_t
Matrix symmetry type property.
Definition: api.h:457
BEGIN_C_DECLS int pastix(pastix_data_t **pastix_data, PASTIX_Comm pastix_comm, pastix_int_t n, pastix_int_t *colptr, pastix_int_t *rowptr, void *values, pastix_int_t *perm, pastix_int_t *invp, void *B, pastix_int_t nrhs, pastix_int_t *iparm, double *dparm)
Main function for compatibility with former releases.
Definition: pastix.c:103
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
starpu_data_handle_t * handletab
SolverMatrix * solvmtx
starpu_data_handle_t * handletab
starpu_cblk_t * cblktab_handle
enum starpu_data_interface_id id
const SolverCblk * cblk
pastix_coeftype_t flttype
pastix_int_t handlenbr
struct starpu_codelet cl_rhs_init_cpu
Main structure for all tasks of rhs_init type.
void starpu_sparse_matrix_getoncpu(starpu_sparse_matrix_desc_t *desc)
Submit asynchronous calls to retrieve the data on main memory.
void starpu_rhs_getoncpu(starpu_rhs_desc_t *desc)
Submit asynchronous calls to retrieve the data on main memory.
Definition: starpu_rhs.c:216
struct starpu_cblk_s starpu_cblk_t
Additional StarPU handlers for a column-block when using 2D kernels.
int64_t pastix_starpu_tag_book(int64_t nbtags)
Book a range of StarPU unique tags of size nbtags.
Definition: starpu_tags.c:246
struct starpu_data_interface_ops pastix_starpu_interface_ops
StarPU Interface to handle cblks and bloks.
struct starpu_codelet cl_fanin_init_cpu
Main structure for all tasks of fanin_init type.
void starpu_rhs_init(SolverMatrix *solvmtx, pastix_rhs_t rhsb, int typesze, int nodes, int myrank)
Generate the StarPU descriptor of the dense matrix.
Definition: starpu_rhs.c:152
void pastix_starpu_interface_init()
Initialize the interface ID.
void starpu_sparse_matrix_init(SolverMatrix *solvmtx, pastix_mtxtype_t mtxtype, int nodes, int myrank, pastix_coeftype_t flttype)
Generate the StarPU descriptor of the sparse matrix.
void pastix_starpu_init(pastix_data_t *pastix, int *argc, char **argv[], const int *bindtab)
Startup the StarPU runtime system.
Definition: starpu.c:92
void pastix_starpu_tag_release(int64_t min)
Release the set of tags starting by min.
Definition: starpu_tags.c:264
struct starpu_rhs_desc_s starpu_rhs_desc_t
StarPU descriptor for the vectors linked to a given sparse matrix.
void pastix_starpu_interface_fini()
Finalize the interface and reset the ID.
struct profile_data_s profile_data_t
Base structure to all codelet arguments that include the profiling data.
void pastix_starpu_finalize(pastix_data_t *pastix)
Finalize the StarPU runtime system.
Definition: starpu.c:227
void starpu_sparse_cblk_wont_use(pastix_coefside_t coef, SolverCblk *cblk)
Submit asynchronous calls to retrieve the data on main memory.
struct pastix_starpu_interface_s pastix_starpu_interface_t
Interface data structure to register the pieces of data in StarPU.
struct starpu_sparse_matrix_desc_s starpu_sparse_matrix_desc_t
StarPU descriptor structure for the sparse matrix.
int pastix_starpu_tag_init(pastix_data_t *pastix)
Initialize the StarPU tags manager.
Definition: starpu_tags.c:219
void starpu_rhs_destroy(starpu_rhs_desc_t *desc)
Free the StarPU descriptor of the dense matrix.
Definition: starpu_rhs.c:254
void pastix_starpu_register(starpu_data_handle_t *handleptr, const SolverCblk *cblk, pastix_coefside_t side, pastix_coeftype_t flttype)
Register a cblk at the StarPU level.
static void profiling_display_allinfo()
Displays all profiling data collected into all measurements tables of the profile_list.
void starpu_sparse_matrix_destroy(starpu_sparse_matrix_desc_t *desc)
Free the StarPU descriptor of the sparse matrix.
Interface data structure to register the pieces of data in StarPU.
Base structure to all codelet arguments that include the profiling data.
Additional StarPU handlers for a column-block when using 2D kernels.
Definition: pastix_starpu.h:99
StarPU descriptor for the vectors linked to a given sparse matrix.
StarPU descriptor structure for the sparse matrix.
Main PaStiX data structure.
Definition: pastixdata.h:68
Main PaStiX RHS structure.
Definition: pastixdata.h:155
Solver block structure.
Definition: solver.h:141
Solver column block structure.
Definition: solver.h:161
Solver column block structure.
Definition: solver.h:203