PaStiX Handbook  6.3.2
pastix_starpu.h
Go to the documentation of this file.
1 /**
2  *
3  * @file pastix_starpu.h
4  *
5  * StarPU support for the numerical factorization and solve of PaStiX.
6  *
7  * @copyright 2016-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.3.2
11  * @author Mathieu Faverge
12  * @author Pierre Ramet
13  * @author Alycia Lisito
14  * @author Florent Pruvost
15  * @author Nolan Bredel
16  * @author Tom Moenne-Loccoz
17  * @date 2023-12-01
18  *
19  * @addtogroup pastix_starpu
20  * @{
21  * This module describes the functionality provided by the runtime system
22  * StarPU for the numerical factorization and solve.
23  *
24  **/
25 #ifndef _pastix_starpu_h_
26 #define _pastix_starpu_h_
27 
28 #include "common.h"
29 #include "blend/solver.h"
30 
31 #if defined(PASTIX_WITH_MPI)
32 #include <starpu_mpi.h>
33 #else
34 #include <starpu.h>
35 #endif
36 
37 #include <starpu_profiling.h>
38 
39 #if defined(PASTIX_WITH_CUDA) && !defined(PASTIX_STARPU_SIMULATION)
40 #include <starpu_scheduler.h>
41 #include <starpu_cuda.h>
42 
43 #include <cublas.h>
44 #include <starpu_cublas.h>
45 #if defined(PASTIX_WITH_CUBLAS_V2)
46 #include <cublas_v2.h>
47 #include <starpu_cublas_v2.h>
48 #endif
49 #endif
50 
51 #ifndef DOXYGEN_SHOULD_SKIP_THIS
52 typedef struct starpu_conf starpu_conf_t;
53 
54 #if defined(PASTIX_WITH_MPI)
55 
56 #if defined(PASTIX_STARPU_SYNC)
57 #define pastix_starpu_insert_task( _codelet_, ... ) \
58  starpu_mpi_insert_task( sopalin_data->solvmtx->solv_comm, _codelet_, STARPU_TASK_SYNCHRONOUS, 1, ##__VA_ARGS__ )
59 #else
60 #define pastix_starpu_insert_task( _codelet_, ... ) \
61  starpu_mpi_insert_task( sopalin_data->solvmtx->solv_comm, _codelet_, ##__VA_ARGS__ )
62 #endif
63 
64 #else
65 
66 #if defined(PASTIX_STARPU_SYNC)
67 #define pastix_starpu_insert_task( _codelet_, ... ) \
68  starpu_insert_task( _codelet_, STARPU_TASK_SYNCHRONOUS, 1, ##__VA_ARGS__ )
69 #else
70 #define pastix_starpu_insert_task( _codelet_, ... ) \
71  starpu_insert_task( _codelet_, ##__VA_ARGS__ )
72 #endif
73 
74 #endif
75 
76 #if defined( PASTIX_STARPU_HETEROPRIO )
77 typedef enum heteroprio_bucket_order_e {
78  BucketSolveDiag = 0,
79  BucketSolveGEMM = 0,
80  BucketSolveTRSM = 0,
81  BucketFacto1D = 0,
82  BucketFacto2D = 0,
83  BucketScalo = 0,
84  BucketTRSM1D = 2,
85  BucketTRSM2D = 2,
86  BucketGEMM1D = 1,
87  BucketGEMM2D = 3,
88  BucketNumber
89 } heteroprio_bucket_order_t;
90 #endif
91 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
92 
93 /**
94  * @brief Additional StarPU handlers for a column-block when using 2D kernels.
95  *
96  * Handle requirements for contiguous allocation of the block handlers when
97  * using StarPU data partitioning.
98  */
99 typedef struct starpu_cblk_s {
100  pastix_int_t handlenbr; /**< Number of 2D block handlers in the column-block */
101  starpu_data_handle_t *handletab; /**< Array of 2D block handlers for the column-block */
103 
104 /**
105  * @brief StarPU descriptor structure for the sparse matrix.
106  */
108  int64_t mpitag; /**< MPI id of StarPU */
109  int typesze; /**< Arithmetic size */
110  pastix_mtxtype_t mtxtype; /**< Matrix structure: PastixGeneral, PastixSymmetric or PastixHermitian. */
111  SolverMatrix *solvmtx; /**< Solver matrix structure that describes the problem and stores the original data */
112  starpu_cblk_t *cblktab_handle; /**< Array of 2D column-block handlers (NULL when using 1D kernels only) */
113  void **gpu_blocktab; /**< Pointer to GPU arrays that contains frownum,lrownum of each block for Fermi (NULL otherwise) */
115 
116 /**
117  * @brief StarPU descriptor for the vectors linked to a given sparse matrix.
118  */
120  int64_t mpitag; /**< MPI id of StarPU */
121  int ncol; /**< Number of columns of the matrix */
122  int typesze; /**< Arithmetic size */
123  SolverMatrix *solvmtx; /**< Solver matrix structure that describes the problem and stores the original data */
124  starpu_data_handle_t *handletab; /**< Array of handlers for the blocks */
125  void *dataptr; /**< Store the main data pointer to check that the descriptor matches the reference */
127 
129  pastix_mtxtype_t mtxtype,
130  int nodes, int myrank, pastix_coeftype_t flttype );
133 
134 void starpu_dense_matrix_init ( SolverMatrix *solvmtx,
135  pastix_int_t ncol, char *A, pastix_int_t lda,
136  int typesze, int nodes, int myrank );
139 
142  int *argc, char **argv[],
143  const int *bindtab );
145 
146 /**
147  * MPI tag management
148  */
150 int64_t pastix_starpu_tag_book( int64_t nbtags );
151 void pastix_starpu_tag_release( int64_t min );
152 
153 #ifndef DOXYGEN_SHOULD_SKIP_THIS
154 struct measure_s;
155 typedef struct measure_s measure_t;
156 
157 struct measure_s {
158  double sum;
159  double sum2;
160  long n;
161 };
162 
163 /**
164  * @brief Helper functions and variables for testing
165  */
166 struct starpu_profile_s;
167 typedef struct starpu_profile_s starpu_profile_t;
168 
169 /**
170  * @brief Profiling data structure to register a codelet to profile
171  */
172 struct starpu_profile_s {
173  starpu_profile_t *next; /**< Link to the next implementation */
174  const char *name; /**< Short name of the function */
175  measure_t measures[STARPU_NMAXWORKERS]; /**< Pointer to the array of measures */
176 };
177 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
178 
179 /**
180  * @brief Base structure to all codelet arguments that include the profiling data
181  */
182 typedef struct profile_data_s {
183 #if defined( PASTIX_STARPU_PROFILING )
184  measure_t *measures;
185 #endif
186  double flops;
188 
189 #if defined( PASTIX_STARPU_PROFILING )
190 void cl_profiling_callback( void *callback_arg );
191 void profiling_register_cl( starpu_profile_t *codelet );
193 #else
194 /**
195  *******************************************************************************
196  *
197  * @brief Displays all profiling data collected into all measurements tables of
198  * the profile_list.
199  *
200  ******************************************************************************/
201 static inline void profiling_display_allinfo() {}
202 #endif
203 
204 #if defined( PASTIX_STARPU_PROFILING_LOG )
205 void profiling_log_init( const char* dirname );
206 void cl_profiling_log_register( const char *task_name, const char* cl_name,
207  int m, int n, int k, double flops, double speed );
208 
209 void profiling_log_fini();
210 #else
211 #ifndef DOXYGEN_SHOULD_SKIP_THIS
212 static inline void profiling_log_init( const char* dirname ) {
213  (void) dirname;
214 }
215 static inline void profiling_log_fini() {}
216 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
217 #endif
218 
219 #ifndef DOXYGEN_SHOULD_SKIP_THIS
220 #ifdef PASTIX_STARPU_STATS
221 static inline void
222 print_stats( double sub, double com, __attribute__((unused)) SolverMatrix *solvmtx )
223 {
224  int src = 0;
225  MPI_Comm_rank( solvmtx->solv_comm, &src );
226  fprintf( stderr, " Time to submit tasks on node %d %e s\n", src, clockVal( sub ) );
227  fprintf( stderr, " Time to execute tasks on node %d %e s\n", src, clockVal( com ) );
228  fprintf( stderr, " Total time on node %d %e s\n", src, clockVal( sub ) + clockVal( com ) );
229 }
230 #endif
231 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
232 
233 /**
234  * @brief StarPU Interface to handle cblks and bloks
235  */
236 extern struct starpu_data_interface_ops pastix_starpu_interface_ops;
237 
238 /**
239  * @brief Alias to get the Interface id
240  */
241 #define PASTIX_STARPU_INTERFACE_ID pastix_starpu_interface_ops.interfaceid
242 
243 /**
244  * @brief Interface data structure to register the pieces of data in StarPU
245  */
247  enum starpu_data_interface_id id; /**< Identifier of the interface */
248  pastix_coeftype_t flttype; /**< Floating type of the elements */
249  int offset; /**< -1 for cblk, blok offset for the subdatas */
250  int nbblok; /**< Number of blocks */
251  size_t allocsize; /**< size currently allocated */
252  const SolverCblk *cblk; /**< Internal structure used to store the cblk */
253  void *dataptr; /**< Pointer on data */
255 
256 #ifndef DOXYGEN_SHOULD_SKIP_THIS
257 static inline void *
258 pastix_starpu_cblk_get_ptr( void *interf ) {
259  return ((pastix_starpu_interface_t *)interf)->dataptr;
260 }
261 
262 static inline void *
263 pastix_starpu_blok_get_ptr( void *interf ) {
264  return ((pastix_starpu_interface_t *)interf)->dataptr;
265 }
266 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
267 
268 /**
269  * @brief Register a cblk at the StarPU level
270  *
271  * @param[out] handleptr
272  * The StarPU data handle to the registered data. Space must be allocated on call.
273  *
274  * @param[in] home_node
275  * The StarPU memory node enum to specify where the initial data is located
276  * -1 if not local, STARPU_MAIN_RAM if local.
277  *
278  * @param[in] cblk
279  * The cblk to register
280  *
281  * @param[in] side
282  * Specify which part of the cblk (Upper or Lower) to register
283  *
284  * @param[in] flttype
285  * Specify the arithmetic floating type of the coefficients
286  */
287 void pastix_starpu_register( starpu_data_handle_t *handleptr,
288  const SolverCblk *cblk,
289  pastix_coefside_t side,
290  pastix_coeftype_t flttype );
291 
292 void pastix_starpu_register_ws( starpu_data_handle_t *handleptr,
293  const SolverCblk *cblk,
294  pastix_coeftype_t flttype );
295 
296 void pastix_starpu_register_blok( starpu_data_handle_t *handleptr,
297  const SolverCblk *cblk,
298  const SolverBlok *blok,
299  pastix_coeftype_t flttype );
300 
301 /**
302  * @brief Initialize the interface ID
303  */
305 
306 /**
307  * @brief Finalize the interface and reset the ID
308  */
310 
311 /**
312  * @brief Main structure for all tasks of fanin_init type
313  */
314 extern struct starpu_codelet cl_fanin_init_cpu;
315 
316 #endif /* _pastix_starpu_h_ */
317 
318 /**
319  * @}
320  */
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
spm_coeftype_t pastix_coeftype_t
Arithmetic types.
Definition: api.h:294
spm_mtxtype_t pastix_mtxtype_t
Matrix symmetry type property.
Definition: api.h:457
BEGIN_C_DECLS int pastix(pastix_data_t **pastix_data, PASTIX_Comm pastix_comm, pastix_int_t n, pastix_int_t *colptr, pastix_int_t *rowptr, void *values, pastix_int_t *perm, pastix_int_t *invp, void *B, pastix_int_t nrhs, pastix_int_t *iparm, double *dparm)
Main function for compatibility with former releases.
Definition: pastix.c:103
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
starpu_data_handle_t * handletab
starpu_cblk_t * cblktab_handle
starpu_data_handle_t * handletab
enum starpu_data_interface_id id
const SolverCblk * cblk
pastix_coeftype_t flttype
pastix_int_t handlenbr
void starpu_dense_matrix_init(SolverMatrix *solvmtx, pastix_int_t ncol, char *A, pastix_int_t lda, int typesze, int nodes, int myrank)
Generate the StarPU descriptor of the dense matrix.
void starpu_sparse_matrix_getoncpu(starpu_sparse_matrix_desc_t *desc)
Submit asynchronous calls to retrieve the data on main memory.
struct starpu_cblk_s starpu_cblk_t
Additional StarPU handlers for a column-block when using 2D kernels.
int64_t pastix_starpu_tag_book(int64_t nbtags)
Book a range of StarPU unique tags of size nbtags.
Definition: starpu_tags.c:246
struct starpu_data_interface_ops pastix_starpu_interface_ops
StarPU Interface to handle cblks and bloks.
struct starpu_codelet cl_fanin_init_cpu
Main structure for all tasks of fanin_init type.
void pastix_starpu_interface_init()
Initialize the interface ID.
void starpu_sparse_matrix_init(SolverMatrix *solvmtx, pastix_mtxtype_t mtxtype, int nodes, int myrank, pastix_coeftype_t flttype)
Generate the StarPU descriptor of the sparse matrix.
void pastix_starpu_init(pastix_data_t *pastix, int *argc, char **argv[], const int *bindtab)
Start up the StarPU runtime system.
Definition: starpu.c:92
void pastix_starpu_tag_release(int64_t min)
Release the set of tags starting at min.
Definition: starpu_tags.c:264
void pastix_starpu_interface_fini()
Finalize the interface and reset the ID.
void starpu_dense_matrix_destroy(starpu_dense_matrix_desc_t *desc)
Free the StarPU descriptor of the dense matrix.
struct profile_data_s profile_data_t
Base structure to all codelet arguments that include the profiling data.
void pastix_starpu_finalize(pastix_data_t *pastix)
Finalize the StarPU runtime system.
Definition: starpu.c:227
void starpu_sparse_cblk_wont_use(pastix_coefside_t coef, SolverCblk *cblk)
Submit asynchronous calls to retrieve the data on main memory.
void starpu_dense_matrix_getoncpu(starpu_dense_matrix_desc_t *desc)
Submit asynchronous calls to retrieve the data on main memory.
struct pastix_starpu_interface_s pastix_starpu_interface_t
Interface data structure to register the pieces of data in StarPU.
struct starpu_sparse_matrix_desc_s starpu_sparse_matrix_desc_t
StarPU descriptor structure for the sparse matrix.
struct starpu_dense_matrix_desc_s starpu_dense_matrix_desc_t
StarPU descriptor for the vectors linked to a given sparse matrix.
int pastix_starpu_tag_init(pastix_data_t *pastix)
Initialize the StarPU tags manager.
Definition: starpu_tags.c:219
void pastix_starpu_register(starpu_data_handle_t *handleptr, const SolverCblk *cblk, pastix_coefside_t side, pastix_coeftype_t flttype)
Register a cblk at the StarPU level.
static void profiling_display_allinfo()
Displays all profiling data collected into all measurements tables of the profile_list.
void starpu_sparse_matrix_destroy(starpu_sparse_matrix_desc_t *desc)
Free the StarPU descriptor of the sparse matrix.
Interface data structure to register the pieces of data in StarPU.
Base structure to all codelet arguments that include the profiling data.
Additional StarPU handlers for a column-block when using 2D kernels.
Definition: pastix_starpu.h:99
StarPU descriptor for the vectors linked to a given sparse matrix.
StarPU descriptor structure for the sparse matrix.
Main PaStiX data structure.
Definition: pastixdata.h:67
Solver block structure.
Definition: solver.h:137
Solver column block structure.
Definition: solver.h:156
Solver column block structure.
Definition: solver.h:200