PaStiX Handbook  6.3.2
starpu_sparse_matrix.c
Go to the documentation of this file.
1 /**
2  *
3  * @file starpu_sparse_matrix.c
4  *
5  * PaStiX sparse matrix descriptor for StarPU.
6  *
7  * @copyright 2016-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.3.2
11  * @author Mathieu Faverge
12  * @author Pierre Ramet
13  * @author Tony Delarue
14  * @author Alycia Lisito
15  * @author Nolan Bredel
16  * @date 2023-12-01
17  *
18  * @ingroup pastix_starpu
19  * @{
20  *
21  **/
22 #include "common.h"
23 #include "blend/solver.h"
24 #include "pastix_starpu.h"
25 #include <starpu_data.h>
26 
27 #ifndef DOXYGEN_SHOULD_SKIP_THIS
28 static inline void
29 pastix_starpu_filter_interface( void *father_interface,
30  void *child_interface,
31  struct starpu_data_filter *f,
32  unsigned id,
33  unsigned nchunks )
34 {
35  pastix_starpu_interface_t *father = (pastix_starpu_interface_t *)father_interface;
36  pastix_starpu_interface_t *child = (pastix_starpu_interface_t *)child_interface;
37  size_t *sizetab = (size_t *)f->filter_arg_ptr;
38  size_t childoff = 0;
39 
40  assert( father->id == PASTIX_STARPU_INTERFACE_ID );
41  assert( father->offset == -1 );
42  assert( father->cblk->cblktype & CBLK_LAYOUT_2D );
43 
44  child->id = father->id;
45  child->flttype = father->flttype;
46  child->offset = sizetab[id];
47  child->nbblok = sizetab[id+1] - sizetab[id];
48  child->allocsize = 0;
49  child->cblk = father->cblk;
50  child->dataptr = NULL;
51 
52  assert( child->offset >= 0 );
53 
54  if ( father->dataptr == NULL ) {
55  return;
56  }
57 
58  if ( father->cblk->cblktype & CBLK_COMPRESSED ) {
59  childoff = sizetab[id] * sizeof( pastix_lrblock_t );
60  child->allocsize = child->nbblok * sizeof( pastix_lrblock_t );
61  }
62  else {
63  SolverBlok *blok = father->cblk->fblokptr + sizetab[id];
64  SolverBlok *lblk = father->cblk->fblokptr + sizetab[id+1];
65  childoff = pastix_size_of( father->flttype ) * blok->coefind;
66 
67  if ( lblk->coefind == 0 ) {
68  int i;
69  int nbrow = 0;
70  for ( i=0; i<child->nbblok; i++, blok++) {
71  nbrow += blok_rownbr( blok );
72  }
73  child->allocsize = pastix_size_of( father->flttype ) * nbrow * cblk_colnbr( father->cblk );
74  }
75  else {
76  child->allocsize = pastix_size_of( father->flttype ) * (lblk->coefind - blok->coefind);
77  }
78  }
79 
80 #if defined(PASTIX_STARPU_INTERFACE_DEBUG)
81  fprintf( stderr,
82  "blok (%9s, size=%8zu, nbblok=%2ld )\n",
83  child->cblk->cblktype & CBLK_COMPRESSED ? "Low-rank" : "Full-rank",
84  child->allocsize, (long)(child->nbblok) );
85 #endif
86 
87  assert( child->allocsize > 0 );
88 
89  child->dataptr = ((char*)father->dataptr) + childoff;
90 
91  (void)nchunks;
92 }
93 
94 static inline void
95 pastix_starpu_register_interface( const starpu_sparse_matrix_desc_t *spmtx,
96  SolverCblk *cblk,
97  int side,
98  pastix_coeftype_t flttype )
99 {
100  starpu_data_handle_t *handler = ( (starpu_data_handle_t *)( cblk->handler ) ) + side;
101  int64_t tag_cblk = 2 * cblk->gcblknum + side;
102 
103  pastix_starpu_register( handler, cblk, side, flttype );
104 #if defined( PASTIX_WITH_MPI )
105  starpu_mpi_data_register( *handler, spmtx->mpitag + tag_cblk, cblk->ownerid );
106 #endif
107  (void)tag_cblk;
108  (void)spmtx;
109 }
110 
111 static inline void
112 pastix_starpu_register_cblk( const starpu_sparse_matrix_desc_t *spmtx,
113  SolverCblk *cblk,
114  pastix_coeftype_t flttype )
115 {
116  pastix_starpu_register_interface( spmtx, cblk, PastixLCoef, flttype );
117  if ( spmtx->mtxtype == PastixGeneral ) {
118  pastix_starpu_register_interface( spmtx, cblk, PastixUCoef, flttype );
119  }
120 }
121 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
122 
123 /**
124  *******************************************************************************
125  *
126  * @brief Generate the StarPU descriptor of the sparse matrix.
127  *
128  * This function creates the StarPU descriptor that will provide tha data
129  * mapping and memory location to StarPU for the computation.
130  *
131  *******************************************************************************
132  *
133  * @param[inout] solvmtx
134  * The solver matrix structure that describes the sparse matrix for
135  * PaStiX.
136  *
137  * @param[in] mtxtype
138  * The type of sparse matrix to describe.
139  * @arg PastixGeneral: The sparse matrix is general.
140  * @arg PastixSymmetric: The sparse matrix is lower triangular symmetric.
141  * @arg PastixHermitian: The sparse matrix is lower triangular hermitian.
142  *
143  * @param[in] nodes
144  * The number of processes used to solve the problem.
145  *
146  * @param[in] myrank
147  * The rank of the calling process.
148  *
149  * @param[in] flttype
150  * The memory size of the arithmetic used to store the matrix
151  * coefficients.
152  *
153  ******************************************************************************/
154 void
156  pastix_mtxtype_t mtxtype,
157  int nodes,
158  int myrank,
159  pastix_coeftype_t flttype )
160 {
161  pastix_int_t cblknbr, cblkmin2d;
162  size_t key1, key2;
163  SolverCblk *cblk;
164  SolverBlok *blok, *lblok;
165  pastix_int_t n = 0, cblknum;
166  pastix_int_t nbrow;
167  size_t size;
168  int64_t tag_desc;
169 #if defined( PASTIX_WITH_MPI )
170  int64_t tag;
171 #endif
172 
173  starpu_sparse_matrix_desc_t *spmtx = solvmtx->starpu_desc;
174  if ( spmtx != NULL ) {
176  }
177  else {
178  spmtx = (starpu_sparse_matrix_desc_t *)malloc( sizeof( starpu_sparse_matrix_desc_t ) );
179  }
180 
181  tag_desc = ( (int64_t) (solvmtx->gcblknbr + solvmtx->bloknbr) ) * 2;
182  spmtx->mpitag = pastix_starpu_tag_book( tag_desc );
183  tag_desc = spmtx->mpitag + 2 * solvmtx->gcblknbr;
184  spmtx->typesze = pastix_size_of( flttype );
185  spmtx->mtxtype = mtxtype;
186  spmtx->solvmtx = solvmtx;
187  spmtx->cblktab_handle = NULL;
188  spmtx->gpu_blocktab = NULL;
189 
190  cblknbr = solvmtx->cblknbr;
191  cblkmin2d = solvmtx->cblkmin2d;
192  key1 = 2 * cblknbr;
193 
194  /* Initialize 1D cblk handlers */
195  cblk = spmtx->solvmtx->cblktab;
196  for ( cblknum = 0; cblknum < cblkmin2d; cblknum++, n++, cblk++ ) {
197  pastix_starpu_register_cblk( spmtx, cblk, flttype );
198  }
199 
200  /* Initialize 2D cblk handlers */
201  if ( cblkmin2d < cblknbr ) {
202  struct starpu_data_filter filter = { .filter_func = pastix_starpu_filter_interface };
203  starpu_cblk_t *cblkhandle;
204  size_t *sizetab = NULL;
205  pastix_int_t nchildren, sizenbr = 0;
206 
207  spmtx->cblktab_handle =
208  (starpu_cblk_t *)malloc( ( cblknbr - cblkmin2d ) * sizeof( starpu_cblk_t ) );
209 
210  cblk = spmtx->solvmtx->cblktab + cblkmin2d;
211  cblkhandle = spmtx->cblktab_handle;
212 
213  sizenbr = ( cblk[1].fblokptr - cblk[0].fblokptr ) + 1;
214  sizetab = malloc( sizenbr * sizeof( size_t ) );
215  assert( sizenbr >= 1 );
216 
217  for ( cblknum = cblkmin2d, n = 0; cblknum < cblknbr;
218  cblknum++, n++, cblk++, cblkhandle++ ) {
219  pastix_starpu_register_cblk( spmtx, cblk, flttype );
220 
221  if ( !( cblk->cblktype & CBLK_TASKS_2D ) ) {
222  continue;
223  }
224 
225  /* Let's build the sizetab array */
226  blok = cblk[0].fblokptr;
227  lblok = cblk[1].fblokptr;
228 
229  if ( ( lblok - blok ) >= sizenbr ) {
230  sizenbr = ( lblok - blok ) + 1;
231  free( sizetab );
232  sizetab = malloc( sizenbr * sizeof( size_t ) );
233  }
234  nchildren = 0;
235  sizetab[0] = 0;
236 
237  /*
238  * Diagonal block
239  */
240  sizetab[nchildren + 1] = 1;
241  nchildren++;
242 
243  /*
244  * Off-diagonal blocks
245  */
246  blok++;
247  for ( ; blok < lblok; blok++ ) {
248  nbrow = 1;
249 
250  while ( ( blok + 1 < lblok ) &&
251  ( blok[0].fcblknm == blok[1].fcblknm ) &&
252  ( blok[0].lcblknm == blok[1].lcblknm ) )
253  {
254  blok++;
255  nbrow++;
256  }
257  size = nbrow;
258 
259  sizetab[nchildren + 1] = sizetab[nchildren] + size;
260  nchildren++;
261  }
262  filter.nchildren = nchildren;
263  filter.filter_arg_ptr = sizetab;
264 
265  cblkhandle->handlenbr = nchildren;
266  if ( mtxtype == PastixGeneral ) {
267  cblkhandle->handletab = (starpu_data_handle_t *)malloc(
268  2 * nchildren * sizeof( starpu_data_handle_t ) );
269 
270  starpu_data_partition_plan( cblk->handler[0], &filter, cblkhandle->handletab );
271 
272  starpu_data_partition_plan(
273  cblk->handler[1], &filter, cblkhandle->handletab + nchildren );
274  }
275  else {
276  cblkhandle->handletab =
277  (starpu_data_handle_t *)malloc( nchildren * sizeof( starpu_data_handle_t ) );
278 
279  starpu_data_partition_plan( cblk->handler[0], &filter, cblkhandle->handletab );
280  }
281 
282  nchildren = 0;
283  blok = cblk[0].fblokptr;
284  lblok = cblk[1].fblokptr;
285 
286  /*
287  * Diagonal block
288  */
289  blok->handler[0] = cblkhandle->handletab[nchildren];
290 #if defined( PASTIX_WITH_MPI )
291  tag = tag_desc + 2 * ( blok - solvmtx->bloktab );
292  starpu_mpi_data_register( blok->handler[0], tag, cblk->ownerid );
293 #endif
294  if ( mtxtype == PastixGeneral ) {
295  blok->handler[1] = cblkhandle->handletab[cblkhandle->handlenbr + nchildren];
296 #if defined( PASTIX_WITH_MPI )
297  tag = tag_desc + 2 * ( blok - solvmtx->bloktab ) + 1;
298  starpu_mpi_data_register( blok->handler[1], tag, cblk->ownerid );
299 #endif
300  }
301  else {
302  blok->handler[1] = NULL;
303  }
304  nchildren++;
305 
306  /*
307  * Off-diagonal blocks
308  */
309  blok++;
310  for ( ; blok < lblok; blok++ ) {
311  blok->handler[0] = cblkhandle->handletab[nchildren];
312 #if defined( PASTIX_WITH_MPI )
313  tag = tag_desc + 2 * ( blok - solvmtx->bloktab );
314  starpu_mpi_data_register( blok->handler[0], tag, cblk->ownerid );
315 #endif
316  if ( mtxtype == PastixGeneral ) {
317  blok->handler[1] = cblkhandle->handletab[cblkhandle->handlenbr + nchildren];
318 #if defined( PASTIX_WITH_MPI )
319  tag = tag_desc + 2 * ( blok - solvmtx->bloktab ) + 1;
320  starpu_mpi_data_register( blok->handler[1], tag, cblk->ownerid );
321 #endif
322  }
323  else {
324  blok->handler[1] = NULL;
325  }
326  nchildren++;
327 
328  while ( ( blok < lblok ) && ( blok[0].fcblknm == blok[1].fcblknm ) &&
329  ( blok[0].lcblknm == blok[1].lcblknm ) ) {
330  blok++;
331  blok->handler[0] = NULL;
332  blok->handler[1] = NULL;
333  }
334  }
335  }
336 
337  if ( sizetab != NULL ) {
338  free( sizetab );
339  }
340  }
341  solvmtx->starpu_desc = spmtx;
342 
343  (void)key1;
344  (void)key2;
345  (void)nodes;
346  (void)myrank;
347  (void)tag_desc;
348 }
349 
350 /**
351  *******************************************************************************
352  *
353  * @brief Submit asynchronous calls to retrieve the data on main memory.
354  *
355  *******************************************************************************
356  *
357  * @param[in] side
358  * The cblk for which the data needs to be flushed.
359  *
360  * @param[inout] cblk
361  * The cblk for which the data needs to be flushed.
362  *
363  ******************************************************************************/
364 void
366 {
367  if ( side != PastixUCoef ) {
368  assert( cblk->handler[0] );
369  starpu_data_wont_use( cblk->handler[0] );
370  }
371  if ( side != PastixLCoef ) {
372  assert( cblk->handler[1] );
373  starpu_data_wont_use( cblk->handler[1] );
374  }
375 }
376 
377 /**
378  *******************************************************************************
379  *
380  * @brief Submit asynchronous calls to retrieve the data on main memory.
381  *
382  *******************************************************************************
383  *
384  * @param[inout] spmtx
385  * The sparse matrix descriptor to retrieve on main memory.
386  *
387  ******************************************************************************/
388 void
390 {
391  SolverCblk *cblk;
392  pastix_int_t i;
393 
394  cblk = spmtx->solvmtx->cblktab;
395  for ( i = 0; i < spmtx->solvmtx->cblknbr; i++, cblk++ ) {
396  assert( cblk->handler[0] );
397 
398  if ( cblk->ownerid != spmtx->solvmtx->clustnum ) {
399  continue;
400  }
401 
402  starpu_data_acquire_cb( cblk->handler[0],
403  STARPU_R,
404  ( void( * )( void * ) ) & starpu_data_release,
405  cblk->handler[0] );
406 
407  if ( cblk->ucoeftab ) {
408  starpu_data_acquire_cb( cblk->handler[1],
409  STARPU_R,
410  ( void( * )( void * ) ) & starpu_data_release,
411  cblk->handler[1] );
412  }
413  }
414 }
415 
416 #if !defined(DOXYGEN_SHOULD_SKIP_THIS)
417 #if !defined(HAVE_STARPU_DATA_PARTITION_CLEAN_NODE)
418 /**
419  *******************************************************************************
420  *
421  * @brief StarPU function only avalaible with StarPU >= 1.4.0
422  *
423  ******************************************************************************/
424 static inline void
425 pastix_starpu_data_partition_clean_node( int unpartition,
426  starpu_data_handle_t root_handle,
427  unsigned nparts,
428  starpu_data_handle_t *children,
429  int gather_node )
430 {
431  if ( unpartition ) {
432  starpu_data_unpartition_submit( root_handle, nparts, children, gather_node );
433  }
434  starpu_data_partition_clean( root_handle, nparts, children );
435 }
436 #else
437 static inline void
438 pastix_starpu_data_partition_clean_node( int unpartition __attribute__((unused)),
439  starpu_data_handle_t root_handle,
440  unsigned nparts,
441  starpu_data_handle_t *children,
442  int gather_node )
443 {
444  starpu_data_partition_clean_node( root_handle, nparts, children, gather_node );
445 }
446 #endif
447 #endif
448 
449 /**
450  *******************************************************************************
451  *
452  * @brief Destroy a single cblk StarPU data structure of the sparse matrix
453  *
454  *******************************************************************************
455  *
456  * @param[in] is_owner
457  * Boolean to specify if the calling process owns the cblk or not
458  *
459  * @param[in,out] cblk
460  * Pointer to the cblk data structure
461  *
462  * @param[in,out] starpu_cblk
463  * Pointer to the starpu data structure associated to the cblk
464  *
465  ******************************************************************************/
466 void
468  SolverCblk *cblk,
469  starpu_cblk_t *cblkhandle )
470 {
471  if ( cblk->cblktype & CBLK_TASKS_2D ) {
472  int gather_node = -1;
473 
474  if ( is_owner && !(cblk->cblktype & CBLK_FANIN) ) {
475  gather_node = STARPU_MAIN_RAM;
476  }
477 
478  /*
479  * First, let's unpartition ourself as long as StarPU does not use the
480  * correct gather_node in starpu_data_partition_clean()
481  */
482  if ( cblk->handler[0] ) {
483  pastix_starpu_data_partition_clean_node( !is_owner && (cblk->partitioned & (PastixLCoef + 1)),
484  cblk->handler[0],
485  cblkhandle->handlenbr,
486  cblkhandle->handletab,
487  gather_node );
488  }
489 
490  if ( cblk->handler[1] ) {
491  pastix_starpu_data_partition_clean_node( !is_owner && (cblk->partitioned & (PastixUCoef + 1)),
492  cblk->handler[1],
493  cblkhandle->handlenbr,
494  cblkhandle->handletab + cblkhandle->handlenbr,
495  gather_node );
496  }
497 
498  free( cblkhandle->handletab );
499  cblkhandle->handletab = NULL;
500  }
501 
502  if ( cblk->handler[0] ) {
503  starpu_data_unregister( cblk->handler[0] );
504  }
505  if ( cblk->handler[1] ) {
506  starpu_data_unregister( cblk->handler[1] );
507  }
508  cblk->handler[0] = NULL;
509  cblk->handler[1] = NULL;
510 }
511 
512 /**
513  *******************************************************************************
514  *
515  * @brief Free the StarPU descriptor of the sparse matrix.
516  *
517  * This function destroys the StarPU descriptor, but do not free the matrix data
518  * that are managed by PaStiX.
519  *
520  *******************************************************************************
521  *
522  * @param[inout] spmtx
523  * The descriptor to free.
524  *
525  ******************************************************************************/
526 void
528 {
529  starpu_cblk_t *cblkhandle;
530  SolverCblk *cblk;
531  pastix_int_t i, cblkmin2d;
532  pastix_int_t rank = spmtx->solvmtx->clustnum;
533 
534  cblkmin2d = spmtx->solvmtx->cblkmin2d;
535  cblk = spmtx->solvmtx->cblktab;
536  for ( i = 0; i < cblkmin2d; i++, cblk++ ) {
537  pastix_starpu_cblk_destroy( cblk->ownerid == rank, cblk, NULL );
538  }
539 
540  cblkhandle = spmtx->cblktab_handle;
541  for ( i = cblkmin2d; i < spmtx->solvmtx->cblknbr; i++, cblk++, cblkhandle++ ) {
542  pastix_starpu_cblk_destroy( cblk->ownerid == rank, cblk, cblkhandle );
543  }
544 
545  if ( spmtx->cblktab_handle != NULL ) {
546  free( spmtx->cblktab_handle );
547  }
548 
550 }
551 
552 /**
553  * @}
554  */
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
struct pastix_lrblock_s pastix_lrblock_t
The block low-rank structure to hold a matrix in low-rank form.
spm_coeftype_t pastix_coeftype_t
Arithmetic types.
Definition: api.h:294
#define PastixGeneral
Definition: api.h:458
spm_mtxtype_t pastix_mtxtype_t
Matrix symmetry type property.
Definition: api.h:457
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
@ PastixLCoef
Definition: api.h:478
@ PastixUCoef
Definition: api.h:479
starpu_data_handle_t * handletab
starpu_cblk_t * cblktab_handle
enum starpu_data_interface_id id
const SolverCblk * cblk
pastix_coeftype_t flttype
pastix_int_t handlenbr
void pastix_starpu_cblk_destroy(int is_owner, SolverCblk *cblk, starpu_cblk_t *cblkhandle)
Destroy a single cblk StarPU data structure of the sparse matrix.
void starpu_sparse_matrix_getoncpu(starpu_sparse_matrix_desc_t *spmtx)
Submit asynchronous calls to retrieve the data on main memory.
struct starpu_cblk_s starpu_cblk_t
Additional StarPU handlers for a column-block when using 2D kernels.
int64_t pastix_starpu_tag_book(int64_t nbtags)
Book a range of StarPU unique tags of size nbtags.
Definition: starpu_tags.c:246
void starpu_sparse_matrix_init(SolverMatrix *solvmtx, pastix_mtxtype_t mtxtype, int nodes, int myrank, pastix_coeftype_t flttype)
Generate the StarPU descriptor of the sparse matrix.
void pastix_starpu_tag_release(int64_t min)
Release the set of tags starting by min.
Definition: starpu_tags.c:264
#define PASTIX_STARPU_INTERFACE_ID
Alias to get the Interface id.
void starpu_sparse_cblk_wont_use(pastix_coefside_t side, SolverCblk *cblk)
Submit asynchronous calls to retrieve the data on main memory.
void pastix_starpu_register(starpu_data_handle_t *handleptr, const SolverCblk *cblk, pastix_coefside_t side, pastix_coeftype_t flttype)
Register a cblk at the StarPU level.
void starpu_sparse_matrix_destroy(starpu_sparse_matrix_desc_t *spmtx)
Free the StarPU descriptor of the sparse matrix.
Interface data structure to register the pieces of data in StarPU.
Additional StarPU handlers for a column-block when using 2D kernels.
Definition: pastix_starpu.h:99
StarPU descriptor stucture for the sparse matrix.
pastix_int_t cblkmin2d
Definition: solver.h:215
static pastix_int_t blok_rownbr(const SolverBlok *blok)
Compute the number of rows of a block.
Definition: solver.h:390
void * ucoeftab
Definition: solver.h:172
pastix_int_t gcblknbr
Definition: solver.h:207
void * handler[2]
Definition: solver.h:138
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition: solver.h:324
pastix_int_t cblknbr
Definition: solver.h:208
SolverBlok *restrict bloktab
Definition: solver.h:223
pastix_int_t coefind
Definition: solver.h:144
SolverBlok * fblokptr
Definition: solver.h:163
pastix_int_t bloknbr
Definition: solver.h:220
pastix_int_t gcblknum
Definition: solver.h:169
SolverCblk *restrict cblktab
Definition: solver.h:222
void * handler[2]
Definition: solver.h:173
int8_t cblktype
Definition: solver.h:159
int ownerid
Definition: solver.h:175
int8_t partitioned
Definition: solver.h:160
Solver block structure.
Definition: solver.h:137
Solver column block structure.
Definition: solver.h:156
Solver column block structure.
Definition: solver.h:200