PaStiX Handbook  6.2.1
starpu_sparse_matrix.c
Go to the documentation of this file.
1 /**
2  *
3  * @file starpu_sparse_matrix.c
4  *
5  * PaStiX sparse matrix descriptor for StarPU.
6  *
7  * @copyright 2016-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.2.0
11  * @author Mathieu Faverge
12  * @author Pierre Ramet
13  * @author Tony Delarue
14  * @date 2021-01-03
15  *
16  * @addtogroup pastix_starpu
17  * @{
18  *
19  **/
20 #include "common.h"
21 #include "blend/solver.h"
22 #include "pastix_starpu.h"
23 #include <starpu_data.h>
24 
25 static inline void
26 pastix_starpu_filter_interface( void *father_interface,
27  void *child_interface,
28  struct starpu_data_filter *f,
29  unsigned id,
30  unsigned nchunks )
31 {
32  pastix_starpu_interface_t *father = (pastix_starpu_interface_t *)father_interface;
33  pastix_starpu_interface_t *child = (pastix_starpu_interface_t *)child_interface;
34  size_t *sizetab = (size_t *)f->filter_arg_ptr;
35  size_t childoff = 0;
36 
37  assert( father->id == PASTIX_STARPU_INTERFACE_ID );
38  assert( father->offset == -1 );
39  assert( father->cblk->cblktype & CBLK_LAYOUT_2D );
40 
41  child->id = father->id;
42  child->flttype = father->flttype;
43  child->offset = sizetab[id];
44  child->nbblok = sizetab[id+1] - sizetab[id];
45  child->allocsize = 0;
46  child->cblk = father->cblk;
47  child->dataptr = NULL;
48 
49  assert( child->offset >= 0 );
50 
51  if ( father->dataptr == NULL ) {
52  return;
53  }
54 
55  if ( father->cblk->cblktype & CBLK_COMPRESSED ) {
56  childoff = sizetab[id] * sizeof( pastix_lrblock_t );
57  child->allocsize = child->nbblok * sizeof( pastix_lrblock_t );
58  }
59  else {
60  SolverBlok *blok = father->cblk->fblokptr + sizetab[id];
61  SolverBlok *lblk = father->cblk->fblokptr + sizetab[id+1];
62  childoff = pastix_size_of( father->flttype ) * blok->coefind;
63 
64  if ( lblk->coefind == 0 ) {
65  int i;
66  int nbrow = 0;
67  for ( i=0; i<child->nbblok; i++, blok++) {
68  nbrow += blok_rownbr( blok );
69  }
70  child->allocsize = pastix_size_of( father->flttype ) * nbrow * cblk_colnbr( father->cblk );
71  }
72  else {
73  child->allocsize = pastix_size_of( father->flttype ) * (lblk->coefind - blok->coefind);
74  }
75  }
76 
77 #if defined(PASTIX_STARPU_INTERFACE_DEBUG)
78  fprintf( stderr,
79  "blok (%9s, size=%8ld, nbblok=%2ld )\n",
80  child->cblk->cblktype & CBLK_COMPRESSED ? "Low-rank" : "Full-rank",
81  child->allocsize, (long)(child->nbblok) );
82 #endif
83 
84  assert( child->allocsize > 0 );
85 
86  child->dataptr = father->dataptr + childoff;
87 
88  (void)nchunks;
89 }
90 
91 static inline void
92 pastix_starpu_register_interface( const starpu_sparse_matrix_desc_t *spmtx,
93  SolverCblk *cblk,
94  int myrank,
95  int side,
96  pastix_coeftype_t flttype )
97 {
98  starpu_data_handle_t *handler = ( (starpu_data_handle_t *)( cblk->handler ) ) + side;
99  int64_t tag_cblk = 2 * cblk->gcblknum + side;
100 
101  if ( cblk->ownerid == myrank ) {
102  pastix_starpu_register( handler, STARPU_MAIN_RAM, cblk, side, flttype );
103  }
104  else {
105  pastix_starpu_register( handler, -1, cblk, side, flttype );
106  }
107 
108 #if defined( PASTIX_WITH_MPI )
109  starpu_mpi_data_register( *handler, spmtx->mpitag + tag_cblk, cblk->ownerid );
110 #endif
111  (void)tag_cblk;
112  (void)spmtx;
113 }
114 
115 static inline void
116 pastix_starpu_register_cblk( const starpu_sparse_matrix_desc_t *spmtx,
117  SolverCblk *cblk,
118  int myrank,
119  pastix_coeftype_t flttype )
120 {
121  pastix_starpu_register_interface( spmtx, cblk, myrank, PastixLCoef, flttype );
122  if ( spmtx->mtxtype == PastixGeneral ) {
123  pastix_starpu_register_interface( spmtx, cblk, myrank, PastixUCoef, flttype );
124  }
125 }
126 
127 /**
128  *******************************************************************************
129  *
130  * @brief Generate the StarPU descriptor of the sparse matrix.
131  *
132  * This function creates the StarPU descriptor that will provide tha data
133  * mapping and memory location to StarPU for the computation.
134  *
135  *******************************************************************************
136  *
137  * @param[inout] solvmtx
138  * The solver matrix structure that describes the sparse matrix for
139  * PaStiX.
140  *
141  * @param[in] typesize
142  * The memory size of the arithmetic used to store the matrix
143  * coefficients.
144  *
145  * @param[in] mtxtype
146  * The type of sparse matrix to describe.
147  * @arg PastixGeneral: The sparse matrix is general.
148  * @arg PastixSymmetric: The sparse matrix is lower triangular symmetric.
149  * @arg PastixHermitian: The sparse matrix is lower triangular hermitian.
150  *
151  * @param[in] nodes
152  * The number of processes used to solve the problem.
153  *
154  * @param[in] myrank
155  * The rank of the calling process.
156  *
157  ******************************************************************************/
158 void
159 starpu_sparse_matrix_init( SolverMatrix *solvmtx,
160  int mtxtype,
161  int nodes,
162  int myrank,
163  pastix_coeftype_t flttype )
164 {
165  pastix_int_t cblknbr, cblkmin2d;
166  size_t key1, key2;
167  SolverCblk *cblk;
168  SolverBlok *blok, *lblok;
169  pastix_int_t n = 0, cblknum;
170  pastix_int_t nbrow;
171  size_t size;
172  int64_t tag_desc;
173 #if defined( PASTIX_WITH_MPI )
174  int64_t tag;
175 #endif
176 
177  starpu_sparse_matrix_desc_t *spmtx = solvmtx->starpu_desc;
178  if ( spmtx != NULL ) {
180  }
181  else {
182  spmtx = (starpu_sparse_matrix_desc_t *)malloc( sizeof( starpu_sparse_matrix_desc_t ) );
183  }
184 
185  tag_desc = ( solvmtx->gcblknbr + solvmtx->bloknbr ) * 2;
186  spmtx->mpitag = pastix_starpu_tag_book( tag_desc );
187  tag_desc = spmtx->mpitag + 2 * solvmtx->gcblknbr;
188  spmtx->typesze = pastix_size_of( flttype );
189  spmtx->mtxtype = mtxtype;
190  spmtx->solvmtx = solvmtx;
191  spmtx->cblktab_handle = NULL;
192  spmtx->gpu_blocktab = NULL;
193 
194  cblknbr = solvmtx->cblknbr;
195  cblkmin2d = solvmtx->cblkmin2d;
196  key1 = 2 * cblknbr;
197 
198  /* Initialize 1D cblk handlers */
199  cblk = spmtx->solvmtx->cblktab;
200  for ( cblknum = 0; cblknum < cblkmin2d; cblknum++, n++, cblk++ ) {
201  pastix_starpu_register_cblk( spmtx, cblk, myrank, flttype );
202  }
203 
204  /* Initialize 2D cblk handlers */
205  if ( cblkmin2d < cblknbr ) {
206  struct starpu_data_filter filter = { .filter_func = pastix_starpu_filter_interface };
207  starpu_cblk_t *cblkhandle;
208  size_t *sizetab = NULL;
209  pastix_int_t nchildren, sizenbr = 0;
210 
211  spmtx->cblktab_handle =
212  (starpu_cblk_t *)malloc( ( cblknbr - cblkmin2d ) * sizeof( starpu_cblk_t ) );
213 
214  cblk = spmtx->solvmtx->cblktab + cblkmin2d;
215  cblkhandle = spmtx->cblktab_handle;
216 
217  sizenbr = ( cblk[1].fblokptr - cblk[0].fblokptr ) + 1;
218  sizetab = malloc( sizenbr * sizeof( size_t ) );
219  assert( sizenbr >= 1 );
220 
221  for ( cblknum = cblkmin2d, n = 0; cblknum < cblknbr;
222  cblknum++, n++, cblk++, cblkhandle++ ) {
223  pastix_starpu_register_cblk( spmtx, cblk, myrank, flttype );
224 
225  if ( !( cblk->cblktype & CBLK_TASKS_2D ) ) {
226  continue;
227  }
228 
229  /* Let's build the sizetab array */
230  blok = cblk[0].fblokptr;
231  lblok = cblk[1].fblokptr;
232 
233  if ( ( lblok - blok ) >= sizenbr ) {
234  sizenbr = ( lblok - blok ) + 1;
235  free( sizetab );
236  sizetab = malloc( sizenbr * sizeof( size_t ) );
237  }
238  nchildren = 0;
239  sizetab[0] = 0;
240 
241  /*
242  * Diagonal block
243  */
244  sizetab[nchildren + 1] = 1;
245  nchildren++;
246 
247  /*
248  * Off-diagonal blocks
249  */
250  blok++;
251  for ( ; blok < lblok; blok++ ) {
252  nbrow = 1;
253 
254  while ( ( blok + 1 < lblok ) &&
255  ( blok[0].fcblknm == blok[1].fcblknm ) &&
256  ( blok[0].lcblknm == blok[1].lcblknm ) )
257  {
258  blok++;
259  nbrow++;
260  }
261  size = nbrow;
262 
263  sizetab[nchildren + 1] = sizetab[nchildren] + size;
264  nchildren++;
265  }
266  filter.nchildren = nchildren;
267  filter.filter_arg_ptr = sizetab;
268 
269  cblkhandle->handlenbr = nchildren;
270  if ( mtxtype == PastixGeneral ) {
271  cblkhandle->handletab = (starpu_data_handle_t *)malloc(
272  2 * nchildren * sizeof( starpu_data_handle_t ) );
273 
274  starpu_data_partition_plan( cblk->handler[0], &filter, cblkhandle->handletab );
275 
276  starpu_data_partition_plan(
277  cblk->handler[1], &filter, cblkhandle->handletab + nchildren );
278  }
279  else {
280  cblkhandle->handletab =
281  (starpu_data_handle_t *)malloc( nchildren * sizeof( starpu_data_handle_t ) );
282 
283  starpu_data_partition_plan( cblk->handler[0], &filter, cblkhandle->handletab );
284  }
285 
286  nchildren = 0;
287  blok = cblk[0].fblokptr;
288  lblok = cblk[1].fblokptr;
289 
290  /*
291  * Diagonal block
292  */
293  blok->handler[0] = cblkhandle->handletab[nchildren];
294 #if defined( PASTIX_WITH_MPI )
295  tag = tag_desc + 2 * ( blok - solvmtx->bloktab );
296  starpu_mpi_data_register( blok->handler[0], tag, cblk->ownerid );
297 #endif
298  if ( mtxtype == PastixGeneral ) {
299  blok->handler[1] = cblkhandle->handletab[cblkhandle->handlenbr + nchildren];
300 #if defined( PASTIX_WITH_MPI )
301  tag = tag_desc + 2 * ( blok - solvmtx->bloktab ) + 1;
302  starpu_mpi_data_register( blok->handler[1], tag, cblk->ownerid );
303 #endif
304  }
305  else {
306  blok->handler[1] = NULL;
307  }
308  nchildren++;
309 
310  /*
311  * Off-diagonal blocks
312  */
313  blok++;
314  for ( ; blok < lblok; blok++ ) {
315  blok->handler[0] = cblkhandle->handletab[nchildren];
316 #if defined( PASTIX_WITH_MPI )
317  tag = tag_desc + 2 * ( blok - solvmtx->bloktab );
318  starpu_mpi_data_register( blok->handler[0], tag, cblk->ownerid );
319 #endif
320  if ( mtxtype == PastixGeneral ) {
321  blok->handler[1] = cblkhandle->handletab[cblkhandle->handlenbr + nchildren];
322 #if defined( PASTIX_WITH_MPI )
323  tag = tag_desc + 2 * ( blok - solvmtx->bloktab ) + 1;
324  starpu_mpi_data_register( blok->handler[1], tag, cblk->ownerid );
325 #endif
326  }
327  else {
328  blok->handler[1] = NULL;
329  }
330  nchildren++;
331 
332  while ( ( blok < lblok ) && ( blok[0].fcblknm == blok[1].fcblknm ) &&
333  ( blok[0].lcblknm == blok[1].lcblknm ) ) {
334  blok++;
335  blok->handler[0] = NULL;
336  blok->handler[1] = NULL;
337  }
338  }
339  }
340 
341  if ( sizetab != NULL ) {
342  free( sizetab );
343  }
344  }
345  solvmtx->starpu_desc = spmtx;
346 
347  (void)key1;
348  (void)key2;
349  (void)nodes;
350  (void)myrank;
351  (void)tag_desc;
352 }
353 
354 /**
355  *******************************************************************************
356  *
357  * @brief Submit asynchronous calls to retrieve the data on main memory.
358  *
359  *******************************************************************************
360  *
361  * @param[inout] spmtx
362  * The sparse matrix descriptor to retrieve on main memory.
363  *
364  ******************************************************************************/
365 void
367 {
368  SolverCblk *cblk;
369  pastix_int_t i;
370 
371  cblk = spmtx->solvmtx->cblktab;
372  for ( i = 0; i < spmtx->solvmtx->cblknbr; i++, cblk++ ) {
373  assert( cblk->handler[0] );
374 
375 #if defined( PASTIX_WITH_MPI )
376  starpu_mpi_cache_flush( spmtx->solvmtx->solv_comm, cblk->handler[0] );
377 #endif
378  if ( cblk->ownerid == spmtx->solvmtx->clustnum ) {
379  starpu_data_acquire_cb( cblk->handler[0],
380  STARPU_R,
381  ( void( * )( void * ) ) & starpu_data_release,
382  cblk->handler[0] );
383  }
384 
385  if ( cblk->ucoeftab ) {
386 #if defined( PASTIX_WITH_MPI )
387  starpu_mpi_cache_flush( spmtx->solvmtx->solv_comm, cblk->handler[1] );
388 #endif
389  if ( cblk->ownerid == spmtx->solvmtx->clustnum ) {
390  starpu_data_acquire_cb( cblk->handler[1],
391  STARPU_R,
392  ( void( * )( void * ) ) & starpu_data_release,
393  cblk->handler[1] );
394  }
395  }
396  }
397 }
398 
399 /**
400  *******************************************************************************
401  *
402  * @brief Free the StarPU descriptor of the sparse matrix.
403  *
404  * This function destroys the StarPU descriptor, but do not free the matrix data
405  * that are managed by PaStiX.
406  *
407  *******************************************************************************
408  *
409  * @param[inout] spmtx
410  * The descriptor to free.
411  *
412  ******************************************************************************/
413 void
415 {
416  starpu_cblk_t *cblkhandle;
417  SolverCblk *cblk;
418  pastix_int_t i, cblkmin2d;
419 
420  cblkmin2d = spmtx->solvmtx->cblkmin2d;
421  cblk = spmtx->solvmtx->cblktab;
422  for ( i = 0; i < cblkmin2d; i++, cblk++ ) {
423  if ( cblk->handler[0] ) {
424  starpu_data_unregister( cblk->handler[0] );
425 
426  if ( cblk->handler[1] ) {
427  starpu_data_unregister( cblk->handler[1] );
428  }
429  }
430 
431  cblk->handler[0] = NULL;
432  cblk->handler[1] = NULL;
433  }
434 
435  cblkhandle = spmtx->cblktab_handle;
436  for ( i = cblkmin2d; i < spmtx->solvmtx->cblknbr; i++, cblk++, cblkhandle++ ) {
437  if ( cblk->cblktype & CBLK_TASKS_2D ) {
438  if ( cblk->handler[0] ) {
439  starpu_data_partition_clean(
440  cblk->handler[0], cblkhandle->handlenbr, cblkhandle->handletab );
441 
442  if ( cblk->handler[1] ) {
443  starpu_data_partition_clean( cblk->handler[1],
444  cblkhandle->handlenbr,
445  cblkhandle->handletab + cblkhandle->handlenbr );
446  }
447  free( cblkhandle->handletab );
448  }
449  }
450 
451  if ( cblk->handler[0] ) {
452  starpu_data_unregister( cblk->handler[0] );
453  if ( cblk->handler[1] ) {
454  starpu_data_unregister( cblk->handler[1] );
455  }
456  }
457 
458  cblk->handler[0] = NULL;
459  cblk->handler[1] = NULL;
460  }
461 
462  if ( spmtx->cblktab_handle != NULL ) {
463  free( spmtx->cblktab_handle );
464  }
465 
466  pastix_starpu_tag_release( spmtx->mpitag );
467 }
468 
469 /**
470  *******************************************************************************
471  *
472  * @brief TODO
473  *
474  * TODO
475  *
476  *******************************************************************************
477  *
478  * @param[in] spmtx
479  * The descriptor to free.
480  *
481  ******************************************************************************/
482 void
484  SolverCblk *cblk,
485  starpu_cblk_t *starpu_cblk )
486 {
487  starpu_data_handle_t cblkhandle = cblk->handler[side];
488  int nsubparts = starpu_cblk->handlenbr;
489  starpu_data_handle_t *blokhandles = starpu_cblk->handletab + side * nsubparts;
490 
491  starpu_data_partition_submit( cblkhandle, nsubparts, blokhandles );
492 }
493 
494 /**
495  *******************************************************************************
496  *
497  * @brief TODO
498  *
499  * TODO
500  *
501  *******************************************************************************
502  *
503  * @param[in] spmtx
504  * The descriptor to free.
505  *
506  ******************************************************************************/
507 void
509  int rank,
510  pastix_coefside_t side,
511  SolverCblk *cblk,
512  starpu_cblk_t *starpu_cblk )
513 {
514  starpu_data_handle_t cblkhandle = cblk->handler[side];
515  int nsubparts = starpu_cblk->handlenbr;
516  starpu_data_handle_t *blokhandles =
517  starpu_cblk->handletab + ( ( side == PastixUCoef ) ? nsubparts : 0 );
518 
519 #if defined( PASTIX_WITH_MPI )
520  int i;
521 
522  for ( i = 0; i < nsubparts; i++ ) {
523  starpu_mpi_cache_flush( spmtx->solvmtx->solv_comm, blokhandles[i] );
524  }
525 #endif
526 
527  if ( cblk->ownerid == rank ) {
528  starpu_data_unpartition_submit( cblkhandle, nsubparts, blokhandles, STARPU_MAIN_RAM );
529  }
530  else {
531  starpu_data_unpartition_submit( cblkhandle, nsubparts, blokhandles, -1 );
532  }
533 
534  (void)spmtx;
535 }
536 
537 /**
538  *@}
539  */
solver_cblk_s::ownerid
int ownerid
Definition: solver.h:146
solver.h
starpu_sparse_matrix_desc_s::solvmtx
SolverMatrix * solvmtx
Definition: pastix_starpu.h:96
blok_rownbr
static pastix_int_t blok_rownbr(const SolverBlok *blok)
Compute the number of rows of a block.
Definition: solver.h:313
cblk_colnbr
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition: solver.h:247
pastix_starpu_partition_submit
void pastix_starpu_partition_submit(pastix_coefside_t side, SolverCblk *cblk, starpu_cblk_t *starpu_cblk)
TODO.
Definition: starpu_sparse_matrix.c:483
pastix_starpu_interface_s
Interface data structure to register the pieces of data in StarPU.
Definition: pastix_starpu.h:228
solver_cblk_s::fblokptr
SolverBlok * fblokptr
Definition: solver.h:134
pastix_starpu_interface_s::allocsize
size_t allocsize
Definition: pastix_starpu.h:233
pastix_starpu_interface_s::cblk
SolverCblk * cblk
Definition: pastix_starpu.h:234
pastix_starpu_interface_s::nbblok
int nbblok
Definition: pastix_starpu.h:232
starpu_cblk_t
struct starpu_cblk_s starpu_cblk_t
Additional StarPU handlers for a column-block when using 2D kernels.
solver_cblk_s
Solver column block structure.
Definition: solver.h:127
pastix_coefside_t
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
solver_blok_s::handler
void * handler[2]
Definition: solver.h:108
starpu_sparse_matrix_init
void starpu_sparse_matrix_init(SolverMatrix *solvmtx, int mtxtype, int nodes, int myrank, pastix_coeftype_t flttype)
Generate the StarPU descriptor of the sparse matrix.
Definition: starpu_sparse_matrix.c:159
solver_blok_s
Solver block structure.
Definition: solver.h:107
PastixGeneral
@ PastixGeneral
Definition: api.h:435
pastix_starpu_interface_s::offset
int offset
Definition: pastix_starpu.h:231
solver_cblk_s::ucoeftab
void * ucoeftab
Definition: solver.h:143
pastix_lrblock_t
struct pastix_lrblock_s pastix_lrblock_t
The block low-rank structure to hold a matrix in low-rank form.
pastix_starpu_interface_s::id
enum starpu_data_interface_id id
Definition: pastix_starpu.h:229
pastix_starpu_interface_s::dataptr
void * dataptr
Definition: pastix_starpu.h:235
starpu_sparse_matrix_desc_s::mtxtype
int mtxtype
Definition: pastix_starpu.h:95
pastix_starpu_register
void pastix_starpu_register(starpu_data_handle_t *handleptr, int home_node, SolverCblk *cblk, pastix_coefside_t side, pastix_coeftype_t flttype)
Register a cblk at the StarPU level.
Definition: pastix_starpu_interface.c:581
PastixUCoef
@ PastixUCoef
Definition: api.h:457
PastixLCoef
@ PastixLCoef
Definition: api.h:456
starpu_cblk_s::handletab
starpu_data_handle_t * handletab
Definition: pastix_starpu.h:86
solver_cblk_s::gcblknum
pastix_int_t gcblknum
Definition: solver.h:140
solver_blok_s::coefind
pastix_int_t coefind
Definition: solver.h:114
starpu_cblk_s::handlenbr
pastix_int_t handlenbr
Definition: pastix_starpu.h:85
solver_cblk_s::cblktype
int8_t cblktype
Definition: solver.h:130
PASTIX_STARPU_INTERFACE_ID
#define PASTIX_STARPU_INTERFACE_ID
Alias to get the Interface id.
Definition: pastix_starpu.h:223
pastix_starpu.h
pastix_starpu_unpartition_submit
void pastix_starpu_unpartition_submit(const starpu_sparse_matrix_desc_t *spmtx, int rank, pastix_coefside_t side, SolverCblk *cblk, starpu_cblk_t *starpu_cblk)
TODO.
Definition: starpu_sparse_matrix.c:508
starpu_sparse_matrix_getoncpu
void starpu_sparse_matrix_getoncpu(starpu_sparse_matrix_desc_t *desc)
Submit asynchronous calls to retrieve the data on main memory.
Definition: starpu_sparse_matrix.c:366
starpu_sparse_matrix_desc_s::gpu_blocktab
void ** gpu_blocktab
Definition: pastix_starpu.h:98
starpu_sparse_matrix_desc_s::cblktab_handle
starpu_cblk_t * cblktab_handle
Definition: pastix_starpu.h:97
starpu_sparse_matrix_desc_s::typesze
int typesze
Definition: pastix_starpu.h:94
pastix_coeftype_t
#define pastix_coeftype_t
Arithmetic types.
Definition: api.h:283
pastix_starpu_interface_s::flttype
pastix_coeftype_t flttype
Definition: pastix_starpu.h:230
starpu_sparse_matrix_desc_s::mpitag
int64_t mpitag
Definition: pastix_starpu.h:93
solver_cblk_s::handler
void * handler[2]
Definition: solver.h:144
starpu_sparse_matrix_desc_s
StarPU descriptor stucture for the sparse matrix.
Definition: pastix_starpu.h:92
starpu_sparse_matrix_destroy
void starpu_sparse_matrix_destroy(starpu_sparse_matrix_desc_t *desc)
Free the StarPU descriptor of the sparse matrix.
Definition: starpu_sparse_matrix.c:414
starpu_cblk_s
Additional StarPU handlers for a column-block when using 2D kernels.
Definition: pastix_starpu.h:84