PaStiX Handbook  6.3.2
starpu_sgetrf.c
Go to the documentation of this file.
1 /**
2  *
3  * @file starpu_sgetrf.c
4  *
5  * PaStiX sgetrf StarPU wrapper.
6  *
7  * @copyright 2016-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.3.2
11  * @author Mathieu Faverge
12  * @author Pierre Ramet
13  * @author Alycia Lisito
14  * @author Nolan Bredel
15  * @author Tom Moenne-Loccoz
16  * @date 2023-11-07
17  * @generated from /builds/solverstack/pastix/sopalin/starpu/starpu_zgetrf.c, normal z -> s, Wed Dec 13 12:09:29 2023
18  *
19  * @addtogroup starpu_getrf
20  * @{
21  *
22  **/
23 #include "common.h"
24 #include "blend/solver.h"
25 #include "sopalin/sopalin_data.h"
26 #include "pastix_scores.h"
27 #include "pastix_starpu.h"
28 #include "pastix_sstarpu.h"
29 #include "timing.h"
30 
31 /**
32  *******************************************************************************
33  *
34  * @brief Submit the update tasks generated by one block of a column block.
   *
   * When cblk carries the CBLK_TASKS_2D flag, the blocks from blokB to the end
   * of cblk are walked and one submission step is done per run of consecutive
   * blocks sharing the same fcblknm and lcblknm. Otherwise a single
   * "Update on L" submission is done, followed by an "Update on U" submission
   * unless blokB is the last block of cblk.
35  *
36  *******************************************************************************
37  *
38  * @param[in] sopalin_data
39  * Not referenced in the lines visible in this listing; presumably forwarded
   * to the submission calls whose first lines are missing here (to confirm).
40  *
41  * @param[in] cblk
42  * Column block that owns blokB. Its cblktype selects the 2D or the 1D path,
   * and cblk[1].fblokptr is used as the end of its block list.
43  *
44  * @param[in] blokB
45  * Block of cblk from which the updates are generated. The 2D path iterates
   * from this block up to the end of cblk.
46  *
47  * @param[in] fcblk
48  * Column block forwarded to every submission call together with cblk
   * (presumably the facing column block receiving the update - to confirm).
49  *
50  * @param[in] prio
51  * Priority value forwarded unchanged to every submission call.
52  *
53  *******************************************************************************/
54 void
55 starpu_task_getrf_sgemmsp( sopalin_data_t *sopalin_data,
56  SolverCblk *cblk,
57  const SolverBlok *blokB,
58  SolverCblk *fcblk,
59  int prio )
60 {
61  const SolverBlok *blokA, *lblk;
   /* End sentinel: first block of the column block stored right after cblk. */
62  lblk = cblk[1].fblokptr;
63 
64  if ( cblk->cblktype & CBLK_TASKS_2D ) {
65  for ( blokA = blokB; blokA < lblk; blokA++ ) {
   /* First iteration: a single submission pairing blokB with itself. */
66  if ( blokA == blokB ) {
   /* NOTE(review): listing line 67 (callee name and leading arguments) is
    * missing from this extraction; the trailing arguments look like the
    * tail of a starpu_task_blok_sgemmsp() call - confirm against the file. */
68  cblk, fcblk, blokB, blokB, prio );
69 
70  /* Skip the following A blocks that share fcblknm and lcblknm with the
    * current one, so that each such run is handled only once. The test reads
    * blokA[1], i.e. one block past the current position. */
71  while ( ( blokA < lblk ) &&
72  ( blokA[0].fcblknm == blokA[1].fcblknm ) &&
73  ( blokA[0].lcblknm == blokA[1].lcblknm ) )
74  {
75  blokA++;
76  }
77  continue;
78  }
   /* Later runs: two submissions with the same ( blokA, blokB ) pair.
    * NOTE(review): listing lines 79 and 82 (the heads of both calls) are
    * missing from this extraction, so what distinguishes the two calls
    * cannot be read here. */
80  cblk, fcblk, blokA, blokB, prio );
81 
83  cblk, fcblk, blokA, blokB, prio );
84 
85  /* Skip the following A blocks that share fcblknm and lcblknm with the
    * current one (same rule as in the first-iteration branch). */
86  while ( ( blokA < lblk ) &&
87  ( blokA[0].fcblknm == blokA[1].fcblknm ) &&
88  ( blokA[0].lcblknm == blokA[1].lcblknm ) )
89  {
90  blokA++;
91  }
92  }
93  }
94  else {
95  /* Update on L
    * NOTE(review): listing line 96 (head of the call) is missing; the
    * trailing arguments look like the tail of a starpu_task_cblk_sgemmsp()
    * call - confirm against the file. */
97  cblk, blokB, fcblk, prio );
98 
99  /* Update on U: only submitted when blokB is not the last block of cblk.
    * NOTE(review): listing line 101 (head of the call) is missing. */
100  if ( blokB+1 < lblk ) {
102  cblk, blokB, fcblk, prio );
103  }
104  }
105 }
106 
107 /**
108  *******************************************************************************
109  *
110  * @brief Submit the factorization task(s) of one column block.
    *
    * When cblk carries the CBLK_TASKS_2D flag, starpu_task_blok_sgetrf() is
    * called for cblk, then the blocks following the first block of cblk are
    * walked and two submissions are done per run of consecutive blocks sharing
    * the same fcblknm and lcblknm. Otherwise the whole column block is handled
    * by a single call to starpu_task_cblk_sgetrfsp().
111  *
112  *******************************************************************************
113  *
114  * @param[in] sopalin_data
115  * Forwarded to starpu_task_blok_sgetrf() and starpu_task_cblk_sgetrfsp().
116  *
117  * @param[in] cblk
118  * Column block to process. Its cblktype selects the 2D or the 1D path.
119  *
120  * @param[in] prio
121  * Priority value forwarded unchanged to every submission call.
122  *
123  *******************************************************************************/
124 void
125 starpu_task_sgetrfsp( sopalin_data_t *sopalin_data,
126  SolverCblk *cblk,
127  int prio )
128 {
129  SolverBlok *lblk, *blok;
130  pastix_int_t m;
131 
132  if ( cblk->cblktype & CBLK_TASKS_2D ) {
133  starpu_task_blok_sgetrf( sopalin_data, cblk, prio );
134 
    /* End sentinel: first block of the column block stored right after cblk. */
135  lblk = cblk[1].fblokptr;
    /* Start at the block following the first block of cblk.
     * NOTE(review): m is incremented but not read in the lines visible here. */
136  for ( blok = cblk->fblokptr + 1, m = 0; blok < lblk; blok++, m++ ) {
137 
    /* NOTE(review): listing lines 138-139 and 142-143 (the heads of the two
     * calls below) are missing from this extraction; the trailing arguments
     * look like the tail of starpu_task_blok_strsmsp() calls - confirm
     * against the file. */
140  cblk, blok, prio );
141 
144  cblk, blok, prio );
145 
146  /* Skip the following blocks that share fcblknm and lcblknm with the
     * current one, so that each such run is handled only once. The test reads
     * blok[1], i.e. one block past the current position. */
147  while ( ( blok < lblk ) &&
148  ( blok[0].fcblknm == blok[1].fcblknm ) &&
149  ( blok[0].lcblknm == blok[1].lcblknm ) )
150  {
151  blok++;
152  }
153  }
154  }
155  else {
156  starpu_task_cblk_sgetrfsp( sopalin_data, cblk, prio );
157  }
158 }
159 
160 /**
161  *******************************************************************************
162  *
163  * @brief Perform a sparse LU factorization with 1D kernels, submitting the tasks in right-looking order.
164  *
165  * The function performs the LU factorization of a sparse general matrix A.
166  * The factorization has the form
167  *
168  * \f[ A = L\times U \f]
169  *
170  * where L is a sparse lower triangular matrix, and U a sparse upper triangular
171  * with the same pattern as L^t.
172  *
173  *******************************************************************************
174  *
175  * @param[inout] sopalin_data
176  * Solver matrix information structure that will guide the algorithm.
177  *
178  * @param[inout] desc
179  * StarPU descriptor of the sparse matrix.
180  *
181  ******************************************************************************/
182 void
183 starpu_sgetrf_sp1dplus_rl( sopalin_data_t *sopalin_data,
    /* NOTE(review): listing line 184 is missing from this extraction; the body
     * references a `desc` parameter that must be declared on that line. */
185 {
186  const SolverMatrix *solvmtx = sopalin_data->solvmtx;
187  SolverCblk *cblk, *fcblk;
188  SolverBlok *blok, *lblk;
189  pastix_int_t k, m, cblknbr, cblk_n;
190 
191  cblknbr = solvmtx->cblknbr;
192  cblk = solvmtx->cblktab;
    /* Walk the column blocks in storage order. */
193  for (k=0; k<solvmtx->cblknbr; k++, cblk++){
194 
    /* Stop at the first column block flagged CBLK_IN_SCHUR: it and all the
     * following ones are left untouched by this loop. */
195  if ( cblk->cblktype & CBLK_IN_SCHUR ) {
196  break;
197  }
198 
    /* Factorization of the column block; the priority value decreases as k
     * grows. */
199  starpu_task_cblk_sgetrfsp( sopalin_data, cblk,
200  cblknbr - k );
201 
202  blok = cblk->fblokptr + 1; /* block following the first block of cblk (presumably the first off-diagonal block) */
203  lblk = cblk[1].fblokptr; /* first block of the next column block, used as end sentinel */
204 
205  /* One iteration per remaining block of the column block; m counts them. */
206  for(m=0; blok < lblk; blok++, m++ )
207  {
    /* Column block designated by blok->fcblknm; cblk_n is its index in
     * cblktab, i.e. blok->fcblknm again. */
208  fcblk = (solvmtx->cblktab + blok->fcblknm);
209  cblk_n = fcblk - solvmtx->cblktab;
210 
211  /* Update on L
     * NOTE(review): listing line 212 (head of the call) is missing from this
     * extraction; the trailing arguments look like the tail of a
     * starpu_task_cblk_sgemmsp() call - confirm against the file. */
213  cblk, blok, fcblk,
214  cblknbr - pastix_imin( k + m, cblk_n ) );
215 
216  /* Update on U: only submitted when blok is not the last block of cblk.
     * NOTE(review): listing line 218 (head of the call) is missing. */
217  if ( blok+1 < lblk ) {
219  cblk, blok, fcblk,
220  cblknbr - pastix_imin( k + m, cblk_n ) );
221  }
222  }
    /* NOTE(review): listing line 223 is missing from this extraction. */
224  }
    /* desc is not used by the visible code; the cast silences the
     * unused-parameter warning. */
225  (void)desc;
226 }
227 
228 /**
229  *******************************************************************************
230  *
231  * @brief Perform a sparse LU factorization with 1D kernels, submitting the tasks in left-looking order.
232  *
233  * The function performs the LU factorization of a sparse general matrix A.
234  * The factorization has the form
235  *
236  * \f[ A = L\times U \f]
237  *
238  * where L is a sparse lower triangular matrix, and U a sparse upper triangular
239  * with the same pattern as L^t.
240  *
241  *******************************************************************************
242  *
243  * @param[inout] sopalin_data
244  * Solver matrix information structure that will guide the algorithm.
245  *
246  * @param[inout] desc
247  * StarPU descriptor of the sparse matrix.
248  *
249  ******************************************************************************/
250 void
251 starpu_sgetrf_sp1dplus_ll( sopalin_data_t *sopalin_data,
    /* NOTE(review): listing line 252 is missing from this extraction; the body
     * references a `desc` parameter that must be declared on that line. */
253 {
254  const SolverMatrix *solvmtx = sopalin_data->solvmtx;
255  SolverCblk *cblk, *fcblk, *lcblk;
256  SolverBlok *blok, *lblk;
257  pastix_int_t k, m, cblknbr, cblk_n;
258 
259  cblknbr = solvmtx->cblknbr;
260  cblk = solvmtx->cblktab;
    /* Walk the column blocks in storage order: first submit the updates that
     * target cblk, then its factorization. */
261  for (k=0; k<solvmtx->cblknbr; k++, cblk++){
262 
    /* browtab[ cblk[0].brownum .. cblk[1].brownum - 1 ] holds the indices in
     * bloktab of the blocks handled for cblk. */
263  for ( m = cblk[0].brownum; m < cblk[1].brownum; m++ ) {
264  blok = solvmtx->bloktab + solvmtx->browtab[m];
    /* Column block designated by blok->lcblknm (presumably the owner of
     * blok - to confirm). */
265  lcblk = solvmtx->cblktab + blok->lcblknm;
266 
    /* The first contributor flagged CBLK_IN_SCHUR ends the scan of the
     * remaining browtab entries of cblk. */
267  if ( lcblk->cblktype & CBLK_IN_SCHUR ) {
268  break;
269  }
270 
271  fcblk = solvmtx->cblktab + blok->fcblknm;
272  cblk_n = fcblk - solvmtx->cblktab;
273 
    /* Every block listed for cblk is expected to have fcblknm == k. */
274  assert( fcblk == cblk );
275 
276  /* Update on L
     * NOTE(review): listing line 277 (head of the call) is missing from this
     * extraction.
     * NOTE(review): with the assertion above cblk_n equals k, and m is here
     * an index into browtab, so pastix_imin( k + m, cblk_n ) looks like it
     * always yields k when m >= 0 - confirm that this is intended. */
278  lcblk, blok, cblk,
279  cblknbr - pastix_imin( k + m, cblk_n ) );
280 
281  lblk = fcblk[1].fblokptr;
282 
283  /* Update on U
     * NOTE(review): listing line 285 (head of the call) is missing.
     * NOTE(review): the guard compares blok with the first block of the
     * column block following fcblk, whereas the right-looking variant
     * compares against the end of the column block passed as source
     * (lcblk here) - confirm which bound is intended. */
284  if ( blok+1 < lblk ) {
286  lcblk, blok, cblk,
287  cblknbr - pastix_imin( k + m, cblk_n ) );
288  }
289  }
290 
    /* Column blocks flagged CBLK_IN_SCHUR are not factorized. */
291  if ( cblk->cblktype & CBLK_IN_SCHUR ) {
292  continue;
293  }
294 
295  starpu_task_cblk_sgetrfsp( sopalin_data, cblk,
296  cblknbr - k );
297  }
298 
299  cblk = solvmtx->cblktab;
    /* Second pass over all the column blocks.
     * NOTE(review): listing line 301 (the loop body) is missing from this
     * extraction. */
300  for ( k = 0; k < solvmtx->cblknbr; k++, cblk++ ) {
302  }
    /* desc is not used by the visible code; the cast silences the
     * unused-parameter warning. */
303  (void)desc;
304 }
305 
306 /**
307  *******************************************************************************
308  *
309  * @brief Perform a sparse LU factorization with 1D and 2D kernels, submitting the tasks in right-looking order.
310  *
311  * The function performs the LU factorization of a sparse general matrix A.
312  * The factorization has the form
313  *
314  * \f[ A = L\times U \f]
315  *
316  * where L is a sparse lower triangular matrix, and U a sparse upper triangular
317  * with the same pattern as L^t.
318  *
319  *******************************************************************************
320  *
321  * @param[inout] sopalin_data
322  * Solver matrix information structure that will guide the algorithm.
323  *
324  * @param[inout] desc
325  * StarPU descriptor of the sparse matrix.
326  *
327  ******************************************************************************/
328 void
329 starpu_sgetrf_sp2d_rl( sopalin_data_t *sopalin_data,
    /* NOTE(review): listing line 330 is missing from this extraction; the body
     * references a `desc` parameter that must be declared on that line. */
331 {
332  const SolverMatrix *solvmtx = sopalin_data->solvmtx;
333  SolverCblk *cblk, *fcblk;
334  SolverBlok *blok, *lblk;
335  pastix_int_t k, m, cblknbr, cblk_n;
336 
337  cblknbr = solvmtx->cblknbr;
338 
339  /* Let's submit all 1D tasks first: indices 0 to cblkmax1d, both included */
340  cblk = solvmtx->cblktab;
341  for ( k = 0; k <= solvmtx->cblkmax1d; k++, cblk++ ) {
342 
    /* The first column block flagged CBLK_IN_SCHUR ends the 1D pass. */
343  if ( cblk->cblktype & CBLK_IN_SCHUR ) {
344  break;
345  }
346 
    /* 2D column blocks found in this range are left to the second pass. */
347  if ( cblk->cblktype & CBLK_TASKS_2D ) {
348  continue;
349  }
350 
351  starpu_task_sgetrfsp( sopalin_data, cblk, cblknbr - k );
352 
353  blok = cblk->fblokptr + 1; /* block following the first block of cblk (presumably the first off-diagonal block) */
354  lblk = cblk[1].fblokptr; /* first block of the next column block, used as end sentinel */
355 
356  /* One iteration per remaining block of the column block; m counts them. */
357  for(m=0; blok < lblk; blok++, m++ )
358  {
359  fcblk = (solvmtx->cblktab + blok->fcblknm);
360  cblk_n = fcblk - solvmtx->cblktab;
361 
362  starpu_task_getrf_sgemmsp( sopalin_data, cblk, blok, fcblk,
363  cblknbr - pastix_imin( k + m, cblk_n ) );
364  }
    /* NOTE(review): listing line 365 is missing from this extraction. */
366  }
367 
368  /* Now we submit all 2D tasks: indices cblkmin2d to cblknbr - 1 */
369  cblk = solvmtx->cblktab + solvmtx->cblkmin2d;
370  for ( k = solvmtx->cblkmin2d; k < solvmtx->cblknbr; k++, cblk++ ) {
371 
    /* Unlike the 1D pass, column blocks flagged CBLK_IN_SCHUR are skipped
     * here without ending the loop. */
372  if ( cblk->cblktype & CBLK_IN_SCHUR ) {
373  continue;
374  }
375 
376  if ( ! ( cblk->cblktype & CBLK_TASKS_2D ) ) {
377  continue; /* skip 1D cblk */
378  }
379 
380  starpu_task_sgetrfsp( sopalin_data, cblk, cblknbr - k );
381 
382  lblk = cblk[1].fblokptr;
383 
384  for ( blok = cblk->fblokptr + 1, m = 0; blok < lblk; blok++, m++ ) {
385  fcblk = ( solvmtx->cblktab + blok->fcblknm );
    /* Both branches yield blok->fcblknm, since fcblk was just computed as
     * cblktab + blok->fcblknm; the 2D flag is also always set at this point
     * because 1D column blocks were skipped above. */
386  cblk_n = ( cblk->cblktype & CBLK_TASKS_2D ) ? blok->fcblknm : fcblk - solvmtx->cblktab;
387 
388  starpu_task_getrf_sgemmsp( sopalin_data, cblk, blok, fcblk,
389  cblknbr - pastix_imin( k + m, cblk_n ) );
390 
391  /* Skip the following blocks that share fcblknm and lcblknm with the
     * current one, so that each such run is submitted only once. */
392  while( ( blok < lblk ) &&
393  ( blok[0].fcblknm == blok[1].fcblknm ) &&
394  ( blok[0].lcblknm == blok[1].lcblknm ) )
395  {
396  blok++;
397  }
398  }
    /* NOTE(review): listing line 399 is missing from this extraction. */
400  }
401 
    /* desc is not used by the visible code; the cast silences the
     * unused-parameter warning. */
402  (void)desc;
403 }
404 
405 /**
406  *******************************************************************************
407  *
408  * @brief Perform a sparse LU factorization with 1D and 2D kernels, submitting the tasks in left-looking order.
409  *
410  * The function performs the LU factorization of a sparse general matrix A.
411  * The factorization has the form
412  *
413  * \f[ A = L\times U \f]
414  *
415  * where L is a sparse lower triangular matrix, and U a sparse upper triangular
416  * with the same pattern as L^t.
417  *
418  *******************************************************************************
419  *
420  * @param[inout] sopalin_data
421  * Solver matrix information structure that will guide the algorithm.
422  *
423  * @param[inout] desc
424  * StarPU descriptor of the sparse matrix.
425  *
426  ******************************************************************************/
427 void
428 starpu_sgetrf_sp2d_ll( sopalin_data_t *sopalin_data,
    /* NOTE(review): listing line 429 is missing from this extraction; the body
     * references a `desc` parameter that must be declared on that line. */
430 {
431  const SolverMatrix *solvmtx = sopalin_data->solvmtx;
432  SolverCblk *cblk, *fcblk;
433  SolverBlok *blok = NULL;
434  SolverBlok *blok_prev;
435  pastix_int_t k, m, cblknbr;
436 
437  cblknbr = solvmtx->cblknbr;
438  fcblk = solvmtx->cblktab;
439 
    /* Walk the column blocks in storage order: first submit the updates that
     * target fcblk, then its factorization. */
440  for ( k = 0; k < cblknbr; k++, fcblk++ ) {
441 
    /* browtab[ fcblk[0].brownum .. fcblk[1].brownum - 1 ] holds the indices
     * in bloktab of the blocks handled for fcblk. */
442  for ( m = fcblk[0].brownum; m < fcblk[1].brownum; m++ ) {
    /* Remember the block visited just before; it is updated on every
     * iteration, including those skipped below, and is kept across
     * iterations of the outer loop. */
443  blok_prev = blok;
444  blok = solvmtx->bloktab + solvmtx->browtab[m];
445  cblk = solvmtx->cblktab + blok->lcblknm;
446 
    /* Contributors flagged CBLK_IN_SCHUR are skipped. */
447  if ( cblk->cblktype & CBLK_IN_SCHUR ) {
448  continue;
449  }
450 
    /* For a 2D contributor, submit only once per run of successive blocks
     * sharing the same fcblknm and lcblknm. */
451  if( ( cblk->cblktype & CBLK_TASKS_2D ) &&
452  ( blok_prev != NULL ) &&
453  ( blok_prev->fcblknm == blok->fcblknm ) &&
454  ( blok_prev->lcblknm == blok->lcblknm ) )
455  {
456  continue;
457  }
458 
    /* All the updates targeting fcblk use the same priority value. */
459  starpu_task_getrf_sgemmsp( sopalin_data, cblk, blok, fcblk,
460  cblknbr - k );
461  }
462 
    /* Column blocks flagged CBLK_IN_SCHUR are not factorized. */
463  if ( fcblk->cblktype & CBLK_IN_SCHUR ) {
464  continue;
465  }
466 
467  starpu_task_sgetrfsp( sopalin_data, fcblk, cblknbr - k );
468  }
469 
470  cblk = solvmtx->cblktab;
    /* Second pass over all the column blocks.
     * NOTE(review): listing line 472 (the loop body) is missing from this
     * extraction. */
471  for ( k = 0; k < solvmtx->cblknbr; k++, cblk++ ) {
473  }
474 
    /* desc is not used by the visible code; the cast silences the
     * unused-parameter warning. */
475  (void)desc;
476 }
477 
478 /**
479  *******************************************************************************
480  *
481  * @brief Perform a sparse LU factorization using StarPU runtime.
482  *
483  * The function performs the LU factorization of a sparse general matrix A.
484  * The factorization has the form
485  *
486  * \f[ A = L\times U \f]
487  *
488  * where L is a sparse lower triangular matrix, and U a sparse upper triangular
489  * with the same pattern as L^t.
490  *
491  * The algorithm is automatically chosen between the 1D and 2D version based on
492  * the API parameter IPARM_TASKS2D_LEVEL. If IPARM_TASKS2D_LEVEL != 0
493  * the 2D scheme is applied, the 1D otherwise.
494  *
495  *******************************************************************************
496  *
497  * @param[inout] pastix_data
498  * The pastix_data structure that describes the solver instance.
499  *
500  * @param[inout] sopalin_data
501  * Solver matrix information structure that will guide the algorithm.
502  *
503  ******************************************************************************/
504 void
    /* NOTE(review): listing line 505 (function name and first parameter) is
     * missing from this extraction; the body uses a `pastix_data` parameter
     * that must be declared on that line. */
506  sopalin_data_t *sopalin_data )
507 {
508  starpu_sparse_matrix_desc_t *sdesc = sopalin_data->solvmtx->starpu_desc;
    /* Timers for the submission phase (sub) and the completion phase (com);
     * only used when PASTIX_STARPU_STATS is defined. */
509  float sub = 0.;
510  float com = 0.;
511 
512  /*
513  * Start StarPU if not already started
514  */
515  if (pastix_data->starpu == NULL) {
516  int argc = 0;
517  pastix_starpu_init( pastix_data, &argc, NULL, NULL );
518  }
519 
    /* The descriptor stored in the solver matrix is reused when present. */
520  if ( sdesc == NULL ) {
521  /* Create the matrix descriptor
     * NOTE(review): listing line 523 (second argument of the call) is missing
     * from this extraction. */
522  starpu_sparse_matrix_init( sopalin_data->solvmtx,
524  pastix_data->inter_node_procnbr,
525  pastix_data->inter_node_procnum,
526  PastixFloat );
    /* Re-read the descriptor now stored in the solver matrix. */
527  sdesc = sopalin_data->solvmtx->starpu_desc;
528  }
529 
530  starpu_profiling_status_set(STARPU_PROFILING_ENABLE);
531 #if defined(STARPU_USE_FXT)
    /* FxT tracing is only started when the numerical factorization trace bit
     * is set in IPARM_TRACE. */
532  if (pastix_data->iparm[IPARM_TRACE] & PastixTraceNumfact) {
533  starpu_fxt_start_profiling();
534  }
535 #endif
    /* With PASTIX_STARPU_STATS, starpu_resume() is delayed until all the
     * tasks are submitted so that submission (sub) and completion (com) are
     * timed separately; otherwise StarPU is resumed before the submission. */
536 #if defined(PASTIX_STARPU_STATS)
537  clockStart( sub );
538 #else
539  starpu_resume();
540 #endif
541  /*
542  * Select 1D or 2D algorithm based on 2d tasks level, and the left-looking
    * or right-looking variant based on IPARM_FACTO_LOOK_SIDE
543  */
544  if ( pastix_data->iparm[IPARM_TASKS2D_LEVEL] != 0 )
545  {
546  if ( pastix_data->iparm[IPARM_FACTO_LOOK_SIDE] == PastixFactLeftLooking ) {
547  starpu_sgetrf_sp2d_ll( sopalin_data, sdesc );
548  }
549  else {
550  starpu_sgetrf_sp2d_rl( sopalin_data, sdesc );
551  }
552  }
553  else
554  {
555  if ( pastix_data->iparm[IPARM_FACTO_LOOK_SIDE] == PastixFactLeftLooking ) {
556  starpu_sgetrf_sp1dplus_ll( sopalin_data, sdesc );
557  }
558  else {
559  starpu_sgetrf_sp1dplus_rl( sopalin_data, sdesc );
560  }
561  }
562 
    /* NOTE(review): listing line 563 is missing from this extraction. */
564 #if defined(PASTIX_STARPU_STATS)
565  clockStop( sub );
566  clockStart( com );
567  starpu_resume();
568 #endif
    /* Wait for the submitted tasks (and, with MPI, for the pending
     * communications followed by a barrier) before pausing StarPU again. */
569  starpu_task_wait_for_all();
570 #if defined(PASTIX_WITH_MPI)
571  starpu_mpi_wait_for_all( pastix_data->pastix_comm );
572  starpu_mpi_barrier( pastix_data->inter_node_comm );
573 #endif
574  starpu_pause();
575 #if defined(STARPU_USE_FXT)
576  if (pastix_data->iparm[IPARM_TRACE] & PastixTraceNumfact) {
577  starpu_fxt_stop_profiling();
578  }
579 #endif
580  starpu_profiling_status_set(STARPU_PROFILING_DISABLE);
581 #if defined(PASTIX_STARPU_STATS)
582  clockStop( com );
583  print_stats( sub, com, pastix_data->solvmatr );
584 #endif
585 
    /* Silence unused-variable warnings when PASTIX_STARPU_STATS is not
     * defined. */
586  (void)com;
587  (void)sub;
588  return;
589 }
590 
591 /**
592  *@}
593  */
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
#define PastixGeneral
Definition: api.h:458
@ PastixLCoef
Definition: api.h:478
@ PastixLUCoef
Definition: api.h:480
@ PastixUCoef
Definition: api.h:479
@ IPARM_TASKS2D_LEVEL
Definition: api.h:90
@ IPARM_FACTO_LOOK_SIDE
Definition: api.h:100
@ IPARM_TRACE
Definition: api.h:44
@ PastixUpper
Definition: api.h:466
@ PastixRight
Definition: api.h:496
@ PastixUnit
Definition: api.h:488
@ PastixNonUnit
Definition: api.h:487
@ PastixNoTrans
Definition: api.h:445
@ PastixTrans
Definition: api.h:446
@ PastixFactLeftLooking
Definition: api.h:326
@ PastixTraceNumfact
Definition: api.h:211
void starpu_sparse_matrix_getoncpu(starpu_sparse_matrix_desc_t *desc)
Submit asynchronous calls to retrieve the data on main memory.
void starpu_sparse_matrix_init(SolverMatrix *solvmtx, pastix_mtxtype_t mtxtype, int nodes, int myrank, pastix_coeftype_t flttype)
Generate the StarPU descriptor of the sparse matrix.
void pastix_starpu_init(pastix_data_t *pastix, int *argc, char **argv[], const int *bindtab)
Startup the StarPU runtime system.
Definition: starpu.c:92
void starpu_task_blok_sgemmsp(sopalin_data_t *sopalin_data, pastix_coefside_t sideA, pastix_coefside_t sideB, pastix_trans_t trans, SolverCblk *cblk, SolverCblk *fcblk, const SolverBlok *blokA, const SolverBlok *blokB, int prio)
StarPU GPU implementation.
void starpu_task_cblk_sgetrfsp(sopalin_data_t *sopalin_data, SolverCblk *cblk, int prio)
TODO.
void starpu_task_blok_strsmsp(sopalin_data_t *sopalin_data, pastix_coefside_t coef, pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, SolverBlok *blok, int prio)
StarPU GPU implementation.
void starpu_sparse_cblk_wont_use(pastix_coefside_t coef, SolverCblk *cblk)
Submit asynchronous calls to retrieve the data on main memory.
void starpu_task_cblk_sgemmsp(sopalin_data_t *sopalin_data, pastix_coefside_t sideA, pastix_coefside_t sideB, pastix_trans_t trans, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, int prio)
StarPU GPU implementation.
void starpu_task_blok_sgetrf(sopalin_data_t *sopalin_data, SolverCblk *cblk, int prio)
TODO.
StarPU descriptor structure for the sparse matrix.
PASTIX_Comm pastix_comm
Definition: pastixdata.h:75
int inter_node_procnum
Definition: pastixdata.h:83
SolverMatrix * solvmatr
Definition: pastixdata.h:102
int inter_node_procnbr
Definition: pastixdata.h:82
void * starpu
Definition: pastixdata.h:87
pastix_int_t * iparm
Definition: pastixdata.h:69
PASTIX_Comm inter_node_comm
Definition: pastixdata.h:77
Main PaStiX data structure.
Definition: pastixdata.h:67
void starpu_sgetrf_sp2d_ll(sopalin_data_t *sopalin_data, starpu_sparse_matrix_desc_t *desc)
Perform a sparse LU factorization with 1D and 2D kernels.
void starpu_sgetrf_sp1dplus_rl(sopalin_data_t *sopalin_data, starpu_sparse_matrix_desc_t *desc)
Perform a sparse LU factorization with 1D kernels.
void starpu_sgetrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
Perform a sparse LU factorization using StarPU runtime.
void starpu_task_sgetrfsp(sopalin_data_t *sopalin_data, SolverCblk *cblk, int prio)
TODO.
void starpu_task_getrf_sgemmsp(sopalin_data_t *sopalin_data, SolverCblk *cblk, const SolverBlok *blokB, SolverCblk *fcblk, int prio)
TODO.
Definition: starpu_sgetrf.c:55
void starpu_sgetrf_sp2d_rl(sopalin_data_t *sopalin_data, starpu_sparse_matrix_desc_t *desc)
Perform a sparse LU factorization with 1D and 2D kernels.
void starpu_sgetrf_sp1dplus_ll(sopalin_data_t *sopalin_data, starpu_sparse_matrix_desc_t *desc)
Perform a sparse LU factorization with 1D kernels.
pastix_int_t cblkmin2d
Definition: solver.h:215
pastix_int_t brownum
Definition: solver.h:166
pastix_int_t fcblknm
Definition: solver.h:140
pastix_int_t cblknbr
Definition: solver.h:208
SolverBlok *restrict bloktab
Definition: solver.h:223
pastix_int_t cblkmax1d
Definition: solver.h:214
SolverBlok * fblokptr
Definition: solver.h:163
pastix_int_t *restrict browtab
Definition: solver.h:224
pastix_int_t lcblknm
Definition: solver.h:139
SolverCblk *restrict cblktab
Definition: solver.h:222
int8_t cblktype
Definition: solver.h:159
Solver block structure.
Definition: solver.h:137
Solver column block structure.
Definition: solver.h:156
Solver column block structure.
Definition: solver.h:200