PaStiX Handbook  6.3.2
starpu_dpotrf.c
Go to the documentation of this file.
1 /**
2  *
3  * @file starpu_dpotrf.c
4  *
5  * PaStiX dpotrf StarPU wrapper.
6  *
7  * @copyright 2016-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.3.2
11  * @author Mathieu Faverge
12  * @author Pierre Ramet
13  * @date 2023-07-21
14  * @generated from /builds/solverstack/pastix/sopalin/starpu/starpu_zpotrf.c, normal z -> d, Wed Dec 13 12:09:29 2023
15  *
16  * @addtogroup starpu_potrf
17  * @{
18  *
19  **/
20 #include "common.h"
21 #include "blend/solver.h"
22 #include "sopalin/sopalin_data.h"
23 #include "pastix_dcores.h"
24 #include "pastix_starpu.h"
25 #include "pastix_dstarpu.h"
26 
27 /**
28  *******************************************************************************
29  *
30  * @brief TODO
31  *
32  *******************************************************************************
33  *
34  * @param[in] sopalin_data
35  * TODO
36  *
37  * @param[in] cblk
38  * TODO
39  *
40  * @param[in] fcblk
41  * TODO
42  *
43  * @param[in] blokA
44  * TODO
45  *
46  * @param[in] prio
47  * TODO
48  *
49  *******************************************************************************/
50 void
51 starpu_task_potrf_dgemmsp( sopalin_data_t *sopalin_data,
52  SolverCblk *cblk,
53  const SolverBlok *blokB,
54  SolverCblk *fcblk,
55  int prio )
56 {
57  const SolverBlok *blokA, *lblk;
58 
59  if ( cblk->cblktype & CBLK_TASKS_2D ) {
60  lblk = cblk[1].fblokptr;
61  for ( blokA = blokB; blokA < lblk; blokA++ ) {
62 
64  cblk, fcblk, blokA, blokB, prio );
65 
66  /* Skip B blocks facing the same cblk */
67  while ( ( blokA < lblk ) &&
68  ( blokA[0].fcblknm == blokA[1].fcblknm ) &&
69  ( blokA[0].lcblknm == blokA[1].lcblknm ) )
70  {
71  blokA++;
72  }
73  }
74  }
75  else {
77  cblk, blokB, fcblk, prio );
78  }
79 }
80 
81 /**
82  *******************************************************************************
83  *
84  * @brief TODO
85  *
86  *******************************************************************************
87  *
88  * @param[in] sopalin_data
89  * TODO
90  *
91  * @param[in] cblk
92  * TODO
93  *
94  * @param[in] prio
95  * TODO
96  *
97  *******************************************************************************/
98 void
99 starpu_task_dpotrfsp( sopalin_data_t *sopalin_data,
100  SolverCblk *cblk,
101  int prio )
102 {
103  SolverBlok *lblk, *blok;
104  pastix_int_t m;
105 
106  if ( cblk->cblktype & CBLK_TASKS_2D ) {
107  starpu_task_blok_dpotrf( sopalin_data, cblk, prio );
108 
109  lblk = cblk[1].fblokptr;
110  for ( blok = cblk->fblokptr + 1, m = 0; blok < lblk; blok++, m++ ) {
111 
114  cblk, blok, prio );
115 
116  /* Skip blocks facing the same cblk */
117  while ( ( blok < lblk ) &&
118  ( blok[0].fcblknm == blok[1].fcblknm ) &&
119  ( blok[0].lcblknm == blok[1].lcblknm ) )
120  {
121  blok++;
122  }
123  }
124  }
125  else {
126  starpu_task_cblk_dpotrfsp( sopalin_data, cblk, prio );
127  }
128 }
129 
130 /**
131  *******************************************************************************
132  *
133  * @brief Perform a sparse Cholesky factorization with 1D kernels.
134  *
135  * The function performs the Cholesky factorization of a sparse symmetric
136  * positive definite (or Symmetric positive definite in the real case) matrix
137  * A.
138  * The factorization has the form
139  *
140  * \f[ A = L\times L^T \f]
141  *
142  * where L is a sparse lower triangular matrix.
143  *
144  *******************************************************************************
145  *
146  * @param[inout] sopalin_data
147  * Solver matrix information structure that will guide the algorithm.
148  *
149  * @param[inout] desc
150  * StarPU descriptor of the sparse matrix.
151  *
152  ******************************************************************************/
153 void
154 starpu_dpotrf_sp1dplus_rl( sopalin_data_t *sopalin_data,
156 {
157  const SolverMatrix *solvmtx = sopalin_data->solvmtx;
158  SolverCblk *cblk, *fcblk;
159  SolverBlok *blok, *lblk;
160  pastix_int_t k, m, cblknbr, cblk_n;
161 
162  cblknbr = solvmtx->cblknbr;
163  cblk = solvmtx->cblktab;
164  for (k=0; k<solvmtx->cblknbr; k++, cblk++){
165 
166  if ( cblk->cblktype & CBLK_IN_SCHUR ) {
167  break;
168  }
169 
170  starpu_task_cblk_dpotrfsp( sopalin_data, cblk,
171  cblknbr - k );
172 
173  blok = cblk->fblokptr + 1; /* this diagonal block */
174  lblk = cblk[1].fblokptr; /* the next diagonal block */
175 
176  /* if there are off-diagonal supernodes in the column */
177  for(m=0; blok < lblk; blok++, m++ )
178  {
179  fcblk = (solvmtx->cblktab + blok->fcblknm);
180  cblk_n = fcblk - solvmtx->cblktab;
182  cblk, blok, fcblk,
183  cblknbr - pastix_imin( k + m, cblk_n ) );
184  }
186  }
187  (void)desc;
188 }
189 
190 /**
191  *******************************************************************************
192  *
193  * @brief Perform a sparse Cholesky factorization with 1D kernels.
194  *
195  * The function performs the Cholesky factorization of a sparse symmetric
196  * positive definite (or Symmetric positive definite in the real case) matrix
197  * A.
198  * The factorization has the form
199  *
200  * \f[ A = L\times L^T \f]
201  *
202  * where L is a sparse lower triangular matrix.
203  *
204  *******************************************************************************
205  *
206  * @param[inout] sopalin_data
207  * Solver matrix information structure that will guide the algorithm.
208  *
209  * @param[inout] desc
210  * StarPU descriptor of the sparse matrix.
211  *
212  ******************************************************************************/
213 void
214 starpu_dpotrf_sp1dplus_ll( sopalin_data_t *sopalin_data,
216 {
217  const SolverMatrix *solvmtx = sopalin_data->solvmtx;
218  SolverCblk *cblk, *fcblk;
219  SolverBlok *blok;
220  pastix_int_t k, m, cblknbr;
221 
222  cblknbr = solvmtx->cblknbr;
223  fcblk = solvmtx->cblktab;
224  for ( k = 0; k < solvmtx->cblknbr; k++, fcblk++ ) {
225 
226  for ( m = fcblk[0].brownum; m < fcblk[1].brownum; m++ ) {
227  blok = solvmtx->bloktab + solvmtx->browtab[m];
228  cblk = solvmtx->cblktab + blok->lcblknm;
229 
230  if ( cblk->cblktype & CBLK_IN_SCHUR ) {
231  break;
232  }
233 
235  cblk, blok, fcblk, cblknbr - k );
236  }
237 
238  if ( fcblk->cblktype & CBLK_IN_SCHUR ) {
239  continue;
240  }
241 
242  starpu_task_cblk_dpotrfsp( sopalin_data, fcblk,
243  cblknbr - k );
244  }
245 
246  cblk = solvmtx->cblktab;
247  for ( k = 0; k < solvmtx->cblknbr; k++, cblk++ ) {
249  }
250  (void)desc;
251 }
252 
253 /**
254  *******************************************************************************
255  *
256  * @brief Perform a sparse Cholesky factorization with 1D and 2D kernels.
257  *
258  * The function performs the Cholesky factorization of a sparse symmetric
259  * positive definite (or Symmetric positive definite in the real case) matrix
260  * A.
261  * The factorization has the form
262  *
263  * \f[ A = L\times L^T \f]
264  *
265  * where L is a sparse lower triangular matrix.
266  *
267  *******************************************************************************
268  *
269  * @param[inout] sopalin_data
270  * Solver matrix information structure that will guide the algorithm.
271  *
272  * @param[inout] desc
273  * StarPU descriptor of the sparse matrix.
274  *
275  ******************************************************************************/
276 void
277 starpu_dpotrf_sp2d_rl( sopalin_data_t *sopalin_data,
279 {
280  const SolverMatrix *solvmtx = sopalin_data->solvmtx;
281  SolverCblk *cblk, *fcblk;
282  SolverBlok *blok, *lblk;
283  pastix_int_t k, m, cblknbr, cblk_n;
284 
285  cblknbr = solvmtx->cblknbr;
286 
287  /* Let's submit all 1D tasks first */
288  cblk = solvmtx->cblktab;
289  for ( k = 0; k <= solvmtx->cblkmax1d; k++, cblk++ ) {
290 
291  if ( cblk->cblktype & CBLK_IN_SCHUR ) {
292  break;
293  }
294 
295  if ( cblk->cblktype & CBLK_TASKS_2D ) {
296  continue;
297  }
298 
299  starpu_task_dpotrfsp( sopalin_data, cblk, cblknbr - k );
300 
301  lblk = cblk[1].fblokptr; /* the next diagonal block */
302 
303  for ( blok = cblk->fblokptr + 1, m = 0; blok < lblk; blok++, m++ ) {
304  fcblk = ( solvmtx->cblktab + blok->fcblknm );
305  cblk_n = ( cblk->cblktype & CBLK_TASKS_2D ) ? blok->fcblknm : fcblk - solvmtx->cblktab;
306 
307  starpu_task_potrf_dgemmsp( sopalin_data, cblk, blok, fcblk,
308  cblknbr - pastix_imin( k + m, cblk_n ) );
309  }
311  }
312 
313  /* Let's submit all 2D tasks */
314  cblk = solvmtx->cblktab + solvmtx->cblkmin2d;
315  for ( k = solvmtx->cblkmin2d; k < solvmtx->cblknbr; k++, cblk++ ) {
316 
317  if ( cblk->cblktype & CBLK_IN_SCHUR ) {
318  continue;
319  }
320 
321  if ( ! ( cblk->cblktype & CBLK_TASKS_2D ) ) {
322  continue;
323  }
324 
325  starpu_task_dpotrfsp( sopalin_data, cblk, cblknbr - k );
326 
327  lblk = cblk[1].fblokptr; /* the next diagonal block */
328 
329  for ( blok = cblk->fblokptr + 1, m = 0; blok < lblk; blok++, m++ ) {
330  fcblk = ( solvmtx->cblktab + blok->fcblknm );
331  cblk_n = ( cblk->cblktype & CBLK_TASKS_2D ) ? blok->fcblknm : fcblk - solvmtx->cblktab;
332 
333  starpu_task_potrf_dgemmsp( sopalin_data, cblk, blok, fcblk,
334  cblknbr - pastix_imin( k + m, cblk_n ) );
335 
336  /* Skip A blocks facing the same cblk */
337  while( ( blok < lblk ) &&
338  ( blok[0].fcblknm == blok[1].fcblknm ) &&
339  ( blok[0].lcblknm == blok[1].lcblknm ) )
340  {
341  blok++;
342  }
343  }
345  }
346 
347  (void)desc;
348 }
349 
350 /**
351  *******************************************************************************
352  *
353  * @brief Perform a sparse Cholesky factorization with 1D and 2D kernels.
354  *
355  * The function performs the Cholesky factorization of a sparse symmetric
356  * positive definite (or Symmetric positive definite in the real case) matrix
357  * A.
358  * The factorization has the form
359  *
360  * \f[ A = L\times L^T \f]
361  *
362  * where L is a sparse lower triangular matrix.
363  *
364  *******************************************************************************
365  *
366  * @param[inout] sopalin_data
367  * Solver matrix information structure that will guide the algorithm.
368  *
369  * @param[inout] desc
370  * StarPU descriptor of the sparse matrix.
371  *
372  ******************************************************************************/
373 void
374 starpu_dpotrf_sp2d_ll( sopalin_data_t *sopalin_data,
376 {
377  const SolverMatrix *solvmtx = sopalin_data->solvmtx;
378  SolverCblk *cblk, *fcblk;
379  SolverBlok *blok = NULL;
380  SolverBlok *blok_prev;
381  pastix_int_t k, m, cblknbr;
382 
383  cblknbr = solvmtx->cblknbr;
384  fcblk = solvmtx->cblktab;
385  for ( k = 0; k < cblknbr; k++, fcblk++ ) {
386 
387  for ( m = fcblk[0].brownum; m < fcblk[1].brownum; m++ ) {
388  blok_prev = blok;
389  blok = solvmtx->bloktab + solvmtx->browtab[m];
390  cblk = solvmtx->cblktab + blok->lcblknm;
391 
392  if ( cblk->cblktype & CBLK_IN_SCHUR ) {
393  continue;
394  }
395 
396  if( ( cblk->cblktype & CBLK_TASKS_2D ) &&
397  ( blok_prev != NULL ) &&
398  ( blok_prev->fcblknm == blok->fcblknm ) &&
399  ( blok_prev->lcblknm == blok->lcblknm ) )
400  {
401  continue;
402  }
403 
404  starpu_task_potrf_dgemmsp( sopalin_data, cblk, blok, fcblk,
405  cblknbr - k );
406  }
407 
408  if ( fcblk->cblktype & CBLK_IN_SCHUR ) {
409  continue;
410  }
411 
412  starpu_task_dpotrfsp( sopalin_data, fcblk, cblknbr - k );
413  }
414 
415  cblk = solvmtx->cblktab;
416  for ( k = 0; k < solvmtx->cblknbr; k++, cblk++ ) {
418  }
419 
420  (void)desc;
421 }
422 
423 /**
424  *******************************************************************************
425  *
426  * @brief Perform a sparse Cholesky factorization using StarPU runtime.
427  *
428  * The function performs the Cholesky factorization of a sparse symmetric
429  * positive definite (or Symmetric positive definite in the real case) matrix
430  * A.
431  * The factorization has the form
432  *
433  * \f[ A = L\times L^T \f]
434  *
435  * where L is a sparse lower triangular matrix.
436  *
437  * The algorithm is automatically chosen between the 1D and 2D version based on
438  * the API parameter IPARM_TASKS2D_LEVEL. If IPARM_TASKS2D_LEVEL != 0
439  * the 2D scheme is applied, the 1D otherwise.
440  *
441  *******************************************************************************
442  *
443  * @param[inout] pastix_data
444  * The pastix_data structure that describes the solver instance.
445  *
446  * @param[inout] sopalin_data
447  * Solver matrix information structure that will guide the algorithm.
448  *
449  ******************************************************************************/
450 void
452  sopalin_data_t *sopalin_data )
453 {
454  starpu_sparse_matrix_desc_t *sdesc = sopalin_data->solvmtx->starpu_desc;
455  double sub = 0.;
456  double com = 0.;
457 
458  /*
459  * Start StarPU if not already started
460  */
461  if (pastix_data->starpu == NULL) {
462  int argc = 0;
463  pastix_starpu_init( pastix_data, &argc, NULL, NULL );
464  }
465 
466  if ( sdesc == NULL ) {
467  /* Create the matrix descriptor */
468  starpu_sparse_matrix_init( sopalin_data->solvmtx,
470  pastix_data->inter_node_procnbr,
471  pastix_data->inter_node_procnum,
472  PastixDouble );
473  sdesc = sopalin_data->solvmtx->starpu_desc;
474  }
475 
476  starpu_profiling_status_set(STARPU_PROFILING_ENABLE);
477 #if defined(STARPU_USE_FXT)
478  if (pastix_data->iparm[IPARM_TRACE] & PastixTraceNumfact) {
479  starpu_fxt_start_profiling();
480  }
481 #endif
482 #if defined(PASTIX_STARPU_STATS)
483  clockStart( sub );
484 #else
485  starpu_resume();
486 #endif
487  /*
488  * Select 1D or 2D algorithm based on 2d tasks level
489  */
490  if ( pastix_data->iparm[IPARM_TASKS2D_LEVEL] != 0 )
491  {
492  if ( pastix_data->iparm[IPARM_FACTO_LOOK_SIDE] == PastixFactLeftLooking ) {
493  starpu_dpotrf_sp2d_ll( sopalin_data, sdesc );
494  }
495  else {
496  starpu_dpotrf_sp2d_rl( sopalin_data, sdesc );
497  }
498  }
499  else
500  {
501  if ( pastix_data->iparm[IPARM_FACTO_LOOK_SIDE] == PastixFactLeftLooking ) {
502  starpu_dpotrf_sp1dplus_ll( sopalin_data, sdesc );
503  }
504  else {
505  starpu_dpotrf_sp1dplus_rl( sopalin_data, sdesc );
506  }
507  }
508 
510 #if defined(PASTIX_STARPU_STATS)
511  clockStop( sub );
512  clockStart( com );
513  starpu_resume();
514 #endif
515  starpu_task_wait_for_all();
516 #if defined(PASTIX_WITH_MPI)
517  starpu_mpi_wait_for_all( pastix_data->pastix_comm );
518  starpu_mpi_barrier( pastix_data->inter_node_comm );
519 #endif
520  starpu_pause();
521 #if defined(STARPU_USE_FXT)
522  if (pastix_data->iparm[IPARM_TRACE] & PastixTraceNumfact) {
523  starpu_fxt_stop_profiling();
524  }
525 #endif
526  starpu_profiling_status_set(STARPU_PROFILING_DISABLE);
527 #if defined(PASTIX_STARPU_STATS)
528  clockStop( com );
529  print_stats( sub, com, pastix_data->solvmatr );
530 #endif
531 
532  (void)com;
533  (void)sub;
534  return;
535 }
536 
537 /**
538  *@}
539  */
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
#define PastixSymmetric
Definition: api.h:459
@ PastixLCoef
Definition: api.h:478
@ IPARM_TASKS2D_LEVEL
Definition: api.h:90
@ IPARM_FACTO_LOOK_SIDE
Definition: api.h:100
@ IPARM_TRACE
Definition: api.h:44
@ PastixLower
Definition: api.h:467
@ PastixRight
Definition: api.h:496
@ PastixNonUnit
Definition: api.h:487
@ PastixTrans
Definition: api.h:446
@ PastixFactLeftLooking
Definition: api.h:326
@ PastixTraceNumfact
Definition: api.h:211
void starpu_sparse_matrix_getoncpu(starpu_sparse_matrix_desc_t *desc)
Submit asynchronous calls to retrieve the data on main memory.
void starpu_task_cblk_dpotrfsp(sopalin_data_t *sopalin_data, SolverCblk *cblk, int prio)
TODO.
void starpu_sparse_matrix_init(SolverMatrix *solvmtx, pastix_mtxtype_t mtxtype, int nodes, int myrank, pastix_coeftype_t flttype)
Generate the StarPU descriptor of the sparse matrix.
void pastix_starpu_init(pastix_data_t *pastix, int *argc, char **argv[], const int *bindtab)
Startup the StarPU runtime system.
Definition: starpu.c:92
void starpu_task_blok_dpotrf(sopalin_data_t *sopalin_data, SolverCblk *cblk, int prio)
TODO.
void starpu_sparse_cblk_wont_use(pastix_coefside_t coef, SolverCblk *cblk)
Submit asynchronous calls to retrieve the data on main memory.
void starpu_task_blok_dtrsmsp(sopalin_data_t *sopalin_data, pastix_coefside_t coef, pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, SolverBlok *blok, int prio)
StarPU GPU implementation.
void starpu_task_cblk_dgemmsp(sopalin_data_t *sopalin_data, pastix_coefside_t sideA, pastix_coefside_t sideB, pastix_trans_t trans, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, int prio)
StarPU GPU implementation.
void starpu_task_blok_dgemmsp(sopalin_data_t *sopalin_data, pastix_coefside_t sideA, pastix_coefside_t sideB, pastix_trans_t trans, SolverCblk *cblk, SolverCblk *fcblk, const SolverBlok *blokA, const SolverBlok *blokB, int prio)
StarPU GPU implementation.
StarPU descriptor stucture for the sparse matrix.
PASTIX_Comm pastix_comm
Definition: pastixdata.h:75
int inter_node_procnum
Definition: pastixdata.h:83
SolverMatrix * solvmatr
Definition: pastixdata.h:102
int inter_node_procnbr
Definition: pastixdata.h:82
void * starpu
Definition: pastixdata.h:87
pastix_int_t * iparm
Definition: pastixdata.h:69
PASTIX_Comm inter_node_comm
Definition: pastixdata.h:77
Main PaStiX data structure.
Definition: pastixdata.h:67
void starpu_dpotrf_sp1dplus_ll(sopalin_data_t *sopalin_data, starpu_sparse_matrix_desc_t *desc)
Perform a sparse Cholesky factorization with 1D kernels.
void starpu_task_dpotrfsp(sopalin_data_t *sopalin_data, SolverCblk *cblk, int prio)
TODO.
Definition: starpu_dpotrf.c:99
void starpu_dpotrf_sp1dplus_rl(sopalin_data_t *sopalin_data, starpu_sparse_matrix_desc_t *desc)
Perform a sparse Cholesky factorization with 1D kernels.
void starpu_dpotrf_sp2d_ll(sopalin_data_t *sopalin_data, starpu_sparse_matrix_desc_t *desc)
Perform a sparse Cholesky factorization with 1D and 2D kernels.
void starpu_dpotrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
Perform a sparse Cholesky factorization using StarPU runtime.
void starpu_dpotrf_sp2d_rl(sopalin_data_t *sopalin_data, starpu_sparse_matrix_desc_t *desc)
Perform a sparse Cholesky factorization with 1D and 2D kernels.
void starpu_task_potrf_dgemmsp(sopalin_data_t *sopalin_data, SolverCblk *cblk, const SolverBlok *blokB, SolverCblk *fcblk, int prio)
TODO.
Definition: starpu_dpotrf.c:51
pastix_int_t cblkmin2d
Definition: solver.h:215
pastix_int_t brownum
Definition: solver.h:166
pastix_int_t fcblknm
Definition: solver.h:140
pastix_int_t cblknbr
Definition: solver.h:208
SolverBlok *restrict bloktab
Definition: solver.h:223
pastix_int_t cblkmax1d
Definition: solver.h:214
SolverBlok * fblokptr
Definition: solver.h:163
pastix_int_t *restrict browtab
Definition: solver.h:224
pastix_int_t lcblknm
Definition: solver.h:139
SolverCblk *restrict cblktab
Definition: solver.h:222
int8_t cblktype
Definition: solver.h:159
Solver block structure.
Definition: solver.h:137
Solver column block structure.
Definition: solver.h:156
Solver column block structure.
Definition: solver.h:200