PaStiX Handbook  6.3.1
starpu_cpxtrf.c
Go to the documentation of this file.
1 /**
2  *
3  * @file starpu_cpxtrf.c
4  *
5  * PaStiX cpxtrf StarPU wrapper.
6  *
7  * @copyright 2016-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.3.1
11  * @author Mathieu Faverge
12  * @author Pierre Ramet
13  * @date 2023-07-21
14  * @generated from /builds/solverstack/pastix/sopalin/starpu/starpu_zpxtrf.c, normal z -> c, Thu Nov 23 09:55:59 2023
15  *
16  * @addtogroup starpu_pxtrf
17  * @{
18  *
19  **/
20 #include "common.h"
21 #include "blend/solver.h"
22 #include "sopalin/sopalin_data.h"
23 #include "pastix_ccores.h"
24 #include "pastix_starpu.h"
25 #include "pastix_cstarpu.h"
26 
27 /**
28  *******************************************************************************
29  *
30  * @brief Perform a sparse LL^t factorization with 1D kernels (right-looking variant).
31  *
32  * The function performs the LL^t factorization of a sparse symmetric complex
33  * matrix A.
34  * The factorization has the form
35  *
36  * \f[ A = L\times L^t \f]
37  *
38  * where L is a sparse lower triangular matrix.
39  *
40  *******************************************************************************
41  *
42  * @param[inout] sopalin_data
43  * Solver matrix information structure that will guide the algorithm.
44  *
45  * @param[inout] desc
46  * StarPU descriptor of the sparse matrix.
47  *
48  ******************************************************************************/
49 void
50 starpu_cpxtrf_sp1dplus_rl( sopalin_data_t *sopalin_data,
52 {
    /*
     * Submits, for each column block taken in cblktab order, its
     * factorization task followed by one task per block stored after
     * fblokptr. Submission stops at the first column block flagged
     * CBLK_IN_SCHUR.
     * NOTE(review): the line declaring the second parameter (desc) is not
     * visible in this listing.
     */
53  const SolverMatrix *solvmtx = sopalin_data->solvmtx;
54  SolverCblk *cblk, *fcblk;
55  SolverBlok *blok, *lblk;
56  pastix_int_t k, m, cblknbr, cblk_n;
57 
58  cblknbr = solvmtx->cblknbr;
59  cblk = solvmtx->cblktab;
60  for (k=0; k<solvmtx->cblknbr; k++, cblk++){
61 
    /* Stop at the first column block that belongs to the Schur complement. */
62  if ( cblk->cblktype & CBLK_IN_SCHUR ) {
63  break;
64  }
65 
    /* Last argument is the task priority: it decreases as k grows. */
66  starpu_task_cblk_cpxtrfsp( sopalin_data, cblk,
67  cblknbr - k );
68 
69  blok = cblk->fblokptr + 1; /* first block after this cblk's first (diagonal) block */
70  lblk = cblk[1].fblokptr; /* the next diagonal block */
71 
72  /* if there are off-diagonal supernodes in the column */
73  for(m=0; blok < lblk; blok++, m++ )
74  {
    /* fcblk is the column block faced by blok; cblk_n is its index in cblktab. */
75  fcblk = (solvmtx->cblktab + blok->fcblknm);
76  cblk_n = fcblk - solvmtx->cblktab;
77 
    /*
     * NOTE(review): the opening of this call (listing line 78) is not
     * visible here; the argument pattern presumably belongs to
     * starpu_task_cblk_cgemmsp - TODO confirm against the original file.
     */
79  cblk, blok, fcblk,
80  cblknbr - pastix_imin( k + m, cblk_n ) );
81  }
83  }
    /* desc is not referenced in the visible body; the cast silences the unused-parameter warning. */
84  (void)desc;
85 }
86 
87 /**
88  *******************************************************************************
89  *
90  * @brief Perform a sparse LL^t factorization with 1D kernels (left-looking variant).
91  *
92  * The function performs the LL^t factorization of a sparse symmetric complex
93  * matrix A.
94  * The factorization has the form
95  *
96  * \f[ A = L\times L^t \f]
97  *
98  * where L is a sparse lower triangular matrix.
99  *
100  *******************************************************************************
101  *
102  * @param[inout] sopalin_data
103  * Solver matrix information structure that will guide the algorithm.
104  *
105  * @param[inout] desc
106  * StarPU descriptor of the sparse matrix.
107  *
108  ******************************************************************************/
109 void
110 starpu_cpxtrf_sp1dplus_ll( sopalin_data_t *sopalin_data,
112 {
    /*
     * For each column block, first walks the browtab entries in
     * [cblk[0].brownum, cblk[1].brownum) to submit one task per
     * contributing block, then submits the factorization task of the
     * column block itself (unless it is flagged CBLK_IN_SCHUR).
     * NOTE(review): the line declaring the second parameter (desc) is not
     * visible in this listing.
     */
113  const SolverMatrix *solvmtx = sopalin_data->solvmtx;
114  SolverCblk *cblk, *fcblk, *lcblk;
115  SolverBlok *blok;
116  pastix_int_t k, m, cblknbr, cblk_n;
117 
118  cblknbr = solvmtx->cblknbr;
119  cblk = solvmtx->cblktab;
120  for ( k = 0; k < solvmtx->cblknbr; k++, cblk++ ) {
121 
    /* m indexes browtab, whose entries are block indices into bloktab. */
122  for ( m = cblk[0].brownum; m < cblk[1].brownum; m++ ) {
123  blok = solvmtx->bloktab + solvmtx->browtab[m];
    /* lcblk: column block designated by blok->lcblknm (presumably the owner of blok). */
124  lcblk = solvmtx->cblktab + blok->lcblknm;
125 
    /* Stop scanning contributions as soon as one comes from a Schur column block. */
126  if ( lcblk->cblktype & CBLK_IN_SCHUR ) {
127  break;
128  }
129 
130  fcblk = solvmtx->cblktab + blok->fcblknm;
131  cblk_n = fcblk - solvmtx->cblktab;
132 
    /* Every block listed in this range is expected to face the current cblk. */
133  assert( fcblk == cblk );
134 
    /*
     * NOTE(review): the opening of this call (listing line 135) is not
     * visible here; the argument pattern presumably belongs to
     * starpu_task_cblk_cgemmsp - TODO confirm against the original file.
     * Note that m is a browtab index here, not a 0-based block counter.
     */
136  lcblk, blok, cblk,
137  cblknbr - pastix_imin( k + m, cblk_n ) );
138  }
139 
    /* Schur column blocks are not factorized. */
140  if ( cblk->cblktype & CBLK_IN_SCHUR ) {
141  continue;
142  }
143 
    /* Last argument is the task priority: it decreases as k grows. */
144  starpu_task_cblk_cpxtrfsp( sopalin_data, cblk,
145  cblknbr - k );
146  }
147 
    /*
     * Second pass over all column blocks.
     * NOTE(review): the loop body (listing line 150) is not visible in this
     * listing.
     */
148  cblk = solvmtx->cblktab;
149  for ( k = 0; k < solvmtx->cblknbr; k++, cblk++ ) {
151  }
    /* desc is not referenced in the visible body; the cast silences the unused-parameter warning. */
152  (void)desc;
153 }
154 
155 /**
156  *******************************************************************************
157  *
158  * @brief Perform a sparse LL^t factorization with 1D and 2D kernels.
159  *
160  * The function performs the LL^t factorization of a sparse symmetric complex
161  * matrix A.
162  * The factorization has the form
163  *
164  * \f[ A = L\times L^t \f]
165  *
166  * where L is a sparse lower triangular matrix.
167  *
168  *******************************************************************************
169  *
170  * @param[inout] sopalin_data
171  * Solver matrix information structure that will guide the algorithm.
172  *
173  * @param[inout] desc
174  * StarPU descriptor of the sparse matrix.
175  *
176  ******************************************************************************/
177 void
178 starpu_cpxtrf_sp2d( sopalin_data_t *sopalin_data,
180 {
    /*
     * Two submission phases:
     *   1. column blocks 0..cblkmax1d that are not flagged CBLK_TASKS_2D are
     *      handled with column-block (1D) tasks;
     *   2. column blocks cblkmin2d..cblknbr-1 that are flagged CBLK_TASKS_2D
     *      are handled with per-block (2D) tasks.
     * NOTE(review): the line declaring the second parameter (desc) is not
     * visible in this listing.
     */
181  const SolverMatrix *solvmtx = sopalin_data->solvmtx;
182  SolverCblk *cblk, *fcblk;
183  SolverBlok *blok, *lblk, *blokA, *blokB;
184  starpu_cblk_t *cblkhandle;
185  pastix_int_t k, m, cblknbr, cblk_n;
186 
187  cblknbr = solvmtx->cblknbr;
188 
189  /* Let's submit all 1D tasks first */
190  cblk = solvmtx->cblktab;
191  for (k=0; k<=solvmtx->cblkmax1d; k++, cblk++){
192 
    /* Stop the 1D phase at the first Schur column block. */
193  if ( cblk->cblktype & CBLK_IN_SCHUR ) {
194  break;
195  }
196 
    /* 2D column blocks are left to the second phase. */
197  if ( cblk->cblktype & CBLK_TASKS_2D ) {
198  continue;
199  }
200 
    /* Last argument is the task priority: it decreases as k grows. */
201  starpu_task_cblk_cpxtrfsp( sopalin_data, cblk,
202  cblknbr - k );
203 
204  blok = cblk->fblokptr + 1; /* first block after this cblk's first (diagonal) block */
205  lblk = cblk[1].fblokptr; /* the next diagonal block */
206 
207  /* if there are off-diagonal supernodes in the column */
208  for(m=0; blok < lblk; blok++, m++ )
209  {
    /* fcblk is the column block faced by blok; cblk_n is its index in cblktab. */
210  fcblk = (solvmtx->cblktab + blok->fcblknm);
211  cblk_n = fcblk - solvmtx->cblktab;
212 
    /*
     * NOTE(review): the opening of this call (listing line 213) is not
     * visible here; the argument pattern presumably belongs to
     * starpu_task_cblk_cgemmsp - TODO confirm against the original file.
     */
214  cblk, blok, fcblk,
215  cblknbr - pastix_imin( k + m, cblk_n ) );
216  }
218  }
219 
220  /* Now we submit all 2D tasks */
221  cblk = solvmtx->cblktab + solvmtx->cblkmin2d;
    /*
     * NOTE(review): cblkhandle is advanced with the loop but is not read in
     * any line visible in this listing; it also starts at cblktab_handle
     * without a cblkmin2d offset - confirm intent against the original file.
     */
222  cblkhandle = desc->cblktab_handle;
223  for (k=solvmtx->cblkmin2d; k<solvmtx->cblknbr; k++, cblk++, cblkhandle++){
224 
225  if ( !(cblk->cblktype & CBLK_TASKS_2D) ) {
226  continue; /* skip 1D cblk */
227  }
228 
    /* Schur column blocks are not factorized. */
229  if ( cblk->cblktype & CBLK_IN_SCHUR ) {
230  continue;
231  }
232 
    /* Block-level factorization task for this column block; last argument is the priority. */
233  starpu_task_blok_cpxtrf( sopalin_data, cblk,
234  cblknbr - k );
235 
236  lblk = cblk[1].fblokptr;
    /* blokA walks the blocks stored after the first one; m counts the iterations of this loop. */
237  for(blokA=cblk->fblokptr + 1, m=0; blokA<lblk; blokA++, m++) {
238 
239  cblk_n = blokA->fcblknm;
240 
    /*
     * NOTE(review): the opening of this call (listing lines 241-242) is not
     * visible here; the argument pattern presumably belongs to
     * starpu_task_blok_ctrsmsp - TODO confirm against the original file.
     */
243  cblk, blokA,
244  cblknbr - k );
245 
    /* Pair blokA with every block from the first off-diagonal one up to blokA itself. */
246  for(blokB=cblk->fblokptr + 1; blokB<=blokA; blokB++) {
247 
    /*
     * NOTE(review): the opening of this call (listing line 248) is not
     * visible here; the argument pattern presumably belongs to
     * starpu_task_blok_cgemmsp - TODO confirm against the original file.
     */
249  cblk, solvmtx->cblktab + blokB->fcblknm,
250  blokA, blokB,
251  cblknbr - pastix_imin( k + m, cblk_n ) );
252 
253  /* Skip B blocks facing the same cblk */
254  while( (blokB < blokA) &&
255  (blokB[0].fcblknm == blokB[1].fcblknm) &&
256  (blokB[0].lcblknm == blokB[1].lcblknm) )
257  {
258  blokB++;
259  }
260  }
261 
262  /* Skip A blocks facing the same cblk */
263  while( (blokA < lblk) &&
264  (blokA[0].fcblknm == blokA[1].fcblknm) &&
265  (blokA[0].lcblknm == blokA[1].lcblknm) )
266  {
267  blokA++;
268  }
269  }
271  }
    /* desc is dereferenced above, so this cast is redundant here; it is harmless. */
272  (void)desc;
273 }
274 
275 /**
276  *******************************************************************************
277  *
278  * @brief Perform a sparse LL^t factorization using StarPU runtime.
279  *
280  * The function performs the LL^t factorization of a sparse symmetric complex
281  * matrix A.
282  * The factorization has the form
283  *
284  * \f[ A = L\times L^t \f]
285  *
286  * where L is a sparse lower triangular matrix.
287  *
288  * The algorithm is automatically chosen between the 1D and 2D version based on
289  * the API parameter IPARM_TASKS2D_LEVEL. If IPARM_TASKS2D_LEVEL != 0
290  * the 2D scheme is applied, the 1D otherwise.
291  *
292  *******************************************************************************
293  *
294  * @param[inout] pastix_data
295  * The pastix_data structure that describes the solver instance.
296  *
297  * @param[inout] sopalin_data
298  * Solver matrix information structure that will guide the algorithm.
299  *
300  ******************************************************************************/
301 void
303  sopalin_data_t *sopalin_data )
304 {
    /*
     * Driver: makes sure the StarPU runtime and the matrix descriptor exist,
     * submits the tasks of the selected algorithm, waits for their
     * completion and pauses the runtime again.
     * NOTE(review): the line carrying the function name and the first
     * parameter (pastix_data) is not visible in this listing.
     */
305  starpu_sparse_matrix_desc_t *sdesc = sopalin_data->solvmtx->starpu_desc;
    /* Timers, only started and stopped when PASTIX_STARPU_STATS is defined. */
306  float sub = 0.;
307  float com = 0.;
308 
309  /*
310  * Start StarPU if not already started
311  */
312  if (pastix_data->starpu == NULL) {
313  int argc = 0;
314  pastix_starpu_init( pastix_data, &argc, NULL, NULL );
315  }
316 
317  if ( sdesc == NULL ) {
318  /* Create the matrix descriptor */
    /* NOTE(review): one argument line of this call (listing line 320) is not visible in this listing. */
319  starpu_sparse_matrix_init( sopalin_data->solvmtx,
321  pastix_data->inter_node_procnbr,
322  pastix_data->inter_node_procnum,
323  PastixComplex32 );
    /* Re-read the descriptor now that it has been created. */
324  sdesc = sopalin_data->solvmtx->starpu_desc;
325  }
326 
327  starpu_profiling_status_set(STARPU_PROFILING_ENABLE);
328 #if defined(STARPU_USE_FXT)
329  if (pastix_data->iparm[IPARM_TRACE] & PastixTraceNumfact) {
330  starpu_fxt_start_profiling();
331  }
332 #endif
    /*
     * With PASTIX_STARPU_STATS the runtime is resumed only after all tasks
     * have been submitted (see below), so that `sub` brackets the submission
     * calls alone; otherwise the runtime is resumed before submission.
     */
333 #if defined(PASTIX_STARPU_STATS)
334  clockStart( sub );
335 #else
336  starpu_resume();
337 #endif
338  /*
339  * Select 1D or 2D algorithm based on 2d tasks level
340  */
341  if ( pastix_data->iparm[IPARM_TASKS2D_LEVEL] != 0 )
342  {
343  starpu_cpxtrf_sp2d( sopalin_data, sdesc );
344  }
345  else
346  {
    /* 1D algorithm: pick the left-looking variant when requested, the other one otherwise. */
347  if ( pastix_data->iparm[IPARM_FACTO_LOOK_SIDE] == PastixFactLeftLooking ) {
348  starpu_cpxtrf_sp1dplus_ll( sopalin_data, sdesc );
349  }
350  else {
351  starpu_cpxtrf_sp1dplus_rl( sopalin_data, sdesc );
352  }
353  }
354 
    /* NOTE(review): listing line 355 is not visible in this listing. */
356 #if defined(PASTIX_STARPU_STATS)
357  clockStop( sub );
358  clockStart( com );
359  starpu_resume();
360 #endif
    /* Block until every submitted task (and, with MPI, every communication) has completed. */
361  starpu_task_wait_for_all();
362 #if defined(PASTIX_WITH_MPI)
363  starpu_mpi_wait_for_all( pastix_data->pastix_comm );
364  starpu_mpi_barrier( pastix_data->inter_node_comm );
365 #endif
    /* Pause the runtime again now that the work is done. */
366  starpu_pause();
367 #if defined(STARPU_USE_FXT)
368  if (pastix_data->iparm[IPARM_TRACE] & PastixTraceNumfact) {
369  starpu_fxt_stop_profiling();
370  }
371 #endif
372  starpu_profiling_status_set(STARPU_PROFILING_DISABLE);
373 #if defined(PASTIX_STARPU_STATS)
374  clockStop( com );
375  print_stats( sub, com, pastix_data->solvmatr );
376 #endif
377 
    /* The timers are unused when PASTIX_STARPU_STATS is not defined. */
378  (void)com;
379  (void)sub;
380  return;
381 }
382 
383 /**
384  *@}
385  */
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
#define PastixSymmetric
Definition: api.h:459
@ PastixLCoef
Definition: api.h:478
@ IPARM_TASKS2D_LEVEL
Definition: api.h:90
@ IPARM_FACTO_LOOK_SIDE
Definition: api.h:100
@ IPARM_TRACE
Definition: api.h:44
@ PastixLower
Definition: api.h:467
@ PastixRight
Definition: api.h:496
@ PastixNonUnit
Definition: api.h:487
@ PastixConjTrans
Definition: api.h:447
@ PastixFactLeftLooking
Definition: api.h:326
@ PastixTraceNumfact
Definition: api.h:211
starpu_cblk_t * cblktab_handle
void starpu_sparse_matrix_getoncpu(starpu_sparse_matrix_desc_t *desc)
Submit asynchronous calls to retrieve the data on main memory.
void starpu_task_blok_cpxtrf(sopalin_data_t *sopalin_data, SolverCblk *cblk, int prio)
TODO.
void starpu_task_cblk_cpxtrfsp(sopalin_data_t *sopalin_data, SolverCblk *cblk, int prio)
TODO.
void starpu_sparse_matrix_init(SolverMatrix *solvmtx, pastix_mtxtype_t mtxtype, int nodes, int myrank, pastix_coeftype_t flttype)
Generate the StarPU descriptor of the sparse matrix.
void pastix_starpu_init(pastix_data_t *pastix, int *argc, char **argv[], const int *bindtab)
Startup the StarPU runtime system.
Definition: starpu.c:92
void starpu_task_blok_ctrsmsp(sopalin_data_t *sopalin_data, pastix_coefside_t coef, pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, SolverBlok *blok, int prio)
StarPU GPU implementation.
void starpu_task_cblk_cgemmsp(sopalin_data_t *sopalin_data, pastix_coefside_t sideA, pastix_coefside_t sideB, pastix_trans_t trans, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, int prio)
StarPU GPU implementation.
void starpu_sparse_cblk_wont_use(pastix_coefside_t coef, SolverCblk *cblk)
Submit asynchronous calls to retrieve the data on main memory.
void starpu_task_blok_cgemmsp(sopalin_data_t *sopalin_data, pastix_coefside_t sideA, pastix_coefside_t sideB, pastix_trans_t trans, SolverCblk *cblk, SolverCblk *fcblk, const SolverBlok *blokA, const SolverBlok *blokB, int prio)
StarPU GPU implementation.
Additional StarPU handlers for a column-block when using 2D kernels.
Definition: pastix_starpu.h:99
StarPU descriptor structure for the sparse matrix.
PASTIX_Comm pastix_comm
Definition: pastixdata.h:75
int inter_node_procnum
Definition: pastixdata.h:83
SolverMatrix * solvmatr
Definition: pastixdata.h:102
int inter_node_procnbr
Definition: pastixdata.h:82
void * starpu
Definition: pastixdata.h:87
pastix_int_t * iparm
Definition: pastixdata.h:69
PASTIX_Comm inter_node_comm
Definition: pastixdata.h:77
Main PaStiX data structure.
Definition: pastixdata.h:67
void starpu_cpxtrf_sp1dplus_ll(sopalin_data_t *sopalin_data, starpu_sparse_matrix_desc_t *desc)
Perform a sparse LL^t factorization with 1D kernels.
void starpu_cpxtrf_sp1dplus_rl(sopalin_data_t *sopalin_data, starpu_sparse_matrix_desc_t *desc)
Perform a sparse LL^t factorization with 1D kernels.
Definition: starpu_cpxtrf.c:50
void starpu_cpxtrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
Perform a sparse LL^t factorization using StarPU runtime.
void starpu_cpxtrf_sp2d(sopalin_data_t *sopalin_data, starpu_sparse_matrix_desc_t *desc)
Perform a sparse LL^t factorization with 1D and 2D kernels.
pastix_int_t cblkmin2d
Definition: solver.h:215
pastix_int_t brownum
Definition: solver.h:166
pastix_int_t fcblknm
Definition: solver.h:140
pastix_int_t cblknbr
Definition: solver.h:208
SolverBlok *restrict bloktab
Definition: solver.h:223
pastix_int_t cblkmax1d
Definition: solver.h:214
SolverBlok * fblokptr
Definition: solver.h:163
pastix_int_t *restrict browtab
Definition: solver.h:224
pastix_int_t lcblknm
Definition: solver.h:139
SolverCblk *restrict cblktab
Definition: solver.h:222
int8_t cblktype
Definition: solver.h:159
Solver block structure.
Definition: solver.h:137
Solver column block structure.
Definition: solver.h:156
Solver matrix structure.
Definition: solver.h:200