PaStiX Handbook  6.4.0
sequential_dpotrf.c
Go to the documentation of this file.
1 /**
2  *
3  * @file sequential_dpotrf.c
4  *
5  * @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
6  * Univ. Bordeaux. All rights reserved.
7  *
8  * @version 6.4.0
9  * @author Pascal Henon
10  * @author Xavier Lacoste
11  * @author Pierre Ramet
12  * @author Mathieu Faverge
13  * @author Esragul Korkmaz
14  * @author Tony Delarue
15  * @author Alycia Lisito
16  * @date 2024-07-05
17  *
18  * @generated from /builds/solverstack/pastix/sopalin/sequential_zpotrf.c, normal z -> d, Thu Aug 29 14:20:52 2024
19  *
20  **/
21 #include "common.h"
22 #include "isched.h"
23 #include "blend/solver.h"
24 #include "sopalin/sopalin_data.h"
25 #include "sopalin/coeftab_d.h"
26 #include "pastix_dcores.h"
27 
28 #if defined(PASTIX_WITH_PARSEC)
29 #include "parsec/pastix_dparsec.h"
30 #endif
31 
32 #if defined(PASTIX_WITH_STARPU)
33 #include "starpu/pastix_dstarpu.h"
34 #endif
35 
36 /**
37  *******************************************************************************
38  *
39  * @brief TODO
40  *
41  *******************************************************************************
42  *
43  * @param[in] pastix_data
44  * TODO
45  *
46  * @param[in] sopalin_data
47  * TODO
48  *
49  *******************************************************************************/
50 void
52  sopalin_data_t *sopalin_data )
53 {
54  SolverMatrix *datacode = pastix_data->solvmatr;
55  SolverCblk *cblk;
56  double *work;
57  pastix_int_t i, lwork;
58  (void)sopalin_data;
59 
60  lwork = datacode->gemmmax;
61  if ( (datacode->lowrank.compress_when != PastixCompressNever) &&
62  (datacode->lowrank.ilu_lvl < INT_MAX) )
63  {
64  lwork = pastix_imax( lwork, 2 * datacode->blokmax );
65  }
66  MALLOC_INTERN( work, lwork, double );
67 
68  cblk = datacode->cblktab;
69  for (i=0; i<datacode->cblknbr; i++, cblk++){
70  if ( cblk->cblktype & CBLK_IN_SCHUR ) {
71  break;
72  }
73 
74  /* Wait for incoming dependencies */
76  datacode, cblk ) )
77  {
78  continue;
79  }
80 
81  /* Compute */
82  cpucblk_dpotrfsp1d( datacode, cblk,
83  work, lwork );
84  }
85 
86  memFree_null( work );
87 }
88 
89 /**
90  *******************************************************************************
91  *
92  * @brief TODO
93  *
94  *******************************************************************************
95  *
96  * @param[in] ctx
97  * TODO
98  *
99  * @param[in] args
100  * TODO
101  *
102  *******************************************************************************/
103 void
104 thread_dpotrf_static( isched_thread_t *ctx, void *args )
105 {
106  sopalin_data_t *sopalin_data = (sopalin_data_t*)args;
107  SolverMatrix *datacode = sopalin_data->solvmtx;
108  SolverCblk *cblk;
109  Task *t;
110  double *work;
111  pastix_int_t i, ii, lwork;
112  pastix_int_t tasknbr, *tasktab;
113  int rank = ctx->rank;
114 
115  lwork = datacode->gemmmax;
116  if ( (datacode->lowrank.compress_when != PastixCompressNever) &&
117  (datacode->lowrank.ilu_lvl < INT_MAX) )
118  {
119  lwork = pastix_imax( lwork, 2 * datacode->blokmax );
120  }
121  MALLOC_INTERN( work, lwork, double );
122 
123  tasknbr = datacode->ttsknbr[rank];
124  tasktab = datacode->ttsktab[rank];
125 
126  for (ii=0; ii<tasknbr; ii++) {
127  i = tasktab[ii];
128  t = datacode->tasktab + i;
129  cblk = datacode->cblktab + t->cblknum;
130 
131  if ( cblk->cblktype & CBLK_IN_SCHUR ) {
132  continue;
133  }
134 
135  /* Wait for incoming dependencies */
137  datacode, cblk ) )
138  {
139  continue;
140  }
141 
142  /* Compute */
143  cpucblk_dpotrfsp1d( datacode, cblk,
144  work, lwork );
145  }
146 
147  memFree_null( work );
148 }
149 
150 /**
151  *******************************************************************************
152  *
153  * @brief TODO
154  *
155  *******************************************************************************
156  *
157  * @param[in] pastix_data
158  * TODO
159  *
160  * @param[in] sopalin_data
161  * TODO
162  *
163  *******************************************************************************/
164 void
166  sopalin_data_t *sopalin_data )
167 {
168  isched_parallel_call( pastix_data->isched, thread_dpotrf_static, sopalin_data );
169 }
170 
171 /**
172  * @brief TODO
173  */
174 struct args_dpotrf_t
175 {
176  sopalin_data_t *sopalin_data;
177  volatile int32_t taskcnt;
178 };
179 
180 /**
181  *******************************************************************************
182  *
183  * @brief TODO
184  *
185  *******************************************************************************
186  *
187  * @param[in] ctx
188  * TODO
189  *
190  * @param[in] args
191  * TODO
192  *
193  *******************************************************************************/
194 void
195 thread_dpotrf_dynamic( isched_thread_t *ctx, void *args )
196 {
197  struct args_dpotrf_t *arg = (struct args_dpotrf_t*)args;
198  sopalin_data_t *sopalin_data = arg->sopalin_data;
199  SolverMatrix *datacode = sopalin_data->solvmtx;
200  SolverCblk *cblk;
201  SolverBlok *blok;
202  Task *t;
203  pastix_queue_t *computeQueue;
204  double *work;
205  pastix_int_t i, ii, lwork;
206  pastix_int_t tasknbr, *tasktab, cblknum, bloknum;
207  int32_t local_taskcnt = 0;
208  int rank = ctx->rank;
209 
210  lwork = datacode->gemmmax;
211  if ( (datacode->lowrank.compress_when != PastixCompressNever) &&
212  (datacode->lowrank.ilu_lvl < INT_MAX) )
213  {
214  lwork = pastix_imax( lwork, 2 * datacode->blokmax );
215  }
216  MALLOC_INTERN( work, lwork, double );
217  MALLOC_INTERN( datacode->computeQueue[rank], 1, pastix_queue_t );
218 
219  tasknbr = datacode->ttsknbr[rank];
220  tasktab = datacode->ttsktab[rank];
221  computeQueue = datacode->computeQueue[rank];
222  pqueueInit( computeQueue, tasknbr );
223 
224  /* Initialize the local task queue with available cblks */
225  for (ii=0; ii<tasknbr; ii++) {
226  i = tasktab[ii];
227  t = datacode->tasktab + i;
228 
229  if ( !(t->ctrbcnt) ) {
230  cblk = datacode->cblktab + t->cblknum;
231  pqueuePush1( computeQueue, t->cblknum, cblk->priority );
232  }
233  }
234 
235  /* Make sure that all computeQueues are allocated */
236  isched_barrier_wait( &(ctx->global_ctx->barrier) );
237 
238  while( arg->taskcnt > 0 )
239  {
240  cblknum = pqueuePop(computeQueue);
241 
242 #if defined(PASTIX_WITH_MPI)
243  /* Nothing to do, let's make progress on communications */
244  if( cblknum == -1 ) {
245  cpucblk_dmpi_progress( PastixLCoef, datacode, rank );
246  cblknum = pqueuePop( computeQueue );
247  }
248 #endif
249 
250  /* No more local job, let's steal our neighbors */
251  if( cblknum == -1 ) {
252  if ( local_taskcnt ) {
253  pastix_atomic_sub_32b( &(arg->taskcnt), local_taskcnt );
254  local_taskcnt = 0;
255  }
256  cblknum = stealQueue( datacode, rank,
257  ctx->global_ctx->world_size );
258  }
259 
260  /* Still no job, let's loop again */
261  if ( cblknum == -1 ) {
262  continue;
263  }
264 
265  if ( cblknum >= 0 ) {
266  cblk = datacode->cblktab + cblknum;
267  if ( cblk->cblktype & CBLK_IN_SCHUR ) {
268  continue;
269  }
270  cblk->threadid = rank;
271 
272  /* Compute */
273  if ( cblk->cblktype & CBLK_TASKS_2D ) {
274  cpucblk_dpotrfsp1dplus( datacode, cblk );
275  }
276  else {
277  cpucblk_dpotrfsp1d( datacode, cblk,
278  work, lwork );
279  }
280  }
281  else {
282  bloknum = - cblknum - 1;
283  blok = datacode->bloktab + bloknum;
284  cpucblk_dpotrfsp1dplus_update( datacode, blok, work, lwork );
285  }
286  local_taskcnt++;
287  }
288  memFree_null( work );
289 
290  /* Make sure that everyone is done before freeing */
291  isched_barrier_wait( &(ctx->global_ctx->barrier) );
292  pqueueExit( computeQueue );
293  memFree_null( computeQueue );
294 }
295 
296 /**
297  *******************************************************************************
298  *
299  * @brief TODO
300  *
301  *******************************************************************************
302  *
303  * @param[in] pastix_data
304  * TODO
305  *
306  * @param[in] sopalin_data
307  * TODO
308  *
309  *******************************************************************************/
310 void
312  sopalin_data_t *sopalin_data )
313 {
314  SolverMatrix *datacode = sopalin_data->solvmtx;
315  int32_t taskcnt = datacode->tasknbr_1dp;
316  struct args_dpotrf_t args_dpotrf = { sopalin_data, taskcnt };
317 
318  /* Allocate the computeQueue */
319  MALLOC_INTERN( datacode->computeQueue,
320  pastix_data->isched->world_size, pastix_queue_t * );
321 
322  isched_parallel_call( pastix_data->isched, thread_dpotrf_dynamic, &args_dpotrf );
323 
324  memFree_null( datacode->computeQueue );
325 }
326 
327 #ifndef DOXYGEN_SHOULD_SKIP_THIS
328 static void (*dpotrf_table[5])(pastix_data_t *, sopalin_data_t *) = {
331 #if defined(PASTIX_WITH_PARSEC)
333 #else
334  NULL,
335 #endif
336 #if defined(PASTIX_WITH_STARPU)
338 #else
339  NULL,
340 #endif
342 };
343 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
344 
345 /**
346  *******************************************************************************
347  *
348  * @brief TODO
349  *
350  *******************************************************************************
351  *
352  * @param[in] pastix_data
353  * TODO
354  *
355  * @param[in] sopalin_data
356  * TODO
357  *
358  *******************************************************************************/
359 void
361  sopalin_data_t *sopalin_data )
362 {
363  int sched = pastix_data->iparm[IPARM_SCHEDULER];
364  void (*dpotrf)(pastix_data_t *, sopalin_data_t *) = dpotrf_table[ sched ];
365 
366  if ( dpotrf == NULL ) {
367  sched = PastixSchedDynamic;
368  dpotrf = dynamic_dpotrf;
369  }
370 
371  if ( (sched == PastixSchedSequential) ||
372  (sched == PastixSchedStatic) ||
373  (sched == PastixSchedDynamic) )
374  {
375  solverRequestInit( PastixFacto, sopalin_data->solvmtx );
376  solverRecvInit( PastixLCoef, sopalin_data->solvmtx, PastixDouble );
377  }
378 
379  dpotrf( pastix_data, sopalin_data );
380 
381  if ( (sched == PastixSchedSequential) ||
382  (sched == PastixSchedStatic) ||
383  (sched == PastixSchedDynamic) )
384  {
385  cpucblk_drequest_cleanup( PastixLCoef, sched, sopalin_data->solvmtx );
386  solverRequestExit( sopalin_data->solvmtx );
387  solverRecvExit( sopalin_data->solvmtx );
388  }
389 
390 #if defined(PASTIX_DEBUG_FACTO)
391  coeftab_ddump( pastix_data, sopalin_data->solvmtx, "potrf" );
392 #endif
393 }
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
static void pqueuePush1(pastix_queue_t *q, pastix_int_t elt, double key1)
Push an element with a single key.
Definition: queue.h:64
void pqueueExit(pastix_queue_t *)
Free the structure associated to the queue.
Definition: queue.c:110
static pastix_int_t pqueuePop(pastix_queue_t *q)
Pop the head of the queue whithout returning the keys.
Definition: queue.h:75
int pqueueInit(pastix_queue_t *, pastix_int_t)
Initialize the queue structure with an initial space to store the elements.
Definition: queue.c:81
Queue structure.
Definition: queue.h:38
void solverRecvExit(SolverMatrix *solvmtx)
Free the array linked to pending reception.
Definition: solver.c:554
void solverRequestExit(SolverMatrix *solvmtx)
Free the arrays related to the requests.
Definition: solver.c:472
void solverRecvInit(pastix_coefside_t side, SolverMatrix *solvmtx, pastix_coeftype_t flttype)
Allocate the reception buffer, and initiate the first persistant reception.
Definition: solver.c:513
void solverRequestInit(solve_step_t solve_step, SolverMatrix *solvmtx)
Instanciate the arrays for the requests according to the scheduler.
Definition: solver.c:424
void coeftab_ddump(pastix_data_t *pastix_data, const SolverMatrix *solvmtx, const char *filename)
Dump the solver matrix coefficients into a file in human readable format.
Definition: coeftab_d.c:55
void cpucblk_dpotrfsp1dplus_update(SolverMatrix *solvmtx, SolverBlok *blok, double *work, pastix_int_t lwork)
Apply the updates of the cholesky factorisation of a given panel.
int cpucblk_dpotrfsp1dplus(SolverMatrix *solvmtx, SolverCblk *cblk)
Perform the Cholesky factorization of a given panel and submit tasks for the subsequent updates.
int cpucblk_dincoming_deps(int rank, pastix_coefside_t side, SolverMatrix *solvmtx, SolverCblk *cblk)
Wait for incoming dependencies, and return when cblk->ctrbcnt has reached 0.
void cpucblk_drequest_cleanup(pastix_coefside_t side, pastix_int_t sched, SolverMatrix *solvmtx)
Waitall routine for current cblk request.
int cpucblk_dpotrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, double *work, pastix_int_t lwork)
Perform the Cholesky factorization of a given panel and apply all its updates.
pastix_compress_when_t compress_when
void parsec_dpotrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
Perform a sparse Cholesky factorization using PaRSEC runtime.
@ PastixLCoef
Definition: api.h:478
@ PastixCompressNever
Definition: api.h:385
@ IPARM_SCHEDULER
Definition: api.h:117
@ PastixSchedStatic
Definition: api.h:335
@ PastixSchedDynamic
Definition: api.h:338
@ PastixSchedSequential
Definition: api.h:334
SolverMatrix * solvmatr
Definition: pastixdata.h:103
pastix_int_t * iparm
Definition: pastixdata.h:70
isched_t * isched
Definition: pastixdata.h:86
Main PaStiX data structure.
Definition: pastixdata.h:68
void starpu_dpotrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
Perform a sparse Cholesky factorization using StarPU runtime.
void dynamic_dpotrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
TODO.
void thread_dpotrf_dynamic(isched_thread_t *ctx, void *args)
TODO.
void static_dpotrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
TODO.
void thread_dpotrf_static(isched_thread_t *ctx, void *args)
TODO.
void sequential_dpotrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
TODO.
void sopalin_dpotrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
TODO.
pastix_int_t cblknum
Definition: solver.h:125
pastix_lr_t lowrank
Definition: solver.h:236
pastix_int_t priority
Definition: solver.h:183
static pastix_int_t stealQueue(SolverMatrix *solvmtx, int rank, int nbthreads)
Task stealing method.
Definition: solver.h:471
pastix_int_t cblknbr
Definition: solver.h:211
SolverBlok *restrict bloktab
Definition: solver.h:229
pastix_int_t volatile ctrbcnt
Definition: solver.h:127
int threadid
Definition: solver.h:182
SolverCblk *restrict cblktab
Definition: solver.h:228
int8_t cblktype
Definition: solver.h:164
Solver block structure.
Definition: solver.h:141
Solver column block structure.
Definition: solver.h:161
Solver column block structure.
Definition: solver.h:203
The task structure for the numerical factorization.
Definition: solver.h:122