PaStiX Handbook  6.3.2
sequential_cpxtrf.c
Go to the documentation of this file.
1 /**
2  *
3  * @file sequential_cpxtrf.c
4  *
5  * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
6  * Univ. Bordeaux. All rights reserved.
7  *
8  * @version 6.3.2
9  * @author Pascal Henon
10  * @author Xavier Lacoste
11  * @author Pierre Ramet
12  * @author Mathieu Faverge
13  * @author Esragul Korkmaz
14  * @author Tony Delarue
15  * @date 2023-10-23
16  *
17  * @generated from /builds/solverstack/pastix/sopalin/sequential_zpxtrf.c, normal z -> c, Wed Dec 13 12:09:47 2023
18  *
19  **/
20 #include "common.h"
21 #include "isched.h"
22 #include "blend/solver.h"
23 #include "sopalin/sopalin_data.h"
24 #include "sopalin/coeftab_c.h"
25 #include "pastix_ccores.h"
26 
27 #if defined(PASTIX_WITH_PARSEC)
28 #include "parsec/pastix_cparsec.h"
29 #endif
30 
31 #if defined(PASTIX_WITH_STARPU)
32 #include "starpu/pastix_cstarpu.h"
33 #endif
34 
35 /**
36  *******************************************************************************
37  *
38  * @brief TODO
39  *
40  *******************************************************************************
41  *
42  * @param[in] pastix_data
43  * TODO
44  *
45  * @param[in] sopalin_data
46  * TODO
47  *
48  *******************************************************************************/
49 void
51  sopalin_data_t *sopalin_data )
52 {
53  SolverMatrix *datacode = pastix_data->solvmatr;
54  SolverCblk *cblk;
55  pastix_complex32_t *work;
56  pastix_int_t i, lwork;
57  (void)sopalin_data;
58 
59  lwork = datacode->gemmmax;
60  if ( (datacode->lowrank.compress_when != PastixCompressNever) &&
61  (datacode->lowrank.ilu_lvl < INT_MAX) )
62  {
63  lwork = pastix_imax( lwork, 2 * datacode->blokmax );
64  }
65  MALLOC_INTERN( work, lwork, pastix_complex32_t );
66 
67  cblk = datacode->cblktab;
68  for (i=0; i<datacode->cblknbr; i++, cblk++){
69  if ( cblk->cblktype & CBLK_IN_SCHUR ) {
70  break;
71  }
72 
73  /* Wait for incoming dependencies */
75  datacode, cblk ) )
76  {
77  continue;
78  }
79 
80  /* Compute */
81  cpucblk_cpxtrfsp1d( datacode, cblk,
82  work, lwork );
83  }
84 
85  memFree_null( work );
86 }
87 
88 /**
89  *******************************************************************************
90  *
91  * @brief TODO
92  *
93  *******************************************************************************
94  *
95  * @param[in] ctx
96  * TODO
97  *
98  * @param[in] args
99  * TODO
100  *
101  *******************************************************************************/
102 void
103 thread_cpxtrf_static( isched_thread_t *ctx,
104  void *args )
105 {
106  sopalin_data_t *sopalin_data = (sopalin_data_t*)args;
107  SolverMatrix *datacode = sopalin_data->solvmtx;
108  SolverCblk *cblk;
109  Task *t;
110  pastix_complex32_t *work;
111  pastix_int_t i, ii, lwork;
112  pastix_int_t tasknbr, *tasktab;
113  int rank = ctx->rank;
114 
115  lwork = datacode->gemmmax;
116  if ( (datacode->lowrank.compress_when != PastixCompressNever) &&
117  (datacode->lowrank.ilu_lvl < INT_MAX) )
118  {
119  lwork = pastix_imax( lwork, 2 * datacode->blokmax );
120  }
121  MALLOC_INTERN( work, lwork, pastix_complex32_t );
122 
123  tasknbr = datacode->ttsknbr[rank];
124  tasktab = datacode->ttsktab[rank];
125 
126  for (ii=0; ii<tasknbr; ii++) {
127  i = tasktab[ii];
128  t = datacode->tasktab + i;
129  cblk = datacode->cblktab + t->cblknum;
130 
131  if ( cblk->cblktype & CBLK_IN_SCHUR ) {
132  continue;
133  }
134 
135  /* Wait for incoming dependencies */
137  datacode, cblk ) )
138  {
139  continue;
140  }
141 
142  /* Compute */
143  cpucblk_cpxtrfsp1d( datacode, cblk,
144  work, lwork );
145  }
146 
147  memFree_null( work );
148 }
149 
150 /**
151  *******************************************************************************
152  *
153  * @brief TODO
154  *
155  *******************************************************************************
156  *
157  * @param[in] pastix_data
158  * TODO
159  *
160  * @param[in] sopalin_data
161  * TODO
162  *
163  *******************************************************************************/
164 void
166  sopalin_data_t *sopalin_data )
167 {
168  isched_parallel_call( pastix_data->isched, thread_cpxtrf_static, sopalin_data );
169 }
170 
171 /**
172  * @brief TODO
173  */
174 struct args_cpxtrf_t
175 {
176  sopalin_data_t *sopalin_data;
177  volatile int32_t taskcnt;
178 };
179 
180 /**
181  *******************************************************************************
182  *
183  * @brief TODO
184  *
185  *******************************************************************************
186  *
187  * @param[in] ctx
188  * TODO
189  *
190  * @param[in] args
191  * TODO
192  *
193  *******************************************************************************/
194 void
195 thread_cpxtrf_dynamic( isched_thread_t *ctx,
196  void *args )
197 {
198  struct args_cpxtrf_t *arg = (struct args_cpxtrf_t*)args;
199  sopalin_data_t *sopalin_data = arg->sopalin_data;
200  SolverMatrix *datacode = sopalin_data->solvmtx;
201  SolverCblk *cblk;
202  SolverBlok *blok;
203  Task *t;
204  pastix_queue_t *computeQueue;
205  pastix_complex32_t *work;
206  pastix_int_t i, ii, lwork;
207  pastix_int_t tasknbr, *tasktab, cblknum, bloknum;
208  int32_t local_taskcnt = 0;
209  int rank = ctx->rank;
210 
211  lwork = datacode->gemmmax;
212  if ( (datacode->lowrank.compress_when != PastixCompressNever) &&
213  (datacode->lowrank.ilu_lvl < INT_MAX) )
214  {
215  lwork = pastix_imax( lwork, 2 * datacode->blokmax );
216  }
217  MALLOC_INTERN( work, lwork, pastix_complex32_t );
218  MALLOC_INTERN( datacode->computeQueue[rank], 1, pastix_queue_t );
219 
220  tasknbr = datacode->ttsknbr[rank];
221  tasktab = datacode->ttsktab[rank];
222  computeQueue = datacode->computeQueue[rank];
223  pqueueInit( computeQueue, tasknbr );
224 
225  /* Initialize the local task queue with available cblks */
226  for (ii=0; ii<tasknbr; ii++) {
227  i = tasktab[ii];
228  t = datacode->tasktab + i;
229 
230  if ( !(t->ctrbcnt) ) {
231  cblk = datacode->cblktab + t->cblknum;
232  pqueuePush1( computeQueue, t->cblknum, cblk->priority );
233  }
234  }
235 
236  /* Make sure that all computeQueues are allocated */
237  isched_barrier_wait( &(ctx->global_ctx->barrier) );
238 
239  while( arg->taskcnt > 0 )
240  {
241  cblknum = pqueuePop(computeQueue);
242 
243 #if defined(PASTIX_WITH_MPI)
244  /* Nothing to do, let's make progress on communications */
245  if( cblknum == -1 ) {
246  cpucblk_cmpi_progress( PastixLCoef, datacode, rank );
247  cblknum = pqueuePop( computeQueue );
248  }
249 #endif
250 
251  /* No more local job, let's steal our neighbors */
252  if( cblknum == -1 ) {
253  if ( local_taskcnt ) {
254  pastix_atomic_sub_32b( &(arg->taskcnt), local_taskcnt );
255  local_taskcnt = 0;
256  }
257  cblknum = stealQueue( datacode, rank,
258  ctx->global_ctx->world_size );
259  }
260 
261  /* Still no job, let's loop again */
262  if ( cblknum == -1 ) {
263  continue;
264  }
265 
266  if ( cblknum >= 0 ) {
267  cblk = datacode->cblktab + cblknum;
268  if ( cblk->cblktype & CBLK_IN_SCHUR ) {
269  continue;
270  }
271  cblk->threadid = rank;
272 
273  /* Compute */
274  if ( cblk->cblktype & CBLK_TASKS_2D ) {
275  cpucblk_cpxtrfsp1dplus( datacode, cblk );
276  }
277  else {
278  cpucblk_cpxtrfsp1d( datacode, cblk,
279  work, lwork );
280  }
281  }
282  else {
283  bloknum = - cblknum - 1;
284  blok = datacode->bloktab + bloknum;
285  cpucblk_cpxtrfsp1dplus_update( datacode, blok, work, lwork );
286  }
287  local_taskcnt++;
288  }
289  memFree_null( work );
290 
291  /* Make sure that everyone is done before freeing */
292  isched_barrier_wait( &(ctx->global_ctx->barrier) );
293  pqueueExit( computeQueue );
294  memFree_null( computeQueue );
295 }
296 
297 /**
298  *******************************************************************************
299  *
300  * @brief TODO
301  *
302  *******************************************************************************
303  *
304  * @param[in] pastix_data
305  * TODO
306  *
307  * @param[in] sopalin_data
308  * TODO
309  *
310  *******************************************************************************/
311 void
313  sopalin_data_t *sopalin_data )
314 {
315  SolverMatrix *datacode = sopalin_data->solvmtx;
316  int32_t taskcnt = datacode->tasknbr_1dp;
317  struct args_cpxtrf_t args_cpxtrf = { sopalin_data, taskcnt };
318 
319  /* Allocate the computeQueue */
320  MALLOC_INTERN( datacode->computeQueue,
321  pastix_data->isched->world_size, pastix_queue_t * );
322 
323  isched_parallel_call( pastix_data->isched, thread_cpxtrf_dynamic, &args_cpxtrf );
324 
325  memFree_null( datacode->computeQueue );
326 }
327 
328 #ifndef DOXYGEN_SHOULD_SKIP_THIS
329 static void (*cpxtrf_table[5])(pastix_data_t *, sopalin_data_t *) = {
332 #if defined(PASTIX_WITH_PARSEC)
334 #else
335  NULL,
336 #endif
337 #if defined(PASTIX_WITH_STARPU)
339 #else
340  NULL,
341 #endif
343 };
344 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
345 
346 /**
347  *******************************************************************************
348  *
349  * @brief TODO
350  *
351  *******************************************************************************
352  *
353  * @param[in] pastix_data
354  * TODO
355  *
356  * @param[in] sopalin_data
357  * TODO
358  *
359  *******************************************************************************/
360 void
362  sopalin_data_t *sopalin_data )
363 {
364  int sched = pastix_data->iparm[IPARM_SCHEDULER];
365  void (*cpxtrf)(pastix_data_t *, sopalin_data_t *) = cpxtrf_table[ sched ];
366 
367  if (cpxtrf == NULL) {
368  sched = PastixSchedDynamic;
369  cpxtrf = dynamic_cpxtrf;
370  }
371 
372  if ( (sched == PastixSchedSequential) ||
373  (sched == PastixSchedStatic) ||
374  (sched == PastixSchedDynamic) )
375  {
376  solverRequestInit( PastixFacto, sopalin_data->solvmtx );
377  solverRecvInit( PastixLCoef, sopalin_data->solvmtx, PastixComplex32 );
378  }
379 
380  cpxtrf( pastix_data, sopalin_data );
381 
382  if ( (sched == PastixSchedSequential) ||
383  (sched == PastixSchedStatic) ||
384  (sched == PastixSchedDynamic) )
385  {
386  cpucblk_crequest_cleanup( PastixLCoef, sched, sopalin_data->solvmtx );
387  solverRequestExit( sopalin_data->solvmtx );
388  solverRecvExit( sopalin_data->solvmtx );
389  }
390 
391 #if defined(PASTIX_DEBUG_FACTO)
392  coeftab_cdump( pastix_data, sopalin_data->solvmtx, "pxtrf" );
393 #endif
394 }
void cpucblk_cpxtrfsp1dplus_update(SolverMatrix *solvmtx, SolverBlok *blok, pastix_complex32_t *work, pastix_int_t lwork)
Apply the updates of the LL^t factorisation of a given panel.
int cpucblk_cpxtrfsp1dplus(SolverMatrix *solvmtx, SolverCblk *cblk)
Perform the LL^t factorization of a given panel.
int cpucblk_cpxtrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex32_t *work, pastix_int_t lwork)
Perform the LL^t factorization of a given panel and apply all its updates.
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
float _Complex pastix_complex32_t
Definition: datatypes.h:76
static void pqueuePush1(pastix_queue_t *q, pastix_int_t elt, double key1)
Push an element with a single key.
Definition: queue.h:64
void pqueueExit(pastix_queue_t *)
Free the structure associated to the queue.
Definition: queue.c:110
static pastix_int_t pqueuePop(pastix_queue_t *q)
Pop the head of the queue whithout returning the keys.
Definition: queue.h:75
int pqueueInit(pastix_queue_t *, pastix_int_t)
Initialize the queue structure with an initial space to store the elements.
Definition: queue.c:81
Queue structure.
Definition: queue.h:38
void solverRecvExit(SolverMatrix *solvmtx)
Free the array linked to pending reception.
Definition: solver.c:563
void solverRequestExit(SolverMatrix *solvmtx)
Free the arrays related to the requests.
Definition: solver.c:481
void solverRecvInit(pastix_coefside_t side, SolverMatrix *solvmtx, pastix_coeftype_t flttype)
Allocate the reception buffer, and initiate the first persistant reception.
Definition: solver.c:522
void solverRequestInit(solve_step_t solve_step, SolverMatrix *solvmtx)
Instanciate the arrays for the requests according to the scheduler.
Definition: solver.c:433
void coeftab_cdump(pastix_data_t *pastix_data, const SolverMatrix *solvmtx, const char *filename)
Dump the solver matrix coefficients into a file in human readable format.
Definition: coeftab_c.c:55
void cpucblk_crequest_cleanup(pastix_coefside_t side, pastix_int_t sched, SolverMatrix *solvmtx)
Waitall routine for current cblk request.
int cpucblk_cincoming_deps(int rank, pastix_coefside_t side, SolverMatrix *solvmtx, SolverCblk *cblk)
Wait for incoming dependencies, and return when cblk->ctrbcnt has reached 0.
pastix_compress_when_t compress_when
void parsec_cpxtrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
Perform a sparse LL^t factorization using PaRSEC runtime.
@ PastixLCoef
Definition: api.h:478
@ PastixCompressNever
Definition: api.h:385
@ IPARM_SCHEDULER
Definition: api.h:117
@ PastixSchedStatic
Definition: api.h:335
@ PastixSchedDynamic
Definition: api.h:338
@ PastixSchedSequential
Definition: api.h:334
SolverMatrix * solvmatr
Definition: pastixdata.h:102
pastix_int_t * iparm
Definition: pastixdata.h:69
isched_t * isched
Definition: pastixdata.h:85
Main PaStiX data structure.
Definition: pastixdata.h:67
void starpu_cpxtrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
Perform a sparse LL^t factorization using StarPU runtime.
void sequential_cpxtrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
TODO.
void thread_cpxtrf_static(isched_thread_t *ctx, void *args)
TODO.
void dynamic_cpxtrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
TODO.
void thread_cpxtrf_dynamic(isched_thread_t *ctx, void *args)
TODO.
void static_cpxtrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
TODO.
void sopalin_cpxtrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
TODO.
pastix_int_t cblknum
Definition: solver.h:121
pastix_lr_t lowrank
Definition: solver.h:230
pastix_int_t priority
Definition: solver.h:177
static pastix_int_t stealQueue(SolverMatrix *solvmtx, int rank, int nbthreads)
Task stealing method.
Definition: solver.h:466
pastix_int_t cblknbr
Definition: solver.h:208
SolverBlok *restrict bloktab
Definition: solver.h:223
pastix_int_t volatile ctrbcnt
Definition: solver.h:123
int threadid
Definition: solver.h:176
SolverCblk *restrict cblktab
Definition: solver.h:222
int8_t cblktype
Definition: solver.h:159
Solver block structure.
Definition: solver.h:137
Solver column block structure.
Definition: solver.h:156
Solver column block structure.
Definition: solver.h:200
The task structure for the numerical factorization.
Definition: solver.h:118