PaStiX Handbook 6.3.2
sequential_chetrf.c
1 /**
2  *
3  * @file sequential_chetrf.c
4  *
5  * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
6  * Univ. Bordeaux. All rights reserved.
7  *
8  * @version 6.3.2
9  * @author Pascal Henon
10  * @author Xavier Lacoste
11  * @author Pierre Ramet
12  * @author Mathieu Faverge
13  * @author Esragul Korkmaz
14  * @author Tony Delarue
15  * @author Alycia Lisito
16  * @date 2023-11-07
17  *
18  * @generated from /builds/solverstack/pastix/sopalin/sequential_zhetrf.c, normal z -> c, Wed Dec 13 12:09:47 2023
19  *
20  **/
21 #include "common.h"
22 #include "isched.h"
23 #include "blend/solver.h"
24 #include "sopalin/sopalin_data.h"
25 #include "sopalin/coeftab_c.h"
26 #include "pastix_ccores.h"
27 
28 #if defined(PASTIX_WITH_PARSEC)
29 #include "parsec/pastix_cparsec.h"
30 #endif
31 
32 #if defined(PASTIX_WITH_STARPU)
33 #include "starpu/pastix_cstarpu.h"
34 #endif
35 
36 /**
37  *******************************************************************************
38  *
39  * @brief Perform the sparse LDL^h factorization with the sequential scheduler.
40  *
41  *******************************************************************************
42  *
43  * @param[in] pastix_data
44  *          The pastix_data structure that holds the solver matrix to factorize.
45  *
46  * @param[in] sopalin_data
47  *          Solver structure shared by the factorization routines; unused by the sequential implementation.
48  *
49  *******************************************************************************/
50 void
51 sequential_chetrf( pastix_data_t  *pastix_data,
52                    sopalin_data_t *sopalin_data )
53 {
54  SolverMatrix *datacode = pastix_data->solvmatr;
55  SolverCblk *cblk;
56  pastix_complex32_t *work1, *work2;
57  pastix_int_t N, i, lwork1, lwork2;
58  (void)sopalin_data;
59 
60  lwork1 = datacode->offdmax;
61  lwork2 = pastix_imax( datacode->gemmmax, datacode->blokmax );
62  if ( (datacode->lowrank.compress_when != PastixCompressNever) &&
63  (datacode->lowrank.ilu_lvl < INT_MAX) )
64  {
65  lwork2 = pastix_imax( lwork2, 2 * datacode->blokmax );
66  }
67  MALLOC_INTERN( work1, lwork1, pastix_complex32_t );
68  MALLOC_INTERN( work2, lwork2, pastix_complex32_t );
69 
70  cblk = datacode->cblktab;
71  for (i=0; i<datacode->cblknbr; i++, cblk++){
72  if ( cblk->cblktype & CBLK_IN_SCHUR ) {
73  break;
74  }
75 
76  /* Wait for incoming dependencies */
77  if ( cpucblk_cincoming_deps( 0, PastixLCoef,
78                               datacode, cblk ) )
79  {
80  continue;
81  }
82 
83  N = cblk_colnbr( cblk );
84 
85  /* Compute */
86  cpucblk_chetrfsp1d( datacode, cblk,
87  /*
88  * Workspace size has been computed without the
89  * diagonal block, thus in order to work with generic
90  * TRSM and GEMM kernels, we must shift the DLh workspace
91  * by the diagonal block size
92  */
93  work1 - (N*N), work2, lwork2 );
94  }
95 
96  memFree_null( work1 );
97  memFree_null( work2 );
98 }
99 
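Note on the call above: work1 is allocated with lwork1 = offdmax entries, i.e. without room for the diagonal block, so the code passes work1 - (N*N) to cpucblk_chetrfsp1d and the comment explains why. The standalone sketch below (hypothetical fake_panel_kernel, not part of the PaStiX API) illustrates that idiom under the assumption that the callee only touches indices at or beyond N*N; like the original code, it forms a pointer below the start of the allocation and relies on it never being dereferenced in that range.

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical generic kernel: it addresses the workspace as
 * DLh[N*N ... N*N + offd - 1], i.e. as if the diagonal block were
 * stored at the beginning of the buffer.                           */
static void
fake_panel_kernel( float *DLh, int N, int offd )
{
    for ( int i = 0; i < offd; i++ ) {
        DLh[ N * N + i ] = (float)i;
    }
}

int
main( void )
{
    int    N    = 4;   /* columns of the diagonal block          */
    int    offd = 32;  /* workspace sized for off-diagonal part  */
    float *work = malloc( offd * sizeof(float) );

    /* (work - N*N) + N*N == work: the kernel's indexing starts
     * exactly at the beginning of the allocated workspace.       */
    fake_panel_kernel( work - (N * N), N, offd );

    printf( "last entry: %f\n", (double)work[offd - 1] );
    free( work );
    return 0;
}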
100 /**
101  *******************************************************************************
102  *
103  * @brief Static scheduler routine: factorize the column blocks assigned to the calling thread.
104  *
105  *******************************************************************************
106  *
107  * @param[in] ctx
108  *          The thread context of the calling thread in the isched scheduler.
109  *
110  * @param[in] args
111  *          Pointer to the sopalin_data_t structure shared by all the threads.
112  *
113  *******************************************************************************/
114 void
115 thread_chetrf_static( isched_thread_t *ctx,
116  void *args )
117 {
118  sopalin_data_t *sopalin_data = (sopalin_data_t*)args;
119  SolverMatrix *datacode = sopalin_data->solvmtx;
120  SolverCblk *cblk;
121  Task *t;
122  pastix_complex32_t *work1, *work2;
123  pastix_int_t N, i, ii, lwork1, lwork2;
124  pastix_int_t tasknbr, *tasktab;
125  int rank = ctx->rank;
126 
127  lwork1 = datacode->offdmax;
128  lwork2 = pastix_imax( datacode->gemmmax, datacode->blokmax );
129  if ( (datacode->lowrank.compress_when != PastixCompressNever) &&
130  (datacode->lowrank.ilu_lvl < INT_MAX) )
131  {
132  lwork2 = pastix_imax( lwork2, 2 * datacode->blokmax );
133  }
134  MALLOC_INTERN( work1, lwork1, pastix_complex32_t );
135  MALLOC_INTERN( work2, lwork2, pastix_complex32_t );
136 
137  tasknbr = datacode->ttsknbr[rank];
138  tasktab = datacode->ttsktab[rank];
139 
140  for (ii=0; ii<tasknbr; ii++) {
141  i = tasktab[ii];
142  t = datacode->tasktab + i;
143  cblk = datacode->cblktab + t->cblknum;
144 
145  if ( cblk->cblktype & CBLK_IN_SCHUR ) {
146  continue;
147  }
148 
149  /* Wait for incoming dependencies */
150  if ( cpucblk_cincoming_deps( rank, PastixLCoef,
151                               datacode, cblk ) )
152  {
153  continue;
154  }
155 
156  N = cblk_colnbr( cblk );
157 
158  /* Compute */
159  cpucblk_chetrfsp1d( datacode, cblk,
160  /*
161  * Workspace size has been computed without the
162  * diagonal block, thus in order to work with generic
163  * TRSM and GEMM kernels, we must shift the DLh workspace
164  * by the diagonal block size
165  */
166  work1 - (N*N), work2, lwork2 );
167  }
168 
169  memFree_null( work1 );
170  memFree_null( work2 );
171 }
172 
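The static variant relies on the per-thread task lists ttsknbr[rank] / ttsktab[rank] computed during the analysis: each thread walks only its own list and no load balancing happens at run time. Below is a minimal pthread-based sketch of that scheme; the worker_t structure and the block distribution of tasks are hypothetical stand-ins for the solver's precomputed mapping.

#include <pthread.h>
#include <stdio.h>

#define NTHREADS 4
#define NTASKS   16

typedef struct {
    int  rank;
    int *tasktab;   /* indices of the tasks owned by this thread */
    int  tasknbr;   /* number of tasks owned by this thread      */
} worker_t;

static void *
worker_static( void *arg )
{
    worker_t *w = (worker_t *)arg;
    /* Process only the tasks assigned beforehand to this rank. */
    for ( int ii = 0; ii < w->tasknbr; ii++ ) {
        int i = w->tasktab[ii];
        printf( "thread %d processes task %d\n", w->rank, i );
    }
    return NULL;
}

int
main( void )
{
    pthread_t threads[NTHREADS];
    worker_t  workers[NTHREADS];
    int       tasktab[NTASKS];

    for ( int i = 0; i < NTASKS; i++ ) {
        tasktab[i] = i;
    }
    /* Contiguous block distribution computed "at analysis time". */
    for ( int r = 0; r < NTHREADS; r++ ) {
        workers[r].rank    = r;
        workers[r].tasktab = tasktab + r * (NTASKS / NTHREADS);
        workers[r].tasknbr = NTASKS / NTHREADS;
        pthread_create( &threads[r], NULL, worker_static, &workers[r] );
    }
    for ( int r = 0; r < NTHREADS; r++ ) {
        pthread_join( threads[r], NULL );
    }
    return 0;
}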
173 /**
174  *******************************************************************************
175  *
176  * @brief Perform the sparse LDL^h factorization with the static multi-threaded scheduler.
177  *
178  *******************************************************************************
179  *
180  * @param[in] pastix_data
181  *          The pastix_data structure that provides the internal scheduler context.
182  *
183  * @param[in] sopalin_data
184  *          Solver structure that holds the solver matrix to factorize; forwarded to each thread.
185  *
186  *******************************************************************************/
187 void
188 static_chetrf( pastix_data_t  *pastix_data,
189                sopalin_data_t *sopalin_data )
190 {
191  isched_parallel_call( pastix_data->isched, thread_chetrf_static, sopalin_data );
192 }
193 
194 /**
195  * @brief Arguments shared by the threads of the dynamic scheduler.
196  */
197 struct args_chetrf_t
198 {
199  sopalin_data_t *sopalin_data;
200  volatile int32_t taskcnt;
201 };
202 
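The taskcnt field is the number of 1D tasks still to be processed, shared by all the dynamic threads. In the loop below, each thread counts its completed tasks in local_taskcnt and subtracts them from the shared counter in one operation (pastix_atomic_sub_32b) when it runs out of local work, which keeps contention on the shared variable low. A minimal C11 sketch of that batched-decrement pattern, with hypothetical names and C11 atomics instead of the PaStiX primitives, looks like this:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Shared counter of remaining tasks (8 tasks in this toy example). */
static _Atomic int32_t taskcnt = 8;

static void
finish_some_tasks( int n )
{
    int32_t local_taskcnt = 0;

    for ( int i = 0; i < n; i++ ) {
        /* ... perform one task ... */
        local_taskcnt++;
    }
    /* One atomic update for the whole batch instead of one per task. */
    atomic_fetch_sub( &taskcnt, local_taskcnt );
}

int
main( void )
{
    finish_some_tasks( 3 );
    finish_some_tasks( 5 );
    printf( "remaining: %d\n", (int)atomic_load( &taskcnt ) );
    return 0;
}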
203 /**
204  *******************************************************************************
205  *
206  * @brief Dynamic scheduler routine: pop ready column blocks from the local queue and steal work from the other threads when idle.
207  *
208  *******************************************************************************
209  *
210  * @param[in] ctx
211  *          The thread context of the calling thread in the isched scheduler.
212  *
213  * @param[in] args
214  *          Pointer to the args_chetrf_t structure shared by all the threads.
215  *
216  *******************************************************************************/
217 void
218 thread_chetrf_dynamic( isched_thread_t *ctx,
219  void *args )
220 {
221  struct args_chetrf_t *arg = (struct args_chetrf_t *)args;
222  sopalin_data_t *sopalin_data = arg->sopalin_data;
223  SolverMatrix *datacode = sopalin_data->solvmtx;
224  SolverCblk *cblk;
225  SolverBlok *blok;
226  Task *t;
227  pastix_queue_t *computeQueue;
228  pastix_complex32_t *work1, *work2;
229  pastix_int_t N, i, ii, lwork1, lwork2;
230  pastix_int_t tasknbr, *tasktab, cblknum, bloknum;
231  int32_t local_taskcnt = 0;
232  int rank = ctx->rank;
233 
234  lwork1 = datacode->offdmax;
235  lwork2 = pastix_imax( datacode->gemmmax, datacode->blokmax );
236  if ( (datacode->lowrank.compress_when != PastixCompressNever) &&
237  (datacode->lowrank.ilu_lvl < INT_MAX) )
238  {
239  lwork2 = pastix_imax( lwork2, 2 * datacode->blokmax );
240  }
241  MALLOC_INTERN( work1, lwork1, pastix_complex32_t );
242  MALLOC_INTERN( work2, lwork2, pastix_complex32_t );
243  MALLOC_INTERN( datacode->computeQueue[rank], 1, pastix_queue_t );
244 
245  tasknbr = datacode->ttsknbr[rank];
246  tasktab = datacode->ttsktab[rank];
247  computeQueue = datacode->computeQueue[rank];
248  pqueueInit( computeQueue, tasknbr );
249 
250  /* Initialize the local task queue with available cblks */
251  for (ii=0; ii<tasknbr; ii++) {
252  i = tasktab[ii];
253  t = datacode->tasktab + i;
254 
255  if ( !(t->ctrbcnt) ) {
256  cblk = datacode->cblktab + t->cblknum;
257  pqueuePush1( computeQueue, t->cblknum, cblk->priority );
258  }
259  }
260 
261  /* Make sure that all computeQueues are allocated */
262  isched_barrier_wait( &(ctx->global_ctx->barrier) );
263 
264  while( arg->taskcnt > 0 )
265  {
266  cblknum = pqueuePop(computeQueue);
267 
268 #if defined(PASTIX_WITH_MPI)
269  /* Nothing to do, let's make progress on communications */
270  if( cblknum == -1 ) {
271  cpucblk_cmpi_progress( PastixLCoef, datacode, rank );
272  cblknum = pqueuePop( computeQueue );
273  }
274 #endif
275 
276  /* No more local job, let's steal our neighbors */
277  if( cblknum == -1 ) {
278  if ( local_taskcnt ) {
279  pastix_atomic_sub_32b( &(arg->taskcnt), local_taskcnt );
280  local_taskcnt = 0;
281  }
282  cblknum = stealQueue( datacode, rank,
283  ctx->global_ctx->world_size );
284  }
285 
286  /* Still no job, let's loop again */
287  if ( cblknum == -1 ) {
288  continue;
289  }
290 
291  if ( cblknum >= 0 ) {
292  cblk = datacode->cblktab + cblknum;
293  if ( cblk->cblktype & CBLK_IN_SCHUR ) {
294  continue;
295  }
296  cblk->threadid = rank;
297 
298  N = cblk_colnbr( cblk );
299 
300  /* Compute */
301  if ( cblk->cblktype & CBLK_TASKS_2D ) {
302  cpucblk_chetrfsp1dplus( datacode, cblk );
303  }
304  else {
305  cpucblk_chetrfsp1d( datacode, cblk,
306  /*
307  * Workspace size has been computed without the
308  * diagonal block, thus in order to work with generic
309  * TRSM and GEMM kernels, we must shift the DLh workspace
310  * by the diagonal block size
311  */
312  work1 - (N*N), work2, lwork2 );
313  }
314  }
315  else {
316  bloknum = - cblknum - 1;
317  blok = datacode->bloktab + bloknum;
318  cpucblk_chetrfsp1dplus_update( datacode, blok, work2 );
319  }
320  local_taskcnt++;
321  }
322  memFree_null( work1 );
323  memFree_null( work2 );
324 
325  /* Make sure that everyone is done before freeing */
326  isched_barrier_wait( &(ctx->global_ctx->barrier) );
327  pqueueExit( computeQueue );
328  memFree_null( computeQueue );
329 }
330 
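In the loop above, a single signed identifier popped from the queue encodes two kinds of tasks: a non-negative value is a column block index (1D panel task), while a negative value encodes a block update for the CBLK_TASKS_2D path as bloknum = -cblknum - 1. The self-contained sketch below (hypothetical encode/decode helpers, not PaStiX functions) shows why the extra -1 keeps index 0 unambiguous:

#include <assert.h>

static int encode_blok( int bloknum ) { return -bloknum - 1; }
static int decode_blok( int id )      { return -id - 1; }

int
main( void )
{
    int id;

    /* Panel task on cblk 12: stored as-is, recognized by id >= 0. */
    id = 12;
    assert( id >= 0 );

    /* Update task on blok 7: stored as a negative identifier. */
    id = encode_blok( 7 );
    assert( id < 0 );
    assert( decode_blok( id ) == 7 );

    /* Block 0 maps to -1, so it cannot be confused with cblk 0. */
    assert( encode_blok( 0 ) == -1 );
    return 0;
}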
331 /**
332  *******************************************************************************
333  *
334  * @brief Perform the sparse LDL^h factorization with the dynamic multi-threaded scheduler.
335  *
336  *******************************************************************************
337  *
338  * @param[in] pastix_data
339  *          The pastix_data structure that provides the internal scheduler context.
340  *
341  * @param[in] sopalin_data
342  *          Solver structure that holds the solver matrix to factorize.
343  *
344  *******************************************************************************/
345 void
346 dynamic_chetrf( pastix_data_t  *pastix_data,
347                 sopalin_data_t *sopalin_data )
348 {
349  SolverMatrix *datacode = sopalin_data->solvmtx;
350  int32_t taskcnt = datacode->tasknbr_1dp;
351  struct args_chetrf_t args_chetrf = { sopalin_data, taskcnt };
352 
353  /* Allocate the computeQueue */
354  MALLOC_INTERN( datacode->computeQueue,
355  pastix_data->isched->world_size, pastix_queue_t * );
356 
357  isched_parallel_call( pastix_data->isched, thread_chetrf_dynamic, &args_chetrf );
358 
359  memFree_null( datacode->computeQueue );
360 }
361 
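When its own computeQueue is empty, a dynamic thread calls stealQueue() to take work from another rank's queue. The sketch below reduces the idea to a single file with hypothetical structures and no locking shown: scan the other workers' queues starting just after our own rank and pop from the first non-empty one.

#include <stdio.h>

#define NWORKERS 4

typedef struct {
    int tasks[8];
    int head;
    int tail;
} queue_t;

static int
queue_pop( queue_t *q )
{
    if ( q->head == q->tail ) {
        return -1;                 /* empty */
    }
    return q->tasks[ q->head++ ];
}

/* Scan the other workers' queues, starting after our own rank. */
static int
steal( queue_t *queues, int rank, int nworkers )
{
    for ( int k = 1; k < nworkers; k++ ) {
        int victim = ( rank + k ) % nworkers;
        int task   = queue_pop( &queues[victim] );
        if ( task != -1 ) {
            return task;
        }
    }
    return -1;                     /* nothing to steal */
}

int
main( void )
{
    queue_t queues[NWORKERS] = { 0 };

    /* Worker 2 still has one task; everyone else is idle. */
    queues[2].tasks[0] = 42;
    queues[2].tail     = 1;

    printf( "worker 0 steals task %d\n", steal( queues, 0, NWORKERS ) );
    return 0;
}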
362 #ifndef DOXYGEN_SHOULD_SKIP_THIS
363 /*
364  * TODO: uncomment the PaRSEC entry below once parsec_chetrf is available.
365  */
366 static void (*chetrf_table[5])(pastix_data_t *, sopalin_data_t *) = {
367     sequential_chetrf,
368     static_chetrf,
369 /* #if defined(PASTIX_WITH_PARSEC) */
370 /*     parsec_chetrf, */
371 /* #else */
372     NULL,
373 /* #endif */
374 #if defined(PASTIX_WITH_STARPU)
375     starpu_chetrf,
376 #else
377     NULL,
378 #endif
379     dynamic_chetrf
380 };
381 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
382 
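chetrf_table maps the IPARM_SCHEDULER value to the corresponding factorization routine, with NULL entries for runtimes that are not compiled in; sopalin_chetrf below falls back to dynamic_chetrf when the selected entry is NULL. A compact sketch of this dispatch pattern, with a hypothetical enum that only mirrors the scheduler ordering assumed above, looks like this:

#include <stdio.h>

typedef void (*facto_fn)( const char *what );

static void seq_impl( const char *what ) { printf( "sequential %s\n", what ); }
static void dyn_impl( const char *what ) { printf( "dynamic %s\n", what ); }

/* Hypothetical scheduler values, in the same order as assumed above. */
enum sched { SchedSequential = 0, SchedStatic = 1, SchedParsec = 2,
             SchedStarPU = 3, SchedDynamic = 4 };

/* NULL entries stand for schedulers without a dedicated implementation. */
static facto_fn table[5] = { seq_impl, NULL, NULL, NULL, dyn_impl };

int
main( void )
{
    int      sched = SchedStarPU;       /* requested but not available here */
    facto_fn fn    = table[sched];

    if ( fn == NULL ) {                 /* fall back, as sopalin_chetrf does */
        sched = SchedDynamic;
        fn    = table[sched];
    }
    fn( "hetrf" );
    return 0;
}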
383 /**
384  *******************************************************************************
385  *
386  * @brief Select the LDL^h factorization routine matching IPARM_SCHEDULER and run it.
387  *
388  *******************************************************************************
389  *
390  * @param[in] pastix_data
391  *          The pastix_data structure that holds the parameters and the scheduler contexts.
392  *
393  * @param[in] sopalin_data
394  *          Solver structure that holds the solver matrix to factorize.
395  *
396  *******************************************************************************/
397 void
398 sopalin_chetrf( pastix_data_t  *pastix_data,
399                 sopalin_data_t *sopalin_data )
400 {
401  int sched = pastix_data->iparm[IPARM_SCHEDULER];
402  void (*chetrf)(pastix_data_t *, sopalin_data_t *) = chetrf_table[ sched ];
403 
404  if ( chetrf == NULL ) {
405  sched = PastixSchedDynamic;
406  chetrf = dynamic_chetrf;
407  }
408 
409  if ( (sched == PastixSchedSequential) ||
410  (sched == PastixSchedStatic) ||
411  (sched == PastixSchedDynamic) )
412  {
413  solverRequestInit( PastixFacto, sopalin_data->solvmtx );
414  solverRecvInit( PastixLCoef, sopalin_data->solvmtx, PastixComplex32 );
415  }
416 
417  chetrf( pastix_data, sopalin_data );
418 
419  if ( (sched == PastixSchedSequential) ||
420  (sched == PastixSchedStatic) ||
421  (sched == PastixSchedDynamic) )
422  {
423  cpucblk_crequest_cleanup( PastixLCoef, sched, sopalin_data->solvmtx );
424  solverRequestExit( sopalin_data->solvmtx );
425  solverRecvExit( sopalin_data->solvmtx );
426  }
427 
428 #if defined(PASTIX_DEBUG_FACTO)
429  coeftab_cdump( pastix_data, sopalin_data->solvmtx, "hetrf" );
430 #endif
431 }
int cpucblk_chetrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex32_t *DLh, pastix_complex32_t *work, pastix_int_t lwork)
Perform the LDL^h factorization of a given panel and apply all its updates.
void cpucblk_chetrfsp1dplus_update(SolverMatrix *solvmtx, SolverBlok *blok, pastix_complex32_t *work)
Apply the updates of the LDL^h factorization of a given panel.
int cpucblk_chetrfsp1dplus(SolverMatrix *solvmtx, SolverCblk *cblk)
Perform the LDL^h factorization of a given panel and submit tasks for the subsequent updates.
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
float _Complex pastix_complex32_t
Definition: datatypes.h:76
static void pqueuePush1(pastix_queue_t *q, pastix_int_t elt, double key1)
Push an element with a single key.
Definition: queue.h:64
void pqueueExit(pastix_queue_t *)
Free the structure associated to the queue.
Definition: queue.c:110
static pastix_int_t pqueuePop(pastix_queue_t *q)
Pop the head of the queue without returning the keys.
Definition: queue.h:75
int pqueueInit(pastix_queue_t *, pastix_int_t)
Initialize the queue structure with an initial space to store the elements.
Definition: queue.c:81
Queue structure.
Definition: queue.h:38
void solverRecvExit(SolverMatrix *solvmtx)
Free the array linked to pending reception.
Definition: solver.c:563
void solverRequestExit(SolverMatrix *solvmtx)
Free the arrays related to the requests.
Definition: solver.c:481
void solverRecvInit(pastix_coefside_t side, SolverMatrix *solvmtx, pastix_coeftype_t flttype)
Allocate the reception buffer, and initiate the first persistent reception.
Definition: solver.c:522
void solverRequestInit(solve_step_t solve_step, SolverMatrix *solvmtx)
Instantiate the arrays for the requests according to the scheduler.
Definition: solver.c:433
void coeftab_cdump(pastix_data_t *pastix_data, const SolverMatrix *solvmtx, const char *filename)
Dump the solver matrix coefficients into a file in human readable format.
Definition: coeftab_c.c:55
void cpucblk_crequest_cleanup(pastix_coefside_t side, pastix_int_t sched, SolverMatrix *solvmtx)
Waitall routine for current cblk request.
int cpucblk_cincoming_deps(int rank, pastix_coefside_t side, SolverMatrix *solvmtx, SolverCblk *cblk)
Wait for incoming dependencies, and return when cblk->ctrbcnt has reached 0.
pastix_compress_when_t compress_when
@ PastixLCoef
Definition: api.h:478
@ PastixCompressNever
Definition: api.h:385
@ IPARM_SCHEDULER
Definition: api.h:117
@ PastixSchedStatic
Definition: api.h:335
@ PastixSchedDynamic
Definition: api.h:338
@ PastixSchedSequential
Definition: api.h:334
SolverMatrix * solvmatr
Definition: pastixdata.h:102
pastix_int_t * iparm
Definition: pastixdata.h:69
isched_t * isched
Definition: pastixdata.h:85
Main PaStiX data structure.
Definition: pastixdata.h:67
void starpu_chetrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
Perform a sparse LDL^h factorization using the StarPU runtime.
void dynamic_chetrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
Perform a sparse LDL^h factorization using the dynamic scheduler.
void static_chetrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
Perform a sparse LDL^h factorization using the static scheduler.
void sequential_chetrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
Perform a sparse LDL^h factorization using the sequential scheduler.
void thread_chetrf_static(isched_thread_t *ctx, void *args)
Static scheduler thread routine of the LDL^h factorization.
void sopalin_chetrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
Select and run the LDL^h factorization routine matching the given scheduler.
void thread_chetrf_dynamic(isched_thread_t *ctx, void *args)
Dynamic scheduler thread routine of the LDL^h factorization.
pastix_int_t cblknum
Definition: solver.h:121
pastix_lr_t lowrank
Definition: solver.h:230
pastix_int_t priority
Definition: solver.h:177
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition: solver.h:324
static pastix_int_t stealQueue(SolverMatrix *solvmtx, int rank, int nbthreads)
Task stealing method.
Definition: solver.h:466
pastix_int_t cblknbr
Definition: solver.h:208
SolverBlok *restrict bloktab
Definition: solver.h:223
pastix_int_t volatile ctrbcnt
Definition: solver.h:123
int threadid
Definition: solver.h:176
SolverCblk *restrict cblktab
Definition: solver.h:222
int8_t cblktype
Definition: solver.h:159
Solver block structure.
Definition: solver.h:137
Solver column block structure.
Definition: solver.h:156
Solver column block structure.
Definition: solver.h:200
The task structure for the numerical factorization.
Definition: solver.h:118