PaStiX Handbook  6.4.0
sequential_sgetrf.c
Go to the documentation of this file.
1 /**
2  *
3  * @file sequential_sgetrf.c
4  *
5  * @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
6  * Univ. Bordeaux. All rights reserved.
7  *
8  * @version 6.4.0
9  * @author Pascal Henon
10  * @author Xavier Lacoste
11  * @author Pierre Ramet
12  * @author Mathieu Faverge
13  * @author Esragul Korkmaz
14  * @author Gregoire Pichon
15  * @author Tony Delarue
16  * @date 2024-07-05
17  *
18  * @generated from /builds/solverstack/pastix/sopalin/sequential_zgetrf.c, normal z -> s, Thu Aug 29 14:20:52 2024
19  *
20  **/
21 #include "common.h"
22 #include "isched.h"
23 #include "blend/solver.h"
24 #include "sopalin/sopalin_data.h"
25 #include "sopalin/coeftab_s.h"
26 #include "pastix_scores.h"
27 
28 #if defined(PASTIX_WITH_PARSEC)
29 #include "parsec/pastix_sparsec.h"
30 #endif
31 
32 #if defined(PASTIX_WITH_STARPU)
33 #include "starpu/pastix_sstarpu.h"
34 #endif
35 
36 /**
37  *******************************************************************************
38  *
39  * @brief TODO
40  *
41  *******************************************************************************
42  *
43  * @param[in] pastix_data
44  * TODO
45  *
46  * @param[in] sopalin_data
47  * TODO
48  *
49  *******************************************************************************/
50 void
52  sopalin_data_t *sopalin_data )
53 {
54  SolverMatrix *datacode = pastix_data->solvmatr;
55  SolverCblk *cblk;
56  float *work;
57  pastix_int_t i, lwork;
58  (void)sopalin_data;
59 
60  lwork = datacode->gemmmax;
61  if ( (datacode->lowrank.compress_when != PastixCompressNever) &&
62  (datacode->lowrank.ilu_lvl < INT_MAX) )
63  {
64  lwork = pastix_imax( lwork, 2 * datacode->blokmax );
65  }
66  MALLOC_INTERN( work, lwork, float );
67 
68  cblk = datacode->cblktab;
69  for (i=0; i<datacode->cblknbr; i++, cblk++){
70  if ( cblk->cblktype & CBLK_IN_SCHUR ) {
71  break;
72  }
73 
74  /* Wait for incoming dependencies */
76  datacode, cblk ) )
77  {
78  continue;
79  }
80 
81  /* Compute */
82  cpucblk_sgetrfsp1d( datacode, cblk,
83  work, lwork );
84  }
85 
86  memFree_null( work );
87 }
88 
89 /**
90  *******************************************************************************
91  *
92  * @brief TODO
93  *
94  *******************************************************************************
95  *
96  * @param[in] ctx
97  * TODO
98  *
99  * @param[in] args
100  * TODO
101  *
102  *******************************************************************************/
103 void
104 thread_sgetrf_static( isched_thread_t *ctx,
105  void *args )
106 {
107  sopalin_data_t *sopalin_data = (sopalin_data_t*)args;
108  SolverMatrix *datacode = sopalin_data->solvmtx;
109  SolverCblk *cblk;
110  Task *t;
111  float *work;
112  pastix_int_t i, ii, lwork;
113  pastix_int_t tasknbr, *tasktab;
114  int rank = ctx->rank;
115 
116  lwork = datacode->gemmmax;
117  if ( (datacode->lowrank.compress_when != PastixCompressNever) &&
118  (datacode->lowrank.ilu_lvl < INT_MAX) )
119  {
120  lwork = pastix_imax( lwork, 2 * datacode->blokmax );
121  }
122  MALLOC_INTERN( work, lwork, float );
123 
124  tasknbr = datacode->ttsknbr[rank];
125  tasktab = datacode->ttsktab[rank];
126 
127  for (ii=0; ii<tasknbr; ii++) {
128  i = tasktab[ii];
129  t = datacode->tasktab + i;
130  cblk = datacode->cblktab + t->cblknum;
131 
132  if ( cblk->cblktype & CBLK_IN_SCHUR ) {
133  continue;
134  }
135 
136  /* Wait for incoming dependencies */
138  datacode, cblk ) )
139  {
140  continue;
141  }
142 
143  /* Compute */
144  cpucblk_sgetrfsp1d( datacode, cblk,
145  work, lwork );
146  }
147 
148  memFree_null( work );
149 }
150 
151 /**
152  *******************************************************************************
153  *
154  * @brief TODO
155  *
156  *******************************************************************************
157  *
158  * @param[in] pastix_data
159  * TODO
160  *
161  * @param[in] sopalin_data
162  * TODO
163  *
164  *******************************************************************************/
165 void
167  sopalin_data_t *sopalin_data )
168 {
169  isched_parallel_call( pastix_data->isched, thread_sgetrf_static, sopalin_data );
170 }
171 
172 /**
173  * @brief TODO
174  */
175 struct args_sgetrf_t
176 {
177  sopalin_data_t *sopalin_data;
178  volatile int32_t taskcnt;
179 };
180 
181 /**
182  *******************************************************************************
183  *
184  * @brief TODO
185  *
186  *******************************************************************************
187  *
188  * @param[in] ctx
189  * TODO
190  *
191  * @param[in] args
192  * TODO
193  *
194  *******************************************************************************/
195 void
196 thread_sgetrf_dynamic( isched_thread_t *ctx,
197  void *args )
198 {
199  struct args_sgetrf_t *arg = (struct args_sgetrf_t*)args;
200  sopalin_data_t *sopalin_data = arg->sopalin_data;
201  SolverMatrix *datacode = sopalin_data->solvmtx;
202  SolverCblk *cblk;
203  SolverBlok *blok;
204  Task *t;
205  pastix_queue_t *computeQueue;
206  float *work;
207  pastix_int_t i, ii, lwork;
208  pastix_int_t tasknbr, *tasktab, cblknum, bloknum;
209  int32_t local_taskcnt = 0;
210  int rank = ctx->rank;
211 
212  lwork = datacode->gemmmax;
213  if ( (datacode->lowrank.compress_when != PastixCompressNever) &&
214  (datacode->lowrank.ilu_lvl < INT_MAX) )
215  {
216  lwork = pastix_imax( lwork, 2 * datacode->blokmax );
217  }
218  MALLOC_INTERN( work, lwork, float );
219  MALLOC_INTERN( datacode->computeQueue[rank], 1, pastix_queue_t );
220 
221  tasknbr = datacode->ttsknbr[rank];
222  tasktab = datacode->ttsktab[rank];
223  computeQueue = datacode->computeQueue[rank];
224  pqueueInit( computeQueue, tasknbr );
225 
226  /* Initialize the local task queue with available cblks */
227  for (ii=0; ii<tasknbr; ii++) {
228  i = tasktab[ii];
229  t = datacode->tasktab + i;
230 
231  if ( !(t->ctrbcnt) ) {
232  cblk = datacode->cblktab + t->cblknum;
233  pqueuePush1( computeQueue, t->cblknum, cblk->priority );
234  }
235  }
236 
237  /* Make sure that all computeQueues are allocated */
238  isched_barrier_wait( &(ctx->global_ctx->barrier) );
239 
240  while( arg->taskcnt > 0 )
241  {
242  cblknum = pqueuePop( computeQueue );
243 
244 #if defined(PASTIX_WITH_MPI)
245  /* Nothing to do, let's make progress on communications */
246  if( cblknum == -1 ) {
247  cpucblk_smpi_progress( PastixLUCoef, datacode, rank );
248  cblknum = pqueuePop( computeQueue );
249  }
250 #endif
251 
252  /* No more local job, let's steal our neighbors */
253  if( cblknum == -1 ) {
254  if ( local_taskcnt ) {
255  pastix_atomic_sub_32b( &(arg->taskcnt), local_taskcnt );
256  local_taskcnt = 0;
257  }
258  cblknum = stealQueue( datacode, rank,
259  ctx->global_ctx->world_size );
260  }
261 
262  /* Still no job, let's loop again */
263  if ( cblknum == -1 ) {
264  continue;
265  }
266 
267  if ( cblknum >= 0 ) {
268  cblk = datacode->cblktab + cblknum;
269  if ( cblk->cblktype & CBLK_IN_SCHUR ) {
270  continue;
271  }
272  cblk->threadid = rank;
273 
274  /* Compute */
275  if ( cblk->cblktype & CBLK_TASKS_2D ) {
276  cpucblk_sgetrfsp1dplus( datacode, cblk );
277  }
278  else {
279  cpucblk_sgetrfsp1d( datacode, cblk,
280  work, lwork );
281  }
282  }
283  else {
284  bloknum = - cblknum - 1;
285  blok = datacode->bloktab + bloknum;
286  cpucblk_sgetrfsp1dplus_update( datacode, blok, work, lwork );
287  }
288  local_taskcnt++;
289  }
290  memFree_null( work );
291 
292  /* Make sure that everyone is done before freeing */
293  isched_barrier_wait( &(ctx->global_ctx->barrier) );
294  pqueueExit( computeQueue );
295  memFree_null( computeQueue );
296 }
297 
298 /**
299  *******************************************************************************
300  *
301  * @brief TODO
302  *
303  *******************************************************************************
304  *
305  * @param[in] pastix_data
306  * TODO
307  *
308  * @param[in] sopalin_data
309  * TODO
310  *
311  *******************************************************************************/
312 void
314  sopalin_data_t *sopalin_data )
315 {
316  SolverMatrix *datacode = sopalin_data->solvmtx;
317  int32_t taskcnt = datacode->tasknbr_1dp;
318  struct args_sgetrf_t args_sgetrf = { sopalin_data, taskcnt };
319 
320  /* Allocate the computeQueue */
321  MALLOC_INTERN( datacode->computeQueue,
322  pastix_data->isched->world_size, pastix_queue_t * );
323 
324  isched_parallel_call( pastix_data->isched, thread_sgetrf_dynamic, &args_sgetrf );
325 
326  memFree_null( datacode->computeQueue );
327 }
328 
329 #ifndef DOXYGEN_SHOULD_SKIP_THIS
330 static void (*sgetrf_table[5])(pastix_data_t *, sopalin_data_t *) = {
333 #if defined(PASTIX_WITH_PARSEC)
335 #else
336  NULL,
337 #endif
338 #if defined(PASTIX_WITH_STARPU)
340 #else
341  NULL,
342 #endif
344 };
345 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
346 
347 /**
348  *******************************************************************************
349  *
350  * @brief TODO
351  *
352  *******************************************************************************
353  *
354  * @param[in] pastix_data
355  * TODO
356  *
357  * @param[in] sopalin_data
358  * TODO
359  *
360  *******************************************************************************/
361 void
363  sopalin_data_t *sopalin_data )
364 {
365  int sched = pastix_data->iparm[IPARM_SCHEDULER];
366  void (*sgetrf)(pastix_data_t *, sopalin_data_t *) = sgetrf_table[ sched ];
367 
368  if (sgetrf == NULL) {
369  sched = PastixSchedDynamic;
370  sgetrf = dynamic_sgetrf;
371  }
372 
373  if ( (sched == PastixSchedSequential) ||
374  (sched == PastixSchedStatic) ||
375  (sched == PastixSchedDynamic) )
376  {
377  solverRequestInit( PastixFacto, sopalin_data->solvmtx );
378  solverRecvInit( PastixLUCoef, sopalin_data->solvmtx, PastixFloat );
379  }
380 
381  sgetrf( pastix_data, sopalin_data );
382 
383  if ( (sched == PastixSchedSequential) ||
384  (sched == PastixSchedStatic) ||
385  (sched == PastixSchedDynamic) )
386  {
387  cpucblk_srequest_cleanup( PastixLUCoef, sched, sopalin_data->solvmtx );
388  solverRequestExit( sopalin_data->solvmtx );
389  solverRecvExit( sopalin_data->solvmtx );
390  }
391 
392 #if defined(PASTIX_DEBUG_FACTO)
393  coeftab_sdump( pastix_data, sopalin_data->solvmtx, "getrf" );
394 #endif
395 }
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
static void pqueuePush1(pastix_queue_t *q, pastix_int_t elt, double key1)
Push an element with a single key.
Definition: queue.h:64
void pqueueExit(pastix_queue_t *)
Free the structure associated to the queue.
Definition: queue.c:110
static pastix_int_t pqueuePop(pastix_queue_t *q)
Pop the head of the queue whithout returning the keys.
Definition: queue.h:75
int pqueueInit(pastix_queue_t *, pastix_int_t)
Initialize the queue structure with an initial space to store the elements.
Definition: queue.c:81
Queue structure.
Definition: queue.h:38
void solverRecvExit(SolverMatrix *solvmtx)
Free the array linked to pending reception.
Definition: solver.c:554
void solverRequestExit(SolverMatrix *solvmtx)
Free the arrays related to the requests.
Definition: solver.c:472
void solverRecvInit(pastix_coefside_t side, SolverMatrix *solvmtx, pastix_coeftype_t flttype)
Allocate the reception buffer, and initiate the first persistant reception.
Definition: solver.c:513
void solverRequestInit(solve_step_t solve_step, SolverMatrix *solvmtx)
Instanciate the arrays for the requests according to the scheduler.
Definition: solver.c:424
void coeftab_sdump(pastix_data_t *pastix_data, const SolverMatrix *solvmtx, const char *filename)
Dump the solver matrix coefficients into a file in human readable format.
Definition: coeftab_s.c:55
void cpucblk_sgetrfsp1dplus_update(SolverMatrix *solvmtx, SolverBlok *blok, float *work, pastix_int_t lwork)
Apply the updates of the LU factorisation of a given panel.
int cpucblk_sgetrfsp1dplus(SolverMatrix *solvmtx, SolverCblk *cblk)
Perform the LU factorization of a given panel and submit tasks for the subsequent updates.
void cpucblk_srequest_cleanup(pastix_coefside_t side, pastix_int_t sched, SolverMatrix *solvmtx)
Waitall routine for current cblk request.
int cpucblk_sgetrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, float *work, pastix_int_t lwork)
Perform the LU factorization of a given panel and apply all its updates.
int cpucblk_sincoming_deps(int rank, pastix_coefside_t side, SolverMatrix *solvmtx, SolverCblk *cblk)
Wait for incoming dependencies, and return when cblk->ctrbcnt has reached 0.
pastix_compress_when_t compress_when
void parsec_sgetrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
Perform a sparse LU factorization using PaRSEC runtime.
@ PastixLUCoef
Definition: api.h:480
@ PastixCompressNever
Definition: api.h:385
@ IPARM_SCHEDULER
Definition: api.h:117
@ PastixSchedStatic
Definition: api.h:335
@ PastixSchedDynamic
Definition: api.h:338
@ PastixSchedSequential
Definition: api.h:334
SolverMatrix * solvmatr
Definition: pastixdata.h:103
pastix_int_t * iparm
Definition: pastixdata.h:70
isched_t * isched
Definition: pastixdata.h:86
Main PaStiX data structure.
Definition: pastixdata.h:68
void starpu_sgetrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
Perform a sparse LU factorization using StarPU runtime.
void sequential_sgetrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
TODO.
void thread_sgetrf_dynamic(isched_thread_t *ctx, void *args)
TODO.
void dynamic_sgetrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
TODO.
void static_sgetrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
TODO.
void sopalin_sgetrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
TODO.
void thread_sgetrf_static(isched_thread_t *ctx, void *args)
TODO.
pastix_int_t cblknum
Definition: solver.h:125
pastix_lr_t lowrank
Definition: solver.h:236
pastix_int_t priority
Definition: solver.h:183
static pastix_int_t stealQueue(SolverMatrix *solvmtx, int rank, int nbthreads)
Task stealing method.
Definition: solver.h:471
pastix_int_t cblknbr
Definition: solver.h:211
SolverBlok *restrict bloktab
Definition: solver.h:229
pastix_int_t volatile ctrbcnt
Definition: solver.h:127
int threadid
Definition: solver.h:182
SolverCblk *restrict cblktab
Definition: solver.h:228
int8_t cblktype
Definition: solver.h:164
Solver block structure.
Definition: solver.h:141
Solver column block structure.
Definition: solver.h:161
Solver column block structure.
Definition: solver.h:203
The task structure for the numerical factorization.
Definition: solver.h:122