PaStiX Handbook 6.4.0
Loading...
Searching...
No Matches
sequential_sgetrf.c
Go to the documentation of this file.
1/**
2 *
3 * @file sequential_sgetrf.c
4 *
5 * @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
6 * Univ. Bordeaux. All rights reserved.
7 *
8 * @version 6.4.0
9 * @author Pascal Henon
10 * @author Xavier Lacoste
11 * @author Pierre Ramet
12 * @author Mathieu Faverge
13 * @author Esragul Korkmaz
14 * @author Gregoire Pichon
15 * @author Tony Delarue
16 * @date 2024-07-05
17 *
18 * @generated from /builds/2mk6rsew/0/solverstack/pastix/sopalin/sequential_zgetrf.c, normal z -> s, Tue Feb 25 14:36:05 2025
19 *
20 **/
21#include "common.h"
22#include "isched.h"
23#include "blend/solver.h"
24#include "sopalin/sopalin_data.h"
25#include "sopalin/coeftab_s.h"
26#include "pastix_scores.h"
27
28#if defined(PASTIX_WITH_PARSEC)
30#endif
31
32#if defined(PASTIX_WITH_STARPU)
34#endif
35
36/**
37 *******************************************************************************
38 *
39 * @brief TODO
40 *
41 *******************************************************************************
42 *
43 * @param[in] pastix_data
44 * TODO
45 *
46 * @param[in] sopalin_data
47 * TODO
48 *
49 *******************************************************************************/
50void
52 sopalin_data_t *sopalin_data )
53{
54 SolverMatrix *datacode = pastix_data->solvmatr;
55 SolverCblk *cblk;
56 float *work;
57 pastix_int_t i, lwork;
58 (void)sopalin_data;
59
60 lwork = datacode->gemmmax;
61 if ( (datacode->lowrank.compress_when != PastixCompressNever) &&
62 (datacode->lowrank.ilu_lvl < INT_MAX) )
63 {
64 lwork = pastix_imax( lwork, 2 * datacode->blokmax );
65 }
66 MALLOC_INTERN( work, lwork, float );
67
68 cblk = datacode->cblktab;
69 for (i=0; i<datacode->cblknbr; i++, cblk++){
70 if ( cblk->cblktype & CBLK_IN_SCHUR ) {
71 break;
72 }
73
74 /* Wait for incoming dependencies */
76 datacode, cblk ) )
77 {
78 continue;
79 }
80
81 /* Compute */
82 cpucblk_sgetrfsp1d( datacode, cblk,
83 work, lwork );
84 }
85
86 memFree_null( work );
87}
88
89/**
90 *******************************************************************************
91 *
92 * @brief TODO
93 *
94 *******************************************************************************
95 *
96 * @param[in] ctx
97 * TODO
98 *
99 * @param[in] args
100 * TODO
101 *
102 *******************************************************************************/
103void
104thread_sgetrf_static( isched_thread_t *ctx,
105 void *args )
106{
107 sopalin_data_t *sopalin_data = (sopalin_data_t*)args;
108 SolverMatrix *datacode = sopalin_data->solvmtx;
109 SolverCblk *cblk;
110 Task *t;
111 float *work;
112 pastix_int_t i, ii, lwork;
113 pastix_int_t tasknbr, *tasktab;
114 int rank = ctx->rank;
115
116 lwork = datacode->gemmmax;
117 if ( (datacode->lowrank.compress_when != PastixCompressNever) &&
118 (datacode->lowrank.ilu_lvl < INT_MAX) )
119 {
120 lwork = pastix_imax( lwork, 2 * datacode->blokmax );
121 }
122 MALLOC_INTERN( work, lwork, float );
123
124 tasknbr = datacode->ttsknbr[rank];
125 tasktab = datacode->ttsktab[rank];
126
127 for (ii=0; ii<tasknbr; ii++) {
128 i = tasktab[ii];
129 t = datacode->tasktab + i;
130 cblk = datacode->cblktab + t->cblknum;
131
132 if ( cblk->cblktype & CBLK_IN_SCHUR ) {
133 continue;
134 }
135
136 /* Wait for incoming dependencies */
138 datacode, cblk ) )
139 {
140 continue;
141 }
142
143 /* Compute */
144 cpucblk_sgetrfsp1d( datacode, cblk,
145 work, lwork );
146 }
147
148 memFree_null( work );
149}
150
151/**
152 *******************************************************************************
153 *
154 * @brief TODO
155 *
156 *******************************************************************************
157 *
158 * @param[in] pastix_data
159 * TODO
160 *
161 * @param[in] sopalin_data
162 * TODO
163 *
164 *******************************************************************************/
165void
167 sopalin_data_t *sopalin_data )
168{
169 isched_parallel_call( pastix_data->isched, thread_sgetrf_static, sopalin_data );
170}
171
172/**
173 * @brief TODO
174 */
175struct args_sgetrf_t
176{
177 sopalin_data_t *sopalin_data;
178 volatile int32_t taskcnt;
179};
180
181/**
182 *******************************************************************************
183 *
184 * @brief TODO
185 *
186 *******************************************************************************
187 *
188 * @param[in] ctx
189 * TODO
190 *
191 * @param[in] args
192 * TODO
193 *
194 *******************************************************************************/
195void
196thread_sgetrf_dynamic( isched_thread_t *ctx,
197 void *args )
198{
199 struct args_sgetrf_t *arg = (struct args_sgetrf_t*)args;
200 sopalin_data_t *sopalin_data = arg->sopalin_data;
201 SolverMatrix *datacode = sopalin_data->solvmtx;
202 SolverCblk *cblk;
203 SolverBlok *blok;
204 Task *t;
205 pastix_queue_t *computeQueue;
206 float *work;
207 pastix_int_t i, ii, lwork;
208 pastix_int_t tasknbr, *tasktab, cblknum, bloknum;
209 int32_t local_taskcnt = 0;
210 int rank = ctx->rank;
211
212 lwork = datacode->gemmmax;
213 if ( (datacode->lowrank.compress_when != PastixCompressNever) &&
214 (datacode->lowrank.ilu_lvl < INT_MAX) )
215 {
216 lwork = pastix_imax( lwork, 2 * datacode->blokmax );
217 }
218 MALLOC_INTERN( work, lwork, float );
219 MALLOC_INTERN( datacode->computeQueue[rank], 1, pastix_queue_t );
220
221 tasknbr = datacode->ttsknbr[rank];
222 tasktab = datacode->ttsktab[rank];
223 computeQueue = datacode->computeQueue[rank];
224 pqueueInit( computeQueue, tasknbr );
225
226 /* Initialize the local task queue with available cblks */
227 for (ii=0; ii<tasknbr; ii++) {
228 i = tasktab[ii];
229 t = datacode->tasktab + i;
230
231 if ( !(t->ctrbcnt) ) {
232 cblk = datacode->cblktab + t->cblknum;
233 pqueuePush1( computeQueue, t->cblknum, cblk->priority );
234 }
235 }
236
237 /* Make sure that all computeQueues are allocated */
238 isched_barrier_wait( &(ctx->global_ctx->barrier) );
239
240 while( arg->taskcnt > 0 )
241 {
242 cblknum = pqueuePop( computeQueue );
243
244#if defined(PASTIX_WITH_MPI)
245 /* Nothing to do, let's make progress on communications */
246 if( cblknum == -1 ) {
247 cpucblk_smpi_progress( PastixLUCoef, datacode, rank );
248 cblknum = pqueuePop( computeQueue );
249 }
250#endif
251
252 /* No more local job, let's steal our neighbors */
253 if( cblknum == -1 ) {
254 if ( local_taskcnt ) {
255 pastix_atomic_sub_32b( &(arg->taskcnt), local_taskcnt );
256 local_taskcnt = 0;
257 }
258 cblknum = stealQueue( datacode, rank,
259 ctx->global_ctx->world_size );
260 }
261
262 /* Still no job, let's loop again */
263 if ( cblknum == -1 ) {
264 continue;
265 }
266
267 if ( cblknum >= 0 ) {
268 cblk = datacode->cblktab + cblknum;
269 if ( cblk->cblktype & CBLK_IN_SCHUR ) {
270 continue;
271 }
272 cblk->threadid = rank;
273
274 /* Compute */
275 if ( cblk->cblktype & CBLK_TASKS_2D ) {
276 cpucblk_sgetrfsp1dplus( datacode, cblk );
277 }
278 else {
279 cpucblk_sgetrfsp1d( datacode, cblk,
280 work, lwork );
281 }
282 }
283 else {
284 bloknum = - cblknum - 1;
285 blok = datacode->bloktab + bloknum;
286 cpucblk_sgetrfsp1dplus_update( datacode, blok, work, lwork );
287 }
288 local_taskcnt++;
289 }
290 memFree_null( work );
291
292 /* Make sure that everyone is done before freeing */
293 isched_barrier_wait( &(ctx->global_ctx->barrier) );
294 pqueueExit( computeQueue );
295 memFree_null( computeQueue );
296}
297
298/**
299 *******************************************************************************
300 *
301 * @brief TODO
302 *
303 *******************************************************************************
304 *
305 * @param[in] pastix_data
306 * TODO
307 *
308 * @param[in] sopalin_data
309 * TODO
310 *
311 *******************************************************************************/
312void
314 sopalin_data_t *sopalin_data )
315{
316 SolverMatrix *datacode = sopalin_data->solvmtx;
317 int32_t taskcnt = datacode->tasknbr_1dp;
318 struct args_sgetrf_t args_sgetrf = { sopalin_data, taskcnt };
319
320 /* Allocate the computeQueue */
321 MALLOC_INTERN( datacode->computeQueue,
322 pastix_data->isched->world_size, pastix_queue_t * );
323
324 isched_parallel_call( pastix_data->isched, thread_sgetrf_dynamic, &args_sgetrf );
325
326 memFree_null( datacode->computeQueue );
327}
328
329#ifndef DOXYGEN_SHOULD_SKIP_THIS
330static void (*sgetrf_table[5])(pastix_data_t *, sopalin_data_t *) = {
333#if defined(PASTIX_WITH_PARSEC)
335#else
336 NULL,
337#endif
338#if defined(PASTIX_WITH_STARPU)
340#else
341 NULL,
342#endif
344};
345#endif /* DOXYGEN_SHOULD_SKIP_THIS */
346
347/**
348 *******************************************************************************
349 *
350 * @brief TODO
351 *
352 *******************************************************************************
353 *
354 * @param[in] pastix_data
355 * TODO
356 *
357 * @param[in] sopalin_data
358 * TODO
359 *
360 *******************************************************************************/
361void
363 sopalin_data_t *sopalin_data )
364{
365 int sched = pastix_data->iparm[IPARM_SCHEDULER];
366 void (*sgetrf)(pastix_data_t *, sopalin_data_t *) = sgetrf_table[ sched ];
367
368 if (sgetrf == NULL) {
369 sched = PastixSchedDynamic;
370 sgetrf = dynamic_sgetrf;
371 }
372
373 if ( (sched == PastixSchedSequential) ||
374 (sched == PastixSchedStatic) ||
375 (sched == PastixSchedDynamic) )
376 {
377 solverRequestInit( PastixFacto, sopalin_data->solvmtx );
378 solverRecvInit( PastixLUCoef, sopalin_data->solvmtx, PastixFloat );
379 }
380
381 sgetrf( pastix_data, sopalin_data );
382
383 if ( (sched == PastixSchedSequential) ||
384 (sched == PastixSchedStatic) ||
385 (sched == PastixSchedDynamic) )
386 {
387 cpucblk_srequest_cleanup( PastixLUCoef, sched, sopalin_data->solvmtx );
388 solverRequestExit( sopalin_data->solvmtx );
389 solverRecvExit( sopalin_data->solvmtx );
390 }
391
392#if defined(PASTIX_DEBUG_FACTO)
393 coeftab_sdump( pastix_data, sopalin_data->solvmtx, "getrf" );
394#endif
395}
BEGIN_C_DECLS typedef int pastix_int_t
Definition datatypes.h:51
static void pqueuePush1(pastix_queue_t *q, pastix_int_t elt, double key1)
Push an element with a single key.
Definition queue.h:64
void pqueueExit(pastix_queue_t *)
Free the structure associated to the queue.
Definition queue.c:110
static pastix_int_t pqueuePop(pastix_queue_t *q)
Pop the head of the queue whithout returning the keys.
Definition queue.h:75
int pqueueInit(pastix_queue_t *, pastix_int_t)
Initialize the queue structure with an initial space to store the elements.
Definition queue.c:81
Queue structure.
Definition queue.h:38
void solverRecvExit(SolverMatrix *solvmtx)
Free the array linked to pending reception.
Definition solver.c:554
void solverRequestExit(SolverMatrix *solvmtx)
Free the arrays related to the requests.
Definition solver.c:472
void solverRecvInit(pastix_coefside_t side, SolverMatrix *solvmtx, pastix_coeftype_t flttype)
Allocate the reception buffer, and initiate the first persistant reception.
Definition solver.c:513
void solverRequestInit(solve_step_t solve_step, SolverMatrix *solvmtx)
Instanciate the arrays for the requests according to the scheduler.
Definition solver.c:424
void coeftab_sdump(pastix_data_t *pastix_data, const SolverMatrix *solvmtx, const char *filename)
Dump the solver matrix coefficients into a file in human readable format.
Definition coeftab_s.c:55
void cpucblk_sgetrfsp1dplus_update(SolverMatrix *solvmtx, SolverBlok *blok, float *work, pastix_int_t lwork)
Apply the updates of the LU factorisation of a given panel.
int cpucblk_sgetrfsp1dplus(SolverMatrix *solvmtx, SolverCblk *cblk)
Perform the LU factorization of a given panel and submit tasks for the subsequent updates.
void cpucblk_srequest_cleanup(pastix_coefside_t side, pastix_int_t sched, SolverMatrix *solvmtx)
Waitall routine for current cblk request.
int cpucblk_sgetrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, float *work, pastix_int_t lwork)
Perform the LU factorization of a given panel and apply all its updates.
int cpucblk_sincoming_deps(int rank, pastix_coefside_t side, SolverMatrix *solvmtx, SolverCblk *cblk)
Wait for incoming dependencies, and return when cblk->ctrbcnt has reached 0.
pastix_compress_when_t compress_when
void parsec_sgetrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
Perform a sparse LU factorization using PaRSEC runtime.
@ PastixLUCoef
Definition api.h:480
@ PastixCompressNever
Definition api.h:385
@ IPARM_SCHEDULER
Definition api.h:117
@ PastixSchedStatic
Definition api.h:335
@ PastixSchedDynamic
Definition api.h:338
@ PastixSchedSequential
Definition api.h:334
SolverMatrix * solvmatr
Definition pastixdata.h:103
pastix_int_t * iparm
Definition pastixdata.h:70
isched_t * isched
Definition pastixdata.h:86
Main PaStiX data structure.
Definition pastixdata.h:68
void starpu_sgetrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
Perform a sparse LU factorization using StarPU runtime.
void sequential_sgetrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
TODO.
void thread_sgetrf_dynamic(isched_thread_t *ctx, void *args)
TODO.
void dynamic_sgetrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
TODO.
void static_sgetrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
TODO.
void sopalin_sgetrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
TODO.
void thread_sgetrf_static(isched_thread_t *ctx, void *args)
TODO.
pastix_int_t cblknum
Definition solver.h:125
pastix_lr_t lowrank
Definition solver.h:236
pastix_int_t priority
Definition solver.h:183
static pastix_int_t stealQueue(SolverMatrix *solvmtx, int rank, int nbthreads)
Task stealing method.
Definition solver.h:471
pastix_int_t cblknbr
Definition solver.h:211
SolverBlok *restrict bloktab
Definition solver.h:229
pastix_int_t volatile ctrbcnt
Definition solver.h:127
SolverCblk *restrict cblktab
Definition solver.h:228
int8_t cblktype
Definition solver.h:164
Solver block structure.
Definition solver.h:141
Solver column block structure.
Definition solver.h:161
Solver column block structure.
Definition solver.h:203
The task structure for the numerical factorization.
Definition solver.h:122