PaStiX Handbook 6.4.0
sequential_ssytrf.c
/**
 *
 * @file sequential_ssytrf.c
 *
 * @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                      Univ. Bordeaux. All rights reserved.
 *
 * @version 6.4.0
 * @author Pascal Henon
 * @author Xavier Lacoste
 * @author Pierre Ramet
 * @author Mathieu Faverge
 * @author Esragul Korkmaz
 * @author Tony Delarue
 * @author Alycia Lisito
 * @date 2024-07-05
 *
 * @generated from /builds/2mk6rsew/0/solverstack/pastix/sopalin/sequential_zsytrf.c, normal z -> s, Tue Feb 25 14:36:05 2025
 *
 **/
#include "common.h"
#include "isched.h"
#include "blend/solver.h"
#include "sopalin/sopalin_data.h"
#include "sopalin/coeftab_s.h"
#include "pastix_scores.h"

#if defined(PASTIX_WITH_PARSEC)
#include "parsec/pastix_parsec.h"
#endif

#if defined(PASTIX_WITH_STARPU)
#include "starpu/pastix_starpu.h"
#endif

/**
 *******************************************************************************
 *
 * @brief Perform the sparse LDL^t factorization with the sequential scheduler.
 *
 *******************************************************************************
 *
 * @param[in] pastix_data
 *          The pastix_data structure that describes the solver instance.
 *
 * @param[in] sopalin_data
 *          The structure describing the factorization. Unused in the
 *          sequential implementation, where the solver matrix is taken
 *          directly from pastix_data.
 *
 *******************************************************************************/
void
sequential_ssytrf( pastix_data_t  *pastix_data,
                   sopalin_data_t *sopalin_data )
{
    SolverMatrix *datacode = pastix_data->solvmatr;
    SolverCblk   *cblk;
    float *work1, *work2;
    pastix_int_t  N, i, lwork1, lwork2;
    (void)sopalin_data;

    lwork1 = datacode->offdmax;
    lwork2 = pastix_imax( datacode->gemmmax, datacode->blokmax );
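    /*
     * With low-rank compression enabled on the factorized levels, the update
     * kernels may need two temporary buffers of blokmax size at once, hence
     * the larger workspace.
     */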
    if ( (datacode->lowrank.compress_when != PastixCompressNever) &&
         (datacode->lowrank.ilu_lvl < INT_MAX) )
    {
        lwork2 = pastix_imax( lwork2, 2 * datacode->blokmax );
    }
    MALLOC_INTERN( work1, lwork1, float );
    MALLOC_INTERN( work2, lwork2, float );

    cblk = datacode->cblktab;
    for (i=0; i<datacode->cblknbr; i++, cblk++){
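        /* Stop at the first cblk of the Schur complement: those are not
           factorized */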
        if ( cblk->cblktype & CBLK_IN_SCHUR ) {
            break;
        }

        /* Wait for incoming dependencies */
        if ( cpucblk_sincoming_deps( 0, PastixLCoef,
                                     datacode, cblk ) )
        {
            continue;
        }

        N = cblk_colnbr( cblk );

        /* Compute */
        cpucblk_ssytrfsp1d( datacode, cblk,
                            /*
                             * Workspace size has been computed without the
                             * diagonal block, thus in order to work with generic
                             * TRSM and GEMM kernels, we must shift the DLh workspace
                             * by the diagonal block size
                             */
                            work1 - (N*N), work2, lwork2 );
    }

    memFree_null( work1 );
    memFree_null( work2 );
}

/**
 *******************************************************************************
 *
 * @brief Thread routine of the static scheduler version of the LDL^t
 *        factorization: each thread processes the tasks assigned to it by the
 *        analysis step.
 *
 *******************************************************************************
 *
 * @param[in] ctx
 *          The context of the calling thread.
 *
 * @param[in] args
 *          The sopalin_data structure describing the factorization, passed as
 *          a void pointer.
 *
 *******************************************************************************/
void
thread_ssytrf_static( isched_thread_t *ctx,
                      void            *args )
{
    sopalin_data_t *sopalin_data = (sopalin_data_t*)args;
    SolverMatrix   *datacode = sopalin_data->solvmtx;
    SolverCblk     *cblk;
    Task           *t;
    float *work1, *work2;
    pastix_int_t  N, i, ii, lwork1, lwork2;
    pastix_int_t  tasknbr, *tasktab;
    int rank = ctx->rank;

    lwork1 = datacode->offdmax;
    lwork2 = pastix_imax( datacode->gemmmax, datacode->blokmax );
    if ( (datacode->lowrank.compress_when != PastixCompressNever) &&
         (datacode->lowrank.ilu_lvl < INT_MAX) )
    {
        lwork2 = pastix_imax( lwork2, 2 * datacode->blokmax );
    }
    MALLOC_INTERN( work1, lwork1, float );
    MALLOC_INTERN( work2, lwork2, float );

    tasknbr = datacode->ttsknbr[rank];
    tasktab = datacode->ttsktab[rank];

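    /* Each thread walks only the tasks mapped to it by the analysis step */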
    for (ii=0; ii<tasknbr; ii++) {
        i = tasktab[ii];
        t = datacode->tasktab + i;
        cblk = datacode->cblktab + t->cblknum;

        if ( cblk->cblktype & CBLK_IN_SCHUR ) {
            continue;
        }

        /* Wait for incoming dependencies */
        if ( cpucblk_sincoming_deps( rank, PastixLCoef,
                                     datacode, cblk ) )
        {
            continue;
        }

        N = cblk_colnbr( cblk );

        /* Compute */
        cpucblk_ssytrfsp1d( datacode, cblk,
                            /*
                             * Workspace size has been computed without the
                             * diagonal block, thus in order to work with generic
                             * TRSM and GEMM kernels, we must shift the DLh workspace
                             * by the diagonal block size
                             */
                            work1 - (N*N), work2, lwork2 );
    }

    memFree_null( work1 );
    memFree_null( work2 );
}

/**
 *******************************************************************************
 *
 * @brief Perform the sparse LDL^t factorization with the static
 *        multi-threaded scheduler.
 *
 *******************************************************************************
 *
 * @param[in] pastix_data
 *          The pastix_data structure that describes the solver instance.
 *
 * @param[in] sopalin_data
 *          The structure describing the factorization, forwarded to each
 *          working thread.
 *
 *******************************************************************************/
void
static_ssytrf( pastix_data_t  *pastix_data,
               sopalin_data_t *sopalin_data )
{
    isched_parallel_call( pastix_data->isched, thread_ssytrf_static, sopalin_data );
}

/**
 * @brief Arguments shared by the dynamic scheduler threads: the factorization
 *        data and the count of remaining tasks.
 */
struct args_ssytrf_t
{
    sopalin_data_t  *sopalin_data;
    volatile int32_t taskcnt;
};

/**
 *******************************************************************************
 *
 * @brief Thread routine of the dynamic scheduler version of the LDL^t
 *        factorization: each thread pops ready tasks from its local queue and
 *        steals work from other threads when it runs out.
 *
 *******************************************************************************
 *
 * @param[in] ctx
 *          The context of the calling thread.
 *
 * @param[in] args
 *          The args_ssytrf_t structure holding the sopalin_data and the
 *          shared task counter, passed as a void pointer.
 *
 *******************************************************************************/
void
thread_ssytrf_dynamic( isched_thread_t *ctx,
                       void            *args )
{
    struct args_ssytrf_t *arg = (struct args_ssytrf_t *)args;
    sopalin_data_t *sopalin_data = arg->sopalin_data;
    SolverMatrix   *datacode = sopalin_data->solvmtx;
    SolverCblk     *cblk;
    SolverBlok     *blok;
    Task           *t;
    pastix_queue_t *computeQueue;
    float *work1, *work2;
    pastix_int_t  N, i, ii, lwork1, lwork2;
    pastix_int_t  tasknbr, *tasktab, cblknum, bloknum;
    int32_t local_taskcnt = 0;
    int rank = ctx->rank;

    lwork1 = datacode->offdmax;
    lwork2 = pastix_imax( datacode->gemmmax, datacode->blokmax );
    if ( (datacode->lowrank.compress_when != PastixCompressNever) &&
         (datacode->lowrank.ilu_lvl < INT_MAX) )
    {
        lwork2 = pastix_imax( lwork2, 2 * datacode->blokmax );
    }
    MALLOC_INTERN( work1, lwork1, float );
    MALLOC_INTERN( work2, lwork2, float );
    MALLOC_INTERN( datacode->computeQueue[rank], 1, pastix_queue_t );

    tasknbr      = datacode->ttsknbr[rank];
    tasktab      = datacode->ttsktab[rank];
    computeQueue = datacode->computeQueue[rank];
    pqueueInit( computeQueue, tasknbr );

    /* Initialize the local task queue with available cblks */
    for (ii=0; ii<tasknbr; ii++) {
        i = tasktab[ii];
        t = datacode->tasktab + i;

        if ( !(t->ctrbcnt) ) {
            cblk = datacode->cblktab + t->cblknum;
            pqueuePush1( computeQueue, t->cblknum, cblk->priority );
        }
    }

    /* Make sure that all computeQueues are allocated */
    isched_barrier_wait( &(ctx->global_ctx->barrier) );

    while( arg->taskcnt > 0 )
    {
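        /* pqueuePop() returns -1 when the local queue is empty */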
        cblknum = pqueuePop(computeQueue);

#if defined(PASTIX_WITH_MPI)
        /* Nothing to do, let's make progress on communications */
        if( cblknum == -1 ) {
            cpucblk_smpi_progress( PastixLCoef, datacode, rank );
            cblknum = pqueuePop( computeQueue );
        }
#endif

        /* No more local job, let's steal our neighbors */
        if( cblknum == -1 ) {
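            /* Flush locally completed tasks to the shared counter in a single
               atomic update to limit contention before trying to steal */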
            if ( local_taskcnt ) {
                pastix_atomic_sub_32b( &(arg->taskcnt), local_taskcnt );
                local_taskcnt = 0;
            }
            cblknum = stealQueue( datacode, rank,
                                  ctx->global_ctx->world_size );
        }

        /* Still no job, let's loop again */
        if ( cblknum == -1 ) {
            continue;
        }

        if ( cblknum >= 0 ) {
            cblk = datacode->cblktab + cblknum;
            if ( cblk->cblktype & CBLK_IN_SCHUR ) {
                continue;
            }
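            /* Record the thread that handles the computation of this cblk */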
            cblk->threadid = rank;

            N = cblk_colnbr( cblk );

            /* Compute */
            if ( cblk->cblktype & CBLK_TASKS_2D ) {
                cpucblk_ssytrfsp1dplus( datacode, cblk );
            }
            else {
                cpucblk_ssytrfsp1d( datacode, cblk,
                                    /*
                                     * Workspace size has been computed without the
                                     * diagonal block, thus in order to work with generic
                                     * TRSM and GEMM kernels, we must shift the DLh workspace
                                     * by the diagonal block size
                                     */
                                    work1 - (N*N), work2, lwork2 );
            }
        }
        else {
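            /*
             * Negative values encode the block updates submitted by
             * cpucblk_ssytrfsp1dplus() for 2D tasks: recover the block index
             * and apply the corresponding update.
             */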
            bloknum = - cblknum - 1;
            blok    = datacode->bloktab + bloknum;
            cpucblk_ssytrfsp1dplus_update( datacode, blok, work2 );
        }
        local_taskcnt++;
    }
    memFree_null( work1 );
    memFree_null( work2 );

    /* Make sure that everyone is done before freeing */
    isched_barrier_wait( &(ctx->global_ctx->barrier) );
    pqueueExit( computeQueue );
    memFree_null( computeQueue );
}

/**
 *******************************************************************************
 *
 * @brief Perform the sparse LDL^t factorization with the dynamic
 *        multi-threaded scheduler.
 *
 *******************************************************************************
 *
 * @param[in] pastix_data
 *          The pastix_data structure that describes the solver instance.
 *
 * @param[in] sopalin_data
 *          The structure describing the factorization, forwarded to each
 *          working thread.
 *
 *******************************************************************************/
void
dynamic_ssytrf( pastix_data_t  *pastix_data,
                sopalin_data_t *sopalin_data )
{
    SolverMatrix *datacode = sopalin_data->solvmtx;
    int32_t       taskcnt = datacode->tasknbr_1dp;
    struct args_ssytrf_t args_ssytrf = { sopalin_data, taskcnt };

    /* Allocate the computeQueue */
    MALLOC_INTERN( datacode->computeQueue,
                   pastix_data->isched->world_size, pastix_queue_t * );

    isched_parallel_call( pastix_data->isched, thread_ssytrf_dynamic, &args_ssytrf );

    memFree_null( datacode->computeQueue );
}

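/*
 * Factorization routines indexed by the IPARM_SCHEDULER value:
 * sequential, static, PaRSEC (disabled), StarPU, dynamic.
 */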
#ifndef DOXYGEN_SHOULD_SKIP_THIS
static void (*ssytrf_table[5])(pastix_data_t *, sopalin_data_t *) = {
    sequential_ssytrf,
    static_ssytrf,
/* #if defined(PASTIX_WITH_PARSEC) */
/*     parsec_ssytrf, */
/* #else */
    NULL,
/* #endif */
#if defined(PASTIX_WITH_STARPU)
    starpu_ssytrf,
#else
    NULL,
#endif
    dynamic_ssytrf
};
#endif /* DOXYGEN_SHOULD_SKIP_THIS */

/**
 *******************************************************************************
 *
 * @brief Perform the sparse LDL^t factorization, dispatching to the
 *        implementation selected by IPARM_SCHEDULER.
 *
 *******************************************************************************
 *
 * @param[in] pastix_data
 *          The pastix_data structure that describes the solver instance.
 *
 * @param[in] sopalin_data
 *          The structure describing the factorization to perform.
 *
 *******************************************************************************/
void
sopalin_ssytrf( pastix_data_t  *pastix_data,
                sopalin_data_t *sopalin_data )
{
    int sched = pastix_data->iparm[IPARM_SCHEDULER];
    void (*ssytrf)(pastix_data_t *, sopalin_data_t *) = ssytrf_table[ sched ];

    if ( ssytrf == NULL ) {
        sched  = PastixSchedDynamic;
        ssytrf = dynamic_ssytrf;
    }

    if ( (sched == PastixSchedSequential) ||
         (sched == PastixSchedStatic)     ||
         (sched == PastixSchedDynamic) )
    {
        solverRequestInit( PastixFacto, sopalin_data->solvmtx );
        solverRecvInit( PastixLCoef, sopalin_data->solvmtx, PastixFloat );
    }

    ssytrf( pastix_data, sopalin_data );

    if ( (sched == PastixSchedSequential) ||
         (sched == PastixSchedStatic)     ||
         (sched == PastixSchedDynamic) )
    {
        cpucblk_srequest_cleanup( PastixLCoef, sched, sopalin_data->solvmtx );
        solverRequestExit( sopalin_data->solvmtx );
        solverRecvExit( sopalin_data->solvmtx );
    }

#if defined(PASTIX_DEBUG_FACTO)
    coeftab_sdump( pastix_data, sopalin_data->solvmtx, "sytrf" );
#endif
}
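For context, sopalin_ssytrf() is not called directly by users: it is reached
through the numerical factorization step of the driver API once an LDL^t
factorization has been requested. The sketch below shows how the scheduler
that indexes ssytrf_table is selected from user code. It assumes the standard
PaStiX entry points (pastixInitParam, pastixInit, pastix_task_analyze,
pastix_task_numfact, pastixFinalize); factorize_ldlt is a hypothetical helper
name, and the construction of the spm matrix is elided.

#include <pastix.h>
#include <spm.h>

/* Hypothetical helper: factorize a symmetric spm matrix in LDL^t with the
 * dynamic scheduler, which dispatches to dynamic_ssytrf() via ssytrf_table. */
int factorize_ldlt( spmatrix_t *spm )
{
    pastix_data_t *pastix_data = NULL;
    pastix_int_t   iparm[IPARM_SIZE];
    double         dparm[DPARM_SIZE];

    pastixInitParam( iparm, dparm );
    iparm[IPARM_FACTORIZATION] = PastixFactLDLT;     /* sytrf kernels     */
    iparm[IPARM_SCHEDULER]     = PastixSchedDynamic; /* -> dynamic_ssytrf */
    pastixInit( &pastix_data, MPI_COMM_WORLD, iparm, dparm );

    pastix_task_analyze( pastix_data, spm );  /* ordering, symbolic, blend */
    pastix_task_numfact( pastix_data, spm );  /* ends up in sopalin_ssytrf */

    pastixFinalize( &pastix_data );
    return 0;
}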
typedef int pastix_int_t
Definition datatypes.h:51
static void pqueuePush1(pastix_queue_t *q, pastix_int_t elt, double key1)
Push an element with a single key.
Definition queue.h:64
void pqueueExit(pastix_queue_t *)
Free the structure associated to the queue.
Definition queue.c:110
static pastix_int_t pqueuePop(pastix_queue_t *q)
Pop the head of the queue without returning the keys.
Definition queue.h:75
int pqueueInit(pastix_queue_t *, pastix_int_t)
Initialize the queue structure with an initial space to store the elements.
Definition queue.c:81
Queue structure.
Definition queue.h:38
void solverRecvExit(SolverMatrix *solvmtx)
Free the array linked to pending reception.
Definition solver.c:554
void solverRequestExit(SolverMatrix *solvmtx)
Free the arrays related to the requests.
Definition solver.c:472
void solverRecvInit(pastix_coefside_t side, SolverMatrix *solvmtx, pastix_coeftype_t flttype)
Allocate the reception buffer, and initiate the first persistent reception.
Definition solver.c:513
void solverRequestInit(solve_step_t solve_step, SolverMatrix *solvmtx)
Instantiate the arrays for the requests according to the scheduler.
Definition solver.c:424
void coeftab_sdump(pastix_data_t *pastix_data, const SolverMatrix *solvmtx, const char *filename)
Dump the solver matrix coefficients into a file in human readable format.
Definition coeftab_s.c:55
void cpucblk_ssytrfsp1dplus_update(SolverMatrix *solvmtx, SolverBlok *blok, float *work)
Apply the updates of the LDL^t factorization of a given panel.
int cpucblk_ssytrfsp1dplus(SolverMatrix *solvmtx, SolverCblk *cblk)
Perform the LDL^t factorization of a given panel and submit tasks for the subsequent updates.
void cpucblk_srequest_cleanup(pastix_coefside_t side, pastix_int_t sched, SolverMatrix *solvmtx)
Waitall routine for current cblk request.
int cpucblk_ssytrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, float *DLt, float *work, pastix_int_t lwork)
Perform the LDL^t factorization of a given panel and apply all its updates.
int cpucblk_sincoming_deps(int rank, pastix_coefside_t side, SolverMatrix *solvmtx, SolverCblk *cblk)
Wait for incoming dependencies, and return when cblk->ctrbcnt has reached 0.
pastix_compress_when_t compress_when
@ PastixLCoef
Definition api.h:478
@ PastixCompressNever
Definition api.h:385
@ IPARM_SCHEDULER
Definition api.h:117
@ PastixSchedStatic
Definition api.h:335
@ PastixSchedDynamic
Definition api.h:338
@ PastixSchedSequential
Definition api.h:334
SolverMatrix * solvmatr
Definition pastixdata.h:103
pastix_int_t * iparm
Definition pastixdata.h:70
isched_t * isched
Definition pastixdata.h:86
Main PaStiX data structure.
Definition pastixdata.h:68
void starpu_ssytrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
Perform a sparse LDL^t factorization using StarPU runtime.
void sopalin_ssytrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
Perform the sparse LDL^t factorization, dispatching to the implementation selected by IPARM_SCHEDULER.
void sequential_ssytrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
Perform the sparse LDL^t factorization with the sequential scheduler.
void dynamic_ssytrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
Perform the sparse LDL^t factorization with the dynamic multi-threaded scheduler.
void thread_ssytrf_dynamic(isched_thread_t *ctx, void *args)
Thread routine of the dynamic scheduler version of the LDL^t factorization.
void thread_ssytrf_static(isched_thread_t *ctx, void *args)
Thread routine of the static scheduler version of the LDL^t factorization.
void static_ssytrf(pastix_data_t *pastix_data, sopalin_data_t *sopalin_data)
Perform the sparse LDL^t factorization with the static multi-threaded scheduler.
pastix_int_t cblknum
Definition solver.h:125
pastix_lr_t lowrank
Definition solver.h:236
pastix_int_t priority
Definition solver.h:183
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition solver.h:329
static pastix_int_t stealQueue(SolverMatrix *solvmtx, int rank, int nbthreads)
Task stealing method.
Definition solver.h:471
pastix_int_t cblknbr
Definition solver.h:211
SolverBlok *restrict bloktab
Definition solver.h:229
pastix_int_t volatile ctrbcnt
Definition solver.h:127
SolverCblk *restrict cblktab
Definition solver.h:228
int8_t cblktype
Definition solver.h:164
Solver block structure.
Definition solver.h:141
Solver column block structure.
Definition solver.h:161
Solver matrix structure.
Definition solver.h:203
The task structure for the numerical factorization.
Definition solver.h:122