PaStiX Handbook 6.4.0
Loading...
Searching...
No Matches
symbol_reorder.c
Go to the documentation of this file.
1/**
2 *
3 * @file symbol_reorder.c
4 *
5 * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
6 * Univ. Bordeaux. All rights reserved.
7 *
8 * @version 6.4.0
9 * @author Vincent Bridonneau
10 * @author Mathieu Faverge
11 * @author Tony Delarue
12 * @date 2024-07-05
13 *
14 * @precisions normal z -> s d c
15 *
16 **/
17#include "common.h"
18#include "blend/solver.h"
19#include "symbol/symbol.h"
20#include "symbol/symbol_reorder.h"
21#include "pastix/order.h"
22#include "queue.h"
23#include "blend/extendVector.h"
24
25/**
26 *******************************************************************************
27 *
28 * @ingroup symbol_dev_reordering
29 *
30 * @brief Sequential version for reordering
31 *
32 * This algorithm reorders cblks. (Sequential version).
33 *
34 *******************************************************************************
35 *
36 * @param[inout] pastix_data
37 * The pastix_data providing the scheduler, the symbolic structure,
38 * and the ordering providing by Scotch that will be updated with
39 * the new rows permutation for each supernode. It will also gives
40 * a stopping criterion the reordering of each supernode. The split_level
41 * field activates the split level heuristic, dividing distances
42 * computations into two stages: for upper and for lower
43 * contruibuting supernodes.
44 *
45 * @param[out] levels
46 * The pointer to the levels structure, giving the level of
47 * each supernode in the elimination tree. To be computed inside.
48 *
49 * @param[in] maxdepth
50 * The maximum depth in the elimination tree.
51 *
52 *******************************************************************************/
53static inline void
55 pastix_int_t maxdepth,
56 pastix_int_t *levels )
57{
58 symbol_matrix_t *symbptr = pastix_data->symbmtx;
59 symbol_cblk_t *cblk;
60 pastix_int_t cblknbr;
61 pastix_int_t *iparm = pastix_data->iparm;
62 pastix_order_t *order = pastix_data->ordemesh;
63 pastix_int_t itercblk;
64 pastix_int_t *depthweight;
65
66 cblk = symbptr->cblktab;
67 cblknbr = symbptr->cblknbr;
68
69 /*
70 * Solves the Traveler Salesman Problem on each cblk to minimize the number
71 * of off-diagonal blocks per row
72 */
73 MALLOC_INTERN( depthweight, maxdepth, pastix_int_t );
74
75 for (itercblk=0; itercblk<cblknbr; itercblk++, cblk++) {
76
77 if ( cblk->fcolnum >= symbptr->schurfcol )
78 continue;
79
80 memset( depthweight, 0, maxdepth * sizeof(pastix_int_t) );
81
82 symbol_reorder_cblk( symbptr, cblk, order,
83 levels,
84 depthweight, maxdepth,
86 iparm[IPARM_REORDERING_STOP] );
87 }
88
89 memFree_null( depthweight );
90}
91
92/**
93 *
94 * @brief The argument for reorder parallel functions data structure
95 *
96 * This structure provides parallel functions for reordering arguments
97 * to computes the same thing sequential functions do but in parallel.
98 *
99 */
100struct args_reorder_t
101{
102 pastix_data_t *pastix_data;
103 pastix_int_t maxdepth;
104 const pastix_int_t *levels;
105 ExtendVectorINT *tasktab;
106};
107
108/**
109 *******************************************************************************
110 *
111 * @ingroup symbol_dev_reordering
112 *
113 * @brief Parallel basic version for reordering
114 *
115 * This algorithm reorders cblks. Tasks are cut such as every thread
116 * has the same number of task. (Parallel version)
117 *
118 *******************************************************************************
119 *
120 * @param[in] ctx
121 * The context of the current thread. This provides information
122 * about total number of thread used and the rank of the current thread.
123 *
124 * @param[inout] args
125 * The argument for reordering functions.
126 *
127 *******************************************************************************/
128void
129thread_preorder_basic_stategy( isched_thread_t *ctx, void *args )
130{
131 struct args_reorder_t *arg = (struct args_reorder_t*)args;
132 pastix_data_t *pastix_data = arg->pastix_data;
133 symbol_matrix_t *symbptr = pastix_data->symbmtx;
134 symbol_cblk_t *cblk;
135 pastix_int_t *iparm = pastix_data->iparm;
136 pastix_order_t *order = pastix_data->ordemesh;
137 pastix_int_t maxdepth = arg->maxdepth;
138 pastix_int_t ii, cblknbr, tasknbr;
139 pastix_int_t *depthweight;
140 const pastix_int_t *levels = arg->levels;
141 pastix_int_t rank = (pastix_int_t)ctx->rank;
142 pastix_int_t size = (pastix_int_t)ctx->global_ctx->world_size;
143
144 cblknbr = symbptr->cblknbr;
145 tasknbr = cblknbr / size;
146
147 if (rank < (cblknbr % size)) {
148 tasknbr ++;
149 }
150
151 /*
152 * Solves the Traveler Salesman Problem on each cblk to minimize the number
153 * of off-diagonal blocks per row
154 */
155 MALLOC_INTERN( depthweight, maxdepth, pastix_int_t );
156
157 cblk = symbptr->cblktab + rank;
158 for (ii=0; ii<tasknbr; ii++, cblk += size) {
159
160 memset( depthweight, 0, maxdepth * sizeof(pastix_int_t) );
161
162 symbol_reorder_cblk( symbptr, cblk, order,
163 levels,
164 depthweight, maxdepth,
166 iparm[IPARM_REORDERING_STOP] );
167 }
168
169 memFree_null( depthweight );
170}
171
172/**
173 *******************************************************************************
174 *
175 * @ingroup symbol_dev_reordering
176 *
177 * @brief Parallel improved version for reordering
178 *
179 * This algorithm reorders cblks after ordering
180 * them according to their size. Priority queue are used
181 * to sort cblks increasing their cost and process decreasing
182 * their load. (Parallel version)
183 *
184 *******************************************************************************
185 *
186 * @param[in] ctx
187 * The context of the current thread. This provides information
188 * about total number of thread used and the rank of the current thread.
189 *
190 * @param[inout] args
191 * The argument for reordering functions.
192 *
193 *******************************************************************************/
194void
195thread_preorder_zigzag_stategy( isched_thread_t *ctx, void *args )
196{
197 struct args_reorder_t *arg = (struct args_reorder_t*)args;
198 pastix_data_t *pastix_data = arg->pastix_data;
199 symbol_matrix_t *symbptr = pastix_data->symbmtx;
200 symbol_cblk_t *cblk;
201 pastix_int_t *iparm = pastix_data->iparm;
202 pastix_order_t *order = pastix_data->ordemesh;
203 pastix_int_t maxdepth = arg->maxdepth;
204 pastix_int_t ii;
205 ExtendVectorINT *tasktab;
206 pastix_int_t tasknbr;
207 pastix_int_t *depthweight;
208 const pastix_int_t *levels = arg->levels;
209 pastix_int_t rank = (pastix_int_t)ctx->rank;
210
211 /*
212 * Solves the Traveler Salesman Problem on each cblk to minimize the number
213 * of off-diagonal blocks per row
214 */
215 MALLOC_INTERN( depthweight, maxdepth, pastix_int_t );
216
217 tasktab = arg->tasktab + rank;
218 tasknbr = extendint_Size( tasktab );
219
220 for ( ii=0; ii<tasknbr; ii++ ) {
221 cblk = symbptr->cblktab + extendint_Read(tasktab, ii);
222
223 memset( depthweight, 0, maxdepth * sizeof(pastix_int_t) );
224 symbol_reorder_cblk( symbptr, cblk, order,
225 levels,
226 depthweight, maxdepth,
228 iparm[IPARM_REORDERING_STOP] );
229 }
230
231 memFree_null( depthweight );
232}
233
234/**
235 *******************************************************************************
236 *
237 * @ingroup symbol_dev_reordering
238 *
239 * @brief Function called by each thread.
240 *
241 * This function select and call a strategy to reorder cblks.
242 * (Parallel version)
243 *
244 *******************************************************************************
245 *
246 * @param[in] ctx
247 * The context of the current thread. This provides information
248 * about total number of thread used and the rank of the current thread.
249 *
250 * @param[inout] args
251 * The argument for reordering functions.
252 *
253 *******************************************************************************/
254static inline void
255thread_preorder( isched_thread_t *ctx, void *args )
256{
257#ifdef BASIC_REORDERING_STRATEGY
258 thread_preorder_basic_stategy ( ctx, args );
259#else
261#endif
262}
263
264/**
265 *******************************************************************************
266 *
267 * @ingroup symbol_dev_reordering
268 *
269 * @brief Computes the cost of a cblk
270 *
271 * This function computes the value of a cblk using the
272 * formula : cost = a.(cblk->lcolcum - cblk->fcolnum + 1)^2
273 * where a = (cblk[1].bloknum - cblk[0].bloknum) / 2 + 1
274 * It represents the load (number of calcul) a process
275 * needs to reorder the cblk.
276 *
277 *******************************************************************************
278 *
279 * @param[in] cblk
280 * The cblk we want to compute the cost.
281 *
282 *******************************************************************************
283 *
284 * @retval TODO
285 *
286 *******************************************************************************/
287static inline double
289{
290 double n = (double)(cblk->lcolnum - cblk->fcolnum + 1);
291 return n*n * ((double)(cblk[1].bloknum - cblk[0].bloknum) / 2.0 + 1.0);
292}
293
294/**
295 *******************************************************************************
296 *
297 * @ingroup symbol_dev_reordering
298 *
299 * @brief Order cblks for each process
300 *
301 * This function sorts cblks increasing their cost
302 * and process decreasingly their load using priority queue.
303 * After that every process has a load nearly equal, except
304 * the first one wich has a greater load than others.
305 *
306 *******************************************************************************
307 *
308 * @param[in] ctx
309 * The context of the current thread. This provides information
310 * about total number of thread used and the rank of the current thread.
311 *
312 * @param[inout] args
313 * The argument for reordering functions.
314 *
315 *******************************************************************************/
316static inline void
317order_tasks( isched_t *ctx,
318 struct args_reorder_t *args )
319{
320 pastix_data_t *pastix_data = args->pastix_data;
321 symbol_matrix_t *symbmtx = pastix_data->symbmtx;
322 pastix_queue_t cblks;
323 pastix_queue_t procs;
324 pastix_int_t cblknbr = symbmtx->cblknbr;
325 pastix_int_t size = ctx->world_size;
326 pastix_int_t itercblk, iterproc;
327 pastix_int_t cblk_id, proc_id;
328 double cblk_cost, proc_cost;
329 symbol_cblk_t *cblk;
330
331 pqueueInit( &cblks, cblknbr );
332 pqueueInit( &procs, size );
333
334 /*
335 * Sort the cblks decreasing
336 */
337 cblk = symbmtx->cblktab;
338 for ( itercblk=0; itercblk < cblknbr; itercblk++, cblk++ ) {
339
340 if ( cblk->fcolnum >= symbmtx->schurfcol )
341 continue;
342
343 pqueuePush1( &cblks, itercblk, -cost(cblk) );
344 }
345
346 for ( iterproc=0; iterproc < size; ++iterproc) {
347 pqueuePush1( &procs, iterproc, 0. );
348 }
349
350 while ( pqueueSize( &cblks ) > 0 ) {
351 cblk_id = pqueuePop1( &cblks, &cblk_cost );
352 proc_id = pqueuePop1( &procs, &proc_cost );
353 proc_cost += -cblk_cost; // Negative because of reverse sort
354 pqueuePush1 ( &procs, proc_id, proc_cost );
355
356 extendint_Add( args->tasktab + proc_id, cblk_id );
357 }
358
359 pqueueExit( &cblks );
360 pqueueExit( &procs );
361}
362
363
364/**
365 *******************************************************************************
366 *
367 * @ingroup symbol_dev_reordering
368 *
369 * @brief Prepare arguments for parallel subroutines and order cblks.
370 *
371 * This function creates array for each process that countains cblks
372 * the process has to reorder. These cblks are provided by order_tasks
373 * functions. Afterwards, cblks are reordered by reordering functions.
374 *
375 *******************************************************************************
376 *
377 * @param[inout] pastix_data
378 * The pastix_data providing the scheduler, the symbolic structure,
379 * and the ordering providing by Scotch that will be updated with
380 * the new rows permutation for each supernode. It will also gives
381 * a stopping criterion the reordering of each supernode. The split_level
382 * field activates the split level heuristic, dividing distances
383 * computations into two stages: for upper and for lower
384 * contruibuting supernodes.
385 *
386 * @param[out] levels
387 * The pointer to the levels structure, giving the level of
388 * each supernode in the elimination tree. To be computed inside.
389 *
390 * @param[in] maxdepth
391 * The maximum depth in the elimination tree.
392 *
393 *******************************************************************************/
394static inline void
396 pastix_int_t maxdepth,
397 pastix_int_t *levels )
398{
399 struct args_reorder_t args_reorder = { pastix_data, maxdepth, levels, NULL };
400 pastix_int_t size = pastix_data->isched->world_size;
401 pastix_int_t iterproc;
402 pastix_int_t cblknbr = pastix_data->symbmtx->cblknbr;
403 pastix_int_t cblkavg = pastix_imax(1, (pastix_int_t)(cblknbr / size));
404
405 MALLOC_INTERN( args_reorder.tasktab, size, ExtendVectorINT );
406
407 for ( iterproc=0; iterproc < size; ++iterproc ) {
408 extendint_Init( args_reorder.tasktab + iterproc, cblkavg );
409 }
410 order_tasks( pastix_data->isched, &args_reorder );
411
412 isched_parallel_call( pastix_data->isched, thread_preorder, &args_reorder );
413
414 for ( iterproc=0; iterproc < size; ++iterproc ) {
415 extendint_Exit( args_reorder.tasktab + iterproc );
416 }
417
418 memFree_null( args_reorder.tasktab );
419}
420
421#ifndef DOXYGEN_SHOULD_SKIP_THIS
422static void (*reorder_table[5])(pastix_data_t *, pastix_int_t , pastix_int_t *) = {
425 NULL,
426 NULL,
428};
429#endif /* DOXYGEN_SHOULD_SKIP_THIS */
430
431/**
432 *******************************************************************************
433 *
434 * @ingroup symbol_dev_reordering
435 *
436 * @brief Reorder all node
437 *
438 * This function computes the set each cblks, and then call a TSP heuristic
439 * to minimize the Hamiltonian Path.
440 *
441 *******************************************************************************
442 *
443 * @param[inout] pastix_data
444 * The pastix_data providing the scheduler, the symbolic structure,
445 * and the ordering providing by Scotch that will be updated with
446 * the new rows permutation for each supernode. It will also gives
447 * a stopping criterion the reordering of each supernode. The split_level
448 * field activates the split level heuristic, dividing distances
449 * computations into two stages: for upper and for lower
450 * contruibuting supernodes.
451 *
452 * @param[out] levels
453 * The pointer to the levels structure, giving the level of
454 * each supernode in the elimination tree. To be computed inside.
455 *
456 * @param[in] maxdepth
457 * The maximum depth in the elimination tree.
458 *
459 *******************************************************************************/
460void
462 pastix_int_t maxdepth,
463 pastix_int_t *levels )
464{
465 int sched = pastix_data->iparm[IPARM_SCHEDULER];
466 void (*reorder)(pastix_data_t *, pastix_int_t , pastix_int_t * ) = reorder_table[ sched ];
467
468 if (reorder == NULL) {
469 reorder = thread_reorder;
470 }
471 reorder( pastix_data, maxdepth, levels );
472}
BEGIN_C_DECLS typedef int pastix_int_t
Definition datatypes.h:51
pastix_int_t extendint_Size(const ExtendVectorINT *)
Return the number of element stored in the vector.
pastix_int_t extendint_Read(const ExtendVectorINT *, pastix_int_t)
Return the element of index eltnum.
pastix_int_t * extendint_Init(ExtendVectorINT *, pastix_int_t)
Initialize the extendVector structure with the initial size given.
void extendint_Add(ExtendVectorINT *, pastix_int_t)
Add an element elt to the end of the vector.
void extendint_Exit(ExtendVectorINT *)
Free the extendVector structure.
The extend integer array structure.
static void pqueuePush1(pastix_queue_t *q, pastix_int_t elt, double key1)
Push an element with a single key.
Definition queue.h:64
void pqueueExit(pastix_queue_t *)
Free the structure associated to the queue.
Definition queue.c:110
pastix_int_t pqueueSize(const pastix_queue_t *)
Return the size of the queue.
Definition queue.c:135
static pastix_int_t pqueuePop1(pastix_queue_t *q, double *key1)
Pop the head of the queue and get the associated first key.
Definition queue.h:88
int pqueueInit(pastix_queue_t *, pastix_int_t)
Initialize the queue structure with an initial space to store the elements.
Definition queue.c:81
Queue structure.
Definition queue.h:38
@ IPARM_SCHEDULER
Definition api.h:117
@ IPARM_REORDERING_STOP
Definition api.h:79
@ IPARM_REORDERING_SPLIT
Definition api.h:78
Order structure.
Definition order.h:47
symbol_cblk_t * cblktab
Definition symbol.h:83
pastix_int_t schurfcol
Definition symbol.h:82
pastix_int_t fcolnum
Definition symbol.h:46
pastix_int_t lcolnum
Definition symbol.h:47
pastix_int_t cblknbr
Definition symbol.h:79
Symbol column block structure.
Definition symbol.h:45
Symbol matrix structure.
Definition symbol.h:77
pastix_order_t * ordemesh
Definition pastixdata.h:98
pastix_int_t * iparm
Definition pastixdata.h:70
isched_t * isched
Definition pastixdata.h:86
symbol_matrix_t * symbmtx
Definition pastixdata.h:100
Main PaStiX data structure.
Definition pastixdata.h:68
static void thread_reorder(pastix_data_t *pastix_data, pastix_int_t maxdepth, pastix_int_t *levels)
Prepare arguments for parallel subroutines and order cblks.
static void order_tasks(isched_t *ctx, struct args_reorder_t *args)
Order cblks for each process.
void thread_preorder_basic_stategy(isched_thread_t *ctx, void *args)
Parallel basic version for reordering.
static double cost(symbol_cblk_t *cblk)
Computes the cost of a cblk.
void symbol_reorder(pastix_data_t *pastix_data, pastix_int_t maxdepth, pastix_int_t *levels)
Reorder all node.
static void sequential_reorder(pastix_data_t *pastix_data, pastix_int_t maxdepth, pastix_int_t *levels)
Sequential version for reordering.
void thread_preorder_zigzag_stategy(isched_thread_t *ctx, void *args)
Parallel improved version for reordering.
void symbol_reorder_cblk(const symbol_matrix_t *symbptr, const symbol_cblk_t *cblk, pastix_order_t *order, const pastix_int_t *levels, pastix_int_t *depthweight, pastix_int_t depthmax, pastix_int_t split_level, pastix_int_t stop_criterion)
Reorder a supernode.
static void thread_preorder(isched_thread_t *ctx, void *args)
Function called by each thread.