PaStiX Handbook  6.4.0
symbol_reorder.c
Go to the documentation of this file.
1 /**
2  *
3  * @file symbol_reorder.c
4  *
5  * @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
6  * Univ. Bordeaux. All rights reserved.
7  *
8  * @version 6.4.0
9  * @author Vincent Bridonneau
10  * @author Mathieu Faverge
11  * @author Tony Delarue
12  * @date 2024-07-05
13  *
14  * @precisions normal z -> s d c
15  *
16  **/
17 #include "common.h"
18 #include "blend/solver.h"
19 #include "symbol/symbol.h"
20 #include "symbol/symbol_reorder.h"
21 #include "pastix/order.h"
22 #include "queue.h"
23 #include "blend/extendVector.h"
24 
25 /**
26  *******************************************************************************
27  *
28  * @ingroup symbol_dev_reordering
29  *
30  * @brief Sequential version for reordering
31  *
32  * This algorithm reorders cblks. (Sequential version).
33  *
34  *******************************************************************************
35  *
36  * @param[inout] pastix_data
37  * The pastix_data providing the scheduler, the symbolic structure,
38  * and the ordering providing by Scotch that will be updated with
39  * the new rows permutation for each supernode. It will also gives
40  * a stopping criterion the reordering of each supernode. The split_level
41  * field activates the split level heuristic, dividing distances
42  * computations into two stages: for upper and for lower
43  * contruibuting supernodes.
44  *
45  * @param[out] levels
46  * The pointer to the levels structure, giving the level of
47  * each supernode in the elimination tree. To be computed inside.
48  *
49  * @param[in] maxdepth
50  * The maximum depth in the elimination tree.
51  *
52  *******************************************************************************/
53 static inline void
55  pastix_int_t maxdepth,
56  pastix_int_t *levels )
57 {
58  symbol_matrix_t *symbptr = pastix_data->symbmtx;
59  symbol_cblk_t *cblk;
60  pastix_int_t cblknbr;
61  pastix_int_t *iparm = pastix_data->iparm;
62  pastix_order_t *order = pastix_data->ordemesh;
63  pastix_int_t itercblk;
64  pastix_int_t *depthweight;
65 
66  cblk = symbptr->cblktab;
67  cblknbr = symbptr->cblknbr;
68 
69  /*
70  * Solves the Traveler Salesman Problem on each cblk to minimize the number
71  * of off-diagonal blocks per row
72  */
73  MALLOC_INTERN( depthweight, maxdepth, pastix_int_t );
74 
75  for (itercblk=0; itercblk<cblknbr; itercblk++, cblk++) {
76 
77  if ( cblk->fcolnum >= symbptr->schurfcol )
78  continue;
79 
80  memset( depthweight, 0, maxdepth * sizeof(pastix_int_t) );
81 
82  symbol_reorder_cblk( symbptr, cblk, order,
83  levels,
84  depthweight, maxdepth,
86  iparm[IPARM_REORDERING_STOP] );
87  }
88 
89  memFree_null( depthweight );
90 }
91 
92 /**
93  *
94  * @brief The argument for reorder parallel functions data structure
95  *
96  * This structure provides parallel functions for reordering arguments
97  * to computes the same thing sequential functions do but in parallel.
98  *
99  */
100 struct args_reorder_t
101 {
102  pastix_data_t *pastix_data;
103  pastix_int_t maxdepth;
104  const pastix_int_t *levels;
105  ExtendVectorINT *tasktab;
106 };
107 
108 /**
109  *******************************************************************************
110  *
111  * @ingroup symbol_dev_reordering
112  *
113  * @brief Parallel basic version for reordering
114  *
115  * This algorithm reorders cblks. Tasks are cut such as every thread
116  * has the same number of task. (Parallel version)
117  *
118  *******************************************************************************
119  *
120  * @param[in] ctx
121  * The context of the current thread. This provides information
122  * about total number of thread used and the rank of the current thread.
123  *
124  * @param[inout] args
125  * The argument for reordering functions.
126  *
127  *******************************************************************************/
128 void
129 thread_preorder_basic_stategy( isched_thread_t *ctx, void *args )
130 {
131  struct args_reorder_t *arg = (struct args_reorder_t*)args;
132  pastix_data_t *pastix_data = arg->pastix_data;
133  symbol_matrix_t *symbptr = pastix_data->symbmtx;
134  symbol_cblk_t *cblk;
135  pastix_int_t *iparm = pastix_data->iparm;
136  pastix_order_t *order = pastix_data->ordemesh;
137  pastix_int_t maxdepth = arg->maxdepth;
138  pastix_int_t ii, cblknbr, tasknbr;
139  pastix_int_t *depthweight;
140  const pastix_int_t *levels = arg->levels;
141  pastix_int_t rank = (pastix_int_t)ctx->rank;
142  pastix_int_t size = (pastix_int_t)ctx->global_ctx->world_size;
143 
144  cblknbr = symbptr->cblknbr;
145  tasknbr = cblknbr / size;
146 
147  if (rank < (cblknbr % size)) {
148  tasknbr ++;
149  }
150 
151  /*
152  * Solves the Traveler Salesman Problem on each cblk to minimize the number
153  * of off-diagonal blocks per row
154  */
155  MALLOC_INTERN( depthweight, maxdepth, pastix_int_t );
156 
157  cblk = symbptr->cblktab + rank;
158  for (ii=0; ii<tasknbr; ii++, cblk += size) {
159 
160  memset( depthweight, 0, maxdepth * sizeof(pastix_int_t) );
161 
162  symbol_reorder_cblk( symbptr, cblk, order,
163  levels,
164  depthweight, maxdepth,
165  iparm[IPARM_REORDERING_SPLIT],
166  iparm[IPARM_REORDERING_STOP] );
167  }
168 
169  memFree_null( depthweight );
170 }
171 
172 /**
173  *******************************************************************************
174  *
175  * @ingroup symbol_dev_reordering
176  *
177  * @brief Parallel improved version for reordering
178  *
179  * This algorithm reorders cblks after ordering
180  * them according to their size. Priority queue are used
181  * to sort cblks increasing their cost and process decreasing
182  * their load. (Parallel version)
183  *
184  *******************************************************************************
185  *
186  * @param[in] ctx
187  * The context of the current thread. This provides information
188  * about total number of thread used and the rank of the current thread.
189  *
190  * @param[inout] args
191  * The argument for reordering functions.
192  *
193  *******************************************************************************/
194 void
195 thread_preorder_zigzag_stategy( isched_thread_t *ctx, void *args )
196 {
197  struct args_reorder_t *arg = (struct args_reorder_t*)args;
198  pastix_data_t *pastix_data = arg->pastix_data;
199  symbol_matrix_t *symbptr = pastix_data->symbmtx;
200  symbol_cblk_t *cblk;
201  pastix_int_t *iparm = pastix_data->iparm;
202  pastix_order_t *order = pastix_data->ordemesh;
203  pastix_int_t maxdepth = arg->maxdepth;
204  pastix_int_t ii;
205  ExtendVectorINT *tasktab;
206  pastix_int_t tasknbr;
207  pastix_int_t *depthweight;
208  const pastix_int_t *levels = arg->levels;
209  pastix_int_t rank = (pastix_int_t)ctx->rank;
210 
211  /*
212  * Solves the Traveler Salesman Problem on each cblk to minimize the number
213  * of off-diagonal blocks per row
214  */
215  MALLOC_INTERN( depthweight, maxdepth, pastix_int_t );
216 
217  tasktab = arg->tasktab + rank;
218  tasknbr = extendint_Size( tasktab );
219 
220  for ( ii=0; ii<tasknbr; ii++ ) {
221  cblk = symbptr->cblktab + extendint_Read(tasktab, ii);
222 
223  memset( depthweight, 0, maxdepth * sizeof(pastix_int_t) );
224  symbol_reorder_cblk( symbptr, cblk, order,
225  levels,
226  depthweight, maxdepth,
227  iparm[IPARM_REORDERING_SPLIT],
228  iparm[IPARM_REORDERING_STOP] );
229  }
230 
231  memFree_null( depthweight );
232 }
233 
234 /**
235  *******************************************************************************
236  *
237  * @ingroup symbol_dev_reordering
238  *
239  * @brief Function called by each thread.
240  *
241  * This function select and call a strategy to reorder cblks.
242  * (Parallel version)
243  *
244  *******************************************************************************
245  *
246  * @param[in] ctx
247  * The context of the current thread. This provides information
248  * about total number of thread used and the rank of the current thread.
249  *
250  * @param[inout] args
251  * The argument for reordering functions.
252  *
253  *******************************************************************************/
254 static inline void
255 thread_preorder( isched_thread_t *ctx, void *args )
256 {
257 #ifdef BASIC_REORDERING_STRATEGY
258  thread_preorder_basic_stategy ( ctx, args );
259 #else
260  thread_preorder_zigzag_stategy( ctx, args );
261 #endif
262 }
263 
264 /**
265  *******************************************************************************
266  *
267  * @ingroup symbol_dev_reordering
268  *
269  * @brief Computes the cost of a cblk
270  *
271  * This function computes the value of a cblk using the
272  * formula : cost = a.(cblk->lcolcum - cblk->fcolnum + 1)^2
273  * where a = (cblk[1].bloknum - cblk[0].bloknum) / 2 + 1
274  * It represents the load (number of calcul) a process
275  * needs to reorder the cblk.
276  *
277  *******************************************************************************
278  *
279  * @param[in] cblk
280  * The cblk we want to compute the cost.
281  *
282  *******************************************************************************
283  *
284  * @retval TODO
285  *
286  *******************************************************************************/
287 static inline double
289 {
290  double n = (double)(cblk->lcolnum - cblk->fcolnum + 1);
291  return n*n * ((double)(cblk[1].bloknum - cblk[0].bloknum) / 2.0 + 1.0);
292 }
293 
294 /**
295  *******************************************************************************
296  *
297  * @ingroup symbol_dev_reordering
298  *
299  * @brief Order cblks for each process
300  *
301  * This function sorts cblks increasing their cost
302  * and process decreasingly their load using priority queue.
303  * After that every process has a load nearly equal, except
304  * the first one wich has a greater load than others.
305  *
306  *******************************************************************************
307  *
308  * @param[in] ctx
309  * The context of the current thread. This provides information
310  * about total number of thread used and the rank of the current thread.
311  *
312  * @param[inout] args
313  * The argument for reordering functions.
314  *
315  *******************************************************************************/
316 static inline void
317 order_tasks( isched_t *ctx,
318  struct args_reorder_t *args )
319 {
320  pastix_data_t *pastix_data = args->pastix_data;
321  symbol_matrix_t *symbmtx = pastix_data->symbmtx;
322  pastix_queue_t cblks;
323  pastix_queue_t procs;
324  pastix_int_t cblknbr = symbmtx->cblknbr;
325  pastix_int_t size = ctx->world_size;
326  pastix_int_t itercblk, iterproc;
327  pastix_int_t cblk_id, proc_id;
328  double cblk_cost, proc_cost;
329  symbol_cblk_t *cblk;
330 
331  pqueueInit( &cblks, cblknbr );
332  pqueueInit( &procs, size );
333 
334  /*
335  * Sort the cblks decreasing
336  */
337  cblk = symbmtx->cblktab;
338  for ( itercblk=0; itercblk < cblknbr; itercblk++, cblk++ ) {
339 
340  if ( cblk->fcolnum >= symbmtx->schurfcol )
341  continue;
342 
343  pqueuePush1( &cblks, itercblk, -cost(cblk) );
344  }
345 
346  for ( iterproc=0; iterproc < size; ++iterproc) {
347  pqueuePush1( &procs, iterproc, 0. );
348  }
349 
350  while ( pqueueSize( &cblks ) > 0 ) {
351  cblk_id = pqueuePop1( &cblks, &cblk_cost );
352  proc_id = pqueuePop1( &procs, &proc_cost );
353  proc_cost += -cblk_cost; // Negative because of reverse sort
354  pqueuePush1 ( &procs, proc_id, proc_cost );
355 
356  extendint_Add( args->tasktab + proc_id, cblk_id );
357  }
358 
359  pqueueExit( &cblks );
360  pqueueExit( &procs );
361 }
362 
363 
364 /**
365  *******************************************************************************
366  *
367  * @ingroup symbol_dev_reordering
368  *
369  * @brief Prepare arguments for parallel subroutines and order cblks.
370  *
371  * This function creates array for each process that countains cblks
372  * the process has to reorder. These cblks are provided by order_tasks
373  * functions. Afterwards, cblks are reordered by reordering functions.
374  *
375  *******************************************************************************
376  *
377  * @param[inout] pastix_data
378  * The pastix_data providing the scheduler, the symbolic structure,
379  * and the ordering providing by Scotch that will be updated with
380  * the new rows permutation for each supernode. It will also gives
381  * a stopping criterion the reordering of each supernode. The split_level
382  * field activates the split level heuristic, dividing distances
383  * computations into two stages: for upper and for lower
384  * contruibuting supernodes.
385  *
386  * @param[out] levels
387  * The pointer to the levels structure, giving the level of
388  * each supernode in the elimination tree. To be computed inside.
389  *
390  * @param[in] maxdepth
391  * The maximum depth in the elimination tree.
392  *
393  *******************************************************************************/
394 static inline void
396  pastix_int_t maxdepth,
397  pastix_int_t *levels )
398 {
399  struct args_reorder_t args_reorder = { pastix_data, maxdepth, levels, NULL };
400  pastix_int_t size = pastix_data->isched->world_size;
401  pastix_int_t iterproc;
402  pastix_int_t cblknbr = pastix_data->symbmtx->cblknbr;
403  pastix_int_t cblkavg = pastix_imax(1, (pastix_int_t)(cblknbr / size));
404 
405  MALLOC_INTERN( args_reorder.tasktab, size, ExtendVectorINT );
406 
407  for ( iterproc=0; iterproc < size; ++iterproc ) {
408  extendint_Init( args_reorder.tasktab + iterproc, cblkavg );
409  }
410  order_tasks( pastix_data->isched, &args_reorder );
411 
412  isched_parallel_call( pastix_data->isched, thread_preorder, &args_reorder );
413 
414  for ( iterproc=0; iterproc < size; ++iterproc ) {
415  extendint_Exit( args_reorder.tasktab + iterproc );
416  }
417 
418  memFree_null( args_reorder.tasktab );
419 }
420 
421 #ifndef DOXYGEN_SHOULD_SKIP_THIS
422 static void (*reorder_table[5])(pastix_data_t *, pastix_int_t , pastix_int_t *) = {
425  NULL,
426  NULL,
428 };
429 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
430 
431 /**
432  *******************************************************************************
433  *
434  * @ingroup symbol_dev_reordering
435  *
436  * @brief Reorder all node
437  *
438  * This function computes the set each cblks, and then call a TSP heuristic
439  * to minimize the Hamiltonian Path.
440  *
441  *******************************************************************************
442  *
443  * @param[inout] pastix_data
444  * The pastix_data providing the scheduler, the symbolic structure,
445  * and the ordering providing by Scotch that will be updated with
446  * the new rows permutation for each supernode. It will also gives
447  * a stopping criterion the reordering of each supernode. The split_level
448  * field activates the split level heuristic, dividing distances
449  * computations into two stages: for upper and for lower
450  * contruibuting supernodes.
451  *
452  * @param[out] levels
453  * The pointer to the levels structure, giving the level of
454  * each supernode in the elimination tree. To be computed inside.
455  *
456  * @param[in] maxdepth
457  * The maximum depth in the elimination tree.
458  *
459  *******************************************************************************/
460 void
462  pastix_int_t maxdepth,
463  pastix_int_t *levels )
464 {
465  int sched = pastix_data->iparm[IPARM_SCHEDULER];
466  void (*reorder)(pastix_data_t *, pastix_int_t , pastix_int_t * ) = reorder_table[ sched ];
467 
468  if (reorder == NULL) {
469  reorder = thread_reorder;
470  }
471  reorder( pastix_data, maxdepth, levels );
472 }
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
pastix_int_t extendint_Size(const ExtendVectorINT *)
Return the number of element stored in the vector.
Definition: extendVector.c:112
pastix_int_t extendint_Read(const ExtendVectorINT *, pastix_int_t)
Return the element of index eltnum.
Definition: extendVector.c:136
pastix_int_t * extendint_Init(ExtendVectorINT *, pastix_int_t)
Initialize the extendVector structure with the initial size given.
Definition: extendVector.c:45
void extendint_Add(ExtendVectorINT *, pastix_int_t)
Add an element elt to the end of the vector.
Definition: extendVector.c:90
void extendint_Exit(ExtendVectorINT *)
Free the extendVector structure.
Definition: extendVector.c:66
The extend integer array structure.
Definition: extendVector.h:29
static void pqueuePush1(pastix_queue_t *q, pastix_int_t elt, double key1)
Push an element with a single key.
Definition: queue.h:64
void pqueueExit(pastix_queue_t *)
Free the structure associated to the queue.
Definition: queue.c:110
pastix_int_t pqueueSize(const pastix_queue_t *)
Return the size of the queue.
Definition: queue.c:135
static pastix_int_t pqueuePop1(pastix_queue_t *q, double *key1)
Pop the head of the queue and get the associated first key.
Definition: queue.h:88
int pqueueInit(pastix_queue_t *, pastix_int_t)
Initialize the queue structure with an initial space to store the elements.
Definition: queue.c:81
Queue structure.
Definition: queue.h:38
@ IPARM_SCHEDULER
Definition: api.h:117
@ IPARM_REORDERING_STOP
Definition: api.h:79
@ IPARM_REORDERING_SPLIT
Definition: api.h:78
Order structure.
Definition: order.h:47
symbol_cblk_t * cblktab
Definition: symbol.h:83
pastix_int_t schurfcol
Definition: symbol.h:82
pastix_int_t fcolnum
Definition: symbol.h:46
pastix_int_t lcolnum
Definition: symbol.h:47
pastix_int_t cblknbr
Definition: symbol.h:79
Symbol column block structure.
Definition: symbol.h:45
Symbol matrix structure.
Definition: symbol.h:77
pastix_order_t * ordemesh
Definition: pastixdata.h:98
pastix_int_t * iparm
Definition: pastixdata.h:70
isched_t * isched
Definition: pastixdata.h:86
symbol_matrix_t * symbmtx
Definition: pastixdata.h:100
Main PaStiX data structure.
Definition: pastixdata.h:68
static void thread_reorder(pastix_data_t *pastix_data, pastix_int_t maxdepth, pastix_int_t *levels)
Prepare arguments for parallel subroutines and order cblks.
static void order_tasks(isched_t *ctx, struct args_reorder_t *args)
Order cblks for each process.
void thread_preorder_basic_stategy(isched_thread_t *ctx, void *args)
Parallel basic version for reordering.
static double cost(symbol_cblk_t *cblk)
Computes the cost of a cblk.
void symbol_reorder(pastix_data_t *pastix_data, pastix_int_t maxdepth, pastix_int_t *levels)
Reorder all node.
static void sequential_reorder(pastix_data_t *pastix_data, pastix_int_t maxdepth, pastix_int_t *levels)
Sequential version for reordering.
void thread_preorder_zigzag_stategy(isched_thread_t *ctx, void *args)
Parallel improved version for reordering.
void symbol_reorder_cblk(const symbol_matrix_t *symbptr, const symbol_cblk_t *cblk, pastix_order_t *order, const pastix_int_t *levels, pastix_int_t *depthweight, pastix_int_t depthmax, pastix_int_t split_level, pastix_int_t stop_criterion)
Reorder a supernode.
static void thread_preorder(isched_thread_t *ctx, void *args)
Function called by each thread.