PaStiX Handbook 6.4.0
Loading...
Searching...
No Matches
propmap.c
Go to the documentation of this file.
1/**
2 *
3 * @file propmap.c
4 *
5 * PaStiX analyse proportionnal mapping functions.
6 *
7 * @copyright 2004-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8 * Univ. Bordeaux. All rights reserved.
9 *
10 * @version 6.4.0
11 * @author Pascal Henon
12 * @author Mathieu Faverge
13 * @author Gregoire Pichon
14 * @author Xavier Lacoste
15 * @date 2024-07-05
16 *
17 * @addtogroup blend_dev_propmap
18 * @{
19 *
20 **/
21#include "common.h"
22#include "symbol/symbol.h"
23#include "blend/elimintree.h"
24#include "blend/cost.h"
25#include "blend/cand.h"
26#include "blend/extendVector.h"
27#include "blend/blendctrl.h"
28#include "queue.h"
29
30/**
31 * @brief Minimal work ratio to accept the contribution from one additional candidate
32 */
33#define CROSS_TOLERANCE 0.1
34
35/**
36 * @brief Proportional mapping structure to forward the arguments throught the
37 * recursive calls.
38 */
39typedef struct propmap_s {
40 const EliminTree *etree; /**< Elimination tree to map */
41 Cand *candtab; /**< Candidate array for each node of the etree */
42 pastix_int_t candnbr; /**< Number of candidates available */
43 int nocrossproc; /**< Enable/disable the distribution of one candidate on multiple branches */
45
46/**
47 *******************************************************************************
48 *
49 * @brief Set the given candidates to all the subtree w/o conditions.
50 *
51 *******************************************************************************
52 *
53 * @param[in] pmptr
54 * Pointer to the parameters of the proportional mapping algorithm.
55 *
56 * @param[in] rootnum
57 * Index of the root of the subtree to be given to [fcandnum,lcandnum]
58 *
59 * @param[in] fcandnum
60 * Rank of the first candidate attributed to this subtree.
61 *
62 * @param[in] lcandnum
63 * Rank of the last candidate attributed to this subtree.
64 *
65 * @param[in] cluster
66 * The cluster value for the subtree.
67 *
68 *******************************************************************************/
69static inline void
71 pastix_int_t rootnum,
72 pastix_int_t fcandnum,
73 pastix_int_t lcandnum,
74 pastix_int_t cluster )
75{
77 pastix_int_t sonsnbr;
78
79 pmptr->candtab[rootnum].fcandnum = fcandnum;
80 pmptr->candtab[rootnum].lcandnum = lcandnum;
81 pmptr->candtab[rootnum].cluster = cluster;
82
83 /* Recursively apply the affectation to the sons */
84 sonsnbr = pmptr->etree->nodetab[rootnum].sonsnbr;
85 for(i=0;i<sonsnbr;i++) {
86 propMappSubtreeOn1P( pmptr, eTreeSonI(pmptr->etree, rootnum, i),
87 fcandnum, lcandnum, cluster );
88 }
89
90 return;
91}
92
93/**
94 *******************************************************************************
95 *
96 * @brief Apply the proportional mapping algorithm to a subtree.
97 *
98 *******************************************************************************
99 *
100 * @param[in] pmptr
101 * Pointer to the parameters of the proportional mapping algorithm.
102 *
103 * @param[in] rootnum
104 * Index of the root of the subtree to process.
105 *
106 * @param[in] fcandnum
107 * Rank of the first candidate attributed to this subtree.
108 *
109 * @param[in] lcandnum
110 * Rank of the last candidate attributed to this subtree.
111 *
112 * @param[in] cluster
113 * The cluster value for the subtree.
114 *
115 * @param[inout] cost_remain
116 * Array of size (lcandnum-fcandnum+1).
117 * Stores the remaining idle time of each candidate to balance the load
118 * among them. On exit, the cost is updated with the affected subtrees.
119 *
120 *******************************************************************************/
121static inline void
123 pastix_int_t rootnum,
124 pastix_int_t fcandnum,
125 pastix_int_t lcandnum,
126 pastix_int_t cluster,
127 double *cost_remain )
128{
129 pastix_queue_t *queue_tree;
130 pastix_int_t p;
131 pastix_int_t candnbr, scandnbr;
132 pastix_int_t fcand = 0;
133 pastix_int_t lcand = 0;
134 pastix_int_t i;
135 pastix_int_t sonsnbr;
136 double isocost;
137 double aspt_cost;
138 double cumul_cost;
139 double *sub_cost_remain = NULL;
140 double epsilon;
141
142 candnbr = lcandnum - fcandnum + 1;
143
144 /* If only one candidate, all the subtree belongs to it */
145 if (candnbr == 1)
146 {
147 propMappSubtreeOn1P( pmptr, rootnum, fcandnum, lcandnum, cluster );
148 return;
149 }
150
151 /* Set the cand group for this tree node */
152 pmptr->candtab[rootnum].fcandnum = fcandnum;
153 pmptr->candtab[rootnum].lcandnum = lcandnum;
154 pmptr->candtab[rootnum].cluster = cluster;
155
156 /* This treenode is a leave, return */
157 if(pmptr->etree->nodetab[rootnum].sonsnbr == 0)
158 {
159 return;
160 }
161
162 assert( pmptr->etree->nodetab[rootnum].ndecost <= pmptr->etree->nodetab[rootnum].subcost );
163
164 /* Work that each processor is intended to get from this treenode */
165 isocost = pmptr->etree->nodetab[rootnum].ndecost / candnbr;
166 for(p=0;p<candnbr;p++)
167 cost_remain[p] -= isocost;
168
169 /* Get cost remaining in the descendance of the treenode */
170 aspt_cost = pmptr->etree->nodetab[rootnum].subcost - pmptr->etree->nodetab[rootnum].ndecost;
171
172 /*
173 * If the first and last candidate have already received more work that they
174 * should, we remove them from the set
175 */
176 if(cost_remain[0] <= 0)
177 fcand = 1;
178 else
179 fcand = 0;
180 if (cost_remain[candnbr-1] <= 0)
181 candnbr--;
182
183 assert(fcand < candnbr);
184 assert(candnbr >= 1);
185
186 /* Make sure that the sum of cost_remain in used proc is at least equals to aspt_cost */
187 cumul_cost = 0;
188 for(i=fcand; i<candnbr; i++)
189 cumul_cost += cost_remain[i];
190
191 if (cumul_cost < aspt_cost) {
192 double ratio = aspt_cost / cumul_cost;
193 for(i=fcand; i<candnbr; i++)
194 cost_remain[i] *= ratio;
195 }
196
197 /* Compute the minimun participation rate of a candidat processor */
198 epsilon = (CROSS_TOLERANCE * cumul_cost) / ((double)candnbr);
199
200 /*
201 * Compute the cand group for each proc
202 */
203 sonsnbr = pmptr->etree->nodetab[rootnum].sonsnbr;
204
205 if ( sonsnbr == 1 ) {
206 /* Go on to subtree */
207 propMappSubtree( pmptr, eTreeSonI(pmptr->etree, rootnum, 0),
208 fcandnum, lcandnum, cluster, cost_remain );
209 return;
210 }
211
212 /* Create the list of sons sorted by descending order of cost */
213 MALLOC_INTERN(queue_tree, 1, pastix_queue_t);
214 pqueueInit(queue_tree, sonsnbr);
215 double soncost;
216 for(i=0; i<sonsnbr; i++)
217 {
218 /* Cost in the current subtree to be mapped */
219 cumul_cost = -pmptr->etree->nodetab[eTreeSonI(pmptr->etree, rootnum, i)].subcost;
220
221 /* Cost of the root node in the subtree */
222 soncost = -pmptr->etree->nodetab[eTreeSonI(pmptr->etree, rootnum, i)].ndecost;
223
224 pqueuePush2(queue_tree, i, cumul_cost, (pastix_int_t)soncost);
225 }
226
227 /* Proportionnal mapping of the subtree on remaining candidates */
228 /* The first stage deals only with nodes that require multiple candidates */
229 while (pqueueSize(queue_tree) > 0)
230 {
231 i = pqueuePop2( queue_tree, &cumul_cost, NULL );
232 cumul_cost = -cumul_cost;
233
234 /*
235 * If the subtree cost is smaller than what the first candidate can
236 * process, following nodes will be smaller and following candidates have
237 * at least the same amount of ressources available so, we skip to the
238 * second stage
239 */
240 if (cumul_cost <= cost_remain[fcand]) {
241 cost_remain[fcand] -= cumul_cost;
242 propMappSubtreeOn1P( pmptr, eTreeSonI(pmptr->etree, rootnum, i),
243 fcandnum+fcand, fcandnum+fcand, cluster);
244 break;
245 }
246
247 /*
248 * If a candidate cannot participate to multiple subtrees, we check with
249 * epsilon to avoid overflow
250 */
251 if ( pmptr->nocrossproc &&
252 (cumul_cost <= (cost_remain[fcand]+epsilon)) )
253 {
254 cost_remain[fcand] -= cumul_cost;
255 propMappSubtreeOn1P( pmptr, eTreeSonI(pmptr->etree, rootnum, i),
256 fcandnum+fcand, fcandnum+fcand, cluster );
257 break;
258 }
259
260 /* If the first candidate doesn't have enough ressources, we skip it */
261 if( (fcand < candnbr-1) && (cost_remain[fcand] <= epsilon) )
262 {
263 fcand++;
264 }
265
266 /*
267 * Computes how many candidate will participate to this node. We add
268 * candidate as long as we have some and they all have more than espilon
269 * extra work.
270 */
271 lcand = fcand;
272 scandnbr = 1;
273 cumul_cost -= cost_remain[fcand];
274 while ((( pmptr->nocrossproc && (cumul_cost > ((double)scandnbr)*epsilon)) ||
275 (!pmptr->nocrossproc && (cumul_cost > epsilon))) &&
276 (lcand < candnbr - 1))
277 {
278 lcand++; scandnbr++;
279 assert( cost_remain[lcand] > 0 );
280 cumul_cost -= cost_remain[lcand];
281 }
282
283 /*
284 * Prepare the sub_cost_remain array.
285 * If some cost is remaining, we distribute equally the work to each
286 * candidate, otherwise we give the cost remaining to the last candidate
287 * and set to zero the remaining cost of the first candidates.
288 */
289 MALLOC_INTERN(sub_cost_remain, lcand-fcand+1, double);
290 if (cumul_cost > 0)
291 {
292 isocost = cumul_cost / scandnbr;
293 for(p=0; p<scandnbr; p++)
294 {
295 sub_cost_remain[p] = cost_remain[fcand+p] + isocost;
296 cost_remain[fcand+p] = - isocost;
297 }
298 }
299 else
300 {
301 for(p=0; p<scandnbr-1; p++)
302 {
303 sub_cost_remain[p] = cost_remain[fcand+p];
304 cost_remain[fcand+p] = 0.0;
305 }
306 sub_cost_remain[scandnbr-1] = cost_remain[lcand] + cumul_cost;
307 cost_remain[fcand+scandnbr-1] = -cumul_cost; /* cumul_cost <= 0 */
308 }
309 /* Go on to subtree */
310 propMappSubtree( pmptr, eTreeSonI(pmptr->etree, rootnum, i),
311 fcandnum+fcand, fcandnum+lcand, cluster, sub_cost_remain);
312 memFree_null(sub_cost_remain);
313
314 if ( (lcand < candnbr - 1) &&
315 ( pmptr->nocrossproc ||
316 (cost_remain[lcand] < epsilon) ) )
317 {
318 fcand = lcand+1;
319 }
320 else
321 {
322 if ( pmptr->nocrossproc )
323 break;
324 fcand = lcand;
325 }
326 }
327
328 if (pqueueSize(queue_tree) > 0)
329 {
330 pastix_queue_t *queue_proc;
331
332 /*
333 * Second stage:
334 * Distribute the single candidate subtree. At each step of the algorithm we
335 * associate together the candidate with maximum cost_remain and the largest
336 * son.
337 */
338
339 /* Fill queue proc order by remain cost descending */
340 MALLOC_INTERN(queue_proc, 1, pastix_queue_t);
341 pqueueInit(queue_proc, candnbr);
342 for (i=0; i<candnbr; i++) {
343 pqueuePush1(queue_proc, i, -cost_remain[i]);
344 }
345
346 while (pqueueSize(queue_tree) > 0)
347 {
348 /* Get the largest node */
349 i = pqueuePop2( queue_tree, &cumul_cost, NULL );
350 cumul_cost = -cumul_cost;
351
352 /* Get the candidate with the largest cost_remain */
353 fcand = pqueuePop(queue_proc);
354
355 /* Map them together */
356 propMappSubtreeOn1P( pmptr, eTreeSonI(pmptr->etree, rootnum, i),
357 fcandnum+fcand, fcandnum+fcand, cluster);
358
359 /* Update cost_remain and re-insert into the sorted queue */
360 cost_remain[fcand] -= cumul_cost;
361 pqueuePush1(queue_proc, fcand, -cost_remain[fcand]);
362 }
363
364 pqueueExit(queue_proc);
365 memFree(queue_proc);
366 }
367
368 pqueueExit(queue_tree);
369 memFree(queue_tree);
370 return;
371}
372
373/**
374 * @}
375 */
376
377/**
378 *******************************************************************************
379 *
380 * @ingroup pastix_blend
381 *
382 * @brief Apply the proportional mapping algorithm.
383 *
384 * This function computes the proportionnal mapping of the elimination tree. The
385 * result is a set of potential candidates to compute each node of the
386 * elimination tree. The real candidate will be affected during the simulation
387 * with simuRun(). It is then important to reduce as much as possible the number
388 * of candidates per node, while keeping enough freedom for the scheduling to
389 * allow a good load balance and few idle times in the final static decision.
390 *
391 *******************************************************************************
392 *
393 * @param[inout] candtab
394 * On entry, the candtab array must contain the cost of each node of
395 * the elimination tree, and their depth in the tree as computed by
396 * candBuild().
397 * On exit, the fields fcandnum and lcandnum are computed with the
398 * proportional mapping algorithm that tries to balance the load
399 * between the candidates and distribute the branches to everyone
400 * according to their cost.
401 *
402 * @param[in] etree
403 * The elimination tree to map on the ressources.
404 *
405 * @param[in] candnbr
406 * The total number of candidates to distribute over the elimination
407 * tree.
408 *
409 * @param[in] nocrossproc
410 * If nocrossproc is enabled, candidates can NOT be part of two
411 * subranches with different co-workers in each branch.
412 * If nocrossproc is disabled, candidate can be shared between two
413 * subranches if the amount of extra work exceeds 10%.
414 *
415 * @param[in] allcand
416 * No proportional mapping is performed and everyone is candidate to
417 * everything. This will have a large performance impact on the
418 * simulation.
419 *
420 *******************************************************************************/
421void
423 const EliminTree *etree,
424 pastix_int_t candnbr,
425 int nocrossproc, int allcand )
426{
427 propmap_t pmdata;
428 pastix_int_t p;
429
430 /* Prepare the stucture */
431 pmdata.candtab = candtab;
432 pmdata.etree = etree;
433 pmdata.candnbr = candnbr;
434 pmdata.nocrossproc = nocrossproc;
435
436 if ( allcand ) {
437 propMappSubtreeOn1P( &pmdata, eTreeRoot(etree),
438 0, candnbr-1, 0 );
439 }
440 else {
441 double *cost_remain = NULL;
442 double isocost;
443
444 /* Prepare the initial cost_remain array */
445 MALLOC_INTERN(cost_remain, candnbr, double);
446 isocost = etree->nodetab[ eTreeRoot(etree) ].subcost / candnbr;
447
448 for(p=0; p<candnbr; p++) {
449 cost_remain[p] = isocost;
450 }
451
452 propMappSubtree( &pmdata, eTreeRoot(etree),
453 0, candnbr-1,
454 0, cost_remain );
455
456 memFree_null(cost_remain);
457 }
458}
BEGIN_C_DECLS typedef int pastix_int_t
Definition datatypes.h:51
pastix_int_t fcandnum
Definition cand.h:31
Processor candidate group to own a column blok.
Definition cand.h:28
double subcost
Definition elimintree.h:28
eTreeNode_t * nodetab
Definition elimintree.h:42
static pastix_int_t eTreeRoot(const EliminTree *etree)
Return the root of the elimination tree.
Definition elimintree.h:126
static pastix_int_t eTreeSonI(const EliminTree *etree, pastix_int_t node, pastix_int_t i)
Return the i^{th} son of a given node.
Definition elimintree.h:105
Elimination tree.
Definition elimintree.h:39
static void propMappSubtreeOn1P(const propmap_t *pmptr, pastix_int_t rootnum, pastix_int_t fcandnum, pastix_int_t lcandnum, pastix_int_t cluster)
Set the given candidates to all the subtree w/o conditions.
Definition propmap.c:70
#define CROSS_TOLERANCE
Minimal work ratio to accept the contribution from one additional candidate.
Definition propmap.c:33
static void propMappSubtree(const propmap_t *pmptr, pastix_int_t rootnum, pastix_int_t fcandnum, pastix_int_t lcandnum, pastix_int_t cluster, double *cost_remain)
Apply the proportional mapping algorithm to a subtree.
Definition propmap.c:122
struct propmap_s propmap_t
Proportional mapping structure to forward the arguments throught the recursive calls.
void pqueuePush2(pastix_queue_t *, pastix_int_t, double, double)
Insert an element into the sorted queue.
Definition queue.c:178
static void pqueuePush1(pastix_queue_t *q, pastix_int_t elt, double key1)
Push an element with a single key.
Definition queue.h:64
void pqueueExit(pastix_queue_t *)
Free the structure associated to the queue.
Definition queue.c:110
pastix_int_t pqueuePop2(pastix_queue_t *, double *, double *)
Remove the first element of the queue and return its keys if needed.
Definition queue.c:268
pastix_int_t pqueueSize(const pastix_queue_t *)
Return the size of the queue.
Definition queue.c:135
static pastix_int_t pqueuePop(pastix_queue_t *q)
Pop the head of the queue whithout returning the keys.
Definition queue.h:75
int pqueueInit(pastix_queue_t *, pastix_int_t)
Initialize the queue structure with an initial space to store the elements.
Definition queue.c:81
Queue structure.
Definition queue.h:38
void propMappTree(Cand *candtab, const EliminTree *etree, pastix_int_t candnbr, int nocrossproc, int allcand)
Apply the proportional mapping algorithm.
Definition propmap.c:422