PaStiX Handbook 6.4.0
Loading...
Searching...
No Matches
splitsymbol.c
Go to the documentation of this file.
1/**
2 *
3 * @file splitsymbol.c
4 *
5 * PaStiX simulation task basic functions.
6 *
7 * @copyright 2004-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8 * Univ. Bordeaux. All rights reserved.
9 *
10 * @version 6.4.0
11 * @author Pascal Henon
12 * @author Pierre Ramet
13 * @author Mathieu Faverge
14 * @author Xavier Lacoste
15 * @date 2024-07-05
16 *
17 * @addtogroup blend_dev_split
18 * @{
19 * This module handles the splitting of the existing symbol matrix to generate
20 * more parallelism with finer granularity tasks.
21 *
22 **/
23#include <stdio.h>
24#include <string.h>
25#include <strings.h>
26#include <assert.h>
27
28#include "common.h"
29#include "cost.h"
30#include "symbol/symbol.h"
31#include "elimintree.h"
32#include "extendVector.h"
33#include "cand.h"
34#include "queue.h"
35#include "blendctrl.h"
36#include "blend/solver.h"
37#include "simu.h"
38
39#include "extracblk.h"
40
41#ifndef DOXYGEN_SHOULD_SKIP_THIS
42static inline
43int pastix_blend_with_constant_split( void ) {
44 return pastix_env_is_set_to("PASTIX_BLEND_SPLIT", "CONSTANT");
45}
46
47static inline
48int pastix_blend_with_smallest_upper_split( void ) {
49 return pastix_env_is_set_to("PASTIX_BLEND_SPLIT", "UPPER");
50}
51
52static inline
53int pastix_blend_split_percent( void ) {
54 return
55 pastix_getenv_get_value_int( "PASTIX_BLEND_SPLIT_AUTORIZED_PERCENTAGE",
56 10);
57}
58#endif /* DOXYGEN_SHOULD_SKIP_THIS */
59
60/**
61 *******************************************************************************
62 *
63 * @brief Compute the number of cut for a given cblk width and number of candidates.
64 *
65 *******************************************************************************
66 *
67 * @param[in] ctrl
68 * The blend control structure with the parameters.
69 *
70 * @param[in] width
71 * The width of a cblk to cut.
72 *
73 *******************************************************************************
74 *
75 * @return The number of cut that should be done for this width.
76 *
77 *******************************************************************************/
78static inline pastix_int_t
80 pastix_int_t width )
81{
82 pastix_int_t blas_min_col;
83 pastix_int_t blas_max_col;
84 pastix_int_t nseq;
85
86 /* Compute minimun broadness for splitting this cblk */
87 blas_min_col = ctrl->blcolmin;
88 blas_max_col = ctrl->blcolmax;
89
90 if( width <= blas_max_col) {
91 return 1;
92 }
93 nseq = pastix_iceil( width, blas_max_col );
94
95 /* Make sure cblk are at least blas_min_col wide */
96 if ( nseq > 1 && (width / nseq) < blas_min_col ) {
97 nseq--;
98 }
99
100 assert( nseq > 0 );
101 return nseq;
102}
103
104/**
105 *******************************************************************************
106 *
107 * @brief Generate an array with the number of blocks facing each row to
108 * minimize the cuts.
109 *
110 *******************************************************************************
111 *
112 * @param[in] symbmtx
113 * The symbol matrix structure.
114 *
115 * @param[in] frowsplit
116 * The index of the first row that might be split to avoid computing it
117 * on the full matrix.
118 *
119 *******************************************************************************
120 *
121 * @return The array with the number of block on each row from frowsplit to n.
122 *
123 *******************************************************************************/
124static inline pastix_int_t *
126 pastix_int_t frowsplit )
127{
128 symbol_blok_t *curblok;
129 pastix_int_t *nblocksperline;
130 pastix_int_t bloknum, line;
131 pastix_int_t size = symbmtx->nodenbr - frowsplit + 1;
132
133 /*
134 * Allocate the temporary buffer nblocksperline, nbblocksperline stores the
135 * number of blocks that will be splitted if with split between line i and
136 * i+1.
137 */
138 MALLOC_INTERN( nblocksperline, size, pastix_int_t );
139 memset( nblocksperline, 0, size * sizeof(pastix_int_t) );
140
141 curblok = symbmtx->bloktab;
142 for(bloknum=0; bloknum<symbmtx->bloknbr; bloknum++, curblok++ )
143 {
144 if ( curblok->lrownum < frowsplit ) {
145 continue;
146 }
147
148 /*
149 * For each couple of rows (i,i+1) in the block, we increment
150 * the number blocks in regard of the row i
151 */
152 for(line = pastix_imax( curblok->frownum, frowsplit);
153 line < curblok->lrownum; line++ )
154 {
155 nblocksperline[ line-frowsplit ]++;
156 }
157 }
158 assert( nblocksperline[ size-1 ] == 0 );
159
160 return nblocksperline;
161}
162
163#ifndef DOXYGEN_SHOULD_SKIP_THIS
164typedef pastix_int_t (*split_fct_t)( const pastix_int_t *,
166#endif /* DOXYGEN_SHOULD_SKIP_THIS */
167
168/**
169 *******************************************************************************
170 *
171 * @brief Compute the cut that minimizes the generation of off-diagonal blocks.
172 *
173 * This function searched first for a smaller block than the initial step to
174 * generate the cut, and then for a larger one that is return only if it is
175 * strictly better than the first solution found.
176 *
177 *******************************************************************************
178 *
179 * @param[in] nblocksperline
180 * Array of size max with the number of blocks in front of the lines [0;max-1]
181 *
182 * @param[in] step
183 * The starting cut to refine. 0 < step
184 *
185 * @param[in] min
186 * TODO
187 *
188 * @param[in] max
189 * The size of the nblocksperline array. nmax > 0.
190 *
191 * @param[in] authorized_percent
192 * The authorized percentage move around the initial cut.
193 *
194 *******************************************************************************
195 *
196 * @return The optimal cut found.
197 *
198 *******************************************************************************/
199static inline pastix_int_t
200computeSmallestSplit( const pastix_int_t *nblocksperline,
202 pastix_int_t min,
203 pastix_int_t max,
204 pastix_int_t authorized_percent )
205{
206 pastix_int_t limit = pastix_iceil( step*authorized_percent, 100 );
207 pastix_int_t i, lcolnum, nbsplit;
208 pastix_int_t lmin, lmax, lavg;
209
210 if (step >= max) {
211 return max-1;
212 }
213 assert( step > 1 );
214
215 lavg = step - 1;
216 lmin = pastix_imax( lavg - limit - 1, min );
217 lmax = pastix_imin( lavg + limit + 1, max );
218
219 lcolnum = lavg;
220 nbsplit = nblocksperline[ lcolnum ];
221
222 /* Search for the minimal split */
223 for(i=lavg+1; i<lmax; i++ )
224 {
225 if ( nblocksperline[ i ] < nbsplit )
226 {
227 lcolnum = i;
228 nbsplit = nblocksperline[ i ];
229 }
230 }
231 for(i=lavg-1; i>lmin; i-- )
232 {
233 if ( nblocksperline[ i ] < nbsplit )
234 {
235 lcolnum = i;
236 nbsplit = nblocksperline[ i ];
237 }
238 }
239
240 return lcolnum;
241}
242
243/**
244 *******************************************************************************
245 *
246 * @brief Compute the cut that minimizes the generation of off-diagonal blocks.
247 *
248 * This function searched for the largest cut in the range [step -
249 * authorized_percent, step + authorized_percent] that minimizes the number of
250 * off-diagonal blocks generated.
251 *
252 *******************************************************************************
253 *
254 * @param[in] nblocksperline
255 * Array of size max with the number of blocks in front of the lines [0;max-1]
256 *
257 * @param[in] step
258 * The starting cut to refine. 0 < step
259 *
260 * @param[in] min
261 * TODO
262 *
263 * @param[in] max
264 * The size of the nblocksperline array. nmax > 0.
265 *
266 * @param[in] authorized_percent
267 * The authorized percentage move around the initial cut.
268 *
269 *******************************************************************************
270 *
271 * @return The optimal cut found.
272 *
273 *******************************************************************************/
274static inline pastix_int_t
277 pastix_int_t min,
278 pastix_int_t max,
279 pastix_int_t authorized_percent )
280{
281 pastix_int_t limit = pastix_iceil( step*authorized_percent, 100 );
282 pastix_int_t i, lcolnum, nbsplit;
283 pastix_int_t lmin, lmax, lavg;
284
285 if ( step >= max ) {
286 return max-1;
287 }
288 assert( step > 1 );
289
290 lavg = step - 1;
291 lmin = pastix_imax( lavg - limit, min );
292 lmax = pastix_imin( lavg + limit + 1, max );
293
294 lcolnum = lmin;
295 nbsplit = nblocksperline[ lcolnum ];
296
297 /* Search for the minimal split */
298 for(i=lmin; i<lmax; i++ )
299 {
300 if ( nblocksperline[ i ] <= nbsplit )
301 {
302 lcolnum = i;
303 nbsplit = nblocksperline[ i ];
304 }
305 }
306
307 return lcolnum;
308}
309
310/**
311 *******************************************************************************
312 *
313 * @brief Compute a constant cut with the given parameters.
314 *
315 *******************************************************************************
316 *
317 * @param[in] nblocksperline
318 * Array of size max with the number of blocks in front of the lines [0;max-1]
319 *
320 * @param[in] step
321 * The starting cut to refine. 0 < step
322 *
323 * @param[in] min
324 * TODO
325 *
326 * @param[in] max
327 * The size of the nblocksperline array. nmax > 0.
328 *
329 * @param[in] authorized_percent
330 * The authorized percentage move around the initial cut.
331 *
332 *******************************************************************************
333 *
334 * @return The optimal cut found.
335 *
336 *******************************************************************************/
337static inline pastix_int_t
338computeConstantSplit( const pastix_int_t *nblocksperline __attribute__((unused)),
340 pastix_int_t min __attribute__((unused)),
341 pastix_int_t max,
342 pastix_int_t authorized_percent __attribute__((unused)) )
343{
344 if ( step >= max ) {
345 return max-1;
346 }
347 assert( step > 1 );
348 return step-1;
349}
350
351/**
352 *******************************************************************************
353 *
354 * @brief Split the column blocks to generate parallelism
355 *
356 * This algorithm can use three different strategies to cut, two of them tries
357 * to minimize the number of generated off-diagnonal blocks, one with the
358 * minimal size, and the seoncd with teh maximal size in the error percentage
359 * around the initially computed split. The third strategy cuts regularly the
360 * column blocks without paying attention to the facing off-diagonal blocks.
361 *
362 *******************************************************************************
363 *
364 * @param[in] ctrl
365 * The blend control structure.
366 *
367 * @param[in] symbmtx
368 * The symbol matrix structure.
369 *
370 * @param[inout] extracblk
371 * The initialized structure to store the newly created column
372 * blocks. (Blocks are generated later during merge operation, see
373 * extraCblkMerge()).
374 *
375 *******************************************************************************/
376void
378 const symbol_matrix_t *symbmtx,
379 ExtraCblk_t *extracblk )
380{
381 symbol_cblk_t *curcblk;
382 symbol_blok_t *curblok;
383 pastix_int_t *nblocksperline = NULL;
384 pastix_int_t cblknum, bloknum, line;
385 pastix_int_t fsplitrow = -1;
386 pastix_int_t authorized_percent;
387 split_fct_t split_fct = computeSmallestSplit;
388
389 if (pastix_blend_with_constant_split()) {
390 split_fct = computeConstantSplit;
391 }
392 else if (pastix_blend_with_smallest_upper_split()) {
393 split_fct = computeSmallestSplit_max;
394 }
395
396 authorized_percent = pastix_blend_split_percent();
397
398 curcblk = symbmtx->cblktab;
399 for(cblknum = 0; cblknum<symbmtx->cblknbr; cblknum++, curcblk++)
400 {
401 pastix_int_t fcolnum = curcblk->fcolnum;
402 pastix_int_t lcolnum = curcblk->lcolnum;
403 pastix_int_t step, nseq;
404 pastix_int_t width, lwidth;
405 pastix_int_t fcol, lcol;
406 pastix_int_t nbcblk = 0;
407
408 if ( curcblk->selevtx == SYMBCBLK_KWAY ) {
409 continue;
410 }
411
412 /*
413 * Compute the number of cblk to be generated by split,
414 * for instance we choose to split at the maximum
415 */
416 width = lcolnum - fcolnum + 1;
417 nseq = computeNbSplit( ctrl, width );
418 if ( nseq <= 1 ) {
419 continue;
420 }
421
422 /* Initialize the nbblocksperline array */
423 if ( nblocksperline == NULL ) {
424 assert( fsplitrow == -1 );
425 assert( fcolnum != -1 );
426 fsplitrow = fcolnum;
427 nblocksperline = computeNbBlocksPerLine( symbmtx, fsplitrow );
428 nblocksperline -= fsplitrow;
429 }
430
431 /* Create the new cblk */
432 fcol = fcolnum;
433 while( fcol <= lcolnum )
434 {
435 /* Adapt the step to the remaining width */
436 nseq = computeNbSplit( ctrl, width );
437 if ( nseq == 1 ) {
438 extraCblkAdd( extracblk, fcol, lcolnum,
439 curcblk->selevtx );
440 nbcblk++;
441 fcol = lcolnum + 1;
442 continue;
443 }
444
445 step = pastix_iceil( width, nseq );
446 assert( step >= ctrl->blcolmin );
447
448 /* Make sure we leave enough columns */
449 lwidth = width - ctrl->blcolmin;
450 assert( lwidth > ctrl->blcolmin );
451
452 lcol = fcol + split_fct( nblocksperline + fcol,
453 step, ctrl->blcolmin, lwidth,
454 authorized_percent );
455
456 assert( (lcol >= fcol) && (lcol <= lcolnum) );
457
458 extraCblkAdd( extracblk, fcol, lcol,
459 curcblk->selevtx );
460 nbcblk++;
461
462 lwidth = lcol - fcol + 1;
463 width = width - lwidth;
464 fcol = lcol + 1;
465 }
466
467 /*
468 * Mark the cblk as being splitted
469 */
470 extracblk->addcblk += nbcblk-1;
471 extracblk->sptcblk[cblknum] = extracblk->curcblk - nbcblk + 1;
472 extracblk->sptcbnb[cblknum] = nbcblk;
473
474 /* Update the number of blocks per line */
475 curblok = &(symbmtx->bloktab[curcblk->bloknum + 1]) ;
476 for(bloknum = curcblk[0].bloknum + 1;
477 bloknum < curcblk[1].bloknum; bloknum++, curblok++)
478 {
479 for(line = curblok->frownum; line < curblok->lrownum; line++ )
480 {
481 nblocksperline[ line ] += nbcblk-1;
482 }
483 }
484 }
485
486 if ( nblocksperline != NULL ) {
487 assert( fsplitrow != -1 );
488 nblocksperline += fsplitrow;
489 memFree_null( nblocksperline );
490 }
491}
492/**
493 *@}
494 */
495
496/**
497 *******************************************************************************
498 *
499 * @ingroup pastix_blend
500 *
501 * @brief Split the column blocks of the symbol matrix to generate parallelism.
502 *
503 * This is the main function that cut the symbol matrix column blocks, and
504 * return the new symbolMatrix. Cost matrix, elimination tree, and candidate
505 * array are updated on exit of this function with respect to the newly created
506 * column blocks and blocks. See splitSmart() for the cutting algorithm.
507 *
508 *******************************************************************************
509 *
510 * @param[in] ctrl
511 * The blend control structure.
512 * On entry, candtab must be initialized.
513 * On exit, costmtx, etree, and candtab are updated accordinglyy to the
514 * extended symbol matrix, if new cblk are generated.
515 *
516 * @param[in] symbmtx
517 * On entry, the symbol matrix structure to split.
518 * On exit, the new symbol matrix with the new cblk and blocks.
519 *
520 *******************************************************************************/
521void
523 symbol_matrix_t *symbmtx )
524{
525 ExtraCblk_t extracblk;
526
527 /* Init structure to store extra cblks */
528 extraCblkInit( symbmtx->cblknbr, &extracblk );
529
530 splitSmart( ctrl, symbmtx, &extracblk );
531
532 /* Rk: addcblk field is not erased by Exit call, so we can freely use it */
533 if ( extracblk.addcblk )
534 {
535 /* Merge the initial matrix and the newly generated cblks */
536 extraCblkMerge( &extracblk, symbmtx, &(ctrl->candtab) );
537 extraCblkExit( &extracblk );
538
539 /* Check that the generated symbol matrix is correct */
540 if (ctrl->debug) {
541 pastixSymbolCheck(symbmtx);
542 }
543
544 if ( ctrl->up_after_split ) {
545 /* Update cost matrix to fill-in blank of newly generated blocks */
546 costMatrixExit(ctrl->costmtx);
547 memFree_null(ctrl->costmtx);
548 ctrl->costmtx = costMatrixBuild( symbmtx,
549 ctrl->iparm[IPARM_FLOAT],
550 ctrl->iparm[IPARM_FACTORIZATION] );
551
552 /* Update elimination tree */
553 if (ctrl->etree != NULL) {
554 eTreeExit(ctrl->etree);
555 }
556 ctrl->etree = eTreeBuild(symbmtx);
557
558 /*
559 * Let's update cost in the candtab for the proportionnal mapping and
560 * the simulation
561 */
562 candUpdate( ctrl->candtab, ctrl->etree, symbmtx, ctrl->costmtx );
563 }
564 }
565
566 if ( ctrl->clustnum == 0 ) {
567 if (ctrl->iparm[IPARM_VERBOSE] > PastixVerboseNo) {
568 pastixSymbolPrintStats( symbmtx );
569 }
570 }
571}
BEGIN_C_DECLS typedef int pastix_int_t
Definition datatypes.h:51
void candUpdate(Cand *candtab, EliminTree *etree, const symbol_matrix_t *symbmtx, const CostMatrix *costmtx)
Update the candtab array costs after the symbol split algorithm has been applied.
Definition cand.c:804
void costMatrixExit(CostMatrix *costmtx)
Free the cost matrix structure.
Definition cost.c:57
CostMatrix * costMatrixBuild(const symbol_matrix_t *symbmtx, pastix_coeftype_t flttype, pastix_factotype_t factotype)
Build the cost matrix structure from the symbol matrix structure.
Definition cost.c:91
pastix_int_t up_after_split
Definition blendctrl.h:55
EliminTree * etree
Definition blendctrl.h:96
pastix_int_t debug
Definition blendctrl.h:30
pastix_int_t clustnum
Definition blendctrl.h:70
pastix_int_t blcolmin
Definition blendctrl.h:50
pastix_int_t blcolmax
Definition blendctrl.h:51
Cand * candtab
Definition blendctrl.h:98
CostMatrix * costmtx
Definition blendctrl.h:97
pastix_int_t * iparm
Definition blendctrl.h:86
The type and structure definitions.
Definition blendctrl.h:28
EliminTree * eTreeBuild(const symbol_matrix_t *)
Build the elimination tree.
Definition elimintree.c:478
void eTreeExit(EliminTree *)
Free the elimination tree structure.
Definition elimintree.c:89
pastix_int_t addcblk
Definition extracblk.h:29
pastix_int_t curcblk
Definition extracblk.h:34
pastix_int_t * sptcbnb
Definition extracblk.h:33
pastix_int_t * sptcblk
Definition extracblk.h:32
static pastix_int_t computeNbSplit(const BlendCtrl *ctrl, pastix_int_t width)
Compute the number of cut for a given cblk width and number of candidates.
Definition splitsymbol.c:79
void extraCblkAdd(ExtraCblk_t *extracblk, pastix_int_t fcolnum, pastix_int_t lcolnum, int8_t selevtx)
Add a new additional cblk defined by its first and last columns.
Definition extracblk.c:187
void extraCblkExit(ExtraCblk_t *extracblk)
Free the extracblk structure.
Definition extracblk.c:154
static pastix_int_t computeSmallestSplit(const pastix_int_t *nblocksperline, pastix_int_t step, pastix_int_t min, pastix_int_t max, pastix_int_t authorized_percent)
Compute the cut that minimizes the generation of off-diagonal blocks.
static pastix_int_t computeConstantSplit(const pastix_int_t *nblocksperline, pastix_int_t step, pastix_int_t min, pastix_int_t max, pastix_int_t authorized_percent)
Compute a constant cut with the given parameters.
void extraCblkMerge(const ExtraCblk_t *extracblk, symbol_matrix_t *newsymb, Cand **candtab)
Merge the existing symbol structure with the additional information from the extracblk structure.
Definition extracblk.c:222
void splitSmart(const BlendCtrl *ctrl, const symbol_matrix_t *symbmtx, ExtraCblk_t *extracblk)
Split the column blocks to generate parallelism.
void extraCblkInit(pastix_int_t cblknbr, ExtraCblk_t *extracblk)
Initialize the extracblk structure.
Definition extracblk.c:127
static pastix_int_t * computeNbBlocksPerLine(const symbol_matrix_t *symbmtx, pastix_int_t frowsplit)
Generate an array with the number of blocks facing each row to minimize the cuts.
static pastix_int_t computeSmallestSplit_max(const pastix_int_t *nblocksperline, pastix_int_t step, pastix_int_t min, pastix_int_t max, pastix_int_t authorized_percent)
Compute the cut that minimizes the generation of off-diagonal blocks.
Extra symbol cblk structure.
Definition extracblk.h:27
@ IPARM_FACTORIZATION
Definition api.h:99
@ IPARM_FLOAT
Definition api.h:149
@ IPARM_VERBOSE
Definition api.h:36
@ PastixVerboseNo
Definition api.h:221
void splitSymbol(BlendCtrl *ctrl, symbol_matrix_t *symbmtx)
Split the column blocks of the symbol matrix to generate parallelism.
pastix_int_t frownum
Definition symbol.h:60
pastix_int_t lrownum
Definition symbol.h:61
pastix_int_t bloknbr
Definition symbol.h:80
symbol_cblk_t * cblktab
Definition symbol.h:83
pastix_int_t bloknum
Definition symbol.h:48
pastix_int_t fcolnum
Definition symbol.h:46
pastix_int_t lcolnum
Definition symbol.h:47
symbol_blok_t * bloktab
Definition symbol.h:84
pastix_int_t nodenbr
Definition symbol.h:81
pastix_int_t cblknbr
Definition symbol.h:79
void pastixSymbolPrintStats(const symbol_matrix_t *symbptr)
Print statistical information about the symbolic matrix structure.
Definition symbol.c:389
int pastixSymbolCheck(const symbol_matrix_t *symbptr)
Checks the consistency of the given symbolic block matrix.
Symbol block structure.
Definition symbol.h:59
Symbol column block structure.
Definition symbol.h:45
Symbol matrix structure.
Definition symbol.h:77
-by-step