PaStiX Handbook  6.3.2
splitsymbol.c
Go to the documentation of this file.
1 /**
2  *
3  * @file splitsymbol.c
4  *
5  * PaStiX simulation task basic functions.
6  *
7  * @copyright 2004-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.3.2
11  * @author Pascal Henon
12  * @author Pierre Ramet
13  * @author Mathieu Faverge
14  * @author Xavier Lacoste
15  * @date 2023-07-21
16  *
17  * @addtogroup blend_dev_split
18  * @{
19  * This module handles the splitting of the existing symbol matrix to generate
20  * more parallelism with finer granularity tasks.
21  *
22  **/
23 #include <stdio.h>
24 #include <string.h>
25 #include <strings.h>
26 #include <assert.h>
27 
28 #include "common.h"
29 #include "cost.h"
30 #include "symbol/symbol.h"
31 #include "elimintree.h"
32 #include "extendVector.h"
33 #include "cand.h"
34 #include "queue.h"
35 #include "blendctrl.h"
36 #include "blend/solver.h"
37 #include "simu.h"
38 
39 #include "extracblk.h"
40 
41 #ifndef DOXYGEN_SHOULD_SKIP_THIS
42 static inline
43 int pastix_blend_with_constant_split() {
44  return pastix_env_is_set_to("PASTIX_BLEND_SPLIT", "CONSTANT");
45 }
46 
47 static inline
48 int pastix_blend_with_smallest_upper_split() {
49  return pastix_env_is_set_to("PASTIX_BLEND_SPLIT", "UPPER");
50 }
51 
52 static inline
53 int pastix_blend_split_percent() {
54  return
55  pastix_getenv_get_value_int( "PASTIX_BLEND_SPLIT_AUTORIZED_PERCENTAGE",
56  10);
57 }
58 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
59 
60 /**
61  *******************************************************************************
62  *
63  * @brief Compute the number of cut for a given cblk width and number of candidates.
64  *
65  *******************************************************************************
66  *
67  * @param[in] ctrl
68  * The blend control structure with the parameters.
69  *
70  * @param[in] width
71  * The width of a cblk to cut.
72  *
73  *******************************************************************************
74  *
75  * @return The number of cut that should be done for this width.
76  *
77  *******************************************************************************/
78 static inline pastix_int_t
80  pastix_int_t width )
81 {
82  pastix_int_t blas_min_col;
83  pastix_int_t blas_max_col;
84  pastix_int_t nseq;
85 
86  /* Compute minimun broadness for splitting this cblk */
87  blas_min_col = ctrl->blcolmin;
88  blas_max_col = ctrl->blcolmax;
89 
90  if( width <= blas_max_col) {
91  return 1;
92  }
93  nseq = pastix_iceil( width, blas_max_col );
94 
95  /* Make sure cblk are at least blas_min_col wide */
96  if ( nseq > 1 && (width / nseq) < blas_min_col ) {
97  nseq--;
98  }
99 
100  assert( nseq > 0 );
101  return nseq;
102 }
103 
104 /**
105  *******************************************************************************
106  *
107  * @brief Generate an array with the number of blocks facing each row to
108  * minimize the cuts.
109  *
110  *******************************************************************************
111  *
112  * @param[in] symbmtx
113  * The symbol matrix structure.
114  *
115  * @param[in] frowsplit
116  * The index of the first row that might be split to avoid computing it
117  * on the full matrix.
118  *
119  *******************************************************************************
120  *
121  * @return The array with the number of block on each row from frowsplit to n.
122  *
123  *******************************************************************************/
124 static inline pastix_int_t *
126  pastix_int_t frowsplit )
127 {
128  symbol_blok_t *curblok;
129  pastix_int_t *nblocksperline;
130  pastix_int_t bloknum, line;
131  pastix_int_t size = symbmtx->nodenbr - frowsplit + 1;
132 
133  /*
134  * Allocate the temporary buffer nblocksperline, nbblocksperline stores the
135  * number of blocks that will be splitted if with split between line i and
136  * i+1.
137  */
138  MALLOC_INTERN( nblocksperline, size, pastix_int_t );
139  memset( nblocksperline, 0, size * sizeof(pastix_int_t) );
140 
141  curblok = symbmtx->bloktab;
142  for(bloknum=0; bloknum<symbmtx->bloknbr; bloknum++, curblok++ )
143  {
144  if ( curblok->lrownum < frowsplit ) {
145  continue;
146  }
147 
148  /*
149  * For each couple of rows (i,i+1) in the block, we increment
150  * the number blocks in regard of the row i
151  */
152  for(line = pastix_imax( curblok->frownum, frowsplit);
153  line < curblok->lrownum; line++ )
154  {
155  nblocksperline[ line-frowsplit ]++;
156  }
157  }
158  assert( nblocksperline[ size-1 ] == 0 );
159 
160  return nblocksperline;
161 }
162 
163 #ifndef DOXYGEN_SHOULD_SKIP_THIS
164 typedef pastix_int_t (*split_fct_t)( const pastix_int_t *,
166 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
167 
168 /**
169  *******************************************************************************
170  *
171  * @brief Compute the cut that minimizes the generation of off-diagonal blocks.
172  *
173  * This function searched first for a smaller block than the initial step to
174  * generate the cut, and then for a larger one that is return only if it is
175  * strictly better than the first solution found.
176  *
177  *******************************************************************************
178  *
179  * @param[in] nblocksperline
180  * Array of size max with the number of blocks in front of the lines [0;max-1]
181  *
182  * @param[in] step
183  * The starting cut to refine. 0 < step
184  *
185  * @param[in] min
186  * TODO
187  *
188  * @param[in] max
189  * The size of the nblocksperline array. nmax > 0.
190  *
191  * @param[in] authorized_percent
192  * The authorized percentage move around the initial cut.
193  *
194  *******************************************************************************
195  *
196  * @return The optimal cut found.
197  *
198  *******************************************************************************/
199 static inline pastix_int_t
200 computeSmallestSplit( const pastix_int_t *nblocksperline,
202  pastix_int_t min,
203  pastix_int_t max,
204  pastix_int_t authorized_percent )
205 {
206  pastix_int_t limit = pastix_iceil( step*authorized_percent, 100 );
207  pastix_int_t i, lcolnum, nbsplit;
208  pastix_int_t lmin, lmax, lavg;
209 
210  if (step >= max) {
211  return max-1;
212  }
213  assert( step > 1 );
214 
215  lavg = step - 1;
216  lmin = pastix_imax( lavg - limit - 1, min );
217  lmax = pastix_imin( lavg + limit + 1, max );
218 
219  lcolnum = lavg;
220  nbsplit = nblocksperline[ lcolnum ];
221 
222  /* Search for the minimal split */
223  for(i=lavg+1; i<lmax; i++ )
224  {
225  if ( nblocksperline[ i ] < nbsplit )
226  {
227  lcolnum = i;
228  nbsplit = nblocksperline[ i ];
229  }
230  }
231  for(i=lavg-1; i>lmin; i-- )
232  {
233  if ( nblocksperline[ i ] < nbsplit )
234  {
235  lcolnum = i;
236  nbsplit = nblocksperline[ i ];
237  }
238  }
239 
240  return lcolnum;
241 }
242 
243 /**
244  *******************************************************************************
245  *
246  * @brief Compute the cut that minimizes the generation of off-diagonal blocks.
247  *
248  * This function searched for the largest cut in the range [step -
249  * authorized_percent, step + authorized_percent] that minimizes the number of
250  * off-diagonal blocks generated.
251  *
252  *******************************************************************************
253  *
254  * @param[in] nblocksperline
255  * Array of size max with the number of blocks in front of the lines [0;max-1]
256  *
257  * @param[in] step
258  * The starting cut to refine. 0 < step
259  *
260  * @param[in] min
261  * TODO
262  *
263  * @param[in] max
264  * The size of the nblocksperline array. nmax > 0.
265  *
266  * @param[in] authorized_percent
267  * The authorized percentage move around the initial cut.
268  *
269  *******************************************************************************
270  *
271  * @return The optimal cut found.
272  *
273  *******************************************************************************/
274 static inline pastix_int_t
275 computeSmallestSplit_max( const pastix_int_t *nblocksperline,
277  pastix_int_t min,
278  pastix_int_t max,
279  pastix_int_t authorized_percent )
280 {
281  pastix_int_t limit = pastix_iceil( step*authorized_percent, 100 );
282  pastix_int_t i, lcolnum, nbsplit;
283  pastix_int_t lmin, lmax, lavg;
284 
285  if ( step >= max ) {
286  return max-1;
287  }
288  assert( step > 1 );
289 
290  lavg = step - 1;
291  lmin = pastix_imax( lavg - limit, min );
292  lmax = pastix_imin( lavg + limit + 1, max );
293 
294  lcolnum = lmin;
295  nbsplit = nblocksperline[ lcolnum ];
296 
297  /* Search for the minimal split */
298  for(i=lmin; i<lmax; i++ )
299  {
300  if ( nblocksperline[ i ] <= nbsplit )
301  {
302  lcolnum = i;
303  nbsplit = nblocksperline[ i ];
304  }
305  }
306 
307  return lcolnum;
308 }
309 
310 /**
311  *******************************************************************************
312  *
313  * @brief Compute a constant cut with the given parameters.
314  *
315  *******************************************************************************
316  *
317  * @param[in] nblocksperline
318  * Array of size max with the number of blocks in front of the lines [0;max-1]
319  *
320  * @param[in] step
321  * The starting cut to refine. 0 < step
322  *
323  * @param[in] min
324  * TODO
325  *
326  * @param[in] max
327  * The size of the nblocksperline array. nmax > 0.
328  *
329  * @param[in] authorized_percent
330  * The authorized percentage move around the initial cut.
331  *
332  *******************************************************************************
333  *
334  * @return The optimal cut found.
335  *
336  *******************************************************************************/
337 static inline pastix_int_t
338 computeConstantSplit( const pastix_int_t *nblocksperline __attribute__((unused)),
340  pastix_int_t min __attribute__((unused)),
341  pastix_int_t max,
342  pastix_int_t authorized_percent __attribute__((unused)) )
343 {
344  if ( step >= max ) {
345  return max-1;
346  }
347  assert( step > 1 );
348  return step-1;
349 }
350 
351 /**
352  *******************************************************************************
353  *
354  * @brief Split the column blocks to generate parallelism
355  *
356  * This algorithm can use three different strategies to cut, two of them tries
357  * to minimize the number of generated off-diagnonal blocks, one with the
358  * minimal size, and the seoncd with teh maximal size in the error percentage
359  * around the initially computed split. The third strategy cuts regularly the
360  * column blocks without paying attention to the facing off-diagonal blocks.
361  *
362  *******************************************************************************
363  *
364  * @param[in] ctrl
365  * The blend control structure.
366  *
367  * @param[in] symbmtx
368  * The symbol matrix structure.
369  *
370  * @param[inout] extracblk
371  * The initialized structure to store the newly created column
372  * blocks. (Blocks are generated later during merge operation, see
373  * extraCblkMerge()).
374  *
375  *******************************************************************************/
376 void
377 splitSmart( const BlendCtrl *ctrl,
378  const symbol_matrix_t *symbmtx,
379  ExtraCblk_t *extracblk )
380 {
381  symbol_cblk_t *curcblk;
382  symbol_blok_t *curblok;
383  pastix_int_t *nblocksperline = NULL;
384  pastix_int_t cblknum, bloknum, line;
385  pastix_int_t fsplitrow = -1;
386  pastix_int_t authorized_percent;
387  split_fct_t split_fct = computeSmallestSplit;
388 
389  if (pastix_blend_with_constant_split()) {
390  split_fct = computeConstantSplit;
391  }
392  else if (pastix_blend_with_smallest_upper_split()) {
393  split_fct = computeSmallestSplit_max;
394  }
395 
396  authorized_percent = pastix_blend_split_percent();
397 
398  curcblk = symbmtx->cblktab;
399  for(cblknum = 0; cblknum<symbmtx->cblknbr; cblknum++, curcblk++)
400  {
401  pastix_int_t fcolnum = curcblk->fcolnum;
402  pastix_int_t lcolnum = curcblk->lcolnum;
403  pastix_int_t step, nseq;
404  pastix_int_t width, lwidth;
405  pastix_int_t fcol, lcol;
406  pastix_int_t nbcblk = 0;
407 
408  if ( curcblk->selevtx == SYMBCBLK_KWAY ) {
409  continue;
410  }
411 
412  /*
413  * Compute the number of cblk to be generated by split,
414  * for instance we choose to split at the maximum
415  */
416  width = lcolnum - fcolnum + 1;
417  nseq = computeNbSplit( ctrl, width );
418  if ( nseq <= 1 ) {
419  continue;
420  }
421 
422  /* Initialize the nbblocksperline array */
423  if ( nblocksperline == NULL ) {
424  assert( fsplitrow == -1 );
425  assert( fcolnum != -1 );
426  fsplitrow = fcolnum;
427  nblocksperline = computeNbBlocksPerLine( symbmtx, fsplitrow );
428  nblocksperline -= fsplitrow;
429  }
430 
431  /* Create the new cblk */
432  fcol = fcolnum;
433  while( fcol <= lcolnum )
434  {
435  /* Adapt the step to the remaining width */
436  nseq = computeNbSplit( ctrl, width );
437  if ( nseq == 1 ) {
438  extraCblkAdd( extracblk, fcol, lcolnum,
439  curcblk->selevtx );
440  nbcblk++;
441  fcol = lcolnum + 1;
442  continue;
443  }
444 
445  step = pastix_iceil( width, nseq );
446  assert( step >= ctrl->blcolmin );
447 
448  /* Make sure we leave enough columns */
449  lwidth = width - ctrl->blcolmin;
450  assert( lwidth > ctrl->blcolmin );
451 
452  lcol = fcol + split_fct( nblocksperline + fcol,
453  step, ctrl->blcolmin, lwidth,
454  authorized_percent );
455 
456  assert( (lcol >= fcol) && (lcol <= lcolnum) );
457 
458  extraCblkAdd( extracblk, fcol, lcol,
459  curcblk->selevtx );
460  nbcblk++;
461 
462  lwidth = lcol - fcol + 1;
463  width = width - lwidth;
464  fcol = lcol + 1;
465  }
466 
467  /*
468  * Mark the cblk as being splitted
469  */
470  extracblk->addcblk += nbcblk-1;
471  extracblk->sptcblk[cblknum] = extracblk->curcblk - nbcblk + 1;
472  extracblk->sptcbnb[cblknum] = nbcblk;
473 
474  /* Update the number of blocks per line */
475  curblok = &(symbmtx->bloktab[curcblk->bloknum + 1]) ;
476  for(bloknum = curcblk[0].bloknum + 1;
477  bloknum < curcblk[1].bloknum; bloknum++, curblok++)
478  {
479  for(line = curblok->frownum; line < curblok->lrownum; line++ )
480  {
481  nblocksperline[ line ] += nbcblk-1;
482  }
483  }
484  }
485 
486  if ( nblocksperline != NULL ) {
487  assert( fsplitrow != -1 );
488  nblocksperline += fsplitrow;
489  memFree_null( nblocksperline );
490  }
491 }
492 /**
493  *@}
494  */
495 
496 /**
497  *******************************************************************************
498  *
499  * @ingroup pastix_blend
500  *
501  * @brief Split the column blocks of the symbol matrix to generate parallelism.
502  *
503  * This is the main function that cut the symbol matrix column blocks, and
504  * return the new symbolMatrix. Cost matrix, elimination tree, and candidate
505  * array are updated on exit of this function with respect to the newly created
506  * column blocks and blocks. See splitSmart() for the cutting algorithm.
507  *
508  *******************************************************************************
509  *
510  * @param[in] ctrl
511  * The blend control structure.
512  * On entry, candtab must be initialized.
513  * On exit, costmtx, etree, and candtab are updated accordinglyy to the
514  * extended symbol matrix, if new cblk are generated.
515  *
516  * @param[in] symbmtx
517  * On entry, the symbol matrix structure to split.
518  * On exit, the new symbol matrix with the new cblk and blocks.
519  *
520  *******************************************************************************/
521 void
523  symbol_matrix_t *symbmtx )
524 {
525  ExtraCblk_t extracblk;
526 
527  /* Init structure to store extra cblks */
528  extraCblkInit( symbmtx->cblknbr, &extracblk );
529 
530  splitSmart( ctrl, symbmtx, &extracblk );
531 
532  /* Rk: addcblk field is not erased by Exit call, so we can freely use it */
533  if ( extracblk.addcblk )
534  {
535  /* Merge the initial matrix and the newly generated cblks */
536  extraCblkMerge( &extracblk, symbmtx, &(ctrl->candtab) );
537  extraCblkExit( &extracblk );
538 
539  /* Check that the generated symbol matrix is correct */
540  if (ctrl->debug) {
541  pastixSymbolCheck(symbmtx);
542  }
543 
544  if ( ctrl->up_after_split ) {
545  /* Update cost matrix to fill-in blank of newly generated blocks */
546  costMatrixExit(ctrl->costmtx);
547  memFree_null(ctrl->costmtx);
548  ctrl->costmtx = costMatrixBuild( symbmtx,
549  ctrl->iparm[IPARM_FLOAT],
550  ctrl->iparm[IPARM_FACTORIZATION] );
551 
552  /* Update elimination tree */
553  if (ctrl->etree != NULL) {
554  eTreeExit(ctrl->etree);
555  }
556  ctrl->etree = eTreeBuild(symbmtx);
557 
558  /*
559  * Let's update cost in the candtab for the proportionnal mapping and
560  * the simulation
561  */
562  candUpdate( ctrl->candtab, ctrl->etree, symbmtx, ctrl->costmtx );
563  }
564  }
565 
566  if ( ctrl->clustnum == 0 ) {
567  if (ctrl->iparm[IPARM_VERBOSE] > PastixVerboseNo) {
568  pastixSymbolPrintStats( symbmtx );
569  }
570  }
571 }
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
void candUpdate(Cand *candtab, EliminTree *etree, const symbol_matrix_t *symbmtx, const CostMatrix *costmtx)
Update the candtab array costs after the symbol split algorithm has been applied.
Definition: cand.c:804
void costMatrixExit(CostMatrix *costmtx)
Free the cost matrix structure.
Definition: cost.c:57
CostMatrix * costMatrixBuild(const symbol_matrix_t *symbmtx, pastix_coeftype_t flttype, pastix_factotype_t factotype)
Build the cost matrix structure from the symbol matrix structure.
Definition: cost.c:91
pastix_int_t up_after_split
Definition: blendctrl.h:55
EliminTree * etree
Definition: blendctrl.h:96
pastix_int_t debug
Definition: blendctrl.h:30
pastix_int_t clustnum
Definition: blendctrl.h:70
pastix_int_t blcolmin
Definition: blendctrl.h:50
pastix_int_t blcolmax
Definition: blendctrl.h:51
Cand * candtab
Definition: blendctrl.h:98
CostMatrix * costmtx
Definition: blendctrl.h:97
pastix_int_t * iparm
Definition: blendctrl.h:86
The type and structure definitions.
Definition: blendctrl.h:28
EliminTree * eTreeBuild(const symbol_matrix_t *)
Build the elimination tree.
Definition: elimintree.c:478
void eTreeExit(EliminTree *)
Free the elimination tree structure.
Definition: elimintree.c:89
pastix_int_t addcblk
Definition: extracblk.h:29
pastix_int_t curcblk
Definition: extracblk.h:34
pastix_int_t * sptcbnb
Definition: extracblk.h:33
pastix_int_t * sptcblk
Definition: extracblk.h:32
static pastix_int_t computeNbSplit(const BlendCtrl *ctrl, pastix_int_t width)
Compute the number of cut for a given cblk width and number of candidates.
Definition: splitsymbol.c:79
void extraCblkAdd(ExtraCblk_t *extracblk, pastix_int_t fcolnum, pastix_int_t lcolnum, int8_t selevtx)
Add a new additional cblk defined by its first and last columns.
Definition: extracblk.c:187
void extraCblkExit(ExtraCblk_t *extracblk)
Free the extracblk structure.
Definition: extracblk.c:154
static pastix_int_t computeSmallestSplit(const pastix_int_t *nblocksperline, pastix_int_t step, pastix_int_t min, pastix_int_t max, pastix_int_t authorized_percent)
Compute the cut that minimizes the generation of off-diagonal blocks.
Definition: splitsymbol.c:200
static pastix_int_t computeConstantSplit(const pastix_int_t *nblocksperline, pastix_int_t step, pastix_int_t min, pastix_int_t max, pastix_int_t authorized_percent)
Compute a constant cut with the given parameters.
Definition: splitsymbol.c:338
void extraCblkMerge(const ExtraCblk_t *extracblk, symbol_matrix_t *newsymb, Cand **candtab)
Merge the existing symbol structure with the additional information from the extracblk structure.
Definition: extracblk.c:222
void splitSmart(const BlendCtrl *ctrl, const symbol_matrix_t *symbmtx, ExtraCblk_t *extracblk)
Split the column blocks to generate parallelism.
Definition: splitsymbol.c:377
void extraCblkInit(pastix_int_t cblknbr, ExtraCblk_t *extracblk)
Initialize the extracblk structure.
Definition: extracblk.c:127
static pastix_int_t * computeNbBlocksPerLine(const symbol_matrix_t *symbmtx, pastix_int_t frowsplit)
Generate an array with the number of blocks facing each row to minimize the cuts.
Definition: splitsymbol.c:125
static pastix_int_t computeSmallestSplit_max(const pastix_int_t *nblocksperline, pastix_int_t step, pastix_int_t min, pastix_int_t max, pastix_int_t authorized_percent)
Compute the cut that minimizes the generation of off-diagonal blocks.
Definition: splitsymbol.c:275
Extra symbol cblk structure.
Definition: extracblk.h:27
@ IPARM_FACTORIZATION
Definition: api.h:99
@ IPARM_FLOAT
Definition: api.h:149
@ IPARM_VERBOSE
Definition: api.h:36
@ PastixVerboseNo
Definition: api.h:221
void splitSymbol(BlendCtrl *ctrl, symbol_matrix_t *symbmtx)
Split the column blocks of the symbol matrix to generate parallelism.
Definition: splitsymbol.c:522
pastix_int_t frownum
Definition: symbol.h:60
pastix_int_t lrownum
Definition: symbol.h:61
pastix_int_t bloknbr
Definition: symbol.h:80
symbol_cblk_t * cblktab
Definition: symbol.h:83
pastix_int_t bloknum
Definition: symbol.h:48
pastix_int_t fcolnum
Definition: symbol.h:46
pastix_int_t lcolnum
Definition: symbol.h:47
symbol_blok_t * bloktab
Definition: symbol.h:84
pastix_int_t nodenbr
Definition: symbol.h:81
pastix_int_t cblknbr
Definition: symbol.h:79
void pastixSymbolPrintStats(const symbol_matrix_t *symbptr)
Print statistical information about the symbolic matrix structure.
Definition: symbol.c:389
int pastixSymbolCheck(const symbol_matrix_t *symbptr)
Checks the consistency of the given symbolic block matrix.
Definition: symbol_check.c:47
Symbol block structure.
Definition: symbol.h:59
Symbol column block structure.
Definition: symbol.h:45
Symbol matrix structure.
Definition: symbol.h:77
-by-step