PaStiX Handbook  6.4.0
fax_csr_amalgamate.c
Go to the documentation of this file.
1 /**
2  *
3  * @file fax_csr_amalgamate.c
4  *
5  * This file contains the routines to amalgamate a given graph with a fill-in
6  * ratio used in the amalgamation algorithm.
7  *
8  * @copyright 2004-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
9  * Univ. Bordeaux. All rights reserved.
10  *
11  * @version 6.4.0
12  * @author Pascal Henon
13  * @author Mathieu Faverge
14  * @author Gregoire Pichon
15  * @date 2024-07-05
16  *
17  * @addtogroup symbol_dev_csr
18  * @{
19  **/
20 #include "common.h"
21 #include "pastix/order.h"
22 #include "symbol/fax_csr.h"
23 #include "kernels/queue.h"
24 #include "blend/perf.h"
25 
26 /**
27  * @brief Minimal memory increase accepted by the amalgamation algorithm (2%)
28  */
29 #define RAT_CBLK 0.02
30 
31 /**
32  * @brief Define an infinite time
33  */
34 #define INFINI 1e9
35 
36 /**
37  *******************************************************************************
38  *
39  * @brief Compute a time estimation cost of the factorization
40  *
41  * This function is used during the amalgamation algorithm to decide whether it
42  * is more interesting to amalgamate the blocks or to keep them separated in
43  * terms of performance cost
44  *
45  *******************************************************************************
46  *
47  * @param[in] n
48  * The number of rows in the column-block
49  *
50  * @param[in] ja
51  * Array of size n that holds the indexes of the rows in the given
52  * column block
53  *
54  * @param[in] colnbr
55  * The number of columns in the column-block
56  *
57  *******************************************************************************
58  *
59  * @return The estimated time in second to factorize the column-block and apply
60  * the resulting updates.
61  *
62  *******************************************************************************/
63 static inline double
65 {
66  /*******************************************/
67  /* Compute the time to compute a cblk */
68  /* according to the BLAS modelization */
69  /*******************************************/
70  double cost;
71  pastix_int_t i;
72  pastix_int_t L, G, H;
73 
74  L = colnbr; /* Size of diagonal block */
75  G = n - L; /* Number of extra-diagonal rows */
76 
77  cost = PERF_POTRF( L ) + PERF_TRSM( L, G );
78 
79  /** Contributions **/
80  i = colnbr;
81  while ( i < n ) {
82  H = 1;
83  i++;
84  while ( ( i < n ) && ( ja[i] == ja[i - 1] + 1 ) ) {
85  i++;
86  H++;
87  }
88  cost += (double)( PERF_GEMM( G, H, L ) );
89  G -= H;
90  }
91  return cost;
92 }
93 
94 /**
95  *******************************************************************************
96  *
97  * @brief Compute a time estimation cost of the solve step
98  *
99  * This function is used during the amalgamation algorithm to decide whether it
100  * is more interesting to amalgamate the blocks or to keep them separated in
101  * terms of performance cost when doing incomplete factorization.
102  *
103  *******************************************************************************
104  *
105  * @param[in] n
106  * The number of rows in the column-block
107  *
108  * @param[in] ja
109  * Array of size n that holds the indexes of the rows in the given
110  * column block
111  *
112  * @param[in] colnbr
113  * The number of columns in the column-block
114  *
115  *******************************************************************************
116  *
117  * @return The estimated time in second to apply the solve step issued from this
118  * column-block
119  *
120  *******************************************************************************/
121 static inline double
123 {
124  double cost;
125  pastix_int_t L;
126  (void)ja;
127 
128  L = colnbr;
129 
130  cost = PERF_TRSV( L ) + PERF_GEMV( L, n - L );
131  return cost;
132 }
133 
134 /**
135  *******************************************************************************
136  *
137  * @brief Return the list of sons of a given node.
138  *
139  *******************************************************************************
140  *
141  * @param[in] node
142  * Node from which the list is wanted.
143  *
144  * @param[in] sonindex
145  * Integer array of the indexes of the first son of each node in sontab
146  * array.
147  *
148  * @param[in] sontab
149  * Integer array which contains the list of sons for each node.
150  *
151  * @param[in] colweight
152  * Integer array of the weight of each node.
153  *
154  * @param[out] list
155  * Integer array that can contains the list of sons, should be of size
156  * the number of unknowns to prevent overflow.
157  * On exit, contains the list of sons of node.
158  *
159  *******************************************************************************
160  *
161  * @return The number of sons of node.
162  *
163  *******************************************************************************/
164 static inline pastix_int_t
166  const pastix_int_t *sonindex,
167  const pastix_int_t *sontab,
168  const pastix_int_t *colweight,
169  pastix_int_t *list )
170 {
171  pastix_int_t i, s;
172  pastix_int_t ind;
173  ind = 0;
174  for ( i = sonindex[node]; i < sonindex[node + 1]; i++ ) {
175  s = sontab[i];
176  assert( s != -1 );
177  if ( colweight[s] <= 0 ) {
178  ind += amalgamate_get_sonslist( s, sonindex, sontab, colweight, list + ind );
179  }
180  else {
181  list[ind++] = s;
182  }
183  }
184  return ind;
185 }
186 
187 /**
188  *******************************************************************************
189  *
190  * @brief Compute the cost of merging two nodes a and b together. The additional
191  * cost is returned.
192  *
193  *******************************************************************************
194  *
195  * @param[in] a
196  * First node to merge. a >= 0.
197  *
198  * @param[in] b
199  * Second node to merge. b >= 0.
200  *
201  * @param[in] P
202  * The graph that contains a and b nodes with their associated rows.
203  *
204  * @param[in] colweight
205  * Integer array of size P->n with the weight of each node.
206  *
207  *******************************************************************************
208  *
209  * @return The additional non zero entries required if both nodes are merged
210  * together.
211  *
212  *******************************************************************************/
213 static inline pastix_int_t
215  pastix_int_t b,
216  const fax_csr_t * P,
217  const pastix_int_t *colweight )
218 {
219  pastix_int_t ia, na, ib, nb;
220  pastix_int_t *ja, *jb;
221  pastix_int_t cost, costa, costb;
222 
223  ja = P->rows[a];
224  jb = P->rows[b];
225  na = P->nnz[a];
226  nb = P->nnz[b];
227  costa = colweight[a];
228  costb = colweight[b];
229 
230  ia = ib = 0;
231 
232  /* The diagonal elements of row a does not create fill-in */
233  while ( ( ia < na ) && ( ja[ia] < jb[0] ) )
234  ia++;
235 
236  cost = 0;
237  while ( ( ia < na ) && ( ib < nb ) ) {
238  if ( ja[ia] < jb[ib] ) {
239  cost += costb;
240  ia++;
241  continue;
242  }
243  if ( ja[ia] > jb[ib] ) {
244  cost += costa;
245  ib++;
246  continue;
247  }
248 
249  assert( ja[ia] == jb[ib] );
250  ia++;
251  ib++;
252  }
253 
254  cost += costb * ( na - ia );
255  cost += costa * ( nb - ib );
256 
257  assert( cost >= 0 );
258  return cost;
259 }
260 
261 /**
262  *******************************************************************************
263  *
264  * @brief Computes the computational gain of merging two nodes a and b together.
265  *
266  * It uses the given cost function cblktime to estimate the cost with and
267  * without merge and returns the cost variation.
268  *
269  *******************************************************************************
270  *
271  * @param[in] a
272  * First node to merge. a >= 0.
273  *
274  * @param[in] b
275  * Second node to merge. b >= 0.
276  *
277  * @param[in] P
278  * The graph that contains a and b nodes with their associated rows.
279  *
280  * @param[in] colweight
281  * Integer array of size P->n with the weight of each node.
282  *
283  * @param[inout] tmp
284  * Workspace array to compute the union of the rows associated to both nodes.
285  *
286  * @param[in] cblktime
287  * Function that returns the computational cost of a cblk.
288  *
289  *******************************************************************************
290  *
291  * @return The cost variation between merging the two nodes together vs
292  * keeping them separated according to the cblktime function.
293  *
294  *******************************************************************************/
295 static inline double
297  pastix_int_t b,
298  const fax_csr_t *P,
299  const pastix_int_t *colweight,
300  pastix_int_t *tmp,
301  double ( *cblktime )( pastix_int_t, const pastix_int_t *, pastix_int_t ) )
302 {
303  double costa, costb, costm;
304  pastix_int_t nm;
305 
306  costa = cblktime( P->nnz[a], P->rows[a], colweight[a] );
307  costb = cblktime( P->nnz[b], P->rows[b], colweight[b] );
308 
309  nm = pastix_intset_union( P->nnz[a], P->rows[a], P->nnz[b], P->rows[b], tmp );
310 
311  costm = cblktime( nm, tmp, colweight[a] + colweight[b] );
312 
313  return costm - costa - costb;
314 }
315 
316 /**
317  *******************************************************************************
318  *
319  * @brief Merge two nodes together withing the given graph.
320  *
321  *******************************************************************************
322  *
323  * @param[in] a
324  * First node to merge. a >= 0.
325  *
326  * @param[in] b
327  * Second node to merge. b >= 0.
328  *
329  * @param[inout] P
330  * The graph that contains a and b nodes with their associated rows.
331  * On exit, a is merged into b node.
332  *
333  * @param[inout] tmp
334  * Workspace array to compute the union of the rows associated to both
335  * nodes.
336  *
337  *******************************************************************************/
338 static inline void
340 {
341  pastix_int_t n;
342 
343  n = pastix_intset_union( P->nnz[a], P->rows[a], P->nnz[b], P->rows[b], tmp );
344 
345  if ( P->rows[a] != NULL ) {
346  P->nnz[a] = 0;
347  memFree( P->rows[a] );
348  }
349 
350  if ( P->rows[b] != NULL ) {
351  memFree( P->rows[b] );
352  }
353 
354  P->nnz[b] = n;
355 
356  MALLOC_INTERN( P->rows[b], n, pastix_int_t );
357  memcpy( P->rows[b], tmp, n * sizeof( pastix_int_t ) );
358 }
359 
360 /**
361  *******************************************************************************
362  *
363  * @brief Amalgamate the given graph up to a given tolerance.
364  *
365  * This function takes a supernode graph and performs amalgamation
366  * of the columns until a fill tolerance is reached. Two level of fill tolerance
367  * are available, the first one, rat_cblk, defines a fill tolerance based on the
368  * graph structure only, and the second one, rat_blas, allows to keep amalgamate
369  * the structure until the higher ratio while it improves the BLAS performance
370  * based on the internal cost model.
371  *
372  *******************************************************************************
373  *
374  * @param[in] ilu
375  * - 1: amalgamation for incomplete factorization will be performed.
376  * - 0: amalgamation for direct factorization will be performed.
377  *
378  * @param[in] rat_cblk
379  * Percentage of fill-in allowed using structure only
380  * amalgamation. Must be >= 0.
381  *
382  * @param[in] rat_blas
383  * Percentage of fill-in allowed using structure BLAS performance
384  * model. rat_blas >= rat_cblk.
385  * The ratio is always on the number of non zero entries of the
386  * pattern, but the criterion to merge columns is based only on reducing
387  * the cost of computations. The cost function used is a model for
388  * Cholesky factorization or solve in double precision, other
389  * factorization and/or precision are not implemented. They are
390  * considered to be similar up to a factor, and so doesn't affect the
391  * amalgamation.
392  * The solve cost is used for ILU(k) factorization and the
393  * factorization cost for direct factorization.
394  *
395  * @param[inout] graphL
396  * The graph of the matrix L to amalgamate. On exit, the compressed
397  * graph is returned (not the quotient graph !!)
398  *
399  * @param[inout] order
400  * On entry the initial ordering structure associated to L.
401  * On exit, the update ordering structure with the amalgamation of close nodes.
402  *
403  * @param[in] pastix_comm
404  * MPI communicator. Used only for printf in this function.
405  *
406  *******************************************************************************/
407 void
409  double rat_cblk,
410  double rat_blas,
411  fax_csr_t *graphL,
412  pastix_order_t *order,
413  PASTIX_Comm pastix_comm )
414 {
415  double ( *cblktime )( pastix_int_t, const pastix_int_t *, pastix_int_t );
416 
417  pastix_int_t *nnzadd = NULL;
418  pastix_int_t *sonindex = NULL;
419  pastix_int_t *sontab = NULL;
420  pastix_int_t *tmp = NULL;
421  pastix_int_t *tmp2 = NULL;
422  pastix_int_t *newnum = NULL;
423  pastix_int_t *colweight = NULL;
424 
425  pastix_int_t oldcblknbr = order->cblknbr;
426  const pastix_int_t *oldrangtab = order->rangtab;
427  pastix_int_t *oldtreetab = order->treetab;
428  const pastix_int_t *oldperitab = order->peritab;
429  pastix_int_t *newrangtab;
430  pastix_int_t *newtreetab;
431  pastix_int_t *newperitab;
432 
433  pastix_int_t i, j, k, ind, nnzL;
434  pastix_int_t father, newfather;
435  pastix_int_t n, nn, nbcblk_merged;
436  pastix_int_t fillwhile, fillcblk, fillblas, fill;
437 
438  double *gain = NULL;
439  double key;
440  pastix_queue_t heap;
441  int blas_gain = 0;
442  int procnum;
443 
444  (void)pastix_comm;
445 
446  MPI_Comm_rank( pastix_comm, &procnum );
447 
448  /* If ilu(k), we compute fill-in on solve functions, otherwise we take factorization */
449  if ( ilu ) {
450  cblktime = cblk_time_solve;
451  }
452  else {
453  cblktime = cblk_time_fact;
454  }
455 
456  /*
457  * If rat is negative, then the amalgamation is computed first with nnz
458  * structure up to rat_cblk ratio and then an extra amalgamation is performed
459  * as long as it improves the BLAS performance, and stay under the (-rat)
460  * ratio.
461  */
462  if ( rat_blas < 0 ) {
463  rat_blas = -rat_blas;
464  rat_cblk = MIN( rat_blas, RAT_CBLK );
465  }
466 
467  /*
468  * We always start by the structural amalgamation and then we start the
469  * almalgamation leaded by the reduction of the blas cost
470  */
471  blas_gain = 0;
472 
473  /*
474  * Compute the initial weight of each cblk
475  */
476  n = graphL->n;
477  MALLOC_INTERN( colweight, n, pastix_int_t );
478 
479  assert( graphL->n == oldcblknbr );
480  for ( i = 0; i < n; i++ ) {
481  colweight[i] = oldrangtab[i + 1] - oldrangtab[i];
482  assert( ( graphL->nnz[i] >= colweight[i] ) && ( colweight[i] > 0 ) );
483 
484 #if !defined( NDEBUG ) && defined( PASTIX_DEBUG_SYMBOL )
485  {
486  pastix_int_t j;
487  /* Check that the first elements of graphL are those from the supernode (the
488  * diagonal elements) */
489  for ( j = 0; j < n; j++ ) {
490  pastix_int_t l, k = 0;
491  for ( l = oldrangtab[j]; l < oldrangtab[j + 1]; l++ ) {
492  assert( graphL->rows[j][k] == l );
493  k++;
494  }
495  }
496  }
497 #endif
498  }
499 
500  /** Number of unknowns **/
501  nn = oldrangtab[oldcblknbr];
502  assert( nn >= n );
503 
504  /**********************************************/
505  /*** Compute the maximum extra fill allowed ***/
506  /**********************************************/
507  /*
508  * - fillblas is the limit of nnz added during the whole amalgamation
509  * process
510  * - fillcblk is the limit of nnz added under the amalgamation process that
511  * tries to reduce the number of supernode
512  * - Between fillcblk and fillblas the amalgamation tries to reduce the time
513  * of the solve or factorization time
514  */
515  nnzL = graphL->total_nnz;
516  fillcblk = (pastix_int_t)lround( (double)nnzL * rat_cblk );
517  fillblas = (pastix_int_t)lround( (double)nnzL * rat_blas );
518  fillwhile = fillcblk;
519 
520 #if defined( PASTIX_DEBUG_SYMBOL )
521  {
522  double key = 0.0;
523 
524  pastix_print( procnum, 0, "fillcblk %ld fillmax %ld \n", (long)fillcblk, (long)fillblas );
525 
526  for ( i = 0; i < graphL->n; i++ ) {
527  key += cblktime( graphL->nnz[i], graphL->rows[i], colweight[i] );
528  }
529  pastix_print( procnum, 0, "COST of the NON AMALGAMATED MATRIX = %e\n", key );
530  }
531 #endif
532 
533  MALLOC_INTERN( tmp, nn, pastix_int_t );
534 
535  /**********************************************/
536  /* Compute the list of sons of each supernode */
537  /**********************************************/
538  {
539  pastix_int_t nbsons = 0;
540 
541  /* Compute the number of sons of each cblk */
542  memset( tmp, 0, n * sizeof( pastix_int_t ) );
543  for ( i = 0; i < n - 1; i++ ) {
544  if ( oldtreetab[i] >= 0 ) { /** IF THIS SNODE IS NOT A ROOT **/
545  tmp[oldtreetab[i]]++;
546  nbsons++;
547  }
548  }
549  assert( nbsons < n );
550 
551  MALLOC_INTERN( sonindex, n + 1, pastix_int_t );
552  MALLOC_INTERN( sontab, nbsons, pastix_int_t );
553 #ifndef NDEBUG
554  memset( sontab, 0xff, nbsons * sizeof(pastix_int_t) );
555 #endif
556 
557  /* Create the sonindex array */
558  sonindex[0] = 0;
559  for ( i = 0; i < n; i++ ) {
560  sonindex[i + 1] = sonindex[i] + tmp[i];
561  }
562 
563  /* Fill the sontab array */
564  memcpy( tmp, sonindex, n * sizeof( pastix_int_t ) );
565  for ( i = 0; i < n - 1; i++ ) {
566  pastix_int_t father = oldtreetab[i];
567  assert( ( father != i ) && ( father < n ) );
568  if ( father >= 0 ) /** IF THIS SNODE IS NOT A ROOT **/
569  {
570  assert( sontab[tmp[father]] == -1 );
571  sontab[tmp[father]] = i;
572  tmp[father]++;
573  assert( tmp[father] <= sonindex[father + 1] );
574  }
575  }
576  }
577 
578  /****************************************************************/
579  /* Compute the fill to merge a column of graphL with its father */
580  /****************************************************************/
581  MALLOC_INTERN( nnzadd, n, pastix_int_t );
582  MALLOC_INTERN( gain, n, double );
583  for ( i = 0; i < n; i++ ) {
584  father = oldtreetab[i];
585  if ( ( father == -1 ) || ( father == i ) ) {
586  nnzadd[i] = INFINI;
587  gain[i] = INFINI;
588  continue;
589  }
590 
591  nnzadd[i] = amalgamate_merge_cost( i, father, graphL, colweight );
592  gain[i] = (double)( nnzadd[i] );
593  }
594 
595  /*****************************************************/
596  /** Merge all the columns so it doesn't add fill-in **/
597  /*****************************************************/
598  nbcblk_merged = 0;
599  for ( i = 0; i < n; i++ ) {
600  assert( colweight[i] > 0 );
601 
602  if ( ( colweight[i] != 0 ) && ( nnzadd[i] == 0 ) ) {
603  father = oldtreetab[i];
604  assert( ( father > 0 ) && ( father != i ) );
605 
606  nbcblk_merged++;
607 
608  /* Merge the node i and its father **/
609  amalgamate_merge_col( i, father, graphL, tmp );
610 
611  /* Update cost of each node */
612  colweight[father] += colweight[i];
613  colweight[i] = 0;
614 
615  /* Update the nnzadd and gain array for the father */
616  k = oldtreetab[father];
617  if ( ( k != -1 ) && ( k != father ) ) {
618  nnzadd[father] = amalgamate_merge_cost( father, k, graphL, colweight );
619  gain[father] = (double)( nnzadd[father] );
620  }
621 
622  /* Update the list of sons of i now */
623  ind = amalgamate_get_sonslist( i, sonindex, sontab, colweight, tmp );
624  for ( j = 0; j < ind; j++ ) {
625  k = tmp[j];
626  assert( k != father );
627  oldtreetab[k] = father;
628  nnzadd[k] = amalgamate_merge_cost( k, father, graphL, colweight );
629  gain[k] = (double)( nnzadd[k] );
630  }
631  }
632  }
633  if ( 0 ) {
634  pastix_print( procnum,
635  0,
636  "Number of cblk after amalgamation initialization = %ld (reduced by %ld)\n",
637  (long)( n - nbcblk_merged ),
638  (long)nbcblk_merged );
639  }
640 
641  /* Initialize the first round with column sorted by nnzadd */
642  pqueueInit( &heap, ( n - nbcblk_merged ) );
643  for ( i = 0; i < n; i++ ) {
644  if ( ( colweight[i] > 0 ) && ( oldtreetab[i] > 0 ) && ( oldtreetab[i] != i ) ) {
645  pqueuePush1( &heap, i, gain[i] );
646  }
647  }
648 
649  /*********************************************/
650  /* Merge supernodes until we reach fillwhile */
651  /*********************************************/
652 restart:
653 
654  /* Merge supernodes untill we reach the fillwhile limit */
655  fill = 0.0;
656  while ( ( pqueueSize( &heap ) > 0 ) && ( fill < fillwhile ) ) {
657  i = pqueuePop1( &heap, &key );
658 
659  /* If we pop an old version of i, let's skip it */
660  if ( ( gain[i] != key ) || ( colweight[i] <= 0 ) )
661  continue;
662 
663  /* check if we reached the fillwhile */
664  if ( ( fill + nnzadd[i] ) > fillwhile )
665  continue;
666  else
667  fill += nnzadd[i];
668 
669  nbcblk_merged++;
670  father = oldtreetab[i];
671  assert( ( father > 0 ) && ( father != i ) );
672  assert( colweight[father] > 0 );
673 
674  /* Merge the node i and its father **/
675  amalgamate_merge_col( i, father, graphL, tmp );
676 
677  /* Update cost of each node */
678  colweight[father] += colweight[i];
679  colweight[i] = 0;
680 
681  /* Update the nnzadd and gain array for the father */
682  k = oldtreetab[father];
683  if ( ( k != -1 ) && ( k != father ) ) {
684  nnzadd[father] = amalgamate_merge_cost( father, k, graphL, colweight );
685  if ( blas_gain == 1 ) {
686  gain[father] =
687  amalgamate_merge_gain( father, k, graphL, colweight, tmp2, cblktime ) /
688  nnzadd[father];
689  if ( gain[father] <= 0 )
690  pqueuePush1( &heap, father, gain[father] );
691  }
692  else {
693  gain[father] = (double)( nnzadd[father] );
694  pqueuePush1( &heap, father, gain[father] );
695  }
696  }
697 
698  /* Update the list of sons of i now */
699  ind = amalgamate_get_sonslist( i, sonindex, sontab, colweight, tmp );
700  for ( j = 0; j < ind; j++ ) {
701  k = tmp[j];
702  assert( k != father );
703  oldtreetab[k] = father;
704  nnzadd[k] = amalgamate_merge_cost( k, father, graphL, colweight );
705 
706  if ( blas_gain == 1 ) {
707  gain[k] = amalgamate_merge_gain( k, father, graphL, colweight, tmp2, cblktime ) /
708  nnzadd[k];
709  if ( gain[k] <= 0 )
710  pqueuePush1( &heap, k, gain[k] );
711  }
712  else {
713  gain[k] = (double)( nnzadd[k] );
714  pqueuePush1( &heap, k, gain[k] );
715  }
716  }
717  }
718 
719  if ( 0 ) {
720  pastix_print( procnum,
721  0,
722  "Number of cblk after amalgamation phase = %ld (reduced by %ld)\n",
723  (long)( n - nbcblk_merged ),
724  (long)nbcblk_merged );
725  }
726  pqueueExit( &heap );
727  if ( fillwhile < fillblas ) {
728  /* Now the gain of amalgamation is based on the BLAS model */
729  assert( blas_gain == 0 );
730 
731  blas_gain = 1;
732  fillwhile = fillblas;
733 
734  /* Recompute the gain using BLAS model to restart the second round */
735  pqueueInit( &heap, ( n - nbcblk_merged ) );
736  for ( i = 0; i < n; i++ ) {
737  father = oldtreetab[i];
738  if ( father == -1 || father == i ) {
739  gain[i] = INFINI;
740  continue;
741  }
742  gain[i] =
743  amalgamate_merge_gain( i, father, graphL, colweight, tmp, cblktime ) / nnzadd[i];
744 
745  if ( ( colweight[i] > 0 ) && ( oldtreetab[i] > 0 ) && ( oldtreetab[i] != i ) &&
746  ( gain[i] <= 0. ) ) {
747  pqueuePush1( &heap, i, gain[i] );
748  }
749  }
750 
751  /* Allocate second temporary workspace for amalgamate_merge_gain */
752  MALLOC_INTERN( tmp2, nn, pastix_int_t );
753  goto restart;
754  }
755 
756  if ( tmp2 != NULL ) {
757  memFree( tmp2 );
758  }
759  memFree( nnzadd );
760  memFree( gain );
761 
762  /********************************/
763  /* Compute the new partition */
764  /********************************/
765 
766  /* Create the new numbering array of the comptacted supernodes (stored in
767  * tmp) and compute the size of each supernode */
768  newnum = tmp;
769  memset( newnum, 0, n * sizeof( pastix_int_t ) );
770 
771  order->cblknbr = n - nbcblk_merged;
772  MALLOC_INTERN( order->rangtab, order->cblknbr + 1, pastix_int_t );
773  MALLOC_INTERN( order->treetab, order->cblknbr, pastix_int_t );
774  MALLOC_INTERN( order->peritab, order->vertnbr, pastix_int_t );
775  memset( order->rangtab, 0, ( order->cblknbr + 1 ) * sizeof( pastix_int_t ) );
776  memset( order->treetab, 0xff, ( order->cblknbr ) * sizeof( pastix_int_t ) );
777  memset( order->peritab, 0xff, ( order->vertnbr ) * sizeof( pastix_int_t ) );
778 
779  /* Backup former peritab */
780  newrangtab = order->rangtab;
781  newtreetab = order->treetab;
782  newperitab = order->peritab;
783 
784  k = 0;
785  for ( i = 0; i < n; i++ ) {
786  if ( colweight[i] > 0 ) {
787  newnum[i] = k++;
788  newrangtab[k] = colweight[i];
789  }
790  }
791  assert( k == order->cblknbr );
792 
793  /* Create the newrangtab */
794  for ( i = 0; i < order->cblknbr; i++ ) {
795  newrangtab[i + 1] += newrangtab[i];
796  }
797 
798  /* Create the invp vector to adapt to the new partition */
799  for ( i = 0; i < n; i++ ) {
800  father = i;
801  if ( colweight[i] <= 0 ) {
802  /* find the cblk this node is in */
803  while ( colweight[father] <= 0 ) {
804  father = oldtreetab[father];
805  assert( father > 0 );
806  }
807  }
808 
809  newfather = newnum[father];
810 
811  /* Update inverse permutation */
812  for ( j = oldrangtab[i]; j < oldrangtab[i + 1]; j++ ) {
813  newperitab[newrangtab[newfather]++] = oldperitab[j];
814  }
815 
816  /* Update elimination tree */
817  if ( oldtreetab[father] != -1 ) {
818  newtreetab[newfather] = newnum[oldtreetab[father]];
819  }
820  else {
821  newtreetab[newfather] = -1;
822  }
823  }
824 
825  /* Reset newrangtab to its real value */
826  for ( i = order->cblknbr; i > 0; i-- ) {
827  newrangtab[i] = newrangtab[i - 1];
828  }
829  newrangtab[0] = 0;
830 
831  /* Update the permutation */
832  for ( i = 0; i < order->vertnbr; i++ ) {
833  order->permtab[newperitab[i]] = i;
834  }
835 
836 #if defined( PASTIX_DEBUG_SYMBOL )
837  {
838  double key = 0.0;
839 
840  /* Compact the graph to remove merged nodes */
841  faxCSRCompact( graphL );
842  assert( graphL->n == order->cblknbr );
843 
844  /* Apply the new permutation to graphL */
845  for ( i = 0; i < graphL->n; i++ ) {
846  pastix_int_t *ja;
847  ja = graphL->rows[i];
848  for ( j = 0; j < graphL->nnz[i]; j++ ) {
849  ja[j] = order->permtab[ja[j]];
850  }
851  }
852 
853  for ( i = 0; i < graphL->n; i++ ) {
854  key += cblktime( graphL->nnz[i], graphL->rows[i], colweight[i] );
855  }
856  pastix_print( procnum, 0, "COST of the AMALGAMATED MATRIX = %e\n", key );
857  }
858 #endif
859 
860  memFree( oldrangtab );
861  memFree( oldtreetab );
862  memFree( oldperitab );
863 
864  memFree( tmp );
865  memFree( sonindex );
866  memFree( sontab );
867  memFree( colweight );
868 
869 #if !defined( NDEBUG )
870  assert( pastixOrderCheck( order ) == PASTIX_SUCCESS );
871 #endif
872 }
873 
874 /**
875  * @}
876  */
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
static void pqueuePush1(pastix_queue_t *q, pastix_int_t elt, double key1)
Push an element with a single key.
Definition: queue.h:64
void pqueueExit(pastix_queue_t *)
Free the structure associated to the queue.
Definition: queue.c:110
pastix_int_t pqueueSize(const pastix_queue_t *)
Return the size of the queue.
Definition: queue.c:135
static pastix_int_t pqueuePop1(pastix_queue_t *q, double *key1)
Pop the head of the queue and get the associated first key.
Definition: queue.h:88
int pqueueInit(pastix_queue_t *, pastix_int_t)
Initialize the queue structure with an initial space to store the elements.
Definition: queue.c:81
Queue structure.
Definition: queue.h:38
@ PASTIX_SUCCESS
Definition: api.h:367
pastix_int_t * treetab
Definition: order.h:54
pastix_int_t * permtab
Definition: order.h:51
pastix_int_t * peritab
Definition: order.h:52
pastix_int_t cblknbr
Definition: order.h:50
pastix_int_t * rangtab
Definition: order.h:53
pastix_int_t vertnbr
Definition: order.h:49
int pastixOrderCheck(const pastix_order_t *ordeptr)
This routine checks the correctness of the ordering structure.
Definition: order_check.c:39
Order structure.
Definition: order.h:47
static pastix_int_t amalgamate_get_sonslist(pastix_int_t node, const pastix_int_t *sonindex, const pastix_int_t *sontab, const pastix_int_t *colweight, pastix_int_t *list)
Return the list of sons of a given node.
#define INFINI
Define an infinite time.
#define RAT_CBLK
Minimal memory increase accepted by the amalgamation algorithm (2%)
static void amalgamate_merge_col(pastix_int_t a, pastix_int_t b, fax_csr_t *P, pastix_int_t *tmp)
Merge two nodes together withing the given graph.
static double cblk_time_solve(pastix_int_t n, const pastix_int_t *ja, pastix_int_t colnbr)
Compute a time estimation cost of the solve step.
static pastix_int_t amalgamate_merge_cost(pastix_int_t a, pastix_int_t b, const fax_csr_t *P, const pastix_int_t *colweight)
Compute the cost of merging two nodes a and b together. The additional cost is returned.
void faxCSRAmalgamate(int ilu, double rat_cblk, double rat_blas, fax_csr_t *graphL, pastix_order_t *order, PASTIX_Comm pastix_comm)
Amalgamate the given graph up to a given tolerance.
void faxCSRCompact(fax_csr_t *csr)
Compact a compressed graph.
Definition: fax_csr.c:129
static double amalgamate_merge_gain(pastix_int_t a, pastix_int_t b, const fax_csr_t *P, const pastix_int_t *colweight, pastix_int_t *tmp, double(*cblktime)(pastix_int_t, const pastix_int_t *, pastix_int_t))
Computes the computational gain of merging two nodes a and b together.
static double cblk_time_fact(pastix_int_t n, const pastix_int_t *ja, pastix_int_t colnbr)
Compute a time estimation cost of the factorization.
Fax blocked csr structure.
Definition: fax_csr.h:25
static double cost(symbol_cblk_t *cblk)
Computes the cost of a cblk.