PaStiX Handbook 6.4.0
Loading...
Searching...
No Matches
fax_csr_amalgamate.c
Go to the documentation of this file.
1/**
2 *
3 * @file fax_csr_amalgamate.c
4 *
5 * This file contains the routines to amalgamate a given graph with a fill-in
6 * ratio used in the amalgamation algorithm.
7 *
8 * @copyright 2004-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
9 * Univ. Bordeaux. All rights reserved.
10 *
11 * @version 6.4.0
12 * @author Pascal Henon
13 * @author Mathieu Faverge
14 * @author Gregoire Pichon
15 * @date 2024-07-05
16 *
17 * @addtogroup symbol_dev_csr
18 * @{
19 **/
20#include "common.h"
21#include "pastix/order.h"
22#include "symbol/fax_csr.h"
23#include "kernels/queue.h"
24#include "blend/perf.h"
25
26/**
27 * @brief Minimal memory increase accepted by the amalgamation algorithm (2%)
28 */
29#define RAT_CBLK 0.02
30
31/**
32 * @brief Define an infinite time
33 */
34#define INFINI 1e9
35
36/**
37 *******************************************************************************
38 *
39 * @brief Compute a time estimation cost of the factorization
40 *
41 * This function is used during the amalgamation algorithm to decide whether it
42 * is more interesting to amalgamate the blocks or to keep them separated in
43 * terms of performance cost
44 *
45 *******************************************************************************
46 *
47 * @param[in] n
48 * The number of rows in the column-block
49 *
50 * @param[in] ja
51 * Array of size n that holds the indexes of the rows in the given
52 * column block
53 *
54 * @param[in] colnbr
55 * The number of columns in the column-block
56 *
57 *******************************************************************************
58 *
59 * @return The estimated time in second to factorize the column-block and apply
60 * the resulting updates.
61 *
62 *******************************************************************************/
63static inline double
65{
66 /*******************************************/
67 /* Compute the time to compute a cblk */
68 /* according to the BLAS modelization */
69 /*******************************************/
70 double cost;
72 pastix_int_t L, G, H;
73
74 L = colnbr; /* Size of diagonal block */
75 G = n - L; /* Number of extra-diagonal rows */
76
77 cost = PERF_POTRF( L ) + PERF_TRSM( L, G );
78
79 /** Contributions **/
80 i = colnbr;
81 while ( i < n ) {
82 H = 1;
83 i++;
84 while ( ( i < n ) && ( ja[i] == ja[i - 1] + 1 ) ) {
85 i++;
86 H++;
87 }
88 cost += (double)( PERF_GEMM( G, H, L ) );
89 G -= H;
90 }
91 return cost;
92}
93
94/**
95 *******************************************************************************
96 *
97 * @brief Compute a time estimation cost of the solve step
98 *
99 * This function is used during the amalgamation algorithm to decide whether it
100 * is more interesting to amalgamate the blocks or to keep them separated in
101 * terms of performance cost when doing incomplete factorization.
102 *
103 *******************************************************************************
104 *
105 * @param[in] n
106 * The number of rows in the column-block
107 *
108 * @param[in] ja
109 * Array of size n that holds the indexes of the rows in the given
110 * column block
111 *
112 * @param[in] colnbr
113 * The number of columns in the column-block
114 *
115 *******************************************************************************
116 *
117 * @return The estimated time in second to apply the solve step issued from this
118 * column-block
119 *
120 *******************************************************************************/
121static inline double
123{
124 double cost;
125 pastix_int_t L;
126 (void)ja;
127
128 L = colnbr;
129
130 cost = PERF_TRSV( L ) + PERF_GEMV( L, n - L );
131 return cost;
132}
133
134/**
135 *******************************************************************************
136 *
137 * @brief Return the list of sons of a given node.
138 *
139 *******************************************************************************
140 *
141 * @param[in] node
142 * Node from which the list is wanted.
143 *
144 * @param[in] sonindex
145 * Integer array of the indexes of the first son of each node in sontab
146 * array.
147 *
148 * @param[in] sontab
149 * Integer array which contains the list of sons for each node.
150 *
151 * @param[in] colweight
152 * Integer array of the weight of each node.
153 *
154 * @param[out] list
155 * Integer array that can contains the list of sons, should be of size
156 * the number of unknowns to prevent overflow.
157 * On exit, contains the list of sons of node.
158 *
159 *******************************************************************************
160 *
161 * @return The number of sons of node.
162 *
163 *******************************************************************************/
164static inline pastix_int_t
166 const pastix_int_t *sonindex,
167 const pastix_int_t *sontab,
168 const pastix_int_t *colweight,
169 pastix_int_t *list )
170{
171 pastix_int_t i, s;
172 pastix_int_t ind;
173 ind = 0;
174 for ( i = sonindex[node]; i < sonindex[node + 1]; i++ ) {
175 s = sontab[i];
176 assert( s != -1 );
177 if ( colweight[s] <= 0 ) {
178 ind += amalgamate_get_sonslist( s, sonindex, sontab, colweight, list + ind );
179 }
180 else {
181 list[ind++] = s;
182 }
183 }
184 return ind;
185}
186
187/**
188 *******************************************************************************
189 *
190 * @brief Compute the cost of merging two nodes a and b together. The additional
191 * cost is returned.
192 *
193 *******************************************************************************
194 *
195 * @param[in] a
196 * First node to merge. a >= 0.
197 *
198 * @param[in] b
199 * Second node to merge. b >= 0.
200 *
201 * @param[in] P
202 * The graph that contains a and b nodes with their associated rows.
203 *
204 * @param[in] colweight
205 * Integer array of size P->n with the weight of each node.
206 *
207 *******************************************************************************
208 *
209 * @return The additional non zero entries required if both nodes are merged
210 * together.
211 *
212 *******************************************************************************/
213static inline pastix_int_t
215 pastix_int_t b,
216 const fax_csr_t * P,
217 const pastix_int_t *colweight )
218{
219 pastix_int_t ia, na, ib, nb;
220 pastix_int_t *ja, *jb;
221 pastix_int_t cost, costa, costb;
222
223 ja = P->rows[a];
224 jb = P->rows[b];
225 na = P->nnz[a];
226 nb = P->nnz[b];
227 costa = colweight[a];
228 costb = colweight[b];
229
230 ia = ib = 0;
231
232 /* The diagonal elements of row a does not create fill-in */
233 while ( ( ia < na ) && ( ja[ia] < jb[0] ) )
234 ia++;
235
236 cost = 0;
237 while ( ( ia < na ) && ( ib < nb ) ) {
238 if ( ja[ia] < jb[ib] ) {
239 cost += costb;
240 ia++;
241 continue;
242 }
243 if ( ja[ia] > jb[ib] ) {
244 cost += costa;
245 ib++;
246 continue;
247 }
248
249 assert( ja[ia] == jb[ib] );
250 ia++;
251 ib++;
252 }
253
254 cost += costb * ( na - ia );
255 cost += costa * ( nb - ib );
256
257 assert( cost >= 0 );
258 return cost;
259}
260
261/**
262 *******************************************************************************
263 *
264 * @brief Computes the computational gain of merging two nodes a and b together.
265 *
266 * It uses the given cost function cblktime to estimate the cost with and
267 * without merge and returns the cost variation.
268 *
269 *******************************************************************************
270 *
271 * @param[in] a
272 * First node to merge. a >= 0.
273 *
274 * @param[in] b
275 * Second node to merge. b >= 0.
276 *
277 * @param[in] P
278 * The graph that contains a and b nodes with their associated rows.
279 *
280 * @param[in] colweight
281 * Integer array of size P->n with the weight of each node.
282 *
283 * @param[inout] tmp
284 * Workspace array to compute the union of the rows associated to both nodes.
285 *
286 * @param[in] cblktime
287 * Function that returns the computational cost of a cblk.
288 *
289 *******************************************************************************
290 *
291 * @return The cost variation between merging the two nodes together vs
292 * keeping them separated according to the cblktime function.
293 *
294 *******************************************************************************/
295static inline double
297 pastix_int_t b,
298 const fax_csr_t *P,
299 const pastix_int_t *colweight,
300 pastix_int_t *tmp,
301 double ( *cblktime )( pastix_int_t, const pastix_int_t *, pastix_int_t ) )
302{
303 double costa, costb, costm;
304 pastix_int_t nm;
305
306 costa = cblktime( P->nnz[a], P->rows[a], colweight[a] );
307 costb = cblktime( P->nnz[b], P->rows[b], colweight[b] );
308
309 nm = pastix_intset_union( P->nnz[a], P->rows[a], P->nnz[b], P->rows[b], tmp );
310
311 costm = cblktime( nm, tmp, colweight[a] + colweight[b] );
312
313 return costm - costa - costb;
314}
315
316/**
317 *******************************************************************************
318 *
319 * @brief Merge two nodes together withing the given graph.
320 *
321 *******************************************************************************
322 *
323 * @param[in] a
324 * First node to merge. a >= 0.
325 *
326 * @param[in] b
327 * Second node to merge. b >= 0.
328 *
329 * @param[inout] P
330 * The graph that contains a and b nodes with their associated rows.
331 * On exit, a is merged into b node.
332 *
333 * @param[inout] tmp
334 * Workspace array to compute the union of the rows associated to both
335 * nodes.
336 *
337 *******************************************************************************/
338static inline void
340{
341 pastix_int_t n;
342
343 n = pastix_intset_union( P->nnz[a], P->rows[a], P->nnz[b], P->rows[b], tmp );
344
345 if ( P->rows[a] != NULL ) {
346 P->nnz[a] = 0;
347 memFree( P->rows[a] );
348 }
349
350 if ( P->rows[b] != NULL ) {
351 memFree( P->rows[b] );
352 }
353
354 P->nnz[b] = n;
355
356 MALLOC_INTERN( P->rows[b], n, pastix_int_t );
357 memcpy( P->rows[b], tmp, n * sizeof( pastix_int_t ) );
358}
359
360/**
361 *******************************************************************************
362 *
363 * @brief Amalgamate the given graph up to a given tolerance.
364 *
365 * This function takes a supernode graph and performs amalgamation
366 * of the columns until a fill tolerance is reached. Two level of fill tolerance
367 * are available, the first one, rat_cblk, defines a fill tolerance based on the
368 * graph structure only, and the second one, rat_blas, allows to keep amalgamate
369 * the structure until the higher ratio while it improves the BLAS performance
370 * based on the internal cost model.
371 *
372 *******************************************************************************
373 *
374 * @param[in] ilu
375 * - 1: amalgamation for incomplete factorization will be performed.
376 * - 0: amalgamation for direct factorization will be performed.
377 *
378 * @param[in] rat_cblk
379 * Percentage of fill-in allowed using structure only
380 * amalgamation. Must be >= 0.
381 *
382 * @param[in] rat_blas
383 * Percentage of fill-in allowed using structure BLAS performance
384 * model. rat_blas >= rat_cblk.
385 * The ratio is always on the number of non zero entries of the
386 * pattern, but the criterion to merge columns is based only on reducing
387 * the cost of computations. The cost function used is a model for
388 * Cholesky factorization or solve in double precision, other
389 * factorization and/or precision are not implemented. They are
390 * considered to be similar up to a factor, and so doesn't affect the
391 * amalgamation.
392 * The solve cost is used for ILU(k) factorization and the
393 * factorization cost for direct factorization.
394 *
395 * @param[inout] graphL
396 * The graph of the matrix L to amalgamate. On exit, the compressed
397 * graph is returned (not the quotient graph !!)
398 *
399 * @param[inout] order
400 * On entry the initial ordering structure associated to L.
401 * On exit, the update ordering structure with the amalgamation of close nodes.
402 *
403 * @param[in] pastix_comm
404 * MPI communicator. Used only for printf in this function.
405 *
406 *******************************************************************************/
407void
409 double rat_cblk,
410 double rat_blas,
411 fax_csr_t *graphL,
412 pastix_order_t *order,
413 PASTIX_Comm pastix_comm )
414{
415 double ( *cblktime )( pastix_int_t, const pastix_int_t *, pastix_int_t );
416
417 pastix_int_t *nnzadd = NULL;
418 pastix_int_t *sonindex = NULL;
419 pastix_int_t *sontab = NULL;
420 pastix_int_t *tmp = NULL;
421 pastix_int_t *tmp2 = NULL;
422 pastix_int_t *newnum = NULL;
423 pastix_int_t *colweight = NULL;
424
425 pastix_int_t oldcblknbr = order->cblknbr;
426 const pastix_int_t *oldrangtab = order->rangtab;
427 pastix_int_t *oldtreetab = order->treetab;
428 const pastix_int_t *oldperitab = order->peritab;
429 pastix_int_t *newrangtab;
430 pastix_int_t *newtreetab;
431 pastix_int_t *newperitab;
432
433 pastix_int_t i, j, k, ind, nnzL;
434 pastix_int_t father, newfather;
435 pastix_int_t n, nn, nbcblk_merged;
436 pastix_int_t fillwhile, fillcblk, fillblas, fill;
437
438 double *gain = NULL;
439 double key;
440 pastix_queue_t heap;
441 int blas_gain = 0;
442 int procnum;
443
444 (void)pastix_comm;
445
446 MPI_Comm_rank( pastix_comm, &procnum );
447
448 /* If ilu(k), we compute fill-in on solve functions, otherwise we take factorization */
449 if ( ilu ) {
450 cblktime = cblk_time_solve;
451 }
452 else {
453 cblktime = cblk_time_fact;
454 }
455
456 /*
457 * If rat is negative, then the amalgamation is computed first with nnz
458 * structure up to rat_cblk ratio and then an extra amalgamation is performed
459 * as long as it improves the BLAS performance, and stay under the (-rat)
460 * ratio.
461 */
462 if ( rat_blas < 0 ) {
463 rat_blas = -rat_blas;
464 rat_cblk = MIN( rat_blas, RAT_CBLK );
465 }
466
467 /*
468 * We always start by the structural amalgamation and then we start the
469 * almalgamation leaded by the reduction of the blas cost
470 */
471 blas_gain = 0;
472
473 /*
474 * Compute the initial weight of each cblk
475 */
476 n = graphL->n;
477 MALLOC_INTERN( colweight, n, pastix_int_t );
478
479 assert( graphL->n == oldcblknbr );
480 for ( i = 0; i < n; i++ ) {
481 colweight[i] = oldrangtab[i + 1] - oldrangtab[i];
482 assert( ( graphL->nnz[i] >= colweight[i] ) && ( colweight[i] > 0 ) );
483
484#if !defined( NDEBUG ) && defined( PASTIX_DEBUG_SYMBOL )
485 {
486 pastix_int_t j;
487 /* Check that the first elements of graphL are those from the supernode (the
488 * diagonal elements) */
489 for ( j = 0; j < n; j++ ) {
490 pastix_int_t l, k = 0;
491 for ( l = oldrangtab[j]; l < oldrangtab[j + 1]; l++ ) {
492 assert( graphL->rows[j][k] == l );
493 k++;
494 }
495 }
496 }
497#endif
498 }
499
500 /** Number of unknowns **/
501 nn = oldrangtab[oldcblknbr];
502 assert( nn >= n );
503
504 /**********************************************/
505 /*** Compute the maximum extra fill allowed ***/
506 /**********************************************/
507 /*
508 * - fillblas is the limit of nnz added during the whole amalgamation
509 * process
510 * - fillcblk is the limit of nnz added under the amalgamation process that
511 * tries to reduce the number of supernode
512 * - Between fillcblk and fillblas the amalgamation tries to reduce the time
513 * of the solve or factorization time
514 */
515 nnzL = graphL->total_nnz;
516 fillcblk = (pastix_int_t)lround( (double)nnzL * rat_cblk );
517 fillblas = (pastix_int_t)lround( (double)nnzL * rat_blas );
518 fillwhile = fillcblk;
519
520#if defined( PASTIX_DEBUG_SYMBOL )
521 {
522 double key = 0.0;
523
524 pastix_print( procnum, 0, "fillcblk %ld fillmax %ld \n", (long)fillcblk, (long)fillblas );
525
526 for ( i = 0; i < graphL->n; i++ ) {
527 key += cblktime( graphL->nnz[i], graphL->rows[i], colweight[i] );
528 }
529 pastix_print( procnum, 0, "COST of the NON AMALGAMATED MATRIX = %e\n", key );
530 }
531#endif
532
533 MALLOC_INTERN( tmp, nn, pastix_int_t );
534
535 /**********************************************/
536 /* Compute the list of sons of each supernode */
537 /**********************************************/
538 {
539 pastix_int_t nbsons = 0;
540
541 /* Compute the number of sons of each cblk */
542 memset( tmp, 0, n * sizeof( pastix_int_t ) );
543 for ( i = 0; i < n - 1; i++ ) {
544 if ( oldtreetab[i] >= 0 ) { /** IF THIS SNODE IS NOT A ROOT **/
545 tmp[oldtreetab[i]]++;
546 nbsons++;
547 }
548 }
549 assert( nbsons < n );
550
551 MALLOC_INTERN( sonindex, n + 1, pastix_int_t );
552 MALLOC_INTERN( sontab, nbsons, pastix_int_t );
553#ifndef NDEBUG
554 memset( sontab, 0xff, nbsons * sizeof(pastix_int_t) );
555#endif
556
557 /* Create the sonindex array */
558 sonindex[0] = 0;
559 for ( i = 0; i < n; i++ ) {
560 sonindex[i + 1] = sonindex[i] + tmp[i];
561 }
562
563 /* Fill the sontab array */
564 memcpy( tmp, sonindex, n * sizeof( pastix_int_t ) );
565 for ( i = 0; i < n - 1; i++ ) {
566 pastix_int_t father = oldtreetab[i];
567 assert( ( father != i ) && ( father < n ) );
568 if ( father >= 0 ) /** IF THIS SNODE IS NOT A ROOT **/
569 {
570 assert( sontab[tmp[father]] == -1 );
571 sontab[tmp[father]] = i;
572 tmp[father]++;
573 assert( tmp[father] <= sonindex[father + 1] );
574 }
575 }
576 }
577
578 /****************************************************************/
579 /* Compute the fill to merge a column of graphL with its father */
580 /****************************************************************/
581 MALLOC_INTERN( nnzadd, n, pastix_int_t );
582 MALLOC_INTERN( gain, n, double );
583 for ( i = 0; i < n; i++ ) {
584 father = oldtreetab[i];
585 if ( ( father == -1 ) || ( father == i ) ) {
586 nnzadd[i] = INFINI;
587 gain[i] = INFINI;
588 continue;
589 }
590
591 nnzadd[i] = amalgamate_merge_cost( i, father, graphL, colweight );
592 gain[i] = (double)( nnzadd[i] );
593 }
594
595 /*****************************************************/
596 /** Merge all the columns so it doesn't add fill-in **/
597 /*****************************************************/
598 nbcblk_merged = 0;
599 for ( i = 0; i < n; i++ ) {
600 assert( colweight[i] > 0 );
601
602 if ( ( colweight[i] != 0 ) && ( nnzadd[i] == 0 ) ) {
603 father = oldtreetab[i];
604 assert( ( father > 0 ) && ( father != i ) );
605
606 nbcblk_merged++;
607
608 /* Merge the node i and its father **/
609 amalgamate_merge_col( i, father, graphL, tmp );
610
611 /* Update cost of each node */
612 colweight[father] += colweight[i];
613 colweight[i] = 0;
614
615 /* Update the nnzadd and gain array for the father */
616 k = oldtreetab[father];
617 if ( ( k != -1 ) && ( k != father ) ) {
618 nnzadd[father] = amalgamate_merge_cost( father, k, graphL, colweight );
619 gain[father] = (double)( nnzadd[father] );
620 }
621
622 /* Update the list of sons of i now */
623 ind = amalgamate_get_sonslist( i, sonindex, sontab, colweight, tmp );
624 for ( j = 0; j < ind; j++ ) {
625 k = tmp[j];
626 assert( k != father );
627 oldtreetab[k] = father;
628 nnzadd[k] = amalgamate_merge_cost( k, father, graphL, colweight );
629 gain[k] = (double)( nnzadd[k] );
630 }
631 }
632 }
633 if ( 0 ) {
634 pastix_print( procnum,
635 0,
636 "Number of cblk after amalgamation initialization = %ld (reduced by %ld)\n",
637 (long)( n - nbcblk_merged ),
638 (long)nbcblk_merged );
639 }
640
641 /* Initialize the first round with column sorted by nnzadd */
642 pqueueInit( &heap, ( n - nbcblk_merged ) );
643 for ( i = 0; i < n; i++ ) {
644 if ( ( colweight[i] > 0 ) && ( oldtreetab[i] > 0 ) && ( oldtreetab[i] != i ) ) {
645 pqueuePush1( &heap, i, gain[i] );
646 }
647 }
648
649 /*********************************************/
650 /* Merge supernodes until we reach fillwhile */
651 /*********************************************/
652restart:
653
654 /* Merge supernodes untill we reach the fillwhile limit */
655 fill = 0.0;
656 while ( ( pqueueSize( &heap ) > 0 ) && ( fill < fillwhile ) ) {
657 i = pqueuePop1( &heap, &key );
658
659 /* If we pop an old version of i, let's skip it */
660 if ( ( gain[i] != key ) || ( colweight[i] <= 0 ) )
661 continue;
662
663 /* check if we reached the fillwhile */
664 if ( ( fill + nnzadd[i] ) > fillwhile )
665 continue;
666 else
667 fill += nnzadd[i];
668
669 nbcblk_merged++;
670 father = oldtreetab[i];
671 assert( ( father > 0 ) && ( father != i ) );
672 assert( colweight[father] > 0 );
673
674 /* Merge the node i and its father **/
675 amalgamate_merge_col( i, father, graphL, tmp );
676
677 /* Update cost of each node */
678 colweight[father] += colweight[i];
679 colweight[i] = 0;
680
681 /* Update the nnzadd and gain array for the father */
682 k = oldtreetab[father];
683 if ( ( k != -1 ) && ( k != father ) ) {
684 nnzadd[father] = amalgamate_merge_cost( father, k, graphL, colweight );
685 if ( blas_gain == 1 ) {
686 gain[father] =
687 amalgamate_merge_gain( father, k, graphL, colweight, tmp2, cblktime ) /
688 nnzadd[father];
689 if ( gain[father] <= 0 )
690 pqueuePush1( &heap, father, gain[father] );
691 }
692 else {
693 gain[father] = (double)( nnzadd[father] );
694 pqueuePush1( &heap, father, gain[father] );
695 }
696 }
697
698 /* Update the list of sons of i now */
699 ind = amalgamate_get_sonslist( i, sonindex, sontab, colweight, tmp );
700 for ( j = 0; j < ind; j++ ) {
701 k = tmp[j];
702 assert( k != father );
703 oldtreetab[k] = father;
704 nnzadd[k] = amalgamate_merge_cost( k, father, graphL, colweight );
705
706 if ( blas_gain == 1 ) {
707 gain[k] = amalgamate_merge_gain( k, father, graphL, colweight, tmp2, cblktime ) /
708 nnzadd[k];
709 if ( gain[k] <= 0 )
710 pqueuePush1( &heap, k, gain[k] );
711 }
712 else {
713 gain[k] = (double)( nnzadd[k] );
714 pqueuePush1( &heap, k, gain[k] );
715 }
716 }
717 }
718
719 if ( 0 ) {
720 pastix_print( procnum,
721 0,
722 "Number of cblk after amalgamation phase = %ld (reduced by %ld)\n",
723 (long)( n - nbcblk_merged ),
724 (long)nbcblk_merged );
725 }
726 pqueueExit( &heap );
727 if ( fillwhile < fillblas ) {
728 /* Now the gain of amalgamation is based on the BLAS model */
729 assert( blas_gain == 0 );
730
731 blas_gain = 1;
732 fillwhile = fillblas;
733
734 /* Recompute the gain using BLAS model to restart the second round */
735 pqueueInit( &heap, ( n - nbcblk_merged ) );
736 for ( i = 0; i < n; i++ ) {
737 father = oldtreetab[i];
738 if ( father == -1 || father == i ) {
739 gain[i] = INFINI;
740 continue;
741 }
742 gain[i] =
743 amalgamate_merge_gain( i, father, graphL, colweight, tmp, cblktime ) / nnzadd[i];
744
745 if ( ( colweight[i] > 0 ) && ( oldtreetab[i] > 0 ) && ( oldtreetab[i] != i ) &&
746 ( gain[i] <= 0. ) ) {
747 pqueuePush1( &heap, i, gain[i] );
748 }
749 }
750
751 /* Allocate second temporary workspace for amalgamate_merge_gain */
752 MALLOC_INTERN( tmp2, nn, pastix_int_t );
753 goto restart;
754 }
755
756 if ( tmp2 != NULL ) {
757 memFree( tmp2 );
758 }
759 memFree( nnzadd );
760 memFree( gain );
761
762 /********************************/
763 /* Compute the new partition */
764 /********************************/
765
766 /* Create the new numbering array of the comptacted supernodes (stored in
767 * tmp) and compute the size of each supernode */
768 newnum = tmp;
769 memset( newnum, 0, n * sizeof( pastix_int_t ) );
770
771 order->cblknbr = n - nbcblk_merged;
772 MALLOC_INTERN( order->rangtab, order->cblknbr + 1, pastix_int_t );
773 MALLOC_INTERN( order->treetab, order->cblknbr, pastix_int_t );
774 MALLOC_INTERN( order->peritab, order->vertnbr, pastix_int_t );
775 memset( order->rangtab, 0, ( order->cblknbr + 1 ) * sizeof( pastix_int_t ) );
776 memset( order->treetab, 0xff, ( order->cblknbr ) * sizeof( pastix_int_t ) );
777 memset( order->peritab, 0xff, ( order->vertnbr ) * sizeof( pastix_int_t ) );
778
779 /* Backup former peritab */
780 newrangtab = order->rangtab;
781 newtreetab = order->treetab;
782 newperitab = order->peritab;
783
784 k = 0;
785 for ( i = 0; i < n; i++ ) {
786 if ( colweight[i] > 0 ) {
787 newnum[i] = k++;
788 newrangtab[k] = colweight[i];
789 }
790 }
791 assert( k == order->cblknbr );
792
793 /* Create the newrangtab */
794 for ( i = 0; i < order->cblknbr; i++ ) {
795 newrangtab[i + 1] += newrangtab[i];
796 }
797
798 /* Create the invp vector to adapt to the new partition */
799 for ( i = 0; i < n; i++ ) {
800 father = i;
801 if ( colweight[i] <= 0 ) {
802 /* find the cblk this node is in */
803 while ( colweight[father] <= 0 ) {
804 father = oldtreetab[father];
805 assert( father > 0 );
806 }
807 }
808
809 newfather = newnum[father];
810
811 /* Update inverse permutation */
812 for ( j = oldrangtab[i]; j < oldrangtab[i + 1]; j++ ) {
813 newperitab[newrangtab[newfather]++] = oldperitab[j];
814 }
815
816 /* Update elimination tree */
817 if ( oldtreetab[father] != -1 ) {
818 newtreetab[newfather] = newnum[oldtreetab[father]];
819 }
820 else {
821 newtreetab[newfather] = -1;
822 }
823 }
824
825 /* Reset newrangtab to its real value */
826 for ( i = order->cblknbr; i > 0; i-- ) {
827 newrangtab[i] = newrangtab[i - 1];
828 }
829 newrangtab[0] = 0;
830
831 /* Update the permutation */
832 for ( i = 0; i < order->vertnbr; i++ ) {
833 order->permtab[newperitab[i]] = i;
834 }
835
836#if defined( PASTIX_DEBUG_SYMBOL )
837 {
838 double key = 0.0;
839
840 /* Compact the graph to remove merged nodes */
841 faxCSRCompact( graphL );
842 assert( graphL->n == order->cblknbr );
843
844 /* Apply the new permutation to graphL */
845 for ( i = 0; i < graphL->n; i++ ) {
846 pastix_int_t *ja;
847 ja = graphL->rows[i];
848 for ( j = 0; j < graphL->nnz[i]; j++ ) {
849 ja[j] = order->permtab[ja[j]];
850 }
851 }
852
853 for ( i = 0; i < graphL->n; i++ ) {
854 key += cblktime( graphL->nnz[i], graphL->rows[i], colweight[i] );
855 }
856 pastix_print( procnum, 0, "COST of the AMALGAMATED MATRIX = %e\n", key );
857 }
858#endif
859
860 memFree( oldrangtab );
861 memFree( oldtreetab );
862 memFree( oldperitab );
863
864 memFree( tmp );
865 memFree( sonindex );
866 memFree( sontab );
867 memFree( colweight );
868
869#if !defined( NDEBUG )
870 assert( pastixOrderCheck( order ) == PASTIX_SUCCESS );
871#endif
872}
873
874/**
875 * @}
876 */
BEGIN_C_DECLS typedef int pastix_int_t
Definition datatypes.h:51
static void pqueuePush1(pastix_queue_t *q, pastix_int_t elt, double key1)
Push an element with a single key.
Definition queue.h:64
void pqueueExit(pastix_queue_t *)
Free the structure associated to the queue.
Definition queue.c:110
pastix_int_t pqueueSize(const pastix_queue_t *)
Return the size of the queue.
Definition queue.c:135
static pastix_int_t pqueuePop1(pastix_queue_t *q, double *key1)
Pop the head of the queue and get the associated first key.
Definition queue.h:88
int pqueueInit(pastix_queue_t *, pastix_int_t)
Initialize the queue structure with an initial space to store the elements.
Definition queue.c:81
Queue structure.
Definition queue.h:38
@ PASTIX_SUCCESS
Definition api.h:367
pastix_int_t * treetab
Definition order.h:54
pastix_int_t * permtab
Definition order.h:51
pastix_int_t * peritab
Definition order.h:52
pastix_int_t cblknbr
Definition order.h:50
pastix_int_t * rangtab
Definition order.h:53
pastix_int_t vertnbr
Definition order.h:49
int pastixOrderCheck(const pastix_order_t *ordeptr)
This routine checks the correctness of the ordering structure.
Definition order_check.c:39
Order structure.
Definition order.h:47
static pastix_int_t amalgamate_get_sonslist(pastix_int_t node, const pastix_int_t *sonindex, const pastix_int_t *sontab, const pastix_int_t *colweight, pastix_int_t *list)
Return the list of sons of a given node.
#define INFINI
Define an infinite time.
#define RAT_CBLK
Minimal memory increase accepted by the amalgamation algorithm (2%)
static void amalgamate_merge_col(pastix_int_t a, pastix_int_t b, fax_csr_t *P, pastix_int_t *tmp)
Merge two nodes together withing the given graph.
static double cblk_time_solve(pastix_int_t n, const pastix_int_t *ja, pastix_int_t colnbr)
Compute a time estimation cost of the solve step.
static pastix_int_t amalgamate_merge_cost(pastix_int_t a, pastix_int_t b, const fax_csr_t *P, const pastix_int_t *colweight)
Compute the cost of merging two nodes a and b together. The additional cost is returned.
void faxCSRAmalgamate(int ilu, double rat_cblk, double rat_blas, fax_csr_t *graphL, pastix_order_t *order, PASTIX_Comm pastix_comm)
Amalgamate the given graph up to a given tolerance.
void faxCSRCompact(fax_csr_t *csr)
Compact a compressed graph.
Definition fax_csr.c:129
static double amalgamate_merge_gain(pastix_int_t a, pastix_int_t b, const fax_csr_t *P, const pastix_int_t *colweight, pastix_int_t *tmp, double(*cblktime)(pastix_int_t, const pastix_int_t *, pastix_int_t))
Computes the computational gain of merging two nodes a and b together.
static double cblk_time_fact(pastix_int_t n, const pastix_int_t *ja, pastix_int_t colnbr)
Compute a time estimation cost of the factorization.
Fax blocked csr structure.
Definition fax_csr.h:25
static double cost(symbol_cblk_t *cblk)
Computes the cost of a cblk.